diff --git a/hl.vcxproj b/hl.vcxproj
index 88e95b28b..791c08677 100644
--- a/hl.vcxproj
+++ b/hl.vcxproj
@@ -45,55 +45,55 @@
Application
true
Unicode
- v142
+ v143
Application
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v120
+ v143
Application
false
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v142
+ v143
Application
false
true
Unicode
- v120
+ v143
@@ -361,14 +361,18 @@
+
+
+
+
diff --git a/hl.vcxproj.filters b/hl.vcxproj.filters
index f86723996..9e66b8869 100644
--- a/hl.vcxproj.filters
+++ b/hl.vcxproj.filters
@@ -4,14 +4,18 @@
-
+
+
+
+
+
\ No newline at end of file
diff --git a/libhl.vcxproj b/libhl.vcxproj
index 40f1a2eff..1f86fe1a7 100644
--- a/libhl.vcxproj
+++ b/libhl.vcxproj
@@ -36,40 +36,40 @@
DynamicLibrary
true
- v142
+ v143
Unicode
DynamicLibrary
false
- v142
+ v143
true
Unicode
DynamicLibrary
false
- v120
+ v143
true
Unicode
DynamicLibrary
true
- v142
+ v143
Unicode
DynamicLibrary
false
- v142
+ v143
true
Unicode
DynamicLibrary
false
- v120
+ v143
true
Unicode
diff --git a/src/data_struct.c b/src/data_struct.c
new file mode 100644
index 000000000..12a466216
--- /dev/null
+++ b/src/data_struct.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C)2015-2026 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifdef S_TYPE
+
+// is included by data_struct.h
+
+#ifdef S_MAP
+# define S_ARGS S_KEY k, S_VALUE v
+#else
+# define S_ARGS S_VALUE k
+# define S_KEY S_VALUE
+# define keys values
+#endif
+
+#ifndef S_DEFVAL
+# define S_DEFVAL (S_VALUE)0
+#endif
+
+#ifndef S_CMP
+# define S_CMP(a,b) a > b
+#endif
+
+typedef struct {
+ int cur;
+ int max;
+ S_KEY *keys;
+# ifdef S_MAP
+ S_VALUE *values;
+# endif
+} S_TYPE;
+
+typedef S_VALUE S_NAME(_value);
+
+INLINE static void S_NAME(check_size)( hl_alloc *alloc, S_TYPE *st ) {
+ if( st->cur == st->max ) {
+ int n = st->max ? (st->max << 1) : STRUCT_DEF_SIZE;
+ S_KEY *keys = (S_KEY*)hl_malloc(alloc,sizeof(S_KEY) * n);
+ memcpy(keys,st->keys,sizeof(S_KEY) * st->cur);
+ st->keys = keys;
+# ifdef S_MAP
+ S_VALUE *vals = (S_VALUE*)hl_malloc(alloc,sizeof(S_VALUE) * n);
+ memcpy(vals,st->values,sizeof(S_VALUE) * st->cur);
+ st->values = vals;
+# endif
+ st->max = n;
+ }
+}
+
+#ifndef S_SORTED
+
+INLINE static void S_NAME(add_impl)( hl_alloc *alloc, S_TYPE *st, S_ARGS ) {
+ S_NAME(check_size)(alloc,st);
+ st->keys[st->cur] = k;
+# ifdef S_MAP
+ st->values[st->cur] = v;
+# endif
+ st->cur++;
+}
+
+INLINE static bool S_NAME(exists)( S_TYPE st, S_KEY k ) {
+ for(int i=0;icur;
+ int pos;
+ while( min < max ) {
+ int mid = (min + max) >> 1;
+ S_KEY k2 = st->keys[mid];
+ if( S_CMP(k,k2) ) min = mid + 1; else if( S_CMP(k2,k) ) max = mid; else return false;
+ }
+ S_NAME(check_size)(alloc,st);
+ pos = (min + max) >> 1;
+ memmove(st->keys + pos + 1, st->keys + pos, (st->cur - pos) * sizeof(S_KEY));
+# ifdef S_MAP
+ memmove(st->values + pos + 1, st->values + pos, (st->cur - pos) * sizeof(S_VALUE));
+# endif
+ st->keys[pos] = k;
+# ifdef S_MAP
+ st->values[pos] = v;
+# endif
+ st->cur++;
+ return true;
+}
+
+#ifdef S_MAP
+INLINE static void S_NAME(replace_impl)( hl_alloc *alloc, S_TYPE *st, S_ARGS ) {
+ int min = 0;
+ int max = st->cur;
+ int pos;
+ while( min < max ) {
+ int mid = (min + max) >> 1;
+ S_KEY k2 = st->keys[mid];
+ if( k2 < k ) min = mid + 1; else if( k2 > k ) max = mid; else {
+ st->values[mid] = v;
+ return;
+ }
+ }
+ S_NAME(check_size)(alloc,st);
+ pos = (min + max) >> 1;
+ memmove(st->keys + pos + 1, st->keys + pos, (st->cur - pos) * sizeof(S_KEY));
+ memmove(st->values + pos + 1, st->values + pos, (st->cur - pos) * sizeof(S_VALUE));
+ st->keys[pos] = k;
+ st->values[pos] = v;
+ st->cur++;
+}
+#endif
+
+INLINE static bool S_NAME(exists)( S_TYPE st, S_KEY k ) {
+ int min = 0;
+ int max = st.cur;
+ while( min < max ) {
+ int mid = (min + max) >> 1;
+ S_KEY k2 = st.keys[mid];
+ if( S_CMP(k,k2) ) min = mid + 1; else if( S_CMP(k2,k) ) max = mid; else return true;
+ }
+ return false;
+}
+
+#ifdef S_MAP
+INLINE static S_VALUE S_NAME(find)( S_TYPE st, S_KEY k ) {
+ int min = 0;
+ int max = st.cur;
+ while( min < max ) {
+ int mid = (min + max) >> 1;
+ S_KEY k2 = st.keys[mid];
+ if( k2 < k ) min = mid + 1; else if( k2 > k ) max = mid; else return st.values[mid];
+ }
+ return S_DEFVAL;
+}
+#endif
+
+INLINE static bool S_NAME(remove)( S_TYPE *st, S_KEY k ) {
+ int min = 0;
+ int max = st->cur;
+ while( min < max ) {
+ int mid = (min + max) >> 1;
+ S_KEY k2 = st->keys[mid];
+ if( S_CMP(k,k2) ) min = mid + 1; else if( S_CMP(k2,k) ) max = mid; else {
+ int pos = mid;
+ memmove(st->keys + pos, st->keys + pos + 1, (st->cur - pos - 1) * sizeof(S_KEY));
+# ifdef S_MAP
+ memmove(st->values + pos, st->values + pos + 1, (st->cur - pos - 1) * sizeof(S_VALUE));
+# endif
+ st->cur--;
+ return true;
+ }
+ }
+ return false;
+}
+
+#endif
+
+INLINE static void S_NAME(reset)( S_TYPE *st ) {
+ st->cur = 0;
+}
+
+INLINE static S_VALUE *S_NAME(free)( S_TYPE *st ) {
+ st->cur = 0;
+ st->max = 0;
+ S_VALUE *vals = st->values;
+# ifdef S_MAP
+ st->keys = NULL;
+# endif
+ st->values = NULL;
+ return vals;
+}
+
+INLINE static int S_NAME(count)( S_TYPE st ) {
+ return st.cur;
+}
+
+INLINE static S_VALUE S_NAME(get)( S_TYPE st, int idx ) {
+ return st.values[idx];
+}
+
+INLINE static S_VALUE S_NAME(first)( S_TYPE st ) {
+ return st.cur == 0 ? S_DEFVAL : st.values[0];
+}
+
+INLINE static bool S_NAME(iter_next)( S_TYPE st, S_VALUE *val, int idx ) {
+ if( idx < st.cur ) *val = st.values[idx];
+ return idx < st.cur;
+}
+
+#undef S_NAME
+#undef S_TYPE
+#undef S_VALUE
+#undef S_KEY
+#undef S_ARGS
+#undef STRUCT_NAME
+#undef S_CMP
+#undef S_DEFVAL
+#undef keys
+
+#endif
diff --git a/src/data_struct.h b/src/data_struct.h
new file mode 100644
index 000000000..1bd52c652
--- /dev/null
+++ b/src/data_struct.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C)2015-2026 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef HL_DATA_STRUCT_H
+#define HL_DATA_STRUCT_H
+
+#include
+
+#if defined(__GNUC__) || defined(__clang__)
+#define INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define INLINE __forceinline
+#else
+#define INLINE inline
+#endif
+
+#define STRUCT_DEF_SIZE 2
+#define for_iter(name,var,set) name##__value var; for(int __idx=0;name##_iter_next(set,&var,__idx);__idx++)
+
+#define S_TYPE ptr_set
+#define S_NAME(name) ptr_set_##name
+#define S_VALUE void*
+#include "data_struct.c"
+#define ptr_set_add(set,v) ptr_set_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_TYPE int_arr
+#define S_NAME(name) int_arr_##name
+#define S_VALUE int
+#include "data_struct.c"
+#define int_arr_add(set,v) int_arr_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_SORTED
+
+#define S_TYPE int_set
+#define S_NAME(name) int_set_##name
+#define S_VALUE int
+#include "data_struct.c"
+#define int_set_add(set,v) int_set_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_MAP
+
+#define S_TYPE int_map
+#define S_NAME(name) int_map_##name
+#define S_KEY int
+#define S_VALUE int
+#include "data_struct.c"
+#define int_map_add(map,k,v) int_map_add_impl(DEF_ALLOC,&(map),k,v)
+#define int_map_replace(map,k,v) int_map_replace_impl(DEF_ALLOC,&(map),k,v)
+
+#define S_TYPE ptr_map
+#define S_NAME(name) ptr_map_##name
+#define S_KEY int
+#define S_VALUE void*
+#include "data_struct.c"
+#define ptr_map_add(map,k,v) ptr_map_add_impl(DEF_ALLOC,&(map),k,v)
+#define ptr_map_replace(map,k,v) ptr_map_replace_impl(DEF_ALLOC,&(map),k,v)
+
+#undef S_MAP
+#undef S_SORTED
+
+#endif
diff --git a/src/hlmodule.h b/src/hlmodule.h
index d8ea8c912..01ab8be2e 100644
--- a/src/hlmodule.h
+++ b/src/hlmodule.h
@@ -19,6 +19,9 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
+#ifndef HL_MODULE_H
+#define HL_MODULE_H
+
#include
#include
#include "opcodes.h"
@@ -104,9 +107,6 @@ typedef struct {
bool large;
} hl_debug_infos;
-typedef struct _jit_ctx jit_ctx;
-
-
typedef struct {
hl_code *code;
int *types_hashes;
@@ -120,6 +120,8 @@ typedef struct {
#define WIN64_UNWIND_TABLES
#endif
+typedef struct _jit_ctx jit_ctx;
+
typedef struct {
hl_code *code;
int codesize;
@@ -161,10 +163,4 @@ hl_type *hl_module_resolve_type( hl_module *m, hl_type *t, bool err );
void hl_profile_setup( int sample_count );
void hl_profile_end();
-jit_ctx *hl_jit_alloc();
-void hl_jit_free( jit_ctx *ctx, h_bool can_reset );
-void hl_jit_reset( jit_ctx *ctx, hl_module *m );
-void hl_jit_init( jit_ctx *ctx, hl_module *m );
-int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f );
-void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous );
-void hl_jit_patch_method( void *old_fun, void **new_fun_table );
+#endif
diff --git a/src/jit.c b/src/jit.c
index 7e4e6e88b..19e8a233c 100644
--- a/src/jit.c
+++ b/src/jit.c
@@ -19,4712 +19,94 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
-#ifdef _MSC_VER
-#pragma warning(disable:4820)
-#endif
-#include
-#include
-#include "hlsystem.h"
+#include
-#ifdef __arm__
-# error "JIT does not support ARM processors, only x86 and x86-64 are supported, please use HashLink/C native compilation instead"
-#endif
+static jit_ctx *current_ctx = NULL;
-#ifdef HL_DEBUG
-# define JIT_DEBUG
-#endif
-
-typedef enum {
- Eax = 0,
- Ecx = 1,
- Edx = 2,
- Ebx = 3,
- Esp = 4,
- Ebp = 5,
- Esi = 6,
- Edi = 7,
-#ifdef HL_64
- R8 = 8,
- R9 = 9,
- R10 = 10,
- R11 = 11,
- R12 = 12,
- R13 = 13,
- R14 = 14,
- R15 = 15,
-#endif
- _LAST = 0xFF
-} CpuReg;
-
-typedef enum {
- MOV,
- LEA,
- PUSH,
- ADD,
- SUB,
- IMUL, // only overflow flag changes compared to MUL
- DIV,
- IDIV,
- CDQ,
- CDQE,
- POP,
- RET,
- CALL,
- AND,
- OR,
- XOR,
- CMP,
- TEST,
- NOP,
- SHL,
- SHR,
- SAR,
- INC,
- DEC,
- JMP,
- // FPU
- FSTP,
- FSTP32,
- FLD,
- FLD32,
- FLDCW,
- // SSE
- MOVSD,
- MOVSS,
- COMISD,
- COMISS,
- ADDSD,
- SUBSD,
- MULSD,
- DIVSD,
- ADDSS,
- SUBSS,
- MULSS,
- DIVSS,
- XORPD,
- CVTSI2SD,
- CVTSI2SS,
- CVTSD2SI,
- CVTSD2SS,
- CVTSS2SD,
- CVTSS2SI,
- STMXCSR,
- LDMXCSR,
- // 8-16 bits
- MOV8,
- CMP8,
- TEST8,
- PUSH8,
- MOV16,
- CMP16,
- TEST16,
- // prefetchs
- PREFETCHT0,
- PREFETCHT1,
- PREFETCHT2,
- PREFETCHNTA,
- PREFETCHW,
- // --
- _CPU_LAST
-} CpuOp;
-
-#define JAlways 0
-#define JOverflow 0x80
-#define JULt 0x82
-#define JUGte 0x83
-#define JEq 0x84
-#define JNeq 0x85
-#define JULte 0x86
-#define JUGt 0x87
-#define JParity 0x8A
-#define JNParity 0x8B
-#define JSLt 0x8C
-#define JSGte 0x8D
-#define JSLte 0x8E
-#define JSGt 0x8F
-
-#define JCarry JLt
-#define JZero JEq
-#define JNotZero JNeq
-
-#define B(bv) *ctx->buf.b++ = (unsigned char)(bv)
-#define W(wv) *ctx->buf.w++ = wv
-
-#ifdef HL_64
-# define W64(wv) *ctx->buf.w64++ = wv
-#else
-# define W64(wv) W(wv)
-#endif
-
-static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
-
-#define MOD_RM(mod,reg,rm) B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7))
-#define SIB(mult,rmult,rbase) B((SIB_MULT[mult]<<6) | (((rmult)&7)<<3) | ((rbase)&7))
-#define IS_SBYTE(c) ( (c) >= -128 && (c) < 128 )
-
-#define AddJump(how,local) { if( (how) == JAlways ) { B(0xE9); } else { B(0x0F); B(how); }; local = BUF_POS(); W(0); }
-#define AddJump_small(how,local) { if( (how) == JAlways ) { B(0xEB); } else B(how - 0x10); local = BUF_POS() | 0x40000000; B(0); }
-#define XJump(how,local) AddJump(how,local)
-#define XJump_small(how,local) AddJump_small(how,local)
-
-#define MAX_OP_SIZE 256
-
-#define BUF_POS() ((int)(ctx->buf.b - ctx->startBuf))
-#define RTYPE(r) r->t->kind
-
-#ifdef HL_64
-# define RESERVE_ADDRESS 0x8000000000000000
-#else
-# define RESERVE_ADDRESS 0x80000000
-#endif
-
-#if defined(HL_WIN_CALL) && defined(HL_64)
-# define IS_WINCALL64 1
-#else
-# define IS_WINCALL64 0
-#endif
-
-typedef struct jlist jlist;
-struct jlist {
- int pos;
- int target;
- jlist *next;
-};
-
-typedef struct vreg vreg;
-
-typedef enum {
- RCPU = 0,
- RFPU = 1,
- RSTACK = 2,
- RCONST = 3,
- RADDR = 4,
- RMEM = 5,
- RUNUSED = 6,
- RCPU_CALL = 1 | 8,
- RCPU_8BITS = 1 | 16
-} preg_kind;
-
-typedef struct {
- preg_kind kind;
- int id;
- int lock;
- vreg *holds;
-} preg;
-
-struct vreg {
- int stackPos;
- int size;
- hl_type *t;
- preg *current;
- preg stack;
-};
-
-#define REG_AT(i) (ctx->pregs + (i))
-
-#ifdef HL_64
-# define RCPU_COUNT 16
-# define RFPU_COUNT 16
-# ifdef HL_WIN_CALL
-# define CALL_NREGS 4
-# define RCPU_SCRATCH_COUNT 7
-# define RFPU_SCRATCH_COUNT 6
-static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 };
-static const CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 };
-# else
-# define CALL_NREGS 6 // TODO : XMM6+XMM7 are FPU reg parameters
-# define RCPU_SCRATCH_COUNT 9
-# define RFPU_SCRATCH_COUNT 16
-static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 };
-static const CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 };
-# endif
-#else
-# define CALL_NREGS 0
-# define RCPU_COUNT 8
-# define RFPU_COUNT 8
-# define RCPU_SCRATCH_COUNT 3
-# define RFPU_SCRATCH_COUNT 8
-static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx };
-#endif
-
-#define XMM(i) ((i) + RCPU_COUNT)
-#define PXMM(i) REG_AT(XMM(i))
-#define REG_IS_FPU(i) ((i) >= RCPU_COUNT)
-
-#define PEAX REG_AT(Eax)
-#define PESP REG_AT(Esp)
-#define PEBP REG_AT(Ebp)
-
-#define REG_COUNT (RCPU_COUNT + RFPU_COUNT)
-
-#define ID2(a,b) ((a) | ((b)<<8))
-#define R(id) (ctx->vregs + (id))
-#define ASSERT(i) { printf("JIT ERROR %d (jit.c line %d)\n",i,(int)__LINE__); jit_exit(); }
-#define IS_FLOAT(r) ((r)->t->kind == HF64 || (r)->t->kind == HF32)
-#define RLOCK(r) if( (r)->lock < ctx->currentPos ) (r)->lock = ctx->currentPos
-#define RUNLOCK(r) if( (r)->lock == ctx->currentPos ) (r)->lock = 0
-
-#define BREAK() B(0xCC)
-
-static preg _unused = { RUNUSED, 0, 0, NULL };
-static preg *UNUSED = &_unused;
-
-struct _jit_ctx {
- union {
- unsigned char *b;
- unsigned int *w;
- unsigned long long *w64;
- int *i;
- double *d;
- } buf;
- vreg *vregs;
- preg pregs[REG_COUNT];
- vreg *savedRegs[REG_COUNT];
- int savedLocks[REG_COUNT];
- int *opsPos;
- int maxRegs;
- int maxOps;
- int bufSize;
- int totalRegsSize;
- int functionPos;
- int allocOffset;
- int currentPos;
- int nativeArgsCount;
- unsigned char *startBuf;
- hl_module *m;
- hl_function *f;
- jlist *jumps;
- jlist *calls;
- jlist *switchs;
- hl_alloc falloc; // cleared per-function
- hl_alloc galloc;
- vclosure *closure_list;
- hl_debug_infos *debug;
- int c2hl;
- int hl2c;
- void *static_functions[8];
- bool static_function_offset;
-#ifdef WIN64_UNWIND_TABLES
- int unwind_offset;
- int nunwind;
- PRUNTIME_FUNCTION unwind_table;
-#endif
-};
-
-#ifdef WIN64_UNWIND_TABLES
-
-typedef enum _UNWIND_OP_CODES
-{
- UWOP_PUSH_NONVOL = 0, /* info == register number */
- UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */
- UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */
- UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
- UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */
- UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
- UWOP_SAVE_XMM128 = 8, /* info == XMM reg number, offset in next slot */
- UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
- UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */
-} UNWIND_CODE_OPS;
-
-void write_uwcode(jit_ctx *ctx, unsigned char offset, UNWIND_CODE_OPS code, unsigned char info)
-{
- B(offset);
- B((code) | (info) << 4);
-}
-
-void write_unwind_data(jit_ctx *ctx)
-{
- // All generated functions use a frame pointer, so the same unwind info can be used for all of them
- unsigned char version = 1;
- unsigned char flags = 0;
- unsigned char CountOfCodes = 2;
- unsigned char SizeOfProlog = 4;
- unsigned char FrameRegister = 5; // RBP
- unsigned char FrameOffset = 0;
- B((version) | (flags) << 3);
- B(SizeOfProlog);
- B(CountOfCodes);
- B((FrameRegister) | (FrameOffset) << 4);
- write_uwcode(ctx, 4, UWOP_SET_FPREG, 0);
- write_uwcode(ctx, 1, UWOP_PUSH_NONVOL, 5);
-}
-#endif
-
-#define jit_exit() { hl_debug_break(); exit(-1); }
-#define jit_error(msg) _jit_error(ctx,msg,__LINE__)
-
-#ifndef HL_64
-# ifdef HL_DEBUG
-# define error_i64() jit_error("i64-32")
-# else
-void error_i64() {
- printf("The module you are loading is using 64 bit ints that are not supported by the HL32.\nPlease run using HL64 or compile with -D hl-legacy32");
- jit_exit();
-}
-# endif
-#endif
-
-static void _jit_error( jit_ctx *ctx, const char *msg, int line );
-static void on_jit_error( const char *msg, int_val line );
-
-static preg *pmem( preg *r, CpuReg reg, int offset ) {
- r->kind = RMEM;
- r->id = 0 | (reg << 4) | (offset << 8);
- return r;
-}
-
-static preg *pmem2( preg *r, CpuReg reg, CpuReg reg2, int mult, int offset ) {
- r->kind = RMEM;
- r->id = mult | (reg << 4) | (reg2 << 8);
- r->holds = (void*)(int_val)offset;
- return r;
-}
-
-#ifdef HL_64
-static preg *pcodeaddr( preg *r, int offset ) {
- r->kind = RMEM;
- r->id = 15 | (offset << 4);
- return r;
-}
-#endif
-
-static preg *pconst( preg *r, int c ) {
- r->kind = RCONST;
- r->holds = NULL;
- r->id = c;
- return r;
-}
-
-static preg *pconst64( preg *r, int_val c ) {
-#ifdef HL_64
- if( ((int)c) == c )
- return pconst(r,(int)c);
- r->kind = RCONST;
- r->id = 0xC064C064;
- r->holds = (vreg*)c;
- return r;
-#else
- return pconst(r,(int)c);
-#endif
-}
-
-#ifndef HL_64
-// it is not possible to access direct 64 bit address in x86-64
-static preg *paddr( preg *r, void *p ) {
- r->kind = RADDR;
- r->holds = (vreg*)p;
- return r;
-}
-#endif
-
-static void save_regs( jit_ctx *ctx ) {
- int i;
- for(i=0;isavedRegs[i] = ctx->pregs[i].holds;
- ctx->savedLocks[i] = ctx->pregs[i].lock;
- }
-}
-
-static void restore_regs( jit_ctx *ctx ) {
- int i;
- for(i=0;imaxRegs;i++)
- ctx->vregs[i].current = NULL;
- for(i=0;isavedRegs[i];
- preg *p = ctx->pregs + i;
- p->holds = r;
- p->lock = ctx->savedLocks[i];
- if( r ) r->current = p;
- }
-}
-
-static void jit_buf( jit_ctx *ctx ) {
- if( BUF_POS() > ctx->bufSize - MAX_OP_SIZE ) {
- int nsize = ctx->bufSize * 4 / 3;
- unsigned char *nbuf;
- int curpos;
- if( nsize == 0 ) {
- int i;
- for(i=0;im->code->nfunctions;i++)
- nsize += ctx->m->code->functions[i].nops;
- nsize *= 4;
- }
- if( nsize < ctx->bufSize + MAX_OP_SIZE * 4 ) nsize = ctx->bufSize + MAX_OP_SIZE * 4;
- curpos = BUF_POS();
- nbuf = (unsigned char*)malloc(nsize);
- if( nbuf == NULL ) ASSERT(nsize);
- if( ctx->startBuf ) {
- memcpy(nbuf,ctx->startBuf,curpos);
- free(ctx->startBuf);
- }
- ctx->startBuf = nbuf;
- ctx->buf.b = nbuf + curpos;
- ctx->bufSize = nsize;
- }
-}
-
-static const char *KNAMES[] = { "cpu","fpu","stack","const","addr","mem","unused" };
-#define ERRIF(c) if( c ) { printf("%s(%s,%s)\n",f?f->name:"???",KNAMES[a->kind], KNAMES[b->kind]); ASSERT(0); }
-
-typedef struct {
- const char *name; // single operand
- int r_mem; // r32 / r/m32 r32
- int mem_r; // r/m32 / r32 r/m32
- int r_const; // r32 / imm32 imm32
- int r_i8; // r32 / imm8 imm8
- int mem_const; // r/m32 / imm32 N/A
-} opform;
-
-#define FLAG_LONGOP 0x80000000
-#define FLAG_16B 0x40000000
-#define FLAG_8B 0x20000000
-#define FLAG_DUAL 0x10000000
-
-#define RM(op,id) ((op) | (((id)+1)<<8))
-#define GET_RM(op) (((op) >> ((op) < 0 ? 24 : 8)) & 15)
-#define SBYTE(op) ((op) << 16)
-#define LONG_OP(op) ((op) | FLAG_LONGOP)
-#define OP16(op) LONG_OP((op) | FLAG_16B)
-#define LONG_RM(op,id) LONG_OP(op | (((id) + 1) << 24))
-
-static opform OP_FORMS[_CPU_LAST] = {
- { "MOV", 0x8B, 0x89, 0xB8, 0, RM(0xC7,0) },
- { "LEA", 0x8D },
- { "PUSH", 0x50, RM(0xFF,6), 0x68, 0x6A },
- { "ADD", 0x03, 0x01, RM(0x81,0), RM(0x83,0) },
- { "SUB", 0x2B, 0x29, RM(0x81,5), RM(0x83,5) },
- { "IMUL", LONG_OP(0x0FAF), 0, 0x69 | FLAG_DUAL, 0x6B | FLAG_DUAL },
- { "DIV", RM(0xF7,6), RM(0xF7,6) },
- { "IDIV", RM(0xF7,7), RM(0xF7,7) },
- { "CDQ", 0x99 },
- { "CDQE", 0x98 },
- { "POP", 0x58, RM(0x8F,0) },
- { "RET", 0xC3 },
- { "CALL", RM(0xFF,2), RM(0xFF,2), 0xE8 },
- { "AND", 0x23, 0x21, RM(0x81,4), RM(0x83,4) },
- { "OR", 0x0B, 0x09, RM(0x81,1), RM(0x83,1) },
- { "XOR", 0x33, 0x31, RM(0x81,6), RM(0x83,6) },
- { "CMP", 0x3B, 0x39, RM(0x81,7), RM(0x83,7) },
- { "TEST", 0x85, 0x85/*SWP?*/, RM(0xF7,0) },
- { "NOP", 0x90 },
- { "SHL", RM(0xD3,4), 0, 0, RM(0xC1,4) },
- { "SHR", RM(0xD3,5), 0, 0, RM(0xC1,5) },
- { "SAR", RM(0xD3,7), 0, 0, RM(0xC1,7) },
- { "INC", IS_64 ? RM(0xFF,0) : 0x40, RM(0xFF,0) },
- { "DEC", IS_64 ? RM(0xFF,1) : 0x48, RM(0xFF,1) },
- { "JMP", RM(0xFF,4) },
- // FPU
- { "FSTP", 0, RM(0xDD,3) },
- { "FSTP32", 0, RM(0xD9,3) },
- { "FLD", 0, RM(0xDD,0) },
- { "FLD32", 0, RM(0xD9,0) },
- { "FLDCW", 0, RM(0xD9, 5) },
- // SSE
- { "MOVSD", 0xF20F10, 0xF20F11 },
- { "MOVSS", 0xF30F10, 0xF30F11 },
- { "COMISD", 0x660F2F },
- { "COMISS", LONG_OP(0x0F2F) },
- { "ADDSD", 0xF20F58 },
- { "SUBSD", 0xF20F5C },
- { "MULSD", 0xF20F59 },
- { "DIVSD", 0xF20F5E },
- { "ADDSS", 0xF30F58 },
- { "SUBSS", 0xF30F5C },
- { "MULSS", 0xF30F59 },
- { "DIVSS", 0xF30F5E },
- { "XORPD", 0x660F57 },
- { "CVTSI2SD", 0xF20F2A },
- { "CVTSI2SS", 0xF30F2A },
- { "CVTSD2SI", 0xF20F2D },
- { "CVTSD2SS", 0xF20F5A },
- { "CVTSS2SD", 0xF30F5A },
- { "CVTSS2SI", 0xF30F2D },
- { "STMXCSR", 0, LONG_RM(0x0FAE,3) },
- { "LDMXCSR", 0, LONG_RM(0x0FAE,2) },
- // 8 bits,
- { "MOV8", 0x8A, 0x88, 0, 0xB0, RM(0xC6,0) },
- { "CMP8", 0x3A, 0x38, 0, RM(0x80,7) },
- { "TEST8", 0x84, 0x84, RM(0xF6,0) },
- { "PUSH8", 0, 0, 0x6A | FLAG_8B },
- { "MOV16", OP16(0x8B), OP16(0x89), OP16(0xB8) },
- { "CMP16", OP16(0x3B), OP16(0x39) },
- { "TEST16", OP16(0x85) },
- // prefetchs
- { "PREFETCHT0", 0, LONG_RM(0x0F18,1) },
- { "PREFETCHT1", 0, LONG_RM(0x0F18,2) },
- { "PREFETCHT2", 0, LONG_RM(0x0F18,3) },
- { "PREFETCHNTA", 0, LONG_RM(0x0F18,0) },
- { "PREFETCHW", 0, LONG_RM(0x0F0D,1) },
-};
-
-#ifdef HL_64
-# define REX() if( r64 ) B(r64 | 0x40)
-#else
-# define REX()
-#endif
-
-#define OP(b) \
- if( (b) & 0xFF0000 ) { \
- B((b)>>16); \
- if( r64 ) B(r64 | 0x40); /* also in 32 bits mode */ \
- B((b)>>8); \
- B(b); \
- } else { \
- if( (b) & FLAG_16B ) { \
- B(0x66); \
- REX(); \
- } else {\
- REX(); \
- if( (b) & FLAG_LONGOP ) B((b)>>8); \
- }\
- B(b); \
+void hl_jit_error( const char *msg, const char *func, int line ) {
+ printf("*** JIT ERROR %s:%d (%s)****\n", func, line, msg);
+ if( current_ctx ) {
+ jit_ctx *ctx = current_ctx;
+ current_ctx = NULL;
+ hl_emit_dump(ctx);
}
-
-static bool is_reg8( preg *a ) {
- return a->kind == RSTACK || a->kind == RMEM || a->kind == RCONST || (a->kind == RCPU && a->id != Esi && a->id != Edi);
+ fflush(stdout);
}
-static void op( jit_ctx *ctx, CpuOp o, preg *a, preg *b, bool mode64 ) {
- opform *f = &OP_FORMS[o];
- int r64 = mode64 && (o != PUSH && o != POP && o != CALL && o != PUSH8 && o < PREFETCHT0) ? 8 : 0;
- switch( o ) {
- case CMP8:
- case TEST8:
- case MOV8:
- if( !is_reg8(a) || !is_reg8(b) )
- ASSERT(0);
- break;
- default:
- break;
- }
- switch( ID2(a->kind,b->kind) ) {
- case ID2(RUNUSED,RUNUSED):
- ERRIF(f->r_mem == 0);
- OP(f->r_mem);
- break;
- case ID2(RCPU,RCPU):
- case ID2(RFPU,RFPU):
- ERRIF( f->r_mem == 0 );
- if( a->id > 7 ) r64 |= 4;
- if( b->id > 7 ) r64 |= 1;
- OP(f->r_mem);
- MOD_RM(3,a->id,b->id);
- break;
- case ID2(RCPU,RFPU):
- case ID2(RFPU,RCPU):
- ERRIF( (f->r_mem>>16) == 0 );
- if( a->id > 7 ) r64 |= 4;
- if( b->id > 7 ) r64 |= 1;
- OP(f->r_mem);
- MOD_RM(3,a->id,b->id);
- break;
- case ID2(RCPU,RUNUSED):
- ERRIF( f->r_mem == 0 );
- if( a->id > 7 ) r64 |= 1;
- if( GET_RM(f->r_mem) > 0 ) {
- OP(f->r_mem);
- MOD_RM(3, GET_RM(f->r_mem)-1, a->id);
- } else
- OP(f->r_mem + (a->id&7));
- break;
- case ID2(RSTACK,RUNUSED):
- ERRIF( f->mem_r == 0 || GET_RM(f->mem_r) == 0 );
- {
- int stackPos = R(a->id)->stackPos;
- OP(f->mem_r);
- if( IS_SBYTE(stackPos) ) {
- MOD_RM(1,GET_RM(f->mem_r)-1,Ebp);
- B(stackPos);
- } else {
- MOD_RM(2,GET_RM(f->mem_r)-1,Ebp);
- W(stackPos);
- }
- }
- break;
- case ID2(RCPU,RCONST):
- ERRIF( f->r_const == 0 && f->r_i8 == 0 );
- if( a->id > 7 ) r64 |= 1;
- {
- int_val cval = b->holds ? (int_val)b->holds : b->id;
- // short byte form
- if( f->r_i8 && IS_SBYTE(cval) ) {
- if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4;
- OP(f->r_i8);
- if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_i8)-1,a->id);
- B((int)cval);
- } else if( GET_RM(f->r_const) > 0 || (f->r_const&FLAG_DUAL) ) {
- if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4;
- OP(f->r_const&0xFF);
- if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_const)-1,a->id);
- if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
- } else {
- ERRIF( f->r_const == 0);
- OP((f->r_const&0xFF) + (a->id&7));
- if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
- }
- }
- break;
- case ID2(RSTACK,RCPU):
- case ID2(RSTACK,RFPU):
- ERRIF( f->mem_r == 0 );
- if( b->id > 7 ) r64 |= 4;
- {
- int stackPos = R(a->id)->stackPos;
- OP(f->mem_r);
- if( IS_SBYTE(stackPos) ) {
- MOD_RM(1,b->id,Ebp);
- B(stackPos);
- } else {
- MOD_RM(2,b->id,Ebp);
- W(stackPos);
- }
- }
- break;
- case ID2(RCPU,RSTACK):
- case ID2(RFPU,RSTACK):
- ERRIF( f->r_mem == 0 );
- if( a->id > 7 ) r64 |= 4;
- {
- int stackPos = R(b->id)->stackPos;
- OP(f->r_mem);
- if( IS_SBYTE(stackPos) ) {
- MOD_RM(1,a->id,Ebp);
- B(stackPos);
- } else {
- MOD_RM(2,a->id,Ebp);
- W(stackPos);
- }
- }
- break;
- case ID2(RCONST,RUNUSED):
- ERRIF( f->r_const == 0 );
- {
- int_val cval = a->holds ? (int_val)a->holds : a->id;
- OP(f->r_const);
- if( f->r_const & FLAG_8B ) B((int)cval); else W((int)cval);
- }
- break;
- case ID2(RMEM,RUNUSED):
- ERRIF( f->mem_r == 0 );
- {
- int mult = a->id & 0xF;
- int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
- CpuReg reg = (a->id >> 4) & 0xF;
- if( mult == 15 ) {
- ERRIF(1);
- } else if( mult == 0 ) {
- if( reg > 7 ) r64 |= 1;
- OP(f->mem_r);
- if( regOrOffs == 0 && (reg&7) != Ebp ) {
- MOD_RM(0,GET_RM(f->mem_r)-1,reg);
- if( (reg&7) == Esp ) B(0x24);
- } else if( IS_SBYTE(regOrOffs) ) {
- MOD_RM(1,GET_RM(f->mem_r)-1,reg);
- if( (reg&7) == Esp ) B(0x24);
- B(regOrOffs);
- } else {
- MOD_RM(2,GET_RM(f->mem_r)-1,reg);
- if( (reg&7) == Esp ) B(0x24);
- W(regOrOffs);
- }
- } else {
- // [eax + ebx * M]
- ERRIF(1);
- }
- }
- break;
- case ID2(RCPU, RMEM):
- case ID2(RFPU, RMEM):
- ERRIF( f->r_mem == 0 );
- {
- int mult = b->id & 0xF;
- int regOrOffs = mult == 15 ? b->id >> 4 : b->id >> 8;
- CpuReg reg = (b->id >> 4) & 0xF;
- if( mult == 15 ) {
- int pos;
- if( a->id > 7 ) r64 |= 4;
- OP(f->r_mem);
- MOD_RM(0,a->id,5);
- if( IS_64 ) {
- // offset wrt current code
- pos = BUF_POS() + 4;
- W(regOrOffs - pos);
- } else {
- ERRIF(1);
- }
- } else if( mult == 0 ) {
- if( a->id > 7 ) r64 |= 4;
- if( reg > 7 ) r64 |= 1;
- OP(f->r_mem);
- if( regOrOffs == 0 && (reg&7) != Ebp ) {
- MOD_RM(0,a->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- } else if( IS_SBYTE(regOrOffs) ) {
- MOD_RM(1,a->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- B(regOrOffs);
- } else {
- MOD_RM(2,a->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- W(regOrOffs);
- }
- } else {
- int offset = (int)(int_val)b->holds;
- if( a->id > 7 ) r64 |= 4;
- if( reg > 7 ) r64 |= 1;
- if( regOrOffs > 7 ) r64 |= 2;
- OP(f->r_mem);
- MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,a->id,4);
- SIB(mult,regOrOffs,reg);
- if( offset ) {
- if( IS_SBYTE(offset) ) B(offset); else W(offset);
- }
- }
- }
- break;
-# ifndef HL_64
- case ID2(RFPU,RADDR):
-# endif
- case ID2(RCPU,RADDR):
- ERRIF( f->r_mem == 0 );
- if( a->id > 7 ) r64 |= 4;
- OP(f->r_mem);
- MOD_RM(0,a->id,5);
- if( IS_64 )
- W64((int_val)b->holds);
- else
- W((int)(int_val)b->holds);
- break;
-# ifndef HL_64
- case ID2(RADDR,RFPU):
-# endif
- case ID2(RADDR,RCPU):
- ERRIF( f->mem_r == 0 );
- if( b->id > 7 ) r64 |= 4;
- OP(f->mem_r);
- MOD_RM(0,b->id,5);
- if( IS_64 )
- W64((int_val)a->holds);
- else
- W((int)(int_val)a->holds);
- break;
- case ID2(RMEM, RCPU):
- case ID2(RMEM, RFPU):
- ERRIF( f->mem_r == 0 );
- {
- int mult = a->id & 0xF;
- int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
- CpuReg reg = (a->id >> 4) & 0xF;
- if( mult == 15 ) {
- int pos;
- if( b->id > 7 ) r64 |= 4;
- OP(f->mem_r);
- MOD_RM(0,b->id,5);
- if( IS_64 ) {
- // offset wrt current code
- pos = BUF_POS() + 4;
- W(regOrOffs - pos);
- } else {
- ERRIF(1);
- }
- } else if( mult == 0 ) {
- if( b->id > 7 ) r64 |= 4;
- if( reg > 7 ) r64 |= 1;
- OP(f->mem_r);
- if( regOrOffs == 0 && (reg&7) != Ebp ) {
- MOD_RM(0,b->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- } else if( IS_SBYTE(regOrOffs) ) {
- MOD_RM(1,b->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- B(regOrOffs);
- } else {
- MOD_RM(2,b->id,reg);
- if( (reg&7) == Esp ) B(0x24);
- W(regOrOffs);
- }
- } else {
- int offset = (int)(int_val)a->holds;
- if( b->id > 7 ) r64 |= 4;
- if( reg > 7 ) r64 |= 1;
- if( regOrOffs > 7 ) r64 |= 2;
- OP(f->mem_r);
- MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,b->id,4);
- SIB(mult,regOrOffs,reg);
- if( offset ) {
- if( IS_SBYTE(offset) ) B(offset); else W(offset);
- }
- }
- }
- break;
- default:
- ERRIF(1);
- }
- if( ctx->debug && ctx->f && o == CALL ) {
- preg p;
- op(ctx,MOV,pmem(&p,Esp,-HL_WSIZE),PEBP,true); // erase EIP (clean stack report)
- }
-}
+void hl_jit_null_field_access() { jit_assert(); }
+void hl_jit_null_access() { jit_assert(); }
+void hl_jit_assert() { jit_assert(); }
-static void op32( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
- op(ctx,o,a,b,false);
+void int_alloc_reset( int_alloc *a ) {
+ a->cur = 0;
}
-static void op64( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
-#ifndef HL_64
- op(ctx,o,a,b,false);
-#else
- op(ctx,o,a,b,true);
-#endif
+void int_alloc_free( int_alloc *a ) {
+ free(a->data);
+ a->cur = 0;
+ a->max = 0;
+ a->data = NULL;
}
-static void patch_jump( jit_ctx *ctx, int p ) {
- if( p == 0 ) return;
- if( p & 0x40000000 ) {
- int d;
- p &= 0x3FFFFFFF;
- d = BUF_POS() - (p + 1);
- if( d < -128 || d >= 128 ) ASSERT(d);
- *(char*)(ctx->startBuf + p) = (char)d;
- } else {
- *(int*)(ctx->startBuf + p) = BUF_POS() - (p + 4);
+int *int_alloc_get( int_alloc *a, int count ) {
+ while( a->cur + count > a->max ) {
+ int next_size = a->max ? a->max << 1 : 128;
+ int *new_data = (int*)malloc(sizeof(int) * next_size);
+ if( new_data == NULL ) jit_error("Out of memory");
+ memcpy(new_data, a->data, sizeof(int) * a->cur);
+ free(a->data);
+ a->data = new_data;
+ a->max = next_size;
}
+ int *ptr = a->data + a->cur;
+ a->cur += count;
+ return ptr;
}
-static void patch_jump_to( jit_ctx *ctx, int p, int target ) {
- if( p == 0 ) return;
- if( p & 0x40000000 ) {
- int d;
- p &= 0x3FFFFFFF;
- d = target - (p + 1);
- if( d < -128 || d >= 128 ) ASSERT(d);
- *(char*)(ctx->startBuf + p) = (char)d;
- } else {
- *(int*)(ctx->startBuf + p) = target - (p + 4);
- }
-}
-
-static int stack_size( hl_type *t ) {
- switch( t->kind ) {
- case HUI8:
- case HUI16:
- case HBOOL:
-# ifdef HL_64
- case HI32:
- case HF32:
-# endif
- return sizeof(int_val);
- case HI64:
- default:
- return hl_type_size(t);
- }
-}
-
-static int call_reg_index( int reg ) {
-# ifdef HL_64
- int i;
- for(i=0;ikind == RFPU )
- return p->id < CALL_NREGS;
- for(i=0;ikind == RCPU && p->id == CALL_REGS[i] )
- return true;
- return false;
-# else
- return false;
-# endif
-}
-
-static preg *alloc_reg( jit_ctx *ctx, preg_kind k ) {
- int i;
- preg *p;
- switch( k ) {
- case RCPU:
- case RCPU_CALL:
- case RCPU_8BITS:
- {
- int off = ctx->allocOffset++;
- const int count = RCPU_SCRATCH_COUNT;
- for(i=0;ipregs + r;
- if( p->lock >= ctx->currentPos ) continue;
- if( k == RCPU_CALL && is_call_reg(p) ) continue;
- if( k == RCPU_8BITS && !is_reg8(p) ) continue;
- if( p->holds == NULL ) {
- RLOCK(p);
- return p;
- }
- }
- for(i=0;ipregs + RCPU_SCRATCH_REGS[(i + off)%count];
- if( p->lock >= ctx->currentPos ) continue;
- if( k == RCPU_CALL && is_call_reg(p) ) continue;
- if( k == RCPU_8BITS && !is_reg8(p) ) continue;
- if( p->holds ) {
- RLOCK(p);
- p->holds->current = NULL;
- p->holds = NULL;
- return p;
- }
- }
- }
- break;
- case RFPU:
- {
- int off = ctx->allocOffset++;
- const int count = RFPU_SCRATCH_COUNT;
- for(i=0;ilock >= ctx->currentPos ) continue;
- if( p->holds == NULL ) {
- RLOCK(p);
- return p;
- }
- }
- for(i=0;ilock >= ctx->currentPos ) continue;
- if( p->holds ) {
- RLOCK(p);
- p->holds->current = NULL;
- p->holds = NULL;
- return p;
- }
- }
- }
- break;
- default:
- ASSERT(k);
- }
- ASSERT(0); // out of registers !
- return NULL;
-}
-
-static preg *fetch( vreg *r ) {
- if( r->current )
- return r->current;
- return &r->stack;
+void int_alloc_store( int_alloc *a, int v ) {
+ *int_alloc_get(a,1) = v;
}
-static void scratch( preg *r ) {
- if( r && r->holds ) {
- r->holds->current = NULL;
- r->holds = NULL;
- r->lock = 0;
- }
-}
-
-static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size );
-
-static void load( jit_ctx *ctx, preg *r, vreg *v ) {
- preg *from = fetch(v);
- if( from == r || v->size == 0 ) return;
- if( r->holds ) r->holds->current = NULL;
- if( v->current ) {
- v->current->holds = NULL;
- from = r;
- }
- r->holds = v;
- v->current = r;
- copy(ctx,r,from,v->size);
-}
-
-static preg *alloc_fpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
- preg *p = fetch(r);
- if( p->kind != RFPU ) {
- if( !IS_FLOAT(r) && (IS_64 || r->t->kind != HI64) ) ASSERT(r->t->kind);
- p = alloc_reg(ctx, RFPU);
- if( andLoad )
- load(ctx,p,r);
- else {
- if( r->current )
- r->current->holds = NULL;
- r->current = p;
- p->holds = r;
- }
- } else
- RLOCK(p);
- return p;
-}
-
-static void reg_bind( vreg *r, preg *p ) {
- if( r->current )
- r->current->holds = NULL;
- r->current = p;
- p->holds = r;
-}
-
-static preg *alloc_cpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
- preg *p = fetch(r);
- if( p->kind != RCPU ) {
-# ifndef HL_64
- if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,andLoad);
- if( r->size > 4 ) ASSERT(r->size);
-# endif
- p = alloc_reg(ctx, RCPU);
- if( andLoad )
- load(ctx,p,r);
- else
- reg_bind(r,p);
- } else
- RLOCK(p);
- return p;
-}
-
-// allocate a register that is not a call parameter
-static preg *alloc_cpu_call( jit_ctx *ctx, vreg *r ) {
- preg *p = fetch(r);
- if( p->kind != RCPU ) {
-# ifndef HL_64
- if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,true);
- if( r->size > 4 ) ASSERT(r->size);
-# endif
- p = alloc_reg(ctx, RCPU_CALL);
- load(ctx,p,r);
- } else if( is_call_reg(p) ) {
- preg *p2 = alloc_reg(ctx, RCPU_CALL);
- op64(ctx,MOV,p2,p);
- scratch(p);
- reg_bind(r,p2);
- return p2;
- } else
- RLOCK(p);
- return p;
-}
-
-static preg *fetch32( jit_ctx *ctx, vreg *r ) {
- if( r->current )
- return r->current;
- // make sure that the register is correctly erased
- if( r->size < 4 ) {
- preg *p = alloc_cpu(ctx, r, true);
- RUNLOCK(p);
- return p;
- }
- return fetch(r);
-}
-
-// make sure higher bits are zeroes
-static preg *alloc_cpu64( jit_ctx *ctx, vreg *r, bool andLoad ) {
-# ifndef HL_64
- return alloc_cpu(ctx,r,andLoad);
-# else
- preg *p = fetch(r);
- if( !andLoad ) ASSERT(0);
- if( p->kind != RCPU ) {
- p = alloc_reg(ctx, RCPU);
- op64(ctx,XOR,p,p);
- load(ctx,p,r);
- } else {
- // remove higher bits
- preg tmp;
- op64(ctx,SHL,p,pconst(&tmp,32));
- op64(ctx,SHR,p,pconst(&tmp,32));
- RLOCK(p);
- }
- return p;
-# endif
-}
-
-// make sure the register can be used with 8 bits access
-static preg *alloc_cpu8( jit_ctx *ctx, vreg *r, bool andLoad ) {
- preg *p = fetch(r);
- if( p->kind != RCPU ) {
- p = alloc_reg(ctx, RCPU_8BITS);
- load(ctx,p,r);
- } else if( !is_reg8(p) ) {
- preg *p2 = alloc_reg(ctx, RCPU_8BITS);
- op64(ctx,MOV,p2,p);
- scratch(p);
- reg_bind(r,p2);
- return p2;
- } else
- RLOCK(p);
- return p;
-}
-
-static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ) {
- if( size == 0 || to == from ) return to;
- switch( ID2(to->kind,from->kind) ) {
- case ID2(RMEM,RCPU):
- case ID2(RSTACK,RCPU):
- case ID2(RCPU,RSTACK):
- case ID2(RCPU,RMEM):
- case ID2(RCPU,RCPU):
-# ifndef HL_64
- case ID2(RCPU,RADDR):
- case ID2(RADDR,RCPU):
-# endif
- switch( size ) {
- case 1:
- if( to->kind == RCPU ) {
- op64(ctx,XOR,to,to);
- if( !is_reg8(to) ) {
- preg p;
- op32(ctx,MOV16,to,from);
- op32(ctx,SHL,to,pconst(&p,24));
- op32(ctx,SHR,to,pconst(&p,24));
- break;
- }
- }
- if( !is_reg8(from) ) {
- preg *r = alloc_reg(ctx, RCPU_CALL);
- op32(ctx, MOV, r, from);
- RUNLOCK(r);
- op32(ctx,MOV8,to,r);
- return from;
- }
- op32(ctx,MOV8,to,from);
- break;
- case 2:
- if( to->kind == RCPU )
- op64(ctx,XOR,to,to);
- op32(ctx,MOV16,to,from);
- break;
- case 4:
- op32(ctx,MOV,to,from);
- break;
- case 8:
- if( IS_64 ) {
- op64(ctx,MOV,to,from);
- break;
- }
- default:
- ASSERT(size);
- }
- return to->kind == RCPU ? to : from;
- case ID2(RFPU,RFPU):
- case ID2(RMEM,RFPU):
- case ID2(RSTACK,RFPU):
- case ID2(RFPU,RMEM):
- case ID2(RFPU,RSTACK):
- switch( size ) {
- case 8:
- op64(ctx,MOVSD,to,from);
- break;
- case 4:
- op32(ctx,MOVSS,to,from);
- break;
- default:
- ASSERT(size);
- }
- return to->kind == RFPU ? to : from;
- case ID2(RMEM,RSTACK):
- {
- vreg *rfrom = R(from->id);
- if( IS_FLOAT(rfrom) )
- return copy(ctx,to,alloc_fpu(ctx,rfrom,true),size);
- return copy(ctx,to,alloc_cpu(ctx,rfrom,true),size);
- }
- case ID2(RMEM,RMEM):
- case ID2(RSTACK,RMEM):
- case ID2(RSTACK,RSTACK):
-# ifndef HL_64
- case ID2(RMEM,RADDR):
- case ID2(RSTACK,RADDR):
- case ID2(RADDR,RSTACK):
-# endif
- {
- preg *tmp;
- if( (!IS_64 && size == 8) || (to->kind == RSTACK && IS_FLOAT(R(to->id))) || (from->kind == RSTACK && IS_FLOAT(R(from->id))) ) {
- tmp = alloc_reg(ctx, RFPU);
- op64(ctx,size == 8 ? MOVSD : MOVSS,tmp,from);
- } else {
- tmp = alloc_reg(ctx, RCPU);
- copy(ctx,tmp,from,size);
- }
- return copy(ctx,to,tmp,size);
- }
-# ifdef HL_64
- case ID2(RCPU,RADDR):
- case ID2(RMEM,RADDR):
- case ID2(RSTACK,RADDR):
- {
- preg p;
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx,MOV,tmp,pconst64(&p,(int_val)from->holds));
- return copy(ctx,to,pmem(&p,tmp->id,0),size);
- }
- case ID2(RADDR,RCPU):
- case ID2(RADDR,RMEM):
- case ID2(RADDR,RSTACK):
- {
- preg p;
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx,MOV,tmp,pconst64(&p,(int_val)to->holds));
- return copy(ctx,pmem(&p,tmp->id,0),from,size);
- }
-# endif
- default:
- break;
- }
- printf("copy(%s,%s)\n",KNAMES[to->kind], KNAMES[from->kind]);
- ASSERT(0);
- return NULL;
-}
-
-static void store( jit_ctx *ctx, vreg *r, preg *v, bool bind ) {
- if( r->current && r->current != v ) {
- r->current->holds = NULL;
- r->current = NULL;
- }
- v = copy(ctx,&r->stack,v,r->size);
- if( IS_FLOAT(r) != (v->kind == RFPU) )
- ASSERT(0);
- if( bind && r->current != v && (v->kind == RCPU || v->kind == RFPU) ) {
- scratch(v);
- r->current = v;
- v->holds = r;
- }
-}
-
-static void store_result( jit_ctx *ctx, vreg *r ) {
-# ifndef HL_64
- switch( r->t->kind ) {
- case HF64:
- scratch(r->current);
- op64(ctx,FSTP,&r->stack,UNUSED);
- break;
- case HF32:
- scratch(r->current);
- op64(ctx,FSTP32,&r->stack,UNUSED);
- break;
- case HI64:
- scratch(r->current);
- error_i64();
- break;
- default:
-# endif
- store(ctx,r,IS_FLOAT(r) ? REG_AT(XMM(0)) : PEAX,true);
-# ifndef HL_64
- break;
- }
-# endif
-}
-
-static void op_mov( jit_ctx *ctx, vreg *to, vreg *from ) {
- preg *r = fetch(from);
-# ifndef HL_64
- if( to->t->kind == HI64 ) {
- error_i64();
- return;
- }
-# endif
- if( from->t->kind == HF32 && r->kind != RFPU )
- r = alloc_fpu(ctx,from,true);
- store(ctx, to, r, true);
-}
-
-static void copy_to( jit_ctx *ctx, vreg *to, preg *from ) {
- store(ctx,to,from,true);
-}
-
-static void copy_from( jit_ctx *ctx, preg *to, vreg *from ) {
- copy(ctx,to,fetch(from),from->size);
-}
-
-static void store_const( jit_ctx *ctx, vreg *r, int c ) {
- preg p;
- if( c == 0 )
- op(ctx,XOR,alloc_cpu(ctx,r,false),alloc_cpu(ctx,r,false),r->size == 8);
- else if( r->size == 8 )
- op64(ctx,MOV,alloc_cpu(ctx,r,false),pconst64(&p,c));
- else
- op32(ctx,MOV,alloc_cpu(ctx,r,false),pconst(&p,c));
- store(ctx,r,r->current,false);
-}
-
-static void discard_regs( jit_ctx *ctx, bool native_call ) {
- int i;
- for(i=0;ipregs + RCPU_SCRATCH_REGS[i];
- if( r->holds ) {
- r->holds->current = NULL;
- r->holds = NULL;
- }
- }
- for(i=0;ipregs + XMM(i);
- if( r->holds ) {
- r->holds->current = NULL;
- r->holds = NULL;
- }
- }
-}
-
-static int pad_before_call( jit_ctx *ctx, int size ) {
- int total = size + ctx->totalRegsSize + HL_WSIZE * 2; // EIP+EBP
- if( total & 15 ) {
- int pad = 16 - (total & 15);
- preg p;
- if( pad ) op64(ctx,SUB,PESP,pconst(&p,pad));
- size += pad;
- }
- return size;
-}
-
-static void push_reg( jit_ctx *ctx, vreg *r ) {
- preg p;
- switch( stack_size(r->t) ) {
- case 1:
- op64(ctx,SUB,PESP,pconst(&p,1));
- op32(ctx,MOV8,pmem(&p,Esp,0),alloc_cpu8(ctx,r,true));
- break;
- case 2:
- op64(ctx,SUB,PESP,pconst(&p,2));
- op32(ctx,MOV16,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
- break;
- case 4:
- if( r->size < 4 )
- alloc_cpu(ctx,r,true); // force fetch (higher bits set to 0)
- if( !IS_64 ) {
- if( r->current != NULL && r->current->kind == RFPU ) scratch(r->current);
- op32(ctx,PUSH,fetch(r),UNUSED);
- } else {
- // pseudo push32 (not available)
- op64(ctx,SUB,PESP,pconst(&p,4));
- op32(ctx,MOV,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
- }
- break;
- case 8:
- if( fetch(r)->kind == RFPU ) {
- op64(ctx,SUB,PESP,pconst(&p,8));
- op64(ctx,MOVSD,pmem(&p,Esp,0),fetch(r));
- } else if( IS_64 )
- op64(ctx,PUSH,fetch(r),UNUSED);
- else if( r->stack.kind == RSTACK ) {
- scratch(r->current);
- r->stackPos += 4;
- op32(ctx,PUSH,&r->stack,UNUSED);
- r->stackPos -= 4;
- op32(ctx,PUSH,&r->stack,UNUSED);
- } else
- ASSERT(0);
- break;
- default:
- ASSERT(r->size);
- }
-}
-
-static int begin_native_call( jit_ctx *ctx, int nargs ) {
- ctx->nativeArgsCount = nargs;
- return pad_before_call(ctx, nargs > CALL_NREGS ? (nargs - CALL_NREGS) * HL_WSIZE : 0);
-}
-
-static preg *alloc_native_arg( jit_ctx *ctx ) {
-# ifdef HL_64
- int rid = ctx->nativeArgsCount - 1;
- preg *r = rid < CALL_NREGS ? REG_AT(CALL_REGS[rid]) : alloc_reg(ctx,RCPU_CALL);
- scratch(r);
- return r;
-# else
- return alloc_reg(ctx, RCPU);
-# endif
-}
-
-static void set_native_arg( jit_ctx *ctx, preg *r ) {
- if( r->kind == RSTACK ) {
- vreg *v = ctx->vregs + r->id;
- if( v->size < 4 )
- r = fetch32(ctx, v);
- }
-# ifdef HL_64
- if( r->kind == RFPU ) ASSERT(0);
- int rid = --ctx->nativeArgsCount;
- preg *target;
- if( rid >= CALL_NREGS ) {
- op64(ctx,PUSH,r,UNUSED);
- return;
- }
- target = REG_AT(CALL_REGS[rid]);
- if( target != r ) {
- op64(ctx, MOV, target, r);
- scratch(target);
- }
-# else
- op32(ctx,PUSH,r,UNUSED);
-# endif
-}
-
-static void set_native_arg_fpu( jit_ctx *ctx, preg *r, bool isf32 ) {
-# ifdef HL_64
- if( r->kind == RCPU ) ASSERT(0);
- // can only be used if last argument !!
- ctx->nativeArgsCount--;
- preg *target = REG_AT(XMM(IS_WINCALL64 ? ctx->nativeArgsCount : 0));
- if( target != r ) {
- op64(ctx, isf32 ? MOVSS : MOVSD, target, r);
- scratch(target);
- }
-# else
- op32(ctx,PUSH,r,UNUSED);
-# endif
-}
-
-typedef struct {
- int nextCpu;
- int nextFpu;
- int mapped[REG_COUNT];
-} call_regs;
-
-static int select_call_reg( call_regs *regs, hl_type *t, int id ) {
-# ifndef HL_64
- return -1;
-#else
- bool isFloat = t->kind == HF32 || t->kind == HF64;
-# ifdef HL_WIN_CALL
- int index = regs->nextCpu++;
-# else
- int index = isFloat ? regs->nextFpu++ : regs->nextCpu++;
-# endif
- if( index >= CALL_NREGS )
- return -1;
- int reg = isFloat ? XMM(index) : CALL_REGS[index];
- regs->mapped[reg] = id + 1;
- return reg;
-#endif
-}
-
-static int mapped_reg( call_regs *regs, int id ) {
-# ifndef HL_64
- return -1;
-#else
- int i;
- for(i=0;imapped[r] == id + 1 ) return r;
- r = XMM(i);
- if( regs->mapped[r] == id + 1 ) return r;
- }
- return -1;
-#endif
-}
-
-static int prepare_call_args( jit_ctx *ctx, int count, int *args, vreg *vregs, int extraSize ) {
- int i;
- int size = extraSize, paddedSize;
- call_regs ctmp = {0};
- for(i=0;it, i);
- if( cr >= 0 ) {
- preg *c = REG_AT(cr);
- preg *cur = fetch(r);
- if( cur != c ) {
- copy(ctx,c,cur,r->size);
- scratch(c);
- }
- RLOCK(c);
- continue;
- }
- size += stack_size(r->t);
- }
- paddedSize = pad_before_call(ctx,size);
- for(i=0;i= 0 ) continue;
- push_reg(ctx,r);
- if( r->current ) RUNLOCK(r->current);
- }
- return paddedSize;
-}
-
-static void op_call( jit_ctx *ctx, preg *r, int size ) {
- preg p;
-# ifdef JIT_DEBUG
- if( IS_64 && size >= 0 ) {
- int jchk;
- op32(ctx,TEST,PESP,pconst(&p,15));
- XJump(JZero,jchk);
- BREAK(); // unaligned ESP
- patch_jump(ctx, jchk);
- }
-# endif
- if( IS_WINCALL64 ) {
- // MSVC requires 32bytes of free space here
- op64(ctx,SUB,PESP,pconst(&p,32));
- if( size >= 0 ) size += 32;
- }
- op32(ctx, CALL, r, UNUSED);
- if( size > 0 ) op64(ctx,ADD,PESP,pconst(&p,size));
-}
-
-static void call_native( jit_ctx *ctx, void *nativeFun, int size ) {
- bool isExc = nativeFun == hl_assert || nativeFun == hl_throw || nativeFun == on_jit_error;
- preg p;
- // native function, already resolved
- op64(ctx,MOV,PEAX,pconst64(&p,(int_val)nativeFun));
- op_call(ctx,PEAX, isExc ? -1 : size);
- if( isExc )
- return;
- discard_regs(ctx, true);
-}
-
-static void op_call_fun( jit_ctx *ctx, vreg *dst, int findex, int count, int *args ) {
- int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex];
- bool isNative = fid >= ctx->m->code->nfunctions;
- int size = prepare_call_args(ctx,count,args,ctx->vregs,0);
- preg p;
- if( fid < 0 ) {
- ASSERT(fid);
- } else if( isNative ) {
- call_native(ctx,ctx->m->functions_ptrs[findex],size);
- } else {
- int cpos = BUF_POS() + (IS_WINCALL64 ? 4 : 0);
-# ifdef JIT_DEBUG
- if( IS_64 ) cpos += 13; // ESP CHECK
-# endif
- if( ctx->m->functions_ptrs[findex] ) {
- // already compiled
- op_call(ctx,pconst(&p,(int)(int_val)ctx->m->functions_ptrs[findex] - (cpos + 5)), size);
- } else if( ctx->m->code->functions + fid == ctx->f ) {
- // our current function
- op_call(ctx,pconst(&p, ctx->functionPos - (cpos + 5)), size);
- } else {
- // stage for later
- jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
- j->pos = cpos;
- j->target = findex;
- j->next = ctx->calls;
- ctx->calls = j;
- op_call(ctx,pconst(&p,0), size);
- }
- discard_regs(ctx, false);
- }
- if( dst )
- store_result(ctx,dst);
-}
-
-static void op_enter( jit_ctx *ctx ) {
- preg p;
- op64(ctx, PUSH, PEBP, UNUSED);
- op64(ctx, MOV, PEBP, PESP);
- if( ctx->totalRegsSize ) op64(ctx, SUB, PESP, pconst(&p,ctx->totalRegsSize));
-}
-
-static void op_ret( jit_ctx *ctx, vreg *r ) {
- preg p;
- switch( r->t->kind ) {
- case HF32:
-# ifdef HL_64
- op64(ctx, MOVSS, PXMM(0), fetch(r));
-# else
- op64(ctx,FLD32,&r->stack,UNUSED);
-# endif
- break;
- case HF64:
-# ifdef HL_64
- op64(ctx, MOVSD, PXMM(0), fetch(r));
-# else
- op64(ctx,FLD,&r->stack,UNUSED);
-# endif
- break;
- default:
- if( r->size < 4 && !r->current )
- fetch32(ctx, r);
- if( r->current != PEAX )
- op64(ctx,MOV,PEAX,fetch(r));
- break;
- }
- if( ctx->totalRegsSize ) op64(ctx, ADD, PESP, pconst(&p, ctx->totalRegsSize));
-# ifdef JIT_DEBUG
- {
- int jeq;
- op64(ctx, CMP, PESP, PEBP);
- XJump_small(JEq,jeq);
- jit_error("invalid ESP");
- patch_jump(ctx,jeq);
- }
-# endif
- op64(ctx, POP, PEBP, UNUSED);
- op64(ctx, RET, UNUSED, UNUSED);
-}
-
-static void call_native_consts( jit_ctx *ctx, void *nativeFun, int_val *args, int nargs ) {
- int size = pad_before_call(ctx, IS_64 ? 0 : HL_WSIZE*nargs);
- preg p;
- int i;
-# ifdef HL_64
- for(i=0;i=0;i--)
- op32(ctx, PUSH, pconst64(&p, args[i]), UNUSED);
-# endif
- call_native(ctx, nativeFun, size);
-}
-
-static void on_jit_error( const char *msg, int_val line ) {
- char buf[256];
- int iline = (int)line;
- sprintf(buf,"%s (line %d)",msg,iline);
-#ifdef HL_WIN_DESKTOP
- MessageBoxA(NULL,buf,"JIT ERROR",MB_OK);
-#else
- printf("JIT ERROR : %s\n",buf);
-#endif
- hl_debug_break();
- hl_throw(NULL);
-}
-
-static void _jit_error( jit_ctx *ctx, const char *msg, int line ) {
- int_val args[2] = { (int_val)msg, (int_val)line };
- call_native_consts(ctx,on_jit_error,args,2);
-}
-
-
-static preg *op_binop( jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op bop ) {
- preg *pa = fetch(a), *pb = fetch(b), *out = NULL;
- CpuOp o;
- if( IS_FLOAT(a) ) {
- bool isf32 = a->t->kind == HF32;
- switch( bop ) {
- case OAdd: o = isf32 ? ADDSS : ADDSD; break;
- case OSub: o = isf32 ? SUBSS : SUBSD; break;
- case OMul: o = isf32 ? MULSS : MULSD; break;
- case OSDiv: o = isf32 ? DIVSS : DIVSD; break;
- case OJSLt:
- case OJSGte:
- case OJSLte:
- case OJSGt:
- case OJEq:
- case OJNotEq:
- case OJNotLt:
- case OJNotGte:
- o = isf32 ? COMISS : COMISD;
- break;
- case OSMod:
- {
- int args[] = { a->stack.id, b->stack.id };
- int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
- void *mod_fun;
- if( isf32 ) mod_fun = fmodf; else mod_fun = fmod;
- call_native(ctx,mod_fun,size);
- store_result(ctx,dst);
- return fetch(dst);
- }
- default:
- printf("%s\n", hl_op_name(bop));
- ASSERT(bop);
- }
- } else {
- bool is64 = a->t->kind == HI64;
-# ifndef HL_64
- if( is64 ) {
- error_i64();
- return fetch(a);
- }
-# endif
- switch( bop ) {
- case OAdd: o = ADD; break;
- case OSub: o = SUB; break;
- case OMul: o = IMUL; break;
- case OAnd: o = AND; break;
- case OOr: o = OR; break;
- case OXor: o = XOR; break;
- case OShl:
- case OUShr:
- case OSShr:
- if( !b->current || b->current->kind != RCPU || b->current->id != Ecx ) {
- scratch(REG_AT(Ecx));
- op(ctx,MOV,REG_AT(Ecx),pb,is64);
- RLOCK(REG_AT(Ecx));
- pa = fetch(a);
- } else
- RLOCK(b->current);
- if( pa->kind != RCPU ) {
- pa = alloc_reg(ctx, RCPU);
- op(ctx,MOV,pa,fetch(a), is64);
- }
- op(ctx,bop == OShl ? SHL : (bop == OUShr ? SHR : SAR), pa, UNUSED,is64);
- if( dst ) store(ctx, dst, pa, true);
- return pa;
- case OSDiv:
- case OUDiv:
- case OSMod:
- case OUMod:
- {
- preg *out = bop == OSMod || bop == OUMod ? REG_AT(Edx) : PEAX;
- preg *r = pb;
- preg p;
- int jz, jz1 = 0, jend;
- if( pa->kind == RCPU && pa->id == Eax ) RLOCK(pa);
- // ensure b in CPU reg and not in Eax/Edx (for UI8/UI16)
- if( pb->kind != RCPU || (pb->id == Eax || pb->id == Edx) ) {
- scratch(REG_AT(Ecx));
- scratch(pb);
- load(ctx,REG_AT(Ecx),b);
- r = REG_AT(Ecx);
- }
- // integer div 0 => 0
- op(ctx,TEST,r,r,is64);
- XJump_small(JZero, jz);
- // Prevent MIN/-1 overflow exception
- // OSMod: r = (b == 0 || b == -1) ? 0 : a % b
- // OSDiv: r = (b == 0 || b == -1) ? a * b : a / b
- if( bop == OSMod || bop == OSDiv ) {
- op(ctx, CMP, r, pconst(&p,-1), is64);
- XJump_small(JEq, jz1);
- }
- pa = fetch(a);
- if( pa->kind != RCPU || pa->id != Eax ) {
- scratch(PEAX);
- scratch(pa);
- load(ctx,PEAX,a);
- }
- scratch(REG_AT(Edx));
- scratch(REG_AT(Eax));
- if( bop == OUDiv || bop == OUMod )
- op(ctx, XOR, REG_AT(Edx), REG_AT(Edx), is64);
- else
- op(ctx, CDQ, UNUSED, UNUSED, is64); // sign-extend Eax into Eax:Edx
- op(ctx, bop == OUDiv || bop == OUMod ? DIV : IDIV, r, UNUSED, is64);
- XJump_small(JAlways, jend);
- patch_jump(ctx, jz);
- patch_jump(ctx, jz1);
- if( bop != OSDiv ) {
- op(ctx, XOR, out, out, is64);
- } else {
- load(ctx, out, a);
- op(ctx, IMUL, out, r, is64);
- }
- patch_jump(ctx, jend);
- if( dst ) store(ctx, dst, out, true);
- return out;
- }
- case OJSLt:
- case OJSGte:
- case OJSLte:
- case OJSGt:
- case OJULt:
- case OJUGte:
- case OJEq:
- case OJNotEq:
- switch( a->t->kind ) {
- case HUI8:
- case HBOOL:
- o = CMP8;
- break;
- case HUI16:
- o = CMP16;
- break;
- default:
- o = CMP;
- break;
- }
- break;
- default:
- printf("%s\n", hl_op_name(bop));
- ASSERT(bop);
- }
- }
- switch( RTYPE(a) ) {
- case HI32:
- case HUI8:
- case HUI16:
- case HBOOL:
-# ifndef HL_64
- case HDYNOBJ:
- case HVIRTUAL:
- case HOBJ:
- case HSTRUCT:
- case HFUN:
- case HMETHOD:
- case HBYTES:
- case HNULL:
- case HENUM:
- case HDYN:
- case HTYPE:
- case HABSTRACT:
- case HARRAY:
-# endif
- switch( ID2(pa->kind, pb->kind) ) {
- case ID2(RCPU,RCPU):
- case ID2(RCPU,RSTACK):
- op32(ctx, o, pa, pb);
- scratch(pa);
- out = pa;
- break;
- case ID2(RSTACK,RCPU):
- if( dst == a && o != IMUL ) {
- op32(ctx, o, pa, pb);
- dst = NULL;
- out = pa;
- } else {
- alloc_cpu(ctx,a, true);
- return op_binop(ctx,dst,a,b,bop);
- }
- break;
- case ID2(RSTACK,RSTACK):
- alloc_cpu(ctx, a, true);
- return op_binop(ctx, dst, a, b, bop);
- default:
- printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
- ASSERT(ID2(pa->kind, pb->kind));
- }
- if( dst ) store(ctx, dst, out, true);
- return out;
-# ifdef HL_64
- case HOBJ:
- case HSTRUCT:
- case HDYNOBJ:
- case HVIRTUAL:
- case HFUN:
- case HMETHOD:
- case HBYTES:
- case HNULL:
- case HENUM:
- case HDYN:
- case HTYPE:
- case HABSTRACT:
- case HARRAY:
- case HI64:
- case HGUID:
- switch( ID2(pa->kind, pb->kind) ) {
- case ID2(RCPU,RCPU):
- case ID2(RCPU,RSTACK):
- op64(ctx, o, pa, pb);
- scratch(pa);
- out = pa;
- break;
- case ID2(RSTACK,RCPU):
- if( dst == a && OP_FORMS[o].mem_r ) {
- op64(ctx, o, pa, pb);
- dst = NULL;
- out = pa;
- } else {
- alloc_cpu(ctx,a, true);
- return op_binop(ctx,dst,a,b,bop);
- }
- break;
- case ID2(RSTACK,RSTACK):
- alloc_cpu(ctx, a, true);
- return op_binop(ctx, dst, a, b, bop);
- default:
- printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
- ASSERT(ID2(pa->kind, pb->kind));
- }
- if( dst ) store(ctx, dst, out, true);
- return out;
-# endif
- case HF64:
- case HF32:
- pa = alloc_fpu(ctx, a, true);
- pb = alloc_fpu(ctx, b, true);
- switch( ID2(pa->kind, pb->kind) ) {
- case ID2(RFPU,RFPU):
- op64(ctx,o,pa,pb);
- if( (o == COMISD || o == COMISS) && bop != OJSGt ) {
- int jnotnan;
- XJump_small(JNParity,jnotnan);
- switch( bop ) {
- case OJSLt:
- case OJNotLt:
- {
- preg *r = alloc_reg(ctx,RCPU);
- // set CF=0, ZF=1
- op64(ctx,XOR,r,r);
- RUNLOCK(r);
- break;
- }
- case OJSGte:
- case OJNotGte:
- {
- preg *r = alloc_reg(ctx,RCPU);
- // set ZF=0, CF=1
- op64(ctx,XOR,r,r);
- op64(ctx,CMP,r,PESP);
- RUNLOCK(r);
- break;
- }
- break;
- case OJNotEq:
- case OJEq:
- // set ZF=0, CF=?
- case OJSLte:
- // set ZF=0, CF=0
- op64(ctx,TEST,PESP,PESP);
- break;
- default:
- ASSERT(bop);
- }
- patch_jump(ctx,jnotnan);
- }
- scratch(pa);
- out = pa;
- break;
- default:
- printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
- ASSERT(ID2(pa->kind, pb->kind));
- }
- if( dst ) store(ctx, dst, out, true);
- return out;
- default:
- ASSERT(RTYPE(a));
- }
- return NULL;
-}
-
-static int do_jump( jit_ctx *ctx, hl_op op, bool isFloat ) {
- int j;
- switch( op ) {
- case OJAlways:
- XJump(JAlways,j);
- break;
- case OJSGte:
- XJump(isFloat ? JUGte : JSGte,j);
- break;
- case OJSGt:
- XJump(isFloat ? JUGt : JSGt,j);
- break;
- case OJUGte:
- XJump(JUGte,j);
- break;
- case OJSLt:
- XJump(isFloat ? JULt : JSLt,j);
- break;
- case OJSLte:
- XJump(isFloat ? JULte : JSLte,j);
- break;
- case OJULt:
- XJump(JULt,j);
- break;
- case OJEq:
- XJump(JEq,j);
- break;
- case OJNotEq:
- XJump(JNeq,j);
- break;
- case OJNotLt:
- XJump(JUGte,j);
- break;
- case OJNotGte:
- XJump(JULt,j);
- break;
- default:
- j = 0;
- printf("Unknown JUMP %d\n",op);
- break;
- }
- return j;
-}
-
-static void register_jump( jit_ctx *ctx, int pos, int target ) {
- jlist *j = (jlist*)hl_malloc(&ctx->falloc, sizeof(jlist));
- j->pos = pos;
- j->target = target;
- j->next = ctx->jumps;
- ctx->jumps = j;
- if( target != 0 && ctx->opsPos[target] == 0 )
- ctx->opsPos[target] = -1;
-}
-
-#define HDYN_VALUE 8
-
-static void dyn_value_compare( jit_ctx *ctx, preg *a, preg *b, hl_type *t ) {
- preg p;
- switch( t->kind ) {
- case HUI8:
- case HBOOL:
- op32(ctx,MOV8,a,pmem(&p,a->id,HDYN_VALUE));
- op32(ctx,MOV8,b,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,CMP8,a,b);
- break;
- case HUI16:
- op32(ctx,MOV16,a,pmem(&p,a->id,HDYN_VALUE));
- op32(ctx,MOV16,b,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,CMP16,a,b);
- break;
- case HI32:
- op32(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
- op32(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,CMP,a,b);
- break;
- case HF32:
- {
- preg *fa = alloc_reg(ctx, RFPU);
- preg *fb = alloc_reg(ctx, RFPU);
- op64(ctx,MOVSS,fa,pmem(&p,a->id,HDYN_VALUE));
- op64(ctx,MOVSS,fb,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,COMISD,fa,fb);
- }
- break;
- case HF64:
- {
- preg *fa = alloc_reg(ctx, RFPU);
- preg *fb = alloc_reg(ctx, RFPU);
- op64(ctx,MOVSD,fa,pmem(&p,a->id,HDYN_VALUE));
- op64(ctx,MOVSD,fb,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,COMISD,fa,fb);
- }
- break;
- case HI64:
- default:
- // ptr comparison
- op64(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
- op64(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
- op64(ctx,CMP,a,b);
- break;
- }
-}
-
-static void op_jump( jit_ctx *ctx, vreg *a, vreg *b, hl_opcode *op, int targetPos ) {
- if( a->t->kind == HDYN || b->t->kind == HDYN || a->t->kind == HFUN || b->t->kind == HFUN ) {
- int args[] = { a->stack.id, b->stack.id };
- int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
- call_native(ctx,hl_dyn_compare,size);
- if( op->op == OJSGt || op->op == OJSGte ) {
- preg p;
- int jinvalid;
- op32(ctx,CMP,PEAX,pconst(&p,hl_invalid_comparison));
- XJump_small(JEq,jinvalid);
- op32(ctx,TEST,PEAX,PEAX);
- register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
- patch_jump(ctx,jinvalid);
- return;
- }
- op32(ctx,TEST,PEAX,PEAX);
- } else switch( a->t->kind ) {
- case HTYPE:
- {
- int args[] = { a->stack.id, b->stack.id };
- int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
- preg p;
- call_native(ctx,hl_same_type,size);
- op64(ctx,CMP8,PEAX,pconst(&p,1));
- }
- break;
- case HNULL:
- {
- preg *pa = hl_type_size(a->t->tparam) == 1 ? alloc_cpu8(ctx,a,true) : alloc_cpu(ctx,a,true);
- preg *pb = hl_type_size(b->t->tparam) == 1 ? alloc_cpu8(ctx,b,true) : alloc_cpu(ctx,b,true);
- if( op->op == OJEq ) {
- // if( a == b || (a && b && a->v == b->v) ) goto
- int ja, jb;
- // if( a != b && (!a || !b || a->v != b->v) ) goto
- op64(ctx,CMP,pa,pb);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- dyn_value_compare(ctx,pa,pb,a->t->tparam);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- scratch(pa);
- scratch(pb);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- } else if( op->op == OJNotEq ) {
- int jeq, jcmp;
- // if( a != b && (!a || !b || a->v != b->v) ) goto
- op64(ctx,CMP,pa,pb);
- XJump_small(JEq,jeq);
- op64(ctx,TEST,pa,pa);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- op64(ctx,TEST,pb,pb);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- dyn_value_compare(ctx,pa,pb,a->t->tparam);
- XJump_small(JZero,jcmp);
- scratch(pa);
- scratch(pb);
- register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
- patch_jump(ctx,jcmp);
- patch_jump(ctx,jeq);
- } else
- ASSERT(op->op);
- return;
- }
- case HVIRTUAL:
- {
- preg p;
- preg *pa = alloc_cpu(ctx,a,true);
- preg *pb = alloc_cpu(ctx,b,true);
- int ja,jb,jav,jbv,jvalue;
- if( b->t->kind == HOBJ ) {
- if( op->op == OJEq ) {
- // if( a ? (b && a->value == b) : (b == NULL) ) goto
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
- op64(ctx,CMP,pa,pb);
- XJump_small(JAlways,jvalue);
- patch_jump(ctx,ja);
- op64(ctx,TEST,pb,pb);
- patch_jump(ctx,jvalue);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- patch_jump(ctx,jb);
- } else if( op->op == OJNotEq ) {
- // if( a ? (b == NULL || a->value != b) : (b != NULL) ) goto
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
- op64(ctx,CMP,pa,pb);
- XJump_small(JAlways,jvalue);
- patch_jump(ctx,ja);
- op64(ctx,TEST,pb,pb);
- patch_jump(ctx,jvalue);
- register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
- } else
- ASSERT(op->op);
- scratch(pa);
- return;
- }
- op64(ctx,CMP,pa,pb);
- if( op->op == OJEq ) {
- // if( a == b || (a && b && a->value && b->value && a->value == b->value) ) goto
- register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,jav);
- op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jbv);
- op64(ctx,CMP,pa,pb);
- XJump_small(JNeq,jvalue);
- register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- patch_jump(ctx,jav);
- patch_jump(ctx,jbv);
- patch_jump(ctx,jvalue);
- } else if( op->op == OJNotEq ) {
- int jnext;
- // if( a != b && (!a || !b || !a->value || !b->value || a->value != b->value) ) goto
- XJump_small(JEq,jnext);
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,jav);
- op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jbv);
- op64(ctx,CMP,pa,pb);
- XJump_small(JEq,jvalue);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- patch_jump(ctx,jav);
- patch_jump(ctx,jbv);
- register_jump(ctx,do_jump(ctx,OJAlways, false),targetPos);
- patch_jump(ctx,jnext);
- patch_jump(ctx,jvalue);
- } else
- ASSERT(op->op);
- scratch(pa);
- scratch(pb);
- return;
- }
- break;
- case HOBJ:
- case HSTRUCT:
- if( b->t->kind == HVIRTUAL ) {
- op_jump(ctx,b,a,op,targetPos); // inverse
- return;
- }
- if( hl_get_obj_rt(a->t)->compareFun ) {
- preg *pa = alloc_cpu(ctx,a,true);
- preg *pb = alloc_cpu(ctx,b,true);
- preg p;
- int jeq, ja, jb, jcmp;
- int args[] = { a->stack.id, b->stack.id };
- switch( op->op ) {
- case OJEq:
- // if( a == b || (a && b && cmp(a,b) == 0) ) goto
- op64(ctx,CMP,pa,pb);
- XJump_small(JEq,jeq);
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
- op32(ctx,TEST,PEAX,PEAX);
- XJump_small(JNotZero,jcmp);
- patch_jump(ctx,jeq);
- register_jump(ctx,do_jump(ctx,OJAlways,false),targetPos);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- patch_jump(ctx,jcmp);
- break;
- case OJNotEq:
- // if( a != b && (!a || !b || cmp(a,b) != 0) ) goto
- op64(ctx,CMP,pa,pb);
- XJump_small(JEq,jeq);
- op64(ctx,TEST,pa,pa);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
- op64(ctx,TEST,pb,pb);
- register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
-
- op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
- op32(ctx,TEST,PEAX,PEAX);
- XJump_small(JZero,jcmp);
-
- register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
- patch_jump(ctx,jcmp);
- patch_jump(ctx,jeq);
- break;
- default:
- // if( a && b && cmp(a,b) ?? 0 ) goto
- op64(ctx,TEST,pa,pa);
- XJump_small(JZero,ja);
- op64(ctx,TEST,pb,pb);
- XJump_small(JZero,jb);
- op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
- op32(ctx,CMP,PEAX,pconst(&p,0));
- register_jump(ctx,do_jump(ctx,op->op,false),targetPos);
- patch_jump(ctx,ja);
- patch_jump(ctx,jb);
- break;
- }
- return;
- }
- // fallthrough
- default:
- // make sure we have valid 8 bits registers
- if( a->size == 1 ) alloc_cpu8(ctx,a,true);
- if( b->size == 1 ) alloc_cpu8(ctx,b,true);
- op_binop(ctx,NULL,a,b,op->op);
- break;
- }
- register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
-}
+void hl_emit_alloc( jit_ctx *jit );
+void hl_emit_free( jit_ctx *jit );
+void hl_emit_function( jit_ctx *jit );
jit_ctx *hl_jit_alloc() {
- int i;
jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx));
- if( ctx == NULL ) return NULL;
memset(ctx,0,sizeof(jit_ctx));
hl_alloc_init(&ctx->falloc);
- hl_alloc_init(&ctx->galloc);
- for(i=0;iid = i;
- r->kind = RCPU;
- }
- for(i=0;iid = i;
- r->kind = RFPU;
- }
+ hl_emit_alloc(ctx);
return ctx;
}
-void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) {
- free(ctx->vregs);
- free(ctx->opsPos);
- free(ctx->startBuf);
- ctx->maxRegs = 0;
- ctx->vregs = NULL;
- ctx->maxOps = 0;
- ctx->opsPos = NULL;
- ctx->startBuf = NULL;
- ctx->bufSize = 0;
- ctx->buf.b = NULL;
- ctx->calls = NULL;
- ctx->switchs = NULL;
- ctx->closure_list = NULL;
- hl_free(&ctx->falloc);
- hl_free(&ctx->galloc);
- if( !can_reset ) free(ctx);
-}
-
-static void jit_nops( jit_ctx *ctx ) {
- while( BUF_POS() & 15 )
- op32(ctx, NOP, UNUSED, UNUSED);
-}
-
-#define MAX_ARGS 16
-
-static void *call_jit_c2hl = NULL;
-static void *call_jit_hl2c = NULL;
-
-static void *callback_c2hl( void *_f, hl_type *t, void **args, vdynamic *ret ) {
- /*
- prepare stack and regs according to prepare_call_args, but by reading runtime type information
- from the function type. The stack and regs will be setup by the trampoline function.
- */
- void **f = (void**)_f;
- unsigned char stack[MAX_ARGS * 8];
- call_regs cregs = {0};
- if( t->fun->nargs > MAX_ARGS )
- hl_error("Too many arguments for dynamic call");
- int i, size = 0, pad = 0, pos = 0;
- for(i=0;ifun->nargs;i++) {
- hl_type *at = t->fun->args[i];
- int creg = select_call_reg(&cregs,at,i);
- if( creg >= 0 )
- continue;
- size += stack_size(at);
- }
- pad = (-size) & 15;
- size += pad;
- pos = 0;
- for(i=0;ifun->nargs;i++) {
- // RTL
- hl_type *at = t->fun->args[i];
- void *v = args[i];
- int creg = mapped_reg(&cregs,i);
- void *store;
- if( creg >= 0 ) {
- if( REG_IS_FPU(creg) ) {
- store = stack + size + CALL_NREGS * HL_WSIZE + (creg - XMM(0)) * sizeof(double);
- } else {
- store = stack + size + call_reg_index(creg) * HL_WSIZE;
- }
- switch( at->kind ) {
- case HBOOL:
- case HUI8:
- *(int_val*)store = *(unsigned char*)v;
- break;
- case HUI16:
- *(int_val*)store = *(unsigned short*)v;
- break;
- case HI32:
- *(int_val*)store = *(int*)v;
- break;
- case HF32:
- *(void**)store = 0;
- *(float*)store = *(float*)v;
- break;
- case HF64:
- *(double*)store = *(double*)v;
- break;
- case HI64:
- case HGUID:
- *(int64*)store = *(int64*)v;
- break;
- default:
- *(void**)store = v;
- break;
- }
- } else {
- int tsize = stack_size(at);
- store = stack + pos;
- pos += tsize;
- switch( at->kind ) {
- case HBOOL:
- case HUI8:
- *(int*)store = *(unsigned char*)v;
- break;
- case HUI16:
- *(int*)store = *(unsigned short*)v;
- break;
- case HI32:
- case HF32:
- *(int*)store = *(int*)v;
- break;
- case HF64:
- *(double*)store = *(double*)v;
- break;
- case HI64:
- case HGUID:
- *(int64*)store = *(int64*)v;
- break;
- default:
- *(void**)store = v;
- break;
- }
- }
- }
- pos += pad;
- pos >>= IS_64 ? 3 : 2;
- switch( t->fun->ret->kind ) {
- case HUI8:
- case HUI16:
- case HI32:
- case HBOOL:
- ret->v.i = ((int (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- return &ret->v.i;
- case HI64:
- case HGUID:
- ret->v.i64 = ((int64 (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- return &ret->v.i64;
- case HF32:
- ret->v.f = ((float (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- return &ret->v.f;
- case HF64:
- ret->v.d = ((double (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- return &ret->v.d;
- default:
- return ((void *(*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
- }
-}
-
-static void jit_c2hl( jit_ctx *ctx ) {
- // create the function that will be called by callback_c2hl
- // it will make sure to prepare the stack/regs according to native calling conventions
- int jeq, jloop, jstart;
- preg *fptr, *stack, *stend;
- preg p;
-
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
-
-# ifdef HL_64
-
- fptr = REG_AT(R10);
- stack = PEAX;
- stend = REG_AT(R11);
- op64(ctx, MOV, fptr, REG_AT(CALL_REGS[0]));
- op64(ctx, MOV, stack, REG_AT(CALL_REGS[1]));
- op64(ctx, MOV, stend, REG_AT(CALL_REGS[2]));
-
- // set native call regs
- int i;
- for(i=0;iid,i*HL_WSIZE));
- for(i=0;iid,(i+CALL_NREGS)*HL_WSIZE));
-
-# else
-
- // make sure the stack is aligned on 16 bytes
- // the amount of push we will do afterwards is guaranteed to be a multiple of 16bytes by hl_callback
-# ifdef HL_VCC
- // VCC does not guarantee us an aligned stack...
- op64(ctx,MOV,PEAX,PESP);
- op64(ctx,AND,PEAX,pconst(&p,15));
- op64(ctx,SUB,PESP,PEAX);
-# else
- op64(ctx,SUB,PESP,pconst(&p,8));
-# endif
-
- // mov arguments to regs
- fptr = REG_AT(Eax);
- stack = REG_AT(Edx);
- stend = REG_AT(Ecx);
- op64(ctx,MOV,fptr,pmem(&p,Ebp,HL_WSIZE*2));
- op64(ctx,MOV,stack,pmem(&p,Ebp,HL_WSIZE*3));
- op64(ctx,MOV,stend,pmem(&p,Ebp,HL_WSIZE*4));
-
-# endif
-
- // push stack args
- jstart = BUF_POS();
- op64(ctx,CMP,stack,stend);
- XJump(JEq,jeq);
- op64(ctx,SUB,stack,pconst(&p,HL_WSIZE));
- op64(ctx,PUSH,pmem(&p,stack->id,0),UNUSED);
- XJump(JAlways,jloop);
- patch_jump(ctx,jeq);
- patch_jump_to(ctx, jloop, jstart);
-
- op_call(ctx,fptr,0);
-
- // cleanup and ret
- op64(ctx,MOV,PESP,PEBP);
- op64(ctx,POP,PEBP, UNUSED);
- op64(ctx,RET,UNUSED,UNUSED);
-}
-
-static vdynamic *jit_wrapper_call( vclosure_wrapper *c, char *stack_args, void **regs ) {
- vdynamic *args[MAX_ARGS];
- int i;
- int nargs = c->cl.t->fun->nargs;
- call_regs cregs = {0};
- if( nargs > MAX_ARGS )
- hl_error("Too many arguments for wrapped call");
- cregs.nextCpu++; // skip fptr in HL64 - was passed as arg0
- for(i=0;icl.t->fun->args[i];
- int creg = select_call_reg(&cregs,t,i);
- if( creg < 0 ) {
- args[i] = hl_is_dynamic(t) ? *(vdynamic**)stack_args : hl_make_dyn(stack_args,t);
- stack_args += stack_size(t);
- } else if( hl_is_dynamic(t) ) {
- args[i] = *(vdynamic**)(regs + call_reg_index(creg));
- } else if( t->kind == HF32 || t->kind == HF64 ) {
- args[i] = hl_make_dyn(regs + CALL_NREGS + creg - XMM(0),&hlt_f64);
- } else {
- args[i] = hl_make_dyn(regs + call_reg_index(creg),t);
- }
- }
- return hl_dyn_call(c->wrappedFun,args,nargs);
-}
-
-static void *jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs ) {
- vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
- hl_type *tret = c->cl.t->fun->ret;
- switch( tret->kind ) {
- case HVOID:
- return NULL;
- case HUI8:
- case HUI16:
- case HI32:
- case HBOOL:
- return (void*)(int_val)hl_dyn_casti(&ret,&hlt_dyn,tret);
- case HI64:
- case HGUID:
- return (void*)(int_val)hl_dyn_casti64(&ret,&hlt_dyn);
- default:
- return hl_dyn_castp(&ret,&hlt_dyn,tret);
- }
-}
-
-static double jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs ) {
- vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
- return hl_dyn_castd(&ret,&hlt_dyn);
-}
-
-static void jit_hl2c( jit_ctx *ctx ) {
- // create a function that is called with a vclosure_wrapper* and native args
- // and pack and pass the args to callback_hl2c
- preg p;
- int jfloat1, jfloat2, jexit;
- hl_type_fun *ft = NULL;
- int size;
-# ifdef HL_64
- preg *cl = REG_AT(CALL_REGS[0]);
- preg *tmp = REG_AT(CALL_REGS[1]);
-# else
- preg *cl = REG_AT(Ecx);
- preg *tmp = REG_AT(Edx);
-# endif
-
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
-
-# ifdef HL_64
- // push registers
- int i;
- op64(ctx,SUB,PESP,pconst(&p,CALL_NREGS*8));
- for(i=0;it->fun->ret->kind ) {
- // case HF32: case HF64: return jit_wrapper_d(arg0,&args);
- // default: return jit_wrapper_ptr(arg0,&args);
- // }
- if( !IS_64 )
- op64(ctx,MOV,cl,pmem(&p,Ebp,HL_WSIZE*2)); // load arg0
- op64(ctx,MOV,tmp,pmem(&p,cl->id,0)); // ->t
- op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE)); // ->fun
- op64(ctx,MOV,tmp,pmem(&p,tmp->id,(int)(int_val)&ft->ret)); // ->ret
- op32(ctx,MOV,tmp,pmem(&p,tmp->id,0)); // -> kind
-
- op32(ctx,CMP,tmp,pconst(&p,HF64));
- XJump_small(JEq,jfloat1);
- op32(ctx,CMP,tmp,pconst(&p,HF32));
- XJump_small(JEq,jfloat2);
-
- // 64 bits : ESP + EIP (+WIN64PAD)
- // 32 bits : ESP + EIP + PARAM0
- int args_pos = IS_64 ? ((IS_WINCALL64 ? 32 : 0) + HL_WSIZE * 2) : (HL_WSIZE*3);
-
- size = begin_native_call(ctx,3);
- op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
- set_native_arg(ctx, tmp);
- op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
- set_native_arg(ctx, tmp);
- set_native_arg(ctx, cl);
- call_native(ctx, jit_wrapper_ptr, size);
- XJump_small(JAlways, jexit);
-
- patch_jump(ctx,jfloat1);
- patch_jump(ctx,jfloat2);
- size = begin_native_call(ctx,3);
- op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
- set_native_arg(ctx, tmp);
- op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
- set_native_arg(ctx, tmp);
- set_native_arg(ctx, cl);
- call_native(ctx, jit_wrapper_d, size);
-
- patch_jump(ctx,jexit);
- op64(ctx,MOV,PESP,PEBP);
- op64(ctx,POP,PEBP, UNUSED);
- op64(ctx,RET,UNUSED,UNUSED);
-}
-
-static void jit_fail( uchar *msg ) {
- if( msg == NULL ) {
- hl_debug_break();
- msg = USTR("assert");
- }
- vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
- d->v.ptr = msg;
- hl_throw(d);
-}
-
-static void jit_null_access( jit_ctx *ctx ) {
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
- int_val arg = (int_val)USTR("Null access");
- call_native_consts(ctx, jit_fail, &arg, 1);
-}
-
-static void jit_null_fail( int fhash ) {
- vbyte *field = hl_field_name(fhash);
- hl_buffer *b = hl_alloc_buffer();
- hl_buffer_str(b, USTR("Null access ."));
- hl_buffer_str(b, (uchar*)field);
- vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
- d->v.ptr = hl_buffer_content(b,NULL);
- hl_throw(d);
-}
-
-static void jit_null_field_access( jit_ctx *ctx ) {
- preg p;
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
- int size = begin_native_call(ctx, 1);
- int args_pos = (IS_WINCALL64 ? 32 : 0) + HL_WSIZE*2;
- set_native_arg(ctx, pmem(&p,Ebp,args_pos));
- call_native(ctx,jit_null_fail,size);
-}
-
-static void jit_assert( jit_ctx *ctx ) {
- op64(ctx,PUSH,PEBP,UNUSED);
- op64(ctx,MOV,PEBP,PESP);
- int_val arg = 0;
- call_native_consts(ctx, jit_fail, &arg, 1);
-}
-
-static int jit_build( jit_ctx *ctx, void (*fbuild)( jit_ctx *) ) {
- int pos;
- jit_buf(ctx);
- jit_nops(ctx);
- pos = BUF_POS();
- fbuild(ctx);
- int endPos = BUF_POS();
- jit_nops(ctx);
-#ifdef WIN64_UNWIND_TABLES
- int fid = ctx->nunwind++;
- ctx->unwind_table[fid].BeginAddress = pos;
- ctx->unwind_table[fid].EndAddress = endPos;
- ctx->unwind_table[fid].UnwindData = ctx->unwind_offset;
-#endif
- return pos;
-}
-
-static void hl_jit_init_module( jit_ctx *ctx, hl_module *m ) {
- int i;
- ctx->m = m;
- if( m->code->hasdebug ) {
- ctx->debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions);
- memset(ctx->debug, -1, sizeof(hl_debug_infos) * m->code->nfunctions);
- }
- for(i=0;icode->nfloats;i++) {
- jit_buf(ctx);
- *ctx->buf.d++ = m->code->floats[i];
- }
-#ifdef WIN64_UNWIND_TABLES
- jit_buf(ctx);
- ctx->unwind_offset = BUF_POS();
- write_unwind_data(ctx);
-
- ctx->unwind_table = malloc(sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10));
- memset(ctx->unwind_table, 0, sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10));
-#endif
-}
-
void hl_jit_init( jit_ctx *ctx, hl_module *m ) {
- hl_jit_init_module(ctx,m);
- ctx->c2hl = jit_build(ctx, jit_c2hl);
- ctx->hl2c = jit_build(ctx, jit_hl2c);
- ctx->static_functions[0] = (void*)(int_val)jit_build(ctx,jit_null_access);
- ctx->static_functions[1] = (void*)(int_val)jit_build(ctx,jit_assert);
- ctx->static_functions[2] = (void*)(int_val)jit_build(ctx,jit_null_field_access);
-}
-
-void hl_jit_reset( jit_ctx *ctx, hl_module *m ) {
- ctx->debug = NULL;
- hl_jit_init_module(ctx,m);
-}
-
-static void *get_dyncast( hl_type *t ) {
- switch( t->kind ) {
- case HF32:
- return hl_dyn_castf;
- case HF64:
- return hl_dyn_castd;
- case HI64:
- case HGUID:
- return hl_dyn_casti64;
- case HI32:
- case HUI16:
- case HUI8:
- case HBOOL:
- return hl_dyn_casti;
- default:
- return hl_dyn_castp;
- }
-}
-
-static void *get_dynset( hl_type *t ) {
- switch( t->kind ) {
- case HF32:
- return hl_dyn_setf;
- case HF64:
- return hl_dyn_setd;
- case HI64:
- case HGUID:
- return hl_dyn_seti64;
- case HI32:
- case HUI16:
- case HUI8:
- case HBOOL:
- return hl_dyn_seti;
- default:
- return hl_dyn_setp;
- }
-}
-
-static void *get_dynget( hl_type *t ) {
- switch( t->kind ) {
- case HF32:
- return hl_dyn_getf;
- case HF64:
- return hl_dyn_getd;
- case HI64:
- case HGUID:
- return hl_dyn_geti64;
- case HI32:
- case HUI16:
- case HUI8:
- case HBOOL:
- return hl_dyn_geti;
- default:
- return hl_dyn_getp;
- }
-}
-
-static double uint_to_double( unsigned int v ) {
- return v;
}
-static vclosure *alloc_static_closure( jit_ctx *ctx, int fid ) {
- hl_module *m = ctx->m;
- vclosure *c = hl_malloc(&m->ctx.alloc,sizeof(vclosure));
- int fidx = m->functions_indexes[fid];
- c->hasValue = 0;
- if( fidx >= m->code->nfunctions ) {
- // native
- c->t = m->code->natives[fidx - m->code->nfunctions].t;
- c->fun = m->functions_ptrs[fid];
- c->value = NULL;
- } else {
- c->t = m->code->functions[fidx].type;
- c->fun = (void*)(int_val)fid;
- c->value = ctx->closure_list;
- ctx->closure_list = c;
- }
- return c;
+void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) {
+ hl_emit_free(ctx);
+ hl_free(&ctx->falloc);
+ free(ctx);
}
-static void make_dyn_cast( jit_ctx *ctx, vreg *dst, vreg *v ) {
- int size;
- preg p;
- preg *tmp;
- if( v->t->kind == HNULL && v->t->tparam->kind == dst->t->kind ) {
- int jnull, jend;
- preg *out;
- switch( dst->t->kind ) {
- case HUI8:
- case HUI16:
- case HI32:
- case HBOOL:
- case HI64:
- case HGUID:
- tmp = alloc_cpu(ctx, v, true);
- op64(ctx, TEST, tmp, tmp);
- XJump_small(JZero, jnull);
- op64(ctx, MOV, tmp, pmem(&p,tmp->id,8));
- XJump_small(JAlways, jend);
- patch_jump(ctx, jnull);
- op64(ctx, XOR, tmp, tmp);
- patch_jump(ctx, jend);
- store(ctx, dst, tmp, true);
- return;
- case HF32:
- case HF64:
- tmp = alloc_cpu(ctx, v, true);
- out = alloc_fpu(ctx, dst, false);
- op64(ctx, TEST, tmp, tmp);
- XJump_small(JZero, jnull);
- op64(ctx, dst->t->kind == HF32 ? MOVSS : MOVSD, out, pmem(&p,tmp->id,8));
- XJump_small(JAlways, jend);
- patch_jump(ctx, jnull);
- op64(ctx, XORPD, out, out);
- patch_jump(ctx, jend);
- store(ctx, dst, out, true);
- return;
- default:
- break;
- }
- }
- switch( dst->t->kind ) {
- case HF32:
- case HF64:
- case HI64:
- case HGUID:
- size = begin_native_call(ctx, 2);
- set_native_arg(ctx, pconst64(&p,(int_val)v->t));
- break;
- default:
- size = begin_native_call(ctx, 3);
- set_native_arg(ctx, pconst64(&p,(int_val)dst->t));
- set_native_arg(ctx, pconst64(&p,(int_val)v->t));
- break;
- }
- tmp = alloc_native_arg(ctx);
- op64(ctx,MOV,tmp,REG_AT(Ebp));
- if( v->stackPos >= 0 )
- op64(ctx,ADD,tmp,pconst(&p,v->stackPos));
- else
- op64(ctx,SUB,tmp,pconst(&p,-v->stackPos));
- set_native_arg(ctx,tmp);
- call_native(ctx,get_dyncast(dst->t),size);
- store_result(ctx, dst);
+void hl_jit_reset( jit_ctx *ctx, hl_module *m ) {
}
int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
- int i, size = 0, opCount;
- int codePos = BUF_POS();
- int nargs = f->type->fun->nargs;
- unsigned short *debug16 = NULL;
- int *debug32 = NULL;
- call_regs cregs = {0};
- hl_thread_info *tinf = NULL;
- preg p;
- ctx->f = f;
- ctx->allocOffset = 0;
- if( f->nregs > ctx->maxRegs ) {
- free(ctx->vregs);
- ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1));
- if( ctx->vregs == NULL ) {
- ctx->maxRegs = 0;
- return -1;
- }
- ctx->maxRegs = f->nregs;
- }
- if( f->nops > ctx->maxOps ) {
- free(ctx->opsPos);
- ctx->opsPos = (int*)malloc(sizeof(int) * (f->nops + 1));
- if( ctx->opsPos == NULL ) {
- ctx->maxOps = 0;
- return -1;
- }
- ctx->maxOps = f->nops;
- }
- memset(ctx->opsPos,0,(f->nops+1)*sizeof(int));
- for(i=0;inregs;i++) {
- vreg *r = R(i);
- r->t = f->regs[i];
- r->size = hl_type_size(r->t);
- r->current = NULL;
- r->stack.holds = NULL;
- r->stack.id = i;
- r->stack.kind = RSTACK;
- }
- size = 0;
- int argsSize = 0;
- for(i=0;it,i);
- if( creg < 0 || IS_WINCALL64 ) {
- // use existing stack storage
- r->stackPos = argsSize + HL_WSIZE * 2;
- argsSize += stack_size(r->t);
- } else {
- // make room in local vars
- size += r->size;
- size += hl_pad_size(size,r->t);
- r->stackPos = -size;
- }
- }
- for(i=nargs;inregs;i++) {
- vreg *r = R(i);
- size += r->size;
- size += hl_pad_size(size,r->t); // align local vars
- r->stackPos = -size;
- }
-# ifdef HL_64
- size += (-size) & 15; // align on 16 bytes
-# else
- size += hl_pad_size(size,&hlt_dyn); // align on word size
-# endif
- ctx->totalRegsSize = size;
- jit_buf(ctx);
- ctx->functionPos = BUF_POS();
- // make sure currentPos is > 0 before any reg allocations happen
- // otherwise `alloc_reg` thinks that all registers are locked
- ctx->currentPos = 1;
- op_enter(ctx);
-# ifdef HL_64
- {
- // store in local var
- for(i=0;isize);
- p->holds = r;
- r->current = p;
- }
- }
-# endif
- if( ctx->m->code->hasdebug ) {
- debug16 = (unsigned short*)malloc(sizeof(unsigned short) * (f->nops + 1));
- debug16[0] = (unsigned short)(BUF_POS() - codePos);
- }
- ctx->opsPos[0] = BUF_POS();
-
- for(opCount=0;opCountnops;opCount++) {
- int jump;
- hl_opcode *o = f->ops + opCount;
- vreg *dst = R(o->p1);
- vreg *ra = R(o->p2);
- vreg *rb = R(o->p3);
- ctx->currentPos = opCount + 1;
- jit_buf(ctx);
-# ifdef JIT_DEBUG
- if( opCount == 0 || f->ops[opCount-1].op != OAsm ) {
- int uid = opCount + (f->findex<<16);
- op32(ctx, PUSH, pconst(&p,uid), UNUSED);
- op64(ctx, ADD, PESP, pconst(&p,HL_WSIZE));
- }
-# endif
- // emit code
- switch( o->op ) {
- case OMov:
- case OUnsafeCast:
- op_mov(ctx, dst, ra);
- break;
- case OInt:
- store_const(ctx, dst, m->code->ints[o->p2]);
- break;
- case OBool:
- store_const(ctx, dst, o->p2);
- break;
- case OGetGlobal:
- {
- void *addr = m->globals_data + m->globals_indexes[o->p2];
-# ifdef HL_64
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
- copy_to(ctx, dst, pmem(&p,tmp->id,0));
-# else
- copy_to(ctx, dst, paddr(&p,addr));
-# endif
- }
- break;
- case OSetGlobal:
- {
- void *addr = m->globals_data + m->globals_indexes[o->p1];
-# ifdef HL_64
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
- copy_from(ctx, pmem(&p,tmp->id,0), ra);
-# else
- copy_from(ctx, paddr(&p,addr), ra);
-# endif
- }
- break;
- case OCall3:
- {
- int args[3] = { o->p3, o->extra[0], o->extra[1] };
- op_call_fun(ctx, dst, o->p2, 3, args);
- }
- break;
- case OCall4:
- {
- int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] };
- op_call_fun(ctx, dst, o->p2, 4, args);
- }
- break;
- case OCallN:
- op_call_fun(ctx, dst, o->p2, o->p3, o->extra);
- break;
- case OCall0:
- op_call_fun(ctx, dst, o->p2, 0, NULL);
- break;
- case OCall1:
- op_call_fun(ctx, dst, o->p2, 1, &o->p3);
- break;
- case OCall2:
- {
- int args[2] = { o->p3, (int)(int_val)o->extra };
- op_call_fun(ctx, dst, o->p2, 2, args);
- }
- break;
- case OSub:
- case OAdd:
- case OMul:
- case OSDiv:
- case OUDiv:
- case OShl:
- case OSShr:
- case OUShr:
- case OAnd:
- case OOr:
- case OXor:
- case OSMod:
- case OUMod:
- op_binop(ctx, dst, ra, rb, o->op);
- break;
- case ONeg:
- {
- if( IS_FLOAT(ra) ) {
- preg *pa = alloc_reg(ctx,RFPU);
- preg *pb = alloc_fpu(ctx,ra,true);
- op64(ctx,XORPD,pa,pa);
- op64(ctx,ra->t->kind == HF32 ? SUBSS : SUBSD,pa,pb);
- store(ctx,dst,pa,true);
- } else if( ra->t->kind == HI64 ) {
-# ifdef HL_64
- preg *pa = alloc_reg(ctx,RCPU);
- preg *pb = alloc_cpu(ctx,ra,true);
- op64(ctx,XOR,pa,pa);
- op64(ctx,SUB,pa,pb);
- store(ctx,dst,pa,true);
-# else
- error_i64();
-# endif
- } else {
- preg *pa = alloc_reg(ctx,RCPU);
- preg *pb = alloc_cpu(ctx,ra,true);
- op32(ctx,XOR,pa,pa);
- op32(ctx,SUB,pa,pb);
- store(ctx,dst,pa,true);
- }
- }
- break;
- case ONot:
- {
- preg *v = alloc_cpu(ctx,ra,true);
- op32(ctx,XOR,v,pconst(&p,1));
- store(ctx,dst,v,true);
- }
- break;
- case OJFalse:
- case OJTrue:
- case OJNotNull:
- case OJNull:
- {
- preg *r = dst->t->kind == HBOOL ? alloc_cpu8(ctx, dst, true) : alloc_cpu(ctx, dst, true);
- op64(ctx, dst->t->kind == HBOOL ? TEST8 : TEST, r, r);
- XJump( o->op == OJFalse || o->op == OJNull ? JZero : JNotZero,jump);
- register_jump(ctx,jump,(opCount + 1) + o->p2);
- }
- break;
- case OJEq:
- case OJNotEq:
- case OJSLt:
- case OJSGte:
- case OJSLte:
- case OJSGt:
- case OJULt:
- case OJUGte:
- case OJNotLt:
- case OJNotGte:
- op_jump(ctx,dst,ra,o,(opCount + 1) + o->p3);
- break;
- case OJAlways:
- jump = do_jump(ctx,o->op,false);
- register_jump(ctx,jump,(opCount + 1) + o->p1);
- break;
- case OToDyn:
- if( ra->t->kind == HBOOL ) {
- int size = begin_native_call(ctx, 1);
- set_native_arg(ctx, fetch(ra));
- call_native(ctx, hl_alloc_dynbool, size);
- store(ctx, dst, PEAX, true);
- } else {
- int_val rt = (int_val)ra->t;
- int jskip = 0;
- if( hl_is_ptr(ra->t) ) {
- int jnz;
- preg *a = alloc_cpu(ctx,ra,true);
- op64(ctx,TEST,a,a);
- XJump_small(JNotZero,jnz);
- op64(ctx,XOR,PEAX,PEAX); // will replace the result of alloc_dynamic at jump land
- XJump_small(JAlways,jskip);
- patch_jump(ctx,jnz);
- }
- call_native_consts(ctx, hl_alloc_dynamic, &rt, 1);
- // copy value to dynamic
- if( (IS_FLOAT(ra) || ra->size == 8) && !IS_64 ) {
- preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
- op64(ctx,MOV,tmp,&ra->stack);
- op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
- if( ra->t->kind == HF64 ) {
- ra->stackPos += 4;
- op64(ctx,MOV,tmp,&ra->stack);
- op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE+4),tmp);
- ra->stackPos -= 4;
- }
- } else {
- preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
- copy_from(ctx,tmp,ra);
- op64(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
- }
- if( hl_is_ptr(ra->t) ) patch_jump(ctx,jskip);
- store(ctx, dst, PEAX, true);
- }
- break;
- case OToSFloat:
- if( ra == dst ) break;
- if (ra->t->kind == HI32 || ra->t->kind == HUI16 || ra->t->kind == HUI8) {
- preg* r = alloc_cpu(ctx, ra, true);
- preg* w = alloc_fpu(ctx, dst, false);
- op32(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r);
- store(ctx, dst, w, true);
- } else if (ra->t->kind == HI64 ) {
- preg* r = alloc_cpu(ctx, ra, true);
- preg* w = alloc_fpu(ctx, dst, false);
- op64(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r);
- store(ctx, dst, w, true);
- } else if( ra->t->kind == HF64 && dst->t->kind == HF32 ) {
- preg *r = alloc_fpu(ctx,ra,true);
- preg *w = alloc_fpu(ctx,dst,false);
- op32(ctx,CVTSD2SS,w,r);
- store(ctx, dst, w, true);
- } else if( ra->t->kind == HF32 && dst->t->kind == HF64 ) {
- preg *r = alloc_fpu(ctx,ra,true);
- preg *w = alloc_fpu(ctx,dst,false);
- op32(ctx,CVTSS2SD,w,r);
- store(ctx, dst, w, true);
- } else
- ASSERT(0);
- break;
- case OToUFloat:
- {
- int size;
- size = prepare_call_args(ctx,1,&o->p2,ctx->vregs,0);
- call_native(ctx,uint_to_double,size);
- store_result(ctx,dst);
- }
- break;
- case OToInt:
- if( ra == dst ) break;
- if( ra->t->kind == HF64 ) {
- preg *r = alloc_fpu(ctx,ra,true);
- preg *w = alloc_cpu(ctx,dst,false);
- preg *tmp = alloc_reg(ctx,RCPU);
- op32(ctx,STMXCSR,pmem(&p,Esp,-4),UNUSED);
- op32(ctx,MOV,tmp,&p);
- op32(ctx,OR,tmp,pconst(&p,0x6000)); // set round towards 0
- op32(ctx,MOV,pmem(&p,Esp,-8),tmp);
- op32(ctx,LDMXCSR,&p,UNUSED);
- op32(ctx,CVTSD2SI,w,r);
- op32(ctx,LDMXCSR,pmem(&p,Esp,-4),UNUSED);
- store(ctx, dst, w, true);
- } else if (ra->t->kind == HF32) {
- preg *r = alloc_fpu(ctx, ra, true);
- preg *w = alloc_cpu(ctx, dst, false);
- preg *tmp = alloc_reg(ctx, RCPU);
- op32(ctx, STMXCSR, pmem(&p, Esp, -4), UNUSED);
- op32(ctx, MOV, tmp, &p);
- op32(ctx, OR, tmp, pconst(&p, 0x6000)); // set round towards 0
- op32(ctx, MOV, pmem(&p, Esp, -8), tmp);
- op32(ctx, LDMXCSR, &p, UNUSED);
- op32(ctx, CVTSS2SI, w, r);
- op32(ctx, LDMXCSR, pmem(&p, Esp, -4), UNUSED);
- store(ctx, dst, w, true);
- } else if( (dst->t->kind == HI64 || dst->t->kind == HGUID) && ra->t->kind == HI32 ) {
- if( ra->current != PEAX ) {
- op32(ctx, MOV, PEAX, fetch(ra));
- scratch(PEAX);
- }
-# ifdef HL_64
- op64(ctx, CDQE, UNUSED, UNUSED); // sign-extend Eax into Rax
- store(ctx, dst, PEAX, true);
-# else
- op32(ctx, CDQ, UNUSED, UNUSED); // sign-extend Eax into Eax:Edx
- scratch(REG_AT(Edx));
- op32(ctx, MOV, fetch(dst), PEAX);
- dst->stackPos += 4;
- op32(ctx, MOV, fetch(dst), REG_AT(Edx));
- dst->stackPos -= 4;
- } else if( dst->t->kind == HI32 && ra->t->kind == HI64 ) {
- error_i64();
-# endif
- } else {
- preg *r = alloc_cpu(ctx,dst,false);
- copy_from(ctx, r, ra);
- store(ctx, dst, r, true);
- }
- break;
- case ORet:
- op_ret(ctx, dst);
- break;
- case OIncr:
- {
- if( IS_FLOAT(dst) ) {
- ASSERT(0);
- } else {
- preg *v = fetch32(ctx,dst);
- op32(ctx,INC,v,UNUSED);
- if( v->kind != RSTACK ) store(ctx, dst, v, false);
- }
- }
- break;
- case ODecr:
- {
- if( IS_FLOAT(dst) ) {
- ASSERT(0);
- } else {
- preg *v = fetch32(ctx,dst);
- op32(ctx,DEC,v,UNUSED);
- if( v->kind != RSTACK ) store(ctx, dst, v, false);
- }
- }
- break;
- case OFloat:
- {
- if( m->code->floats[o->p2] == 0 ) {
- preg *f = alloc_fpu(ctx,dst,false);
- op64(ctx,XORPD,f,f);
- } else switch( dst->t->kind ) {
- case HF64:
- case HF32:
-# ifdef HL_64
- op64(ctx,dst->t->kind == HF32 ? CVTSD2SS : MOVSD,alloc_fpu(ctx,dst,false),pcodeaddr(&p,o->p2 * 8));
-# else
- op64(ctx,dst->t->kind == HF32 ? MOVSS : MOVSD,alloc_fpu(ctx,dst,false),paddr(&p,m->code->floats + o->p2));
-# endif
- break;
- default:
- ASSERT(dst->t->kind);
- }
- store(ctx,dst,dst->current,false);
- }
- break;
- case OString:
- op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)hl_get_ustring(m->code,o->p2)));
- store(ctx,dst,dst->current,false);
- break;
- case OBytes:
- {
- char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2];
- op64(ctx,MOV,alloc_cpu(ctx,dst,false),pconst64(&p,(int_val)b));
- store(ctx,dst,dst->current,false);
- }
- break;
- case ONull:
- {
- op64(ctx,XOR,alloc_cpu(ctx, dst, false),alloc_cpu(ctx, dst, false));
- store(ctx,dst,dst->current,false);
- }
- break;
- case ONew:
- {
- int_val args[] = { (int_val)dst->t };
- void *allocFun;
- int nargs = 1;
- switch( dst->t->kind ) {
- case HOBJ:
- case HSTRUCT:
- allocFun = hl_alloc_obj;
- break;
- case HDYNOBJ:
- allocFun = hl_alloc_dynobj;
- nargs = 0;
- break;
- case HVIRTUAL:
- allocFun = hl_alloc_virtual;
- break;
- default:
- ASSERT(dst->t->kind);
- }
- call_native_consts(ctx, allocFun, args, nargs);
- store(ctx, dst, PEAX, true);
- }
- break;
- case OInstanceClosure:
- {
- preg *r = alloc_cpu(ctx, rb, true);
- jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
- int size = begin_native_call(ctx,3);
- set_native_arg(ctx,r);
-
- j->pos = BUF_POS();
- j->target = o->p2;
- j->next = ctx->calls;
- ctx->calls = j;
-
- set_native_arg(ctx,pconst64(&p,RESERVE_ADDRESS));
- set_native_arg(ctx,pconst64(&p,(int_val)m->code->functions[m->functions_indexes[o->p2]].type));
- call_native(ctx,hl_alloc_closure_ptr,size);
- store(ctx,dst,PEAX,true);
- }
- break;
- case OVirtualClosure:
- {
- int size, i;
- preg *r = alloc_cpu_call(ctx, ra);
- hl_type *t = NULL;
- hl_type *ot = ra->t;
- while( t == NULL ) {
- for(i=0;iobj->nproto;i++) {
- hl_obj_proto *pp = ot->obj->proto + i;
- if( pp->pindex == o->p3 ) {
- t = m->code->functions[m->functions_indexes[pp->findex]].type;
- break;
- }
- }
- ot = ot->obj->super;
- }
- size = begin_native_call(ctx,3);
- set_native_arg(ctx,r);
- // read r->type->vobj_proto[i] for function address
- op64(ctx,MOV,r,pmem(&p,r->id,0));
- op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*2));
- op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*o->p3));
- set_native_arg(ctx,r);
- op64(ctx,MOV,r,pconst64(&p,(int_val)t));
- set_native_arg(ctx,r);
- call_native(ctx,hl_alloc_closure_ptr,size);
- store(ctx,dst,PEAX,true);
- }
- break;
- case OCallClosure:
- if( ra->t->kind == HDYN ) {
- // ASM for {
- // vdynamic *args[] = {args};
- // vdynamic *ret = hl_dyn_call(closure,args,nargs);
- // dst = hl_dyncast(ret,t_dynamic,t_dst);
- // }
- int offset = o->p3 * HL_WSIZE;
- preg *r = alloc_reg(ctx, RCPU_CALL);
- if( offset & 15 ) offset += 16 - (offset & 15);
- op64(ctx,SUB,PESP,pconst(&p,offset));
- op64(ctx,MOV,r,PESP);
- for(i=0;ip3;i++) {
- vreg *a = R(o->extra[i]);
- if( !hl_is_dynamic(a->t) ) ASSERT(0);
- preg *v = alloc_cpu(ctx,a,true);
- op64(ctx,MOV,pmem(&p,r->id,i * HL_WSIZE),v);
- RUNLOCK(v);
- }
-# ifdef HL_64
- int size = begin_native_call(ctx, 3) + offset;
- set_native_arg(ctx, pconst(&p,o->p3));
- set_native_arg(ctx, r);
- set_native_arg(ctx, fetch(ra));
-# else
- int size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(int) + offset);
- op64(ctx,PUSH,pconst(&p,o->p3),UNUSED);
- op64(ctx,PUSH,r,UNUSED);
- op64(ctx,PUSH,alloc_cpu(ctx,ra,true),UNUSED);
-# endif
- call_native(ctx,hl_dyn_call,size);
- if( dst->t->kind != HVOID ) {
- store(ctx,dst,PEAX,true);
- make_dyn_cast(ctx,dst,dst);
- }
- } else {
- int jhasvalue, jend, size;
- // ASM for if( c->hasValue ) c->fun(value,args) else c->fun(args)
- preg *r = alloc_cpu(ctx,ra,true);
- preg *tmp = alloc_reg(ctx, RCPU);
- op32(ctx,MOV,tmp,pmem(&p,r->id,HL_WSIZE*2));
- op32(ctx,TEST,tmp,tmp);
- scratch(tmp);
- XJump_small(JNotZero,jhasvalue);
- save_regs(ctx);
- size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
- preg *rr = r;
- if( rr->holds != ra ) rr = alloc_cpu(ctx, ra, true);
- op_call(ctx, pmem(&p,rr->id,HL_WSIZE), size);
- XJump_small(JAlways,jend);
- patch_jump(ctx,jhasvalue);
- restore_regs(ctx);
-# ifdef HL_64
- {
- int regids[64];
- preg *pc = REG_AT(CALL_REGS[0]);
- vreg *sc = R(f->nregs); // scratch register that we temporary rebind
- if( o->p3 >= 63 ) jit_error("assert");
- memcpy(regids + 1, o->extra, o->p3 * sizeof(int));
- regids[0] = f->nregs;
- sc->size = HL_WSIZE;
- sc->t = &hlt_dyn;
- op64(ctx, MOV, pc, pmem(&p,r->id,HL_WSIZE*3));
- scratch(pc);
- sc->current = pc;
- pc->holds = sc;
- size = prepare_call_args(ctx,o->p3 + 1,regids,ctx->vregs,0);
- if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
- }
-# else
- size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,HL_WSIZE);
- if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
- op64(ctx, PUSH,pmem(&p,r->id,HL_WSIZE*3),UNUSED); // push closure value
-# endif
- op_call(ctx, pmem(&p,r->id,HL_WSIZE), size);
- discard_regs(ctx,false);
- patch_jump(ctx,jend);
- store_result(ctx, dst);
- }
- break;
- case OStaticClosure:
- {
- vclosure *c = alloc_static_closure(ctx,o->p2);
- preg *r = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, r, pconst64(&p,(int_val)c));
- store(ctx,dst,r,true);
- }
- break;
- case OField:
- {
-# ifndef HL_64
- if( dst->t->kind == HI64 ) {
- error_i64();
- break;
- }
-# endif
- switch( ra->t->kind ) {
- case HOBJ:
- case HSTRUCT:
- {
- hl_runtime_obj *rt = hl_get_obj_rt(ra->t);
- preg *rr = alloc_cpu(ctx,ra, true);
- if( dst->t->kind == HSTRUCT ) {
- hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t;
- if( ft->kind == HPACKED ) {
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p3]));
- store(ctx,dst,r,true);
- break;
- }
- }
- copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p3]));
- }
- break;
- case HVIRTUAL:
- // ASM for --> if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt)
- {
- int jhasfield, jend, size;
- bool need_type = !(IS_FLOAT(dst) || dst->t->kind == HI64);
- preg *v = alloc_cpu_call(ctx,ra);
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p3));
- op64(ctx,TEST,r,r);
- XJump_small(JNotZero,jhasfield);
- size = begin_native_call(ctx, need_type ? 3 : 2);
- if( need_type ) set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
- set_native_arg(ctx,pconst64(&p,(int_val)ra->t->virt->fields[o->p3].hashed_name));
- set_native_arg(ctx,v);
- call_native(ctx,get_dynget(dst->t),size);
- store_result(ctx,dst);
- XJump_small(JAlways,jend);
- patch_jump(ctx,jhasfield);
- copy_to(ctx, dst, pmem(&p,(CpuReg)r->id,0));
- patch_jump(ctx,jend);
- scratch(dst->current);
- }
- break;
- default:
- ASSERT(ra->t->kind);
- break;
- }
- }
- break;
- case OSetField:
- {
- switch( dst->t->kind ) {
- case HOBJ:
- case HSTRUCT:
- {
- hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
- preg *rr = alloc_cpu(ctx, dst, true);
- if( rb->t->kind == HSTRUCT ) {
- hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t;
- if( ft->kind == HPACKED ) {
- hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam);
- preg *prb = alloc_cpu(ctx, rb, true);
- preg *tmp = alloc_reg(ctx, RCPU_CALL);
- int offset = 0;
- while( offset < frt->size ) {
- int remain = frt->size - offset;
- int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
- copy(ctx, tmp, pmem(&p, (CpuReg)prb->id, offset), copy_size);
- copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]+offset), tmp, copy_size);
- offset += copy_size;
- }
- break;
- }
- }
- copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]), rb);
- }
- break;
- case HVIRTUAL:
- // ASM for --> if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v)
- {
- int jhasfield, jend;
- preg *obj = alloc_cpu_call(ctx,dst);
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,MOV,r,pmem(&p,obj->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
- op64(ctx,TEST,r,r);
- XJump_small(JNotZero,jhasfield);
-# ifdef HL_64
- switch( rb->t->kind ) {
- case HF64:
- case HF32:
- size = begin_native_call(ctx,3);
- set_native_arg_fpu(ctx, fetch(rb), rb->t->kind == HF32);
- break;
- case HI64:
- case HGUID:
- size = begin_native_call(ctx,3);
- set_native_arg(ctx, fetch(rb));
- break;
- default:
- size = begin_native_call(ctx, 4);
- set_native_arg(ctx, fetch(rb));
- set_native_arg(ctx, pconst64(&p,(int_val)rb->t));
- break;
- }
- set_native_arg(ctx,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
- set_native_arg(ctx,obj);
-# else
- switch( rb->t->kind ) {
- case HF64:
- case HI64:
- case HGUID:
- size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(double));
- push_reg(ctx,rb);
- break;
- case HF32:
- size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(float));
- push_reg(ctx,rb);
- break;
- default:
- size = pad_before_call(ctx,HL_WSIZE*4);
- op64(ctx,PUSH,fetch32(ctx,rb),UNUSED);
- op64(ctx,MOV,r,pconst64(&p,(int_val)rb->t));
- op64(ctx,PUSH,r,UNUSED);
- break;
- }
- op32(ctx,MOV,r,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
- op64(ctx,PUSH,r,UNUSED);
- op64(ctx,PUSH,obj,UNUSED);
-# endif
- call_native(ctx,get_dynset(rb->t),size);
- XJump_small(JAlways,jend);
- patch_jump(ctx,jhasfield);
- copy_from(ctx, pmem(&p,(CpuReg)r->id,0), rb);
- patch_jump(ctx,jend);
- scratch(rb->current);
- }
- break;
- default:
- ASSERT(dst->t->kind);
- break;
- }
- }
- break;
- case OGetThis:
- {
- vreg *r = R(0);
- hl_runtime_obj *rt = hl_get_obj_rt(r->t);
- preg *rr = alloc_cpu(ctx,r, true);
- if( dst->t->kind == HSTRUCT ) {
- hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t;
- if( ft->kind == HPACKED ) {
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p2]));
- store(ctx,dst,r,true);
- break;
- }
- }
- copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]));
- }
- break;
- case OSetThis:
- {
- vreg *r = R(0);
- hl_runtime_obj *rt = hl_get_obj_rt(r->t);
- preg *rr = alloc_cpu(ctx, r, true);
- if( ra->t->kind == HSTRUCT ) {
- hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t;
- if( ft->kind == HPACKED ) {
- hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam);
- preg *pra = alloc_cpu(ctx, ra, true);
- preg *tmp = alloc_reg(ctx, RCPU_CALL);
- int offset = 0;
- while( offset < frt->size ) {
- int remain = frt->size - offset;
- int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
- copy(ctx, tmp, pmem(&p, (CpuReg)pra->id, offset), copy_size);
- copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]+offset), tmp, copy_size);
- offset += copy_size;
- }
- break;
- }
- }
- copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]), ra);
- }
- break;
- case OCallThis:
- {
- int nargs = o->p3 + 1;
- int *args = (int*)hl_malloc(&ctx->falloc,sizeof(int) * nargs);
- int size;
- preg *r = alloc_cpu(ctx, R(0), true);
- preg *tmp;
- tmp = alloc_reg(ctx, RCPU_CALL);
- op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
- op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
- args[0] = 0;
- for(i=1;iextra[i-1];
- size = prepare_call_args(ctx,nargs,args,ctx->vregs,0);
- op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
- discard_regs(ctx, false);
- store_result(ctx, dst);
- }
- break;
- case OCallMethod:
- switch( R(o->extra[0])->t->kind ) {
- case HOBJ: {
- int size;
- preg *r = alloc_cpu(ctx, R(o->extra[0]), true);
- preg *tmp;
- tmp = alloc_reg(ctx, RCPU_CALL);
- op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
- op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
- size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
- op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
- discard_regs(ctx, false);
- store_result(ctx, dst);
- break;
- }
- case HVIRTUAL:
- // ASM for --> if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret)
- {
- int size;
- int paramsSize;
- int jhasfield, jend;
- bool need_dyn;
- bool obj_in_args = false;
- vreg *obj = R(o->extra[0]);
- preg *v = alloc_cpu_call(ctx,obj);
- preg *r = alloc_reg(ctx,RCPU_CALL);
- op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
- op64(ctx,TEST,r,r);
- save_regs(ctx);
-
- if( o->p3 < 6 ) {
- XJump_small(JNotZero,jhasfield);
- } else {
- XJump(JNotZero,jhasfield);
- }
-
- need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID;
- paramsSize = (o->p3 - 1) * HL_WSIZE;
- if( need_dyn ) paramsSize += sizeof(vdynamic);
- if( paramsSize & 15 ) paramsSize += 16 - (paramsSize&15);
- op64(ctx,SUB,PESP,pconst(&p,paramsSize));
- op64(ctx,MOV,r,PESP);
-
- for(i=0;ip3-1;i++) {
- vreg *a = R(o->extra[i+1]);
- if( hl_is_ptr(a->t) ) {
- op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),alloc_cpu(ctx,a,true));
- if( a->current != v ) {
- RUNLOCK(a->current);
- } else
- obj_in_args = true;
- } else {
- preg *r2 = alloc_reg(ctx,RCPU);
- op64(ctx,LEA,r2,&a->stack);
- op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),r2);
- if( r2 != v ) RUNLOCK(r2);
- }
- }
-
- jit_buf(ctx);
-
- if( !need_dyn ) {
- size = begin_native_call(ctx, 5);
- set_native_arg(ctx, pconst(&p,0));
- } else {
- preg *rtmp = alloc_reg(ctx,RCPU);
- op64(ctx,LEA,rtmp,pmem(&p,Esp,paramsSize - sizeof(vdynamic)));
- size = begin_native_call(ctx, 5);
- set_native_arg(ctx,rtmp);
- if( !IS_64 ) RUNLOCK(rtmp);
- }
- set_native_arg(ctx,r);
- set_native_arg(ctx,pconst(&p,obj->t->virt->fields[o->p2].hashed_name)); // fid
- set_native_arg(ctx,pconst64(&p,(int_val)obj->t->virt->fields[o->p2].t)); // ftype
- set_native_arg(ctx,pmem(&p,v->id,HL_WSIZE)); // o->value
- call_native(ctx,hl_dyn_call_obj,size + paramsSize);
- if( need_dyn ) {
- preg *r = IS_FLOAT(dst) ? REG_AT(XMM(0)) : PEAX;
- copy(ctx,r,pmem(&p,Esp,HDYN_VALUE - (int)sizeof(vdynamic)),dst->size);
- store(ctx, dst, r, false);
- } else
- store(ctx, dst, PEAX, false);
-
- XJump_small(JAlways,jend);
- patch_jump(ctx,jhasfield);
- restore_regs(ctx);
-
- if( !obj_in_args ) {
- // o = o->value hack
- if( v->holds ) v->holds->current = NULL;
- obj->current = v;
- v->holds = obj;
- op64(ctx,MOV,v,pmem(&p,v->id,HL_WSIZE));
- size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
- } else {
- // keep o->value in R(f->nregs)
- int regids[64];
- preg *pc = alloc_reg(ctx,RCPU_CALL);
- vreg *sc = R(f->nregs); // scratch register that we temporary rebind
- if( o->p3 >= 63 ) jit_error("assert");
- memcpy(regids, o->extra, o->p3 * sizeof(int));
- regids[0] = f->nregs;
- sc->size = HL_WSIZE;
- sc->t = &hlt_dyn;
- op64(ctx, MOV, pc, pmem(&p,v->id,HL_WSIZE));
- scratch(pc);
- sc->current = pc;
- pc->holds = sc;
- size = prepare_call_args(ctx,o->p3,regids,ctx->vregs,0);
- }
-
- op_call(ctx,r,size);
- discard_regs(ctx, false);
- store_result(ctx, dst);
- patch_jump(ctx,jend);
- }
- break;
- default:
- ASSERT(0);
- break;
- }
- break;
- case ORethrow:
- {
- int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
- call_native(ctx,hl_rethrow,size);
- }
- break;
- case OThrow:
- {
- int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
- call_native(ctx,hl_throw,size);
- }
- break;
- case OLabel:
- // NOP for now
- discard_regs(ctx,false);
- break;
- case OGetI8:
- case OGetI16:
- {
- preg *base = alloc_cpu(ctx, ra, true);
- preg *offset = alloc_cpu64(ctx, rb, true);
- preg *r = alloc_reg(ctx,o->op == OGetI8 ? RCPU_8BITS : RCPU);
- op64(ctx,XOR,r,r);
- op32(ctx, o->op == OGetI8 ? MOV8 : MOV16,r,pmem2(&p,base->id,offset->id,1,0));
- store(ctx, dst, r, true);
- }
- break;
- case OGetMem:
- {
- #ifndef HL_64
- if (dst->t->kind == HI64) {
- error_i64();
- }
- #endif
- preg *base = alloc_cpu(ctx, ra, true);
- preg *offset = alloc_cpu64(ctx, rb, true);
- store(ctx, dst, pmem2(&p,base->id,offset->id,1,0), false);
- }
- break;
- case OSetI8:
- {
- preg *base = alloc_cpu(ctx, dst, true);
- preg *offset = alloc_cpu64(ctx, ra, true);
- preg *value = alloc_cpu8(ctx, rb, true);
- op32(ctx,MOV8,pmem2(&p,base->id,offset->id,1,0),value);
- }
- break;
- case OSetI16:
- {
- preg *base = alloc_cpu(ctx, dst, true);
- preg *offset = alloc_cpu64(ctx, ra, true);
- preg *value = alloc_cpu(ctx, rb, true);
- op32(ctx,MOV16,pmem2(&p,base->id,offset->id,1,0),value);
- }
- break;
- case OSetMem:
- {
- preg *base = alloc_cpu(ctx, dst, true);
- preg *offset = alloc_cpu64(ctx, ra, true);
- preg *value;
- switch( rb->t->kind ) {
- case HI32:
- value = alloc_cpu(ctx, rb, true);
- op32(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
- break;
- case HF32:
- value = alloc_fpu(ctx, rb, true);
- op32(ctx,MOVSS,pmem2(&p,base->id,offset->id,1,0),value);
- break;
- case HF64:
- value = alloc_fpu(ctx, rb, true);
- op32(ctx,MOVSD,pmem2(&p,base->id,offset->id,1,0),value);
- break;
- case HI64:
- case HGUID:
- value = alloc_cpu(ctx, rb, true);
- op64(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
- break;
- default:
- ASSERT(rb->t->kind);
- break;
- }
- }
- break;
- case OType:
- {
- op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)(m->code->types + o->p2)));
- store(ctx,dst,dst->current,false);
- }
- break;
- case OGetType:
- {
- int jnext, jend;
- preg *r = alloc_cpu(ctx, ra, true);
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx,TEST,r,r);
- XJump_small(JNotZero,jnext);
- op64(ctx,MOV, tmp, pconst64(&p,(int_val)&hlt_void));
- XJump_small(JAlways,jend);
- patch_jump(ctx,jnext);
- op64(ctx, MOV, tmp, pmem(&p,r->id,0));
- patch_jump(ctx,jend);
- store(ctx,dst,tmp,true);
- }
- break;
- case OGetArray:
- {
- preg *rdst = IS_FLOAT(dst) ? alloc_fpu(ctx,dst,false) : alloc_cpu(ctx,dst,false);
- if( ra->t->kind == HABSTRACT ) {
- int osize;
- bool isRead = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT;
- if( isRead )
- osize = sizeof(void*);
- else {
- hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
- osize = rt->size;
- }
- preg *idx = alloc_cpu64(ctx, rb, true);
- op64(ctx, IMUL, idx, pconst(&p,osize));
- op64(ctx, isRead?MOV:LEA, rdst, pmem2(&p,alloc_cpu(ctx,ra, true)->id,idx->id,1,0));
- store(ctx,dst,dst->current,false);
- scratch(idx);
- } else {
- copy(ctx, rdst, pmem2(&p,alloc_cpu(ctx,ra,true)->id,alloc_cpu64(ctx,rb,true)->id,hl_type_size(dst->t),sizeof(varray)), dst->size);
- store(ctx,dst,dst->current,false);
- }
- }
- break;
- case OSetArray:
- {
- if( dst->t->kind == HABSTRACT ) {
- int osize;
- bool isWrite = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT;
- if( isWrite ) {
- osize = sizeof(void*);
- } else {
- hl_runtime_obj *rt = hl_get_obj_rt(rb->t);
- osize = rt->size;
- }
- preg *pdst = alloc_cpu(ctx,dst,true);
- preg *pra = alloc_cpu64(ctx,ra,true);
- op64(ctx, IMUL, pra, pconst(&p,osize));
- op64(ctx, ADD, pdst, pra);
- scratch(pra);
- preg *prb = alloc_cpu(ctx,rb,true);
- preg *tmp = alloc_reg(ctx, RCPU_CALL);
- int offset = 0;
- while( offset < osize ) {
- int remain = osize - offset;
- int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
- copy(ctx, tmp, pmem(&p, prb->id, offset), copy_size);
- copy(ctx, pmem(&p, pdst->id, offset), tmp, copy_size);
- offset += copy_size;
- }
- scratch(pdst);
- } else {
- preg *rrb = IS_FLOAT(rb) ? alloc_fpu(ctx,rb,true) : alloc_cpu(ctx,rb,true);
- copy(ctx, pmem2(&p,alloc_cpu(ctx,dst,true)->id,alloc_cpu64(ctx,ra,true)->id,hl_type_size(rb->t),sizeof(varray)), rrb, rb->size);
- }
- }
- break;
- case OArraySize:
- {
- op32(ctx,MOV,alloc_cpu(ctx,dst,false),pmem(&p,alloc_cpu(ctx,ra,true)->id,ra->t->kind == HABSTRACT ? HL_WSIZE + 4 : HL_WSIZE*2));
- store(ctx,dst,dst->current,false);
- }
- break;
- case ORef:
- {
- scratch(ra->current);
- op64(ctx,MOV,alloc_cpu(ctx,dst,false),REG_AT(Ebp));
- if( ra->stackPos < 0 )
- op64(ctx,SUB,dst->current,pconst(&p,-ra->stackPos));
- else
- op64(ctx,ADD,dst->current,pconst(&p,ra->stackPos));
- store(ctx,dst,dst->current,false);
- }
- break;
- case OUnref:
- copy_to(ctx,dst,pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
- break;
- case OSetref:
- copy_from(ctx,pmem(&p,alloc_cpu(ctx,dst,true)->id,0),ra);
- break;
- case ORefData:
- switch( ra->t->kind ) {
- case HARRAY:
- {
- preg *r = fetch(ra);
- preg *d = alloc_cpu(ctx,dst,false);
- op64(ctx,MOV,d,r);
- op64(ctx,ADD,d,pconst(&p,sizeof(varray)));
- store(ctx,dst,dst->current,false);
- }
- break;
- default:
- ASSERT(ra->t->kind);
- }
- break;
- case ORefOffset:
- {
- preg *d = alloc_cpu(ctx,rb,true);
- preg *r2 = alloc_cpu(ctx,dst,false);
- preg *r = fetch(ra);
- int size = hl_type_size(dst->t->tparam);
- op64(ctx,MOV,r2,r);
- switch( size ) {
- case 1:
- break;
- case 2:
- op64(ctx,SHL,d,pconst(&p,1));
- break;
- case 4:
- op64(ctx,SHL,d,pconst(&p,2));
- break;
- case 8:
- op64(ctx,SHL,d,pconst(&p,3));
- break;
- default:
- op64(ctx,IMUL,d,pconst(&p,size));
- break;
- }
- op64(ctx,ADD,r2,d);
- scratch(d);
- store(ctx,dst,dst->current,false);
- }
- break;
- case OToVirtual:
- {
-# ifdef HL_64
- int size = pad_before_call(ctx, 0);
- op64(ctx,MOV,REG_AT(CALL_REGS[1]),fetch(ra));
- op64(ctx,MOV,REG_AT(CALL_REGS[0]),pconst64(&p,(int_val)dst->t));
-# else
- int size = pad_before_call(ctx, HL_WSIZE*2);
- op32(ctx,PUSH,fetch(ra),UNUSED);
- op32(ctx,PUSH,pconst(&p,(int)(int_val)dst->t),UNUSED);
-# endif
- if( ra->t->kind == HOBJ ) hl_get_obj_rt(ra->t); // ensure it's initialized
- call_native(ctx,hl_to_virtual,size);
- store(ctx,dst,PEAX,true);
- }
- break;
- case OMakeEnum:
- {
- hl_enum_construct *c = &dst->t->tenum->constructs[o->p2];
- int_val args[] = { (int_val)dst->t, o->p2 };
- int i;
- call_native_consts(ctx, hl_alloc_enum, args, 2);
- RLOCK(PEAX);
- for(i=0;inparams;i++) {
- preg *r = fetch(R(o->extra[i]));
- copy(ctx, pmem(&p,Eax,c->offsets[i]),r, R(o->extra[i])->size);
- RUNLOCK(fetch(R(o->extra[i])));
- if ((i & 15) == 0) jit_buf(ctx);
- }
- store(ctx, dst, PEAX, true);
- }
- break;
- case OEnumAlloc:
- {
- int_val args[] = { (int_val)dst->t, o->p2 };
- call_native_consts(ctx, hl_alloc_enum, args, 2);
- store(ctx, dst, PEAX, true);
- }
- break;
- case OEnumField:
- {
- hl_enum_construct *c = &ra->t->tenum->constructs[o->p3];
- preg *r = alloc_cpu(ctx,ra,true);
- copy_to(ctx,dst,pmem(&p,r->id,c->offsets[(int)(int_val)o->extra]));
- }
- break;
- case OSetEnumField:
- {
- hl_enum_construct *c = &dst->t->tenum->constructs[0];
- preg *r = alloc_cpu(ctx,dst,true);
- switch( rb->t->kind ) {
- case HF64:
- {
- preg *d = alloc_fpu(ctx,rb,true);
- copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),d,8);
- break;
- }
- default:
- copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),alloc_cpu(ctx,rb,true),hl_type_size(c->params[o->p2]));
- break;
- }
- }
- break;
- case ONullCheck:
- {
- int jz;
- preg *r = alloc_cpu(ctx,dst,true);
- op64(ctx,TEST,r,r);
- XJump_small(JNotZero,jz);
-
- hl_opcode *next = f->ops + opCount + 1;
- bool null_field_access = false;
- int hashed_name = 0;
- // skip const and operation between nullcheck and access
- while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) {
- next++;
- }
- if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) {
- int fid = next->op == OField ? next->p3 : next->p2;
- hl_obj_field *f = NULL;
- if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT )
- f = hl_obj_field_fetch(dst->t, fid);
- else if( dst->t->kind == HVIRTUAL )
- f = dst->t->virt->fields + fid;
- if( f == NULL ) ASSERT(dst->t->kind);
- null_field_access = true;
- hashed_name = f->hashed_name;
- } else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) {
- int fid = next->p2 < 0 ? -1 : ctx->m->functions_indexes[next->p2];
- hl_function *cf = ctx->m->code->functions + fid;
- const uchar *name = fun_field_name(cf);
- null_field_access = true;
- hashed_name = hl_hash_gen(name, true);
- }
-
- if( null_field_access ) {
- pad_before_call(ctx, HL_WSIZE);
- if( hashed_name >= 0 && hashed_name < 256 )
- op64(ctx,PUSH8,pconst(&p,hashed_name),UNUSED);
- else
- op32(ctx,PUSH,pconst(&p,hashed_name),UNUSED);
- } else {
- pad_before_call(ctx, 0);
- }
-
- jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
- j->pos = BUF_POS();
- j->target = null_field_access ? -3 : -1;
- j->next = ctx->calls;
- ctx->calls = j;
-
- op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
- op_call(ctx,PEAX,-1);
- patch_jump(ctx,jz);
- }
- break;
- case OSafeCast:
- make_dyn_cast(ctx, dst, ra);
- break;
- case ODynGet:
- {
- int size;
-# ifdef HL_64
- if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
- size = begin_native_call(ctx,2);
- } else {
- size = begin_native_call(ctx,3);
- set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
- }
- set_native_arg(ctx,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
- set_native_arg(ctx,fetch(ra));
-# else
- preg *r;
- r = alloc_reg(ctx,RCPU);
- if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
- size = pad_before_call(ctx,HL_WSIZE*2);
- } else {
- size = pad_before_call(ctx,HL_WSIZE*3);
- op64(ctx,MOV,r,pconst64(&p,(int_val)dst->t));
- op64(ctx,PUSH,r,UNUSED);
- }
- op64(ctx,MOV,r,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
- op64(ctx,PUSH,r,UNUSED);
- op64(ctx,PUSH,fetch(ra),UNUSED);
-# endif
- call_native(ctx,get_dynget(dst->t),size);
- store_result(ctx,dst);
- }
- break;
- case ODynSet:
- {
- int size;
-# ifdef HL_64
- switch( rb->t->kind ) {
- case HF32:
- case HF64:
- size = begin_native_call(ctx, 3);
- set_native_arg_fpu(ctx,fetch(rb),rb->t->kind == HF32);
- set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
- set_native_arg(ctx,fetch(dst));
- call_native(ctx,get_dynset(rb->t),size);
- break;
- case HI64:
- case HGUID:
- size = begin_native_call(ctx, 3);
- set_native_arg(ctx,fetch(rb));
- set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
- set_native_arg(ctx,fetch(dst));
- call_native(ctx,get_dynset(rb->t),size);
- break;
- default:
- size = begin_native_call(ctx,4);
- set_native_arg(ctx,fetch(rb));
- set_native_arg(ctx,pconst64(&p,(int_val)rb->t));
- set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
- set_native_arg(ctx,fetch(dst));
- call_native(ctx,get_dynset(rb->t),size);
- break;
- }
-# else
- switch( rb->t->kind ) {
- case HF32:
- size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(float));
- push_reg(ctx,rb);
- op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
- op32(ctx,PUSH,fetch(dst),UNUSED);
- call_native(ctx,get_dynset(rb->t),size);
- break;
- case HF64:
- case HI64:
- case HGUID:
- size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(double));
- push_reg(ctx,rb);
- op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
- op32(ctx,PUSH,fetch(dst),UNUSED);
- call_native(ctx,get_dynset(rb->t),size);
- break;
- default:
- size = pad_before_call(ctx, HL_WSIZE*4);
- op32(ctx,PUSH,fetch32(ctx,rb),UNUSED);
- op32(ctx,PUSH,pconst64(&p,(int_val)rb->t),UNUSED);
- op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
- op32(ctx,PUSH,fetch(dst),UNUSED);
- call_native(ctx,get_dynset(rb->t),size);
- break;
- }
-# endif
- }
- break;
- case OTrap:
- {
- int size, jenter, jtrap;
- int offset = 0;
- int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
- hl_trap_ctx *t = NULL;
-# ifndef HL_THREADS
- if( tinf == NULL ) tinf = hl_get_thread(); // single thread
-# endif
-
-# ifdef HL_64
- preg *trap = REG_AT(CALL_REGS[0]);
-# else
- preg *trap = PEAX;
-# endif
- RLOCK(trap);
-
- preg *treg = alloc_reg(ctx, RCPU);
- if( !tinf ) {
- call_native(ctx, hl_get_thread, 0);
- op64(ctx,MOV,treg,PEAX);
- offset = (int)(int_val)&tinf->trap_current;
- } else {
- offset = 0;
- op64(ctx,MOV,treg,pconst64(&p,(int_val)&tinf->trap_current));
- }
- op64(ctx,MOV,trap,pmem(&p,treg->id,offset));
- op64(ctx,SUB,PESP,pconst(&p,trap_size));
- op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap);
- op64(ctx,MOV,trap,PESP);
- op64(ctx,MOV,pmem(&p,treg->id,offset),trap);
-
- /*
- trap E,@catch
- catch g
- catch g2
- ...
- @:catch
-
- // Before haxe 5
- This is a bit hackshish : we want to detect the type of exception filtered by the catch so we check the following
- sequence of HL opcodes:
-
- trap E,@catch
- ...
- @catch:
- global R, _
- call _, ???(R,E)
-
- ??? is expected to be hl.BaseType.check
- */
- hl_opcode *cat = f->ops + opCount + 1;
- hl_opcode *next = f->ops + opCount + 1 + o->p2;
- hl_opcode *next2 = f->ops + opCount + 2 + o->p2;
- if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->stack.id == (int)(int_val)next2->extra) ) {
- int gindex = cat->op == OCatch ? cat->p1 : next->p2;
- hl_type *gt = m->code->globals[gindex];
- while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super;
- if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) {
- void *addr = m->globals_data + m->globals_indexes[gindex];
-# ifdef HL_64
- op64(ctx,MOV,treg,pconst64(&p,(int_val)addr));
- op64(ctx,MOV,treg,pmem(&p,treg->id,0));
-# else
- op64(ctx,MOV,treg,paddr(&p,addr));
-# endif
- } else
- op64(ctx,MOV,treg,pconst(&p,0));
- } else {
- op64(ctx,MOV,treg,pconst(&p,0));
- }
- op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->tcheck),treg);
-
- // On Win64 setjmp actually takes two arguments
- // the jump buffer and the frame pointer (or the stack pointer if there is no FP)
-#if defined(HL_WIN) && defined(HL_64)
- size = begin_native_call(ctx, 2);
- set_native_arg(ctx, REG_AT(Ebp));
-#else
- size = begin_native_call(ctx, 1);
-#endif
- set_native_arg(ctx,trap);
-#ifdef HL_MINGW
- call_native(ctx,_setjmp,size);
-#else
- call_native(ctx,setjmp,size);
-#endif
- op64(ctx,TEST,PEAX,PEAX);
- XJump_small(JZero,jenter);
- op64(ctx,ADD,PESP,pconst(&p,trap_size));
- if( !tinf ) {
- call_native(ctx, hl_get_thread, 0);
- op64(ctx,MOV,PEAX,pmem(&p, Eax, (int)(int_val)&tinf->exc_value));
- } else {
- op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&tinf->exc_value));
- op64(ctx,MOV,PEAX,pmem(&p, Eax, 0));
- }
- store(ctx,dst,PEAX,false);
-
- jtrap = do_jump(ctx,OJAlways,false);
- register_jump(ctx,jtrap,(opCount + 1) + o->p2);
- patch_jump(ctx,jenter);
- }
- break;
- case OEndTrap:
- {
- int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
- hl_trap_ctx *tmp = NULL;
- preg *addr,*r;
- int offset;
- if (!tinf) {
- call_native(ctx, hl_get_thread, 0);
- addr = PEAX;
- RLOCK(addr);
- offset = (int)(int_val)&tinf->trap_current;
- } else {
- offset = 0;
- addr = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, addr, pconst64(&p, (int_val)&tinf->trap_current));
- }
- r = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, r, pmem(&p,addr->id,offset));
- op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev));
- op64(ctx, MOV, pmem(&p,addr->id, offset), r);
-# ifdef HL_WIN
- // erase eip (prevent false positive)
- {
- _JUMP_BUFFER *b = NULL;
-# ifdef HL_64
- op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&(b->Rip)),PEAX);
-# else
- op64(ctx,MOV,pmem(&p,Esp,(int)&(b->Eip)),PEAX);
-# endif
- }
-# endif
- op64(ctx,ADD,PESP,pconst(&p,trap_size));
- }
- break;
- case OEnumIndex:
- {
- preg *r = alloc_reg(ctx,RCPU);
- op64(ctx,MOV,r,pmem(&p,alloc_cpu(ctx,ra,true)->id,HL_WSIZE));
- store(ctx,dst,r,true);
- break;
- }
- break;
- case OSwitch:
- {
- int jdefault;
- int i;
- preg *r = alloc_cpu(ctx, dst, true);
- preg *r2 = alloc_reg(ctx, RCPU);
- op32(ctx, CMP, r, pconst(&p,o->p2));
- XJump(JUGte,jdefault);
- // r2 = r * 5 + eip
-# ifdef HL_64
- op64(ctx, XOR, r2, r2);
-# endif
- op32(ctx, MOV, r2, r);
- op32(ctx, SHL, r2, pconst(&p,2));
- op32(ctx, ADD, r2, r);
-# ifdef HL_64
- preg *tmp = alloc_reg(ctx, RCPU);
- op64(ctx, MOV, tmp, pconst64(&p,RESERVE_ADDRESS));
-# else
- op64(ctx, ADD, r2, pconst64(&p,RESERVE_ADDRESS));
-# endif
- {
- jlist *s = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist));
- s->pos = BUF_POS() - sizeof(void*);
- s->next = ctx->switchs;
- ctx->switchs = s;
- }
-# ifdef HL_64
- op64(ctx, ADD, r2, tmp);
-# endif
- op64(ctx, JMP, r2, UNUSED);
- for(i=0;ip2;i++) {
- int j = do_jump(ctx,OJAlways,false);
- register_jump(ctx,j,(opCount + 1) + o->extra[i]);
- if( (i & 15) == 0 ) jit_buf(ctx);
- }
- patch_jump(ctx, jdefault);
- }
- break;
- case OGetTID:
- op32(ctx, MOV, alloc_cpu(ctx,dst,false), pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
- store(ctx,dst,dst->current,false);
- break;
- case OAssert:
- {
- pad_before_call(ctx, 0);
- jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
- j->pos = BUF_POS();
- j->target = -2;
- j->next = ctx->calls;
- ctx->calls = j;
-
- op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
- op_call(ctx,PEAX,-1);
- }
- break;
- case ONop:
- break;
- case OPrefetch:
- {
- preg *r = alloc_cpu(ctx, dst, true);
- if( o->p2 > 0 ) {
- switch( dst->t->kind ) {
- case HOBJ:
- case HSTRUCT:
- {
- hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
- preg *r2 = alloc_reg(ctx, RCPU);
- op64(ctx, LEA, r2, pmem(&p, r->id, rt->fields_indexes[o->p2-1]));
- r = r2;
- }
- break;
- default:
- ASSERT(dst->t->kind);
- break;
- }
- }
- switch( o->p3 ) {
- case 0:
- op64(ctx, PREFETCHT0, pmem(&p,r->id,0), UNUSED);
- break;
- case 1:
- op64(ctx, PREFETCHT1, pmem(&p,r->id,0), UNUSED);
- break;
- case 2:
- op64(ctx, PREFETCHT2, pmem(&p,r->id,0), UNUSED);
- break;
- case 3:
- op64(ctx, PREFETCHNTA, pmem(&p,r->id,0), UNUSED);
- break;
- case 4:
- op64(ctx, PREFETCHW, pmem(&p,r->id,0), UNUSED);
- break;
- default:
- ASSERT(o->p3);
- break;
- }
- }
- break;
- case OAsm:
- {
- switch( o->p1 ) {
- case 0: // byte output
- B(o->p2);
- break;
- case 1: // scratch cpu reg
- scratch(REG_AT(o->p2));
- break;
- case 2: // read vm reg
- rb--;
- copy(ctx, REG_AT(o->p2), &rb->stack, rb->size);
- scratch(REG_AT(o->p2));
- break;
- case 3: // write vm reg
- rb--;
- copy(ctx, &rb->stack, REG_AT(o->p2), rb->size);
- scratch(rb->current);
- break;
- case 4:
- if( ctx->totalRegsSize != 0 )
- hl_fatal("Asm naked function should not have local variables");
- if( opCount != 0 )
- hl_fatal("Asm naked function should be on first opcode");
- ctx->buf.b -= BUF_POS() - ctx->functionPos; // reset to our function start
- break;
- default:
- ASSERT(o->p1);
- break;
- }
- }
- break;
- case OCatch:
- // Only used by OTrap typing
- break;
- default:
- jit_error(hl_op_name(o->op));
- break;
- }
- // we are landing at this position, assume we have lost our registers
- if( ctx->opsPos[opCount+1] == -1 )
- discard_regs(ctx,true);
- ctx->opsPos[opCount+1] = BUF_POS();
-
- // write debug infos
- size = BUF_POS() - codePos;
- if( debug16 && size > 0xFF00 ) {
- debug32 = malloc(sizeof(int) * (f->nops + 1));
- for(i=0;icurrentPos;i++)
- debug32[i] = debug16[i];
- free(debug16);
- debug16 = NULL;
- }
- if( debug16 ) debug16[ctx->currentPos] = (unsigned short)size; else if( debug32 ) debug32[ctx->currentPos] = size;
-
- }
- // patch jumps
- {
- jlist *j = ctx->jumps;
- while( j ) {
- *(int*)(ctx->startBuf + j->pos) = ctx->opsPos[j->target] - (j->pos + 4);
- j = j->next;
- }
- ctx->jumps = NULL;
- }
- int codeEndPos = BUF_POS();
- // add nops padding
- jit_nops(ctx);
- // clear regs
- for(i=0;iholds = NULL;
- r->lock = 0;
- }
- // save debug infos
- if( ctx->debug ) {
- int fid = (int)(f - m->code->functions);
- ctx->debug[fid].start = codePos;
- ctx->debug[fid].offsets = debug32 ? (void*)debug32 : (void*)debug16;
- ctx->debug[fid].large = debug32 != NULL;
- }
- // unwind info
-#ifdef WIN64_UNWIND_TABLES
- int uw_idx = ctx->nunwind++;
- ctx->unwind_table[uw_idx].BeginAddress = codePos;
- ctx->unwind_table[uw_idx].EndAddress = codeEndPos;
- ctx->unwind_table[uw_idx].UnwindData = ctx->unwind_offset;
-#endif
- // reset tmp allocator
hl_free(&ctx->falloc);
- return codePos;
-}
-
-static void *get_wrapper( hl_type *t ) {
- return call_jit_hl2c;
-}
-
-void hl_jit_patch_method( void *old_fun, void **new_fun_table ) {
- // mov eax, addr
- // jmp [eax]
- unsigned char *b = (unsigned char*)old_fun;
- unsigned long long addr = (unsigned long long)(int_val)new_fun_table;
-# ifdef HL_64
- *b++ = 0x48;
- *b++ = 0xB8;
- *b++ = (unsigned char)addr;
- *b++ = (unsigned char)(addr>>8);
- *b++ = (unsigned char)(addr>>16);
- *b++ = (unsigned char)(addr>>24);
- *b++ = (unsigned char)(addr>>32);
- *b++ = (unsigned char)(addr>>40);
- *b++ = (unsigned char)(addr>>48);
- *b++ = (unsigned char)(addr>>56);
-# else
- *b++ = 0xB8;
- *b++ = (unsigned char)addr;
- *b++ = (unsigned char)(addr>>8);
- *b++ = (unsigned char)(addr>>16);
- *b++ = (unsigned char)(addr>>24);
-# endif
- *b++ = 0xFF;
- *b++ = 0x20;
-}
-
-static void missing_closure() {
- hl_error("Missing static closure");
+ ctx->mod = m;
+ ctx->fun = f;
+ current_ctx = ctx;
+ hl_emit_function(ctx);
+ current_ctx = NULL;
+ return 0;
}
void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ) {
- jlist *c;
- int size = BUF_POS();
- unsigned char *code;
- if( size & 4095 ) size += 4096 - (size&4095);
- code = (unsigned char*)hl_alloc_executable_memory(size);
- if( code == NULL ) return NULL;
- memcpy(code,ctx->startBuf,BUF_POS());
- *codesize = size;
- *debug = ctx->debug;
- if( !call_jit_c2hl ) {
- call_jit_c2hl = code + ctx->c2hl;
- call_jit_hl2c = code + ctx->hl2c;
- hl_setup.get_wrapper = get_wrapper;
- hl_setup.static_call = callback_c2hl;
- hl_setup.static_call_ref = true;
- }
-#ifdef WIN64_UNWIND_TABLES
- m->unwind_table = ctx->unwind_table;
- RtlAddFunctionTable(m->unwind_table, ctx->nunwind, (DWORD64)code);
-#endif
- if( !ctx->static_function_offset ) {
- int i;
- ctx->static_function_offset = true;
- for(i=0;i<(int)(sizeof(ctx->static_functions)/sizeof(void*));i++)
- ctx->static_functions[i] = (void*)(code + (int)(int_val)ctx->static_functions[i]);
- }
- // patch calls
- c = ctx->calls;
- while( c ) {
- void *fabs;
- if( c->target < 0 )
- fabs = ctx->static_functions[-c->target-1];
- else {
- fabs = m->functions_ptrs[c->target];
- if( fabs == NULL ) {
- // read absolute address from previous module
- int old_idx = m->hash->functions_hashes[m->functions_indexes[c->target]];
- if( old_idx < 0 )
- return NULL;
- fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
- } else {
- // relative
- fabs = (unsigned char*)code + (int)(int_val)fabs;
- }
- }
- if( (code[c->pos]&~3) == (IS_64?0x48:0xB8) || code[c->pos] == 0x68 ) // MOV : absolute | PUSH
- *(void**)(code + c->pos + (IS_64?2:1)) = fabs;
- else {
- int_val delta = (int_val)fabs - (int_val)code - (c->pos + 5);
- int rpos = (int)delta;
- if( (int_val)rpos != delta ) {
- printf("Target code too far too rebase\n");
- return NULL;
- }
- *(int*)(code + c->pos + 1) = rpos;
- }
- c = c->next;
- }
- // patch switchs
- c = ctx->switchs;
- while( c ) {
- *(void**)(code + c->pos) = code + c->pos + (IS_64 ? 14 : 6);
- c = c->next;
- }
- // patch closures
- {
- vclosure *c = ctx->closure_list;
- while( c ) {
- vclosure *next;
- int fidx = (int)(int_val)c->fun;
- void *fabs = m->functions_ptrs[fidx];
- if( fabs == NULL ) {
- // read absolute address from previous module
- int old_idx = m->hash->functions_hashes[m->functions_indexes[fidx]];
- if( old_idx < 0 )
- fabs = missing_closure;
- else
- fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
- } else {
- // relative
- fabs = (unsigned char*)code + (int)(int_val)fabs;
- }
- c->fun = fabs;
- next = (vclosure*)c->value;
- c->value = NULL;
- c = next;
- }
- }
- return code;
+ printf("TODO:emit_code\n");
+ exit(0);
+ return NULL;
}
+void hl_jit_patch_method( void*fun, void**newt ) {
+ jit_assert();
+}
diff --git a/src/jit.h b/src/jit.h
new file mode 100644
index 000000000..820e79bd5
--- /dev/null
+++ b/src/jit.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C)2005-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef JIT_H
+#define JIT_H
+
+#include
+
+typedef enum {
+ LOAD_ADDR,
+ LOAD_IMM,
+ LOAD_ARG,
+ STORE,
+ LEA,
+ TEST,
+ CMP,
+ JCOND,
+ JUMP,
+ JUMP_TABLE,
+ BINOP,
+ UNOP,
+ CONV,
+ CONV_UNSIGNED,
+ RET,
+ CALL_PTR,
+ CALL_REG,
+ CALL_FUN,
+ MOV,
+ ALLOC_STACK,
+ FREE_STACK,
+ NATIVE_REG,
+ PREFETCH,
+ DEBUG_BREAK,
+} emit_op;
+
+typedef enum {
+ REG_RBP,
+} native_reg;
+
+typedef enum {
+ M_NONE,
+ M_UI8,
+ M_UI16,
+ M_I32,
+ M_I64,
+ M_F32,
+ M_F64,
+ M_PTR,
+ M_VOID,
+ M_NORET,
+} emit_mode;
+
+typedef struct {
+ int index;
+} ereg;
+
+typedef struct {
+ union {
+ struct {
+ unsigned char op;
+ unsigned char mode;
+ unsigned char nargs;
+ unsigned char _unused;
+ };
+ int header;
+ };
+ int size_offs;
+ union {
+ struct {
+ ereg a;
+ ereg b;
+ };
+ uint64 value;
+ };
+} einstr;
+
+#define VAL_NULL 0x80000000
+#define IS_NULL(e) ((e).index == VAL_NULL)
+
+typedef struct {
+ int *data;
+ int max;
+ int cur;
+} int_alloc;
+
+typedef struct _ephi ephi;
+
+struct _ephi {
+ ereg value;
+ int nvalues;
+ ereg *values;
+};
+
+typedef struct _eblock {
+ int id;
+ int start_pos;
+ int end_pos;
+ int next_count;
+ int pred_count;
+ int phi_count;
+ int *nexts;
+ int *preds;
+ ephi *phis;
+} eblock;
+
+typedef struct _emit_ctx emit_ctx;
+
+typedef struct _jit_ctx ji_ctx;
+
+struct _jit_ctx {
+ hl_module *mod;
+ hl_function *fun;
+ hl_alloc falloc;
+ emit_ctx *emit;
+ // emit output
+ int instr_count;
+ int block_count;
+ int value_count;
+ einstr *instrs;
+ eblock *blocks;
+ int *values_writes;
+ int *emit_pos_map;
+};
+
+jit_ctx *hl_jit_alloc();
+void hl_jit_free( jit_ctx *ctx, h_bool can_reset );
+void hl_jit_reset( jit_ctx *ctx, hl_module *m );
+void hl_jit_init( jit_ctx *ctx, hl_module *m );
+int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f );
+
+void hl_jit_null_field_access();
+void hl_jit_null_access();
+void hl_jit_assert();
+
+void int_alloc_reset( int_alloc *a );
+void int_alloc_free( int_alloc *a );
+int *int_alloc_get( int_alloc *a, int count );
+void int_alloc_store( int_alloc *a, int v );
+
+void hl_emit_dump( jit_ctx *ctx );
+const char *hl_emit_regstr( ereg v );
+ereg *hl_emit_get_args( emit_ctx *ctx, einstr *e );
+
+#define val_str(v) hl_emit_regstr(v)
+
+
+#ifdef HL_DEBUG
+# define JIT_DEBUG
+#endif
+
+#define jit_error(msg) { hl_jit_error(msg,__func__,__LINE__); hl_debug_break(); exit(-1); }
+#define jit_assert() jit_error("")
+
+#ifdef JIT_DEBUG
+# define jit_debug(...) printf(__VA_ARGS__)
+#else
+# define jit_debug(...)
+#endif
+
+void hl_jit_error( const char *msg, const char *func, int line );
+
+void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous );
+void hl_jit_patch_method( void *old_fun, void **new_fun_table );
+
+#endif
diff --git a/src/jit_dump.c b/src/jit_dump.c
new file mode 100644
index 000000000..f9c8ce348
--- /dev/null
+++ b/src/jit_dump.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include
+
+static const char *op_names[] = {
+ "load-addr",
+ "load-imm",
+ "load-arg",
+ "store",
+ "lea",
+ "test",
+ "cmp",
+ "jcond",
+ "jump",
+ "jump-table",
+ "binop",
+ "unop",
+ "conv",
+ "conv-unsigned",
+ "ret",
+ "call",
+ "call",
+ "call",
+ "mov",
+ "alloc-stack",
+ "free-stack",
+ "native-reg",
+ "prefetch",
+ "debug-break",
+};
+
+const char *hl_emit_regstr( ereg v ) {
+ static char fmts[4][10];
+ static int flip = 0;
+ // allow up to four concurrent val_str
+ char *fmt = fmts[flip++&3];
+ if( IS_NULL(v) )
+ sprintf(fmt,"NULL???");
+ else if( v.index < 0 )
+ sprintf(fmt,"P%d",-v.index);
+ else
+ sprintf(fmt,"V%d",v.index);
+ return fmt;
+}
+
+static void hl_dump_arg( hl_function *fun, int fmt, int val, char sep, int pos ) {
+ if( fmt == 0 ) return;
+ printf("%c", sep);
+ switch( fmt ) {
+ case 1:
+ case 2:
+ printf("R%d", val);
+ if( val < 0 || val >= fun->nregs ) printf("?");
+ break;
+ case 3:
+ printf("%d", val);
+ break;
+ case 4:
+ printf("[%d]", val);
+ break;
+ case 5:
+ case 6:
+ printf("@%X", val + pos + 1);
+ break;
+ default:
+ printf("?#%d", fmt);
+ break;
+ }
+}
+
+#define OP(_,_a,_b,_c) ((_a) | (((_b)&0xFF) << 8) | (((_c)&0xFF) << 16)),
+#define OP_BEGIN static int hl_op_fmt[] = {
+#define OP_END };
+#undef R
+#include "opcodes.h"
+
+static void hl_dump_op( hl_function *fun, hl_opcode *op ) {
+ printf("%s", hl_op_name(op->op) + 1);
+ int fmt = hl_op_fmt[op->op];
+ int pos = (int)(op - fun->ops);
+ hl_dump_arg(fun, fmt & 0xFF, op->p1, ' ', pos);
+ if( ((fmt >> 8) & 0xFF) == 5 ) {
+ int count = (fmt >> 16) & 0xFF;
+ printf(" [");
+ if( count == 4 ) {
+ printf("%d", op->p2);
+ printf(",%d", op->p3);
+ printf(",%d", (int)(int_val)op->extra);
+ } else if( op->op == OSwitch ) {
+ for(int i=0;ip2;i++) {
+ if( i != 0 ) printf(",");
+ printf("@%X", (op->extra[i] + pos + 1));
+ }
+ printf(",def=@%X", op->p3 + pos + 1);
+ } else {
+ if( count == 0xFF )
+ count = op->p3;
+ else {
+ printf("%d,%d,",op->p2,op->p3);
+ count -= 3;
+ }
+ for(int i=0;iextra[i]);
+ }
+ }
+ printf("]");
+ } else {
+ hl_dump_arg(fun, (fmt >> 8) & 0xFF, op->p2,',', pos);
+ hl_dump_arg(fun, fmt >> 16, op->p3,',', pos);
+ }
+}
+
+static const char *emit_mode_str( emit_mode mode ) {
+ switch( mode ) {
+ case M_UI8: return "-ui8";
+ case M_UI16: return "-ui16";
+ case M_I32: return "-i32";
+ case M_I64: return "-i64";
+ case M_F32: return "-f32";
+ case M_F64: return "-f64";
+ case M_PTR: return "";
+ case M_VOID: return "-void";
+ case M_NORET: return "-noret";
+ default:
+ static char buf[50];
+ sprintf(buf,"?%d",mode);
+ return buf;
+ }
+}
+
+static void dump_value( jit_ctx *ctx, uint64 value, emit_mode mode ) {
+ union {
+ uint64 v;
+ double d;
+ float f;
+ } tmp;
+ hl_module *mod = ctx->mod;
+ hl_code *code = ctx->mod->code;
+ switch( mode ) {
+ case M_NONE:
+ printf("?0x%llX",value);
+ break;
+ case M_UI8:
+ case M_UI16:
+ case M_I32:
+ if( (int)value >= -0x10000 && (int)value <= 0x10000 )
+ printf("%d",(int)value);
+ else
+ printf("0x%X",(int)value);
+ break;
+ case M_F32:
+ tmp.v = value;
+ printf("%f",tmp.f);
+ break;
+ case M_F64:
+ tmp.v = value;
+ printf("%g",tmp.d);
+ break;
+ default:
+ if( value == 0 )
+ printf("NULL");
+ else if( mode == M_PTR && value >= (uint64)code->types && value < (uint64)(code->types + code->ntypes) )
+ uprintf(USTR("<%s>"),hl_type_str((hl_type*)value));
+ else if( mode == M_PTR && value >= (uint64)mod->globals_data && value < (uint64)(mod->globals_data + mod->globals_size) )
+ printf("",(int)(value - (uint64)mod->globals_data));
+ else if( value == (uint64)&hlt_void )
+ printf("");
+ else
+ printf("0x%llX",value);
+ break;
+ }
+}
+
+static void hl_dump_fun_name( hl_function *f ) {
+ if( f->obj )
+ uprintf(USTR("%s.%s"),f->obj->name,f->field.name);
+ else if( f->field.ref )
+ uprintf(USTR("%s.~%s.%d"),f->field.ref->obj->name, f->field.ref->field.name, f->ref);
+ printf("[%X]", f->findex);
+}
+
+static void hl_dump_args( jit_ctx *ctx, einstr *e ) {
+ ereg *v = hl_emit_get_args(ctx->emit, e);
+ printf("(");
+ for(int i=0;inargs;i++) {
+ if( i != 0 ) printf(",");
+ printf("%s", val_str(v[i]));
+ }
+ printf(")");
+}
+
+typedef struct { const char *name; void *ptr; } named_ptr;
+static void hl_dump_ptr_name( jit_ctx *ctx, void *ptr ) {
+# define N(v) ptr_names[i].name = #v; ptr_names[i].ptr = v; i++
+# define N2(n,v) ptr_names[i].name = n; ptr_names[i].ptr = v; i++
+# define DYN(p) N2("dyn_get" #p, hl_dyn_get##p); N2("dyn_set" #p, hl_dyn_set##p); N2("dyn_cast" #p, hl_dyn_cast##p)
+ static named_ptr ptr_names[256] = { NULL };
+ int i = 0;
+ if( !ptr_names[0].ptr ) {
+ N(hl_alloc_dynbool);
+ N(hl_alloc_dynamic);
+ N(hl_alloc_obj);
+ N(hl_alloc_dynobj);
+ N(hl_alloc_virtual);
+ N(hl_alloc_closure_ptr);
+ N(hl_dyn_call);
+ N(hl_dyn_call_obj);
+ N(hl_throw);
+ N(hl_rethrow);
+ N(hl_to_virtual);
+ N(hl_alloc_enum);
+ DYN(f);
+ DYN(d);
+ DYN(i64);
+ DYN(i);
+ DYN(p);
+ N2("null_field",hl_jit_null_field_access);
+ N2("null_access",hl_jit_null_access);
+ N(hl_get_thread);
+ N(setjmp);
+ N(_setjmp);
+ N2("assert",hl_jit_assert);
+ i = 0;
+ }
+# undef N
+# undef N2
+ while( true ) {
+ named_ptr p = ptr_names[i++];
+ if( !p.ptr ) break;
+ if( p.ptr == ptr ) {
+ printf("<%s>",p.name);
+ return;
+ }
+ }
+ for(i=0;imod->code->nnatives;i++) {
+ hl_native *n = ctx->mod->code->natives + i;
+ if( ctx->mod->functions_ptrs[n->findex] == ptr ) {
+ printf("<%s.%s>",n->lib[0] == '?' ? n->lib + 1 : n->lib,n->name);
+ return;
+ }
+ }
+ printf("0x%llX>",(uint64)ptr);
+}
+
+void hl_emit_flush( jit_ctx *ctx );
+
+void hl_emit_dump( jit_ctx *ctx ) {
+ int i;
+ int cur_op = 0;
+ hl_function *f = ctx->fun;
+ int nargs = f->type->fun->nargs;
+ hl_emit_flush(ctx); // if it not was not before (in case of dump during emit)
+ printf("function ");
+ hl_dump_fun_name(f);
+ printf("(");
+ for(i=0;i 0 ) printf(",");
+ uprintf(USTR("R%d"), i);
+ }
+ printf(")\n");
+ for(i=0;inregs;i++)
+ uprintf(USTR("\tR%d : %s\n"),i, hl_type_str(f->regs[i]));
+ // check blocks intervals
+ int cur = 0;
+ for(i=0;iblock_count;i++) {
+ eblock *b = ctx->blocks + i;
+ if( b->id != i ) printf(" ??? BLOCK @%d ID is %d\n",i,b->id);
+ if( b->start_pos != cur ) printf(" ??? BLOCK %d START AT %X != %X\n", i, b->start_pos, cur);
+ if( b->end_pos < b->start_pos ) printf(" ??? BLOCK %d RANGE [%X,%X]\n", i, b->start_pos, b->end_pos);
+ cur = b->end_pos + 1;
+ }
+ if( cur != ctx->instr_count )
+ printf(" ??? MISSING BLOCK FOR RANGE %X-%X\n", cur, ctx->instr_count);
+ // print instrs
+ int vpos = 0;
+ cur = 0;
+ for(i=0;iinstr_count;i++) {
+ while( cur < ctx->block_count && ctx->blocks[cur].start_pos == i ) {
+ eblock *b = &ctx->blocks[cur];
+ printf("--- BLOCK #%d ---\n", cur);
+ for(int k=0;kphi_count;k++) {
+ ephi *p = b->phis + k;
+ printf("\t\t@%X %s = phi(",i,val_str(p->value));
+ for(int n=0;nnvalues;n++) {
+ if( n > 0 ) printf(",");
+ printf("%s",val_str(p->values[n]));
+ }
+ printf(")");
+ if( p->nvalues <= 1 )
+ printf(" ???");
+ printf("\n");
+ }
+ cur++;
+ }
+ while( ctx->emit_pos_map[cur_op] == i ) {
+ printf("@%X ", cur_op);
+ hl_dump_op(ctx->fun, f->ops + cur_op);
+ printf("\n");
+ cur_op++;
+ }
+ einstr *e = ctx->instrs + i;
+ printf("\t\t@%X ", i);
+ if( vpos < ctx->value_count && ctx->values_writes[vpos] == i )
+ printf("V%d = ", vpos++);
+ printf("%s", op_names[e->op]);
+ bool show_size = true;
+ switch( e->op ) {
+ case TEST:
+ case CMP:
+ printf("-%s", hl_op_name(e->size_offs)+2);
+ show_size = false;
+ break;
+ case BINOP:
+ case UNOP:
+ printf("-%s", hl_op_name(e->size_offs)+1);
+ show_size = false;
+ break;
+ default:
+ break;
+ }
+ if( e->mode )
+ printf("%s", emit_mode_str(e->mode));
+ switch( e->op ) {
+ case CALL_FUN:
+ printf(" ");
+ {
+ int fid = ctx->mod->functions_indexes[e->a.index];
+ hl_code *code = ctx->mod->code;
+ if( fid < code->nfunctions ) {
+ hl_dump_fun_name(&code->functions[fid]);
+ } else {
+ printf("???");
+ }
+ }
+ hl_dump_args(ctx,e);
+ break;
+ case CALL_REG:
+ printf(" %s", val_str(e->a));
+ hl_dump_args(ctx,e);
+ break;
+ case CALL_PTR:
+ printf(" ");
+ hl_dump_ptr_name(ctx, (void*)e->value);
+ hl_dump_args(ctx,e);
+ break;
+ case JUMP:
+ case JCOND:
+ printf(" @%X", i + 1 + e->size_offs);
+ break;
+ case LOAD_IMM:
+ printf(" ");
+ dump_value(ctx, e->value, e->mode);
+ break;
+ case LOAD_ADDR:
+ if( (e->size_offs>>8) )
+ printf(" %s[%Xh]", val_str(e->a), e->size_offs);
+ else
+ printf(" %s[%d]", val_str(e->a), e->size_offs);
+ break;
+ case STORE:
+ {
+ int offs = e->size_offs;
+ if( offs == 0 )
+ printf(" [%s]", val_str(e->a));
+ else
+ printf(" %s[%d]", val_str(e->a), offs);
+ printf(" = %s", val_str(e->b));
+ //if( e->mode == 0 || e->mode != ctx->instrs[ctx->values_writes[e->b.index]].mode )
+ // printf(" ???");
+ }
+ break;
+ default:
+ if( !IS_NULL(e->a) ) {
+ printf(" %s", val_str(e->a));
+ if( !IS_NULL(e->b) ) printf(", %s", val_str(e->b));
+ if( e->a.index >= vpos || e->b.index >= vpos ) printf(" ???");
+ }
+ if( show_size && e->size_offs != 0 )
+ printf(" %d", e->size_offs);
+ break;
+ }
+ printf("\n");
+ }
+ // invalid ?
+ while( vpos < ctx->value_count )
+ printf(" ??? UNWRITTEN VALUE V%d @%X\n", vpos, ctx->values_writes[vpos++]);
+ // interrupted
+ if( cur_op < f->nops ) {
+ printf("@%X ", cur_op);
+ hl_dump_op(ctx->fun, f->ops + cur_op);
+ printf("\n\t\t...\n");
+ }
+ printf("\n\n");
+ fflush(stdout);
+}
diff --git a/src/jit_emit.c b/src/jit_emit.c
new file mode 100644
index 000000000..5ebf22ec2
--- /dev/null
+++ b/src/jit_emit.c
@@ -0,0 +1,1921 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include
+#include
+#include "data_struct.h"
+
+static ereg ENULL = {VAL_NULL};
+static void __ignore( void *value ) {}
+
+typedef struct {
+ hl_type *t;
+ int id;
+} vreg;
+
+#define MAX_TMP_ARGS 32
+#define MAX_TRAPS 32
+#define MAX_REFS 512 // TODO : different impl
+
+typedef struct _linked_inf linked_inf;
+typedef struct _emit_block emit_block;
+typedef struct _tmp_phi tmp_phi;
+
+#define DEF_ALLOC &ctx->jit->falloc
+
+#define S_TYPE blocks
+#define S_NAME(name) blocks_##name
+#define S_VALUE emit_block*
+#include "data_struct.c"
+#define blocks_add(set,v) blocks_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_TYPE phi_arr
+#define S_NAME(name) phi_##name
+#define S_VALUE tmp_phi*
+#include "data_struct.c"
+#define phi_add(set,v) phi_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_SORTED
+
+#define S_DEFVAL ENULL
+#define S_CMP(a,b) a.index > b.index
+#define S_TYPE ereg_map
+#define S_NAME(name) ereg_##name
+#define S_VALUE ereg
+#include "data_struct.c"
+#define ereg_add(set,v) ereg_add_impl(DEF_ALLOC,&(set),v)
+
+#define S_MAP
+
+#define S_DEFVAL ENULL
+#define S_TYPE vreg_map
+#define S_NAME(name) vreg_##name
+#define S_KEY int
+#define S_VALUE ereg
+#include "data_struct.c"
+#define vreg_replace(set,k,v) vreg_replace_impl(DEF_ALLOC,&(set),k,v)
+
+struct _linked_inf {
+ int id;
+ void *ptr;
+ linked_inf *next;
+};
+
+struct _emit_block {
+ int id;
+ int start_pos;
+ int end_pos;
+ int wait_nexts;
+ int mark;
+ bool sealed;
+ blocks nexts;
+ blocks preds;
+ vreg_map written_vars;
+ phi_arr phis;
+ emit_block *wait_seal_next;
+};
+
+struct _tmp_phi {
+ ereg value;
+ vreg *r;
+ ereg target;
+ int final_id;
+ bool locked;
+ bool opt;
+ unsigned char mode;
+ emit_block *b;
+ ereg_map vals;
+ phi_arr ref_phis;
+ linked_inf *ref_blocks;
+};
+
+struct _emit_ctx {
+ hl_module *mod;
+ hl_function *fun;
+ jit_ctx *jit;
+
+ einstr *instrs;
+ vreg *vregs;
+ tmp_phi **phis;
+ int max_instrs;
+ int max_regs;
+ int max_phis;
+ int emit_pos;
+ int op_pos;
+ int phi_count;
+ int phi_depth;
+ bool flushed;
+
+ ereg tmp_args[MAX_TMP_ARGS];
+ ereg traps[MAX_TRAPS];
+ struct {
+ ereg r;
+ int reg;
+ } refs[MAX_REFS];
+ int *pos_map;
+ int pos_map_size;
+ int trap_count;
+ int ref_count;
+
+ int_alloc args_data;
+ int_alloc jump_regs;
+ int_alloc values;
+
+ emit_block *root_block;
+ emit_block *current_block;
+ emit_block *wait_seal;
+ linked_inf *arrival_points;
+ void *closure_list; // TODO : patch with good addresses
+};
+
+#define R(i) (ctx->vregs + (i))
+
+#define LOAD(r) emit_load_reg(ctx, r)
+#define STORE(r, v) emit_store_reg(ctx, r, v)
+#define LOAD_CONST(v, t) emit_load_const(ctx, (uint64)(v), t)
+#define LOAD_CONST_PTR(v) LOAD_CONST(v,&hlt_bytes)
+#define LOAD_MEM(v, offs, t) emit_load_mem(ctx, v, offs, t)
+#define LOAD_MEM_PTR(v, offs) LOAD_MEM(v, offs, &hlt_bytes)
+#define STORE_MEM(to, offs, v) emit_store_mem(ctx, to, offs, v)
+#define LOAD_OBJ_METHOD(obj,id) LOAD_MEM_PTR(LOAD_MEM_PTR(LOAD_MEM_PTR(obj,0),HL_WSIZE*2),HL_WSIZE*(id))
+#define OFFSET(base,index,mult,offset) emit_gen_ext(ctx, LEA, base, index, M_PTR, (mult) | ((offset) << 8))
+#define BREAK() emit_gen(ctx, DEBUG_BREAK, ENULL, ENULL, 0)
+#define GET_MODE(r) emit_get_mode(ctx,r)
+#define GET_PHI(r) ctx->phis[-(r).index-1]
+#define HDYN_VALUE 8
+
+#define IS_FLOAT(t) ((t)->kind == HF64 || (t)->kind == HF32)
+
+static hl_type hlt_ui8 = { HUI8, 0 };
+static hl_type hlt_ui16 = { HUI16, 0 };
+
+static linked_inf *link_add( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) {
+ linked_inf *l = hl_malloc(&ctx->jit->falloc,sizeof(linked_inf));
+ l->id = id;
+ l->ptr = ptr;
+ l->next = head;
+ return l;
+}
+
+static linked_inf *link_add_sort_unique( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) {
+ linked_inf *prev = NULL;
+ linked_inf *cur = head;
+ while( cur && cur->id < id ) {
+ prev = cur;
+ cur = cur->next;
+ }
+ // check duplicate
+ while( cur && cur->id == id ) {
+ if( cur->ptr == ptr )
+ return head;
+ cur = cur->next;
+ }
+ // insert
+ linked_inf *l = hl_malloc(&ctx->jit->falloc,sizeof(linked_inf));
+ l->id = id;
+ l->ptr = ptr;
+ if( !prev ) {
+ l->next = head;
+ return l;
+ } else {
+ l->next = prev->next;
+ prev->next = l;
+ return head;
+ }
+}
+
+static linked_inf *link_add_sort_replace( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) {
+ linked_inf *prev = NULL;
+ linked_inf *cur = head;
+ while( cur && cur->id < id ) {
+ prev = cur;
+ cur = cur->next;
+ }
+ // replace duplicate
+ if( cur && cur->id == id ) {
+ cur->ptr = ptr;
+ return head;
+ }
+ // insert
+ linked_inf *l = hl_malloc(&ctx->jit->falloc,sizeof(linked_inf));
+ l->id = id;
+ l->ptr = ptr;
+ if( !prev ) {
+ l->next = head;
+ return l;
+ } else {
+ l->next = prev->next;
+ prev->next = l;
+ return head;
+ }
+}
+
+static void *link_sort_lookup( linked_inf *head, int id ) {
+ while( head && head->id < id )
+ head = head->next;
+ if( head && head->id == id )
+ return head->ptr;
+ return NULL;
+}
+
+static linked_inf *link_sort_remove( linked_inf *head, int id ) {
+ linked_inf *prev = NULL;
+ linked_inf *cur = head;
+ while( cur && cur->id < id ) {
+ prev = cur;
+ cur = cur->next;
+ }
+ if( cur && cur->id == id ) {
+ if( !prev )
+ return cur->next;
+ prev->next = cur->next;
+ return head;
+ }
+ return head;
+}
+
+static emit_mode hl_type_mode( hl_type *t ) {
+ if( t->kind == HVOID )
+ return M_VOID;
+ if( t->kind < HBOOL )
+ return (emit_mode)t->kind;
+ if( t->kind == HBOOL )
+ return sizeof(bool) == 1 ? M_UI8 : M_I32;
+ if( t->kind == HGUID )
+ return M_I64;
+ return M_PTR;
+}
+
+static ereg new_value( emit_ctx *ctx ) {
+ ereg r = {ctx->values.cur};
+ int_alloc_store(&ctx->values, ctx->emit_pos-1);
+ return r;
+}
+
+static ereg *get_tmp_args( emit_ctx *ctx, int count ) {
+ if( count > MAX_TMP_ARGS ) jit_error("Too many arguments");
+ return ctx->tmp_args;
+}
+
+static ereg resolve_ref( emit_ctx *ctx, int reg ) {
+ for(int i=0;iref_count;i++) {
+ if( ctx->refs[i].reg == reg )
+ return ctx->refs[i].r;
+ }
+ return ENULL;
+}
+
+static unsigned char emit_get_mode( emit_ctx *ctx, ereg v ) {
+ if( IS_NULL(v) ) jit_assert();
+ if( v.index < 0 )
+ return GET_PHI(v)->mode;
+ return ctx->instrs[ctx->values.data[v.index]].mode;
+}
+
+static const char *phi_prefix( emit_ctx *ctx ) {
+ static char tmp[20];
+ int sp = 3 + ctx->phi_depth * 2;
+ if( sp > 19 ) sp = 19;
+ memset(tmp,0x20,sp);
+ tmp[sp] = 0;
+ return tmp;
+}
+
+static einstr *emit_instr( emit_ctx *ctx, emit_op op ) {
+ if( ctx->emit_pos == ctx->max_instrs ) {
+ int pos = ctx->emit_pos;
+ int next_size = ctx->max_instrs ? (ctx->max_instrs << 1) : 256;
+ einstr *instrs = (einstr*)malloc(sizeof(einstr) * next_size);
+ if( instrs == NULL ) jit_error("Out of memory");
+ memcpy(instrs, ctx->instrs, pos * sizeof(einstr));
+ memset(instrs + pos, 0, (next_size - pos) * sizeof(einstr));
+ free(ctx->instrs);
+ ctx->instrs = instrs;
+ ctx->max_instrs = next_size;
+ } else if( (ctx->emit_pos & 0xFF) == 0 )
+ memset(ctx->instrs + ctx->emit_pos, 0, 256 * sizeof(einstr));
+ einstr *e = ctx->instrs + ctx->emit_pos++;
+ e->op = op;
+ return e;
+}
+
+static void emit_store_mem( emit_ctx *ctx, ereg to, int offs, ereg from ) {
+ einstr *e = emit_instr(ctx, STORE);
+ e->mode = GET_MODE(from);
+ e->size_offs = offs;
+ e->a = to;
+ e->b = from;
+}
+
+static void store_args( emit_ctx *ctx, einstr *e, ereg *args, int count ) {
+ if( count < 0 ) jit_assert();
+ if( count > 64 ) jit_error("Too many arguments");
+ e->nargs = (unsigned char)count;
+ if( count == 0 ) return;
+ if( count == 1 ) {
+ e->size_offs = args[0].index;
+ return;
+ }
+ int *args_data = int_alloc_get(&ctx->args_data, count);
+ e->size_offs = (int)(args_data - ctx->args_data.data);
+ memcpy(args_data, args, sizeof(int) * count);
+}
+
+ereg *hl_emit_get_args( emit_ctx *ctx, einstr *e ) {
+ if( e->nargs == 0 )
+ return NULL;
+ if( e->nargs == 1 )
+ return (ereg*)&e->size_offs;
+ return (ereg*)(ctx->args_data.data + e->size_offs);
+}
+
+static ereg emit_gen_ext( emit_ctx *ctx, emit_op op, ereg a, ereg b, int mode, int size_offs ) {
+ einstr *e = emit_instr(ctx, op);
+ if( (unsigned char)mode != mode ) jit_assert();
+ e->mode = (unsigned char)mode;
+ e->size_offs = size_offs;
+ e->a = a;
+ e->b = b;
+ return mode == 0 || mode == M_NORET ? ENULL : new_value(ctx);
+}
+
+static ereg emit_gen( emit_ctx *ctx, emit_op op, ereg a, ereg b, int mode ) {
+ return emit_gen_ext(ctx,op,a,b,mode,0);
+}
+
+static ereg emit_gen_size( emit_ctx *ctx, emit_op op, int size_offs ) {
+ return emit_gen_ext(ctx,op,ENULL,ENULL,op==ALLOC_STACK ? M_PTR : 0,size_offs);
+}
+
+static void patch_instr_mode( emit_ctx *ctx, int mode ) {
+ ctx->instrs[ctx->emit_pos-1].mode = (unsigned char)mode;
+}
+
+static tmp_phi *alloc_phi( emit_ctx *ctx, emit_block *b, vreg *r ) {
+ if( ctx->phi_count == ctx->max_phis ) {
+ int new_size = ctx->max_phis ? ctx->max_phis << 1 : 64;
+ tmp_phi **phis = (tmp_phi**)malloc(sizeof(tmp_phi*) * new_size);
+ if( phis == NULL ) jit_error("Out of memory");
+ memcpy(phis, ctx->phis, sizeof(tmp_phi*) * ctx->phi_count);
+ free(ctx->phis);
+ ctx->phis = phis;
+ ctx->max_phis = new_size;
+ }
+ tmp_phi *p = (tmp_phi*)hl_zalloc(&ctx->jit->falloc, sizeof(tmp_phi));
+ p->b = b;
+ p->r = r;
+ if( r ) p->mode = hl_type_mode(r->t);
+ p->value.index = -(++ctx->phi_count);
+ phi_add(b->phis,p);
+ GET_PHI(p->value) = p;
+ return p;
+}
+
+static int emit_jump( emit_ctx *ctx, bool cond ) {
+ int p = ctx->emit_pos;
+ emit_gen(ctx, cond ? JCOND : JUMP, ENULL, ENULL, 0);
+ return p;
+}
+
+static void patch_jump( emit_ctx *ctx, int jpos ) {
+ ctx->instrs[jpos].size_offs = ctx->emit_pos - (jpos + 1);
+}
+
+static emit_block *alloc_block( emit_ctx *ctx ) {
+ return hl_zalloc(&ctx->jit->falloc, sizeof(emit_block));
+}
+
+static void block_add_pred( emit_ctx *ctx, emit_block *b, emit_block *p ) {
+ blocks_add(b->preds,p);
+ blocks_add(p->nexts,b);
+ jit_debug(" PRED #%d\n",p->id);
+}
+
+static void store_block_var( emit_ctx *ctx, emit_block *b, vreg *r, ereg v ) {
+ if( IS_NULL(v) ) jit_assert();
+ vreg_replace(b->written_vars,r->id,v);
+ if( v.index < 0 ) {
+ tmp_phi *p = GET_PHI(v);
+ p->ref_blocks = link_add_sort_unique(ctx,b->id,b,p->ref_blocks);
+ }
+}
+
+static void split_block( emit_ctx *ctx ) {
+ emit_block *b = alloc_block(ctx);
+ b->sealed = true;
+ b->id = ctx->current_block->id + 1;
+ b->start_pos = ctx->emit_pos;
+ jit_debug("BLOCK #%d@%X[%X]\n",b->id,b->start_pos,ctx->op_pos);
+ while( ctx->arrival_points && ctx->arrival_points->id == ctx->op_pos ) {
+ block_add_pred(ctx, b, (emit_block*)ctx->arrival_points->ptr);
+ ctx->arrival_points = ctx->arrival_points->next;
+ }
+ bool dead_code = blocks_count(b->preds) == 0; // if we have no reach, force previous block dependency, this is rare dead code emit by compiler
+ einstr *eprev = &ctx->instrs[ctx->emit_pos-1];
+ if( (eprev->op != JUMP && eprev->op != RET && eprev->mode != M_NORET) || ctx->fun->ops[ctx->op_pos].op == OTrap || dead_code )
+ block_add_pred(ctx, b, ctx->current_block);
+ ctx->current_block->end_pos = ctx->emit_pos - 1;
+ ctx->current_block = b;
+}
+
+static void register_jump( emit_ctx *ctx, int jpos, int offs ) {
+ int target = offs + ctx->op_pos + 1;
+ int_alloc_store(&ctx->jump_regs, jpos);
+ int_alloc_store(&ctx->jump_regs, target);
+ if( offs > 0 ) {
+ ctx->arrival_points = link_add_sort_unique(ctx, target, ctx->current_block, ctx->arrival_points);
+ if( ctx->arrival_points->id != ctx->op_pos + 1 && ctx->fun->ops[ctx->op_pos].op != OSwitch )
+ split_block(ctx);
+ }
+}
+
+static ereg emit_load_const( emit_ctx *ctx, uint64 value, hl_type *size_t ) {
+ einstr *e = emit_instr(ctx, LOAD_IMM);
+ e->mode = hl_type_mode(size_t);
+ e->value = value;
+ return new_value(ctx);
+}
+
+static ereg emit_load_mem( emit_ctx *ctx, ereg v, int offset, hl_type *size_t ) {
+ einstr *e = emit_instr(ctx, LOAD_ADDR);
+ e->mode = hl_type_mode(size_t);
+ e->a = v;
+ e->b = ENULL;
+ e->size_offs = offset;
+ return new_value(ctx);
+}
+
+static void emit_store_reg( emit_ctx *ctx, vreg *to, ereg v ) {
+ if( to->t->kind == HVOID ) return;
+ if( IS_NULL(v) ) jit_assert();
+ store_block_var(ctx,ctx->current_block,to,v);
+}
+
+static ereg emit_native_call( emit_ctx *ctx, void *native_ptr, ereg args[], int nargs, hl_type *ret ) {
+ einstr *e = emit_instr(ctx, CALL_PTR);
+ e->mode = (unsigned char)(ret ? hl_type_mode(ret) : M_NORET);
+ e->value = (int_val)native_ptr;
+ store_args(ctx, e, args, nargs);
+ return ret == NULL || e->mode == M_VOID ? ENULL : new_value(ctx);
+}
+
+static ereg emit_dyn_call( emit_ctx *ctx, ereg f, ereg args[], int nargs, hl_type *ret ) {
+ einstr *e = emit_instr(ctx, CALL_REG);
+ e->mode = hl_type_mode(ret);
+ e->a = f;
+ store_args(ctx, e, args, nargs);
+ return e->mode == M_VOID ? ENULL : new_value(ctx);
+}
+
+static void emit_test( emit_ctx *ctx, ereg v, hl_op o ) {
+ emit_gen_ext(ctx, TEST, v, ENULL, 0, o);
+ patch_instr_mode(ctx, GET_MODE(v));
+}
+
+static void phi_remove_val( emit_ctx *ctx, tmp_phi *p, ereg v ) {
+ ereg_remove(&p->vals,v);
+ jit_debug("%sPHI-REM-DEP %s = %s\n", phi_prefix(ctx), val_str(p->value), val_str(v));
+}
+
// Adds value v as a dependency of phi p, ignoring self references and
// duplicates (ereg_add returns false on duplicate). When v is itself a phi
// (negative index), p is registered in v's back-reference list so later
// substitutions of v can be propagated into p.
static void phi_add_val( emit_ctx *ctx, tmp_phi *p, ereg v ) {
	if( !p->b ) jit_assert();
	if( IS_NULL(v) ) jit_assert();
	if( p->value.index == v.index )
		return;
	if( !ereg_add(p->vals,v) )
		return;
	jit_debug("%sPHI-DEP %s = %s\n", phi_prefix(ctx), val_str(p->value), val_str(v));
	if( v.index < 0 ) {
		// negative indexes denote phi values
		tmp_phi *p2 = GET_PHI(v);
		phi_add(p2->ref_phis,p);
	}
}
+
// Tries to simplify a phi node: if, ignoring self references, all of its
// dependencies collapse to a single value, the phi is redundant — that value
// is then propagated into every block and phi that referenced it, and the
// optimization is retried recursively. Returns the surviving value (the
// phi's own value when it is irreducible or has no dependency yet).
static ereg optimize_phi_rec( emit_ctx *ctx, tmp_phi *p ) {

	if( p->locked ) jit_assert();
	ereg same = ENULL;
	for_iter(ereg,v,p->vals) {
		// skip self references and repeats of the current candidate
		if( v.index == same.index || v.index == p->value.index )
			continue;
		// two distinct dependencies: the phi is genuinely needed
		if( !IS_NULL(same) )
			return p->value;
		same = v;
	}
	if( IS_NULL(same) )
		return p->value; // sealed (no dep yet)

	// trivial phi with no remaining users: just forward the single value
	if( !phi_count(p->ref_phis) && !p->ref_blocks )
		return same;

	if( p->locked || p->opt ) jit_assert();

	jit_debug("%sPHI-OPT %s = %s\n", phi_prefix(ctx), val_str(p->value), val_str(same));
	p->opt = true;
	ctx->phi_depth++;
	// every block that recorded this phi as its register value now gets `same`
	linked_inf *l = p->ref_blocks;
	while( l ) {
		emit_block *b = (emit_block*)l->ptr;
		if( vreg_find(b->written_vars,p->r->id).index == p->value.index )
			store_block_var(ctx,b,p->r,same);
		l = l->next;
	}
	// replace this phi by `same` inside every phi that depended on it
	for_iter(phi,p2,p->ref_phis) {
		phi_remove_val(ctx,p2,p->value);
		phi_add_val(ctx,p2,same);
	}
	p->ref_blocks = NULL;
	int count = phi_count(p->ref_phis);
	tmp_phi **phis = phi_free(&p->ref_phis);
	// NOTE(review): the next line appears garbled in this copy of the source
	// (likely a `for(int i=0;i<count;i++) optimize_phi_rec(ctx,phis[i]);` loop
	// followed by `ctx->phi_depth--;`) — confirm against the original file.
	for(int i=0;iphi_depth--;
	jit_debug("%sPHI-OPT-DONE %s = %s\n", phi_prefix(ctx), val_str(p->value), val_str(same));
	return optimize_phi_rec(ctx,p);
}
+
+static ereg emit_load_reg_block( emit_ctx *ctx, emit_block *b, vreg *r );
+
// Collects the incoming value of the phi's register from every predecessor
// of its block, then attempts to optimize the phi away. The `locked` flag
// guards against re-entering this phi while its operands are being loaded.
static ereg gather_phis( emit_ctx *ctx, tmp_phi *p ) {
	p->locked = true;
	for_iter(blocks,b,p->b->preds) {
		// register-less phis (created by emit_phi) keep their own value
		ereg r = p->r ? emit_load_reg_block(ctx, b, p->r) : p->value;
		phi_add_val(ctx, p, r);
	}
	p->locked = false;
	return optimize_phi_rec(ctx, p);
}
+
// Loads the current SSA value of virtual register r in block b, creating
// phi nodes on demand (on-the-fly SSA construction):
// - value already written in this block: return it;
// - block not sealed (some predecessors still unknown): create a pending phi;
// - single predecessor: recurse into it;
// - multiple predecessors: create a phi and gather its operands immediately.
static ereg emit_load_reg_block( emit_ctx *ctx, emit_block *b, vreg *r ) {
	ereg v = vreg_find(b->written_vars,r->id);
	if( !IS_NULL(v) )
		return v;
	if( !b->sealed ) {
		tmp_phi *p = alloc_phi(ctx,b,r);
		jit_debug("%sPHI-SEALED %s = R%d\n",phi_prefix(ctx),val_str(p->value),r->id);
		v = p->value;
	} else if( blocks_count(b->preds) == 1 )
		v = emit_load_reg_block(ctx, blocks_get(b->preds,0), r);
	else {
		tmp_phi *p = alloc_phi(ctx,b,r);
		// record the phi before gathering to break cycles through loops
		store_block_var(ctx,b,r,p->value);
		v = gather_phis(ctx, p);
	}
	store_block_var(ctx,b,r,v);
	return v;
}
+
+static void emit_walk_blocks_rec( emit_ctx *ctx, emit_block *b, int mark, void (*fun)(emit_ctx*,emit_block*) ) {
+ if( b->mark == mark ) return;
+ b->mark = mark;
+ fun(ctx, b);
+ for_iter(blocks,n,b->nexts)
+ emit_walk_blocks_rec(ctx,n,mark,fun);
+}
+
// Walks every block reachable from the root exactly once, calling fun on
// each. A fresh mark value per walk avoids clearing visited flags.
static void emit_walk_blocks( emit_ctx *ctx, void (*fun)(emit_ctx*,emit_block*) ) {
	static int MARK_UID = 0;
	int mark = ++MARK_UID;
	// NOTE(review): guards against the counter wrapping to 0, but signed
	// overflow of ++MARK_UID is undefined behavior — consider unsigned.
	if( mark == 0 ) mark = ++MARK_UID;
	emit_walk_blocks_rec(ctx, ctx->root_block, mark, fun);
}
+
// Loads virtual register r: registers that were taken by ORef live in a
// stack slot and are re-read through their reference; every other register
// comes from the per-block SSA value tracking.
static ereg emit_load_reg( emit_ctx *ctx, vreg *r ) {
	ereg ref = resolve_ref(ctx, r->id);
	if( ref.index >= 0 )
		return LOAD_MEM(ref,0,r->t);
	return emit_load_reg_block(ctx, ctx->current_block, r);
}
+
// Marks b as sealed (all its predecessors are now known) and completes
// every phi that was created while the block was still open.
static void seal_block( emit_ctx *ctx, emit_block *b ) {
	jit_debug(" SEAL #%d\n",b->id);
	for_iter(phi,p,b->phis)
		gather_phis(ctx, p);
	b->sealed = true;
}
+
+static ereg emit_phi( emit_ctx *ctx, ereg v1, ereg v2 ) {
+ unsigned char mode = GET_MODE(v1);
+ if( mode != GET_MODE(v2) ) jit_assert();
+ tmp_phi *p = alloc_phi(ctx, ctx->current_block, NULL);
+ p->mode = mode;
+ phi_add_val(ctx, p, v1);
+ phi_add_val(ctx, p, v2);
+ return p->value;
+}
+
// Emits a call to function index `findex` with `count` register arguments,
// storing the (non-void) result into dst. Function indexes past nfunctions
// refer to natives, which go through CALL_PTR with their resolved pointer;
// bytecode functions use CALL_FUN with the findex in operand `a`.
static void emit_call_fun( emit_ctx *ctx, vreg *dst, int findex, int count, int *args_regs ) {
	hl_module *m = ctx->mod;
	int fid = m->functions_indexes[findex];
	bool isNative = fid >= m->code->nfunctions;
	ereg *args = get_tmp_args(ctx, count);
	// NOTE(review): the next line appears garbled in this copy of the source —
	// presumably a `for(int i=0;i<count;i++) args[i] = LOAD(R(args_regs[i]));`
	// loop followed by `if( isNative ) STORE(dst, emit_native_call(...));` —
	// confirm against the original file.
	for(int i=0;ifunctions_ptrs[findex], args, count, dst->t));
	else {
		einstr *e = emit_instr(ctx, CALL_FUN);
		e->mode = hl_type_mode(dst->t);
		e->a.index = findex;
		store_args(ctx, e, args, count);
		STORE(dst, e->mode == M_VOID ? ENULL : new_value(ctx));
	}
}
+
// Allocates a static (no bound value) vclosure for function index fid.
// Native targets get their final function pointer immediately; bytecode
// targets store the findex in `fun` and are chained through `value` into
// ctx->closure_list — presumably resolved to real addresses later by the
// consumer of that list (confirm against callers).
static vclosure *alloc_static_closure( emit_ctx *ctx, int fid ) {
	hl_module *m = ctx->mod;
	vclosure *c = hl_malloc(&m->ctx.alloc,sizeof(vclosure));
	int fidx = m->functions_indexes[fid];
	c->hasValue = 0;
	if( fidx >= m->code->nfunctions ) {
		// native
		c->t = m->code->natives[fidx - m->code->nfunctions].t;
		c->fun = m->functions_ptrs[fid];
		c->value = NULL;
	} else {
		c->t = m->code->functions[fidx].type;
		c->fun = (void*)(int_val)fid; // findex placeholder, not a real pointer
		c->value = ctx->closure_list; // intrusive list link
		ctx->closure_list = c;
	}
	return c;
}
+
+static void *get_dynget( hl_type *t ) {
+ switch( t->kind ) {
+ case HF32:
+ return hl_dyn_getf;
+ case HF64:
+ return hl_dyn_getd;
+ case HI64:
+ case HGUID:
+ return hl_dyn_geti64;
+ case HI32:
+ case HUI16:
+ case HUI8:
+ case HBOOL:
+ return hl_dyn_geti;
+ default:
+ return hl_dyn_getp;
+ }
+}
+
+static void *get_dynset( hl_type *t ) {
+ switch( t->kind ) {
+ case HF32:
+ return hl_dyn_setf;
+ case HF64:
+ return hl_dyn_setd;
+ case HI64:
+ case HGUID:
+ return hl_dyn_seti64;
+ case HI32:
+ case HUI16:
+ case HUI8:
+ case HBOOL:
+ return hl_dyn_seti;
+ default:
+ return hl_dyn_setp;
+ }
+}
+
+static void *get_dyncast( hl_type *t ) {
+ switch( t->kind ) {
+ case HF32:
+ return hl_dyn_castf;
+ case HF64:
+ return hl_dyn_castd;
+ case HI64:
+ case HGUID:
+ return hl_dyn_casti64;
+ case HI32:
+ case HUI16:
+ case HUI8:
+ case HBOOL:
+ return hl_dyn_casti;
+ default:
+ return hl_dyn_castp;
+ }
+}
+
+static void emit_store_size( emit_ctx *ctx, ereg dst, int dst_offset, ereg src, int src_offset, int total_size ) {
+ int offset = 0;
+ while( offset < total_size) {
+ int remain = total_size - offset;
+ hl_type *ct = remain >= HL_WSIZE ? &hlt_bytes : (remain >= 4 ? &hlt_i32 : &hlt_ui8);
+ STORE_MEM(dst, dst_offset+offset, LOAD_MEM(src,src_offset+offset,ct));
+ offset += hl_type_size(ct);
+ }
+}
+
+static ereg emit_conv( emit_ctx *ctx, ereg v, emit_mode mode, bool _unsigned ) {
+ return emit_gen(ctx, _unsigned ? CONV_UNSIGNED : CONV, v, ENULL, mode);
+}
+
+static bool dyn_need_type( hl_type *t ) {
+ return !(IS_FLOAT(t) || t->kind == HI64 || t->kind == HGUID);
+}
+
// Emits a dynamic cast of value v from type t to type dt.
// Fast path: Null<T> -> T of the same kind becomes a null test selecting
// either a zero constant or a read through the boxed value, merged by a phi.
// General path: v is spilled to a one-slot stack area and the hl_dyn_cast*
// primitive is called on its address; the float/i64/guid variants do not
// take an explicit target type.
static ereg emit_dyn_cast( emit_ctx *ctx, ereg v, hl_type *t, hl_type *dt ) {
	if( t->kind == HNULL && t->tparam->kind == dt->kind ) {
		emit_test(ctx, v, OJNotNull);
		int jnot = emit_jump(ctx, false);
		ereg v1 = LOAD_CONST(0,dt); // null -> default (zero) value
		int jend = emit_jump(ctx, true);
		patch_jump(ctx, jnot);
		// non-null: read through the box (offset 0 — confirm payload layout)
		ereg v2 = LOAD_MEM(v,0,dt);
		patch_jump(ctx, jend);
		return emit_phi(ctx, v1, v2);
	}
	bool need_dyn = dyn_need_type(dt);
	// the cast primitives take a pointer to the value, so spill it first
	ereg st = emit_gen_size(ctx, ALLOC_STACK, 1);
	STORE_MEM(st, 0, v);
	ereg args[3];
	args[0] = st;
	args[1] = LOAD_CONST_PTR(t);
	if( need_dyn ) args[2] = LOAD_CONST_PTR(dt);
	ereg r = emit_native_call(ctx, get_dyncast(dt), args, need_dyn ? 3 : 2, dt);
	emit_gen_size(ctx, FREE_STACK, 1);
	return r;
}
+
+static void emit_opcode( emit_ctx *ctx, hl_opcode *o );
+
// Rewrites a phi value reference (negative index) after phi cleanup:
// follows the chain of collapsed phis until reaching either a concrete
// value (target.index >= 0) or a surviving phi. final_id appears to encode:
// < 0 collapsed, 0 keeps its original value, > 0 renumbered surviving phi
// (see hl_emit_clean_phis) — confirm the 0 case against phi allocation.
static void remap_phi_reg( emit_ctx *ctx, ereg *r ) {
	if( r->index >= 0 || IS_NULL(*r) )
		return;
	tmp_phi *p = GET_PHI(*r);
	while( p->final_id < 0 ) {
		if( p->target.index >= 0 ) {
			r->index = p->target.index;
			return;
		}
		p = GET_PHI(p->target);
	}
	if( p->final_id == 0 )
		return;
	r->index = -p->final_id; // new phis
}
+
// Serializes one emit_block into the jit-owned eblock array: id, opcode
// bounds, predecessor/successor id lists, and the surviving phis with
// their (remapped) operand values.
static void emit_write_block( emit_ctx *ctx, emit_block *b ) {
	jit_ctx *jit = ctx->jit;
	eblock *bl = jit->blocks + b->id;
	bl->id = b->id;
	bl->start_pos = b->start_pos;
	bl->end_pos = b->end_pos;
	bl->pred_count = blocks_count(b->preds);
	bl->next_count = blocks_count(b->nexts);
	bl->preds = (int*)hl_malloc(&jit->falloc,sizeof(int)*bl->pred_count);
	bl->nexts = (int*)hl_malloc(&jit->falloc,sizeof(int)*bl->next_count);
	// NOTE(review): the two loop headers below look garbled (truncated
	// `i<bl->...` bounds), and `array[i++] = f(...,i)` modifies and reads `i`
	// unsequenced, which is undefined behavior — the intent is presumably a
	// plain `preds[i] = ...` copy loop; confirm against the original file.
	for(int i=0;ipred_count;i++)
		bl->preds[i++] = blocks_get(b->preds,i)->id;
	for(int i=0;inext_count;i++)
		bl->nexts[i++] = blocks_get(b->nexts,i)->id;
	// write phis
	{
		// count the surviving phis (collapsed ones have final_id < 0)
		for_iter(phi,p,b->phis)
			if( p->final_id >= 0 )
				bl->phi_count++;
	}
	bl->phis = (ephi*)hl_zalloc(&jit->falloc,sizeof(ephi)*bl->phi_count);
	int i = 0;
	for_iter(phi,p,b->phis) {
		if( p->final_id < 0 )
			continue;
		ephi *p2 = bl->phis + i++;
		if( p->final_id == 0 )
			p2->value = p->value;
		else
			p2->value.index = -p->final_id; // renumbered phi id
		p2->nvalues = ereg_count(p->vals);
		p2->values = (ereg*)hl_malloc(&jit->falloc,sizeof(ereg)*p2->nvalues);
		int k = 0;
		for_iter(ereg,v,p->vals) {
			remap_phi_reg(ctx, &v);
			p2->values[k++] = v;
		}
	}
}
+
// Finalizes the emitted instruction stream: resolves the recorded jump
// offsets (relative to the instruction after the jump), closes the current
// block, and transfers ownership of the buffers (instructions, position
// map, block array, value counters) to the jit context. Idempotent via
// ctx->flushed.
void hl_emit_flush( jit_ctx *jit ) {
	emit_ctx *ctx = jit->emit;
	int i = 0;
	if( ctx->flushed ) return;
	ctx->flushed = true;
	// jump_regs stores (instruction position, target opcode) pairs
	while( i < ctx->jump_regs.cur ) {
		int pos = ctx->jump_regs.data[i++];
		einstr *e = ctx->instrs + pos;
		int target = ctx->jump_regs.data[i++];
		e->size_offs = ctx->pos_map[target] - (pos + 1);
	}
	ctx->pos_map[ctx->fun->nops] = -1;
	ctx->current_block->end_pos = ctx->emit_pos - 1;
	jit->instrs = ctx->instrs;
	jit->instr_count = ctx->emit_pos;
	jit->emit_pos_map = ctx->pos_map;
	jit->block_count = ctx->current_block->id + 1;
	jit->blocks = hl_zalloc(&jit->falloc,sizeof(eblock) * jit->block_count);
	// NOTE(review): the loop header below looks garbled in this copy (likely
	// `i<jit->block_count`) — confirm against the original file.
	for(i=0;iblock_count;i++)
		jit->blocks[i].id = -1; // mark slots of unreachable blocks
	jit->value_count = ctx->values.cur;
	jit->values_writes = ctx->values.data;
	emit_walk_blocks(ctx,emit_write_block);
}
+
// Invokes iter_reg on every register operand of instruction e.
// Call instructions iterate the callee register (CALL_REG only) plus all
// argument registers; LOAD_IMM has no register operands; every other
// instruction exposes a/b, where a null `a` implies `b` is unused too.
static void hl_iter_instr_reg( einstr *e, void *ctx, void (*iter_reg)( void *, ereg * ) ) {
	switch( e->op ) {
	case CALL_REG:
		iter_reg(ctx,&e->a);
		/* fallthrough: all call kinds also iterate their arguments */
	case CALL_FUN:
	case CALL_PTR:
		{
			int i;
			ereg *args = hl_emit_get_args(ctx, e);
			// NOTE(review): the loop header below looks garbled (likely
			// `i<e->nargs`) — confirm against the original file.
			for(i=0;inargs;i++)
				iter_reg(ctx, args + i);
		}
		break;
	case LOAD_IMM:
		// skip
		break;
	default:
		if( !IS_NULL(e->a) ) {
			iter_reg(ctx,&e->a);
			if( !IS_NULL(e->b) )
				iter_reg(ctx,&e->b);
		}
		break;
	}
}
+
// Post-pass over all phis: re-optimizes each one to a fixpoint, records the
// value it ultimately collapses to in `target`, assigns compact 1-based ids
// to the phis that survive (target == own value), then rewrites every
// instruction operand through remap_phi_reg.
static void hl_emit_clean_phis( emit_ctx *ctx ) {
	// NOTE(review): the loop headers in this function look garbled in this
	// copy (likely `i<ctx->phi_count` / `i<ctx->emit_pos`) — confirm upstream.
	for(int i=0;iphi_count;i++) {
		tmp_phi *p = ctx->phis[i];
		tmp_phi *cur = p;
		ereg r;
		// follow the collapse chain until it stops resolving to another phi
		while( true ) {
			cur->opt = false;
			r = optimize_phi_rec(ctx,cur);
			if( r.index >= 0 || r.index == cur->value.index ) break;
			cur = GET_PHI(r);
		}
		p->target = r;
	}
	int new_phis = 0;
	for(int i=0;iphi_count;i++) {
		tmp_phi *p = ctx->phis[i];
		if( p->target.index == p->value.index )
			p->final_id = ++new_phis; // surviving phi, renumbered from 1
		else
			p->final_id = -1; // collapsed into another value
	}
	for(int i=0;iemit_pos;i++)
		hl_iter_instr_reg(ctx->instrs + i, ctx, remap_phi_reg);
}
+
// Translates one HL function into the intermediate einstr stream:
// resets the per-function emit state, grows the reusable vreg / pos_map
// buffers if this function is larger than anything seen so far, loads the
// arguments into their registers, pre-allocates stack slots for every
// register taken by ORef, then emits every opcode while maintaining the
// block structure, and finally cleans phis and flushes to the jit context.
void hl_emit_function( jit_ctx *jit ) {
	emit_ctx *ctx = jit->emit;
	hl_function *f = jit->fun;
	int i;
	ctx->mod = jit->mod;
	ctx->fun = f;
	ctx->emit_pos = 0;
	ctx->trap_count = 0;
	ctx->ref_count = 0;
	ctx->phi_count = 0;
	ctx->flushed = false;
	int_alloc_reset(&ctx->args_data);
	int_alloc_reset(&ctx->jump_regs);
	int_alloc_reset(&ctx->values);
	ctx->root_block = ctx->current_block = alloc_block(ctx);
	ctx->current_block->sealed = true; // entry block: no predecessors
	ctx->arrival_points = NULL;
	jit_debug("---- begin [%X] ----\n",f->findex);
	// NOTE(review): several loop headers below look garbled in this copy
	// (truncated `i<...` bounds) — confirm against the original file.
	// grow the vreg cache (kept across functions)
	if( f->nregs > ctx->max_regs ) {
		free(ctx->vregs);
		ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1));
		if( ctx->vregs == NULL ) jit_assert();
		for(i=0;inregs;i++)
			R(i)->id = i;
		ctx->max_regs = f->nregs;
	}

	// grow the opcode -> emit position map (one extra slot for nops)
	if( f->nops >= ctx->pos_map_size ) {
		free(ctx->pos_map);
		ctx->pos_map = (int*)malloc(sizeof(int) * (f->nops+1));
		if( ctx->pos_map == NULL ) jit_assert();
		ctx->pos_map_size = f->nops;
	}

	// bind register types for this function
	for(i=0;inregs;i++) {
		vreg *r = R(i);
		r->t = f->regs[i];
	}

	// load the function arguments into their registers
	for(i=0;itype->fun->nargs;i++) {
		hl_type *t = f->type->fun->args[i];
		if( t->kind == HVOID ) continue;
		STORE(R(i), emit_gen(ctx, LOAD_ARG, ENULL, ENULL, hl_type_mode(t)));
	}

	// backwards scan for ORef: each referenced register gets a stack slot
	for(i=f->nops-1;i>=0;i--) {
		hl_opcode *o = f->ops + i;
		if( o->op == ORef ) {
			ereg ref = resolve_ref(ctx, o->p2);
			if( ref.index >= 0 ) continue; // already has a slot
			if( ctx->ref_count == MAX_REFS ) jit_error("Too many refs");
			ctx->refs[ctx->ref_count].r = emit_gen_size(ctx, ALLOC_STACK, hl_type_size(R(o->p2)->t));
			ctx->refs[ctx->ref_count].reg = o->p2;
			ctx->ref_count++;
		}
	}

	// main emission loop; blocks are split at recorded arrival points
	for(int op_pos=0;op_posnops;op_pos++) {
		ctx->op_pos = op_pos;
		ctx->pos_map[op_pos] = ctx->emit_pos;
		if( ctx->arrival_points && ctx->arrival_points->id == op_pos )
			split_block(ctx);
		emit_opcode(ctx,f->ops + op_pos);
	}

	hl_emit_clean_phis(ctx);
	hl_emit_flush(ctx->jit);
}
+
+void hl_emit_alloc( jit_ctx *jit ) {
+ emit_ctx *ctx = (emit_ctx*)malloc(sizeof(emit_ctx));
+ if( ctx == NULL ) jit_assert();
+ memset(ctx,0,sizeof(emit_ctx));
+ ctx->jit = jit;
+ jit->emit = ctx;
+ if( sizeof(einstr) != 16 ) jit_assert();
+}
+
+void hl_emit_free( jit_ctx *jit ) {
+ emit_ctx *ctx = jit->emit;
+ free(ctx->vregs);
+ free(ctx->instrs);
+ free(ctx->pos_map);
+ int_alloc_free(&ctx->jump_regs);
+ int_alloc_free(&ctx->args_data);
+ int_alloc_free(&ctx->values);
+ free(ctx);
+ jit->emit = NULL;
+}
+
// Walks backwards through predecessors looking for the block that starts at
// `target` (the destination of a backward jump). When found, decrements its
// pending back-edge counter, records the current block as a predecessor,
// and seals any blocks at the head of the wait_seal chain that became
// ready. Returns true once the target block has been found on some path.
static bool seal_block_rec( emit_ctx *ctx, emit_block *b, int target ) {
	if( b->start_pos < target )
		return false; // walked past the target on this path
	if( b->start_pos == target ) {
		b->wait_nexts--;
		block_add_pred(ctx, b, ctx->current_block);
		// seal chained blocks whose back edges are now all accounted for
		while( b && b->wait_nexts == 0 && ctx->wait_seal == b ) {
			seal_block(ctx,b);
			b = b->wait_seal_next;
			ctx->wait_seal = b;
		}
		return true;
	}
	for_iter(blocks,p,b->preds)
		if( p->start_pos < b->start_pos && seal_block_rec(ctx,p,target) )
			return true;
	return false;
}
+
// Emits a (conditional or unconditional) jump to opcode offset `offs`,
// relative to the next opcode, and records it for later offset patching.
// A backward jump closes a loop: its target block must already exist and
// is notified so it can be sealed once all its back edges have been seen.
static void register_block_jump( emit_ctx *ctx, int offs, bool cond ) {
	int jidx = emit_jump(ctx, cond);
	register_jump(ctx, jidx, offs);
	if( offs < 0 ) {
		int target = ctx->pos_map[ctx->op_pos + 1 + offs];
		emit_block *b = ctx->current_block;
		if( !seal_block_rec(ctx, b, target) ) jit_assert();
	}
}
+
// Called on OLabel: scans the remaining opcodes for backward jumps landing
// exactly on the current position. Each one is a pending loop back edge:
// the current block is un-sealed and chained onto ctx->wait_seal until all
// of those jumps have been emitted.
static void prepare_loop_block( emit_ctx *ctx ) {
	int i, last_jump = -1; // NOTE(review): last_jump is written but never read
	emit_block *b = ctx->current_block;
	// gather all backward jumps to know when the block will be finished
	// NOTE(review): the loop header below looks garbled in this copy (likely
	// `i<ctx->fun->nops`) — confirm against the original file.
	for(i=ctx->op_pos+1;ifun->nops;i++) {
		hl_opcode *op = &ctx->fun->ops[i];
		int offs = 0;
		// the jump offset lives in a different operand per opcode family
		switch( op->op ) {
		case OJFalse:
		case OJTrue:
		case OJNotNull:
		case OJNull:
			offs = op->p2;
			break;
		case OJAlways:
			offs = op->p1;
			break;
		case OJEq:
		case OJNotEq:
		case OJSLt:
		case OJSGte:
		case OJSLte:
		case OJSGt:
		case OJULt:
		case OJUGte:
		case OJNotLt:
		case OJNotGte:
			offs = op->p3;
			break;
		default:
			break;
		}
		// backward jump landing exactly here: one more pending back edge
		if( offs < 0 && i + 1 + offs == ctx->op_pos ) {
			jit_debug(" WAIT @%X\n",i);
			b->wait_nexts++;
			if( b->sealed ) {
				b->sealed = false;
				b->wait_seal_next = ctx->wait_seal;
				ctx->wait_seal = b;
			}
			last_jump = i;
		}
	}
}
+
+static void emit_opcode( emit_ctx *ctx, hl_opcode *o ) {
+ vreg *dst = R(o->p1);
+ vreg *ra = R(o->p2);
+ vreg *rb = R(o->p3);
+ hl_module *m = ctx->mod;
+#ifdef HL_DEBUG
+ int uid = (ctx->fun->findex << 16) | ctx->op_pos;
+ __ignore(&uid);
+#endif
+ switch( o->op ) {
+ case OMov:
+ case OUnsafeCast:
+ STORE(dst, emit_gen(ctx,MOV,LOAD(ra),ENULL,hl_type_mode(ra->t)));
+ break;
+ case OInt:
+ STORE(dst, LOAD_CONST(m->code->ints[o->p2], dst->t));
+ break;
+ case OBool:
+ STORE(dst, LOAD_CONST(o->p2, &hlt_bool));
+ break;
+ case ONull:
+ STORE(dst, LOAD_CONST(0, dst->t));
+ break;
+ case OFloat:
+ {
+ union {
+ float f;
+ double d;
+ uint64 i;
+ } v;
+ if( dst->t->kind == HF32 )
+ v.f = (float)m->code->floats[o->p2];
+ else
+ v.d = m->code->floats[o->p2];
+ STORE(dst, LOAD_CONST(v.i, dst->t));
+ }
+ break;
+ case OString:
+ STORE(dst, LOAD_CONST_PTR(hl_get_ustring(m->code,o->p2)));
+ break;
+ case OBytes:
+ {
+ char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2];
+ STORE(dst,LOAD_CONST_PTR(b));
+ }
+ break;
+ case OGetGlobal:
+ {
+ void *addr = m->globals_data + m->globals_indexes[o->p2];
+ STORE(dst, LOAD_MEM_PTR(LOAD_CONST_PTR(addr),0));
+ }
+ break;
+ case OSetGlobal:
+ {
+ void *addr = m->globals_data + m->globals_indexes[o->p1];
+ STORE_MEM(LOAD_CONST_PTR(addr),0,LOAD(ra));
+ }
+ break;
+ case OCall0:
+ emit_call_fun(ctx, dst, o->p2, 0, NULL);
+ break;
+ case OCall1:
+ emit_call_fun(ctx, dst, o->p2, 1, &o->p3);
+ break;
+ case OCall2:
+ {
+ int args[2] = { o->p3, (int)(int_val)o->extra };
+ emit_call_fun(ctx, dst, o->p2, 2, args);
+ }
+ break;
+ case OCall3:
+ {
+ int args[3] = { o->p3, o->extra[0], o->extra[1] };
+ emit_call_fun(ctx, dst, o->p2, 3, args);
+ }
+ break;
+ case OCall4:
+ {
+ int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] };
+ emit_call_fun(ctx, dst, o->p2, 4, args);
+ }
+ break;
+ case OCallN:
+ emit_call_fun(ctx, dst, o->p2, o->p3, o->extra);
+ break;
+ case OSub:
+ case OAdd:
+ case OMul:
+ case OSDiv:
+ case OUDiv:
+ case OShl:
+ case OSShr:
+ case OUShr:
+ case OAnd:
+ case OOr:
+ case OXor:
+ case OSMod:
+ case OUMod:
+ {
+ ereg va = LOAD(ra);
+ ereg vb = LOAD(rb);
+ STORE(dst, emit_gen_ext(ctx, BINOP, va, vb, hl_type_mode(dst->t), o->op));
+ }
+ break;
+ case ONeg:
+ case ONot:
+ STORE(dst, emit_gen_ext(ctx, UNOP, LOAD(ra), ENULL, hl_type_mode(dst->t), o->op));
+ break;
+ case OJFalse:
+ case OJTrue:
+ case OJNotNull:
+ case OJNull:
+ {
+ emit_test(ctx, LOAD(dst), o->op);
+ register_block_jump(ctx, o->p2, true);
+ }
+ break;
+ case OJEq:
+ case OJNotEq:
+ case OJSLt:
+ case OJSGte:
+ case OJSLte:
+ case OJSGt:
+ case OJULt:
+ case OJUGte:
+ case OJNotLt:
+ case OJNotGte:
+ {
+ emit_gen_ext(ctx, CMP, LOAD(dst), LOAD(ra), 0, o->op);
+ patch_instr_mode(ctx, hl_type_mode(dst->t));
+ register_block_jump(ctx, o->p3, true);
+ }
+ break;
+ case OJAlways:
+ register_block_jump(ctx, o->p1, false);
+ break;
+ case OToDyn:
+ if( ra->t->kind == HBOOL ) {
+ ereg arg = LOAD(ra);
+ STORE(dst, emit_native_call(ctx,hl_alloc_dynbool,&arg,1,&hlt_dyn));
+ } else {
+ ereg arg = LOAD_CONST_PTR(ra->t);
+ ereg ret = emit_native_call(ctx,hl_alloc_dynamic,&arg,1,&hlt_dyn);
+ STORE_MEM(ret,HDYN_VALUE,LOAD(ra));
+ STORE(dst, ret);
+ }
+ break;
+ case OToSFloat:
+ case OToInt:
+ case OToUFloat:
+ STORE(dst, emit_conv(ctx,LOAD(ra),hl_type_mode(dst->t), o->op == OToUFloat));
+ break;
+ case ORet:
+ emit_gen(ctx, RET, dst->t->kind == HVOID ? ENULL : LOAD(dst), ENULL, M_NORET);
+ patch_instr_mode(ctx, hl_type_mode(dst->t));
+ break;
+ case OIncr:
+ case ODecr:
+ {
+ if( IS_FLOAT(dst->t) ) {
+ jit_assert();
+ } else {
+ STORE(dst, emit_gen_ext(ctx,UNOP,LOAD(dst),ENULL,hl_type_mode(dst->t),o->op));
+ }
+ }
+ break;
+ case ONew:
+ {
+ ereg arg = ENULL;
+ void *allocFun = NULL;
+ int nargs = 1;
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ allocFun = hl_alloc_obj;
+ break;
+ case HDYNOBJ:
+ allocFun = hl_alloc_dynobj;
+ nargs = 0;
+ break;
+ case HVIRTUAL:
+ allocFun = hl_alloc_virtual;
+ break;
+ default:
+ jit_assert();
+ }
+ if( nargs ) arg = LOAD_CONST_PTR(dst->t);
+ STORE(dst, emit_native_call(ctx,allocFun,&arg,nargs,dst->t));
+ }
+ break;
+ case OInstanceClosure:
+ {
+ ereg args[3];
+ args[0] = LOAD_CONST_PTR(m->code->functions[m->functions_indexes[o->p2]].type);
+ // TODO : WRITE (emit_pos + op_count) to process later and replace address !
+ args[1] = LOAD_CONST_PTR(0);
+ args[2] = LOAD(rb);
+ STORE(dst, emit_native_call(ctx,hl_alloc_closure_ptr,args,3,dst->t));
+ }
+ break;
+ case OVirtualClosure:
+ {
+ hl_type *t = NULL;
+ hl_type *ot = ra->t;
+ while( t == NULL ) {
+ int i;
+ for(i=0;iobj->nproto;i++) {
+ hl_obj_proto *pp = ot->obj->proto + i;
+ if( pp->pindex == o->p3 ) {
+ t = m->code->functions[m->functions_indexes[pp->findex]].type;
+ break;
+ }
+ }
+ ot = ot->obj->super;
+ }
+ ereg args[3];
+ ereg obj = LOAD(ra);
+ args[0] = LOAD_CONST_PTR(t);
+ args[1] = LOAD_OBJ_METHOD(obj,o->p3);
+ args[2] = obj;
+ STORE(dst, emit_native_call(ctx,hl_alloc_closure_ptr,args,3,dst->t));
+ }
+ break;
+ case OCallClosure:
+ if( ra->t->kind == HDYN ) {
+ int i;
+ ereg st = emit_gen_size(ctx, ALLOC_STACK, o->p3);
+ for(i=0;ip3;i++) {
+ vreg *r = R(o->extra[i]);
+ if( !hl_is_dynamic(r->t) ) jit_assert();
+ STORE_MEM(st,i*HL_WSIZE,LOAD(r));
+ }
+ ereg args[3];
+ args[0] = LOAD(ra);
+ args[1] = st;
+ args[2] = LOAD_CONST(o->p3,&hlt_i32);
+ STORE(dst, emit_dyn_cast(ctx,emit_native_call(ctx,hl_dyn_call,args,3,dst->t),ra->t,dst->t));
+ emit_gen_size(ctx, FREE_STACK, o->p3);
+ } else {
+ ereg r = LOAD(ra);
+ ereg *args = get_tmp_args(ctx,o->p3+1);
+ // Code for if( c->hasValue ) c->fun(c->value,args) else c->fun(args)
+ ereg has = LOAD_MEM(r,HL_WSIZE*2,&hlt_i32);
+ emit_test(ctx, has, OJNull);
+ int jidx = emit_jump(ctx, true);
+ int i;
+ args[0] = LOAD_MEM_PTR(r,HL_WSIZE * 3);
+ for(i=0;ip3;i++)
+ args[i+1] = LOAD(R(o->extra[i]));
+ ereg v1 = emit_dyn_call(ctx,LOAD_MEM_PTR(r,HL_WSIZE),args,o->p3 + 1,dst->t);
+ int jend = emit_jump(ctx, false);
+ patch_jump(ctx, jidx);
+ for(i=0;ip3;i++)
+ args[i] = LOAD(R(o->extra[i]));
+ ereg v2 = emit_dyn_call(ctx,LOAD_MEM_PTR(r,HL_WSIZE),args,o->p3,dst->t);
+ patch_jump(ctx, jend);
+ if( dst->t->kind != HVOID ) STORE(dst, emit_phi(ctx,v1,v2));
+ }
+ break;
+ case OStaticClosure:
+ {
+ vclosure *c = alloc_static_closure(ctx,o->p2);
+ STORE(dst, LOAD_CONST_PTR(c));
+ }
+ break;
+ case OField:
+ {
+ switch( ra->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ hl_runtime_obj *rt = hl_get_obj_rt(ra->t);
+ ereg r = LOAD(ra);
+ if( dst->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t;
+ if( ft->kind == HPACKED ) {
+ STORE(dst,OFFSET(r, ENULL, 0, rt->fields_indexes[o->p3]));
+ break;
+ }
+ }
+ STORE(dst, LOAD_MEM(r,rt->fields_indexes[o->p3],dst->t));
+ }
+ break;
+ case HVIRTUAL:
+ // code for : if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt)
+ {
+ ereg obj = LOAD(ra);
+ ereg field = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p3);
+ emit_test(ctx, field, OJNull);
+ int jidx = emit_jump(ctx, true);
+ ereg v1 = LOAD_MEM(field,0,dst->t);
+ int jend = emit_jump(ctx, false);
+ patch_jump(ctx, jidx);
+ bool need_type = dyn_need_type(dst->t);
+ ereg args[3];
+ args[0] = obj;
+ args[1] = LOAD_CONST(ra->t->virt->fields[o->p3].hashed_name,&hlt_i32);
+ if( need_type ) args[2] = LOAD_CONST_PTR(dst->t);
+ ereg v2 = emit_native_call(ctx,get_dynget(dst->t),args,need_type?3:2,dst->t);
+ patch_jump(ctx, jend);
+ STORE(dst, emit_phi(ctx, v1, v2));
+ }
+ break;
+ default:
+ jit_assert();
+ break;
+ }
+ }
+ break;
+ case OSetField:
+ {
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ ereg obj = LOAD(dst);
+ ereg val = LOAD(rb);
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ int field_pos = rt->fields_indexes[o->p2];
+ if( rb->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t;
+ if( ft->kind == HPACKED ) {
+ emit_store_size(ctx,obj,field_pos,val,0,hl_get_obj_rt(ft->tparam)->size);
+ break;
+ }
+ }
+ STORE_MEM(obj,field_pos, val);
+ }
+ break;
+ case HVIRTUAL:
+ // code for : if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v)
+ {
+ ereg obj = LOAD(dst);
+ ereg val = LOAD(rb);
+ ereg field = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p2);
+ emit_test(ctx, field, OJNull);
+ int jidx = emit_jump(ctx, true);
+ STORE_MEM(field, 0, val);
+ int jend = emit_jump(ctx, false);
+ patch_jump(ctx, jidx);
+ bool need_type = dyn_need_type(dst->t);
+ ereg args[4];
+ args[0] = obj;
+ args[1] = LOAD_CONST(dst->t->virt->fields[o->p2].hashed_name,&hlt_i32);
+ if( need_type ) {
+ args[2] = LOAD_CONST_PTR(rb->t);
+ args[3] = val;
+ } else {
+ args[2] = val;
+ }
+ emit_native_call(ctx,get_dynset(dst->t),args,need_type?4:3,dst->t);
+ patch_jump(ctx, jend);
+ }
+ break;
+ default:
+ jit_assert();
+ break;
+ }
+ }
+ break;
+ case OGetThis:
+ {
+ vreg *r = R(0);
+ ereg obj = LOAD(r);
+ hl_runtime_obj *rt = hl_get_obj_rt(r->t);
+ int field_pos = rt->fields_indexes[o->p2];
+ if( dst->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t;
+ if( ft->kind == HPACKED ) {
+ STORE(dst, OFFSET(obj, ENULL, 0, field_pos));
+ break;
+ }
+ }
+ STORE(dst, LOAD_MEM(obj, field_pos, dst->t));
+ }
+ break;
+ case OSetThis:
+ {
+ vreg *r = R(0);
+ ereg obj = LOAD(r);
+ ereg val = LOAD(ra);
+ hl_runtime_obj *rt = hl_get_obj_rt(r->t);
+ int field_pos = rt->fields_indexes[o->p1];
+ if( ra->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t;
+ if( ft->kind == HPACKED ) {
+ emit_store_size(ctx, obj, field_pos, val, 0, hl_get_obj_rt(ft->tparam)->size);
+ break;
+ }
+ }
+ STORE_MEM(obj,field_pos,val);
+ }
+ break;
+ case OCallThis:
+ {
+ int i;
+ int nargs = o->p3 + 1;
+ ereg obj = LOAD(R(0));
+ ereg *args = get_tmp_args(ctx, nargs);
+ args[0] = obj;
+ for(i=1;iextra[i-1]));
+ ereg fun = LOAD_OBJ_METHOD(obj, o->p2);
+ STORE(dst, emit_dyn_call(ctx,fun,args,nargs,dst->t));
+ }
+ break;
+ case OCallMethod:
+ {
+ vreg *r = R(o->extra[0]);
+ ereg obj = LOAD(r);
+ switch( r->t->kind ) {
+ case HOBJ:
+ {
+ int i;
+ int nargs = o->p3;
+ ereg *args = get_tmp_args(ctx, nargs);
+ for(i=0;iextra[i]));
+ ereg fun = LOAD_OBJ_METHOD(obj, o->p2);
+ STORE(dst, emit_dyn_call(ctx,fun,args,nargs,dst->t));
+ }
+ break;
+ case HVIRTUAL:
+ // code for : if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret)
+ {
+ vreg *_o = R(o->extra[0]);
+ ereg obj = LOAD(_o);
+ ereg field = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p2);
+ emit_test(ctx, field, OJNull);
+ int jidx = emit_jump(ctx, true);
+
+ int nargs = o->p3;
+ ereg *args = get_tmp_args(ctx, nargs);
+ int i;
+ args[0] = LOAD_MEM_PTR(obj,HL_WSIZE);
+ for(i=1;iextra[i]));
+ ereg v1 = emit_dyn_call(ctx,LOAD_MEM_PTR(field,0),args,nargs,dst->t);
+
+ int jend = emit_jump(ctx, false);
+ patch_jump(ctx, jidx);
+
+ nargs = o->p3 - 1;
+ ereg eargs = emit_gen_size(ctx, ALLOC_STACK, nargs);
+ for(i=0;iextra[i+1])));
+ bool need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID;
+ int dyn_size = sizeof(vdynamic)/HL_WSIZE;
+ ereg edyn = need_dyn ? emit_gen_size(ctx, ALLOC_STACK, dyn_size) : LOAD_CONST_PTR(NULL);
+
+ args = get_tmp_args(ctx, 4);
+ args[0] = LOAD_MEM_PTR(obj,HL_WSIZE);
+ args[1] = LOAD_CONST(_o->t->virt->fields[o->p2].hashed_name,&hlt_i32);
+ args[2] = eargs;
+ args[3] = edyn;
+
+ ereg v2 = emit_native_call(ctx, hl_dyn_call_obj, args, 4, dst->t);
+
+ emit_gen_size(ctx, FREE_STACK, o->p3 + (need_dyn ? dyn_size : 0));
+ patch_jump(ctx, jend);
+
+ if( dst->t->kind != HVOID ) STORE(dst, emit_phi(ctx, v1, v2));
+ }
+ break;
+ default:
+ jit_assert();
+ break;
+ }
+ }
+ break;
+ case OThrow:
+ case ORethrow:
+ {
+ ereg arg = LOAD(dst);
+ emit_native_call(ctx, o->op == OThrow ? hl_throw : hl_rethrow, &arg, 1, NULL);
+ }
+ break;
+ case OLabel:
+ if( ctx->current_block->start_pos != ctx->emit_pos )
+ split_block(ctx);
+ prepare_loop_block(ctx);
+ break;
+ case OGetI8:
+ case OGetI16:
+ case OGetMem:
+ {
+ ereg offs = OFFSET(LOAD(ra),LOAD(rb),1,0);
+ ereg val = LOAD_MEM(offs, 0, dst->t);
+ if( o->op != OGetMem ) val = emit_conv(ctx, val, M_I32, false);
+ STORE(dst, val);
+ }
+ break;
+ case OSetI8:
+ case OSetI16:
+ case OSetMem:
+ {
+ ereg offs = OFFSET(LOAD(dst), LOAD(ra),1,0);
+ ereg val = LOAD(rb);
+ if( o->op != OSetMem ) val = emit_conv(ctx, val, M_I32, false);
+ STORE_MEM(offs, 0, val);
+ }
+ break;
+ case OType:
+ STORE(dst, LOAD_CONST_PTR(m->code->types + o->p2));
+ break;
+ case OGetType:
+ {
+ ereg r = LOAD(ra);
+ emit_test(ctx, r, OJNotNull);
+ int jidx = emit_jump(ctx, true);
+ ereg v1 = LOAD_CONST_PTR(&hlt_void);
+ int jend = emit_jump(ctx, false);
+ patch_jump(ctx, jidx);
+ ereg v2 = LOAD_MEM_PTR(r,0);
+ patch_jump(ctx, jend);
+ STORE(dst, emit_phi(ctx, v1, v2));
+ }
+ break;
+ case OGetArray:
+ {
+ if( ra->t->kind == HABSTRACT ) {
+ int osize;
+ bool isPtr = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT;
+ if( isPtr )
+ osize = HL_WSIZE; // a pointer into the carray
+ else {
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ osize = rt->size; // a mem offset into it
+ }
+ ereg pos = OFFSET(LOAD(ra), LOAD(rb), osize, 0);
+ ereg val = isPtr ? LOAD_MEM_PTR(pos,0) : pos;
+ STORE(dst, val);
+ } else {
+ ereg pos = OFFSET(LOAD(ra), LOAD(rb), hl_type_size(dst->t), sizeof(varray));
+ STORE(dst, LOAD_MEM_PTR(pos,0));
+ }
+ }
+ break;
+ case OSetArray:
+ {
+ if( dst->t->kind == HABSTRACT ) {
+ int osize;
+ bool isPtr = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT;
+ if( isPtr) {
+ osize = HL_WSIZE;
+ } else {
+ hl_runtime_obj *rt = hl_get_obj_rt(rb->t);
+ osize = rt->size;
+ }
+ ereg pos = OFFSET(LOAD(dst), LOAD(ra), osize, 0);
+ emit_store_size(ctx, pos, 0, LOAD(rb), 0, osize);
+ } else {
+ ereg pos = OFFSET(LOAD(dst), LOAD(ra), hl_type_size(dst->t), sizeof(varray));
+ STORE_MEM(pos, 0, LOAD(rb));
+ }
+ }
+ break;
+ case OArraySize:
+ STORE(dst, LOAD_MEM(LOAD(ra),HL_WSIZE*2,&hlt_i32));
+ break;
+ case ORef:
+ {
+ ereg ref = resolve_ref(ctx, ra->id);
+ if( IS_NULL(ref) ) jit_assert();
+ ereg r = vreg_find(ctx->current_block->written_vars, ra->id);
+ if( !IS_NULL(r) ) {
+ STORE_MEM(ref, 0, LOAD(ra));
+ vreg_remove(&ctx->current_block->written_vars, ra->id);
+ }
+ STORE(dst, ref);
+ }
+ break;
+ case OUnref:
+ STORE(dst, LOAD_MEM(LOAD(ra),0,dst->t));
+ break;
+ case OSetref:
+ STORE_MEM(LOAD(dst),0,LOAD(ra));
+ break;
+ case ORefData:
+ switch( ra->t->kind ) {
+ case HARRAY:
+ STORE(dst, OFFSET(LOAD(ra),ENULL,0,sizeof(varray)));
+ break;
+ default:
+ jit_assert();
+ }
+ break;
+ case ORefOffset:
+ STORE(dst, OFFSET(LOAD(ra),LOAD(rb), hl_type_size(dst->t->tparam),0));
+ break;
+ case OToVirtual:
+ {
+ ereg args[2];
+ args[0] = LOAD(ra);
+ args[1] = LOAD_CONST_PTR(dst->t);
+ STORE(dst, emit_native_call(ctx,hl_to_virtual,args,2, dst->t));
+ }
+ break;
+ case OMakeEnum:
+ {
+ ereg args[2];
+ args[0] = LOAD_CONST_PTR(dst->t);
+ args[1] = LOAD_CONST(o->p2,&hlt_i32);
+ ereg en = emit_native_call(ctx, hl_alloc_enum, args, 2, dst->t);
+ STORE(dst, en);
+ hl_enum_construct *c = &dst->t->tenum->constructs[o->p2];
+ for(int i=0;inparams;i++)
+ STORE_MEM(en, c->offsets[i], LOAD(R(o->extra[i])));
+ }
+ break;
+ case OEnumAlloc:
+ {
+ ereg args[2];
+ args[0] = LOAD_CONST_PTR(dst->t);
+ args[1] = LOAD_CONST(o->p2,&hlt_i32);
+ STORE(dst, emit_native_call(ctx, hl_alloc_enum, args, 2, dst->t));
+ }
+ break;
+ case OEnumField:
+ {
+ hl_enum_construct *c = &ra->t->tenum->constructs[o->p3];
+ int slot = (int)(int_val)o->extra;
+ STORE(dst, LOAD_MEM(LOAD(ra),c->offsets[slot], dst->t));
+ }
+ break;
+ case OEnumIndex:
+ STORE(dst, LOAD_MEM(LOAD(ra),HL_WSIZE,dst->t));
+ break;
+ case OSetEnumField:
+ {
+ hl_enum_construct *c = &dst->t->tenum->constructs[0];
+ STORE_MEM(LOAD(dst), c->offsets[o->p2], LOAD(rb));
+ }
+ break;
+ case ONullCheck:
+ {
+ emit_test(ctx, LOAD(dst), OJNotNull);
+ int jok = emit_jump(ctx, true);
+
+ // ----- DETECT FIELD ACCESS ----------------
+ hl_function *f = ctx->fun;
+ hl_opcode *next = f->ops + ctx->op_pos + 1;
+ bool null_field_access = false;
+ int hashed_name = 0;
+ // skip const and operation between nullcheck and access
+ while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) {
+ next++;
+ }
+ if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) {
+ int fid = next->op == OField ? next->p3 : next->p2;
+ hl_obj_field *f = NULL;
+ if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT )
+ f = hl_obj_field_fetch(dst->t, fid);
+ else if( dst->t->kind == HVIRTUAL )
+ f = dst->t->virt->fields + fid;
+ if( f == NULL ) jit_assert();
+ null_field_access = true;
+ hashed_name = f->hashed_name;
+ } else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) {
+ int fid = next->p2 < 0 ? -1 : m->functions_indexes[next->p2];
+ hl_function *cf = m->code->functions + fid;
+ const uchar *name = fun_field_name(cf);
+ null_field_access = true;
+ hashed_name = hl_hash_gen(name, true);
+ }
+ // -----------------------------------------
+ ereg arg = null_field_access ? LOAD_CONST(hashed_name,&hlt_i32) : ENULL;
+ emit_native_call(ctx, null_field_access ? hl_jit_null_field_access : hl_jit_null_access, &arg, null_field_access ? 1 : 0, NULL);
+ patch_jump(ctx, jok);
+ }
+ break;
+ case OSafeCast:
+ STORE(dst, emit_dyn_cast(ctx, LOAD(ra), ra->t, dst->t));
+ break;
+ case ODynGet:
+ {
+ bool need_type = dyn_need_type(dst->t);
+ ereg args[3];
+ args[0] = LOAD(ra);
+ args[1] = LOAD_CONST(hl_hash_utf8(m->code->strings[o->p3]),&hlt_i32);
+ if( need_type ) args[2] = LOAD_CONST_PTR(dst->t);
+ STORE(dst, emit_native_call(ctx, get_dynget(dst->t), args, need_type ? 3 : 2, dst->t));
+ }
+ break;
+ case ODynSet:
+ {
+ bool need_type = dyn_need_type(dst->t);
+ ereg args[4];
+ args[0] = LOAD(dst);
+ args[1] = LOAD_CONST(hl_hash_utf8(m->code->strings[o->p2]),&hlt_i32);
+ if( need_type ) {
+ args[2] = LOAD_CONST_PTR(rb->t);
+ args[3] = LOAD(rb);
+ } else
+ args[2] = LOAD(rb);
+ emit_native_call(ctx, get_dynset(rb->t), args, need_type ? 4 : 3, &hlt_void);
+ }
+ break;
+ case OTrap:
+ {
+ ereg st = emit_gen_size(ctx, ALLOC_STACK, sizeof(hl_trap_ctx));
+
+ ereg thread, current_addr;
+ static hl_thread_info *tinf = NULL;
+ static hl_trap_ctx *trap = NULL;
+# ifndef HL_THREADS
+ if( tinf == NULL ) tinf = hl_get_thread();
+ current_addr = LOAD_CONST_PTR(&tinf->trap_current);
+# else
+ thread = emit_native_call(ctx, hl_get_thread, NULL, 0, &hlt_bytes);
+ current_addr = OFFSET(thread, ENULL, 0, (int)(int_val)&tinf->trap_current);
+# endif
+ STORE_MEM(st, (int)(int_val)&trap->prev, LOAD_MEM_PTR(current_addr,0));
+ STORE_MEM(current_addr, 0, st);
+
+
+ /*
+ trap E,@catch
+ catch g
+ catch g2
+ ...
+ @:catch
+
+ // Before haxe 5
+ This is a bit hackshish : we want to detect the type of exception filtered by the catch so we check the following
+ sequence of HL opcodes:
+
+ trap E,@catch
+ ...
+ @catch:
+ global R, _
+ call _, ???(R,E)
+
+ ??? is expected to be hl.BaseType.check
+ */
+ hl_function *f = ctx->fun;
+ hl_opcode *cat = f->ops + ctx->op_pos + 1;
+ hl_opcode *next = f->ops + ctx->op_pos + 1 + o->p2;
+ hl_opcode *next2 = f->ops + ctx->op_pos + 2 + o->p2;
+ void *addr = NULL;
+ if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->id == (int)(int_val)next2->extra) ) {
+ int gindex = cat->op == OCatch ? cat->p1 : next->p2;
+ hl_type *gt = m->code->globals[gindex];
+ while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super;
+ if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE )
+ addr = m->globals_data + m->globals_indexes[gindex];
+ }
+ STORE_MEM(st, (int)(int_val)&trap->tcheck, addr ? LOAD_MEM_PTR(LOAD_CONST_PTR(addr),0) : LOAD_CONST_PTR(NULL));
+
+ void *fun = setjmp;
+ ereg args[2];
+ int nargs = 1;
+ args[0] = OFFSET(st, ENULL, 0, (int)(int_val)&trap->buf);
+#if defined(HL_WIN) && defined(HL_64)
+ // On Win64 setjmp actually takes two arguments
+ // the jump buffer and the frame pointer (or the stack pointer if there is no FP)
+ nargs = 2;
+ args[1] = emit_gen(ctx, NATIVE_REG, ENULL, ENULL, REG_RBP);
+#endif
+#ifdef HL_MINGW
+ fun = _setjmp;
+#endif
+ ereg ret = emit_native_call(ctx, fun, args, nargs, &hlt_i32);
+ emit_test(ctx, ret, OJNull);
+ int jskip = emit_jump(ctx, true);
+ emit_gen_size(ctx, FREE_STACK, sizeof(hl_trap_ctx));
+ STORE(dst, tinf ? LOAD_CONST_PTR(&tinf->exc_value) : LOAD_MEM_PTR(thread,(int)(int_val)&tinf->exc_value));
+
+ int jtrap = emit_jump(ctx, false);
+ register_jump(ctx, jtrap, o->p2);
+ patch_jump(ctx, jskip);
+
+ if( ctx->trap_count == MAX_TRAPS ) jit_error("Too many try/catch depth");
+ ctx->traps[ctx->trap_count++] = st;
+ }
+ break;
+ case OEndTrap:
+ {
+ if( ctx->trap_count == 0 ) jit_assert();
+ ereg st = ctx->traps[ctx->trap_count - 1];
+
+ ereg thread, current_addr;
+ static hl_thread_info *tinf = NULL;
+ static hl_trap_ctx *trap = NULL;
+# ifndef HL_THREADS
+ if( tinf == NULL ) tinf = hl_get_thread();
+ current_addr = LOAD_CONST_PTR(&tinf->trap_current);
+# else
+ thread = emit_native_call(ctx, hl_get_thread, NULL, 0, &hlt_bytes);
+ current_addr = OFFSET(thread, ENULL, 0, (int)(int_val)&tinf->trap_current);
+# endif
+
+ STORE_MEM(current_addr, 0, LOAD_MEM_PTR(st,(int)(int_val)&trap->prev));
+
+# ifdef HL_WIN
+ // erase eip (prevent false positive in exception stack)
+ {
+ _JUMP_BUFFER *b = NULL;
+# ifdef HL_64
+ int offset = (int)(int_val)&(b->Rip);
+# else
+ int offset = (int)(int_val)&(b->Eip);
+# endif
+ STORE_MEM(st, offset, LOAD_CONST_PTR(NULL));
+ }
+# endif
+
+ emit_gen_size(ctx, FREE_STACK, sizeof(hl_trap_ctx));
+ }
+ break;
+ case OSwitch:
+ {
+ ereg v = LOAD(dst);
+ int count = o->p2;
+ emit_gen_ext(ctx, CMP, v, LOAD_CONST(count,&hlt_i32), 0, OJUGte);
+ patch_instr_mode(ctx, M_I32);
+ int jdefault = emit_jump(ctx, true);
+ emit_gen_ext(ctx, JUMP_TABLE, v, ENULL, 0, count);
+ for(int i=0; iextra[i]);
+ }
+ patch_jump(ctx, jdefault);
+ }
+ break;
+ case OGetTID:
+ STORE(dst, LOAD_MEM(LOAD(ra),0,&hlt_i32));
+ break;
+ case OAssert:
+ emit_native_call(ctx, hl_jit_assert, NULL, 0, &hlt_void);
+ break;
+ case ONop:
+ break;
+ case OPrefetch:
+ {
+ ereg r = LOAD(dst);
+ if( o->p2 > 0 ) {
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ r = OFFSET(r, ENULL, 0, rt->fields_indexes[o->p2-1]);
+ }
+ break;
+ default:
+ jit_assert();
+ break;
+ }
+ }
+ emit_gen(ctx, PREFETCH, r, ENULL, o->p3);
+ }
+ break;
+ case OAsm:
+ jit_assert();
+ break;
+ case OCatch:
+ // Only used by OTrap typing
+ break;
+ default:
+ jit_error(hl_op_name(o->op));
+ break;
+ }
+}
diff --git a/src/jit_old.c b/src/jit_old.c
new file mode 100644
index 000000000..7e4e6e88b
--- /dev/null
+++ b/src/jit_old.c
@@ -0,0 +1,4730 @@
+/*
+ * Copyright (C)2015-2016 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifdef _MSC_VER
+#pragma warning(disable:4820)
+#endif
+#include
+#include
+#include "hlsystem.h"
+
+#ifdef __arm__
+# error "JIT does not support ARM processors, only x86 and x86-64 are supported, please use HashLink/C native compilation instead"
+#endif
+
+#ifdef HL_DEBUG
+# define JIT_DEBUG
+#endif
+
+typedef enum {
+ Eax = 0,
+ Ecx = 1,
+ Edx = 2,
+ Ebx = 3,
+ Esp = 4,
+ Ebp = 5,
+ Esi = 6,
+ Edi = 7,
+#ifdef HL_64
+ R8 = 8,
+ R9 = 9,
+ R10 = 10,
+ R11 = 11,
+ R12 = 12,
+ R13 = 13,
+ R14 = 14,
+ R15 = 15,
+#endif
+ _LAST = 0xFF
+} CpuReg;
+
+typedef enum {
+ MOV,
+ LEA,
+ PUSH,
+ ADD,
+ SUB,
+ IMUL, // only overflow flag changes compared to MUL
+ DIV,
+ IDIV,
+ CDQ,
+ CDQE,
+ POP,
+ RET,
+ CALL,
+ AND,
+ OR,
+ XOR,
+ CMP,
+ TEST,
+ NOP,
+ SHL,
+ SHR,
+ SAR,
+ INC,
+ DEC,
+ JMP,
+ // FPU
+ FSTP,
+ FSTP32,
+ FLD,
+ FLD32,
+ FLDCW,
+ // SSE
+ MOVSD,
+ MOVSS,
+ COMISD,
+ COMISS,
+ ADDSD,
+ SUBSD,
+ MULSD,
+ DIVSD,
+ ADDSS,
+ SUBSS,
+ MULSS,
+ DIVSS,
+ XORPD,
+ CVTSI2SD,
+ CVTSI2SS,
+ CVTSD2SI,
+ CVTSD2SS,
+ CVTSS2SD,
+ CVTSS2SI,
+ STMXCSR,
+ LDMXCSR,
+ // 8-16 bits
+ MOV8,
+ CMP8,
+ TEST8,
+ PUSH8,
+ MOV16,
+ CMP16,
+ TEST16,
+ // prefetchs
+ PREFETCHT0,
+ PREFETCHT1,
+ PREFETCHT2,
+ PREFETCHNTA,
+ PREFETCHW,
+ // --
+ _CPU_LAST
+} CpuOp;
+
+#define JAlways 0
+#define JOverflow 0x80
+#define JULt 0x82
+#define JUGte 0x83
+#define JEq 0x84
+#define JNeq 0x85
+#define JULte 0x86
+#define JUGt 0x87
+#define JParity 0x8A
+#define JNParity 0x8B
+#define JSLt 0x8C
+#define JSGte 0x8D
+#define JSLte 0x8E
+#define JSGt 0x8F
+
+#define JCarry JLt
+#define JZero JEq
+#define JNotZero JNeq
+
+#define B(bv) *ctx->buf.b++ = (unsigned char)(bv)
+#define W(wv) *ctx->buf.w++ = wv
+
+#ifdef HL_64
+# define W64(wv) *ctx->buf.w64++ = wv
+#else
+# define W64(wv) W(wv)
+#endif
+
+static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
+
+#define MOD_RM(mod,reg,rm) B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7))
+#define SIB(mult,rmult,rbase) B((SIB_MULT[mult]<<6) | (((rmult)&7)<<3) | ((rbase)&7))
+#define IS_SBYTE(c) ( (c) >= -128 && (c) < 128 )
+
+#define AddJump(how,local) { if( (how) == JAlways ) { B(0xE9); } else { B(0x0F); B(how); }; local = BUF_POS(); W(0); }
+#define AddJump_small(how,local) { if( (how) == JAlways ) { B(0xEB); } else B(how - 0x10); local = BUF_POS() | 0x40000000; B(0); }
+#define XJump(how,local) AddJump(how,local)
+#define XJump_small(how,local) AddJump_small(how,local)
+
+#define MAX_OP_SIZE 256
+
+#define BUF_POS() ((int)(ctx->buf.b - ctx->startBuf))
+#define RTYPE(r) r->t->kind
+
+#ifdef HL_64
+# define RESERVE_ADDRESS 0x8000000000000000
+#else
+# define RESERVE_ADDRESS 0x80000000
+#endif
+
+#if defined(HL_WIN_CALL) && defined(HL_64)
+# define IS_WINCALL64 1
+#else
+# define IS_WINCALL64 0
+#endif
+
+typedef struct jlist jlist;
+struct jlist {
+ int pos;
+ int target;
+ jlist *next;
+};
+
+typedef struct vreg vreg;
+
+typedef enum {
+ RCPU = 0,
+ RFPU = 1,
+ RSTACK = 2,
+ RCONST = 3,
+ RADDR = 4,
+ RMEM = 5,
+ RUNUSED = 6,
+ RCPU_CALL = 1 | 8,
+ RCPU_8BITS = 1 | 16
+} preg_kind;
+
+typedef struct {
+ preg_kind kind;
+ int id;
+ int lock;
+ vreg *holds;
+} preg;
+
+struct vreg {
+ int stackPos;
+ int size;
+ hl_type *t;
+ preg *current;
+ preg stack;
+};
+
+#define REG_AT(i) (ctx->pregs + (i))
+
+#ifdef HL_64
+# define RCPU_COUNT 16
+# define RFPU_COUNT 16
+# ifdef HL_WIN_CALL
+# define CALL_NREGS 4
+# define RCPU_SCRATCH_COUNT 7
+# define RFPU_SCRATCH_COUNT 6
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 };
+static const CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 };
+# else
+# define CALL_NREGS 6 // TODO : XMM6+XMM7 are FPU reg parameters
+# define RCPU_SCRATCH_COUNT 9
+# define RFPU_SCRATCH_COUNT 16
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 };
+static const CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 };
+# endif
+#else
+# define CALL_NREGS 0
+# define RCPU_COUNT 8
+# define RFPU_COUNT 8
+# define RCPU_SCRATCH_COUNT 3
+# define RFPU_SCRATCH_COUNT 8
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx };
+#endif
+
+#define XMM(i) ((i) + RCPU_COUNT)
+#define PXMM(i) REG_AT(XMM(i))
+#define REG_IS_FPU(i) ((i) >= RCPU_COUNT)
+
+#define PEAX REG_AT(Eax)
+#define PESP REG_AT(Esp)
+#define PEBP REG_AT(Ebp)
+
+#define REG_COUNT (RCPU_COUNT + RFPU_COUNT)
+
+#define ID2(a,b) ((a) | ((b)<<8))
+#define R(id) (ctx->vregs + (id))
+#define ASSERT(i) { printf("JIT ERROR %d (jit.c line %d)\n",i,(int)__LINE__); jit_exit(); }
+#define IS_FLOAT(r) ((r)->t->kind == HF64 || (r)->t->kind == HF32)
+#define RLOCK(r) if( (r)->lock < ctx->currentPos ) (r)->lock = ctx->currentPos
+#define RUNLOCK(r) if( (r)->lock == ctx->currentPos ) (r)->lock = 0
+
+#define BREAK() B(0xCC)
+
+static preg _unused = { RUNUSED, 0, 0, NULL };
+static preg *UNUSED = &_unused;
+
+struct _jit_ctx {
+ union {
+ unsigned char *b;
+ unsigned int *w;
+ unsigned long long *w64;
+ int *i;
+ double *d;
+ } buf;
+ vreg *vregs;
+ preg pregs[REG_COUNT];
+ vreg *savedRegs[REG_COUNT];
+ int savedLocks[REG_COUNT];
+ int *opsPos;
+ int maxRegs;
+ int maxOps;
+ int bufSize;
+ int totalRegsSize;
+ int functionPos;
+ int allocOffset;
+ int currentPos;
+ int nativeArgsCount;
+ unsigned char *startBuf;
+ hl_module *m;
+ hl_function *f;
+ jlist *jumps;
+ jlist *calls;
+ jlist *switchs;
+ hl_alloc falloc; // cleared per-function
+ hl_alloc galloc;
+ vclosure *closure_list;
+ hl_debug_infos *debug;
+ int c2hl;
+ int hl2c;
+ void *static_functions[8];
+ bool static_function_offset;
+#ifdef WIN64_UNWIND_TABLES
+ int unwind_offset;
+ int nunwind;
+ PRUNTIME_FUNCTION unwind_table;
+#endif
+};
+
+#ifdef WIN64_UNWIND_TABLES
+
+typedef enum _UNWIND_OP_CODES
+{
+ UWOP_PUSH_NONVOL = 0, /* info == register number */
+ UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */
+ UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */
+ UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
+ UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */
+ UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
+ UWOP_SAVE_XMM128 = 8, /* info == XMM reg number, offset in next slot */
+ UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
+ UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */
+} UNWIND_CODE_OPS;
+
+void write_uwcode(jit_ctx *ctx, unsigned char offset, UNWIND_CODE_OPS code, unsigned char info)
+{
+ B(offset);
+ B((code) | (info) << 4);
+}
+
+void write_unwind_data(jit_ctx *ctx)
+{
+ // All generated functions use a frame pointer, so the same unwind info can be used for all of them
+ unsigned char version = 1;
+ unsigned char flags = 0;
+ unsigned char CountOfCodes = 2;
+ unsigned char SizeOfProlog = 4;
+ unsigned char FrameRegister = 5; // RBP
+ unsigned char FrameOffset = 0;
+ B((version) | (flags) << 3);
+ B(SizeOfProlog);
+ B(CountOfCodes);
+ B((FrameRegister) | (FrameOffset) << 4);
+ write_uwcode(ctx, 4, UWOP_SET_FPREG, 0);
+ write_uwcode(ctx, 1, UWOP_PUSH_NONVOL, 5);
+}
+#endif
+
+#define jit_exit() { hl_debug_break(); exit(-1); }
+#define jit_error(msg) _jit_error(ctx,msg,__LINE__)
+
+#ifndef HL_64
+# ifdef HL_DEBUG
+# define error_i64() jit_error("i64-32")
+# else
+void error_i64() {
+ printf("The module you are loading is using 64 bit ints that are not supported by the HL32.\nPlease run using HL64 or compile with -D hl-legacy32");
+ jit_exit();
+}
+# endif
+#endif
+
+static void _jit_error( jit_ctx *ctx, const char *msg, int line );
+static void on_jit_error( const char *msg, int_val line );
+
+static preg *pmem( preg *r, CpuReg reg, int offset ) {
+ r->kind = RMEM;
+ r->id = 0 | (reg << 4) | (offset << 8);
+ return r;
+}
+
+static preg *pmem2( preg *r, CpuReg reg, CpuReg reg2, int mult, int offset ) {
+ r->kind = RMEM;
+ r->id = mult | (reg << 4) | (reg2 << 8);
+ r->holds = (void*)(int_val)offset;
+ return r;
+}
+
+#ifdef HL_64
+static preg *pcodeaddr( preg *r, int offset ) {
+ r->kind = RMEM;
+ r->id = 15 | (offset << 4);
+ return r;
+}
+#endif
+
+static preg *pconst( preg *r, int c ) {
+ r->kind = RCONST;
+ r->holds = NULL;
+ r->id = c;
+ return r;
+}
+
+static preg *pconst64( preg *r, int_val c ) {
+#ifdef HL_64
+ if( ((int)c) == c )
+ return pconst(r,(int)c);
+ r->kind = RCONST;
+ r->id = 0xC064C064;
+ r->holds = (vreg*)c;
+ return r;
+#else
+ return pconst(r,(int)c);
+#endif
+}
+
+#ifndef HL_64
+// it is not possible to access direct 64 bit address in x86-64
+static preg *paddr( preg *r, void *p ) {
+ r->kind = RADDR;
+ r->holds = (vreg*)p;
+ return r;
+}
+#endif
+
+static void save_regs( jit_ctx *ctx ) {
+	int i;
+	for(i=0;i<REG_COUNT;i++) {
+		ctx->savedRegs[i] = ctx->pregs[i].holds;
+		ctx->savedLocks[i] = ctx->pregs[i].lock;
+	}
+}
+
+static void restore_regs( jit_ctx *ctx ) {
+	int i;
+	for(i=0;i<ctx->maxRegs;i++)
+		ctx->vregs[i].current = NULL;
+	for(i=0;i<REG_COUNT;i++) {
+		vreg *r = ctx->savedRegs[i];
+		preg *p = ctx->pregs + i;
+		p->holds = r;
+		p->lock = ctx->savedLocks[i];
+		if( r ) r->current = p;
+	}
+}
+
+static void jit_buf( jit_ctx *ctx ) {
+ if( BUF_POS() > ctx->bufSize - MAX_OP_SIZE ) {
+ int nsize = ctx->bufSize * 4 / 3;
+ unsigned char *nbuf;
+ int curpos;
+ if( nsize == 0 ) {
+ int i;
+			for(i=0;i<ctx->m->code->nfunctions;i++)
+ nsize += ctx->m->code->functions[i].nops;
+ nsize *= 4;
+ }
+ if( nsize < ctx->bufSize + MAX_OP_SIZE * 4 ) nsize = ctx->bufSize + MAX_OP_SIZE * 4;
+ curpos = BUF_POS();
+ nbuf = (unsigned char*)malloc(nsize);
+ if( nbuf == NULL ) ASSERT(nsize);
+ if( ctx->startBuf ) {
+ memcpy(nbuf,ctx->startBuf,curpos);
+ free(ctx->startBuf);
+ }
+ ctx->startBuf = nbuf;
+ ctx->buf.b = nbuf + curpos;
+ ctx->bufSize = nsize;
+ }
+}
+
+static const char *KNAMES[] = { "cpu","fpu","stack","const","addr","mem","unused" };
+#define ERRIF(c) if( c ) { printf("%s(%s,%s)\n",f?f->name:"???",KNAMES[a->kind], KNAMES[b->kind]); ASSERT(0); }
+
+typedef struct {
+ const char *name; // single operand
+ int r_mem; // r32 / r/m32 r32
+ int mem_r; // r/m32 / r32 r/m32
+ int r_const; // r32 / imm32 imm32
+ int r_i8; // r32 / imm8 imm8
+ int mem_const; // r/m32 / imm32 N/A
+} opform;
+
+#define FLAG_LONGOP 0x80000000
+#define FLAG_16B 0x40000000
+#define FLAG_8B 0x20000000
+#define FLAG_DUAL 0x10000000
+
+#define RM(op,id) ((op) | (((id)+1)<<8))
+#define GET_RM(op) (((op) >> ((op) < 0 ? 24 : 8)) & 15)
+#define SBYTE(op) ((op) << 16)
+#define LONG_OP(op) ((op) | FLAG_LONGOP)
+#define OP16(op) LONG_OP((op) | FLAG_16B)
+#define LONG_RM(op,id) LONG_OP(op | (((id) + 1) << 24))
+
+static opform OP_FORMS[_CPU_LAST] = {
+ { "MOV", 0x8B, 0x89, 0xB8, 0, RM(0xC7,0) },
+ { "LEA", 0x8D },
+ { "PUSH", 0x50, RM(0xFF,6), 0x68, 0x6A },
+ { "ADD", 0x03, 0x01, RM(0x81,0), RM(0x83,0) },
+ { "SUB", 0x2B, 0x29, RM(0x81,5), RM(0x83,5) },
+ { "IMUL", LONG_OP(0x0FAF), 0, 0x69 | FLAG_DUAL, 0x6B | FLAG_DUAL },
+ { "DIV", RM(0xF7,6), RM(0xF7,6) },
+ { "IDIV", RM(0xF7,7), RM(0xF7,7) },
+ { "CDQ", 0x99 },
+ { "CDQE", 0x98 },
+ { "POP", 0x58, RM(0x8F,0) },
+ { "RET", 0xC3 },
+ { "CALL", RM(0xFF,2), RM(0xFF,2), 0xE8 },
+ { "AND", 0x23, 0x21, RM(0x81,4), RM(0x83,4) },
+ { "OR", 0x0B, 0x09, RM(0x81,1), RM(0x83,1) },
+ { "XOR", 0x33, 0x31, RM(0x81,6), RM(0x83,6) },
+ { "CMP", 0x3B, 0x39, RM(0x81,7), RM(0x83,7) },
+ { "TEST", 0x85, 0x85/*SWP?*/, RM(0xF7,0) },
+ { "NOP", 0x90 },
+ { "SHL", RM(0xD3,4), 0, 0, RM(0xC1,4) },
+ { "SHR", RM(0xD3,5), 0, 0, RM(0xC1,5) },
+ { "SAR", RM(0xD3,7), 0, 0, RM(0xC1,7) },
+ { "INC", IS_64 ? RM(0xFF,0) : 0x40, RM(0xFF,0) },
+ { "DEC", IS_64 ? RM(0xFF,1) : 0x48, RM(0xFF,1) },
+ { "JMP", RM(0xFF,4) },
+ // FPU
+ { "FSTP", 0, RM(0xDD,3) },
+ { "FSTP32", 0, RM(0xD9,3) },
+ { "FLD", 0, RM(0xDD,0) },
+ { "FLD32", 0, RM(0xD9,0) },
+ { "FLDCW", 0, RM(0xD9, 5) },
+ // SSE
+ { "MOVSD", 0xF20F10, 0xF20F11 },
+ { "MOVSS", 0xF30F10, 0xF30F11 },
+ { "COMISD", 0x660F2F },
+ { "COMISS", LONG_OP(0x0F2F) },
+ { "ADDSD", 0xF20F58 },
+ { "SUBSD", 0xF20F5C },
+ { "MULSD", 0xF20F59 },
+ { "DIVSD", 0xF20F5E },
+ { "ADDSS", 0xF30F58 },
+ { "SUBSS", 0xF30F5C },
+ { "MULSS", 0xF30F59 },
+ { "DIVSS", 0xF30F5E },
+ { "XORPD", 0x660F57 },
+ { "CVTSI2SD", 0xF20F2A },
+ { "CVTSI2SS", 0xF30F2A },
+ { "CVTSD2SI", 0xF20F2D },
+ { "CVTSD2SS", 0xF20F5A },
+ { "CVTSS2SD", 0xF30F5A },
+ { "CVTSS2SI", 0xF30F2D },
+ { "STMXCSR", 0, LONG_RM(0x0FAE,3) },
+ { "LDMXCSR", 0, LONG_RM(0x0FAE,2) },
+ // 8 bits,
+ { "MOV8", 0x8A, 0x88, 0, 0xB0, RM(0xC6,0) },
+ { "CMP8", 0x3A, 0x38, 0, RM(0x80,7) },
+ { "TEST8", 0x84, 0x84, RM(0xF6,0) },
+ { "PUSH8", 0, 0, 0x6A | FLAG_8B },
+ { "MOV16", OP16(0x8B), OP16(0x89), OP16(0xB8) },
+ { "CMP16", OP16(0x3B), OP16(0x39) },
+ { "TEST16", OP16(0x85) },
+ // prefetchs
+ { "PREFETCHT0", 0, LONG_RM(0x0F18,1) },
+ { "PREFETCHT1", 0, LONG_RM(0x0F18,2) },
+ { "PREFETCHT2", 0, LONG_RM(0x0F18,3) },
+ { "PREFETCHNTA", 0, LONG_RM(0x0F18,0) },
+ { "PREFETCHW", 0, LONG_RM(0x0F0D,1) },
+};
+
+#ifdef HL_64
+# define REX() if( r64 ) B(r64 | 0x40)
+#else
+# define REX()
+#endif
+
+#define OP(b) \
+ if( (b) & 0xFF0000 ) { \
+ B((b)>>16); \
+ if( r64 ) B(r64 | 0x40); /* also in 32 bits mode */ \
+ B((b)>>8); \
+ B(b); \
+ } else { \
+ if( (b) & FLAG_16B ) { \
+ B(0x66); \
+ REX(); \
+ } else {\
+ REX(); \
+ if( (b) & FLAG_LONGOP ) B((b)>>8); \
+ }\
+ B(b); \
+ }
+
+static bool is_reg8( preg *a ) {
+ return a->kind == RSTACK || a->kind == RMEM || a->kind == RCONST || (a->kind == RCPU && a->id != Esi && a->id != Edi);
+}
+
+static void op( jit_ctx *ctx, CpuOp o, preg *a, preg *b, bool mode64 ) {
+ opform *f = &OP_FORMS[o];
+ int r64 = mode64 && (o != PUSH && o != POP && o != CALL && o != PUSH8 && o < PREFETCHT0) ? 8 : 0;
+ switch( o ) {
+ case CMP8:
+ case TEST8:
+ case MOV8:
+ if( !is_reg8(a) || !is_reg8(b) )
+ ASSERT(0);
+ break;
+ default:
+ break;
+ }
+ switch( ID2(a->kind,b->kind) ) {
+ case ID2(RUNUSED,RUNUSED):
+ ERRIF(f->r_mem == 0);
+ OP(f->r_mem);
+ break;
+ case ID2(RCPU,RCPU):
+ case ID2(RFPU,RFPU):
+ ERRIF( f->r_mem == 0 );
+ if( a->id > 7 ) r64 |= 4;
+ if( b->id > 7 ) r64 |= 1;
+ OP(f->r_mem);
+ MOD_RM(3,a->id,b->id);
+ break;
+ case ID2(RCPU,RFPU):
+ case ID2(RFPU,RCPU):
+ ERRIF( (f->r_mem>>16) == 0 );
+ if( a->id > 7 ) r64 |= 4;
+ if( b->id > 7 ) r64 |= 1;
+ OP(f->r_mem);
+ MOD_RM(3,a->id,b->id);
+ break;
+ case ID2(RCPU,RUNUSED):
+ ERRIF( f->r_mem == 0 );
+ if( a->id > 7 ) r64 |= 1;
+ if( GET_RM(f->r_mem) > 0 ) {
+ OP(f->r_mem);
+ MOD_RM(3, GET_RM(f->r_mem)-1, a->id);
+ } else
+ OP(f->r_mem + (a->id&7));
+ break;
+ case ID2(RSTACK,RUNUSED):
+ ERRIF( f->mem_r == 0 || GET_RM(f->mem_r) == 0 );
+ {
+ int stackPos = R(a->id)->stackPos;
+ OP(f->mem_r);
+ if( IS_SBYTE(stackPos) ) {
+ MOD_RM(1,GET_RM(f->mem_r)-1,Ebp);
+ B(stackPos);
+ } else {
+ MOD_RM(2,GET_RM(f->mem_r)-1,Ebp);
+ W(stackPos);
+ }
+ }
+ break;
+ case ID2(RCPU,RCONST):
+ ERRIF( f->r_const == 0 && f->r_i8 == 0 );
+ if( a->id > 7 ) r64 |= 1;
+ {
+ int_val cval = b->holds ? (int_val)b->holds : b->id;
+ // short byte form
+ if( f->r_i8 && IS_SBYTE(cval) ) {
+ if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4;
+ OP(f->r_i8);
+ if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_i8)-1,a->id);
+ B((int)cval);
+ } else if( GET_RM(f->r_const) > 0 || (f->r_const&FLAG_DUAL) ) {
+ if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4;
+ OP(f->r_const&0xFF);
+ if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_const)-1,a->id);
+ if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
+ } else {
+ ERRIF( f->r_const == 0);
+ OP((f->r_const&0xFF) + (a->id&7));
+ if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
+ }
+ }
+ break;
+ case ID2(RSTACK,RCPU):
+ case ID2(RSTACK,RFPU):
+ ERRIF( f->mem_r == 0 );
+ if( b->id > 7 ) r64 |= 4;
+ {
+ int stackPos = R(a->id)->stackPos;
+ OP(f->mem_r);
+ if( IS_SBYTE(stackPos) ) {
+ MOD_RM(1,b->id,Ebp);
+ B(stackPos);
+ } else {
+ MOD_RM(2,b->id,Ebp);
+ W(stackPos);
+ }
+ }
+ break;
+ case ID2(RCPU,RSTACK):
+ case ID2(RFPU,RSTACK):
+ ERRIF( f->r_mem == 0 );
+ if( a->id > 7 ) r64 |= 4;
+ {
+ int stackPos = R(b->id)->stackPos;
+ OP(f->r_mem);
+ if( IS_SBYTE(stackPos) ) {
+ MOD_RM(1,a->id,Ebp);
+ B(stackPos);
+ } else {
+ MOD_RM(2,a->id,Ebp);
+ W(stackPos);
+ }
+ }
+ break;
+ case ID2(RCONST,RUNUSED):
+ ERRIF( f->r_const == 0 );
+ {
+ int_val cval = a->holds ? (int_val)a->holds : a->id;
+ OP(f->r_const);
+ if( f->r_const & FLAG_8B ) B((int)cval); else W((int)cval);
+ }
+ break;
+ case ID2(RMEM,RUNUSED):
+ ERRIF( f->mem_r == 0 );
+ {
+ int mult = a->id & 0xF;
+ int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
+ CpuReg reg = (a->id >> 4) & 0xF;
+ if( mult == 15 ) {
+ ERRIF(1);
+ } else if( mult == 0 ) {
+ if( reg > 7 ) r64 |= 1;
+ OP(f->mem_r);
+ if( regOrOffs == 0 && (reg&7) != Ebp ) {
+ MOD_RM(0,GET_RM(f->mem_r)-1,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ } else if( IS_SBYTE(regOrOffs) ) {
+ MOD_RM(1,GET_RM(f->mem_r)-1,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ B(regOrOffs);
+ } else {
+ MOD_RM(2,GET_RM(f->mem_r)-1,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ W(regOrOffs);
+ }
+ } else {
+ // [eax + ebx * M]
+ ERRIF(1);
+ }
+ }
+ break;
+ case ID2(RCPU, RMEM):
+ case ID2(RFPU, RMEM):
+ ERRIF( f->r_mem == 0 );
+ {
+ int mult = b->id & 0xF;
+ int regOrOffs = mult == 15 ? b->id >> 4 : b->id >> 8;
+ CpuReg reg = (b->id >> 4) & 0xF;
+ if( mult == 15 ) {
+ int pos;
+ if( a->id > 7 ) r64 |= 4;
+ OP(f->r_mem);
+ MOD_RM(0,a->id,5);
+ if( IS_64 ) {
+ // offset wrt current code
+ pos = BUF_POS() + 4;
+ W(regOrOffs - pos);
+ } else {
+ ERRIF(1);
+ }
+ } else if( mult == 0 ) {
+ if( a->id > 7 ) r64 |= 4;
+ if( reg > 7 ) r64 |= 1;
+ OP(f->r_mem);
+ if( regOrOffs == 0 && (reg&7) != Ebp ) {
+ MOD_RM(0,a->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ } else if( IS_SBYTE(regOrOffs) ) {
+ MOD_RM(1,a->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ B(regOrOffs);
+ } else {
+ MOD_RM(2,a->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ W(regOrOffs);
+ }
+ } else {
+ int offset = (int)(int_val)b->holds;
+ if( a->id > 7 ) r64 |= 4;
+ if( reg > 7 ) r64 |= 1;
+ if( regOrOffs > 7 ) r64 |= 2;
+ OP(f->r_mem);
+ MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,a->id,4);
+ SIB(mult,regOrOffs,reg);
+ if( offset ) {
+ if( IS_SBYTE(offset) ) B(offset); else W(offset);
+ }
+ }
+ }
+ break;
+# ifndef HL_64
+ case ID2(RFPU,RADDR):
+# endif
+ case ID2(RCPU,RADDR):
+ ERRIF( f->r_mem == 0 );
+ if( a->id > 7 ) r64 |= 4;
+ OP(f->r_mem);
+ MOD_RM(0,a->id,5);
+ if( IS_64 )
+ W64((int_val)b->holds);
+ else
+ W((int)(int_val)b->holds);
+ break;
+# ifndef HL_64
+ case ID2(RADDR,RFPU):
+# endif
+ case ID2(RADDR,RCPU):
+ ERRIF( f->mem_r == 0 );
+ if( b->id > 7 ) r64 |= 4;
+ OP(f->mem_r);
+ MOD_RM(0,b->id,5);
+ if( IS_64 )
+ W64((int_val)a->holds);
+ else
+ W((int)(int_val)a->holds);
+ break;
+ case ID2(RMEM, RCPU):
+ case ID2(RMEM, RFPU):
+ ERRIF( f->mem_r == 0 );
+ {
+ int mult = a->id & 0xF;
+ int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
+ CpuReg reg = (a->id >> 4) & 0xF;
+ if( mult == 15 ) {
+ int pos;
+ if( b->id > 7 ) r64 |= 4;
+ OP(f->mem_r);
+ MOD_RM(0,b->id,5);
+ if( IS_64 ) {
+ // offset wrt current code
+ pos = BUF_POS() + 4;
+ W(regOrOffs - pos);
+ } else {
+ ERRIF(1);
+ }
+ } else if( mult == 0 ) {
+ if( b->id > 7 ) r64 |= 4;
+ if( reg > 7 ) r64 |= 1;
+ OP(f->mem_r);
+ if( regOrOffs == 0 && (reg&7) != Ebp ) {
+ MOD_RM(0,b->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ } else if( IS_SBYTE(regOrOffs) ) {
+ MOD_RM(1,b->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ B(regOrOffs);
+ } else {
+ MOD_RM(2,b->id,reg);
+ if( (reg&7) == Esp ) B(0x24);
+ W(regOrOffs);
+ }
+ } else {
+ int offset = (int)(int_val)a->holds;
+ if( b->id > 7 ) r64 |= 4;
+ if( reg > 7 ) r64 |= 1;
+ if( regOrOffs > 7 ) r64 |= 2;
+ OP(f->mem_r);
+ MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,b->id,4);
+ SIB(mult,regOrOffs,reg);
+ if( offset ) {
+ if( IS_SBYTE(offset) ) B(offset); else W(offset);
+ }
+ }
+ }
+ break;
+ default:
+ ERRIF(1);
+ }
+ if( ctx->debug && ctx->f && o == CALL ) {
+ preg p;
+ op(ctx,MOV,pmem(&p,Esp,-HL_WSIZE),PEBP,true); // erase EIP (clean stack report)
+ }
+}
+
+static void op32( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
+ op(ctx,o,a,b,false);
+}
+
+static void op64( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
+#ifndef HL_64
+ op(ctx,o,a,b,false);
+#else
+ op(ctx,o,a,b,true);
+#endif
+}
+
+static void patch_jump( jit_ctx *ctx, int p ) {
+ if( p == 0 ) return;
+ if( p & 0x40000000 ) {
+ int d;
+ p &= 0x3FFFFFFF;
+ d = BUF_POS() - (p + 1);
+ if( d < -128 || d >= 128 ) ASSERT(d);
+ *(char*)(ctx->startBuf + p) = (char)d;
+ } else {
+ *(int*)(ctx->startBuf + p) = BUF_POS() - (p + 4);
+ }
+}
+
+static void patch_jump_to( jit_ctx *ctx, int p, int target ) {
+ if( p == 0 ) return;
+ if( p & 0x40000000 ) {
+ int d;
+ p &= 0x3FFFFFFF;
+ d = target - (p + 1);
+ if( d < -128 || d >= 128 ) ASSERT(d);
+ *(char*)(ctx->startBuf + p) = (char)d;
+ } else {
+ *(int*)(ctx->startBuf + p) = target - (p + 4);
+ }
+}
+
+static int stack_size( hl_type *t ) {
+ switch( t->kind ) {
+ case HUI8:
+ case HUI16:
+ case HBOOL:
+# ifdef HL_64
+ case HI32:
+ case HF32:
+# endif
+ return sizeof(int_val);
+ case HI64:
+ default:
+ return hl_type_size(t);
+ }
+}
+
+static int call_reg_index( int reg ) {
+#	ifdef HL_64
+	int i;
+	for(i=0;i<CALL_NREGS;i++)
+		if( CALL_REGS[i] == reg )
+			return i;
+#	endif
+	return -1;
+}
+
+static bool is_call_reg( preg *p ) {
+#	ifdef HL_64
+	int i;
+	if( p->kind == RFPU )
+		return p->id < CALL_NREGS;
+	for(i=0;i<CALL_NREGS;i++)
+		if( p->kind == RCPU && p->id == CALL_REGS[i] )
+			return true;
+	return false;
+#	else
+	return false;
+#	endif
+}
+
+static preg *alloc_reg( jit_ctx *ctx, preg_kind k ) {
+	int i;
+	preg *p;
+	switch( k ) {
+	case RCPU:
+	case RCPU_CALL:
+	case RCPU_8BITS:
+		{
+			int off = ctx->allocOffset++;
+			const int count = RCPU_SCRATCH_COUNT;
+			for(i=0;i<count;i++) {
+				int r = RCPU_SCRATCH_REGS[(i + off)%count];
+				p = ctx->pregs + r;
+				if( p->lock >= ctx->currentPos ) continue;
+				if( k == RCPU_CALL && is_call_reg(p) ) continue;
+				if( k == RCPU_8BITS && !is_reg8(p) ) continue;
+				if( p->holds == NULL ) {
+					RLOCK(p);
+					return p;
+				}
+			}
+			for(i=0;i<count;i++) {
+				p = ctx->pregs + RCPU_SCRATCH_REGS[(i + off)%count];
+				if( p->lock >= ctx->currentPos ) continue;
+				if( k == RCPU_CALL && is_call_reg(p) ) continue;
+				if( k == RCPU_8BITS && !is_reg8(p) ) continue;
+				if( p->holds ) {
+					RLOCK(p);
+					p->holds->current = NULL;
+					p->holds = NULL;
+					return p;
+				}
+			}
+		}
+		break;
+	case RFPU:
+		{
+			int off = ctx->allocOffset++;
+			const int count = RFPU_SCRATCH_COUNT;
+			for(i=0;i<count;i++) {
+				p = PXMM((i + off)%count);
+				if( p->lock >= ctx->currentPos ) continue;
+				if( p->holds == NULL ) {
+					RLOCK(p);
+					return p;
+				}
+			}
+			for(i=0;i<count;i++) {
+				p = PXMM((i + off)%count);
+				if( p->lock >= ctx->currentPos ) continue;
+				if( p->holds ) {
+					RLOCK(p);
+					p->holds->current = NULL;
+					p->holds = NULL;
+					return p;
+				}
+			}
+		}
+		break;
+	default:
+		ASSERT(k);
+	}
+	ASSERT(0); // out of registers !
+	return NULL;
+}
+
+static preg *fetch( vreg *r ) {
+ if( r->current )
+ return r->current;
+ return &r->stack;
+}
+
+static void scratch( preg *r ) {
+ if( r && r->holds ) {
+ r->holds->current = NULL;
+ r->holds = NULL;
+ r->lock = 0;
+ }
+}
+
+static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size );
+
+static void load( jit_ctx *ctx, preg *r, vreg *v ) {
+ preg *from = fetch(v);
+ if( from == r || v->size == 0 ) return;
+ if( r->holds ) r->holds->current = NULL;
+ if( v->current ) {
+ v->current->holds = NULL;
+ from = r;
+ }
+ r->holds = v;
+ v->current = r;
+ copy(ctx,r,from,v->size);
+}
+
+static preg *alloc_fpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
+ preg *p = fetch(r);
+ if( p->kind != RFPU ) {
+ if( !IS_FLOAT(r) && (IS_64 || r->t->kind != HI64) ) ASSERT(r->t->kind);
+ p = alloc_reg(ctx, RFPU);
+ if( andLoad )
+ load(ctx,p,r);
+ else {
+ if( r->current )
+ r->current->holds = NULL;
+ r->current = p;
+ p->holds = r;
+ }
+ } else
+ RLOCK(p);
+ return p;
+}
+
+static void reg_bind( vreg *r, preg *p ) {
+ if( r->current )
+ r->current->holds = NULL;
+ r->current = p;
+ p->holds = r;
+}
+
+static preg *alloc_cpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
+ preg *p = fetch(r);
+ if( p->kind != RCPU ) {
+# ifndef HL_64
+ if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,andLoad);
+ if( r->size > 4 ) ASSERT(r->size);
+# endif
+ p = alloc_reg(ctx, RCPU);
+ if( andLoad )
+ load(ctx,p,r);
+ else
+ reg_bind(r,p);
+ } else
+ RLOCK(p);
+ return p;
+}
+
+// allocate a register that is not a call parameter
+static preg *alloc_cpu_call( jit_ctx *ctx, vreg *r ) {
+ preg *p = fetch(r);
+ if( p->kind != RCPU ) {
+# ifndef HL_64
+ if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,true);
+ if( r->size > 4 ) ASSERT(r->size);
+# endif
+ p = alloc_reg(ctx, RCPU_CALL);
+ load(ctx,p,r);
+ } else if( is_call_reg(p) ) {
+ preg *p2 = alloc_reg(ctx, RCPU_CALL);
+ op64(ctx,MOV,p2,p);
+ scratch(p);
+ reg_bind(r,p2);
+ return p2;
+ } else
+ RLOCK(p);
+ return p;
+}
+
+static preg *fetch32( jit_ctx *ctx, vreg *r ) {
+ if( r->current )
+ return r->current;
+ // make sure that the register is correctly erased
+ if( r->size < 4 ) {
+ preg *p = alloc_cpu(ctx, r, true);
+ RUNLOCK(p);
+ return p;
+ }
+ return fetch(r);
+}
+
+// make sure higher bits are zeroes
+static preg *alloc_cpu64( jit_ctx *ctx, vreg *r, bool andLoad ) {
+# ifndef HL_64
+ return alloc_cpu(ctx,r,andLoad);
+# else
+ preg *p = fetch(r);
+ if( !andLoad ) ASSERT(0);
+ if( p->kind != RCPU ) {
+ p = alloc_reg(ctx, RCPU);
+ op64(ctx,XOR,p,p);
+ load(ctx,p,r);
+ } else {
+ // remove higher bits
+ preg tmp;
+ op64(ctx,SHL,p,pconst(&tmp,32));
+ op64(ctx,SHR,p,pconst(&tmp,32));
+ RLOCK(p);
+ }
+ return p;
+# endif
+}
+
+// make sure the register can be used with 8 bits access
+static preg *alloc_cpu8( jit_ctx *ctx, vreg *r, bool andLoad ) {
+ preg *p = fetch(r);
+ if( p->kind != RCPU ) {
+ p = alloc_reg(ctx, RCPU_8BITS);
+ load(ctx,p,r);
+ } else if( !is_reg8(p) ) {
+ preg *p2 = alloc_reg(ctx, RCPU_8BITS);
+ op64(ctx,MOV,p2,p);
+ scratch(p);
+ reg_bind(r,p2);
+ return p2;
+ } else
+ RLOCK(p);
+ return p;
+}
+
+// Emit a copy of `size` bytes between two pregs (register, stack slot,
+// memory operand or absolute address), picking MOV variants by size and
+// routing through a temporary register when both operands are in memory.
+// Returns the preg that ends up holding the value (the register side when
+// one side is a register), or `to` otherwise. Unsupported kind pairs abort.
+static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ) {
+	if( size == 0 || to == from ) return to;
+	switch( ID2(to->kind,from->kind) ) {
+	case ID2(RMEM,RCPU):
+	case ID2(RSTACK,RCPU):
+	case ID2(RCPU,RSTACK):
+	case ID2(RCPU,RMEM):
+	case ID2(RCPU,RCPU):
+#	ifndef HL_64
+	case ID2(RCPU,RADDR):
+	case ID2(RADDR,RCPU):
+#	endif
+		switch( size ) {
+		case 1:
+			if( to->kind == RCPU ) {
+				// zero destination so the byte write leaves clean upper bits
+				op64(ctx,XOR,to,to);
+				if( !is_reg8(to) ) {
+					// destination has no 8-bit form: load 16 bits then mask
+					// down to the low byte with a shift pair
+					preg p;
+					op32(ctx,MOV16,to,from);
+					op32(ctx,SHL,to,pconst(&p,24));
+					op32(ctx,SHR,to,pconst(&p,24));
+					break;
+				}
+			}
+			if( !is_reg8(from) ) {
+				// source has no 8-bit form: bounce through an 8-bit-capable reg
+				preg *r = alloc_reg(ctx, RCPU_CALL);
+				op32(ctx, MOV, r, from);
+				RUNLOCK(r);
+				op32(ctx,MOV8,to,r);
+				return from;
+			}
+			op32(ctx,MOV8,to,from);
+			break;
+		case 2:
+			if( to->kind == RCPU )
+				op64(ctx,XOR,to,to);
+			op32(ctx,MOV16,to,from);
+			break;
+		case 4:
+			op32(ctx,MOV,to,from);
+			break;
+		case 8:
+			if( IS_64 ) {
+				op64(ctx,MOV,to,from);
+				break;
+			}
+			// 32-bit target cannot MOV 8 bytes here: fall through to assert
+		default:
+			ASSERT(size);
+		}
+		return to->kind == RCPU ? to : from;
+	case ID2(RFPU,RFPU):
+	case ID2(RMEM,RFPU):
+	case ID2(RSTACK,RFPU):
+	case ID2(RFPU,RMEM):
+	case ID2(RFPU,RSTACK):
+		switch( size ) {
+		case 8:
+			op64(ctx,MOVSD,to,from);
+			break;
+		case 4:
+			op32(ctx,MOVSS,to,from);
+			break;
+		default:
+			ASSERT(size);
+		}
+		return to->kind == RFPU ? to : from;
+	case ID2(RMEM,RSTACK):
+		{
+			// stack source: materialize into the matching register class first
+			vreg *rfrom = R(from->id);
+			if( IS_FLOAT(rfrom) )
+				return copy(ctx,to,alloc_fpu(ctx,rfrom,true),size);
+			return copy(ctx,to,alloc_cpu(ctx,rfrom,true),size);
+		}
+	case ID2(RMEM,RMEM):
+	case ID2(RSTACK,RMEM):
+	case ID2(RSTACK,RSTACK):
+#	ifndef HL_64
+	case ID2(RMEM,RADDR):
+	case ID2(RSTACK,RADDR):
+	case ID2(RADDR,RSTACK):
+#	endif
+		{
+			// mem-to-mem: route through a temporary register; use an FPU reg
+			// for float stack slots or 8-byte copies on 32-bit targets
+			preg *tmp;
+			if( (!IS_64 && size == 8) || (to->kind == RSTACK && IS_FLOAT(R(to->id))) || (from->kind == RSTACK && IS_FLOAT(R(from->id))) ) {
+				tmp = alloc_reg(ctx, RFPU);
+				op64(ctx,size == 8 ? MOVSD : MOVSS,tmp,from);
+			} else {
+				tmp = alloc_reg(ctx, RCPU);
+				copy(ctx,tmp,from,size);
+			}
+			return copy(ctx,to,tmp,size);
+		}
+#	ifdef HL_64
+	case ID2(RCPU,RADDR):
+	case ID2(RMEM,RADDR):
+	case ID2(RSTACK,RADDR):
+		{
+			// 64-bit absolute address source: load the pointer into a reg,
+			// then copy through the resulting memory operand
+			preg p;
+			preg *tmp = alloc_reg(ctx, RCPU);
+			op64(ctx,MOV,tmp,pconst64(&p,(int_val)from->holds));
+			return copy(ctx,to,pmem(&p,tmp->id,0),size);
+		}
+	case ID2(RADDR,RCPU):
+	case ID2(RADDR,RMEM):
+	case ID2(RADDR,RSTACK):
+		{
+			preg p;
+			preg *tmp = alloc_reg(ctx, RCPU);
+			op64(ctx,MOV,tmp,pconst64(&p,(int_val)to->holds));
+			return copy(ctx,pmem(&p,tmp->id,0),from,size);
+		}
+#	endif
+	default:
+		break;
+	}
+	printf("copy(%s,%s)\n",KNAMES[to->kind], KNAMES[from->kind]);
+	ASSERT(0);
+	return NULL;
+}
+
+// Store preg v into vreg r's stack slot, unbinding any other register r
+// currently holds. When `bind` is set and v is a CPU/FPU register, bind
+// r to v so later fetches reuse it.
+static void store( jit_ctx *ctx, vreg *r, preg *v, bool bind ) {
+	if( r->current && r->current != v ) {
+		r->current->holds = NULL;
+		r->current = NULL;
+	}
+	v = copy(ctx,&r->stack,v,r->size);
+	// sanity: a float vreg must end up in an FPU register and vice versa
+	if( IS_FLOAT(r) != (v->kind == RFPU) )
+		ASSERT(0);
+	if( bind && r->current != v && (v->kind == RCPU || v->kind == RFPU) ) {
+		scratch(v);
+		r->current = v;
+		v->holds = r;
+	}
+}
+
+// Store a call's return value into vreg r. On 64-bit the result is in
+// EAX/XMM0; on 32-bit, floats come back on the x87 stack (FSTP) and i64
+// is unsupported. Note the switch is spliced around #ifndef HL_64 so the
+// default branch is the only path compiled on 64-bit targets.
+static void store_result( jit_ctx *ctx, vreg *r ) {
+#	ifndef HL_64
+	switch( r->t->kind ) {
+	case HF64:
+		scratch(r->current);
+		op64(ctx,FSTP,&r->stack,UNUSED);
+		break;
+	case HF32:
+		scratch(r->current);
+		op64(ctx,FSTP32,&r->stack,UNUSED);
+		break;
+	case HI64:
+		scratch(r->current);
+		error_i64();
+		break;
+	default:
+#	endif
+	store(ctx,r,IS_FLOAT(r) ? REG_AT(XMM(0)) : PEAX,true);
+#	ifndef HL_64
+		break;
+	}
+#	endif
+}
+
+// Compile a register-to-register move (OMov): store `from` into `to`.
+// i64 moves are unsupported on 32-bit targets; f32 values are forced
+// into an FPU register first so the store sees the right register class.
+static void op_mov( jit_ctx *ctx, vreg *to, vreg *from ) {
+	preg *r = fetch(from);
+#	ifndef HL_64
+	if( to->t->kind == HI64 ) {
+		error_i64();
+		return;
+	}
+#	endif
+	if( from->t->kind == HF32 && r->kind != RFPU )
+		r = alloc_fpu(ctx,from,true);
+	store(ctx, to, r, true);
+}
+
+// Store preg `from` into vreg `to`, binding the register (store wrapper).
+static void copy_to( jit_ctx *ctx, vreg *to, preg *from ) {
+	store(ctx,to,from,true);
+}
+
+// Copy vreg `from`'s current value into preg `to` (no binding).
+static void copy_from( jit_ctx *ctx, preg *to, vreg *from ) {
+	copy(ctx,to,fetch(from),from->size);
+}
+
+// Load integer constant c into vreg r (XOR for zero, MOV otherwise,
+// 64-bit MOV when r is 8 bytes) and write it back to r's stack slot.
+// NOTE(review): pconst64(&p,c) sign-extends the 32-bit c — assumed
+// intentional for the i64 constant path; verify against callers.
+static void store_const( jit_ctx *ctx, vreg *r, int c ) {
+	preg p;
+	if( c == 0 )
+		op(ctx,XOR,alloc_cpu(ctx,r,false),alloc_cpu(ctx,r,false),r->size == 8);
+	else if( r->size == 8 )
+		op64(ctx,MOV,alloc_cpu(ctx,r,false),pconst64(&p,c));
+	else
+		op32(ctx,MOV,alloc_cpu(ctx,r,false),pconst(&p,c));
+	store(ctx,r,r->current,false);
+}
+
+// Invalidate all caller-saved register bindings after a call: scratch CPU
+// registers and all XMM registers are unbound from their vregs.
+// NOTE(review): the two for-loop headers appear garbled in this diff
+// (the "<" comparisons were stripped); verify against upstream jit.c.
+// NOTE(review): the native_call parameter is unused here — confirm intent.
+static void discard_regs( jit_ctx *ctx, bool native_call ) {
+	int i;
+	for(i=0;ipregs + RCPU_SCRATCH_REGS[i];
+		if( r->holds ) {
+			r->holds->current = NULL;
+			r->holds = NULL;
+		}
+	}
+	for(i=0;ipregs + XMM(i);
+		if( r->holds ) {
+			r->holds->current = NULL;
+			r->holds = NULL;
+		}
+	}
+}
+
+// Align the stack to 16 bytes before a call, given `size` bytes of pending
+// arguments. Accounts for the saved EIP+EBP and the frame's register area.
+// Returns size plus the emitted padding so the caller can pop it afterwards.
+// (The inner `if( pad )` is always true here since total & 15 != 0.)
+static int pad_before_call( jit_ctx *ctx, int size ) {
+	int total = size + ctx->totalRegsSize + HL_WSIZE * 2; // EIP+EBP
+	if( total & 15 ) {
+		int pad = 16 - (total & 15);
+		preg p;
+		if( pad ) op64(ctx,SUB,PESP,pconst(&p,pad));
+		size += pad;
+	}
+	return size;
+}
+
+// Push vreg r onto the native stack as a call argument, by its stack_size:
+// 1/2-byte values get a manual SUB+MOV (no byte/word PUSH), 4-byte uses
+// PUSH on x86-32 or a pseudo push32 on x64, 8-byte handles FPU values,
+// native 64-bit PUSH, or a pair of 32-bit PUSHes from the stack slot.
+static void push_reg( jit_ctx *ctx, vreg *r ) {
+	preg p;
+	switch( stack_size(r->t) ) {
+	case 1:
+		op64(ctx,SUB,PESP,pconst(&p,1));
+		op32(ctx,MOV8,pmem(&p,Esp,0),alloc_cpu8(ctx,r,true));
+		break;
+	case 2:
+		op64(ctx,SUB,PESP,pconst(&p,2));
+		op32(ctx,MOV16,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
+		break;
+	case 4:
+		if( r->size < 4 )
+			alloc_cpu(ctx,r,true); // force fetch (higher bits set to 0)
+		if( !IS_64 ) {
+			if( r->current != NULL && r->current->kind == RFPU ) scratch(r->current);
+			op32(ctx,PUSH,fetch(r),UNUSED);
+		} else {
+			// pseudo push32 (not available)
+			op64(ctx,SUB,PESP,pconst(&p,4));
+			op32(ctx,MOV,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
+		}
+		break;
+	case 8:
+		if( fetch(r)->kind == RFPU ) {
+			op64(ctx,SUB,PESP,pconst(&p,8));
+			op64(ctx,MOVSD,pmem(&p,Esp,0),fetch(r));
+		} else if( IS_64 )
+			op64(ctx,PUSH,fetch(r),UNUSED);
+		else if( r->stack.kind == RSTACK ) {
+			// push high dword then low dword from the stack slot
+			scratch(r->current);
+			r->stackPos += 4;
+			op32(ctx,PUSH,&r->stack,UNUSED);
+			r->stackPos -= 4;
+			op32(ctx,PUSH,&r->stack,UNUSED);
+		} else
+			ASSERT(0);
+		break;
+	default:
+		ASSERT(r->size);
+	}
+}
+
+// Begin a native call with nargs arguments: record the count for the
+// set_native_arg* helpers and pre-align the stack for any args that will
+// overflow the register-passing area. Returns the padded stack size.
+static int begin_native_call( jit_ctx *ctx, int nargs ) {
+	ctx->nativeArgsCount = nargs;
+	return pad_before_call(ctx, nargs > CALL_NREGS ? (nargs - CALL_NREGS) * HL_WSIZE : 0);
+}
+
+// Allocate a register suitable to build the next native-call argument in.
+// On x64, prefer the actual calling-convention register for that position.
+static preg *alloc_native_arg( jit_ctx *ctx ) {
+#	ifdef HL_64
+	int rid = ctx->nativeArgsCount - 1;
+	preg *r = rid < CALL_NREGS ? REG_AT(CALL_REGS[rid]) : alloc_reg(ctx,RCPU_CALL);
+	scratch(r);
+	return r;
+#	else
+	return alloc_reg(ctx, RCPU);
+#	endif
+}
+
+// Pass one (integer/pointer) argument to a native call, right-to-left.
+// Small stack values are widened via fetch32 first. On x64, the arg goes
+// into the convention register for its position (or is pushed when past
+// CALL_NREGS); on x86-32 everything is pushed.
+static void set_native_arg( jit_ctx *ctx, preg *r ) {
+	if( r->kind == RSTACK ) {
+		vreg *v = ctx->vregs + r->id;
+		if( v->size < 4 )
+			r = fetch32(ctx, v);
+	}
+#	ifdef HL_64
+	if( r->kind == RFPU ) ASSERT(0);
+	int rid = --ctx->nativeArgsCount;
+	preg *target;
+	if( rid >= CALL_NREGS ) {
+		op64(ctx,PUSH,r,UNUSED);
+		return;
+	}
+	target = REG_AT(CALL_REGS[rid]);
+	if( target != r ) {
+		op64(ctx, MOV, target, r);
+		scratch(target);
+	}
+#	else
+	op32(ctx,PUSH,r,UNUSED);
+#	endif
+}
+
+// Pass one floating-point argument to a native call. On x64 the value is
+// moved into the proper XMM register (Win64 numbers XMM by argument
+// position, SysV numbers FP args independently — hence XMM(0) here and
+// the "last argument only" restriction); on x86-32 it is pushed.
+static void set_native_arg_fpu( jit_ctx *ctx, preg *r, bool isf32 ) {
+#	ifdef HL_64
+	if( r->kind == RCPU ) ASSERT(0);
+	// can only be used if last argument !!
+	ctx->nativeArgsCount--;
+	preg *target = REG_AT(XMM(IS_WINCALL64 ? ctx->nativeArgsCount : 0));
+	if( target != r ) {
+		op64(ctx, isf32 ? MOVSS : MOVSD, target, r);
+		scratch(target);
+	}
+#	else
+	op32(ctx,PUSH,r,UNUSED);
+#	endif
+}
+
+// Tracks calling-convention register assignment while laying out a call:
+// next free integer/FPU slot, and for each physical register which
+// argument index (+1, 0 = unmapped) it carries.
+typedef struct {
+	int nextCpu;  // next integer argument register index
+	int nextFpu;  // next FPU argument register index (SysV only)
+	int mapped[REG_COUNT];  // reg -> argument id + 1, 0 when unused
+} call_regs;
+
+// Pick the physical register that argument `id` of type t is passed in,
+// or -1 when it goes on the stack (always on x86-32). Win64 consumes one
+// positional slot per argument regardless of class; SysV keeps separate
+// integer and FPU counters.
+static int select_call_reg( call_regs *regs, hl_type *t, int id ) {
+#	ifndef HL_64
+	return -1;
+#else
+	bool isFloat = t->kind == HF32 || t->kind == HF64;
+#	ifdef HL_WIN_CALL
+	int index = regs->nextCpu++;
+#	else
+	int index = isFloat ? regs->nextFpu++ : regs->nextCpu++;
+#	endif
+	if( index >= CALL_NREGS )
+		return -1;
+	int reg = isFloat ? XMM(index) : CALL_REGS[index];
+	regs->mapped[reg] = id + 1;
+	return reg;
+#endif
+}
+
+// Reverse lookup: return the physical register previously mapped to
+// argument `id` by select_call_reg, or -1 when it was stack-passed.
+// NOTE(review): the for-loop header is garbled in this diff (loop bound
+// and the CALL_REGS lookup were stripped); verify against upstream jit.c.
+static int mapped_reg( call_regs *regs, int id ) {
+#	ifndef HL_64
+	return -1;
+#else
+	int i;
+	for(i=0;imapped[r] == id + 1 ) return r;
+		r = XMM(i);
+		if( regs->mapped[r] == id + 1 ) return r;
+	}
+	return -1;
+#endif
+}
+
+// Lay out `count` call arguments: register-passed args are copied into
+// their convention registers and locked; the rest are pushed right-to-left
+// after the stack is 16-byte aligned. Returns the padded stack size to
+// pop after the call.
+// NOTE(review): both for-loop headers are garbled in this diff (bounds,
+// the vreg lookup and the mapped_reg test were stripped); verify against
+// upstream jit.c.
+static int prepare_call_args( jit_ctx *ctx, int count, int *args, vreg *vregs, int extraSize ) {
+	int i;
+	int size = extraSize, paddedSize;
+	call_regs ctmp = {0};
+	for(i=0;it, i);
+		if( cr >= 0 ) {
+			preg *c = REG_AT(cr);
+			preg *cur = fetch(r);
+			if( cur != c ) {
+				copy(ctx,c,cur,r->size);
+				scratch(c);
+			}
+			RLOCK(c);
+			continue;
+		}
+		size += stack_size(r->t);
+	}
+	paddedSize = pad_before_call(ctx,size);
+	for(i=0;i= 0 ) continue;
+		push_reg(ctx,r);
+		if( r->current ) RUNLOCK(r->current);
+	}
+	return paddedSize;
+}
+
+// Emit the CALL itself. size is the number of argument bytes to pop after
+// the call, or -1 for no-return calls (throw paths). In JIT_DEBUG builds
+// on 64-bit, verify ESP is 16-byte aligned first. On Win64, reserve the
+// 32-byte shadow space the callee is entitled to.
+static void op_call( jit_ctx *ctx, preg *r, int size ) {
+	preg p;
+#	ifdef JIT_DEBUG
+	if( IS_64 && size >= 0 ) {
+		int jchk;
+		op32(ctx,TEST,PESP,pconst(&p,15));
+		XJump(JZero,jchk);
+		BREAK(); // unaligned ESP
+		patch_jump(ctx, jchk);
+	}
+#	endif
+	if( IS_WINCALL64 ) {
+		// MSVC requires 32bytes of free space here
+		op64(ctx,SUB,PESP,pconst(&p,32));
+		if( size >= 0 ) size += 32;
+	}
+	op32(ctx, CALL, r, UNUSED);
+	if( size > 0 ) op64(ctx,ADD,PESP,pconst(&p,size));
+}
+
+// Call a resolved native C function through EAX. Exception-raising
+// functions never return, so no stack cleanup or register invalidation
+// is emitted for them.
+static void call_native( jit_ctx *ctx, void *nativeFun, int size ) {
+	bool isExc = nativeFun == hl_assert || nativeFun == hl_throw || nativeFun == on_jit_error;
+	preg p;
+	// native function, already resolved
+	op64(ctx,MOV,PEAX,pconst64(&p,(int_val)nativeFun));
+	op_call(ctx,PEAX, isExc ? -1 : size);
+	if( isExc )
+		return;
+	discard_regs(ctx, true);
+}
+
+// Compile a direct call to function index findex. Natives go through
+// call_native; HL functions get a relative CALL to the already-compiled
+// address, to our own entry point (self-recursion), or to a placeholder
+// patched later via ctx->calls. The result, if any, is stored into dst.
+static void op_call_fun( jit_ctx *ctx, vreg *dst, int findex, int count, int *args ) {
+	int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex];
+	bool isNative = fid >= ctx->m->code->nfunctions;
+	int size = prepare_call_args(ctx,count,args,ctx->vregs,0);
+	preg p;
+	if( fid < 0 ) {
+		ASSERT(fid);
+	} else if( isNative ) {
+		call_native(ctx,ctx->m->functions_ptrs[findex],size);
+	} else {
+		// cpos = address of the CALL opcode itself, accounting for the
+		// Win64 shadow-space SUB and the debug ESP check emitted before it
+		int cpos = BUF_POS() + (IS_WINCALL64 ? 4 : 0);
+#		ifdef JIT_DEBUG
+		if( IS_64 ) cpos += 13; // ESP CHECK
+#		endif
+		if( ctx->m->functions_ptrs[findex] ) {
+			// already compiled
+			op_call(ctx,pconst(&p,(int)(int_val)ctx->m->functions_ptrs[findex] - (cpos + 5)), size);
+		} else if( ctx->m->code->functions + fid == ctx->f ) {
+			// our current function
+			op_call(ctx,pconst(&p, ctx->functionPos - (cpos + 5)), size);
+		} else {
+			// stage for later
+			jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
+			j->pos = cpos;
+			j->target = findex;
+			j->next = ctx->calls;
+			ctx->calls = j;
+			op_call(ctx,pconst(&p,0), size);
+		}
+		discard_regs(ctx, false);
+	}
+	if( dst )
+		store_result(ctx,dst);
+}
+
+// Emit the function prologue: save EBP, set up the frame pointer, and
+// reserve stack space for this function's spilled registers.
+static void op_enter( jit_ctx *ctx ) {
+	preg p;
+	op64(ctx, PUSH, PEBP, UNUSED);
+	op64(ctx, MOV, PEBP, PESP);
+	if( ctx->totalRegsSize ) op64(ctx, SUB, PESP, pconst(&p,ctx->totalRegsSize));
+}
+
+// Emit the function epilogue with r as return value: floats go to XMM0
+// (x64) or the x87 stack (x86-32), everything else to EAX (small values
+// widened via fetch32 first). Then unwind the frame and RET; JIT_DEBUG
+// builds assert ESP == EBP before popping.
+static void op_ret( jit_ctx *ctx, vreg *r ) {
+	preg p;
+	switch( r->t->kind ) {
+	case HF32:
+#		ifdef HL_64
+		op64(ctx, MOVSS, PXMM(0), fetch(r));
+#		else
+		op64(ctx,FLD32,&r->stack,UNUSED);
+#		endif
+		break;
+	case HF64:
+#		ifdef HL_64
+		op64(ctx, MOVSD, PXMM(0), fetch(r));
+#		else
+		op64(ctx,FLD,&r->stack,UNUSED);
+#		endif
+		break;
+	default:
+		if( r->size < 4 && !r->current )
+			fetch32(ctx, r);
+		if( r->current != PEAX )
+			op64(ctx,MOV,PEAX,fetch(r));
+		break;
+	}
+	if( ctx->totalRegsSize ) op64(ctx, ADD, PESP, pconst(&p, ctx->totalRegsSize));
+#	ifdef JIT_DEBUG
+	{
+		int jeq;
+		op64(ctx, CMP, PESP, PEBP);
+		XJump_small(JEq,jeq);
+		jit_error("invalid ESP");
+		patch_jump(ctx,jeq);
+	}
+#	endif
+	op64(ctx, POP, PEBP, UNUSED);
+	op64(ctx, RET, UNUSED, UNUSED);
+}
+
+// Call a native function with compile-time constant arguments: on x64
+// constants are loaded into the convention registers, on x86-32 they are
+// pushed right-to-left.
+// NOTE(review): the loop header(s) are garbled in this diff — the 64-bit
+// register-loading loop and the 32-bit push loop appear fused; verify
+// against upstream jit.c.
+static void call_native_consts( jit_ctx *ctx, void *nativeFun, int_val *args, int nargs ) {
+	int size = pad_before_call(ctx, IS_64 ? 0 : HL_WSIZE*nargs);
+	preg p;
+	int i;
+#	ifdef HL_64
+	for(i=0;i=0;i--)
+		op32(ctx, PUSH, pconst64(&p, args[i]), UNUSED);
+#	endif
+	call_native(ctx, nativeFun, size);
+}
+
+static void on_jit_error( const char *msg, int_val line ) {
+ char buf[256];
+ int iline = (int)line;
+ sprintf(buf,"%s (line %d)",msg,iline);
+#ifdef HL_WIN_DESKTOP
+ MessageBoxA(NULL,buf,"JIT ERROR",MB_OK);
+#else
+ printf("JIT ERROR : %s\n",buf);
+#endif
+ hl_debug_break();
+ hl_throw(NULL);
+}
+
+// Emit code that reports a JIT error at runtime by calling on_jit_error
+// with the (constant) message and line number.
+static void _jit_error( jit_ctx *ctx, const char *msg, int line ) {
+	int_val args[2] = { (int_val)msg, (int_val)line };
+	call_native_consts(ctx,on_jit_error,args,2);
+}
+
+
+// Compile a binary operation (arithmetic, bitwise, shift, div/mod or a
+// compare used by a following jump) on vregs a and b, storing into dst
+// when non-NULL. Returns the preg holding the result, or — for compare
+// opcodes — emits the CMP/COMIS* that sets flags for do_jump. Handles:
+// float ops (SSE, fmod via native call), shifts through ECX, guarded
+// IDIV/DIV with div-by-zero => 0 and MIN/-1 overflow protection, and NaN
+// flag fixups for float comparisons.
+static preg *op_binop( jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op bop ) {
+	preg *pa = fetch(a), *pb = fetch(b), *out = NULL;
+	CpuOp o;
+	if( IS_FLOAT(a) ) {
+		bool isf32 = a->t->kind == HF32;
+		switch( bop ) {
+		case OAdd: o = isf32 ? ADDSS : ADDSD; break;
+		case OSub: o = isf32 ? SUBSS : SUBSD; break;
+		case OMul: o = isf32 ? MULSS : MULSD; break;
+		case OSDiv: o = isf32 ? DIVSS : DIVSD; break;
+		case OJSLt:
+		case OJSGte:
+		case OJSLte:
+		case OJSGt:
+		case OJEq:
+		case OJNotEq:
+		case OJNotLt:
+		case OJNotGte:
+			o = isf32 ? COMISS : COMISD;
+			break;
+		case OSMod:
+			{
+				// float modulo has no SSE instruction: call fmod/fmodf
+				int args[] = { a->stack.id, b->stack.id };
+				int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
+				void *mod_fun;
+				if( isf32 ) mod_fun = fmodf; else mod_fun = fmod;
+				call_native(ctx,mod_fun,size);
+				store_result(ctx,dst);
+				return fetch(dst);
+			}
+		default:
+			printf("%s\n", hl_op_name(bop));
+			ASSERT(bop);
+		}
+	} else {
+		bool is64 = a->t->kind == HI64;
+#		ifndef HL_64
+		if( is64 ) {
+			error_i64();
+			return fetch(a);
+		}
+#		endif
+		switch( bop ) {
+		case OAdd: o = ADD; break;
+		case OSub: o = SUB; break;
+		case OMul: o = IMUL; break;
+		case OAnd: o = AND; break;
+		case OOr: o = OR; break;
+		case OXor: o = XOR; break;
+		case OShl:
+		case OUShr:
+		case OSShr:
+			// x86 variable shifts take the count in CL: route b through ECX
+			if( !b->current || b->current->kind != RCPU || b->current->id != Ecx ) {
+				scratch(REG_AT(Ecx));
+				op(ctx,MOV,REG_AT(Ecx),pb,is64);
+				RLOCK(REG_AT(Ecx));
+				pa = fetch(a);
+			} else
+				RLOCK(b->current);
+			if( pa->kind != RCPU ) {
+				pa = alloc_reg(ctx, RCPU);
+				op(ctx,MOV,pa,fetch(a), is64);
+			}
+			op(ctx,bop == OShl ? SHL : (bop == OUShr ? SHR : SAR), pa, UNUSED,is64);
+			if( dst ) store(ctx, dst, pa, true);
+			return pa;
+		case OSDiv:
+		case OUDiv:
+		case OSMod:
+		case OUMod:
+			{
+				// IDIV/DIV use EAX:EDX implicitly; quotient in EAX, remainder in EDX
+				preg *out = bop == OSMod || bop == OUMod ? REG_AT(Edx) : PEAX;
+				preg *r = pb;
+				preg p;
+				int jz, jz1 = 0, jend;
+				if( pa->kind == RCPU && pa->id == Eax ) RLOCK(pa);
+				// ensure b in CPU reg and not in Eax/Edx (for UI8/UI16)
+				if( pb->kind != RCPU || (pb->id == Eax || pb->id == Edx) ) {
+					scratch(REG_AT(Ecx));
+					scratch(pb);
+					load(ctx,REG_AT(Ecx),b);
+					r = REG_AT(Ecx);
+				}
+				// integer div 0 => 0
+				op(ctx,TEST,r,r,is64);
+				XJump_small(JZero, jz);
+				// Prevent MIN/-1 overflow exception
+				// OSMod: r = (b == 0 || b == -1) ? 0 : a % b
+				// OSDiv: r = (b == 0 || b == -1) ? a * b : a / b
+				if( bop == OSMod || bop == OSDiv ) {
+					op(ctx, CMP, r, pconst(&p,-1), is64);
+					XJump_small(JEq, jz1);
+				}
+				pa = fetch(a);
+				if( pa->kind != RCPU || pa->id != Eax ) {
+					scratch(PEAX);
+					scratch(pa);
+					load(ctx,PEAX,a);
+				}
+				scratch(REG_AT(Edx));
+				scratch(REG_AT(Eax));
+				if( bop == OUDiv || bop == OUMod )
+					op(ctx, XOR, REG_AT(Edx), REG_AT(Edx), is64);
+				else
+					op(ctx, CDQ, UNUSED, UNUSED, is64); // sign-extend Eax into Eax:Edx
+				op(ctx, bop == OUDiv || bop == OUMod ? DIV : IDIV, r, UNUSED, is64);
+				XJump_small(JAlways, jend);
+				patch_jump(ctx, jz);
+				patch_jump(ctx, jz1);
+				if( bop != OSDiv ) {
+					op(ctx, XOR, out, out, is64);
+				} else {
+					// b == -1 case: a / -1 computed as a * -1 (avoids overflow trap)
+					load(ctx, out, a);
+					op(ctx, IMUL, out, r, is64);
+				}
+				patch_jump(ctx, jend);
+				if( dst ) store(ctx, dst, out, true);
+				return out;
+			}
+		case OJSLt:
+		case OJSGte:
+		case OJSLte:
+		case OJSGt:
+		case OJULt:
+		case OJUGte:
+		case OJEq:
+		case OJNotEq:
+			// pick CMP width matching the operand type
+			switch( a->t->kind ) {
+			case HUI8:
+			case HBOOL:
+				o = CMP8;
+				break;
+			case HUI16:
+				o = CMP16;
+				break;
+			default:
+				o = CMP;
+				break;
+			}
+			break;
+		default:
+			printf("%s\n", hl_op_name(bop));
+			ASSERT(bop);
+		}
+	}
+	switch( RTYPE(a) ) {
+	case HI32:
+	case HUI8:
+	case HUI16:
+	case HBOOL:
+#	ifndef HL_64
+	// on 32-bit targets pointers are 32-bit too, so all pointer types
+	// take the 32-bit operation path below
+	case HDYNOBJ:
+	case HVIRTUAL:
+	case HOBJ:
+	case HSTRUCT:
+	case HFUN:
+	case HMETHOD:
+	case HBYTES:
+	case HNULL:
+	case HENUM:
+	case HDYN:
+	case HTYPE:
+	case HABSTRACT:
+	case HARRAY:
+#	endif
+		switch( ID2(pa->kind, pb->kind) ) {
+		case ID2(RCPU,RCPU):
+		case ID2(RCPU,RSTACK):
+			op32(ctx, o, pa, pb);
+			scratch(pa);
+			out = pa;
+			break;
+		case ID2(RSTACK,RCPU):
+			if( dst == a && o != IMUL ) {
+				// operate directly on the stack slot (IMUL has no mem dst form)
+				op32(ctx, o, pa, pb);
+				dst = NULL;
+				out = pa;
+			} else {
+				alloc_cpu(ctx,a, true);
+				return op_binop(ctx,dst,a,b,bop);
+			}
+			break;
+		case ID2(RSTACK,RSTACK):
+			alloc_cpu(ctx, a, true);
+			return op_binop(ctx, dst, a, b, bop);
+		default:
+			printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
+			ASSERT(ID2(pa->kind, pb->kind));
+		}
+		if( dst ) store(ctx, dst, out, true);
+		return out;
+#	ifdef HL_64
+	case HOBJ:
+	case HSTRUCT:
+	case HDYNOBJ:
+	case HVIRTUAL:
+	case HFUN:
+	case HMETHOD:
+	case HBYTES:
+	case HNULL:
+	case HENUM:
+	case HDYN:
+	case HTYPE:
+	case HABSTRACT:
+	case HARRAY:
+	case HI64:
+	case HGUID:
+		// 64-bit pointer/i64 path: same structure as above with op64
+		switch( ID2(pa->kind, pb->kind) ) {
+		case ID2(RCPU,RCPU):
+		case ID2(RCPU,RSTACK):
+			op64(ctx, o, pa, pb);
+			scratch(pa);
+			out = pa;
+			break;
+		case ID2(RSTACK,RCPU):
+			if( dst == a && OP_FORMS[o].mem_r ) {
+				op64(ctx, o, pa, pb);
+				dst = NULL;
+				out = pa;
+			} else {
+				alloc_cpu(ctx,a, true);
+				return op_binop(ctx,dst,a,b,bop);
+			}
+			break;
+		case ID2(RSTACK,RSTACK):
+			alloc_cpu(ctx, a, true);
+			return op_binop(ctx, dst, a, b, bop);
+		default:
+			printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
+			ASSERT(ID2(pa->kind, pb->kind));
+		}
+		if( dst ) store(ctx, dst, out, true);
+		return out;
+#	endif
+	case HF64:
+	case HF32:
+		pa = alloc_fpu(ctx, a, true);
+		pb = alloc_fpu(ctx, b, true);
+		switch( ID2(pa->kind, pb->kind) ) {
+		case ID2(RFPU,RFPU):
+			op64(ctx,o,pa,pb);
+			// COMIS* sets PF on unordered (NaN): fix up ZF/CF so the
+			// following conditional jump treats NaN comparisons as false
+			// (OJSGt already gets the right flags from the unordered result)
+			if( (o == COMISD || o == COMISS) && bop != OJSGt ) {
+				int jnotnan;
+				XJump_small(JNParity,jnotnan);
+				switch( bop ) {
+				case OJSLt:
+				case OJNotLt:
+					{
+						preg *r = alloc_reg(ctx,RCPU);
+						// set CF=0, ZF=1
+						op64(ctx,XOR,r,r);
+						RUNLOCK(r);
+						break;
+					}
+				case OJSGte:
+				case OJNotGte:
+					{
+						preg *r = alloc_reg(ctx,RCPU);
+						// set ZF=0, CF=1
+						op64(ctx,XOR,r,r);
+						op64(ctx,CMP,r,PESP);
+						RUNLOCK(r);
+						break;
+					}
+					break; // NOTE(review): unreachable break (dead code)
+				case OJNotEq:
+				case OJEq:
+					// set ZF=0, CF=?
+				case OJSLte:
+					// set ZF=0, CF=0
+					op64(ctx,TEST,PESP,PESP);
+					break;
+				default:
+					ASSERT(bop);
+				}
+				patch_jump(ctx,jnotnan);
+			}
+			scratch(pa);
+			out = pa;
+			break;
+		default:
+			printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
+			ASSERT(ID2(pa->kind, pb->kind));
+		}
+		if( dst ) store(ctx, dst, out, true);
+		return out;
+	default:
+		ASSERT(RTYPE(a));
+	}
+	return NULL;
+}
+
+// Emit the conditional jump matching HL opcode `op`, returning the patch
+// position. Float comparisons (COMIS*) set flags like unsigned compares,
+// so signed HL conditions map to the unsigned jcc forms when isFloat.
+static int do_jump( jit_ctx *ctx, hl_op op, bool isFloat ) {
+	int j;
+	switch( op ) {
+	case OJAlways:
+		XJump(JAlways,j);
+		break;
+	case OJSGte:
+		XJump(isFloat ? JUGte : JSGte,j);
+		break;
+	case OJSGt:
+		XJump(isFloat ? JUGt : JSGt,j);
+		break;
+	case OJUGte:
+		XJump(JUGte,j);
+		break;
+	case OJSLt:
+		XJump(isFloat ? JULt : JSLt,j);
+		break;
+	case OJSLte:
+		XJump(isFloat ? JULte : JSLte,j);
+		break;
+	case OJULt:
+		XJump(JULt,j);
+		break;
+	case OJEq:
+		XJump(JEq,j);
+		break;
+	case OJNotEq:
+		XJump(JNeq,j);
+		break;
+	case OJNotLt:
+		XJump(JUGte,j);
+		break;
+	case OJNotGte:
+		XJump(JULt,j);
+		break;
+	default:
+		j = 0;
+		printf("Unknown JUMP %d\n",op);
+		break;
+	}
+	return j;
+}
+
+// Record a forward jump at buffer position `pos` targeting HL opcode index
+// `target`, to be patched once the target's address is known. Marks the
+// target opcode position as pending (-1) so the compiler knows a jump
+// lands there.
+static void register_jump( jit_ctx *ctx, int pos, int target ) {
+	jlist *j = (jlist*)hl_malloc(&ctx->falloc, sizeof(jlist));
+	j->pos = pos;
+	j->target = target;
+	j->next = ctx->jumps;
+	ctx->jumps = j;
+	if( target != 0 && ctx->opsPos[target] == 0 )
+		ctx->opsPos[target] = -1;
+}
+
+#define HDYN_VALUE 8
+
+// Emit a comparison of the boxed values held by two non-NULL vdynamic
+// pointers (in registers a and b), loading each payload at HDYN_VALUE and
+// comparing with the width matching type t. Leaves flags set for a
+// following conditional jump; clobbers a and b.
+static void dyn_value_compare( jit_ctx *ctx, preg *a, preg *b, hl_type *t ) {
+	preg p;
+	switch( t->kind ) {
+	case HUI8:
+	case HBOOL:
+		op32(ctx,MOV8,a,pmem(&p,a->id,HDYN_VALUE));
+		op32(ctx,MOV8,b,pmem(&p,b->id,HDYN_VALUE));
+		op64(ctx,CMP8,a,b);
+		break;
+	case HUI16:
+		op32(ctx,MOV16,a,pmem(&p,a->id,HDYN_VALUE));
+		op32(ctx,MOV16,b,pmem(&p,b->id,HDYN_VALUE));
+		op64(ctx,CMP16,a,b);
+		break;
+	case HI32:
+		op32(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
+		op32(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
+		op64(ctx,CMP,a,b);
+		break;
+	case HF32:
+		{
+			// NOTE(review): loads 32-bit floats with MOVSS but compares
+			// with COMISD (64-bit) — COMISS would match the load width;
+			// verify against upstream whether this is intentional
+			preg *fa = alloc_reg(ctx, RFPU);
+			preg *fb = alloc_reg(ctx, RFPU);
+			op64(ctx,MOVSS,fa,pmem(&p,a->id,HDYN_VALUE));
+			op64(ctx,MOVSS,fb,pmem(&p,b->id,HDYN_VALUE));
+			op64(ctx,COMISD,fa,fb);
+		}
+		break;
+	case HF64:
+		{
+			preg *fa = alloc_reg(ctx, RFPU);
+			preg *fb = alloc_reg(ctx, RFPU);
+			op64(ctx,MOVSD,fa,pmem(&p,a->id,HDYN_VALUE));
+			op64(ctx,MOVSD,fb,pmem(&p,b->id,HDYN_VALUE));
+			op64(ctx,COMISD,fa,fb);
+		}
+		break;
+	case HI64:
+	default:
+		// ptr comparison
+		op64(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
+		op64(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
+		op64(ctx,CMP,a,b);
+		break;
+	}
+}
+
+// Compile a conditional jump comparing vregs a and b. Dispatches on the
+// operand type: dynamic/function values go through hl_dyn_compare (with
+// special handling of invalid comparisons for >/>=), HTYPE through
+// hl_same_type, HNULL unwraps and compares boxed values, HVIRTUAL follows
+// the ->value indirection (including virtual-vs-obj), HOBJ/HSTRUCT may
+// call a user compareFun, and everything else falls through to op_binop's
+// CMP. The resulting jump is registered toward targetPos for patching.
+static void op_jump( jit_ctx *ctx, vreg *a, vreg *b, hl_opcode *op, int targetPos ) {
+	if( a->t->kind == HDYN || b->t->kind == HDYN || a->t->kind == HFUN || b->t->kind == HFUN ) {
+		int args[] = { a->stack.id, b->stack.id };
+		int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
+		call_native(ctx,hl_dyn_compare,size);
+		if( op->op == OJSGt || op->op == OJSGte ) {
+			// hl_invalid_comparison must not satisfy > / >= : skip the jump
+			preg p;
+			int jinvalid;
+			op32(ctx,CMP,PEAX,pconst(&p,hl_invalid_comparison));
+			XJump_small(JEq,jinvalid);
+			op32(ctx,TEST,PEAX,PEAX);
+			register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
+			patch_jump(ctx,jinvalid);
+			return;
+		}
+		op32(ctx,TEST,PEAX,PEAX);
+	} else switch( a->t->kind ) {
+	case HTYPE:
+		{
+			int args[] = { a->stack.id, b->stack.id };
+			int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
+			preg p;
+			call_native(ctx,hl_same_type,size);
+			op64(ctx,CMP8,PEAX,pconst(&p,1));
+		}
+		break;
+	case HNULL:
+		{
+			preg *pa = hl_type_size(a->t->tparam) == 1 ? alloc_cpu8(ctx,a,true) : alloc_cpu(ctx,a,true);
+			preg *pb = hl_type_size(b->t->tparam) == 1 ? alloc_cpu8(ctx,b,true) : alloc_cpu(ctx,b,true);
+			if( op->op == OJEq ) {
+				// if( a == b || (a && b && a->v == b->v) ) goto
+				int ja, jb;
+				// if( a != b && (!a || !b || a->v != b->v) ) goto
+				op64(ctx,CMP,pa,pb);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				dyn_value_compare(ctx,pa,pb,a->t->tparam);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				scratch(pa);
+				scratch(pb);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+			} else if( op->op == OJNotEq ) {
+				int jeq, jcmp;
+				// if( a != b && (!a || !b || a->v != b->v) ) goto
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JEq,jeq);
+				op64(ctx,TEST,pa,pa);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				op64(ctx,TEST,pb,pb);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				dyn_value_compare(ctx,pa,pb,a->t->tparam);
+				XJump_small(JZero,jcmp);
+				scratch(pa);
+				scratch(pb);
+				register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
+				patch_jump(ctx,jcmp);
+				patch_jump(ctx,jeq);
+			} else
+				ASSERT(op->op);
+			return;
+		}
+	case HVIRTUAL:
+		{
+			preg p;
+			preg *pa = alloc_cpu(ctx,a,true);
+			preg *pb = alloc_cpu(ctx,b,true);
+			int ja,jb,jav,jbv,jvalue;
+			if( b->t->kind == HOBJ ) {
+				// virtual vs object: compare a->value against b
+				if( op->op == OJEq ) {
+					// if( a ? (b && a->value == b) : (b == NULL) ) goto
+					op64(ctx,TEST,pa,pa);
+					XJump_small(JZero,ja);
+					op64(ctx,TEST,pb,pb);
+					XJump_small(JZero,jb);
+					op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
+					op64(ctx,CMP,pa,pb);
+					XJump_small(JAlways,jvalue);
+					patch_jump(ctx,ja);
+					op64(ctx,TEST,pb,pb);
+					patch_jump(ctx,jvalue);
+					register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+					patch_jump(ctx,jb);
+				} else if( op->op == OJNotEq ) {
+					// if( a ? (b == NULL || a->value != b) : (b != NULL) ) goto
+					op64(ctx,TEST,pa,pa);
+					XJump_small(JZero,ja);
+					op64(ctx,TEST,pb,pb);
+					register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+					op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
+					op64(ctx,CMP,pa,pb);
+					XJump_small(JAlways,jvalue);
+					patch_jump(ctx,ja);
+					op64(ctx,TEST,pb,pb);
+					patch_jump(ctx,jvalue);
+					register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
+				} else
+					ASSERT(op->op);
+				scratch(pa);
+				return;
+			}
+			op64(ctx,CMP,pa,pb);
+			if( op->op == OJEq ) {
+				// if( a == b || (a && b && a->value && b->value && a->value == b->value) ) goto
+				register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,jav);
+				op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jbv);
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JNeq,jvalue);
+				register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+				patch_jump(ctx,jav);
+				patch_jump(ctx,jbv);
+				patch_jump(ctx,jvalue);
+			} else if( op->op == OJNotEq ) {
+				int jnext;
+				// if( a != b && (!a || !b || !a->value || !b->value || a->value != b->value) ) goto
+				XJump_small(JEq,jnext);
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,jav);
+				op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jbv);
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JEq,jvalue);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+				patch_jump(ctx,jav);
+				patch_jump(ctx,jbv);
+				register_jump(ctx,do_jump(ctx,OJAlways, false),targetPos);
+				patch_jump(ctx,jnext);
+				patch_jump(ctx,jvalue);
+			} else
+				ASSERT(op->op);
+			scratch(pa);
+			scratch(pb);
+			return;
+		}
+		break;
+	case HOBJ:
+	case HSTRUCT:
+		if( b->t->kind == HVIRTUAL ) {
+			op_jump(ctx,b,a,op,targetPos); // inverse
+			return;
+		}
+		if( hl_get_obj_rt(a->t)->compareFun ) {
+			preg *pa = alloc_cpu(ctx,a,true);
+			preg *pb = alloc_cpu(ctx,b,true);
+			preg p;
+			int jeq, ja, jb, jcmp;
+			int args[] = { a->stack.id, b->stack.id };
+			switch( op->op ) {
+			case OJEq:
+				// if( a == b || (a && b && cmp(a,b) == 0) ) goto
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JEq,jeq);
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
+				op32(ctx,TEST,PEAX,PEAX);
+				XJump_small(JNotZero,jcmp);
+				patch_jump(ctx,jeq);
+				register_jump(ctx,do_jump(ctx,OJAlways,false),targetPos);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+				patch_jump(ctx,jcmp);
+				break;
+			case OJNotEq:
+				// if( a != b && (!a || !b || cmp(a,b) != 0) ) goto
+				op64(ctx,CMP,pa,pb);
+				XJump_small(JEq,jeq);
+				op64(ctx,TEST,pa,pa);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+				op64(ctx,TEST,pb,pb);
+				register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
+
+				op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
+				op32(ctx,TEST,PEAX,PEAX);
+				XJump_small(JZero,jcmp);
+
+				register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
+				patch_jump(ctx,jcmp);
+				patch_jump(ctx,jeq);
+				break;
+			default:
+				// if( a && b && cmp(a,b) ?? 0 ) goto
+				op64(ctx,TEST,pa,pa);
+				XJump_small(JZero,ja);
+				op64(ctx,TEST,pb,pb);
+				XJump_small(JZero,jb);
+				op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
+				op32(ctx,CMP,PEAX,pconst(&p,0));
+				register_jump(ctx,do_jump(ctx,op->op,false),targetPos);
+				patch_jump(ctx,ja);
+				patch_jump(ctx,jb);
+				break;
+			}
+			return;
+		}
+		// fallthrough
+	default:
+		// make sure we have valid 8 bits registers
+		if( a->size == 1 ) alloc_cpu8(ctx,a,true);
+		if( b->size == 1 ) alloc_cpu8(ctx,b,true);
+		op_binop(ctx,NULL,a,b,op->op);
+		break;
+	}
+	register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
+}
+
+// Allocate and zero-initialize a JIT context: set up its two allocators
+// and pre-assign ids/kinds to the static CPU and FPU preg descriptors.
+// Returns NULL on allocation failure.
+// NOTE(review): both for-loop headers are garbled in this diff (bounds and
+// the REG_AT lookups were stripped); verify against upstream jit.c.
+jit_ctx *hl_jit_alloc() {
+	int i;
+	jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx));
+	if( ctx == NULL ) return NULL;
+	memset(ctx,0,sizeof(jit_ctx));
+	hl_alloc_init(&ctx->falloc);
+	hl_alloc_init(&ctx->galloc);
+	for(i=0;iid = i;
+		r->kind = RCPU;
+	}
+	for(i=0;iid = i;
+		r->kind = RFPU;
+	}
+	return ctx;
+}
+
+// Release all per-compilation buffers and allocators of a JIT context.
+// When can_reset is true the ctx struct itself is kept (fields are zeroed
+// so it can be reused); otherwise it is freed too.
+void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) {
+	free(ctx->vregs);
+	free(ctx->opsPos);
+	free(ctx->startBuf);
+	ctx->maxRegs = 0;
+	ctx->vregs = NULL;
+	ctx->maxOps = 0;
+	ctx->opsPos = NULL;
+	ctx->startBuf = NULL;
+	ctx->bufSize = 0;
+	ctx->buf.b = NULL;
+	// jlist entries live in falloc/galloc, freed below
+	ctx->calls = NULL;
+	ctx->switchs = NULL;
+	ctx->closure_list = NULL;
+	hl_free(&ctx->falloc);
+	hl_free(&ctx->galloc);
+	if( !can_reset ) free(ctx);
+}
+
+// Pad the output buffer with NOPs up to the next 16-byte boundary
+// (function entry alignment).
+static void jit_nops( jit_ctx *ctx ) {
+	while( BUF_POS() & 15 )
+		op32(ctx, NOP, UNUSED, UNUSED);
+}
+
+#define MAX_ARGS 16
+
+static void *call_jit_c2hl = NULL;
+static void *call_jit_hl2c = NULL;
+
+// C-to-HL dynamic call bridge: given a closure pointer, its function type
+// and an array of raw argument pointers, lay out a fake argument area
+// (stack portion first, then the register-passed values that the
+// call_jit_c2hl trampoline will load into convention registers), invoke
+// the trampoline with the right return-type cast, and store the result
+// into ret. Layout mirrors prepare_call_args using runtime type info.
+static void *callback_c2hl( void *_f, hl_type *t, void **args, vdynamic *ret ) {
+	/*
+		prepare stack and regs according to prepare_call_args, but by reading runtime type information
+		from the function type. The stack and regs will be setup by the trampoline function.
+	*/
+	void **f = (void**)_f;
+	unsigned char stack[MAX_ARGS * 8];
+	call_regs cregs = {0};
+	if( t->fun->nargs > MAX_ARGS )
+		hl_error("Too many arguments for dynamic call");
+	int i, size = 0, pad = 0, pos = 0;
+	// first pass: measure the stack-passed portion
+	// NOTE(review): both for-loop headers below are garbled in this diff
+	// (the "i < t->fun->nargs" bound was stripped); verify against upstream.
+	for(i=0;ifun->nargs;i++) {
+		hl_type *at = t->fun->args[i];
+		int creg = select_call_reg(&cregs,at,i);
+		if( creg >= 0 )
+			continue;
+		size += stack_size(at);
+	}
+	pad = (-size) & 15; // align the stack area to 16 bytes
+	size += pad;
+	pos = 0;
+	// second pass: write each argument into the stack area or the
+	// register save area (integer regs, then XMM regs, after the stack part)
+	for(i=0;ifun->nargs;i++) {
+		// RTL
+		hl_type *at = t->fun->args[i];
+		void *v = args[i];
+		int creg = mapped_reg(&cregs,i);
+		void *store;
+		if( creg >= 0 ) {
+			if( REG_IS_FPU(creg) ) {
+				store = stack + size + CALL_NREGS * HL_WSIZE + (creg - XMM(0)) * sizeof(double);
+			} else {
+				store = stack + size + call_reg_index(creg) * HL_WSIZE;
+			}
+			switch( at->kind ) {
+			case HBOOL:
+			case HUI8:
+				*(int_val*)store = *(unsigned char*)v;
+				break;
+			case HUI16:
+				*(int_val*)store = *(unsigned short*)v;
+				break;
+			case HI32:
+				*(int_val*)store = *(int*)v;
+				break;
+			case HF32:
+				// clear the full slot before writing the 4-byte float
+				*(void**)store = 0;
+				*(float*)store = *(float*)v;
+				break;
+			case HF64:
+				*(double*)store = *(double*)v;
+				break;
+			case HI64:
+			case HGUID:
+				*(int64*)store = *(int64*)v;
+				break;
+			default:
+				*(void**)store = v;
+				break;
+			}
+		} else {
+			int tsize = stack_size(at);
+			store = stack + pos;
+			pos += tsize;
+			switch( at->kind ) {
+			case HBOOL:
+			case HUI8:
+				*(int*)store = *(unsigned char*)v;
+				break;
+			case HUI16:
+				*(int*)store = *(unsigned short*)v;
+				break;
+			case HI32:
+			case HF32:
+				*(int*)store = *(int*)v;
+				break;
+			case HF64:
+				*(double*)store = *(double*)v;
+				break;
+			case HI64:
+			case HGUID:
+				*(int64*)store = *(int64*)v;
+				break;
+			default:
+				*(void**)store = v;
+				break;
+			}
+		}
+	}
+	pos += pad;
+	pos >>= IS_64 ? 3 : 2; // convert byte offset to word index for the void** arithmetic
+	// dispatch on return kind so the trampoline result is read correctly
+	switch( t->fun->ret->kind ) {
+	case HUI8:
+	case HUI16:
+	case HI32:
+	case HBOOL:
+		ret->v.i = ((int (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+		return &ret->v.i;
+	case HI64:
+	case HGUID:
+		ret->v.i64 = ((int64 (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+		return &ret->v.i64;
+	case HF32:
+		ret->v.f = ((float (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+		return &ret->v.f;
+	case HF64:
+		ret->v.d = ((double (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+		return &ret->v.d;
+	default:
+		return ((void *(*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
+	}
+}
+
+// Emit the c2hl trampoline used by callback_c2hl: takes (fptr, stack_top,
+// stack_end), pushes the stack-passed words in reverse, loads the
+// convention registers from the save area (x64), then calls fptr and
+// restores the frame.
+static void jit_c2hl( jit_ctx *ctx ) {
+	// create the function that will be called by callback_c2hl
+	// it will make sure to prepare the stack/regs according to native calling conventions
+	int jeq, jloop, jstart;
+	preg *fptr, *stack, *stend;
+	preg p;
+
+	op64(ctx,PUSH,PEBP,UNUSED);
+	op64(ctx,MOV,PEBP,PESP);
+
+#	ifdef HL_64
+
+	// stash the three incoming args in scratch regs not used for passing
+	fptr = REG_AT(R10);
+	stack = PEAX;
+	stend = REG_AT(R11);
+	op64(ctx, MOV, fptr, REG_AT(CALL_REGS[0]));
+	op64(ctx, MOV, stack, REG_AT(CALL_REGS[1]));
+	op64(ctx, MOV, stend, REG_AT(CALL_REGS[2]));
+
+	// set native call regs
+	// NOTE(review): both loop headers are garbled in this diff (bounds and
+	// the MOV into CALL_REGS[i]/XMM(i) were stripped); verify upstream.
+	int i;
+	for(i=0;iid,i*HL_WSIZE));
+	for(i=0;iid,(i+CALL_NREGS)*HL_WSIZE));
+
+#	else
+
+	// make sure the stack is aligned on 16 bytes
+	// the amount of push we will do afterwards is guaranteed to be a multiple of 16bytes by hl_callback
+#	ifdef HL_VCC
+	// VCC does not guarantee us an aligned stack...
+	op64(ctx,MOV,PEAX,PESP);
+	op64(ctx,AND,PEAX,pconst(&p,15));
+	op64(ctx,SUB,PESP,PEAX);
+#	else
+	op64(ctx,SUB,PESP,pconst(&p,8));
+#	endif
+
+	// mov arguments to regs
+	fptr = REG_AT(Eax);
+	stack = REG_AT(Edx);
+	stend = REG_AT(Ecx);
+	op64(ctx,MOV,fptr,pmem(&p,Ebp,HL_WSIZE*2));
+	op64(ctx,MOV,stack,pmem(&p,Ebp,HL_WSIZE*3));
+	op64(ctx,MOV,stend,pmem(&p,Ebp,HL_WSIZE*4));
+
+#	endif
+
+	// push stack args
+	// loop: while( stack != stend ) push *(--stack)
+	jstart = BUF_POS();
+	op64(ctx,CMP,stack,stend);
+	XJump(JEq,jeq);
+	op64(ctx,SUB,stack,pconst(&p,HL_WSIZE));
+	op64(ctx,PUSH,pmem(&p,stack->id,0),UNUSED);
+	XJump(JAlways,jloop);
+	patch_jump(ctx,jeq);
+	patch_jump_to(ctx, jloop, jstart);
+
+	op_call(ctx,fptr,0);
+
+	// cleanup and ret
+	op64(ctx,MOV,PESP,PEBP);
+	op64(ctx,POP,PEBP, UNUSED);
+	op64(ctx,RET,UNUSED,UNUSED);
+}
+
+static vdynamic *jit_wrapper_call( vclosure_wrapper *c, char *stack_args, void **regs ) {
+	// Unpacks the native-convention arguments of a wrapped closure into
+	// boxed vdynamic values and dispatches through hl_dyn_call.
+	// `stack_args` points at the stack-passed arguments; `regs` points
+	// at the CPU+FPU register save area built by the jit_hl2c stub.
+	vdynamic *args[MAX_ARGS];
+	int i;
+	int nargs = c->cl.t->fun->nargs;
+	call_regs cregs = {0};
+	if( nargs > MAX_ARGS )
+		hl_error("Too many arguments for wrapped call");
+	cregs.nextCpu++; // skip fptr in HL64 - was passed as arg0
+	// NOTE(review): the loop header below was garbled by text extraction.
+	// It presumably reads: for(i=0;i<nargs;i++) { hl_type *t = c->cl.t->fun->args[i];
+	// — confirm against the upstream source.
+	for(i=0;icl->t->fun->args[i];
+	int creg = select_call_reg(&cregs,t,i);
+	if( creg < 0 ) {
+	// stack-passed : pointers are already boxed, value types get boxed here
+	args[i] = hl_is_dynamic(t) ? *(vdynamic**)stack_args : hl_make_dyn(stack_args,t);
+	stack_args += stack_size(t);
+	} else if( hl_is_dynamic(t) ) {
+	// register-passed pointer : use it directly from the save area
+	args[i] = *(vdynamic**)(regs + call_reg_index(creg));
+	} else if( t->kind == HF32 || t->kind == HF64 ) {
+	// register-passed float : XMM saves start after the CPU registers
+	args[i] = hl_make_dyn(regs + CALL_NREGS + creg - XMM(0),&hlt_f64);
+	} else {
+	args[i] = hl_make_dyn(regs + call_reg_index(creg),t);
+	}
+	}
+	return hl_dyn_call(c->wrappedFun,args,nargs);
+}
+
+static void *jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs ) {
+ vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
+ hl_type *tret = c->cl.t->fun->ret;
+ switch( tret->kind ) {
+ case HVOID:
+ return NULL;
+ case HUI8:
+ case HUI16:
+ case HI32:
+ case HBOOL:
+ return (void*)(int_val)hl_dyn_casti(&ret,&hlt_dyn,tret);
+ case HI64:
+ case HGUID:
+ return (void*)(int_val)hl_dyn_casti64(&ret,&hlt_dyn);
+ default:
+ return hl_dyn_castp(&ret,&hlt_dyn,tret);
+ }
+}
+
+static double jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs ) {
+ vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
+ return hl_dyn_castd(&ret,&hlt_dyn);
+}
+
+static void jit_hl2c( jit_ctx *ctx ) {
+	// create a function that is called with a vclosure_wrapper* and native args
+	// and pack and pass the args to callback_hl2c
+	preg p;
+	int jfloat1, jfloat2, jexit;
+	hl_type_fun *ft = NULL; // null pointer used only to compute the offsetof ft->ret below
+	int size;
+# ifdef HL_64
+	preg *cl = REG_AT(CALL_REGS[0]);
+	preg *tmp = REG_AT(CALL_REGS[1]);
+# else
+	preg *cl = REG_AT(Ecx);
+	preg *tmp = REG_AT(Edx);
+# endif
+
+	// prologue
+	op64(ctx,PUSH,PEBP,UNUSED);
+	op64(ctx,MOV,PEBP,PESP);
+
+# ifdef HL_64
+	// push registers
+	int i;
+	op64(ctx,SUB,PESP,pconst(&p,CALL_NREGS*8));
+	// NOTE(review): the line below was garbled by text extraction — it
+	// merges the register save loop(s), the end of the HL_64 section and
+	// the start of a pseudo-code comment (a switch on the closure's
+	// c->cl.t->fun->ret->kind); confirm against the upstream source.
+	for(i=0;it->fun->ret->kind ) {
+	//	case HF32: case HF64: return jit_wrapper_d(arg0,&args);
+	//	default: return jit_wrapper_ptr(arg0,&args);
+	//	}
+	if( !IS_64 )
+		op64(ctx,MOV,cl,pmem(&p,Ebp,HL_WSIZE*2)); // load arg0
+	// walk cl->t->fun->ret->kind to learn the declared return kind
+	op64(ctx,MOV,tmp,pmem(&p,cl->id,0)); // ->t
+	op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE)); // ->fun
+	op64(ctx,MOV,tmp,pmem(&p,tmp->id,(int)(int_val)&ft->ret)); // ->ret (offsetof computed from null ft)
+	op32(ctx,MOV,tmp,pmem(&p,tmp->id,0)); // -> kind
+
+	// float returns are dispatched to jit_wrapper_d, all others to jit_wrapper_ptr
+	op32(ctx,CMP,tmp,pconst(&p,HF64));
+	XJump_small(JEq,jfloat1);
+	op32(ctx,CMP,tmp,pconst(&p,HF32));
+	XJump_small(JEq,jfloat2);
+
+	// 64 bits : ESP + EIP (+WIN64PAD)
+	// 32 bits : ESP + EIP + PARAM0
+	int args_pos = IS_64 ? ((IS_WINCALL64 ? 32 : 0) + HL_WSIZE * 2) : (HL_WSIZE*3);
+
+	// non-float path : jit_wrapper_ptr(cl, &stack_args, &saved_regs)
+	size = begin_native_call(ctx,3);
+	op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
+	set_native_arg(ctx, tmp);
+	op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
+	set_native_arg(ctx, tmp);
+	set_native_arg(ctx, cl);
+	call_native(ctx, jit_wrapper_ptr, size);
+	XJump_small(JAlways, jexit);
+
+	// float path : same arguments, result comes back in an FPU register
+	patch_jump(ctx,jfloat1);
+	patch_jump(ctx,jfloat2);
+	size = begin_native_call(ctx,3);
+	op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
+	set_native_arg(ctx, tmp);
+	op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
+	set_native_arg(ctx, tmp);
+	set_native_arg(ctx, cl);
+	call_native(ctx, jit_wrapper_d, size);
+
+	// epilogue
+	patch_jump(ctx,jexit);
+	op64(ctx,MOV,PESP,PEBP);
+	op64(ctx,POP,PEBP, UNUSED);
+	op64(ctx,RET,UNUSED,UNUSED);
+}
+
+static void jit_fail( uchar *msg ) {
+ if( msg == NULL ) {
+ hl_debug_break();
+ msg = USTR("assert");
+ }
+ vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
+ d->v.ptr = msg;
+ hl_throw(d);
+}
+
+static void jit_null_access( jit_ctx *ctx ) {
+ op64(ctx,PUSH,PEBP,UNUSED);
+ op64(ctx,MOV,PEBP,PESP);
+ int_val arg = (int_val)USTR("Null access");
+ call_native_consts(ctx, jit_fail, &arg, 1);
+}
+
+static void jit_null_fail( int fhash ) {
+ vbyte *field = hl_field_name(fhash);
+ hl_buffer *b = hl_alloc_buffer();
+ hl_buffer_str(b, USTR("Null access ."));
+ hl_buffer_str(b, (uchar*)field);
+ vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
+ d->v.ptr = hl_buffer_content(b,NULL);
+ hl_throw(d);
+}
+
+static void jit_null_field_access( jit_ctx *ctx ) {
+ preg p;
+ op64(ctx,PUSH,PEBP,UNUSED);
+ op64(ctx,MOV,PEBP,PESP);
+ int size = begin_native_call(ctx, 1);
+ int args_pos = (IS_WINCALL64 ? 32 : 0) + HL_WSIZE*2;
+ set_native_arg(ctx, pmem(&p,Ebp,args_pos));
+ call_native(ctx,jit_null_fail,size);
+}
+
+static void jit_assert( jit_ctx *ctx ) {
+ op64(ctx,PUSH,PEBP,UNUSED);
+ op64(ctx,MOV,PEBP,PESP);
+ int_val arg = 0;
+ call_native_consts(ctx, jit_fail, &arg, 1);
+}
+
+static int jit_build( jit_ctx *ctx, void (*fbuild)( jit_ctx *) ) {
+ int pos;
+ jit_buf(ctx);
+ jit_nops(ctx);
+ pos = BUF_POS();
+ fbuild(ctx);
+ int endPos = BUF_POS();
+ jit_nops(ctx);
+#ifdef WIN64_UNWIND_TABLES
+ int fid = ctx->nunwind++;
+ ctx->unwind_table[fid].BeginAddress = pos;
+ ctx->unwind_table[fid].EndAddress = endPos;
+ ctx->unwind_table[fid].UnwindData = ctx->unwind_offset;
+#endif
+ return pos;
+}
+
+static void hl_jit_init_module( jit_ctx *ctx, hl_module *m ) {
+	// Binds the context to module m : allocates the per-function debug
+	// info array when the module carries debug data, copies the module's
+	// float constants into the code buffer, and (Win64) writes unwind
+	// data plus the runtime function table.
+	int i;
+	ctx->m = m;
+	if( m->code->hasdebug ) {
+		ctx->debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions);
+		memset(ctx->debug, -1, sizeof(hl_debug_infos) * m->code->nfunctions);
+	}
+	// NOTE(review): the loop header below was garbled by text extraction
+	// — presumably for(i=0;i<m->code->nfloats;i++); confirm upstream.
+	for(i=0;icode->nfloats;i++) {
+		jit_buf(ctx);
+		*ctx->buf.d++ = m->code->floats[i];
+	}
+#ifdef WIN64_UNWIND_TABLES
+	jit_buf(ctx);
+	ctx->unwind_offset = BUF_POS();
+	write_unwind_data(ctx);
+
+	// +10 entries : presumably headroom for the internal support
+	// functions registered via jit_build — confirm the exact count
+	ctx->unwind_table = malloc(sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10));
+	memset(ctx->unwind_table, 0, sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10));
+#endif
+}
+
+void hl_jit_init( jit_ctx *ctx, hl_module *m ) {
+ hl_jit_init_module(ctx,m);
+ ctx->c2hl = jit_build(ctx, jit_c2hl);
+ ctx->hl2c = jit_build(ctx, jit_hl2c);
+ ctx->static_functions[0] = (void*)(int_val)jit_build(ctx,jit_null_access);
+ ctx->static_functions[1] = (void*)(int_val)jit_build(ctx,jit_assert);
+ ctx->static_functions[2] = (void*)(int_val)jit_build(ctx,jit_null_field_access);
+}
+
+void hl_jit_reset( jit_ctx *ctx, hl_module *m ) {
+ ctx->debug = NULL;
+ hl_jit_init_module(ctx,m);
+}
+
+static void *get_dyncast( hl_type *t ) {
+ switch( t->kind ) {
+ case HF32:
+ return hl_dyn_castf;
+ case HF64:
+ return hl_dyn_castd;
+ case HI64:
+ case HGUID:
+ return hl_dyn_casti64;
+ case HI32:
+ case HUI16:
+ case HUI8:
+ case HBOOL:
+ return hl_dyn_casti;
+ default:
+ return hl_dyn_castp;
+ }
+}
+
+static void *get_dynset( hl_type *t ) {
+ switch( t->kind ) {
+ case HF32:
+ return hl_dyn_setf;
+ case HF64:
+ return hl_dyn_setd;
+ case HI64:
+ case HGUID:
+ return hl_dyn_seti64;
+ case HI32:
+ case HUI16:
+ case HUI8:
+ case HBOOL:
+ return hl_dyn_seti;
+ default:
+ return hl_dyn_setp;
+ }
+}
+
+static void *get_dynget( hl_type *t ) {
+ switch( t->kind ) {
+ case HF32:
+ return hl_dyn_getf;
+ case HF64:
+ return hl_dyn_getd;
+ case HI64:
+ case HGUID:
+ return hl_dyn_geti64;
+ case HI32:
+ case HUI16:
+ case HUI8:
+ case HBOOL:
+ return hl_dyn_geti;
+ default:
+ return hl_dyn_getp;
+ }
+}
+
/*
 * Native helper for OToUFloat : converts an unsigned 32-bit integer to
 * double via a plain C cast (exact, since every uint32 fits a double).
 */
static double uint_to_double( unsigned int v ) {
	return (double)v;
}
+
+static vclosure *alloc_static_closure( jit_ctx *ctx, int fid ) {
+ hl_module *m = ctx->m;
+ vclosure *c = hl_malloc(&m->ctx.alloc,sizeof(vclosure));
+ int fidx = m->functions_indexes[fid];
+ c->hasValue = 0;
+ if( fidx >= m->code->nfunctions ) {
+ // native
+ c->t = m->code->natives[fidx - m->code->nfunctions].t;
+ c->fun = m->functions_ptrs[fid];
+ c->value = NULL;
+ } else {
+ c->t = m->code->functions[fidx].type;
+ c->fun = (void*)(int_val)fid;
+ c->value = ctx->closure_list;
+ ctx->closure_list = c;
+ }
+ return c;
+}
+
+static void make_dyn_cast( jit_ctx *ctx, vreg *dst, vreg *v ) {
+	// Emits code that casts vreg v into dst's type and stores the result
+	// in dst. When v is Null<T> and dst is exactly T, the unwrap is done
+	// inline (a null pointer yields zero); otherwise the matching
+	// hl_dyn_cast* runtime primitive is called.
+	int size;
+	preg p;
+	preg *tmp;
+	if( v->t->kind == HNULL && v->t->tparam->kind == dst->t->kind ) {
+	int jnull, jend;
+	preg *out;
+	switch( dst->t->kind ) {
+	case HUI8:
+	case HUI16:
+	case HI32:
+	case HBOOL:
+	case HI64:
+	case HGUID:
+	// integer-like fast path : test pointer, load boxed value at +8
+	tmp = alloc_cpu(ctx, v, true);
+	op64(ctx, TEST, tmp, tmp);
+	XJump_small(JZero, jnull);
+	op64(ctx, MOV, tmp, pmem(&p,tmp->id,8));
+	XJump_small(JAlways, jend);
+	patch_jump(ctx, jnull);
+	// null : result is 0
+	op64(ctx, XOR, tmp, tmp);
+	patch_jump(ctx, jend);
+	store(ctx, dst, tmp, true);
+	return;
+	case HF32:
+	case HF64:
+	// float fast path : same shape, but the value lands in an FPU reg
+	tmp = alloc_cpu(ctx, v, true);
+	out = alloc_fpu(ctx, dst, false);
+	op64(ctx, TEST, tmp, tmp);
+	XJump_small(JZero, jnull);
+	op64(ctx, dst->t->kind == HF32 ? MOVSS : MOVSD, out, pmem(&p,tmp->id,8));
+	XJump_small(JAlways, jend);
+	patch_jump(ctx, jnull);
+	// null : result is 0.0
+	op64(ctx, XORPD, out, out);
+	patch_jump(ctx, jend);
+	store(ctx, dst, out, true);
+	return;
+	default:
+	break;
+	}
+	}
+	// slow path : runtime cast call. Float/int64 primitives take
+	// (addr, src type); the generic ones also take the dst type.
+	switch( dst->t->kind ) {
+	case HF32:
+	case HF64:
+	case HI64:
+	case HGUID:
+	size = begin_native_call(ctx, 2);
+	set_native_arg(ctx, pconst64(&p,(int_val)v->t));
+	break;
+	default:
+	size = begin_native_call(ctx, 3);
+	set_native_arg(ctx, pconst64(&p,(int_val)dst->t));
+	set_native_arg(ctx, pconst64(&p,(int_val)v->t));
+	break;
+	}
+	// first argument is the address of v's stack slot : Ebp + stackPos
+	tmp = alloc_native_arg(ctx);
+	op64(ctx,MOV,tmp,REG_AT(Ebp));
+	if( v->stackPos >= 0 )
+	op64(ctx,ADD,tmp,pconst(&p,v->stackPos));
+	else
+	op64(ctx,SUB,tmp,pconst(&p,-v->stackPos));
+	set_native_arg(ctx,tmp);
+	call_native(ctx,get_dyncast(dst->t),size);
+	store_result(ctx, dst);
+}
+
+int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
+ int i, size = 0, opCount;
+ int codePos = BUF_POS();
+ int nargs = f->type->fun->nargs;
+ unsigned short *debug16 = NULL;
+ int *debug32 = NULL;
+ call_regs cregs = {0};
+ hl_thread_info *tinf = NULL;
+ preg p;
+ ctx->f = f;
+ ctx->allocOffset = 0;
+ if( f->nregs > ctx->maxRegs ) {
+ free(ctx->vregs);
+ ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1));
+ if( ctx->vregs == NULL ) {
+ ctx->maxRegs = 0;
+ return -1;
+ }
+ ctx->maxRegs = f->nregs;
+ }
+ if( f->nops > ctx->maxOps ) {
+ free(ctx->opsPos);
+ ctx->opsPos = (int*)malloc(sizeof(int) * (f->nops + 1));
+ if( ctx->opsPos == NULL ) {
+ ctx->maxOps = 0;
+ return -1;
+ }
+ ctx->maxOps = f->nops;
+ }
+ memset(ctx->opsPos,0,(f->nops+1)*sizeof(int));
+ for(i=0;inregs;i++) {
+ vreg *r = R(i);
+ r->t = f->regs[i];
+ r->size = hl_type_size(r->t);
+ r->current = NULL;
+ r->stack.holds = NULL;
+ r->stack.id = i;
+ r->stack.kind = RSTACK;
+ }
+ size = 0;
+ int argsSize = 0;
+ for(i=0;it,i);
+ if( creg < 0 || IS_WINCALL64 ) {
+ // use existing stack storage
+ r->stackPos = argsSize + HL_WSIZE * 2;
+ argsSize += stack_size(r->t);
+ } else {
+ // make room in local vars
+ size += r->size;
+ size += hl_pad_size(size,r->t);
+ r->stackPos = -size;
+ }
+ }
+ for(i=nargs;inregs;i++) {
+ vreg *r = R(i);
+ size += r->size;
+ size += hl_pad_size(size,r->t); // align local vars
+ r->stackPos = -size;
+ }
+# ifdef HL_64
+ size += (-size) & 15; // align on 16 bytes
+# else
+ size += hl_pad_size(size,&hlt_dyn); // align on word size
+# endif
+ ctx->totalRegsSize = size;
+ jit_buf(ctx);
+ ctx->functionPos = BUF_POS();
+ // make sure currentPos is > 0 before any reg allocations happen
+ // otherwise `alloc_reg` thinks that all registers are locked
+ ctx->currentPos = 1;
+ op_enter(ctx);
+# ifdef HL_64
+ {
+ // store in local var
+ for(i=0;isize);
+ p->holds = r;
+ r->current = p;
+ }
+ }
+# endif
+ if( ctx->m->code->hasdebug ) {
+ debug16 = (unsigned short*)malloc(sizeof(unsigned short) * (f->nops + 1));
+ debug16[0] = (unsigned short)(BUF_POS() - codePos);
+ }
+ ctx->opsPos[0] = BUF_POS();
+
+ for(opCount=0;opCountnops;opCount++) {
+ int jump;
+ hl_opcode *o = f->ops + opCount;
+ vreg *dst = R(o->p1);
+ vreg *ra = R(o->p2);
+ vreg *rb = R(o->p3);
+ ctx->currentPos = opCount + 1;
+ jit_buf(ctx);
+# ifdef JIT_DEBUG
+ if( opCount == 0 || f->ops[opCount-1].op != OAsm ) {
+ int uid = opCount + (f->findex<<16);
+ op32(ctx, PUSH, pconst(&p,uid), UNUSED);
+ op64(ctx, ADD, PESP, pconst(&p,HL_WSIZE));
+ }
+# endif
+ // emit code
+ switch( o->op ) {
+ case OMov:
+ case OUnsafeCast:
+ op_mov(ctx, dst, ra);
+ break;
+ case OInt:
+ store_const(ctx, dst, m->code->ints[o->p2]);
+ break;
+ case OBool:
+ store_const(ctx, dst, o->p2);
+ break;
+ case OGetGlobal:
+ {
+ void *addr = m->globals_data + m->globals_indexes[o->p2];
+# ifdef HL_64
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
+ copy_to(ctx, dst, pmem(&p,tmp->id,0));
+# else
+ copy_to(ctx, dst, paddr(&p,addr));
+# endif
+ }
+ break;
+ case OSetGlobal:
+ {
+ void *addr = m->globals_data + m->globals_indexes[o->p1];
+# ifdef HL_64
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
+ copy_from(ctx, pmem(&p,tmp->id,0), ra);
+# else
+ copy_from(ctx, paddr(&p,addr), ra);
+# endif
+ }
+ break;
+ case OCall3:
+ {
+ int args[3] = { o->p3, o->extra[0], o->extra[1] };
+ op_call_fun(ctx, dst, o->p2, 3, args);
+ }
+ break;
+ case OCall4:
+ {
+ int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] };
+ op_call_fun(ctx, dst, o->p2, 4, args);
+ }
+ break;
+ case OCallN:
+ op_call_fun(ctx, dst, o->p2, o->p3, o->extra);
+ break;
+ case OCall0:
+ op_call_fun(ctx, dst, o->p2, 0, NULL);
+ break;
+ case OCall1:
+ op_call_fun(ctx, dst, o->p2, 1, &o->p3);
+ break;
+ case OCall2:
+ {
+ int args[2] = { o->p3, (int)(int_val)o->extra };
+ op_call_fun(ctx, dst, o->p2, 2, args);
+ }
+ break;
+ case OSub:
+ case OAdd:
+ case OMul:
+ case OSDiv:
+ case OUDiv:
+ case OShl:
+ case OSShr:
+ case OUShr:
+ case OAnd:
+ case OOr:
+ case OXor:
+ case OSMod:
+ case OUMod:
+ op_binop(ctx, dst, ra, rb, o->op);
+ break;
+ case ONeg:
+ {
+ if( IS_FLOAT(ra) ) {
+ preg *pa = alloc_reg(ctx,RFPU);
+ preg *pb = alloc_fpu(ctx,ra,true);
+ op64(ctx,XORPD,pa,pa);
+ op64(ctx,ra->t->kind == HF32 ? SUBSS : SUBSD,pa,pb);
+ store(ctx,dst,pa,true);
+ } else if( ra->t->kind == HI64 ) {
+# ifdef HL_64
+ preg *pa = alloc_reg(ctx,RCPU);
+ preg *pb = alloc_cpu(ctx,ra,true);
+ op64(ctx,XOR,pa,pa);
+ op64(ctx,SUB,pa,pb);
+ store(ctx,dst,pa,true);
+# else
+ error_i64();
+# endif
+ } else {
+ preg *pa = alloc_reg(ctx,RCPU);
+ preg *pb = alloc_cpu(ctx,ra,true);
+ op32(ctx,XOR,pa,pa);
+ op32(ctx,SUB,pa,pb);
+ store(ctx,dst,pa,true);
+ }
+ }
+ break;
+ case ONot:
+ {
+ preg *v = alloc_cpu(ctx,ra,true);
+ op32(ctx,XOR,v,pconst(&p,1));
+ store(ctx,dst,v,true);
+ }
+ break;
+ case OJFalse:
+ case OJTrue:
+ case OJNotNull:
+ case OJNull:
+ {
+ preg *r = dst->t->kind == HBOOL ? alloc_cpu8(ctx, dst, true) : alloc_cpu(ctx, dst, true);
+ op64(ctx, dst->t->kind == HBOOL ? TEST8 : TEST, r, r);
+ XJump( o->op == OJFalse || o->op == OJNull ? JZero : JNotZero,jump);
+ register_jump(ctx,jump,(opCount + 1) + o->p2);
+ }
+ break;
+ case OJEq:
+ case OJNotEq:
+ case OJSLt:
+ case OJSGte:
+ case OJSLte:
+ case OJSGt:
+ case OJULt:
+ case OJUGte:
+ case OJNotLt:
+ case OJNotGte:
+ op_jump(ctx,dst,ra,o,(opCount + 1) + o->p3);
+ break;
+ case OJAlways:
+ jump = do_jump(ctx,o->op,false);
+ register_jump(ctx,jump,(opCount + 1) + o->p1);
+ break;
+ case OToDyn:
+ if( ra->t->kind == HBOOL ) {
+ int size = begin_native_call(ctx, 1);
+ set_native_arg(ctx, fetch(ra));
+ call_native(ctx, hl_alloc_dynbool, size);
+ store(ctx, dst, PEAX, true);
+ } else {
+ int_val rt = (int_val)ra->t;
+ int jskip = 0;
+ if( hl_is_ptr(ra->t) ) {
+ int jnz;
+ preg *a = alloc_cpu(ctx,ra,true);
+ op64(ctx,TEST,a,a);
+ XJump_small(JNotZero,jnz);
+ op64(ctx,XOR,PEAX,PEAX); // will replace the result of alloc_dynamic at jump land
+ XJump_small(JAlways,jskip);
+ patch_jump(ctx,jnz);
+ }
+ call_native_consts(ctx, hl_alloc_dynamic, &rt, 1);
+ // copy value to dynamic
+ if( (IS_FLOAT(ra) || ra->size == 8) && !IS_64 ) {
+ preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
+ op64(ctx,MOV,tmp,&ra->stack);
+ op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
+ if( ra->t->kind == HF64 ) {
+ ra->stackPos += 4;
+ op64(ctx,MOV,tmp,&ra->stack);
+ op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE+4),tmp);
+ ra->stackPos -= 4;
+ }
+ } else {
+ preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
+ copy_from(ctx,tmp,ra);
+ op64(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
+ }
+ if( hl_is_ptr(ra->t) ) patch_jump(ctx,jskip);
+ store(ctx, dst, PEAX, true);
+ }
+ break;
+ case OToSFloat:
+ if( ra == dst ) break;
+ if (ra->t->kind == HI32 || ra->t->kind == HUI16 || ra->t->kind == HUI8) {
+ preg* r = alloc_cpu(ctx, ra, true);
+ preg* w = alloc_fpu(ctx, dst, false);
+ op32(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r);
+ store(ctx, dst, w, true);
+ } else if (ra->t->kind == HI64 ) {
+ preg* r = alloc_cpu(ctx, ra, true);
+ preg* w = alloc_fpu(ctx, dst, false);
+ op64(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r);
+ store(ctx, dst, w, true);
+ } else if( ra->t->kind == HF64 && dst->t->kind == HF32 ) {
+ preg *r = alloc_fpu(ctx,ra,true);
+ preg *w = alloc_fpu(ctx,dst,false);
+ op32(ctx,CVTSD2SS,w,r);
+ store(ctx, dst, w, true);
+ } else if( ra->t->kind == HF32 && dst->t->kind == HF64 ) {
+ preg *r = alloc_fpu(ctx,ra,true);
+ preg *w = alloc_fpu(ctx,dst,false);
+ op32(ctx,CVTSS2SD,w,r);
+ store(ctx, dst, w, true);
+ } else
+ ASSERT(0);
+ break;
+ case OToUFloat:
+ {
+ int size;
+ size = prepare_call_args(ctx,1,&o->p2,ctx->vregs,0);
+ call_native(ctx,uint_to_double,size);
+ store_result(ctx,dst);
+ }
+ break;
+ case OToInt:
+ if( ra == dst ) break;
+ if( ra->t->kind == HF64 ) {
+ preg *r = alloc_fpu(ctx,ra,true);
+ preg *w = alloc_cpu(ctx,dst,false);
+ preg *tmp = alloc_reg(ctx,RCPU);
+ op32(ctx,STMXCSR,pmem(&p,Esp,-4),UNUSED);
+ op32(ctx,MOV,tmp,&p);
+ op32(ctx,OR,tmp,pconst(&p,0x6000)); // set round towards 0
+ op32(ctx,MOV,pmem(&p,Esp,-8),tmp);
+ op32(ctx,LDMXCSR,&p,UNUSED);
+ op32(ctx,CVTSD2SI,w,r);
+ op32(ctx,LDMXCSR,pmem(&p,Esp,-4),UNUSED);
+ store(ctx, dst, w, true);
+ } else if (ra->t->kind == HF32) {
+ preg *r = alloc_fpu(ctx, ra, true);
+ preg *w = alloc_cpu(ctx, dst, false);
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op32(ctx, STMXCSR, pmem(&p, Esp, -4), UNUSED);
+ op32(ctx, MOV, tmp, &p);
+ op32(ctx, OR, tmp, pconst(&p, 0x6000)); // set round towards 0
+ op32(ctx, MOV, pmem(&p, Esp, -8), tmp);
+ op32(ctx, LDMXCSR, &p, UNUSED);
+ op32(ctx, CVTSS2SI, w, r);
+ op32(ctx, LDMXCSR, pmem(&p, Esp, -4), UNUSED);
+ store(ctx, dst, w, true);
+ } else if( (dst->t->kind == HI64 || dst->t->kind == HGUID) && ra->t->kind == HI32 ) {
+ if( ra->current != PEAX ) {
+ op32(ctx, MOV, PEAX, fetch(ra));
+ scratch(PEAX);
+ }
+# ifdef HL_64
+ op64(ctx, CDQE, UNUSED, UNUSED); // sign-extend Eax into Rax
+ store(ctx, dst, PEAX, true);
+# else
+ op32(ctx, CDQ, UNUSED, UNUSED); // sign-extend Eax into Eax:Edx
+ scratch(REG_AT(Edx));
+ op32(ctx, MOV, fetch(dst), PEAX);
+ dst->stackPos += 4;
+ op32(ctx, MOV, fetch(dst), REG_AT(Edx));
+ dst->stackPos -= 4;
+ } else if( dst->t->kind == HI32 && ra->t->kind == HI64 ) {
+ error_i64();
+# endif
+ } else {
+ preg *r = alloc_cpu(ctx,dst,false);
+ copy_from(ctx, r, ra);
+ store(ctx, dst, r, true);
+ }
+ break;
+ case ORet:
+ op_ret(ctx, dst);
+ break;
+ case OIncr:
+ {
+ if( IS_FLOAT(dst) ) {
+ ASSERT(0);
+ } else {
+ preg *v = fetch32(ctx,dst);
+ op32(ctx,INC,v,UNUSED);
+ if( v->kind != RSTACK ) store(ctx, dst, v, false);
+ }
+ }
+ break;
+ case ODecr:
+ {
+ if( IS_FLOAT(dst) ) {
+ ASSERT(0);
+ } else {
+ preg *v = fetch32(ctx,dst);
+ op32(ctx,DEC,v,UNUSED);
+ if( v->kind != RSTACK ) store(ctx, dst, v, false);
+ }
+ }
+ break;
+ case OFloat:
+ {
+ if( m->code->floats[o->p2] == 0 ) {
+ preg *f = alloc_fpu(ctx,dst,false);
+ op64(ctx,XORPD,f,f);
+ } else switch( dst->t->kind ) {
+ case HF64:
+ case HF32:
+# ifdef HL_64
+ op64(ctx,dst->t->kind == HF32 ? CVTSD2SS : MOVSD,alloc_fpu(ctx,dst,false),pcodeaddr(&p,o->p2 * 8));
+# else
+ op64(ctx,dst->t->kind == HF32 ? MOVSS : MOVSD,alloc_fpu(ctx,dst,false),paddr(&p,m->code->floats + o->p2));
+# endif
+ break;
+ default:
+ ASSERT(dst->t->kind);
+ }
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case OString:
+ op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)hl_get_ustring(m->code,o->p2)));
+ store(ctx,dst,dst->current,false);
+ break;
+ case OBytes:
+ {
+ char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2];
+ op64(ctx,MOV,alloc_cpu(ctx,dst,false),pconst64(&p,(int_val)b));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case ONull:
+ {
+ op64(ctx,XOR,alloc_cpu(ctx, dst, false),alloc_cpu(ctx, dst, false));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case ONew:
+ {
+ int_val args[] = { (int_val)dst->t };
+ void *allocFun;
+ int nargs = 1;
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ allocFun = hl_alloc_obj;
+ break;
+ case HDYNOBJ:
+ allocFun = hl_alloc_dynobj;
+ nargs = 0;
+ break;
+ case HVIRTUAL:
+ allocFun = hl_alloc_virtual;
+ break;
+ default:
+ ASSERT(dst->t->kind);
+ }
+ call_native_consts(ctx, allocFun, args, nargs);
+ store(ctx, dst, PEAX, true);
+ }
+ break;
+ case OInstanceClosure:
+ {
+ preg *r = alloc_cpu(ctx, rb, true);
+ jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
+ int size = begin_native_call(ctx,3);
+ set_native_arg(ctx,r);
+
+ j->pos = BUF_POS();
+ j->target = o->p2;
+ j->next = ctx->calls;
+ ctx->calls = j;
+
+ set_native_arg(ctx,pconst64(&p,RESERVE_ADDRESS));
+ set_native_arg(ctx,pconst64(&p,(int_val)m->code->functions[m->functions_indexes[o->p2]].type));
+ call_native(ctx,hl_alloc_closure_ptr,size);
+ store(ctx,dst,PEAX,true);
+ }
+ break;
+ case OVirtualClosure:
+ {
+ int size, i;
+ preg *r = alloc_cpu_call(ctx, ra);
+ hl_type *t = NULL;
+ hl_type *ot = ra->t;
+ while( t == NULL ) {
+ for(i=0;iobj->nproto;i++) {
+ hl_obj_proto *pp = ot->obj->proto + i;
+ if( pp->pindex == o->p3 ) {
+ t = m->code->functions[m->functions_indexes[pp->findex]].type;
+ break;
+ }
+ }
+ ot = ot->obj->super;
+ }
+ size = begin_native_call(ctx,3);
+ set_native_arg(ctx,r);
+ // read r->type->vobj_proto[i] for function address
+ op64(ctx,MOV,r,pmem(&p,r->id,0));
+ op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*2));
+ op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*o->p3));
+ set_native_arg(ctx,r);
+ op64(ctx,MOV,r,pconst64(&p,(int_val)t));
+ set_native_arg(ctx,r);
+ call_native(ctx,hl_alloc_closure_ptr,size);
+ store(ctx,dst,PEAX,true);
+ }
+ break;
+ case OCallClosure:
+ if( ra->t->kind == HDYN ) {
+ // ASM for {
+ // vdynamic *args[] = {args};
+ // vdynamic *ret = hl_dyn_call(closure,args,nargs);
+ // dst = hl_dyncast(ret,t_dynamic,t_dst);
+ // }
+ int offset = o->p3 * HL_WSIZE;
+ preg *r = alloc_reg(ctx, RCPU_CALL);
+ if( offset & 15 ) offset += 16 - (offset & 15);
+ op64(ctx,SUB,PESP,pconst(&p,offset));
+ op64(ctx,MOV,r,PESP);
+ for(i=0;ip3;i++) {
+ vreg *a = R(o->extra[i]);
+ if( !hl_is_dynamic(a->t) ) ASSERT(0);
+ preg *v = alloc_cpu(ctx,a,true);
+ op64(ctx,MOV,pmem(&p,r->id,i * HL_WSIZE),v);
+ RUNLOCK(v);
+ }
+# ifdef HL_64
+ int size = begin_native_call(ctx, 3) + offset;
+ set_native_arg(ctx, pconst(&p,o->p3));
+ set_native_arg(ctx, r);
+ set_native_arg(ctx, fetch(ra));
+# else
+ int size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(int) + offset);
+ op64(ctx,PUSH,pconst(&p,o->p3),UNUSED);
+ op64(ctx,PUSH,r,UNUSED);
+ op64(ctx,PUSH,alloc_cpu(ctx,ra,true),UNUSED);
+# endif
+ call_native(ctx,hl_dyn_call,size);
+ if( dst->t->kind != HVOID ) {
+ store(ctx,dst,PEAX,true);
+ make_dyn_cast(ctx,dst,dst);
+ }
+ } else {
+ int jhasvalue, jend, size;
+ // ASM for if( c->hasValue ) c->fun(value,args) else c->fun(args)
+ preg *r = alloc_cpu(ctx,ra,true);
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op32(ctx,MOV,tmp,pmem(&p,r->id,HL_WSIZE*2));
+ op32(ctx,TEST,tmp,tmp);
+ scratch(tmp);
+ XJump_small(JNotZero,jhasvalue);
+ save_regs(ctx);
+ size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
+ preg *rr = r;
+ if( rr->holds != ra ) rr = alloc_cpu(ctx, ra, true);
+ op_call(ctx, pmem(&p,rr->id,HL_WSIZE), size);
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jhasvalue);
+ restore_regs(ctx);
+# ifdef HL_64
+ {
+ int regids[64];
+ preg *pc = REG_AT(CALL_REGS[0]);
+ vreg *sc = R(f->nregs); // scratch register that we temporary rebind
+ if( o->p3 >= 63 ) jit_error("assert");
+ memcpy(regids + 1, o->extra, o->p3 * sizeof(int));
+ regids[0] = f->nregs;
+ sc->size = HL_WSIZE;
+ sc->t = &hlt_dyn;
+ op64(ctx, MOV, pc, pmem(&p,r->id,HL_WSIZE*3));
+ scratch(pc);
+ sc->current = pc;
+ pc->holds = sc;
+ size = prepare_call_args(ctx,o->p3 + 1,regids,ctx->vregs,0);
+ if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
+ }
+# else
+ size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,HL_WSIZE);
+ if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
+ op64(ctx, PUSH,pmem(&p,r->id,HL_WSIZE*3),UNUSED); // push closure value
+# endif
+ op_call(ctx, pmem(&p,r->id,HL_WSIZE), size);
+ discard_regs(ctx,false);
+ patch_jump(ctx,jend);
+ store_result(ctx, dst);
+ }
+ break;
+ case OStaticClosure:
+ {
+ vclosure *c = alloc_static_closure(ctx,o->p2);
+ preg *r = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, r, pconst64(&p,(int_val)c));
+ store(ctx,dst,r,true);
+ }
+ break;
+ case OField:
+ {
+# ifndef HL_64
+ if( dst->t->kind == HI64 ) {
+ error_i64();
+ break;
+ }
+# endif
+ switch( ra->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ hl_runtime_obj *rt = hl_get_obj_rt(ra->t);
+ preg *rr = alloc_cpu(ctx,ra, true);
+ if( dst->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t;
+ if( ft->kind == HPACKED ) {
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p3]));
+ store(ctx,dst,r,true);
+ break;
+ }
+ }
+ copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p3]));
+ }
+ break;
+ case HVIRTUAL:
+ // ASM for --> if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt)
+ {
+ int jhasfield, jend, size;
+ bool need_type = !(IS_FLOAT(dst) || dst->t->kind == HI64);
+ preg *v = alloc_cpu_call(ctx,ra);
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p3));
+ op64(ctx,TEST,r,r);
+ XJump_small(JNotZero,jhasfield);
+ size = begin_native_call(ctx, need_type ? 3 : 2);
+ if( need_type ) set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
+ set_native_arg(ctx,pconst64(&p,(int_val)ra->t->virt->fields[o->p3].hashed_name));
+ set_native_arg(ctx,v);
+ call_native(ctx,get_dynget(dst->t),size);
+ store_result(ctx,dst);
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jhasfield);
+ copy_to(ctx, dst, pmem(&p,(CpuReg)r->id,0));
+ patch_jump(ctx,jend);
+ scratch(dst->current);
+ }
+ break;
+ default:
+ ASSERT(ra->t->kind);
+ break;
+ }
+ }
+ break;
+ case OSetField:
+ {
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ preg *rr = alloc_cpu(ctx, dst, true);
+ if( rb->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t;
+ if( ft->kind == HPACKED ) {
+ hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam);
+ preg *prb = alloc_cpu(ctx, rb, true);
+ preg *tmp = alloc_reg(ctx, RCPU_CALL);
+ int offset = 0;
+ while( offset < frt->size ) {
+ int remain = frt->size - offset;
+ int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
+ copy(ctx, tmp, pmem(&p, (CpuReg)prb->id, offset), copy_size);
+ copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]+offset), tmp, copy_size);
+ offset += copy_size;
+ }
+ break;
+ }
+ }
+ copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]), rb);
+ }
+ break;
+ case HVIRTUAL:
+ // ASM for --> if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v)
+ {
+ int jhasfield, jend;
+ preg *obj = alloc_cpu_call(ctx,dst);
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,MOV,r,pmem(&p,obj->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
+ op64(ctx,TEST,r,r);
+ XJump_small(JNotZero,jhasfield);
+# ifdef HL_64
+ switch( rb->t->kind ) {
+ case HF64:
+ case HF32:
+ size = begin_native_call(ctx,3);
+ set_native_arg_fpu(ctx, fetch(rb), rb->t->kind == HF32);
+ break;
+ case HI64:
+ case HGUID:
+ size = begin_native_call(ctx,3);
+ set_native_arg(ctx, fetch(rb));
+ break;
+ default:
+ size = begin_native_call(ctx, 4);
+ set_native_arg(ctx, fetch(rb));
+ set_native_arg(ctx, pconst64(&p,(int_val)rb->t));
+ break;
+ }
+ set_native_arg(ctx,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
+ set_native_arg(ctx,obj);
+# else
+ switch( rb->t->kind ) {
+ case HF64:
+ case HI64:
+ case HGUID:
+ size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(double));
+ push_reg(ctx,rb);
+ break;
+ case HF32:
+ size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(float));
+ push_reg(ctx,rb);
+ break;
+ default:
+ size = pad_before_call(ctx,HL_WSIZE*4);
+ op64(ctx,PUSH,fetch32(ctx,rb),UNUSED);
+ op64(ctx,MOV,r,pconst64(&p,(int_val)rb->t));
+ op64(ctx,PUSH,r,UNUSED);
+ break;
+ }
+ op32(ctx,MOV,r,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
+ op64(ctx,PUSH,r,UNUSED);
+ op64(ctx,PUSH,obj,UNUSED);
+# endif
+ call_native(ctx,get_dynset(rb->t),size);
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jhasfield);
+ copy_from(ctx, pmem(&p,(CpuReg)r->id,0), rb);
+ patch_jump(ctx,jend);
+ scratch(rb->current);
+ }
+ break;
+ default:
+ ASSERT(dst->t->kind);
+ break;
+ }
+ }
+ break;
+ case OGetThis:
+ {
+ vreg *r = R(0);
+ hl_runtime_obj *rt = hl_get_obj_rt(r->t);
+ preg *rr = alloc_cpu(ctx,r, true);
+ if( dst->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t;
+ if( ft->kind == HPACKED ) {
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p2]));
+ store(ctx,dst,r,true);
+ break;
+ }
+ }
+ copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]));
+ }
+ break;
+ case OSetThis:
+ {
+ vreg *r = R(0);
+ hl_runtime_obj *rt = hl_get_obj_rt(r->t);
+ preg *rr = alloc_cpu(ctx, r, true);
+ if( ra->t->kind == HSTRUCT ) {
+ hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t;
+ if( ft->kind == HPACKED ) {
+ hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam);
+ preg *pra = alloc_cpu(ctx, ra, true);
+ preg *tmp = alloc_reg(ctx, RCPU_CALL);
+ int offset = 0;
+ while( offset < frt->size ) {
+ int remain = frt->size - offset;
+ int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
+ copy(ctx, tmp, pmem(&p, (CpuReg)pra->id, offset), copy_size);
+ copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]+offset), tmp, copy_size);
+ offset += copy_size;
+ }
+ break;
+ }
+ }
+ copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]), ra);
+ }
+ break;
+ case OCallThis:
+ {
+ int nargs = o->p3 + 1;
+ int *args = (int*)hl_malloc(&ctx->falloc,sizeof(int) * nargs);
+ int size;
+ preg *r = alloc_cpu(ctx, R(0), true);
+ preg *tmp;
+ tmp = alloc_reg(ctx, RCPU_CALL);
+ op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
+ op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
+ args[0] = 0;
+ for(i=1;iextra[i-1];
+ size = prepare_call_args(ctx,nargs,args,ctx->vregs,0);
+ op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
+ discard_regs(ctx, false);
+ store_result(ctx, dst);
+ }
+ break;
+ case OCallMethod:
+ switch( R(o->extra[0])->t->kind ) {
+ case HOBJ: {
+ int size;
+ preg *r = alloc_cpu(ctx, R(o->extra[0]), true);
+ preg *tmp;
+ tmp = alloc_reg(ctx, RCPU_CALL);
+ op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
+ op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
+ size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
+ op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
+ discard_regs(ctx, false);
+ store_result(ctx, dst);
+ break;
+ }
+ case HVIRTUAL:
+ // ASM for --> if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret)
+ {
+ int size;
+ int paramsSize;
+ int jhasfield, jend;
+ bool need_dyn;
+ bool obj_in_args = false;
+ vreg *obj = R(o->extra[0]);
+ preg *v = alloc_cpu_call(ctx,obj);
+ preg *r = alloc_reg(ctx,RCPU_CALL);
+ op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
+ op64(ctx,TEST,r,r);
+ save_regs(ctx);
+
+ if( o->p3 < 6 ) {
+ XJump_small(JNotZero,jhasfield);
+ } else {
+ XJump(JNotZero,jhasfield);
+ }
+
+ need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID;
+ paramsSize = (o->p3 - 1) * HL_WSIZE;
+ if( need_dyn ) paramsSize += sizeof(vdynamic);
+ if( paramsSize & 15 ) paramsSize += 16 - (paramsSize&15);
+ op64(ctx,SUB,PESP,pconst(&p,paramsSize));
+ op64(ctx,MOV,r,PESP);
+
+				for(i=0;i<o->p3-1;i++) {
+ vreg *a = R(o->extra[i+1]);
+ if( hl_is_ptr(a->t) ) {
+ op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),alloc_cpu(ctx,a,true));
+ if( a->current != v ) {
+ RUNLOCK(a->current);
+ } else
+ obj_in_args = true;
+ } else {
+ preg *r2 = alloc_reg(ctx,RCPU);
+ op64(ctx,LEA,r2,&a->stack);
+ op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),r2);
+ if( r2 != v ) RUNLOCK(r2);
+ }
+ }
+
+ jit_buf(ctx);
+
+ if( !need_dyn ) {
+ size = begin_native_call(ctx, 5);
+ set_native_arg(ctx, pconst(&p,0));
+ } else {
+ preg *rtmp = alloc_reg(ctx,RCPU);
+ op64(ctx,LEA,rtmp,pmem(&p,Esp,paramsSize - sizeof(vdynamic)));
+ size = begin_native_call(ctx, 5);
+ set_native_arg(ctx,rtmp);
+ if( !IS_64 ) RUNLOCK(rtmp);
+ }
+ set_native_arg(ctx,r);
+ set_native_arg(ctx,pconst(&p,obj->t->virt->fields[o->p2].hashed_name)); // fid
+ set_native_arg(ctx,pconst64(&p,(int_val)obj->t->virt->fields[o->p2].t)); // ftype
+ set_native_arg(ctx,pmem(&p,v->id,HL_WSIZE)); // o->value
+ call_native(ctx,hl_dyn_call_obj,size + paramsSize);
+ if( need_dyn ) {
+ preg *r = IS_FLOAT(dst) ? REG_AT(XMM(0)) : PEAX;
+ copy(ctx,r,pmem(&p,Esp,HDYN_VALUE - (int)sizeof(vdynamic)),dst->size);
+ store(ctx, dst, r, false);
+ } else
+ store(ctx, dst, PEAX, false);
+
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jhasfield);
+ restore_regs(ctx);
+
+ if( !obj_in_args ) {
+ // o = o->value hack
+ if( v->holds ) v->holds->current = NULL;
+ obj->current = v;
+ v->holds = obj;
+ op64(ctx,MOV,v,pmem(&p,v->id,HL_WSIZE));
+ size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
+ } else {
+ // keep o->value in R(f->nregs)
+ int regids[64];
+ preg *pc = alloc_reg(ctx,RCPU_CALL);
+ vreg *sc = R(f->nregs); // scratch register that we temporary rebind
+ if( o->p3 >= 63 ) jit_error("assert");
+ memcpy(regids, o->extra, o->p3 * sizeof(int));
+ regids[0] = f->nregs;
+ sc->size = HL_WSIZE;
+ sc->t = &hlt_dyn;
+ op64(ctx, MOV, pc, pmem(&p,v->id,HL_WSIZE));
+ scratch(pc);
+ sc->current = pc;
+ pc->holds = sc;
+ size = prepare_call_args(ctx,o->p3,regids,ctx->vregs,0);
+ }
+
+ op_call(ctx,r,size);
+ discard_regs(ctx, false);
+ store_result(ctx, dst);
+ patch_jump(ctx,jend);
+ }
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ break;
+ case ORethrow:
+ {
+ int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
+ call_native(ctx,hl_rethrow,size);
+ }
+ break;
+ case OThrow:
+ {
+ int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
+ call_native(ctx,hl_throw,size);
+ }
+ break;
+ case OLabel:
+ // NOP for now
+ discard_regs(ctx,false);
+ break;
+ case OGetI8:
+ case OGetI16:
+ {
+ preg *base = alloc_cpu(ctx, ra, true);
+ preg *offset = alloc_cpu64(ctx, rb, true);
+ preg *r = alloc_reg(ctx,o->op == OGetI8 ? RCPU_8BITS : RCPU);
+ op64(ctx,XOR,r,r);
+ op32(ctx, o->op == OGetI8 ? MOV8 : MOV16,r,pmem2(&p,base->id,offset->id,1,0));
+ store(ctx, dst, r, true);
+ }
+ break;
+ case OGetMem:
+ {
+ #ifndef HL_64
+ if (dst->t->kind == HI64) {
+ error_i64();
+ }
+ #endif
+ preg *base = alloc_cpu(ctx, ra, true);
+ preg *offset = alloc_cpu64(ctx, rb, true);
+ store(ctx, dst, pmem2(&p,base->id,offset->id,1,0), false);
+ }
+ break;
+ case OSetI8:
+ {
+ preg *base = alloc_cpu(ctx, dst, true);
+ preg *offset = alloc_cpu64(ctx, ra, true);
+ preg *value = alloc_cpu8(ctx, rb, true);
+ op32(ctx,MOV8,pmem2(&p,base->id,offset->id,1,0),value);
+ }
+ break;
+ case OSetI16:
+ {
+ preg *base = alloc_cpu(ctx, dst, true);
+ preg *offset = alloc_cpu64(ctx, ra, true);
+ preg *value = alloc_cpu(ctx, rb, true);
+ op32(ctx,MOV16,pmem2(&p,base->id,offset->id,1,0),value);
+ }
+ break;
+ case OSetMem:
+ {
+ preg *base = alloc_cpu(ctx, dst, true);
+ preg *offset = alloc_cpu64(ctx, ra, true);
+ preg *value;
+ switch( rb->t->kind ) {
+ case HI32:
+ value = alloc_cpu(ctx, rb, true);
+ op32(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
+ break;
+ case HF32:
+ value = alloc_fpu(ctx, rb, true);
+ op32(ctx,MOVSS,pmem2(&p,base->id,offset->id,1,0),value);
+ break;
+ case HF64:
+ value = alloc_fpu(ctx, rb, true);
+ op32(ctx,MOVSD,pmem2(&p,base->id,offset->id,1,0),value);
+ break;
+ case HI64:
+ case HGUID:
+ value = alloc_cpu(ctx, rb, true);
+ op64(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
+ break;
+ default:
+ ASSERT(rb->t->kind);
+ break;
+ }
+ }
+ break;
+ case OType:
+ {
+ op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)(m->code->types + o->p2)));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case OGetType:
+ {
+ int jnext, jend;
+ preg *r = alloc_cpu(ctx, ra, true);
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx,TEST,r,r);
+ XJump_small(JNotZero,jnext);
+ op64(ctx,MOV, tmp, pconst64(&p,(int_val)&hlt_void));
+ XJump_small(JAlways,jend);
+ patch_jump(ctx,jnext);
+ op64(ctx, MOV, tmp, pmem(&p,r->id,0));
+ patch_jump(ctx,jend);
+ store(ctx,dst,tmp,true);
+ }
+ break;
+ case OGetArray:
+ {
+ preg *rdst = IS_FLOAT(dst) ? alloc_fpu(ctx,dst,false) : alloc_cpu(ctx,dst,false);
+ if( ra->t->kind == HABSTRACT ) {
+ int osize;
+ bool isRead = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT;
+ if( isRead )
+ osize = sizeof(void*);
+ else {
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ osize = rt->size;
+ }
+ preg *idx = alloc_cpu64(ctx, rb, true);
+ op64(ctx, IMUL, idx, pconst(&p,osize));
+ op64(ctx, isRead?MOV:LEA, rdst, pmem2(&p,alloc_cpu(ctx,ra, true)->id,idx->id,1,0));
+ store(ctx,dst,dst->current,false);
+ scratch(idx);
+ } else {
+ copy(ctx, rdst, pmem2(&p,alloc_cpu(ctx,ra,true)->id,alloc_cpu64(ctx,rb,true)->id,hl_type_size(dst->t),sizeof(varray)), dst->size);
+ store(ctx,dst,dst->current,false);
+ }
+ }
+ break;
+ case OSetArray:
+ {
+ if( dst->t->kind == HABSTRACT ) {
+ int osize;
+ bool isWrite = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT;
+ if( isWrite ) {
+ osize = sizeof(void*);
+ } else {
+ hl_runtime_obj *rt = hl_get_obj_rt(rb->t);
+ osize = rt->size;
+ }
+ preg *pdst = alloc_cpu(ctx,dst,true);
+ preg *pra = alloc_cpu64(ctx,ra,true);
+ op64(ctx, IMUL, pra, pconst(&p,osize));
+ op64(ctx, ADD, pdst, pra);
+ scratch(pra);
+ preg *prb = alloc_cpu(ctx,rb,true);
+ preg *tmp = alloc_reg(ctx, RCPU_CALL);
+ int offset = 0;
+ while( offset < osize ) {
+ int remain = osize - offset;
+ int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1));
+ copy(ctx, tmp, pmem(&p, prb->id, offset), copy_size);
+ copy(ctx, pmem(&p, pdst->id, offset), tmp, copy_size);
+ offset += copy_size;
+ }
+ scratch(pdst);
+ } else {
+ preg *rrb = IS_FLOAT(rb) ? alloc_fpu(ctx,rb,true) : alloc_cpu(ctx,rb,true);
+ copy(ctx, pmem2(&p,alloc_cpu(ctx,dst,true)->id,alloc_cpu64(ctx,ra,true)->id,hl_type_size(rb->t),sizeof(varray)), rrb, rb->size);
+ }
+ }
+ break;
+ case OArraySize:
+ {
+ op32(ctx,MOV,alloc_cpu(ctx,dst,false),pmem(&p,alloc_cpu(ctx,ra,true)->id,ra->t->kind == HABSTRACT ? HL_WSIZE + 4 : HL_WSIZE*2));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case ORef:
+ {
+ scratch(ra->current);
+ op64(ctx,MOV,alloc_cpu(ctx,dst,false),REG_AT(Ebp));
+ if( ra->stackPos < 0 )
+ op64(ctx,SUB,dst->current,pconst(&p,-ra->stackPos));
+ else
+ op64(ctx,ADD,dst->current,pconst(&p,ra->stackPos));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case OUnref:
+ copy_to(ctx,dst,pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
+ break;
+ case OSetref:
+ copy_from(ctx,pmem(&p,alloc_cpu(ctx,dst,true)->id,0),ra);
+ break;
+ case ORefData:
+ switch( ra->t->kind ) {
+ case HARRAY:
+ {
+ preg *r = fetch(ra);
+ preg *d = alloc_cpu(ctx,dst,false);
+ op64(ctx,MOV,d,r);
+ op64(ctx,ADD,d,pconst(&p,sizeof(varray)));
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ default:
+ ASSERT(ra->t->kind);
+ }
+ break;
+ case ORefOffset:
+ {
+ preg *d = alloc_cpu(ctx,rb,true);
+ preg *r2 = alloc_cpu(ctx,dst,false);
+ preg *r = fetch(ra);
+ int size = hl_type_size(dst->t->tparam);
+ op64(ctx,MOV,r2,r);
+ switch( size ) {
+ case 1:
+ break;
+ case 2:
+ op64(ctx,SHL,d,pconst(&p,1));
+ break;
+ case 4:
+ op64(ctx,SHL,d,pconst(&p,2));
+ break;
+ case 8:
+ op64(ctx,SHL,d,pconst(&p,3));
+ break;
+ default:
+ op64(ctx,IMUL,d,pconst(&p,size));
+ break;
+ }
+ op64(ctx,ADD,r2,d);
+ scratch(d);
+ store(ctx,dst,dst->current,false);
+ }
+ break;
+ case OToVirtual:
+ {
+# ifdef HL_64
+ int size = pad_before_call(ctx, 0);
+ op64(ctx,MOV,REG_AT(CALL_REGS[1]),fetch(ra));
+ op64(ctx,MOV,REG_AT(CALL_REGS[0]),pconst64(&p,(int_val)dst->t));
+# else
+ int size = pad_before_call(ctx, HL_WSIZE*2);
+ op32(ctx,PUSH,fetch(ra),UNUSED);
+ op32(ctx,PUSH,pconst(&p,(int)(int_val)dst->t),UNUSED);
+# endif
+ if( ra->t->kind == HOBJ ) hl_get_obj_rt(ra->t); // ensure it's initialized
+ call_native(ctx,hl_to_virtual,size);
+ store(ctx,dst,PEAX,true);
+ }
+ break;
+ case OMakeEnum:
+ {
+ hl_enum_construct *c = &dst->t->tenum->constructs[o->p2];
+ int_val args[] = { (int_val)dst->t, o->p2 };
+ int i;
+ call_native_consts(ctx, hl_alloc_enum, args, 2);
+ RLOCK(PEAX);
+			for(i=0;i<c->nparams;i++) {
+ preg *r = fetch(R(o->extra[i]));
+ copy(ctx, pmem(&p,Eax,c->offsets[i]),r, R(o->extra[i])->size);
+ RUNLOCK(fetch(R(o->extra[i])));
+ if ((i & 15) == 0) jit_buf(ctx);
+ }
+ store(ctx, dst, PEAX, true);
+ }
+ break;
+ case OEnumAlloc:
+ {
+ int_val args[] = { (int_val)dst->t, o->p2 };
+ call_native_consts(ctx, hl_alloc_enum, args, 2);
+ store(ctx, dst, PEAX, true);
+ }
+ break;
+ case OEnumField:
+ {
+ hl_enum_construct *c = &ra->t->tenum->constructs[o->p3];
+ preg *r = alloc_cpu(ctx,ra,true);
+ copy_to(ctx,dst,pmem(&p,r->id,c->offsets[(int)(int_val)o->extra]));
+ }
+ break;
+ case OSetEnumField:
+ {
+ hl_enum_construct *c = &dst->t->tenum->constructs[0];
+ preg *r = alloc_cpu(ctx,dst,true);
+ switch( rb->t->kind ) {
+ case HF64:
+ {
+ preg *d = alloc_fpu(ctx,rb,true);
+ copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),d,8);
+ break;
+ }
+ default:
+ copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),alloc_cpu(ctx,rb,true),hl_type_size(c->params[o->p2]));
+ break;
+ }
+ }
+ break;
+ case ONullCheck:
+ {
+ int jz;
+ preg *r = alloc_cpu(ctx,dst,true);
+ op64(ctx,TEST,r,r);
+ XJump_small(JNotZero,jz);
+
+ hl_opcode *next = f->ops + opCount + 1;
+ bool null_field_access = false;
+ int hashed_name = 0;
+ // skip const and operation between nullcheck and access
+ while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) {
+ next++;
+ }
+ if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) {
+ int fid = next->op == OField ? next->p3 : next->p2;
+ hl_obj_field *f = NULL;
+ if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT )
+ f = hl_obj_field_fetch(dst->t, fid);
+ else if( dst->t->kind == HVIRTUAL )
+ f = dst->t->virt->fields + fid;
+ if( f == NULL ) ASSERT(dst->t->kind);
+ null_field_access = true;
+ hashed_name = f->hashed_name;
+ } else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) {
+ int fid = next->p2 < 0 ? -1 : ctx->m->functions_indexes[next->p2];
+ hl_function *cf = ctx->m->code->functions + fid;
+ const uchar *name = fun_field_name(cf);
+ null_field_access = true;
+ hashed_name = hl_hash_gen(name, true);
+ }
+
+ if( null_field_access ) {
+ pad_before_call(ctx, HL_WSIZE);
+ if( hashed_name >= 0 && hashed_name < 256 )
+ op64(ctx,PUSH8,pconst(&p,hashed_name),UNUSED);
+ else
+ op32(ctx,PUSH,pconst(&p,hashed_name),UNUSED);
+ } else {
+ pad_before_call(ctx, 0);
+ }
+
+ jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
+ j->pos = BUF_POS();
+ j->target = null_field_access ? -3 : -1;
+ j->next = ctx->calls;
+ ctx->calls = j;
+
+ op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
+ op_call(ctx,PEAX,-1);
+ patch_jump(ctx,jz);
+ }
+ break;
+ case OSafeCast:
+ make_dyn_cast(ctx, dst, ra);
+ break;
+ case ODynGet:
+ {
+ int size;
+# ifdef HL_64
+ if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
+ size = begin_native_call(ctx,2);
+ } else {
+ size = begin_native_call(ctx,3);
+ set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
+ }
+ set_native_arg(ctx,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
+ set_native_arg(ctx,fetch(ra));
+# else
+ preg *r;
+ r = alloc_reg(ctx,RCPU);
+ if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
+ size = pad_before_call(ctx,HL_WSIZE*2);
+ } else {
+ size = pad_before_call(ctx,HL_WSIZE*3);
+ op64(ctx,MOV,r,pconst64(&p,(int_val)dst->t));
+ op64(ctx,PUSH,r,UNUSED);
+ }
+ op64(ctx,MOV,r,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
+ op64(ctx,PUSH,r,UNUSED);
+ op64(ctx,PUSH,fetch(ra),UNUSED);
+# endif
+ call_native(ctx,get_dynget(dst->t),size);
+ store_result(ctx,dst);
+ }
+ break;
+ case ODynSet:
+ {
+ int size;
+# ifdef HL_64
+ switch( rb->t->kind ) {
+ case HF32:
+ case HF64:
+ size = begin_native_call(ctx, 3);
+ set_native_arg_fpu(ctx,fetch(rb),rb->t->kind == HF32);
+ set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
+ set_native_arg(ctx,fetch(dst));
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ case HI64:
+ case HGUID:
+ size = begin_native_call(ctx, 3);
+ set_native_arg(ctx,fetch(rb));
+ set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
+ set_native_arg(ctx,fetch(dst));
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ default:
+ size = begin_native_call(ctx,4);
+ set_native_arg(ctx,fetch(rb));
+ set_native_arg(ctx,pconst64(&p,(int_val)rb->t));
+ set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
+ set_native_arg(ctx,fetch(dst));
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ }
+# else
+ switch( rb->t->kind ) {
+ case HF32:
+ size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(float));
+ push_reg(ctx,rb);
+ op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
+ op32(ctx,PUSH,fetch(dst),UNUSED);
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ case HF64:
+ case HI64:
+ case HGUID:
+ size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(double));
+ push_reg(ctx,rb);
+ op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
+ op32(ctx,PUSH,fetch(dst),UNUSED);
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ default:
+ size = pad_before_call(ctx, HL_WSIZE*4);
+ op32(ctx,PUSH,fetch32(ctx,rb),UNUSED);
+ op32(ctx,PUSH,pconst64(&p,(int_val)rb->t),UNUSED);
+ op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
+ op32(ctx,PUSH,fetch(dst),UNUSED);
+ call_native(ctx,get_dynset(rb->t),size);
+ break;
+ }
+# endif
+ }
+ break;
+ case OTrap:
+ {
+ int size, jenter, jtrap;
+ int offset = 0;
+ int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
+ hl_trap_ctx *t = NULL;
+# ifndef HL_THREADS
+ if( tinf == NULL ) tinf = hl_get_thread(); // single thread
+# endif
+
+# ifdef HL_64
+ preg *trap = REG_AT(CALL_REGS[0]);
+# else
+ preg *trap = PEAX;
+# endif
+ RLOCK(trap);
+
+ preg *treg = alloc_reg(ctx, RCPU);
+ if( !tinf ) {
+ call_native(ctx, hl_get_thread, 0);
+ op64(ctx,MOV,treg,PEAX);
+ offset = (int)(int_val)&tinf->trap_current;
+ } else {
+ offset = 0;
+ op64(ctx,MOV,treg,pconst64(&p,(int_val)&tinf->trap_current));
+ }
+ op64(ctx,MOV,trap,pmem(&p,treg->id,offset));
+ op64(ctx,SUB,PESP,pconst(&p,trap_size));
+ op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap);
+ op64(ctx,MOV,trap,PESP);
+ op64(ctx,MOV,pmem(&p,treg->id,offset),trap);
+
+ /*
+ trap E,@catch
+ catch g
+ catch g2
+ ...
+ @:catch
+
+ // Before haxe 5
+ This is a bit hackshish : we want to detect the type of exception filtered by the catch so we check the following
+ sequence of HL opcodes:
+
+ trap E,@catch
+ ...
+ @catch:
+ global R, _
+ call _, ???(R,E)
+
+ ??? is expected to be hl.BaseType.check
+ */
+ hl_opcode *cat = f->ops + opCount + 1;
+ hl_opcode *next = f->ops + opCount + 1 + o->p2;
+ hl_opcode *next2 = f->ops + opCount + 2 + o->p2;
+ if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->stack.id == (int)(int_val)next2->extra) ) {
+ int gindex = cat->op == OCatch ? cat->p1 : next->p2;
+ hl_type *gt = m->code->globals[gindex];
+ while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super;
+ if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) {
+ void *addr = m->globals_data + m->globals_indexes[gindex];
+# ifdef HL_64
+ op64(ctx,MOV,treg,pconst64(&p,(int_val)addr));
+ op64(ctx,MOV,treg,pmem(&p,treg->id,0));
+# else
+ op64(ctx,MOV,treg,paddr(&p,addr));
+# endif
+ } else
+ op64(ctx,MOV,treg,pconst(&p,0));
+ } else {
+ op64(ctx,MOV,treg,pconst(&p,0));
+ }
+ op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->tcheck),treg);
+
+ // On Win64 setjmp actually takes two arguments
+ // the jump buffer and the frame pointer (or the stack pointer if there is no FP)
+#if defined(HL_WIN) && defined(HL_64)
+ size = begin_native_call(ctx, 2);
+ set_native_arg(ctx, REG_AT(Ebp));
+#else
+ size = begin_native_call(ctx, 1);
+#endif
+ set_native_arg(ctx,trap);
+#ifdef HL_MINGW
+ call_native(ctx,_setjmp,size);
+#else
+ call_native(ctx,setjmp,size);
+#endif
+ op64(ctx,TEST,PEAX,PEAX);
+ XJump_small(JZero,jenter);
+ op64(ctx,ADD,PESP,pconst(&p,trap_size));
+ if( !tinf ) {
+ call_native(ctx, hl_get_thread, 0);
+ op64(ctx,MOV,PEAX,pmem(&p, Eax, (int)(int_val)&tinf->exc_value));
+ } else {
+ op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&tinf->exc_value));
+ op64(ctx,MOV,PEAX,pmem(&p, Eax, 0));
+ }
+ store(ctx,dst,PEAX,false);
+
+ jtrap = do_jump(ctx,OJAlways,false);
+ register_jump(ctx,jtrap,(opCount + 1) + o->p2);
+ patch_jump(ctx,jenter);
+ }
+ break;
+ case OEndTrap:
+ {
+ int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
+ hl_trap_ctx *tmp = NULL;
+ preg *addr,*r;
+ int offset;
+ if (!tinf) {
+ call_native(ctx, hl_get_thread, 0);
+ addr = PEAX;
+ RLOCK(addr);
+ offset = (int)(int_val)&tinf->trap_current;
+ } else {
+ offset = 0;
+ addr = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, addr, pconst64(&p, (int_val)&tinf->trap_current));
+ }
+ r = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, r, pmem(&p,addr->id,offset));
+ op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev));
+ op64(ctx, MOV, pmem(&p,addr->id, offset), r);
+# ifdef HL_WIN
+ // erase eip (prevent false positive)
+ {
+ _JUMP_BUFFER *b = NULL;
+# ifdef HL_64
+ op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&(b->Rip)),PEAX);
+# else
+ op64(ctx,MOV,pmem(&p,Esp,(int)&(b->Eip)),PEAX);
+# endif
+ }
+# endif
+ op64(ctx,ADD,PESP,pconst(&p,trap_size));
+ }
+ break;
+ case OEnumIndex:
+ {
+ preg *r = alloc_reg(ctx,RCPU);
+ op64(ctx,MOV,r,pmem(&p,alloc_cpu(ctx,ra,true)->id,HL_WSIZE));
+ store(ctx,dst,r,true);
+ break;
+ }
+ break;
+ case OSwitch:
+ {
+ int jdefault;
+ int i;
+ preg *r = alloc_cpu(ctx, dst, true);
+ preg *r2 = alloc_reg(ctx, RCPU);
+ op32(ctx, CMP, r, pconst(&p,o->p2));
+ XJump(JUGte,jdefault);
+ // r2 = r * 5 + eip
+# ifdef HL_64
+ op64(ctx, XOR, r2, r2);
+# endif
+ op32(ctx, MOV, r2, r);
+ op32(ctx, SHL, r2, pconst(&p,2));
+ op32(ctx, ADD, r2, r);
+# ifdef HL_64
+ preg *tmp = alloc_reg(ctx, RCPU);
+ op64(ctx, MOV, tmp, pconst64(&p,RESERVE_ADDRESS));
+# else
+ op64(ctx, ADD, r2, pconst64(&p,RESERVE_ADDRESS));
+# endif
+ {
+ jlist *s = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist));
+ s->pos = BUF_POS() - sizeof(void*);
+ s->next = ctx->switchs;
+ ctx->switchs = s;
+ }
+# ifdef HL_64
+ op64(ctx, ADD, r2, tmp);
+# endif
+ op64(ctx, JMP, r2, UNUSED);
+			for(i=0;i<o->p2;i++) {
+ int j = do_jump(ctx,OJAlways,false);
+ register_jump(ctx,j,(opCount + 1) + o->extra[i]);
+ if( (i & 15) == 0 ) jit_buf(ctx);
+ }
+ patch_jump(ctx, jdefault);
+ }
+ break;
+ case OGetTID:
+ op32(ctx, MOV, alloc_cpu(ctx,dst,false), pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
+ store(ctx,dst,dst->current,false);
+ break;
+ case OAssert:
+ {
+ pad_before_call(ctx, 0);
+ jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
+ j->pos = BUF_POS();
+ j->target = -2;
+ j->next = ctx->calls;
+ ctx->calls = j;
+
+ op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
+ op_call(ctx,PEAX,-1);
+ }
+ break;
+ case ONop:
+ break;
+ case OPrefetch:
+ {
+ preg *r = alloc_cpu(ctx, dst, true);
+ if( o->p2 > 0 ) {
+ switch( dst->t->kind ) {
+ case HOBJ:
+ case HSTRUCT:
+ {
+ hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
+ preg *r2 = alloc_reg(ctx, RCPU);
+ op64(ctx, LEA, r2, pmem(&p, r->id, rt->fields_indexes[o->p2-1]));
+ r = r2;
+ }
+ break;
+ default:
+ ASSERT(dst->t->kind);
+ break;
+ }
+ }
+ switch( o->p3 ) {
+ case 0:
+ op64(ctx, PREFETCHT0, pmem(&p,r->id,0), UNUSED);
+ break;
+ case 1:
+ op64(ctx, PREFETCHT1, pmem(&p,r->id,0), UNUSED);
+ break;
+ case 2:
+ op64(ctx, PREFETCHT2, pmem(&p,r->id,0), UNUSED);
+ break;
+ case 3:
+ op64(ctx, PREFETCHNTA, pmem(&p,r->id,0), UNUSED);
+ break;
+ case 4:
+ op64(ctx, PREFETCHW, pmem(&p,r->id,0), UNUSED);
+ break;
+ default:
+ ASSERT(o->p3);
+ break;
+ }
+ }
+ break;
+ case OAsm:
+ {
+ switch( o->p1 ) {
+ case 0: // byte output
+ B(o->p2);
+ break;
+ case 1: // scratch cpu reg
+ scratch(REG_AT(o->p2));
+ break;
+ case 2: // read vm reg
+ rb--;
+ copy(ctx, REG_AT(o->p2), &rb->stack, rb->size);
+ scratch(REG_AT(o->p2));
+ break;
+ case 3: // write vm reg
+ rb--;
+ copy(ctx, &rb->stack, REG_AT(o->p2), rb->size);
+ scratch(rb->current);
+ break;
+ case 4:
+ if( ctx->totalRegsSize != 0 )
+ hl_fatal("Asm naked function should not have local variables");
+ if( opCount != 0 )
+ hl_fatal("Asm naked function should be on first opcode");
+ ctx->buf.b -= BUF_POS() - ctx->functionPos; // reset to our function start
+ break;
+ default:
+ ASSERT(o->p1);
+ break;
+ }
+ }
+ break;
+ case OCatch:
+ // Only used by OTrap typing
+ break;
+ default:
+ jit_error(hl_op_name(o->op));
+ break;
+ }
+ // we are landing at this position, assume we have lost our registers
+ if( ctx->opsPos[opCount+1] == -1 )
+ discard_regs(ctx,true);
+ ctx->opsPos[opCount+1] = BUF_POS();
+
+ // write debug infos
+ size = BUF_POS() - codePos;
+ if( debug16 && size > 0xFF00 ) {
+ debug32 = malloc(sizeof(int) * (f->nops + 1));
+				for(i=0;i<ctx->currentPos;i++)
+ debug32[i] = debug16[i];
+ free(debug16);
+ debug16 = NULL;
+ }
+ if( debug16 ) debug16[ctx->currentPos] = (unsigned short)size; else if( debug32 ) debug32[ctx->currentPos] = size;
+
+ }
+ // patch jumps
+ {
+ jlist *j = ctx->jumps;
+ while( j ) {
+ *(int*)(ctx->startBuf + j->pos) = ctx->opsPos[j->target] - (j->pos + 4);
+ j = j->next;
+ }
+ ctx->jumps = NULL;
+ }
+ int codeEndPos = BUF_POS();
+ // add nops padding
+ jit_nops(ctx);
+ // clear regs
+	for(i=0;i<REG_COUNT;i++) {
+		preg *r = REG_AT(i);
+		r->holds = NULL;
+ r->lock = 0;
+ }
+ // save debug infos
+ if( ctx->debug ) {
+ int fid = (int)(f - m->code->functions);
+ ctx->debug[fid].start = codePos;
+ ctx->debug[fid].offsets = debug32 ? (void*)debug32 : (void*)debug16;
+ ctx->debug[fid].large = debug32 != NULL;
+ }
+ // unwind info
+#ifdef WIN64_UNWIND_TABLES
+ int uw_idx = ctx->nunwind++;
+ ctx->unwind_table[uw_idx].BeginAddress = codePos;
+ ctx->unwind_table[uw_idx].EndAddress = codeEndPos;
+ ctx->unwind_table[uw_idx].UnwindData = ctx->unwind_offset;
+#endif
+ // reset tmp allocator
+ hl_free(&ctx->falloc);
+ return codePos;
+}
+
+// Returns the native-callable wrapper used when C code invokes a HL closure.
+// The hl_type parameter is ignored: every closure type shares the single
+// call_jit_hl2c trampoline (its address is filled in by hl_jit_code once the
+// executable buffer is allocated).
+static void *get_wrapper( hl_type *t ) {
+	return call_jit_hl2c;
+}
+
+// Redirect an already-JITed function to a new implementation (hot reload).
+// Overwrites the start of old_fun with a trampoline:
+//     MOV (E/R)AX, new_fun_table ; JMP [(E/R)AX]
+// so stale callers holding the old address transparently jump through the
+// indirection table to the freshly compiled code.
+// NOTE(review): clobbers (E/R)AX and writes 12 bytes (x64) / 7 bytes (x86)
+// into the old function body — assumes that memory is writable and at least
+// that large; confirm at the call site.
+void hl_jit_patch_method( void *old_fun, void **new_fun_table ) {
+	// mov eax, addr
+	// jmp [eax]
+	unsigned char *b = (unsigned char*)old_fun;
+	unsigned long long addr = (unsigned long long)(int_val)new_fun_table;
+#	ifdef HL_64
+	*b++ = 0x48; // REX.W prefix (64-bit operand size)
+	*b++ = 0xB8; // MOV RAX, imm64
+	*b++ = (unsigned char)addr;
+	*b++ = (unsigned char)(addr>>8);
+	*b++ = (unsigned char)(addr>>16);
+	*b++ = (unsigned char)(addr>>24);
+	*b++ = (unsigned char)(addr>>32);
+	*b++ = (unsigned char)(addr>>40);
+	*b++ = (unsigned char)(addr>>48);
+	*b++ = (unsigned char)(addr>>56);
+#	else
+	*b++ = 0xB8; // MOV EAX, imm32
+	*b++ = (unsigned char)addr;
+	*b++ = (unsigned char)(addr>>8);
+	*b++ = (unsigned char)(addr>>16);
+	*b++ = (unsigned char)(addr>>24);
+#	endif
+	// FF /4 with ModRM 0x20 : JMP [(E/R)AX] — indirect jump through the table slot
+	*b++ = 0xFF;
+	*b++ = 0x20;
+}
+
+// Stub bound in place of a closure whose target function no longer exists
+// after a module reload (see the "patch closures" pass in hl_jit_code);
+// invoking it raises a runtime error instead of jumping to freed code.
+static void missing_closure() {
+	hl_error("Missing static closure");
+}
+
+// Finalize JIT compilation: copy the emitted buffer into executable memory,
+// then patch every recorded call site, switch jump table, and closure to its
+// final absolute address. Returns the executable code block, or NULL on
+// allocation failure / unresolvable reference.
+// `previous` is the prior module during hot reload: functions absent from the
+// new module are resolved against it.
+void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ) {
+	jlist *c;
+	int size = BUF_POS();
+	unsigned char *code;
+	// round the allocation up to a 4KB page boundary
+	if( size & 4095 ) size += 4096 - (size&4095);
+	code = (unsigned char*)hl_alloc_executable_memory(size);
+	if( code == NULL ) return NULL;
+	memcpy(code,ctx->startBuf,BUF_POS());
+	*codesize = size;
+	*debug = ctx->debug;
+	// first module only: publish the C<->HL trampolines and callbacks
+	if( !call_jit_c2hl ) {
+		call_jit_c2hl = code + ctx->c2hl;
+		call_jit_hl2c = code + ctx->hl2c;
+		hl_setup.get_wrapper = get_wrapper;
+		hl_setup.static_call = callback_c2hl;
+		hl_setup.static_call_ref = true;
+	}
+#ifdef WIN64_UNWIND_TABLES
+	// register unwind info so Windows can walk JIT frames (SEH / debuggers)
+	m->unwind_table = ctx->unwind_table;
+	RtlAddFunctionTable(m->unwind_table, ctx->nunwind, (DWORD64)code);
+#endif
+	// convert static helper offsets (recorded buffer-relative) to absolute, once
+	if( !ctx->static_function_offset ) {
+		int i;
+		ctx->static_function_offset = true;
+		for(i=0;i<(int)(sizeof(ctx->static_functions)/sizeof(void*));i++)
+			ctx->static_functions[i] = (void*)(code + (int)(int_val)ctx->static_functions[i]);
+	}
+	// patch calls
+	c = ctx->calls;
+	while( c ) {
+		void *fabs;
+		if( c->target < 0 )
+			// negative target = index into the static helper table (see -1/-2/-3 markers)
+			fabs = ctx->static_functions[-c->target-1];
+		else {
+			fabs = m->functions_ptrs[c->target];
+			if( fabs == NULL ) {
+				// read absolute address from previous module
+				int old_idx = m->hash->functions_hashes[m->functions_indexes[c->target]];
+				if( old_idx < 0 )
+					return NULL;
+				fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
+			} else {
+				// relative
+				fabs = (unsigned char*)code + (int)(int_val)fabs;
+			}
+		}
+		// distinguish the emitted instruction by its first opcode byte
+		if( (code[c->pos]&~3) == (IS_64?0x48:0xB8) || code[c->pos] == 0x68 ) // MOV : absolute | PUSH
+			*(void**)(code + c->pos + (IS_64?2:1)) = fabs;
+		else {
+			// CALL rel32: the displacement must fit in a signed 32-bit offset
+			int_val delta = (int_val)fabs - (int_val)code - (c->pos + 5);
+			int rpos = (int)delta;
+			if( (int_val)rpos != delta ) {
+				printf("Target code too far too rebase\n");
+				return NULL;
+			}
+			*(int*)(code + c->pos + 1) = rpos;
+		}
+		c = c->next;
+	}
+	// patch switchs
+	c = ctx->switchs;
+	while( c ) {
+		// jump table base = address just past the indirect JMP that consumes it
+		*(void**)(code + c->pos) = code + c->pos + (IS_64 ? 14 : 6);
+		c = c->next;
+	}
+	// patch closures
+	{
+		vclosure *c = ctx->closure_list;
+		while( c ) {
+			vclosure *next;
+			// c->fun temporarily holds the function index, not a pointer
+			int fidx = (int)(int_val)c->fun;
+			void *fabs = m->functions_ptrs[fidx];
+			if( fabs == NULL ) {
+				// read absolute address from previous module
+				int old_idx = m->hash->functions_hashes[m->functions_indexes[fidx]];
+				if( old_idx < 0 )
+					fabs = missing_closure;
+				else
+					fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
+			} else {
+				// relative
+				fabs = (unsigned char*)code + (int)(int_val)fabs;
+			}
+			c->fun = fabs;
+			// c->value was used to chain the pending-closure list; unlink it
+			next = (vclosure*)c->value;
+			c->value = NULL;
+			c = next;
+		}
+	}
+	return code;
+}
+
diff --git a/src/main.c b/src/main.c
index a25b673cc..60c4db77e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -20,7 +20,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include
-#include
+#include
#include "hlsystem.h"
#ifdef HL_WIN
diff --git a/src/module.c b/src/module.c
index e668b1064..e46f73f13 100644
--- a/src/module.c
+++ b/src/module.c
@@ -21,6 +21,7 @@
*/
#include
#include
+#include
#ifdef HL_WIN
# undef _GUID
@@ -718,6 +719,9 @@ int hl_module_init( hl_module *m, h_bool hot_reload ) {
return 0;
}
m->functions_ptrs[f->findex] = (void*)(int_val)fpos;
+# ifdef HL_DEBUG
+ if( hl_setup.sys_nargs > 0 && ucmp(hl_setup.sys_args[0],USTR("--dump")) == 0 ) hl_emit_dump(ctx);
+# endif
}
m->jit_code = hl_jit_code(ctx, m, &m->codesize, &m->jit_debug, NULL);
for(i=0;icode->nfunctions;i++) {
diff --git a/src/opcodes.h b/src/opcodes.h
index ab9b1fa51..9e4df7f60 100644
--- a/src/opcodes.h
+++ b/src/opcodes.h
@@ -67,8 +67,8 @@ OP_BEGIN
OP(OIncr,R,X,X)
OP(ODecr,R,X,X)
- OP(OCall0,R,R,X)
- OP(OCall1,R,R,R)
+ OP(OCall0,R,C,X)
+ OP(OCall1,R,C,R)
OP(OCall2,R,AR,4)
OP(OCall3,R,AR,5)
OP(OCall4,R,AR,6)
@@ -78,17 +78,17 @@ OP_BEGIN
OP(OCallClosure,R,AR,VAR_ARGS)
OP(OStaticClosure,R,G,X)
- OP(OInstanceClosure,R,R,G)
+ OP(OInstanceClosure,R,C,R)
OP(OVirtualClosure,R,R,G)
OP(OGetGlobal,R,G,X)
- OP(OSetGlobal,R_NW,G,X)
- OP(OField,R,R,C)
- OP(OSetField,R_NW,R,C)
- OP(OGetThis,R,C,X)
- OP(OSetThis,R_NW,R,X)
+ OP(OSetGlobal,G,R,X)
+ OP(OField,R,R,G)
+ OP(OSetField,R_NW,G,R)
+ OP(OGetThis,R,G,X)
+ OP(OSetThis,G,R,X)
OP(ODynGet,R,R,C)
- OP(ODynSet,R_NW,R,C)
+ OP(ODynSet,R_NW,C,R)
OP(OJTrue,R_NW,J,X)
OP(OJFalse,R_NW,J,X)
@@ -134,7 +134,7 @@ OP_BEGIN
OP(ONew,R,X,X)
OP(OArraySize,R,R,X)
- OP(OType,R,R,X)
+ OP(OType,R,G,X)
OP(OGetType,R,R,X)
OP(OGetTID,R,R,X)