orca/ext/wasm3/source/m3_compile.c

2923 lines
98 KiB
C
Raw Permalink Normal View History

2023-04-12 14:21:03 +00:00
//
// m3_compile.c
//
// Created by Steven Massey on 4/17/19.
// Copyright © 2019 Steven Massey. All rights reserved.
//
// Allow using opcodes for compilation process
#define M3_COMPILE_OPCODES
#include "m3_env.h"
#include "m3_compile.h"
#include "m3_exec.h"
#include "m3_exception.h"
#include "m3_info.h"
//----- EMIT --------------------------------------------------------------------------------------------------------------
// Returns GetPagePC () of the code page currently attached to the compilation.
static inline
pc_t  GetPC  (IM3Compilation o)
{
    IM3CodePage currentPage = o->page;

    return GetPagePC (currentPage);
}
// Makes sure the current code page has at least i_numLines (+2 for a bridge) free lines.
// When it does not, a new page is acquired, a Branch to it is emitted into the old page,
// the old page is released and the new page becomes o->page.
// Returns m3Err_mallocFailedCodePage if no page could be acquired.
static M3_NOINLINE
M3Result  EnsureCodePageNumLines  (IM3Compilation o, u32 i_numLines)
{
    i_numLines += 2; // room for Bridge

    // enough room already: nothing to do
    if (NumFreeLines (o->page) >= i_numLines)
        return m3Err_none;

    IM3CodePage page = AcquireCodePageWithCapacity (o->runtime, i_numLines);

    if (not page)
        return m3Err_mallocFailedCodePage;

    m3log (emit, "bridging new code page from: %d %p (free slots: %d) to: %d", o->page->info.sequence, GetPC (o), NumFreeLines (o->page), page->info.sequence);
    d_m3Assert (NumFreeLines (o->page) >= 2);

    // bridge: the old page ends with a branch to the start of the new one
    EmitWord (o->page, op_Branch);
    EmitWord (o->page, GetPagePC (page));

    ReleaseCodePage (o->runtime, o->page);
    o->page = page;

    return m3Err_none;
}
// Emits one operation word into the current code page, bridging to a fresh page first
// when free lines are running low. Does nothing (and succeeds) when no page is attached.
static M3_NOINLINE
M3Result  EmitOp  (IM3Compilation o, IM3Operation i_operation)
{
    d_m3Assert (i_operation or IsStackPolymorphic (o));

    // it's OK for page to be null; when compile-walking the bytecode without emitting
    if (not o->page)
        return m3Err_none;

# if d_m3EnableOpTracing
    if (i_operation != op_DumpStack)
        o->numEmits++;
# endif

    // have execution jump to a new page if slots are critically low
    M3Result result = EnsureCodePageNumLines (o, d_m3CodePageFreeLinesThreshold);

    if (result)
        return result;

    if (d_m3LogEmit) log_emit (o, i_operation);

# if d_m3RecordBacktraces
    EmitMappingEntry (o->page, o->lastOpcodeStart - o->module->wasmStart);
# endif // d_m3RecordBacktraces

    EmitWord (o->page, i_operation);

    return m3Err_none;
}
// Push an immediate constant into the M3 codestream
// Writes a 32-bit immediate into the current code page; skipped when no page is attached.
static M3_NOINLINE
void  EmitConstant32  (IM3Compilation o, const u32 i_immediate)
{
    if (not o->page)
        return;

    EmitWord32 (o->page, i_immediate);
}
// Writes a slot offset as a 32-bit word into the current code page; skipped when no page is attached.
static M3_NOINLINE
void  EmitSlotOffset  (IM3Compilation o, const i32 i_offset)
{
    if (not o->page)
        return;

    EmitWord32 (o->page, i_offset);
}
// Emits a pointer-sized word into the current code page (if one is attached).
// Returns the page PC sampled *before* the emit, i.e. where the pointer was written.
static M3_NOINLINE
pc_t  EmitPointer  (IM3Compilation o, const void * const i_pointer)
{
    pc_t location = GetPagePC (o->page);

    if (o->page)
    {
        EmitWord (o->page, i_pointer);
    }

    return location;
}
// Emits a NULL pointer placeholder and returns the location it occupies so that the
// caller can fill it in later.
static M3_NOINLINE
void *  ReservePointer  (IM3Compilation o)
{
    pc_t reserved = GetPagePC (o->page);

    (void) EmitPointer (o, NULL);

    return (void *) reserved;
}
//-------------------------------------------------------------------------------------------------------------------------
// prefix for compile-log format strings; consumes one string argument
#define d_indent " | %s"
// shorthand aliases for the value-type constants: fewer letters and numbers to stare at
// further down in the compiler table
#define i_32 c_m3Type_i32
#define i_64 c_m3Type_i64
#define f_32 c_m3Type_f32
#define f_64 c_m3Type_f64
#define none c_m3Type_none
// NOTE(review): presumably a wildcard meaning "any type"; its users are not visible in this part of the file
#define any (u8)-1
// FPOP(x) yields x when floating-point support is compiled in (d_m3HasFloat), otherwise NULL
#if d_m3HasFloat
# define FPOP(x) x
#else
# define FPOP(x) NULL
#endif
// Operation lookup tables. The first four are indexed by a value-type (see uses such as
// c_setSetOps [type]); entry 0 is NULL and the remaining entries are listed in the order
// i32, i64, f32, f64. The floating-point entries collapse to NULL when d_m3HasFloat is off.
static const IM3Operation c_preserveSetSlot [] = { NULL, op_PreserveSetSlot_i32, op_PreserveSetSlot_i64,
FPOP(op_PreserveSetSlot_f32), FPOP(op_PreserveSetSlot_f64) };
static const IM3Operation c_setSetOps [] = { NULL, op_SetSlot_i32, op_SetSlot_i64,
FPOP(op_SetSlot_f32), FPOP(op_SetSlot_f64) };
static const IM3Operation c_setGlobalOps [] = { NULL, op_SetGlobal_i32, op_SetGlobal_i64,
FPOP(op_SetGlobal_f32), FPOP(op_SetGlobal_f64) };
static const IM3Operation c_setRegisterOps [] = { NULL, op_SetRegister_i32, op_SetRegister_i64,
FPOP(op_SetRegister_f32), FPOP(op_SetRegister_f64) };
// integer select variants: first index picks i32 / i64, second picks the register/slot operand combination
static const IM3Operation c_intSelectOps [2] [4] = { { op_Select_i32_rss, op_Select_i32_srs, op_Select_i32_ssr, op_Select_i32_sss },
{ op_Select_i64_rss, op_Select_i64_srs, op_Select_i64_ssr, op_Select_i64_sss } };
#if d_m3HasFloat
// floating-point select variants: [f32 / f64] [selector in slot / selector in reg] [operand combination]
static const IM3Operation c_fpSelectOps [2] [2] [3] = { { { op_Select_f32_sss, op_Select_f32_srs, op_Select_f32_ssr }, // selector in slot
{ op_Select_f32_rss, op_Select_f32_rrs, op_Select_f32_rsr } }, // selector in reg
{ { op_Select_f64_sss, op_Select_f64_srs, op_Select_f64_ssr }, // selector in slot
{ op_Select_f64_rss, op_Select_f64_rrs, op_Select_f64_rsr } } }; // selector in reg
#endif
// all args & returns are 64-bit aligned, so use 2 slots for a d_m3Use32BitSlots=1 build
// (i.e. the number of m3slot_t slots that make up one 64-bit word)
static const u16 c_ioSlotCount = sizeof (u64) / sizeof (m3slot_t);
// Acquires a code page from the runtime and hands it back through o_codePage.
// With d_m3EnableCodePageRefCounting enabled and a function being compiled, the page's
// usage count is bumped and the page is appended to the function's codePageRefs array.
// Returns m3Err_mallocFailedCodePage when no page is available.
// Note: * o_codePage is written on every path, including the error paths.
static
M3Result AcquireCompilationCodePage (IM3Compilation o, IM3CodePage * o_codePage)
{
M3Result result = m3Err_none;
IM3CodePage page = AcquireCodePage (o->runtime);
if (page)
{
# if (d_m3EnableCodePageRefCounting)
{
if (o->function)
{
IM3Function func = o->function;
page->info.usageCount++;
// 'index' is the old count, i.e. the position of the new array element
u32 index = func->numCodePageRefs++;
// NOTE(review): if this reallocation fails, the non-NULL page is still stored in
// * o_codePage alongside the error result - confirm callers handle that
_ (m3ReallocArray (& func->codePageRefs, IM3CodePage, func->numCodePageRefs, index));
func->codePageRefs [index] = page;
}
}
# endif
}
else _throw (m3Err_mallocFailedCodePage);
_catch:
* o_codePage = page;
return result;
}
// Hands the compilation's current code page back to the runtime.
static inline
void  ReleaseCompilationCodePage  (IM3Compilation o)
{
    IM3CodePage page = o->page;

    ReleaseCodePage (o->runtime, page);
}
// Number of slots a value of i_type occupies: 2 for 64-bit types in a
// d_m3Use32BitSlots build, otherwise always 1.
static inline
u16  GetTypeNumSlots  (u8 i_type)
{
    u16 numSlots = 1;

# if d_m3Use32BitSlots
    if (Is64BitType (i_type))
        numSlots = 2;
# endif

    return numSlots;
}
// Rounds * io_slot up to a multiple of the slot count of i_type,
// so that 64-bit words land on even slots (if d_m3Use32BitSlots).
static inline
void  AlignSlotToType  (u16 * io_slot, u8 i_type)
{
    u16 alignMask = GetTypeNumSlots (i_type) - 1;
    u16 slot = * io_slot;

    slot = (slot + alignMask) & ~alignMask;

    * io_slot = slot;
}
// Index of the topmost stack entry (stackIndex - 1).
static inline
i16  GetStackTopIndex  (IM3Compilation o)
{
    d_m3Assert (o->stackIndex > o->stackFirstDynamicIndex or IsStackPolymorphic (o));

    i16 topIndex = o->stackIndex - 1;

    return topIndex;
}
// Items in the static portion of the stack (args/locals) are hidden from GetStackTypeFromTop ()
// In other words, only "real" Wasm stack items can be inspected. This is important when
// returning values, etc. and you need an accurate wasm-view of the stack.
// Type of the stack entry i_offset positions below the top (0 == top).
// Yields c_m3Type_none when the position is off the stack or falls below
// stackFirstDynamicIndex.
static
u8  GetStackTypeFromTop  (IM3Compilation o, u16 i_offset)
{
    ++i_offset;     // distance below stackIndex rather than below the top entry

    if (o->stackIndex < i_offset)
        return c_m3Type_none;

    u16 index = o->stackIndex - i_offset;

    if (index < o->stackFirstDynamicIndex)
        return c_m3Type_none;

    return o->typeStack [index];
}
// Type of the topmost stack entry, as reported by GetStackTypeFromTop ().
static inline
u8  GetStackTopType  (IM3Compilation o)
{
    const u16 topOffset = 0;

    return GetStackTypeFromTop (o, topOffset);
}
// Type of the stack entry at absolute index i_offset, or c_m3Type_none when
// the index is at or beyond stackIndex.
static inline
u8  GetStackTypeFromBottom  (IM3Compilation o, u16 i_offset)
{
    return (i_offset < o->stackIndex) ? o->typeStack [i_offset]
                                      : c_m3Type_none;
}
// True when i_slot lies in [slotFirstConstIndex, slotMaxConstIndex).
static inline
bool  IsConstantSlot  (IM3Compilation o, u16 i_slot)
{
    bool aboveStart = (i_slot >= o->slotFirstConstIndex);

    return aboveStart and i_slot < o->slotMaxConstIndex;
}

// True when the usage count recorded for i_slot is non-zero.
static inline
bool  IsSlotAllocated  (IM3Compilation o, u16 i_slot)
{
    return o->m3Slots [i_slot];
}
// True when the stack entry at i_stackIndex refers to a register alias rather than a slot.
// Indices outside [0, stackIndex) report false.
static inline
bool  IsStackIndexInRegister  (IM3Compilation o, i32 i_stackIndex)
{
    d_m3Assert (i_stackIndex < o->stackIndex or IsStackPolymorphic (o));

    bool onStack = (i_stackIndex >= 0 and i_stackIndex < o->stackIndex);

    // short-circuit keeps the array read inside the valid range
    return onStack and (o->wasmStack [i_stackIndex] >= d_m3Reg0SlotAlias);
}
// Number of stack entries above the current block's base index.
static inline
u16  GetNumBlockValuesOnStack  (IM3Compilation o)
{
    return o->stackIndex - o->block.blockStackIndex;
}

// Register-residency tests for the top three stack positions.
static inline
bool  IsStackTopInRegister  (IM3Compilation o)
{
    return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o));
}

static inline
bool  IsStackTopMinus1InRegister  (IM3Compilation o)
{
    return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o) - 1);
}

static inline
bool  IsStackTopMinus2InRegister  (IM3Compilation o)
{
    return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o) - 2);
}

// The stack top is "in a slot" whenever it is not in a register.
static inline
bool  IsStackTopInSlot  (IM3Compilation o)
{
    return not IsStackTopInRegister (o);
}

// True when i_slot is below the per-function slot limit.
static inline
bool  IsValidSlot  (u16 i_slot)
{
    return (i_slot < d_m3MaxFunctionSlots);
}
// Slot recorded for the topmost stack entry, or c_slotUnused when the top index is negative.
static inline
u16  GetStackTopSlotNumber  (IM3Compilation o)
{
    i16 top = GetStackTopIndex (o);

    if (top < 0)
        return c_slotUnused;

    return o->wasmStack [top];
}
// from bottom
// Slot recorded for the stack entry at absolute index i_stackIndex,
// or c_slotUnused when the index is at or beyond stackIndex.
static inline
u16  GetSlotForStackIndex  (IM3Compilation o, u16 i_stackIndex)
{
    d_m3Assert (i_stackIndex < o->stackIndex or IsStackPolymorphic (o));

    if (i_stackIndex >= o->stackIndex)
        return c_slotUnused;

    return o->wasmStack [i_stackIndex];
}
// Last slot occupied by the stack entry at i_stackIndex: the base slot for one-slot
// types, base + 1 for two-slot types. c_slotUnused is passed through unchanged.
static inline
u16  GetExtraSlotForStackIndex  (IM3Compilation o, u16 i_stackIndex)
{
    u16 slot = GetSlotForStackIndex (o, i_stackIndex);

    if (slot == c_slotUnused)
        return slot;

    u8 type = GetStackTypeFromBottom (o, i_stackIndex);

    return slot + (GetTypeNumSlots (type) - 1);
}
// Raises o->maxStackSlots so that it covers i_slot. Only tracked while compiling a function.
static inline
void  TouchSlot  (IM3Compilation o, u16 i_slot)
{
    if (not o->function)
        return;

    // op_Entry uses this value to track and detect stack overflow
    if (i_slot + 1 > o->maxStackSlots)
        o->maxStackSlots = i_slot + 1;
}
// Flags a single, currently free slot as in use (usage count 1) and updates the
// high-water marks.
static inline
void  MarkSlotAllocated  (IM3Compilation o, u16 i_slot)
{
    d_m3Assert (o->m3Slots [i_slot] == 0); // shouldn't be already allocated

    o->m3Slots [i_slot] = 1;

    if (i_slot + 1 > o->slotMaxAllocatedIndexPlusOne)
        o->slotMaxAllocatedIndexPlusOne = i_slot + 1;

    TouchSlot (o, i_slot);
}
// Marks i_numSlots consecutive slots, starting at i_slot, as allocated.
static inline
void  MarkSlotsAllocated  (IM3Compilation o, u16 i_slot, u16 i_numSlots)
{
    for (u16 n = 0; n < i_numSlots; ++n)
    {
        MarkSlotAllocated (o, i_slot + n);
    }
}
// Marks as allocated however many slots a value of i_type needs, starting at i_slot.
static inline
void  MarkSlotsAllocatedByType  (IM3Compilation o, u16 i_slot, u8 i_type)
{
    MarkSlotsAllocated (o, i_slot, GetTypeNumSlots (i_type));
}
// Finds the first free run of slots large enough for i_type inside
// [i_startSlot, i_endSlot), marks it allocated and returns its first slot in * o_slot.
// The start is aligned to the type and the scan advances by the type's slot count.
// Returns m3Err_functionStackOverflow (leaving * o_slot untouched) when nothing is free.
static
M3Result  AllocateSlotsWithinRange  (IM3Compilation o, u16 * o_slot, u8 i_type, u16 i_startSlot, u16 i_endSlot)
{
    u16 numSlots = GetTypeNumSlots (i_type);
    u16 lastOffset = numSlots - 1;

    AlignSlotToType (& i_startSlot, i_type);

    // search for 1 or 2 consecutive slots in the execution stack;
    // stepping by numSlots keeps 2-slot allocations even-aligned
    for (u16 slot = i_startSlot; slot + lastOffset < i_endSlot; slot += numSlots)
    {
        bool firstFree = (o->m3Slots [slot] == 0);

        if (firstFree and o->m3Slots [slot + lastOffset] == 0)
        {
            MarkSlotsAllocated (o, slot, numSlots);
            * o_slot = slot;

            return m3Err_none;
        }
    }

    return m3Err_functionStackOverflow;
}
// Allocates slots for i_type from the dynamic region: [slotFirstDynamicIndex, d_m3MaxFunctionSlots).
static inline
M3Result  AllocateSlots  (IM3Compilation o, u16 * o_slot, u8 i_type)
{
    u16 firstSlot = o->slotFirstDynamicIndex;
    u16 endSlot   = d_m3MaxFunctionSlots;

    return AllocateSlotsWithinRange (o, o_slot, i_type, firstSlot, endSlot);
}
// Allocates slots for i_type from the constant region, which starts at slotFirstConstIndex
// and ends at the dynamic region or after d_m3MaxConstantTableSize slots, whichever is lower.
static inline
M3Result  AllocateConstantSlots  (IM3Compilation o, u16 * o_slot, u8 i_type)
{
    u16 tableEnd  = o->slotFirstConstIndex + d_m3MaxConstantTableSize;
    u16 searchEnd = M3_MIN(o->slotFirstDynamicIndex, tableEnd);

    return AllocateSlotsWithinRange (o, o_slot, i_type, o->slotFirstConstIndex, searchEnd);
}
// TOQUE: this usage count system could be eliminated. real world code doesn't frequently trigger it. just copy to multiple
// unique slots.
// Adds one reference to an already-allocated slot. The count is capped at 0xFF;
// attempting to exceed it is reported as an error.
static inline
M3Result  IncrementSlotUsageCount  (IM3Compilation o, u16 i_slot)
{
    d_m3Assert (i_slot < d_m3MaxFunctionSlots);
    d_m3Assert (o->m3Slots [i_slot] > 0);

    // OPTZ (memory): 'm3Slots' could still be fused with 'typeStack' if 4 bits were used to indicate: [0,1,2,many]. The many-case
    // would scan 'wasmStack' to determine the actual usage count
    if (o->m3Slots [i_slot] >= 0xFF)
        return "slot usage count overflow";

    o->m3Slots [i_slot]++;

    return m3Err_none;
}
// Drops one reference from each slot occupied by a value of i_type that starts at i_slot.
static inline
void  DeallocateSlot  (IM3Compilation o, i16 i_slot, u8 i_type)
{
    d_m3Assert (i_slot >= o->slotFirstDynamicIndex);
    d_m3Assert (i_slot < o->slotMaxAllocatedIndexPlusOne);

    u16 remaining = GetTypeNumSlots (i_type);

    while (remaining--)
    {
        d_m3Assert (o->m3Slots [i_slot]);

        o->m3Slots [i_slot] -= 1;
        ++i_slot;
    }
}
// True when the register that would hold a value of i_type (selected via IsFpType) is occupied.
static inline
bool  IsRegisterTypeAllocated  (IM3Compilation o, u8 i_type)
{
    u32 regSelect = IsFpType (i_type);

    return IsRegisterAllocated (o, regSelect);
}
// Records that register i_register now holds the stack entry at i_stackIndex.
// The index is stored +1.
static inline
void  AllocateRegister  (IM3Compilation o, u32 i_register, u16 i_stackIndex)
{
    d_m3Assert (not IsRegisterAllocated (o, i_register));

    u16 indexPlusOne = i_stackIndex + 1;

    o->regStackIndexPlusOne [i_register] = indexPlusOne;
}
// Marks register i_register as no longer holding any stack entry.
static inline
void  DeallocateRegister  (IM3Compilation o, u32 i_register)
{
    d_m3Assert (IsRegisterAllocated (o, i_register));

    o->regStackIndexPlusOne [i_register] = c_m3RegisterUnallocated;
}
// Stack index of the entry currently held in register i_register (undoes the stored +1).
static inline
u16  GetRegisterStackIndex  (IM3Compilation o, u32 i_register)
{
    d_m3Assert (IsRegisterAllocated (o, i_register));

    u16 indexPlusOne = o->regStackIndexPlusOne [i_register];

    return indexPlusOne - 1;
}
// Lowers slotMaxAllocatedIndexPlusOne past any trailing unallocated slots (never below
// slotFirstDynamicIndex) and returns the updated value.
u16  GetMaxUsedSlotPlusOne  (IM3Compilation o)
{
    while (o->slotMaxAllocatedIndexPlusOne > o->slotFirstDynamicIndex
           and not IsSlotAllocated (o, o->slotMaxAllocatedIndexPlusOne - 1))
    {
        o->slotMaxAllocatedIndexPlusOne--;
    }

# ifdef DEBUG
    // everything at or above the high-water mark must be free
    for (u16 slot = o->slotMaxAllocatedIndexPlusOne; slot < d_m3MaxFunctionSlots; slot++)
    {
        d_m3Assert (o->m3Slots [slot] == 0);
    }
# endif

    return o->slotMaxAllocatedIndexPlusOne;
}
// If the register that holds values of i_registerType (register 1 for fp types, register 0
// otherwise) is currently occupied, spills it: the register is freed, a new slot is
// allocated for the stack entry that lived in it, the stack entry is repointed at that slot
// and a SetSlot operation (followed by the slot offset) is emitted.
// Does nothing when the register is free. Returns any allocation or emit error.
static
M3Result PreserveRegisterIfOccupied (IM3Compilation o, u8 i_registerType)
{
M3Result result = m3Err_none;
u32 regSelect = IsFpType (i_registerType);
if (IsRegisterAllocated (o, regSelect))
{
// which stack entry currently lives in the register
u16 stackIndex = GetRegisterStackIndex (o, regSelect);
DeallocateRegister (o, regSelect);
u8 type = GetStackTypeFromBottom (o, stackIndex);
// and point to an exec slot
u16 slot = c_slotUnused;
_ (AllocateSlots (o, & slot, type));
o->wasmStack [stackIndex] = slot;
// emitted op is selected by the value's type, and is followed by the destination slot
_ (EmitOp (o, c_setSetOps [type]));
EmitSlotOffset (o, slot);
}
_catch: return result;
}
// all values must be in slots before entering loop, if, and else blocks
// otherwise they'd end up preserve-copied in the block to probably different locations (if/else)
// Spills both registers (fp first, then integer) to slots if they are occupied.
static inline
M3Result  PreserveRegisters  (IM3Compilation o)
{
    M3Result result = m3Err_none;

    _ (PreserveRegisterIfOccupied (o, c_m3Type_f64));
    _ (PreserveRegisterIfOccupied (o, c_m3Type_i64));

    _catch:
    return result;
}
// Spills any occupied register whose value is not the topmost stack entry.
// Nothing happens when the stack top index is negative.
static
M3Result  PreserveNonTopRegisters  (IM3Compilation o)
{
    M3Result result = m3Err_none;

    i16 stackTop = GetStackTopIndex (o);

    if (stackTop < 0)
        return result;

    // r0
    if (IsRegisterAllocated (o, 0) and GetRegisterStackIndex (o, 0) != stackTop)
    {
        _ (PreserveRegisterIfOccupied (o, c_m3Type_i64));
    }

    // fp0
    if (IsRegisterAllocated (o, 1) and GetRegisterStackIndex (o, 1) != stackTop)
    {
        _ (PreserveRegisterIfOccupied (o, c_m3Type_f64));
    }

    _catch:
    return result;
}
//----------------------------------------------------------------------------------------------------------------------
// Pushes an entry (type + slot) onto the compile-time stack.
// i_slot may be a register alias, in which case the matching register is marked as
// holding the new stack index.
// Returns m3Err_functionStackOverflow when the stack is full, and m3Err_unknownOpcode
// for floating-point types in a build without d_m3HasFloat.
static
M3Result Push (IM3Compilation o, u8 i_type, u16 i_slot)
{
M3Result result = m3Err_none;
#if !d_m3HasFloat
if (i_type == c_m3Type_f32 || i_type == c_m3Type_f64) {
return m3Err_unknownOpcode;
}
#endif
// note: stackIndex is advanced even when the overflow error below is returned
u16 stackIndex = o->stackIndex++; // printf ("push: %d\n", (i32) i);
if (stackIndex < d_m3MaxFunctionStackHeight)
{
o->wasmStack [stackIndex] = i_slot;
o->typeStack [stackIndex] = i_type;
if (IsRegisterSlotAlias (i_slot))
{
// 0 selects the integer register, 1 the fp register
u32 regSelect = IsFpRegisterSlotAlias (i_slot);
AllocateRegister (o, regSelect, stackIndex);
}
if (d_m3LogWasmStack) dump_type_stack (o);
}
else result = m3Err_functionStackOverflow;
return result;
}
// Pushes a stack entry of i_type that lives in a register: the fp register alias for
// fp types, the integer register alias otherwise.
static inline
M3Result  PushRegister  (IM3Compilation o, u8 i_type)
{
    M3Result result = m3Err_none;

    d_m3Assert ((u16) d_m3Reg0SlotAlias > (u16) d_m3MaxFunctionSlots);

    u16 slot = d_m3Reg0SlotAlias;

    if (IsFpType (i_type))
        slot = d_m3Fp0SlotAlias;

    d_m3Assert (i_type or IsStackPolymorphic (o));

    _ (Push (o, i_type, slot));

    _catch:
    return result;
}
// Removes the topmost stack entry of the current block, releasing the register or the
// dynamic slot it occupied. Popping below the block base is an underrun error unless
// the stack is polymorphic, in which case it is silently ignored.
static
M3Result  Pop  (IM3Compilation o)
{
    if (o->stackIndex <= o->block.blockStackIndex)
    {
        if (IsStackPolymorphic (o))
            return m3Err_none;

        return m3Err_functionStackUnderrun;
    }

    o->stackIndex--;

    u16 slot = o->wasmStack [o->stackIndex];
    u8 type = o->typeStack [o->stackIndex];

    if (IsRegisterSlotAlias (slot))
    {
        DeallocateRegister (o, IsFpRegisterSlotAlias (slot));
    }
    else if (slot >= o->slotFirstDynamicIndex)
    {
        // slots below the dynamic region are not released here
        DeallocateSlot (o, slot, type);
    }

    return m3Err_none;
}
// Pops the stack top after checking that it has type i_type. The check is waived when
// the current block is polymorphic. Throws m3Err_typeMismatch otherwise.
static
M3Result  PopType  (IM3Compilation o, u8 i_type)
{
    M3Result result = m3Err_none;

    u8 topType = GetStackTopType (o);

    _throwif (m3Err_typeMismatch, i_type != topType and not o->block.isPolymorphic);

    _ (Pop (o));

    _catch:
    return result;
}
// Allocates a dynamic slot for i_type, pushes it on the stack and, when i_doEmit is set,
// emits the slot offset into the code stream.
static
M3Result  _PushAllocatedSlotAndEmit  (IM3Compilation o, u8 i_type, bool i_doEmit)
{
    M3Result result = m3Err_none;

    u16 newSlot = c_slotUnused;

    _ (AllocateSlots (o, & newSlot, i_type));
    _ (Push (o, i_type, newSlot));

    if (i_doEmit)
    {
        EmitSlotOffset (o, newSlot);
    }

    _catch:
    return result;
}
// Allocates and pushes a slot for i_type, and emits its offset.
static inline
M3Result  PushAllocatedSlotAndEmit  (IM3Compilation o, u8 i_type)
{
    const bool doEmit = true;

    return _PushAllocatedSlotAndEmit (o, i_type, doEmit);
}
// Allocates and pushes a slot for i_type without emitting anything.
static inline
M3Result  PushAllocatedSlot  (IM3Compilation o, u8 i_type)
{
    const bool doEmit = false;

    return _PushAllocatedSlotAndEmit (o, i_type, doEmit);
}
// Pushes the constant i_word of type i_type onto the compile-time stack.
// Strategy, in order:
//   1. reuse an existing constant-table slot that already holds the same value
//   2. otherwise allocate a new constant-table slot and store the value there
//   3. if the table is full, emit an inline Const32/Const64 operation that loads the
//      value into a newly allocated dynamic slot
// When no code page is attached the function returns immediately without pushing anything.
// Note: for 32-bit types the duplicate search compares i_word against a zero-extended
// u32 read from the table, so only values whose upper 32 bits are zero can match.
static
M3Result PushConst (IM3Compilation o, u64 i_word, u8 i_type)
{
M3Result result = m3Err_none;
// Early-exit if we're not emitting
if (!o->page) return result;
bool matchFound = false;
bool is64BitType = Is64BitType (i_type);
u16 numRequiredSlots = GetTypeNumSlots (i_type);
u16 numUsedConstSlots = o->slotMaxConstIndex - o->slotFirstConstIndex;
// search for duplicate matching constant slot to reuse
if (numRequiredSlots == 2 and numUsedConstSlots >= 2)
{
// two-slot constants sit on even-aligned slots, so step through the table in pairs
u16 firstConstSlot = o->slotFirstConstIndex;
AlignSlotToType (& firstConstSlot, c_m3Type_i64);
for (u16 slot = firstConstSlot; slot < o->slotMaxConstIndex - 1; slot += 2)
{
if (IsSlotAllocated (o, slot) and IsSlotAllocated (o, slot + 1))
{
// NOTE(review): reads the constants array through a u64 pointer - confirm alignment/aliasing is acceptable on all targets
u64 constant = * (u64 *) & o->constants [slot - o->slotFirstConstIndex];
if (constant == i_word)
{
matchFound = true;
_ (Push (o, i_type, slot));
break;
}
}
}
}
else if (numRequiredSlots == 1)
{
// one-slot constants: every used table entry is a candidate
for (u16 i = 0; i < numUsedConstSlots; ++i)
{
u16 slot = o->slotFirstConstIndex + i;
if (IsSlotAllocated (o, slot))
{
u64 constant;
if (is64BitType) {
constant = * (u64 *) & o->constants [i];
} else {
constant = * (u32 *) & o->constants [i];
}
if (constant == i_word)
{
matchFound = true;
_ (Push (o, i_type, slot));
break;
}
}
}
}
if (not matchFound)
{
u16 slot = c_slotUnused;
result = AllocateConstantSlots (o, & slot, i_type);
if (result || slot == c_slotUnused) // no more constant table space; use inline constants
{
// the allocation failure is not an error: fall back to an inline constant op
result = m3Err_none;
if (is64BitType) {
_ (EmitOp (o, op_Const64));
EmitWord64 (o->page, i_word);
} else {
_ (EmitOp (o, op_Const32));
EmitWord32 (o->page, (u32) i_word);
}
// destination slot for the inline constant; its offset follows the immediate
_ (PushAllocatedSlotAndEmit (o, i_type));
}
else
{
// store the value in the newly allocated table entry
u16 constTableIndex = slot - o->slotFirstConstIndex;
d_m3Assert(constTableIndex < d_m3MaxConstantTableSize);
if (is64BitType)
{
u64 * constant = (u64 *) & o->constants [constTableIndex];
* constant = i_word;
}
else
{
u32 * constant = (u32 *) & o->constants [constTableIndex];
* constant = (u32) i_word;
}
_ (Push (o, i_type, slot));
// extend the used portion of the table to cover the new entry
o->slotMaxConstIndex = M3_MAX (slot + numRequiredSlots, o->slotMaxConstIndex);
}
}
_catch: return result;
}
// Emits the slot offset of the stack top (nothing is emitted for a register value),
// then pops it.
static inline
M3Result  EmitSlotNumOfStackTopAndPop  (IM3Compilation o)
{
    bool inRegister = IsStackTopInRegister (o);

    // no emit if value is in register
    if (not inRegister)
    {
        EmitSlotOffset (o, GetStackTopSlotNumber (o));
    }

    return Pop (o);
}
// Or, maybe: EmitTrappingOp
// Placeholder: currently records nothing and always succeeds.
M3Result  AddTrapRecord  (IM3Compilation o)
{
    (void) o;   // intentionally unused until trap records are implemented

    return m3Err_none;
}
// Pops every entry that belongs to the current block, leaving the stack at the block base.
static
M3Result  UnwindBlockStack  (IM3Compilation o)
{
    M3Result result = m3Err_none;

    u32 numPopped = 0;

    for (; o->stackIndex > o->block.blockStackIndex; ++numPopped)
    {
        _ (Pop (o));
    }

    if (numPopped)
    {
        m3log (compile, "unwound stack top: %d", numPopped);
    }

    _catch:
    return result;
}
// Flags the current block as polymorphic and discards the block's stack entries.
static inline
M3Result  SetStackPolymorphic  (IM3Compilation o)
{
    o->block.isPolymorphic = true;
    m3log (compile, "stack set polymorphic");

    return UnwindBlockStack (o);
}
// Resolves the current block's pending branch patches. The patch locations form a linked
// list threaded through the code itself: each location holds the address of the next one.
// Every location is overwritten with the current PC and the block's list is cleared.
static
void  PatchBranches  (IM3Compilation o)
{
    pc_t target = GetPC (o);

    pc_t location = o->block.patches;
    o->block.patches = NULL;

    for (pc_t next; location; location = next)
    {
        m3log (compile, "patching location: %p to pc: %p", location, target);

        next = * (pc_t *) location;     // read the link before it is overwritten
        * (pc_t *) location = target;
    }
}
//-------------------------------------------------------------------------------------------------------------------------
// Emits code that copies the value of the stack entry at i_stackIndex into i_destSlot.
// A register value uses the type's SetSlot op (operand: dest slot); a slot value uses
// CopySlot_32/64 (operands: dest slot, source slot). The compile-time stack is not modified.
static
M3Result  CopyStackIndexToSlot  (IM3Compilation o, u16 i_destSlot, u16 i_stackIndex)  // NoPushPop
{
    M3Result result = m3Err_none;

    u8 type = GetStackTypeFromBottom (o, i_stackIndex);
    bool inRegister = IsStackIndexInRegister (o, i_stackIndex);

    IM3Operation op = Is64BitType (type) ? op_CopySlot_64 : op_CopySlot_32;

    if (inRegister)
        op = c_setSetOps [type];

    _ (EmitOp (o, op));
    EmitSlotOffset (o, i_destSlot);

    if (not inRegister)
    {
        EmitSlotOffset (o, GetSlotForStackIndex (o, i_stackIndex));
    }

    _catch:
    return result;
}
// Emits code that copies the topmost stack value into i_destSlot. The stack is not modified.
static
M3Result  CopyStackTopToSlot  (IM3Compilation o, u16 i_destSlot)  // NoPushPop
{
    M3Result result = m3Err_none;

    i16 topIndex = GetStackTopIndex (o);

    _ (CopyStackIndexToSlot (o, i_destSlot, (u16) topIndex));

    _catch:
    return result;
}
// a copy-on-write strategy is used with locals. when a get local occurs, it's not copied anywhere. the stack
// entry just has an index pointer to that local memory slot.
// then, when a previously referenced local is set, the current value needs to be preserved for those references
// TODO: consider getting rid of these specialized operations: PreserveSetSlot & PreserveCopySlot.
// They likely just take up space (which seems to reduce performance) without improving performance.
// Emits a combined "preserve + copy" operation for the stack top: the operation is given
// the destination slot, the source slot (only when the top is in a slot rather than a
// register) and the preserve slot, in that order.
// The compile-time stack is not modified.
static
M3Result PreservedCopyTopSlot (IM3Compilation o, u16 i_destSlot, u16 i_preserveSlot)
{
M3Result result = m3Err_none; d_m3Assert (i_destSlot != i_preserveSlot);
IM3Operation op;
u8 type = GetStackTopType (o);
// register source: per-type PreserveSetSlot op; slot source: PreserveCopySlot by width
if (IsStackTopInRegister (o))
{
op = c_preserveSetSlot [type];
}
else op = Is64BitType (type) ? op_PreserveCopySlot_64 : op_PreserveCopySlot_32;
_ (EmitOp (o, op));
EmitSlotOffset (o, i_destSlot);
// the source operand is only emitted when the value lives in a slot
if (IsStackTopInSlot (o))
EmitSlotOffset (o, GetStackTopSlotNumber (o));
EmitSlotOffset (o, i_preserveSlot);
_catch: return result;
}
// If the stack top lives in a slot, emits code that loads it into the register for its
// type, spilling the register's current occupant first. When i_updateStack is set the
// compile-time stack entry is replaced by a register entry as well; otherwise only code
// is emitted. Does nothing when the top is already in a register.
static
M3Result CopyStackTopToRegister (IM3Compilation o, bool i_updateStack)
{
M3Result result = m3Err_none;
if (IsStackTopInSlot (o))
{
u8 type = GetStackTopType (o);
// make room: move whatever is in the target register out to a slot
_ (PreserveRegisterIfOccupied (o, type));
IM3Operation op = c_setRegisterOps [type];
_ (EmitOp (o, op));
EmitSlotOffset (o, GetStackTopSlotNumber (o));
if (i_updateStack)
{
// swap the slot entry for a register entry of the same type
_ (PopType (o, type));
_ (PushRegister (o, type));
}
}
_catch: return result;
}
// if local is unreferenced, o_preservedSlotNumber will be equal to i_localSlot on return
// Scans the stack for entries that still refer to the local's slot (i_localSlot) and
// redirects them to a newly allocated "preserve" slot, so that the local can be
// overwritten without disturbing those references.
// The scan starts at the base of the current block; enclosing scopes are walked outward
// for as long as the scope's opcode is c_waOp_block, taking the base of the scope the walk stops at.
// On return * o_preservedSlotNumber is the preserve slot, or i_localSlot itself when no
// reference was found. Each additional reference bumps the preserve slot's usage count.
static
M3Result FindReferencedLocalWithinCurrentBlock (IM3Compilation o, u16 * o_preservedSlotNumber, u32 i_localSlot)
{
M3Result result = m3Err_none;
IM3CompilationScope scope = & o->block;
i16 startIndex = scope->blockStackIndex;
while (scope->opcode == c_waOp_block)
{
scope = scope->outer;
if (not scope)
break;
startIndex = scope->blockStackIndex;
}
// "no reference found" is signalled by leaving this equal to the local's slot
* o_preservedSlotNumber = (u16) i_localSlot;
for (u32 i = startIndex; i < o->stackIndex; ++i)
{
if (o->wasmStack [i] == i_localSlot)
{
// first reference allocates the preserve slot; later ones share it
if (* o_preservedSlotNumber == i_localSlot)
{
u8 type = GetStackTypeFromBottom (o, i); d_m3Assert (type != c_m3Type_none)
_ (AllocateSlots (o, o_preservedSlotNumber, type));
}
else
_ (IncrementSlotUsageCount (o, * o_preservedSlotNumber));
o->wasmStack [i] = * o_preservedSlotNumber;
}
}
_catch: return result;
}
// Finds the scope i_depth levels out from the current block (0 == current block) and
// returns it in * o_scope. Fails with "invalid block depth" when the chain of outer
// scopes is shorter than i_depth; * o_scope is left untouched in that case.
static
M3Result  GetBlockScope  (IM3Compilation o, IM3CompilationScope * o_scope, u32 i_depth)
{
    M3Result result = m3Err_none;

    IM3CompilationScope scope = & o->block;

    for (u32 remaining = i_depth; remaining > 0; --remaining)
    {
        scope = scope->outer;
        _throwif ("invalid block depth", not scope);
    }

    * o_scope = scope;

    _catch:
    return result;
}
// Recursively emits copies that move the stack values in [i_stackIndex, i_endStackIndex)
// into the slots recorded at stack indices i_targetSlotStackIndex, i_targetSlotStackIndex + 1, ...
// Before a value is copied, the remaining (later) source values are checked: if one of
// them currently occupies the target slot, it is first moved to i_tempSlot so that it is
// not overwritten. wasmStack is temporarily rewritten to reflect the moves and is restored
// as the recursion unwinds, so the compile-time stack is unchanged on return.
// Throws m3Err_functionStackOverflow when a temp slot beyond d_m3MaxFunctionSlots would be needed.
static
M3Result CopyStackSlotsR (IM3Compilation o, u16 i_targetSlotStackIndex, u16 i_stackIndex, u16 i_endStackIndex, u16 i_tempSlot)
{
M3Result result = m3Err_none;
if (i_stackIndex < i_endStackIndex)
{
u16 srcSlot = GetSlotForStackIndex (o, i_stackIndex);
u8 type = GetStackTypeFromBottom (o, i_stackIndex);
u16 numSlots = GetTypeNumSlots (type);
u16 extraSlot = numSlots - 1;
u16 targetSlot = GetSlotForStackIndex (o, i_targetSlotStackIndex);
// defaults chosen so that the two "restore" assignments at the bottom are harmless
// when no collision was found
u16 preserveIndex = i_stackIndex;
u16 collisionSlot = srcSlot;
if (targetSlot != srcSlot)
{
// search for collisions
u16 checkIndex = i_stackIndex + 1;
while (checkIndex < i_endStackIndex)
{
u16 otherSlot1 = GetSlotForStackIndex (o, checkIndex);
u16 otherSlot2 = GetExtraSlotForStackIndex (o, checkIndex);
// overlap between the slot(s) about to be written and the slot(s) of a later source
if (targetSlot == otherSlot1 or
targetSlot == otherSlot2 or
targetSlot + extraSlot == otherSlot1)
{
_throwif (m3Err_functionStackOverflow, i_tempSlot >= d_m3MaxFunctionSlots);
_ (CopyStackIndexToSlot (o, i_tempSlot, checkIndex));
o->wasmStack [checkIndex] = i_tempSlot;
// always advance by a 64-bit value's slot count, whatever the moved value's type
i_tempSlot += GetTypeNumSlots (c_m3Type_i64);
TouchSlot (o, i_tempSlot - 1);
// restore this on the way back down
preserveIndex = checkIndex;
collisionSlot = otherSlot1;
// only the first colliding entry is handled at this recursion level
break;
}
++checkIndex;
}
_ (CopyStackIndexToSlot (o, targetSlot, i_stackIndex)); m3log (compile, " copying slot: %d to slot: %d", srcSlot, targetSlot);
o->wasmStack [i_stackIndex] = targetSlot;
}
_ (CopyStackSlotsR (o, i_targetSlotStackIndex + 1, i_stackIndex + 1, i_endStackIndex, i_tempSlot));
// restore the stack state
// note: skipped when an error jumps to _catch above
o->wasmStack [i_stackIndex] = srcSlot;
o->wasmStack [preserveIndex] = collisionSlot;
}
_catch:
return result;
}
// Emits the copies needed to place the values on top of the stack into the slots that
// i_targetBlock expects them in. For a branch to a loop the values are the block's
// parameters; in every other case they are the block's results. The expected slots are
// read from the stack records starting at the target's exitStackIndex (offset by the
// parameter count for results).
// Throws m3Err_typeCountMismatch when the current block does not hold the required number
// of values (at least that many for a branch, exactly that many otherwise).
static
M3Result ResolveBlockResults (IM3Compilation o, IM3CompilationScope i_targetBlock, bool i_isBranch)
{
M3Result result = m3Err_none; if (d_m3LogWasmStack) dump_type_stack (o);
bool isLoop = (i_targetBlock->opcode == c_waOp_loop and i_isBranch);
u16 numParams = GetFuncTypeNumParams (i_targetBlock->type);
u16 numResults = GetFuncTypeNumResults (i_targetBlock->type);
u16 slotRecords = i_targetBlock->exitStackIndex;
u16 numValues;
if (not isLoop)
{
numValues = numResults;
slotRecords += numParams;
}
else numValues = numParams;
u16 blockHeight = GetNumBlockValuesOnStack (o);
_throwif (m3Err_typeCountMismatch, i_isBranch ? (blockHeight < numValues) : (blockHeight != numValues));
if (numValues)
{
u16 endIndex = GetStackTopIndex (o) + 1;
// a floating-point top value is delivered in the register rather than in a slot,
// so it is excluded from the slot copies below
if (not isLoop and IsFpType (GetStackTopType (o)))
{
_ (CopyStackTopToRegister (o, false));
--endIndex;
}
// TODO: tempslot affects maxStackSlots, so can grow unnecessarily each time.
u16 tempSlot = o->maxStackSlots;// GetMaxUsedSlotPlusOne (o); doesn't work cause can collide with slotRecords
AlignSlotToType (& tempSlot, c_m3Type_i64);
_ (CopyStackSlotsR (o, slotRecords, endIndex - numValues, endIndex, tempSlot));
if (d_m3LogWasmStack) dump_type_stack (o);
}
_catch: return result;
}
// Type-checks the function's return values against the top of the stack and emits the
// copies that move them into the return slots (slot 0 upward, c_ioSlotCount slots apiece).
// With a polymorphic stack, missing values are assumed to have the right type and no
// copies are emitted. When i_isBranch is false the returned values are popped afterwards.
// Throws m3Err_typeCountMismatch / m3Err_typeMismatch on validation failure.
static
M3Result ReturnValues (IM3Compilation o, IM3CompilationScope i_functionBlock, bool i_isBranch)
{
M3Result result = m3Err_none; if (d_m3LogWasmStack) dump_type_stack (o);
u16 numReturns = GetFuncTypeNumResults (i_functionBlock->type); // could just o->function too...
u16 blockHeight = GetNumBlockValuesOnStack (o);
if (not IsStackPolymorphic (o))
_throwif (m3Err_typeCountMismatch, i_isBranch ? (blockHeight < numReturns) : (blockHeight != numReturns));
if (numReturns)
{
// return slots like args are 64-bit aligned
u16 returnSlot = numReturns * c_ioSlotCount;
u16 stackTop = GetStackTopIndex (o);
// walk from the stack top downward, i.e. from the last result to the first
for (u16 i = 0; i < numReturns; ++i)
{
u8 returnType = GetFuncTypeResultType (i_functionBlock->type, numReturns - 1 - i);
u8 stackType = GetStackTypeFromTop (o, i); // using FromTop so that only dynamic items are checked
if (IsStackPolymorphic (o) and stackType == c_m3Type_none)
stackType = returnType;
_throwif (m3Err_typeMismatch, returnType != stackType);
if (not IsStackPolymorphic (o))
{
returnSlot -= c_ioSlotCount;
_ (CopyStackIndexToSlot (o, returnSlot, stackTop--));
}
}
if (not i_isBranch)
{
while (numReturns--)
_ (Pop (o));
}
}
_catch: return result;
}
//-------------------------------------------------------------------------------------------------------------------------
// Compiles an i32 constant: reads the signed LEB immediate from the wasm stream and
// pushes it as a c_m3Type_i32 constant.
static
M3Result  Compile_Const_i32  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result;

    i32 value;
    _ (ReadLEB_i32 (& value, & o->wasm, o->wasmEnd));

    // Hand PushConst the raw 32-bit pattern. PushConst takes a u64; letting a negative i32
    // convert implicitly would sign-extend it and set the upper 32 bits. PushConst's
    // duplicate search compares the word against a zero-extended u32 read from the constant
    // table, so a sign-extended negative value could never match an existing entry and every
    // occurrence would consume a new constant slot. The value stored/emitted is (u32) of the
    // word either way, so the generated constant is unchanged.
    _ (PushConst (o, (u32) value, c_m3Type_i32));                  m3log (compile, d_indent " (const i32 = %" PRIi32 ")", get_indention_string (o), value);

    _catch: return result;
}
// Compiles an i64 constant: reads the signed LEB immediate from the wasm stream and
// pushes it as a c_m3Type_i64 constant.
static
M3Result  Compile_Const_i64  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result = m3Err_none;

    i64 value;
    _ (ReadLEB_i64 (& value, & o->wasm, o->wasmEnd));

    u64 word = (u64) value;
    _ (PushConst (o, word, c_m3Type_i64));                          m3log (compile, d_indent " (const i64 = %" PRIi64 ")", get_indention_string (o), value);

    _catch:
    return result;
}
#if d_m3ImplementFloat
// Compiles an f32 constant: reads the float from the wasm stream and pushes its
// 32-bit representation as a c_m3Type_f32 constant.
static
M3Result  Compile_Const_f32  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result = m3Err_none;

    // the union gives access to the float's bit pattern
    union { u32 u; f32 f; } bits = { 0 };

    _ (Read_f32 (& bits.f, & o->wasm, o->wasmEnd));                 m3log (compile, d_indent " (const f32 = %" PRIf32 ")", get_indention_string (o), bits.f);
    _ (PushConst (o, bits.u, c_m3Type_f32));

    _catch:
    return result;
}
// Compiles an f64 constant: reads the double from the wasm stream and pushes its
// 64-bit representation as a c_m3Type_f64 constant.
static
M3Result  Compile_Const_f64  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result = m3Err_none;

    // the union gives access to the double's bit pattern
    union { u64 u; f64 f; } bits = { 0 };

    _ (Read_f64 (& bits.f, & o->wasm, o->wasmEnd));                 m3log (compile, d_indent " (const f64 = %" PRIf64 ")", get_indention_string (o), bits.f);
    _ (PushConst (o, bits.u, c_m3Type_f64));

    _catch:
    return result;
}
#endif
#if d_m3CascadedOpcodes
// Handles a prefixed (two-byte) opcode: reads the second byte, combines it with the
// prefix into a 16-bit opcode (prefix in the high byte) and dispatches to that opcode's
// compiler function.
// Throws m3Err_unknownOpcode when there is no info for the combined opcode, and
// m3Err_noCompiler when the info has no compiler function.
static
M3Result Compile_ExtendedOpcode (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
u8 opcode;
_ (Read_u8 (& opcode, & o->wasm, o->wasmEnd)); m3log (compile, d_indent " (FC: %" PRIi32 ")", get_indention_string (o), opcode);
i_opcode = (i_opcode << 8) | opcode;
//printf("Extended opcode: 0x%x\n", i_opcode);
IM3OpInfo opInfo = GetOpInfo (i_opcode);
_throwif (m3Err_unknownOpcode, not opInfo);
M3Compiler compiler = opInfo->compiler;
_throwif (m3Err_noCompiler, not compiler);
_ ((* compiler) (o, i_opcode));
// record the combined opcode as the most recently compiled one
o->previousOpcode = i_opcode;
} _catch: return result;
}
#endif
// Compiles 'return': moves the function's results into the return slots, emits op_Return
// and marks the rest of the block as polymorphic. Nothing is emitted when the stack is
// already polymorphic.
static
M3Result  Compile_Return  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result = m3Err_none;

    if (IsStackPolymorphic (o))
        return result;

    // the function's own scope sits block.depth levels out from the current block
    IM3CompilationScope functionScope;
    _ (GetBlockScope (o, & functionScope, o->block.depth));

    _ (ReturnValues (o, functionScope, true));
    _ (EmitOp (o, op_Return));
    _ (SetStackPolymorphic (o));

    _catch:
    return result;
}
// Placeholder for end-of-block validation; no checks are implemented and it always succeeds.
static
M3Result  ValidateBlockEnd  (IM3Compilation o)
{
    (void) o;   // unused until validation is implemented

    return m3Err_none;
}
// Compiles 'end'. Only the end of the function body (block depth 0) emits anything:
// the return values are moved into place (when compiling a function) and op_Return is emitted.
static
M3Result  Compile_End  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result = m3Err_none;

    // ends of nested blocks emit nothing here
    if (o->block.depth != 0)
        return result;

    // function end:
    ValidateBlockEnd (o);

    if (o->function)
    {
        _ (ReturnValues (o, & o->block, false));
    }

    _ (EmitOp (o, op_Return));

    _catch:
    return result;
}
// Compiles local.set / local.tee: copies the stack top into the local's slot.
// If entries already on the stack still refer to the local, its current value is first
// preserved in a separate slot and those entries are redirected there.
// For local.tee the value stays on the stack; otherwise it is popped.
// Throws "local index out of bounds" for an invalid local index.
static
M3Result Compile_SetLocal (IM3Compilation o, m3opcode_t i_opcode)
{
M3Result result;
u32 localIndex;
_ (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd)); // printf ("--- set local: %d \n", localSlot);
if (localIndex < GetFunctionNumArgsAndLocals (o->function))
{
// the local's slot is looked up at stack index == local index
u16 localSlot = GetSlotForStackIndex (o, localIndex);
u16 preserveSlot;
_ (FindReferencedLocalWithinCurrentBlock (o, & preserveSlot, localSlot)); // preserve will be different than local, if referenced
// note: the '_ (...)' lines below deliberately have no trailing semicolon; the macro
// is used as the complete if / else body
if (preserveSlot == localSlot)
_ (CopyStackTopToSlot (o, localSlot))
else
_ (PreservedCopyTopSlot (o, localSlot, preserveSlot))
if (i_opcode != c_waOp_teeLocal)
_ (Pop (o));
}
else _throw ("local index out of bounds");
_catch: return result;
}
// Compiles local.get: pushes a stack entry that refers directly to the local's slot
// (no copy is emitted). Throws "local index out of bounds" for an invalid local index.
static
M3Result  Compile_GetLocal  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result = m3Err_none;

    u32 localIndex;
    _ (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd));

    _throwif ("local index out of bounds", localIndex >= GetFunctionNumArgsAndLocals (o->function));

    // type and slot are looked up at stack index == local index
    u8 localType = GetStackTypeFromBottom (o, localIndex);
    u16 localSlot = GetSlotForStackIndex (o, localIndex);

    _ (Push (o, localType, localSlot));

    _catch:
    return result;
}
// Emits a GetGlobal operation (32- or 64-bit by the global's type) followed by a pointer
// to the global's value, then pushes a newly allocated slot for the result and emits its offset.
static
M3Result  Compile_GetGlobal  (IM3Compilation o, M3Global * i_global)
{
    M3Result result = m3Err_none;

    IM3Operation op = op_GetGlobal_s32;

    if (Is64BitType (i_global->type))
        op = op_GetGlobal_s64;

    _ (EmitOp (o, op));
    EmitPointer (o, & i_global->intValue);

    _ (PushAllocatedSlotAndEmit (o, i_global->type));

    _catch:
    return result;
}
// Emits a SetGlobal operation that stores the stack top into i_global, then pops it.
// The operation is followed by a pointer to the global's value and, when the source is
// in a slot, the source slot offset.
// Throws m3Err_settingImmutableGlobal when the global is not mutable.
static
M3Result Compile_SetGlobal (IM3Compilation o, M3Global * i_global)
{
M3Result result = m3Err_none;
if (i_global->isMutable)
{
IM3Operation op;
// NOTE(review): the op is chosen from the stack-top type; nothing here compares it with i_global->type
u8 type = GetStackTopType (o);
// register source: per-type op; slot source: op chosen by width only
if (IsStackTopInRegister (o))
{
op = c_setGlobalOps [type];
}
else op = Is64BitType (type) ? op_SetGlobal_s64 : op_SetGlobal_s32;
_ (EmitOp (o, op));
EmitPointer (o, & i_global->intValue);
if (IsStackTopInSlot (o))
EmitSlotOffset (o, GetStackTopSlotNumber (o));
_ (Pop (o));
}
else _throw (m3Err_settingImmutableGlobal);
_catch: return result;
}
// Compiles global.get / global.set: reads the global index, validates it and dispatches
// to Compile_GetGlobal or Compile_SetGlobal.
// Throws m3Err_globaIndexOutOfBounds for a bad index and m3Err_globalMemoryNotAllocated
// when the module has no globals array.
static
M3Result  Compile_GetSetGlobal  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result = m3Err_none;

    u32 globalIndex;
    _ (ReadLEB_u32 (& globalIndex, & o->wasm, o->wasmEnd));

    if (globalIndex >= o->module->numGlobals)
        _throw (m3Err_globaIndexOutOfBounds);

    if (not o->module->globals)
        _throw (ErrorCompile (m3Err_globalMemoryNotAllocated, o, "module '%s' is missing global memory", o->module->name));

    M3Global * global = & o->module->globals [globalIndex];

    if (i_opcode == c_waOp_getGlobal)
    {
        _ (Compile_GetGlobal (o, global));
    }
    else
    {
        _ (Compile_SetGlobal (o, global));
    }

    _catch:
    return result;
}
// Emits a branch-target placeholder and links it into i_scope's patch list: the emitted
// word holds the previous list head, and the placeholder's location becomes the new head.
static
void  EmitPatchingBranchPointer  (IM3Compilation o, IM3CompilationScope i_scope)
{
    pc_t previousHead = i_scope->patches;

    pc_t location = EmitPointer (o, previousHead);                  m3log (compile, "branch patch required at: %p", location);

    i_scope->patches = location;
}
// Emits an unconditional op_Branch whose target pointer is registered with
// i_scope for later patching.
static
M3Result  EmitPatchingBranch  (IM3Compilation o, IM3CompilationScope i_scope)
{
_try {
    _ (EmitOp (o, op_Branch));
    EmitPatchingBranchPointer (o, i_scope);

    } _catch: return result;
}
// Compiles 'br' and 'br_if' (both opcodes are routed here by c_operations).
// Reads the label depth, resolves it to a compilation scope, and emits either:
//   - a backward branch (op_ContinueLoop / op_ContinueLoopIf) when the target scope is a loop, or
//   - a forward branch: a return when the target is the outermost scope (depth 0),
//     otherwise a branch whose target pointer is patched later via the scope's patch list.
// Returns m3Err_none on success or the first error produced by a helper.
static
M3Result Compile_Branch (IM3Compilation o, m3opcode_t i_opcode)
{
M3Result result;
u32 depth;
_ (ReadLEB_u32 (& depth, & o->wasm, o->wasmEnd));
IM3CompilationScope scope;
_ (GetBlockScope (o, & scope, depth));
// branch target is a loop (continue)
if (scope->opcode == c_waOp_loop)
{
if (i_opcode == c_waOp_branchIf)
{
// a loop with parameters needs its values resolved before jumping back, so the
// conditional is split: a prologue op tests the condition and carries a pointer
// (reserved here, filled in below) to the code following the ContinueLoop sequence
if (GetFuncTypeNumParams (scope->type))
{
IM3Operation op = IsStackTopInRegister (o) ? op_BranchIfPrologue_r : op_BranchIfPrologue_s;
_ (EmitOp (o, op));
_ (EmitSlotNumOfStackTopAndPop (o));
pc_t * jumpTo = (pc_t *) ReservePointer (o);
_ (ResolveBlockResults (o, scope, /* isBranch: */ true));
_ (EmitOp (o, op_ContinueLoop));
EmitPointer (o, scope->pc);
// the reserved pointer now refers to the first code line after the ContinueLoop
* jumpTo = GetPC (o);
}
else
{
// move the condition to a register
_ (CopyStackTopToRegister (o, false));
_ (PopType (o, c_m3Type_i32));
_ (EmitOp (o, op_ContinueLoopIf));
EmitPointer (o, scope->pc);
}
// dump_type_stack(o);
}
else // is c_waOp_branch
{
_ (EmitOp (o, op_ContinueLoop));
EmitPointer (o, scope->pc);
// code following an unconditional branch is unreachable
o->block.isPolymorphic = true;
}
}
else // forward branch
{
// jumpTo stays NULL unless a BranchIfPrologue is emitted below
pc_t * jumpTo = NULL;
// depth 0 is the outermost scope, so branching to it is treated as a return
bool isReturn = (scope->depth == 0);
bool targetHasResults = GetFuncTypeNumResults (scope->type);
if (i_opcode == c_waOp_branchIf)
{
if (targetHasResults or isReturn)
{
IM3Operation op = IsStackTopInRegister (o) ? op_BranchIfPrologue_r : op_BranchIfPrologue_s;
_ (EmitOp (o, op));
_ (EmitSlotNumOfStackTopAndPop (o)); // condition
// this is continuation point, if the branch isn't taken
jumpTo = (pc_t *) ReservePointer (o);
}
else
{
// no results to move and not a return: a single conditional branch with a
// patched target is all that is needed, so the rest of the function is skipped
IM3Operation op = IsStackTopInRegister (o) ? op_BranchIf_r : op_BranchIf_s;
_ (EmitOp (o, op));
_ (EmitSlotNumOfStackTopAndPop (o)); // condition
EmitPatchingBranchPointer (o, scope);
goto _catch;
}
}
// nothing is emitted for the branch itself when the stack is already polymorphic
if (not IsStackPolymorphic (o))
{
if (isReturn)
{
_ (ReturnValues (o, scope, true));
_ (EmitOp (o, op_Return));
}
else
{
_ (ResolveBlockResults (o, scope, true));
_ (EmitPatchingBranch (o, scope));
}
}
// fill in the prologue's continuation pointer, if one was reserved
if (jumpTo)
{
* jumpTo = GetPC (o);
}
if (i_opcode == c_waOp_branch)
_ (SetStackPolymorphic (o));
}
_catch: return result;
}
static
M3Result Compile_BranchTable (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
u32 targetCount;
_ (ReadLEB_u32 (& targetCount, & o->wasm, o->wasmEnd));
_ (PreserveRegisterIfOccupied (o, c_m3Type_i64)); // move branch operand to a slot
u16 slot = GetStackTopSlotNumber (o);
_ (Pop (o));
// OPTZ: according to spec: "forward branches that target a control instruction with a non-empty
// result type consume matching operands first and push them back on the operand stack after unwinding"
// So, this move-to-reg is only necessary if the target scopes have a type.
u32 numCodeLines = targetCount + 4; // 3 => IM3Operation + slot + target_count + default_target
_ (EnsureCodePageNumLines (o, numCodeLines));
_ (EmitOp (o, op_BranchTable));
EmitSlotOffset (o, slot);
EmitConstant32 (o, targetCount);
IM3CodePage continueOpPage = NULL;
++targetCount; // include default
for (u32 i = 0; i < targetCount; ++i)
{
u32 target;
_ (ReadLEB_u32 (& target, & o->wasm, o->wasmEnd));
IM3CompilationScope scope;
_ (GetBlockScope (o, & scope, target));
// TODO: don't need codepage rigmarole for
// no-param forward-branch targets
_ (AcquireCompilationCodePage (o, & continueOpPage));
pc_t startPC = GetPagePC (continueOpPage);
IM3CodePage savedPage = o->page;
o->page = continueOpPage;
if (scope->opcode == c_waOp_loop)
{
_ (ResolveBlockResults (o, scope, true));
_ (EmitOp (o, op_ContinueLoop));
EmitPointer (o, scope->pc);
}
else
{
// TODO: this could be fused with equivalent targets
if (not IsStackPolymorphic (o))
{
if (scope->depth == 0)
{
_ (ReturnValues (o, scope, true));
_ (EmitOp (o, op_Return));
}
else
{
_ (ResolveBlockResults (o, scope, true));
_ (EmitPatchingBranch (o, scope));
}
}
}
ReleaseCompilationCodePage (o); // FIX: continueOpPage can get lost if thrown
o->page = savedPage;
EmitPointer (o, startPC);
}
_ (SetStackPolymorphic (o));
}
_catch: return result;
}
// Lays out the callee's frame for a call. The frame base is written to
// * o_stackOffset; the stack-top arguments are copied into the frame from the
// highest slot downward and popped, and then the callee's result types are pushed
// starting at the frame base. For indirect calls the extra stack-top operand is
// popped first (after the frame base has been determined).
static
M3Result  CompileCallArgsAndReturn  (IM3Compilation o, u16 * o_stackOffset, IM3FuncType i_type, bool i_isIndirect)
{
_try {
    u16 frameBase = GetMaxUsedSlotPlusOne (o);

    // force use of at least one stack slot; this is to help ensure
    // the m3 stack overflows (and traps) before the native stack can overflow.
    // e.g. see Wasm spec test 'runaway' in call.wast
    frameBase = M3_MAX (1, frameBase);

    // stack frame is 64-bit aligned
    AlignSlotToType (& frameBase, c_m3Type_i64);

    * o_stackOffset = frameBase;

    // wait to pop this here so that the frame base search is correct
    if (i_isIndirect)
    {
        _ (Pop (o));
    }

    const u16 argCount = GetFuncTypeNumParams (i_type);
    const u16 retCount = GetFuncTypeNumResults (i_type);

    // arguments sit above the result slots; fill them from the top down since the
    // last argument is on top of the stack
    u16 argSlot = frameBase + (argCount + retCount) * c_ioSlotCount;

    for (u16 n = 0; n < argCount; ++n)
    {
        argSlot -= c_ioSlotCount;

        _ (CopyStackTopToSlot (o, argSlot));
        _ (Pop (o));
    }

    u16 retSlot = frameBase;

    for (u16 r = 0; r < retCount; ++r)
    {
        u8 retType = GetFuncTypeResultType (i_type, r);

        _ (Push (o, retType, retSlot));
        MarkSlotsAllocatedByType (o, retSlot, retType);

        retSlot += c_ioSlotCount;
    }

    } _catch: return result;
}
// Compiles a direct call. Emits op_Call with the callee's compiled code pointer when
// the callee is already compiled, otherwise op_Compile with the function itself as
// the operand; in both cases the frame's slot offset follows.
// Fails when the function index cannot be resolved or the function has no module
// (reported as a missing import).
static
M3Result  Compile_Call  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    u32 calleeIndex;
    _ (ReadLEB_u32 (& calleeIndex, & o->wasm, o->wasmEnd));

    IM3Function callee = Module_GetFunction (o->module, calleeIndex);

    _throwif (m3Err_functionLookupFailed, not callee);
                                                                        m3log (compile, d_indent " (func= [%d] '%s'; args= %d)",
                                                                               get_indention_string (o), calleeIndex, m3_GetFunctionName (callee), callee->funcType->numArgs);
    if (not callee->module)
    {
        _throw (ErrorCompile (m3Err_functionImportMissing, o, "'%s.%s'", GetFunctionImportModuleName (callee), m3_GetFunctionName (callee)));
    }

    u16 slotTop;
    _ (CompileCallArgsAndReturn (o, & slotTop, callee->funcType, false));

    const bool isCompiled = (callee->compiled != NULL);

    IM3Operation op      = isCompiled ? op_Call : op_Compile;
    const void * operand = isCompiled ? (const void *) callee->compiled
                                      : (const void *) callee;

    _ (EmitOp (o, op));
    EmitPointer (o, operand);
    EmitSlotOffset (o, slotTop);

    } _catch: return result;
}
// Compiles an indirect call. Reads the type index and the table index immediates
// (the table index is consumed but not otherwise used here), makes sure the
// stack-top operand is in a slot, and emits op_CallIndirect with: that operand's
// slot, the module, the function type, and the frame's slot offset.
static
M3Result  Compile_CallIndirect  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    u32 typeIndex, tableIndex;

    _ (ReadLEB_u32 (& typeIndex, & o->wasm, o->wasmEnd));
    _ (ReadLEB_u32 (& tableIndex, & o->wasm, o->wasmEnd));

    _throwif ("function call type index out of range", typeIndex >= o->module->numFuncTypes);

    if (IsStackTopInRegister (o))
    {
        _ (PreserveRegisterIfOccupied (o, c_m3Type_i32));
    }

    const u16 tableIndexSlot = GetStackTopSlotNumber (o);

    IM3FuncType calleeType = o->module->funcTypes [typeIndex];

    u16 execTop;
    _ (CompileCallArgsAndReturn (o, & execTop, calleeType, true));

    _ (EmitOp (o, op_CallIndirect));
    EmitSlotOffset (o, tableIndexSlot);
    EmitPointer (o, o->module);
    EmitPointer (o, calleeType);                // TODO: unify all types in M3Environment
    EmitSlotOffset (o, execTop);

    } _catch: return result;
}
// Compiles 'memory.size': consumes the reserved immediate, frees up the i32
// register if it is occupied, emits op_MemSize and pushes an i32 register result.
static
M3Result  Compile_Memory_Size  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    i8 reservedByte;
    _ (ReadLEB_i7 (& reservedByte, & o->wasm, o->wasmEnd));

    _ (PreserveRegisterIfOccupied (o, c_m3Type_i32));

    _ (EmitOp (o, op_MemSize));

    _ (PushRegister (o, c_m3Type_i32));

    } _catch: return result;
}
// Compiles 'memory.grow': consumes the reserved immediate, moves the i32 operand
// on top of the stack into the register, emits op_MemGrow and pushes an i32
// register result.
static
M3Result  Compile_Memory_Grow  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    i8 reservedByte;
    _ (ReadLEB_i7 (& reservedByte, & o->wasm, o->wasmEnd));

    _ (CopyStackTopToRegister (o, false));
    _ (PopType (o, c_m3Type_i32));

    _ (EmitOp (o, op_MemGrow));

    _ (PushRegister (o, c_m3Type_i32));

    } _catch: return result;
}
// Compiles 'memory.copy' and 'memory.fill'. The memory index immediates (two for
// copy, one for fill) are consumed but their values are not used. The stack-top
// i32 operand is moved to the register; the two remaining operands are emitted as
// slot numbers.
static
M3Result  Compile_Memory_CopyFill  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    const bool isCopy = (i_opcode == c_waOp_memoryCopy);

    u32 memoryIndex;

    // memory.copy carries an additional (source) memory index ahead of the target one
    if (isCopy)
    {
        _ (ReadLEB_u32 (& memoryIndex, & o->wasm, o->wasmEnd));
    }

    _ (ReadLEB_u32 (& memoryIndex, & o->wasm, o->wasmEnd));

    IM3Operation op = isCopy ? op_MemCopy : op_MemFill;

    _ (CopyStackTopToRegister (o, false));

    _ (EmitOp (o, op));
    _ (PopType (o, c_m3Type_i32));

    _ (EmitSlotNumOfStackTopAndPop (o));
    _ (EmitSlotNumOfStackTopAndPop (o));

    } _catch: return result;
}
// Reads a block type immediate (a signed 33-bit LEB value). A non-negative value is
// an index into the module's function types; a negative value encodes a value type,
// which is normalized and mapped to the environment's matching return-only function
// type. The resulting function type is stored in * o_blockType.
static
M3Result  ReadBlockType  (IM3Compilation o, IM3FuncType * o_blockType)
{
_try {
    i64 encoded;
    _ (ReadLebSigned (& encoded, 33, & o->wasm, o->wasmEnd));

    if (encoded >= 0)
    {
        _throwif("func type out of bounds", encoded >= o->module->numFuncTypes);

        * o_blockType = o->module->funcTypes [encoded];                 m3log (compile, d_indent " (type: %s)", get_indention_string (o), SPrintFuncTypeSignature (*o_blockType));
    }
    else
    {
        u8 valueType;
        _ (NormalizeType (&valueType, encoded));                        m3log (compile, d_indent " (type: %s)", get_indention_string (o), c_waTypes [valueType]);

        * o_blockType = o->module->environment->retFuncTypes [valueType];
    }

    } _catch: return result;
}
// For every argument/local of the current function, asks
// FindReferencedLocalWithinCurrentBlock for a slot number and, when that differs
// from the local's own slot, emits a 32- or 64-bit CopySlot operation with the
// returned slot and the local's slot as operands.
// Nothing is done unless the stack holds values above stackFirstDynamicIndex.
//
// Returns m3Err_none on success, or the first error from the lookup or from
// emitting the copy operation.
static
M3Result  PreserveArgsAndLocals  (IM3Compilation o)
{
    M3Result result = m3Err_none;

    if (o->stackIndex > o->stackFirstDynamicIndex)
    {
        u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o->function);

        for (u32 i = 0; i < numArgsAndLocals; ++i)
        {
            u16 slot = GetSlotForStackIndex (o, i);

            u16 preservedSlotNumber;
            _ (FindReferencedLocalWithinCurrentBlock (o, & preservedSlotNumber, slot));

            if (preservedSlotNumber != slot)
            {
                u8 type = GetStackTypeFromBottom (o, i);                    d_m3Assert (type != c_m3Type_none);
                IM3Operation op = Is64BitType (type) ? op_CopySlot_64 : op_CopySlot_32;

                // EmitOp can fail (e.g. no code page available); its operands must
                // not be emitted in that case, so the error is propagated
                _ (EmitOp (o, op));
                EmitSlotOffset (o, preservedSlotNumber);
                EmitSlotOffset (o, slot);
            }
        }
    }

    _catch:
    return result;
}
// Compiles 'block' and 'loop'. Registers and args/locals are preserved, the block
// type is read, and the body is compiled via CompileBlock. For a loop that takes
// parameters, any parameter currently living in a constant slot is first copied
// into a newly allocated slot; op_Loop is then emitted ahead of the body.
static
M3Result  Compile_LoopOrBlock  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    // TODO: these shouldn't be necessary for non-loop blocks?
    _ (PreserveRegisters (o));
    _ (PreserveArgsAndLocals (o));

    IM3FuncType blockType;
    _ (ReadBlockType (o, & blockType));

    if (i_opcode == c_waOp_loop)
    {
        const u16 paramCount = GetFuncTypeNumParams (blockType);

        if (paramCount)
        {
            // instantiate constants
            const u16 valueCount = GetNumBlockValuesOnStack (o);       // CompileBlock enforces this at comptime
            d_m3Assert (valueCount >= paramCount);

            if (valueCount >= paramCount)
            {
                const u16 endIndex = GetStackTopIndex (o) + 1;
                u16 index = endIndex - paramCount;

                // walk the topmost paramCount stack entries
                while (index < endIndex)
                {
                    const u16 currentSlot = GetSlotForStackIndex (o, index);
                    const u8  valueType   = GetStackTypeFromBottom (o, index);

                    if (IsConstantSlot (o, currentSlot))
                    {
                        u16 replacementSlot;
                        _ (AllocateSlots (o, & replacementSlot, valueType));
                        _ (CopyStackIndexToSlot (o, replacementSlot, index));

                        o->wasmStack [index] = replacementSlot;
                    }

                    ++index;
                }
            }
        }

        _ (EmitOp (o, op_Loop));
    }

    _ (CompileBlock (o, blockType, i_opcode));

    } _catch: return result;
}
// Compiles an else block onto its own code page. The page's starting PC is written
// to * o_startPC; the block is terminated with an op_Branch back to the current PC
// of the page that was active on entry, which is then reinstated as o->page.
static
M3Result  CompileElseBlock  (IM3Compilation o, pc_t * o_startPC, IM3FuncType i_blockType)
{
    M3Result result = m3Err_none;

    IM3CodePage elsePage;
    IM3CodePage resumePage;

    _ (AcquireCompilationCodePage (o, & elsePage));

    * o_startPC = GetPagePC (elsePage);

    // redirect emission to the else page while the block is compiled
    resumePage = o->page;
    o->page = elsePage;

    _ (CompileBlock (o, i_blockType, c_waOp_else));

    _ (EmitOp (o, op_Branch));
    EmitPointer (o, GetPagePC (resumePage));

    ReleaseCompilationCodePage (o);

    o->page = resumePage;

    _catch:
    return result;
}
// Compiles 'if'. The emitted layout is:
//
//      [   op_If    ]
//      [ <else-pc>  ]   ---->   [ ..else..  ]
//      [  ..if..    ]           [ ..block.. ]
//      [ ..block..  ]           [ op_Branch ]
//      [    end     ]   <-----  [ <end-pc>  ]
//
// When the source has no 'else' but the block produces results, an else block is
// synthesized so the pass-through results end up in the right place. When there is
// neither, <else-pc> simply points at the code following the if block.
static
M3Result  Compile_If  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    _ (PreserveNonTopRegisters (o));
    _ (PreserveArgsAndLocals (o));

    IM3Operation ifOp = op_If_s;

    if (IsStackTopInRegister (o))
        ifOp = op_If_r;

    _ (EmitOp (o, ifOp));
    _ (EmitSlotNumOfStackTopAndPop (o));

    // placeholder for <else-pc>; filled in below
    pc_t * elsePC = (pc_t *) ReservePointer (o);

    IM3FuncType blockType;
    _ (ReadBlockType (o, & blockType));

    // remembered so the else block starts from the same stack state as the if block
    const u16 entryStackIndex = o->stackIndex;

    _ (CompileBlock (o, blockType, i_opcode));

    const bool hasElse = (o->previousOpcode == c_waOp_else);

    if (hasElse or GetFuncTypeNumResults (blockType))
    {
        // no explicit else: rewind to the if's end to create a fake else block
        if (not hasElse)
            o->wasm--;

        o->stackIndex = entryStackIndex;

        _ (CompileElseBlock (o, elsePC, blockType));
    }
    else * elsePC = GetPC (o);

    } _catch: return result;
}
// Compiles 'select'. The three operands are popped (selector first, then the two
// candidate values) and a select operation variant is chosen according to which
// operand, if any, currently lives in a register. Operands that live in slots have
// their slot numbers emitted after the operation, in pop order. The result is
// pushed as a register value of the selected type.
//
// Returns m3Err_none on success, m3Err_functionStackUnderrun when the operand type
// is neither integer nor floating point and the stack is not polymorphic,
// m3Err_unknownOpcode for a floating point select when float support is compiled
// out, or the first error from a helper (including emitting the operation).
static
M3Result  Compile_Select  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result result = m3Err_none;

    u16 slots [3] = { c_slotUnused, c_slotUnused, c_slotUnused };

    u8 type = GetStackTypeFromTop (o, 1); // get type of selection

    IM3Operation op = NULL;

    if (IsFpType (type))
    {
#   if d_m3HasFloat
        // not consuming a fp reg, so preserve
        if (not IsStackTopMinus1InRegister (o) and
            not IsStackTopMinus2InRegister (o))
        {
            _ (PreserveRegisterIfOccupied (o, type));
        }

        // the selector is handled separately: it selects a row of the op table
        bool selectorInReg = IsStackTopInRegister (o);
        slots [0] = GetStackTopSlotNumber (o);
        _ (Pop (o));

        u32 opIndex = 0;

        for (u32 i = 1; i <= 2; ++i)
        {
            if (IsStackTopInRegister (o))
                opIndex = i;
            else
                slots [i] = GetStackTopSlotNumber (o);

            _ (Pop (o));
        }

        op = c_fpSelectOps [type - c_m3Type_f32] [selectorInReg] [opIndex];
#   else
        _throw (m3Err_unknownOpcode);
#   endif
    }
    else if (IsIntType (type))
    {
        // 'sss' operation doesn't consume a register, so it might have to protect the register's contents
        if (not IsStackTopInRegister (o) and
            not IsStackTopMinus1InRegister (o) and
            not IsStackTopMinus2InRegister (o))
        {
            _ (PreserveRegisterIfOccupied (o, type));
        }

        u32 opIndex = 3;  // op_Select_*_sss

        for (u32 i = 0; i < 3; ++i)
        {
            if (IsStackTopInRegister (o))
                opIndex = i;
            else
                slots [i] = GetStackTopSlotNumber (o);

            _ (Pop (o));
        }

        op = c_intSelectOps [type - c_m3Type_i32] [opIndex];
    }
    else if (not IsStackPolymorphic (o))
        _throw (m3Err_functionStackUnderrun);

    // EmitOp can fail (e.g. no code page available); the operands must not be
    // emitted in that case, so the error is propagated
    _ (EmitOp (o, op));

    for (u32 i = 0; i < 3; i++)
    {
        if (IsValidSlot (slots [i]))
            EmitSlotOffset (o, slots [i]);
    }

    _ (PushRegister (o, type));

    _catch: return result;
}
// Compiles 'drop': pops the top of the compile-time stack and returns Pop's result.
static
M3Result  Compile_Drop  (IM3Compilation o, m3opcode_t i_opcode)
{
    M3Result popResult = Pop (o);

    if (d_m3LogWasmStack)
    {
        dump_type_stack (o);
    }

    return popResult;
}
// Compile handler that does nothing and always succeeds.
// Both parameters are intentionally unused.
static
M3Result  Compile_Nop  (IM3Compilation o, m3opcode_t i_opcode)
{
    (void) o;
    (void) i_opcode;

    return m3Err_none;
}
// Compiles 'unreachable': adds a trap record, emits op_Unreachable and marks the
// stack polymorphic.
static
M3Result  Compile_Unreachable  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    _ (AddTrapRecord (o));

    _ (EmitOp (o, op_Unreachable));

    _ (SetStackPolymorphic (o));

    } _catch: return result;
}
// OPTZ: currently all stack slot indices take up a full word, but
// dual stack source operands could be packed together
//
// Generic compiler for opcodes that are fully described by their M3OpInfo entry.
// Picks one of the entry's operation variants according to where the operand(s)
// currently live (register vs. slot), emits it, emits/pops one stack operand (two
// when the entry's stackOffset is negative), and pushes a register result when the
// entry has a result type.
// Returns m3Err_unknownOpcode when no entry exists for i_opcode, and a compile error
// when the entry has no operation for the required variant.
static
M3Result Compile_Operator (IM3Compilation o, m3opcode_t i_opcode)
{
M3Result result;
IM3OpInfo opInfo = GetOpInfo (i_opcode);
_throwif (m3Err_unknownOpcode, not opInfo);
IM3Operation op;
// This preserve is for FP compare operations.
// either need additional slot destination operations or the
// easy fix, move _r0 out of the way.
// moving out the way might be the optimal solution most often?
// otherwise, the _r0 reg can get buried down in the stack
// and be idle & wasted for a moment.
if (IsFpType (GetStackTopType (o)) and IsIntType (opInfo->type))
{
_ (PreserveRegisterIfOccupied (o, opInfo->type));
}
// stackOffset == 0: single-operand entries, laid out as { _r, _s } (see d_unaryOpList)
if (opInfo->stackOffset == 0)
{
if (IsStackTopInRegister (o))
{
op = opInfo->operations [0]; // _r
}
else
{
_ (PreserveRegisterIfOccupied (o, opInfo->type));
op = opInfo->operations [1]; // _s
}
}
// otherwise: two-operand entries, laid out as { _rs, _sr, _ss, _rr } (see d_binOpList / d_storeFpOpList)
else
{
if (IsStackTopInRegister (o))
{
op = opInfo->operations [0]; // _rs
if (IsStackTopMinus1InRegister (o))
{ d_m3Assert (i_opcode == c_waOp_store_f32 or i_opcode == c_waOp_store_f64);
op = opInfo->operations [3]; // _rr for fp.store
}
}
else if (IsStackTopMinus1InRegister (o))
{
op = opInfo->operations [1]; // _sr
if (not op) // must be commutative, then
op = opInfo->operations [0];
}
else
{
_ (PreserveRegisterIfOccupied (o, opInfo->type)); // _ss
op = opInfo->operations [2];
}
}
if (op)
{
_ (EmitOp (o, op));
_ (EmitSlotNumOfStackTopAndPop (o));
// a negative stackOffset means a second stack operand is consumed
if (opInfo->stackOffset < 0)
_ (EmitSlotNumOfStackTopAndPop (o));
// entries with a result type leave their result in a register
if (opInfo->type != c_m3Type_none)
_ (PushRegister (o, opInfo->type));
}
else
{
// the opcode name is only reported in DEBUG builds; otherwise the raw opcode value is used
# ifdef DEBUG
result = ErrorCompile ("no operation found for opcode", o, "'%s'", opInfo->name);
# else
result = ErrorCompile ("no operation found for opcode", o, "%x", i_opcode);
# endif
_throw (result);
}
_catch: return result;
}
// Compiles conversion opcodes. The operation variant is chosen by two facts: whether
// the destination register for the result type is already allocated (the result then
// goes to a slot) and whether the source operand lives in a slot.
static
M3Result  Compile_Convert  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    IM3OpInfo info = GetOpInfo (i_opcode);
    _throwif (m3Err_unknownOpcode, not info);

    const bool destInSlot   = IsRegisterTypeAllocated (o, info->type);
    const bool sourceInSlot = IsStackTopInSlot (o);

    // variants are laid out as { _r_r, _r_s, _s_r, _s_s }  (destination, source)
    u32 variant = 0;

    if (destInSlot)
        variant += 2;

    if (sourceInSlot)
        variant += 1;

    _ (EmitOp (o, info->operations [variant]));
    _ (EmitSlotNumOfStackTopAndPop (o));

    if (destInSlot)
    {
        _ (PushAllocatedSlotAndEmit (o, info->type));
    }
    else
    {
        _ (PushRegister (o, info->type));
    }

    } _catch: return result;
}
// Compiles memory load/store opcodes. Reads the alignment hint (unused) and the
// memory offset immediates, delegates operation selection and emission to
// Compile_Operator, and then appends the memory offset as a 32-bit constant.
static
M3Result  Compile_Load_Store  (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    u32 alignment, offset;

    _ (ReadLEB_u32 (& alignment, & o->wasm, o->wasmEnd));
    _ (ReadLEB_u32 (& offset, & o->wasm, o->wasmEnd));
                                                                        m3log (compile, d_indent " (offset = %d)", get_indention_string (o), offset);
    IM3OpInfo info = GetOpInfo (i_opcode);
    _throwif (m3Err_unknownOpcode, not info);

    // entries with a floating point result type need the fp register freed up first
    if (IsFpType (info->type))
    {
        _ (PreserveRegisterIfOccupied (o, c_m3Type_f64));
    }

    _ (Compile_Operator (o, i_opcode));

    EmitConstant32 (o, offset);

    } _catch: return result;
}
// Builds the four-word code sequence for a raw (host) function:
//   op_CallRawFunction, i_function, io_function, i_userdata
// io_function->compiled is pointed at the start of that sequence and
// io_function->module is set to io_module.
// Returns m3Err_mallocFailedCodePage when no code page could be acquired.
M3Result  CompileRawFunction  (IM3Module io_module,  IM3Function io_function, const void * i_function, const void * i_userdata)
{
    d_m3Assert (io_module->runtime);

    IM3CodePage page = AcquireCodePageWithCapacity (io_module->runtime, 4);

    if (not page)
        return m3Err_mallocFailedCodePage;

    io_function->compiled = GetPagePC (page);
    io_function->module = io_module;

    EmitWord (page, op_CallRawFunction);
    EmitWord (page, i_function);
    EmitWord (page, io_function);
    EmitWord (page, i_userdata);

    ReleaseCodePage (io_module->runtime, page);

    return m3Err_none;
}
// Helper macros that build the four-entry operation list of an opcode table entry.
// The suffix letters name where the operands live: 'r' = register, 's' = slot.
//
// d_logOp, d_logOp2 macros aren't actually used by the compiler, just codepage decoding (d_m3LogCodePages = 1)
#define d_logOp(OP) { op_##OP, NULL, NULL, NULL }
#define d_logOp2(OP1,OP2) { op_##OP1, op_##OP2, NULL, NULL }
// no operations; used by entries whose compile handler emits its own operations (or none)
#define d_emptyOpList { NULL, NULL, NULL, NULL }
// single-operand list: [0] = _r, [1] = _s
#define d_unaryOpList(TYPE, NAME) { op_##TYPE##_##NAME##_r, op_##TYPE##_##NAME##_s, NULL, NULL }
// two-operand list: [0] = _rs, [1] = _sr, [2] = _ss
#define d_binOpList(TYPE, NAME) { op_##TYPE##_##NAME##_rs, op_##TYPE##_##NAME##_sr, op_##TYPE##_##NAME##_ss, NULL }
// as d_binOpList, plus [3] = _rr (both operands in registers)
#define d_storeFpOpList(TYPE, NAME) { op_##TYPE##_##NAME##_rs, op_##TYPE##_##NAME##_sr, op_##TYPE##_##NAME##_ss, op_##TYPE##_##NAME##_rr }
// as d_binOpList but without an _sr variant; Compile_Operator falls back to [0] when [1] is NULL
#define d_commutativeBinOpList(TYPE, NAME) { op_##TYPE##_##NAME##_rs, NULL, op_##TYPE##_##NAME##_ss, NULL }
// conversion list: [0] = _r_r, [1] = _r_s, [2] = _s_r, [3] = _s_s; Compile_Convert indexes it with destInSlot * 2 + sourceInSlot
#define d_convertOpList(OP) { op_##OP##_r_r, op_##OP##_r_s, op_##OP##_s_r, op_##OP##_s_s }
// Table of single-byte opcodes. GetOpInfo indexes this array directly with the
// opcode value, so the position of every entry (including the M3OP_RESERVED
// placeholders) is significant; the trailing comments give the opcode of each row.
//
// The arguments of each entry are, in order: the opcode's name, a stack offset, a
// result type, an operation list (built with the d_*OpList macros), and optionally
// a compile handler. How these map onto M3OpInfo fields is determined by the
// M3OP / M3OP_F macros, which are defined elsewhere. Within this file,
// Compile_Operator and Compile_Convert read the 'stackOffset', 'type' and
// 'operations' fields of an entry.
// NOTE(review): entries with a NULL handler are presumably compiled by
// Compile_Operator — confirm against the dispatch code.
const M3OpInfo c_operations [] =
{
M3OP( "unreachable", 0, none, d_logOp (Unreachable), Compile_Unreachable ), // 0x00
M3OP( "nop", 0, none, d_emptyOpList, Compile_Nop ), // 0x01 .
M3OP( "block", 0, none, d_emptyOpList, Compile_LoopOrBlock ), // 0x02
M3OP( "loop", 0, none, d_logOp (Loop), Compile_LoopOrBlock ), // 0x03
M3OP( "if", -1, none, d_emptyOpList, Compile_If ), // 0x04
M3OP( "else", 0, none, d_emptyOpList, Compile_Nop ), // 0x05
M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, // 0x06...0x0a
M3OP( "end", 0, none, d_emptyOpList, Compile_End ), // 0x0b
M3OP( "br", 0, none, d_logOp (Branch), Compile_Branch ), // 0x0c
M3OP( "br_if", -1, none, d_logOp2 (BranchIf_r, BranchIf_s), Compile_Branch ), // 0x0d
M3OP( "br_table", -1, none, d_logOp (BranchTable), Compile_BranchTable ), // 0x0e
M3OP( "return", 0, any, d_logOp (Return), Compile_Return ), // 0x0f
M3OP( "call", 0, any, d_logOp (Call), Compile_Call ), // 0x10
M3OP( "call_indirect", 0, any, d_logOp (CallIndirect), Compile_CallIndirect ), // 0x11
M3OP( "return_call", 0, any, d_emptyOpList, Compile_Call ), // 0x12 TODO: Optimize
M3OP( "return_call_indirect",0, any, d_emptyOpList, Compile_CallIndirect ), // 0x13
M3OP_RESERVED, M3OP_RESERVED, // 0x14...
M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, // ...0x19
M3OP( "drop", -1, none, d_emptyOpList, Compile_Drop ), // 0x1a
M3OP( "select", -2, any, d_emptyOpList, Compile_Select ), // 0x1b
M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, // 0x1c...0x1f
M3OP( "local.get", 1, any, d_emptyOpList, Compile_GetLocal ), // 0x20
M3OP( "local.set", 1, none, d_emptyOpList, Compile_SetLocal ), // 0x21
M3OP( "local.tee", 0, any, d_emptyOpList, Compile_SetLocal ), // 0x22
M3OP( "global.get", 1, none, d_emptyOpList, Compile_GetSetGlobal ), // 0x23
M3OP( "global.set", 1, none, d_emptyOpList, Compile_GetSetGlobal ), // 0x24
M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, // 0x25...0x27
M3OP( "i32.load", 0, i_32, d_unaryOpList (i32, Load_i32), Compile_Load_Store ), // 0x28
M3OP( "i64.load", 0, i_64, d_unaryOpList (i64, Load_i64), Compile_Load_Store ), // 0x29
M3OP_F( "f32.load", 0, f_32, d_unaryOpList (f32, Load_f32), Compile_Load_Store ), // 0x2a
M3OP_F( "f64.load", 0, f_64, d_unaryOpList (f64, Load_f64), Compile_Load_Store ), // 0x2b
M3OP( "i32.load8_s", 0, i_32, d_unaryOpList (i32, Load_i8), Compile_Load_Store ), // 0x2c
M3OP( "i32.load8_u", 0, i_32, d_unaryOpList (i32, Load_u8), Compile_Load_Store ), // 0x2d
M3OP( "i32.load16_s", 0, i_32, d_unaryOpList (i32, Load_i16), Compile_Load_Store ), // 0x2e
M3OP( "i32.load16_u", 0, i_32, d_unaryOpList (i32, Load_u16), Compile_Load_Store ), // 0x2f
M3OP( "i64.load8_s", 0, i_64, d_unaryOpList (i64, Load_i8), Compile_Load_Store ), // 0x30
M3OP( "i64.load8_u", 0, i_64, d_unaryOpList (i64, Load_u8), Compile_Load_Store ), // 0x31
M3OP( "i64.load16_s", 0, i_64, d_unaryOpList (i64, Load_i16), Compile_Load_Store ), // 0x32
M3OP( "i64.load16_u", 0, i_64, d_unaryOpList (i64, Load_u16), Compile_Load_Store ), // 0x33
M3OP( "i64.load32_s", 0, i_64, d_unaryOpList (i64, Load_i32), Compile_Load_Store ), // 0x34
M3OP( "i64.load32_u", 0, i_64, d_unaryOpList (i64, Load_u32), Compile_Load_Store ), // 0x35
M3OP( "i32.store", -2, none, d_binOpList (i32, Store_i32), Compile_Load_Store ), // 0x36
M3OP( "i64.store", -2, none, d_binOpList (i64, Store_i64), Compile_Load_Store ), // 0x37
M3OP_F( "f32.store", -2, none, d_storeFpOpList (f32, Store_f32), Compile_Load_Store ), // 0x38
M3OP_F( "f64.store", -2, none, d_storeFpOpList (f64, Store_f64), Compile_Load_Store ), // 0x39
M3OP( "i32.store8", -2, none, d_binOpList (i32, Store_u8), Compile_Load_Store ), // 0x3a
M3OP( "i32.store16", -2, none, d_binOpList (i32, Store_i16), Compile_Load_Store ), // 0x3b
M3OP( "i64.store8", -2, none, d_binOpList (i64, Store_u8), Compile_Load_Store ), // 0x3c
M3OP( "i64.store16", -2, none, d_binOpList (i64, Store_i16), Compile_Load_Store ), // 0x3d
M3OP( "i64.store32", -2, none, d_binOpList (i64, Store_i32), Compile_Load_Store ), // 0x3e
M3OP( "memory.size", 1, i_32, d_logOp (MemSize), Compile_Memory_Size ), // 0x3f
M3OP( "memory.grow", 1, i_32, d_logOp (MemGrow), Compile_Memory_Grow ), // 0x40
M3OP( "i32.const", 1, i_32, d_logOp (Const32), Compile_Const_i32 ), // 0x41
M3OP( "i64.const", 1, i_64, d_logOp (Const64), Compile_Const_i64 ), // 0x42
M3OP_F( "f32.const", 1, f_32, d_emptyOpList, Compile_Const_f32 ), // 0x43
M3OP_F( "f64.const", 1, f_64, d_emptyOpList, Compile_Const_f64 ), // 0x44
M3OP( "i32.eqz", 0, i_32, d_unaryOpList (i32, EqualToZero) , NULL ), // 0x45
M3OP( "i32.eq", -1, i_32, d_commutativeBinOpList (i32, Equal) , NULL ), // 0x46
M3OP( "i32.ne", -1, i_32, d_commutativeBinOpList (i32, NotEqual) , NULL ), // 0x47
M3OP( "i32.lt_s", -1, i_32, d_binOpList (i32, LessThan) , NULL ), // 0x48
M3OP( "i32.lt_u", -1, i_32, d_binOpList (u32, LessThan) , NULL ), // 0x49
M3OP( "i32.gt_s", -1, i_32, d_binOpList (i32, GreaterThan) , NULL ), // 0x4a
M3OP( "i32.gt_u", -1, i_32, d_binOpList (u32, GreaterThan) , NULL ), // 0x4b
M3OP( "i32.le_s", -1, i_32, d_binOpList (i32, LessThanOrEqual) , NULL ), // 0x4c
M3OP( "i32.le_u", -1, i_32, d_binOpList (u32, LessThanOrEqual) , NULL ), // 0x4d
M3OP( "i32.ge_s", -1, i_32, d_binOpList (i32, GreaterThanOrEqual) , NULL ), // 0x4e
M3OP( "i32.ge_u", -1, i_32, d_binOpList (u32, GreaterThanOrEqual) , NULL ), // 0x4f
M3OP( "i64.eqz", 0, i_32, d_unaryOpList (i64, EqualToZero) , NULL ), // 0x50
M3OP( "i64.eq", -1, i_32, d_commutativeBinOpList (i64, Equal) , NULL ), // 0x51
M3OP( "i64.ne", -1, i_32, d_commutativeBinOpList (i64, NotEqual) , NULL ), // 0x52
M3OP( "i64.lt_s", -1, i_32, d_binOpList (i64, LessThan) , NULL ), // 0x53
M3OP( "i64.lt_u", -1, i_32, d_binOpList (u64, LessThan) , NULL ), // 0x54
M3OP( "i64.gt_s", -1, i_32, d_binOpList (i64, GreaterThan) , NULL ), // 0x55
M3OP( "i64.gt_u", -1, i_32, d_binOpList (u64, GreaterThan) , NULL ), // 0x56
M3OP( "i64.le_s", -1, i_32, d_binOpList (i64, LessThanOrEqual) , NULL ), // 0x57
M3OP( "i64.le_u", -1, i_32, d_binOpList (u64, LessThanOrEqual) , NULL ), // 0x58
M3OP( "i64.ge_s", -1, i_32, d_binOpList (i64, GreaterThanOrEqual) , NULL ), // 0x59
M3OP( "i64.ge_u", -1, i_32, d_binOpList (u64, GreaterThanOrEqual) , NULL ), // 0x5a
M3OP_F( "f32.eq", -1, i_32, d_commutativeBinOpList (f32, Equal) , NULL ), // 0x5b
M3OP_F( "f32.ne", -1, i_32, d_commutativeBinOpList (f32, NotEqual) , NULL ), // 0x5c
M3OP_F( "f32.lt", -1, i_32, d_binOpList (f32, LessThan) , NULL ), // 0x5d
M3OP_F( "f32.gt", -1, i_32, d_binOpList (f32, GreaterThan) , NULL ), // 0x5e
M3OP_F( "f32.le", -1, i_32, d_binOpList (f32, LessThanOrEqual) , NULL ), // 0x5f
M3OP_F( "f32.ge", -1, i_32, d_binOpList (f32, GreaterThanOrEqual) , NULL ), // 0x60
M3OP_F( "f64.eq", -1, i_32, d_commutativeBinOpList (f64, Equal) , NULL ), // 0x61
M3OP_F( "f64.ne", -1, i_32, d_commutativeBinOpList (f64, NotEqual) , NULL ), // 0x62
M3OP_F( "f64.lt", -1, i_32, d_binOpList (f64, LessThan) , NULL ), // 0x63
M3OP_F( "f64.gt", -1, i_32, d_binOpList (f64, GreaterThan) , NULL ), // 0x64
M3OP_F( "f64.le", -1, i_32, d_binOpList (f64, LessThanOrEqual) , NULL ), // 0x65
M3OP_F( "f64.ge", -1, i_32, d_binOpList (f64, GreaterThanOrEqual) , NULL ), // 0x66
M3OP( "i32.clz", 0, i_32, d_unaryOpList (u32, Clz) , NULL ), // 0x67
M3OP( "i32.ctz", 0, i_32, d_unaryOpList (u32, Ctz) , NULL ), // 0x68
M3OP( "i32.popcnt", 0, i_32, d_unaryOpList (u32, Popcnt) , NULL ), // 0x69
M3OP( "i32.add", -1, i_32, d_commutativeBinOpList (i32, Add) , NULL ), // 0x6a
M3OP( "i32.sub", -1, i_32, d_binOpList (i32, Subtract) , NULL ), // 0x6b
M3OP( "i32.mul", -1, i_32, d_commutativeBinOpList (i32, Multiply) , NULL ), // 0x6c
M3OP( "i32.div_s", -1, i_32, d_binOpList (i32, Divide) , NULL ), // 0x6d
M3OP( "i32.div_u", -1, i_32, d_binOpList (u32, Divide) , NULL ), // 0x6e
M3OP( "i32.rem_s", -1, i_32, d_binOpList (i32, Remainder) , NULL ), // 0x6f
M3OP( "i32.rem_u", -1, i_32, d_binOpList (u32, Remainder) , NULL ), // 0x70
M3OP( "i32.and", -1, i_32, d_commutativeBinOpList (u32, And) , NULL ), // 0x71
M3OP( "i32.or", -1, i_32, d_commutativeBinOpList (u32, Or) , NULL ), // 0x72
M3OP( "i32.xor", -1, i_32, d_commutativeBinOpList (u32, Xor) , NULL ), // 0x73
M3OP( "i32.shl", -1, i_32, d_binOpList (u32, ShiftLeft) , NULL ), // 0x74
M3OP( "i32.shr_s", -1, i_32, d_binOpList (i32, ShiftRight) , NULL ), // 0x75
M3OP( "i32.shr_u", -1, i_32, d_binOpList (u32, ShiftRight) , NULL ), // 0x76
M3OP( "i32.rotl", -1, i_32, d_binOpList (u32, Rotl) , NULL ), // 0x77
M3OP( "i32.rotr", -1, i_32, d_binOpList (u32, Rotr) , NULL ), // 0x78
M3OP( "i64.clz", 0, i_64, d_unaryOpList (u64, Clz) , NULL ), // 0x79
M3OP( "i64.ctz", 0, i_64, d_unaryOpList (u64, Ctz) , NULL ), // 0x7a
M3OP( "i64.popcnt", 0, i_64, d_unaryOpList (u64, Popcnt) , NULL ), // 0x7b
M3OP( "i64.add", -1, i_64, d_commutativeBinOpList (i64, Add) , NULL ), // 0x7c
M3OP( "i64.sub", -1, i_64, d_binOpList (i64, Subtract) , NULL ), // 0x7d
M3OP( "i64.mul", -1, i_64, d_commutativeBinOpList (i64, Multiply) , NULL ), // 0x7e
M3OP( "i64.div_s", -1, i_64, d_binOpList (i64, Divide) , NULL ), // 0x7f
M3OP( "i64.div_u", -1, i_64, d_binOpList (u64, Divide) , NULL ), // 0x80
M3OP( "i64.rem_s", -1, i_64, d_binOpList (i64, Remainder) , NULL ), // 0x81
M3OP( "i64.rem_u", -1, i_64, d_binOpList (u64, Remainder) , NULL ), // 0x82
M3OP( "i64.and", -1, i_64, d_commutativeBinOpList (u64, And) , NULL ), // 0x83
M3OP( "i64.or", -1, i_64, d_commutativeBinOpList (u64, Or) , NULL ), // 0x84
M3OP( "i64.xor", -1, i_64, d_commutativeBinOpList (u64, Xor) , NULL ), // 0x85
M3OP( "i64.shl", -1, i_64, d_binOpList (u64, ShiftLeft) , NULL ), // 0x86
M3OP( "i64.shr_s", -1, i_64, d_binOpList (i64, ShiftRight) , NULL ), // 0x87
M3OP( "i64.shr_u", -1, i_64, d_binOpList (u64, ShiftRight) , NULL ), // 0x88
M3OP( "i64.rotl", -1, i_64, d_binOpList (u64, Rotl) , NULL ), // 0x89
M3OP( "i64.rotr", -1, i_64, d_binOpList (u64, Rotr) , NULL ), // 0x8a
M3OP_F( "f32.abs", 0, f_32, d_unaryOpList(f32, Abs) , NULL ), // 0x8b
M3OP_F( "f32.neg", 0, f_32, d_unaryOpList(f32, Negate) , NULL ), // 0x8c
M3OP_F( "f32.ceil", 0, f_32, d_unaryOpList(f32, Ceil) , NULL ), // 0x8d
M3OP_F( "f32.floor", 0, f_32, d_unaryOpList(f32, Floor) , NULL ), // 0x8e
M3OP_F( "f32.trunc", 0, f_32, d_unaryOpList(f32, Trunc) , NULL ), // 0x8f
M3OP_F( "f32.nearest", 0, f_32, d_unaryOpList(f32, Nearest) , NULL ), // 0x90
M3OP_F( "f32.sqrt", 0, f_32, d_unaryOpList(f32, Sqrt) , NULL ), // 0x91
M3OP_F( "f32.add", -1, f_32, d_commutativeBinOpList (f32, Add) , NULL ), // 0x92
M3OP_F( "f32.sub", -1, f_32, d_binOpList (f32, Subtract) , NULL ), // 0x93
M3OP_F( "f32.mul", -1, f_32, d_commutativeBinOpList (f32, Multiply) , NULL ), // 0x94
M3OP_F( "f32.div", -1, f_32, d_binOpList (f32, Divide) , NULL ), // 0x95
M3OP_F( "f32.min", -1, f_32, d_commutativeBinOpList (f32, Min) , NULL ), // 0x96
M3OP_F( "f32.max", -1, f_32, d_commutativeBinOpList (f32, Max) , NULL ), // 0x97
M3OP_F( "f32.copysign", -1, f_32, d_binOpList (f32, CopySign) , NULL ), // 0x98
M3OP_F( "f64.abs", 0, f_64, d_unaryOpList(f64, Abs) , NULL ), // 0x99
M3OP_F( "f64.neg", 0, f_64, d_unaryOpList(f64, Negate) , NULL ), // 0x9a
M3OP_F( "f64.ceil", 0, f_64, d_unaryOpList(f64, Ceil) , NULL ), // 0x9b
M3OP_F( "f64.floor", 0, f_64, d_unaryOpList(f64, Floor) , NULL ), // 0x9c
M3OP_F( "f64.trunc", 0, f_64, d_unaryOpList(f64, Trunc) , NULL ), // 0x9d
M3OP_F( "f64.nearest", 0, f_64, d_unaryOpList(f64, Nearest) , NULL ), // 0x9e
M3OP_F( "f64.sqrt", 0, f_64, d_unaryOpList(f64, Sqrt) , NULL ), // 0x9f
M3OP_F( "f64.add", -1, f_64, d_commutativeBinOpList (f64, Add) , NULL ), // 0xa0
M3OP_F( "f64.sub", -1, f_64, d_binOpList (f64, Subtract) , NULL ), // 0xa1
M3OP_F( "f64.mul", -1, f_64, d_commutativeBinOpList (f64, Multiply) , NULL ), // 0xa2
M3OP_F( "f64.div", -1, f_64, d_binOpList (f64, Divide) , NULL ), // 0xa3
M3OP_F( "f64.min", -1, f_64, d_commutativeBinOpList (f64, Min) , NULL ), // 0xa4
M3OP_F( "f64.max", -1, f_64, d_commutativeBinOpList (f64, Max) , NULL ), // 0xa5
M3OP_F( "f64.copysign", -1, f_64, d_binOpList (f64, CopySign) , NULL ), // 0xa6
M3OP( "i32.wrap/i64", 0, i_32, d_unaryOpList (i32, Wrap_i64), NULL ), // 0xa7
M3OP_F( "i32.trunc_s/f32", 0, i_32, d_convertOpList (i32_Trunc_f32), Compile_Convert ), // 0xa8
M3OP_F( "i32.trunc_u/f32", 0, i_32, d_convertOpList (u32_Trunc_f32), Compile_Convert ), // 0xa9
M3OP_F( "i32.trunc_s/f64", 0, i_32, d_convertOpList (i32_Trunc_f64), Compile_Convert ), // 0xaa
M3OP_F( "i32.trunc_u/f64", 0, i_32, d_convertOpList (u32_Trunc_f64), Compile_Convert ), // 0xab
M3OP( "i64.extend_s/i32", 0, i_64, d_unaryOpList (i64, Extend_i32), NULL ), // 0xac
M3OP( "i64.extend_u/i32", 0, i_64, d_unaryOpList (i64, Extend_u32), NULL ), // 0xad
M3OP_F( "i64.trunc_s/f32", 0, i_64, d_convertOpList (i64_Trunc_f32), Compile_Convert ), // 0xae
M3OP_F( "i64.trunc_u/f32", 0, i_64, d_convertOpList (u64_Trunc_f32), Compile_Convert ), // 0xaf
M3OP_F( "i64.trunc_s/f64", 0, i_64, d_convertOpList (i64_Trunc_f64), Compile_Convert ), // 0xb0
M3OP_F( "i64.trunc_u/f64", 0, i_64, d_convertOpList (u64_Trunc_f64), Compile_Convert ), // 0xb1
M3OP_F( "f32.convert_s/i32",0, f_32, d_convertOpList (f32_Convert_i32), Compile_Convert ), // 0xb2
M3OP_F( "f32.convert_u/i32",0, f_32, d_convertOpList (f32_Convert_u32), Compile_Convert ), // 0xb3
M3OP_F( "f32.convert_s/i64",0, f_32, d_convertOpList (f32_Convert_i64), Compile_Convert ), // 0xb4
M3OP_F( "f32.convert_u/i64",0, f_32, d_convertOpList (f32_Convert_u64), Compile_Convert ), // 0xb5
M3OP_F( "f32.demote/f64", 0, f_32, d_unaryOpList (f32, Demote_f64), NULL ), // 0xb6
M3OP_F( "f64.convert_s/i32",0, f_64, d_convertOpList (f64_Convert_i32), Compile_Convert ), // 0xb7
M3OP_F( "f64.convert_u/i32",0, f_64, d_convertOpList (f64_Convert_u32), Compile_Convert ), // 0xb8
M3OP_F( "f64.convert_s/i64",0, f_64, d_convertOpList (f64_Convert_i64), Compile_Convert ), // 0xb9
M3OP_F( "f64.convert_u/i64",0, f_64, d_convertOpList (f64_Convert_u64), Compile_Convert ), // 0xba
M3OP_F( "f64.promote/f32", 0, f_64, d_unaryOpList (f64, Promote_f32), NULL ), // 0xbb
M3OP_F( "i32.reinterpret/f32",0,i_32, d_convertOpList (i32_Reinterpret_f32), Compile_Convert ), // 0xbc
M3OP_F( "i64.reinterpret/f64",0,i_64, d_convertOpList (i64_Reinterpret_f64), Compile_Convert ), // 0xbd
M3OP_F( "f32.reinterpret/i32",0,f_32, d_convertOpList (f32_Reinterpret_i32), Compile_Convert ), // 0xbe
M3OP_F( "f64.reinterpret/i64",0,f_64, d_convertOpList (f64_Reinterpret_i64), Compile_Convert ), // 0xbf
M3OP( "i32.extend8_s", 0, i_32, d_unaryOpList (i32, Extend8_s), NULL ), // 0xc0
M3OP( "i32.extend16_s", 0, i_32, d_unaryOpList (i32, Extend16_s), NULL ), // 0xc1
M3OP( "i64.extend8_s", 0, i_64, d_unaryOpList (i64, Extend8_s), NULL ), // 0xc2
M3OP( "i64.extend16_s", 0, i_64, d_unaryOpList (i64, Extend16_s), NULL ), // 0xc3
M3OP( "i64.extend32_s", 0, i_64, d_unaryOpList (i64, Extend32_s), NULL ), // 0xc4
# ifdef DEBUG // for codepage logging. the order doesn't matter:
# define d_m3DebugOp(OP) M3OP (#OP, 0, none, { op_##OP })
# if d_m3HasFloat
# define d_m3DebugTypedOp(OP) M3OP (#OP, 0, none, { op_##OP##_i32, op_##OP##_i64, op_##OP##_f32, op_##OP##_f64, })
# else
# define d_m3DebugTypedOp(OP) M3OP (#OP, 0, none, { op_##OP##_i32, op_##OP##_i64 })
# endif
d_m3DebugOp (Compile), d_m3DebugOp (Entry), d_m3DebugOp (End),
d_m3DebugOp (Unsupported), d_m3DebugOp (CallRawFunction),
d_m3DebugOp (GetGlobal_s32), d_m3DebugOp (GetGlobal_s64), d_m3DebugOp (ContinueLoop), d_m3DebugOp (ContinueLoopIf),
d_m3DebugOp (CopySlot_32), d_m3DebugOp (PreserveCopySlot_32), d_m3DebugOp (If_s), d_m3DebugOp (BranchIfPrologue_s),
d_m3DebugOp (CopySlot_64), d_m3DebugOp (PreserveCopySlot_64), d_m3DebugOp (If_r), d_m3DebugOp (BranchIfPrologue_r),
d_m3DebugOp (Select_i32_rss), d_m3DebugOp (Select_i32_srs), d_m3DebugOp (Select_i32_ssr), d_m3DebugOp (Select_i32_sss),
d_m3DebugOp (Select_i64_rss), d_m3DebugOp (Select_i64_srs), d_m3DebugOp (Select_i64_ssr), d_m3DebugOp (Select_i64_sss),
# if d_m3HasFloat
d_m3DebugOp (Select_f32_sss), d_m3DebugOp (Select_f32_srs), d_m3DebugOp (Select_f32_ssr),
d_m3DebugOp (Select_f32_rss), d_m3DebugOp (Select_f32_rrs), d_m3DebugOp (Select_f32_rsr),
d_m3DebugOp (Select_f64_sss), d_m3DebugOp (Select_f64_srs), d_m3DebugOp (Select_f64_ssr),
d_m3DebugOp (Select_f64_rss), d_m3DebugOp (Select_f64_rrs), d_m3DebugOp (Select_f64_rsr),
# endif
d_m3DebugOp (MemFill), d_m3DebugOp (MemCopy),
d_m3DebugTypedOp (SetGlobal), d_m3DebugOp (SetGlobal_s32), d_m3DebugOp (SetGlobal_s64),
d_m3DebugTypedOp (SetRegister), d_m3DebugTypedOp (SetSlot), d_m3DebugTypedOp (PreserveSetSlot),
# endif
# if d_m3CascadedOpcodes
[c_waOp_extended] = M3OP( "0xFC", 0, c_m3Type_unknown, d_emptyOpList, Compile_ExtendedOpcode ),
# endif
# ifdef DEBUG
M3OP( "termination", 0, c_m3Type_unknown ) // for find_operation_info
# endif
};
// Operation table for the extended (prefixed) opcode group: the saturating
// float-to-int truncations (0x00-0x07) and the bulk memory operations
// memory.copy / memory.fill (0x0a, 0x0b). Slots 0x08 and 0x09 are filled with
// M3OP_RESERVED placeholders so that the entry position matches the opcode's
// low byte; GetOpInfo () indexes this table with (opcode & 0xFF) when the
// opcode's high byte equals c_waOp_extended.
// In DEBUG builds a trailing "termination" entry is appended as an
// end-of-table marker for find_operation_info.
const M3OpInfo c_operationsFC [] =
{
M3OP_F( "i32.trunc_s:sat/f32",0, i_32, d_convertOpList (i32_TruncSat_f32), Compile_Convert ), // 0x00
M3OP_F( "i32.trunc_u:sat/f32",0, i_32, d_convertOpList (u32_TruncSat_f32), Compile_Convert ), // 0x01
M3OP_F( "i32.trunc_s:sat/f64",0, i_32, d_convertOpList (i32_TruncSat_f64), Compile_Convert ), // 0x02
M3OP_F( "i32.trunc_u:sat/f64",0, i_32, d_convertOpList (u32_TruncSat_f64), Compile_Convert ), // 0x03
M3OP_F( "i64.trunc_s:sat/f32",0, i_64, d_convertOpList (i64_TruncSat_f32), Compile_Convert ), // 0x04
M3OP_F( "i64.trunc_u:sat/f32",0, i_64, d_convertOpList (u64_TruncSat_f32), Compile_Convert ), // 0x05
M3OP_F( "i64.trunc_s:sat/f64",0, i_64, d_convertOpList (i64_TruncSat_f64), Compile_Convert ), // 0x06
M3OP_F( "i64.trunc_u:sat/f64",0, i_64, d_convertOpList (u64_TruncSat_f64), Compile_Convert ), // 0x07
// 0x08, 0x09: placeholders that keep the following entries at their opcode index
M3OP_RESERVED, M3OP_RESERVED,
M3OP( "memory.copy", 0, none, d_emptyOpList, Compile_Memory_CopyFill ), // 0x0a
M3OP( "memory.fill", 0, none, d_emptyOpList, Compile_Memory_CopyFill ), // 0x0b
# ifdef DEBUG
M3OP( "termination", 0, c_m3Type_unknown ) // for find_operation_info
# endif
};
// Looks up the operation descriptor for a (possibly prefixed) opcode.
//
// The high byte of `opcode` selects the table: 0x00 -> c_operations,
// c_waOp_extended -> c_operationsFC (indexed by the low byte).
// Returns NULL when the prefix is not recognised or the index lies beyond
// the end of the selected table.
IM3OpInfo GetOpInfo (m3opcode_t opcode)
{
    IM3OpInfo info = NULL;
    const int prefix = opcode >> 8;

    if (prefix == 0x00)
    {
        if (M3_LIKELY (opcode < M3_COUNT_OF (c_operations)))
            info = & c_operations [opcode];
    }
    else if (prefix == c_waOp_extended)
    {
        // only the low byte addresses the extended table
        const m3opcode_t index = opcode & 0xFF;

        if (M3_LIKELY (index < M3_COUNT_OF (c_operationsFC)))
            info = & c_operationsFC [index];
    }

    return info;
}
// Compiles opcodes from o->wasm one at a time until the current block is
// closed by an `end` (or by an `else`, which is only accepted while compiling
// the body of an `if`).
//
// When no function is being compiled (o->function is NULL, i.e. an expression
// is being evaluated) only constant, global.get and end opcodes are accepted.
//
// Returns m3Err_none on success; m3Err_wasmMalformed if the bytecode runs out
// before a terminator is seen or an `else` appears outside an `if`;
// m3Err_restrictedOpcode / an unknown-opcode error for disallowed or
// unrecognised opcodes; otherwise whatever error the opcode's compiler reports.
M3Result CompileBlockStatements (IM3Compilation o)
{
    M3Result result = m3Err_none;
    bool blockClosed = false;

    while (not blockClosed and o->wasm < o->wasmEnd)
    {
# if d_m3EnableOpTracing
        if (o->numEmits)
        {
            EmitOp (o, op_DumpStack);
            EmitConstant32 (o, o->numOpcodes);
            EmitConstant32 (o, GetMaxUsedSlotPlusOne(o));
            EmitPointer (o, o->function);
            o->numEmits = 0;
        }
# endif
        // remember where this opcode begins before consuming it
        o->lastOpcodeStart = o->wasm;

        m3opcode_t opcode;
_       (Read_opcode (& opcode, & o->wasm, o->wasmEnd));                log_opcode (o, opcode);

        // Restrict opcodes when evaluating expressions
        if (not o->function)
        {
            const bool permitted = opcode == c_waOp_i32_const
                                or opcode == c_waOp_i64_const
                                or opcode == c_waOp_f32_const
                                or opcode == c_waOp_f64_const
                                or opcode == c_waOp_getGlobal
                                or opcode == c_waOp_end;
            if (not permitted)
            {
                _throw(m3Err_restrictedOpcode);
            }
        }

        IM3OpInfo opinfo = GetOpInfo (opcode);
        if (opinfo == NULL)
        {
            _throw (ErrorCompile (m3Err_unknownOpcode, o, "opcode '%x' not available", opcode));
        }

        // opcodes without a dedicated compiler go through the generic operator path
        if (not opinfo->compiler)
        {
_           (Compile_Operator (o, opcode));
        }
        else
        {
_           ((* opinfo->compiler) (o, opcode))
        }

        o->previousOpcode = opcode;

        if (opcode == c_waOp_else)
        {
            // `else` may only terminate the body of an `if`
            _throwif (m3Err_wasmMalformed, o->block.opcode != c_waOp_if);
        }

        blockClosed = (opcode == c_waOp_else or opcode == c_waOp_end);
    }

    // running out of bytecode without a terminator is malformed
    _throwif(m3Err_wasmMalformed, not blockClosed);

_catch:
    return result;
}
// Pushes one stack entry for every result of the current block's type
// (o->block.type), in declaration order. The final result is pushed as a
// register when it is a floating-point type; every other result gets an
// allocated slot. Stops at, and returns, the first error reported by a push.
static
M3Result PushBlockResults (IM3Compilation o)
{
    M3Result result = m3Err_none;

    const u16 resultCount = GetFuncTypeNumResults (o->block.type);

    for (u16 index = 0; index < resultCount; ++index)
    {
        const u8 resultType = GetFuncTypeResultType (o->block.type, index);
        const bool isLast = (index + 1 == resultCount);

        if (isLast and IsFpType (resultType))
        {
_           (PushRegister (o, resultType));
        }
        else
        {
_           (PushAllocatedSlot (o, resultType));
        }
    }

_catch:
    return result;
}
// Compiles one structured block (block / loop / if / else body) of type
// `i_blockType`, introduced by opcode `i_blockOpcode`.
//
// A new compilation scope is installed in o->block for the duration of the
// call; the previous scope is kept in a local and restored on success.
// On any error the function returns immediately via _catch and o->block is
// NOT restored (the compilation is expected to be abandoned).
//
// Returns m3Err_none on success, otherwise the first error produced while
// validating the params, reserving the result slots, compiling the block's
// statements or resolving its results.
M3Result CompileBlock (IM3Compilation o, IM3FuncType i_blockType, m3opcode_t i_blockOpcode)
{
                                                                                        d_m3Assert (not IsRegisterAllocated (o, 0));
                                                                                        d_m3Assert (not IsRegisterAllocated (o, 1));
    M3CompilationScope outerScope = o->block;
    M3CompilationScope * block = & o->block;

    block->outer            = & outerScope;
    block->pc               = GetPagePC (o->page);
    block->patches          = NULL;
    block->type             = i_blockType;
    block->depth            ++;
    block->opcode           = i_blockOpcode;

    /*
        The block stack frame is a little strange but for good reasons. Because blocks need to be restarted to
        compile different pathways (if/else), the incoming params must be saved. The parameters are popped
        and validated. But, then the stack top is readjusted so they aren't subsequently overwritten.
        Next, the results are preallocated to find destination slots. But again these are immediately popped
        (deallocated) and the stack top is readjusted to keep these records in place. This allows branch instructions
        to find their result landing pads. Finally, the params are copied from the "dead" records and pushed back
        onto the stack as active stack items for the CompileBlockStatements () call.

        [     block      ]
        [     params     ]
        ------------------
        [     result     ]  <---- blockStackIndex
        [     slots      ]
        ------------------
        [   saved param  ]
        [     records    ]
                            <----- exitStackIndex
    */

_try {
    // validate and dealloc params ----------------------------

    u16 stackIndex = o->stackIndex;

    u16 numParams = GetFuncTypeNumParams (i_blockType);

    if (i_blockOpcode != c_waOp_else)
    {
        // params are popped last-to-first so each type is checked against the stack top
        for (u16 i = 0; i < numParams; ++i)
        {
            u8 type = GetFuncTypeParamType (i_blockType, numParams - 1 - i);
_           (PopType (o, type));
        }
    }
    else o->stackIndex -= numParams;

    u16 paramIndex = o->stackIndex;
    block->exitStackIndex = paramIndex; // consume the params at block exit

    // keep copies of param slots in the stack
    o->stackIndex = stackIndex;

    // find slots for the results ----------------------------
    // (a failure here, e.g. running out of stack or slots, must abort the
    // block instead of being silently ignored)
_   (PushBlockResults (o));

    stackIndex = o->stackIndex;

    // dealloc but keep record of the result slots in the stack.
    // numResults is deliberately left intact by this loop: it is tested again
    // after the block body has been compiled.
    const u16 numResults = GetFuncTypeNumResults (i_blockType);
    for (u16 i = 0; i < numResults; ++i)
        Pop (o);

    block->blockStackIndex = o->stackIndex = stackIndex;

    // push the params back onto the stack -------------------
    for (u16 i = 0; i < numParams; ++i)
    {
        u8 type = GetFuncTypeParamType (i_blockType, i);

        u16 slot = GetSlotForStackIndex (o, paramIndex + i);
        Push (o, type, slot);

        // only slots at or above slotFirstDynamicIndex are (re)marked as allocated
        if (slot >= o->slotFirstDynamicIndex)
            MarkSlotsAllocatedByType (o, slot, type);
    }

    //--------------------------------------------------------

_   (CompileBlockStatements (o));

_   (ValidateBlockEnd (o));

    if (o->function)    // skip for expressions
    {
        if (not IsStackPolymorphic (o))
_           (ResolveBlockResults (o, & o->block, /* isBranch: */ false));

_       (UnwindBlockStack (o))

        // The results are not pushed here when an `if` that produces values
        // has just been compiled, or when the block was terminated by `else`.
        if (not ((i_blockOpcode == c_waOp_if and numResults) or o->previousOpcode == c_waOp_else))
        {
            o->stackIndex = o->block.exitStackIndex;
_           (PushBlockResults (o));
        }
    }

    PatchBranches (o);

    o->block = outerScope;

}   _catch: return result;
}
// Reads the local-variable declarations at o->wasm: a LEB-encoded number of
// groups, each group being a (count, type) pair. One allocated slot is pushed
// for every declared local, and the total number of locals is stored in
// o->function->numLocals when a function is being compiled.
// Returns the first error reported while reading or pushing.
static
M3Result CompileLocals (IM3Compilation o)
{
    M3Result result = m3Err_none;

    u32 totalLocals = 0;

    u32 groupCount;
_   (ReadLEB_u32 (& groupCount, & o->wasm, o->wasmEnd));

    for (u32 group = 0; group < groupCount; ++group)
    {
        u32 countInGroup;
        i8 encodedType;
        u8 slotType;

_       (ReadLEB_u32 (& countInGroup, & o->wasm, o->wasmEnd));
_       (ReadLEB_i7 (& encodedType, & o->wasm, o->wasmEnd));
_       (NormalizeType (& slotType, encodedType));

        totalLocals += countInGroup;                                    m3log (compile, "pushing locals. count: %d; type: %s", countInGroup, c_waTypes [slotType]);

        for (u32 pushed = 0; pushed < countInGroup; ++pushed)
        {
_           (PushAllocatedSlot (o, slotType));
        }
    }

    if (o->function)
    {
        o->function->numLocals = totalLocals;
    }

_catch:
    return result;
}
// Estimates how many constant slots the function body needs and positions
// o->slotFirstDynamicIndex directly after that reserved region.
//
// Returns m3Err_functionStackOverflow when the resulting first dynamic slot
// index reaches d_m3MaxFunctionSlots, m3Err_none otherwise.
static
M3Result ReserveConstants (IM3Compilation o)
{
    M3Result result = m3Err_none;

    // For speed the body is not decoded: every byte between o->wasm and
    // o->wasmEnd that merely has the value of a const opcode is counted.
    u16 reservedSlots = 0;

    for (bytes_t cursor = o->wasm; cursor < o->wasmEnd; ++cursor)
    {
        switch (* cursor)
        {
            case c_waOp_i32_const:
            case c_waOp_f32_const:
                reservedSlots += 1;
                break;

            case c_waOp_i64_const:
            case c_waOp_f64_const:
                reservedSlots += GetTypeNumSlots (c_m3Type_i64);
                break;

            default:
                break;
        }

        // stop scanning once the constant table limit has been reached
        if (reservedSlots >= d_m3MaxConstantTableSize)
            break;
    }

    // if constants overflow their reserved stack space, the compiler simply emits op_Const
    // operations as needed. Compiled expressions (global inits) don't pass through this
    // ReserveConstants function and thus always produce inline constants.
    AlignSlotToType (& reservedSlots, c_m3Type_i64);                    m3log (compile, "reserved constant slots: %d", reservedSlots);

    o->slotFirstDynamicIndex = o->slotFirstConstIndex + reservedSlots;

    _throwif (m3Err_functionStackOverflow, o->slotFirstDynamicIndex >= d_m3MaxFunctionSlots);

_catch:
    return result;
}
// Compiles the Wasm body of `io_function` into the runtime's code pages.
//
// The runtime's shared M3Compilation state (runtime->compilation) is zeroed
// and reused for this function. The slot layout is built up in order:
// return slots, argument slots, locals, reserved constant slots, and finally
// the dynamic slots used while compiling the body.
//
// On success io_function receives: compiled (entry pc), maxStackSlots,
// numConstantBytes and, when any constants were collected, a heap copy of the
// constant table in `constants`.
//
// Returns m3Err_none on success, "function body is missing" when the function
// has no wasm bytes, or the first error raised during compilation. The
// compilation code page is released on every path that reaches _catch.
M3Result CompileFunction (IM3Function io_function)
{
    if (!io_function->wasm) return "function body is missing";

    IM3FuncType funcType = io_function->funcType;                   m3log (compile, "compiling: [%d] %s %s; wasm-size: %d",
                                                                        io_function->index, m3_GetFunctionName (io_function), SPrintFuncTypeSignature (funcType), (u32) (io_function->wasmEnd - io_function->wasm));
    IM3Runtime runtime = io_function->module->runtime;

    IM3Compilation o = & runtime->compilation;                      d_m3Assert (d_m3MaxFunctionSlots >= d_m3MaxFunctionStackHeight * (d_m3Use32BitSlots + 1))  // need twice as many slots in 32-bit mode
    memset (o, 0x0, sizeof (M3Compilation));

    o->runtime  = runtime;
    o->module   = io_function->module;
    o->function = io_function;
    o->wasm     = io_function->wasm;
    o->wasmEnd  = io_function->wasmEnd;
    o->block.type = funcType;

_try {
    // skip over code size. the end was already calculated during parse phase
    u32 size;
_   (ReadLEB_u32 (& size, & o->wasm, o->wasmEnd));                  d_m3Assert (size == (o->wasmEnd - o->wasm))

_   (AcquireCompilationCodePage (o, & o->page));

    // entry point of the compiled code; only published to io_function once
    // the whole compilation (including the constant table copy) has succeeded
    pc_t pc = GetPagePC (o->page);

    // the return-value slots come first and are marked as allocated
    u16 numRetSlots = GetFunctionNumReturns (o->function) * c_ioSlotCount;

    for (u16 i = 0; i < numRetSlots; ++i)
        MarkSlotAllocated (o, i);

    o->function->numRetSlots = o->slotFirstDynamicIndex = numRetSlots;

    u16 numArgs = GetFunctionNumArgs (o->function);

    // push the arg types to the type stack
    for (u16 i = 0; i < numArgs; ++i)
    {
        u8 type = GetFunctionArgType (o->function, i);
_       (PushAllocatedSlot (o, type));

        // prevent allocator fill-in
        o->slotFirstDynamicIndex += c_ioSlotCount;
    }

    o->slotMaxAllocatedIndexPlusOne = o->function->numRetAndArgSlots = o->slotFirstLocalIndex = o->slotFirstDynamicIndex;

_   (CompileLocals (o));

    u16 maxSlot = GetMaxUsedSlotPlusOne (o);

    o->function->numLocalBytes = (maxSlot - o->slotFirstLocalIndex) * sizeof (m3slot_t);

    // the constant region starts right after the locals
    o->slotFirstConstIndex = o->slotMaxConstIndex = maxSlot;

    // ReserveConstants initializes o->slotFirstDynamicIndex
_   (ReserveConstants (o));

    // start tracking the max stack used (Push() also updates this value) so that op_Entry can precisely detect stack overflow
    o->maxStackSlots = o->slotMaxAllocatedIndexPlusOne = o->slotFirstDynamicIndex;

    o->block.blockStackIndex = o->stackFirstDynamicIndex = o->stackIndex;                           m3log (compile, "start stack index: %d",
                                                                                                          (u32) o->stackFirstDynamicIndex);
_   (EmitOp (o, op_Entry));
    EmitPointer (o, io_function);

_   (CompileBlockStatements (o));

    // TODO: validate opcode sequences
    _throwif(m3Err_wasmMalformed, o->previousOpcode != c_waOp_end);

    io_function->maxStackSlots = o->maxStackSlots;

    u16 numConstantSlots = o->slotMaxConstIndex - o->slotFirstConstIndex;                           m3log (compile, "unique constant slots: %d; unused slots: %d",
                                                                                                           numConstantSlots, o->slotFirstDynamicIndex - o->slotMaxConstIndex);
    io_function->numConstantBytes = numConstantSlots * sizeof (m3slot_t);

    if (numConstantSlots)
    {
        io_function->constants = m3_CopyMem (o->constants, io_function->numConstantBytes);
        _throwifnull(io_function->constants);
    }

    // Publish the entry point last: if copying the constant table failed
    // above, the function must not be left marked as compiled while its
    // constants are missing.
    io_function->compiled = pc;

} _catch:

    ReleaseCompilationCodePage (o);

    return result;
}