Unverified Commit 62c6e2f7 authored by Arisotura's avatar Arisotura Committed by GitHub
Browse files

Merge pull request #667 from Arisotura/generic_jit

merge jit
parents d97ce22b c5381d29
......@@ -9,3 +9,5 @@ melon_grc.h
cmake-build
cmake-build-debug
.idea
*.exe
......@@ -14,6 +14,42 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
include(CheckSymbolExists)
function(detect_architecture symbol arch)
if (NOT DEFINED ARCHITECTURE)
set(CMAKE_REQUIRED_QUIET 1)
check_symbol_exists("${symbol}" "" ARCHITECTURE_${arch})
unset(CMAKE_REQUIRED_QUIET)
# The output variable needs to be unique across invocations otherwise
# CMake's crazy scope rules will keep it defined
if (ARCHITECTURE_${arch})
set(ARCHITECTURE "${arch}" PARENT_SCOPE)
set(ARCHITECTURE_${arch} 1 PARENT_SCOPE)
add_definitions(-DARCHITECTURE_${arch}=1)
endif()
endif()
endfunction()
detect_architecture("__x86_64__" x86_64)
detect_architecture("__i386__" x86)
detect_architecture("__arm__" ARM)
detect_architecture("__aarch64__" ARM64)
if (ARCHITECTURE STREQUAL x86_64 OR ARCHITECTURE STREQUAL ARM64)
option(ENABLE_JIT "Enable x64 JIT recompiler" ON)
endif()
if (ENABLE_JIT)
add_definitions(-DJIT_ENABLED)
endif()
if (CMAKE_BUILD_TYPE STREQUAL Release)
option(ENABLE_LTO "Enable link-time optimization" ON)
else()
option(ENABLE_LTO "Enable link-time optimization" OFF)
endif()
if (CMAKE_BUILD_TYPE STREQUAL Debug)
add_compile_options(-Og)
endif()
......
......@@ -21,8 +21,15 @@
#include "DSi.h"
#include "ARM.h"
#include "ARMInterpreter.h"
#include "Config.h"
#include "AREngine.h"
#include "ARMJIT.h"
#include "Config.h"
#ifdef JIT_ENABLED
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
#endif
// instruction timing notes
//
......@@ -72,7 +79,9 @@ ARM::~ARM()
ARMv5::ARMv5() : ARM(0)
{
//
#ifndef JIT_ENABLED
DTCM = new u8[DTCMSize];
#endif
}
ARMv4::ARMv4() : ARM(1)
......@@ -80,6 +89,13 @@ ARMv4::ARMv4() : ARM(1)
//
}
ARMv5::~ARMv5()
{
#ifndef JIT_ENABLED
delete[] DTCM;
#endif
}
void ARM::Reset()
{
Cycles = 0;
......@@ -96,6 +112,12 @@ void ARM::Reset()
CodeMem.Mem = NULL;
#ifdef JIT_ENABLED
FastBlockLookup = NULL;
FastBlockLookupStart = 0;
FastBlockLookupSize = 0;
#endif
// zorp
JumpTo(ExceptionBase);
}
......@@ -123,7 +145,6 @@ void ARMv5::Reset()
GetMemRegion = NDS::ARM9GetMemRegion;
}
CP15Reset();
ARM::Reset();
}
......@@ -158,7 +179,11 @@ void ARM::DoSavestate(Savestate* file)
file->Var32((u32*)&Cycles);
//file->Var32((u32*)&CyclesToRun);
file->Var32(&Halted);
// hack to make save states compatible
u32 halted = Halted;
file->Var32(&halted);
Halted = halted;
file->VarArray(R, 16*sizeof(u32));
file->Var32(&CPSR);
......@@ -168,6 +193,15 @@ void ARM::DoSavestate(Savestate* file)
file->VarArray(R_IRQ, 3*sizeof(u32));
file->VarArray(R_UND, 3*sizeof(u32));
file->Var32(&CurInstr);
#ifdef JIT_ENABLED
if (!file->Saving && Config::JIT_Enable)
{
// hack, the JIT doesn't really pipeline
// but we still want JIT save states to be
// loaded while running the interpreter
FillPipeline();
}
#endif
file->VarArray(NextInstr, 2*sizeof(u32));
file->Var32(&ExceptionBase);
......@@ -240,15 +274,15 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
if (addr & 0x2)
{
NextInstr[0] = CodeRead32(addr-2, true) >> 16;
Cycles += CodeCycles;
Cycles -= CodeCycles;
NextInstr[1] = CodeRead32(addr+2, false);
Cycles += CodeCycles;
Cycles -= CodeCycles;
}
else
{
NextInstr[0] = CodeRead32(addr, true);
NextInstr[1] = NextInstr[0] >> 16;
Cycles += CodeCycles;
Cycles -= CodeCycles;
}
CPSR |= 0x20;
......@@ -261,9 +295,9 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
if (newregion != oldregion) SetupCodeMem(addr);
NextInstr[0] = CodeRead32(addr, true);
Cycles += CodeCycles;
Cycles -= CodeCycles;
NextInstr[1] = CodeRead32(addr+4, false);
Cycles += CodeCycles;
Cycles -= CodeCycles;
CPSR &= ~0x20;
}
......@@ -303,7 +337,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr)
NextInstr[0] = CodeRead16(addr);
NextInstr[1] = CodeRead16(addr+2);
Cycles += NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1];
Cycles -= NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1];
CPSR |= 0x20;
}
......@@ -316,7 +350,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr)
NextInstr[0] = CodeRead32(addr);
NextInstr[1] = CodeRead32(addr+4);
Cycles += NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3];
Cycles -= NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3];
CPSR &= ~0x20;
}
......@@ -558,7 +592,7 @@ void ARMv5::Execute()
else
AddCycles_C();
}
// TODO optimize this shit!!!
if (Halted)
{
......@@ -575,7 +609,7 @@ void ARMv5::Execute()
}*/
if (IRQ) TriggerIRQ();
NDS::ARM9Timestamp += Cycles;
NDS::ARM9Timestamp -= Cycles;
Cycles = 0;
}
......@@ -583,6 +617,75 @@ void ARMv5::Execute()
Halted = 0;
}
#ifdef JIT_ENABLED
void ARMv5::ExecuteJIT()
{
if (Halted)
{
if (Halted == 2)
{
Halted = 0;
}
else if (NDS::HaltInterrupted(0))
{
Halted = 0;
if (NDS::IME[0] & 0x1)
TriggerIRQ();
}
else
{
NDS::ARM9Timestamp = NDS::ARM9Target;
return;
}
}
while (NDS::ARM9Timestamp < NDS::ARM9Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
// hack so Cycles <= 0 becomes Cycles < 0
Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize))
&& !ARMJIT::SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize))
{
NDS::ARM9Timestamp = NDS::ARM9Target;
printf("ARMv5 PC in non executable region %08X\n", R[15]);
return;
}
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(0, FastBlockLookup,
instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
NDS::ARM9Timestamp = NDS::ARM9Target - Cycles - 1;
if (StopExecution)
{
if (IRQ)
TriggerIRQ();
if (Halted || IdleLoop)
{
bool idleLoop = IdleLoop;
IdleLoop = 0;
if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target)
{
NDS::ARM9Timestamp = NDS::ARM9Target;
}
break;
}
}
}
if (Halted == 2)
Halted = 0;
}
#endif
void ARMv4::Execute()
{
if (Halted)
......@@ -652,10 +755,131 @@ void ARMv4::Execute()
}*/
if (IRQ) TriggerIRQ();
NDS::ARM7Timestamp += Cycles;
NDS::ARM7Timestamp -= Cycles;
Cycles = 0;
}
if (Halted == 2)
Halted = 0;
if (Halted == 4)
{
DSi::SoftReset();
Halted = 2;
}
}
#ifdef JIT_ENABLED
void ARMv4::ExecuteJIT()
{
if (Halted)
{
if (Halted == 2)
{
Halted = 0;
}
else if (NDS::HaltInterrupted(1))
{
Halted = 0;
if (NDS::IME[1] & 0x1)
TriggerIRQ();
}
else
{
NDS::ARM7Timestamp = NDS::ARM7Target;
return;
}
}
while (NDS::ARM7Timestamp < NDS::ARM7Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize))
&& !ARMJIT::SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize))
{
NDS::ARM7Timestamp = NDS::ARM7Target;
printf("ARMv4 PC in non executable region %08X\n", R[15]);
return;
}
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(1, FastBlockLookup,
instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
NDS::ARM7Timestamp = NDS::ARM7Target - Cycles - 1;
// TODO optimize this shit!!!
if (StopExecution)
{
if (IRQ)
TriggerIRQ();
if (Halted || IdleLoop)
{
bool idleLoop = IdleLoop;
IdleLoop = 0;
if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target)
{
NDS::ARM7Timestamp = NDS::ARM7Target;
}
break;
}
}
}
if (Halted == 2)
Halted = 0;
if (Halted == 4)
{
DSi::SoftReset();
Halted = 2;
}
}
#endif
void ARMv5::FillPipeline()
{
SetupCodeMem(R[15]);
if (CPSR & 0x20)
{
if ((R[15] - 2) & 0x2)
{
NextInstr[0] = CodeRead32(R[15] - 4, false) >> 16;
NextInstr[1] = CodeRead32(R[15], false);
}
else
{
NextInstr[0] = CodeRead32(R[15] - 2, false);
NextInstr[1] = NextInstr[0] >> 16;
}
}
else
{
NextInstr[0] = CodeRead32(R[15] - 4, false);
NextInstr[1] = CodeRead32(R[15], false);
}
}
void ARMv4::FillPipeline()
{
SetupCodeMem(R[15]);
if (CPSR & 0x20)
{
NextInstr[0] = CodeRead16(R[15] - 2);
NextInstr[1] = CodeRead16(R[15]);
}
else
{
NextInstr[0] = CodeRead32(R[15] - 4);
NextInstr[1] = CodeRead32(R[15]);
}
}
\ No newline at end of file
......@@ -32,16 +32,21 @@ enum
RWFlags_ForceUser = (1<<21),
};
const u32 ITCMPhysicalSize = 0x8000;
const u32 DTCMPhysicalSize = 0x4000;
class ARM
{
public:
ARM(u32 num);
~ARM(); // destroy shit
virtual ~ARM(); // destroy shit
virtual void Reset();
virtual void DoSavestate(Savestate* file);
virtual void FillPipeline() = 0;
virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0;
void RestoreCPSR();
......@@ -52,6 +57,9 @@ public:
}
virtual void Execute() = 0;
#ifdef ENABLE_JIT
virtual void ExecuteJIT() = 0;
#endif
bool CheckCondition(u32 code)
{
......@@ -107,9 +115,16 @@ public:
u32 Num;
s32 Cycles;
u32 Halted;
u32 IRQ; // nonzero to trigger IRQ
union
{
struct
{
u8 Halted;
u8 IRQ; // nonzero to trigger IRQ
u8 IdleLoop;
};
u32 StopExecution;
};
u32 CodeRegion;
s32 CodeCycles;
......@@ -131,6 +146,11 @@ public:
NDS::MemRegion CodeMem;
#ifdef JIT_ENABLED
u32 FastBlockLookupStart, FastBlockLookupSize;
u64* FastBlockLookup;
#endif
static u32 ConditionTable[16];
protected:
......@@ -146,6 +166,7 @@ class ARMv5 : public ARM
{
public:
ARMv5();
~ARMv5();
void Reset();
......@@ -153,12 +174,17 @@ public:
void UpdateRegionTimings(u32 addrstart, u32 addrend);
void FillPipeline();
void JumpTo(u32 addr, bool restorecpsr = false);
void PrefetchAbort();
void DataAbort();
void Execute();
#ifdef JIT_ENABLED
void ExecuteJIT();
#endif
// all code accesses are forced nonseq 32bit
u32 CodeRead32(u32 addr, bool branch);
......@@ -176,14 +202,14 @@ public:
{
// code only. always nonseq 32-bit for ARM9.
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC;
Cycles -= numC;
}
void AddCycles_CI(s32 numI)
{
// code+internal
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC + numI;
Cycles -= numC + numI;
}
void AddCycles_CDI()
......@@ -194,9 +220,9 @@ public:
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
Cycles -= std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
// Cycles -= numC + numD;
}
void AddCycles_CD()
......@@ -206,9 +232,9 @@ public:
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
Cycles -= std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
// Cycles -= numC + numD;
}
void GetCodeMemRegion(u32 addr, NDS::MemRegion* region);
......@@ -237,10 +263,14 @@ public:
u32 DTCMSetting, ITCMSetting;
u8 ITCM[0x8000];
// for aarch64 JIT they need to go up here
// to be addressable by a 12-bit immediate
u32 ITCMSize;
u8 DTCM[0x4000];
u32 DTCMBase, DTCMSize;
s32 RegionCodeCycles;
u8 ITCM[ITCMPhysicalSize];
u8* DTCM;
u8 ICache[0x2000];
u32 ICacheTags[64*4];
......@@ -265,7 +295,6 @@ public:
// code/16N/32N/32S
u8 MemTimings[0x100000][4];
s32 RegionCodeCycles;
u8* CurICacheLine;
bool (*GetMemRegion)(u32 addr, bool write, NDS::MemRegion* region);
......@@ -278,9 +307,14 @@ public:
void Reset();
void FillPipeline();
void JumpTo(u32 addr, bool restorecpsr = false);
void Execute();
#ifdef JIT_ENABLED
void ExecuteJIT();
#endif
u16 CodeRead16(u32 addr)
{
......@@ -295,8 +329,8 @@ public:
void DataRead8(u32 addr, u32* val)
{
*val = BusRead8(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
void DataRead16(u32 addr, u32* val)
......@@ -304,8 +338,8 @@ public:
addr &= ~1;
*val = BusRead16(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
void DataRead32(u32 addr, u32* val)
......@@ -313,8 +347,8 @@ public:
addr &= ~3;
*val = BusRead32(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][2];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
}
void DataRead32S(u32 addr, u32* val)
......@@ -322,14 +356,14 @@ public:
addr &= ~3;
*val = BusRead32(addr);
DataCycles += NDS::ARM7MemTimings[DataRegion][3];
DataCycles += NDS::ARM7MemTimings[addr >> 15][3];
}
void DataWrite8(u32 addr, u8 val)
{
BusWrite8(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
void DataWrite16(u32 addr, u16 val)
......@@ -337,8 +371,8 @@ public:
addr &= ~1;
BusWrite16(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
void DataWrite32(u32 addr, u32 val)
......@@ -346,8 +380,8 @@ public:
addr &= ~3;
BusWrite32(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][2];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings<