Commit c5381d29 authored by RSDuck's avatar RSDuck
Browse files

reconcile DSi and JIT, fastmem for x64 and Windows

parent ea6d0358
......@@ -21,12 +21,15 @@
#include "DSi.h"
#include "ARM.h"
#include "ARMInterpreter.h"
#include "ARMJIT.h"
#include "Config.h"
#include "AREngine.h"
#include "ARMJIT.h"
#include "Config.h"
#ifdef JIT_ENABLED
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
#endif
// instruction timing notes
//
......@@ -109,6 +112,12 @@ void ARM::Reset()
CodeMem.Mem = NULL;
#ifdef JIT_ENABLED
FastBlockLookup = NULL;
FastBlockLookupStart = 0;
FastBlockLookupSize = 0;
#endif
// zorp
JumpTo(ExceptionBase);
}
......@@ -752,6 +761,12 @@ void ARMv4::Execute()
if (Halted == 2)
Halted = 0;
if (Halted == 4)
{
DSi::SoftReset();
Halted = 2;
}
}
#ifdef JIT_ENABLED
......@@ -820,6 +835,12 @@ void ARMv4::ExecuteJIT()
if (Halted == 2)
Halted = 0;
if (Halted == 4)
{
DSi::SoftReset();
Halted = 2;
}
}
#endif
......
......@@ -147,7 +147,7 @@ public:
NDS::MemRegion CodeMem;
#ifdef JIT_ENABLED
u32 FastBlockLookupStart = 0, FastBlockLookupSize = 0;
u32 FastBlockLookupStart, FastBlockLookupSize;
u64* FastBlockLookup;
#endif
......
This diff is collapsed.
......@@ -16,6 +16,8 @@ void DeInit();
void Reset();
void CheckAndInvalidateITCM();
void InvalidateByAddr(u32 pseudoPhysical);
template <u32 num, int region>
......
......@@ -168,7 +168,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsMappable(expectedTarget)))
if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)))
{
ptrdiff_t memopStart = GetCodeOffset();
LoadStorePatch patch;
......@@ -461,7 +461,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
: ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
bool compileFastPath = Config::JIT_FastMemory
&& store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsMappable(expectedTarget));
&& store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget));
if (decrement)
{
......
......@@ -214,13 +214,13 @@ u32 LocaliseCodeAddress(u32 num, u32 addr);
template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset);
template <typename T> T SlowRead9(u32 addr, ARMv5* cpu);
template <typename T> void SlowWrite9(u32 addr, ARMv5* cpu, T val);
template <typename T> T SlowRead7(u32 addr);
template <typename T> void SlowWrite7(u32 addr, T val);
template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu);
template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val);
template <typename T, int ConsoleType> T SlowRead7(u32 addr);
template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val);
template <bool PreInc, bool Write> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template <bool PreInc, bool Write> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);
template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);
}
......
This diff is collapsed.
......@@ -23,7 +23,7 @@ enum
memregion_DTCM,
memregion_BIOS9,
memregion_MainRAM,
memregion_SWRAM,
memregion_SharedWRAM,
memregion_IO9,
memregion_VRAM,
memregion_BIOS7,
......@@ -31,18 +31,28 @@ enum
memregion_IO7,
memregion_Wifi,
memregion_VWRAM,
// DSi
memregion_BIOS9DSi,
memregion_BIOS7DSi,
memregion_NewSharedWRAM_A,
memregion_NewSharedWRAM_B,
memregion_NewSharedWRAM_C,
memregions_Count
};
int ClassifyAddress9(u32 addr);
int ClassifyAddress7(u32 addr);
bool GetRegionMapping(int region, u32 num, u32& mappingStart, u32& mappingSize, u32& memoryOffset, u32& memorySize);
bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize);
u32 LocaliseAddress(int region, u32 num, u32 addr);
bool IsMappable(int region);
bool IsFastmemCompatible(int region);
void RemapDTCM(u32 newBase, u32 newSize);
void RemapSWRAM();
void RemapNWRAM(int num);
void SetCodeProtection(int region, u32 offset, bool protect);
......
......@@ -40,6 +40,12 @@ const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =
#endif
;
#ifdef _WIN32
const BitSet32 CallerSavedPushRegs({R10, R11});
#else
const BitSet32 CallerSavedPushRegs({R9, R10, R11});
#endif
void Compiler::PushRegs(bool saveHiRegs)
{
BitSet32 loadedRegs(RegCache.LoadedRegs);
......@@ -301,6 +307,107 @@ Compiler::Compiler()
RET();
}
for (int consoleType = 0; consoleType < 2; consoleType++)
{
for (int num = 0; num < 2; num++)
{
for (int size = 0; size < 3; size++)
{
for (int reg = 0; reg < 16; reg++)
{
if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2 || reg == ABI_PARAM3)
{
PatchedStoreFuncs[consoleType][num][size][reg] = NULL;
PatchedLoadFuncs[consoleType][num][size][0][reg] = NULL;
PatchedLoadFuncs[consoleType][num][size][1][reg] = NULL;
continue;
}
X64Reg rdMapped = (X64Reg)reg;
PatchedStoreFuncs[consoleType][num][size][reg] = GetWritableCodePtr();
if (RSCRATCH3 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (num == 0)
{
MOV(64, R(ABI_PARAM2), R(RCPU));
MOV(32, R(ABI_PARAM3), R(rdMapped));
}
else
{
MOV(32, R(ABI_PARAM2), R(rdMapped));
}
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
case 17: ABI_CallFunction(SlowWrite7<u16, 0>); break;
case 8: ABI_CallFunction(SlowWrite9<u8, 0>); break;
case 9: ABI_CallFunction(SlowWrite7<u8, 0>); break;
}
}
else
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 1>); break;
case 17: ABI_CallFunction(SlowWrite7<u16, 1>); break;
case 8: ABI_CallFunction(SlowWrite9<u8, 1>); break;
case 9: ABI_CallFunction(SlowWrite7<u8, 1>); break;
}
}
ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
RET();
for (int signextend = 0; signextend < 2; signextend++)
{
PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetWritableCodePtr();
if (RSCRATCH3 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (num == 0)
MOV(64, R(ABI_PARAM2), R(RCPU));
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 0>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 0>); break;
case 16: ABI_CallFunction(SlowRead9<u16, 0>); break;
case 17: ABI_CallFunction(SlowRead7<u16, 0>); break;
case 8: ABI_CallFunction(SlowRead9<u8, 0>); break;
case 9: ABI_CallFunction(SlowRead7<u8, 0>); break;
}
}
else
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 1>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 1>); break;
case 16: ABI_CallFunction(SlowRead9<u16, 1>); break;
case 17: ABI_CallFunction(SlowRead7<u16, 1>); break;
case 8: ABI_CallFunction(SlowRead9<u8, 1>); break;
case 9: ABI_CallFunction(SlowRead7<u8, 1>); break;
}
}
ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (signextend)
MOVSX(32, 8 << size, rdMapped, R(RSCRATCH));
else
MOVZX(32, 8 << size, rdMapped, R(RSCRATCH));
RET();
}
}
}
}
}
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
......@@ -500,6 +607,8 @@ void Compiler::Reset()
NearCode = NearStart;
FarCode = FarStart;
LoadStorePatches.clear();
}
bool Compiler::IsJITFault(u64 addr)
......
......@@ -7,6 +7,8 @@
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
#include <unordered_map>
namespace ARMJIT
{
......@@ -18,6 +20,13 @@ const Gen::X64Reg RSCRATCH2 = Gen::EDX;
const Gen::X64Reg RSCRATCH3 = Gen::ECX;
const Gen::X64Reg RSCRATCH4 = Gen::R8;
struct LoadStorePatch
{
void* PatchFunc;
s16 Offset;
u16 Size;
};
struct Op2
{
Op2()
......@@ -211,6 +220,11 @@ public:
u8* NearStart;
u8* FarStart;
void* PatchedStoreFuncs[2][2][3][16];
void* PatchedLoadFuncs[2][2][3][2][16];
std::unordered_map<u8*, LoadStorePatch> LoadStorePatches;
u8* ResetStart;
u32 CodeMemSize;
......
This diff is collapsed.
......@@ -608,6 +608,27 @@ void ARMv5::CP15Write(u32 id, u32 val)
ITCMSetting = val;
UpdateITCMSetting();
return;
case 0xF00:
//printf("cache debug index register %08X\n", val);
return;
case 0xF10:
//printf("cache debug instruction tag %08X\n", val);
return;
case 0xF20:
//printf("cache debug data tag %08X\n", val);
return;
case 0xF30:
//printf("cache debug instruction cache %08X\n", val);
return;
case 0xF40:
//printf("cache debug data cache %08X\n", val);
return;
}
if ((id&0xF00)!=0x700)
......
......@@ -40,14 +40,7 @@ char DSiNANDPath[1024];
#ifdef JIT_ENABLED
int JIT_Enable = false;
int JIT_MaxBlockSize = 32;
int JIT_BrancheOptimisations = 2;
int JIT_LiteralOptimisations = true;
#endif
#ifdef JIT_ENABLED
int JIT_Enable = false;
int JIT_MaxBlockSize = 32;
int JIT_BrancheOptimisations = true;
int JIT_BranchOptimisations = 2;
int JIT_LiteralOptimisations = true;
int JIT_FastMemory = true;
#endif
......@@ -66,16 +59,9 @@ ConfigEntry ConfigFile[] =
#ifdef JIT_ENABLED
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BrancheOptimisations, 2, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
#endif
#ifdef JIT_ENABLED
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 2, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
{"JIT_FastMem", 0, &JIT_FastMemory, 1, NULL, 0},
{"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0},
#endif
{"", -1, NULL, 0, NULL, 0}
......
......@@ -54,14 +54,7 @@ extern char DSiNANDPath[1024];
#ifdef JIT_ENABLED
extern int JIT_Enable;
extern int JIT_MaxBlockSize;
extern int JIT_BrancheOptimisations;
extern int JIT_LiteralOptimisations;
#endif
#ifdef JIT_ENABLED
extern int JIT_Enable;
extern int JIT_MaxBlockSize;
extern int JIT_BrancheOptimisations;
extern int JIT_BranchOptimisations;
extern int JIT_LiteralOptimisations;
extern int JIT_FastMemory;
#endif
......
......@@ -26,6 +26,11 @@
#include "NDSCart.h"
#include "Platform.h"
#ifdef JIT_ENABLED
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
#endif
#include "DSi_NDMA.h"
#include "DSi_I2C.h"
#include "DSi_SD.h"
......@@ -34,15 +39,6 @@
#include "tiny-AES-c/aes.hpp"
namespace NDS
{
extern ARMv5* ARM9;
extern ARMv4* ARM7;
}
namespace DSi
{
......@@ -59,9 +55,9 @@ u8 ARM7iBIOS[0x10000];
u32 MBK[2][9];
u8 NWRAM_A[0x40000];
u8 NWRAM_B[0x40000];
u8 NWRAM_C[0x40000];
u8* NWRAM_A;
u8* NWRAM_B;
u8* NWRAM_C;
u8* NWRAMMap_A[2][4];
u8* NWRAMMap_B[3][8];
......@@ -86,6 +82,12 @@ u8 ARM7Init[0x3C00];
bool Init()
{
#ifndef JIT_ENABLED
NWRAM_A = new u8[NWRAMSize];
NWRAM_B = new u8[NWRAMSize];
NWRAM_C = new u8[NWRAMSize];
#endif
if (!DSi_I2C::Init()) return false;
if (!DSi_AES::Init()) return false;
......@@ -106,6 +108,12 @@ bool Init()
void DeInit()
{
#ifndef JIT_ENABLED
delete[] NWRAM_A;
delete[] NWRAM_B;
delete[] NWRAM_C;
#endif
DSi_I2C::DeInit();
DSi_AES::DeInit();
......@@ -176,7 +184,12 @@ void SoftReset()
NDS::ARM9->Reset();
NDS::ARM7->Reset();
NDS::ARM9->CP15Reset();
memcpy(NDS::ARM9->ITCM, ITCMInit, 0x8000);
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidateITCM();
#endif
DSi_AES::Reset();
......@@ -274,9 +287,9 @@ bool LoadNAND()
{
printf("Loading DSi NAND\n");
memset(NWRAM_A, 0, 0x40000);
memset(NWRAM_B, 0, 0x40000);
memset(NWRAM_C, 0, 0x40000);
memset(NWRAM_A, 0, NWRAMSize);
memset(NWRAM_B, 0, NWRAMSize);
memset(NWRAM_C, 0, NWRAMSize);
memset(MBK, 0, sizeof(MBK));
memset(NWRAMMap_A, 0, sizeof(NWRAMMap_A));
......@@ -527,6 +540,8 @@ void MapNWRAM_A(u32 num, u8 val)
return;
}
ARMJIT_Memory::RemapNWRAM(0);
int mbkn = 0, mbks = 8*num;
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
......@@ -558,6 +573,8 @@ void MapNWRAM_B(u32 num, u8 val)
return;
}
ARMJIT_Memory::RemapNWRAM(1);
int mbkn = 1+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
......@@ -593,6 +610,8 @@ void MapNWRAM_C(u32 num, u8 val)
return;
}
ARMJIT_Memory::RemapNWRAM(2);
int mbkn = 3+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
......@@ -625,6 +644,8 @@ void MapNWRAMRange(u32 cpu, u32 num, u32 val)
u32 oldval = MBK[cpu][5+num];
if (oldval == val) return;
ARMJIT_Memory::RemapNWRAM(num);
MBK[cpu][5+num] = val;
// TODO: what happens when the ranges are 'out of range'????
......@@ -826,19 +847,31 @@ void ARM9Write8(u32 addr, u8 val)
if (addr >= NWRAMStart[0][0] && addr < NWRAMEnd[0][0])
{
u8* ptr = NWRAMMap_A[0][(addr >> 16) & NWRAMMask[0][0]];
if (ptr) *(u8*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0xFFFF] = val;
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
}
return;
}
if (addr >= NWRAMStart[0][1] && addr < NWRAMEnd[0][1])
{
u8* ptr = NWRAMMap_B[0][(addr >> 15) & NWRAMMask[0][1]];
if (ptr) *(u8*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0x7FFF] = val;
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);
}
return;
}
if (addr >= NWRAMStart[0][2] && addr < NWRAMEnd[0][2])
{
u8* ptr = NWRAMMap_C[0][(addr >> 15) & NWRAMMask[0][2]];
if (ptr) *(u8*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0x7FFF] = val;
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr);
}
return;
}
return NDS::ARM9Write8(addr, val);
......@@ -859,19 +892,31 @@ void ARM9Write16(u32 addr, u16 val)
if (addr >= NWRAMStart[0][0] && addr < NWRAMEnd[0][0])
{
u8* ptr = NWRAMMap_A[0][(addr >> 16) & NWRAMMask[0][0]];
if (ptr) *(u16*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0xFFFF] = val;
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
}
return;
}
if (addr >= NWRAMStart[0][1] && addr < NWRAMEnd[0][1])
{
u8* ptr = NWRAMMap_B[0][(addr >> 15) & NWRAMMask[0][1]];
if (ptr) *(u16*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0x7FFF] = val;
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);
}
return;
}
if (addr >= NWRAMStart[0][2] && addr < NWRAMEnd[0][2])
{
u8* ptr = NWRAMMap_C[0][(addr >> 15) & NWRAMMask[0][2]];
if (ptr) *(u16*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0x7FFF] = val;
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr);
}
return;
}
return NDS::ARM9Write16(addr, val);
......@@ -892,19 +937,31 @@ void ARM9Write32(u32 addr, u32 val)
if (addr >= NWRAMStart[0][0] && addr < NWRAMEnd[0][0])
{
u8* ptr = NWRAMMap_A[0][(addr >> 16) & NWRAMMask[0][0]];
if (ptr) *(u32*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u32*)&ptr[addr & 0xFFFF] = val;
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
}
return;
}
if (addr >= NWRAMStart[0][1] && addr < NWRAMEnd[0][1])
{
u8* ptr = NWRAMMap_B[0][(addr >> 15) & NWRAMMask[0][1]];
if (ptr) *(u32*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u32*)&ptr[addr & 0x7FFF] = val;
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);