Commit 0a6b8aa2 authored by Flyinghead

rec-arm64: simplify read/write mem rewrite logic

pass exception pc in x2 reg for both slow/fast paths
infer the op type and size from the armv8 op encoding
rewrite doesn't need access to the block anymore
add support for some imm args

ssa: swap setpeq args in ssa if first is imm
parent 33bb26f1
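The key simplification is in the last hunk below: instead of mapping the faulting host pc back to a shil opcode through the block's memory_accesses table, ngen_Rewrite now masks the faulting ARMv8 instruction and infers the access type and size from the encoding alone. A self-contained sketch of that lookup (mask and encodings copied from the diff; the three parallel arrays are reshaped into a single table here, and the harness is illustrative only):

#include <cstdint>
#include <cstdio>

// Keeps the opcode/size/class bits of a register-offset LDR/STR-family
// instruction and drops the register fields (value from the diff below).
static const uint32_t STR_LDR_MASK = 0xFFE0EC00;

struct MemOpInfo { uint32_t masked_op; bool is_read; uint32_t size; };

// Pre-masked encodings of the eight access shapes the JIT emits.
static const MemOpInfo mem_ops[] = {
    { 0x38E06800, true,  1 },   // Ldrsb
    { 0x78E06800, true,  2 },   // Ldrsh
    { 0xB8606800, true,  4 },   // Ldr w
    { 0xF8606800, true,  8 },   // Ldr x
    { 0x38206800, false, 1 },   // Strb
    { 0x78206800, false, 2 },   // Strh
    { 0xB8206800, false, 4 },   // Str w
    { 0xF8206800, false, 8 },   // Str x
};

static bool decode_mem_op(uint32_t insn, bool &is_read, uint32_t &size)
{
    const uint32_t masked = insn & STR_LDR_MASK;
    for (const MemOpInfo &op : mem_ops)
        if (masked == op.masked_op)
        {
            is_read = op.is_read;
            size = op.size;
            return true;
        }
    return false;
}

int main()
{
    // 0xB8616B80 is "ldr w0, [x28, x1]", the 4-byte fast-path load emitted
    // by GenReadMemoryFast; masking drops Rt/Rn/Rm and leaves 0xB8606800.
    bool is_read; uint32_t size;
    if (decode_mem_op(0xB8616B80, is_read, size))
        printf("%s, %u bytes\n", is_read ? "read" : "write", size);  // read, 4 bytes
}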
@@ -192,7 +192,7 @@ private:
}
else if (op.op == shop_and || op.op == shop_or || op.op == shop_xor || op.op == shop_add || op.op == shop_mul_s16 || op.op == shop_mul_u16
|| op.op == shop_mul_i32 || op.op == shop_test || op.op == shop_seteq || op.op == shop_fseteq || op.op == shop_fadd || op.op == shop_fmul
|| op.op == shop_mul_u64 || op.op == shop_mul_s64 || op.op == shop_adc)
|| op.op == shop_mul_u64 || op.op == shop_mul_s64 || op.op == shop_adc || op.op == shop_setpeq)
{
if (op.rs1.is_imm() && op.rs2.is_reg())
{
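For context on the ssa line in the commit message: the hunk above just adds shop_setpeq to the set of commutative opcodes whose operands the constant-propagation pass canonicalizes, so a constant first operand is swapped into second position and backends only ever see an immediate in rs2. A minimal sketch of that step, written against the shil_opcode/shil_param types from the surrounding sources:

#include <utility>  // std::swap

// Sketch only; the real logic sits in the constprop pass shown above.
static void canonicalize_commutative(shil_opcode &op)
{
    // shop_setpeq (SH4 cmp/str) compares two values byte-wise, so its
    // operands commute just like those of add, mul and the logical ops.
    if (op.rs1.is_imm() && op.rs2.is_reg())
        std::swap(op.rs1, op.rs2);  // the immediate always ends up in rs2
}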
@@ -154,16 +154,13 @@ void ngen_ResetBlocks_arm64()
}
template<typename T>
static T ReadMemNoEx(u32 addr, u32 pc)
static T ReadMemNoEx(u32 addr, u32, u32 pc)
{
#ifndef NO_MMU
u32 ex;
T rv = mmu_ReadMemNoEx<T>(addr, &ex);
if (ex)
{
if (pc & 1)
spc = pc - 1;
else
spc = pc;
longjmp(jmp_env, 1);
}
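The unnamed u32 added to ReadMemNoEx is what "pass exception pc in x2 reg" means for reads: under the AArch64 procedure call standard the handler arguments arrive in w0/w1/w2, so the padding argument moves pc from w1 into w2, the same register the write handlers (addr, data, pc) already use. The emitters can therefore load the exception pc into x2 once, ahead of both the fast and the slow path. The resulting register contract, as illustrative declarations (bodies elided):

#include <cstdint>

// AArch64 AAPCS: the first integer arguments arrive in w0, w1, w2
// (x1 when the data is 64-bit).
// Read handlers:  w0 = guest address, w1 = unused pad, w2 = exception pc
// Write handlers: w0 = guest address, w1/x1 = data,    w2 = exception pc
template <typename T>
T ReadMemNoEx(uint32_t addr, uint32_t /* pad, keeps pc in w2 */, uint32_t pc);

template <typename T>
void WriteMemNoEx(uint32_t addr, T data, uint32_t pc);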
@@ -180,9 +177,6 @@ static void WriteMemNoEx(u32 addr, T data, u32 pc)
u32 ex = mmu_WriteMemNoEx<T>(addr, data);
if (ex)
{
if (pc & 1)
spc = pc - 1;
else
spc = pc;
longjmp(jmp_env, 1);
}
@@ -833,11 +827,21 @@ public:
case shop_mul_u64:
case shop_mul_s64:
{
Register reg2;
if (op.rs2.is_imm())
{
Mov(w0, op.rs2.imm_value());
reg2 = w0;
}
else
{
reg2 = regalloc.MapRegister(op.rs2);
}
const Register& rd_xreg = Register::GetXRegFromCode(regalloc.MapRegister(op.rd).GetCode());
if (op.op == shop_mul_u64)
Umull(rd_xreg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
Umull(rd_xreg, regalloc.MapRegister(op.rs1), reg2);
else
Smull(rd_xreg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
Smull(rd_xreg, regalloc.MapRegister(op.rs1), reg2);
const Register& rd2_xreg = Register::GetXRegFromCode(regalloc.MapRegister(op.rd2).GetCode());
Lsr(rd2_xreg, rd_xreg, 32);
}
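The reg2 dance above (materialize an immediate rs2 into w0, otherwise use the allocated register) is the "support for some imm args" from the commit message, and the same pattern reappears in GenWriteMemoryImmediate further down. As a hypothetical helper, not present in the source, assuming access to the assembler's Mov and register allocator:

// Hypothetical refactoring of the imm-or-reg pattern; illustration only.
vixl::aarch64::Register immOrReg(Arm64Assembler &assm, const shil_param &prm,
                                 const vixl::aarch64::Register &scratch)
{
    if (prm.is_imm())
    {
        assm.Mov(scratch, prm.imm_value());  // materialize the constant
        return scratch;                      // e.g. w0
    }
    return assm.regalloc.MapRegister(prm);   // use the allocated register
}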
@@ -948,7 +952,10 @@ public:
case shop_fsca:
Mov(x1, reinterpret_cast<uintptr_t>(&sin_table));
Add(x1, x1, Operand(regalloc.MapRegister(op.rs1), UXTH, 3));
if (op.rs1.is_reg())
Add(x1, x1, Operand(regalloc.MapRegister(op.rs1), UXTH, 3));
else
Add(x1, x1, Operand(op.rs1.imm_value() << 3));
#ifdef EXPLODE_SPANS
Ldr(regalloc.MapVRegister(op.rd, 0), MemOperand(x1, 4, PostIndex));
Ldr(regalloc.MapVRegister(op.rd, 1), MemOperand(x1));
@@ -1110,13 +1117,9 @@ public:
return MemOperand(x28, offset);
}
void GenReadMemorySlow(const shil_opcode& op)
void GenReadMemorySlow(u32 size)
{
Instruction *start_instruction = GetCursorAddress<Instruction *>();
u32 size = op.flags & 0x7f;
if (mmu_enabled())
Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
switch (size)
{
@@ -1154,56 +1157,39 @@ public:
die("1..8 bytes");
break;
}
if (size != 8)
host_reg_to_shil_param(op.rd, w0);
else
{
#ifdef EXPLODE_SPANS
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
Fmov(regalloc.MapVRegister(op.rd, 0), w0);
Lsr(x0, x0, 32);
Fmov(regalloc.MapVRegister(op.rd, 1), w0);
#else
host_reg_to_shil_param(op.rd, x0);
#endif
}
EnsureCodeSize(start_instruction, read_memory_rewrite_size);
}
void GenWriteMemorySlow(const shil_opcode& op)
void GenWriteMemorySlow(u32 size)
{
Instruction *start_instruction = GetCursorAddress<Instruction *>();
if (mmu_enabled())
Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
u32 size = op.flags & 0x7f;
switch (size)
{
case 1:
if (!mmu_enabled())
GenCallRuntime(WriteMem8);
else
GenCallRuntime(WriteMemNoEx<u8>);
break;
case 2:
if (!mmu_enabled())
GenCallRuntime(WriteMem16);
else
GenCallRuntime(WriteMemNoEx<u16>);
break;
case 4:
if (!mmu_enabled())
GenCallRuntime(WriteMem32);
else
GenCallRuntime(WriteMemNoEx<u32>);
break;
case 8:
if (!mmu_enabled())
GenCallRuntime(WriteMem64);
else
GenCallRuntime(WriteMemNoEx<u64>);
break;
@@ -1215,17 +1201,6 @@ public:
EnsureCodeSize(start_instruction, write_memory_rewrite_size);
}
void InitializeRewrite(RuntimeBlockInfo *block, size_t opid)
{
this->block = block;
// writem rewrite doesn't use regalloc
if (block->oplist[opid].op == shop_readm)
{
regalloc.DoAlloc(block);
regalloc.SetOpnum(opid);
}
}
u32 RelinkBlock(RuntimeBlockInfo *block)
{
ptrdiff_t start_offset = GetBuffer()->GetCursorOffset();
@@ -1578,11 +1553,26 @@ private:
return;
GenMemAddr(op, call_regs[0]);
if (mmu_enabled())
Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc
if (optimise && GenReadMemoryFast(op, opid))
return;
u32 size = op.flags & 0x7f;
if (!optimise || !GenReadMemoryFast(op, opid))
GenReadMemorySlow(size);
GenReadMemorySlow(op);
if (size < 8)
host_reg_to_shil_param(op.rd, w0);
else
{
#ifdef EXPLODE_SPANS
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
Fmov(regalloc.MapVRegister(op.rd, 0), w0);
Lsr(x0, x0, 32);
Fmov(regalloc.MapVRegister(op.rd, 1), w0);
#else
Str(x0, sh4_context_mem_operand(op.rd.reg_ptr()));
#endif
}
}
bool GenReadMemoryImmediate(const shil_opcode& op)
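Moving the destination store out of GenReadMemorySlow and up into ReadMemory (the hunk above) is what frees the rewrite path from needing the block or the register allocator: every read path now simply leaves the value in w0 (x0 for 64-bit reads) and the store to op.rd happens once, at the call site. In the EXPLODE_SPANS case the 64-bit value is an SH4 float-register pair; in C terms that tail amounts to:

#include <cstdint>
#include <cstring>

// C equivalent of the EXPLODE_SPANS tail above: x0 carries two consecutive
// 32-bit floats that are split into two scalar registers (Fmov/Lsr/Fmov).
static void explode_span(uint64_t x0, float &lo, float &hi)
{
    uint32_t w = (uint32_t)x0;   // Fmov s<rd0>, w0
    std::memcpy(&lo, &w, 4);
    w = (uint32_t)(x0 >> 32);    // Lsr x0, x0, #32 ; Fmov s<rd1>, w0
    std::memcpy(&hi, &w, 4);
}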
@@ -1599,22 +1589,32 @@ private:
return false;
u32 paddr;
u32 rv;
if (size == 2)
switch (size)
{
case 1:
rv = mmu_data_translation<MMU_TT_DREAD, u8>(addr, paddr);
break;
case 2:
rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
else if (size == 4)
break;
case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
else
break;
default:
die("Invalid immediate size");
break;
}
if (rv != MMU_ERROR_NONE)
return false;
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_read_const(addr, isram, size);
void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size);
if (isram)
{
Ldr(x1, reinterpret_cast<uintptr_t>(ptr)); // faster than Mov
if (regalloc.IsAllocAny(op.rd))
{
switch (size)
@@ -1672,36 +1672,51 @@ private:
else
{
// Not RAM
Mov(w0, addr);
switch(size)
if (size == 8)
{
case 1:
GenCallRuntime((void (*)())ptr);
Sxtb(w0, w0);
break;
case 2:
verify(!regalloc.IsAllocAny(op.rd));
// Need to call the handler twice
Mov(w0, addr);
GenCallRuntime((void (*)())ptr);
Sxth(w0, w0);
break;
Str(w0, sh4_context_mem_operand(op.rd.reg_ptr()));
case 4:
Mov(w0, addr + 4);
GenCallRuntime((void (*)())ptr);
break;
case 8:
die("SZ_64F not supported");
break;
Str(w0, sh4_context_mem_operand((u8*)op.rd.reg_ptr() + 4));
}
if (regalloc.IsAllocg(op.rd))
Mov(regalloc.MapRegister(op.rd), w0);
else
{
verify(regalloc.IsAllocf(op.rd));
Fmov(regalloc.MapVRegister(op.rd), w0);
}
Mov(w0, addr);
switch(size)
{
case 1:
GenCallRuntime((void (*)())ptr);
Sxtb(w0, w0);
break;
case 2:
GenCallRuntime((void (*)())ptr);
Sxth(w0, w0);
break;
case 4:
GenCallRuntime((void (*)())ptr);
break;
default:
die("Invalid size");
break;
}
if (regalloc.IsAllocg(op.rd))
Mov(regalloc.MapRegister(op.rd), w0);
else
{
verify(regalloc.IsAllocf(op.rd));
Fmov(regalloc.MapVRegister(op.rd), w0);
}
}
}
return true;
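The _vmem const handlers only exist up to 32 bits, which is why both call sites above clamp the size passed to _vmem_read_const/_vmem_write_const to 4, and why an 8-byte not-RAM read invokes the 4-byte handler twice, at addr and addr + 4, storing each half straight into the Sh4Context. A rough C equivalent of the generated sequence (illustrative):

#include <cstdint>

// 'handler' stands for the 4-byte read handler _vmem_read_const returned.
static void read64_via_handler(uint32_t addr, uint32_t (*handler)(uint32_t),
                               uint32_t rd_ctx[2])
{
    rd_ctx[0] = handler(addr);      // Mov w0, addr     ; call ; Str [ctx]
    rd_ctx[1] = handler(addr + 4);  // Mov w0, addr + 4 ; call ; Str [ctx+4]
}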
@@ -1715,7 +1730,7 @@ private:
Instruction *start_instruction = GetCursorAddress<Instruction *>();
// WARNING: the rewrite code relies on having 1-2 ops before the memory access (3 when mmu is enabled)
// WARNING: the rewrite code relies on having 1 or 2 ops before the memory access
// Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code
if (!_nvmem_4gb_space())
{
@@ -1725,79 +1740,27 @@ private:
else
{
Add(x1, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
if (mmu_enabled())
{
u32 exception_pc = block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0);
Mov(w27, exception_pc & 0xFFFF);
Movk(w27, exception_pc >> 16, 16);
}
}
//printf("direct read memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
u32 size = op.flags & 0x7f;
if (regalloc.IsAllocAny(op.rd))
{
switch(size)
{
case 1:
Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
Ldrsb(w0, MemOperand(x28, x1));
break;
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
Ldrsh(w0, MemOperand(x28, x1));
break;
case 4:
if (!op.rd.is_r32f())
Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
else
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1));
Ldr(w0, MemOperand(x28, x1));
break;
case 8:
Ldr(x1, MemOperand(x28, x1));
Ldr(x0, MemOperand(x28, x1));
break;
}
if (size == 8)
{
#ifdef EXPLODE_SPANS
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
Fmov(regalloc.MapVRegister(op.rd, 0), w1);
Lsr(x1, x1, 32);
Fmov(regalloc.MapVRegister(op.rd, 1), w1);
#else
die("GenReadMemoryFast: size == 8 and !explode_spans");
#endif
}
}
else
{
switch(size)
{
case 1:
Ldrsb(w1, MemOperand(x28, x1));
break;
case 2:
Ldrsh(w1, MemOperand(x28, x1));
break;
case 4:
Ldr(w1, MemOperand(x28, x1));
break;
case 8:
Ldr(x1, MemOperand(x28, x1));
break;
}
if (size == 8)
Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
else
Str(w1, sh4_context_mem_operand(op.rd.reg_ptr()));
}
EnsureCodeSize(start_instruction, read_memory_rewrite_size);
return true;
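Both the fast sequence above and the slow call that may later replace it end with EnsureCodeSize, which pads to a fixed instruction count; that invariant is what makes the in-place rewrite safe. The helper already exists in this file; a plausible shape, assuming vixl's byte-addressed cursor and 4-byte A64 instructions:

// Assumed body, for illustration: pad with NOPs so every variant of the
// access occupies exactly code_size instructions (code_size * 4 bytes).
void EnsureCodeSize(Instruction *start_instruction, int code_size)
{
    while (GetCursorAddress<char *>() - (char *)start_instruction < code_size * 4)
        Nop();
    verify(GetCursorAddress<char *>() - (char *)start_instruction == code_size * 4);
}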
@@ -1809,6 +1772,8 @@ private:
return;
GenMemAddr(op, call_regs[0]);
if (mmu_enabled())
Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc
u32 size = op.flags & 0x7f;
if (size != 8)
@@ -1828,7 +1793,7 @@ private:
if (optimise && GenWriteMemoryFast(op, opid))
return;
GenWriteMemorySlow(op);
GenWriteMemorySlow(size);
}
bool GenWriteMemoryImmediate(const shil_opcode& op)
@@ -1840,7 +1805,7 @@ private:
u32 addr = op.rs1._imm;
if (mmu_enabled())
{
if ((addr >> 12) != (block->vaddr >> 12))
if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12)))
// When full mmu is on, only consider addresses in the same 4k page
return false;
u32 paddr;
@@ -1857,13 +1822,16 @@ private:
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr);
break;
default:
die("Invalid immediate size");
break;
}
if (rv != MMU_ERROR_NONE)
return false;
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_write_const(addr, isram, size);
void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size);
Register reg2;
if (op.rs2.is_imm())
@@ -1896,10 +1864,7 @@ private:
break;
case 4:
if (op.rs2.is_r32f())
Str(reg2, MemOperand(x1));
else
Str(reg2, MemOperand(x1));
break;
default:
@@ -1944,7 +1909,7 @@ private:
Instruction *start_instruction = GetCursorAddress<Instruction *>();
// WARNING: the rewrite code relies on having 1-2 ops before the memory access (3 when mmu is enabled)
// WARNING: the rewrite code relies on having 1 or 2 ops before the memory access
// Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code
if (!_nvmem_4gb_space())
{
@@ -1954,17 +1919,8 @@ private:
else
{
Add(x7, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
if (mmu_enabled())
{
u32 exception_pc = block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0);
Mov(w27, exception_pc & 0xFFFF);
Movk(w27, exception_pc >> 16, 16);
}
}
//printf("direct write memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
u32 size = op.flags & 0x7f;
switch(size)
{
@@ -2137,9 +2093,8 @@ private:
std::vector<const VRegister*> call_fregs;
Arm64RegAlloc regalloc;
RuntimeBlockInfo* block = NULL;
const int read_memory_rewrite_size = 5; // worst case for u64/mmu: add, mov, movk, ldr, str
// FIXME rewrite size per read/write size?
const int write_memory_rewrite_size = 4; // TODO only 2 if !mmu & 4gb
const int read_memory_rewrite_size = 3; // ubfx, add, ldr
const int write_memory_rewrite_size = 3; // ubfx, add, str
};
static Arm64Assembler* compiler;
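With the inline exception-pc materialization gone (the Mov/Movk into w27 removed above), the sequences the rewriter must overwrite shrink to at most three instructions, which is what the new constants record. Reconstructed worst-case shapes, operands illustrative:

//  512MB vmem layout:  ubfx x1, x0, #0, #29              ; mask guest address
//                      add  x1, x1, #sizeof(Sh4Context)  ; bias, when emitted
//                      ldr  w0, [x28, x1]                ; op ngen_Rewrite decodes
//  4GB vmem layout:    add  x1, x0, #sizeof(Sh4Context)
//                      ldr  w0, [x28, x1]
// The slow-path call emitted in its place is padded to the same size.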
@@ -2176,34 +2131,66 @@ void ngen_CC_Finish_arm64(shil_opcode* op)
}
#define STR_LDR_MASK 0xFFE0EC00
static const u32 armv8_mem_ops[] = {
0x38E06800, // Ldrsb
0x78E06800, // Ldrsh
0xB8606800, // Ldr w
0xF8606800, // Ldr x
0x38206800, // Strb
0x78206800, // Strh
0xB8206800, // Str w
0xF8206800, // Str x
};
static const bool read_ops[] = {
true,
true,
true,
true,
false,
false,
false,
false,
};
static const u32 op_sizes[] = {
1,
2,
4,
8,
1,
2,
4,
8,
};
bool ngen_Rewrite(unat& host_pc, unat, unat)
{
//printf("ngen_Rewrite pc %p\n", host_pc);
void *host_pc_rw = (void*)CC_RX2RW(host_pc);
RuntimeBlockInfo *block = bm_GetBlock2((void*)host_pc);
if (block == NULL)
//LOGI("ngen_Rewrite pc %zx\n", host_pc);
u32 *code_ptr = (u32 *)CC_RX2RW(host_pc);
u32 armv8_op = *code_ptr;
bool is_read;
u32 size;
bool found = false;
u32 masked = armv8_op & STR_LDR_MASK;
for (int i = 0; i < ARRAY_SIZE(armv8_mem_ops); i++)
{
printf("ngen_Rewrite: Block at %p not found\n", (void *)host_pc);
return false;
}
u32 *code_ptr = (u32*)host_pc_rw;
auto it = block->memory_accesses.find(code_ptr);
if (it == block->memory_accesses.end())
if (masked == armv8_mem_ops[i])
{
printf("ngen_Rewrite: memory access at %p not found (%lu entries)\n", code_ptr, block->memory_accesses.size());
return false;
size = op_sizes[i];
is_read = read_ops[i];
found = true;
break;
}
}
u32 opid = it->second;
verify(opid < block->oplist.size());
const shil_opcode& op = block->oplist[opid];
// Skip the preceding ops (add, bic, ...)
u32 *code_rewrite = code_ptr - 1 - (!_nvmem_4gb_space() ? 1 : 0) - (mmu_enabled() ? 2 : 0);
verify(found);
// Skip the preceding ops (add, ubfx)
u32 *code_rewrite = code_ptr - 1 - (!_nvmem_4gb_space() ? 1 : 0);
Arm64Assembler *assembler = new Arm64Assembler(code_rewrite);
assembler->InitializeRewrite(block, opid);
if (op.op == shop_readm)
assembler->GenReadMemorySlow(op);
if (is_read)
assembler->GenReadMemorySlow(size);
else
assembler->GenWriteMemorySlow(op);
assembler->GenWriteMemorySlow(size);
assembler->Finalize(true);
delete assembler;
host_pc = (unat)CC_RW2RX(code_rewrite);
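End to end: when a fast-path access faults, the host fault handler hands the faulting pc to ngen_Rewrite, which decodes the instruction (see the sketch after the commit header), rewinds past the one or two setup ops, emits the matching slow-path call over the fast sequence, and returns the new resume address in host_pc. A hedged sketch of that glue for aarch64 Linux; the real handler lives elsewhere in the tree and additionally verifies the fault came from JIT code:

#include <signal.h>
#include <stdint.h>
#include <ucontext.h>

using unat = uintptr_t;                        // pointer-sized int of this codebase
bool ngen_Rewrite(unat &host_pc, unat, unat);  // defined in this file

static void fault_handler(int, siginfo_t *, void *ctx)
{
    ucontext_t *uc = (ucontext_t *)ctx;
    unat pc = (unat)uc->uc_mcontext.pc;        // aarch64/Linux field (assumption)
    if (ngen_Rewrite(pc, 0, 0))
        uc->uc_mcontext.pc = pc;               // resume at the rewritten slow path
    // otherwise fall through to the normal crash handling
}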