Commit 8c2572e3 authored by StapleButter's avatar StapleButter
Browse files

correct VRAM emulation.

There is still room for optimization, especially in the GPU drawing routines.
parent dadf1eb5
This diff is collapsed.
......@@ -35,18 +35,30 @@ extern u8 VRAMSTAT;
// Two 2KB byte arrays (presumably palette RAM and sprite attribute memory -- confirm
// against the definitions in the .cpp file).
extern u8 Palette[2*1024];
extern u8 OAM[2*1024];

// Backing storage for the nine VRAM banks.
// Sizes: A-D 128KB each, E 64KB, F and G 16KB each, H 32KB, I 16KB.
extern u8 VRAM_A[128*1024];
extern u8 VRAM_B[128*1024];
extern u8 VRAM_C[128*1024];
extern u8 VRAM_D[128*1024];
extern u8 VRAM_E[ 64*1024];
extern u8 VRAM_F[ 16*1024];
extern u8 VRAM_G[ 16*1024];
extern u8 VRAM_H[ 32*1024];
extern u8 VRAM_I[ 16*1024];

// Bank pointer table, indexed with bank numbers 0-8 by the LCDC accessors in this
// header (presumably VRAM_A..VRAM_I in that order -- confirm against the definition).
extern u8* VRAM[9];

// Pointer-based mapping tables.
// NOTE(review): the templated accessors in this header do not use these; they go
// through the VRAMMap_* bitmasks instead. Confirm whether these are still needed.
extern u8* VRAM_ABG[128];
extern u8* VRAM_AOBJ[128];
extern u8* VRAM_BBG[128];
extern u8* VRAM_BOBJ[128];
extern u8* VRAM_LCD[128];
extern u8* VRAM_ARM7[2];
extern u8* VRAM_ABGExtPal[4];
extern u8* VRAM_AOBJExtPal;
extern u8* VRAM_BBGExtPal[4];
extern u8* VRAM_BOBJExtPal;

// Bitmask-based mapping state. In the accessors in this header, bit N of a mask
// selects bank N (bit 0 = VRAM_A ... bit 8 = VRAM_I); several bits may be set at once.
// VRAMMap_LCDC is a single mask tested per bank.
extern u32 VRAMMap_LCDC;
// Indexed by 16KB slot: (addr >> 14) masked to the table size.
extern u32 VRAMMap_ABG[0x20];
extern u32 VRAMMap_AOBJ[0x10];
extern u32 VRAMMap_BBG[0x8];
extern u32 VRAMMap_BOBJ[0x8];
// Not referenced by the accessors in this header; presumably the same bank-bitmask
// convention for extended palettes and 3D texture/palette slots -- TODO confirm.
extern u32 VRAMMap_ABGExtPal[4];
extern u32 VRAMMap_AOBJExtPal;
extern u32 VRAMMap_BBGExtPal[4];
extern u32 VRAMMap_BOBJExtPal;
extern u32 VRAMMap_Texture[4];
extern u32 VRAMMap_TexPal[6];
// Indexed by 128KB slot: (addr >> 17) & 1.
extern u32 VRAMMap_ARM7[2];

// 256*192*2 32-bit entries (presumably two 256x192 screens -- confirm).
extern u32 Framebuffer[256*192*2];
......@@ -65,6 +77,283 @@ void MapVRAM_FG(u32 bank, u8 cnt);
void MapVRAM_H(u32 bank, u8 cnt);
void MapVRAM_I(u32 bank, u8 cnt);
// Reads a value of type T from the LCDC view of VRAM (0x068xxxxx).
//
// The address is decoded into one of the nine banks plus an offset inside it.
// Address bits 20-22 do not take part in the decode. Returns 0 when the address
// is outside the window, falls in a slot that belongs to no bank, or the bank's
// bit is not set in VRAMMap_LCDC.
template<typename T>
T ReadVRAM_LCDC(u32 addr)
{
    // Top byte must be 0x06 and bit 23 must be set.
    if ((addr & 0xFF800000) != 0x06800000) return 0;

    // 16KB slot number within the window (address bits 14-19).
    const u32 slot = (addr >> 14) & 0x3F;

    int bank;
    u32 offmask;
    if      (slot < 32)  { bank = slot >> 3; offmask = 0x1FFFF; } // banks 0-3: 8 slots (128KB) each
    else if (slot < 36)  { bank = 4;         offmask = 0xFFFF;  } // 64KB
    else if (slot == 36) { bank = 5;         offmask = 0x3FFF;  } // 16KB
    else if (slot == 37) { bank = 6;         offmask = 0x3FFF;  } // 16KB
    else if (slot < 40)  { bank = 7;         offmask = 0x7FFF;  } // 32KB
    else if (slot == 40) { bank = 8;         offmask = 0x3FFF;  } // 16KB
    else return 0;

    if (!(VRAMMap_LCDC & (1<<bank))) return 0;

    return *(T*)&VRAM[bank][addr & offmask];
}
// Writes a value of type T through the LCDC view of VRAM (0x068xxxxx).
//
// Uses the same bank/offset decode as the LCDC read path. Address bits 20-22 do
// not take part in the decode. The write is dropped when the address is outside
// the window, falls in a slot that belongs to no bank, or the bank's bit is not
// set in VRAMMap_LCDC.
template<typename T>
void WriteVRAM_LCDC(u32 addr, T val)
{
    // Top byte must be 0x06 and bit 23 must be set.
    if ((addr & 0xFF800000) != 0x06800000) return;

    // 16KB slot number within the window (address bits 14-19).
    const u32 slot = (addr >> 14) & 0x3F;

    int bank;
    u32 offmask;
    if      (slot < 32)  { bank = slot >> 3; offmask = 0x1FFFF; } // banks 0-3: 8 slots (128KB) each
    else if (slot < 36)  { bank = 4;         offmask = 0xFFFF;  } // 64KB
    else if (slot == 36) { bank = 5;         offmask = 0x3FFF;  } // 16KB
    else if (slot == 37) { bank = 6;         offmask = 0x3FFF;  } // 16KB
    else if (slot < 40)  { bank = 7;         offmask = 0x7FFF;  } // 32KB
    else if (slot == 40) { bank = 8;         offmask = 0x3FFF;  } // 16KB
    else return;

    if (VRAMMap_LCDC & (1<<bank))
        *(T*)&VRAM[bank][addr & offmask] = val;
}
// Reads a value of type T from the engine A BG region.
//
// VRAMMap_ABG holds one bank bitmask per 16KB slot (32 slots). Every bank whose
// bit is set contributes; the values are ORed together. Returns 0 when no bank
// is mapped to the slot.
template<typename T>
T ReadVRAM_ABG(u32 addr)
{
    const u32 banks = VRAMMap_ABG[(addr >> 14) & 0x1F];

    // Offsets wrapped to each bank size.
    const u32 off128k = addr & 0x1FFFF;
    const u32 off64k  = addr & 0xFFFF;
    const u32 off16k  = addr & 0x3FFF;

    u32 value = 0;
    if (banks & 0x01) value |= *(T*)&VRAM_A[off128k];
    if (banks & 0x02) value |= *(T*)&VRAM_B[off128k];
    if (banks & 0x04) value |= *(T*)&VRAM_C[off128k];
    if (banks & 0x08) value |= *(T*)&VRAM_D[off128k];
    if (banks & 0x10) value |= *(T*)&VRAM_E[off64k];
    if (banks & 0x20) value |= *(T*)&VRAM_F[off16k];
    if (banks & 0x40) value |= *(T*)&VRAM_G[off16k];
    return value;
}
// Writes a value of type T to the engine A BG region.
//
// The value is stored into every bank whose bit is set in the VRAMMap_ABG entry
// for the addressed 16KB slot; nothing is written when the entry is 0.
template<typename T>
void WriteVRAM_ABG(u32 addr, T val)
{
    const u32 banks = VRAMMap_ABG[(addr >> 14) & 0x1F];

    // Offsets wrapped to each bank size.
    const u32 off128k = addr & 0x1FFFF;
    const u32 off64k  = addr & 0xFFFF;
    const u32 off16k  = addr & 0x3FFF;

    if (banks & 0x01) *(T*)&VRAM_A[off128k] = val;
    if (banks & 0x02) *(T*)&VRAM_B[off128k] = val;
    if (banks & 0x04) *(T*)&VRAM_C[off128k] = val;
    if (banks & 0x08) *(T*)&VRAM_D[off128k] = val;
    if (banks & 0x10) *(T*)&VRAM_E[off64k]  = val;
    if (banks & 0x20) *(T*)&VRAM_F[off16k]  = val;
    if (banks & 0x40) *(T*)&VRAM_G[off16k]  = val;
}
// Reads a value of type T from the engine A OBJ region.
//
// VRAMMap_AOBJ holds one bank bitmask per 16KB slot (16 slots). Only banks
// A, B, E, F and G are consulted; the values of all mapped banks are ORed
// together. Returns 0 when none of them is mapped to the slot.
template<typename T>
T ReadVRAM_AOBJ(u32 addr)
{
    const u32 banks = VRAMMap_AOBJ[(addr >> 14) & 0xF];

    // Offsets wrapped to each bank size.
    const u32 off128k = addr & 0x1FFFF;
    const u32 off64k  = addr & 0xFFFF;
    const u32 off16k  = addr & 0x3FFF;

    u32 value = 0;
    if (banks & 0x01) value |= *(T*)&VRAM_A[off128k];
    if (banks & 0x02) value |= *(T*)&VRAM_B[off128k];
    if (banks & 0x10) value |= *(T*)&VRAM_E[off64k];
    if (banks & 0x20) value |= *(T*)&VRAM_F[off16k];
    if (banks & 0x40) value |= *(T*)&VRAM_G[off16k];
    return value;
}
// Writes a value of type T to the engine A OBJ region.
//
// The value is stored into each of banks A, B, E, F and G whose bit is set in
// the VRAMMap_AOBJ entry for the addressed 16KB slot.
template<typename T>
void WriteVRAM_AOBJ(u32 addr, T val)
{
    const u32 banks = VRAMMap_AOBJ[(addr >> 14) & 0xF];

    // Offsets wrapped to each bank size.
    const u32 off128k = addr & 0x1FFFF;
    const u32 off64k  = addr & 0xFFFF;
    const u32 off16k  = addr & 0x3FFF;

    if (banks & 0x01) *(T*)&VRAM_A[off128k] = val;
    if (banks & 0x02) *(T*)&VRAM_B[off128k] = val;
    if (banks & 0x10) *(T*)&VRAM_E[off64k]  = val;
    if (banks & 0x20) *(T*)&VRAM_F[off16k]  = val;
    if (banks & 0x40) *(T*)&VRAM_G[off16k]  = val;
}
// Reads a value of type T from the engine B BG region.
//
// VRAMMap_BBG holds one bank bitmask per 16KB slot (8 slots). Only banks C, H
// and I are consulted; the values of all mapped banks are ORed together.
// Returns 0 when none of them is mapped to the slot.
template<typename T>
T ReadVRAM_BBG(u32 addr)
{
    const u32 banks = VRAMMap_BBG[(addr >> 14) & 0x7];

    u32 value = 0;
    if (banks & 0x004) value |= *(T*)&VRAM_C[addr & 0x1FFFF];
    if (banks & 0x080) value |= *(T*)&VRAM_H[addr & 0x7FFF];
    if (banks & 0x100) value |= *(T*)&VRAM_I[addr & 0x3FFF];
    return value;
}
// Writes a value of type T to the engine B BG region.
//
// The value is stored into each of banks C, H and I whose bit is set in the
// VRAMMap_BBG entry for the addressed 16KB slot.
template<typename T>
void WriteVRAM_BBG(u32 addr, T val)
{
    const u32 banks = VRAMMap_BBG[(addr >> 14) & 0x7];

    if (banks & 0x004) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
    if (banks & 0x080) *(T*)&VRAM_H[addr & 0x7FFF]  = val;
    if (banks & 0x100) *(T*)&VRAM_I[addr & 0x3FFF]  = val;
}
// Reads a value of type T from the engine B OBJ region.
//
// VRAMMap_BOBJ holds one bank bitmask per 16KB slot (8 slots). Only banks D
// and I are consulted; if both are mapped their values are ORed together.
// Returns 0 when neither is mapped to the slot.
template<typename T>
T ReadVRAM_BOBJ(u32 addr)
{
    const u32 banks = VRAMMap_BOBJ[(addr >> 14) & 0x7];

    u32 value = 0;
    if (banks & 0x008) value |= *(T*)&VRAM_D[addr & 0x1FFFF];
    if (banks & 0x100) value |= *(T*)&VRAM_I[addr & 0x3FFF];
    return value;
}
// Writes a value of type T to the engine B OBJ region.
//
// The value is stored into each of banks D and I whose bit is set in the
// VRAMMap_BOBJ entry for the addressed 16KB slot.
template<typename T>
void WriteVRAM_BOBJ(u32 addr, T val)
{
    const u32 banks = VRAMMap_BOBJ[(addr >> 14) & 0x7];

    if (banks & 0x008) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
    if (banks & 0x100) *(T*)&VRAM_I[addr & 0x3FFF]  = val;
}
// Reads a value of type T from the VRAM region visible to the ARM7.
//
// VRAMMap_ARM7 holds one bank bitmask per 128KB slot (2 slots, selected by
// address bit 17). Only banks C and D are consulted; if both are mapped their
// values are ORed together. Returns 0 when neither is mapped to the slot.
template<typename T>
T ReadVRAM_ARM7(u32 addr)
{
    const u32 banks  = VRAMMap_ARM7[(addr >> 17) & 0x1];
    const u32 offset = addr & 0x1FFFF;

    u32 value = 0;
    if (banks & 0x4) value |= *(T*)&VRAM_C[offset];
    if (banks & 0x8) value |= *(T*)&VRAM_D[offset];
    return value;
}
// Writes a value of type T to the VRAM region visible to the ARM7.
//
// VRAMMap_ARM7 holds one bank bitmask per 128KB slot (2 slots, selected by
// address bit 17). The value is stored into each of banks C and D whose bit is
// set for the addressed slot; nothing is written when neither is mapped.
template<typename T>
void WriteVRAM_ARM7(u32 addr, T val)
{
    // Must consult the ARM7 map, matching ReadVRAM_ARM7. The previous code looked
    // up VRAMMap_BOBJ here, so ARM7 writes were gated by engine B OBJ mapping state.
    u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1];

    if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
    if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
}
// Reads a BG value of type T, choosing the engine from the address:
// addresses in the 2MB window at 0x06000000 go to engine A BG, everything
// else goes to engine B BG.
template<typename T>
T ReadVRAM_BG(u32 addr)
{
    const bool engineA = ((addr & 0xFFE00000) == 0x06000000);
    return engineA ? ReadVRAM_ABG<T>(addr) : ReadVRAM_BBG<T>(addr);
}
// Reads an OBJ value of type T, choosing the engine from the address:
// addresses in the 2MB window at 0x06400000 go to engine A OBJ, everything
// else goes to engine B OBJ.
template<typename T>
T ReadVRAM_OBJ(u32 addr)
{
    const bool engineA = ((addr & 0xFFE00000) == 0x06400000);
    return engineA ? ReadVRAM_AOBJ<T>(addr) : ReadVRAM_BOBJ<T>(addr);
}
void DisplaySwap(u32 val);
void StartFrame();
......
This diff is collapsed.
......@@ -39,6 +39,12 @@ public:
void DrawScanline(u32 line);
void VBlank();
void BGExtPalDirty(u32 base);
void OBJExtPalDirty();
u16* GetBGExtPal(u32 slot, u32 pal);
u16* GetOBJExtPal(u32 pal);
private:
u32 Num;
u32* Framebuffer;
......@@ -58,6 +64,11 @@ private:
u32 BlendFunc;
u16 BGExtPalCache[4][16*256];
u16 OBJExtPalCache[16*256];
u32 BGExtPalStatus[4];
u32 OBJExtPalStatus;
template<u32 bgmode> void DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst);
void DrawScanline_Mode1(u32 line, u32* dst);
......
......@@ -72,6 +72,7 @@ void RenderPixel(u32 attr, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb)
pixel[3] = 31; // TODO: alpha
// TODO: optional update for translucent pixels
if (z > 0xFFFFFF) z = 0xFFFFFF;
*depth = z;
}
......@@ -128,9 +129,12 @@ void RenderPolygon(Polygon* polygon)
vtx->FinalPosition[2] = posZ;
vtx->FinalPosition[3] = posW;
vtx->FinalColor[0] = vtx->Color[0] ? (((vtx->Color[0] >> 12) << 4) + 0xF) : 0;
vtx->FinalColor[1] = vtx->Color[1] ? (((vtx->Color[1] >> 12) << 4) + 0xF) : 0;
vtx->FinalColor[2] = vtx->Color[2] ? (((vtx->Color[2] >> 12) << 4) + 0xF) : 0;
vtx->FinalColor[0] = vtx->Color[0] >> 12;
if (vtx->FinalColor[0]) vtx->FinalColor[0] = ((vtx->FinalColor[0] << 4) + 0xF);
vtx->FinalColor[1] = vtx->Color[1] >> 12;
if (vtx->FinalColor[1]) vtx->FinalColor[1] = ((vtx->FinalColor[1] << 4) + 0xF);
vtx->FinalColor[2] = vtx->Color[2] >> 12;
if (vtx->FinalColor[2]) vtx->FinalColor[2] = ((vtx->FinalColor[2] << 4) + 0xF);
vtx->ViewportTransformDone = true;
}
......@@ -238,19 +242,24 @@ void RenderPolygon(Polygon* polygon)
s32 lfactor, rfactor;
// TODO: work out the actual division bias there. 0x400 was found to make things look good.
// but actually, it isn't right. so what's going on there?
// seems vertical slopes are interpolated starting from the bottom and not the top. maybe.
// also seems lfactor/rfactor are rounded
if (vlnext->FinalPosition[1] == vlcur->FinalPosition[1])
lfactor = 0;
else
lfactor = ((y - vlcur->FinalPosition[1]) << 12) / (vlnext->FinalPosition[1] - vlcur->FinalPosition[1]);
lfactor = (((y - vlcur->FinalPosition[1]) << 12) + 0x00) / (vlnext->FinalPosition[1] - vlcur->FinalPosition[1]);
if (vrnext->FinalPosition[1] == vrcur->FinalPosition[1])
rfactor = 0;
else
rfactor = ((y - vrcur->FinalPosition[1]) << 12) / (vrnext->FinalPosition[1] - vrcur->FinalPosition[1]);
rfactor = (((y - vrcur->FinalPosition[1]) << 12) + 0x00) / (vrnext->FinalPosition[1] - vrcur->FinalPosition[1]);
s32 xl = vlcur->FinalPosition[0] + (((vlnext->FinalPosition[0] - vlcur->FinalPosition[0]) * lfactor) >> 12);
s32 xr = vrcur->FinalPosition[0] + (((vrnext->FinalPosition[0] - vrcur->FinalPosition[0]) * rfactor) >> 12);
//printf("y:%d xl:%d xr:%d %08X\n", y, xl, xr, rfactor); // y: 48 143
if (xl > xr) // TODO: handle it in a more elegant way
{
Vertex* vtmp;
......@@ -270,7 +279,7 @@ void RenderPolygon(Polygon* polygon)
continue; // hax
}
s32 zl = vlcur->FinalPosition[2] + (((s64)(vlnext->FinalPosition[2] -vlcur->FinalPosition[2]) * lfactor) >> 12);
s32 zl = vlcur->FinalPosition[2] + (((s64)(vlnext->FinalPosition[2] - vlcur->FinalPosition[2]) * lfactor) >> 12);
s32 zr = vrcur->FinalPosition[2] + (((s64)(vrnext->FinalPosition[2] - vrcur->FinalPosition[2]) * rfactor) >> 12);
s32 wl = vlcur->FinalPosition[3] + (((s64)(vlnext->FinalPosition[3] - vlcur->FinalPosition[3]) * lfactor) >> 12);
......@@ -303,8 +312,11 @@ void RenderPolygon(Polygon* polygon)
if (xr == xl) xr++;
s32 xdiv = 0x1000 / (xr - xl);
//printf("y%d: %d->%d %08X %08X\n", y, xl, xr, lfactor, rfactor);
for (s32 x = xl; x < xr; x++)
{
//s32 xfactor = ((x - xl) << 12) / (xr - xl);
s32 xfactor = (x - xl) * xdiv;
s32 z = zl + (((s64)(zr - zl) * xfactor) >> 12);
......@@ -327,12 +339,6 @@ void RenderPolygon(Polygon* polygon)
u32 vb = ((perspfactor1 * bl) + (perspfactor2 * br)) / (perspfactor1 + perspfactor2);
RenderPixel(polygon->Attr, x, y, z, vr>>3, vg>>3, vb>>3);
// Z debug
/*u8 zerp = (w * 63) / 0xFFFFFF;
pixel[0] = zerp;
pixel[1] = zerp;
pixel[2] = zerp;*/
}
}
......
......@@ -311,7 +311,8 @@ void Reset()
// test
//LoadROM();
//LoadFirmware();
if (NDSCart::LoadROM("rom/sm64ds.nds"))
// a_interp2.nds a_rounding (10) (11)
if (NDSCart::LoadROM("rom/nsmb.nds"))
Running = true; // hax
}
......@@ -803,18 +804,14 @@ u8 ARM9Read8(u32 addr)
case 0x06000000:
{
u32 chunk = (addr >> 14) & 0x7F;
u8* vram = NULL;
switch (addr & 0x00E00000)
{
case 0x00000000: vram = GPU::VRAM_ABG[chunk]; break;
case 0x00200000: vram = GPU::VRAM_BBG[chunk]; break;
case 0x00400000: vram = GPU::VRAM_AOBJ[chunk]; break;
case 0x00600000: vram = GPU::VRAM_BOBJ[chunk]; break;
case 0x00800000: vram = GPU::VRAM_LCD[chunk]; break;
case 0x00000000: return GPU::ReadVRAM_ABG<u8>(addr);
case 0x00200000: return GPU::ReadVRAM_BBG<u8>(addr);
case 0x00400000: return GPU::ReadVRAM_AOBJ<u8>(addr);
case 0x00600000: return GPU::ReadVRAM_BOBJ<u8>(addr);
default: return GPU::ReadVRAM_LCDC<u8>(addr);
}
if (vram)
return *(u8*)&vram[addr & 0x3FFF];
}
return 0;
......@@ -854,18 +851,14 @@ u16 ARM9Read16(u32 addr)
case 0x06000000:
{
u32 chunk = (addr >> 14) & 0x7F;
u8* vram = NULL;
switch (addr & 0x00E00000)
{
case 0x00000000: vram = GPU::VRAM_ABG[chunk]; break;
case 0x00200000: vram = GPU::VRAM_BBG[chunk]; break;
case 0x00400000: vram = GPU::VRAM_AOBJ[chunk]; break;
case 0x00600000: vram = GPU::VRAM_BOBJ[chunk]; break;
case 0x00800000: vram = GPU::VRAM_LCD[chunk]; break;
case 0x00000000: return GPU::ReadVRAM_ABG<u16>(addr);
case 0x00200000: return GPU::ReadVRAM_BBG<u16>(addr);
case 0x00400000: return GPU::ReadVRAM_AOBJ<u16>(addr);
case 0x00600000: return GPU::ReadVRAM_BOBJ<u16>(addr);
default: return GPU::ReadVRAM_LCDC<u16>(addr);
}
if (vram)
return *(u16*)&vram[addr & 0x3FFF];
}
return 0;
......@@ -905,18 +898,14 @@ u32 ARM9Read32(u32 addr)
case 0x06000000:
{
u32 chunk = (addr >> 14) & 0x7F;
u8* vram = NULL;
switch (addr & 0x00E00000)
{
case 0x00000000: vram = GPU::VRAM_ABG[chunk]; break;
case 0x00200000: vram = GPU::VRAM_BBG[chunk]; break;
case 0x00400000: vram = GPU::VRAM_AOBJ[chunk]; break;
case 0x00600000: vram = GPU::VRAM_BOBJ[chunk]; break;
case 0x00800000: vram = GPU::VRAM_LCD[chunk]; break;
case 0x00000000: return GPU::ReadVRAM_ABG<u32>(addr);
case 0x00200000: return GPU::ReadVRAM_BBG<u32>(addr);
case 0x00400000: return GPU::ReadVRAM_AOBJ<u32>(addr);
case 0x00600000: return GPU::ReadVRAM_BOBJ<u32>(addr);
default: return GPU::ReadVRAM_LCDC<u32>(addr);
}
if (vram)
return *(u32*)&vram[addr & 0x3FFF];
}
return 0;
......@@ -978,19 +967,13 @@ void ARM9Write16(u32 addr, u16 val)
return;
case 0x06000000:
switch (addr & 0x00E00000)
{
u32 chunk = (addr >> 14) & 0x7F;
u8* vram = NULL;
switch (addr & 0x00E00000)
{
case 0x00000000: vram = GPU::VRAM_ABG[chunk]; break;
case 0x00200000: vram = GPU::VRAM_BBG[chunk]; break;
case 0x00400000: vram = GPU::VRAM_AOBJ[chunk]; break;
case 0x00600000: vram = GPU::VRAM_BOBJ[chunk]; break;
case 0x00800000: vram = GPU::VRAM_LCD[chunk]; break;
}
if (vram)
*(u16*)&vram[addr & 0x3FFF] = val;
case 0x00000000: GPU::WriteVRAM_ABG<u16>(addr, val); break;
case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); break;
case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); break;
case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); break;
default: GPU::WriteVRAM_LCDC<u16>(addr, val); break;
}
return;
......@@ -1023,19 +1006,13 @@ void ARM9Write32(u32 addr, u32 val)
return;
case 0x06000000:
switch (addr & 0x00E00000)
{
u32 chunk = (addr >> 14) & 0x7F;
u8* vram = NULL;
switch (addr & 0x00E00000)
{
case 0x00000000: vram = GPU::VRAM_ABG[chunk]; break;
case 0x00200000: vram = GPU::VRAM_BBG[chunk]; break;
case 0x00400000: vram = GPU::VRAM_AOBJ[chunk]; break;
case 0x00600000: vram = GPU::VRAM_BOBJ[chunk]; break;
case 0x00800000: vram = GPU::VRAM_LCD[chunk]; break;
}
if (vram)
*(u32*)&vram[addr & 0x3FFF] = val;
case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); break;
case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); break;
case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); break;
case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); break;
default: GPU::WriteVRAM_LCDC<u32>(addr, val); break;
}
return;
......@@ -1079,13 +1056,7 @@ u8 ARM7Read8(u32 addr)
case 0x06000000:
case 0x06800000:
{
u32 chunk = (addr >> 17) & 0x1;
u8* vram = GPU::VRAM_ARM7[chunk];
if (vram)
return *(u8*)&vram[addr & 0x1FFFF];
}
return 0;
return GPU::ReadVRAM_ARM7<u8>(addr);
}
printf("unknown arm7 read8 %08X %08X %08X/%08X\n", addr, ARM7->R[15], ARM7->R[0], ARM7->R[1]);
......@@ -1125,13 +1096,7 @@ u16 ARM7Read16(u32 addr)
case 0x06000000:
case 0x06800000:
{
u32 chunk = (addr >> 17) & 0x1;
u8* vram = GPU::VRAM_ARM7[chunk];
if (vram)
return *(u16*)&vram[addr & 0x1FFFF];
}
return 0;
return GPU::ReadVRAM_ARM7<u16>(addr);
}
printf("unknown arm7 read16 %08X %08X\n", addr, ARM7->R[15]);
......@@ -1168,13 +1133,7 @@ u32 ARM7Read32(u32 addr)
case 0x06000000:
case 0x06800000:
{
u32 chunk = (addr >> 17) & 0x1;
u8* vram = GPU::VRAM_ARM7[chunk];
if (vram)