From 9f0975a0fd9dc1315339b24b5a19a4621a68ab0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Czekan=CC=81ski?= Date: Sat, 9 May 2020 14:24:33 +0200 Subject: [PATCH] clut-cache test --- Makefile | 1 + README.md | 1 + gpu/clut-cache/Makefile | 3 + gpu/clut-cache/main.c | 187 ++++++++++++++++++++++++++++++++++++++++ gpu/clut-cache/vram.png | Bin 0 -> 2715 bytes 5 files changed, 192 insertions(+) create mode 100644 gpu/clut-cache/Makefile create mode 100644 gpu/clut-cache/main.c create mode 100644 gpu/clut-cache/vram.png diff --git a/Makefile b/Makefile index bc13a6b..e60a8ff 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ IMAGES = common \ gpu/bandwidth \ gpu/benchmark \ gpu/clipping \ + gpu/clut-cache \ gpu/gp0-e1 \ gpu/lines \ gpu/mask-bit \ diff --git a/README.md b/README.md index 805fe0b..9db3eff 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Name | Description -------------------------|------------ bandwidth | Measure GPU/VRAM bandwidth benchmark | GPU test to benchmark rasterizer for various commands +clut-cache | CLUT (Palette) cache behavior test clipping | Test Draw Area/Clipping GP0(0xE3), GP0(0xE4) using rectangle and quad gp0-e1 | Check if GP0_E1, GPUSTAT and polygon render uses the same register internally lines | Draws lines using different modes - for verifying Bresenham implementation, color blending, polyline handling diff --git a/gpu/clut-cache/Makefile b/gpu/clut-cache/Makefile new file mode 100644 index 0000000..5e386fa --- /dev/null +++ b/gpu/clut-cache/Makefile @@ -0,0 +1,3 @@ +TARGET = clut-cache.elf + +include ../../common-test.mk diff --git a/gpu/clut-cache/main.c b/gpu/clut-cache/main.c new file mode 100644 index 0000000..1125a0b --- /dev/null +++ b/gpu/clut-cache/main.c @@ -0,0 +1,187 @@ +#include + +void setE1(int texPageX, int texPageY, int transparencyMode, int dithering) { + DR_TPAGE e; + unsigned short texpage = getTPage(/* 8bit */ 1, transparencyMode, texPageX, texPageY); + setDrawTPage(&e, /* Drawing to display area 
*/ 1, dithering, texpage); + DrawPrim(&e); +} + +void rectangle(int x, int y, int u, int v, int clutx, int cluty) { + SPRT s; + setSprt(&s); + setSemiTrans(&s, false); + setXY0(&s, x, y); + setWH(&s, 256, 1); + setRGB0(&s, 0x80, 0x80, 0x80); + setClut(&s, clutx, cluty); + setUV0(&s, u, v); + + DrawPrim(&s); +} + +void uploadToGPU(uint16_t* buffer, uint16_t x, uint16_t y, int words) { + DrawSync(0); + CPU2VRAM buf = {0}; + setcode(&buf, 0xA0); // CPU -> VRAM + setlen(&buf, 3); + setXY0(&buf, x, y); + setWH(&buf, words, 1); + DrawPrim(&buf); + + volatile uint32_t* GP0 = (uint32_t*)0x1F801810; + for (int n = 0; n < words; n += 2) { + uint16_t pixel1 = buffer[n]; + uint16_t pixel2 = buffer[n+1]; + *GP0 = pixel1 | (pixel2<<16); + } +} + +void line(int sx, int sy, int ex, int ey, int r, int g, int b) { + LINE_F2 l; + setLineF2(&l); + setRGB0(&l, r, g, b); + + l.x0 = sx; + l.y0 = sy; + l.x1 = ex; + l.y1 = ey; + + DrawPrim(&l); +} + +void writeTestClut(int x, int y) { + uint16_t buffer[256]; + for (int n = 0; n < 256; n++) { + buffer[n] = n; + } + uploadToGPU(buffer, x, y, 256); +} + +void gpuClearCache() { + volatile uint32_t* GP0 = (uint32_t*)0x1F801810; + *GP0 = 0x01000000; +} + +void writeTextureLinear(int x, int y) { + uint16_t buffer[256]; + for (int i = 0, ptr = 0; i<256; i+=2) { + uint16_t p1 = i; + uint16_t p2 = (i+1); + buffer[ptr++] = p1 | (p2 << 8); + } + uploadToGPU(buffer, x, y, 128); +} + +void writeTextureLinearReversed(int x, int y) { + uint16_t buffer[256]; + for (int i = 0, ptr = 0; i<256; i+=2) { + uint16_t p1 = 255 - i; + uint16_t p2 = 255 - (i+1); + buffer[ptr++] = p1 | (p2 << 8); + } + uploadToGPU(buffer, x, y, 128); +} + +void writeTextureRandom(int x, int y) { + uint16_t buffer[256]; + for (int i = 0, ptr = 0; i<256; i+=2) { + uint16_t p1 = i * (i<<3); + uint16_t p2 = (i+1) * ((i+1)<<3); + buffer[ptr++] = p1 | (p2 << 8); + } + uploadToGPU(buffer, x, y, 128); +} + +void testClutCacheReuseNoClear(int y) { + writeTestClut(0, y); + + // Write textured 
rectangle + rectangle(0, y+4, 0, 1, 0, y); + + // Overwrite CLUT in VRAM without telling GPU about it + fillRect(0, y, 256, 1, 0xff, 0xff, 0xff); + + // Write textured rectangle again (cached CLUT should be used) + rectangle(0, y+8, 0, 1, 0, y); +} + +void testClutCacheReuseClear(int y) { + writeTestClut(0, y); + + rectangle(0, y+4, 0, 1, 0, y); + + // Overwrite CLUT in VRAM, but issue the clear cache command + line(0, y, 256, y, 0xff, 0xff, 0xff); + gpuClearCache(); + + // Write textured rectangle again (checks whether Clear Cache invalidated the cached CLUT) + rectangle(0, y+8, 0, 1, 0, y); +} + +void testClutCacheInvalidatedDifferentClut(int y) { + writeTestClut(0, y); + + rectangle(0, y+4, 0, 1, 0, y); + + fillRect(0, y, 256, 1, 0xff, 0xff, 0xff); + + // Write textured rectangle again (CLUT$ should be invalidated due to different clutx used) + rectangle(0, y+8, 0, 1, 16, y); +} + +int main() { + initVideo(320, 240); + printf("\ngpu/clut-cache\n"); + printf("GPU caches the palette/CLUT before rendering 4/8bit textured primitives.\n"); + printf("This test check this by rendering textured rectangle over currently used CLUT.\n"); + printf("3 last tests check if Clear Cache or using different CLUT position does invalidate the CLUT$.\n\n"); + + clearScreen(); + DrawSync(0); + setE1(0, 0, 0, 0); + + // 0 - test pattern + int y = 32; + writeTestClut(0, y); + gpuClearCache(); + + // 1 - override with linear palette + y += 16; + writeTextureLinear(0, 1); + writeTestClut(0, y); + gpuClearCache(); + rectangle(0, y, 0, 1, 0, y); + + // 2 - override with inverted linear palette + y += 16; + writeTextureLinearReversed(0, 2); + writeTestClut(0, y); + gpuClearCache(); + rectangle(0, y, 0, 2, 0, y); + + // 3 - override with random palette + y += 16; + writeTextureRandom(0, 3); + writeTestClut(0, y); + gpuClearCache(); + rectangle(0, y, 0, 3, 0, y); + + + y += 32; + testClutCacheReuseNoClear(y); + + y += 32; + testClutCacheReuseClear(y); + + y += 32; + testClutCacheInvalidatedDifferentClut(y); + +
DrawSync(0); + printf("Done\n"); + + for (;;) { + VSync(0); + } + return 0; +} diff --git a/gpu/clut-cache/vram.png b/gpu/clut-cache/vram.png new file mode 100644 index 0000000000000000000000000000000000000000..808ff60aa7c9bf5672d86402828a1bc0410c0940 GIT binary patch literal 2715 zcmeAS@N?(olHy`uVBq!ia0y~yU;#3j7&w@K)a3P@?|>9bx}&cn1H;CC?mvmFK)yn< zN02WALzNl>LqiJ#!!Mvv!wUw6QUeBtR|yOZRx=nF#0%!^3bX-AuqAoByDx`7I;J!Gcd><0%69y3#E2IvALcujv*Dd-rm*Cf9)a7_F(s=rpCrbW@cVqW@ct? zewz&j7YqzG*ee6+3kDlLI2>-&_vpKIH7$N>$>qCo)7_Ze7T@lF&aCDKbV~kX#n(D* zI&GgW*8P7t^I828{ipZRLE=yPw`G6U`&_Wt{=QWHZnOVqZ0@``zx~+lb!Yo$wpZ4j zvDxzd?@xW@&&jveot=H=aPPmG>^*O9-AR|r`|>>caqjhJq0b(=)$cRCSC$)>Cikpz zW@X0v-=E{%o}~gcs6xzTXK1Luq4ab@)rqu;i4#9g_%-LRQDjQJNA0;CXD+0Eo>b-Y za&qn|%hFTN3=an%n|5oHy-NF=`N8ohP4eO4Bq`eXfnhVS&m*@a%YH{VZk{&xJz z^q%KD#b&x+Kj_t;duDmq@UizR%iorfDgP%`o%=E6;e@k>rN8FZ`^?=`d8*(5B^2+d!AapO)37L_&NFawXBRJS<$fn`M;--VS@cNPp@m;OVaMDs?MHOx@oQF$>ix*{zRy#`k(wz{jY4| zl|L^gO^Q9MIWsHh(`xT5zwRYZzD|mnG)ew6P>;H*eR$+_pVdp2eDys0`qM1S7%k1p zC>7K7u|Y-einMNi^E|oCKg;j*k|$ey#3HXRNvW;On?9w-^tJ1@)k~gyoOEAZ^|RLD zN5A@lzfar$x;QH3)#OR>wL$Am_ozkwTlw$u(tmodQA9uFpnn+b#cBHBbNf^G|)R|J^I+CoMinRNYWNz5Uv)z1a=BHJ9Gm4Jrwt zprOCYBgk&6=+9643;tES*XDppFfe>btYcRAU;X?yFG7qW*8f`mwOgyNGkg#6+F8zt zWR(0TTuJ-E;Traa{Il*ziYa0KV3Ui={=Sa!-Lfe~?}d;|vU{cI`SVKHlhgkX{d~N4 zy#hk&K>JTVhX0!XuFE6DMp>gFFd70wJOuWw{m&@p99o|z?ED_+71a{gh?11Vl2ohY zqEsNoU}Ruqple{FYh(~&XklewVP$NoZD43+VBof6O)iRt-29Zxv`X9>%%6%}1O=9- LtDnm{r-UW|Xi#TQ literal 0 HcmV?d00001