fixes: texture unit mem access sometimes going to smem, bilinear texture filtering; new: cache req_id,

This commit is contained in:
Blaise Tine
2021-11-24 00:00:17 -05:00
parent 1501360f4b
commit 18762dffce
70 changed files with 3818 additions and 1727 deletions

View File

@@ -1,167 +1,122 @@
#ifndef _TEXSW_H_
#pragma once
#include <vx_intrinsics.h>
#include <texturing.h>
#include "common.h"
#define TEX_LOD_MAX 11
// Two-argument minimum / maximum.
// Every use of an argument is parenthesised so that operands containing
// operators with lower precedence than '<' / '>' (e.g. `a & b`, `a | b`)
// are compared as a whole. Arguments are still evaluated more than once,
// so do not pass expressions with side effects.
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
inline int address(int wrap, int value) {
switch (wrap) {
case 1: return value & 0xfffff;
default:
case 0: return MIN(MAX(value, 0), 0xfffff);
// Loads a single texel of `stride` bytes starting at `address` and returns it
// zero-extended to 32 bits.
//   address : pointer to the first byte of the texel
//   stride  : texel size in bytes; only 1, 2 and 4 are handled
// Any other stride terminates the program through std::abort().
inline uint32_t texel_read(uint8_t* address, uint32_t stride) {
  if (stride == 1) {
    return *address;
  }
  if (stride == 2) {
    return *reinterpret_cast<uint16_t*>(address);
  }
  if (stride == 4) {
    return *reinterpret_cast<uint32_t*>(address);
  }
  // Unsupported texel size.
  std::abort();
  return 0;
}
// Splits a packed texel `value` into two words, written to *l and *h, with the
// bit layout selected by `format`:
//   1, 2      : *l = value unchanged, *h = 0
//   3         : *l = (value | value << 8)  & 0x00ff00ff, *h = 0
//   4         : *l = (value | value << 16) & 0x07e0f81f, *h = 0
//   5         : *l = (value | value << 12) & 0x0f0f0f0f, *h = 0
//   otherwise : *l = even bytes of value, *h = odd bytes of value,
//               each masked with 0x00ff00ff
inline void unpack(int format, int value, int* l, int* h) {
  int lo;
  int hi = 0;  // only the default layout produces a non-zero high word

  if (format == 1 || format == 2) {
    lo = value;
  } else if (format == 3) {
    lo = (value | (value << 8)) & 0x00ff00ff;
  } else if (format == 4) {
    lo = (value | (value << 16)) & 0x07e0f81f;
  } else if (format == 5) {
    lo = (value | (value << 12)) & 0x0f0f0f0f;
  } else {
    lo = value & 0x00ff00ff;
    hi = (value >> 8) & 0x00ff00ff;
  }

  *l = lo;
  *h = hi;
}
inline uint32_t vx_tex_sw(kernel_arg_t* state,
Fixed<TEX_FXD_FRAC> xu,
Fixed<TEX_FXD_FRAC> xv,
uint32_t lod) {
uint8_t* base_addr = ((uint8_t*)state->src_addr) + state->mip_offs[lod];
uint32_t log_width = std::max<int32_t>(state->src_logwidth - lod, 0);
uint32_t log_height = std::max<int32_t>(state->src_logheight - lod, 0);
auto format = (TexFormat)state->format;
auto wrapu = (WrapMode)state->wrapu;
auto wrapv = (WrapMode)state->wrapv;
auto filter = state->filter;
auto stride = Stride(format);
// Interpolates from the word pair (al, ah) towards (bl, bh) by frac/256 and
// stores the result in (*l, *h). Each result word is masked with 0x00ff00ff.
inline void lerp(int al, int ah, int bl, int bh, int frac, int* l, int* h) {
  const int step_lo = ((bl - al) * frac) >> 8;
  *l = (al + step_lo) & 0x00ff00ff;

  const int step_hi = ((bh - ah) * frac) >> 8;
  *h = (ah + step_hi) & 0x00ff00ff;
}
// Recombines the word pair (l, h) into a single packed texel, with the bit
// layout selected by `format`:
//   1, 2      : l unchanged
//   3         : (l | l >> 8)  & 0xffff
//   4         : (l | l >> 16) & 0xffff
//   5         : (l | l >> 12) & 0xffff
//   otherwise : (h << 8) | l
inline int pack(int format, int l, int h) {
  if (format == 1 || format == 2) {
    return l;
  }

  if (format >= 3 && format <= 5) {
    // Fold the spread-out fields back into the low 16 bits.
    const int shift = (format == 3) ? 8 : (format == 4) ? 16 : 12;
    return (l | (l >> shift)) & 0xffff;
  }

  return (h << 8) | l;
}
inline int tex_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
int base_addr = state->src_ptr;
int mip_offset = 0;
int log_width = state->src_logWidth;
int log_height = state->src_logHeight;
int format = state->format;
int wrap = state->wrap;
int filter = state->filter;
int32_t* pBits = ((uint32_t*)base_addr) + mip_offset;
uint32_t color;
if (filter) {
int u0 = address(wrap, u - (0x80000 >> log_width));
int v0 = address(wrap, v - (0x80000 >> log_height));
int u1 = address(wrap, u + (0x80000 >> log_width));
int v1 = address(wrap, v + (0x80000 >> log_height));
// addressing
uint32_t offset00, offset01, offset10, offset11;
uint32_t alpha, beta;
TexAddressLinear(xu, xv, log_width, log_height, wrapu, wrapv,
&offset00, &offset01, &offset10, &offset11, &alpha, &beta);
int x0 = u0 >> (20 - log_width);
int y0 = v0 >> (20 - log_height);
int x1 = u1 >> (20 - log_width);
int y1 = v1 >> (20 - log_height);
uint8_t* addr00 = base_addr + offset00 * stride;
uint8_t* addr01 = base_addr + offset01 * stride;
uint8_t* addr10 = base_addr + offset10 * stride;
uint8_t* addr11 = base_addr + offset11 * stride;
// memory lookup
int c0 = pBits[x0 + (y0 << log_width)];
int c1 = pBits[x1 + (y0 << log_width)];
int c2 = pBits[x0 + (y1 << log_width)];
int c3 = pBits[x1 + (y1 << log_width)];
uint32_t texel00 = texel_read(addr00, stride);
uint32_t texel01 = texel_read(addr01, stride);
uint32_t texel10 = texel_read(addr10, stride);
uint32_t texel11 = texel_read(addr11, stride);
// filtering
int alpha = x0 & 0xff;
int beta = y0 & 0xff;
int c0a, c0b;
int c1a, c1b;
int c01a, c01b;
unpack(format, c0, &c0a, &c0b);
unpack(format, c1, &c1a, &c1b);
lerp(c0a, c0b, c1a, c1b, alpha, &c01a, &c01b);
int c2a, c2b;
int c3a, c3b;
int c23a, c23b;
unpack(format, c2, &c2a, &c2b);
unpack(format, c3, &c3a, &c3b);
lerp(c2a, c2b, c3a, c3b, alpha, &c23a, &c23b);
int c4a, c4b;
lerp(c01a, c01b, c23a, c23b, beta, &c4a, &c4b);
return pack(format, c4a, c4b);
color = TexFilterLinear(
format, texel00, texel01, texel10, texel11, alpha, beta);
} else {
int u0 = address(wrap, u);
int v0 = address(wrap, v);
// addressing
uint32_t offset;
TexAddressPoint(xu, xv, log_width, log_height, wrapu, wrapv, &offset);
uint8_t* addr = base_addr + offset * stride;
// memory lookup
uint32_t texel = texel_read(addr, stride);
int x0 = u0 >> (20 - log_width);
int y0 = v0 >> (20 - log_height);
int c0 = pBits[x0 + (y0 <<log_width)];
int c0a, c0b;
unpack(format, c0, &c0a, &c0b);
return pack(format, c0a, c0b);
// filtering
color = TexFilterPoint(format, texel);
}
return color;
}
inline int vx_tex3(int stage, int u, int v, int lod) {
int lodn = MIN(lod + 0x100000, TEX_LOD_MAX);
int a = vx_tex(0, u, v, lod);
int b = vx_tex(0, u, v, lodn);
int al = a & 0x00ff00ff;
int ah = (a >> 8) & 0x00ff00ff;
int bl = b & 0x00ff00ff;
int bh = (b >> 8) & 0x00ff00ff;
int frac = (lod >> 12) & 0xff;
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
int c = al | (ah << 8);
return c;
// Samples the texture through vx_tex (declared outside this view) for
// coordinates (xu, xv) and level-of-detail value xlod.
//   state : kernel arguments; only state->filter is read here
//   xu,xv : fixed-point texture coordinates, forwarded as raw data
//   xlod  : fixed-point LOD value with 16 fractional bits, clamped below to
//           Fixed<16>::ONE
// Returns the colour from vx_tex at the selected mip level, or, when
// state->filter == 2, a blend of that level and the next one.
inline uint32_t tex_load_hw(kernel_arg_t* state,
                            Fixed<TEX_FXD_FRAC> xu,
                            Fixed<TEX_FXD_FRAC> xv,
                            Fixed<16> xlod) {
  const int32_t  lod_raw  = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
  const uint32_t mip_near = std::min<uint32_t>(log2floor(lod_raw) - 16, TEX_LOD_MAX);

  // Every filter mode other than 2 needs a single sample.
  if (state->filter != 2) {
    return vx_tex(0, xu.data(), xv.data(), mip_near);
  }

  const uint32_t mip_far = std::min<uint32_t>(mip_near + 1, TEX_LOD_MAX);
  // NOTE(review): the blend factor is not masked to 8 bits here; confirm that
  // Lerp8888 tolerates values >= 0x100.
  const uint32_t blend = lod_raw >> (mip_near + 16 - 8);

  const uint32_t near_texel = vx_tex(0, xu.data(), xv.data(), mip_near);
  const uint32_t far_texel  = vx_tex(0, xu.data(), xv.data(), mip_far);

  uint32_t near_lo, near_hi;
  uint32_t far_lo, far_hi;
  Unpack8888(TexFormat::R8G8B8A8, near_texel, &near_lo, &near_hi);
  Unpack8888(TexFormat::R8G8B8A8, far_texel, &far_lo, &far_hi);

  uint32_t mix_lo, mix_hi;
  Lerp8888(near_lo, near_hi, far_lo, far_hi, blend, &mix_lo, &mix_hi);
  return Pack8888(TexFormat::R8G8B8A8, mix_lo, mix_hi);
}
// Software texture fetch that blends two tex_sw samples taken at `lod` and at
// a second level `lodn`.
//   state : kernel arguments forwarded to tex_sw
//   stage : accepted for interface compatibility; tex_sw is always called
//           with stage 0, as before
//   u, v  : texture coordinates forwarded to tex_sw
//   lod   : level of detail; bits 12..19 supply the 8-bit blend factor
// Returns the blended colour, recombined from its even-byte and odd-byte halves.
inline int tex3_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
  (void)stage;
  // NOTE(review): the 0x10000 step and the clamp against TEX_LOD_MAX are kept
  // as they were; confirm they match the fixed-point format of `lod`.
  int lodn = MIN(lod + 0x10000, TEX_LOD_MAX);
  int a = tex_sw(state, 0, u, v, lod);
  int b = tex_sw(state, 0, u, v, lodn);

  // Split both samples into even-byte / odd-byte halves so that each channel
  // has 8 bits of headroom during interpolation.
  int al = a & 0x00ff00ff;
  int ah = (a >> 8) & 0x00ff00ff;
  int bl = b & 0x00ff00ff;
  int bh = (b >> 8) & 0x00ff00ff;

  int frac = (lod >> 12) & 0xff;
  int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
  int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;

  // Return the interpolated halves. The previous code recombined al/ah, which
  // discarded the blend and returned sample `a` unchanged.
  return cl | (ch << 8);
}
#endif
// Samples the texture through the software path vx_tex_sw for coordinates
// (xu, xv) and level-of-detail value xlod.
//   state : kernel arguments; state->filter is read here and the whole
//           structure is forwarded to vx_tex_sw
//   xu,xv : fixed-point texture coordinates
//   xlod  : fixed-point LOD value with 16 fractional bits, clamped below to
//           Fixed<16>::ONE
// Returns the colour from vx_tex_sw at the selected mip level, or, when
// state->filter == 2, a blend of that level and the next one.
inline uint32_t tex_load_sw(kernel_arg_t* state,
                            Fixed<TEX_FXD_FRAC> xu,
                            Fixed<TEX_FXD_FRAC> xv,
                            Fixed<16> xlod) {
  const int32_t  lod_raw  = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
  const uint32_t mip_near = std::min<uint32_t>(log2floor(lod_raw) - 16, TEX_LOD_MAX);

  // Every filter mode other than 2 needs a single sample.
  if (state->filter != 2) {
    return vx_tex_sw(state, xu, xv, mip_near);
  }

  const uint32_t mip_far = std::min<uint32_t>(mip_near + 1, TEX_LOD_MAX);
  // NOTE(review): the blend factor is not masked to 8 bits here; confirm that
  // Lerp8888 tolerates values >= 0x100.
  const uint32_t blend = lod_raw >> (mip_near + 16 - 8);

  const uint32_t near_texel = vx_tex_sw(state, xu, xv, mip_near);
  const uint32_t far_texel  = vx_tex_sw(state, xu, xv, mip_far);

  uint32_t near_lo, near_hi;
  uint32_t far_lo, far_hi;
  Unpack8888(TexFormat::R8G8B8A8, near_texel, &near_lo, &near_hi);
  Unpack8888(TexFormat::R8G8B8A8, far_texel, &far_lo, &far_hi);

  uint32_t mix_lo, mix_hi;
  Lerp8888(near_lo, near_hi, far_lo, far_hi, blend, &mix_lo, &mix_hi);
  return Pack8888(TexFormat::R8G8B8A8, mix_lo, mix_hi);
}