fixes: texture unit mem access sometimes going to smem, bilinear texture filtering; new: cache req_id,
This commit is contained in:
@@ -1,167 +1,122 @@
|
||||
#ifndef _TEXSW_H_
|
||||
#pragma once
|
||||
|
||||
#include <vx_intrinsics.h>
|
||||
#include <texturing.h>
|
||||
#include "common.h"
|
||||
|
||||
#define TEX_LOD_MAX 11
|
||||
|
||||
#define MIN(x, y) ((x < y) ? (x) : (y))
|
||||
|
||||
#define MAX(x, y) ((x > y) ? (x) : (y))
|
||||
|
||||
inline int address(int wrap, int value) {
|
||||
switch (wrap) {
|
||||
case 1: return value & 0xfffff;
|
||||
default:
|
||||
case 0: return MIN(MAX(value, 0), 0xfffff);
|
||||
inline uint32_t texel_read(uint8_t* address, uint32_t stride) {
|
||||
switch (stride) {
|
||||
case 1: return *(uint8_t*)address;
|
||||
case 2: return *(uint16_t*)address;
|
||||
case 4: return *(uint32_t*)address;
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline void unpack(int format, int value, int* l, int* h) {
|
||||
switch (format) {
|
||||
case 1:
|
||||
case 2:
|
||||
*l = value;
|
||||
*h = 0;
|
||||
break;
|
||||
case 3:
|
||||
*l = (value | (value << 8)) & 0x00ff00ff;
|
||||
*h = 0;
|
||||
break;
|
||||
case 4:
|
||||
*l = (value | (value << 16)) & 0x07e0f81f;
|
||||
*h = 0;
|
||||
break;
|
||||
case 5:
|
||||
*l = (value | (value << 12)) & 0x0f0f0f0f;
|
||||
*h = 0;
|
||||
break;
|
||||
default:
|
||||
case 0:
|
||||
*l = value & 0x00ff00ff;
|
||||
*h = (value >> 8) & 0x00ff00ff;
|
||||
break;
|
||||
}
|
||||
}
|
||||
inline uint32_t vx_tex_sw(kernel_arg_t* state,
|
||||
Fixed<TEX_FXD_FRAC> xu,
|
||||
Fixed<TEX_FXD_FRAC> xv,
|
||||
uint32_t lod) {
|
||||
uint8_t* base_addr = ((uint8_t*)state->src_addr) + state->mip_offs[lod];
|
||||
uint32_t log_width = std::max<int32_t>(state->src_logwidth - lod, 0);
|
||||
uint32_t log_height = std::max<int32_t>(state->src_logheight - lod, 0);
|
||||
auto format = (TexFormat)state->format;
|
||||
auto wrapu = (WrapMode)state->wrapu;
|
||||
auto wrapv = (WrapMode)state->wrapv;
|
||||
auto filter = state->filter;
|
||||
auto stride = Stride(format);
|
||||
|
||||
inline void lerp(int al, int ah, int bl, int bh, int frac, int* l, int* h) {
|
||||
*l = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
|
||||
*h = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
|
||||
}
|
||||
|
||||
inline int pack(int format, int l, int h) {
|
||||
switch (format) {
|
||||
case 1:
|
||||
case 2:
|
||||
return l;
|
||||
case 3:
|
||||
return (l | (l >> 8)) & 0xffff;
|
||||
case 4:
|
||||
return (l | (l >> 16)) & 0xffff;
|
||||
case 5:
|
||||
return (l | (l >> 12)) & 0xffff;
|
||||
default:
|
||||
case 0:
|
||||
return (h << 8) | l;
|
||||
}
|
||||
}
|
||||
|
||||
inline int tex_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
|
||||
int base_addr = state->src_ptr;
|
||||
int mip_offset = 0;
|
||||
int log_width = state->src_logWidth;
|
||||
int log_height = state->src_logHeight;
|
||||
int format = state->format;
|
||||
int wrap = state->wrap;
|
||||
int filter = state->filter;
|
||||
|
||||
int32_t* pBits = ((uint32_t*)base_addr) + mip_offset;
|
||||
uint32_t color;
|
||||
|
||||
if (filter) {
|
||||
int u0 = address(wrap, u - (0x80000 >> log_width));
|
||||
int v0 = address(wrap, v - (0x80000 >> log_height));
|
||||
int u1 = address(wrap, u + (0x80000 >> log_width));
|
||||
int v1 = address(wrap, v + (0x80000 >> log_height));
|
||||
// addressing
|
||||
uint32_t offset00, offset01, offset10, offset11;
|
||||
uint32_t alpha, beta;
|
||||
TexAddressLinear(xu, xv, log_width, log_height, wrapu, wrapv,
|
||||
&offset00, &offset01, &offset10, &offset11, &alpha, &beta);
|
||||
|
||||
int x0 = u0 >> (20 - log_width);
|
||||
int y0 = v0 >> (20 - log_height);
|
||||
int x1 = u1 >> (20 - log_width);
|
||||
int y1 = v1 >> (20 - log_height);
|
||||
uint8_t* addr00 = base_addr + offset00 * stride;
|
||||
uint8_t* addr01 = base_addr + offset01 * stride;
|
||||
uint8_t* addr10 = base_addr + offset10 * stride;
|
||||
uint8_t* addr11 = base_addr + offset11 * stride;
|
||||
|
||||
// memory lookup
|
||||
|
||||
int c0 = pBits[x0 + (y0 << log_width)];
|
||||
int c1 = pBits[x1 + (y0 << log_width)];
|
||||
int c2 = pBits[x0 + (y1 << log_width)];
|
||||
int c3 = pBits[x1 + (y1 << log_width)];
|
||||
uint32_t texel00 = texel_read(addr00, stride);
|
||||
uint32_t texel01 = texel_read(addr01, stride);
|
||||
uint32_t texel10 = texel_read(addr10, stride);
|
||||
uint32_t texel11 = texel_read(addr11, stride);
|
||||
|
||||
// filtering
|
||||
|
||||
int alpha = x0 & 0xff;
|
||||
int beta = y0 & 0xff;
|
||||
|
||||
int c0a, c0b;
|
||||
int c1a, c1b;
|
||||
int c01a, c01b;
|
||||
|
||||
unpack(format, c0, &c0a, &c0b);
|
||||
unpack(format, c1, &c1a, &c1b);
|
||||
lerp(c0a, c0b, c1a, c1b, alpha, &c01a, &c01b);
|
||||
|
||||
int c2a, c2b;
|
||||
int c3a, c3b;
|
||||
int c23a, c23b;
|
||||
|
||||
unpack(format, c2, &c2a, &c2b);
|
||||
unpack(format, c3, &c3a, &c3b);
|
||||
lerp(c2a, c2b, c3a, c3b, alpha, &c23a, &c23b);
|
||||
|
||||
int c4a, c4b;
|
||||
lerp(c01a, c01b, c23a, c23b, beta, &c4a, &c4b);
|
||||
return pack(format, c4a, c4b);
|
||||
color = TexFilterLinear(
|
||||
format, texel00, texel01, texel10, texel11, alpha, beta);
|
||||
} else {
|
||||
int u0 = address(wrap, u);
|
||||
int v0 = address(wrap, v);
|
||||
// addressing
|
||||
uint32_t offset;
|
||||
TexAddressPoint(xu, xv, log_width, log_height, wrapu, wrapv, &offset);
|
||||
|
||||
uint8_t* addr = base_addr + offset * stride;
|
||||
|
||||
// memory lookup
|
||||
uint32_t texel = texel_read(addr, stride);
|
||||
|
||||
int x0 = u0 >> (20 - log_width);
|
||||
int y0 = v0 >> (20 - log_height);
|
||||
|
||||
int c0 = pBits[x0 + (y0 <<log_width)];
|
||||
|
||||
int c0a, c0b;
|
||||
unpack(format, c0, &c0a, &c0b);
|
||||
return pack(format, c0a, c0b);
|
||||
// filtering
|
||||
color = TexFilterPoint(format, texel);
|
||||
}
|
||||
return color;
|
||||
}
|
||||
|
||||
inline int vx_tex3(int stage, int u, int v, int lod) {
|
||||
int lodn = MIN(lod + 0x100000, TEX_LOD_MAX);
|
||||
int a = vx_tex(0, u, v, lod);
|
||||
int b = vx_tex(0, u, v, lodn);
|
||||
int al = a & 0x00ff00ff;
|
||||
int ah = (a >> 8) & 0x00ff00ff;
|
||||
int bl = b & 0x00ff00ff;
|
||||
int bh = (b >> 8) & 0x00ff00ff;
|
||||
int frac = (lod >> 12) & 0xff;
|
||||
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
|
||||
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
|
||||
int c = al | (ah << 8);
|
||||
return c;
|
||||
inline uint32_t tex_load_hw(kernel_arg_t* state,
|
||||
Fixed<TEX_FXD_FRAC> xu,
|
||||
Fixed<TEX_FXD_FRAC> xv,
|
||||
Fixed<16> xlod) {
|
||||
uint32_t color;
|
||||
int32_t ilod = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
|
||||
uint32_t lod = std::min<uint32_t>(log2floor(ilod) - 16, TEX_LOD_MAX);
|
||||
if (state->filter == 2) {
|
||||
uint32_t lod_n = std::min<uint32_t>(lod + 1, TEX_LOD_MAX);
|
||||
uint32_t frac = ilod >> (lod + 16 - 8);
|
||||
uint32_t texel0 = vx_tex(0, xu.data(), xv.data(), lod);
|
||||
uint32_t texel1 = vx_tex(0, xu.data(), xv.data(), lod_n);
|
||||
uint32_t cl, ch;
|
||||
{
|
||||
uint32_t c0l, c0h;
|
||||
uint32_t c1l, c1h;
|
||||
Unpack8888(TexFormat::R8G8B8A8, texel0, &c0l, &c0h);
|
||||
Unpack8888(TexFormat::R8G8B8A8, texel1, &c1l, &c1h);
|
||||
Lerp8888(c0l, c0h, c1l, c1h, frac, &cl, &ch);
|
||||
}
|
||||
color = Pack8888(TexFormat::R8G8B8A8, cl, ch);
|
||||
} else {
|
||||
color = vx_tex(0, xu.data(), xv.data(), lod);
|
||||
}
|
||||
return color;
|
||||
}
|
||||
|
||||
inline int tex3_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
|
||||
int lodn = MIN(lod + 0x10000, TEX_LOD_MAX);
|
||||
int a = tex_sw(state, 0, u, v, lod);
|
||||
int b = tex_sw(state, 0, u, v, lodn);
|
||||
int al = a & 0x00ff00ff;
|
||||
int ah = (a >> 8) & 0x00ff00ff;
|
||||
|
||||
int bl = b & 0x00ff00ff;
|
||||
int bh = (b >> 8) & 0x00ff00ff;
|
||||
int frac = (lod >> 12) & 0xff;
|
||||
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
|
||||
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
|
||||
int c = al | (ah << 8);
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif
|
||||
inline uint32_t tex_load_sw(kernel_arg_t* state,
|
||||
Fixed<TEX_FXD_FRAC> xu,
|
||||
Fixed<TEX_FXD_FRAC> xv,
|
||||
Fixed<16> xlod) {
|
||||
uint32_t color;
|
||||
int32_t ilod = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
|
||||
uint32_t lod = std::min<uint32_t>(log2floor(ilod) - 16, TEX_LOD_MAX);
|
||||
if (state->filter == 2) {
|
||||
uint32_t lod_n = std::min<uint32_t>(lod + 1, TEX_LOD_MAX);
|
||||
uint32_t frac = ilod >> (lod + 16 - 8);
|
||||
uint32_t texel0 = vx_tex_sw(state, xu, xv, lod);
|
||||
uint32_t texel1 = vx_tex_sw(state, xu, xv, lod_n);
|
||||
uint32_t cl, ch;
|
||||
{
|
||||
uint32_t c0l, c0h;
|
||||
uint32_t c1l, c1h;
|
||||
Unpack8888(TexFormat::R8G8B8A8, texel0, &c0l, &c0h);
|
||||
Unpack8888(TexFormat::R8G8B8A8, texel1, &c1l, &c1h);
|
||||
Lerp8888(c0l, c0h, c1l, c1h, frac, &cl, &ch);
|
||||
}
|
||||
color = Pack8888(TexFormat::R8G8B8A8, cl, ch);
|
||||
} else {
|
||||
color = vx_tex_sw(state, xu, xv, lod);
|
||||
}
|
||||
return color;
|
||||
}
|
||||
Reference in New Issue
Block a user