fixes: texture unit mem access sometimes going to smem, bilinear texture filtering; new: cache req_id,

This commit is contained in:
Blaise Tine
2021-11-24 00:00:17 -05:00
parent 1501360f4b
commit 18762dffce
70 changed files with 3818 additions and 1727 deletions

View File

@@ -9,8 +9,8 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
VX_CFLAGS += -std=c++11 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -DENABLE_SW -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
@@ -21,7 +21,7 @@ CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -Wfatal-errors
CXXFLAGS += -DLUPNG_USE_ZLIB
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex -lz
@@ -38,7 +38,7 @@ kernel.bin: kernel.elf
$(VX_CP) -O binary kernel.elf kernel.bin
kernel.elf: $(VX_SRCS)
$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
$(VX_CXX) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

View File

@@ -1,25 +1,27 @@
#ifndef _COMMON_H_
#define _COMMON_H_
#include <VX_config.h>
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
typedef struct {
uint32_t num_tasks;
uint8_t format;
uint8_t filter;
uint8_t wrap;
uint8_t use_sw;
uint32_t lod;
uint8_t src_logWidth;
uint8_t src_logHeight;
uint8_t src_stride;
uint8_t src_pitch;
uint32_t src_ptr;
uint32_t dst_width;
uint32_t dst_height;
uint8_t dst_stride;
uint32_t dst_pitch;
uint32_t dst_ptr;
bool use_sw;
uint32_t num_tasks;
uint8_t format;
uint8_t filter;
uint8_t wrapu;
uint8_t wrapv;
uint8_t src_logwidth;
uint8_t src_logheight;
uint32_t src_addr;
float lod;
uint32_t mip_offs[TEX_LOD_MAX+1];
uint32_t dst_width;
uint32_t dst_height;
uint8_t dst_stride;
uint32_t dst_pitch;
uint32_t dst_addr;
} kernel_arg_t;
#endif

View File

@@ -1,11 +1,9 @@
#include <stdint.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include "common.h"
#include <vx_print.h>
#include "texsw.h"
#define ENABLE_SW
typedef struct {
kernel_arg_t* state;
uint32_t tile_width;
@@ -14,29 +12,50 @@ typedef struct {
float deltaY;
} tile_arg_t;
// Compile-time unrolled loop.
// static_for_t<Index, First, Last>()(fn) calls fn(First), fn(First + 1), ...,
// fn(Last - 1) in ascending order through recursive template instantiation.
template <typename Index, Index First, Index Last>
struct static_for_t {
  template <typename Callable>
  inline void operator()(const Callable& fn) const {
    // Handle the current index, then hand the remaining range to the next instantiation.
    fn(First);
    static_for_t<Index, First + 1, Last>{}(fn);
  }
};

// Terminating case: the range is empty (First == Last), so nothing is invoked.
template <typename Index, Index Bound>
struct static_for_t<Index, Bound, Bound> {
  template <typename Callable>
  inline void operator()(const Callable&) const {}
};
void kernel_body(int task_id, tile_arg_t* arg) {
kernel_arg_t* state = arg->state;
uint32_t xoffset = 0;
uint32_t yoffset = task_id * arg->tile_height;
uint8_t* dst_ptr = (uint8_t*)(state->dst_ptr + xoffset * state->dst_stride + yoffset * state->dst_pitch);
uint32_t yoffset = task_id * arg->tile_height;
float fv = yoffset * arg->deltaY;
uint8_t* dst_ptr = (uint8_t*)(state->dst_addr + xoffset * state->dst_stride + yoffset * state->dst_pitch);
Fixed<16> xlod(state->lod);
/*vx_printf("task_id=%d, deltaX=%f, deltaY=%f, tile_width=%d, tile_height=%d\n",
task_id, arg->deltaX, arg->deltaY, arg->tile_width, arg->tile_height);*/
float fv = (yoffset + 0.5f) * arg->deltaY;
for (uint32_t y = 0; y < arg->tile_height; ++y) {
uint32_t* dst_row = (uint32_t*)dst_ptr;
float fu = xoffset * arg->deltaX;
float fu = (xoffset + 0.5f) * arg->deltaX;
for (uint32_t x = 0; x < arg->tile_width; ++x) {
int32_t u = (int32_t)(fu * (1<<20));
int32_t v = (int32_t)(fv * (1<<20));
Fixed<TEX_FXD_FRAC> xu(fu);
Fixed<TEX_FXD_FRAC> xv(fv);
uint32_t color;
#ifdef ENABLE_SW
if (state->use_sw) {
dst_row[x] = (state->filter == 2) ? tex3_sw(state, 0, u, v, state->lod) : tex_sw(state, 0, u, v, state->lod);
} else {
#endif
dst_row[x] = (state->filter == 2) ? vx_tex3(0, u, v, state->lod) : vx_tex(0, u, v, state->lod);
#ifdef ENABLE_SW
}
if (state->use_sw)
color = tex_load_sw(state, xu, xv, xlod);
else
#endif
color = tex_load_hw(state, xu, xv, xlod);
//vx_printf("task_id=%d, x=%d, y=%d, fu=%f, fv=%f, xu=0x%x, xv=0x%x, color=0x%x\n", task_id, x, y, fu, fv, xu.data(), xv.data(), color);
dst_row[x] = color;
fu += arg->deltaX;
}
dst_ptr += state->dst_pitch;
@@ -48,13 +67,16 @@ int main() {
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
// configure texture unit
vx_csr_write(CSR_TEX_ADDR(0), arg->src_ptr);
vx_csr_write(CSR_TEX_MIPOFF(0), 0);
vx_csr_write(CSR_TEX_WIDTH(0), arg->src_logWidth);
vx_csr_write(CSR_TEX_HEIGHT(0), arg->src_logHeight);
vx_csr_write(CSR_TEX_FORMAT(0), arg->format);
vx_csr_write(CSR_TEX_WRAP(0), (arg->wrap << 2) | arg->wrap);
vx_csr_write(CSR_TEX_FILTER(0), (arg->filter ? 1 : 0));
csr_write(CSR_TEX(0, TEX_STATE_WIDTH), arg->src_logwidth);
csr_write(CSR_TEX(0, TEX_STATE_HEIGHT), arg->src_logheight);
csr_write(CSR_TEX(0, TEX_STATE_FORMAT), arg->format);
csr_write(CSR_TEX(0, TEX_STATE_WRAPU), arg->wrapu);
csr_write(CSR_TEX(0, TEX_STATE_WRAPV), arg->wrapv);
csr_write(CSR_TEX(0, TEX_STATE_FILTER), (arg->filter ? 1 : 0));
csr_write(CSR_TEX(0, TEX_STATE_ADDR), arg->src_addr);
static_for_t<int, 0, TEX_LOD_MAX+1>()([&](int i) {
csr_write(CSR_TEX(0, TEX_STATE_MIPOFF(i)), arg->mip_offs[i]);
});
tile_arg_t targ;
targ.state = arg;
@@ -64,4 +86,9 @@ int main() {
targ.deltaY = 1.0f / arg->dst_height;
vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, &targ);
/*for (uint32_t t=0; t < arg->num_tasks; ++t) {
kernel_body(t, &targ);
}*/
return 0;
}

View File

@@ -25,10 +25,11 @@ const char* kernel_file = "kernel.bin";
const char* input_file = "palette64.png";
const char* output_file = "output.png";
int wrap = 0;
int filter = 0;
int filter = 0; // 0-> point, 1->bilinear, 2->trilinear
float scale = 1.0f;
int format = 0;
bool use_sw = false;
float lod = 1.0f; // >= 1.0f
ePixelFormat eformat = FORMAT_A8R8G8B8;
vx_device_h device = nullptr;
@@ -36,7 +37,7 @@ vx_buffer_h buffer = nullptr;
static void show_usage() {
std::cout << "Vortex Texture Test." << std::endl;
std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-z no_hw] [-h: help]" << std::endl;
std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-l lod] [-z no_hw] [-h: help]" << std::endl;
}
static void parse_args(int argc, char **argv) {
@@ -55,6 +56,9 @@ static void parse_args(int argc, char **argv) {
case 'w':
wrap = std::atoi(optarg);
break;
case 'l':
lod = std::stof(optarg, NULL);
break;
case 'z':
use_sw = true;
break;
@@ -118,7 +122,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// download destination buffer
std::cout << "download destination buffer" << std::endl;
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_addr, buf_size, 0));
std::vector<uint8_t> dst_pixels(buf_size);
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
@@ -137,25 +141,39 @@ int run_test(const kernel_arg_t& kernel_arg,
int main(int argc, char *argv[]) {
kernel_arg_t kernel_arg;
std::vector<uint8_t> src_pixels;
std::vector<uint32_t> mip_offsets;
uint32_t src_width;
uint32_t src_height;
// parse command arguments
parse_args(argc, argv);
RT_CHECK(LoadImage(input_file, eformat, src_pixels, &src_width, &src_height));
{
std::vector<uint8_t> staging;
RT_CHECK(LoadImage(input_file, eformat, staging, &src_width, &src_height));
RT_CHECK(GenerateMipmaps(src_pixels, mip_offsets, staging, eformat, src_width, src_height));
//uint32_t src_bpp = Format::GetInfo(eformat).BytePerPixel;
//dump_image(src_pixels, src_pixels.size() / src_bpp, 1, src_bpp);
}
// check power of two support
if (!ISPOW2(src_width) || !ISPOW2(src_height)) {
if (!ispow2(src_width) || !ispow2(src_height)) {
std::cout << "Error: only power of two textures supported: width=" << src_width << ", heigth=" << src_height << std::endl;
return -1;
}
uint32_t src_bpp = Format::GetInfo(eformat).BytePerPixel;
//dump_image(src_pixels, src_width, src_height, src_bpp);
uint32_t src_logwidth = log2ceil(src_width);
uint32_t src_logheight = log2ceil(src_height);
uint32_t src_bufsize = src_bpp * src_width * src_height;
uint32_t src_max_lod = std::max(src_logwidth, src_logheight);
if (lod > src_max_lod) {
std::cout << "Error: out-of-bound level-of-detail: lod=" << lod << ", source image=" << src_max_lod << std::endl;
return -1;
}
uint32_t src_bufsize = src_pixels.size();
uint32_t dst_width = (uint32_t)(src_width * scale);
uint32_t dst_height = (uint32_t)(src_height * scale);
@@ -183,7 +201,7 @@ int main(int argc, char *argv[]) {
// allocate device memory
std::cout << "allocate device memory" << std::endl;
size_t src_addr, dst_addr;
uint64_t src_addr, dst_addr;
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
@@ -192,32 +210,37 @@ int main(int argc, char *argv[]) {
// allocate staging shared memory
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t), std::max<uint32_t>(src_bufsize, dst_bufsize));
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t),
std::max<uint32_t>(src_bufsize, dst_bufsize));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
// upload kernel argument
std::cout << "upload kernel argument" << std::endl;
{
kernel_arg.use_sw = use_sw;
kernel_arg.num_tasks = std::min<uint32_t>(num_tasks, dst_height);
kernel_arg.format = format;
kernel_arg.filter = filter;
kernel_arg.wrap = wrap;
kernel_arg.use_sw = use_sw;
kernel_arg.lod = 0x0;
kernel_arg.wrapu = wrap;
kernel_arg.wrapv = wrap;
kernel_arg.src_logWidth = (uint32_t)std::log2(src_width);
kernel_arg.src_logHeight = (uint32_t)std::log2(src_height);
kernel_arg.src_stride = src_bpp;
kernel_arg.src_pitch = src_bpp * src_width;
kernel_arg.src_ptr = src_addr;
kernel_arg.src_logwidth = src_logwidth;
kernel_arg.src_logheight = src_logheight;
kernel_arg.src_addr = src_addr;
kernel_arg.lod = lod;
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
assert(i < TEX_LOD_MAX);
kernel_arg.mip_offs[i] = mip_offsets.at(i);
}
kernel_arg.dst_width = dst_width;
kernel_arg.dst_height = dst_height;
kernel_arg.dst_stride = dst_bpp;
kernel_arg.dst_pitch = dst_bpp * dst_width;
kernel_arg.dst_ptr = dst_addr;
kernel_arg.dst_addr = dst_addr;
auto buf_ptr = (int*)vx_host_ptr(buffer);
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
@@ -225,21 +248,21 @@ int main(int argc, char *argv[]) {
// upload source buffer
std::cout << "upload source buffer" << std::endl;
{
auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < src_bufsize; ++i) {
buf_ptr[i] = src_pixels[i];
}
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, src_bufsize, 0));
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_addr, src_bufsize, 0));
}
// clear destination buffer
std::cout << "clear destination buffer" << std::endl;
{
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
auto buf_ptr = (uint32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < (dst_bufsize/4); ++i) {
buf_ptr[i] = 0xdeadbeef;
}
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, dst_bufsize, 0));
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_addr, dst_bufsize, 0));
}
// run tests

View File

@@ -1,167 +1,122 @@
#ifndef _TEXSW_H_
#pragma once
#include <vx_intrinsics.h>
#include <texturing.h>
#include "common.h"
#define TEX_LOD_MAX 11
#define MIN(x, y) ((x < y) ? (x) : (y))
#define MAX(x, y) ((x > y) ? (x) : (y))
inline int address(int wrap, int value) {
switch (wrap) {
case 1: return value & 0xfffff;
default:
case 0: return MIN(MAX(value, 0), 0xfffff);
// Reads one texel of `stride` bytes located at `address` and returns it
// zero-extended to 32 bits. Only strides of 1, 2 and 4 bytes are handled;
// any other value aborts the program.
inline uint32_t texel_read(uint8_t* address, uint32_t stride) {
  if (stride == 1) {
    return *address;
  }
  if (stride == 2) {
    return *(uint16_t*)address;
  }
  if (stride == 4) {
    return *(uint32_t*)address;
  }
  // Unsupported texel size.
  std::abort();
  return 0;
}
inline void unpack(int format, int value, int* l, int* h) {
switch (format) {
case 1:
case 2:
*l = value;
*h = 0;
break;
case 3:
*l = (value | (value << 8)) & 0x00ff00ff;
*h = 0;
break;
case 4:
*l = (value | (value << 16)) & 0x07e0f81f;
*h = 0;
break;
case 5:
*l = (value | (value << 12)) & 0x0f0f0f0f;
*h = 0;
break;
default:
case 0:
*l = value & 0x00ff00ff;
*h = (value >> 8) & 0x00ff00ff;
break;
}
}
inline uint32_t vx_tex_sw(kernel_arg_t* state,
Fixed<TEX_FXD_FRAC> xu,
Fixed<TEX_FXD_FRAC> xv,
uint32_t lod) {
uint8_t* base_addr = ((uint8_t*)state->src_addr) + state->mip_offs[lod];
uint32_t log_width = std::max<int32_t>(state->src_logwidth - lod, 0);
uint32_t log_height = std::max<int32_t>(state->src_logheight - lod, 0);
auto format = (TexFormat)state->format;
auto wrapu = (WrapMode)state->wrapu;
auto wrapv = (WrapMode)state->wrapv;
auto filter = state->filter;
auto stride = Stride(format);
inline void lerp(int al, int ah, int bl, int bh, int frac, int* l, int* h) {
*l = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
*h = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
}
inline int pack(int format, int l, int h) {
switch (format) {
case 1:
case 2:
return l;
case 3:
return (l | (l >> 8)) & 0xffff;
case 4:
return (l | (l >> 16)) & 0xffff;
case 5:
return (l | (l >> 12)) & 0xffff;
default:
case 0:
return (h << 8) | l;
}
}
inline int tex_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
int base_addr = state->src_ptr;
int mip_offset = 0;
int log_width = state->src_logWidth;
int log_height = state->src_logHeight;
int format = state->format;
int wrap = state->wrap;
int filter = state->filter;
int32_t* pBits = ((uint32_t*)base_addr) + mip_offset;
uint32_t color;
if (filter) {
int u0 = address(wrap, u - (0x80000 >> log_width));
int v0 = address(wrap, v - (0x80000 >> log_height));
int u1 = address(wrap, u + (0x80000 >> log_width));
int v1 = address(wrap, v + (0x80000 >> log_height));
// addressing
uint32_t offset00, offset01, offset10, offset11;
uint32_t alpha, beta;
TexAddressLinear(xu, xv, log_width, log_height, wrapu, wrapv,
&offset00, &offset01, &offset10, &offset11, &alpha, &beta);
int x0 = u0 >> (20 - log_width);
int y0 = v0 >> (20 - log_height);
int x1 = u1 >> (20 - log_width);
int y1 = v1 >> (20 - log_height);
uint8_t* addr00 = base_addr + offset00 * stride;
uint8_t* addr01 = base_addr + offset01 * stride;
uint8_t* addr10 = base_addr + offset10 * stride;
uint8_t* addr11 = base_addr + offset11 * stride;
// memory lookup
int c0 = pBits[x0 + (y0 << log_width)];
int c1 = pBits[x1 + (y0 << log_width)];
int c2 = pBits[x0 + (y1 << log_width)];
int c3 = pBits[x1 + (y1 << log_width)];
uint32_t texel00 = texel_read(addr00, stride);
uint32_t texel01 = texel_read(addr01, stride);
uint32_t texel10 = texel_read(addr10, stride);
uint32_t texel11 = texel_read(addr11, stride);
// filtering
int alpha = x0 & 0xff;
int beta = y0 & 0xff;
int c0a, c0b;
int c1a, c1b;
int c01a, c01b;
unpack(format, c0, &c0a, &c0b);
unpack(format, c1, &c1a, &c1b);
lerp(c0a, c0b, c1a, c1b, alpha, &c01a, &c01b);
int c2a, c2b;
int c3a, c3b;
int c23a, c23b;
unpack(format, c2, &c2a, &c2b);
unpack(format, c3, &c3a, &c3b);
lerp(c2a, c2b, c3a, c3b, alpha, &c23a, &c23b);
int c4a, c4b;
lerp(c01a, c01b, c23a, c23b, beta, &c4a, &c4b);
return pack(format, c4a, c4b);
color = TexFilterLinear(
format, texel00, texel01, texel10, texel11, alpha, beta);
} else {
int u0 = address(wrap, u);
int v0 = address(wrap, v);
// addressing
uint32_t offset;
TexAddressPoint(xu, xv, log_width, log_height, wrapu, wrapv, &offset);
uint8_t* addr = base_addr + offset * stride;
// memory lookup
uint32_t texel = texel_read(addr, stride);
int x0 = u0 >> (20 - log_width);
int y0 = v0 >> (20 - log_height);
int c0 = pBits[x0 + (y0 <<log_width)];
int c0a, c0b;
unpack(format, c0, &c0a, &c0b);
return pack(format, c0a, c0b);
// filtering
color = TexFilterPoint(format, texel);
}
return color;
}
inline int vx_tex3(int stage, int u, int v, int lod) {
int lodn = MIN(lod + 0x100000, TEX_LOD_MAX);
int a = vx_tex(0, u, v, lod);
int b = vx_tex(0, u, v, lodn);
int al = a & 0x00ff00ff;
int ah = (a >> 8) & 0x00ff00ff;
int bl = b & 0x00ff00ff;
int bh = (b >> 8) & 0x00ff00ff;
int frac = (lod >> 12) & 0xff;
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
int c = al | (ah << 8);
return c;
// Samples texture stage 0 through the vx_tex intrinsic.
//
// state - kernel arguments; only `filter` is read here (2 selects trilinear).
// xu/xv - fixed-point texture coordinates, forwarded as raw data() values.
// xlod  - fixed-point level of detail with 16 fractional bits.
//
// Returns the sampled color. For filter == 2 the result is a blend of two
// adjacent mip levels; otherwise it is a single vx_tex lookup.
inline uint32_t tex_load_hw(kernel_arg_t* state,
                            Fixed<TEX_FXD_FRAC> xu,
                            Fixed<TEX_FXD_FRAC> xv,
                            Fixed<16> xlod) {
  // Clamp the raw LOD to at least Fixed<16>::ONE, then derive the mip level
  // from its log2, capped at TEX_LOD_MAX.
  int32_t lod_raw = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
  uint32_t level = std::min<uint32_t>(log2floor(lod_raw) - 16, TEX_LOD_MAX);

  // Every mode except trilinear needs just one lookup.
  if (state->filter != 2) {
    return vx_tex(0, xu.data(), xv.data(), level);
  }

  uint32_t next_level = std::min<uint32_t>(level + 1, TEX_LOD_MAX);
  // NOTE(review): this shift keeps the bits above the 8-bit fraction, so the
  // blend factor is presumably >= 256 when `level` was not clamped — confirm
  // that Lerp8888 expects this rather than a pure 0..255 fraction.
  uint32_t blend = lod_raw >> (level + 16 - 8);

  uint32_t near_texel = vx_tex(0, xu.data(), xv.data(), level);
  uint32_t far_texel = vx_tex(0, xu.data(), xv.data(), next_level);

  // Split both samples into low/high channel pairs, interpolate, and repack.
  uint32_t near_lo, near_hi;
  uint32_t far_lo, far_hi;
  uint32_t mix_lo, mix_hi;
  Unpack8888(TexFormat::R8G8B8A8, near_texel, &near_lo, &near_hi);
  Unpack8888(TexFormat::R8G8B8A8, far_texel, &far_lo, &far_hi);
  Lerp8888(near_lo, near_hi, far_lo, far_hi, blend, &mix_lo, &mix_hi);
  return Pack8888(TexFormat::R8G8B8A8, mix_lo, mix_hi);
}
inline int tex3_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
int lodn = MIN(lod + 0x10000, TEX_LOD_MAX);
int a = tex_sw(state, 0, u, v, lod);
int b = tex_sw(state, 0, u, v, lodn);
int al = a & 0x00ff00ff;
int ah = (a >> 8) & 0x00ff00ff;
int bl = b & 0x00ff00ff;
int bh = (b >> 8) & 0x00ff00ff;
int frac = (lod >> 12) & 0xff;
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
int c = al | (ah << 8);
return c;
}
#endif
// Software counterpart of the hardware texture load: samples through
// vx_tex_sw instead of the vx_tex intrinsic.
//
// state - kernel arguments, forwarded to vx_tex_sw; `filter` == 2 selects trilinear.
// xu/xv - fixed-point texture coordinates.
// xlod  - fixed-point level of detail with 16 fractional bits.
//
// Returns the sampled color. For filter == 2 the result is a blend of two
// adjacent mip levels; otherwise it is a single vx_tex_sw lookup.
inline uint32_t tex_load_sw(kernel_arg_t* state,
                            Fixed<TEX_FXD_FRAC> xu,
                            Fixed<TEX_FXD_FRAC> xv,
                            Fixed<16> xlod) {
  // Clamp the raw LOD to at least Fixed<16>::ONE, then derive the mip level
  // from its log2, capped at TEX_LOD_MAX.
  int32_t lod_raw = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
  uint32_t level = std::min<uint32_t>(log2floor(lod_raw) - 16, TEX_LOD_MAX);

  // Every mode except trilinear needs just one lookup.
  if (state->filter != 2) {
    return vx_tex_sw(state, xu, xv, level);
  }

  uint32_t next_level = std::min<uint32_t>(level + 1, TEX_LOD_MAX);
  // NOTE(review): this shift keeps the bits above the 8-bit fraction, so the
  // blend factor is presumably >= 256 when `level` was not clamped — confirm
  // that Lerp8888 expects this rather than a pure 0..255 fraction.
  uint32_t blend = lod_raw >> (level + 16 - 8);

  uint32_t near_texel = vx_tex_sw(state, xu, xv, level);
  uint32_t far_texel = vx_tex_sw(state, xu, xv, next_level);

  // Split both samples into low/high channel pairs, interpolate, and repack.
  uint32_t near_lo, near_hi;
  uint32_t far_lo, far_hi;
  uint32_t mix_lo, mix_hi;
  Unpack8888(TexFormat::R8G8B8A8, near_texel, &near_lo, &near_hi);
  Unpack8888(TexFormat::R8G8B8A8, far_texel, &far_lo, &far_hi);
  Lerp8888(near_lo, near_hi, far_lo, far_hi, blend, &mix_lo, &mix_hi);
  return Pack8888(TexFormat::R8G8B8A8, mix_lo, mix_hi);
}

View File

@@ -191,4 +191,112 @@ int ConvertImage(std::vector<uint8_t>& dst_pixels,
SurfaceDesc dstDesc{dst_format, dst_pixels.data(), width, height, dst_pitch};
return CopyBuffers(dstDesc, 0, 0, width, height, srcDesc, 0, 0);
}
// Builds a full mipmap chain for an image and packs all levels back to back.
//
// dst_pixels  - receives every level, level 0 first, encoded in `format`.
// mip_offsets - receives one entry per level: the byte offset of that level
//               inside dst_pixels.
// src_pixels  - level-0 image encoded in `format`.
// format      - pixel format of the source and of the generated chain.
// src_width, src_height - level-0 dimensions in texels. The level count is
//               derived from log2ceil of these; the debug asserts below
//               presumably rely on the caller passing power-of-two sizes.
//
// Levels are produced with a 2x2 box filter on A8R8G8B8 data; other formats
// are converted to A8R8G8B8 first and converted back at the end.
//
// Returns 0 on success. Returns -1 for an empty image or a source buffer of
// unexpected size, or the non-zero code of a failed ConvertImage call
// (ConvertImage is assumed to return 0 on success, like this function).
int GenerateMipmaps(std::vector<uint8_t>& dst_pixels,
                    std::vector<uint32_t>& mip_offsets,
                    const std::vector<uint8_t>& src_pixels,
                    ePixelFormat format,
                    uint32_t src_width,
                    uint32_t src_height) {
  // An empty image has no mip chain to build.
  if (src_width == 0 || src_height == 0)
    return -1;

  std::vector<uint8_t> src_staging, dst_staging;
  const std::vector<uint8_t> *pSrcPixels;
  std::vector<uint8_t> *pDstPixels;

  // convert source image if needed
  bool need_conversion = (format != FORMAT_A8R8G8B8);
  if (need_conversion) {
    int ret = ConvertImage(src_staging, src_pixels, src_width, src_height, format, FORMAT_A8R8G8B8);
    if (ret != 0)
      return ret;
    pSrcPixels = &src_staging;
    pDstPixels = &dst_staging;
  } else {
    pSrcPixels = &src_pixels;
    pDstPixels = &dst_pixels;
  }

  // Level 0 is copied verbatim below, so it must hold exactly
  // width x height 32-bit texels. Check this before the copy, not after.
  if (pSrcPixels->size() != 4 * static_cast<size_t>(src_width) * src_height)
    return -1;

  uint32_t src_logwidth  = log2ceil(src_width);
  uint32_t src_logheight = log2ceil(src_height);
  uint32_t max_lod = std::max(src_logwidth, src_logheight) + 1;

  mip_offsets.resize(max_lod);

  // Calculate mipmaps buffer size. The whole chain is laid out as a single
  // row of `dst_width` texels; offsets are in texels until scaled at the end.
  uint32_t dst_height = 1;
  uint32_t dst_width = 0;
  for (uint32_t lod = 0, w = src_width, h = src_height; lod < max_lod; ++lod) {
    assert((w > 0) || (h > 0));
    uint32_t pw = std::max<uint32_t>(w, 1);
    uint32_t ph = std::max<uint32_t>(h, 1);
    mip_offsets.at(lod) = dst_width;
    dst_width += pw * ph;
    w >>= 1;
    h >>= 1;
  }

  // allocate mipmap
  pDstPixels->resize(dst_width * 4);

  // generate mipmaps
  {
    auto pSrc = reinterpret_cast<const uint32_t*>(pSrcPixels->data());
    auto pDst = reinterpret_cast<uint32_t*>(pDstPixels->data());

    // copy level 0
    memcpy(pDst, pSrc, pSrcPixels->size());
    pSrc = pDst;
    pDst += src_width * src_height;

    // Dimensions of the level being read from. These differ from 2*pw / 2*ph
    // once a dimension has already clamped to 1 (non-square images).
    uint32_t prev_w = src_width;
    uint32_t prev_h = src_height;

    // generate lower levels
    for (uint32_t lod = 1, w = (src_width/2), h = (src_height/2); lod < max_lod; ++lod) {
      assert((w > 0) || (h > 0));
      uint32_t pw = std::max<uint32_t>(w, 1);
      uint32_t ph = std::max<uint32_t>(h, 1);

      // A second column/row only exists if the parent level is wider/taller
      // than one texel; otherwise the same texel is sampled twice.
      uint32_t du = (prev_w > 1) ? 1 : 0;
      uint32_t dv = (prev_h > 1) ? 1 : 0;

      for (uint32_t y = 0; y < ph; ++y) {
        auto v0 = 2 * y;
        auto v1 = 2 * y + dv;
        auto pSrc0 = pSrc + v0 * prev_w;
        auto pSrc1 = pSrc + v1 * prev_w;
        for (uint32_t x = 0; x < pw; ++x) {
          auto u0 = 2 * x;
          auto u1 = 2 * x + du;
          auto c00 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc0 + u0);
          auto c01 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc0 + u1);
          auto c10 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc1 + u0);
          auto c11 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc1 + u1);
          // Rounded average of the four samples (+2 before dividing by 4).
          const ColorARGB color((c00.a + c01.a + c10.a + c11.a+2) >> 2,
                                (c00.r + c01.r + c10.r + c11.r+2) >> 2,
                                (c00.g + c01.g + c10.g + c11.g+2) >> 2,
                                (c00.b + c01.b + c10.b + c11.b+2) >> 2);
          uint32_t ncolor;
          Format::ConvertTo<FORMAT_A8R8G8B8>(&ncolor, color);
          pDst[x + y * pw] = ncolor;
        }
      }

      // The level just written becomes the source of the next one.
      pSrc = pDst;
      pDst += pw * ph;
      prev_w = pw;
      prev_h = ph;
      w >>= 1;
      h >>= 1;
    }
    assert(static_cast<uint32_t>(pDst - reinterpret_cast<uint32_t*>(pDstPixels->data())) == dst_width);
  }

  // convert destination image if needed: the chain was built in dst_staging
  // and must end up in the caller's dst_pixels in the requested format.
  if (need_conversion) {
    int ret = ConvertImage(dst_pixels, dst_staging, dst_width, dst_height, FORMAT_A8R8G8B8, format);
    if (ret != 0)
      return ret;
  }

  // Offsets were accumulated in texels; callers need byte offsets.
  uint32_t bpp = Format::GetInfo(format).BytePerPixel;
  for (auto& offset : mip_offsets) {
    offset *= bpp;
  }

  return 0;
}

View File

@@ -1,14 +1,9 @@
#include <cstdint>
#include <vector>
#include <iostream>
#include <bitmanip.h>
#include "surfacedesc.h"
#define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
inline uint32_t ilog2 (uint32_t value) {
return (uint32_t)(sizeof(uint32_t) * 8UL) - (uint32_t)__builtin_clzl((value << 1) - 1UL) - 1;
}
int LoadImage(const char *filename,
ePixelFormat format,
std::vector<uint8_t> &pixels,
@@ -37,7 +32,14 @@ int ConvertImage(std::vector<uint8_t>& dst_pixels,
ePixelFormat src_format,
ePixelFormat dst_format);
int GenerateMipmaps(std::vector<uint8_t>& dst_pixels,
std::vector<uint32_t>& mip_offsets,
const std::vector<uint8_t>& src_pixels,
ePixelFormat format,
uint32_t src_width,
uint32_t src_height);
void dump_image(const std::vector<uint8_t>& pixels,
uint32_t width,
uint32_t height,
uint32_t bpp);
uint32_t bpp);