fixes: texture unit mem access sometimes going to smem, bilinear texture filtering; new: cache req_id,
This commit is contained in:
@@ -9,8 +9,8 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
VX_CFLAGS += -std=c++11 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -DENABLE_SW -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
||||
@@ -21,7 +21,7 @@ CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -DLUPNG_USE_ZLIB
|
||||
|
||||
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include
|
||||
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common
|
||||
|
||||
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex -lz
|
||||
|
||||
@@ -38,7 +38,7 @@ kernel.bin: kernel.elf
|
||||
$(VX_CP) -O binary kernel.elf kernel.bin
|
||||
|
||||
kernel.elf: $(VX_SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
|
||||
$(VX_CXX) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
@@ -1,25 +1,27 @@
|
||||
#ifndef _COMMON_H_
|
||||
#define _COMMON_H_
|
||||
|
||||
#include <VX_config.h>
|
||||
|
||||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
typedef struct {
|
||||
uint32_t num_tasks;
|
||||
uint8_t format;
|
||||
uint8_t filter;
|
||||
uint8_t wrap;
|
||||
uint8_t use_sw;
|
||||
uint32_t lod;
|
||||
uint8_t src_logWidth;
|
||||
uint8_t src_logHeight;
|
||||
uint8_t src_stride;
|
||||
uint8_t src_pitch;
|
||||
uint32_t src_ptr;
|
||||
uint32_t dst_width;
|
||||
uint32_t dst_height;
|
||||
uint8_t dst_stride;
|
||||
uint32_t dst_pitch;
|
||||
uint32_t dst_ptr;
|
||||
bool use_sw;
|
||||
uint32_t num_tasks;
|
||||
uint8_t format;
|
||||
uint8_t filter;
|
||||
uint8_t wrapu;
|
||||
uint8_t wrapv;
|
||||
uint8_t src_logwidth;
|
||||
uint8_t src_logheight;
|
||||
uint32_t src_addr;
|
||||
float lod;
|
||||
uint32_t mip_offs[TEX_LOD_MAX+1];
|
||||
uint32_t dst_width;
|
||||
uint32_t dst_height;
|
||||
uint8_t dst_stride;
|
||||
uint32_t dst_pitch;
|
||||
uint32_t dst_addr;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
||||
@@ -1,11 +1,9 @@
|
||||
#include <stdint.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_spawn.h>
|
||||
#include "common.h"
|
||||
#include <vx_print.h>
|
||||
#include "texsw.h"
|
||||
|
||||
#define ENABLE_SW
|
||||
|
||||
typedef struct {
|
||||
kernel_arg_t* state;
|
||||
uint32_t tile_width;
|
||||
@@ -14,29 +12,50 @@ typedef struct {
|
||||
float deltaY;
|
||||
} tile_arg_t;
|
||||
|
||||
// Compile-time unrolled loop.
//
// static_for_t<T, Start, End>()(fn) invokes fn(Start), fn(Start+1), ...,
// fn(End-1) in increasing order through recursive template instantiation,
// so every call site sees its index as a constant after inlining.
//
// Fn must be callable as fn(T) through a const reference.
template <typename T, T Start, T End>
struct static_for_t {
  // The recursion only stops when Start reaches End exactly (see the
  // specialization below); a reversed range would instantiate templates
  // until the compiler's depth limit is hit, so reject it up front.
  static_assert(Start < End, "static_for_t requires Start <= End");

  template <typename Fn>
  inline void operator()(const Fn& callback) const {
    callback(Start);
    static_for_t<T, Start + 1, End>()(callback);
  }
};

// Terminating case: the range [N, N) is empty, nothing is invoked.
template <typename T, T N>
struct static_for_t<T, N, N> {
  template <typename Fn>
  inline void operator()(const Fn& /*callback*/) const {}
};
|
||||
|
||||
void kernel_body(int task_id, tile_arg_t* arg) {
|
||||
kernel_arg_t* state = arg->state;
|
||||
|
||||
uint32_t xoffset = 0;
|
||||
uint32_t yoffset = task_id * arg->tile_height;
|
||||
uint8_t* dst_ptr = (uint8_t*)(state->dst_ptr + xoffset * state->dst_stride + yoffset * state->dst_pitch);
|
||||
uint32_t yoffset = task_id * arg->tile_height;
|
||||
|
||||
float fv = yoffset * arg->deltaY;
|
||||
uint8_t* dst_ptr = (uint8_t*)(state->dst_addr + xoffset * state->dst_stride + yoffset * state->dst_pitch);
|
||||
|
||||
Fixed<16> xlod(state->lod);
|
||||
|
||||
/*vx_printf("task_id=%d, deltaX=%f, deltaY=%f, tile_width=%d, tile_height=%d\n",
|
||||
task_id, arg->deltaX, arg->deltaY, arg->tile_width, arg->tile_height);*/
|
||||
|
||||
float fv = (yoffset + 0.5f) * arg->deltaY;
|
||||
for (uint32_t y = 0; y < arg->tile_height; ++y) {
|
||||
uint32_t* dst_row = (uint32_t*)dst_ptr;
|
||||
float fu = xoffset * arg->deltaX;
|
||||
float fu = (xoffset + 0.5f) * arg->deltaX;
|
||||
for (uint32_t x = 0; x < arg->tile_width; ++x) {
|
||||
int32_t u = (int32_t)(fu * (1<<20));
|
||||
int32_t v = (int32_t)(fv * (1<<20));
|
||||
Fixed<TEX_FXD_FRAC> xu(fu);
|
||||
Fixed<TEX_FXD_FRAC> xv(fv);
|
||||
uint32_t color;
|
||||
#ifdef ENABLE_SW
|
||||
if (state->use_sw) {
|
||||
dst_row[x] = (state->filter == 2) ? tex3_sw(state, 0, u, v, state->lod) : tex_sw(state, 0, u, v, state->lod);
|
||||
} else {
|
||||
#endif
|
||||
dst_row[x] = (state->filter == 2) ? vx_tex3(0, u, v, state->lod) : vx_tex(0, u, v, state->lod);
|
||||
#ifdef ENABLE_SW
|
||||
}
|
||||
if (state->use_sw)
|
||||
color = tex_load_sw(state, xu, xv, xlod);
|
||||
else
|
||||
#endif
|
||||
color = tex_load_hw(state, xu, xv, xlod);
|
||||
//vx_printf("task_id=%d, x=%d, y=%d, fu=%f, fv=%f, xu=0x%x, xv=0x%x, color=0x%x\n", task_id, x, y, fu, fv, xu.data(), xv.data(), color);
|
||||
dst_row[x] = color;
|
||||
fu += arg->deltaX;
|
||||
}
|
||||
dst_ptr += state->dst_pitch;
|
||||
@@ -48,13 +67,16 @@ int main() {
|
||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
|
||||
// configure texture unit
|
||||
vx_csr_write(CSR_TEX_ADDR(0), arg->src_ptr);
|
||||
vx_csr_write(CSR_TEX_MIPOFF(0), 0);
|
||||
vx_csr_write(CSR_TEX_WIDTH(0), arg->src_logWidth);
|
||||
vx_csr_write(CSR_TEX_HEIGHT(0), arg->src_logHeight);
|
||||
vx_csr_write(CSR_TEX_FORMAT(0), arg->format);
|
||||
vx_csr_write(CSR_TEX_WRAP(0), (arg->wrap << 2) | arg->wrap);
|
||||
vx_csr_write(CSR_TEX_FILTER(0), (arg->filter ? 1 : 0));
|
||||
csr_write(CSR_TEX(0, TEX_STATE_WIDTH), arg->src_logwidth);
|
||||
csr_write(CSR_TEX(0, TEX_STATE_HEIGHT), arg->src_logheight);
|
||||
csr_write(CSR_TEX(0, TEX_STATE_FORMAT), arg->format);
|
||||
csr_write(CSR_TEX(0, TEX_STATE_WRAPU), arg->wrapu);
|
||||
csr_write(CSR_TEX(0, TEX_STATE_WRAPV), arg->wrapv);
|
||||
csr_write(CSR_TEX(0, TEX_STATE_FILTER), (arg->filter ? 1 : 0));
|
||||
csr_write(CSR_TEX(0, TEX_STATE_ADDR), arg->src_addr);
|
||||
static_for_t<int, 0, TEX_LOD_MAX+1>()([&](int i) {
|
||||
csr_write(CSR_TEX(0, TEX_STATE_MIPOFF(i)), arg->mip_offs[i]);
|
||||
});
|
||||
|
||||
tile_arg_t targ;
|
||||
targ.state = arg;
|
||||
@@ -64,4 +86,9 @@ int main() {
|
||||
targ.deltaY = 1.0f / arg->dst_height;
|
||||
|
||||
vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, &targ);
|
||||
/*for (uint32_t t=0; t < arg->num_tasks; ++t) {
|
||||
kernel_body(t, &targ);
|
||||
}*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -25,10 +25,11 @@ const char* kernel_file = "kernel.bin";
|
||||
const char* input_file = "palette64.png";
|
||||
const char* output_file = "output.png";
|
||||
int wrap = 0;
|
||||
int filter = 0;
|
||||
int filter = 0; // 0-> point, 1->bilinear, 2->trilinear
|
||||
float scale = 1.0f;
|
||||
int format = 0;
|
||||
bool use_sw = false;
|
||||
float lod = 1.0f; // >= 1.0f
|
||||
ePixelFormat eformat = FORMAT_A8R8G8B8;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
@@ -36,7 +37,7 @@ vx_buffer_h buffer = nullptr;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Texture Test." << std::endl;
|
||||
std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-z no_hw] [-h: help]" << std::endl;
|
||||
std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-l lod] [-z no_hw] [-h: help]" << std::endl;
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
@@ -55,6 +56,9 @@ static void parse_args(int argc, char **argv) {
|
||||
case 'w':
|
||||
wrap = std::atoi(optarg);
|
||||
break;
|
||||
case 'l':
|
||||
lod = std::stof(optarg, NULL);
|
||||
break;
|
||||
case 'z':
|
||||
use_sw = true;
|
||||
break;
|
||||
@@ -118,7 +122,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_addr, buf_size, 0));
|
||||
|
||||
std::vector<uint8_t> dst_pixels(buf_size);
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
|
||||
@@ -137,25 +141,39 @@ int run_test(const kernel_arg_t& kernel_arg,
|
||||
int main(int argc, char *argv[]) {
|
||||
kernel_arg_t kernel_arg;
|
||||
std::vector<uint8_t> src_pixels;
|
||||
std::vector<uint32_t> mip_offsets;
|
||||
uint32_t src_width;
|
||||
uint32_t src_height;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
RT_CHECK(LoadImage(input_file, eformat, src_pixels, &src_width, &src_height));
|
||||
{
|
||||
std::vector<uint8_t> staging;
|
||||
RT_CHECK(LoadImage(input_file, eformat, staging, &src_width, &src_height));
|
||||
|
||||
RT_CHECK(GenerateMipmaps(src_pixels, mip_offsets, staging, eformat, src_width, src_height));
|
||||
|
||||
//uint32_t src_bpp = Format::GetInfo(eformat).BytePerPixel;
|
||||
//dump_image(src_pixels, src_pixels.size() / src_bpp, 1, src_bpp);
|
||||
}
|
||||
|
||||
// check power of two support
|
||||
if (!ISPOW2(src_width) || !ISPOW2(src_height)) {
|
||||
if (!ispow2(src_width) || !ispow2(src_height)) {
|
||||
std::cout << "Error: only power of two textures supported: width=" << src_width << ", heigth=" << src_height << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t src_bpp = Format::GetInfo(eformat).BytePerPixel;
|
||||
|
||||
//dump_image(src_pixels, src_width, src_height, src_bpp);
|
||||
uint32_t src_logwidth = log2ceil(src_width);
|
||||
uint32_t src_logheight = log2ceil(src_height);
|
||||
|
||||
uint32_t src_bufsize = src_bpp * src_width * src_height;
|
||||
uint32_t src_max_lod = std::max(src_logwidth, src_logheight);
|
||||
if (lod > src_max_lod) {
|
||||
std::cout << "Error: out-of-bound level-of-detail: lod=" << lod << ", source image=" << src_max_lod << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t src_bufsize = src_pixels.size();
|
||||
|
||||
uint32_t dst_width = (uint32_t)(src_width * scale);
|
||||
uint32_t dst_height = (uint32_t)(src_height * scale);
|
||||
@@ -183,7 +201,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
size_t src_addr, dst_addr;
|
||||
uint64_t src_addr, dst_addr;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
|
||||
|
||||
@@ -192,32 +210,37 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// allocate staging shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t), std::max<uint32_t>(src_bufsize, dst_bufsize));
|
||||
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t),
|
||||
std::max<uint32_t>(src_bufsize, dst_bufsize));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
kernel_arg.use_sw = use_sw;
|
||||
kernel_arg.num_tasks = std::min<uint32_t>(num_tasks, dst_height);
|
||||
kernel_arg.format = format;
|
||||
kernel_arg.filter = filter;
|
||||
kernel_arg.wrap = wrap;
|
||||
kernel_arg.use_sw = use_sw;
|
||||
kernel_arg.lod = 0x0;
|
||||
kernel_arg.wrapu = wrap;
|
||||
kernel_arg.wrapv = wrap;
|
||||
|
||||
kernel_arg.src_logWidth = (uint32_t)std::log2(src_width);
|
||||
kernel_arg.src_logHeight = (uint32_t)std::log2(src_height);
|
||||
kernel_arg.src_stride = src_bpp;
|
||||
kernel_arg.src_pitch = src_bpp * src_width;
|
||||
kernel_arg.src_ptr = src_addr;
|
||||
kernel_arg.src_logwidth = src_logwidth;
|
||||
kernel_arg.src_logheight = src_logheight;
|
||||
kernel_arg.src_addr = src_addr;
|
||||
kernel_arg.lod = lod;
|
||||
|
||||
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
|
||||
assert(i < TEX_LOD_MAX);
|
||||
kernel_arg.mip_offs[i] = mip_offsets.at(i);
|
||||
}
|
||||
|
||||
kernel_arg.dst_width = dst_width;
|
||||
kernel_arg.dst_height = dst_height;
|
||||
kernel_arg.dst_stride = dst_bpp;
|
||||
kernel_arg.dst_pitch = dst_bpp * dst_width;
|
||||
kernel_arg.dst_ptr = dst_addr;
|
||||
kernel_arg.dst_addr = dst_addr;
|
||||
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
@@ -225,21 +248,21 @@ int main(int argc, char *argv[]) {
|
||||
// upload source buffer
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < src_bufsize; ++i) {
|
||||
buf_ptr[i] = src_pixels[i];
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, src_bufsize, 0));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_addr, src_bufsize, 0));
|
||||
}
|
||||
|
||||
// clear destination buffer
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
auto buf_ptr = (uint32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < (dst_bufsize/4); ++i) {
|
||||
buf_ptr[i] = 0xdeadbeef;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, dst_bufsize, 0));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_addr, dst_bufsize, 0));
|
||||
}
|
||||
|
||||
// run tests
|
||||
|
||||
@@ -1,167 +1,122 @@
|
||||
#ifndef _TEXSW_H_
|
||||
#pragma once
|
||||
|
||||
#include <vx_intrinsics.h>
|
||||
#include <texturing.h>
|
||||
#include "common.h"
|
||||
|
||||
#define TEX_LOD_MAX 11
|
||||
|
||||
#define MIN(x, y) ((x < y) ? (x) : (y))
|
||||
|
||||
#define MAX(x, y) ((x > y) ? (x) : (y))
|
||||
|
||||
inline int address(int wrap, int value) {
|
||||
switch (wrap) {
|
||||
case 1: return value & 0xfffff;
|
||||
default:
|
||||
case 0: return MIN(MAX(value, 0), 0xfffff);
|
||||
inline uint32_t texel_read(uint8_t* address, uint32_t stride) {
|
||||
switch (stride) {
|
||||
case 1: return *(uint8_t*)address;
|
||||
case 2: return *(uint16_t*)address;
|
||||
case 4: return *(uint32_t*)address;
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline void unpack(int format, int value, int* l, int* h) {
|
||||
switch (format) {
|
||||
case 1:
|
||||
case 2:
|
||||
*l = value;
|
||||
*h = 0;
|
||||
break;
|
||||
case 3:
|
||||
*l = (value | (value << 8)) & 0x00ff00ff;
|
||||
*h = 0;
|
||||
break;
|
||||
case 4:
|
||||
*l = (value | (value << 16)) & 0x07e0f81f;
|
||||
*h = 0;
|
||||
break;
|
||||
case 5:
|
||||
*l = (value | (value << 12)) & 0x0f0f0f0f;
|
||||
*h = 0;
|
||||
break;
|
||||
default:
|
||||
case 0:
|
||||
*l = value & 0x00ff00ff;
|
||||
*h = (value >> 8) & 0x00ff00ff;
|
||||
break;
|
||||
}
|
||||
}
|
||||
inline uint32_t vx_tex_sw(kernel_arg_t* state,
|
||||
Fixed<TEX_FXD_FRAC> xu,
|
||||
Fixed<TEX_FXD_FRAC> xv,
|
||||
uint32_t lod) {
|
||||
uint8_t* base_addr = ((uint8_t*)state->src_addr) + state->mip_offs[lod];
|
||||
uint32_t log_width = std::max<int32_t>(state->src_logwidth - lod, 0);
|
||||
uint32_t log_height = std::max<int32_t>(state->src_logheight - lod, 0);
|
||||
auto format = (TexFormat)state->format;
|
||||
auto wrapu = (WrapMode)state->wrapu;
|
||||
auto wrapv = (WrapMode)state->wrapv;
|
||||
auto filter = state->filter;
|
||||
auto stride = Stride(format);
|
||||
|
||||
inline void lerp(int al, int ah, int bl, int bh, int frac, int* l, int* h) {
|
||||
*l = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
|
||||
*h = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
|
||||
}
|
||||
|
||||
inline int pack(int format, int l, int h) {
|
||||
switch (format) {
|
||||
case 1:
|
||||
case 2:
|
||||
return l;
|
||||
case 3:
|
||||
return (l | (l >> 8)) & 0xffff;
|
||||
case 4:
|
||||
return (l | (l >> 16)) & 0xffff;
|
||||
case 5:
|
||||
return (l | (l >> 12)) & 0xffff;
|
||||
default:
|
||||
case 0:
|
||||
return (h << 8) | l;
|
||||
}
|
||||
}
|
||||
|
||||
inline int tex_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
|
||||
int base_addr = state->src_ptr;
|
||||
int mip_offset = 0;
|
||||
int log_width = state->src_logWidth;
|
||||
int log_height = state->src_logHeight;
|
||||
int format = state->format;
|
||||
int wrap = state->wrap;
|
||||
int filter = state->filter;
|
||||
|
||||
int32_t* pBits = ((uint32_t*)base_addr) + mip_offset;
|
||||
uint32_t color;
|
||||
|
||||
if (filter) {
|
||||
int u0 = address(wrap, u - (0x80000 >> log_width));
|
||||
int v0 = address(wrap, v - (0x80000 >> log_height));
|
||||
int u1 = address(wrap, u + (0x80000 >> log_width));
|
||||
int v1 = address(wrap, v + (0x80000 >> log_height));
|
||||
// addressing
|
||||
uint32_t offset00, offset01, offset10, offset11;
|
||||
uint32_t alpha, beta;
|
||||
TexAddressLinear(xu, xv, log_width, log_height, wrapu, wrapv,
|
||||
&offset00, &offset01, &offset10, &offset11, &alpha, &beta);
|
||||
|
||||
int x0 = u0 >> (20 - log_width);
|
||||
int y0 = v0 >> (20 - log_height);
|
||||
int x1 = u1 >> (20 - log_width);
|
||||
int y1 = v1 >> (20 - log_height);
|
||||
uint8_t* addr00 = base_addr + offset00 * stride;
|
||||
uint8_t* addr01 = base_addr + offset01 * stride;
|
||||
uint8_t* addr10 = base_addr + offset10 * stride;
|
||||
uint8_t* addr11 = base_addr + offset11 * stride;
|
||||
|
||||
// memory lookup
|
||||
|
||||
int c0 = pBits[x0 + (y0 << log_width)];
|
||||
int c1 = pBits[x1 + (y0 << log_width)];
|
||||
int c2 = pBits[x0 + (y1 << log_width)];
|
||||
int c3 = pBits[x1 + (y1 << log_width)];
|
||||
uint32_t texel00 = texel_read(addr00, stride);
|
||||
uint32_t texel01 = texel_read(addr01, stride);
|
||||
uint32_t texel10 = texel_read(addr10, stride);
|
||||
uint32_t texel11 = texel_read(addr11, stride);
|
||||
|
||||
// filtering
|
||||
|
||||
int alpha = x0 & 0xff;
|
||||
int beta = y0 & 0xff;
|
||||
|
||||
int c0a, c0b;
|
||||
int c1a, c1b;
|
||||
int c01a, c01b;
|
||||
|
||||
unpack(format, c0, &c0a, &c0b);
|
||||
unpack(format, c1, &c1a, &c1b);
|
||||
lerp(c0a, c0b, c1a, c1b, alpha, &c01a, &c01b);
|
||||
|
||||
int c2a, c2b;
|
||||
int c3a, c3b;
|
||||
int c23a, c23b;
|
||||
|
||||
unpack(format, c2, &c2a, &c2b);
|
||||
unpack(format, c3, &c3a, &c3b);
|
||||
lerp(c2a, c2b, c3a, c3b, alpha, &c23a, &c23b);
|
||||
|
||||
int c4a, c4b;
|
||||
lerp(c01a, c01b, c23a, c23b, beta, &c4a, &c4b);
|
||||
return pack(format, c4a, c4b);
|
||||
color = TexFilterLinear(
|
||||
format, texel00, texel01, texel10, texel11, alpha, beta);
|
||||
} else {
|
||||
int u0 = address(wrap, u);
|
||||
int v0 = address(wrap, v);
|
||||
// addressing
|
||||
uint32_t offset;
|
||||
TexAddressPoint(xu, xv, log_width, log_height, wrapu, wrapv, &offset);
|
||||
|
||||
uint8_t* addr = base_addr + offset * stride;
|
||||
|
||||
// memory lookup
|
||||
uint32_t texel = texel_read(addr, stride);
|
||||
|
||||
int x0 = u0 >> (20 - log_width);
|
||||
int y0 = v0 >> (20 - log_height);
|
||||
|
||||
int c0 = pBits[x0 + (y0 <<log_width)];
|
||||
|
||||
int c0a, c0b;
|
||||
unpack(format, c0, &c0a, &c0b);
|
||||
return pack(format, c0a, c0b);
|
||||
// filtering
|
||||
color = TexFilterPoint(format, texel);
|
||||
}
|
||||
return color;
|
||||
}
|
||||
|
||||
inline int vx_tex3(int stage, int u, int v, int lod) {
|
||||
int lodn = MIN(lod + 0x100000, TEX_LOD_MAX);
|
||||
int a = vx_tex(0, u, v, lod);
|
||||
int b = vx_tex(0, u, v, lodn);
|
||||
int al = a & 0x00ff00ff;
|
||||
int ah = (a >> 8) & 0x00ff00ff;
|
||||
int bl = b & 0x00ff00ff;
|
||||
int bh = (b >> 8) & 0x00ff00ff;
|
||||
int frac = (lod >> 12) & 0xff;
|
||||
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
|
||||
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
|
||||
int c = al | (ah << 8);
|
||||
return c;
|
||||
// Fetches one filtered texel from texture stage 0 using the hardware texture
// unit (vx_tex).
//
//   state : kernel arguments; only state->filter is read here
//           (2 selects the two-level blend below, anything else a single fetch).
//   xu,xv : texture coordinates; their raw fixed-point bits are handed to vx_tex.
//   xlod  : level-of-detail as a Fixed<16> value (16 fractional bits).
//
// Returns the packed 32-bit color.
inline uint32_t tex_load_hw(kernel_arg_t* state,
                            Fixed<TEX_FXD_FRAC> xu,
                            Fixed<TEX_FXD_FRAC> xv,
                            Fixed<16> xlod) {
  // Clamp from below to Fixed<16>::ONE so that log2floor(ilod) - 16 cannot go
  // negative, then clamp the resulting mip level to the last supported one.
  const int32_t ilod = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
  const uint32_t lod = std::min<uint32_t>(log2floor(ilod) - 16, TEX_LOD_MAX);

  if (state->filter != 2) {
    return vx_tex(0, xu.data(), xv.data(), lod);
  }

  // Blend between this mip level and the next coarser one.
  const uint32_t lod_n = std::min<uint32_t>(lod + 1, TEX_LOD_MAX);

  // ilod >> (lod + 8) is ilod / 2^lod with 8 fractional bits, i.e. a value
  // whose integer part is the leading one bit. Only the low 8 bits are the
  // blend weight, so strip the integer part before interpolating.
  // NOTE(review): assumes Lerp8888 expects an 8-bit weight — confirm against
  // texturing.h.
  const uint32_t frac = (static_cast<uint32_t>(ilod) >> (lod + 16 - 8)) & 0xff;

  const uint32_t texel0 = vx_tex(0, xu.data(), xv.data(), lod);
  const uint32_t texel1 = vx_tex(0, xu.data(), xv.data(), lod_n);

  uint32_t c0l, c0h;
  uint32_t c1l, c1h;
  Unpack8888(TexFormat::R8G8B8A8, texel0, &c0l, &c0h);
  Unpack8888(TexFormat::R8G8B8A8, texel1, &c1l, &c1h);

  uint32_t cl, ch;
  Lerp8888(c0l, c0h, c1l, c1h, frac, &cl, &ch);
  return Pack8888(TexFormat::R8G8B8A8, cl, ch);
}
|
||||
|
||||
inline int tex3_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
|
||||
int lodn = MIN(lod + 0x10000, TEX_LOD_MAX);
|
||||
int a = tex_sw(state, 0, u, v, lod);
|
||||
int b = tex_sw(state, 0, u, v, lodn);
|
||||
int al = a & 0x00ff00ff;
|
||||
int ah = (a >> 8) & 0x00ff00ff;
|
||||
|
||||
int bl = b & 0x00ff00ff;
|
||||
int bh = (b >> 8) & 0x00ff00ff;
|
||||
int frac = (lod >> 12) & 0xff;
|
||||
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
|
||||
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
|
||||
int c = al | (ah << 8);
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif
|
||||
// Software counterpart of the hardware texture fetch: samples the texture
// described by `state` through vx_tex_sw.
//
//   state : kernel arguments; state->filter == 2 selects the two-level blend
//           below, anything else a single vx_tex_sw fetch.
//   xu,xv : texture coordinates, forwarded unchanged to vx_tex_sw.
//   xlod  : level-of-detail as a Fixed<16> value (16 fractional bits).
//
// Returns the packed 32-bit color.
inline uint32_t tex_load_sw(kernel_arg_t* state,
                            Fixed<TEX_FXD_FRAC> xu,
                            Fixed<TEX_FXD_FRAC> xv,
                            Fixed<16> xlod) {
  // Clamp from below to Fixed<16>::ONE so that log2floor(ilod) - 16 cannot go
  // negative, then clamp the resulting mip level to the last supported one.
  const int32_t ilod = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
  const uint32_t lod = std::min<uint32_t>(log2floor(ilod) - 16, TEX_LOD_MAX);

  if (state->filter != 2) {
    return vx_tex_sw(state, xu, xv, lod);
  }

  // Blend between this mip level and the next coarser one.
  const uint32_t lod_n = std::min<uint32_t>(lod + 1, TEX_LOD_MAX);

  // ilod >> (lod + 8) is ilod / 2^lod with 8 fractional bits, i.e. a value
  // whose integer part is the leading one bit. Only the low 8 bits are the
  // blend weight, so strip the integer part before interpolating.
  // NOTE(review): assumes Lerp8888 expects an 8-bit weight — confirm against
  // texturing.h.
  const uint32_t frac = (static_cast<uint32_t>(ilod) >> (lod + 16 - 8)) & 0xff;

  const uint32_t texel0 = vx_tex_sw(state, xu, xv, lod);
  const uint32_t texel1 = vx_tex_sw(state, xu, xv, lod_n);

  uint32_t c0l, c0h;
  uint32_t c1l, c1h;
  Unpack8888(TexFormat::R8G8B8A8, texel0, &c0l, &c0h);
  Unpack8888(TexFormat::R8G8B8A8, texel1, &c1l, &c1h);

  uint32_t cl, ch;
  Lerp8888(c0l, c0h, c1l, c1h, frac, &cl, &ch);
  return Pack8888(TexFormat::R8G8B8A8, cl, ch);
}
|
||||
@@ -191,4 +191,112 @@ int ConvertImage(std::vector<uint8_t>& dst_pixels,
|
||||
SurfaceDesc dstDesc{dst_format, dst_pixels.data(), width, height, dst_pitch};
|
||||
|
||||
return CopyBuffers(dstDesc, 0, 0, width, height, srcDesc, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int GenerateMipmaps(std::vector<uint8_t>& dst_pixels,
|
||||
std::vector<uint32_t>& mip_offsets,
|
||||
const std::vector<uint8_t>& src_pixels,
|
||||
ePixelFormat format,
|
||||
uint32_t src_width,
|
||||
uint32_t src_height) {
|
||||
std::vector<uint8_t> src_staging, dst_staging;
|
||||
const std::vector<uint8_t> *pSrcPixels;
|
||||
std::vector<uint8_t> *pDstPixels;
|
||||
|
||||
// convert source image if needed
|
||||
bool need_conversion = (format != FORMAT_A8R8G8B8);
|
||||
if (need_conversion) {
|
||||
ConvertImage(src_staging, src_pixels, src_width, src_height, format, FORMAT_A8R8G8B8);
|
||||
pSrcPixels = &src_staging;
|
||||
pDstPixels = &dst_staging;
|
||||
} else {
|
||||
pSrcPixels = &src_pixels;
|
||||
pDstPixels = &dst_pixels;
|
||||
}
|
||||
|
||||
uint32_t src_logwidth = log2ceil(src_width);
|
||||
uint32_t src_logheight = log2ceil(src_height);
|
||||
uint32_t max_lod = std::max(src_logwidth, src_logheight) + 1;
|
||||
|
||||
mip_offsets.resize(max_lod);
|
||||
|
||||
// Calculate mipmaps buffer size
|
||||
uint32_t dst_height = 1;
|
||||
uint32_t dst_width = 0;
|
||||
for (uint32_t lod = 0, w = src_width, h = src_height; lod < max_lod; ++lod) {
|
||||
assert((w > 0) || (w > 0));
|
||||
uint32_t pw = std::max<int>(w, 1);
|
||||
uint32_t ph = std::max<int>(h, 1);
|
||||
mip_offsets.at(lod) = dst_width;
|
||||
dst_width += pw * ph;
|
||||
w >>= 1;
|
||||
h >>= 1;
|
||||
}
|
||||
|
||||
// allocate mipmap
|
||||
pDstPixels->resize(dst_width * 4);
|
||||
|
||||
// generate mipmaps
|
||||
{
|
||||
auto pSrc = reinterpret_cast<const uint32_t*>(pSrcPixels->data());
|
||||
auto pDst = reinterpret_cast<uint32_t*>(pDstPixels->data());
|
||||
|
||||
// copy level 0
|
||||
memcpy(pDst, pSrc, pSrcPixels->size());
|
||||
assert(pSrcPixels->size() == 4 * src_width * src_height);
|
||||
pSrc = pDst;
|
||||
pDst += src_width * src_height;
|
||||
|
||||
// copy lower levels
|
||||
for (uint32_t lod = 1, w = (src_width/2), h = (src_height/2); lod < max_lod;) {
|
||||
assert((w > 0) || (w > 0));
|
||||
uint32_t pw = std::max<int>(w, 1);
|
||||
uint32_t ph = std::max<int>(h, 1);
|
||||
for (uint32_t y = 0; y < pw; ++y) {
|
||||
auto v0 = 2 * y;
|
||||
auto v1 = 2 * y + ((ph > 1) ? 1 : 0);
|
||||
auto pSrc0 = pSrc + v0 * (2 * pw);
|
||||
auto pSrc1 = pSrc + v1 * (2 * pw);
|
||||
|
||||
for (uint32_t x = 0; x <pw; ++x) {
|
||||
auto u0 = 2 * x;
|
||||
auto u1 = 2 * x + ((pw > 1) ? 1 : 0);
|
||||
|
||||
auto c00 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc0 + u0);
|
||||
auto c01 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc0 + u1);
|
||||
auto c10 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc1 + u0);
|
||||
auto c11 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc1 + u1);
|
||||
|
||||
const ColorARGB color((c00.a + c01.a + c10.a + c11.a+2) >> 2,
|
||||
(c00.r + c01.r + c10.r + c11.r+2) >> 2,
|
||||
(c00.g + c01.g + c10.g + c11.g+2) >> 2,
|
||||
(c00.b + c01.b + c10.b + c11.b+2) >> 2);
|
||||
|
||||
uint32_t ncolor;
|
||||
Format::ConvertTo<FORMAT_A8R8G8B8>(&ncolor, color);
|
||||
pDst[x + y * pw] = ncolor;
|
||||
}
|
||||
}
|
||||
++lod;
|
||||
pSrc = pDst;
|
||||
pDst += pw * ph;
|
||||
w >>= 1;
|
||||
h >>= 1;
|
||||
}
|
||||
assert((pDst - reinterpret_cast<uint32_t*>(pDstPixels->data())) == dst_width);
|
||||
}
|
||||
|
||||
// convert destination image if needed
|
||||
if (need_conversion) {
|
||||
ConvertImage(dst_staging, dst_staging, dst_width, dst_height, FORMAT_A8R8G8B8, format);
|
||||
}
|
||||
|
||||
uint32_t bpp = Format::GetInfo(format).BytePerPixel;
|
||||
for (auto& offset : mip_offsets) {
|
||||
offset *= bpp;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,14 +1,9 @@
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <bitmanip.h>
|
||||
#include "surfacedesc.h"
|
||||
|
||||
#define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
|
||||
|
||||
inline uint32_t ilog2 (uint32_t value) {
|
||||
return (uint32_t)(sizeof(uint32_t) * 8UL) - (uint32_t)__builtin_clzl((value << 1) - 1UL) - 1;
|
||||
}
|
||||
|
||||
int LoadImage(const char *filename,
|
||||
ePixelFormat format,
|
||||
std::vector<uint8_t> &pixels,
|
||||
@@ -37,7 +32,14 @@ int ConvertImage(std::vector<uint8_t>& dst_pixels,
|
||||
ePixelFormat src_format,
|
||||
ePixelFormat dst_format);
|
||||
|
||||
int GenerateMipmaps(std::vector<uint8_t>& dst_pixels,
|
||||
std::vector<uint32_t>& mip_offsets,
|
||||
const std::vector<uint8_t>& src_pixels,
|
||||
ePixelFormat format,
|
||||
uint32_t src_width,
|
||||
uint32_t src_height);
|
||||
|
||||
void dump_image(const std::vector<uint8_t>& pixels,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t bpp);
|
||||
uint32_t bpp);
|
||||
Reference in New Issue
Block a user