This commit is contained in:
Santosh Raghav Srivatsan
2021-11-08 16:35:32 -05:00
147 changed files with 6615 additions and 547 deletions

View File

@@ -10,6 +10,7 @@ all:
$(MAKE) -C fence
$(MAKE) -C no_mf_ext
$(MAKE) -C no_smem
$(MAKE) -C prefetch
run-simx:
$(MAKE) -C basic run-simx
@@ -19,10 +20,11 @@ run-simx:
$(MAKE) -C io_addr run-simx
$(MAKE) -C printf run-simx
$(MAKE) -C diverge run-simx
#$(MAKE) -C sort run-simx
$(MAKE) -C sort run-simx
$(MAKE) -C fence run-simx
$(MAKE) -C no_mf_ext run-simx
$(MAKE) -C no_smem run-simx
$(MAKE) -C prefetch run-simx
run-rtlsim:
$(MAKE) -C basic run-rtlsim
@@ -32,10 +34,11 @@ run-rtlsim:
$(MAKE) -C io_addr run-rtlsim
$(MAKE) -C printf run-rtlsim
$(MAKE) -C diverge run-rtlsim
#$(MAKE) -C sort run-rtlsim
$(MAKE) -C sort run-rtlsim
$(MAKE) -C fence run-rtlsim
$(MAKE) -C no_mf_ext run-rtlsim
$(MAKE) -C no_smem run-rtlsim
$(MAKE) -C prefetch run-rtlsim
run-vlsim:
$(MAKE) -C basic run-vlsim
@@ -45,10 +48,11 @@ run-vlsim:
$(MAKE) -C io_addr run-vlsim
$(MAKE) -C printf run-vlsim
$(MAKE) -C diverge run-vlsim
#$(MAKE) -C sort run-vlsim
$(MAKE) -C sort run-vlsim
$(MAKE) -C fence run-vlsim
$(MAKE) -C no_mf_ext run-vlsim
$(MAKE) -C no_smem run-vlsim
$(MAKE) -C prefetch run-vlsim
clean:
$(MAKE) -C basic clean
@@ -62,6 +66,7 @@ clean:
$(MAKE) -C fence clean
$(MAKE) -C no_mf_ext clean
$(MAKE) -C no_smem clean
$(MAKE) -C prefetch clean
clean-all:
$(MAKE) -C basic clean-all
@@ -75,4 +80,4 @@ clean-all:
$(MAKE) -C fence clean-all
$(MAKE) -C no_mf_ext clean-all
$(MAKE) -C no_smem clean-all
$(MAKE) -C prefetch clean-all

View File

@@ -3,7 +3,7 @@
#include "common.h"
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
uint32_t count = arg->count;
int32_t* src_ptr = (int32_t*)arg->src_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;

View File

@@ -3,7 +3,7 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, const kernel_arg_t* arg) {
void kernel_body(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
@@ -17,6 +17,6 @@ void kernel_body(int task_id, const kernel_arg_t* arg) {
}
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, kernel_body, arg);
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, arg);
}

View File

@@ -5,7 +5,7 @@
// Parallel Selection sort
void kernel_body(int task_id, const kernel_arg_t* arg) {
void kernel_body(int task_id, kernel_arg_t* arg) {
int32_t* src_ptr = (int32_t*)arg->src_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
@@ -44,6 +44,6 @@ void kernel_body(int task_id, const kernel_arg_t* arg) {
}
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, kernel_body, arg);
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg);
}

View File

@@ -4,14 +4,14 @@
#include <vx_spawn.h>
#include "common.h"
typedef void (*PFN_Kernel)(int task_id, const kernel_arg_t* arg);
typedef void (*PFN_Kernel)(int task_id, kernel_arg_t* arg);
inline float __ieee754_sqrtf (float x) {
asm ("fsqrt.s %0, %1" : "=f" (x) : "f" (x));
return x;
}
void kernel_iadd(int task_id, const kernel_arg_t* arg) {
void kernel_iadd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
@@ -26,7 +26,7 @@ void kernel_iadd(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_imul(int task_id, const kernel_arg_t* arg) {
void kernel_imul(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
@@ -41,7 +41,7 @@ void kernel_imul(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_idiv(int task_id, const kernel_arg_t* arg) {
void kernel_idiv(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
@@ -56,7 +56,7 @@ void kernel_idiv(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_idiv_mul(int task_id, const kernel_arg_t* arg) {
void kernel_idiv_mul(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
@@ -73,7 +73,7 @@ void kernel_idiv_mul(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fadd(int task_id, const kernel_arg_t* arg) {
void kernel_fadd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -88,7 +88,7 @@ void kernel_fadd(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fsub(int task_id, const kernel_arg_t* arg) {
void kernel_fsub(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -103,7 +103,7 @@ void kernel_fsub(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fmul(int task_id, const kernel_arg_t* arg) {
void kernel_fmul(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -118,7 +118,7 @@ void kernel_fmul(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fmadd(int task_id, const kernel_arg_t* arg) {
void kernel_fmadd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -133,7 +133,7 @@ void kernel_fmadd(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fmsub(int task_id, const kernel_arg_t* arg) {
void kernel_fmsub(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -148,7 +148,7 @@ void kernel_fmsub(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fnmadd(int task_id, const kernel_arg_t* arg) {
void kernel_fnmadd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -163,7 +163,7 @@ void kernel_fnmadd(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fnmsub(int task_id, const kernel_arg_t* arg) {
void kernel_fnmsub(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -178,7 +178,7 @@ void kernel_fnmsub(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fnmadd_madd(int task_id, const kernel_arg_t* arg) {
void kernel_fnmadd_madd(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -195,7 +195,7 @@ void kernel_fnmadd_madd(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fdiv(int task_id, const kernel_arg_t* arg) {
void kernel_fdiv(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -210,7 +210,7 @@ void kernel_fdiv(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fdiv2(int task_id, const kernel_arg_t* arg) {
void kernel_fdiv2(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -227,7 +227,7 @@ void kernel_fdiv2(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_fsqrt(int task_id, const kernel_arg_t* arg) {
void kernel_fsqrt(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -242,7 +242,7 @@ void kernel_fsqrt(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_ftoi(int task_id, const kernel_arg_t* arg) {
void kernel_ftoi(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -258,7 +258,7 @@ void kernel_ftoi(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_ftou(int task_id, const kernel_arg_t* arg) {
void kernel_ftou(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
@@ -274,7 +274,7 @@ void kernel_ftou(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_itof(int task_id, const kernel_arg_t* arg) {
void kernel_itof(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
@@ -290,7 +290,7 @@ void kernel_itof(int task_id, const kernel_arg_t* arg) {
}
}
void kernel_utof(int task_id, const kernel_arg_t* arg) {
void kernel_utof(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
@@ -329,6 +329,6 @@ static const PFN_Kernel sc_tests[] = {
};
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, sc_tests[arg->testid], arg);
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)sc_tests[arg->testid], arg);
}

View File

@@ -3,7 +3,7 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, const kernel_arg_t* arg) {
void kernel_body(int task_id, kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
@@ -19,6 +19,6 @@ void kernel_body(int task_id, const kernel_arg_t* arg) {
}
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, kernel_body, arg);
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, arg);
}

View File

@@ -3,7 +3,7 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, const kernel_arg_t* arg) {
void kernel_body(int task_id, kernel_arg_t* arg) {
uint32_t* src_ptr = (uint32_t*)arg->src_ptr;
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
@@ -13,6 +13,6 @@ void kernel_body(int task_id, const kernel_arg_t* arg) {
}
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, kernel_body, arg);
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg);
}

View File

@@ -3,7 +3,7 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, const kernel_arg_t* arg) {
void kernel_body(int task_id, kernel_arg_t* arg) {
uint32_t stride = arg->stride;
uint32_t* addr_ptr = (uint32_t*)arg->addr_ptr;
float* src_ptr = (float*)arg->src_ptr;
@@ -23,6 +23,6 @@ void kernel_body(int task_id, const kernel_arg_t* arg) {
}
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, kernel_body, arg);
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, arg);
}

View File

@@ -4,7 +4,7 @@
#include "common.h"
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
uint32_t size = arg->size;
int32_t* src_ptr = (int32_t*)arg->src_ptr;

View File

@@ -4,7 +4,7 @@
#include "common.h"
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
uint32_t size = arg->size;
int32_t* src_ptr = (int32_t*)arg->src_ptr;

View File

@@ -4,12 +4,12 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, const kernel_arg_t* arg) {
void kernel_body(int task_id, kernel_arg_t* arg) {
int* src_ptr = (int*)arg->src_ptr;
vx_printf("task=%d, value=%d\n", task_id, src_ptr[task_id]);
}
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, kernel_body, arg);
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg);
}

View File

@@ -1,18 +1,26 @@
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
VORTEX_HW_PATH ?= $(realpath ../../../hw)
LLVM_PREFIX ?= /opt/llvm-riscv
SYSROOT=${RISCV_TOOLCHAIN_PATH}/riscv32-unknown-elf
OPTS ?= -n16
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
VX_CC = ${LLVM_PREFIX}/bin/clang
VX_CXX = ${LLVM_PREFIX}/bin/clang++
VX_DP = ${LLVM_PREFIX}/bin/llvm-objdump
VX_CP = ${LLVM_PREFIX}/bin/llvm-objcopy
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -Os -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
VX_CFLAGS += -O3 -march=rv32imf -mabi=ilp32f -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -Xclang -target-feature -Xclang +vortex
VX_CFLAGS += --sysroot=${SYSROOT} --gcc-toolchain=${RISCV_TOOLCHAIN_PATH}
VX_CFLAGS += -I${VORTEX_HW_PATH} -I${VORTEX_RT_PATH}/include
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
VX_LDFLAGS += -Wl,-Bstatic,-T${VORTEX_RT_PATH}/linker/vx_link.ld,--gc-sections ${VORTEX_RT_PATH}/libvortexrt.a
VX_DPFLAGS = -arch=riscv32 -mcpu=generic-rv32 -mattr=+m,+f -mattr=+vortex
VX_SRCS = kernel.c
@@ -30,7 +38,7 @@ SRCS = main.cpp
all: $(PROJECT) kernel.bin kernel.dump
kernel.dump: kernel.elf
$(VX_DP) -D kernel.elf > kernel.dump
$(VX_DP) $(VX_DPFLAGS) -D kernel.elf > kernel.dump
kernel.bin: kernel.elf
$(VX_CP) -O binary kernel.elf kernel.bin

View File

@@ -1,26 +1,9 @@
#include <stdint.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include <vx_print.h>
#include "common.h"
// Parallel Selection sort
int __attribute__((noinline)) __smaller(int index, int tid, int32_t cur_value, int32_t ref_value) {
int ret = 0;
__if (cur_value < ref_value) {
ret = 1;
} __else {
__if (cur_value == ref_value) {
__if (index < tid) {
ret = 1;
} __endif
} __endif
} __endif
return ret;
}
void kernel_body(int task_id, const kernel_arg_t* arg) {
void kernel_body(int __DIVERGENT__ task_id, kernel_arg_t* arg) {
uint32_t num_points = arg->num_points;
int32_t* src_ptr = (int32_t*)arg->src_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
@@ -30,13 +13,12 @@ void kernel_body(int task_id, const kernel_arg_t* arg) {
uint32_t pos = 0;
for (uint32_t i = 0; i < num_points; ++i) {
int32_t cur_value = src_ptr[i];
pos += __smaller(i, task_id, cur_value, ref_value);
pos += (cur_value < ref_value) || ((cur_value == ref_value) && (i < task_id));
}
dst_ptr[pos] = ref_value;
vx_printf("taskid=%d, pos=%d, value=%d\n", task_id, pos, ref_value);
}
void main() {
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, kernel_body, arg);
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg);
}

View File

@@ -0,0 +1,72 @@
# Build script for the `tex` test: cross-compiles the device kernel (kernel.c)
# with the RISC-V GNU toolchain and builds the host program that drives it.
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
# NOTE(review): this uses $(wildcard ...) where VORTEX_DRV_PATH uses $(realpath ...),
# so the runtime path stays relative - confirm this is intentional.
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
# Command-line arguments handed to the host program by the run-* targets.
OPTS ?= -g1
# Cross tools used for the device-side kernel.
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
# Device compile flags: rv32imf / ilp32f, freestanding, no default startup files.
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
# Device link flags: static link with the runtime linker script and libvortexrt.a,
# dropping unreferenced sections.
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
VX_SRCS = kernel.c
# Host compile flags; the optimized variant is kept commented out below.
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -Wfatal-errors
CXXFLAGS += -DLUPNG_USE_ZLIB
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include
# The host program links against the driver stub library and zlib.
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex -lz
PROJECT = tex
SRCS = main.cpp utils.cpp tga.cpp lupng.c
# Default goal: host program, raw kernel image and kernel disassembly.
all: $(PROJECT) kernel.bin kernel.dump
# Full disassembly of the kernel ELF.
kernel.dump: kernel.elf
$(VX_DP) -D kernel.elf > kernel.dump
# Raw binary image extracted from the kernel ELF.
kernel.bin: kernel.elf
$(VX_CP) -O binary kernel.elf kernel.bin
kernel.elf: $(VX_SRCS)
$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf
# Host program.
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
# The run-* targets launch the host program with LD_LIBRARY_PATH pointing at
# one driver directory (simx, fpga, asesim, vlsim or rtlsim).
# NOTE(review): POCL_RT_PATH is not defined in this Makefile - presumably it is
# expected from the environment; confirm it is needed here at all.
run-simx: $(PROJECT) kernel.bin
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-fpga: $(PROJECT) kernel.bin
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/fpga:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-asesim: $(PROJECT) kernel.bin
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/asesim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-vlsim: $(PROJECT) kernel.bin
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.bin
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
# Header dependencies of the host sources, generated with -MM.
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
# `clean` removes host artifacts only; `clean-all` also removes kernel outputs.
clean:
rm -rf $(PROJECT) *.o .depend
clean-all: clean
rm -rf *.elf *.bin *.dump
# Skip including .depend when the goal is exactly `clean`.
# NOTE(review): `clean-all` is not excluded by this test - confirm whether it should be.
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

View File

@@ -0,0 +1,268 @@
//
// Copyright (c) Blaise Tine. All rights reserved.
//
//
// Use of this sample source code is subject to the terms of the Microsoft
// license agreement under which you licensed this sample source code. If
// you did not accept the terms of the license agreement, you are not
// authorized to use this sample source code. For the terms of the license,
// please see the license agreement between you and Microsoft or, if applicable,
// see the LICENSE.RTF on your install media or the root of your tools
// installation.
// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
// INDEMNITIES.
//
#pragma once
#include "surfacedesc.h"
// Lookup table of pixel-copy routines indexed by [source format][destination
// format]. The constructor marks every pair as unsupported (CopyInvalid) and
// then installs a routine for each supported pair; get() returns the routine
// for a given pair.
class BlitTable {
public:
// Signature shared by every copy routine: copies a copyWidth x copyHeight
// rectangle from (srcOffsetX, srcOffsetY) in srcDesc to (dstOffsetX,
// dstOffsetY) in dstDesc. The routines in this class return 0 on success and
// -1 for an unsupported format pair.
typedef int (*PfnCopy)(const SurfaceDesc &dstDesc,
uint32_t dstOffsetX,
uint32_t dstOffsetY,
uint32_t copyWidth,
uint32_t copyHeight,
const SurfaceDesc &srcDesc,
uint32_t srcOffsetX,
uint32_t srcOffsetY);
// Builds the table.
BlitTable() {
// Default every (source, destination) pair to the error routine.
for (uint32_t s = 0; s < FORMAT_COLOR_SIZE_; ++s) {
for (uint32_t d = 0; d < FORMAT_COLOR_SIZE_; ++d) {
copyFuncs_[s][d] = CopyInvalid;
}
}
// Install the supported conversions, one case per source format. Same-format
// pairs use CopyFast (plain element copy); all others use the converting Copy.
for (uint32_t s = 0; s < FORMAT_COLOR_SIZE_; ++s) {
switch (s) {
case FORMAT_A8:
case FORMAT_L8:
copyFuncs_[s][s] = CopyFast<uint8_t>;
break;
case FORMAT_A8L8:
copyFuncs_[FORMAT_A8L8][FORMAT_A8] = Copy<FORMAT_A8L8, FORMAT_A8>;
copyFuncs_[FORMAT_A8L8][FORMAT_A8L8] = CopyFast<uint16_t>;
break;
case FORMAT_R5G6B5:
copyFuncs_[FORMAT_R5G6B5][FORMAT_L8] = Copy<FORMAT_R5G6B5, FORMAT_L8>;
copyFuncs_[FORMAT_R5G6B5][FORMAT_R5G6B5] = CopyFast<uint16_t>;
copyFuncs_[FORMAT_R5G6B5][FORMAT_R8G8B8] =
Copy<FORMAT_R5G6B5, FORMAT_R8G8B8>;
copyFuncs_[FORMAT_R5G6B5][FORMAT_B8G8R8] =
Copy<FORMAT_R5G6B5, FORMAT_B8G8R8>;
copyFuncs_[FORMAT_R5G6B5][FORMAT_A8B8G8R8] =
Copy<FORMAT_R5G6B5, FORMAT_A8B8G8R8>;
copyFuncs_[FORMAT_R5G6B5][FORMAT_A8R8G8B8] =
Copy<FORMAT_R5G6B5, FORMAT_A8R8G8B8>;
break;
case FORMAT_A1R5G5B5:
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8] =
Copy<FORMAT_A1R5G5B5, FORMAT_A8>;
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_L8] =
Copy<FORMAT_A1R5G5B5, FORMAT_L8>;
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8L8] =
Copy<FORMAT_A1R5G5B5, FORMAT_A8L8>;
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R8G8B8] =
Copy<FORMAT_A1R5G5B5, FORMAT_R8G8B8>;
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8R8G8B8] =
Copy<FORMAT_A1R5G5B5, FORMAT_A8R8G8B8>;
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R5G5B5A1] =
Copy<FORMAT_A1R5G5B5, FORMAT_R5G5B5A1>;
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R4G4B4A4] =
Copy<FORMAT_A1R5G5B5, FORMAT_R4G4B4A4>;
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_B8G8R8] =
Copy<FORMAT_A1R5G5B5, FORMAT_B8G8R8>;
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8B8G8R8] =
Copy<FORMAT_A1R5G5B5, FORMAT_A8B8G8R8>;
break;
case FORMAT_A4R4G4B4:
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8] =
Copy<FORMAT_A4R4G4B4, FORMAT_A8>;
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_L8] =
Copy<FORMAT_A4R4G4B4, FORMAT_L8>;
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8L8] =
Copy<FORMAT_A4R4G4B4, FORMAT_A8L8>;
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R8G8B8] =
Copy<FORMAT_A4R4G4B4, FORMAT_R8G8B8>;
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8R8G8B8] =
Copy<FORMAT_A4R4G4B4, FORMAT_A8R8G8B8>;
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R5G5B5A1] =
Copy<FORMAT_A4R4G4B4, FORMAT_R5G5B5A1>;
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R4G4B4A4] =
Copy<FORMAT_A4R4G4B4, FORMAT_R4G4B4A4>;
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_B8G8R8] =
Copy<FORMAT_A4R4G4B4, FORMAT_B8G8R8>;
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8B8G8R8] =
Copy<FORMAT_A4R4G4B4, FORMAT_A8B8G8R8>;
break;
case FORMAT_R8G8B8:
copyFuncs_[FORMAT_R8G8B8][FORMAT_L8] = Copy<FORMAT_R8G8B8, FORMAT_L8>;
copyFuncs_[FORMAT_R8G8B8][FORMAT_R5G6B5] =
Copy<FORMAT_R8G8B8, FORMAT_R5G6B5>;
copyFuncs_[FORMAT_R8G8B8][FORMAT_R8G8B8] = CopyFast<uint24_t>;
copyFuncs_[FORMAT_R8G8B8][FORMAT_B8G8R8] =
Copy<FORMAT_R8G8B8, FORMAT_B8G8R8>;
copyFuncs_[FORMAT_R8G8B8][FORMAT_A8B8G8R8] =
Copy<FORMAT_R8G8B8, FORMAT_A8B8G8R8>;
copyFuncs_[FORMAT_R8G8B8][FORMAT_A8R8G8B8] =
Copy<FORMAT_R8G8B8, FORMAT_A8R8G8B8>;
break;
case FORMAT_A8R8G8B8:
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8] =
Copy<FORMAT_A8R8G8B8, FORMAT_A8>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_L8] =
Copy<FORMAT_A8R8G8B8, FORMAT_L8>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8L8] =
Copy<FORMAT_A8R8G8B8, FORMAT_A8L8>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R5G6B5] =
Copy<FORMAT_A8R8G8B8, FORMAT_R5G6B5>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R8G8B8] =
Copy<FORMAT_A8R8G8B8, FORMAT_R8G8B8>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8R8G8B8] = CopyFast<uint32_t>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R5G5B5A1] =
Copy<FORMAT_A8R8G8B8, FORMAT_R5G5B5A1>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R4G4B4A4] =
Copy<FORMAT_A8R8G8B8, FORMAT_R4G4B4A4>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_B8G8R8] =
Copy<FORMAT_A8R8G8B8, FORMAT_B8G8R8>;
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8B8G8R8] =
Copy<FORMAT_A8R8G8B8, FORMAT_A8B8G8R8>;
break;
// NOTE(review): the remaining source formats install no same-format entry,
// so e.g. R5G5B5A1 -> R5G5B5A1 stays CopyInvalid unless FORMAT_RGB / FORMAT_ARGB
// alias such a format - confirm against the ePixelFormat definition.
case FORMAT_R5G5B5A1:
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_A8] =
Copy<FORMAT_R5G5B5A1, FORMAT_A8>;
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_L8] =
Copy<FORMAT_R5G5B5A1, FORMAT_L8>;
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_A8L8] =
Copy<FORMAT_R5G5B5A1, FORMAT_A8L8>;
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_RGB] =
Copy<FORMAT_R5G5B5A1, FORMAT_RGB>;
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_ARGB] =
Copy<FORMAT_R5G5B5A1, FORMAT_ARGB>;
break;
case FORMAT_R4G4B4A4:
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_A8] =
Copy<FORMAT_R4G4B4A4, FORMAT_A8>;
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_L8] =
Copy<FORMAT_R4G4B4A4, FORMAT_L8>;
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_A8L8] =
Copy<FORMAT_R4G4B4A4, FORMAT_A8L8>;
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_RGB] =
Copy<FORMAT_R4G4B4A4, FORMAT_RGB>;
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_ARGB] =
Copy<FORMAT_R4G4B4A4, FORMAT_ARGB>;
break;
case FORMAT_B8G8R8:
copyFuncs_[FORMAT_B8G8R8][FORMAT_L8] = Copy<FORMAT_B8G8R8, FORMAT_L8>;
copyFuncs_[FORMAT_B8G8R8][FORMAT_RGB] = Copy<FORMAT_B8G8R8, FORMAT_RGB>;
break;
case FORMAT_A8B8G8R8:
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_A8] =
Copy<FORMAT_A8B8G8R8, FORMAT_A8>;
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_L8] =
Copy<FORMAT_A8B8G8R8, FORMAT_L8>;
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_A8L8] =
Copy<FORMAT_A8B8G8R8, FORMAT_A8L8>;
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_RGB] =
Copy<FORMAT_A8B8G8R8, FORMAT_RGB>;
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_ARGB] =
Copy<FORMAT_A8B8G8R8, FORMAT_ARGB>;
break;
}
}
}
// Returns the copy routine registered for srcFormat -> dstFormat. Both
// indices are range-checked with assert only; unsupported pairs yield
// CopyInvalid rather than a null pointer.
PfnCopy get(uint32_t srcFormat, uint32_t dstFormat) const {
assert(srcFormat < FORMAT_COLOR_SIZE_);
assert(dstFormat < FORMAT_COLOR_SIZE_);
return copyFuncs_[srcFormat][dstFormat];
}
private:
// Converting copy: reads each source pixel with Format::ConvertFrom<SrcFormat>
// and writes it with Format::ConvertTo<DstFormat>, row by row. Always returns 0.
// NOTE(review): the rectangle is not checked against the surface dimensions
// here - presumably callers clip beforehand; confirm.
template <ePixelFormat SrcFormat, ePixelFormat DstFormat>
static int Copy(const SurfaceDesc &dstDesc,
uint32_t dstOffsetX,
uint32_t dstOffsetY,
uint32_t copyWidth,
uint32_t copyHeight,
const SurfaceDesc &srcDesc,
uint32_t srcOffsetX,
uint32_t srcOffsetY) {
auto srcBPP = TFormatInfo<SrcFormat>::CBSIZE;
auto dstBPP = TFormatInfo<DstFormat>::CBSIZE;
auto srcNextLine = srcDesc.Pitch;
auto dstNextLine = dstDesc.Pitch;
// Address of the first pixel of the rectangle in each surface
// (assumes pBits is a byte pointer and Pitch is in bytes - TODO confirm).
auto pbSrc = srcDesc.pBits + srcOffsetX * srcBPP + srcOffsetY * srcDesc.Pitch;
auto pbDst = dstDesc.pBits + dstOffsetX * dstBPP + dstOffsetY * dstDesc.Pitch;
while (copyHeight--) {
auto pSrc = reinterpret_cast<const typename TFormatInfo<SrcFormat>::TYPE *>(pbSrc);
for (auto *pDst = reinterpret_cast<typename TFormatInfo<DstFormat>::TYPE *>(
pbDst),
*const pEnd = pDst + copyWidth;
pDst != pEnd; ++pDst, ++pSrc) {
auto tmp = Format::ConvertFrom<SrcFormat, true>(pSrc);
Format::ConvertTo<DstFormat>(pDst, tmp);
}
// Advance both row pointers by one pitch.
pbSrc += srcNextLine;
pbDst += dstNextLine;
}
return 0;
}
// Same-format copy: assigns copyWidth elements of Type per row with no
// conversion; sizeof(Type) is used as the pixel size. Always returns 0.
template <typename Type>
static int CopyFast(const SurfaceDesc &dstDesc,
uint32_t dstOffsetX,
uint32_t dstOffsetY,
uint32_t copyWidth,
uint32_t copyHeight,
const SurfaceDesc &srcDesc,
uint32_t srcOffsetX,
uint32_t srcOffsetY) {
auto nBPP = sizeof(Type);
auto srcNextLine = srcDesc.Pitch;
auto dstNextLine = dstDesc.Pitch;
auto pbSrc = srcDesc.pBits + srcOffsetX * nBPP + srcOffsetY * srcDesc.Pitch;
auto pbDst = dstDesc.pBits + dstOffsetX * nBPP + dstOffsetY * dstDesc.Pitch;
while (copyHeight--) {
auto pSrc = reinterpret_cast<const Type *>(pbSrc);
for (auto *pDst = reinterpret_cast<Type *>(pbDst), *const pEnd = pDst + copyWidth;
pDst != pEnd; ++pDst, ++pSrc) {
*pDst = *pSrc;
}
pbSrc += srcNextLine;
pbDst += dstNextLine;
}
return 0;
}
// Placeholder for unsupported format pairs: prints an error message to
// std::cout, copies nothing, and returns -1.
static int CopyInvalid(const SurfaceDesc & /*dstDesc*/,
uint32_t /*dstOffsetX*/,
uint32_t /*dstOffsetY*/,
uint32_t /*copyWidth*/,
uint32_t /*copyHeight*/,
const SurfaceDesc & /*srcDesc*/,
uint32_t /*srcOffsetX*/,
uint32_t /*srcOffsetY*/)
{
std::cout << "Error: invalid format" << std::endl;
return -1;
}
// Copy routines indexed as copyFuncs_[srcFormat][dstFormat].
PfnCopy copyFuncs_[FORMAT_COLOR_SIZE_][FORMAT_COLOR_SIZE_];
};

View File

@@ -0,0 +1,68 @@
//
// Copyright (c) Blaise Tine. All rights reserved.
//
//
// Use of this sample source code is subject to the terms of the Microsoft
// license agreement under which you licensed this sample source code. If
// you did not accept the terms of the license agreement, you are not
// authorized to use this sample source code. For the terms of the license,
// please see the license agreement between you and Microsoft or, if applicable,
// see the LICENSE.RTF on your install media or the root of your tools
// installation.
// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
// INDEMNITIES.
//
#pragma once
#include <cstdint>
#include <assert.h>
// 32-bit ARGB color. The same four bytes can be accessed as a packed word
// (`value`), as named channels (`b`, `g`, `r`, `a`, in that memory order) or
// as a byte array (`m`, where m[0] is `b` and m[3] is `a`).
struct ColorARGB {
  union {
    struct {
      uint32_t value;
    };
    struct {
      uint8_t b, g, r, a;
    };
    struct {
      uint8_t m[4];
    };
  };

  // Default color is all-zero, so a default-constructed object never exposes
  // indeterminate bytes through `value` or the conversion operator.
  ColorARGB() {
    this->value = 0;
  }

  // Builds a color from four channels; each must be in [0, 255] (checked with
  // assert only).
  ColorARGB(int a, int r, int g, int b) {
    assert((a >= 0) && (a <= 0xff));
    assert((r >= 0) && (r <= 0xff));
    assert((g >= 0) && (g <= 0xff));
    assert((b >= 0) && (b <= 0xff));
    this->b = static_cast<uint8_t>(b);
    this->g = static_cast<uint8_t>(g);
    this->r = static_cast<uint8_t>(r);
    this->a = static_cast<uint8_t>(a);
  }

  // Builds a color from three channels; each must be in [0, 255] (checked
  // with assert only). Alpha is set to 0xff (fully opaque) instead of being
  // left uninitialized, so the packed `value` is well defined.
  ColorARGB(int r, int g, int b) {
    assert((r >= 0) && (r <= 0xff));
    assert((g >= 0) && (g <= 0xff));
    assert((b >= 0) && (b <= 0xff));
    this->b = static_cast<uint8_t>(b);
    this->g = static_cast<uint8_t>(g);
    this->r = static_cast<uint8_t>(r);
    this->a = 0xff;
  }

  // Builds a color from an already packed 32-bit word. Left implicit on
  // purpose so existing call sites that rely on the conversion keep compiling.
  ColorARGB(int value) {
    this->value = static_cast<uint32_t>(value);
  }

  // Copies the packed word. Returns *this so assignments can be chained;
  // callers that ignored the previous void result are unaffected.
  ColorARGB &operator=(const ColorARGB &rhs) {
    this->value = rhs.value;
    return *this;
  }

  // Packed 32-bit representation of the color.
  operator uint32_t() const {
    return this->value;
  }
};

View File

@@ -0,0 +1,25 @@
#ifndef _COMMON_H_
#define _COMMON_H_
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
// Launch arguments for the tex kernel, read by the device from
// KERNEL_ARG_DEV_MEM_ADDR. The field order and types define a memory layout
// shared with whoever fills that address, so they must not be reordered.
typedef struct {
// Number of tasks spawned; the kernel splits dst_height into this many bands.
uint32_t num_tasks;
// Texture format code written to the texture unit's format register.
uint8_t format;
// Filter mode: any non-zero value enables filtering in the texture unit, and
// the value 2 makes the kernel use the tex3 sampling routines.
uint8_t filter;
// Wrap mode; the kernel writes it into both fields of the wrap register.
uint8_t wrap;
// Non-zero selects the software sampling path (when the kernel is built with ENABLE_SW).
uint8_t use_sw;
// Level-of-detail value passed to every sampling call.
uint32_t lod;
// Source dimensions as written to the width/height registers
// (presumably log2 of the size in texels, per the names - confirm).
uint8_t src_logWidth;
uint8_t src_logHeight;
// NOTE(review): src_stride/src_pitch are not referenced by the kernel entry
// code, and src_pitch is only 8 bits wide while dst_pitch is 32 - confirm
// units and range against the software sampler and the host code.
uint8_t src_stride;
uint8_t src_pitch;
// Device address of the source texture, written to the texture address register.
uint32_t src_ptr;
// Destination image size in pixels.
uint32_t dst_width;
uint32_t dst_height;
// Multiplied by the x offset when addressing the destination
// (presumably bytes per destination pixel - confirm).
uint8_t dst_stride;
// Added to the destination byte pointer to step to the next row.
uint32_t dst_pitch;
// Device address of the destination image.
uint32_t dst_ptr;
} kernel_arg_t;
#endif

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 MiB

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
//
// Copyright (c) Blaise Tine. All rights reserved.
//
//
// Use of this sample source code is subject to the terms of the Microsoft
// license agreement under which you licensed this sample source code. If
// you did not accept the terms of the license agreement, you are not
// authorized to use this sample source code. For the terms of the license,
// please see the license agreement between you and Microsoft or, if applicable,
// see the LICENSE.RTF on your install media or the root of your tools
// installation.
// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
// INDEMNITIES.
//
#pragma once
#include <cstdint>
// Three-byte unsigned integer stored least-significant byte first, used where
// a pixel occupies exactly 24 bits.
struct uint24_t {
  uint8_t m[3];

  // Keeps the low 24 bits of `value`; bits 24..31 are dropped.
  explicit uint24_t(uint32_t value) {
    for (int byte = 0; byte < 3; ++byte) {
      m[byte] = static_cast<uint8_t>(value >> (8 * byte));
    }
  }

  // Stores the three bytes as given: x is the lowest byte, z the highest.
  explicit uint24_t(uint8_t x, uint8_t y, uint8_t z) : m{x, y, z} {}

  // Reassembles the bytes into a 32-bit value whose top byte is zero.
  operator uint32_t() const {
    uint32_t packed = m[2];
    packed = (packed << 8) | m[1];
    packed = (packed << 8) | m[0];
    return packed;
  }
};

View File

@@ -0,0 +1,67 @@
#include <stdint.h>
#include <vx_intrinsics.h>
#include <vx_spawn.h>
#include "common.h"
#include "texsw.h"
#define ENABLE_SW
// Per-launch state handed to every kernel_body task: the launch arguments plus
// values main() derives from them once, so tasks do not recompute them.
typedef struct {
// Launch arguments located at KERNEL_ARG_DEV_MEM_ADDR.
kernel_arg_t* state;
// Pixels per row processed by a task (main() sets this to dst_width).
uint32_t tile_width;
// Rows processed by a task: dst_height / num_tasks, rounded up.
uint32_t tile_height;
// Texture-coordinate step per destination pixel: 1/dst_width and 1/dst_height.
float deltaX;
float deltaY;
} tile_arg_t;
// Renders the band of destination rows owned by `task_id`: rows
// [task_id * tile_height, task_id * tile_height + tile_height), all columns.
// Each pixel's texture coordinates are accumulated in float, scaled by 2^20
// to fixed point, and sampled either through the software routines
// (tex_sw / tex3_sw) or the hardware intrinsics (vx_tex / vx_tex3).
// NOTE(review): tile_height is rounded up in main(), so when dst_height is not
// a multiple of num_tasks the last band runs past dst_height rows - confirm the
// host guarantees divisibility or pads the destination buffer.
void kernel_body(int task_id, tile_arg_t* arg) {
  kernel_arg_t* state = arg->state;

  const uint32_t first_col = 0;
  const uint32_t first_row = task_id * arg->tile_height;

  // Byte address of the first pixel of this band.
  uint8_t* row_bytes = (uint8_t*)(state->dst_ptr + first_col * state->dst_stride + first_row * state->dst_pitch);

  float fv = first_row * arg->deltaY;

  for (uint32_t row = 0; row < arg->tile_height; ++row) {
    uint32_t* out = (uint32_t*)row_bytes;
    float fu = first_col * arg->deltaX;

    for (uint32_t col = 0; col < arg->tile_width; ++col) {
      // Fixed-point texture coordinates (20 fractional bits).
      int32_t u = (int32_t)(fu * (1<<20));
      int32_t v = (int32_t)(fv * (1<<20));
#ifdef ENABLE_SW
      if (state->use_sw) {
        if (state->filter == 2) {
          out[col] = tex3_sw(state, 0, u, v, state->lod);
        } else {
          out[col] = tex_sw(state, 0, u, v, state->lod);
        }
      } else
#endif
      {
        if (state->filter == 2) {
          out[col] = vx_tex3(0, u, v, state->lod);
        } else {
          out[col] = vx_tex(0, u, v, state->lod);
        }
      }
      fu += arg->deltaX;
    }

    row_bytes += state->dst_pitch;
    fv += arg->deltaY;
  }
}
// Kernel entry point: programs texture unit 0 from the launch arguments,
// derives the per-task tile description, and spawns one task per band of
// destination rows. Returns 0.
int main() {
  kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;

  // configure texture unit
  vx_csr_write(CSR_TEX_ADDR(0), arg->src_ptr);
  vx_csr_write(CSR_TEX_MIPOFF(0), 0);
  vx_csr_write(CSR_TEX_WIDTH(0), arg->src_logWidth);
  vx_csr_write(CSR_TEX_HEIGHT(0), arg->src_logHeight);
  vx_csr_write(CSR_TEX_FORMAT(0), arg->format);
  // the same wrap mode is written into both fields of the wrap register
  vx_csr_write(CSR_TEX_WRAP(0), (arg->wrap << 2) | arg->wrap);
  // any non-zero filter mode enables filtering in the texture unit
  vx_csr_write(CSR_TEX_FILTER(0), (arg->filter ? 1 : 0));

  tile_arg_t targ;
  targ.state = arg;
  targ.tile_width = arg->dst_width;
  // rows per task, rounded up so every destination row is covered
  targ.tile_height = (arg->dst_height + arg->num_tasks - 1) / arg->num_tasks;
  targ.deltaX = 1.0f / arg->dst_width;
  targ.deltaY = 1.0f / arg->dst_height;

  vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, &targ);

  // The kernel is built with -ffreestanding, where reaching the end of a
  // non-void main() is not guaranteed to yield 0, so return it explicitly.
  return 0;
}

1313
tests/regression/tex/lupng.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,186 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2014 Jan Solanti
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifdef __cplusplus
extern "C" {
#endif
#pragma once
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef __int8 int8_t;
typedef __int16 int16_t;
typedef __int32 int32_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
#else
#include <stdlib.h>
#include <stdint.h>
#endif
/* In-memory image: dimensions, channel layout and the pixel buffer. */
typedef struct {
int32_t width;
int32_t height;
uint8_t channels; /* channel count; luImageCreate documents 1-4 as supported */
uint8_t depth; /* must be 8 or 16 */
size_t dataSize; /* presumably the size of `data` in bytes - confirm in lupng.c */
uint8_t *data; /* pixel buffer */
} LuImage;
/*
 * User-overridable callbacks stored in LuUserContext. Each receives the
 * matching opaque user pointer from the context (readProcUserPtr,
 * writeProcUserPtr, ...).
 * NOTE(review): the read/write signatures look fread/fwrite-shaped
 * (buffer, element size, element count) - confirm against lupng.c.
 */
typedef size_t (*PngReadProc)(void *outPtr, size_t size, size_t count, void *userPtr);
typedef size_t (*PngWriteProc)(const void *inPtr, size_t size, size_t count, void *userPtr);
typedef void* (*PngAllocProc)(size_t size, void *userPtr);
typedef void (*PngFreeProc)(void *ptr, void *userPtr);
typedef void (*PngWarnProc)(void *userPtr, const char *fmt, ...);
/*
 * Bundle of user-supplied callbacks and settings consumed by the *UC entry
 * points (luPngReadUC, luPngWriteUC) and by the image create/release
 * functions. luUserContextInitDefault() fills in defaults.
 */
typedef struct {
/* loader */
PngReadProc readProc;
void *readProcUserPtr;
int skipSig;
/* writer */
PngWriteProc writeProc;
void *writeProcUserPtr;
int compressionLevel;
/* memory allocation */
PngAllocProc allocProc;
void *allocProcUserPtr;
PngFreeProc freeProc;
void *freeProcUserPtr;
/* warnings/error output */
PngWarnProc warnProc; /* set to NULL to disable output altogether */
void *warnProcUserPtr;
/* special case: avoid allocating a LuImage when loading or creating
* an image, just use this one */
LuImage *overrideImage;
} LuUserContext;
/**
* Initializes a LuUserContext to use the default malloc implementation.
*
* @param userCtx the LuUserContext to initialize
*/
void luUserContextInitDefault(LuUserContext *userCtx);
/**
* Creates a new Image object with the specified attributes.
* The data store of the Image is allocated but its contents are undefined.
* Only 8 and 16 bits deep images with 1-4 channels are supported.
*
* @param buffer pointer to an existing buffer (which may already contain the
* image data), or NULL to internally allocate a new buffer
* @param userCtx the user context (with the memory allocator function
* pointers to use), or NULL to use the default allocator
* (malloc).
*/
LuImage *luImageCreate(size_t width, size_t height, uint8_t channels, uint8_t depth,
uint8_t *buffer, const LuUserContext *usrCtx);
/**
* Releases the memory associated with the given Image object.
*
* @param userCtx the user context (with the memory deallocator function
* pointers to use), or NULL to use the default deallocator
* (free). The deallocator should match the ones used for
* allocation.
*/
void luImageRelease(LuImage *img, const LuUserContext *usrCtx);
/**
* Extracts the raw image buffer from a LuImage and releases the
* then-orphaned LuImage object. This can be used if you want to use
* the image data in your own structures.
*
* @param userCtx the user context (with the memory deallocator function
* pointers to use), or NULL to use the default deallocator
* (free). The deallocator should match the ones used for
* allocation.
*/
uint8_t *luImageExtractBufAndRelease(LuImage *img, const LuUserContext *userCtx);
/**
* Decodes a PNG image from a file
*
* @param filename the file name (optionally with full path) to read from.
* @param userCtx the user context (with the memory allocator function
* pointers to use), or NULL to use the default allocator
* (malloc).
*/
LuImage *luPngReadFile(const char *filename, LuUserContext *userCtx);
/**
* Decodes a PNG image with the provided read function into a LuImage struct
*
* @param readProc a function pointer to a user-defined function to use for
* reading the PNG data.
* @param userPtr an opaque pointer provided as an argument to readProc
* @param skipSig don't verify PNG signature - the bytes have already been
* removed from the input stream
*/
LuImage *luPngRead(PngReadProc readProc, void *userPtr, int skipSig);
/**
* Decodes a PNG image with the provided user context into a LuImage struct
*
* @param userCtx the LuUserContext to use
*/
LuImage *luPngReadUC(const LuUserContext *userCtx);
/**
* Encodes a LuImage struct to PNG and writes it out to a file.
*
* @param filename the file name (optionally with full path) to write to.
* Existing files will be overwritten!
* @param img the LuImage to encode
*/
int luPngWriteFile(const char *filename, const LuImage *img);
/**
* Encodes a LuImage struct to PNG and writes it out using a user-defined write
* function.
*
* @param writeProc a function pointer to a user-defined function that will be
* used for writing the final PNG data.
* @param userPtr an opaque pointer provided as an argument to writeProc
* @param img the LuImage to encode
*/
int luPngWrite(PngWriteProc writeProc, void *userPtr, const LuImage *img);
/**
* Encodes a LuImage struct to PNG and writes it out with the provided user
* context.
*
* @param userCtx the LuUserContext to use
* @param img the LuImage to encode
*/
int luPngWriteUC(const LuUserContext *userCtx, const LuImage *img);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,256 @@
#include <iostream>
#include <vector>
#include <unistd.h>
#include <string.h>
#include <chrono>
#include <cmath>
#include <assert.h>
#include <vortex.h>
#include "common.h"
#include "utils.h"
// Evaluates _expr once as an int status. A zero result continues normally;
// any other value prints the failing expression and its value, calls
// cleanup() to release the global device/buffer handles, and terminates the
// process with exit(-1). Wrapped in do/while(false) so it behaves as a
// single statement.
#define RT_CHECK(_expr) \
do { \
int _ret = _expr; \
if (0 == _ret) \
break; \
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
cleanup(); \
exit(-1); \
} while (false)
///////////////////////////////////////////////////////////////////////////////
// Test configuration; defaults below are overridden by parse_args().
const char* kernel_file = "kernel.bin"; // -k: kernel binary uploaded to the device
const char* input_file = "palette64.png"; // -i: source texture image
const char* output_file = "output.png"; // -o: rendered image written by run_test()
int wrap = 0; // -w: wrap mode forwarded to the kernel
int filter = 0; // -g: filter mode forwarded to the kernel
float scale = 1.0f; // -s: destination size = source size * scale
int format = 0; // -f: raw format index forwarded to the kernel
bool use_sw = false; // -z: request the kernel's software sampling path
ePixelFormat eformat = FORMAT_A8R8G8B8; // pixel format matching `format`, used when loading the image
// Device handles shared with cleanup(); nullptr until opened/allocated.
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
// Prints the program banner followed by the command-line synopsis.
static void show_usage() {
  static const char* const kUsageLines[] = {
    "Vortex Texture Test.",
    "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-z no_hw] [-h: help]",
  };
  for (const char* text : kUsageLines) {
    std::cout << text << std::endl;
  }
}
// Parses the command line into the global test configuration.
//
//   -i <file>   input image            -o <file>   output image
//   -k <file>   kernel binary          -s <float>  destination scale factor
//   -w <int>    wrap mode              -g <int>    filter mode
//   -f <0..4>   texture format index   -z          use software sampling
//   -h / -?     print usage and exit(0)
//
// An out-of-range -f value prints an error and exits with status 1.
static void parse_args(int argc, char **argv) {
  int c;
  // "s:" must be listed here: without it getopt reports -s as an unknown
  // option and the documented scale argument can never be set.
  while ((c = getopt(argc, argv, "zi:o:k:s:w:f:g:h?")) != -1) {
    switch (c) {
    case 'i':
      input_file = optarg;
      break;
    case 'o':
      output_file = optarg;
      break;
    case 's':
      scale = std::stof(optarg, NULL);
      break;
    case 'w':
      wrap = std::atoi(optarg);
      break;
    case 'z':
      use_sw = true;
      break;
    case 'f': {
      format = std::atoi(optarg);
      // keep the host-side pixel format in sync with the raw index
      switch (format) {
      case 0: eformat = FORMAT_A8R8G8B8; break;
      case 1: eformat = FORMAT_R5G6B5; break;
      case 2: eformat = FORMAT_R4G4B4A4; break;
      case 3: eformat = FORMAT_L8; break;
      case 4: eformat = FORMAT_A8; break;
      default:
        std::cout << "Error: invalid format: " << format << std::endl;
        exit(1);
      }
    } break;
    case 'g':
      filter = std::atoi(optarg);
      break;
    case 'k':
      kernel_file = optarg;
      break;
    case 'h':
    case '?': {
      // getopt also returns '?' for unrecognized options
      show_usage();
      exit(0);
    } break;
    default:
      show_usage();
      exit(-1);
    }
  }
}
// Releases the staging buffer and closes the device if they were acquired.
// Handles are reset to nullptr afterwards so that calling cleanup() more
// than once (e.g. from RT_CHECK after a normal cleanup) cannot release the
// same handle twice.
void cleanup() {
  if (buffer) {
    vx_buf_release(buffer);
    buffer = nullptr;
  }
  if (device) {
    vx_dev_close(device);
    device = nullptr;
  }
}
int run_test(const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t width,
uint32_t height) {
auto time_start = std::chrono::high_resolution_clock::now();
// start device
std::cout << "start device" << std::endl;
RT_CHECK(vx_start(device));
// wait for completion
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
auto time_end = std::chrono::high_resolution_clock::now();
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
printf("Elapsed time: %lg ms\n", elapsed);
// download destination buffer
std::cout << "download destination buffer" << std::endl;
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
std::vector<uint8_t> dst_pixels(buf_size);
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < buf_size; ++i) {
dst_pixels[i] = buf_ptr[i];
}
// save output image
std::cout << "save output image" << std::endl;
//dump_image(dst_pixels, width, height, bpp);
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels, width, height));
return 0;
}
// Host-side driver for the texture test: loads the source image, uploads the
// kernel, its arguments and the source texels to the device, runs the kernel
// and saves the rendered destination image. Returns 0 on success; runtime
// failures terminate the process through RT_CHECK.
int main(int argc, char *argv[]) {
kernel_arg_t kernel_arg;
std::vector<uint8_t> src_pixels;
uint32_t src_width;
uint32_t src_height;
// parse command arguments
parse_args(argc, argv);
// load the image, converted to the pixel format selected with -f
RT_CHECK(LoadImage(input_file, eformat, src_pixels, &src_width, &src_height));
// check power of two support
if (!ISPOW2(src_width) || !ISPOW2(src_height)) {
std::cout << "Error: only power of two textures supported: width=" << src_width << ", heigth=" << src_height << std::endl;
return -1;
}
uint32_t src_bpp = Format::GetInfo(eformat).BytePerPixel;
//dump_image(src_pixels, src_width, src_height, src_bpp);
uint32_t src_bufsize = src_bpp * src_width * src_height;
// destination is the source size scaled by -s, always 4 bytes per pixel
// NOTE(review): a small scale can truncate these to 0, which makes num_tasks
// 0 below and the kernel divide by zero - confirm whether a guard is wanted
uint32_t dst_width = (uint32_t)(src_width * scale);
uint32_t dst_height = (uint32_t)(src_height * scale);
uint32_t dst_bpp = 4;
uint32_t dst_bufsize = dst_bpp * dst_width * dst_height;
// open device connection
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
unsigned max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
// one task per hardware thread reported by the device
uint32_t num_tasks = max_cores * max_warps * max_threads;
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl;
std::cout << "destination buffer: width=" << dst_width << ", heigth=" << dst_height << ", size=" << dst_bufsize << " bytes" << std::endl;
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// allocate device memory
std::cout << "allocate device memory" << std::endl;
size_t src_addr, dst_addr;
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
std::cout << "src_addr=0x" << std::hex << src_addr << std::endl;
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
// allocate staging shared memory
// (one buffer is reused for the kernel argument, the source upload and the
// destination transfers, so it is sized for the largest of the three)
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t), std::max<uint32_t>(src_bufsize, dst_bufsize));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
// upload kernel argument
std::cout << "upload kernel argument" << std::endl;
{
// the kernel splits the destination by rows, so never use more tasks than rows
kernel_arg.num_tasks = std::min<uint32_t>(num_tasks, dst_height);
kernel_arg.format = format;
kernel_arg.filter = filter;
kernel_arg.wrap = wrap;
kernel_arg.use_sw = use_sw;
kernel_arg.lod = 0x0;
// exact because both dimensions were verified to be powers of two above
kernel_arg.src_logWidth = (uint32_t)std::log2(src_width);
kernel_arg.src_logHeight = (uint32_t)std::log2(src_height);
kernel_arg.src_stride = src_bpp;
kernel_arg.src_pitch = src_bpp * src_width;
kernel_arg.src_ptr = src_addr;
kernel_arg.dst_width = dst_width;
kernel_arg.dst_height = dst_height;
kernel_arg.dst_stride = dst_bpp;
kernel_arg.dst_pitch = dst_bpp * dst_width;
kernel_arg.dst_ptr = dst_addr;
auto buf_ptr = (int*)vx_host_ptr(buffer);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
// upload source buffer
std::cout << "upload source buffer" << std::endl;
{
auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < src_bufsize; ++i) {
buf_ptr[i] = src_pixels[i];
}
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, src_bufsize, 0));
}
// clear destination buffer
// (filled with a recognizable 0xdeadbeef pattern before the kernel runs)
std::cout << "clear destination buffer" << std::endl;
{
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < (dst_bufsize/4); ++i) {
buf_ptr[i] = 0xdeadbeef;
}
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, dst_bufsize, 0));
}
// run tests
std::cout << "run tests" << std::endl;
RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height));
// cleanup
std::cout << "cleanup" << std::endl;
cleanup();
std::cout << "PASSED!" << std::endl;
return 0;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 543 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 534 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

@@ -0,0 +1,25 @@
//
// Copyright (c) Blaise Tine. All rights reserved.
//
//
// Use of this sample source code is subject to the terms of the Microsoft
// license agreement under which you licensed this sample source code. If
// you did not accept the terms of the license agreement, you are not
// authorized to use this sample source code. For the terms of the license,
// please see the license agreement between you and Microsoft or, if applicable,
// see the LICENSE.RTF on your install media or the root of your tools
// installation.
// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
// INDEMNITIES.
//
#pragma once
#include "format.h"
// Describes a pixel surface handed to the blitter. Field order matters:
// callers such as ConvertImage() brace-initialize this struct positionally.
struct SurfaceDesc {
ePixelFormat Format; // pixel format of the data behind pBits
uint8_t *pBits; // first byte of the pixel data (not owned by this struct)
uint32_t Width; // surface width in pixels
uint32_t Height; // surface height in pixels
uint32_t Pitch; // bytes per row (ConvertImage passes bytes-per-pixel * width)
};

View File

@@ -0,0 +1,167 @@
#ifndef _TEXSW_H_
// The guard macro has to be defined here, otherwise the #ifndef above never
// prevents a second inclusion and the functions below get redefined.
#define _TEXSW_H_
#include "common.h"
// Upper bound used when clamping the level of detail.
#define TEX_LOD_MAX 11
// Arguments are fully parenthesized so that expression arguments keep their
// meaning; note that each argument is still evaluated more than once.
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
// Applies the wrap mode to a texture coordinate with 20 fractional bits.
// wrap == 1 keeps only the low 20 bits of the coordinate; every other mode
// clamps it to the range [0, 0xfffff].
inline int address(int wrap, int value) {
  if (wrap == 1) {
    return value & 0xfffff;
  }
  int non_negative = MAX(value, 0);
  return MIN(non_negative, 0xfffff);
}
inline void unpack(int format, int value, int* l, int* h) {
switch (format) {
case 1:
case 2:
*l = value;
*h = 0;
break;
case 3:
*l = (value | (value << 8)) & 0x00ff00ff;
*h = 0;
break;
case 4:
*l = (value | (value << 16)) & 0x07e0f81f;
*h = 0;
break;
case 5:
*l = (value | (value << 12)) & 0x0f0f0f0f;
*h = 0;
break;
default:
case 0:
*l = value & 0x00ff00ff;
*h = (value >> 8) & 0x00ff00ff;
break;
}
}
inline void lerp(int al, int ah, int bl, int bh, int frac, int* l, int* h) {
*l = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
*h = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
}
inline int pack(int format, int l, int h) {
switch (format) {
case 1:
case 2:
return l;
case 3:
return (l | (l >> 8)) & 0xffff;
case 4:
return (l | (l >> 16)) & 0xffff;
case 5:
return (l | (l >> 12)) & 0xffff;
default:
case 0:
return (h << 8) | l;
}
}
// Software texture sampler.
//
// state : kernel arguments describing the source texture (address,
//         log2 dimensions, format, wrap and filter modes)
// stage : unused
// u, v  : texture coordinates in fixed point with 20 fractional bits
// lod   : unused (only the base level is sampled, mip_offset is 0)
//
// Returns the packed texel: a bilinear blend of the four neighbouring
// texels when state->filter is non-zero, otherwise the single texel
// containing (u, v).
//
// NOTE(review): texels are always fetched as 32-bit words regardless of
// `format`; confirm that this is intended for the 16- and 8-bit formats.
inline int tex_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
  (void)stage;
  (void)lod;

  int base_addr = state->src_ptr;
  int mip_offset = 0;
  int log_width = state->src_logWidth;
  int log_height = state->src_logHeight;
  int format = state->format;
  int wrap = state->wrap;
  int filter = state->filter;

  int32_t* pBits = ((int32_t*)base_addr) + mip_offset;

  if (filter) {
    // sample positions half a texel on either side of (u, v)
    int u0 = address(wrap, u - (0x80000 >> log_width));
    int v0 = address(wrap, v - (0x80000 >> log_height));
    int u1 = address(wrap, u + (0x80000 >> log_width));
    int v1 = address(wrap, v + (0x80000 >> log_height));

    // integer texel indices
    int x0 = u0 >> (20 - log_width);
    int y0 = v0 >> (20 - log_height);
    int x1 = u1 >> (20 - log_width);
    int y1 = v1 >> (20 - log_height);

    // memory lookup
    int c0 = pBits[x0 + (y0 << log_width)];
    int c1 = pBits[x1 + (y0 << log_width)];
    int c2 = pBits[x0 + (y1 << log_width)];
    int c3 = pBits[x1 + (y1 << log_width)];

    // filtering: the blend weights are the top 8 bits of the sub-texel
    // fraction of u0/v0, not bits of the texel index. u0/v0 are at most
    // 0xfffff after address(), so the << 8 cannot overflow.
    int alpha = ((u0 << 8) >> (20 - log_width)) & 0xff;
    int beta = ((v0 << 8) >> (20 - log_height)) & 0xff;

    int c0a, c0b;
    int c1a, c1b;
    int c01a, c01b;
    unpack(format, c0, &c0a, &c0b);
    unpack(format, c1, &c1a, &c1b);
    lerp(c0a, c0b, c1a, c1b, alpha, &c01a, &c01b);

    int c2a, c2b;
    int c3a, c3b;
    int c23a, c23b;
    unpack(format, c2, &c2a, &c2b);
    unpack(format, c3, &c3a, &c3b);
    lerp(c2a, c2b, c3a, c3b, alpha, &c23a, &c23b);

    int c4a, c4b;
    lerp(c01a, c01b, c23a, c23b, beta, &c4a, &c4b);
    return pack(format, c4a, c4b);
  } else {
    // point sampling: fetch the texel containing (u, v)
    int u0 = address(wrap, u);
    int v0 = address(wrap, v);

    int x0 = u0 >> (20 - log_width);
    int y0 = v0 >> (20 - log_height);

    int c0 = pBits[x0 + (y0 << log_width)];

    int c0a, c0b;
    unpack(format, c0, &c0a, &c0b);
    return pack(format, c0a, c0b);
  }
}
// Two-sample variant of vx_tex(): samples at `lod` and at the next level,
// then blends the two results using bits 12..19 of `lod` as an 8-bit weight.
// The result is returned as four 8-bit channels.
//
// `stage` is not forwarded; both samples use texture stage 0.
// NOTE(review): `lod + 0x100000` is compared against TEX_LOD_MAX (11), which
// looks like a fixed-point value clamped against an integer level - confirm
// the intended units before relying on non-zero lod values.
inline int vx_tex3(int stage, int u, int v, int lod) {
  (void)stage;
  int lodn = MIN(lod + 0x100000, TEX_LOD_MAX);
  int a = vx_tex(0, u, v, lod);
  int b = vx_tex(0, u, v, lodn);
  // split each sample into even/odd bytes so two channels blend per multiply
  int al = a & 0x00ff00ff;
  int ah = (a >> 8) & 0x00ff00ff;
  int bl = b & 0x00ff00ff;
  int bh = (b >> 8) & 0x00ff00ff;
  int frac = (lod >> 12) & 0xff;
  int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
  int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
  // return the blended channels (previously the unblended first sample was
  // reassembled and the blend result was dropped)
  int c = cl | (ch << 8);
  return c;
}
// Software counterpart of vx_tex3(): samples with tex_sw() at `lod` and at
// the next level, then blends the two results using bits 12..19 of `lod` as
// an 8-bit weight. The result is returned as four 8-bit channels.
//
// `stage` is not forwarded; both samples use texture stage 0.
// NOTE(review): the level step here is 0x10000 while vx_tex3() uses
// 0x100000, and both are clamped against the integer TEX_LOD_MAX - confirm
// the intended lod encoding.
inline int tex3_sw(kernel_arg_t* state, int stage, int u, int v, int lod) {
  (void)stage;
  int lodn = MIN(lod + 0x10000, TEX_LOD_MAX);
  int a = tex_sw(state, 0, u, v, lod);
  int b = tex_sw(state, 0, u, v, lodn);
  // split each sample into even/odd bytes so two channels blend per multiply
  int al = a & 0x00ff00ff;
  int ah = (a >> 8) & 0x00ff00ff;
  int bl = b & 0x00ff00ff;
  int bh = (b >> 8) & 0x00ff00ff;
  int frac = (lod >> 12) & 0xff;
  int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
  int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
  // return the blended channels (previously the unblended first sample was
  // reassembled and the blend result was dropped)
  int c = cl | (ch << 8);
  return c;
}
#endif

View File

@@ -0,0 +1,122 @@
#include "tga.h"
#include <fstream>
#include <iostream>
#include "format.h"
// TGA file header as read from / written to disk by LoadTGA() and SaveTGA().
// The struct is packed so the fields are laid out back to back (18 bytes in
// total) and can be transferred with a single read()/write().
struct __attribute__((__packed__)) tga_header_t {
int8_t idlength; // length of the id string that follows the header
int8_t colormaptype; // SaveTGA writes 0 (no palette)
int8_t imagetype; // only type 2 is accepted by LoadTGA / written by SaveTGA
int16_t colormaporigin;
int16_t colormaplength;
int8_t colormapdepth;
int16_t xoffset;
int16_t yoffset;
int16_t width; // image width in pixels
int16_t height; // image height in pixels
int8_t bitsperpixel; // 16, 24 or 32 are accepted by LoadTGA
int8_t imagedescriptor;
};
int LoadTGA(const char *filename,
std::vector<uint8_t> &pixels,
uint32_t *width,
uint32_t *height,
uint32_t *bpp) {
std::ifstream ifs(filename, std::ios::in | std::ios::binary);
if (!ifs.is_open()) {
std::cerr << "couldn't open file: " << filename << "!" << std::endl;
return -1;
}
tga_header_t header;
ifs.read(reinterpret_cast<char *>(&header), sizeof(tga_header_t));
if (ifs.fail()) {
std::cerr << "invalid TGA file header!" << std::endl;
return -1;
}
if (header.imagetype != 2) {
std::cerr << "unsupported TGA encoding format!" << std::endl;
return -1;
}
ifs.seekg(header.idlength, std::ios::cur); // skip string
if (ifs.fail()) {
std::cerr << "invalid TGA file!" << std::endl;
return -1;
}
switch (header.bitsperpixel) {
case 16:
case 24:
case 32: {
// Read pixels data
auto stride = header.bitsperpixel / 8;
pixels.resize(stride * header.width * header.height);
ifs.read((char*)pixels.data(), pixels.size());
if (ifs.fail()) {
std::cerr << "invalid TGA file!" << std::endl;
return -1;
}
*bpp = stride;
break;
}
default:
std::cerr << "unsupported TGA bitsperpixel!" << std::endl;
return -1;
}
*width = header.width;
*height = header.height;
return 0;
}
// Writes `pixels` as an uncompressed TGA file (image type 2).
//
// filename : output path; an existing file is overwritten
// pixels   : at least width * height * bpp bytes, rows stored first to last
// width    : image width in pixels
// height   : image height in pixels
// bpp      : bytes per pixel, 2 to 4
//
// Rows are written in reverse order (last row of `pixels` first).
// Returns 0 on success. On failure a diagnostic is printed to std::cerr and
// -1 is returned.
int SaveTGA(const char *filename,
            const std::vector<uint8_t> &pixels,
            uint32_t width,
            uint32_t height,
            uint32_t bpp) {
  // validate the arguments before touching the file system so that a bad
  // call does not create or truncate the output file
  if (bpp < 2 || bpp > 4) {
    std::cerr << "unsupported pixel stride: " << bpp << "!" << std::endl;
    return -1;
  }

  const size_t pitch = static_cast<size_t>(bpp) * width;
  if (pixels.size() / bpp < static_cast<size_t>(width) * height) {
    // the row loop below would read past the end of the buffer
    std::cerr << "invalid pixel buffer size: " << pixels.size() << "!" << std::endl;
    return -1;
  }

  std::ofstream ofs(filename, std::ios::out | std::ios::binary);
  if (!ofs.is_open()) {
    std::cerr << "couldn't create file: " << filename << "!" << std::endl;
    return -1;
  }

  tga_header_t header;
  header.idlength = 0;
  header.colormaptype = 0; // no palette
  header.imagetype = 2; // uncompressed true-color data
  header.colormaporigin = 0;
  header.colormaplength = 0;
  header.colormapdepth = 0;
  header.xoffset = 0;
  header.yoffset = 0;
  header.width = width;
  header.height = height;
  header.bitsperpixel = bpp * 8;
  header.imagedescriptor = 0;

  // write header
  ofs.write(reinterpret_cast<char *>(&header), sizeof(tga_header_t));

  // write pixel data, last row first; counting down from `height` keeps the
  // row offset valid even for an empty image
  for (uint32_t y = height; y > 0; --y) {
    const uint8_t* pixel_row = pixels.data() + (y - 1) * pitch;
    ofs.write(reinterpret_cast<const char*>(pixel_row), pitch);
  }

  if (ofs.fail()) {
    std::cerr << "couldn't write file: " << filename << "!" << std::endl;
    return -1;
  }

  return 0;
}

View File

@@ -0,0 +1,14 @@
#include <cstdint>
#include <vector>
// Loads an image-type-2 TGA file with 16, 24 or 32 bits per pixel.
// On success returns 0, stores the pixel data in `pixels` and reports the
// dimensions in pixels and the bytes per pixel through the out-parameters.
// Returns -1 after printing a diagnostic on failure.
int LoadTGA(const char *filename,
std::vector<uint8_t> &pixels,
uint32_t *width,
uint32_t *height,
uint32_t *bpp);
// Saves `pixels` as an image-type-2 TGA file. `bpp` is the number of bytes
// per pixel and must be between 2 and 4. Returns 0 on success, -1 after
// printing a diagnostic on failure.
int SaveTGA(const char *filename,
const std::vector<uint8_t> &pixels,
uint32_t width,
uint32_t height,
uint32_t bpp);

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.2 KiB

View File

@@ -0,0 +1,194 @@
#include "utils.h"
#include <assert.h>
#include <cstring>
#include "blitter.h"
#include "format.h"
#include "tga.h"
#include "lupng.h"
// Returns everything after the last '.' in `str`, or an empty string when
// `str` contains no '.' at all. The dot itself is not included.
std::string getFileExt(const std::string& str) {
  const auto dot = str.rfind('.');
  return (dot == std::string::npos) ? std::string("") : str.substr(dot + 1);
}
// Case-insensitive string comparison.
// Returns true when `a` and `b` have the same length and every pair of
// characters is equal after tolower().
bool iequals(const std::string& a, const std::string& b) {
  const auto sz = a.size();
  if (b.size() != sz)
    return false;
  for (size_t i = 0; i < sz; ++i) {
    // tolower() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour
    const auto ca = static_cast<unsigned char>(a[i]);
    const auto cb = static_cast<unsigned char>(b[i]);
    if (tolower(ca) != tolower(cb))
      return false;
  }
  return true;
}
// Loads a TGA or PNG image (chosen by file extension, case-insensitive) and
// converts it to `format` when the file's native layout differs.
//
// PNG input must be 8 bits deep with 3 or 4 channels. The native layout is
// derived from the bytes per pixel: 1 -> A8, 2 -> A1R5G5B5, 3 -> R8G8B8,
// 4 -> A8R8G8B8; any other value aborts the process.
//
// Returns 0 on success with `pixels`, `width` and `height` filled in, and a
// non-zero value on failure.
int LoadImage(const char *filename,
              ePixelFormat format,
              std::vector<uint8_t> &pixels,
              uint32_t *width,
              uint32_t *height) {
  uint32_t img_width;
  uint32_t img_height;
  uint32_t img_bpp;

  auto ext = getFileExt(filename);
  const bool is_tga = iequals(ext, "tga");
  const bool is_png = !is_tga && iequals(ext, "png");

  if (!is_tga && !is_png) {
    std::cerr << "invalid file extension: " << ext << "!" << std::endl;
    return -1;
  }

  if (is_tga) {
    int status = LoadTGA(filename, pixels, &img_width, &img_height, &img_bpp);
    if (status)
      return status;
  } else {
    auto png = luPngReadFile(filename, NULL);
    if (png == NULL)
      return -1;

    const bool channels_ok = (png->channels == 3) || (png->channels == 4);
    if (!(png->depth == 8 && channels_ok)) {
      luImageRelease(png, NULL);
      std::cerr << "invalid png file format!" << std::endl;
      return -1;
    }

    // take a private copy of the decoded pixels before releasing the image
    pixels.resize(png->channels * png->width * png->height);
    memcpy(pixels.data(), png->data, pixels.size());
    img_width = png->width;
    img_height = png->height;
    img_bpp = png->channels;
    luImageRelease(png, NULL);
  }

  // native layout implied by the bytes per pixel
  ePixelFormat img_format;
  if (img_bpp == 1) {
    img_format = FORMAT_A8;
  } else if (img_bpp == 2) {
    img_format = FORMAT_A1R5G5B5;
  } else if (img_bpp == 3) {
    img_format = FORMAT_R8G8B8;
  } else if (img_bpp == 4) {
    img_format = FORMAT_A8R8G8B8;
  } else {
    std::abort();
  }

  if (img_format != format) {
    // convert into a scratch buffer, then adopt it as the result
    std::vector<uint8_t> converted;
    int status = ConvertImage(converted, pixels, img_width, img_height, img_format, format);
    if (status)
      return status;
    pixels.swap(converted);
  }

  *width = img_width;
  *height = img_height;
  return 0;
}
// Saves `pixels` (width x height, laid out as `format`) to a TGA or PNG
// file, chosen by the file extension (case-insensitive).
// Returns the writer's status (0 on success), or -1 after printing a
// diagnostic when the extension is not supported.
int SaveImage(const char *filename,
              ePixelFormat format,
              const std::vector<uint8_t> &pixels,
              uint32_t width,
              uint32_t height) {
  uint32_t bpp = Format::GetInfo(format).BytePerPixel;
  auto ext = getFileExt(filename);
  if (iequals(ext, "tga")) {
    return SaveTGA(filename, pixels, width, height, bpp);
  }
  if (iequals(ext, "png")) {
    // zero-initialize so no field reaches the encoder uninitialized;
    // dataSize in particular was previously left unset
    LuImage image = {};
    image.width = width;
    image.height = height;
    image.depth = 8;
    image.channels = bpp; // one 8-bit channel per byte of the pixel
    image.dataSize = pixels.size();
    image.data = (uint8_t*)pixels.data(); // LuImage has no const data pointer
    return luPngWriteFile(filename, &image);
  }
  std::cerr << "invalid file extension: " << ext << "!" << std::endl;
  return -1;
}
// Prints the image to std::cout, one text line per row, with every pixel
// shown as a hexadecimal number and pixels separated by ", ".
// Each pixel is assembled from `bpp` consecutive bytes, first byte lowest.
// `pixels` must hold exactly width * height * bpp bytes (asserted).
void dump_image(const std::vector<uint8_t>& pixels, uint32_t width, uint32_t height, uint32_t bpp) {
  assert(width * height * bpp == pixels.size());
  // std::hex is sticky; remember the caller's formatting so later output on
  // std::cout is not silently switched to hexadecimal
  const std::ios_base::fmtflags saved_flags = std::cout.flags();
  const uint8_t* pixel_bytes = pixels.data();
  for (uint32_t y = 0; y < height; ++y) {
    for (uint32_t x = 0; x < width; ++x) {
      uint32_t pixel32 = 0;
      for (uint32_t b = 0; b < bpp; ++b) {
        uint32_t pixel8 = *pixel_bytes++;
        pixel32 |= pixel8 << (b * 8);
      }
      if (x) std::cout << ", ";
      std::cout << std::hex << pixel32;
    }
    std::cout << std::endl;
  }
  std::cout.flags(saved_flags);
}
// Copies a rectangle from `srcDesc` to `dstDesc` using the blit routine
// registered for the (source format, destination format) pair.
//
// Returns -1 when any offset is at or beyond the corresponding surface's
// width/height. The copy size is reduced so that it exceeds neither
// surface's width/height. Otherwise the blit routine's result is returned.
int CopyBuffers(SurfaceDesc &dstDesc,
                int32_t dstOffsetX,
                int32_t dstOffsetY,
                uint32_t copyWidth,
                uint32_t copyHeight,
                const SurfaceDesc &srcDesc,
                int32_t srcOffsetX,
                int32_t srcOffsetY) {
  static const BlitTable s_blitTable;

  const bool srcOutside = (srcOffsetX >= (int32_t)srcDesc.Width)
                       || (srcOffsetY >= (int32_t)srcDesc.Height);
  const bool dstOutside = (dstOffsetX >= (int32_t)dstDesc.Width)
                       || (dstOffsetY >= (int32_t)dstDesc.Height);
  if (srcOutside || dstOutside) {
    return -1;
  }

  // limit the copy to the smaller of the two surfaces in each dimension
  const uint32_t widthLimit = (srcDesc.Width < dstDesc.Width) ? srcDesc.Width : dstDesc.Width;
  const uint32_t heightLimit = (srcDesc.Height < dstDesc.Height) ? srcDesc.Height : dstDesc.Height;
  if (copyWidth > widthLimit) {
    copyWidth = widthLimit;
  }
  if (copyHeight > heightLimit) {
    copyHeight = heightLimit;
  }

  return s_blitTable.get(srcDesc.Format, dstDesc.Format)(
      dstDesc, dstOffsetX, dstOffsetY, copyWidth, copyHeight, srcDesc,
      srcOffsetX, srcOffsetY);
}
int ConvertImage(std::vector<uint8_t>& dst_pixels,
const std::vector<uint8_t>& src_pixels,
uint32_t width,
uint32_t height,
ePixelFormat src_format,
ePixelFormat dst_format) {
uint32_t src_pitch = Format::GetInfo(src_format).BytePerPixel * width;
uint32_t dst_pitch = Format::GetInfo(dst_format).BytePerPixel * width;
dst_pixels.resize(dst_pitch * height);
SurfaceDesc srcDesc{src_format, (uint8_t*)src_pixels.data(), width, height, src_pitch};
SurfaceDesc dstDesc{dst_format, dst_pixels.data(), width, height, dst_pitch};
return CopyBuffers(dstDesc, 0, 0, width, height, srcDesc, 0, 0);
}

View File

@@ -0,0 +1,43 @@
#include <cstdint>
#include <vector>
#include <iostream>
#include "surfacedesc.h"
// True when x is non-zero and has exactly one bit set, i.e. is a power of
// two. The argument is evaluated more than once.
#define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
// Returns log2(value) rounded up, i.e. the smallest n with (1 << n) >= value.
// 0 and 1 both yield 0.
//
// The count is taken on a 32-bit operand, so the result does not depend on
// the width of `long`, and no intermediate shift can overflow for values at
// or above 2^31.
inline uint32_t ilog2 (uint32_t value) {
  if (value <= 1)
    return 0;
  // value - 1 is non-zero here, as required by __builtin_clz
  return 32u - (uint32_t)__builtin_clz(value - 1);
}
// Loads a TGA or PNG file (selected by file extension) and converts the
// pixels to `format` when needed. Returns 0 on success and a non-zero value
// on failure.
int LoadImage(const char *filename,
ePixelFormat format,
std::vector<uint8_t> &pixels,
uint32_t *width,
uint32_t *height);
// Saves `pixels`, laid out as `format`, to a TGA or PNG file (selected by
// file extension). Returns 0 on success and a non-zero value on failure.
int SaveImage(const char *filename,
ePixelFormat format,
const std::vector<uint8_t> &pixels,
uint32_t width,
uint32_t height);
// Copies a rectangle between two surfaces, converting between their pixel
// formats. Returns -1 when an offset lies at or beyond a surface's
// width/height; otherwise returns the blit routine's result.
int CopyBuffers(SurfaceDesc &dstDesc,
int32_t dstOffsetX,
int32_t dstOffsetY,
uint32_t copyWidth,
uint32_t copyHeight,
const SurfaceDesc &srcDesc,
int32_t srcOffsetX,
int32_t srcOffsetY);
// Converts a tightly packed width x height image from `src_format` to
// `dst_format`; `dst_pixels` is resized to hold the result.
int ConvertImage(std::vector<uint8_t>& dst_pixels,
const std::vector<uint8_t>& src_pixels,
uint32_t width,
uint32_t height,
ePixelFormat src_format,
ePixelFormat dst_format);
// Prints the image to std::cout as hexadecimal pixel values, one row per
// line; `bpp` is the number of bytes per pixel.
void dump_image(const std::vector<uint8_t>& pixels,
uint32_t width,
uint32_t height,
uint32_t bpp);