cocogfx fixes and refactoring

2021-11-25 13:58:09 -05:00
parent a671e1a05d
commit b995843a5b
44 changed files with 339 additions and 3921 deletions
--- a/tests/regression/tex/Makefile
+++ b/tests/regression/tex/Makefile
@@ -10,7 +10,7 @@ VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
 VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

 VX_CFLAGS += -std=c++11 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
-VX_CFLAGS += -DENABLE_SW -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common
+VX_CFLAGS += -DENABLE_SW -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party/cocogfx/include

 VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a

@@ -19,15 +19,13 @@ VX_SRCS = kernel.c
 #CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
 CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -Wfatal-errors

-CXXFLAGS += -DLUPNG_USE_ZLIB
+CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party/cocogfx/include

-CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common
-
-LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex  -lz
+LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex $(VORTEX_RT_PATH)/../third_party/cocogfx/libcocogfx.a -lz

 PROJECT = tex

-SRCS = main.cpp utils.cpp tga.cpp lupng.c
+SRCS = main.cpp utils.cpp

 all: $(PROJECT) kernel.bin kernel.dump
 
--- a/tests/regression/tex/blitter.h
+++ b/tests/regression/tex/blitter.h
@@ -1,268 +0,0 @@
-//
-// Copyright (c) Blaise Tine.  All rights reserved.
-//
-//
-// Use of this sample source code is subject to the terms of the Microsoft
-// license agreement under which you licensed this sample source code. If
-// you did not accept the terms of the license agreement, you are not
-// authorized to use this sample source code. For the terms of the license,
-// please see the license agreement between you and Microsoft or, if applicable,
-// see the LICENSE.RTF on your install media or the root of your tools
-// installation.
-// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
-// INDEMNITIES.
-//
-#pragma once
-
-#include "surfacedesc.h"
-
-class BlitTable {
-public:
-  typedef int (*PfnCopy)(const SurfaceDesc &dstDesc, 
-                         uint32_t dstOffsetX,
-                         uint32_t dstOffsetY, 
-                         uint32_t copyWidth,
-                         uint32_t copyHeight, 
-                         const SurfaceDesc &srcDesc,
-                         uint32_t srcOffsetX, 
-                         uint32_t srcOffsetY);
-
-  BlitTable() {
-    for (uint32_t s = 0; s < FORMAT_COLOR_SIZE_; ++s) {
-      for (uint32_t d = 0; d < FORMAT_COLOR_SIZE_; ++d) {
-        copyFuncs_[s][d] = CopyInvalid;
-      }
-    }
-
-    for (uint32_t s = 0; s < FORMAT_COLOR_SIZE_; ++s) {
-      switch (s) {
-      case FORMAT_A8:
-      case FORMAT_L8:
-        copyFuncs_[s][s] = CopyFast<uint8_t>;
-        break;
-
-      case FORMAT_A8L8:
-        copyFuncs_[FORMAT_A8L8][FORMAT_A8] = Copy<FORMAT_A8L8, FORMAT_A8>;
-        copyFuncs_[FORMAT_A8L8][FORMAT_A8L8] = CopyFast<uint16_t>;
-        break;
-
-      case FORMAT_R5G6B5:
-        copyFuncs_[FORMAT_R5G6B5][FORMAT_L8] = Copy<FORMAT_R5G6B5, FORMAT_L8>;
-        copyFuncs_[FORMAT_R5G6B5][FORMAT_R5G6B5] = CopyFast<uint16_t>;
-        copyFuncs_[FORMAT_R5G6B5][FORMAT_R8G8B8] =
-            Copy<FORMAT_R5G6B5, FORMAT_R8G8B8>;
-        copyFuncs_[FORMAT_R5G6B5][FORMAT_B8G8R8] =
-            Copy<FORMAT_R5G6B5, FORMAT_B8G8R8>;
-        copyFuncs_[FORMAT_R5G6B5][FORMAT_A8B8G8R8] =
-            Copy<FORMAT_R5G6B5, FORMAT_A8B8G8R8>;
-        copyFuncs_[FORMAT_R5G6B5][FORMAT_A8R8G8B8] =
-            Copy<FORMAT_R5G6B5, FORMAT_A8R8G8B8>;
-        break;
-
-      case FORMAT_A1R5G5B5:
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_A8>;
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_L8] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_L8>;
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8L8] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_A8L8>;
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R8G8B8] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_R8G8B8>;
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8R8G8B8] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_A8R8G8B8>;
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R5G5B5A1] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_R5G5B5A1>;
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R4G4B4A4] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_R4G4B4A4>;
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_B8G8R8] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_B8G8R8>;
-        copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8B8G8R8] =
-            Copy<FORMAT_A1R5G5B5, FORMAT_A8B8G8R8>;
-        break;
-
-      case FORMAT_A4R4G4B4:
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_A8>;
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_L8] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_L8>;
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8L8] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_A8L8>;
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R8G8B8] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_R8G8B8>;
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8R8G8B8] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_A8R8G8B8>;
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R5G5B5A1] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_R5G5B5A1>;
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R4G4B4A4] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_R4G4B4A4>;
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_B8G8R8] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_B8G8R8>;
-        copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8B8G8R8] =
-            Copy<FORMAT_A4R4G4B4, FORMAT_A8B8G8R8>;
-        break;
-
-      case FORMAT_R8G8B8:
-        copyFuncs_[FORMAT_R8G8B8][FORMAT_L8] = Copy<FORMAT_R8G8B8, FORMAT_L8>;
-        copyFuncs_[FORMAT_R8G8B8][FORMAT_R5G6B5] =
-            Copy<FORMAT_R8G8B8, FORMAT_R5G6B5>;
-        copyFuncs_[FORMAT_R8G8B8][FORMAT_R8G8B8] = CopyFast<uint24_t>;
-        copyFuncs_[FORMAT_R8G8B8][FORMAT_B8G8R8] =
-            Copy<FORMAT_R8G8B8, FORMAT_B8G8R8>;
-        copyFuncs_[FORMAT_R8G8B8][FORMAT_A8B8G8R8] =
-            Copy<FORMAT_R8G8B8, FORMAT_A8B8G8R8>;
-        copyFuncs_[FORMAT_R8G8B8][FORMAT_A8R8G8B8] =
-            Copy<FORMAT_R8G8B8, FORMAT_A8R8G8B8>;
-        break;
-
-      case FORMAT_A8R8G8B8:
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_A8>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_L8] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_L8>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8L8] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_A8L8>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R5G6B5] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_R5G6B5>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R8G8B8] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_R8G8B8>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8R8G8B8] = CopyFast<uint32_t>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R5G5B5A1] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_R5G5B5A1>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R4G4B4A4] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_R4G4B4A4>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_B8G8R8] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_B8G8R8>;
-        copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8B8G8R8] =
-            Copy<FORMAT_A8R8G8B8, FORMAT_A8B8G8R8>;
-        break;
-
-      case FORMAT_R5G5B5A1:
-        copyFuncs_[FORMAT_R5G5B5A1][FORMAT_A8] =
-            Copy<FORMAT_R5G5B5A1, FORMAT_A8>;
-        copyFuncs_[FORMAT_R5G5B5A1][FORMAT_L8] =
-            Copy<FORMAT_R5G5B5A1, FORMAT_L8>;
-        copyFuncs_[FORMAT_R5G5B5A1][FORMAT_A8L8] =
-            Copy<FORMAT_R5G5B5A1, FORMAT_A8L8>;
-        copyFuncs_[FORMAT_R5G5B5A1][FORMAT_RGB] =
-            Copy<FORMAT_R5G5B5A1, FORMAT_RGB>;
-        copyFuncs_[FORMAT_R5G5B5A1][FORMAT_ARGB] =
-            Copy<FORMAT_R5G5B5A1, FORMAT_ARGB>;
-        break;
-
-      case FORMAT_R4G4B4A4:
-        copyFuncs_[FORMAT_R4G4B4A4][FORMAT_A8] =
-            Copy<FORMAT_R4G4B4A4, FORMAT_A8>;
-        copyFuncs_[FORMAT_R4G4B4A4][FORMAT_L8] =
-            Copy<FORMAT_R4G4B4A4, FORMAT_L8>;
-        copyFuncs_[FORMAT_R4G4B4A4][FORMAT_A8L8] =
-            Copy<FORMAT_R4G4B4A4, FORMAT_A8L8>;
-        copyFuncs_[FORMAT_R4G4B4A4][FORMAT_RGB] =
-            Copy<FORMAT_R4G4B4A4, FORMAT_RGB>;
-        copyFuncs_[FORMAT_R4G4B4A4][FORMAT_ARGB] =
-            Copy<FORMAT_R4G4B4A4, FORMAT_ARGB>;
-        break;
-
-      case FORMAT_B8G8R8:
-        copyFuncs_[FORMAT_B8G8R8][FORMAT_L8] = Copy<FORMAT_B8G8R8, FORMAT_L8>;
-        copyFuncs_[FORMAT_B8G8R8][FORMAT_RGB] = Copy<FORMAT_B8G8R8, FORMAT_RGB>;
-        break;
-
-      case FORMAT_A8B8G8R8:
-        copyFuncs_[FORMAT_A8B8G8R8][FORMAT_A8] =
-            Copy<FORMAT_A8B8G8R8, FORMAT_A8>;
-        copyFuncs_[FORMAT_A8B8G8R8][FORMAT_L8] =
-            Copy<FORMAT_A8B8G8R8, FORMAT_L8>;
-        copyFuncs_[FORMAT_A8B8G8R8][FORMAT_A8L8] =
-            Copy<FORMAT_A8B8G8R8, FORMAT_A8L8>;
-        copyFuncs_[FORMAT_A8B8G8R8][FORMAT_RGB] =
-            Copy<FORMAT_A8B8G8R8, FORMAT_RGB>;
-        copyFuncs_[FORMAT_A8B8G8R8][FORMAT_ARGB] =
-            Copy<FORMAT_A8B8G8R8, FORMAT_ARGB>;
-        break;
-      }
-    }
-  }
-
-  PfnCopy get(uint32_t srcFormat, uint32_t dstFormat) const {
-    assert(srcFormat < FORMAT_COLOR_SIZE_);
-    assert(dstFormat < FORMAT_COLOR_SIZE_);
-    return copyFuncs_[srcFormat][dstFormat];
-  }
-
-private:
-  template <ePixelFormat SrcFormat, ePixelFormat DstFormat>
-  static int Copy(const SurfaceDesc &dstDesc, 
-                  uint32_t dstOffsetX,
-                  uint32_t dstOffsetY, 
-                  uint32_t copyWidth,
-                  uint32_t copyHeight, 
-                  const SurfaceDesc &srcDesc,
-                  uint32_t srcOffsetX, 
-                  uint32_t srcOffsetY) {
-    auto srcBPP = TFormatInfo<SrcFormat>::CBSIZE;
-    auto dstBPP = TFormatInfo<DstFormat>::CBSIZE;
-    auto srcNextLine = srcDesc.Pitch;
-    auto dstNextLine = dstDesc.Pitch;
-
-    auto pbSrc = srcDesc.pBits + srcOffsetX * srcBPP + srcOffsetY * srcDesc.Pitch;
-    auto pbDst = dstDesc.pBits + dstOffsetX * dstBPP + dstOffsetY * dstDesc.Pitch;
-
-    while (copyHeight--) {
-      auto pSrc = reinterpret_cast<const typename TFormatInfo<SrcFormat>::TYPE *>(pbSrc);
-      for (auto *pDst = reinterpret_cast<typename TFormatInfo<DstFormat>::TYPE *>(
-                    pbDst),
-                *const pEnd = pDst + copyWidth;
-           pDst != pEnd; ++pDst, ++pSrc) {
-        auto tmp = Format::ConvertFrom<SrcFormat, true>(pSrc);
-        Format::ConvertTo<DstFormat>(pDst, tmp);
-      }
-
-      pbSrc += srcNextLine;
-      pbDst += dstNextLine;
-    }    
-    return 0;
-  }
-
-  template <typename Type>
-  static int CopyFast(const SurfaceDesc &dstDesc, 
-                      uint32_t dstOffsetX,
-                      uint32_t dstOffsetY, 
-                      uint32_t copyWidth,
-                      uint32_t copyHeight, 
-                      const SurfaceDesc &srcDesc,
-                      uint32_t srcOffsetX, 
-                      uint32_t srcOffsetY) {
-    auto nBPP = sizeof(Type);
-    auto srcNextLine = srcDesc.Pitch;
-    auto dstNextLine = dstDesc.Pitch;
-
-    auto pbSrc = srcDesc.pBits + srcOffsetX * nBPP + srcOffsetY * srcDesc.Pitch;
-    auto pbDst = dstDesc.pBits + dstOffsetX * nBPP + dstOffsetY * dstDesc.Pitch;
-
-    while (copyHeight--) {
-      auto pSrc = reinterpret_cast<const Type *>(pbSrc);
-      for (auto *pDst = reinterpret_cast<Type *>(pbDst), *const pEnd = pDst + copyWidth;
-           pDst != pEnd; ++pDst, ++pSrc) {
-        *pDst = *pSrc;
-      }
-      pbSrc += srcNextLine;
-      pbDst += dstNextLine;
-    }
-    return 0;
-  }
-
-  static int CopyInvalid(const SurfaceDesc & /*dstDesc*/,
-                         uint32_t /*dstOffsetX*/, 
-                         uint32_t /*dstOffsetY*/,
-                         uint32_t /*copyWidth*/, 
-                         uint32_t /*copyHeight*/,
-                         const SurfaceDesc & /*srcDesc*/,
-                         uint32_t /*srcOffsetX*/, 
-                         uint32_t /*srcOffsetY*/)
-  {
-    std::cout << "Error: invalid format" << std::endl;
-    return -1;
-  }
-
-  PfnCopy copyFuncs_[FORMAT_COLOR_SIZE_][FORMAT_COLOR_SIZE_];
-};
--- a/tests/regression/tex/color.h
+++ b/tests/regression/tex/color.h
@@ -1,68 +0,0 @@
-//
-// Copyright (c) Blaise Tine.  All rights reserved.
-//
-//
-// Use of this sample source code is subject to the terms of the Microsoft
-// license agreement under which you licensed this sample source code. If
-// you did not accept the terms of the license agreement, you are not
-// authorized to use this sample source code. For the terms of the license,
-// please see the license agreement between you and Microsoft or, if applicable,
-// see the LICENSE.RTF on your install media or the root of your tools
-// installation.
-// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
-// INDEMNITIES.
-//
-#pragma once
-
-#include <cstdint>
-#include <assert.h>
-
-struct ColorARGB {
-  union {
-    struct {
-      uint32_t value;
-    };
-    struct {
-      uint8_t b, g, r, a;
-    };
-    struct {
-      uint8_t m[4];
-    };
-  };
-
-  ColorARGB() {}
-
-  ColorARGB(int a, int r, int g, int b) {
-    assert((a >= 0) && (a <= 0xff));
-    assert((r >= 0) && (r <= 0xff));
-    assert((g >= 0) && (g <= 0xff));
-    assert((b >= 0) && (b <= 0xff));
-
-    this->b = static_cast<uint8_t>(b);
-    this->g = static_cast<uint8_t>(g);
-    this->r = static_cast<uint8_t>(r);
-    this->a = static_cast<uint8_t>(a);
-  }
-
-  ColorARGB(int r, int g, int b) {
-    assert((r >= 0) && (r <= 0xff));
-    assert((g >= 0) && (g <= 0xff));
-    assert((b >= 0) && (b <= 0xff));
-
-    this->b = static_cast<uint8_t>(b);
-    this->g = static_cast<uint8_t>(g);
-    this->r = static_cast<uint8_t>(r);
-  }
-
-  ColorARGB(int value) {
-    this->value = value;
-  }
-
-  void operator=(const ColorARGB &rhs) {
-    this->value = rhs.value;
-  }
-
-  operator uint32_t() const {
-    return this->value;
-  }
-};
--- a/tests/regression/tex/common.h
+++ b/tests/regression/tex/common.h
@@ -15,7 +15,6 @@ typedef struct {
  uint8_t   src_logwidth;
  uint8_t   src_logheight;
  uint32_t  src_addr;
-  float     lod;
  uint32_t  mip_offs[TEX_LOD_MAX+1];  
  uint32_t  dst_width;
  uint32_t  dst_height;
--- a/tests/regression/tex/format.h
+++ b/tests/regression/tex/format.h
--- a/tests/regression/tex/int24.h
+++ b/tests/regression/tex/int24.h
@@ -1,37 +0,0 @@
-//
-// Copyright (c) Blaise Tine.  All rights reserved.
-//
-//
-// Use of this sample source code is subject to the terms of the Microsoft
-// license agreement under which you licensed this sample source code. If
-// you did not accept the terms of the license agreement, you are not
-// authorized to use this sample source code. For the terms of the license,
-// please see the license agreement between you and Microsoft or, if applicable,
-// see the LICENSE.RTF on your install media or the root of your tools
-// installation.
-// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
-// INDEMNITIES.
-//
-#pragma once
-
-#include <cstdint>
-
-struct uint24_t {
-  uint8_t m[3];
-
-  explicit uint24_t(uint32_t value) {
-    m[0] = (value >> 0) & 0xff;
-    m[1] = (value >> 8) & 0xff;
-    m[2] = (value >> 16) & 0xff;
-  }
-
-  explicit uint24_t(uint8_t x, uint8_t y, uint8_t z) {
-    m[0] = x;
-    m[1] = y;
-    m[2] = z;
-  }
-
-  operator uint32_t() const {
-    return (m[2] << 16) | (m[1] << 8) | m[0];
-  }
-};
--- a/tests/regression/tex/kernel.c
+++ b/tests/regression/tex/kernel.c
@@ -10,6 +10,7 @@ typedef struct {
 	uint32_t tile_height;
  	float deltaX;
  	float deltaY;
+	float minification;
 } tile_arg_t;

 template <typename T, T Start, T End>
@@ -35,10 +36,10 @@ void kernel_body(int task_id, tile_arg_t* arg) {

 	uint8_t* dst_ptr = (uint8_t*)(state->dst_addr + xoffset * state->dst_stride + yoffset * state->dst_pitch);

-	Fixed<16> xlod(state->lod);
+	Fixed<16> xj(arg->minification);

-	/*vx_printf("task_id=%d, deltaX=%f, deltaY=%f, tile_width=%d, tile_height=%d\n", 
-		task_id, arg->deltaX, arg->deltaY, arg->tile_width, arg->tile_height);*/
+	/*vx_printf("task_id=%d, tile_width=%d, tile_height=%d, deltaX=%f, deltaY=%f, minification=%f\n", 
+	 	task_id, arg->tile_width, arg->tile_height, arg->deltaX, arg->deltaY, arg->minification);*/

 	float fv = (yoffset + 0.5f) * arg->deltaY;
 	for (uint32_t y = 0; y < arg->tile_height; ++y) {
@@ -47,13 +48,7 @@ void kernel_body(int task_id, tile_arg_t* arg) {
 		for (uint32_t x = 0; x < arg->tile_width; ++x) {
 			Fixed<TEX_FXD_FRAC> xu(fu);
 			Fixed<TEX_FXD_FRAC> xv(fv);
-			uint32_t color;
-		#ifdef ENABLE_SW
-			if (state->use_sw)
-				color = tex_load_sw(state, xu, xv, xlod);
-			else
-		#endif
-			color = tex_load_hw(state, xu, xv, xlod);						
+			uint32_t color = tex_load(state, xu, xv, xj);
 			//vx_printf("task_id=%d, x=%d, y=%d, fu=%f, fv=%f, xu=0x%x, xv=0x%x, color=0x%x\n", task_id, x, y, fu, fv, xu.data(), xv.data(), color);			
 			dst_row[x] = color;
 			fu += arg->deltaX;
@@ -76,7 +71,7 @@ int main() {
 	csr_write(CSR_TEX(0, TEX_STATE_ADDR),   arg->src_addr);
 	static_for_t<int, 0, TEX_LOD_MAX+1>()([&](int i) {
 		csr_write(CSR_TEX(0, TEX_STATE_MIPOFF(i)), arg->mip_offs[i]);
-	});	
+	});

 	tile_arg_t targ;
 	targ.state       = arg;
@@ -84,6 +79,14 @@ int main() {
 	targ.tile_height = (arg->dst_height + arg->num_tasks - 1) / arg->num_tasks;    
 	targ.deltaX      = 1.0f / arg->dst_width;
 	targ.deltaY      = 1.0f / arg->dst_height;
+
+	{
+		uint32_t src_width  = (1 << arg->src_logwidth);
+		uint32_t src_height = (1 << arg->src_logheight);
+		float width_ratio   = float(src_width) / arg->dst_width;
+		float height_ratio  = float(src_height) / arg->dst_height;
+		targ.minification   = std::max<float>(width_ratio, height_ratio);
+	}
 	
 	vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, &targ);
 	/*for (uint32_t t=0; t < arg->num_tasks; ++t) {		
--- a/tests/regression/tex/lupng.c
+++ b/tests/regression/tex/lupng.c
--- a/tests/regression/tex/lupng.h
+++ b/tests/regression/tex/lupng.h
@@ -1,186 +0,0 @@
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2014 Jan Solanti
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#pragma once
-
-#if defined(_MSC_VER) && (_MSC_VER < 1600)
-typedef __int8  int8_t;
-typedef __int16 int16_t;
-typedef __int32 int32_t;
-typedef unsigned __int8 uint8_t;
-typedef unsigned __int16 uint16_t;
-typedef unsigned __int32 uint32_t;
-#else
-#include <stdlib.h>
-#include <stdint.h>
-#endif
-
-typedef struct {
-    int32_t width;
-    int32_t height;
-    uint8_t channels;
-    uint8_t depth; /* must be 8 or 16 */
-    size_t dataSize;
-    uint8_t *data;
-} LuImage;
-
-typedef size_t (*PngReadProc)(void *outPtr, size_t size, size_t count, void *userPtr);
-typedef size_t (*PngWriteProc)(const void *inPtr, size_t size, size_t count, void *userPtr);
-typedef void*  (*PngAllocProc)(size_t size, void *userPtr);
-typedef void   (*PngFreeProc)(void *ptr, void *userPtr);
-typedef void   (*PngWarnProc)(void *userPtr, const char *fmt, ...);
-
-typedef struct {
-    /* loader */
-    PngReadProc readProc;
-    void *readProcUserPtr;
-    int skipSig;
-
-    /* writer */
-    PngWriteProc writeProc;
-    void *writeProcUserPtr;
-    int compressionLevel;
-
-    /* memory allocation */
-    PngAllocProc allocProc;
-    void *allocProcUserPtr;
-    PngFreeProc freeProc;
-    void *freeProcUserPtr;
-
-    /* warnings/error output */
-    PngWarnProc warnProc; /* set to NULL to disable output altogether */
-    void *warnProcUserPtr;
-
-    /* special case: avoid allocating a LuImage when loading or creating
-     * an image, just use this one */
-    LuImage *overrideImage;
-} LuUserContext;
-
-/**
- * Initializes a LuUserContext to use the defaul malloc implementation.
- *
- * @param userCtx the LuUserContext to initialize
- */
-void luUserContextInitDefault(LuUserContext *userCtx);
-
-/**
- * Creates a new Image object with the specified attributes.
- * The data store of the Image is allocated but its contents are undefined.
- * Only 8 and 16 bits deep images with 1-4 channels are supported.
- *
- * @param buffer pointer to an existing buffer (which may already contain the
- *               image data), or NULL to internally allocate a new buffer
- * @param userCtx the user context (with the memory allocator function
- *                pointers to use), or NULL to use the default allocator
- *                (malloc).
- */
-LuImage *luImageCreate(size_t width, size_t height, uint8_t channels, uint8_t depth,
-                       uint8_t *buffer, const LuUserContext *usrCtx);
-
-/**
- * Releases the memory associated with the given Image object.
- *
- * @param userCtx the user context (with the memory deallocator function
- *                pointers to use), or NULL to use the default deallocator
- *                (free). The deallocator should match the ones used for
- *                allocation.
- */
-void luImageRelease(LuImage *img, const LuUserContext *usrCtx);
-
-/**
- * Extracts the raw image buffer form a LuImage and releases the 
- * then-orphaned LuImage object. This can be used if you want to use
- * the image data in your own structures.
- *
- * @param userCtx the user context (with the memory deallocator function
- *                pointers to use), or NULL to use the default deallocator
- *                (free). The deallocator should match the ones used for
- *                allocation.
- */
-uint8_t *luImageExtractBufAndRelease(LuImage *img, const LuUserContext *userCtx);
-
-/**
- * Decodes a PNG image from a file
- *
- * @param filename the file name (optionally with full path) to read from.
-  * @param userCtx the user context (with the memory allocator function
- *                pointers to use), or NULL to use the default allocator
- *                (malloc).
- */
-LuImage *luPngReadFile(const char *filename, LuUserContext *userCtx);
-
-/**
- * Decodes a PNG image with the provided read function into a LuImage struct
- *
- * @param readProc a function pointer to a user-defined function to use for
- * reading the PNG data.
- * @param userPtr an opaque pointer provided as an argument to readProc
- * @param skipSig don't verify PNG signature - the bytes have already been
- * removed from the input stream
- */
-LuImage *luPngRead(PngReadProc readProc, void *userPtr, int skipSig);
-
-/**
- * Decodes a PNG image with the provided user context into a LuImage struct
- *
- * @param userCtx the LuUserContext to use
- */
-LuImage *luPngReadUC(const LuUserContext *userCtx);
-
-/**
- * Encodes a LuImage struct to PNG and writes it out to a file.
- *
- * @param filename the file name (optionally with full path) to write to.
- *                 Existing files will be overwritten!
- * @param img the LuImage to encode
- */
-int luPngWriteFile(const char *filename, const LuImage *img);
-
-/**
- * Encodes a LuImage struct to PNG and writes it out using a user-defined write
- * function.
- *
- * @param writeProc a function pointer to a user-defined function that will be
- * used for writing the final PNG data.
- * @param userPtr an opaque pointer provided as an argument to writeProc
- * @param img the LuImage to encode
- */
-int luPngWrite(PngWriteProc writeProc, void *userPtr, const LuImage *img);
-
-/**
- * Encodes a LuImage struct to PNG and writes it out with the provided user
- * context.
- *
- * @param userCtx the LuUserContext to use
- * @param img the LuImage to encode
- */
-int luPngWriteUC(const LuUserContext *userCtx, const LuImage *img);
-
-#ifdef __cplusplus
-}
-#endif
--- a/tests/regression/tex/main.cpp
+++ b/tests/regression/tex/main.cpp
@@ -9,6 +9,8 @@
 #include "common.h"
 #include "utils.h"

+using namespace cocogfx;
+
 #define RT_CHECK(_expr)                                         \
   do {                                                         \
     int _ret = _expr;                                          \
@@ -29,7 +31,6 @@ int filter  = 0;    // 0-> point, 1->bilinear, 2->trilinear
 float scale = 1.0f;
 int format  = 0;
 bool use_sw = false;
-float lod   = 1.0f;  // >= 1.0f 
 ePixelFormat eformat = FORMAT_A8R8G8B8;

 vx_device_h device = nullptr;
@@ -37,18 +38,18 @@ vx_buffer_h buffer = nullptr;

 static void show_usage() {
   std::cout << "Vortex Texture Test." << std::endl;
-   std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-l lod] [-z no_hw] [-h: help]" << std::endl;
+   std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-z no_hw] [-h: help]" << std::endl;
 }

 static void parse_args(int argc, char **argv) {
  int c;
-  while ((c = getopt(argc, argv, "zi:o:k:w:f:g:h?")) != -1) {
+  while ((c = getopt(argc, argv, "zi:o:k:w:f:g:s:h?")) != -1) {
    switch (c) {
    case 'i':
-       input_file = optarg;
+      input_file = optarg;
      break;
    case 'o':
-       output_file = optarg;
+      output_file = optarg;
      break;
    case 's':
      scale = std::stof(optarg, NULL);
@@ -56,9 +57,6 @@ static void parse_args(int argc, char **argv) {
    case 'w':
      wrap = std::atoi(optarg);
      break;
-    case 'l':
-      lod = std::stof(optarg, NULL);
-      break;
    case 'z':
      use_sw = true;
      break;
@@ -67,9 +65,11 @@ static void parse_args(int argc, char **argv) {
      switch (format) {
      case 0: eformat = FORMAT_A8R8G8B8; break;
      case 1: eformat = FORMAT_R5G6B5; break;
-      case 2: eformat = FORMAT_R4G4B4A4; break;
-      case 3: eformat = FORMAT_L8; break;
-      case 4: eformat = FORMAT_A8; break;
+      case 2: eformat = FORMAT_A1R5G5B5; break;
+      case 3: eformat = FORMAT_A4R4G4B4; break;
+      case 4: eformat = FORMAT_A8L8; break;
+      case 5: eformat = FORMAT_L8; break;
+      case 6: eformat = FORMAT_A8; break;
      default:
        std::cout << "Error: invalid format: " << format << std::endl;
        exit(1);
@@ -105,7 +105,9 @@ void cleanup() {
 int run_test(const kernel_arg_t& kernel_arg, 
             uint32_t buf_size, 
             uint32_t width, 
-             uint32_t height) {
+             uint32_t height,
+             uint32_t bpp) {
+  (void)bpp;
  auto time_start = std::chrono::high_resolution_clock::now();

  // start device
@@ -132,7 +134,7 @@ int run_test(const kernel_arg_t& kernel_arg,

  // save output image
  std::cout << "save output image" << std::endl;  
-  //dump_image(dst_pixels, width, height, bpp);
+  //dump_image(dst_pixels, width, height, bpp);  
  RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels, width, height));

  return 0;
@@ -151,11 +153,9 @@ int main(int argc, char *argv[]) {
  {
    std::vector<uint8_t> staging;  
    RT_CHECK(LoadImage(input_file, eformat, staging, &src_width, &src_height));  
-    
-    RT_CHECK(GenerateMipmaps(src_pixels, mip_offsets, staging, eformat, src_width, src_height));
-
-    //uint32_t src_bpp = Format::GetInfo(eformat).BytePerPixel;  
-    //dump_image(src_pixels, src_pixels.size() / src_bpp, 1, src_bpp);
+    uint32_t src_bpp = GetInfo(eformat).BytePerPixel;
+    //dump_image(staging, src_width, src_height, src_bpp);
+    RT_CHECK(GenerateMipmaps(src_pixels, mip_offsets, staging, eformat, src_width, src_height, src_width * src_bpp));    
  }

  // check power of two support
@@ -167,12 +167,6 @@ int main(int argc, char *argv[]) {
  uint32_t src_logwidth  = log2ceil(src_width);
  uint32_t src_logheight = log2ceil(src_height);

-  uint32_t src_max_lod = std::max(src_logwidth, src_logheight);
-  if (lod > src_max_lod) {
-    std::cout << "Error: out-of-bound level-of-detail: lod=" << lod << ", source image=" << src_max_lod << std::endl;
-    return -1;
-  }
-
  uint32_t src_bufsize = src_pixels.size();

  uint32_t dst_width   = (uint32_t)(src_width * scale);
@@ -227,7 +221,6 @@ int main(int argc, char *argv[]) {
    kernel_arg.src_logwidth  = src_logwidth;
    kernel_arg.src_logheight = src_logheight;
    kernel_arg.src_addr      = src_addr;
-    kernel_arg.lod           = lod;

    for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
      assert(i < TEX_LOD_MAX);
@@ -267,7 +260,7 @@ int main(int argc, char *argv[]) {

  // run tests
  std::cout << "run tests" << std::endl;
-  RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height));
+  RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height, dst_bpp));

  // cleanup
  std::cout << "cleanup" << std::endl;  
--- a/tests/regression/tex/surfacedesc.h
+++ b/tests/regression/tex/surfacedesc.h
@@ -1,25 +0,0 @@
-//
-// Copyright (c) Blaise Tine.  All rights reserved.
-//
-//
-// Use of this sample source code is subject to the terms of the Microsoft
-// license agreement under which you licensed this sample source code. If
-// you did not accept the terms of the license agreement, you are not
-// authorized to use this sample source code. For the terms of the license,
-// please see the license agreement between you and Microsoft or, if applicable,
-// see the LICENSE.RTF on your install media or the root of your tools
-// installation.
-// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
-// INDEMNITIES.
-//
-#pragma once
-
-#include "format.h"
-
-struct SurfaceDesc {
-  ePixelFormat Format;
-  uint8_t *pBits;
-  uint32_t Width;
-  uint32_t Height;
-  uint32_t Pitch;
-};
--- a/tests/regression/tex/texsw.h
+++ b/tests/regression/tex/texsw.h
@@ -4,14 +4,30 @@
 #include <texturing.h>
 #include "common.h"

-inline uint32_t texel_read(uint8_t* address, uint32_t stride) {
+using namespace cocogfx;
+
+inline void  texel_read(uint32_t* texels,
+                        uint8_t** addresses,
+                        uint32_t count,
+                        uint32_t stride) {
    switch (stride) {
-    case 1: return *(uint8_t*)address;
-    case 2: return *(uint16_t*)address;
-    case 4: return *(uint32_t*)address;
+    case 1: 
+        for (uint32_t i = 0; i < count; ++i) {
+            texels[i] = *(uint8_t*)addresses[i];
+        }
+        break;
+    case 2: 
+        for (uint32_t i = 0; i < count; ++i) {
+            texels[i] = *(uint16_t*)addresses[i];
+        }
+        break;
+    case 4: 
+        for (uint32_t i = 0; i < count; ++i) {
+            texels[i] = *(uint32_t*)addresses[i];
+        }
+        break;
    default: 
        std::abort();
-        return 0;
    }
 }

@@ -34,32 +50,35 @@ inline uint32_t vx_tex_sw(kernel_arg_t* state,
        // addressing
        uint32_t offset00, offset01, offset10, offset11;
        uint32_t alpha, beta;
+        uint8_t* addr[4];
+        uint32_t texel[4];
+
        TexAddressLinear(xu, xv, log_width, log_height, wrapu, wrapv, 
            &offset00, &offset01, &offset10, &offset11, &alpha, &beta);

-        uint8_t* addr00 = base_addr + offset00 * stride;
-        uint8_t* addr01 = base_addr + offset01 * stride;
-        uint8_t* addr10 = base_addr + offset10 * stride;
-        uint8_t* addr11 = base_addr + offset11 * stride;
+        addr[0] = base_addr + offset00 * stride;
+        addr[1] = base_addr + offset01 * stride;
+        addr[2] = base_addr + offset10 * stride;
+        addr[3] = base_addr + offset11 * stride;

-        // memory lookup
-        uint32_t texel00 = texel_read(addr00, stride);
-        uint32_t texel01 = texel_read(addr01, stride);
-        uint32_t texel10 = texel_read(addr10, stride);
-        uint32_t texel11 = texel_read(addr11, stride);
+        // memory fetch
+        texel_read(texel, addr, 4, stride);

        // filtering
        color = TexFilterLinear(
-            format, texel00, texel01, texel10, texel11, alpha, beta);
+            format, texel[0], texel[1], texel[2], texel[3], alpha, beta);
    } else {
        // addressing
        uint32_t offset;
+        uint8_t* addr;
+        uint32_t texel;
+
        TexAddressPoint(xu, xv, log_width, log_height, wrapu, wrapv, &offset);
        
-        uint8_t* addr = base_addr + offset * stride;
+        addr = base_addr + offset * stride;
        
-        // memory lookup
-        uint32_t texel = texel_read(addr, stride);
+        // memory fetch
+        texel_read(&texel, &addr, 1, stride);

        // filtering
        color = TexFilterPoint(format, texel);
@@ -67,56 +86,40 @@ inline uint32_t vx_tex_sw(kernel_arg_t* state,
    return color;
 }

-inline uint32_t tex_load_hw(kernel_arg_t* state, 
-                            Fixed<TEX_FXD_FRAC> xu, 
-                            Fixed<TEX_FXD_FRAC> xv, 
-                            Fixed<16> xlod) {
+inline uint32_t tex_load(kernel_arg_t* state, 
+                         Fixed<TEX_FXD_FRAC> xu,
+                         Fixed<TEX_FXD_FRAC> xv,
+                         Fixed<16> xj) {
    uint32_t color;
-    int32_t ilod = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
-    uint32_t lod = std::min<uint32_t>(log2floor(ilod) - 16, TEX_LOD_MAX);
+    uint32_t j = std::max<int32_t>(xj.data(), Fixed<16>::ONE);
+    uint32_t l = std::min<uint32_t>(log2floor(j) - 16, TEX_LOD_MAX);
    if (state->filter == 2) {        
-        uint32_t lod_n  = std::min<uint32_t>(lod + 1, TEX_LOD_MAX);
-        uint32_t frac   = ilod >> (lod + 16 - 8);
-        uint32_t texel0 = vx_tex(0, xu.data(), xv.data(), lod); 
-        uint32_t texel1 = vx_tex(0, xu.data(), xv.data(), lod_n);
+        uint32_t ln = std::min<uint32_t>(l + 1, TEX_LOD_MAX);
+        uint32_t f  = (j - (1 << (l + 16))) >> (l + 16 - 8);
+        uint32_t texel0, texel1;
+        if (state->use_sw) {
+            texel0 = vx_tex_sw(state, xu, xv, l);  
+            texel1 = vx_tex_sw(state, xu, xv, ln);
+        } else {
+            texel0 = vx_tex(0, xu.data(), xv.data(), l);
+            texel1 = vx_tex(0, xu.data(), xv.data(), ln);
+        }
        uint32_t cl, ch;
        {
-            uint32_t c0l, c0h;  
-            uint32_t c1l, c1h;
-            Unpack8888(TexFormat::R8G8B8A8, texel0, &c0l, &c0h);
-            Unpack8888(TexFormat::R8G8B8A8, texel1, &c1l, &c1h);
-            Lerp8888(c0l, c0h, c1l, c1h, frac, &cl, &ch);
+            uint32_t c0l, c0h, c1l, c1h;
+            Unpack8888(texel0, &c0l, &c0h);
+            Unpack8888(texel1, &c1l, &c1h);
+            cl = Lerp8888(c0l, c1l, f);
+            ch = Lerp8888(c0h, c1h, f);
        }
-        color = Pack8888(TexFormat::R8G8B8A8, cl, ch);
+        color = Pack8888(cl, ch);
+        //vx_printf("j=0x%x, l=%d, ln=%d, f=%d, texel0=0x%x, texel1=0x%x, color=0x%x\n", j, l, ln, f, texel0, texel1, color);
    } else {
-        color = vx_tex(0, xu.data(), xv.data(), lod);
-    }
-    return color;
-}
-
-inline uint32_t tex_load_sw(kernel_arg_t* state, 
-                            Fixed<TEX_FXD_FRAC> xu, 
-                            Fixed<TEX_FXD_FRAC> xv, 
-                            Fixed<16> xlod) {
-    uint32_t color;
-    int32_t ilod = std::max<int32_t>(xlod.data(), Fixed<16>::ONE);
-    uint32_t lod = std::min<uint32_t>(log2floor(ilod) - 16, TEX_LOD_MAX);
-    if (state->filter == 2) {        
-        uint32_t lod_n  = std::min<uint32_t>(lod + 1, TEX_LOD_MAX);
-        uint32_t frac   = ilod >> (lod + 16 - 8);
-        uint32_t texel0 = vx_tex_sw(state, xu, xv, lod);  
-        uint32_t texel1 = vx_tex_sw(state, xu, xv, lod_n);
-        uint32_t cl, ch;
-        {
-            uint32_t c0l, c0h;  
-            uint32_t c1l, c1h;
-            Unpack8888(TexFormat::R8G8B8A8, texel0, &c0l, &c0h);
-            Unpack8888(TexFormat::R8G8B8A8, texel1, &c1l, &c1h);
-            Lerp8888(c0l, c0h, c1l, c1h, frac, &cl, &ch);
+        if (state->use_sw) {
+            color = vx_tex_sw(state, xu, xv, l);
+        } else {
+            color = vx_tex(0, xu.data(), xv.data(), l);
        }
-        color = Pack8888(TexFormat::R8G8B8A8, cl, ch);
-    } else {
-        color = vx_tex_sw(state, xu, xv, lod);
    }
    return color;
 }
--- a/tests/regression/tex/tga.cpp
+++ b/tests/regression/tex/tga.cpp
@@ -1,122 +0,0 @@
-#include "tga.h"
-#include <fstream>
-#include <iostream>
-#include "format.h"
-
-struct __attribute__((__packed__)) tga_header_t {
-  int8_t idlength;
-  int8_t colormaptype;
-  int8_t imagetype;
-  int16_t colormaporigin;
-  int16_t colormaplength;
-  int8_t colormapdepth;
-  int16_t xoffset;
-  int16_t yoffset;
-  int16_t width;
-  int16_t height;
-  int8_t bitsperpixel;
-  int8_t imagedescriptor;
-};
-
-int LoadTGA(const char *filename, 
-            std::vector<uint8_t> &pixels, 
-            uint32_t *width, 
-            uint32_t *height,
-            uint32_t *bpp) {
-  std::ifstream ifs(filename, std::ios::in | std::ios::binary);
-  if (!ifs.is_open()) {
-    std::cerr << "couldn't open file: " << filename << "!" << std::endl;
-    return -1;
-  }
-
-  tga_header_t header;
-  ifs.read(reinterpret_cast<char *>(&header), sizeof(tga_header_t));
-  if (ifs.fail()) {
-    std::cerr << "invalid TGA file header!" << std::endl;
-    return -1;
-  }
-
-  if (header.imagetype != 2) {
-    std::cerr << "unsupported TGA encoding format!" << std::endl;
-    return -1;
-  }
-
-  ifs.seekg(header.idlength, std::ios::cur); // skip string
-  if (ifs.fail()) {
-    std::cerr << "invalid TGA file!" << std::endl;
-    return -1;
-  }
-
-  switch (header.bitsperpixel) {
-  case 16:
-  case 24:
-  case 32: {
-    // Read pixels data
-    auto stride = header.bitsperpixel / 8;
-    pixels.resize(stride * header.width * header.height);
-    ifs.read((char*)pixels.data(), pixels.size());
-    if (ifs.fail()) {
-      std::cerr << "invalid TGA file!" << std::endl;
-      return -1;
-    }   
-    *bpp = stride; 
-    break;
-  }
-  default:
-    std::cerr << "unsupported TGA bitsperpixel!" << std::endl;
-    return -1;
-  } 
-
-  *width  = header.width;
-  *height = header.height; 
-
-  return 0;
-}
-
-int SaveTGA(const char *filename, 
-            const std::vector<uint8_t> &pixels, 
-            uint32_t width, 
-            uint32_t height, 
-            uint32_t bpp) {              
-  std::ofstream ofs(filename, std::ios::out | std::ios::binary);
-  if (!ofs.is_open()) {
-    std::cerr << "couldn't create file: " << filename << "!" << std::endl;
-    return -1;
-  }
-
-  if (bpp < 2 || bpp > 4) {        
-    std::cerr << "unsupported pixel stride: " << bpp << "!" << std::endl;
-    return -1;
-  }
-
-  tga_header_t header;
-  header.idlength = 0;
-  header.colormaptype = 0; // no palette
-  header.imagetype = 2; // color mapped data
-  header.colormaporigin = 0;
-  header.colormaplength = 0;
-  header.colormapdepth = 0;
-  header.xoffset = 0;
-  header.yoffset = 0;
-  header.width = width;
-  header.height = height;
-  header.bitsperpixel = bpp * 8;
-  header.imagedescriptor = 0;
-
-  // write header
-  ofs.write(reinterpret_cast<char *>(&header), sizeof(tga_header_t));
-
-  // write pixel data
-  uint32_t pitch = bpp * width;
-  const uint8_t* pixel_bytes = pixels.data() + (height - 1) * pitch;
-  for (uint32_t y = 0; y < height; ++y) {
-    const uint8_t* pixel_row = pixel_bytes;
-    for (uint32_t x = 0; x < width; ++x) {
-      ofs.write((const char*)pixel_row, bpp);      
-      pixel_row += bpp;
-    }
-    pixel_bytes -= pitch;
-  }
-
-  return 0;
-}
--- a/tests/regression/tex/tga.h
+++ b/tests/regression/tex/tga.h
@@ -1,14 +0,0 @@
-#include <cstdint>
-#include <vector>
-
-int LoadTGA(const char *filename, 
-            std::vector<uint8_t> &pixels, 
-            uint32_t *width,
-            uint32_t *height,
-            uint32_t *bpp);
-
-int SaveTGA(const char *filename, 
-            const std::vector<uint8_t> &pixels, 
-            uint32_t width,
-            uint32_t height, 
-            uint32_t bpp);
--- a/tests/regression/tex/utils.cpp
+++ b/tests/regression/tex/utils.cpp
@@ -1,10 +1,12 @@
 #include "utils.h"
 #include <assert.h>
-#include <cstring>
-#include "blitter.h"
-#include "format.h"
-#include "tga.h"
-#include "lupng.h"
+#include <string>
+#include <iostream>
+#include <iomanip>
+#include <tga.h>
+#include <png.h>
+
+using namespace cocogfx;

 std::string getFileExt(const std::string& str) {
   auto i = str.rfind('.');
@@ -41,22 +43,9 @@ int LoadImage(const char *filename,
      return ret;
  } else 
  if (iequals(ext, "png")) {
-    auto image = luPngReadFile(filename, NULL);
-    if (image == NULL)
-      return -1;
-    if (image->depth != 8 
-      || (image->channels != 3 
-       && image->channels != 4)) {
-      luImageRelease(image, NULL);
-      std::cerr << "invalid png file format!" << std::endl;  
-      return -1;
-    }
-    pixels.resize(image->channels * image->width * image->height);
-    memcpy(pixels.data(), image->data, pixels.size());
-    img_width  = image->width;
-    img_height = image->height;
-    img_bpp    = image->channels;
-    luImageRelease(image, NULL);
+    int ret = LoadPNG(filename, pixels, &img_width, &img_height, &img_bpp);
+    if (ret)
+      return ret;
  } else {
    std::cerr << "invalid file extension: " << ext << "!" << std::endl;
    return -1;
@@ -83,7 +72,7 @@ int LoadImage(const char *filename,
  if (img_format != format) {
    // format conversion to RGBA
    std::vector<uint8_t> staging;    
-    int ret = ConvertImage(staging, pixels, img_width, img_height, img_format, format);
+    int ret = ConvertImage(staging, format, pixels, img_format, img_width, img_height, img_width * img_bpp);
    if (ret)
      return ret;
    pixels.swap(staging);
@@ -100,19 +89,13 @@ int SaveImage(const char *filename,
              const std::vector<uint8_t> &pixels, 
              uint32_t width,
              uint32_t height) {
-  uint32_t bpp = Format::GetInfo(format).BytePerPixel;
+  uint32_t bpp = GetInfo(format).BytePerPixel;
  auto ext = getFileExt(filename);
  if (iequals(ext, "tga")) {
    return SaveTGA(filename, pixels, width, height, bpp);
  } else 
  if (iequals(ext, "png")) {
-    LuImage image;
-    image.width    = width;
-    image.height   = height;
-    image.depth    = 8;
-    image.channels = bpp;
-    image.data     = (uint8_t*)pixels.data();
-    return luPngWriteFile(filename, &image);
+    return SavePNG(filename, pixels, width, height, bpp);
  } else {
    std::cerr << "invalid file extension: " << ext << "!" << std::endl;
    return -1;
@@ -132,171 +115,8 @@ void dump_image(const std::vector<uint8_t>& pixels, uint32_t width, uint32_t hei
        pixel32 |= pixel8 << (b * 8);
      }
      if (x) std::cout << ", ";
-      std::cout << std::hex << pixel32;
+      std::cout << std::hex << std::setw(bpp * 2) << std::setfill('0') << pixel32;
    }
    std::cout << std::endl;
  }
-}
-
-int CopyBuffers(SurfaceDesc &dstDesc, 
-                int32_t dstOffsetX,
-                int32_t dstOffsetY, 
-                uint32_t copyWidth, 
-                uint32_t copyHeight,
-                const SurfaceDesc &srcDesc, 
-                int32_t srcOffsetX,                
-                int32_t srcOffsetY) {
-
-  static const BlitTable s_blitTable;
-
-  if ((srcOffsetX >= (int32_t)srcDesc.Width) || (srcOffsetY >= (int32_t)srcDesc.Height) ||
-      (dstOffsetX >= (int32_t)dstDesc.Width) || (dstOffsetY >= (int32_t)dstDesc.Height)) {
-    return -1;
-  }
-
-  if (copyWidth > dstDesc.Width) {
-    copyWidth = dstDesc.Width;
-  }
-
-  if (copyWidth > srcDesc.Width) {
-    copyWidth = srcDesc.Width;
-  }
-
-  if (copyHeight > dstDesc.Height) {
-    copyHeight = dstDesc.Height;
-  }
-
-  if (copyHeight > srcDesc.Height) {
-    copyHeight = srcDesc.Height;
-  }
-
-  return s_blitTable.get(srcDesc.Format, dstDesc.Format)(
-    dstDesc, dstOffsetX, dstOffsetY, copyWidth, copyHeight, srcDesc,
-    srcOffsetX, srcOffsetY);
-}
-
-int ConvertImage(std::vector<uint8_t>& dst_pixels,
-                 const std::vector<uint8_t>& src_pixels,
-                 uint32_t width,
-                 uint32_t height,
-                 ePixelFormat src_format,
-                 ePixelFormat dst_format) {
-
-  uint32_t src_pitch = Format::GetInfo(src_format).BytePerPixel * width;
-  uint32_t dst_pitch = Format::GetInfo(dst_format).BytePerPixel * width;
-
-  dst_pixels.resize(dst_pitch * height);
-
-  SurfaceDesc srcDesc{src_format, (uint8_t*)src_pixels.data(), width, height, src_pitch};            
-  SurfaceDesc dstDesc{dst_format, dst_pixels.data(), width, height, dst_pitch};
-
-  return CopyBuffers(dstDesc, 0, 0, width, height, srcDesc, 0, 0);
-}
-
-
-
-int GenerateMipmaps(std::vector<uint8_t>& dst_pixels,
-                    std::vector<uint32_t>& mip_offsets,
-                    const std::vector<uint8_t>& src_pixels,
-                    ePixelFormat format,
-                    uint32_t src_width,
-                    uint32_t src_height) {
-  std::vector<uint8_t> src_staging, dst_staging;
-  const std::vector<uint8_t> *pSrcPixels;
-  std::vector<uint8_t> *pDstPixels;
-
-  // convert source image if needed
-  bool need_conversion = (format != FORMAT_A8R8G8B8);
-  if (need_conversion) {
-    ConvertImage(src_staging, src_pixels, src_width, src_height, format, FORMAT_A8R8G8B8);
-    pSrcPixels = &src_staging;
-    pDstPixels = &dst_staging;
-  } else {
-    pSrcPixels = &src_pixels;
-    pDstPixels = &dst_pixels;
-  }
-
-  uint32_t src_logwidth  = log2ceil(src_width);
-  uint32_t src_logheight = log2ceil(src_height);
-  uint32_t max_lod       = std::max(src_logwidth, src_logheight) + 1;
-
-  mip_offsets.resize(max_lod);
-
-  // Calculate mipmaps buffer size
-  uint32_t dst_height = 1;
-  uint32_t dst_width = 0;
-  for (uint32_t lod = 0, w = src_width, h = src_height; lod < max_lod; ++lod) {
-    assert((w > 0) || (w > 0));
-    uint32_t pw = std::max<int>(w, 1);
-    uint32_t ph = std::max<int>(h, 1);
-    mip_offsets.at(lod) = dst_width;
-    dst_width += pw * ph;
-    w >>= 1;
-    h >>= 1;
-  }
-
-  // allocate mipmap
-  pDstPixels->resize(dst_width * 4);
-
-  // generate mipmaps  
-  {
-    auto pSrc = reinterpret_cast<const uint32_t*>(pSrcPixels->data());
-    auto pDst = reinterpret_cast<uint32_t*>(pDstPixels->data());
-
-    // copy level 0
-    memcpy(pDst, pSrc, pSrcPixels->size());
-    assert(pSrcPixels->size() == 4 * src_width * src_height);
-    pSrc = pDst;
-    pDst += src_width * src_height;    
-
-    // copy lower levels
-    for (uint32_t lod = 1, w = (src_width/2), h = (src_height/2); lod < max_lod;) {
-      assert((w > 0) || (w > 0));
-      uint32_t pw = std::max<int>(w, 1);
-      uint32_t ph = std::max<int>(h, 1);
-      for (uint32_t y = 0; y < pw; ++y) {
-        auto v0 = 2 * y;
-        auto v1 = 2 * y + ((ph > 1) ? 1 : 0);
-        auto pSrc0 = pSrc + v0 * (2 * pw);
-        auto pSrc1 = pSrc + v1 * (2 * pw);
-
-        for (uint32_t x = 0; x <pw; ++x) {
-          auto u0 = 2 * x;
-          auto u1 = 2 * x + ((pw > 1) ? 1 : 0);
-
-          auto c00 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc0 + u0);
-          auto c01 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc0 + u1);
-          auto c10 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc1 + u0);
-          auto c11 = Format::ConvertFrom<FORMAT_A8R8G8B8, false>(pSrc1 + u1);
-
-          const ColorARGB color((c00.a + c01.a + c10.a + c11.a+2) >> 2,
-                                (c00.r + c01.r + c10.r + c11.r+2) >> 2,
-                                (c00.g + c01.g + c10.g + c11.g+2) >> 2,
-                                (c00.b + c01.b + c10.b + c11.b+2) >> 2);
-                                
-          uint32_t ncolor;
-          Format::ConvertTo<FORMAT_A8R8G8B8>(&ncolor, color);
-          pDst[x + y * pw] = ncolor;
-        }
-      } 
-      ++lod; 
-      pSrc = pDst;
-      pDst += pw * ph;
-      w >>= 1;
-      h >>= 1;  
-    }
-    assert((pDst - reinterpret_cast<uint32_t*>(pDstPixels->data())) == dst_width);
-  }
-
-  // convert destination image if needed
-  if (need_conversion) {
-    ConvertImage(dst_staging, dst_staging, dst_width, dst_height, FORMAT_A8R8G8B8, format);
-  }
-
-  uint32_t bpp = Format::GetInfo(format).BytePerPixel;
-  for (auto& offset : mip_offsets) {
-    offset *= bpp;
-  }
-
-  return 0;
 }
--- a/tests/regression/tex/utils.h
+++ b/tests/regression/tex/utils.h
@@ -1,44 +1,21 @@
 #include <cstdint>
 #include <vector>
-#include <iostream>
+#include <format.h>
+#include <blitter.h>
 #include <bitmanip.h>
-#include "surfacedesc.h"

 int LoadImage(const char *filename,
-              ePixelFormat format,
+              cocogfx::ePixelFormat format,
              std::vector<uint8_t> &pixels,
              uint32_t *width,
              uint32_t *height);

 int SaveImage(const char *filename,
-              ePixelFormat format,
+              cocogfx::ePixelFormat format,
              const std::vector<uint8_t> &pixels,
              uint32_t width,
              uint32_t height);

-int CopyBuffers(SurfaceDesc &dstDesc, 
-                int32_t dstOffsetX,
-                int32_t dstOffsetY, 
-                uint32_t copyWidth, 
-                uint32_t copyHeight,
-                const SurfaceDesc &srcDesc, 
-                int32_t srcOffsetX,
-                int32_t srcOffsetY);
-
-int ConvertImage(std::vector<uint8_t>& dst_pixels,
-                 const std::vector<uint8_t>& src_pixels,
-                 uint32_t width,
-                 uint32_t height,
-                 ePixelFormat src_format,
-                 ePixelFormat dst_format);
-
-int GenerateMipmaps(std::vector<uint8_t>& dst_pixels,
-                    std::vector<uint32_t>& mip_offsets,
-                    const std::vector<uint8_t>& src_pixels,
-                    ePixelFormat format,
-                    uint32_t src_width,
-                    uint32_t src_height);
-
 void dump_image(const std::vector<uint8_t>& pixels, 
                uint32_t width, 
                uint32_t height,