diff --git a/README.md b/README.md index 71ba041e..80beccea 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Directory structure - benchmarks: OpenCL and RISC-V benchmarks -- docs: documentation. +- docs: [documentation](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Vortex.md). - hw: hardware sources. diff --git a/doc/Simulation.md b/doc/Simulation.md index 3b66a14f..b6861628 100644 --- a/doc/Simulation.md +++ b/doc/Simulation.md @@ -33,3 +33,19 @@ Example use of command line arguments: Run the sgemm benchmark using the vlsim d $ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=4 --driver=vlsim --app=sgemm +Output from terminal: +``` +Create context +Create program from kernel source +Upload source buffers +Execute the kernel +Elapsed time: 2463 ms +Download destination buffer +Verify result +PASSED! +PERF: core0: instrs=90802, cycles=52776, IPC=1.720517 +PERF: core1: instrs=90693, cycles=53108, IPC=1.707709 +PERF: core2: instrs=90849, cycles=53107, IPC=1.710678 +PERF: core3: instrs=90836, cycles=50347, IPC=1.804199 +PERF: instrs=363180, cycles=53108, IPC=6.838518 +``` \ No newline at end of file diff --git a/doc/Vortex.md b/doc/Vortex.md new file mode 100644 index 00000000..36846b30 --- /dev/null +++ b/doc/Vortex.md @@ -0,0 +1,30 @@ +# Vortex Documentation + +### Table of Contents + +- Vortex Architecture +- Vortex Software +- [Vortex Simulation](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Simulation.md) +- [FPGA](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Flubber_FPGA_Startup_Guide.md) +- Debugging +- Useful Links + +### Quick Start + +Setup Vortex environment: +``` +$ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain +$ export PATH=:/opt/verilator/bin:$PATH +$ export VERILATOR_ROOT=/opt/verilator +``` + +Test Vortex with different drivers and configurations: +- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads + + $ ./ci/blackbox.sh --clusters=2 --cores=2 --warps=2 --threads=4 --driver=rtlsim --app=basic +- Run demo driver test with vlsim driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads + + $ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=2 --driver=vlsim --app=demo +- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads + + $ ./ci/blackbox.sh --clusters=4 --cores=4 --warps=8 --threads=6 --driver=simx --app=dogfood \ No newline at end of file diff --git a/driver/tests/tex_demo/Makefile b/driver/tests/tex_demo/Makefile index 2c75c070..9f910030 100644 --- a/driver/tests/tex_demo/Makefile +++ b/driver/tests/tex_demo/Makefile @@ -17,8 +17,8 @@ VX_LDFLAGS += -lm VX_SRCS = kernel.c -#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors +#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -Wfatal-errors CXXFLAGS += -I../../include diff --git a/driver/tests/tex_demo/blitter.h b/driver/tests/tex_demo/blitter.h index 7116fc92..1e50dfd6 100644 --- a/driver/tests/tex_demo/blitter.h +++ b/driver/tests/tex_demo/blitter.h @@ -1,23 +1,23 @@ #include "format.h" struct SurfaceDesc { - uint8_t Format; + ePixelFormat Format; uint8_t *pBits; - int32_t Width; - int32_t Height; - int32_t Pitch; + uint32_t Width; + uint32_t Height; + uint32_t Pitch; }; class BlitTable { public: - typedef void (*PfnCopy)(const SurfaceDesc &dstDesc, - uint32_t dstOffsetX, - uint32_t dstOffsetY, - uint32_t copyWidth, - uint32_t copyHeight, - const SurfaceDesc &srcDesc, - uint32_t srcOffsetX, - uint32_t srcOffsetY); + typedef int (*PfnCopy)(const SurfaceDesc &dstDesc, + uint32_t dstOffsetX, + uint32_t dstOffsetY, + uint32_t copyWidth, + uint32_t copyHeight, + const SurfaceDesc &srcDesc, + uint32_t srcOffsetX, + uint32_t srcOffsetY); BlitTable() { for (uint32_t s = 0; s < FORMAT_COLOR_SIZE_; ++s) { @@ -183,14 +183,14 @@ public: private: template - static void Copy(const SurfaceDesc &dstDesc, - uint32_t dstOffsetX, - uint32_t dstOffsetY, - uint32_t copyWidth, - uint32_t copyHeight, - const SurfaceDesc &srcDesc, - uint32_t srcOffsetX, - uint32_t srcOffsetY) { + static int Copy(const SurfaceDesc &dstDesc, + uint32_t dstOffsetX, + uint32_t dstOffsetY, + uint32_t copyWidth, + uint32_t copyHeight, + const SurfaceDesc &srcDesc, + uint32_t srcOffsetX, + uint32_t srcOffsetY) { auto srcBPP = TFormatInfo::CBSIZE; auto dstBPP = TFormatInfo::CBSIZE; auto srcNextLine = srcDesc.Pitch; @@ -211,14 +211,19 @@ private: pbSrc += srcNextLine; pbDst += dstNextLine; - } + } + return 0; } template - static void CopyFast(const SurfaceDesc &dstDesc, uint32_t dstOffsetX, - uint32_t dstOffsetY, uint32_t copyWidth, - uint32_t copyHeight, const SurfaceDesc &srcDesc, - uint32_t srcOffsetX, uint32_t srcOffsetY) { + static int CopyFast(const SurfaceDesc &dstDesc, + uint32_t dstOffsetX, + uint32_t dstOffsetY, + uint32_t copyWidth, + uint32_t copyHeight, + const SurfaceDesc &srcDesc, + uint32_t srcOffsetX, + uint32_t srcOffsetY) { auto nBPP = sizeof(Type); auto srcNextLine = srcDesc.Pitch; auto dstNextLine = dstDesc.Pitch; @@ -235,18 +240,20 @@ private: pbSrc += srcNextLine; pbDst += dstNextLine; } + return 0; } - static void CopyInvalid(const SurfaceDesc & /*dstDesc*/, - uint32_t /*dstOffsetX*/, - uint32_t /*dstOffsetY*/, - uint32_t /*copyWidth*/, - uint32_t /*copyHeight*/, - const SurfaceDesc & /*srcDesc*/, - uint32_t /*srcOffsetX*/, - uint32_t /*srcOffsetY*/) + static int CopyInvalid(const SurfaceDesc & /*dstDesc*/, + uint32_t /*dstOffsetX*/, + uint32_t /*dstOffsetY*/, + uint32_t /*copyWidth*/, + uint32_t /*copyHeight*/, + const SurfaceDesc & /*srcDesc*/, + uint32_t /*srcOffsetX*/, + uint32_t /*srcOffsetY*/) { - std::abort(); + std::cout << "Error: invalid format" << std::endl; + return -1; } PfnCopy copyFuncs_[FORMAT_COLOR_SIZE_][FORMAT_COLOR_SIZE_]; diff --git a/driver/tests/tex_demo/format.h b/driver/tests/tex_demo/format.h index fd06d172..4ee8268e 100644 --- a/driver/tests/tex_demo/format.h +++ b/driver/tests/tex_demo/format.h @@ -54,7 +54,7 @@ enum ePixelFormat { #define FORMAT_ARGB FORMAT_A8R8G8B8 #define FORMAT_ARGB_ FORMAT_A4R4G4B4 -template +template struct TFormatInfo {}; template <> @@ -462,7 +462,7 @@ public: namespace Format { -inline static const FormatInfo &GetInfo(uint32_t pixelFormat) { +inline static const FormatInfo &GetInfo(ePixelFormat pixelFormat) { static const FormatInfo sc_formatInfos[FORMAT_SIZE_] = { __formatInfo(FORMAT_UNKNOWN), __formatInfo(FORMAT_A8), @@ -501,26 +501,26 @@ typedef ColorARGB (*pfn_convert_from)(const void *pIn); typedef void (*pfn_convert_to)(void *pOut, const ColorARGB &in); -template +template static uint32_t ConvertTo(const ColorARGB &color); -template +template static void ConvertTo(void *pOut, const ColorARGB &in) { *reinterpret_cast::TYPE *>(pOut) = static_cast::TYPE>( ConvertTo(in)); } -template +template static ColorARGB ConvertFrom(uint32_t in); -template +template static ColorARGB ConvertFrom(const void *pIn) { return ConvertFrom( *reinterpret_cast::TYPE *>(pIn)); } -inline static pfn_convert_to GetConvertTo(uint32_t pixelFormat) { +inline static pfn_convert_to GetConvertTo(ePixelFormat pixelFormat) { switch (pixelFormat) { case FORMAT_A8: return &ConvertTo; @@ -550,11 +550,13 @@ inline static pfn_convert_to GetConvertTo(uint32_t pixelFormat) { return &ConvertTo; case FORMAT_X8S8D16: return &ConvertTo; + default: + return &ConvertTo; } return nullptr; } -inline static pfn_convert_from GetConvertFrom(uint32_t pixelFormat, +inline static pfn_convert_from GetConvertFrom(ePixelFormat pixelFormat, bool bForceAlpha) { if (bForceAlpha) { switch (pixelFormat) { @@ -586,6 +588,8 @@ inline static pfn_convert_from GetConvertFrom(uint32_t pixelFormat, return &ConvertFrom; case FORMAT_X8S8D16: return &ConvertFrom; + default: + return &ConvertFrom; } } else { switch (pixelFormat) { @@ -617,13 +621,15 @@ inline static pfn_convert_from GetConvertFrom(uint32_t pixelFormat, return &ConvertFrom; case FORMAT_X8S8D16: return &ConvertFrom; + default: + return &ConvertFrom; } } return nullptr; } -inline static uint32_t GetNativeFormat(uint32_t pixelFormat) { +inline static uint32_t GetNativeFormat(ePixelFormat pixelFormat) { switch (pixelFormat) { case FORMAT_PAL4_B8G8R8: case FORMAT_PAL8_B8G8R8: @@ -644,8 +650,10 @@ inline static uint32_t GetNativeFormat(uint32_t pixelFormat) { case FORMAT_PAL4_R5G5B5A1: case FORMAT_PAL8_R5G5B5A1: return FORMAT_R5G5B5A1; + + default: + return pixelFormat; } - return pixelFormat; } /////////////////////////////////////////////////////////////////////////////// diff --git a/driver/tests/tex_demo/main.cpp b/driver/tests/tex_demo/main.cpp index 053edfe4..1f3056cf 100644 --- a/driver/tests/tex_demo/main.cpp +++ b/driver/tests/tex_demo/main.cpp @@ -23,21 +23,23 @@ const char* kernel_file = "kernel.bin"; const char* input_file = "palette64.tga"; const char* output_file = "output.tga"; -int wrap = 0; -int filter = 0; +int wrap = 0; +int filter = 0; float scale = 1.0f; +int format = 0; +ePixelFormat eformat = FORMAT_A8R8G8B8; vx_device_h device = nullptr; vx_buffer_h buffer = nullptr; static void show_usage() { std::cout << "Vortex Texture Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f filter] [-h: help]" << std::endl; + std::cout << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-h: help]" << std::endl; } static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "i:o:k:w:f:h?")) != -1) { + while ((c = getopt(argc, argv, "i:o:k:w:f:g:h?")) != -1) { switch (c) { case 'i': input_file = optarg; @@ -51,7 +53,20 @@ static void parse_args(int argc, char **argv) { case 'w': wrap = std::atoi(optarg); break; - case 'f': + case 'f': { + format = std::atoi(optarg); + switch (format) { + case 0: eformat = FORMAT_A8R8G8B8; break; + case 1: eformat = FORMAT_R5G6B5; break; + case 2: eformat = FORMAT_R4G4B4A4; break; + case 3: eformat = FORMAT_L8; break; + case 4: eformat = FORMAT_A8; break; + default: + std::cout << "Error: invalid format: " << format << std::endl; + exit(1); + } + } break; + case 'g': filter = std::atoi(optarg); break; case 'k': @@ -78,8 +93,13 @@ void cleanup() { } } -int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, uint32_t height, uint32_t bpp) { +int run_test(const kernel_arg_t& kernel_arg, + uint32_t buf_size, + uint32_t width, + uint32_t height, + uint32_t bpp) { auto time_start = std::chrono::high_resolution_clock::now(); + // start device std::cout << "start device" << std::endl; RT_CHECK(vx_start(device)); @@ -111,7 +131,7 @@ int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, } int main(int argc, char *argv[]) { - kernel_arg_t kernel_arg; + kernel_arg_t kernel_arg; std::vector src_pixels; uint32_t src_width; uint32_t src_height; @@ -120,7 +140,8 @@ int main(int argc, char *argv[]) { // parse command arguments parse_args(argc, argv); - RT_CHECK(LoadTGA(input_file, src_pixels, &src_width, &src_height, &src_bpp)); + std::vector tmp_pixels; + RT_CHECK(LoadTGA(input_file, tmp_pixels, &src_width, &src_height)); // check power of two support if (!ISPOW2(src_width) || !ISPOW2(src_height)) { @@ -128,7 +149,11 @@ int main(int argc, char *argv[]) { return -1; } - //dump_image(src_pixels, src_width, src_height, src_bpp); + RT_CHECK(ConvertImage(src_pixels, tmp_pixels, src_width, src_height, FORMAT_A8R8G8B8, eformat)); + src_bpp = Format::GetInfo(eformat).BytePerPixel; + + dump_image(src_pixels, src_width, src_height, src_bpp); + uint32_t src_bufsize = src_bpp * src_width * src_height; uint32_t dst_width = (uint32_t)(src_width * scale); @@ -173,7 +198,7 @@ int main(int argc, char *argv[]) { std::cout << "upload kernel argument" << std::endl; { kernel_arg.num_tasks = std::min(num_tasks, dst_height); - kernel_arg.format = (src_bpp == 1) ? 5 : (src_bpp == 2) ? 1 : 0; + kernel_arg.format = format; kernel_arg.filter = filter; kernel_arg.wrap = wrap; diff --git a/driver/tests/tex_demo/utils.cpp b/driver/tests/tex_demo/utils.cpp index 304693b2..46f0862c 100644 --- a/driver/tests/tex_demo/utils.cpp +++ b/driver/tests/tex_demo/utils.cpp @@ -1,6 +1,7 @@ #include "utils.h" #include #include +#include "format.h" struct __attribute__((__packed__)) tga_header_t { int8_t idlength; @@ -20,8 +21,7 @@ struct __attribute__((__packed__)) tga_header_t { int LoadTGA(const char *filename, std::vector &pixels, uint32_t *width, - uint32_t *height, - uint32_t *bpp) { + uint32_t *height) { std::ifstream ifs(filename, std::ios::in | std::ios::binary); if (!ifs.is_open()) { std::cerr << "couldn't open file: " << filename << "!" << std::endl; @@ -70,7 +70,7 @@ int LoadTGA(const char *filename, ColorARGB color; switch (stride) { case 2: - color = Format::ConvertFrom(src_bytes); + color = Format::ConvertFrom(src_bytes); break; case 3: color = Format::ConvertFrom(src_bytes); @@ -92,7 +92,6 @@ int LoadTGA(const char *filename, *width = header.width; *height = header.height; - *bpp = 4; return 0; } @@ -162,19 +161,19 @@ void dump_image(const std::vector& pixels, uint32_t width, uint32_t hei } } -int CopyBuffers(const SurfaceDesc &dstDesc, +int CopyBuffers(SurfaceDesc &dstDesc, int32_t dstOffsetX, int32_t dstOffsetY, - int32_t copyWidth, - int32_t copyHeight, + uint32_t copyWidth, + uint32_t copyHeight, const SurfaceDesc &srcDesc, int32_t srcOffsetX, int32_t srcOffsetY) { static const BlitTable s_blitTable; - if ((srcOffsetX >= srcDesc.Width) || (srcOffsetY >= srcDesc.Height) || - (dstOffsetX >= dstDesc.Width) || (dstOffsetY >= dstDesc.Height)) { + if ((srcOffsetX >= (int32_t)srcDesc.Width) || (srcOffsetY >= (int32_t)srcDesc.Height) || + (dstOffsetX >= (int32_t)dstDesc.Width) || (dstOffsetY >= (int32_t)dstDesc.Height)) { return -1; } @@ -194,9 +193,25 @@ int CopyBuffers(const SurfaceDesc &dstDesc, copyHeight = srcDesc.Height; } - s_blitTable.get(srcDesc.Format, dstDesc.Format)( - dstDesc, dstOffsetX, dstOffsetY, copyWidth, copyHeight, srcDesc, - srcOffsetX, srcOffsetY); + return s_blitTable.get(srcDesc.Format, dstDesc.Format)( + dstDesc, dstOffsetX, dstOffsetY, copyWidth, copyHeight, srcDesc, + srcOffsetX, srcOffsetY); +} - return 0; +int ConvertImage(std::vector& dst_pixels, + const std::vector& src_pixels, + uint32_t width, + uint32_t height, + ePixelFormat src_format, + ePixelFormat dst_format) { + + uint32_t src_pitch = Format::GetInfo(src_format).BytePerPixel * width; + uint32_t dst_pitch = Format::GetInfo(dst_format).BytePerPixel * width; + + dst_pixels.resize(dst_pitch * height); + + SurfaceDesc srcDesc{src_format, (uint8_t*)src_pixels.data(), width, height, src_pitch}; + SurfaceDesc dstDesc{dst_format, dst_pixels.data(), width, height, dst_pitch}; + + return CopyBuffers(dstDesc, 0, 0, width, height, srcDesc, 0, 0); } \ No newline at end of file diff --git a/driver/tests/tex_demo/utils.h b/driver/tests/tex_demo/utils.h index 48c01ec8..2e7f7d7c 100644 --- a/driver/tests/tex_demo/utils.h +++ b/driver/tests/tex_demo/utils.h @@ -12,8 +12,7 @@ inline uint32_t ilog2 (uint32_t value) { int LoadTGA(const char *filename, std::vector &pixels, uint32_t *width, - uint32_t *height, - uint32_t *bpp); + uint32_t *height); int SaveTGA(const char *filename, const std::vector &pixels, @@ -21,13 +20,23 @@ int SaveTGA(const char *filename, uint32_t height, uint32_t bpp); -int CopyBuffers(const SurfaceDesc &dstDesc, +int CopyBuffers(SurfaceDesc &dstDesc, int32_t dstOffsetX, int32_t dstOffsetY, - int32_t copyWidth, - int32_t copyHeight, + uint32_t copyWidth, + uint32_t copyHeight, const SurfaceDesc &srcDesc, int32_t srcOffsetX, int32_t srcOffsetY); -void dump_image(const std::vector& pixels, uint32_t width, uint32_t height, uint32_t bpp); \ No newline at end of file +int ConvertImage(std::vector& dst_pixels, + const std::vector& src_pixels, + uint32_t width, + uint32_t height, + ePixelFormat src_format, + ePixelFormat dst_format); + +void dump_image(const std::vector& pixels, + uint32_t width, + uint32_t height, + uint32_t bpp); diff --git a/hw/rtl/tex_unit/VX_tex_format.v b/hw/rtl/tex_unit/VX_tex_format.v index b8168822..e3e7351d 100644 --- a/hw/rtl/tex_unit/VX_tex_format.v +++ b/hw/rtl/tex_unit/VX_tex_format.v @@ -14,16 +14,16 @@ module VX_tex_format #( always @(*) begin case (format) `TEX_FORMAT_R5G6B5: begin - texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[4:0]); - texel_out_r[15:08] = `TEX_COLOR_BITS'(texel_in[10:5]); - texel_out_r[23:16] = `TEX_COLOR_BITS'(texel_in[15:11]); + texel_out_r[07:00] = `TEX_COLOR_BITS'({texel_in[15:11],texel_in[15:13]}); + texel_out_r[15:08] = `TEX_COLOR_BITS'({texel_in[10:5],texel_in[10:9]}); + texel_out_r[23:16] = `TEX_COLOR_BITS'({texel_in[4:0],texel_in[4:2]}); texel_out_r[31:24] = {`TEX_COLOR_BITS{1'b1}}; end `TEX_FORMAT_R4G4B4A4: begin - texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[3:0]); - texel_out_r[15:08] = `TEX_COLOR_BITS'(texel_in[7:4]); - texel_out_r[23:16] = `TEX_COLOR_BITS'(texel_in[11:8]); - texel_out_r[31:24] = `TEX_COLOR_BITS'(texel_in[15:12]); + texel_out_r[07:00] = `TEX_COLOR_BITS'({texel_in[11:8],texel_in[15:12]}); + texel_out_r[15:08] = `TEX_COLOR_BITS'({2{texel_in[7:4]}}); + texel_out_r[23:16] = `TEX_COLOR_BITS'({2{texel_in[3:0]}}); + texel_out_r[31:24] = `TEX_COLOR_BITS'({2{texel_in[15:12]}}); end `TEX_FORMAT_L8A8: begin texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[7:0]);