tex_unit partial update

This commit is contained in:
Blaise Tine
2021-03-20 08:40:57 -04:00
parent 50f5bdcfe3
commit 859877a00d
22 changed files with 352 additions and 183 deletions

View File

@@ -4,11 +4,13 @@ struct kernel_arg_t {
uint32_t num_tasks; uint32_t num_tasks;
uint32_t src_width; uint32_t src_width;
uint32_t src_height; uint32_t src_height;
uint32_t src_stride;
uint32_t src_pitch; uint32_t src_pitch;
uint32_t src_ptr;
uint32_t dst_width; uint32_t dst_width;
uint32_t dst_height; uint32_t dst_height;
uint32_t dst_stride;
uint32_t dst_pitch; uint32_t dst_pitch;
uint32_t src_ptr;
uint32_t dst_ptr; uint32_t dst_ptr;
}; };

Binary file not shown.

View File

@@ -156,9 +156,11 @@ int main(int argc, char *argv[]) {
kernel_arg.num_tasks = std::min<uint32_t>(num_tasks, dst_height); kernel_arg.num_tasks = std::min<uint32_t>(num_tasks, dst_height);
kernel_arg.src_width = src_width; kernel_arg.src_width = src_width;
kernel_arg.src_height = src_height; kernel_arg.src_height = src_height;
kernel_arg.src_stride = src_bpp;
kernel_arg.src_pitch = src_bpp * src_width * src_height; kernel_arg.src_pitch = src_bpp * src_width * src_height;
kernel_arg.dst_width = dst_width; kernel_arg.dst_width = dst_width;
kernel_arg.dst_height = dst_height; kernel_arg.dst_height = dst_height;
kernel_arg.dst_stride = dst_bpp;
kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height; kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height;
kernel_arg.src_ptr = src_addr; kernel_arg.src_ptr = src_addr;
kernel_arg.dst_ptr = dst_addr; kernel_arg.dst_ptr = dst_addr;

Binary file not shown.

View File

@@ -2,12 +2,16 @@
#include <vx_intrinsics.h> #include <vx_intrinsics.h>
#include <vx_tex.h> #include <vx_tex.h>
#include "common.h" #include "common.h"
// Returns ceil(log2(value)), i.e. the smallest n such that (1 << n) >= value.
// Used below to encode the texture dimensions/stride as log2 values before
// they are written to the texture CSRs.
//
// value == 0 and value == 1 both return 0.
//
// Implementation note: the count-leading-zeros is done with __builtin_clz on
// a 32-bit operand so the result does not depend on the width of `long`
// (__builtin_clzl counts over `unsigned long`, which is 64-bit on LP64 hosts),
// and on (value - 1) rather than ((value << 1) - 1) so that inputs above
// 2^31 do not overflow.
uint32_t ilog2 (uint32_t value) {
  if (value <= 1) {
    // log2(1) == 0; also guards __builtin_clz(0), which is undefined.
    return 0;
  }
  return (uint32_t)(sizeof(uint32_t) * 8) - (uint32_t)__builtin_clz((unsigned)(value - 1));
}
struct tile_arg_t { struct tile_arg_t {
struct kernel_arg_t karg; struct kernel_arg_t karg;
uint32_t tile_width; uint32_t tile_width;
uint32_t tile_height; uint32_t tile_height;
float deltaX; float deltaX;
float deltaY; float deltaY;
}; };
void kernel_body(int task_id, void* arg) { void kernel_body(int task_id, void* arg) {
@@ -36,15 +40,14 @@ int main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)0x0; struct kernel_arg_t* arg = (struct kernel_arg_t*)0x0;
// configure texture unit // configure texture unit
vx_csr_write(CSR_TEX0_ADDR, arg->src_ptr); vx_csr_write(CSR_TEX_ADDR(0), arg->src_ptr);
vx_csr_write(CSR_TEX0_FORMAT, 0); vx_csr_write(CSR_TEX_FORMAT(0), 0);
vx_csr_write(CSR_TEX0_WIDTH, arg->src_width); vx_csr_write(CSR_TEX_WIDTH(0), ilog2(arg->src_width));
vx_csr_write(CSR_TEX0_HEIGHT, arg->src_height); vx_csr_write(CSR_TEX_HEIGHT(0), ilog2(arg->src_height));
vx_csr_write(CSR_TEX0_PITCH, arg->src_pitch); vx_csr_write(CSR_TEX_STRIDE(0), ilog2(arg->src_stride));
vx_csr_write(CSR_TEX0_WRAP_U, 0); vx_csr_write(CSR_TEX_WRAP_U(0), 0);
vx_csr_write(CSR_TEX0_WRAP_V, 0); vx_csr_write(CSR_TEX_WRAP_V(0), 0);
vx_csr_write(CSR_TEX0_MIN_FILTER, 0); vx_csr_write(CSR_TEX_FILTER(0), 0);
vx_csr_write(CSR_TEX0_MAX_FILTER, 0);
struct tile_arg_t targ; struct tile_arg_t targ;
targ.karg = *arg; targ.karg = *arg;

View File

@@ -29,7 +29,7 @@ Disassembly of section .init:
Disassembly of section .text: Disassembly of section .text:
80000050 <main>: 80000050 <main>:
80000050: 01c02783 lw a5,28(zero) # 1c <__stack_usage+0x1c> 80000050: 01402783 lw a5,20(zero) # 14 <__stack_usage+0x14>
80000054: 00100073 ebreak 80000054: 00100073 ebreak
80000058 <register_fini>: 80000058 <register_fini>:

Binary file not shown.

View File

@@ -237,33 +237,21 @@
`define CSR_NW 12'hFC1 `define CSR_NW 12'hFC1
`define CSR_NC 12'hFC2 `define CSR_NC 12'hFC2
////////// Texture Unit CSRs ///////////// ////////// Texture Units //////////////////////////////////////////////////////
`define CSR_TEX_BEGIN 12'hFD0 `define NUM_TEX_UNITS 2
// Unit 1 `define CSR_TEX_STATES 8
`define CSR_TEX0_ADDR `CSR_TEX_BEGIN `define CSR_TEX_BEGIN(x) (12'hFD0 + (x) * `CSR_TEX_STATES)
`define CSR_TEX0_FORMAT `CSR_TEX_BEGIN + 12'h1
`define CSR_TEX0_WIDTH `CSR_TEX_BEGIN + 12'h2
`define CSR_TEX0_HEIGHT `CSR_TEX_BEGIN + 12'h3
`define CSR_TEX0_PITCH `CSR_TEX_BEGIN + 12'h4
`define CSR_TEX0_WRAP_U `CSR_TEX_BEGIN + 12'h5
`define CSR_TEX0_WRAP_V `CSR_TEX_BEGIN + 12'h6
`define CSR_TEX0_MIN_FILTER `CSR_TEX_BEGIN + 12'h7
`define CSR_TEX0_MAX_FILTER `CSR_TEX_BEGIN + 12'h8
// Unit 2 `define CSR_TEX_ADDR(x) (`CSR_TEX_BEGIN(x) + 12'h00)
`define CSR_TEX1_ADDR `CSR_TEX_BEGIN + 12'h9 `define CSR_TEX_FORMAT(x) (`CSR_TEX_BEGIN(x) + 12'h01)
`define CSR_TEX1_FORMAT `CSR_TEX_BEGIN + 12'hA `define CSR_TEX_WIDTH(x) (`CSR_TEX_BEGIN(x) + 12'h02)
`define CSR_TEX1_WIDTH `CSR_TEX_BEGIN + 12'hB `define CSR_TEX_HEIGHT(x) (`CSR_TEX_BEGIN(x) + 12'h03)
`define CSR_TEX1_HEIGHT `CSR_TEX_BEGIN + 12'hC `define CSR_TEX_STRIDE(x) (`CSR_TEX_BEGIN(x) + 12'h04)
`define CSR_TEX1_PITCH `CSR_TEX_BEGIN + 12'hD `define CSR_TEX_WRAP_U(x) (`CSR_TEX_BEGIN(x) + 12'h05)
`define CSR_TEX1_WRAP_U `CSR_TEX_BEGIN + 12'hE `define CSR_TEX_WRAP_V(x) (`CSR_TEX_BEGIN(x) + 12'h06)
`define CSR_TEX1_WRAP_V `CSR_TEX_BEGIN + 12'hF `define CSR_TEX_FILTER(x) (`CSR_TEX_BEGIN(x) + 12'h07)
`define CSR_TEX1_MIN_FILTER `CSR_TEX_BEGIN + 12'h10
`define CSR_TEX1_MAX_FILTER `CSR_TEX_BEGIN + 12'h11
`define CSR_TEX_END `CSR_TEX1_MAX_FILTER
// Pipeline Queues //////////////////////////////////////////////////////////// // Pipeline Queues ////////////////////////////////////////////////////////////

View File

@@ -57,27 +57,26 @@ module VX_csr_data #(
| fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0]; | fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0];
end end
if (write_enable && (write_addr > `CSR_TEX_END || write_addr < `CSR_TEX_BEGIN)) begin if (write_enable) begin
case (write_addr) case (write_addr)
`CSR_FFLAGS: fcsr[write_wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0]; `CSR_FFLAGS: fcsr[write_wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
`CSR_FRM: fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0]; `CSR_FRM: fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0]; `CSR_FCSR: fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
`CSR_SATP: csr_satp <= write_data; `CSR_SATP: csr_satp <= write_data;
`CSR_MSTATUS: csr_mstatus <= write_data;
`CSR_MSTATUS: csr_mstatus <= write_data; `CSR_MEDELEG: csr_medeleg <= write_data;
`CSR_MEDELEG: csr_medeleg <= write_data; `CSR_MIDELEG: csr_mideleg <= write_data;
`CSR_MIDELEG: csr_mideleg <= write_data; `CSR_MIE: csr_mie <= write_data;
`CSR_MIE: csr_mie <= write_data; `CSR_MTVEC: csr_mtvec <= write_data;
`CSR_MTVEC: csr_mtvec <= write_data; `CSR_MEPC: csr_mepc <= write_data;
`CSR_MEPC: csr_mepc <= write_data;
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data; `CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data; `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
default: begin default: begin
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr); if (write_addr < `CSR_TEX_BEGIN(0) || write_addr > `CSR_TEX_BEGIN(`CSR_TEX_STATES)) begin
$error("%t: invalid CSR write address: %0h", $time, write_addr);
end
end end
endcase endcase
end end

View File

@@ -361,7 +361,8 @@ module VX_decode #(
`ifdef EXT_TEX_ENABLE `ifdef EXT_TEX_ENABLE
3'h5: begin 3'h5: begin
op_type = `OP_BITS'(`GPU_TEX); op_type = `OP_BITS'(`GPU_TEX);
use_rd = 1; op_mod = instr[26:25];
use_rd = 1;
use_rs1 = 1; use_rs1 = 1;
use_rs2 = 1; use_rs2 = 1;
use_rs3 = 1; use_rs3 = 1;

View File

@@ -26,7 +26,7 @@
`define CSR_ADDR_BITS 12 `define CSR_ADDR_BITS 12
`define CSR_WIDTH 32 `define CSR_WIDTH 12
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@@ -388,13 +388,17 @@
`define XDRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH+`CLOG2(2)) `define XDRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH+`CLOG2(2))
////////////////////////// Texture Unit Configurable Knobs ////////////////////////////// ////////////////////////// Texture Unit Configurable Knobs //////////////////////////////
`define NUM_TEX_UNITS 2
`define MADDRW 8 `define NTEX_BITS `LOG2UP(`NUM_TEX_UNITS)
`define MAXWTW 8
`define MAXHTW 8 `define TEX_ADDR_BITS 32
`define MAXFTW 8 `define TEX_FMT_BITS 3
`define MAXFMW 8 `define TEX_WRAP_BITS 2
`define MAXAMW 8 `define TEX_WIDTH_BITS 12
`define TEX_HEIGHT_BITS 12
`define TEX_STRIDE_BITS 12
`define TEX_FILTER_BITS 1
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
`include "VX_types.vh" `include "VX_types.vh"

View File

@@ -111,13 +111,11 @@ module VX_gpu_unit #(
assign tex_req_if.PC = gpu_req_if.PC; assign tex_req_if.PC = gpu_req_if.PC;
assign tex_req_if.rd = gpu_req_if.rd; assign tex_req_if.rd = gpu_req_if.rd;
assign tex_req_if.wb = gpu_req_if.wb; assign tex_req_if.wb = gpu_req_if.wb;
for (genvar i = 0; i < `NUM_THREADS; i++) begin assign tex_req_if.unit = gpu_req_if.op_mod[`NTEX_BITS-1:0];
assign tex_req_if.u[i] = gpu_req_if.rs1_data[i]; assign tex_req_if.u = gpu_req_if.rs1_data;
assign tex_req_if.v[i] = gpu_req_if.rs2_data[i]; assign tex_req_if.v = gpu_req_if.rs2_data;
assign tex_req_if.lod[i] = gpu_req_if.rs3_data[i][31:8]; assign tex_req_if.lod = gpu_req_if.rs3_data;
assign tex_req_if.t[i] = gpu_req_if.rs3_data[i][7:0];
end
VX_tex_unit #( VX_tex_unit #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
@@ -159,6 +157,7 @@ module VX_gpu_unit #(
assign rsp_wb = 0; assign rsp_wb = 0;
assign rsp_data = warp_ctl_data; assign rsp_data = warp_ctl_data;
`UNUSED_VAR (gpu_req_if.op_mod)
`UNUSED_VAR (gpu_req_if.rs2_data) `UNUSED_VAR (gpu_req_if.rs2_data)
`UNUSED_VAR (gpu_req_if.rs3_data) `UNUSED_VAR (gpu_req_if.rs3_data)
`UNUSED_VAR (gpu_req_if.wb) `UNUSED_VAR (gpu_req_if.wb)

View File

@@ -111,14 +111,14 @@ module VX_instr_demux (
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU); wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) //update number of bits .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) //update number of bits
) gpu_buffer ( ) gpu_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (gpu_req_valid), .valid_in (gpu_req_valid),
.ready_in (gpu_req_ready), .ready_in (gpu_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}), .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
.valid_out (gpu_req_if.valid), .valid_out (gpu_req_if.valid),
.ready_out (gpu_req_if.ready) .ready_out (gpu_req_if.ready)
); );

View File

@@ -9,12 +9,14 @@ interface VX_tex_req_if ();
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask; wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC; wire [31:0] PC;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire wb; wire wb;
wire [`NUM_THREADS-1:0][`NTEX_BITS-1:0] unit;
wire [`NUM_THREADS-1:0][31:0] u; wire [`NUM_THREADS-1:0][31:0] u;
wire [`NUM_THREADS-1:0][31:0] v; wire [`NUM_THREADS-1:0][31:0] v;
wire [`NUM_THREADS-1:0][23:0] lod; wire [`NUM_THREADS-1:0][31:0] lod;
wire [`NUM_THREADS-1:0][7:0] t;
wire ready; wire ready;
endinterface endinterface

View File

@@ -0,0 +1,70 @@
`include "VX_define.vh"
// Texture address generator.
//
// Converts a fixed-point (coord_u, coord_v) pair into a texel address relative
// to base_addr and registers the resulting memory request (valid mask,
// addresses and request tag) through a pipeline register.
//
// Parameters:
//   CORE_ID       - core identifier (not used by the logic below).
//   REQ_TAG_WIDTH - width of the opaque tag carried alongside the request.
//   FRAC_BITS     - number of fractional bits assumed in coord_u/coord_v.
//   INT_BITS      - remaining integer bits (not used by the logic below).
module VX_tex_addr_gen #(
    parameter CORE_ID       = 0,
    parameter REQ_TAG_WIDTH = 1,
    parameter FRAC_BITS     = 20,
    parameter INT_BITS      = 32 - FRAC_BITS
) (
    input wire clk,
    input wire reset,

    // handshake
    input wire  valid_in,
    output wire ready_in,

    // inputs
    // req_tag is consumed here (it is fed into the pipeline register), so it
    // must be an input; it is forwarded unchanged on mem_req_tag.
    input wire [REQ_TAG_WIDTH-1:0]    req_tag,
    input wire [`TEX_FILTER_BITS-1:0] filter,
    input wire [`TEX_WRAP_BITS-1:0]   wrap_u,
    input wire [`TEX_WRAP_BITS-1:0]   wrap_v,

    input wire [`TEX_ADDR_BITS-1:0]   base_addr,
    input wire [1:0]                  log2_stride,
    input wire [`TEX_WIDTH_BITS-1:0]  log2_width,
    input wire [`TEX_HEIGHT_BITS-1:0] log2_height,
    input wire [3:0]                  lod,

    input wire [31:0] coord_u,
    input wire [31:0] coord_v,

    // outputs
    output wire [3:0]               mem_req_valid,
    output wire [REQ_TAG_WIDTH-1:0] mem_req_tag,
    output wire [3:0][31:0]         mem_req_addr,
    input wire                      mem_req_ready
);

    `UNUSED_PARAM (CORE_ID)
    `UNUSED_PARAM (INT_BITS)
    `UNUSED_VAR (filter)
    `UNUSED_VAR (lod)
    // TODO: wrap modes are not applied yet; coordinates pass through as-is.
    `UNUSED_VAR (wrap_u)
    `UNUSED_VAR (wrap_v)

    wire [31:0] u, v;
    wire [31:0] x_offset, y_offset;
    wire [31:0] addr0;
    wire        stall_out;

    assign u = coord_u;
    assign v = coord_v;

    // addressing mode

    // Keep only the top log2_width / log2_height bits of the fractional part,
    // which yields the texel column / row index.
    // NOTE(review): presumably u/v are normalized coordinates with FRAC_BITS
    // fractional bits - confirm against the instruction's operand format.
    assign x_offset = u >> (5'(FRAC_BITS) - log2_width);
    assign y_offset = v >> (5'(FRAC_BITS) - log2_height);

    // Byte address = base + (texel index << log2_stride). The explicit
    // parentheses matter: '+' binds tighter than '<<', so without them the
    // base address itself would be shifted as well.
    assign addr0 = base_addr + ((x_offset + (y_offset << log2_width)) << log2_stride);

    // NOTE(review): 4'(valid_in) zero-extends, so only lane 0 is ever marked
    // valid while the same address is replicated on all four lanes - confirm
    // this is the intended behaviour versus {4{valid_in}}.
    wire [3:0]       req_valids  = 4'(valid_in);
    wire [3:0][31:0] req_address = {4{addr0}};

    // Hold the registered request while it is still pending downstream.
    assign stall_out = (| mem_req_valid) && ~mem_req_ready;

    // DATAW must match the concatenation below: 4 valid bits + 4 addresses + tag.
    // NOTE(review): RESETW is left at 1; if it selects how many of the upper
    // bits are cleared on reset, it may need to cover all 4 valid bits - confirm
    // against VX_pipe_register.
    VX_pipe_register #(
        .DATAW  (4 + 4 * 32 + REQ_TAG_WIDTH),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({req_valids,    req_address,  req_tag}),
        .data_out ({mem_req_valid, mem_req_addr, mem_req_tag})
    );

    // Back-pressure the requester whenever the output register is held.
    assign ready_in = ~stall_out;

endmodule

View File

@@ -0,0 +1,22 @@
`include "VX_define.vh"
// Placeholder module: takes a wrap-mode selector (wrap_i) and a 32-bit
// coordinate, but the body below is not functional yet.
// NOTE(review): the module name is identical to the address-generator module
// added in the same change set; given the wrap_i/coord_i/coord_o ports this
// looks like a copy/paste leftover - confirm the intended module name.
module VX_tex_addr_gen #(
parameter FRAC_BITS = 20,
parameter INT_BITS = 32 - FRAC_BITS
) (
// NOTE(review): this port declaration ends with ';' whereas entries in an
// ANSI port list are separated by ','.
input wire [`TEX_WRAP_BITS-1:0] wrap_i;
input wire [31:0] coord_i,
// NOTE(review): coord_o is declared as an input and is never driven; the
// name suggests it was meant to be the output - confirm.
input wire [31:0] coord_o
// NOTE(review): the closing ')' of the port list is not followed by ';'.
)
// NOTE(review): the case items (`ALU_*) and the signals msc_result, alu_in1,
// alu_in2_imm and the index i are not declared anywhere in this module; the
// block appears to be placeholder text copied from ALU code and does not read
// coord_i or write coord_o.
always @(*) begin
case (wrap_i)
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`ALU_SLL,
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
endcase
end
endmodule

View File

@@ -0,0 +1,8 @@
// Placeholder for the texture unit's format stage; ports and logic are not
// implemented yet.
//
// Parameters:
//   CORE_ID - core identifier (currently unused).
module VX_tex_format #(
    parameter CORE_ID = 0
) (
    // TODO
);  // the module header must be terminated with ';'

    // TODO

endmodule

View File

@@ -1,23 +1,29 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_tex_memory #( module VX_tex_memory #(
parameter CORE_ID = 0 parameter CORE_ID = 0,
parameter TAG_IN_WIDTH = 1
) ( ) (
`SCOPE_IO_VX_lsu_unit `SCOPE_IO_VX_lsu_unit
input wire clk, input wire clk,
input wire reset, input wire reset,
// Dcache interface // memory interface
VX_dcache_core_req_if dcache_req_if, VX_dcache_core_req_if dcache_req_if,
VX_dcache_core_rsp_if dcache_rsp_if, VX_dcache_core_rsp_if dcache_rsp_if,
// inputs // inputs
VX_lsu_req_if lsu_req_if, input wire [3:0] req_valid,
input wire [3:0][31:0] req_addr,
input wire [TAG_IN_WIDTH-1:0] req_tag,
output wire req_ready,
// outputs // outputs
VX_commit_if ld_commit_if output wire rsp_valid,
// VX_commit_if st_commit_if output wire [3:0][31:0] rsp_data,
output wire [TAG_IN_WIDTH-1:0] rsp_tag,
input wire rsp_ready
); );
`UNUSED_PARAM (CORE_ID) `UNUSED_PARAM (CORE_ID)

View File

@@ -0,0 +1,12 @@
`include "VX_define.vh"
// Placeholder for the texture sampler stage; only the clock and reset ports
// exist so far and no logic is implemented.
//
// Parameters:
//   CORE_ID - core identifier (currently unused).
module VX_tex_sampler #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset
);

    // Nothing is consumed yet: mark the parameter and ports as intentionally
    // unused, as the other texture modules do, to keep lint output clean.
    `UNUSED_PARAM (CORE_ID)
    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    // TODO

endmodule

View File

@@ -5,20 +5,22 @@ module VX_tex_unit #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Texture unit <-> Memory Unit
VX_dcache_core_req_if dcache_req_if,
VX_dcache_core_rsp_if dcache_rsp_if,
// Inputs // Inputs
VX_tex_req_if tex_req_if, VX_tex_req_if tex_req_if,
VX_tex_csr_if tex_csr_if, VX_tex_csr_if tex_csr_if,
// Outputs // Outputs
VX_tex_rsp_if tex_rsp_if, VX_tex_rsp_if tex_rsp_if
// Texture unit <-> Memory Unit
VX_dcache_core_req_if dcache_req_if,
VX_dcache_core_rsp_if dcache_rsp_if
); );
localparam MEM_REQ_TAGW = `NW_BITS + 32 + 1 + `NR_BITS + `NTEX_BITS;
`UNUSED_PARAM (CORE_ID) `UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
@@ -31,104 +33,131 @@ module VX_tex_unit #(
wire [`NUM_THREADS-1:0][31:0] rsp_data; wire [`NUM_THREADS-1:0][31:0] rsp_data;
wire stall_in, stall_out; wire stall_in, stall_out;
reg [`CSR_WIDTH-1:0] tex_addr [`NUM_TEX_UNITS-1: 0]; reg [`TEX_ADDR_BITS-1:0] tex_addr [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_format [`NUM_TEX_UNITS-1: 0]; reg [`TEX_FMT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_width [`NUM_TEX_UNITS-1: 0]; reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_height [`NUM_TEX_UNITS-1: 0]; reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_stride [`NUM_TEX_UNITS-1: 0]; reg [`TEX_STRIDE_BITS-1:0] tex_stride [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_wrap_u [`NUM_TEX_UNITS-1: 0]; reg [`TEX_WRAP_BITS-1:0] tex_wrap_u [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_wrap_v [`NUM_TEX_UNITS-1: 0]; reg [`TEX_WRAP_BITS-1:0] tex_wrap_v [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_min_filter [`NUM_TEX_UNITS-1: 0]; reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_max_filter [`NUM_TEX_UNITS-1: 0];
`UNUSED_VAR (tex_format) // CSRs programming
`UNUSED_VAR (tex_stride)
`UNUSED_VAR (tex_wrap_u)
`UNUSED_VAR (tex_wrap_v)
`UNUSED_VAR (tex_min_filter)
`UNUSED_VAR (tex_max_filter)
//tex csr programming, need to make make consistent with `NUM_TEX_UNITS for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
always @(posedge clk ) begin always @(posedge clk ) begin
if (tex_csr_if.write_enable) begin if (reset) begin
case (tex_csr_if.write_addr) tex_addr[i] <= 0;
`CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data; tex_format[i] <= 0;
`CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data; tex_width[i] <= 0;
`CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data; tex_height[i] <= 0;
`CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data; tex_stride[i] <= 0;
`CSR_TEX0_PITCH : tex_stride[0] <= tex_csr_if.write_data; tex_wrap_u[i] <= 0;
`CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data; tex_wrap_v[i] <= 0;
`CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data; tex_filter[i] <= 0;
`CSR_TEX0_MIN_FILTER : tex_min_filter[0] <= tex_csr_if.write_data; end begin
`CSR_TEX0_MAX_FILTER : tex_max_filter[0] <= tex_csr_if.write_data; if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
`CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data; `CSR_TEX_ADDR(i) : tex_addr[i] <= tex_csr_if.write_data;
`CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data; `CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data;
`CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data; `CSR_TEX_WIDTH(i) : tex_width[i] <= tex_csr_if.write_data;
`CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data; `CSR_TEX_HEIGHT(i) : tex_height[i] <= tex_csr_if.write_data;
`CSR_TEX1_PITCH : tex_stride[1] <= tex_csr_if.write_data; `CSR_TEX_STRIDE(i) : tex_stride[i] <= tex_csr_if.write_data;
`CSR_TEX1_WRAP_U : tex_wrap_u[1] <= tex_csr_if.write_data; `CSR_TEX_WRAP_U(i) : tex_wrap_u[i] <= tex_csr_if.write_data;
`CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data; `CSR_TEX_WRAP_V(i) : tex_wrap_v[i] <= tex_csr_if.write_data;
`CSR_TEX1_MIN_FILTER : tex_min_filter[1] <= tex_csr_if.write_data; `CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data;
`CSR_TEX1_MAX_FILTER : tex_max_filter[1] <= tex_csr_if.write_data; default:
default:; assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
endcase && tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
endcase
end
end
end end
end end
// texture response // address generation
`UNUSED_VAR (tex_req_if.lod)
// texture unit <-> dcache wire [3:0] mem_req_valid;
VX_lsu_req_if lsu_req_if(); wire [3:0][31:0] mem_req_addr;
VX_commit_if ld_commit_if(); wire [TAG_IN_WIDTH-1:0] mem_req_tag;
wire mem_req_ready;
VX_tex_memory #( wire mem_rsp_valid;
.CORE_ID(CORE_ID) wire [3:0][31:0] mem_rsp_data;
) tex_memory ( wire [TAG_IN_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_ready;
VX_tex_addr_gen #(
.FRAC_BITS(20)
) tex_addr_gen (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if), .valid_in (tex_req_if.valid),
.lsu_req_if (lsu_req_if), .ready_in (tex_req_if.ready),
.ld_commit_if (ld_commit_if)
.req_tag ({tex_req_if.wid, tex_req_if.PC, tex_req_if.rd, tex_req_if.wb}),
.filter (tex_filter[tex_req_if.unit]),
.wrap_u (tex_wrap_ufilter[tex_req_if.unit]),
.wrap_v (tex_wrap_v[tex_req_if.unit]),
.base_addr (tex_addr[tex_req_if.unit]),
.log2_stride (tex_stride[tex_req_if.unit]),
.log2_width (tex_width[tex_req_if.unit]),
.log2_height (tex_height[tex_req_if.unit]),
.coord_u (tex_req_if.u),
.coord_v (tex_req_if.v),
.lod (tex_req_if.lod),
.mem_req_valid (mem_req_valid),
.mem_req_tag (mem_req_tag),
.mem_req_addr (mem_req_addr),
.mem_req_ready (mem_req_ready)
); );
//point sampling - texel address computation // retrieve texel values from memory
wire [`NUM_THREADS-1:0] pt_addr_valid;
wire [`NUM_THREADS-1:0] pt_addr_ready; VX_tex_memory #(
.CORE_ID (CORE_ID),
.REQ_TAG_WIDTH (MEM_REQ_TAGW)
) tex_memory (
.clk (clk),
.reset (reset),
for (genvar i = 0; i < `NUM_THREADS; i++) begin // memory interface
wire [`CSR_WIDTH-1:0] tex_addr_select; .dcache_req_if (dcache_req_if),
wire [`CSR_WIDTH-1:0] tex_width_select; .dcache_rsp_if (dcache_rsp_if),
wire [`CSR_WIDTH-1:0] tex_height_select;
assign tex_addr_select = (tex_req_if.t[i] == 'b1) ? tex_addr[1] : tex_addr[0];
assign tex_width_select = (tex_req_if.t[i] == 'b1) ? tex_width[1] : tex_width[0];
assign tex_height_select = (tex_req_if.t[i] == 'b1) ? tex_height[1] : tex_height[0];
VX_tex_pt_addr #(
.FRAC_BITS(28)
) tex_pt_addr (
.clk (clk),
.reset (reset),
.valid_in (tex_req_if.valid), // inputs
.ready_out (pt_addr_ready[i]), req_valid (mem_req_valid),
req_addr (mem_req_addr),
req_tag (mem_req_tag),
req_ready (mem_req_ready),
.tex_addr (tex_addr_select), // outputs
.tex_width (tex_width_select), rsp_valid (mem_rsp_valid),
.tex_height (tex_height_select), rsp_texel (mem_rsp_data),
rsp_tag (mem_rsp_tag),
rsp_ready (mem_rsp_ready)
);
.tex_u (tex_req_if.u[i]), // apply sampler
.tex_v (tex_req_if.v[i]),
.pt_addr (lsu_req_if.base_addr[i]), VX_tex_sampler #(
.CORE_ID (CORE_ID)
) tex_sampler (
.clk (clk),
.reset (reset)
.valid_out (pt_addr_valid[i]), // inputs
.ready_in (lsu_req_if.ready) //.valid_in (mem_rsp_valid),
); //.texel (mem_rsp_data),
end //.req_wid (mem_rsp_tag),
//.req_PC (mem_rsp_tag),
//.format (mem_rsp_tag),
//.ready_in (mem_rsp_ready),
);
assign tex_req_if.ready = (& pt_addr_ready); assign tex_req_if.ready = (& pt_addr_ready);
@@ -176,8 +205,8 @@ module VX_tex_unit #(
`ifdef DBG_PRINT_TEX `ifdef DBG_PRINT_TEX
always @(posedge clk) begin always @(posedge clk) begin
if (tex_csr_if.write_enable if (tex_csr_if.write_enable
&& (tex_csr_if.write_addr <= `CSR_TEX_END && (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
|| tex_csr_if.write_addr >= `CSR_TEX_BEGIN)) begin && tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES))) begin
$display("%t: core%0d-tex_csr: csr_tex0_addr, csr_data=%0h", $time, CORE_ID, tex_addr[0]); $display("%t: core%0d-tex_csr: csr_tex0_addr, csr_data=%0h", $time, CORE_ID, tex_addr[0]);
$display("%t: core%0d-tex_csr: csr_tex0_format, csr_data=%0h", $time, CORE_ID, tex_format[0]); $display("%t: core%0d-tex_csr: csr_tex0_format, csr_data=%0h", $time, CORE_ID, tex_format[0]);
$display("%t: core%0d-tex_csr: csr_tex0_width, csr_data=%0h", $time, CORE_ID, tex_width[0]); $display("%t: core%0d-tex_csr: csr_tex0_width, csr_data=%0h", $time, CORE_ID, tex_width[0]);

View File

@@ -0,0 +1,23 @@
`include "VX_define.vh"
// Placeholder module: takes a wrap-mode selector (wrap_i) and a 32-bit
// coordinate, but the body below is not functional yet.
module VX_tex_wrap #(
parameter CORE_ID = 0,
parameter FRAC_BITS = 20,
parameter INT_BITS = 32 - FRAC_BITS
) (
// NOTE(review): this port declaration ends with ';' whereas entries in an
// ANSI port list are separated by ','.
input wire [`TEX_WRAP_BITS-1:0] wrap_i;
input wire [31:0] coord_i,
// NOTE(review): coord_o is declared as an input and is never driven; the
// name suggests it was meant to be the output - confirm.
input wire [31:0] coord_o
// NOTE(review): the closing ')' of the port list is not followed by ';'.
)
// NOTE(review): the case items (`ALU_*) and the signals msc_result, alu_in1,
// alu_in2_imm and the index i are not declared anywhere in this module; the
// block appears to be placeholder text copied from ALU code and does not read
// coord_i or write coord_o.
always @(*) begin
case (wrap_i)
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`ALU_SLL,
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
endcase
end
endmodule

View File

@@ -14,41 +14,41 @@ extern "C" {
#endif #endif
#define vx_csr_swap(csr, val) ({ \ #define vx_csr_swap(csr, val) ({ \
unsigned long __v = (unsigned long)(val); \ unsigned __v = (unsigned )(val); \
__asm__ __volatile__ ("csrrw %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ __asm__ __volatile__ ("csrrw %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \
__v; \ __v; \
}) })
#define vx_csr_read(csr) ({ \ #define vx_csr_read(csr) ({ \
register unsigned long __v; \ register unsigned __v; \
__asm__ __volatile__ ("csrr %0, " __ASM_STR(csr) : "=r" (__v) :: "memory"); \ __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr) : "=r" (__v) :: "memory"); \
__v; \ __v; \
}) })
#define vx_csr_write(csr, val) ({ \ #define vx_csr_write(csr, val) ({ \
unsigned long __v = (unsigned long)(val); \ unsigned __v = (unsigned )(val); \
__asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \
}) })
#define vx_csr_read_set(csr, val) ({ \ #define vx_csr_read_set(csr, val) ({ \
unsigned long __v = (unsigned long)(val); \ unsigned __v = (unsigned )(val); \
__asm__ __volatile__ ("csrrs %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ __asm__ __volatile__ ("csrrs %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \
__v; \ __v; \
}) })
#define vx_csr_set(csr, val) ({ \ #define vx_csr_set(csr, val) ({ \
unsigned long __v = (unsigned long)(val); \ unsigned __v = (unsigned )(val); \
__asm__ __volatile__ ("csrs " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ __asm__ __volatile__ ("csrs " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \
}) })
#define vx_csr_read_clear(csr, val) ({ \ #define vx_csr_read_clear(csr, val) ({ \
unsigned long __v = (unsigned long)(val); \ unsigned __v = (unsigned )(val); \
__asm__ __volatile__ ("csrrc %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ __asm__ __volatile__ ("csrrc %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \
__v; \ __v; \
}) })
#define vx_csr_clear(csr, val) ({ \ #define vx_csr_clear(csr, val) ({ \
unsigned long __v = (unsigned long)(val); \ unsigned __v = (unsigned )(val); \
__asm__ __volatile__ ("csrc " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ __asm__ __volatile__ ("csrc " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \
}) })
@@ -77,6 +77,13 @@ inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps)); asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps));
} }
// Texture load.
//
// Emits the custom R4-type instruction (opcode 0x6b, func3 5) with `unit`
// encoded in the func2 field, and evaluates to the value the instruction
// writes to its destination register.
//
//   unit - texture unit index; it is stringified into the instruction
//          encoding, so it must be a compile-time constant.
//   u, v - texture coordinates (rs1, rs2).
//   lod  - level-of-detail operand (rs3).
//
// %0 is the destination register, so it has to be declared as an output
// operand ("=r"); listing it as an input would read an uninitialized variable
// and hide the write from the compiler.
#define vx_tex_ld(unit, u, v, lod) ({ \
    register unsigned __v; \
    __asm__ __volatile__ (".insn r4 0x6b, 5, " __ASM_STR(unit) ", %0, %1, %2, %3" : "=r"(__v) : "r"(u), "r"(v), "r"(lod)); \
    __v; \
})
// Return active warp's thread id // Return active warp's thread id
inline int vx_thread_id() { inline int vx_thread_id() {
int result; int result;
@@ -154,14 +161,6 @@ inline int vx_num_instrs() {
return result; return result;
} }
// Texture load instruction.
//
// Packs the texture unit index `t` into the low 8 bits of the LOD operand
// (the LOD value itself occupies the bits above), issues the custom R4-type
// instruction (opcode 0x6b, func3 5, func2 1) with u/v/lod_t as rs1/rs2/rs3,
// and returns the value written to the destination register.
// Note that `t` is not masked, so values above 255 overlap the LOD bits.
inline int vx_tex_ld(unsigned t, unsigned u, unsigned v, unsigned lod_t) {
    lod_t = (lod_t << 8) | t;
    int result;
    // %0 is the destination register: it must be an output operand ("=r"),
    // otherwise `result` is read uninitialized and the compiler is never told
    // that the instruction writes it.
    asm volatile (".insn r4 0x6b, 5, 1, %0, %1, %2, %3" : "=r"(result) : "r"(u), "r"(v), "r"(lod_t));
    return result;
}
#define __if(b) vx_split(b); \ #define __if(b) vx_split(b); \
if (b) if (b)