diff --git a/hw/rtl/tex_unit/VX_tex_addr_gen.v b/hw/rtl/tex_unit/VX_tex_addr_gen.v
index f29a21ef..54369163 100644
--- a/hw/rtl/tex_unit/VX_tex_addr_gen.v
+++ b/hw/rtl/tex_unit/VX_tex_addr_gen.v
@@ -43,52 +43,85 @@ module VX_tex_addr_gen #(
     `UNUSED_PARAM (CORE_ID)
     `UNUSED_VAR (lod)
 
+    wire [`FIXED_FRAC-1:0] u[`NUM_THREADS-1:0][1:0]; // wrapped u per thread: [0] = coord - offset, [1] = coord + offset
+    wire [`FIXED_FRAC-1:0] v[`NUM_THREADS-1:0][1:0]; // wrapped v per thread: [0] = coord - offset, [1] = coord + offset
+
+    // addressing mode: offset each coordinate by (FIXED_HALF >> log2 size) when filtering, then wrap
+
     for (genvar i = 0; i < `NUM_THREADS; ++i) begin
 
-        // addressing mode
-
-        wire [31:0] u, v;
+        wire [31:0] fu[1:0];
+        wire [31:0] fv[1:0];
+
+        assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log2_width) : 0);
+        assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log2_height) : 0);
+        assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log2_width) : 0);
+        assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log2_height) : 0);
 
         VX_tex_wrap #(
             .CORE_ID (CORE_ID)
-        ) tex_wrap_u (
+        ) tex_wrap_u0 (
             .wrap_i  (wrap_u),
-            .coord_i (coord_u[i]),
-            .coord_o (u)
+            .coord_i (fu[0]),
+            .coord_o (u[i][0])
         );
 
         VX_tex_wrap #(
             .CORE_ID (CORE_ID)
-        ) tex_wrap_v (
+        ) tex_wrap_v0 (
             .wrap_i  (wrap_v),
-            .coord_i (coord_v[i]),
-            .coord_o (v)
+            .coord_i (fv[0]),
+            .coord_o (v[i][0])
         );
 
-        // texel addresses generation
+        VX_tex_wrap #(
+            .CORE_ID (CORE_ID)
+        ) tex_wrap_u1 (
+            .wrap_i  (wrap_u),
+            .coord_i (fu[1]),
+            .coord_o (u[i][1])
+        );
 
-        wire [31:0] x_offset, y_offset;
-        wire [31:0] addr0;
+        VX_tex_wrap #(
+            .CORE_ID (CORE_ID)
+        ) tex_wrap_v1 (
+            .wrap_i  (wrap_v),
+            .coord_i (fv[1]),
+            .coord_o (v[i][1])
+        );
+    end
+
+    // addresses generation: four texel addresses per thread, one per (x, y) sample pair
 
-        assign x_offset = u >> (5'(`FIXED_FRAC) - log2_width);
-        assign y_offset = v >> (5'(`FIXED_FRAC) - log2_height);
-        assign addr0 = base_addr + (x_offset + (y_offset << log2_width)) << log2_stride;
+    wire [31:0] addr [`NUM_THREADS-1:0][3:0];
 
-        wire [3:0] req_valids = 4'(valid_in);
-        wire [3:0][31:0] req_address = {4{addr0}};
+    for (genvar i = 0; i < `NUM_THREADS; ++i) begin
+
+        wire [`FIXED_FRAC-1:0] x [1:0];
+        wire [`FIXED_FRAC-1:0] y [1:0];
+
+        assign x[0] = u[i][0] >> ((`FIXED_FRAC) - log2_width);
+        assign x[1] = u[i][1] >> ((`FIXED_FRAC) - log2_width);
+        assign y[0] = v[i][0] >> ((`FIXED_FRAC) - log2_height);
+        assign y[1] = v[i][1] >> ((`FIXED_FRAC) - log2_height);
+
+        assign addr [i][0] = base_addr + ((x[0] + (y[0] << log2_width)) << log2_stride); // '+' binds tighter than '<<': shift only the texel offset
+        assign addr [i][1] = base_addr + ((x[1] + (y[0] << log2_width)) << log2_stride);
+        assign addr [i][2] = base_addr + ((x[0] + (y[1] << log2_width)) << log2_stride);
+        assign addr [i][3] = base_addr + ((x[1] + (y[1] << log2_width)) << log2_stride);
     end
 
     wire stall_out = mem_req_valid && ~mem_req_ready;
 
     VX_pipe_register #(
-        .DATAW  (1 + 4 + 4 * 32 + REQ_TAG_WIDTH),
+        .DATAW  (1 + 4 + `NUM_THREADS * 4 * 32 + REQ_TAG_WIDTH), // NOTE(review): data_in also carries req_tmask and filter; confirm DATAW equals the concatenation width
         .RESETW (1)
     ) pipe_reg (
         .clk      (clk),
         .reset    (reset),
         .enable   (~stall_out),
-        .data_in  ({req_valids, req_address, req_tag}),
-        .data_out ({mem_req_valid, mem_req_addr, mem_req_tag})
+        .data_in  ({valid_in, req_tmask, filter, req_tag, addr}), // NOTE(review): addr is declared as an unpacked array; confirm the tool accepts it inside a concatenation
+        .data_out ({mem_req_valid, mem_req_tmask, mem_req_filter, mem_req_tag, mem_req_addr})
     );
 
     assign ready_in = ~stall_out;
diff --git a/hw/rtl/tex_unit/VX_tex_define.vh b/hw/rtl/tex_unit/VX_tex_define.vh
index 81872eba..0b72b821 100644
--- a/hw/rtl/tex_unit/VX_tex_define.vh
+++ b/hw/rtl/tex_unit/VX_tex_define.vh
@@ -6,6 +6,7 @@
 `define FIXED_FRAC 20
 `define FIXED_INT (32 - `FIXED_FRAC)
 `define FIXED_ONE (1 << `FIXED_FRAC)
+`define FIXED_HALF (`FIXED_ONE >> 1)
 `define FIXED_MASK (`FIXED_ONE - 1)
 
 `define CLAMP(x,lo,hi) ((x < lo) ? lo : ((x > hi) ? hi : x))
@@ -13,8 +14,8 @@
 `define TEX_ADDR_BITS 32
 `define TEX_FORMAT_BITS 3
 `define TEX_WRAP_BITS 2
-`define TEX_WIDTH_BITS 12
-`define TEX_HEIGHT_BITS 12
+`define TEX_WIDTH_BITS 4
+`define TEX_HEIGHT_BITS 4
 `define TEX_STRIDE_BITS 2
 `define TEX_FILTER_BITS 1
 
diff --git a/hw/rtl/tex_unit/VX_tex_wrap.v b/hw/rtl/tex_unit/VX_tex_wrap.v
index 5c536a68..d6b0bb2e 100644
--- a/hw/rtl/tex_unit/VX_tex_wrap.v
+++ b/hw/rtl/tex_unit/VX_tex_wrap.v
@@ -1,35 +1,16 @@
 `include "VX_tex_define.vh"
 
-/*
-switch(addressing_mode) {
-case undefined: return is_undefined;
-case clamp_to_edge: return intdowni(max(0, min(coord, coorddim - 1)));
-case clamp_to_border: return is_border;
-case repeat:
-    tile = intdowni(coord / coorddim);
-    return intdowni(coord - (tile * coorddim));
-case mirrored_repeat:
-    mirrored_coord = (coord < 0) ? (-coord - 1) : coord;
-    tile = intdowni(mirrored_coord / coorddim);
-    mirrored_coord = intdowni(mirrored_coord - (tile * coorddim));
-    if (tile & 1) {
-        mirrored_coord = (coorddim - 1) - mirrored_coord;
-    }
-    return mirrored_coord;
-}
-*/
-
 module VX_tex_wrap #(
     parameter CORE_ID = 0
 ) (
     input wire [`TEX_WRAP_BITS-1:0] wrap_i,
     input wire [31:0] coord_i,
-    input wire [`FIXED_FRAC-1:0] coord_o
+    output wire [`FIXED_FRAC-1:0] coord_o
 );
 
     `UNUSED_PARAM (CORE_ID)
 
-    reg [31:0] coord_r;
+    reg [`FIXED_FRAC-1:0] coord_r;
 
     wire [31:0] clamp = `CLAMP(coord_i, 0, `FIXED_MASK);
 
diff --git a/simX/execute.cpp b/simX/execute.cpp
index 43d07fbd..e8cef37c 100644
--- a/simX/execute.cpp
+++ b/simX/execute.cpp
@@ -776,46 +776,51 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
       case FMSUB:
       case FMNMADD:
       case FMNMSUB: {
-        // multiplicands are infinity and zero, them set FCSR
-        if (fpBinIsZero(rsdata[0]) || fpBinIsZero(rsdata[1]) || fpBinIsInf(rsdata[0]) || fpBinIsInf(rsdata[1])) {
-          core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
-          core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
-        }
-        if (fpBinIsNan(rsdata[0]) || fpBinIsNan(rsdata[1]) || fpBinIsNan(rsdata[2])) {
-          // if one of op is NaN, if addend is not quiet NaN, them set FCSR
-          if ((fpBinIsNan(rsdata[0])==2) | (fpBinIsNan(rsdata[1])==2) | (fpBinIsNan(rsdata[1])==2)) {
-            core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
-            core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
-          }
-          rddata = 0x7fc00000; // canonical(quiet) NaN
+        // select FP format
+        if (core_->get_csr(CSR_FPMODE, t, id_) == 1) {
+          // TODO: the format selected by CSR_FPMODE == 1 is not implemented; rddata is not assigned in this branch
         } else {
-          float rs1 = intregToFloat(rsdata[0]);
-          float rs2 = intregToFloat(rsdata[1]);
-          float rs3 = intregToFloat(rsdata[2]);
-          float fpDest(0.0);
-          feclearexcept(FE_ALL_EXCEPT);
-          switch (opcode) {
-            case FMADD:
-              // rd = (rs1*rs2)+rs3
-              fpDest = (rs1 * rs2) + rs3; break;
-            case FMSUB:
-              // rd = (rs1*rs2)-rs3
-              fpDest = (rs1 * rs2) - rs3; break;
-            case FMNMADD:
-              // rd = -(rs1*rs2)+rs3
-              fpDest = -1*(rs1 * rs2) - rs3; break;
-            case FMNMSUB:
-              // rd = -(rs1*rs2)-rs3
-              fpDest = -1*(rs1 * rs2) + rs3; break;
-            default:
-              std::abort();
-              break;
-          }
+          // when the multiplicands are infinity and zero (in either order), then set the NV flag
+          if ((fpBinIsZero(rsdata[0]) && fpBinIsInf(rsdata[1])) || (fpBinIsInf(rsdata[0]) && fpBinIsZero(rsdata[1]))) {
+            core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
+            core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
+          }
+          if (fpBinIsNan(rsdata[0]) || fpBinIsNan(rsdata[1]) || fpBinIsNan(rsdata[2])) {
+            // if any operand (including the addend) is not a quiet NaN, then set the NV flag; presumably fpBinIsNan()==2 marks a signaling NaN - confirm
+            if ((fpBinIsNan(rsdata[0])==2) | (fpBinIsNan(rsdata[1])==2) | (fpBinIsNan(rsdata[2])==2)) {
+              core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit
+              core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit
+            }
+            rddata = 0x7fc00000; // canonical(quiet) NaN
+          } else {
+            float rs1 = intregToFloat(rsdata[0]);
+            float rs2 = intregToFloat(rsdata[1]);
+            float rs3 = intregToFloat(rsdata[2]);
+            float fpDest(0.0);
+            feclearexcept(FE_ALL_EXCEPT);
+            switch (opcode) {
+              case FMADD:
+                // rd = (rs1*rs2)+rs3
+                fpDest = (rs1 * rs2) + rs3; break;
+              case FMSUB:
+                // rd = (rs1*rs2)-rs3
+                fpDest = (rs1 * rs2) - rs3; break;
+              case FMNMADD:
+                // rd = -(rs1*rs2)-rs3
+                fpDest = -1*(rs1 * rs2) - rs3; break;
+              case FMNMSUB:
+                // rd = -(rs1*rs2)+rs3
+                fpDest = -1*(rs1 * rs2) + rs3; break;
+              default:
+                std::abort();
+                break;
+            }
 
-          // update fcsrs
-          update_fcrs(core_, t, id_);
+            // update fcsrs
+            update_fcrs(core_, t, id_);
 
-          rddata = floatToBin(fpDest);
+            rddata = floatToBin(fpDest);
+          }
         }
       } break;
 