diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 425a5dbf..dc8810e6 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -51,20 +51,17 @@ module VX_alu_unit #( for (genvar i = 0; i < `NUM_THREADS; i++) begin wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]}; - `IGNORE_WARNINGS_BEGIN - wire [32:0] shr_value = $signed(shr_in1) >>> alu_in2_imm[i][4:0]; - `IGNORE_WARNINGS_END - assign shr_result[i] = shr_value[31:0]; + assign shr_result[i] = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]); /* arithmetic shift of the 33-bit shr_in1; the 32'() cast keeps the low 32 bits */ end for (genvar i = 0; i < `NUM_THREADS; i++) begin always @(*) begin case (alu_op) - `ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; - `ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i]; - `ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i]; + `ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; + `ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i]; + `ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i]; //`ALU_SLL, - default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0]; + default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0]; /* left shift by the low 5 bits of the second operand */ endcase end end diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index e89b0e21..d231f501 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -53,6 +53,8 @@ module VX_decode #( wire [19:0] upper_imm = {func7, rs2, rs1, func3}; wire [11:0] alu_imm = (func3[0] && ~func3[1]) ? 
{{7{1'b0}}, rs2} : u_12; + wire [11:0] s_imm = {func7, rd}; /* 12-bit immediate consumed by the INST_S case; bit 11 is func7[6] */ + wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0}; /* 13-bit immediate, bit 0 fixed at 0 */ wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0}; wire [11:0] jalr_imm = {func7, rs2}; @@ -70,7 +72,7 @@ module VX_decode #( use_PC = 0; use_rd = 0; is_join = 0; - is_wstall = 0; + is_wstall = 0; used_regs = 0; case (opcode) @@ -184,7 +186,7 @@ module VX_decode #( use_imm = 1; use_PC = 1; is_wstall = 1; - imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0}; + imm = {{19{b_imm[12]}}, b_imm}; /* 19 copies of the sign bit + 13-bit b_imm = 32 bits */ `USED_IREG (rs1); `USED_IREG (rs2); end @@ -245,7 +247,7 @@ module VX_decode #( `INST_S: begin ex_type = `EX_LSU; op_type = `OP_BITS'({1'b1, func3}); - imm = {{20{func7[6]}}, func7, rd}; + imm = {{20{s_imm[11]}}, s_imm}; /* sign-extend from bit 11, the MSB of the 12-bit s_imm (the former func7[6]) */ `USED_IREG (rs1); `ifdef EXT_F_ENABLE if (opcode[2]) begin diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index f0fbca65..b6a575ff 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -168,7 +168,7 @@ module VX_fpu_unit #( fflags_t rsp_fflags; always @(*) begin - rsp_fflags = 0; + rsp_fflags = '0; for (integer i = 0; i < `NUM_THREADS; i++) begin if (rsp_tmask[i]) begin rsp_fflags.NX |= fflags[i].NX; diff --git a/hw/rtl/afu/VX_to_mem.v b/hw/rtl/afu/VX_to_mem.v index 6401453a..472f8cb3 100644 --- a/hw/rtl/afu/VX_to_mem.v +++ b/hw/rtl/afu/VX_to_mem.v @@ -95,7 +95,9 @@ module VX_to_mem #( always @(*) begin mem_rsp_data_out_n = mem_rsp_data_out_r; - mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in; + if (mem_rsp_in_fire) begin /* overwrite the rsp_ctr slot only while mem_rsp_in_fire is set; otherwise _n mirrors _r */ + mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in; + end end always @(posedge clk) begin @@ -108,9 +110,9 @@ module VX_to_mem #( end if (mem_rsp_in_fire) begin rsp_ctr <= rsp_ctr + 1; - mem_rsp_data_out_r <= mem_rsp_data_out_n; end end + mem_rsp_data_out_r <= mem_rsp_data_out_n; end reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r; diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index a8aba5cf..8a995b30 100644 --- a/hw/rtl/afu/vortex_afu.sv 
+++ b/hw/rtl/afu/vortex_afu.sv @@ -70,7 +70,7 @@ localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS; localparam COUT_TID_WIDTH = $clog2(`IO_COUT_SIZE); localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8; -localparam COUT_QUEUE_SIZE = 256; +localparam COUT_QUEUE_SIZE = 64; localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ; localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE; @@ -470,9 +470,7 @@ wire vx_mem_is_cout; wire vx_mem_req_valid_qual; wire vx_mem_req_ready_qual; -assign vx_mem_req_valid_qual = vx_mem_req_valid - && vx_started - && ~vx_mem_is_cout; +assign vx_mem_req_valid_qual = vx_mem_req_valid && vx_started && ~vx_mem_is_cout; /* requests matching the COUT address are acknowledged via ~cout_q_full below, so they must not also be presented to the memory arbiter */ assign vx_mem_req_ready = vx_mem_is_cout ? ~cout_q_full : vx_mem_req_ready_qual; @@ -534,8 +532,9 @@ VX_mem_arb #( .DATA_WIDTH (LMEM_LINE_WIDTH), .ADDR_WIDTH (LMEM_ADDR_WIDTH), .TAG_IN_WIDTH (AVS_REQ_TAGW), - .BUFFERED_REQ (1), - .BUFFERED_RSP (1) + .BUFFERED_REQ (0), + .BUFFERED_RSP (0), + .TYPE ("X") ) mem_arb ( .clk (clk), .reset (reset), @@ -918,7 +917,7 @@ Vortex #() vortex ( // COUT HANDLING ////////////////////////////////////////////////////////////// wire [COUT_TID_WIDTH-1:0] cout_tid; -wire [7:0] cout_char; +reg [7:0] cout_char; VX_onehot_encoder #( .N (`VX_MEM_BYTEEN_WIDTH) @@ -928,8 +927,14 @@ VX_onehot_encoder #( `UNUSED_PIN (valid) ); -wire [`VX_MEM_BYTEEN_WIDTH-1:0][7:0] vx_mem_req_data_ar = vx_mem_req_data; -assign cout_char = vx_mem_req_data_ar[cout_tid]; +VX_onehot_mux #( + .DATAW (8), + .COUNT (`VX_MEM_BYTEEN_WIDTH) +) cout_char_mux ( + .data_in (vx_mem_req_data), + .sel_in (vx_mem_req_byteen), + .data_out (cout_char) +); /* picks the byte of vx_mem_req_data whose byte-enable bit is set */ assign vx_mem_is_cout = (vx_mem_req_addr == `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> (32 - `VX_MEM_ADDR_WIDTH))); @@ -943,8 +948,8 @@ wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid && ~cout_q_empty; VX_fifo_queue #( - .DATAW (COUT_QUEUE_DATAW), - .SIZE (COUT_QUEUE_SIZE) + .DATAW (COUT_QUEUE_DATAW), + .SIZE (COUT_QUEUE_SIZE) ) cout_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_bank.v 
b/hw/rtl/cache/VX_bank.v index ade74119..17749ccb 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -188,6 +188,7 @@ module VX_bank #( wire creq_pop_unqual = !mshr_pop_unqual && !mrsq_pop_unqual && !creq_empty && !flush_enable; wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1); + assign mshr_pop = mshr_pop_unqual && !(is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed && !crsq_in_stall; // ensure core response ready diff --git a/hw/rtl/cache/VX_nc_bypass.v b/hw/rtl/cache/VX_nc_bypass.v index 51751b95..390ce32e 100644 --- a/hw/rtl/cache/VX_nc_bypass.v +++ b/hw/rtl/cache/VX_nc_bypass.v @@ -93,7 +93,6 @@ module VX_nc_bypass #( // core request handling - reg [NUM_REQS-1:0] core_req_ready_in_r; wire [NUM_REQS-1:0] core_req_valid_in_nc; wire [NUM_REQS-1:0] core_req_nc_sel; wire [NUM_REQS-1:0] core_req_nc_tids; @@ -115,210 +114,130 @@ module VX_nc_bypass #( .valid_out (core_req_nc_valid) ); - assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids; - - `UNUSED_VAR (core_req_nc_sel) - - if (NUM_REQS > 1) begin - always @(*) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - if (core_req_valid_in_nc[i]) begin - core_req_ready_in_r[i] = ~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]; - end else begin - core_req_ready_in_r[i] = core_req_ready_out[i]; - end - end - end - end else begin - `UNUSED_VAR (core_req_nc_tid) - always @(*) begin - if (core_req_valid_in_nc) begin - core_req_ready_in_r = ~mem_req_valid_in && mem_req_ready_out; - end else begin - core_req_ready_in_r = core_req_ready_out; - end - end - end - + assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids; /* requests flagged in core_req_nc_tids are masked off the core_req_valid_out path */ assign core_req_rw_out = core_req_rw_in; assign core_req_addr_out = core_req_addr_in; assign core_req_byteen_out = core_req_byteen_in; assign core_req_data_out = core_req_data_in; assign core_req_tag_out = core_req_tag_in; - assign core_req_ready_in = core_req_ready_in_r; + + if (NUM_REQS > 1) begin + 
for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? + (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i]; /* an entry flagged in core_req_valid_in_nc is ready only while mem_req_valid_in is low, mem_req_ready_out is high and its core_req_nc_sel bit is set */ + end + end else begin + `UNUSED_VAR (core_req_nc_sel) + assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out; + end // memory request handling - reg mem_req_valid_out_r; - reg mem_req_rw_out_r; - reg [MEM_DATA_SIZE-1:0] mem_req_byteen_out_r; - reg [MEM_ADDR_WIDTH-1:0] mem_req_addr_out_r; - reg [MEM_DATA_WIDTH-1:0] mem_req_data_out_r; - reg [MEM_TAG_WIDTH-1:0] mem_req_tag_out_r; - reg mem_req_ready_in_r; - - always @(*) begin - if (mem_req_valid_in) begin - mem_req_valid_out_r = 1; - mem_req_ready_in_r = mem_req_ready_out; - end else begin - mem_req_valid_out_r = core_req_nc_valid; - mem_req_ready_in_r = 0; - end - end + assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid; + assign mem_req_ready_in = mem_req_valid_in && mem_req_ready_out; /* same truth table as the removed always block: ready follows mem_req_ready_out only while mem_req_valid_in is set */ if (NUM_REQS > 1) begin - always @(*) begin - if (mem_req_valid_in) begin - mem_req_rw_out_r = mem_req_rw_in; - mem_req_addr_out_r = mem_req_addr_in; - mem_req_data_out_r = mem_req_data_in; - end else begin - mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid]; - mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH]; - for (integer i = 0; i < P; ++i) begin - mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid]; - end - end + + wire [CORE_TAG_WIDTH-1:0] core_req_tag_in_sel; + wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel; + wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel; + wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel; + wire core_req_rw_in_sel; + + wire [NUM_REQS-1:0][(CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1)-1:0] core_req_nc_mux_in; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], 
core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]}; + end + + VX_onehot_mux #( + .DATAW (CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1), + .COUNT (NUM_REQS) + ) core_req_nc_mux ( + .data_in (core_req_nc_mux_in), + .sel_in (core_req_nc_sel), + .data_out ({core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel}) + ); /* unpacks {tag, data, byteen, addr, rw} of the entry whose core_req_nc_sel bit is set; field order matches core_req_nc_mux_in */ + + assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel; + assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH]; + + for (genvar i = 0; i < P; ++i) begin + assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ? + mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in_sel; end if (D != 0) begin - wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0]; - always @(*) begin - if (mem_req_valid_in) begin - mem_req_byteen_out_r = mem_req_byteen_in; - mem_req_tag_out_r = mem_req_tag_in; - end else begin - mem_req_byteen_out_r = 0; - mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid]; - mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]}); - end + wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0]; + reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r; + always @(*) begin + mem_req_byteen_in_r = 0; + mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel; end + assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r; + assign mem_req_tag_out = mem_req_valid_in ? 
mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel}); /* outgoing tag packs core_req_nc_tid, req_addr_idx and the selected core tag, resized to MEM_TAG_WIDTH */ end else begin - always @(*) begin - if (mem_req_valid_in) begin - mem_req_byteen_out_r = mem_req_byteen_in; - mem_req_tag_out_r = mem_req_tag_in; - end else begin - mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid]; - mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]}); - end - end + assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel; + assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in_sel}); end - end else begin - always @(*) begin - if (mem_req_valid_in) begin - mem_req_rw_out_r = mem_req_rw_in; - mem_req_addr_out_r = mem_req_addr_in; - mem_req_data_out_r = mem_req_data_in; - end else begin - mem_req_rw_out_r = core_req_rw_in; - mem_req_addr_out_r = core_req_addr_in[0][D +: MEM_ADDR_WIDTH]; - for (integer i = 0; i < P; ++i) begin - mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in; - end - end + end else begin + `UNUSED_VAR (core_req_nc_tid) + + assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in; + assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in[0][D +: MEM_ADDR_WIDTH]; + + for (genvar i = 0; i < P; ++i) begin + assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ? 
+ mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in; /* when mem_req_valid_in is low, every CORE_DATA_WIDTH slice carries core_req_data_in */ end if (D != 0) begin - wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0]; - always @(*) begin - if (mem_req_valid_in) begin - mem_req_byteen_out_r = mem_req_byteen_in; - mem_req_tag_out_r = mem_req_tag_in; - end else begin - mem_req_byteen_out_r = 0; - mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in; - mem_req_tag_out_r = MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in}); - end + wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0]; + reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r; + always @(*) begin + mem_req_byteen_in_r = 0; + mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in; /* core byte enables go into the lane picked by req_addr_idx; all other lanes stay 0 */ end + assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r; + assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in}); end else begin - always @(*) begin - if (mem_req_valid_in) begin - mem_req_byteen_out_r = mem_req_byteen_in; - mem_req_tag_out_r = mem_req_tag_in; - end else begin - mem_req_byteen_out_r = core_req_byteen_in; - mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in); - end - end + assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in; + assign mem_req_tag_out = mem_req_valid_in ? 
mem_req_tag_in : MEM_TAG_WIDTH'(core_req_tag_in); end end - assign mem_req_valid_out = mem_req_valid_out_r; - assign mem_req_rw_out = mem_req_rw_out_r; - assign mem_req_addr_out = mem_req_addr_out_r; - assign mem_req_byteen_out = mem_req_byteen_out_r; - assign mem_req_data_out = mem_req_data_out_r; - assign mem_req_tag_out = mem_req_tag_out_r; - assign mem_req_ready_in = mem_req_ready_in_r; - // core response handling - reg [NUM_REQS-1:0] core_rsp_valid_out_r; - reg [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out_r; - reg [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out_r; - reg [NUM_RSP_TAGS-1:0] core_rsp_ready_in_r; - wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT]; if (NUM_REQS > 1) begin wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + reg [NUM_REQS-1:0] core_rsp_valid_in_r; always @(*) begin - if (is_mem_rsp_nc) begin - core_rsp_valid_out_r = 0; - core_rsp_valid_out_r[rsp_tid] = 1; - for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin - core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; - end - core_rsp_ready_in_r = 0; - end else begin - core_rsp_valid_out_r = core_rsp_valid_in; - core_rsp_tag_out_r = core_rsp_tag_in; - core_rsp_ready_in_r = core_rsp_ready_out; - end + core_rsp_valid_in_r = 0; + core_rsp_valid_in_r[rsp_tid] = 1; /* one-hot valid: only the bit at index rsp_tid (taken from mem_rsp_tag_in) is set */ + end + assign core_rsp_valid_out = is_mem_rsp_nc ? core_rsp_valid_in_r : core_rsp_valid_in; + assign core_rsp_ready_in = is_mem_rsp_nc ? '0 : core_rsp_ready_out; /* core_rsp_ready_in is forced low while is_mem_rsp_nc is set */ + end else begin + assign core_rsp_valid_out = is_mem_rsp_nc || core_rsp_valid_in; + assign core_rsp_ready_in = ~is_mem_rsp_nc && core_rsp_ready_out; + end + + if (D != 0) begin + wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_data_out[i] = is_mem_rsp_nc ? 
+ mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i]; /* while is_mem_rsp_nc is set, every entry gets the CORE_DATA_WIDTH slice selected by rsp_addr_idx */ end end else begin - always @(*) begin - if (is_mem_rsp_nc) begin - core_rsp_valid_out_r = 1; - core_rsp_tag_out_r = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; - core_rsp_ready_in_r = 0; - end else begin - core_rsp_valid_out_r = core_rsp_valid_in; - core_rsp_tag_out_r = core_rsp_tag_in; - core_rsp_ready_in_r = core_rsp_ready_out; - end + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_rsp_data_out[i] = is_mem_rsp_nc ? mem_rsp_data_in : core_rsp_data_in[i]; end end - if (D != 0) begin - wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; - always @(*) begin - if (is_mem_rsp_nc) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - core_rsp_data_out_r[i] = mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; - end - end else begin - core_rsp_data_out_r = core_rsp_data_in; - end - end - end else begin - always @(*) begin - if (is_mem_rsp_nc) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - core_rsp_data_out_r[i] = mem_rsp_data_in; - end - end else begin - core_rsp_data_out_r = core_rsp_data_in; - end - end + for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin + assign core_rsp_tag_out[i] = is_mem_rsp_nc ? 
mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in[i]; /* while is_mem_rsp_nc is set, the tag is the low CORE_TAG_WIDTH bits of mem_rsp_tag_in */ end - - assign core_rsp_valid_out = core_rsp_valid_out_r; - assign core_rsp_data_out = core_rsp_data_out_r; - assign core_rsp_tag_out = core_rsp_tag_out_r; - assign core_rsp_ready_in = core_rsp_ready_in_r; // memory response handling diff --git a/hw/rtl/libs/VX_onehot_encoder.v b/hw/rtl/libs/VX_onehot_encoder.v index a6236207..bd6b6081 100644 --- a/hw/rtl/libs/VX_onehot_encoder.v +++ b/hw/rtl/libs/VX_onehot_encoder.v @@ -67,7 +67,6 @@ module VX_onehot_encoder #( reg [LN-1:0] index_r; if (REVERSE) begin - always @(*) begin index_r = 'x; for (integer i = N-1; i >= 0; --i) begin @@ -76,7 +75,6 @@ module VX_onehot_encoder #( end end end - end else begin always @(*) begin index_r = 'x; diff --git a/hw/rtl/libs/VX_onehot_mux.v b/hw/rtl/libs/VX_onehot_mux.v new file mode 100644 index 00000000..7c5ddfda --- /dev/null +++ b/hw/rtl/libs/VX_onehot_mux.v @@ -0,0 +1,20 @@ +`include "VX_platform.vh" + +module VX_onehot_mux #( + parameter DATAW = 1, + parameter COUNT = 1 +) ( + input wire [COUNT-1:0][DATAW-1:0] data_in, + input wire [COUNT-1:0] sel_in, + output wire [DATAW-1:0] data_out +); /* data_in holds COUNT entries of DATAW bits; sel_in has one select bit per entry */ + if (COUNT > 1) begin + for (genvar i = 0; i < COUNT; ++i) begin + assign data_out = sel_in[i] ? 
data_in[i] : 'z; /* entry i drives data_out only while sel_in[i] is set; otherwise this driver is high-Z */ + end + end else begin + `UNUSED_VAR (sel_in) + assign data_out = data_in; /* COUNT not greater than 1: data_in is passed straight through and sel_in is ignored */ + end + +endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_stream_arbiter.v b/hw/rtl/libs/VX_stream_arbiter.v index 0feb7cdf..0a15f467 100644 --- a/hw/rtl/libs/VX_stream_arbiter.v +++ b/hw/rtl/libs/VX_stream_arbiter.v @@ -24,8 +24,7 @@ module VX_stream_arbiter #( if (NUM_REQS > 1) begin wire sel_valid; wire sel_ready; - wire [LOG_NUM_REQS-1:0] sel_idx; - wire [NUM_REQS-1:0] sel_1hot; + wire [NUM_REQS-1:0] sel_1hot; if (TYPE == "X") begin VX_fixed_arbiter #( @@ -37,8 +36,8 @@ module VX_stream_arbiter #( .requests (valid_in), .enable (sel_ready), .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot (sel_1hot) + .grant_onehot (sel_1hot), + `UNUSED_PIN (grant_index) ); end else if (TYPE == "R") begin VX_rr_arbiter #( @@ -50,8 +49,8 @@ module VX_stream_arbiter #( .requests (valid_in), .enable (sel_ready), .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot (sel_1hot) + .grant_onehot (sel_1hot), + `UNUSED_PIN (grant_index) ); end else if (TYPE == "F") begin VX_fair_arbiter #( @@ -63,8 +62,8 @@ module VX_stream_arbiter #( .requests (valid_in), .enable (sel_ready), .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot (sel_1hot) + .grant_onehot (sel_1hot), + `UNUSED_PIN (grant_index) ); end else if (TYPE == "M") begin VX_matrix_arbiter #( @@ -76,13 +75,24 @@ module VX_stream_arbiter #( .requests (valid_in), .enable (sel_ready), .grant_valid (sel_valid), - .grant_index (sel_idx), - .grant_onehot (sel_1hot) + .grant_onehot (sel_1hot), + `UNUSED_PIN (grant_index) ); end else begin $error ("invalid parameter"); end + wire [DATAW-1:0] data_in_sel; + + VX_onehot_mux #( + .DATAW (DATAW), + .COUNT (NUM_REQS) + ) data_in_mux ( + .data_in (data_in), + .sel_in (sel_1hot), + .data_out (data_in_sel) + ); /* the input is now picked with the arbiter's one-hot grant (sel_1hot) instead of indexing by grant_index */ + VX_skid_buffer #( .DATAW (DATAW), .PASSTHRU (!BUFFERED) @@ -90,7 +100,7 @@ module VX_stream_arbiter #( .clk (clk), .reset (reset), 
+ .valid_in (sel_valid), - .data_in (data_in[sel_idx]), + .data_in (data_in_sel), .ready_in (sel_ready), .valid_out (valid_out), .data_out (data_out), diff --git a/tests/opencl/psort/main.cc b/tests/opencl/psort/main.cc index e6e14c65..ecd39c04 100644 --- a/tests/opencl/psort/main.cc +++ b/tests/opencl/psort/main.cc @@ -189,7 +189,7 @@ int main (int argc, char **argv) { for (int i = 0; i < size; ++i) { float ref = h_a[i]; int pos = 0; - for (uint32_t j = 0; j < size; ++j) { + for (int j = 0; j < size; ++j) { /* j is a signed int like i, so the j < i tie-break below compares like types */ float cur = h_a[j]; pos += (cur < ref) || (cur == ref && j < i); }