diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index b0d0c20f..d1b30016 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -25,7 +25,7 @@ module VX_alu_unit ( wire[31:0] alu_in2 = (src_rs2 == `RS2_IMMED) ? itype_immed : src_b; wire[31:0] upper_immed_s = {upper_immed, {12{1'b0}}}; - + reg [7:0] inst_delay; reg [7:0] curr_inst_delay; @@ -70,7 +70,6 @@ module VX_alu_unit ( `ALU_SUBU: alu_result = (alu_in1 >= alu_in2) ? 32'h0 : 32'hffffffff; `ALU_LUI: alu_result = upper_immed_s; `ALU_AUIPC: alu_result = $signed(curr_PC) + $signed(upper_immed_s); - // TODO: profitable to roll these exceptional cases into inst_delay_tmp to avoid pipeline when possible? `ALU_MUL: alu_result = mul_result[31:0]; `ALU_MULH: alu_result = mul_result[63:32]; `ALU_MULHSU: alu_result = mul_result[63:32]; @@ -80,7 +79,7 @@ module VX_alu_unit ( `ALU_REM: alu_result = (alu_in2 == 0) ? alu_in1 : rem_result_signed; `ALU_REMU: alu_result = (alu_in2 == 0) ? alu_in1 : rem_result_unsigned; default: alu_result = 32'h0; - endcase // alu_op + endcase end VX_divide #( diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index 82c42d83..3da014ce 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -80,7 +80,7 @@ module VX_back_end #( .clk (clk), .reset (reset), .lsu_req_if (lsu_req_if), - .mem_wb_if_p1 (mem_wb_if), + .mem_wb_if (mem_wb_if), .dcache_req_if (dcache_req_if), .dcache_rsp_if (dcache_rsp_if), .delay (mem_delay), diff --git a/hw/rtl/VX_csr_pipe.v b/hw/rtl/VX_csr_pipe.v index beb8638f..aa6980ab 100644 --- a/hw/rtl/VX_csr_pipe.v +++ b/hw/rtl/VX_csr_pipe.v @@ -73,7 +73,7 @@ module VX_csr_pipe #( for (i = 0; i < `NUM_THREADS; i++) begin assign csr_wb_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i : (csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) : - csr_read_data_s2; + csr_read_data_s2; end assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && (| csr_req_if.valid); diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index af7afed5..deb3f44b 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -227,7 +227,7 @@ module VX_decode( case (curr_opcode) `INST_B: begin // $display("BRANCH IN DECODE"); - temp_branch_stall = 1'b1 && in_valid; + temp_branch_stall = in_valid; case (func3) 3'h0: temp_branch_type = `BR_EQ; 3'h1: temp_branch_type = `BR_NE; @@ -240,15 +240,15 @@ module VX_decode( end `INST_JAL: begin temp_branch_type = `BR_NO; - temp_branch_stall = 1'b1 && in_valid; + temp_branch_stall = in_valid; end `INST_JALR: begin temp_branch_type = `BR_NO; - temp_branch_stall = 1'b1 && in_valid; + temp_branch_stall = in_valid; end default: begin temp_branch_type = `BR_NO; - temp_branch_stall = 1'b0 && in_valid; + temp_branch_stall = 1'b0; end endcase end diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 21118a76..21da158e 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -72,7 +72,7 @@ `define CSR_WIDTH 12 -`define DIV_LATENCY 18 +`define DIV_LATENCY 22 `define MUL_LATENCY 2 diff --git a/hw/rtl/VX_exec_unit.v b/hw/rtl/VX_exec_unit.v index 133b0869..0b4259b5 100644 --- a/hw/rtl/VX_exec_unit.v +++ b/hw/rtl/VX_exec_unit.v @@ -15,18 +15,18 @@ module VX_exec_unit ( output wire delay ); - wire[`NUM_THREADS-1:0][31:0] in_a_reg_data; - wire[`NUM_THREADS-1:0][31:0] in_b_reg_data; - wire[4:0] in_alu_op; - wire in_rs2_src; - wire[31:0] in_itype_immed; + wire [`NUM_THREADS-1:0][31:0] in_a_reg_data; + wire [`NUM_THREADS-1:0][31:0] in_b_reg_data; + wire [4:0] in_alu_op; + wire in_rs2_src; + wire [31:0] in_itype_immed; `DEBUG_BEGIN - wire[2:0] in_branch_type; + wire [2:0] in_branch_type; `DEBUG_END - wire[19:0] in_upper_immed; - wire in_jal; - wire[31:0] in_jal_offset; - wire[31:0] in_curr_PC; + wire [19:0] in_upper_immed; + wire in_jal; + wire [31:0] in_jal_offset; + wire [31:0] in_curr_PC; assign in_a_reg_data = exec_unit_req_if.a_reg_data; assign in_b_reg_data = exec_unit_req_if.b_reg_data; @@ -39,12 +39,12 @@ module VX_exec_unit ( assign in_jal_offset = exec_unit_req_if.jal_offset; assign in_curr_PC = exec_unit_req_if.curr_PC; - wire[`NUM_THREADS-1:0][31:0] alu_result; - wire[`NUM_THREADS-1:0] alu_stall; + wire [`NUM_THREADS-1:0][31:0] alu_result; + wire [`NUM_THREADS-1:0] alu_stall; genvar i; generate - for (i = 0; i < `NUM_THREADS; i++) begin : alu_defs + for (i = 0; i < `NUM_THREADS; i++) begin VX_alu_unit alu_unit ( .clk (clk), .reset (reset), @@ -65,20 +65,17 @@ module VX_exec_unit ( assign delay = no_slot_exec || internal_stall; -`DEBUG_BEGIN wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index; - wire jal_branch_found_valid; -`DEBUG_END VX_priority_encoder #( .N(`NUM_THREADS) ) choose_alu_result ( - .data_in (exec_unit_req_if.valid), - .data_out (jal_branch_use_index), - .valid_out (jal_branch_found_valid) + .data_in (exec_unit_req_if.valid), + .data_out (jal_branch_use_index), + `UNUSED_PIN (valid_out) ); - wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index]; + wire [31:0] branch_use_alu_result = alu_result[jal_branch_use_index]; reg temp_branch_dir; always @(*) @@ -95,7 +92,7 @@ module VX_exec_unit ( endcase // in_branch_type end - wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data; + wire [`NUM_THREADS-1:0][31:0] duplicate_PC_data; generate for (i = 0; i < `NUM_THREADS; i++) begin diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index cb773cde..f2f8af73 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -12,7 +12,7 @@ module VX_lsu_unit #( VX_lsu_req_if lsu_req_if, // Write back to GPR - VX_wb_if mem_wb_if_p1, + VX_wb_if mem_wb_if, // Dcache interface VX_cache_core_req_if dcache_req_if, @@ -21,62 +21,68 @@ module VX_lsu_unit #( output wire delay ); - VX_wb_if mem_wb_if(); + VX_wb_if mem_wb_unqual_if(); - wire[`NUM_THREADS-1:0][31:0] use_address; - wire[`NUM_THREADS-1:0][31:0] use_store_data; - wire[`NUM_THREADS-1:0] use_valid; - wire[`BYTE_EN_BITS-1:0] use_mem_read; - wire[`BYTE_EN_BITS-1:0] use_mem_write; - wire[4:0] use_rd; - wire[`NW_BITS-1:0] use_warp_num; - wire[1:0] use_wb; - wire[31:0] use_pc; + wire [`NUM_THREADS-1:0] use_valid; + wire use_req_rw; + wire [`NUM_THREADS-1:0][29:0] use_req_addr; + wire [`NUM_THREADS-1:0][1:0] use_req_offset; + wire [`NUM_THREADS-1:0][3:0] use_req_byteen; + wire [`NUM_THREADS-1:0][31:0] use_req_data; + wire [`BYTE_EN_BITS-1:0] use_mem_read; + wire [4:0] use_rd; + wire [`NW_BITS-1:0] use_warp_num; + wire [1:0] use_wb; + wire [31:0] use_pc; genvar i; // Generate Full Addresses - wire[`NUM_THREADS-1:0][31:0] full_address; + wire[`NUM_THREADS-1:0][31:0] full_address; for (i = 0; i < `NUM_THREADS; i++) begin assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset; end - VX_generic_register #( - .N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65) - ) lsu_buffer ( - .clk (clk), - .reset (reset), - .stall (delay), - .flush (1'b0), - .in ({full_address,lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}), - .out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc }) - ); - - wire core_req_rw = (use_mem_write != `BYTE_EN_NO); - - wire [`NUM_THREADS-1:0][4:0] mem_req_offset; - wire [`NUM_THREADS-1:0][29:0] mem_req_addr; - wire [`NUM_THREADS-1:0][3:0] mem_req_byteen; - wire [`NUM_THREADS-1:0][31:0] mem_req_data; - - wire [`NUM_THREADS-1:0][4:0] mem_rsp_offset; - wire[2:0] core_rsp_mem_read; + wire mem_req_rw = (lsu_req_if.mem_write != `BYTE_EN_NO); reg [3:0] wmask; always @(*) begin - case ((core_req_rw ? use_mem_write[1:0] : use_mem_read[1:0])) + case ((mem_req_rw ? lsu_req_if.mem_write[1:0] : lsu_req_if.mem_read[1:0])) 0: wmask = 4'b0001; 1: wmask = 4'b0011; default : wmask = 4'b1111; endcase end + wire [`NUM_THREADS-1:0][29:0] mem_req_addr; + wire [`NUM_THREADS-1:0][1:0] mem_req_offset; + wire [`NUM_THREADS-1:0][3:0] mem_req_byteen; + wire [`NUM_THREADS-1:0][31:0] mem_req_data; + for (i = 0; i < `NUM_THREADS; ++i) begin - assign mem_req_addr[i] = use_address[i][31:2]; - assign mem_req_offset[i] = (5'(use_address[i][1:0])) << 3; - assign mem_req_byteen[i] = (wmask << use_address[i][1:0]); - assign mem_req_data[i] = (use_store_data[i] << mem_req_offset[i]); - end + assign mem_req_addr[i] = full_address[i][31:2]; + assign mem_req_offset[i] = full_address[i][1:0]; + assign mem_req_byteen[i] = wmask << full_address[i][1:0]; + assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0}; + end + +`IGNORE_WARNINGS_BEGIN + wire[`NUM_THREADS-1:0][31:0] use_address; +`IGNORE_WARNINGS_END + + VX_generic_register #( + .N((`NUM_THREADS * 1) + (`NUM_THREADS * 32) + `BYTE_EN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + 5 + `NW_BITS + 2 + 32) + ) lsu_buffer ( + .clk (clk), + .reset (reset), + .stall (delay), + .flush (1'b0), + .in ({lsu_req_if.valid, full_address, lsu_req_if.mem_read, mem_req_rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}), + .out ({use_valid , use_address, use_mem_read , use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd , use_warp_num , use_wb , use_pc}) + ); + + wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset; + wire [`BYTE_EN_BITS-1:0] core_rsp_mem_read; reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0]; @@ -84,7 +90,7 @@ module VX_lsu_unit #( wire mrq_full; wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready - && (0 == core_req_rw); // only push read requests + && (0 == use_req_rw); // only push read requests wire mrq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; @@ -95,18 +101,18 @@ module VX_lsu_unit #( wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd); VX_indexable_queue #( - .DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 2 + (`NUM_THREADS * 5) + `BYTE_EN_BITS + 5 + `NW_BITS), + .DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 2 + (`NUM_THREADS * 2) + `BYTE_EN_BITS + 5 + `NW_BITS), .SIZE (`DCREQ_SIZE) ) mem_req_queue ( .clk (clk), .reset (reset), - .write_data ({mrq_write_addr, use_pc, use_wb, mem_req_offset, use_mem_read, use_rd, use_warp_num}), + .write_data ({mrq_write_addr, use_pc, use_wb, use_req_offset, use_mem_read, use_rd, use_warp_num}), .write_addr (mrq_write_addr), .push (mrq_push), .full (mrq_full), .pop (mrq_pop), .read_addr (mrq_read_addr), - .read_data ({dbg_mrq_write_addr, mem_wb_if.curr_PC, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num}), + .read_data ({dbg_mrq_write_addr, mem_wb_unqual_if.curr_PC, mem_wb_unqual_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_unqual_if.rd, mem_wb_unqual_if.warp_num}), `UNUSED_PIN (empty) ); @@ -122,11 +128,11 @@ module VX_lsu_unit #( // Core Request - assign dcache_req_if.valid = use_valid & {`NUM_THREADS{~mrq_full}}; - assign dcache_req_if.rw = {`NUM_THREADS{core_req_rw}}; - assign dcache_req_if.byteen= mem_req_byteen; - assign dcache_req_if.addr = mem_req_addr; - assign dcache_req_if.data = mem_req_data; + assign dcache_req_if.valid = use_valid & {`NUM_THREADS{~mrq_full}}; + assign dcache_req_if.rw = {`NUM_THREADS{use_req_rw}}; + assign dcache_req_if.byteen = use_req_byteen; + assign dcache_req_if.addr = use_req_addr; + assign dcache_req_if.data = use_req_data; `ifdef DBG_CORE_REQ_INFO assign dcache_req_if.tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr}; @@ -143,33 +149,33 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0][31:0] rsp_data_shifted; for (i = 0; i < `NUM_THREADS; ++i) begin - assign rsp_data_shifted[i] = (dcache_rsp_if.data[i] >> mem_rsp_offset[i]); + assign rsp_data_shifted[i] = dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0}; always @(*) begin case (core_rsp_mem_read) - `BYTE_EN_SB: core_rsp_data[i] = rsp_data_shifted[i][7] ? (rsp_data_shifted[i] | 32'hFFFFFF00) : (rsp_data_shifted[i] & 32'h000000FF); - `BYTE_EN_SH: core_rsp_data[i] = rsp_data_shifted[i][15] ? (rsp_data_shifted[i] | 32'hFFFF0000) : (rsp_data_shifted[i] & 32'h0000FFFF); - `BYTE_EN_UB: core_rsp_data[i] = (rsp_data_shifted[i] & 32'h000000FF); - `BYTE_EN_UH: core_rsp_data[i] = (rsp_data_shifted[i] & 32'h0000FFFF); + `BYTE_EN_SB: core_rsp_data[i] = {{24{rsp_data_shifted[i][7]}}, rsp_data_shifted[i][7:0]}; + `BYTE_EN_SH: core_rsp_data[i] = {{16{rsp_data_shifted[i][15]}}, rsp_data_shifted[i][15:0]}; + `BYTE_EN_UB: core_rsp_data[i] = 32'(rsp_data_shifted[i][7:0]); + `BYTE_EN_UH: core_rsp_data[i] = 32'(rsp_data_shifted[i][15:0]); default : core_rsp_data[i] = rsp_data_shifted[i]; endcase end end - assign mem_wb_if.valid = dcache_rsp_if.valid; - assign mem_wb_if.data = core_rsp_data; + assign mem_wb_unqual_if.valid = dcache_rsp_if.valid; + assign mem_wb_unqual_if.data = core_rsp_data; // Can't accept new response - assign dcache_rsp_if.ready = !(no_slot_mem & (|mem_wb_if_p1.valid)); + assign dcache_rsp_if.ready = !(no_slot_mem & (|mem_wb_if.valid)); // From LSU to WB localparam WB_REQ_SIZE = (`NUM_THREADS) + (`NUM_THREADS * 32) + (`NW_BITS) + (5) + (2) + 32; - VX_generic_register #(.N(WB_REQ_SIZE)) lsu_to_wb( + VX_generic_register #(.N(WB_REQ_SIZE)) lsu_to_wb ( .clk (clk), .reset (reset), .stall (no_slot_mem), .flush (1'b0), - .in ({mem_wb_if.valid , mem_wb_if.data , mem_wb_if.warp_num , mem_wb_if.rd , mem_wb_if.wb , mem_wb_if.curr_PC }), - .out ({mem_wb_if_p1.valid, mem_wb_if_p1.data, mem_wb_if_p1.warp_num, mem_wb_if_p1.rd, mem_wb_if_p1.wb, mem_wb_if_p1.curr_PC}) + .in ({mem_wb_unqual_if.valid, mem_wb_unqual_if.data, mem_wb_unqual_if.warp_num, mem_wb_unqual_if.rd, mem_wb_unqual_if.wb, mem_wb_unqual_if.curr_PC}), + .out ({mem_wb_if.valid, mem_wb_if.data, mem_wb_if.warp_num, mem_wb_if.rd, mem_wb_if.wb, mem_wb_if.curr_PC}) ); `SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid); @@ -190,12 +196,12 @@ module VX_lsu_unit #( `ifdef DBG_PRINT_CORE_DCACHE always @(posedge clk) begin if ((| dcache_req_if.valid) && dcache_req_if.ready) begin - $display("%t: D%0d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h", - $time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data); + $display("%t: D%0d$ req: valid=%b, addr=%0h, tag=%0h, rw=%0b, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h", + $time, CORE_ID, use_valid, use_address, mrq_write_addr, use_req_rw, use_pc, use_rd, use_warp_num, use_req_byteen, use_req_data); end if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin $display("%t: D%0d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", - $time, CORE_ID, mem_wb_if.valid, mrq_read_addr, mem_wb_if.curr_PC, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data); + $time, CORE_ID, mem_wb_unqual_if.valid, mrq_read_addr, mem_wb_unqual_if.curr_PC, mem_wb_unqual_if.rd, mem_wb_unqual_if.warp_num, mem_wb_unqual_if.data); end end `endif diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 81825e54..9621d47b 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -1,14 +1,14 @@ `include "VX_define.vh" module VX_warp_sched ( - input wire clk, // Clock - input wire reset, - input wire stall, + input wire clk, + input wire reset, + input wire stall, // Wspawn - input wire wspawn, - input wire[31:0] wsapwn_pc, - input wire[`NUM_WARPS-1:0] wspawn_new_active, + input wire wspawn, + input wire[31:0] wsapwn_pc, + input wire[`NUM_WARPS-1:0] wspawn_new_active, // CTM input wire ctm, @@ -28,38 +28,38 @@ module VX_warp_sched ( // WSTALL input wire wstall, - input wire[`NW_BITS-1:0] wstall_warp_num, + input wire [`NW_BITS-1:0] wstall_warp_num, // Split input wire is_split, input wire dont_split, - input wire[`NUM_THREADS-1:0] split_new_mask, - input wire[`NUM_THREADS-1:0] split_later_mask, - input wire[31:0] split_save_pc, - input wire[`NW_BITS-1:0] split_warp_num, + input wire [`NUM_THREADS-1:0] split_new_mask, + input wire [`NUM_THREADS-1:0] split_later_mask, + input wire [31:0] split_save_pc, + input wire [`NW_BITS-1:0] split_warp_num, // Join input wire is_join, - input wire[`NW_BITS-1:0] join_warp_num, + input wire [`NW_BITS-1:0] join_warp_num, // JAL input wire jal, - input wire[31:0] dest, - input wire[`NW_BITS-1:0] jal_warp_num, + input wire [31:0] dest, + input wire [`NW_BITS-1:0] jal_warp_num, // Branch input wire branch_valid, input wire branch_dir, - input wire[31:0] branch_dest, - input wire[`NW_BITS-1:0] branch_warp_num, + input wire [31:0] branch_dest, + input wire [`NW_BITS-1:0] branch_warp_num, - output wire[`NUM_THREADS-1:0] thread_mask, - output wire[`NW_BITS-1:0] warp_num, - output wire[31:0] warp_pc, + output wire [`NUM_THREADS-1:0] thread_mask, + output wire [`NW_BITS-1:0] warp_num, + output wire [31:0] warp_pc, output wire busy, output wire scheduled_warp, - input wire[`NW_BITS-1:0] icache_stage_wid, + input wire [`NW_BITS-1:0] icache_stage_wid, input wire icache_stage_response ); wire update_use_wspawn; @@ -209,18 +209,18 @@ module VX_warp_sched ( // Branch if (branch_valid) begin - if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest; + if (branch_dir) begin + warp_pcs[branch_warp_num] <= branch_dest; + end warp_stalled[branch_warp_num] <= 0; end // Lock/Release if (scheduled_warp && !stall) begin warp_lock[warp_num] <= 1'b1; - // warp_lock <= {`NUM_WARPS{1'b1}}; end if (icache_stage_response) begin warp_lock[icache_stage_wid] <= 1'b0; - // warp_lock <= {`NUM_WARPS{1'b0}}; end end diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 2f835ce1..849d8907 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -29,12 +29,7 @@ module VX_cache_core_rsp_merge #( input wire core_rsp_ready ); - reg [NUM_BANKS-1:0] per_bank_core_rsp_pop_unqual; - - assign per_bank_core_rsp_ready = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}}; - wire [`BANK_BITS-1:0] main_bank_index; - wire grant_valid; VX_fair_arbiter #( .N(NUM_BANKS) ) sel_bank ( @@ -42,10 +37,14 @@ module VX_cache_core_rsp_merge #( .reset (reset), .requests (per_bank_core_rsp_valid), .grant_index (main_bank_index), - .grant_valid (grant_valid), + `UNUSED_PIN (grant_valid), `UNUSED_PIN (grant_onehot) ); + reg [NUM_BANKS-1:0] per_bank_core_rsp_pop_unqual; + + assign per_bank_core_rsp_ready = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}}; + integer i; if (CORE_TAG_ID_BITS != 0) begin @@ -54,7 +53,7 @@ module VX_cache_core_rsp_merge #( core_rsp_valid = 0; core_rsp_data = 0; for (i = 0; i < NUM_BANKS; i++) begin - if (grant_valid && per_bank_core_rsp_valid[i] + if (per_bank_core_rsp_valid[i] && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin core_rsp_valid[per_bank_core_rsp_tid[i]] = 1; core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; @@ -70,7 +69,7 @@ module VX_cache_core_rsp_merge #( core_rsp_data = 0; core_rsp_tag = 0; for (i = 0; i < NUM_BANKS; i++) begin - if (grant_valid && per_bank_core_rsp_valid[i] + if (per_bank_core_rsp_valid[i] && !core_rsp_valid[per_bank_core_rsp_tid[i]] && ((main_bank_index == `BANK_BITS'(i)) || (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin