merged fpu_port branch

This commit is contained in:
Blaise Tine
2020-07-31 17:13:22 -04:00
508 changed files with 45407 additions and 41832 deletions

View File

@@ -105,8 +105,8 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
/* verilator lint_off UNUSED */
wire[31:0] debug_use_pc_st0;
wire[`WB_BITS-1:0] debug_wb_st0;
wire[31:0] debug_pc_st0;
wire debug_wb_st0;
wire[`NR_BITS-1:0] debug_rd_st0;
wire[`NW_BITS-1:0] debug_warp_num_st0;
wire debug_rw_st0;
@@ -114,8 +114,8 @@ module VX_bank #(
wire[`REQS_BITS-1:0] debug_tid_st0;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
wire[31:0] debug_use_pc_st1e;
wire[`WB_BITS-1:0] debug_wb_st1e;
wire[31:0] debug_pc_st1e;
wire debug_wb_st1e;
wire[`NR_BITS-1:0] debug_rd_st1e;
wire[`NW_BITS-1:0] debug_warp_num_st1e;
wire debug_rw_st1e;
@@ -123,8 +123,8 @@ module VX_bank #(
wire[`REQS_BITS-1:0] debug_tid_st1e;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
wire[31:0] debug_use_pc_st2;
wire[`WB_BITS-1:0] debug_wb_st2;
wire[31:0] debug_pc_st2;
wire debug_wb_st2;
wire[`NR_BITS-1:0] debug_rd_st2;
wire[`NW_BITS-1:0] debug_warp_num_st2;
wire debug_rw_st2;
@@ -360,7 +360,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
end
`endif
@@ -432,6 +432,9 @@ module VX_bank #(
&& (addr_st2 == addr_st1e);
VX_tag_data_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CORE_TAG_ID_BITS(CORE_TAG_ID_BITS),
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
@@ -442,6 +445,15 @@ module VX_bank #(
) tag_data_access (
.clk (clk),
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
.debug_pc_st1e(debug_pc_st1e),
.debug_wb_st1e(debug_wb_st1e),
.debug_rd_st1e(debug_rd_st1e),
.debug_warp_num_st1e(debug_warp_num_st1e),
.debug_tagid_st1e(debug_tagid_st1e),
`endif
.stall (stall_bank_pipe),
.stall_bank_pipe(stall_bank_pipe),
@@ -478,7 +490,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
end
`endif
@@ -513,13 +525,13 @@ module VX_bank #(
.reset (reset),
.stall (stall_bank_pipe),
.flush (0),
.in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
.in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e, snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
);
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end
`endif
@@ -587,7 +599,7 @@ module VX_bank #(
// Broadcast
.is_fill_st1 (is_fill_st1[STAGE_1_CYCLES-1]),
.fill_addr_st1 (addr_st1e),
.pending_hazard (mrvq_pending_hazard_st1e),
.pending_hazard_st1 (mrvq_pending_hazard_st1e),
// Dequeue
.miss_resrv_pop (mrvq_pop),

View File

@@ -130,10 +130,10 @@ module VX_cache #(
`ifdef DBG_CORE_REQ_INFO
/* verilator lint_off UNUSED */
wire[31:0] debug_core_req_use_pc;
wire[`WB_BITS-1:0] debug_core_req_wb;
wire debug_core_req_wb;
wire[`NR_BITS-1:0] debug_core_req_rd;
wire[`NW_BITS-1:0] debug_core_req_warp_num;
wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
/* verilator lint_on UNUSED */
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin

View File

@@ -1,10 +1,13 @@
`ifndef VX_CACHE_CONFIG
`define VX_CACHE_CONFIG
`include "VX_define.vh"
`include "VX_platform.vh"
`include "VX_scope.vh"
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_REQ_TAG_WIDTH)
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
// tag rw byteen tid
`define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS)

View File

@@ -1,4 +1,3 @@
`include "VX_cache_config.vh"
module VX_cache_core_req_bank_sel #(

View File

@@ -41,7 +41,7 @@ module VX_cache_miss_resrv #(
input wire is_fill_st1,
input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st1,
output wire pending_hazard,
output wire pending_hazard_st1,
// Miss dequeue
input wire miss_resrv_pop,
@@ -84,7 +84,7 @@ module VX_cache_miss_resrv #(
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
end
assign pending_hazard = |(valid_address_match);
assign pending_hazard_st1 = |(valid_address_match);
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;

View File

@@ -41,8 +41,8 @@ module VX_snp_forwarder #(
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr, dbg_sfq_write_addr;
wire sfq_push, sfq_pop, sfq_full;
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr;
wire sfq_acquire, sfq_release, sfq_full;
wire fwdin_valid;
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag;
@@ -56,32 +56,30 @@ module VX_snp_forwarder #(
assign sfq_read_addr = fwdin_tag;
assign sfq_push = snp_req_valid && !sfq_full && fwdout_ready;
assign sfq_pop = snp_rsp_valid;
assign sfq_acquire = snp_req_valid && !sfq_full && fwdout_ready;
assign sfq_release = snp_rsp_valid;
VX_index_queue #(
.DATAW (`LOG2UP(SNRQ_SIZE) + 1 +`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
VX_cam_buffer #(
.DATAW (`DRAM_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
.SIZE (SNRQ_SIZE)
) snp_fwd_queue (
.clk (clk),
.reset (reset),
.write_data ({sfq_write_addr, snp_req_addr, snp_req_invalidate, snp_req_tag}),
.write_addr (sfq_write_addr),
.push (sfq_push),
.pop (sfq_pop),
.full (sfq_full),
.read_addr (sfq_read_addr),
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
`UNUSED_PIN (empty)
) snp_fwd_buffer (
.clk (clk),
.reset (reset),
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
.write_addr (sfq_write_addr),
.acquire_slot (sfq_acquire),
.release_slot (sfq_release),
.read_addr (sfq_read_addr),
.read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
.full (sfq_full)
);
always @(posedge clk) begin
if (sfq_push) begin
if (sfq_acquire) begin
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
end
if (fwdin_fire) begin
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
assert(sfq_read_addr == dbg_sfq_write_addr);
end
end

View File

@@ -1,26 +1,38 @@
`include "VX_cache_config.vh"
module VX_tag_data_access #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
parameter CORE_TAG_ID_BITS = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 0,
parameter CACHE_SIZE = 0,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 0,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 0,
parameter NUM_BANKS = 0,
// Size of a word in bytes
parameter WORD_SIZE = 0,
parameter WORD_SIZE = 0,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 0,
parameter STAGE_1_CYCLES = 0,
// Enable cache writeable
parameter WRITE_ENABLE = 0,
parameter WRITE_ENABLE = 0,
// Enable dram update
parameter DRAM_ENABLE = 0
parameter DRAM_ENABLE = 0
) (
input wire clk,
input wire reset,
`ifdef DBG_CORE_REQ_INFO
input wire[31:0] debug_pc_st1e,
input wire debug_wb_st1e,
input wire[`NR_BITS-1:0] debug_rd_st1e,
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
`endif
input wire stall,
input wire is_snp_st1e,
input wire snp_invalidate_st1e,
@@ -78,17 +90,17 @@ module VX_tag_data_access #(
wire tags_match;
wire real_writefill = valid_req_st1e && writefill_st1e
&& ((!use_read_valid_st1e) || (use_read_valid_st1e && !tags_match));
&& ((~use_read_valid_st1e) || (use_read_valid_st1e && ~tags_match));
wire[`TAG_SELECT_BITS-1:0] writetag_st1e = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
VX_tag_data_structure #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE)
) tag_data_structure (
VX_tag_data_store #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE)
) tag_data_store (
.clk (clk),
.reset (reset),
.stall_bank_pipe(stall_bank_pipe),
@@ -124,7 +136,7 @@ module VX_tag_data_access #(
genvar i;
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
VX_generic_register #(
.N( 1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
) s0_1_cc (
.clk (clk),
.reset (reset),
@@ -140,11 +152,16 @@ module VX_tag_data_access #(
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM
assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1];
assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1];
if (`WORD_SELECT_WIDTH != 0) begin
assign readword_st1e = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
wire [`WORD_WIDTH-1:0] readword = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
for (i = 0; i < WORD_SIZE; i++) begin
assign readword_st1e[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
end
end else begin
assign readword_st1e = use_read_data_st1e;
for (i = 0; i < WORD_SIZE; i++) begin
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
end
end
wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we;
@@ -153,9 +170,9 @@ module VX_tag_data_access #(
wire should_write = mem_rw_st1e
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e
&& !real_writefill;
&& ~miss_st1e
&& ~is_snp_st1e
&& ~real_writefill;
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
@@ -168,22 +185,22 @@ module VX_tag_data_access #(
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1e;
end
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;
assign use_write_enable = (writefill_st1e && ~real_writefill) ? 0 : we;
assign use_write_data = data_write;
// use "case equality" to handle uninitialized tag when block entry is not valid
assign tags_match = (writetag_st1e === use_read_tag_st1e);
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && !force_request_miss_st1e;
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;
wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && !tags_match;
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && ~force_request_miss_st1e;
wire req_invalid = valid_req_st1e && ~is_snp_st1e && ~use_read_valid_st1e && ~writefill_st1e;
wire req_miss = valid_req_st1e && ~is_snp_st1e && use_read_valid_st1e && ~writefill_st1e && ~tags_match;
wire real_miss = req_invalid || req_miss;
wire force_core_miss = (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e && !real_miss);
wire force_core_miss = (force_request_miss_st1e && ~is_snp_st1e && ~writefill_st1e && valid_req_st1e && ~real_miss);
assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e;
// The second term is basically saying always make an entry ready if there's already another entry waiting, even if you yourself see a miss
assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e
|| (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e);
|| (force_request_miss_st1e && ~is_snp_st1e && ~writefill_st1e && valid_req_st1e);
assign miss_st1e = real_miss || snoop_hit_no_pending || force_core_miss;
assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;
@@ -194,7 +211,23 @@ module VX_tag_data_access #(
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
assign invalidate_line = snoop_hit_no_pending;
endmodule
`ifdef DBG_PRINT_CACHE_BANK
always @(posedge clk) begin
if (valid_req_st1e) begin
if ((| use_write_enable)) begin
if (writefill_st1e) begin
$display("%t: bank%0d:%0d store-fill: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
end else begin
$display("%t: bank%0d:%0d store-write: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
end
end else
if (miss_st1e) begin
$display("%t: bank%0d:%0d store-miss: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
end else begin
$display("%t: bank%0d:%0d store-read: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
end
end
end
`endif
endmodule

View File

@@ -1,6 +1,6 @@
`include "VX_cache_config.vh"
module VX_tag_data_structure #(
module VX_tag_data_store #(
// Size of cache in bytes
parameter CACHE_SIZE = 0,
// Size of line inside a bank in bytes