diff --git a/rtl/VX_cache/VX_bank.v b/rtl/VX_cache/VX_bank.v index 92f3d81a..d61a0b9c 100644 --- a/rtl/VX_cache/VX_bank.v +++ b/rtl/VX_cache/VX_bank.v @@ -37,6 +37,8 @@ module VX_bank parameter DFQQ_SIZE = 8, // Lower Level Cache Hit Queue Size parameter LLVQ_SIZE = 16, + // Fill Forward SNP Queue + parameter FFSQ_SIZE = 8, // Fill Invalidator Size {Fill invalidator must be active} parameter FILL_INVALIDAOR_SIZE = 16, @@ -95,7 +97,10 @@ module VX_bank // Snp Request input wire snp_req, - input wire[31:0] snp_req_addr + input wire[31:0] snp_req_addr, + + output wire snp_fwd, + output wire[31:0] snp_fwd_addr /* NOTE(review): snp_fwd and snp_fwd_addr are not driven by any hunk of this patch - confirm the forwarding logic lands in a follow-up */ ); @@ -511,7 +516,7 @@ module VX_bank // Enqueue to miss reserv if it's a valid miss - assign miss_add = valid_st2 && miss_st2 && !mrvq_full && !((cwbq_push && cwbq_full) || (dwbq_push && dwbq_full) || (dram_fill_req && dram_fill_req_queue_full)); + assign miss_add = valid_st2 && miss_st2 && !mrvq_full && !(((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full)); /* blocked by every other stall source used in stall_bank_pipe (CWBQ, DWBQ, DRAM fill-req queue); the MRVQ itself is covered by the leading !mrvq_full */ assign miss_add_pc = pc_st2; assign miss_add_addr = addr_st2; assign miss_add_data = writeword_st2; @@ -519,7 +524,7 @@ module VX_bank // Enqueue to CWB Queue - wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `LLFUNC_ID) && (miss_add_wb == 0)); + wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `LLFUNC_ID) && (miss_add_wb == 0)) && !( (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full)); wire [`WORD_SIZE_RNG] cwbq_data = readword_st2; wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid; wire [4:0] cwbq_rd = miss_add_rd; @@ -544,7 +549,7 @@ module VX_bank ); // Enqueue to DWB Queue - wire dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && !dwbq_full && !(!fill_saw_dirty_st2 && 
mrvq_full); + wire dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && !dwbq_full && !(((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full)); wire[31:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK; wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data = readdata_st2; wire dwbq_empty; @@ -603,9 +608,12 @@ module VX_bank .full (dwbq_full) ); + wire snp_fwd_push; + wire snp_fwd_pop; - assign stall_bank_pipe = (cwbq_push && cwbq_full) || (dwbq_push && dwbq_full) || (miss_add && mrvq_full) || (dram_fill_req && dram_fill_req_queue_full); + + assign stall_bank_pipe = ((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full); endmodule diff --git a/rtl/VX_cache/VX_cache.v b/rtl/VX_cache/VX_cache.v index 3ca9b52a..2c30a0b1 100644 --- a/rtl/VX_cache/VX_cache.v +++ b/rtl/VX_cache/VX_cache.v @@ -37,6 +37,8 @@ module VX_cache parameter DFQQ_SIZE = 8, // Lower Level Cache Hit Queue Size parameter LLVQ_SIZE = 16, + // Fill Forward SNP Queue + parameter FFSQ_SIZE = 8, // Fill Invalidator Size {Fill invalidator must be active} parameter FILL_INVALIDAOR_SIZE = 16, @@ -343,6 +345,7 @@ module VX_cache .DWBQ_SIZE (DWBQ_SIZE), .DFQQ_SIZE (DFQQ_SIZE), .LLVQ_SIZE (LLVQ_SIZE), + .FFSQ_SIZE (FFSQ_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES) ) diff --git a/rtl/VX_define.v b/rtl/VX_define.v index d1f23636..45c6c375 100644 --- a/rtl/VX_define.v +++ b/rtl/VX_define.v @@ -125,7 +125,11 @@ -199 -`define NUMBER_CORES 2 + +`define NUMBER_CORES_PER_CLUSTERS (2) +`define NUMBER_CLUSTERS (1) +`define NUMBER_CORES (`NUMBER_CORES_PER_CLUSTERS*`NUMBER_CLUSTERS) + // `define SINGLE_CORE_BENCH 0 
`define GLOBAL_BLOCK_SIZE_BYTES 16 // ========================================= Dcache Configurable Knobs ========================================= @@ -169,6 +173,8 @@ `define DDFQQ_SIZE `DREQQ_SIZE // Lower Level Cache Hit Queue Size `define DLLVQ_SIZE 0 + // Fill Forward SNP Queue + `define DFFSQ_SIZE 8 // Fill Invalidator Size {Fill invalidator must be active} `define DFILL_INVALIDAOR_SIZE 16 @@ -220,6 +226,8 @@ `define IDFQQ_SIZE `IREQQ_SIZE // Lower Level Cache Hit Queue Size `define ILLVQ_SIZE 0 + // Fill Forward SNP Queue + `define IFFSQ_SIZE 8 // Fill Invalidator Size {Fill invalidator must be active} `define IFILL_INVALIDAOR_SIZE 16 @@ -270,6 +278,8 @@ `define SDFQQ_SIZE 0 // Lower Level Cache Hit Queue Size `define SLLVQ_SIZE 0 + // Fill Forward SNP Queue + `define SFFSQ_SIZE 0 // Fill Invalidator Size {Fill invalidator must be active} `define SFILL_INVALIDAOR_SIZE 16 @@ -293,7 +303,7 @@ // Size of a word in bytes `define LLWORD_SIZE_BYTES (`LLBANK_LINE_SIZE_BYTES) // Number of Word requests per cycle {1, 2, 4, 8, ...} - `define LLNUMBER_REQUESTS (2*`NUMBER_CORES) + `define LLNUMBER_REQUESTS (2*`NUMBER_CORES_PER_CLUSTERS) // Number of cycles to complete stage 1 (read from memory) `define LLSTAGE_1_CYCLES 2 // Function ID @@ -305,7 +315,7 @@ // Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Core Request Queue Size - `define LLREQQ_SIZE (`NT*`NW*`NUMBER_CORES) + `define LLREQQ_SIZE (`NT*`NW*`NUMBER_CORES_PER_CLUSTERS) // Miss Reserv Queue Knob `define LLMRVQ_SIZE `LLREQQ_SIZE // Dram Fill Rsp Queue Size @@ -322,6 +332,8 @@ `define LLDFQQ_SIZE `LLREQQ_SIZE // Lower Level Cache Hit Queue Size `define LLLLVQ_SIZE 0 + // Fill Forward SNP Queue + `define LLFFSQ_SIZE 8 // Fill Invalidator Size {Fill invalidator must be active} `define LLFILL_INVALIDAOR_SIZE 16 @@ -332,4 +344,57 @@ // ========================================= L2cache Configurable Knobs ========================================= +// ========================================= L3cache 
Configurable Knobs ========================================= + +// General Cache Knobs + // Size of cache in bytes + `define L3CACHE_SIZE_BYTES 1024 + // Size of line inside a bank in bytes + `define L3BANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES + // Number of banks {1, 2, 4, 8,...} + `define L3NUMBER_BANKS 8 + // Size of a word in bytes (one word per L3 bank line) + `define L3WORD_SIZE_BYTES (`L3BANK_LINE_SIZE_BYTES) + // Number of Word requests per cycle {1, 2, 4, 8, ...} + `define L3NUMBER_REQUESTS (2*`NUMBER_CLUSTERS) + // Number of cycles to complete stage 1 (read from memory) + `define L3STAGE_1_CYCLES 2 + // Function ID + `define L3FUNC_ID 3 + + // Bank Number of words in a line + `define L3BANK_LINE_SIZE_WORDS (`L3BANK_LINE_SIZE_BYTES / `L3WORD_SIZE_BYTES) + `define L3BANK_LINE_SIZE_RNG `L3BANK_LINE_SIZE_WORDS-1:0 +// Queues feeding into banks Knobs {1, 2, 4, 8, ...} + + // Core Request Queue Size + `define L3REQQ_SIZE (`NT*`NW*`NUMBER_CLUSTERS) + // Miss Reserv Queue Knob (tracks the L3 request queue depth) + `define L3MRVQ_SIZE `L3REQQ_SIZE + // Dram Fill Rsp Queue Size + `define L3DFPQ_SIZE 2 + // Snoop Req Queue + `define L3SNRQ_SIZE 8 + +// Queues for writebacks Knobs {1, 2, 4, 8, ...} + // Core Writeback Queue Size + `define L3CWBQ_SIZE `L3REQQ_SIZE + // Dram Writeback Queue Size + `define L3DWBQ_SIZE 4 + // Dram Fill Req Queue Size + `define L3DFQQ_SIZE `L3REQQ_SIZE + // Lower Level Cache Hit Queue Size + `define L3LLVQ_SIZE 0 + // Fill Forward SNP Queue + `define L3FFSQ_SIZE 8 + + // Fill Invalidator Size {Fill invalidator must be active} + `define L3FILL_INVALIDAOR_SIZE 16 + +// Dram knobs + `define L3SIMULATED_DRAM_LATENCY_CYCLES 10 + +// ========================================= L3cache Configurable Knobs ========================================= + + `endif diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v index 4e1596b7..75ba5c00 100644 --- a/rtl/VX_dmem_controller.v +++ b/rtl/VX_dmem_controller.v @@ -92,6 +92,7 @@ module VX_dmem_controller ( .DWBQ_SIZE (`SDWBQ_SIZE), .DFQQ_SIZE 
(`SDFQQ_SIZE), .LLVQ_SIZE (`SLLVQ_SIZE), + .FFSQ_SIZE (`SFFSQ_SIZE), .FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(`SSIMULATED_DRAM_LATENCY_CYCLES) ) @@ -167,6 +168,7 @@ module VX_dmem_controller ( .DWBQ_SIZE (`DDWBQ_SIZE), .DFQQ_SIZE (`DDFQQ_SIZE), .LLVQ_SIZE (`DLLVQ_SIZE), + .FFSQ_SIZE (`DFFSQ_SIZE), .FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(`DSIMULATED_DRAM_LATENCY_CYCLES) ) @@ -244,6 +246,7 @@ module VX_dmem_controller ( .DWBQ_SIZE (`IDWBQ_SIZE), .DFQQ_SIZE (`IDFQQ_SIZE), .LLVQ_SIZE (`ILLVQ_SIZE), + .FFSQ_SIZE (`IFFSQ_SIZE), .FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES) ) diff --git a/rtl/Vortex_SOC.v b/rtl/Vortex_SOC.v index 2305e55c..d2b47e25 100644 --- a/rtl/Vortex_SOC.v +++ b/rtl/Vortex_SOC.v @@ -216,6 +216,7 @@ module Vortex_SOC ( .DWBQ_SIZE (`LLDWBQ_SIZE), .DFQQ_SIZE (`LLDFQQ_SIZE), .LLVQ_SIZE (`LLLLVQ_SIZE), + .FFSQ_SIZE (`LLFFSQ_SIZE), .FILL_INVALIDAOR_SIZE (`LLFILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(`LLSIMULATED_DRAM_LATENCY_CYCLES) )