diff --git a/rtl/VX_cache/VX_cache.v b/rtl/VX_cache/VX_cache.v
index 4b9630b2..084de00c 100644
--- a/rtl/VX_cache/VX_cache.v
+++ b/rtl/VX_cache/VX_cache.v
@@ -43,6 +43,10 @@ module VX_cache
     // Fill Invalidator Size {Fill invalidator must be active}
     parameter FILL_INVALIDAOR_SIZE = 16,
 
+    // Prefetcher
+    parameter PRFQ_SIZE = 64,
+    parameter PRFQ_STRIDE = 0, // 0: VX_prefetcher reloads use_valid with 0, so pref_valid never asserts
+
     // Dram knobs
     parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
 
@@ -166,6 +170,8 @@ module VX_cache
         .DFQQ_SIZE (DFQQ_SIZE),
         .LLVQ_SIZE (LLVQ_SIZE),
         .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
+        .PRFQ_SIZE (PRFQ_SIZE),
+        .PRFQ_STRIDE (PRFQ_STRIDE),
         .SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
         )
         VX_cache_dram_req_arb
diff --git a/rtl/VX_cache/VX_cache_dram_req_arb.v b/rtl/VX_cache/VX_cache_dram_req_arb.v
index f6534714..0db1f560 100644
--- a/rtl/VX_cache/VX_cache_dram_req_arb.v
+++ b/rtl/VX_cache/VX_cache_dram_req_arb.v
@@ -39,6 +39,10 @@ module VX_cache_dram_req_arb
     // Fill Invalidator Size {Fill invalidator must be active}
     parameter FILL_INVALIDAOR_SIZE = 16,
 
+    // Prefetcher
+    parameter PRFQ_SIZE = 64,
+    parameter PRFQ_STRIDE = 2,
+
     // Dram knobs
     parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
 
@@ -74,6 +78,37 @@ module VX_cache_dram_req_arb
 );
 
 
+
+    wire pref_pop;
+    wire pref_valid;
+    wire[31:0] pref_addr;
+
+    // A prefetch is issued only when a prefetch address is pending and
+    // dwb_valid, dfqq_req and dram_req_delay are all low.
+    // NOTE(review): dfqq_req is declared below this use; some tools reject
+    // use-before-declaration - confirm, or move this logic after the wires.
+    assign pref_pop = !dwb_valid && !dfqq_req && !dram_req_delay && pref_valid;
+    VX_prefetcher #(
+        .PRFQ_SIZE (PRFQ_SIZE),
+        .PRFQ_STRIDE (PRFQ_STRIDE),
+        .BANK_LINE_SIZE_BYTES(BANK_LINE_SIZE_BYTES),
+        .WORD_SIZE_BYTES (WORD_SIZE_BYTES)
+    )
+    prfqq
+    (
+        .clk (clk),
+        .reset (reset),
+
+        .dram_req (dram_req && dram_req_read),
+        .dram_req_addr(dram_req_addr),
+
+        .pref_pop (pref_pop),
+        .pref_valid (pref_valid),
+        .pref_addr (pref_addr)
+
+
+    );
+
     wire dfqq_req;
     wire[31:0] dfqq_req_addr;
     wire dfqq_empty;
@@ -107,10 +142,10 @@ module VX_cache_dram_req_arb
     assign per_bank_dram_wb_queue_pop = dram_req_delay ? 0 : use_wb_valid & ((1 << dwb_bank));
 
 
-    assign dram_req = dwb_valid || dfqq_req;
+    assign dram_req = dwb_valid || dfqq_req || pref_pop;
     assign dram_req_write = dwb_valid;
-    assign dram_req_read = dfqq_req && !dwb_valid;
-    assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr) & `BASE_ADDR_MASK;
+    assign dram_req_read = (dfqq_req && !dwb_valid) || pref_pop;
+    assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr)) & `BASE_ADDR_MASK;
     assign dram_req_size = BANK_LINE_SIZE_BYTES;
     assign {dram_req_data} = dwb_valid ? {per_bank_dram_wb_req_data[dwb_bank] }: 0;
     // assign dram_req_because_of_wb = dwb_valid ? per_bank_dram_because_of_snp[dwb_bank] : 0;
diff --git a/rtl/VX_cache/VX_prefetcher.v b/rtl/VX_cache/VX_prefetcher.v
new file mode 100644
index 00000000..32fe83ec
--- /dev/null
+++ b/rtl/VX_cache/VX_prefetcher.v
@@ -0,0 +1,99 @@
+`include "VX_cache_config.v"
+
+// Sequential line prefetcher.
+//
+// Addresses presented with dram_req (in cycles where pref_pop is low) are
+// pushed into a queue. For each queued base address the module offers up to
+// PRFQ_STRIDE follow-on addresses on pref_addr, stepping by
+// BANK_LINE_SIZE_BYTES from the base, one per pref_pop.
+//
+// Parameters:
+//   PRFQ_SIZE            - SIZE of the address queue.
+//   PRFQ_STRIDE          - prefetches offered per queued address; with 0,
+//                          use_valid is reloaded with 0 and pref_valid
+//                          never asserts.
+//   BANK_LINE_SIZE_BYTES - step between consecutive prefetch addresses.
+//   WORD_SIZE_BYTES      - not used directly here (NOTE(review): macros from
+//                          VX_cache_config.v may reference it - confirm).
+//
+// Ports:
+//   dram_req, dram_req_addr - strobe and address of a request to record.
+//   pref_pop                - the current pref_addr was consumed this cycle.
+//   pref_valid              - a prefetch address is pending on pref_addr.
+//   pref_addr               - next prefetch address.
+module VX_prefetcher
+    #(
+    parameter PRFQ_SIZE = 64,
+    parameter PRFQ_STRIDE = 2,
+    // Size of line inside a bank in bytes
+    parameter BANK_LINE_SIZE_BYTES = 16,
+    // Size of a word in bytes
+    parameter WORD_SIZE_BYTES = 4
+    )
+    (
+    input wire clk,
+    input wire reset,
+
+    input wire dram_req,
+    input wire[31:0] dram_req_addr,
+
+    input wire pref_pop,
+    output wire pref_valid,
+    output wire[31:0] pref_addr
+
+);
+
+    // Prefetches still owed for the current base address (0 = idle).
+    reg[`vx_clog2(PRFQ_STRIDE):0] use_valid;
+    // Address of the next prefetch to offer.
+    reg[31:0] use_addr;
+
+    wire current_valid;
+    wire[31:0] current_addr;
+
+    wire current_full;
+    wire current_empty;
+
+    assign current_valid = ~current_empty;
+
+    // Load the next queued base address when idle, or in the same cycle the
+    // last owed prefetch is consumed. Reloading at use_valid == 1 without
+    // pref_pop would overwrite use_addr before that prefetch was issued.
+    wire update_use = ((use_valid == 0) || ((use_valid == 1) && pref_pop)) && current_valid;
+
+    VX_generic_queue_ll #(.DATAW(32), .SIZE(PRFQ_SIZE)) pfq_queue(
+        .clk (clk),
+        .reset (reset),
+
+        // Requests seen while pref_pop is high are not recorded.
+        .push (dram_req && !current_full && !pref_pop),
+        .in_data (dram_req_addr & `BASE_ADDR_MASK),
+
+        .pop (update_use),
+        .out_data(current_addr),
+
+        .empty (current_empty),
+        .full (current_full)
+    );
+
+    assign pref_valid = use_valid != 0;
+    assign pref_addr = use_addr;
+
+    always @(posedge clk) begin
+        if (reset) begin
+            use_valid <= 0;
+            use_addr <= 0;
+        end else begin
+            if (update_use) begin
+                // Start a new run: first prefetch is the line after the base.
+                use_valid <= PRFQ_STRIDE;
+                use_addr <= current_addr + BANK_LINE_SIZE_BYTES;
+            end else if (pref_valid && pref_pop) begin
+                // Advance to the next line within the current run.
+                use_valid <= use_valid - 1;
+                use_addr <= use_addr + BANK_LINE_SIZE_BYTES;
+            end
+        end
+    end
+
+endmodule
\ No newline at end of file