From de8de00f6e2c1e58dfe8bc161a5dba20330ddbd1 Mon Sep 17 00:00:00 2001 From: felsabbagh3 Date: Wed, 23 Oct 2019 19:07:26 -0400 Subject: [PATCH] Finished cache not tested --- rtl/VX_dmem_controller.v | 6 +- rtl/VX_generic_priority_encoder.v | 2 +- rtl/VX_lsu.v | 2 +- rtl/VX_priority_encoder_w_mask.v | 26 ++++ rtl/cache/VX_Cache_Bank.v | 21 +--- rtl/cache/VX_d_cache.v | 194 +++++++++++++++++++----------- syn/syn.tcl | 2 +- 7 files changed, 161 insertions(+), 92 deletions(-) create mode 100644 rtl/VX_priority_encoder_w_mask.v diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v index d6711e67..207c7d25 100644 --- a/rtl/VX_dmem_controller.v +++ b/rtl/VX_dmem_controller.v @@ -33,7 +33,7 @@ module VX_dmem_controller ( wire[`NT_M1:0][31:0] sm_driver_out_data; wire[`NT_M1:0] cache_driver_out_valid; // Not used for now wire sm_delay; - wire cache_done; + wire cache_delay; VX_shared_memory #(.NB(7), .BITS_PER_BANK(3)) shared_memory ( @@ -57,7 +57,7 @@ module VX_dmem_controller ( .i_p_writedata (cache_driver_in_data), .i_p_read_or_write (read_or_write), .o_p_readdata (cache_driver_out_data), - .o_p_waitrequest (cache_done), + .o_p_delay (cache_delay), .o_m_addr (VX_dram_req_rsp.o_m_addr), .o_m_valid (VX_dram_req_rsp.o_m_valid), .o_m_writedata (VX_dram_req_rsp.o_m_writedata), @@ -69,7 +69,7 @@ module VX_dmem_controller ( assign VX_dcache_rsp.in_cache_driver_out_data = to_shm ? 
sm_driver_out_data : cache_driver_out_data; // assign VX_dcache_rsp.delay = sm_delay; - assign VX_dcache_rsp.delay = sm_delay || (!cache_done); + assign VX_dcache_rsp.delay = sm_delay || cache_delay; endmodule \ No newline at end of file diff --git a/rtl/VX_generic_priority_encoder.v b/rtl/VX_generic_priority_encoder.v index aad7e98c..3e3bad86 100644 --- a/rtl/VX_generic_priority_encoder.v +++ b/rtl/VX_generic_priority_encoder.v @@ -12,7 +12,7 @@ module VX_generic_priority_encoder always @(*) begin index = 0; found = 0; - for (i = `NW-1; i >= 0; i = i - 1) begin + for (i = N-1; i >= 0; i = i - 1) begin if (valids[i]) begin index = i[$clog2(N)-1:0]; found = 1; diff --git a/rtl/VX_lsu.v b/rtl/VX_lsu.v index ca28eaed..bd6874b4 100644 --- a/rtl/VX_lsu.v +++ b/rtl/VX_lsu.v @@ -33,7 +33,7 @@ module VX_lsu ( for (index = 0; index <= `NT_M1; index = index + 1) begin assign VX_dcache_req.out_cache_driver_in_address[index] = address[index]; assign VX_dcache_req.out_cache_driver_in_data[index] = VX_lsu_req.store_data[index]; - assign VX_dcache_req.out_cache_driver_in_valid[index] = VX_lsu_req.valid[index]; + assign VX_dcache_req.out_cache_driver_in_valid[index] = (VX_lsu_req.valid[index] && !VX_dcache_rsp.delay); assign VX_mem_wb.loaded_data[index] = VX_dcache_rsp.in_cache_driver_out_data[index]; end diff --git a/rtl/VX_priority_encoder_w_mask.v b/rtl/VX_priority_encoder_w_mask.v new file mode 100644 index 00000000..365df62f --- /dev/null +++ b/rtl/VX_priority_encoder_w_mask.v @@ -0,0 +1,26 @@ +module VX_priority_encoder_w_mask + #( + parameter N = 10 + ) + ( + input wire[N-1:0] valids, + output reg [N-1:0] mask, + output reg[$clog2(N)-1:0] index, + output reg found + ); + + integer i; + always @(*) begin + index = 0; + found = 0; + mask = 0; + for (i = 0; i < N; i=i+1) + begin + if (!found && valids[i]) begin + index = i[$clog2(N)-1:0]; + found = 1; + mask[i[$clog2(N)-1:0]] = 1; + end + end + end +endmodule \ No newline at end of file diff --git 
a/rtl/cache/VX_Cache_Bank.v b/rtl/cache/VX_Cache_Bank.v index a0be9b82..894bc593 100644 --- a/rtl/cache/VX_Cache_Bank.v +++ b/rtl/cache/VX_Cache_Bank.v @@ -4,7 +4,7 @@ `define NUM_WORDS_PER_BLOCK 4 -`include "VX_define.v" +`include "../VX_define.v" `include "VX_cache_data.v" module VX_Cache_Bank @@ -39,18 +39,9 @@ module VX_Cache_Bank parameter ways_per_set = 4; parameter Number_Blocks = 32; - localparam CACHE_IDLE = 0; // Idle - localparam SORT_BY_BANK = 1; // Determines the bank each thread will access - localparam INITIAL_ACCESS = 2; // Accesses the bank and checks if it is a hit or miss - localparam INITIAL_PROCESSING = 3; // Check to see if there were misses - localparam CONTINUED_PROCESSING = 4; // Keep checking status of banks that need to be written back or fetched - localparam DIRTY_EVICT_GRAB_BLOCK = 5; // Grab the full block of dirty data - localparam DIRTY_EVICT_WB = 6; // Write back this block into memory - localparam FETCH_FROM_MEM = 7; // Send a request to mem looking for read data - localparam FETCH2 = 8; // Stall until memory gets back with the data - localparam UPDATE_CACHE = 9; // Update the cache with the data read from mem - localparam RE_ACCESS = 10; // Access the cache after the block has been fetched from memory - localparam RE_ACCESS_PROCESSING = 11; // Access the cache after the block has been fetched from memory + localparam CACHE_IDLE = 0; // Idle + localparam SEND_MEM_REQ = 1; // Write back this block into memory + localparam RECIV_MEM_RSP = 2; // Inputs input wire clk; @@ -101,8 +92,8 @@ module VX_Cache_Bank assign eviction_wb = miss && (dirty_use != 1'b0); assign eviction_tag = tag_use; - assign access = (state == INITIAL_ACCESS || state == RE_ACCESS) && valid_in; - assign write_from_mem = (state == UPDATE_CACHE) && valid_in; + assign access = (state == CACHE_IDLE) && valid_in; + assign write_from_mem = (state == RECIV_MEM_RSP) && valid_in; assign readdata = (access) ? 
data_use[block_offset] : 32'b0; // Fix with actual data assign hit = (access && (tag_use == o_tag) && valid_use); //assign eviction_addr = {eviction_tag, actual_index, block_offset, 5'b0}; // Fix with actual data diff --git a/rtl/cache/VX_d_cache.v b/rtl/cache/VX_d_cache.v index 0c1637f2..8c9bae39 100644 --- a/rtl/cache/VX_d_cache.v +++ b/rtl/cache/VX_d_cache.v @@ -8,7 +8,7 @@ // TO DO: // - Send in a response from memory of what the data is from the test bench -`include "VX_define.v" +`include "../VX_define.v" //`include "VX_priority_encoder.v" `include "VX_Cache_Bank.v" //`include "cache_set.v" @@ -23,96 +23,86 @@ module VX_d_cache(clk, i_p_valid, //i_p_write, o_p_readdata, - o_p_waitrequest, // 0 = all threads done | 1 = Still threads that need to + o_p_delay, // 0 = all threads done | 1 = Still threads that need to + + o_m_evict_addr, + o_m_read_addr, - o_m_addr, - //o_m_byte_en, o_m_writedata, o_m_read_or_write, // 0 = Read | 1 = Write o_m_valid, - //o_m_write, i_m_readdata, - //i_m_readdata_ready, - //i_m_waitrequest, i_m_ready - - //cnt_r, - //cnt_w, - //cnt_hit_r, - //cnt_hit_w - //cnt_wb_r, - //cnt_wb_w ); parameter NUMBER_BANKS = 8; - localparam CACHE_IDLE = 0; // Idle - localparam SORT_BY_BANK = 1; // Determines the bank each thread will access - localparam INITIAL_ACCESS = 2; // Accesses the bank and checks if it is a hit or miss - localparam INITIAL_PROCESSING = 3; // Check to see if there were misses - localparam CONTINUED_PROCESSING = 4; // Keep checking status of banks that need to be written back or fetched - localparam DIRTY_EVICT_GRAB_BLOCK = 5; // Grab the full block of dirty data - localparam DIRTY_EVICT_WB = 6; // Write back this block into memory - localparam FETCH_FROM_MEM = 7; // Send a request to mem looking for read data - localparam FETCH2 = 8; // Stall until memory gets back with the data - localparam UPDATE_CACHE = 9; // Update the cache with the data read from mem - localparam RE_ACCESS = 10; // Access the cache after the block has 
been fetched from memory - localparam RE_ACCESS_PROCESSING = 11; // Access the cache after the block has been fetched from memory + localparam CACHE_IDLE = 0; // Idle + localparam SEND_MEM_REQ = 1; // Write back this block into memory + localparam RECIV_MEM_RSP = 2; //parameter cache_entry = 9; input wire clk, rst; input wire [`NT_M1:0] i_p_valid; - //input wire [`NT_M1:0][24:0] i_p_addr; // FIXME input wire [`NT_M1:0][31:0] i_p_addr; // FIXME - input wire i_p_initial_request; - //input wire [3:0] i_p_byte_en; input wire [`NT_M1:0][31:0] i_p_writedata; input wire i_p_read_or_write; //, i_p_write; output reg [`NT_M1:0][31:0] o_p_readdata; - output reg [`NT_M1:0] o_p_readdata_valid; - output wire o_p_waitrequest; - //output reg [24:0] o_m_addr; // Only one address is sent out at a time to memory -- FIXME - output reg [31:0] o_m_addr; // Address is xxxxxxxxxxoooobbbyy + output wire o_p_delay; + output reg [31:0] o_m_evict_addr; // Address is xxxxxxxxxxoooobbbyy + output reg [31:0] o_m_read_addr; output reg o_m_valid; - //output wire [255:0][31:0] evicted_data; - //output wire [3:0] o_m_byte_en; - //output reg [(NUMBER_BANKS * 32) - 1:0] o_m_writedata; output reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata; - output reg o_m_read_or_write; //, o_m_write; - //input wire [(NUMBER_BANKS * 32) - 1:0] i_m_readdata; // Read Data that is passed from the memory module back to the controller + output reg o_m_read_or_write; //, o_m_write; input wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata; - //input wire i_m_readdata_ready; - //input wire i_m_waitrequest; input wire i_m_ready; -// Actual logic - reg [3:0] state; - wire[3:0] new_state; + // Buffer for final data reg [`NT_M1:0][31:0] final_data_read; wire[`NT_M1:0][31:0] new_final_data_read; - wire[NUMBER_BANKS-1:0] readdata_per_bank; - - wire[NUMBER_BANKS-1:0] hit_per_bank; - - wire[`NT_M1:0] use_valid; - reg[`NT_M1:0] stored_valid; - wire[`NT_M1:0] new_stored_valid; + assign 
o_p_readdata = final_data_read; - wire[NUMBER_BANKS - 1 : 0][$clog2(`NT)-1:0] index_per_bank; - wire[NUMBER_BANKS - 1 : 0] valid_per_bank; + + wire[NUMBER_BANKS - 1 : 0][`NT_M1:0] thread_track_banks; // Valid thread mask per bank + wire[NUMBER_BANKS - 1 : 0][$clog2(`NT)-1:0] index_per_bank; // Index of thread each bank will try to service + wire[NUMBER_BANKS - 1 : 0][`NT_M1:0] use_mask_per_bank; // A mask of index_per_bank + wire[NUMBER_BANKS - 1 : 0] valid_per_bank; // Valid request going to each bank + wire[NUMBER_BANKS - 1 : 0][`NT_M1:0] threads_serviced_per_bank; // Thread mask successfully serviced (hit) by each bank + + wire[NUMBER_BANKS-1:0][31:0] readdata_per_bank; // Data read from each bank + wire[NUMBER_BANKS-1:0] hit_per_bank; // Whether each bank got a hit or a miss + wire[NUMBER_BANKS-1:0] eviction_wb; + + // Internal State + reg [3:0] state; + wire[3:0] new_state; + + wire[`NT_M1:0] use_valid; // Valid used throughout the code + reg[`NT_M1:0] stored_valid; // Saving the threads still left (bank conflict or bank miss) + wire[`NT_M1:0] new_stored_valid; // New stored valid + + + + reg[NUMBER_BANKS - 1 : 0][31:0] eviction_addr_per_bank; + + reg[31:0] miss_addr; + reg[31:0] evict_addr; assign use_valid = (stored_valid == 0) ? 
i_p_valid : stored_valid; - wire[NUMBER_BANKS - 1 : 0][`NT_M1:0] thread_track_banks; + + + + reg[`NT_M1:0] threads_serviced_Qual; VX_cache_bank_valid #(.NUMBER_BANKS(NUMBER_BANKS)) multip_banks( .i_p_valid (use_valid), @@ -125,18 +115,27 @@ module VX_d_cache(clk, genvar bank_ind; for (bank_ind = 0; bank_ind < NUMBER_BANKS; bank_ind=bank_ind+1) begin - detect_bank_conflict = detect_bank_conflict | ($countones(thread_track_banks[bank_ind]) > 1); + assign detect_bank_conflict = detect_bank_conflict | ($countones(thread_track_banks[bank_ind]) > 1); - VX_generic_priority_encoder #(.N(1)) choose_thread( + VX_priority_encoder_w_mask #(.N(`NT)) choose_thread( .valids(thread_track_banks[bank_ind]), + .mask (use_mask_per_bank[bank_ind]), .index (index_per_bank[bank_ind]), .found (valid_per_bank[bank_ind]) ); //////////////// - assign new_final_data_read[index_per_bank[bank_ind]] = hit_per_bank ? readdata_per_bank[bank_ind] : 0; + assign new_final_data_read[index_per_bank[bank_ind]] = hit_per_bank[bank_ind] ? readdata_per_bank[bank_ind] : 0; + assign threads_serviced_per_bank[bank_ind] = use_mask_per_bank[bank_ind] & {`NT{hit_per_bank[bank_ind]}}; + + end + + genvar bid; + for (bid = 0; bid < NUMBER_BANKS; bid=bid+1) + begin + assign threads_serviced_Qual = threads_serviced_Qual | threads_serviced_per_bank[bid]; end @@ -144,51 +143,104 @@ module VX_d_cache(clk, wire delay; assign delay = (new_stored_valid != 0); // add other states - // assign delay = detect_bank_conflict || (|detect_bank_miss) || (state != CACHE_IDLE); // add other states + assign o_p_delay = delay; wire[NUMBER_BANKS - 1 : 0][$clog2(`NT)-1:0] send_index_to_bank = index_per_bank; -// End actual logic + wire[$clog2(NUMBER_BANKS)-1:0] miss_bank_index; + wire miss_found; + VX_generic_priority_encoder #(.N(NUMBER_BANKS)) get_miss_index + ( + .valids(detect_bank_miss), + .index (miss_bank_index), + .found (miss_found) + ); - assign new_state = detect_bank_miss ? 
DIRTY_EVICT_WB : CACHE_IDLE; + assign new_state = ((state == CACHE_IDLE) && (|detect_bank_miss)) ? SEND_MEM_REQ : + (state == SEND_MEM_REQ) ? RECIV_MEM_RSP : + ((state == RECIV_MEM_RSP) && !i_m_ready) ? RECIV_MEM_RSP : + CACHE_IDLE; // Handle if there is more than one miss - assign new_stored_valid = (state == CACHE_IDLE) ? ( & ~hit_per_bank); + assign new_stored_valid = use_valid & (~threads_serviced_Qual); + + + integer cur_t; + always @(posedge clk) begin + state <= new_state; + + if (state == CACHE_IDLE) stored_valid <= new_stored_valid; + + if (miss_found) begin + miss_addr <= i_p_addr[send_index_to_bank[miss_bank_index]]; + evict_addr <= eviction_addr_per_bank[miss_bank_index]; + end + + for (cur_t = 0; cur_t < `NT; cur_t=cur_t+1) + begin + if (threads_serviced_Qual[cur_t]) final_data_read[cur_t] <= new_final_data_read[cur_t]; + end + end genvar bank_id; generate for (bank_id = 0; bank_id < NUMBER_BANKS; bank_id = bank_id + 1) begin - wire[31:0] bank_addr = i_p_addr[send_index_to_bank[bank_ind]]; + wire[31:0] bank_addr = (state == SEND_MEM_REQ) ? evict_addr : + (state == RECIV_MEM_RSP) ? miss_addr : + i_p_addr[send_index_to_bank[bank_id]]; + + + wire[7:0] cache_index = bank_addr[14:7]; wire[16:0] cache_tag = bank_addr[31:15]; wire[1:0] cache_offset = bank_addr[6:5]; + + wire normal_valid_in = valid_per_bank[bank_id]; + wire use_valid_in = ((state == RECIV_MEM_RSP) && i_m_ready) ? 1'b1 : + ((state == RECIV_MEM_RSP) && !i_m_ready) ? 1'b0 : + ((state == SEND_MEM_REQ)) ? 
1'b0 : + normal_valid_in; + VX_Cache_Bank bank_structure ( .clk (clk), .state (state), - .valid_in (valid_per_bank[bank_ind]) + .valid_in (use_valid_in), .actual_index (cache_index), .o_tag (cache_tag), .block_offset (cache_offset), - .writedata (i_p_writedata[send_index_to_bank[bank_ind]]), - .read_or_write (rd_or_wr), - .hit (hit_per_bank[bank_ind]), - .readdata (readdata_per_bank[bank_ind]), // Data read + .writedata (i_p_writedata[send_index_to_bank[bank_id]]), + .read_or_write (i_p_read_or_write), + .hit (hit_per_bank[bank_id]), + .readdata (readdata_per_bank[bank_id]), // Data read + .eviction_addr (eviction_addr_per_bank[bank_id]), + .data_evicted (o_m_writedata[bank_id]), + .eviction_wb (eviction_wb[bank_id]), // Something needs to be written back - .fetched_writedata(fetched_writedata), // From memory - .eviction_wb (eviction_wb), - .eviction_addr (eviction_addr), - .data_evicted (data_evicted) + + .fetched_writedata(i_m_readdata[bank_id]) // Data From memory ); end endgenerate + // Mem Rsp + + // Req to mem: + assign o_m_evict_addr = evict_addr; + assign o_m_read_addr = miss_addr; + assign o_m_valid = (state == SEND_MEM_REQ); + assign o_m_read_or_write = (state == SEND_MEM_REQ) && (|eviction_wb); //end -endmodule \ No newline at end of file +endmodule + + + + + diff --git a/syn/syn.tcl b/syn/syn.tcl index 45901746..489fd987 100755 --- a/syn/syn.tcl +++ b/syn/syn.tcl @@ -3,7 +3,7 @@ set link_library [concat * sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_ set symbol_library {} set target_library [concat sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_m40c.db] -set verilog_files [ list Vortex.v VX_dram_req_rsp_inter.v bank.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_set_bit.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v 
VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \ +set verilog_files [ list Vortex.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v bank.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_set_bit.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v 
VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \ ] set top_level Vortex