From 9d8273afe48321d5d5cbfac6127bcfdf2ef159b0 Mon Sep 17 00:00:00 2001 From: felsabbagh3 Date: Tue, 22 Oct 2019 06:02:08 -0400 Subject: [PATCH] Finished Cache Integration --- rtl/Makefile | 2 +- rtl/VX_define.v | 5 + rtl/VX_dmem_controller.v | 40 +- rtl/Vortex.v | 41 +- rtl/cache/Makefile | 12 + rtl/cache/Notes | 46 ++ rtl/cache/VX_Cache_Bank.v | 143 +++++ rtl/cache/VX_Cache_Block_DM.v | 196 +++++++ rtl/cache/VX_cache_data.v | 193 +++++++ rtl/cache/VX_d_cache.v | 602 +++++++++++++++++++++ rtl/cache/VX_d_cache_encapsulate.v | 118 ++++ rtl/cache/VX_d_cache_tb.v | 58 ++ rtl/cache/VX_generic_pe.v | 24 + rtl/cache/bank.v | 201 +++++++ rtl/cache/cache_set.v | 233 ++++++++ rtl/cache/d_cache_test_bench.cpp | 29 + rtl/cache/d_cache_test_bench.h | 355 ++++++++++++ rtl/cache/d_cache_test_bench_debug.h | 1 + rtl/interfaces/VX_dram_req_rsp_inter.v | 24 + rtl/shared_memory/VX_shared_memory_block.v | 8 +- syn/syn.tcl | 2 +- 21 files changed, 2317 insertions(+), 16 deletions(-) create mode 100644 rtl/cache/Makefile create mode 100644 rtl/cache/Notes create mode 100644 rtl/cache/VX_Cache_Bank.v create mode 100644 rtl/cache/VX_Cache_Block_DM.v create mode 100644 rtl/cache/VX_cache_data.v create mode 100644 rtl/cache/VX_d_cache.v create mode 100644 rtl/cache/VX_d_cache_encapsulate.v create mode 100644 rtl/cache/VX_d_cache_tb.v create mode 100644 rtl/cache/VX_generic_pe.v create mode 100644 rtl/cache/bank.v create mode 100644 rtl/cache/cache_set.v create mode 100644 rtl/cache/d_cache_test_bench.cpp create mode 100644 rtl/cache/d_cache_test_bench.h create mode 100644 rtl/cache/d_cache_test_bench_debug.h create mode 100644 rtl/interfaces/VX_dram_req_rsp_inter.v diff --git a/rtl/Makefile b/rtl/Makefile index 88a4265e..eb9649ba 100644 --- a/rtl/Makefile +++ b/rtl/Makefile @@ -3,7 +3,7 @@ all: RUNFILE # /rf2_256x128_wm1/ BaseMEM=../models/memory/cln28hpm -INCLUDE=-I. 
-Ishared_memory -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm1/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate +INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate FILE=Vortex.v diff --git a/rtl/VX_define.v b/rtl/VX_define.v index eb95bddd..285c70a2 100644 --- a/rtl/VX_define.v +++ b/rtl/VX_define.v @@ -10,6 +10,11 @@ // `define ONLY + +`define NUMBER_BANKS 8 +`define NUM_WORDS_PER_BLOCK 4 + + `define NUM_BARRIERS 4 `define R_INST 7'd51 diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v index 994abb91..df9d50da 100644 --- a/rtl/VX_dmem_controller.v +++ b/rtl/VX_dmem_controller.v @@ -3,7 +3,9 @@ module VX_dmem_controller ( input wire clk, - + input wire reset, + // MEM-RAM + VX_dram_req_rsp_inter VX_dram_req_rsp, // MEM-Processor VX_dcache_request_inter VX_dcache_req, VX_dcache_response_inter VX_dcache_rsp @@ -14,7 +16,10 @@ module VX_dmem_controller ( wire[`NT_M1:0] sm_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{to_shm}}; wire[`NT_M1:0] cache_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{~to_shm}}; - + + // Cache don't understand + wire initial_request = (|cache_driver_in_valid); + wire read_or_write = (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE); @@ -25,8 +30,10 @@ module VX_dmem_controller ( wire[`NT_M1:0][31:0] cache_driver_out_data; + wire[`NT_M1:0][31:0] sm_driver_out_data; wire[`NT_M1:0] cache_driver_out_valid; // Not used for now - wire delay; + wire sm_delay; + wire cache_delay; VX_shared_memory #(.NB(7), .BITS_PER_BANK(3)) shared_memory ( @@ -37,15 +44,34 @@ module VX_dmem_controller ( .mem_read (cache_driver_in_mem_read), .mem_write (cache_driver_in_mem_write), .out_valid (cache_driver_out_valid), - .out_data (cache_driver_out_data), - .stall (delay) + .out_data (sm_driver_out_data), + 
.stall (sm_delay) ); + VX_d_cache dcache( + .clk (clk), + .rst (reset), + .i_p_valid (cache_driver_in_valid), + .i_p_addr (cache_driver_in_address), + .i_p_initial_request(initial_request), // OR of cache_driver_in_valid; VX_d_cache only leaves CACHE_IDLE when this input is 1 + .i_p_writedata (cache_driver_in_data), + .i_p_read_or_write (read_or_write), + .o_p_readdata (cache_driver_out_data), + .o_p_readdata_valid (), + .o_p_waitrequest (cache_delay), + .o_m_addr (VX_dram_req_rsp.o_m_addr), + .o_m_valid (VX_dram_req_rsp.o_m_valid), + .o_m_writedata (VX_dram_req_rsp.o_m_writedata), + .o_m_read_or_write (VX_dram_req_rsp.o_m_read_or_write), + .i_m_readdata (VX_dram_req_rsp.i_m_readdata), + .i_m_ready (VX_dram_req_rsp.i_m_ready) + ); - assign VX_dcache_rsp.in_cache_driver_out_data = cache_driver_out_data; - assign VX_dcache_rsp.delay = delay; + + assign VX_dcache_rsp.in_cache_driver_out_data = to_shm ? sm_driver_out_data : cache_driver_out_data; + assign VX_dcache_rsp.delay = sm_delay || cache_delay; // stall while either the shared memory or the d-cache reports busy endmodule \ No newline at end of file diff --git a/rtl/Vortex.v b/rtl/Vortex.v index 88152d8e..86f9e8ae 100644 --- a/rtl/Vortex.v +++ b/rtl/Vortex.v @@ -1,17 +1,29 @@ `include "VX_define.v" + module Vortex( input wire clk, input wire reset, input wire[31:0] icache_response_instruction, output wire[31:0] icache_request_pc_address, + // Req + output reg [31:0] o_m_addr, + output reg o_m_valid, + output reg [31:0] o_m_writedata[`NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0], + output reg o_m_read_or_write, + + // Rsp + input wire [31:0] i_m_readdata[`NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0], + input wire i_m_ready, + // Remove Start input wire[31:0] in_cache_driver_out_data[`NT_M1:0], output wire[31:0] out_cache_driver_in_address[`NT_M1:0], output wire[2:0] out_cache_driver_in_mem_read, output wire[2:0] out_cache_driver_in_mem_write, output wire out_cache_driver_in_valid[`NT_M1:0], output wire[31:0] out_cache_driver_in_data[`NT_M1:0], + // Remove end output wire out_ebreak ); @@ -35,6 +47,27 @@ assign out_cache_driver_in_mem_write = `NO_MEM_WRITE; 
VX_dcache_response_inter VX_dcache_rsp(); VX_dcache_request_inter VX_dcache_req(); + +VX_dram_req_rsp_inter VX_dram_req_rsp(); + +assign o_m_addr = VX_dram_req_rsp.o_m_addr; +assign o_m_valid = VX_dram_req_rsp.o_m_valid; +assign o_m_read_or_write = VX_dram_req_rsp.o_m_read_or_write; + +assign VX_dram_req_rsp.i_m_ready = i_m_ready; + +genvar curr_bank; +genvar curr_word; +for (curr_bank = 0; curr_bank < `NUMBER_BANKS; curr_bank = curr_bank + 1) begin + + for (curr_word = 0; curr_word < `NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin + + assign o_m_writedata[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word]; + assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata[curr_bank][curr_word]; + + end +end + // Icache Interface VX_icache_response_inter icache_response_fe(); @@ -108,9 +141,11 @@ VX_back_end vx_back_end( VX_dmem_controller VX_dmem_controller( - .clk (clk), - .VX_dcache_req(VX_dcache_req), - .VX_dcache_rsp(VX_dcache_rsp) + .clk (clk), + .reset (reset), + .VX_dram_req_rsp(VX_dram_req_rsp), + .VX_dcache_req (VX_dcache_req), + .VX_dcache_rsp (VX_dcache_rsp) ); // VX_csr_handler vx_csr_handler( // .clk (clk), diff --git a/rtl/cache/Makefile b/rtl/cache/Makefile new file mode 100644 index 00000000..e1247633 --- /dev/null +++ b/rtl/cache/Makefile @@ -0,0 +1,12 @@ +all: RUNFILE + + +VERILATOR: + verilator --compiler gcc --Wno-UNOPTFLAT -Wall --trace -cc VX_d_cache_encapsulate.v -Iinterfaces/ --exe d_cache_test_bench.cpp -CFLAGS -std=c++11 + +RUNFILE: VERILATOR + (cd obj_dir && make -j -f VVX_d_cache_encapsulate.mk) + +clean: + rm ./obj_dir/* + diff --git a/rtl/cache/Notes b/rtl/cache/Notes new file mode 100644 index 00000000..0458c659 --- /dev/null +++ b/rtl/cache/Notes @@ -0,0 +1,46 @@ +Notes + + +8 kB L1 Data Cache | 16 kB L1 I cache (maybe) +[tag index offset_remaining_block bank wordOffset], use a blocksize of 128 bytes between memory and cache. So each bank gets 16 bytes. 
+ total offset is b its + 4 bits new offset, 2 bits block, 2 bits word offset + xxxxxxxIIIIIIIIoobbbyy + 9876543210 + bbbyyyyy + o = index into block offset + b = bank + y = word offset + I = index into cache + 6 bits indexes (64 indices) No ways || 16 indexes with 4 ways + Rest of the bits are tag bits + +blocks / banks = 16 bytes, 8 banks. 128 bytes. 256 indexes (height). width is 16 bytes. 4 words per block (per bank). 17 bit tag + +gtkwave ___.vcd + + +// Splitting it up + +// word byte +wire[127:0][3:0] data_from_ram; + + +// word byte bank +wire[15:0][3:0] bank_data_n[3:0] + +integer i; +for (i = 0; i < something; i+=8) +{ + bank_data_n[0][i/8] = data_from_ram[i+0] + bank_data_n[1][i/8] = data_from_ram[i+1] + bank_data_n[2][i/8] = data_from_ram[i+2] + bank_data_n[3][i/8] = data_from_ram[i+3] + bank_data_n[4][i/8] = data_from_ram[i+4] + bank_data_n[5][i/8] = data_from_ram[i+5] + bank_data_n[6][i/8] = data_from_ram[i+6] + bank_data_n[7][i/8] = data_from_ram[i+7] +} + + +With Cache. If miss. Go to memory, grab all data, replace that data in the cache. Generate a new request, feed that into the cache (this one will hit), return that diff --git a/rtl/cache/VX_Cache_Bank.v b/rtl/cache/VX_Cache_Bank.v new file mode 100644 index 00000000..a0be9b82 --- /dev/null +++ b/rtl/cache/VX_Cache_Bank.v @@ -0,0 +1,143 @@ +// To Do: Change way_id_out to an internal register which holds when in between access and finished. 
+// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default +// Also make sure all possible output states are transmitted back to the bank correctly + +`define NUM_WORDS_PER_BLOCK 4 + +`include "VX_define.v" +`include "VX_cache_data.v" + +module VX_Cache_Bank + #( + // parameter NUMBER_INDEXES = 256 + parameter NUMBER_INDEXES = 256 + ) + ( + clk, + state, + read_or_write, // Read = 0 | Write = 1 + valid_in, + //write_from_mem, + actual_index, + o_tag, + block_offset, + writedata, + fetched_writedata, + + + readdata, + hit, + //miss, + + eviction_wb, // Need to evict + eviction_addr, // What's the eviction tag + + data_evicted + ); + + parameter cache_entry = 14; + parameter ways_per_set = 4; + parameter Number_Blocks = 32; + + localparam CACHE_IDLE = 0; // Idle + localparam SORT_BY_BANK = 1; // Determines the bank each thread will access + localparam INITIAL_ACCESS = 2; // Accesses the bank and checks if it is a hit or miss + localparam INITIAL_PROCESSING = 3; // Check to see if there were misses + localparam CONTINUED_PROCESSING = 4; // Keep checking status of banks that need to be written back or fetched + localparam DIRTY_EVICT_GRAB_BLOCK = 5; // Grab the full block of dirty data + localparam DIRTY_EVICT_WB = 6; // Write back this block into memory + localparam FETCH_FROM_MEM = 7; // Send a request to mem looking for read data + localparam FETCH2 = 8; // Stall until memory gets back with the data + localparam UPDATE_CACHE = 9; // Update the cache with the data read from mem + localparam RE_ACCESS = 10; // Access the cache after the block has been fetched from memory + localparam RE_ACCESS_PROCESSING = 11; // Access the cache after the block has been fetched from memory + + // Inputs + input wire clk; + input wire [3:0] state; + //input wire write_from_mem; + + // Reading Data + input wire[$clog2(NUMBER_INDEXES)-1:0] actual_index; + input wire[16:0] o_tag; // When write_from_mem = 1, o_tag is the new tag + input wire[1:0] 
block_offset; + input wire[31:0] writedata; + input wire valid_in; + input wire read_or_write; // Specifies if it is a read or write operation + + input wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] fetched_writedata; + + + + // Outputs + // Normal shit + output wire[31:0] readdata; + output wire hit; + //output wire miss; + + // Eviction Data (Notice) + output wire eviction_wb; // Need to evict + output wire[31:0] eviction_addr; // What's the eviction tag + + // Eviction Data (Extraction) + output wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_evicted; + + + + wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_use; + wire[16:0] tag_use; + wire[16:0] eviction_tag; + wire valid_use; + wire dirty_use; + wire access; + wire write_from_mem; + wire miss; // -10/21 + + + assign miss = (tag_use != o_tag) && valid_use && valid_in; + + + assign data_evicted = data_use; + + assign eviction_wb = miss && (dirty_use != 1'b0); + assign eviction_tag = tag_use; + assign access = (state == INITIAL_ACCESS || state == RE_ACCESS) && valid_in; + assign write_from_mem = (state == UPDATE_CACHE) && valid_in; + assign readdata = (access) ? data_use[block_offset] : 32'b0; // Fix with actual data + assign hit = (access && (tag_use == o_tag) && valid_use); + //assign eviction_addr = {eviction_tag, actual_index, block_offset, 5'b0}; // Fix with actual data + assign eviction_addr = {eviction_tag, actual_index, 7'b0}; // Fix with actual data + + + wire[`NUM_WORDS_PER_BLOCK-1:0] we; + wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_write; + genvar g; + for (g = 0; g < `NUM_WORDS_PER_BLOCK; g = g + 1) begin + wire correct_block = (block_offset == g); + assign we[g] = (read_or_write && ((access && correct_block) || (write_from_mem && !correct_block)) ) ? 1'b1 : 1'b0; + //assign we[g] = (!(write_from_mem && correct_block) && ((write_from_mem || correct_block) && read_or_write == 1'b1)) ? 1 : 0; // added the "not" + assign data_write[g] = write_from_mem ? 
fetched_writedata[g] : writedata; + end + + VX_cache_data data_structures( + .clk (clk), + // Inputs + .addr (actual_index), + .we (we), + .evict (write_from_mem), + .data_write(data_write), + .tag_write (o_tag), + + // Outputs + .tag_use (tag_use), + .data_use (data_use), + .valid_use (valid_use), + .dirty_use (dirty_use) + ); + + +endmodule + + + + diff --git a/rtl/cache/VX_Cache_Block_DM.v b/rtl/cache/VX_Cache_Block_DM.v new file mode 100644 index 00000000..6181e7ca --- /dev/null +++ b/rtl/cache/VX_Cache_Block_DM.v @@ -0,0 +1,196 @@ +// To Do: Change way_id_out to an internal register which holds when in between access and finished. +// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default +// Also make sure all possible output states are transmitted back to the bank correctly + +`include "VX_define.v" +module VX_Cache_Block_DM(clk, + rst, + // These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes + access, // First + find_evict, + write_from_mem, + idle, + // entry, + o_tag, + block_offset, + writedata, + //byte_en, + write, + fetched_writedata, + //word_en, + //way_id_in, + //way_id_out, + readdata, + //wb_addr, + hit, + eviction_wb, + eviction_tag, + evicted_data, + //modify, + miss + //valid_data + //read_miss + ); + + parameter cache_entry = 14; + parameter ways_per_set = 4; + parameter Number_Blocks = 32; + + input wire clk, rst; + input wire access; + input wire find_evict; + input wire write_from_mem; + input wire idle; + //input wire [cache_entry-1:0] entry; + input wire [21:0] o_tag; + input wire [4:0] block_offset; + input wire [31:0] writedata; + //input wire [3:0] byte_en; + input wire write; // 0 == False + input wire [31:0][31:0] fetched_writedata; + //input wire [3:0] word_en; + //input wire read_miss; + //input wire [1:0] way_id_in; + //output reg [1:0] way_id_out; + //output reg [31:0] readdata; + output wire [31:0] readdata; + //output reg hit; + 
output wire hit; + output reg miss; + output wire eviction_wb; + output wire [21:0] eviction_tag; + output wire [31:0][31:0] evicted_data; + //reg [31:0] eviction_data; + //output wire [22:0] wb_addr; + //output wire modify, valid_data; + + + + //wire [2:0] i_tag; + //wire dirty; + //wire [24-cache_entry:0] write_tag_data; + + // Table for one set + //reg [2:0] counter; // Determines which to evict + reg valid; + reg [21:0] tag; + + reg clean; + + + //reg [31:0] data[31:0]; + reg [31:0] data[31:0]; + + integer j; + + // WS AW BS + //reg[3:0][31:0] some_data[5:0]; // before variable name is width, after name is height + + //wire blockNun; + //wire WordNumWIthinABlock; + + //ddata[31:0] =some_data[blockNun][WordNumWIthinABlock] + + + assign eviction_wb = miss && clean != 1'b1 && valid == 1'b1; + assign eviction_tag = tag; + assign readdata = (access && !write && tag == o_tag && valid) ? data[0] : 32'b0; // Fix with actual data + assign hit = (access && !write && tag == o_tag && valid) ? 1'b1 : 1'b0; + //assign evicted_data = (eviction_wb ) ? data : 0; + genvar k; + for (k = 0; k < Number_Blocks; k = k + 1) begin + assign evicted_data[k] = (eviction_wb) ? 
data[k] : 32'b0; + //data[j] <= fetched_writedata[(j+1) * 32 - 1 -: 32]; + end + //assign eviction_data = data[counter[1:0]]; + //assign hit = valid_data && (o_tag == i_tag); + //assign modify = valid_data && (o_tag != i_tag) && dirty; + //assign miss = !valid_data || ((o_tag != i_tag) && !dirty); + + //assign wb_addr = {i_tag, entry}; + always @(posedge clk) begin + if (rst) begin + + end + if (find_evict) begin + if (tag == o_tag && valid) begin + //readdata <= data; + // evicted_data <= data; + end + end else if (access) begin + // Hit in First Column + if (tag == o_tag && valid) begin + if (write == 1'b0) begin // if it is a read + if (clean == 1'b1 ) begin + //hit <= 1'b1; + //readdata <= data; + miss <= 1'b0; + end else begin + //hit <= 1'b0; + //readdata <= 32'b0; + miss <= 1'b1; + end + end else if (write == 1'b1) begin + //for (j = 0; j < Number_Blocks; j = j + 1) begin + //data[j] <= fetched_writedata[(j+1) * 32 - 1 -: 32]; + //end + data[block_offset] <= writedata; + clean <= 1'b0; + //hit <= 1'b1; + end + end + // Miss + else begin + //way_id_out <= counter; + miss <= 1'b1; + if (write == 1'b0) begin // Read Miss + clean <= 1'b1; + //data <= 0; // FIX WITH ACTUAL MEMORY ACCESS + for (j = 0; j < Number_Blocks; j = j + 1) begin + data[j] <= 32'b0; + end + end else if (write == 1'b1) begin // Write Miss + clean <= 1'b1; + data[block_offset] <= writedata; + //for (j = 0; j < Number_Blocks; j = j + 1) begin + //data[j] <= fetched_writedata[(j+1) * 32 - 1 -: 32]; + //end + end + end + + end + if (write_from_mem) begin + tag <= o_tag; + valid <= 1'b1; + //hit <= 1'b1; + if (write == 1'b0) begin // Read Miss + clean <= 1'b1; + //data <= 0; // FIX WITH ACTUAL MEMORY ACCESS + for (j = 0; j < Number_Blocks; j = j + 1) begin + data[j] <= 32'b0; + end + end else if (write == 1'b1) begin // Write Miss + clean <= 1'b0; + //data <= fetched_writedata; + for (j = 0; j < Number_Blocks; j = j + 1) begin + //data[j] <= fetched_writedata[(j+1) * 32 - 1 -: 32]; + data[j] <= 
fetched_writedata[j]; + end + end + end + if (idle) begin // Set "way" register equal to invalid value + //hit <= 1'b1; // set to know it is ready + miss <= 1'b0; + //readdata <= 32'hFFFFFFFF; + end + if (find_evict) begin // Keep "way" value the same !!!! Fix. Need to send back data with matching tag. Also need to ensure evicted data doesnt get lost + if (tag == o_tag && valid) begin + //readdata <= data; + end + //hit <= 1'b1; + miss <= 1'b0; + end + //eviction_data <= data; + end + +endmodule \ No newline at end of file diff --git a/rtl/cache/VX_cache_data.v b/rtl/cache/VX_cache_data.v new file mode 100644 index 00000000..106a5905 --- /dev/null +++ b/rtl/cache/VX_cache_data.v @@ -0,0 +1,193 @@ + +`define NUM_WORDS_PER_BLOCK 4 + +module VX_cache_data ( + input wire clk, // Clock + + // Addr + input wire[$clog2(NUMBER_INDEXES)-1:0] addr, + // WE + input wire[`NUM_WORDS_PER_BLOCK-1:0] we, + input wire evict, + // Data + input wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data + input wire[16:0] tag_write, + + + output wire[16:0] tag_use, + output wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_use, + output wire valid_use, + output wire dirty_use + +); + + + parameter NUMBER_INDEXES = 256; + + wire currently_writing = (|we); + wire update_dirty = ((!dirty_use) && currently_writing) || (evict); + +///////////////// + + // (3:0) 4 bytes + reg[`NUM_WORDS_PER_BLOCK-1:0][31:0] data[NUMBER_INDEXES-1:0]; // Actual Data + reg[16:0] tag[NUMBER_INDEXES-1:0]; + reg valid[NUMBER_INDEXES-1:0]; + reg dirty[NUMBER_INDEXES-1:0]; + + + // 16 bytes + assign data_use = data[addr]; // Read Port + assign tag_use = tag[addr]; + assign valid_use = valid[addr]; + assign dirty_use = dirty[addr]; + + wire dirt_new = evict ? 
0 : (|we); + + integer f; + always @(posedge clk) begin : dirty_update + if (update_dirty) dirty[addr] <= dirt_new; // WRite Port + end + + always @(posedge clk) begin : data_update + for (f = 0; f < `NUM_WORDS_PER_BLOCK; f = f + 1) begin + if (we[f]) data[addr][f] <= data_write[f]; + end + end + + always @(posedge clk) begin : tag_update + if (evict) tag[addr] <= tag_write; + end + + always @(posedge clk) begin : valid_update + if (evict) valid[addr] <= 1; + end + + +////////////////////////////// + + + // wire cena = 1; + + // wire cenb_d = (|we); + // wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_d = data_write; + // wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] write_bit_mask_d; + // wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_d; + // genvar cur_b; + // for (cur_b = 0; cur_b < `NUM_WORDS_PER_BLOCK; cur_b=cur_b+1) begin + // assign write_bit_mask_d[cur_b] = {32{~we[cur_b]}}; + // end + // assign data_use = data_out_d; + + + // // Using ASIC MEM + // /* verilator lint_off PINCONNECTEMPTY */ + // rf2_256x128_wm1 data ( + // .CENYA(), + // .AYA(), + // .CENYB(), + // .WENYB(), + // .AYB(), + // .QA(data_out_d), + // .SOA(), + // .SOB(), + // .CLKA(clk), + // .CENA(cena), + // .AA(addr), + // .CLKB(clk), + // .CENB(cenb_d), + // .WENB(write_bit_mask_d), + // .AB(addr), + // .DB(wdata_d), + // .EMAA(3'b011), + // .EMASA(1'b0), + // .EMAB(3'b011), + // .TENA(1'b1), + // .TCENA(1'b0), + // .TAA(5'b0), + // .TENB(1'b1), + // .TCENB(1'b0), + // .TWENB(128'b0), + // .TAB(5'b0), + // .TDB(128'b0), + // .RET1N(1'b1), + // .SIA(2'b0), + // .SEA(1'b0), + // .DFTRAMBYP(1'b0), + // .SIB(2'b0), + // .SEB(1'b0), + // .COLLDISN(1'b1) + // ); + // /* verilator lint_on PINCONNECTEMPTY */ + + + + + + // wire[16:0] old_tag; + // wire old_valid; + // wire old_dirty; + + // wire[16:0] new_tag = evict ? tag_write : old_tag; + // wire new_valid = evict ? 1 : old_valid; + // wire new_dirty = update_dirty ? 
new_dirty : old_dirty; + + + // wire cenb_m = (evict || update_dirty); + // wire[19-1:0][31:0] write_bit_mask_m = cenb_m ? 19'b0 : 19'b1; + + + + + // wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_m = {new_tag, new_dirty, new_valid}; + // wire[`NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_m; + + // assign {old_tag, old_dirty, old_valid} = data_out_m; + + + // assign dirty_use = old_dirty; + // assign valid_use = old_valid; + // assign tag_use = old_tag; + + // /* verilator lint_off PINCONNECTEMPTY */ + // rf2_256x19_wm0 meta ( + // .CENYA(), + // .AYA(), + // .CENYB(), + // // .WENYB(), + // .AYB(), + // .QA(data_out_m), + // .SOA(), + // .SOB(), + // .CLKA(clk), + // .CENA(cena), + // .AA(addr), + // .CLKB(clk), + // .CENB(cenb_m), + // // .WENB(write_bit_mask_m), + // .AB(addr), + // .DB(wdata_m), + // .EMAA(3'b011), + // .EMASA(1'b0), + // .EMAB(3'b011), + // .TENA(1'b1), + // .TCENA(1'b0), + // .TAA(5'b0), + // .TENB(1'b1), + // .TCENB(1'b0), + // // .TWENB(128'b0), + // .TAB(5'b0), + // .TDB(128'b0), + // .RET1N(1'b1), + // .SIA(2'b0), + // .SEA(1'b0), + // .DFTRAMBYP(1'b0), + // .SIB(2'b0), + // .SEB(1'b0), + // .COLLDISN(1'b1) + // ); + // /* verilator lint_on PINCONNECTEMPTY */ + + + +endmodule \ No newline at end of file diff --git a/rtl/cache/VX_d_cache.v b/rtl/cache/VX_d_cache.v new file mode 100644 index 00000000..82216b99 --- /dev/null +++ b/rtl/cache/VX_d_cache.v @@ -0,0 +1,602 @@ +// Cache Memory (8way 4word) // +// i_ means input port // +// o_ means output port // +// _p_ means data exchange with processor // +// _m_ means data exchange with memory // + + +// TO DO: +// - Send in a response from memory of what the data is from the test bench + +`include "VX_define.v" +//`include "VX_priority_encoder.v" +`include "VX_Cache_Bank.v" +//`include "cache_set.v" + + +module VX_d_cache(clk, + rst, + i_p_initial_request, + i_p_addr, + //i_p_byte_en, + i_p_writedata, + i_p_read_or_write, // 0 = Read | 1 = Write + i_p_valid, + //i_p_write, + o_p_readdata, + 
o_p_readdata_valid, + o_p_waitrequest, // 0 = all threads done | 1 = Still threads that need to + + o_m_addr, + //o_m_byte_en, + o_m_writedata, + + o_m_read_or_write, // 0 = Read | 1 = Write + o_m_valid, + //o_m_write, + i_m_readdata, + + //i_m_readdata_ready, + //i_m_waitrequest, + i_m_ready + + //cnt_r, + //cnt_w, + //cnt_hit_r, + //cnt_hit_w + //cnt_wb_r, + //cnt_wb_w + ); + + parameter NUMBER_BANKS = 8; + + localparam CACHE_IDLE = 0; // Idle + localparam SORT_BY_BANK = 1; // Determines the bank each thread will access + localparam INITIAL_ACCESS = 2; // Accesses the bank and checks if it is a hit or miss + localparam INITIAL_PROCESSING = 3; // Check to see if there were misses + localparam CONTINUED_PROCESSING = 4; // Keep checking status of banks that need to be written back or fetched + localparam DIRTY_EVICT_GRAB_BLOCK = 5; // Grab the full block of dirty data + localparam DIRTY_EVICT_WB = 6; // Write back this block into memory + localparam FETCH_FROM_MEM = 7; // Send a request to mem looking for read data + localparam FETCH2 = 8; // Stall until memory gets back with the data + localparam UPDATE_CACHE = 9; // Update the cache with the data read from mem + localparam RE_ACCESS = 10; // Access the cache after the block has been fetched from memory + localparam RE_ACCESS_PROCESSING = 11; // Access the cache after the block has been fetched from memory + + + //parameter cache_entry = 9; + input wire clk, rst; + input wire [`NT_M1:0] i_p_valid; + //input wire [`NT_M1:0][24:0] i_p_addr; // FIXME + input wire [`NT_M1:0][31:0] i_p_addr; // FIXME + input wire i_p_initial_request; + //input wire [3:0] i_p_byte_en; + input wire [`NT_M1:0][31:0] i_p_writedata; + input wire i_p_read_or_write; //, i_p_write; + output reg [`NT_M1:0][31:0] o_p_readdata; + output reg [`NT_M1:0] o_p_readdata_valid; + output wire o_p_waitrequest; + //output reg [24:0] o_m_addr; // Only one address is sent out at a time to memory -- FIXME + output reg [31:0] o_m_addr; // Address is 
xxxxxxxxxxoooobbbyy + output reg o_m_valid; + //output wire [255:0][31:0] evicted_data; + //output wire [3:0] o_m_byte_en; + //output reg [(NUMBER_BANKS * 32) - 1:0] o_m_writedata; + output reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata; + output reg o_m_read_or_write; //, o_m_write; + //input wire [(NUMBER_BANKS * 32) - 1:0] i_m_readdata; // Read Data that is passed from the memory module back to the controller + input wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata; + //input wire i_m_readdata_ready; + //input wire i_m_waitrequest; + input wire i_m_ready; + + //output reg [31:0] cnt_r; + //output reg [31:0] cnt_w; + //output reg [31:0] cnt_hit_r; + //output reg [31:0] cnt_hit_w; + //output reg [31:0] cnt_wb_r; + //output reg [31:0] cnt_wb_w; + + //wire [1:0] tag [`NT_M1:0]; + //wire [3:0] index [`NT_M1:0]; + //wire [2:0] bank [`NT_M1:0]; + //wire all_done; + + //integer i; + reg [`NT_M1:0] thread_done; // Maybe should have "thread_serviced" and "thread_done", serviced==checked cache + //reg [`NT_M1:0] thread_serviced; // Maybe should have "thread_serviced" and "thread_done", serviced==checked cache + reg [NUMBER_BANKS - 1:0] banks_ready; + //reg [NUMBER_BANKS - 1:0] banks_missed; + reg [NUMBER_BANKS - 1:0] banks_to_service; + reg [NUMBER_BANKS - 1:0] banks_wb_needed; + reg [NUMBER_BANKS - 1:0][31:0] banks_wb_addr; + //reg [NUMBER_BANKS - 1:0] bank_states; + //reg [NUMBER_BANKS - 1:0][31:0] banks_wb_data; + //reg [NUMBER_BANKS - 1:0][13:0] banks_in_addr; + + + reg [3:0] state; + reg [NUMBER_BANKS - 1:0][31:0] data_from_bank; + //reg got_valid_data; + //reg [31:0] data_to_write; + + + //reg [`NT_M1:0] thread_track_bank_0; + //reg [`NT_M1:0] thread_track_bank_1; + //reg [`NT_M1:0] thread_track_bank_2; + //reg [`NT_M1:0] thread_track_bank_3; + //reg [`NT_M1:0] thread_track_bank_4; + //reg [`NT_M1:0] thread_track_bank_5; + //reg [`NT_M1:0] thread_track_bank_6; + //reg [`NT_M1:0] thread_track_bank_7; + reg [NUMBER_BANKS - 1 
: 0][`NT_M1:0] thread_track_banks; + reg [NUMBER_BANKS - 1 : 0] bank_has_access; // Will track if a bank has been accessed in this cycle + reg [NUMBER_BANKS - 1 : 0][31:0] bank_access_addr; + reg [NUMBER_BANKS - 1 : 0][31:0] bank_access_data; + reg [NUMBER_BANKS - 1 : 0][1:0] threads_in_banks; + + + //reg [1:0] thread_in_memory; // keeps track of threadID which is in memory + reg rd_or_wr; + //reg did_miss, needs_service; Commented out Oct 21 + + integer bnk; + integer found; + integer t_id; + //integer num_misses; + //integer num_evictions_to_wb; + integer i; //reg [1:0] correct_tag; + integer index; + //reg [3:0] correct_index; + + //assign tag = i_p_addr[13:12]; + + assign o_p_waitrequest = (thread_done == 4'hF) ? 1'b0 : 1'b1; // change thread_done to be generic + //assign did_miss = (banks_missed != 8'h0) ? 1'b1 : 1'b0; + //assign needs_service = ((banks_to_service != 8'b0 || banks_to_service_temp != 8'b0)) ? 1'b1 : 1'b0; // added banks_to_service temp + //assign w_Test1 = r_Check ? 
1'b1 : 1'b0; + //for ( i = 0;i < `NT_M1;i = i + 1) begin + // assign tag[i] = i_p_addr[i][13:12]; + + // Fares +// wire no_bank_misses; +// assign no_bank_misses = banks_to_service != 8'b0; + + reg[NUMBER_BANKS - 1:0] banks_to_service_temp; + reg[NUMBER_BANKS - 1:0] banks_to_wb; + reg[NUMBER_BANKS - 1:0] banks_to_wb_temp; + reg[NUMBER_BANKS - 1:0] banks_all_help; + + + always @(posedge clk) begin + if (rst) begin + state <= 0; + //banks_ready <= 8'b0; + //cnt_r <= 0; + //cnt_w <= 0; + //cnt_hit_r <= 0; + //cnt_hit_w <= 0; + //cnt_wb_r <= 0; + //cnt_wb_w <= 0; + + end else begin + // Change Logic of which state the cache is in + case (state) + CACHE_IDLE:begin + if (i_p_initial_request == 1'b1) begin + state <= SORT_BY_BANK; + end else begin + state <= CACHE_IDLE; + end + end + SORT_BY_BANK:begin + state <= INITIAL_ACCESS; + end + INITIAL_ACCESS:begin + if (thread_done == 4'hF) begin + state <= CACHE_IDLE; + end else begin + state <= INITIAL_PROCESSING; + end + end + INITIAL_PROCESSING:begin + if (bank_has_access == banks_ready ) begin // if all hits + state <= INITIAL_ACCESS; + end else begin + state <= CONTINUED_PROCESSING; + end + + end + CONTINUED_PROCESSING:begin + if (banks_to_wb == 8'b0 && banks_to_service == 8'b0) begin // If all threads are done, then the cache can go back into idle state (not currently fetching any requests) + state <= INITIAL_ACCESS; + //end else if (num_misses > 0) begin + end else if ((banks_to_wb != 8'b0)) begin // change 1pm + state <= DIRTY_EVICT_GRAB_BLOCK; + //end else if (did_miss == 1'b1 || needs_service == 1'b1) begin + end else if(banks_to_service != 8'b0) begin + state <= FETCH_FROM_MEM; + // end else if (did_miss == 1'b0 && num_evictions_to_wb > 0) begin + //end else if (needs_service == 1'b0 && did_miss == 1'b0 && (banks_to_wb != 8'b0)) begin + //end else if (did_miss == 1'b0 && needs_service == 1'b0) begin + //state <= INITIAL_ACCESS; + end + end + FETCH_FROM_MEM:begin + state <= FETCH2; + end + FETCH2:begin + if (i_m_ready 
== 1'b1) begin + state <= UPDATE_CACHE; // Not sure about this one !!!!!! Check + end else begin + state <= FETCH2; + end + end + UPDATE_CACHE:begin + state <= RE_ACCESS; + end + RE_ACCESS:begin + state <= RE_ACCESS_PROCESSING; + end + RE_ACCESS_PROCESSING: begin + state <= CONTINUED_PROCESSING; + end + DIRTY_EVICT_GRAB_BLOCK:begin + state <= DIRTY_EVICT_WB; + end + DIRTY_EVICT_WB:begin + state <= CONTINUED_PROCESSING; + end + endcase + end + + //tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12]; + end + + // Change values which will be fed into the cache + always @(*) begin + case (state) + CACHE_IDLE:begin + thread_done = 0; + o_m_read_or_write = 0; + o_m_valid = 0; + o_m_writedata = 0; + o_p_readdata = 0; + o_p_readdata_valid = 0; + bank_has_access = 8'b0; + //bank_states = CACHE_IDLE; + //thread_track_bank_0 = 4'b0; + //thread_track_bank_1 = 4'b0; + //thread_track_bank_2 = 4'b0; + //thread_track_bank_3 = 4'b0; + //thread_track_bank_4 = 4'b0; + //thread_track_bank_5 = 4'b0; + //thread_track_bank_6 = 4'b0; + //thread_track_bank_7 = 4'b0; + for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + thread_track_banks[bnk] = 4'b0; + end + end + SORT_BY_BANK:begin + //bank_states = SORT_BY_BANK; + rd_or_wr = i_p_read_or_write; + for (t_id = 0; t_id <= `NT_M1; t_id = t_id + 1) begin + //t_id = {1'b0,t_id}; + if (i_p_valid[t_id] == 1'b0) begin + thread_done[t_id] = 1'b1; + end + //if (i_p_valid[t_id] == 1'b1 && thread_done[t_id] == 1'b0) begin // Need logic for thread done + else if (i_p_addr[t_id][4:2] == 3'b000) begin + //banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later + //thread_track_bank_0[t_id] = 1'b1; + thread_track_banks[0][t_id] = 1'b1; + end + else if (i_p_addr[t_id][4:2] == 3'b001) begin // !!!!!!! 
+ //banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later + //thread_track_bank_1[t_id] = 1'b1; + thread_track_banks[1][t_id] = 1'b1; + end + else if (i_p_addr[t_id][4:2] == 3'b010) begin + //banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later + //thread_track_bank_2[t_id] = 1'b1; + thread_track_banks[2][t_id] = 1'b1; + end + else if (i_p_addr[t_id][4:2] == 3'b011) begin + //banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later + //thread_track_bank_3[t_id] = 1'b1; + thread_track_banks[3][t_id] = 1'b1; + end + else if (i_p_addr[t_id][4:2] == 3'b100) begin + //banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later + //thread_track_bank_4[t_id] = 1'b1; + thread_track_banks[4][t_id] = 1'b1; + end + else if (i_p_addr[t_id][4:2] == 3'b101) begin + //banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later + //thread_track_bank_5[t_id] = 1'b1; + thread_track_banks[5][t_id] = 1'b1; + end + else if (i_p_addr[t_id][4:2] == 3'b110) begin + //banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later + //thread_track_bank_6[t_id] = 1'b1; + thread_track_banks[6][t_id] = 1'b1; + end + else if (i_p_addr[t_id][4:2] == 3'b111) begin + //banks_in_addr[0] = i_p_addr[t_id]; // WIll need to do this later + //thread_track_bank_7[t_id] = 1'b1; + thread_track_banks[7][t_id] = 1'b1; + end + end + end + INITIAL_ACCESS:begin + //bank_states = INITIAL_ACCESS; + o_m_valid = 1'b0; + + // Before Access +// if (no_bank_misses) begin + // Dont do anything, next clock cycle it will switch back to (Fetch from mem) +// end else begin // Do logic to send requests to each bank (look through thread_track_bank regs) + bank_has_access = 8'b0; + for (t_id = 0; t_id <= `NT_M1; t_id = t_id + 1) begin + for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + if(thread_track_banks[bnk][t_id] == 1'b1 && bank_has_access[bnk] == 1'b0) begin + bank_has_access[bnk] = 1'b1; + bank_access_data[bnk] = i_p_writedata[t_id]; + bank_access_addr[bnk] = 
i_p_addr[t_id]; + threads_in_banks[bnk] = t_id[1:0]; + end + end + //if (banks_wb_needed[bnk]) begin // need to fix this for multiple misses + //o_m_read_or_write = 1'b0; + //o_m_addr = banks_wb_addr[bnk]; + //o_m_valid = 1'b1; + //o_m_writedata = {banks_wb_data[bnk], 96'b0}; + //end + //if(thread_track_bank_0[t_id] == 1'b1 && bank_has_access[0] == 1'b0) begin + //bank_has_access[0] = 1'b1; + //bank_access_data[0] = i_p_writedata[t_id]; + //bank_access_addr[0] = i_p_addr[t_id]; + //threads_in_banks[0] = t_id; + //end + // NEED TO UPDATE HITS (STORE IN THREADS_DONE) + end + //num_misses = {28'b0, $countones(banks_missed)}; + //did_miss = (banks_missed == 4'hF); + +// end + + + end + INITIAL_PROCESSING:begin + //bank_has_access = 8'b0; + for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + if(banks_ready[bnk]) begin // FIX to handle hits + thread_done[threads_in_banks[bnk]] = 1'b1; + o_p_readdata[threads_in_banks[bnk]] = data_from_bank[bnk]; + if(i_p_read_or_write == 1'b0) begin + o_p_readdata_valid[threads_in_banks[bnk]] = 1'b1; + end + thread_track_banks[bnk][threads_in_banks[bnk]] = 1'b0; // Update that this thread does not need to be serviced again + end + end + //banks_to_service_temp = !banks_ready; // These are clean misses + for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + assign banks_to_service_temp[bnk] = (banks_ready[bnk] || (bank_has_access[bnk] == 0)) ? 
1'b0 : 1'b1; + assign banks_to_wb_temp[bnk] = (banks_wb_needed[bnk]); + assign banks_all_help[bnk] = banks_to_service_temp[bnk] || banks_to_wb_temp[bnk]; + end + end + CONTINUED_PROCESSING:begin + //for (i = `NW-1; i >= 0; i = i - 1) begin + // if (thread_done[threads_in_banks[bnk]] == 1'b1) begin // Not sure about this logic + // //index = i[`NW_M1:0]; + // banks_to_service_temp[i] = 1'b0; + // banks_to_wb_temp[i] = 1'b0; + // end + //end + end + FETCH_FROM_MEM:begin + // NEED TO ADD LOGIC TO SEE IF MISSES GO TO SAME BLOCK + index = 0; + found = 0; + for (i = `NW-1; i >= 0; i = i - 1) begin + if (banks_to_service[i]) begin // Not sure about this logic + //index = i[`NW_M1:0]; + index = i; + found = 1; + end + end + if (found == 1) begin + //banks_missed[index] = 0; + //thread_done + + //thread_in_memory = threads_in_banks[index]; + //o_m_writedata = bank_access_data[index]; + banks_to_service_temp[index] = 0; + o_m_addr = bank_access_addr[index]; + o_m_valid = 1'b1; + o_m_read_or_write = 1'b0; + end + //bank_states = FETCH_FROM_MEM; + end + FETCH2:begin + o_m_valid = 1'b0; + end + UPDATE_CACHE:begin + for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + //if(thread_track_banks[bnk][t_id] == 1'b1 && bank_has_access[bnk] == 1'b0) begin + bank_has_access[bnk] = 1'b1; + //bank_access_data[bnk] = i_m_readdata[(bnk+1)*32 - 1:bnk*32]; + bank_access_addr[bnk] = o_m_addr; + threads_in_banks[bnk] = t_id[1:0]; + //end + end + //bank_access_data = i_m_readdata; + rd_or_wr = 1'b1; + //thread_done[thread_in_memory] = 1'b1; // Removed, new cache style - Oct 21 + //o_p_readdata[thread_in_memory] = i_m_readdata[i_p_addr[thread_in_memory][9:5]]; // Removed, new cache style + end + DIRTY_EVICT_WB:begin // this begininng logic should be added to dirty evict grab block + + //thread_done[thread_in_memory] = 1'b1; + o_m_valid = 1'b1; + end + DIRTY_EVICT_GRAB_BLOCK:begin + index = 0; + found = 0; + for (i = `NW-1; i >= 0; i = i - 1) begin + if (banks_to_wb_temp[i]) begin + //index = 
i[`NW_M1:0]; + index = i; + found = 1; + end + end + if (found == 1) begin + banks_to_wb_temp[index] = 0; + for (i = `NW-1; i >= 0; i = i - 1) begin + if (banks_to_wb_temp[i] && banks_wb_addr[index][31:7] == banks_wb_addr[i][31:7]) begin + //index = i[`NW_M1:0]; + banks_to_wb_temp[i] = 0; + end + end + //thread_done + //thread_in_memory = threads_in_banks[index]; + //o_m_writedata[(bnk+1)*32 - 1:bnk*32] = banks_wb_data[index]; + o_m_addr = banks_wb_addr[index]; + o_m_read_or_write = 1'b1; + end + //for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + //o_m_writedata[(bnk+1)*32 - 1:bnk*32] = banks_wb_data[index]; + //end + // NEXT LINE CONTAINS DATA TO WB !!!! Think need to just change this to be read data and can remove banks_wb_data + //o_m_writedata = {banks_wb_data[7],banks_wb_data[6],banks_wb_data[5],banks_wb_data[4],banks_wb_data[3],banks_wb_data[2],banks_wb_data[1],banks_wb_data[0]}; + //num_evictions_to_wb = {28'b0, $countones(banks_wb_needed)}; + rd_or_wr = 1'b0; + for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + //if(thread_track_banks[bnk][t_id] == 1'b1 && bank_has_access[bnk] == 1'b0) begin + bank_has_access[bnk] = 1'b1; + bank_access_addr[bnk] = o_m_addr; + //end + end + end + RE_ACCESS:begin + //bank_states = INITIAL_ACCESS; + o_m_valid = 1'b0; + + // Before Access +// if (no_bank_misses) begin + // Dont do anything, next clock cycle it will switch back to (Fetch from mem) +// end else begin // Do logic to send requests to each bank (look through thread_track_bank regs) + //bank_has_access = banks_all_help & !(banks_to_wb) & !(banks_to_service); + for (t_id = 0; t_id <= `NT_M1; t_id = t_id + 1) begin + for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + //bank_has_access[bnk] = banks_all_help[bnk] && !thread_done[threads_in_banks[bnk]]; // Not sure + bank_has_access[bnk] = banks_all_help[bnk] && !thread_done[t_id]; // Not sure + if(thread_track_banks[bnk][t_id] == 1'b1 && bank_has_access[bnk] == 1'b1) begin + //bank_has_access[bnk] = 
1'b1; + bank_access_data[bnk] = i_p_writedata[t_id]; + bank_access_addr[bnk] = i_p_addr[t_id]; + threads_in_banks[bnk] = t_id[1:0]; + end + end + end + + + + end + RE_ACCESS_PROCESSING:begin + // After Access + for (bnk = 0; bnk < NUMBER_BANKS; bnk = bnk + 1) begin + if(banks_ready[bnk]) begin // FIX to handle hits + thread_done[threads_in_banks[bnk]] = 1'b1; + o_p_readdata[threads_in_banks[bnk]] = data_from_bank[bnk]; + if(i_p_read_or_write == 1'b0) begin + o_p_readdata_valid[threads_in_banks[bnk]] = 1'b1; + end + thread_track_banks[bnk][threads_in_banks[bnk]] = 1'b0; // Update that this thread does not need to be serviced again + // Added Oct 21 + banks_to_service_temp[bnk] = 1'b0; + banks_to_wb_temp[bnk] = 1'b0; + end + end + end + + endcase + end + + always @(posedge clk) begin + banks_to_service <= banks_to_service_temp; + banks_to_wb <= banks_to_wb_temp; + end + + + genvar bank_id; + generate + for (bank_id = 0; bank_id < NUMBER_BANKS; bank_id = bank_id + 1) + begin + //VX_alu vx_alu( + // .in_reg_data (in_reg_data[1:0]), + // .in_1 (in_a_reg_data[index_out_reg]), + // .in_2 (in_b_reg_data[index_out_reg]), + // .in_rs2_src (in_rs2_src), + // .in_itype_immed(in_itype_immed), + // .in_upper_immed(in_upper_immed), + // .in_alu_op (in_alu_op), + // .in_csr_data (in_csr_data), + // .in_curr_PC (in_curr_PC), + // .out_alu_result(VX_exe_mem_req.alu_result[index_out_reg]) + //); +// bank VX_banks( +// .clk (clk), +// .rst (rst), +// //.state (bank_states[bank_id]), +// .state (state), +// .read_or_write (rd_or_wr), +// //.index (correct_index), +// //.tag (correct_tag), +// .addr (bank_access_addr[bank_id]), +// .writedata (bank_access_data[bank_id]), +// .fetched_write_data(i_m_readdata[(bank_id+1)*32-1 -: 32]), +// .valid (bank_has_access[bank_id]), +// .readdata (data_from_bank[bank_id]), +// .miss_cache (banks_missed[bank_id]), +// .w2m_needed (banks_wb_needed[bank_id]), +// .w2m_addr (banks_wb_addr[bank_id]), +// .e_data (o_m_writedata[(bank_id+1)*32-1 -: 32]), 
+// //.w2m_data (banks_wb_data[bank_id]), +// .ready (banks_ready[bank_id]) +// //.valid_data (valid_in_set) +// //.read_miss (read_miss) +// ); + + VX_Cache_Bank bank_structure ( + .clk (clk), + .state (state), + .read_or_write (rd_or_wr), + .valid_in (bank_has_access[bank_id]), + .actual_index (bank_access_addr[bank_id][14:7]), // fix when size changes + .o_tag (bank_access_addr[bank_id][31:15]), // fix when size changes + .block_offset (bank_access_addr[bank_id][6:5]), + .writedata (bank_access_data[bank_id]), + //.fetched_writedata (i_m_readdata[(bank_id+1)*32-1 -: 32]), + .fetched_writedata (i_m_readdata[bank_id[3:0]]), + .readdata (data_from_bank[bank_id]), + .hit (banks_ready[bank_id]), + //.miss (banks_missed[bank_id]), + .eviction_wb (banks_wb_needed[bank_id]), + .eviction_addr (banks_wb_addr[bank_id]), + //.data_evicted (o_m_writedata[(bank_id+1)*32-1 -: 32]) + .data_evicted (o_m_writedata[bank_id[3:0]]) + ); + + end + endgenerate + + //end + +endmodule + + + + + diff --git a/rtl/cache/VX_d_cache_encapsulate.v b/rtl/cache/VX_d_cache_encapsulate.v new file mode 100644 index 00000000..8549ec52 --- /dev/null +++ b/rtl/cache/VX_d_cache_encapsulate.v @@ -0,0 +1,118 @@ + +`include "VX_define.v" + +// `define NUM_WORDS_PER_BLOCK 4 + +module VX_d_cache_encapsulate ( + clk, + rst, + + i_p_initial_request, + i_p_addr, + i_p_writedata, + i_p_read_or_write, + i_p_valid, + + o_p_readdata, + o_p_readdata_valid, + o_p_waitrequest, + + o_m_addr, + o_m_writedata, + o_m_read_or_write, + o_m_valid, + + i_m_readdata, + i_m_ready +); + + parameter NUMBER_BANKS = 8; + + + + + //parameter cache_entry = 9; + input wire clk, rst; + + input wire i_p_valid[`NT_M1:0]; + input wire [31:0] i_p_addr[`NT_M1:0]; + input wire i_p_initial_request; + input wire [31:0] i_p_writedata[`NT_M1:0]; + input wire i_p_read_or_write; + + input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0]; + input wire i_m_ready; + + output reg [31:0] o_p_readdata[`NT_M1:0]; + output reg 
o_p_readdata_valid[`NT_M1:0] ; + output reg o_p_waitrequest; + + output reg [31:0] o_m_addr; + output reg o_m_valid; + output reg [31:0] o_m_writedata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0]; + output reg o_m_read_or_write; + + + // Inter + wire [`NT_M1:0] i_p_valid_inter; + wire [`NT_M1:0][31:0] i_p_addr_inter; + wire [`NT_M1:0][31:0] i_p_writedata_inter; + + reg [`NT_M1:0][31:0] o_p_readdata_inter; + reg [`NT_M1:0] o_p_readdata_valid_inter; + + reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter; + wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter; + + + genvar curr_thraed; + for (curr_thraed = 0; curr_thraed < `NT; curr_thraed = curr_thraed + 1) begin + assign i_p_valid_inter[curr_thraed] = i_p_valid[curr_thraed]; + assign i_p_addr_inter[curr_thraed] = i_p_addr[curr_thraed]; + assign i_p_writedata_inter[curr_thraed] = i_p_writedata[curr_thraed]; + assign o_p_readdata[curr_thraed] = o_p_readdata_inter[curr_thraed]; + assign o_p_readdata_valid[curr_thraed] = o_p_readdata_valid_inter[curr_thraed]; + end + + + genvar curr_bank; + genvar curr_word; + for (curr_bank = 0; curr_bank < NUMBER_BANKS; curr_bank = curr_bank + 1) begin + + for (curr_word = 0; curr_word < `NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin + + assign o_m_writedata[curr_bank][curr_word] = o_m_writedata_inter[curr_bank][curr_word]; + assign i_m_readdata_inter[curr_bank][curr_word] = i_m_readdata[curr_bank][curr_word]; + + end + end + +VX_d_cache dcache( + .clk (clk), + .rst (rst), + .i_p_valid (i_p_valid_inter), + .i_p_addr (i_p_addr_inter), + .i_p_initial_request(i_p_initial_request), + .i_p_writedata (i_p_writedata_inter), + .i_p_read_or_write (i_p_read_or_write), + .o_p_readdata (o_p_readdata_inter), + .o_p_readdata_valid (o_p_readdata_valid_inter), + .o_p_waitrequest (o_p_waitrequest), + .o_m_addr (o_m_addr), + .o_m_valid (o_m_valid), + .o_m_writedata (o_m_writedata_inter), + .o_m_read_or_write (o_m_read_or_write), + .i_m_readdata 
(i_m_readdata_inter), + .i_m_ready (i_m_ready) + ); + + +endmodule + + + + + + + + diff --git a/rtl/cache/VX_d_cache_tb.v b/rtl/cache/VX_d_cache_tb.v new file mode 100644 index 00000000..4f5681c3 --- /dev/null +++ b/rtl/cache/VX_d_cache_tb.v @@ -0,0 +1,58 @@ +`include "VX_define.v" +`include "VX_d_cache.v" + +module VX_d_cache_tb; + + parameter NUMBER_BANKS = 8; + + reg clk, reset, im_ready; + reg [`NT_M1:0] i_p_valid; + reg [`NT_M1:0][13:0] i_p_addr; // FIXME + reg i_p_initial_request; + reg [`NT_M1:0][31:0] i_p_writedata; + reg i_p_read_or_write; //, i_p_write; + reg [`NT_M1:0][31:0] o_p_readdata; + reg [`NT_M1:0] o_p_readdata_valid; + reg o_p_waitrequest; + reg [13:0] o_m_addr; // Only one address is sent out at a time to memory + reg o_m_valid; + reg [(NUMBER_BANKS * 32) - 1:0] o_m_writedata; + reg o_m_read_or_write; //, o_m_write; + reg [(NUMBER_BANKS * 32) - 1:0] i_m_readdata; // Read Data that is passed from the memory module back to the controller + + + VX_d_cache d_cache(.clk(clk), + .rst(reset), + .i_p_initial_request(i_p_initial_request), + .i_p_addr(i_p_addr), + .i_p_writedata(i_p_writedata), + .i_p_read_or_write(i_p_read_or_write), // 0 = Read | 1 = Write + .i_p_valid(i_p_valid), + .o_p_readdata(o_p_readdata), + .o_p_readdata_valid(o_p_readdata_valid), + .o_p_waitrequest(o_p_waitrequest), // 0 = all threads done | 1 = Still threads that need to + .o_m_addr(o_m_addr), + .o_m_writedata(o_m_writedata), + .o_m_read_or_write(o_m_read_or_write), // 0 = Read | 1 = Write + .o_m_valid(o_m_valid), + .i_m_readdata(i_m_readdata), + .i_m_ready(im_ready) + //cnt_r, + //cnt_w, + //cnt_hit_r, + //cnt_hit_w + ); + + + + initial + begin + clk = 0; + reset = 0; + + end + + always + #5 clk = ! 
clk;
+
+endmodule
\ No newline at end of file
diff --git a/rtl/cache/VX_generic_pe.v b/rtl/cache/VX_generic_pe.v
new file mode 100644
index 00000000..4ff3cc17
--- /dev/null
+++ b/rtl/cache/VX_generic_pe.v
@@ -0,0 +1,32 @@
+// Priority encoder: reports the position of the lowest-numbered asserted bit
+// of `valids`.
+//   N      - width of the `valids` vector (default 8)
+//   valids - request bits to scan
+//   index  - position of the lowest set bit; 0 when no bit is set
+//   found  - 1 when at least one bit of `valids` is set, otherwise 0
+module VX_generic_pe
+  #(
+    parameter N = 8
+  )
+  (
+    input wire[N-1:0] valids,
+    output reg[$clog2(N)-1:0] index,
+    output reg found
+  );
+
+parameter my_secret = 0; // not referenced inside this module; kept so the parameter list is unchanged
+
+  integer pos;
+  always @(*) begin
+    // Defaults cover the "nothing asserted" case.
+    found = 0;
+    index = 0;
+    // Scan upward and latch only the first hit, so bit 0 has the highest priority.
+    for (pos = 0; pos < N; pos = pos + 1) begin
+      if (!found && valids[pos]) begin
+        found = 1;
+        index = pos[$clog2(N)-1:0];
+      end
+    end
+  end
+endmodule
\ No newline at end of file
diff --git a/rtl/cache/bank.v b/rtl/cache/bank.v
new file mode 100644
index 00000000..b6ea5939
--- /dev/null
+++ b/rtl/cache/bank.v
@@ -0,0 +1,201 @@
+`include "VX_define.v"
+//`include "cache_set.v"
+`include "VX_Cache_Block_DM.v"
+
+module bank(clk,
+    rst,
+    state,
+    read_or_write,
+    //index,
+    //tag,
+    addr,
+    writedata,
+    fetched_write_data,
+    valid,
+    readdata,
+    miss_cache,
+    w2m_needed,
+    w2m_addr,
+    e_data,
+    //w2m_data,
+    ready
+    );
+
+    //parameter NUMBER_INDEXES = 16;
+    parameter NUMBER_INDEXES = 64;
+
+    localparam CACHE_IDLE = 0; // Idle
+    localparam SORT_BY_BANK = 1; // Determines the bank each thread will access
+    localparam CACHE_ACCESS = 2; // Accesses the bank and checks if it is a hit or miss
+    localparam FETCH_FROM_MEM = 3; // Send a request to mem looking for read data
+    localparam FETCH2 = 4; // Stall until memory gets back with the data
+    localparam UPDATE_CACHE = 5; // Update the cache with the data read from mem
+    localparam DIRTY_EVICT_GRAB_BLOCK = 6; // Grab the full block of dirty data
+    localparam DIRTY_EVICT_WB = 7; // Write back this block into memory
+    localparam WB_FROM_MEM = 8; // Currently unused
+
+    input wire clk, rst;
+    input wire read_or_write;
+    input wire [31:0] writedata;
+    input wire [31:0][31:0] fetched_write_data;
+    input wire [3:0] state;
+    //input wire [1:0] tag;
+    //input wire [3:0] index;
+    input wire [31:0] addr;
+
input wire valid; + output wire[NUMBER_INDEXES-1:0] [31:0] readdata; + output wire ready; + //output wire miss_cache; + output reg miss_cache; + output wire [31:0][31:0] e_data; + output wire w2m_needed; + //output reg [31:0] w2m_data; + output reg [31:0] w2m_addr; + + wire [NUMBER_INDEXES-1:0] miss; + //wire [15:0][31:0] e_data; + wire [NUMBER_INDEXES-1:0] e_wb; + wire [NUMBER_INDEXES-1:0][21:0] e_tag; + //wire [3:0] index; + //wire valid_in_set; + //wire read_miss; + //wire modify; + wire hit; + reg [NUMBER_INDEXES-1:0] set_to_access; + reg [NUMBER_INDEXES-1:0] set_find_evict; + reg [NUMBER_INDEXES-1:0] set_idle; + reg [NUMBER_INDEXES-1:0] set_wfm; + //reg [1:0][15:0] way_id_recieved; + //reg [1:0][15:0] way_id_sending; + //reg wb_addr; // Concatination of tag and index for which we will write the data after a memory fetch + + // Do logic about processing before going into the cache set here + + assign miss_cache = (miss != 0); + assign ready = hit && (miss == 0); + //assign set_wfm = + //assign e_tag = miss ? 
+ + //always @(state) begin + //miss_cache = (miss != 0); + //end + + + //always @(state) begin + //for (indeces = 0; indeces < NUMBER_INDEXES; indeces = indeces + 1) begin + //if (set_to_access == indeces) begin + //if ({28'b0,addr[11:8]} == indeces && state == UPDATE_CACHE && valid) begin + // reset + //set_wfm[indeces] = 1'b1; + //set_find_evict[indeces] = 1'b0; + //set_idle[indeces] = 1'b0; + //set_to_access[indeces] = 1'b0; + //end else if ({28'b0,addr[11:8]} == indeces && state == CACHE_ACCESS && valid) begin + //set_to_access[indeces] = 1'b1; + //set_wfm[indeces] = 1'b0; + //set_idle[indeces] = 1'b0; + //set_find_evict[indeces] = 1'b0; + //end else if ({28'b0,addr[11:8]} == indeces && state == DIRTY_EVICT_GRAB_BLOCK && valid) begin + //set_to_access[indeces] = 1'b0; + //set_wfm[indeces] = 1'b0; + //set_idle[indeces] = 1'b0; + //set_find_evict[indeces] = 1'b1; + //end else begin + //set_find_evict[indeces] = 1'b0; + //set_to_access[indeces] = 1'b0; + //set_idle[indeces] = 1'b1; + //set_wfm[indeces] = 1'b0; + //end + //end + //end + + for (indeces = 0; indeces < NUMBER_INDEXES; indeces = indeces + 1) begin + assign set_to_access[indeces] = ({28'b0,addr[11:8]} == indeces && state == CACHE_ACCESS && valid) ? 1'b1 : 1'b0; + assign set_find_evict[indeces] = ({28'b0,addr[11:8]} == indeces && state == DIRTY_EVICT_GRAB_BLOCK && valid) ? 1'b1 : 1'b0; + assign set_wfm[indeces] = ({28'b0,addr[11:8]} == indeces && state == UPDATE_CACHE && valid) ? 1'b1 : 1'b0; + assign set_idle[indeces] = (!set_to_access[indeces] && !set_wfm[indeces] && !set_find_evict[indeces]) ? 
1'b1 : 1'b0;
+    end
+
+
+    // reg[31:0][31:0] data[NUMBER_INDEXES-1:0];
+
+    wire[$clog2(NUMBER_INDEXES)-1:0] actual_index;
+
+    assign actual_index = addr[11:8]; // NOTE(review): 4-bit slice assigned to a $clog2(NUMBER_INDEXES)-bit wire, so the upper bits are zero-extended - confirm the intended index field
+
+    genvar indeces;
+    generate
+    for (indeces = 0; indeces < NUMBER_INDEXES; indeces = indeces + 1) // one VX_Cache_Block_DM instance per index
+    begin
+        VX_Cache_Block_DM set(
+            .clk (clk),
+            .rst (rst),
+            .actual_index (actual_index), // the separating comma was missing here, which made the port list a syntax error
+            .access (set_to_access[indeces]),
+            .find_evict (set_find_evict[indeces]),
+            .write_from_mem (set_wfm[indeces]),
+            .idle (set_idle[indeces]),
+            //.entry,
+            //.o_tag (tag),
+            .o_tag (addr[31:10]),
+            .block_offset (addr[9:5]),
+            .writedata (writedata),
+            //byte_en,
+            .write (read_or_write),
+            .fetched_writedata (fetched_write_data),
+            //.way_id_in (way_id_sending[indeces]),
+            //.way_id_out (way_id_recieved[indeces]),
+            //word_en,
+
+            .readdata (readdata[indeces]),
+            //.wb_addr,
+            .hit (hit), // NOTE(review): every generated instance connects to this single net - confirm only one instance drives it at a time
+            //.modify (modify),
+            .eviction_wb (e_wb[indeces]),
+            .eviction_tag (e_tag[indeces]),
+            //.evicted_data (e_data[indeces]),
+            .evicted_data (e_data), // NOTE(review): shared by all instances as well - same question as for .hit
+            .miss (miss[indeces])
+            //.valid_data (valid_in_set)
+            //.read_miss (read_miss)
+        );
+    end
+    endgenerate
+
+    //always @(e_wb) begin
+    // for (indeces = 0; indeces < NUMBER_INDEXES; indeces = indeces + 1) begin
+    // //if (set_to_access == indeces) begin
+    // if (e_wb[indeces] == 1'b1) begin
+    // // reset
+    // w2m_needed = 1'b1;
+    // w2m_addr = {e_tag[indeces], addr[11:0]}; // FIXME !!! Need to figure out how to do this (reassemble the address)
+    // //w2m_data = e_data[indeces];
+    // end
+    // end
+    //end
+
+    wire[$clog2(NUMBER_INDEXES)-1:0] index_w2m_addr; // lowest index whose e_wb bit is set, as reported by the priority encoder below
+    wire found_w2m_addr;
+    VX_generic_pe #(.N(NUMBER_INDEXES)) find_evicted(
+        .valids(e_wb),
+        .index(index_w2m_addr),
+        .found (found_w2m_addr)
+    );
+
+    assign w2m_addr = {e_tag[index_w2m_addr], addr[9:0]}; // write-back address = tag of the selected entry followed by the low 10 bits of the request address
+
+
+
+
+    assign w2m_needed = (e_wb != 0) ?
1'b1 : 1'b0; + for (indeces = 0; indeces < NUMBER_INDEXES; indeces = indeces + 1) begin + assign set_to_access[indeces] = ({28'b0,addr[11:8]} == indeces && state == CACHE_ACCESS && valid) ? 1'b1 : 1'b0; + end + // Do logic about processing done after going into the cache set here + +endmodule + + + + + diff --git a/rtl/cache/cache_set.v b/rtl/cache/cache_set.v new file mode 100644 index 00000000..5ef4d129 --- /dev/null +++ b/rtl/cache/cache_set.v @@ -0,0 +1,233 @@ +// To Do: Change way_id_out to an internal register which holds when in between access and finished. +// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default +// Also make sure all possible output states are transmitted back to the bank correctly + +`include "VX_define.v" +module cache_set(clk, + rst, + // These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes + access, // First + find_evict, + write_from_mem, + idle, + // entry, + o_tag, + writedata, + //byte_en, + write, + //word_en, + //way_id_in, + //way_id_out, + readdata, + //wb_addr, + hit, + eviction_wb, + eviction_tag, + //eviction_data, + //modify, + miss + //valid_data + //read_miss + ); + + parameter cache_entry = 14; + parameter ways_per_set = 4; + + input wire clk, rst; + input wire access; + input wire find_evict; + input wire write_from_mem; + input wire idle; + //input wire [cache_entry-1:0] entry; + input wire [1:0] o_tag; + input wire [31:0] writedata; + //input wire [3:0] byte_en; + input wire write; // 0 == False + //input wire [3:0] word_en; + //input wire read_miss; + //input wire [1:0] way_id_in; + //output reg [1:0] way_id_out; + output reg [31:0] readdata; + //output reg [3:0] hit; + output reg hit; + output reg miss; + output wire eviction_wb; + output wire [1:0] eviction_tag; + reg [31:0] eviction_data; + //output wire [22:0] wb_addr; + //output wire modify, valid_data; + + + + //wire [2:0] i_tag; + //wire dirty; + //wire 
[24-cache_entry:0] write_tag_data; + + // Table for one set + reg [2:0] counter; // Determines which to evict + reg valid [ways_per_set-1:0]; + reg [1:0] tag [ways_per_set-1:0]; + reg clean [ways_per_set-1:0]; + reg [31:0] data [ways_per_set-1:0]; + + + assign eviction_wb = miss && clean[counter[1:0]] != 1'b1 && valid[counter[1:0]] == 1'b1; + assign eviction_tag = tag[counter[1:0]]; + //assign eviction_data = data[counter[1:0]]; + //assign hit = valid_data && (o_tag == i_tag); + //assign modify = valid_data && (o_tag != i_tag) && dirty; + //assign miss = !valid_data || ((o_tag != i_tag) && !dirty); + + //assign wb_addr = {i_tag, entry}; + always @(posedge clk) begin + if (rst) begin + + end + if (find_evict) begin + if (tag[0] == o_tag && valid[0]) begin + readdata <= data[0]; + end else if (tag[1] == o_tag && valid[1]) begin + readdata <= data[1]; + end else if (tag[2] == o_tag && valid[2]) begin + readdata <= data[2]; + end else if (tag[3] == o_tag && valid[3]) begin + readdata <= data[3]; + end + end else if (access) begin + //tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12]; + counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! 
NEED TO FIX LOGIC + // Hit in First Column + if (tag[0] == o_tag && valid[0]) begin + if (write == 1'b0) begin // if it is a read + if (clean[0] == 1'b1 ) begin + //hit <= 4'b0001; + hit <= 1'b1; + readdata <= data[0]; + miss <= 1'b0; + end else begin + //hit <= 4'b0000; // SHOULD PROBABLY TRACK WHERE THIS MISS IS IN A DIFFERENT VARIABLE + hit <= 1'b0; + readdata <= 32'b0; + miss <= 1'b1; + end + end else if (write == 1'b1) begin + data[0] <= writedata; + clean[0] <= 1'b0; + //hit <= 4'b0001; + hit <= 1'b1; + end + end + // Hit in Second Column + else if (tag[1] == o_tag && valid[1]) begin + if (write == 1'b0) begin // if it is a read + if (clean[1] == 1'b1 ) begin + //hit <= 4'b0010; + hit <= 1'b1; + readdata <= data[1]; + miss <= 1'b0; + end else begin + //hit <= 4'b0000; + hit <= 1'b0; + readdata <= 32'b0; + miss <= 1'b1; + end + end else if (write == 1'b1) begin + data[1] <= writedata; + clean[1] <= 1'b0; + //hit <= 4'b0010; + hit <= 1'b1; + end + end + // Hit in Third Column + else if (tag[2] == o_tag && valid[2]) begin + if (write == 1'b0) begin // if it is a read + if (clean[2] == 1'b1 ) begin + //hit <= 4'b0100; + hit <= 1'b1; + readdata <= data[2]; + miss <= 1'b0; + end else begin + //hit <= 4'b0000; + hit <= 1'b0; + readdata <= 32'b0; + miss <= 1'b1; + end + end else if (write == 1'b1) begin + data[2] <= writedata; + clean[2] <= 1'b0; + //hit <= 4'b0100; + hit <= 1'b1; + end + end + // Hit in Fourth Column + else if (tag[3] == o_tag && valid[3]) begin + if (write == 1'b0) begin // if it is a read + if (clean[3] == 1'b1 ) begin + //hit <= 4'b1000; + hit <= 1'b1; + readdata <= data[3]; + miss <= 1'b0; + end else begin + //hit <= 4'b0000; + hit <= 1'b0; + readdata <= 32'b0; + miss <= 1'b1; + end + end else if (write == 1'b1) begin + data[3] <= writedata; + clean[3] <= 1'b0; + //hit <= 4'b1000; + hit <= 1'b1; + end + end + // Miss + else begin + //way_id_out <= counter; + miss <= 1'b1; + if (write == 1'b0) begin // Read Miss + clean[counter[1:0]] <= 1'b1; + 
data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS + end else if (write == 1'b1) begin // Write Miss + clean[counter[1:0]] <= 1'b1; + data[counter[1:0]] <= writedata; + end + end + + end + if (write_from_mem) begin + tag[counter[1:0]] <= o_tag; + valid[counter[1:0]] <= 1'b1; + hit <= 1'b1; + if (write == 1'b0) begin // Read Miss + clean[counter[1:0]] <= 1'b1; + data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS + end else if (write == 1'b1) begin // Write Miss + clean[counter[1:0]] <= 1'b0; + data[counter[1:0]] <= writedata; + end + end + if (idle) begin // Set "way" register equal to invalid value + hit <= 1'b1; // set to know it is ready + miss <= 1'b0; + readdata <= 32'hFFFFFFFF; + end + if (find_evict) begin // Keep "way" value the same !!!! Fix. Need to send back data with matching tag. Also need to ensure evicted data doesnt get lost + if (tag[3] == o_tag && valid[3]) begin + readdata <= data[3]; + end else if (tag[1] == o_tag && valid[1]) begin + readdata <= data[1]; + end else if (tag[2] == o_tag && valid[2]) begin + readdata <= data[2]; + end else if (tag[0] == o_tag && valid[0]) begin + readdata <= data[0]; + end else begin + readdata <= eviction_data; + end + hit <= 1'b1; + miss <= 1'b0; + end + counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! 
NEED TO FIX LOGIC + eviction_data <= data[counter[1:0]]; + end + +endmodule \ No newline at end of file diff --git a/rtl/cache/d_cache_test_bench.cpp b/rtl/cache/d_cache_test_bench.cpp new file mode 100644 index 00000000..e7fb3214 --- /dev/null +++ b/rtl/cache/d_cache_test_bench.cpp @@ -0,0 +1,29 @@ + + +#include "d_cache_test_bench.h" + +//#define NUM_TESTS 46 + +int main(int argc, char **argv) +{ + + Verilated::commandArgs(argc, argv); + + Verilated::traceEverOn(true); + + + VX_d_cache v; + + + bool curr = v.simulate(); + //if ( curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl; + //if (!curr) std::cerr << RED << "Test Failed: " << testing << std::endl; + if ( curr) std::cerr << GREEN << "Test Passed: " << std::endl; + if (!curr) std::cerr << RED << "Test Failed: " << std::endl; + + return 0; + +} + + + diff --git a/rtl/cache/d_cache_test_bench.h b/rtl/cache/d_cache_test_bench.h new file mode 100644 index 00000000..112aeb9a --- /dev/null +++ b/rtl/cache/d_cache_test_bench.h @@ -0,0 +1,355 @@ +// C++ libraries +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "VX_define.h" +#include "VVX_d_cache_encapsulate.h" +#include "verilated.h" + +#include "d_cache_test_bench_debug.h" + + +#ifdef VCD_OUTPUT +#include +#endif + +// void set_Index (auto & var, int index, int size, auto val) +// { +// int real_shift +// } + +class VX_d_cache +{ + public: + VX_d_cache(); + ~VX_d_cache(); + bool simulate(); + bool operation(int, bool); + + VVX_d_cache_encapsulate * vx_d_cache_; + long int curr_cycle; + int stats_total_cycles = 0; + int stats_dram_accesses = 0; + #ifdef VCD_OUTPUT + VerilatedVcdC *m_trace; + #endif +}; + + + +VX_d_cache::VX_d_cache() : curr_cycle(0), stats_total_cycles(0), stats_dram_accesses(0) +{ + + this->vx_d_cache_ = new VVX_d_cache_encapsulate; + #ifdef VCD_OUTPUT + this->m_trace = new VerilatedVcdC; + this->vx_d_cache_->trace(m_trace, 99); + this->m_trace->open("trace.vcd"); + 
#endif + //this->results.open("../results.txt"); +} + +VX_d_cache::~VX_d_cache() +{ + + + #ifdef VCD_OUTPUT + m_trace->close(); delete m_trace; // flush and free the VCD writer while the traced model still exists + #endif + delete this->vx_d_cache_; +} + +bool VX_d_cache::operation(int counter_value, bool do_op) { + if (do_op) { + vx_d_cache_->i_p_initial_request = 1; + } else { + vx_d_cache_->i_p_initial_request = 0; + } + + if (counter_value == 0 && do_op) { // Write to bank 1-4 at index 64 + vx_d_cache_->i_p_initial_request = 1; + vx_d_cache_->i_p_read_or_write = 1; + vx_d_cache_->i_m_ready = 0; + for (int j = 0; j < NT; j++) { + vx_d_cache_->i_p_valid[j] = 1; + vx_d_cache_->i_p_writedata[j] = 0x7f6f8f6f; + vx_d_cache_->i_m_readdata[j][0] = 1; + if (j == 0) { + vx_d_cache_->i_p_addr[0] = 0x30001004; // bank 1 + } else if (j == 1) { + vx_d_cache_->i_p_addr[1] = 0x30001008; // bank 2 + } else if (j == 2) { + vx_d_cache_->i_p_addr[2] = 0x3000100c; // bank 3 + } else { + vx_d_cache_->i_p_addr[3] = 0x30010010; // bank 4 -- This is serviced 1st, then the other 3 banks are at once + } + } + + } else if (counter_value == 1 && do_op) { // Write to bank 4-7 at index 108 + vx_d_cache_->i_p_initial_request = 1; + vx_d_cache_->i_p_read_or_write = 1; + vx_d_cache_->i_m_ready = 0; + for (int j = 0; j < NT; j++) { + vx_d_cache_->i_p_valid[j] = 1; + vx_d_cache_->i_p_writedata[j] = 0xd1d2d2d3; + vx_d_cache_->i_m_readdata[j][0] = 1; + if (j == 0) { + vx_d_cache_->i_p_addr[0] = 0x30001c14; // bank 5 + } else if (j == 1) { + vx_d_cache_->i_p_addr[1] = 0x30001c18; // bank 6 + } else if (j == 2) { + vx_d_cache_->i_p_addr[2] = 0x30001c1c; // bank 7 + } else { + vx_d_cache_->i_p_addr[3] = 0x30001c10; // bank 4 + } + } + + } else if (counter_value == 2 && do_op) { // Read back banks 1, 6, 3 and 7 at addresses written above + for (int j = 0; j < NT; j++) { + vx_d_cache_->i_p_initial_request = 1; + vx_d_cache_->i_p_read_or_write = 0; + vx_d_cache_->i_m_ready = 0; + for (int j = 0; j < NT; j++) { + vx_d_cache_->i_p_valid[j] = 1; + vx_d_cache_->i_p_writedata[j] = 0x23232332; +
vx_d_cache_->i_m_readdata[j][0] = 1; + if (j == 0) { + vx_d_cache_->i_p_addr[0] = 0x30001004; // bank 1 + } else if (j == 1) { + vx_d_cache_->i_p_addr[1] = 0x30001c18; // bank 6 + } else if (j == 2) { + vx_d_cache_->i_p_addr[2] = 0x3000100c; // bank 3 + } else { + vx_d_cache_->i_p_addr[3] = 0x30001c1c;; // bank 7 + } + } + } + } else if (counter_value == 3 && do_op) { // Write to banks 1, 2, 3 and 5 (evictions will need to take place) + vx_d_cache_->i_p_initial_request = 1; + vx_d_cache_->i_p_read_or_write = 1; + vx_d_cache_->i_m_ready = 0; + for (int j = 0; j < NT; j++) { + vx_d_cache_->i_p_valid[j] = 1; + vx_d_cache_->i_m_readdata[j][0] = 1; + if (j == 0) { + vx_d_cache_->i_p_addr[0] = 0x20001004; // bank 1 + vx_d_cache_->i_p_writedata[j] = 0xaaaabbb0; + } else if (j == 1) { + vx_d_cache_->i_p_addr[1] = 0x20001008; // bank 2 + vx_d_cache_->i_p_writedata[j] = 0xaaaabbb1; + } else if (j == 2) { + vx_d_cache_->i_p_addr[2] = 0x2000100c; // bank 3 + vx_d_cache_->i_p_writedata[j] = 0xaaaabbb2; + } else { + vx_d_cache_->i_p_addr[3] = 0x20001c14; // bank 5 + vx_d_cache_->i_p_writedata[j] = 0xaaaabbb3; + } + } + } else if (counter_value == 4 && do_op) { // Read from addresses that were just overwritten above ^^^ + vx_d_cache_->i_p_initial_request = 1; + vx_d_cache_->i_p_read_or_write = 0; + vx_d_cache_->i_m_ready = 0; + for (int j = 0; j < NT; j++) { + vx_d_cache_->i_p_valid[j] = 1; + vx_d_cache_->i_p_writedata[j] = 0x23232332; + vx_d_cache_->i_m_readdata[j][0] = 1; + if (j == 0) { + vx_d_cache_->i_p_addr[0] = 0x20001004; // bank 1 + } else if (j == 1) { + vx_d_cache_->i_p_addr[1] = 0x20001008; // bank 2 + } else if (j == 2) { + vx_d_cache_->i_p_addr[2] = 0x2000100c; // bank 3 + } else { + vx_d_cache_->i_p_addr[3] = 0x20001c14; // bank 5 + } + } + } + /* Disabled cases: these check multiple threads writing to the same block + } else if (counter_value == 3 && do_op) { // Write to Bank 0 + vx_d_cache_->i_p_initial_request = 1; + vx_d_cache_->i_p_read_or_write = 1; +
vx_d_cache_->i_m_ready = 0; + for (int j = 0; j < NT; j++) { + vx_d_cache_->i_p_valid[j] = 1; + vx_d_cache_->i_m_readdata[j][0] = 1; + if (j == 0) { + vx_d_cache_->i_p_addr[0] = 0x30001f00; // bank 0 + vx_d_cache_->i_p_writedata[j] = 0xaaaabbb0; + } else if (j == 1) { + vx_d_cache_->i_p_addr[1] = 0x30001c00; // bank 0 + vx_d_cache_->i_p_writedata[j] = 0xaaaabbb1; + } else if (j == 2) { + vx_d_cache_->i_p_addr[2] = 0x30001a00; // bank 0 + vx_d_cache_->i_p_writedata[j] = 0xaaaabbb2; + } else { + vx_d_cache_->i_p_addr[3] = 0x30001904; // bank 1 + vx_d_cache_->i_p_writedata[j] = 0xaaaabbb3; + } + } + } else if (counter_value == 4 && do_op) { // Read from Bank 0 + vx_d_cache_->i_p_initial_request = 1; + vx_d_cache_->i_p_read_or_write = 0; + vx_d_cache_->i_m_ready = 0; + for (int j = 0; j < NT; j++) { + vx_d_cache_->i_p_valid[j] = 1; + vx_d_cache_->i_p_writedata[j] = 0x23232332; + vx_d_cache_->i_m_readdata[j][0] = 1; + if (j == 0) { + vx_d_cache_->i_p_addr[0] = 0x30001f00; // bank 0 + } else if (j == 1) { + vx_d_cache_->i_p_addr[1] = 0x30001c00; // bank 0 + } else if (j == 2) { + vx_d_cache_->i_p_addr[2] = 0x30001a00; // bank 0 + } else { + vx_d_cache_->i_p_addr[3] = 0x30001904; // bank 1 + } + } + } + */ + // Handle Memory Accesses: when the cache asserts o_m_valid, clock one extra cycle, then answer with i_m_ready and synthetic read data + unsigned int read_data_from_mem = 0x1111 + counter_value + this->stats_total_cycles; + + if (vx_d_cache_->o_m_valid) { + this->stats_dram_accesses = this->stats_dram_accesses + 1; // count the access; simulate() charges 20 cycles per access in its final report + + this->stats_total_cycles += 1; + vx_d_cache_->clk = 0; + vx_d_cache_->eval(); + #ifdef VCD_OUTPUT + m_trace->dump(2*this->stats_total_cycles); + #endif + vx_d_cache_->clk = 1; + vx_d_cache_->eval(); + #ifdef VCD_OUTPUT + m_trace->dump((2*this->stats_total_cycles)+1); + #endif + + vx_d_cache_->i_m_ready = 1; + for (int j1 = 0; j1 < 8; j1++) { // NOTE(review): 8 and 4 presumably mirror NUMBER_BANKS and NUM_WORDS_PER_BLOCK - confirm and keep in sync + for (int j2 = 0; j2 < 4; j2++) { + vx_d_cache_->i_m_readdata[j1][j2] = read_data_from_mem; + } + } + } else { + vx_d_cache_->i_m_ready = 0; + } + + + if
(vx_d_cache_->o_p_waitrequest == 0) { + return true; + } else { + return false; + } + + +} + + +bool VX_d_cache::simulate() +{ + +// this->instruction_file_name = file_to_simulate; + // this->results << "\n****************\t" << file_to_simulate << "\t****************\n"; + +// this->ProcessFile(); + + // auto start_time = std::chrono::high_resolution_clock::now(); + + + //static bool stop = false; + //static int counter = 0; + //counter = 0; + //stop = false; + + // auto start_time = clock(); + + + vx_d_cache_->clk = 0; + vx_d_cache_->rst = 1; + vx_d_cache_->eval(); // evaluate with rst high; it used to be cleared before any eval, so the design never saw reset + vx_d_cache_->clk = 1; vx_d_cache_->eval(); vx_d_cache_->clk = 0; vx_d_cache_->eval(); // one full clock with rst high so a synchronous reset is taken as well + vx_d_cache_->rst = 0; + + bool cont = false; + bool out_operation = false; + bool do_operation = true; + int other_counter = 0; + //while (this->stop && ((other_counter < 5))) + while (other_counter < 5) + { + + // std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n"; + // istop = ibus_driver(); + // dstop = !dbus_driver(); + + vx_d_cache_->clk = 1; + vx_d_cache_->eval(); + #ifdef VCD_OUTPUT + m_trace->dump(2*this->stats_total_cycles); + #endif + + //vortex->eval(); + //dstop = !dbus_driver(); + + out_operation = operation(other_counter, do_operation); + vx_d_cache_->clk = 0; + vx_d_cache_->eval(); + #ifdef VCD_OUTPUT + m_trace->dump((2*this->stats_total_cycles)+1); + #endif + //vortex->eval(); + + /* + // stop = istop && dstop; + stop = vortex->out_ebreak; + if (stop || cont) + { + cont = true; + counter++; + } else + { + counter = 0; + } + */ + if (out_operation) { + other_counter++; + do_operation = true; + } else { + do_operation = false; + } + ++(this->stats_total_cycles); + + if (this->stats_total_cycles > 5000) { + break; + } + + } + + std::cerr << "New Total Cycles: " << (this->stats_total_cycles + (this->stats_dram_accesses * 20)) << "\n"; + + //uint32_t status; + //ram.getWord(0, &status); + + //this->print_stats(); + + + + return (other_counter >= 5); // false when the 5000-cycle watchdog stopped the run before all five operations completed +} + + + + + + + + + diff --git a/rtl/cache/d_cache_test_bench_debug.h b/rtl/cache/d_cache_test_bench_debug.h new file
mode 100644 index 00000000..54afa11a --- /dev/null +++ b/rtl/cache/d_cache_test_bench_debug.h @@ -0,0 +1 @@ +#define VCD_OUTPUT \ No newline at end of file diff --git a/rtl/interfaces/VX_dram_req_rsp_inter.v b/rtl/interfaces/VX_dram_req_rsp_inter.v new file mode 100644 index 00000000..c415cc12 --- /dev/null +++ b/rtl/interfaces/VX_dram_req_rsp_inter.v @@ -0,0 +1,24 @@ + +`include "../VX_define.v" + +`ifndef VX_DRAM_REQ_RSP_INTER + +`define VX_DRAM_REQ_RSP_INTER + +interface VX_dram_req_rsp_inter (); + + // Req: request signals (32-bit address, valid, write data as NUMBER_BANKS x NUM_WORDS_PER_BLOCK 32-bit words, read/write select) + wire [31:0] o_m_addr; + wire o_m_valid; + wire[`NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata; + wire o_m_read_or_write; + + // Rsp: response signals (read data as NUMBER_BANKS x NUM_WORDS_PER_BLOCK 32-bit words, ready) + wire[`NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata; + wire i_m_ready; + + +endinterface + + +`endif \ No newline at end of file diff --git a/rtl/shared_memory/VX_shared_memory_block.v b/rtl/shared_memory/VX_shared_memory_block.v index 44688b73..1ff1e0ff 100644 --- a/rtl/shared_memory/VX_shared_memory_block.v +++ b/rtl/shared_memory/VX_shared_memory_block.v @@ -33,10 +33,10 @@ module VX_shared_memory_block ( // wire[3:0][31:0] write_bit_mask; - // assign write_bit_mask[0] = (we == 2'b00) ? 0 : {32{1'b1}}; - // assign write_bit_mask[1] = (we == 2'b01) ? 0 : {32{1'b1}}; - // assign write_bit_mask[2] = (we == 2'b10) ? 0 : {32{1'b1}}; - // assign write_bit_mask[3] = (we == 2'b11) ? 0 : {32{1'b1}}; + // assign write_bit_mask[0] = (we == 2'b00) ? 1 : {32{1'b0}}; + // assign write_bit_mask[1] = (we == 2'b01) ? 1 : {32{1'b0}}; + // assign write_bit_mask[2] = (we == 2'b10) ? 1 : {32{1'b0}}; + // assign write_bit_mask[3] = (we == 2'b11) ?
1 : {32{1'b0}}; // // Using ASIC MEM // /* verilator lint_off PINCONNECTEMPTY */ diff --git a/syn/syn.tcl b/syn/syn.tcl index f5c86b33..ddd351e2 100755 --- a/syn/syn.tcl +++ b/syn/syn.tcl @@ -3,7 +3,7 @@ set link_library [concat * sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_ set symbol_library {} set target_library [concat sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_m40c.db] -set verilog_files [ list VX_bank_valids.v VX_priority_encoder_sm.v VX_set_bit.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v Vortex.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \ +set verilog_files [ list VX_dram_req_rsp_inter.v bank.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pe.v VX_bank_valids.v VX_priority_encoder_sm.v VX_set_bit.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v
VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v Vortex.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \ ] analyze -format sverilog $verilog_files