icache: add an instruction cache next to the existing data cache

Overview, one entry per file touched below.

  * rtl/VX_define.v
      Adds `CLOG2(x), a constant-expression ceil(log2(x)) for x up to 1024.
      It yields 1 for x <= 2 (where $clog2(1) would be 0), so a range written
      as [`CLOG2(N)-1:0] is never zero-width. Any x above 1024 yields -199.
      The expansion and every use of x are parenthesized so the macro can be
      embedded in a larger expression or given an expression argument.
      Adds the ICACHE_* geometry macros (same layout as DCACHE_*) and
      ICACHE_MEM_REQ_ADDR_MASK. Changes DCACHE_WAYS 2 -> 4 (non-SYN) and
      DCACHE_BANKS 8 -> 4.
      NOTE(review): with 32-bit words, 4 banks x 4 words = 64 bytes for the
      dcache and 1 bank x 16 words = 64 bytes for the icache, while
      DCACHE_BLOCK / ICACHE_BLOCK are both 128 -- confirm this is intended.
      NOTE(review): ICACHE_ADDR_OFFSET_ST and ICACHE_OFFSET_ED use $clog2
      while the other ICACHE_* macros use `CLOG2; the two differ for an
      argument of 1 -- confirm the mix is intended.

  * rtl/VX_dmem_controller.v
      Adds the icache request/response and DRAM interface ports, and a second
      VX_d_cache instance ("icache") parameterized with the ICACHE_* macros.

  * rtl/VX_fetch.v
      Stalls the pipe on icache_response.delay and drives the icache request
      as a word read of warp_pc whenever the scheduler is not delayed.

  * rtl/VX_generic_priority_encoder.v, rtl/VX_priority_encoder_w_mask.v
      Size `index` with `CLOG2(N) instead of $clog2(N).

  * rtl/Vortex.v
      Splits the top-level memory ports into _d (data) and _i (instruction)
      sets and wires the icache interfaces into VX_dmem_controller. The dcache
      generate loop keeps its inner curr_word loop; only the port names inside
      it change.

NOTE(review): indentation and blank context lines in this patch were
reconstructed; verify the context against the tree before applying.

diff --git a/rtl/VX_define.v b/rtl/VX_define.v
index 281ce7d0..d16cc89d 100644
--- a/rtl/VX_define.v
+++ b/rtl/VX_define.v
@@ -110,29 +110,90 @@
 
 
 `define ZERO_REG 5'h0
+`define CLOG2(x) ( \
+   ((x) <= 2)    ? 1  : \
+   ((x) <= 4)    ? 2  : \
+   ((x) <= 8)    ? 3  : \
+   ((x) <= 16)   ? 4  : \
+   ((x) <= 32)   ? 5  : \
+   ((x) <= 64)   ? 6  : \
+   ((x) <= 128)  ? 7  : \
+   ((x) <= 256)  ? 8  : \
+   ((x) <= 512)  ? 9  : \
+   ((x) <= 1024) ? 10 : \
+   -199) // x > 1024 is not covered by the table
 
 
 // `define PARAM
 
 //Cache configurations
-//Bytes
-`define DCACHE_SIZE 4096
+//Cache configurations
+`define ICACHE_SIZE 4096 //Bytes
+`ifdef SYN
+`define ICACHE_WAYS 1
+`else
+`define ICACHE_WAYS 2
+`endif
+`define ICACHE_BLOCK 128 //Bytes
+`define ICACHE_BANKS 1
+`define ICACHE_LOG_NUM_BANKS `CLOG2(`ICACHE_BANKS)
+`define ICACHE_NUM_WORDS_PER_BLOCK 16
+`define ICACHE_NUM_REQ 1
+`define ICACHE_LOG_NUM_REQ `CLOG2(`ICACHE_NUM_REQ)
+
+`define ICACHE_WAY_INDEX `CLOG2(`ICACHE_WAYS) //set this to 1 if CACHE_WAYS is 1
+//`define ICACHE_WAY_INDEX 1
+`define ICACHE_BLOCK_PER_BANK (`ICACHE_BLOCK / `ICACHE_BANKS)
+
+// Offset
+`define ICACHE_OFFSET_NB (`CLOG2(`ICACHE_NUM_WORDS_PER_BLOCK))
+
+`define ICACHE_ADDR_OFFSET_ST (2+$clog2(`ICACHE_BANKS))
+`define ICACHE_ADDR_OFFSET_ED (`ICACHE_ADDR_OFFSET_ST+(`ICACHE_OFFSET_NB)-1)
+
+
+`define ICACHE_ADDR_OFFSET_RNG `ICACHE_ADDR_OFFSET_ED:`ICACHE_ADDR_OFFSET_ST
+`define ICACHE_OFFSET_SIZE_RNG (`CLOG2(`ICACHE_NUM_WORDS_PER_BLOCK)-1):0
+`define ICACHE_OFFSET_ST 0
+`define ICACHE_OFFSET_ED ($clog2(`ICACHE_NUM_WORDS_PER_BLOCK)-1)
+
+// Index
+`define ICACHE_NUM_IND (`ICACHE_SIZE / (`ICACHE_WAYS * `ICACHE_BLOCK_PER_BANK))
+`define ICACHE_IND_NB (`CLOG2(`ICACHE_NUM_IND))
+
+`define ICACHE_IND_ST (`ICACHE_ADDR_OFFSET_ED+1)
+`define ICACHE_IND_ED (`ICACHE_IND_ST+`ICACHE_IND_NB-1)
+
+`define ICACHE_ADDR_IND_RNG `ICACHE_IND_ED:`ICACHE_IND_ST
+`define ICACHE_IND_SIZE_RNG `ICACHE_IND_NB-1:0
+
+`define ICACHE_IND_SIZE_START 0
+`define ICACHE_IND_SIZE_END `ICACHE_IND_NB-1
+
+
+// Tag
+`define ICACHE_ADDR_TAG_RNG 31:(`ICACHE_IND_ED+1)
+`define ICACHE_TAG_SIZE_RNG (32-(`ICACHE_IND_ED+1)-1):0
+`define ICACHE_TAG_SIZE_START 0
+`define ICACHE_TAG_SIZE_END (32-(`ICACHE_IND_ED+1)-1)
+`define ICACHE_ADDR_TAG_START (`ICACHE_IND_ED+1)
+`define ICACHE_ADDR_TAG_END 31
+
+//Cache configurations
+`define DCACHE_SIZE 4096 //Bytes
 `ifdef SYN
 `define DCACHE_WAYS 1
 `else
-`define DCACHE_WAYS 2
+`define DCACHE_WAYS 4
 `endif
-
-//Bytes
-`define DCACHE_BLOCK 128
-`define DCACHE_BANKS 8
+`define DCACHE_BLOCK 128 //Bytes
+`define DCACHE_BANKS 4
 `define DCACHE_LOG_NUM_BANKS $clog2(`DCACHE_BANKS)
 `define DCACHE_NUM_WORDS_PER_BLOCK 4
 `define DCACHE_NUM_REQ `NT
 `define DCACHE_LOG_NUM_REQ $clog2(`DCACHE_NUM_REQ)
 
-//set this to 1 if CACHE_WAYS is 1
-`define DCACHE_WAY_INDEX $clog2(`DCACHE_WAYS)
+`define DCACHE_WAY_INDEX $clog2(`DCACHE_WAYS) //set this to 1 if CACHE_WAYS is 1
 //`define DCACHE_WAY_INDEX 1
 `define DCACHE_BLOCK_PER_BANK (`DCACHE_BLOCK / `DCACHE_BANKS)
 
@@ -159,7 +220,7 @@
 `define DCACHE_IND_SIZE_RNG `DCACHE_IND_NB-1:0
 
 `define DCACHE_IND_SIZE_START 0
-`define DCACHE_IND_SIZE_END `DCACHE_IND_NB-1
+`define DCACHE_IND_SIZE_END   `DCACHE_IND_NB-1
 
 
 // Tag
@@ -170,8 +231,8 @@
 `define DCACHE_ADDR_TAG_START (`DCACHE_IND_ED+1)
 `define DCACHE_ADDR_TAG_END 31
 
-
 // Mask
 `define DCACHE_MEM_REQ_ADDR_MASK (32'hffffffff - (`DCACHE_BLOCK-1))
+`define ICACHE_MEM_REQ_ADDR_MASK (32'hffffffff - (`ICACHE_BLOCK-1))
 
 
diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v
index 408b693b..780d6406 100644
--- a/rtl/VX_dmem_controller.v
+++ b/rtl/VX_dmem_controller.v
@@ -6,7 +6,10 @@ module VX_dmem_controller (
 	input wire               reset,
 	// MEM-RAM
 	VX_dram_req_rsp_inter    VX_dram_req_rsp,
+	VX_dram_req_rsp_inter    VX_dram_req_rsp_icache,
 	// MEM-Processor
+	VX_icache_request_inter  VX_icache_req,
+	VX_icache_response_inter VX_icache_rsp,
 	VX_dcache_request_inter  VX_dcache_req,
 	VX_dcache_response_inter VX_dcache_rsp
 );
@@ -34,6 +37,17 @@ module VX_dmem_controller (
 
 	wire            cache_delay;
 
+	// I_Cache Signals
+
+	wire[31:0]      icache_instruction_out;
+	wire            icache_delay;
+	wire            icache_driver_in_valid     = VX_icache_req.out_cache_driver_in_valid;
+	wire[31:0]      icache_driver_in_address   = VX_icache_req.pc_address;
+	wire[2:0]       icache_driver_in_mem_read  = !(|icache_driver_in_valid) ? `NO_MEM_READ : VX_icache_req.out_cache_driver_in_mem_read;
+	wire[2:0]       icache_driver_in_mem_write = !(|icache_driver_in_valid) ? `NO_MEM_WRITE : VX_icache_req.out_cache_driver_in_mem_write;
+	wire[31:0]      icache_driver_in_data      = VX_icache_req.out_cache_driver_in_data;
+	wire            read_or_write_ic           = (VX_icache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|icache_driver_in_valid);
+
 	wire valid_read_cache = !cache_delay && cache_driver_in_valid[0];
 
 
@@ -98,8 +112,56 @@ module VX_dmem_controller (
 		);
 
 
+VX_d_cache#(
+	.CACHE_SIZE          (`ICACHE_SIZE),
+	.CACHE_WAYS          (`ICACHE_WAYS),
+	.CACHE_BLOCK         (`ICACHE_BLOCK),
+	.CACHE_BANKS         (`ICACHE_BANKS),
+	.LOG_NUM_BANKS       (`ICACHE_LOG_NUM_BANKS),
+	.NUM_REQ             (`ICACHE_NUM_REQ),
+	.LOG_NUM_REQ         (`ICACHE_LOG_NUM_REQ),
+	.NUM_IND             (`ICACHE_NUM_IND),
+	.CACHE_WAY_INDEX     (`ICACHE_WAY_INDEX),
+	.NUM_WORDS_PER_BLOCK (`ICACHE_NUM_WORDS_PER_BLOCK),
+	.OFFSET_SIZE_START   (`ICACHE_OFFSET_ST),
+	.OFFSET_SIZE_END     (`ICACHE_OFFSET_ED),
+	.TAG_SIZE_START      (`ICACHE_TAG_SIZE_START),
+	.TAG_SIZE_END        (`ICACHE_TAG_SIZE_END),
+	.IND_SIZE_START      (`ICACHE_IND_SIZE_START),
+	.IND_SIZE_END        (`ICACHE_IND_SIZE_END),
+	.ADDR_TAG_START      (`ICACHE_ADDR_TAG_START),
+	.ADDR_TAG_END        (`ICACHE_ADDR_TAG_END),
+	.ADDR_OFFSET_START   (`ICACHE_ADDR_OFFSET_ST),
+	.ADDR_OFFSET_END     (`ICACHE_ADDR_OFFSET_ED),
+	.ADDR_IND_START      (`ICACHE_IND_ST),
+	.ADDR_IND_END        (`ICACHE_IND_ED),
+	.MEM_ADDR_REQ_MASK   (`ICACHE_MEM_REQ_ADDR_MASK)
+	) icache
+	(
+	.clk               (clk),
+	.rst               (reset),
+	.i_p_valid         (icache_driver_in_valid),
+	.i_p_addr          (icache_driver_in_address),
+	.i_p_writedata     (icache_driver_in_data),
+	.i_p_read_or_write (read_or_write_ic),
+	.i_p_mem_read      (icache_driver_in_mem_read),
+	.i_p_mem_write     (icache_driver_in_mem_write),
+	.o_p_readdata      (icache_instruction_out),
+	.o_p_delay         (icache_delay),
+	.o_m_evict_addr    (VX_dram_req_rsp_icache.o_m_evict_addr),
+	.o_m_read_addr     (VX_dram_req_rsp_icache.o_m_read_addr),
+	.o_m_valid         (VX_dram_req_rsp_icache.o_m_valid),
+	.o_m_writedata     (VX_dram_req_rsp_icache.o_m_writedata),
+	.o_m_read_or_write (VX_dram_req_rsp_icache.o_m_read_or_write),
+	.i_m_readdata      (VX_dram_req_rsp_icache.i_m_readdata),
+	.i_m_ready         (VX_dram_req_rsp_icache.i_m_ready)
+	);
+
 	assign VX_dcache_rsp.in_cache_driver_out_data = to_shm ? sm_driver_out_data : cache_driver_out_data;
 	assign VX_dcache_rsp.delay                    = sm_delay || cache_delay;
 
+	assign VX_icache_rsp.instruction = icache_instruction_out;
+	assign VX_icache_rsp.delay       = icache_delay;
+
 
 endmodule
\ No newline at end of file
diff --git a/rtl/VX_fetch.v b/rtl/VX_fetch.v
index 5c101c6f..b535394d 100644
--- a/rtl/VX_fetch.v
+++ b/rtl/VX_fetch.v
@@ -21,7 +21,7 @@ module VX_fetch (
 
 		wire pipe_stall;
 
-		assign pipe_stall = schedule_delay;
+		assign pipe_stall = schedule_delay || icache_response.delay;
 
 		wire[`NT_M1:0] thread_mask;
 		wire[`NW_M1:0] warp_num;
@@ -85,7 +85,12 @@ module VX_fetch (
 		// 	$display("Inside verilog instr: %h, pc: %h", icache_response.instruction, warp_pc);
 		// end
 
-		assign icache_request.pc_address = warp_pc;
+		assign icache_request.pc_address                    = warp_pc;
+		assign icache_request.out_cache_driver_in_valid     = !schedule_delay;
+		assign icache_request.out_cache_driver_in_mem_read  = `LW_MEM_READ;
+		assign icache_request.out_cache_driver_in_mem_write = `NO_MEM_WRITE;
+		assign icache_request.out_cache_driver_in_data      = 32'b0;
+
 		assign fe_inst_meta_fd.warp_num = warp_num;
 		assign fe_inst_meta_fd.valid = thread_mask;
 
diff --git a/rtl/VX_generic_priority_encoder.v b/rtl/VX_generic_priority_encoder.v
index 3e3bad86..6bef1a4f 100644
--- a/rtl/VX_generic_priority_encoder.v
+++ b/rtl/VX_generic_priority_encoder.v
@@ -1,10 +1,14 @@
+`include "../VX_define.v"
+
 module VX_generic_priority_encoder
   #(
   	parameter N = 1
   )
   (
     input  wire[N-1:0] valids,
-    output reg[$clog2(N)-1:0] index,
+    //output reg[$clog2(N)-1:0] index,
+    output reg[(`CLOG2(N))-1:0] index,
+    //output reg[`CLOG2(N):0] index, // eh
     output reg         found
   );
 
@@ -14,7 +18,8 @@ module VX_generic_priority_encoder
 		found = 0;
 		for (i = N-1; i >= 0; i = i - 1) begin
 			if (valids[i]) begin
-				index = i[$clog2(N)-1:0];
+				//index = i[$clog2(N)-1:0];
+				index = i[(`CLOG2(N))-1:0];
 				found = 1;
 			end
 		end
diff --git a/rtl/VX_priority_encoder_w_mask.v b/rtl/VX_priority_encoder_w_mask.v
index 0fc18dde..fcd9d865 100644
--- a/rtl/VX_priority_encoder_w_mask.v
+++ b/rtl/VX_priority_encoder_w_mask.v
@@ -1,3 +1,4 @@
+`include "../VX_define.v"
 module VX_priority_encoder_w_mask
   #(
   	parameter N = 10
@@ -5,7 +6,9 @@ module VX_priority_encoder_w_mask
   (
     input  wire[N-1:0] valids,
     output reg [N-1:0] mask,
-    output reg[$clog2(N)-1:0] index,
+    //output reg[$clog2(N)-1:0] index,
+    output reg[(`CLOG2(N))-1:0] index,
+    //output reg[`CLOG2(N):0] index, // eh
     output reg         found
   );
 
@@ -16,7 +19,8 @@ module VX_priority_encoder_w_mask
 		// mask  = 0;
 		for (i = 0; i < N; i=i+1) begin
 			if (valids[i]) begin
-				index = i[$clog2(N)-1:0];
+				//index = i[$clog2(N)-1:0];
+				index = i[(`CLOG2(N))-1:0];
 				found = 1;
 				// mask[index] = (1 << i);
 				// $display("%h",(1 << i));
diff --git a/rtl/Vortex.v b/rtl/Vortex.v
index d4c26aec..74e79036 100644
--- a/rtl/Vortex.v
+++ b/rtl/Vortex.v
@@ -18,17 +18,29 @@ module Vortex
 	// IO
 	output wire        io_valid,
 	output wire[31:0]  io_data,
-	// Req
-	output reg [31:0]  o_m_read_addr,
-	output reg [31:0]  o_m_evict_addr,
-	output reg         o_m_valid,
-	output reg [31:0]  o_m_writedata[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
-	output reg         o_m_read_or_write,
-	// Rsp
-	input  wire [31:0] i_m_readdata[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
-	input  wire        i_m_ready,
-	output wire out_ebreak
+	// Req D Mem
+	output reg [31:0]  o_m_read_addr_d,
+	output reg [31:0]  o_m_evict_addr_d,
+	output reg         o_m_valid_d,
+	output reg [31:0]  o_m_writedata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
+	output reg         o_m_read_or_write_d,
+
+	// Rsp D Mem
+	input  wire [31:0] i_m_readdata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
+	input  wire        i_m_ready_d,
+
+	// Req I Mem
+	output reg [31:0]  o_m_read_addr_i,
+	output reg [31:0]  o_m_evict_addr_i,
+	output reg         o_m_valid_i,
+	output reg [31:0]  o_m_writedata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
+	output reg         o_m_read_or_write_i,
+
+	// Rsp I Mem
+	input  wire [31:0] i_m_readdata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
+	input  wire        i_m_ready_i,
+	output wire        out_ebreak
 	);
 
 
 
@@ -49,34 +61,85 @@ assign io_data = temp_io_data;
 
 
 VX_dram_req_rsp_inter #(
-	.NUMBER_BANKS(`DCACHE_BANKS),
-	.NUM_WORDS_PER_BLOCK(`DCACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp();
+		.NUMBER_BANKS(`DCACHE_BANKS),
+		.NUM_WORDS_PER_BLOCK(`DCACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp();
 
 
-assign o_m_read_addr     = VX_dram_req_rsp.o_m_read_addr;
-assign o_m_evict_addr    = VX_dram_req_rsp.o_m_evict_addr;
-assign o_m_valid         = VX_dram_req_rsp.o_m_valid;
-assign o_m_read_or_write = VX_dram_req_rsp.o_m_read_or_write;
+	VX_icache_response_inter icache_response_fe();
+	VX_icache_request_inter  icache_request_fe();
+	VX_dram_req_rsp_inter #(
+		.NUMBER_BANKS(`ICACHE_BANKS),
+		.NUM_WORDS_PER_BLOCK(`ICACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp_icache();
 
-assign VX_dram_req_rsp.i_m_ready = i_m_ready;
+	//assign icache_response_fe.instruction = icache_response_instruction;
+	assign icache_request_pc_address = icache_request_fe.pc_address;
+
+	// Need to fix this so that it is only 1 set of outputs
+	// o_m Values
+
+	// L2 Cache
+	/*
+	assign VX_L2cache_req.out_cache_driver_in_valid = VX_dram_req_rsp.o_m_valid || VX_dram_req_rsp_icache.o_m_valid; // Ask about this (width)
+	// Ask about the adress
+	assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? icache_request_fe.pc_address: VX_dcache_req.out_cache_driver_in_address;
+	//assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_read_addr: VX_dram_req_rsp.o_m_read_addr;
+	//assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_evict_addr : VX_dram_req_rsp.o_m_evict_addr;
+	assign VX_L2cache_req.out_cache_driver_in_mem_read = (VX_dram_req_rsp_icache.o_m_valid) ? (VX_dram_req_rsp_icache.o_m_read_or_write ? icache_request_fe.out_cache_driver_in_mem_write : icache_request_fe.out_cache_driver_in_mem_read)
+		: (VX_dram_req_rsp.o_m_read_or_write ? VX_dcache_req.out_cache_driver_in_mem_write : VX_dcache_req.out_cache_driver_in_mem_read);
+	//assign VX_dram_req_rsp.i_m_ready = i_m_ready && !VX_dram_req_rsp_icache.o_m_valid && VX_dram_req_rsp.o_m_valid;
+	//assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready && VX_dram_req_rsp_icache.o_m_valid;
+	genvar cur_bank;
+	genvar cur_word;
+	for (cur_bank = 0; cur_bank < CACHE_BANKS; cur_bank = cur_bank + 1) begin
+		for (cur_word = 0; cur_word < NUM_WORDS_PER_BLOCK; cur_word = cur_word + 1) begin
+			assign VX_L2cache_req.out_cache_driver_in_data[cur_bank][cur_word] = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_writedata[cur_bank][cur_word]
+				: VX_dram_req_rsp.o_m_writedata[cur_bank][cur_word];
+			assign VX_dram_req_rsp.i_m_readdata[cur_bank][cur_word] = VX_dram_req_rsp_L2.i_m_readdata[cur_bank][cur_word]; // fill in correct response data
+			assign VX_dram_req_rsp_icache.i_m_readdata[cur_bank][cur_word] = VX_dram_req_rsp_L2.i_m_readdata[cur_bank][cur_word]; // fill in correct response data
+		end
+	end
+	*/
+
+
+	assign o_m_valid_i                      = VX_dram_req_rsp_icache.o_m_valid;
+	assign o_m_valid_d                      = VX_dram_req_rsp.o_m_valid;
+	assign o_m_read_addr_i                  = VX_dram_req_rsp_icache.o_m_read_addr;
+	assign o_m_read_addr_d                  = VX_dram_req_rsp.o_m_read_addr;
+	assign o_m_evict_addr_i                 = VX_dram_req_rsp_icache.o_m_evict_addr;
+	assign o_m_evict_addr_d                 = VX_dram_req_rsp.o_m_evict_addr;
+	assign o_m_read_or_write_i              = VX_dram_req_rsp_icache.o_m_read_or_write;
+	assign o_m_read_or_write_d              = VX_dram_req_rsp.o_m_read_or_write;
+	assign VX_dram_req_rsp.i_m_ready        = i_m_ready_d;
+	assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready_i;
+	genvar curr_bank;
+	genvar curr_word;
+	/*
+	for (curr_bank = 0; curr_bank < CACHE_BANKS; curr_bank = curr_bank + 1) begin
+		for (curr_word = 0; curr_word < NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
+			assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word];
+			assign o_m_writedata_d[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word];
+			assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata_d[curr_bank][curr_word]; // fixed
+			assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed
+		end
+	end
+	*/
 
 
-genvar curr_bank;
-genvar curr_word;
 for (curr_bank = 0; curr_bank < `DCACHE_BANKS; curr_bank = curr_bank + 1) begin
 	for (curr_word = 0; curr_word < `DCACHE_NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
-		assign o_m_writedata[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word];
-		assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata[curr_bank][curr_word];
+		assign o_m_writedata_d[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word];
+		assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata_d[curr_bank][curr_word]; // fixed
 	end
 end
 
-// Icache Interface
-VX_icache_response_inter icache_response_fe();
-VX_icache_request_inter icache_request_fe();
+for (curr_bank = 0; curr_bank < `ICACHE_BANKS; curr_bank = curr_bank + 1) begin
+	for (curr_word = 0; curr_word < `ICACHE_NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
+		assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word];
+		assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed
+	end
+end
 
 
-assign icache_response_fe.instruction = icache_response_instruction;
-assign icache_request_pc_address = icache_request_fe.pc_address;
 
 
 /////////////////////////////////////////////////////////////////////////
@@ -137,11 +200,14 @@ VX_back_end vx_back_end(
 
 
 VX_dmem_controller VX_dmem_controller(
-	.clk (clk),
-	.reset (reset),
-	.VX_dram_req_rsp(VX_dram_req_rsp),
-	.VX_dcache_req (VX_dcache_req),
-	.VX_dcache_rsp (VX_dcache_rsp)
+	.clk                    (clk),
+	.reset                  (reset),
+	.VX_dram_req_rsp        (VX_dram_req_rsp),
+	.VX_dram_req_rsp_icache (VX_dram_req_rsp_icache),
+	.VX_icache_req          (icache_request_fe),
+	.VX_icache_rsp          (icache_response_fe),
+	.VX_dcache_req          (VX_dcache_req),
+	.VX_dcache_rsp          (VX_dcache_rsp)
 	);
 // VX_csr_handler vx_csr_handler(
 // 		.clk                  (clk),