Added Core Interface

This commit is contained in:
felsabbagh3
2020-03-03 22:14:56 -08:00
parent 58db00f555
commit 01ae6ffafe
8 changed files with 228 additions and 130 deletions

View File

@@ -16,8 +16,8 @@ module VX_back_end (
VX_warp_ctl_inter VX_warp_ctl,
VX_dcache_response_inter VX_dcache_rsp,
VX_dcache_request_inter VX_dcache_req
VX_gpu_dcache_res_inter VX_dcache_rsp,
VX_gpu_dcache_req_inter VX_dcache_req
);

View File

@@ -10,49 +10,37 @@ module VX_dmem_controller (
// MEM-Processor
VX_icache_request_inter VX_icache_req,
VX_icache_response_inter VX_icache_rsp,
VX_dcache_request_inter VX_dcache_req,
VX_dcache_response_inter VX_dcache_rsp
VX_gpu_dcache_req_inter VX_dcache_req,
VX_gpu_dcache_res_inter VX_dcache_rsp
);
wire to_shm = VX_dcache_req.out_cache_driver_in_address[0][31:24] == 8'hFF;
wire to_shm = VX_dcache_req.core_req_addr[0][31:24] == 8'hFF;
wire[`NT_M1:0] sm_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{to_shm}};
wire[`NT_M1:0] cache_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{~to_shm}};
wire read_or_write = (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|cache_driver_in_valid);
wire[`NT_M1:0] cache_driver_in_valid = VX_dcache_req.core_req_valid & {`NT{~to_shm}};
wire[`NT_M1:0][31:0] cache_driver_in_address = VX_dcache_req.out_cache_driver_in_address;
wire[2:0] cache_driver_in_mem_read = !(|cache_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.out_cache_driver_in_mem_read;
wire[2:0] cache_driver_in_mem_write = !(|cache_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.out_cache_driver_in_mem_write;
wire[`NT_M1:0][31:0] cache_driver_in_data = VX_dcache_req.out_cache_driver_in_data;
wire[2:0] sm_driver_in_mem_read = !(|sm_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.out_cache_driver_in_mem_read;
wire[2:0] sm_driver_in_mem_write = !(|sm_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.out_cache_driver_in_mem_write;
wire[`NT_M1:0] sm_driver_in_valid = VX_dcache_req.core_req_valid & {`NT{to_shm}};
wire[2:0] sm_driver_in_mem_read = !(|sm_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.core_req_mem_read;
wire[2:0] sm_driver_in_mem_write = !(|sm_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.core_req_mem_write;
wire[`NT_M1:0][31:0] cache_driver_out_data;
wire[`NT_M1:0][31:0] sm_driver_out_data;
wire[`NT_M1:0] cache_driver_out_valid; // Not used for now
wire sm_delay;
wire cache_delay;
// I_Cache Signals
wire[31:0] icache_instruction_out;
wire icache_delay;
wire icache_driver_in_valid = VX_icache_req.out_cache_driver_in_valid;
wire icache_driver_in_valid = VX_icache_req.out_cache_driver_in_valid;
wire[31:0] icache_driver_in_address = VX_icache_req.pc_address;
wire[2:0] icache_driver_in_mem_read = !(|icache_driver_in_valid) ? `NO_MEM_READ : VX_icache_req.out_cache_driver_in_mem_read;
wire[2:0] icache_driver_in_mem_write = !(|icache_driver_in_valid) ? `NO_MEM_WRITE : VX_icache_req.out_cache_driver_in_mem_write;
wire[31:0] icache_driver_in_data = VX_icache_req.out_cache_driver_in_data;
wire read_or_write_ic = (VX_icache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|icache_driver_in_valid);
wire valid_read_cache = !cache_delay && cache_driver_in_valid[0];
wire[31:0] icache_driver_in_data = VX_icache_req.out_cache_driver_in_data;
wire read_or_write_ic = (VX_icache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|icache_driver_in_valid);
VX_shared_memory #(
@@ -86,53 +74,99 @@ module VX_dmem_controller (
);
VX_d_cache#(
.CACHE_SIZE (`DCACHE_SIZE),
.CACHE_WAYS (`DCACHE_WAYS),
.CACHE_BLOCK (`DCACHE_BLOCK),
.CACHE_BANKS (`DCACHE_BANKS),
.LOG_NUM_BANKS (`DCACHE_LOG_NUM_BANKS),
.NUM_REQ (`DCACHE_NUM_REQ),
.LOG_NUM_REQ (`DCACHE_LOG_NUM_REQ),
.NUM_IND (`DCACHE_NUM_IND),
.CACHE_WAY_INDEX (`DCACHE_WAY_INDEX),
.NUM_WORDS_PER_BLOCK (`DCACHE_NUM_WORDS_PER_BLOCK),
.OFFSET_SIZE_START (`DCACHE_OFFSET_ST),
.OFFSET_SIZE_END (`DCACHE_OFFSET_ED),
.TAG_SIZE_START (`DCACHE_TAG_SIZE_START),
.TAG_SIZE_END (`DCACHE_TAG_SIZE_END),
.IND_SIZE_START (`DCACHE_IND_SIZE_START),
.IND_SIZE_END (`DCACHE_IND_SIZE_END),
.ADDR_TAG_START (`DCACHE_ADDR_TAG_START),
.ADDR_TAG_END (`DCACHE_ADDR_TAG_END),
.ADDR_OFFSET_START (`DCACHE_ADDR_OFFSET_ST),
.ADDR_OFFSET_END (`DCACHE_ADDR_OFFSET_ED),
.ADDR_IND_START (`DCACHE_IND_ST),
.ADDR_IND_END (`DCACHE_IND_ED),
.MEM_ADDR_REQ_MASK (`DCACHE_MEM_REQ_ADDR_MASK)
)
dcache
(
.clk (clk),
.rst (reset),
.i_p_valid (cache_driver_in_valid),
.i_p_addr (cache_driver_in_address),
.i_p_writedata (cache_driver_in_data),
.i_p_read_or_write (read_or_write),
.i_p_mem_read (cache_driver_in_mem_read),
.i_p_mem_write (cache_driver_in_mem_write),
.o_p_readdata (cache_driver_out_data),
.o_p_delay (cache_delay),
.o_m_evict_addr (VX_dram_req_rsp.o_m_evict_addr),
.o_m_read_addr (VX_dram_req_rsp.o_m_read_addr),
.o_m_valid (VX_dram_req_rsp.o_m_valid),
.o_m_writedata (VX_dram_req_rsp.o_m_writedata),
.o_m_read_or_write (VX_dram_req_rsp.o_m_read_or_write),
.i_m_readdata (VX_dram_req_rsp.i_m_readdata),
.i_m_ready (VX_dram_req_rsp.i_m_ready)
// Data cache instance (VX_cache). Core-side requests arrive on VX_dcache_req
// and write-back results / back-pressure are returned on VX_dcache_rsp.
VX_cache gpu_dcache (
    .clk                (clk),
    .reset              (reset),

    // Core request. The valid mask is cache_driver_in_valid, i.e.
    // core_req_valid with every lane cleared when to_shm is set
    // (lane 0's address has 8'hFF in bits [31:24]).
    .core_req_valid     (cache_driver_in_valid),
    .core_req_addr      (VX_dcache_req.core_req_addr),
    .core_req_writedata (VX_dcache_req.core_req_writedata),
    .core_req_mem_read  (VX_dcache_req.core_req_mem_read),
    .core_req_mem_write (VX_dcache_req.core_req_mem_write),
    .core_req_rd        (VX_dcache_req.core_req_rd),
    .core_req_wb        (VX_dcache_req.core_req_wb),
    .core_req_warp_num  (VX_dcache_req.core_req_warp_num),

    // Back-pressure towards the core: request must be delayed.
    .delay_req          (VX_dcache_rsp.delay_req),

    // Back-pressure from the core: no write-back slot available.
    .core_no_wb_slot    (VX_dcache_req.core_no_wb_slot),

    // Cache -> core write-back.
    .core_wb_valid      (VX_dcache_rsp.core_wb_valid),
    .core_wb_req_rd     (VX_dcache_rsp.core_wb_req_rd),
    .core_wb_req_wb     (VX_dcache_rsp.core_wb_req_wb),
    .core_wb_warp_num   (VX_dcache_rsp.core_wb_warp_num),
    .core_wb_readdata   (VX_dcache_rsp.core_wb_readdata),

    // NOTE(review): the dram_* nets below are not declared in the visible
    // part of this module. Confirm they are declared elsewhere with the
    // right widths; undeclared names in a port connection would become
    // implicit 1-bit nets (or errors under `default_nettype none).

    // DRAM fill response.
    .dram_fill_rsp      (dram_fill_rsp),
    .dram_fill_rsp_addr (dram_fill_rsp_addr),
    .dram_fill_rsp_data (dram_fill_rsp_data),

    // DRAM fill-response accept handshake.
    .dram_fill_accept   (dram_fill_accept),

    // DRAM request. The last connection must not be followed by a comma:
    // a trailing comma is a syntax error in a named port connection list.
    .dram_req           (dram_req),
    .dram_req_write     (dram_req_write),
    .dram_req_read      (dram_req_read),
    .dram_req_addr      (dram_req_addr),
    .dram_req_size      (dram_req_size),
    .dram_req_data      (dram_req_data)
);
// VX_d_cache#(
// .CACHE_SIZE (`DCACHE_SIZE),
// .CACHE_WAYS (`DCACHE_WAYS),
// .CACHE_BLOCK (`DCACHE_BLOCK),
// .CACHE_BANKS (`DCACHE_BANKS),
// .LOG_NUM_BANKS (`DCACHE_LOG_NUM_BANKS),
// .NUM_REQ (`DCACHE_NUM_REQ),
// .LOG_NUM_REQ (`DCACHE_LOG_NUM_REQ),
// .NUM_IND (`DCACHE_NUM_IND),
// .CACHE_WAY_INDEX (`DCACHE_WAY_INDEX),
// .NUM_WORDS_PER_BLOCK (`DCACHE_NUM_WORDS_PER_BLOCK),
// .OFFSET_SIZE_START (`DCACHE_OFFSET_ST),
// .OFFSET_SIZE_END (`DCACHE_OFFSET_ED),
// .TAG_SIZE_START (`DCACHE_TAG_SIZE_START),
// .TAG_SIZE_END (`DCACHE_TAG_SIZE_END),
// .IND_SIZE_START (`DCACHE_IND_SIZE_START),
// .IND_SIZE_END (`DCACHE_IND_SIZE_END),
// .ADDR_TAG_START (`DCACHE_ADDR_TAG_START),
// .ADDR_TAG_END (`DCACHE_ADDR_TAG_END),
// .ADDR_OFFSET_START (`DCACHE_ADDR_OFFSET_ST),
// .ADDR_OFFSET_END (`DCACHE_ADDR_OFFSET_ED),
// .ADDR_IND_START (`DCACHE_IND_ST),
// .ADDR_IND_END (`DCACHE_IND_ED),
// .MEM_ADDR_REQ_MASK (`DCACHE_MEM_REQ_ADDR_MASK)
// )
// dcache
// (
// .clk (clk),
// .rst (reset),
// .i_p_valid (cache_driver_in_valid),
// .i_p_addr (cache_driver_in_address),
// .i_p_writedata (cache_driver_in_data),
// .i_p_read_or_write (read_or_write),
// .i_p_mem_read (cache_driver_in_mem_read),
// .i_p_mem_write (cache_driver_in_mem_write),
// .o_p_readdata (cache_driver_out_data),
// .o_p_delay (cache_delay),
// .o_m_evict_addr (VX_dram_req_rsp.o_m_evict_addr),
// .o_m_read_addr (VX_dram_req_rsp.o_m_read_addr),
// .o_m_valid (VX_dram_req_rsp.o_m_valid),
// .o_m_writedata (VX_dram_req_rsp.o_m_writedata),
// .o_m_read_or_write (VX_dram_req_rsp.o_m_read_or_write),
// .i_m_readdata (VX_dram_req_rsp.i_m_readdata),
// .i_m_ready (VX_dram_req_rsp.i_m_ready)
// );
VX_d_cache#(
.CACHE_SIZE (`ICACHE_SIZE),
.CACHE_WAYS (`ICACHE_WAYS),
@@ -178,8 +212,8 @@ VX_d_cache#(
.i_m_ready (VX_dram_req_rsp_icache.i_m_ready)
);
assign VX_dcache_rsp.in_cache_driver_out_data = to_shm ? sm_driver_out_data : cache_driver_out_data;
assign VX_dcache_rsp.delay = sm_delay || cache_delay;
// assign VX_dcache_rsp.in_cache_driver_out_data = (to_shm && 0) ? sm_driver_out_data : cache_driver_out_data;
// assign VX_dcache_rsp.delay = (sm_delay && 0) || cache_delay;
assign VX_icache_rsp.instruction = icache_instruction_out;
assign VX_icache_rsp.delay = icache_delay;

View File

@@ -11,14 +11,12 @@ module VX_lsu (
// Write back to GPR
VX_inst_mem_wb_inter VX_mem_wb,
VX_dcache_response_inter VX_dcache_rsp,
VX_dcache_request_inter VX_dcache_req,
VX_gpu_dcache_res_inter VX_dcache_rsp,
VX_gpu_dcache_req_inter VX_dcache_req,
output wire out_delay
);
// VX_inst_mem_wb_inter VX_mem_wb_temp();
assign out_delay = VX_dcache_rsp.delay || no_slot_mem;
// Generate Addresses
@@ -55,27 +53,33 @@ module VX_lsu (
);
genvar index;
generate
for (index = 0; index <= `NT_M1; index = index + 1) begin : dcache_reqs
assign VX_dcache_req.out_cache_driver_in_address[index] = use_address[index];
assign VX_dcache_req.out_cache_driver_in_data[index] = use_store_data[index];
assign VX_dcache_req.out_cache_driver_in_valid[index] = (use_valid[index]);
// Core Request
assign VX_dcache_req.core_req_valid = use_valid;
assign VX_dcache_req.core_req_addr = use_address;
assign VX_dcache_req.core_req_writedata = use_store_data;
assign VX_dcache_req.core_req_mem_read = use_mem_read;
assign VX_dcache_req.core_req_mem_write = use_mem_write;
assign VX_dcache_req.core_req_rd = use_rd;
assign VX_dcache_req.core_req_wb = use_wb;
assign VX_dcache_req.core_req_warp_num = use_warp_num;
assign VX_mem_wb.loaded_data[index] = VX_dcache_rsp.in_cache_driver_out_data[index];
end
endgenerate
assign VX_dcache_req.out_cache_driver_in_mem_read = use_mem_read;
assign VX_dcache_req.out_cache_driver_in_mem_write = use_mem_write;
// Cache can't accept request
assign out_delay = VX_dcache_rsp.delay_req;
assign VX_mem_wb.rd = use_rd;
assign VX_mem_wb.wb = use_wb & {!VX_dcache_rsp.delay, !VX_dcache_rsp.delay};
assign VX_mem_wb.wb_valid = use_valid;
assign VX_mem_wb.wb_warp_num = use_warp_num;
assign VX_mem_wb.mem_wb_pc = use_pc;
// Core Response
assign VX_mem_wb.rd = VX_dcache_rsp.core_wb_req_rd;
assign VX_mem_wb.wb = VX_dcache_rsp.core_wb_req_wb;
assign VX_mem_wb.wb_valid = VX_dcache_rsp.core_wb_valid;
assign VX_mem_wb.wb_warp_num = VX_dcache_rsp.core_wb_warp_num;
assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata;
assign VX_mem_wb.mem_wb_pc = 32'hdeadbeff;
// Core can't accept response
assign VX_dcache_req.core_no_wb_slot = no_slot_mem;
// integer curr_t;
// always @(negedge clk) begin

View File

@@ -72,11 +72,11 @@ wire schedule_delay;
// Dcache Interface
VX_dcache_response_inter VX_dcache_rsp();
VX_dcache_request_inter VX_dcache_req();
VX_gpu_dcache_res_inter VX_dcache_rsp();
VX_gpu_dcache_req_inter VX_dcache_req();
wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.out_cache_driver_in_valid) && (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (VX_dcache_req.out_cache_driver_in_address[0] == 32'h00010000);
wire[31:0] temp_io_data = VX_dcache_req.out_cache_driver_in_data[0];
// Temporary I/O tap on the data-cache request bus: asserted when memory is
// not delayed, at least one request lane is valid, the operation is a write,
// and lane 0's address equals 32'h00010000.
wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.core_req_valid) && (VX_dcache_req.core_req_mem_write != `NO_MEM_WRITE) && (VX_dcache_req.core_req_addr[0] == 32'h00010000);
// The I/O data word is lane 0's 32-bit store data. (It was previously wired
// to core_req_valid[0], a single bit, which dropped the stored value.)
wire[31:0] temp_io_data = VX_dcache_req.core_req_writedata[0];
assign io_valid = temp_io_valid;
assign io_data = temp_io_data;
@@ -94,31 +94,6 @@ VX_dram_req_rsp_inter #(
//assign icache_response_fe.instruction = icache_response_instruction;
assign icache_request_pc_address = icache_request_fe.pc_address;
// Need to fix this so that it is only 1 set of outputs
// o_m Values
// L2 Cache
/*
assign VX_L2cache_req.out_cache_driver_in_valid = VX_dram_req_rsp.o_m_valid || VX_dram_req_rsp_icache.o_m_valid; // Ask about this (width)
// Ask about the adress
assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? icache_request_fe.pc_address: VX_dcache_req.out_cache_driver_in_address;
//assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_read_addr: VX_dram_req_rsp.o_m_read_addr;
//assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_evict_addr : VX_dram_req_rsp.o_m_evict_addr;
assign VX_L2cache_req.out_cache_driver_in_mem_read = (VX_dram_req_rsp_icache.o_m_valid) ? (VX_dram_req_rsp_icache.o_m_read_or_write ? icache_request_fe.out_cache_driver_in_mem_write : icache_request_fe.out_cache_driver_in_mem_read)
: (VX_dram_req_rsp.o_m_read_or_write ? VX_dcache_req.out_cache_driver_in_mem_write : VX_dcache_req.out_cache_driver_in_mem_read);
//assign VX_dram_req_rsp.i_m_ready = i_m_ready && !VX_dram_req_rsp_icache.o_m_valid && VX_dram_req_rsp.o_m_valid;
//assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready && VX_dram_req_rsp_icache.o_m_valid;
genvar cur_bank;
genvar cur_word;
for (cur_bank = 0; cur_bank < CACHE_BANKS; cur_bank = cur_bank + 1) begin
for (cur_word = 0; cur_word < NUM_WORDS_PER_BLOCK; cur_word = cur_word + 1) begin
assign VX_L2cache_req.out_cache_driver_in_data[cur_bank][cur_word] = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_writedata[cur_bank][cur_word]
: VX_dram_req_rsp.o_m_writedata[cur_bank][cur_word];
assign VX_dram_req_rsp.i_m_readdata[cur_bank][cur_word] = VX_dram_req_rsp_L2.i_m_readdata[cur_bank][cur_word]; // fill in correct response data
assign VX_dram_req_rsp_icache.i_m_readdata[cur_bank][cur_word] = VX_dram_req_rsp_L2.i_m_readdata[cur_bank][cur_word]; // fill in correct response data
end
end
*/
assign o_m_valid_i = VX_dram_req_rsp_icache.o_m_valid;
@@ -133,16 +108,6 @@ VX_dram_req_rsp_inter #(
assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready_i;
genvar curr_bank;
genvar curr_word;
/*
for (curr_bank = 0; curr_bank < CACHE_BANKS; curr_bank = curr_bank + 1) begin
for (curr_word = 0; curr_word < NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word];
assign o_m_writedata_d[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word];
assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata_d[curr_bank][curr_word]; // fixed
assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed
end
end
*/
generate
for (curr_bank = 0; curr_bank < `DCACHE_BANKS; curr_bank = curr_bank + 1) begin : dcache_setup

View File

@@ -0,0 +1,25 @@
`include "../VX_cache/VX_cache_config.v"
`ifndef VX_GPU_DRAM_DCACHE_REQ
`define VX_GPU_DRAM_DCACHE_REQ
// Bundle of the DRAM-request-side signals of the GPU data cache.
// Signal names mirror the dram_req* / dram_fill_accept ports of VX_cache.
interface VX_gpu_dcache_dram_req_inter ();

    // Single-bit request strobes: request present, and its write / read kind.
    wire dram_req, dram_req_write, dram_req_read;

    // 32-bit request address and request size.
    // NOTE(review): the unit of dram_req_size is not visible here - confirm.
    wire [31:0] dram_req_addr, dram_req_size;

    // Request data: one 32-bit word per index in `BANK_LINE_SIZE_RNG.
    wire [`BANK_LINE_SIZE_RNG][31:0] dram_req_data;

    // Fill-response accept handshake. The original note reads "DRAM Cache
    // can't accept response"; the active polarity should be confirmed
    // against VX_cache.
    wire dram_fill_accept;

endinterface
`endif

View File

@@ -0,0 +1,19 @@
`include "../VX_cache/VX_cache_config.v"
`ifndef VX_GPU_DRAM_DCACHE_RES
`define VX_GPU_DRAM_DCACHE_RES
// Bundle of the DRAM fill-response signals of the GPU data cache.
// Signal names mirror the dram_fill_rsp* ports of VX_cache.
interface VX_gpu_dcache_dram_res_inter ();
// DRAM Response
// Single-bit fill-response strobe.
wire dram_fill_rsp;
// 32-bit address associated with the fill response.
wire [31:0] dram_fill_rsp_addr;
// Fill data: one 32-bit word per index in `BANK_LINE_SIZE_RNG.
wire [`BANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data;
endinterface
`endif

View File

@@ -0,0 +1,27 @@
`include "../VX_cache/VX_cache_config.v"
`ifndef VX_GPU_DCACHE_REQ
`define VX_GPU_DCACHE_REQ
// Core -> data-cache request bundle. Driven by the load/store unit and
// consumed by the data-memory controller.
interface VX_gpu_dcache_req_inter ();

    // One valid bit per request slot (`NUMBER_REQUESTS slots).
    wire [`NUMBER_REQUESTS-1:0] core_req_valid;

    // Per-slot 32-bit address and 32-bit store data.
    wire [`NUMBER_REQUESTS-1:0][31:0] core_req_addr, core_req_writedata;

    // 3-bit read / write operation codes, shared by all slots
    // (compared against `NO_MEM_READ / `NO_MEM_WRITE by the consumers).
    wire [2:0] core_req_mem_read, core_req_mem_write;

    // Request metadata: 5-bit rd, 2-bit wb and the issuing warp number.
    wire [4:0]      core_req_rd;
    wire [1:0]      core_req_wb;
    wire [`NW_M1:0] core_req_warp_num;

    // Raised by the core when it has no slot to take a write-back.
    wire core_no_wb_slot;

endinterface
`endif

View File

@@ -0,0 +1,24 @@
`include "../VX_cache/VX_cache_config.v"
`ifndef VX_GPU_DCACHE_RES
`define VX_GPU_DCACHE_RES
// Data-cache -> core response bundle: write-back results plus request
// back-pressure. The load/store unit forwards the core_wb_* signals to its
// write-back interface and uses delay_req as its delay output.
interface VX_gpu_dcache_res_inter ();
// Cache WB
// One write-back valid bit per request slot (`NUMBER_REQUESTS slots).
wire [`NUMBER_REQUESTS-1:0] core_wb_valid;
// 5-bit rd field returned with the write-back.
wire [4:0] core_wb_req_rd;
// 2-bit wb field returned with the write-back.
wire [1:0] core_wb_req_wb;
// Warp number the write-back belongs to.
wire [`NW_M1:0] core_wb_warp_num;
// Per-slot 32-bit read data.
wire [`NUMBER_REQUESTS-1:0][31:0] core_wb_readdata;
// Cache Full
// Asserted when the cache cannot accept the current core request.
wire delay_req;
endinterface
`endif