Integrated Shared Memory

This commit is contained in:
felsabbagh3
2019-10-22 05:03:47 -04:00
parent 1bfafca896
commit b7af8c3f34
23 changed files with 2493 additions and 2101 deletions

View File

@@ -1,6 +1,9 @@
all: RUNFILE
INCLUDE=-I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/ -Isimulate
# /rf2_256x128_wm1/
BaseMEM=../models/memory/cln28hpm
INCLUDE=-I. -Ishared_memory -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm1/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate
FILE=Vortex.v
@@ -8,7 +11,7 @@ EXE=--exe ./simulate/test_bench.cpp
COMP=--compiler gcc
WNO=-Wno-UNOPTFLAT -Wno-UNDRIVEN --Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH
WNO=-Wno-UNOPTFLAT -Wno-UNDRIVEN --Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH -Wno-UNSIGNED
LIGHTW=-Wno-UNOPTFLAT --Wno-PINMISSING -Wno-WIDTH -Wno-STMTDLY
# LIB=-LDFLAGS '-L/usr/local/systemc/'

51
rtl/VX_dmem_controller.v Normal file
View File

@@ -0,0 +1,51 @@
`include "VX_define.v"
// VX_dmem_controller
//
// Sits between the core's data-cache request/response interfaces and the
// on-chip shared memory. Requests whose address has 0xFF in bits [31:24]
// are presented to VX_shared_memory; the shared memory's read data and
// stall are returned through the response interface.
//
// NOTE(review): the shared-memory decision is taken from lane 0's address
// only and then applied to every lane -- confirm that all active lanes are
// guaranteed to target the same region, and that lane 0's address is
// meaningful when lane 0 itself is not valid.
module VX_dmem_controller (
    input wire clk,
    // MEM-Processor
    VX_dcache_request_inter  VX_dcache_req,
    VX_dcache_response_inter VX_dcache_rsp
);

    // ---- Request side: local copies of the interface fields ----
    wire[`NT_M1:0]       req_valid;
    wire[`NT_M1:0][31:0] req_address;
    wire[`NT_M1:0][31:0] req_data;
    wire[2:0]            req_mem_read;
    wire[2:0]            req_mem_write;

    assign req_valid     = VX_dcache_req.out_cache_driver_in_valid;
    assign req_address   = VX_dcache_req.out_cache_driver_in_address;
    assign req_data      = VX_dcache_req.out_cache_driver_in_data;
    assign req_mem_read  = VX_dcache_req.out_cache_driver_in_mem_read;
    assign req_mem_write = VX_dcache_req.out_cache_driver_in_mem_write;

    // High when lane 0 addresses the 0xFF...... region.
    wire is_shm_access;
    assign is_shm_access = (req_address[0][31:24] == 8'hFF);

    // Lane valids gated by the routing decision.
    wire[`NT_M1:0] shm_req_valid;
    wire[`NT_M1:0] cache_req_valid; // computed but not connected anywhere in this module

    assign shm_req_valid   = req_valid & {`NT{is_shm_access}};
    assign cache_req_valid = req_valid & {`NT{~is_shm_access}};

    // ---- Response side ----
    wire[`NT_M1:0][31:0] shm_rsp_data;
    wire[`NT_M1:0]       shm_rsp_valid; // Not used for now
    wire                 shm_stall;

    VX_shared_memory #(.NB(7), .BITS_PER_BANK(3)) shared_memory (
        .clk       (clk),
        .in_valid  (shm_req_valid),
        .in_address(req_address),
        .in_data   (req_data),
        .mem_read  (req_mem_read),
        .mem_write (req_mem_write),
        .out_valid (shm_rsp_valid),
        .out_data  (shm_rsp_data),
        .stall     (shm_stall)
    );

    assign VX_dcache_rsp.in_cache_driver_out_data = shm_rsp_data;
    assign VX_dcache_rsp.delay                    = shm_stall;

endmodule

View File

@@ -82,45 +82,6 @@ module VX_gpr (
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
// wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_32x128_wm1 first_ram (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(out_a_reg_data),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(cena_1),
// .AA(VX_gpr_read.rs1),
// .CLKB(clk),
// .CENB(cenb),
// .WENB(write_bit_mask),
// .AB(VX_writeback_inter.rd),
// .DB(VX_writeback_inter.write_data),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
// /* verilator lint_off PINCONNECTEMPTY */

View File

@@ -16,7 +16,7 @@ module VX_lsu (
// VX_inst_mem_wb_inter VX_mem_wb_temp();
assign out_delay = 1'b0;
assign out_delay = VX_dcache_rsp.delay;
// Generate Addresses

View File

@@ -15,19 +15,25 @@ module Vortex(
output wire out_ebreak
);
// assign out_cache_driver_in_address = 0;
assign out_cache_driver_in_mem_read = `NO_MEM_READ;
assign out_cache_driver_in_mem_write = `NO_MEM_WRITE;
// assign out_cache_driver_in_valid = 0;
// assign out_cache_driver_in_data = 0;
// assign out_cache_driver_in_address = VX_dcache_req.out_cache_driver_in_address;
// assign out_cache_driver_in_mem_read = VX_dcache_req.out_cache_driver_in_mem_read;
// assign out_cache_driver_in_mem_write = VX_dcache_req.out_cache_driver_in_mem_write;
// assign out_cache_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid;
// assign out_cache_driver_in_data = VX_dcache_req.out_cache_driver_in_data;
// assign VX_dcache_rsp.in_cache_driver_out_data = in_cache_driver_out_data;
// Dcache Interface
VX_dcache_response_inter VX_dcache_rsp();
VX_dcache_request_inter VX_dcache_req();
assign out_cache_driver_in_address = VX_dcache_req.out_cache_driver_in_address;
assign out_cache_driver_in_mem_read = VX_dcache_req.out_cache_driver_in_mem_read;
assign out_cache_driver_in_mem_write = VX_dcache_req.out_cache_driver_in_mem_write;
assign out_cache_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid;
assign out_cache_driver_in_data = VX_dcache_req.out_cache_driver_in_data;
assign VX_dcache_rsp.in_cache_driver_out_data = in_cache_driver_out_data;
VX_dcache_request_inter VX_dcache_req();
// Icache Interface
@@ -100,6 +106,12 @@ VX_back_end vx_back_end(
.gpr_stage_delay (gpr_stage_delay)
);
VX_dmem_controller VX_dmem_controller(
.clk (clk),
.VX_dcache_req(VX_dcache_req),
.VX_dcache_rsp(VX_dcache_rsp)
);
// VX_csr_handler vx_csr_handler(
// .clk (clk),
// .in_decode_csr_address(decode_csr_address),

View File

@@ -7,11 +7,11 @@
interface VX_dcache_request_inter ();
wire[31:0] out_cache_driver_in_address[`NT_M1:0];
wire[2:0] out_cache_driver_in_mem_read;
wire[2:0] out_cache_driver_in_mem_write;
wire out_cache_driver_in_valid[`NT_M1:0];
wire[31:0] out_cache_driver_in_data[`NT_M1:0];
wire[`NT_M1:0][31:0] out_cache_driver_in_address;
wire[2:0] out_cache_driver_in_mem_read;
wire[2:0] out_cache_driver_in_mem_write;
wire[`NT_M1:0] out_cache_driver_in_valid;
wire[`NT_M1:0][31:0] out_cache_driver_in_data;
endinterface

View File

@@ -7,7 +7,8 @@
interface VX_dcache_response_inter ();
wire[31:0] in_cache_driver_out_data[`NT_M1:0];
wire[`NT_M1:0][31:0] in_cache_driver_out_data;
wire delay;
endinterface

View File

@@ -1,7 +1,7 @@
# Dynamic Instructions: 52686
# of total cycles: 52701
# Dynamic Instructions: 14
# of total cycles: 26
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: 1.00028
# time to simulate: 0 milliseconds
# CPI: 1.85714
# time to simulate: 6.95313e-310 milliseconds
# GRADE: Failed on test: 4294967295

View File

@@ -0,0 +1,36 @@
`include "../VX_define.v"
// VX_bank_valids
//
// Converts the per-lane valid vector into one valid vector per bank.
// bank_valids[b][t] is 1 when lane t is valid and the BITS_PER_BANK address
// bits starting at bit 2 of lane t's address equal b; otherwise it is 0.
//
// Parameters:
//   NB            - highest bank index (banks are numbered 0..NB)
//   BITS_PER_BANK - number of address bits compared against the bank index
module VX_bank_valids
#(
    parameter NB = 4,
    parameter BITS_PER_BANK = 3
)
(
    input wire[`NT_M1:0] in_valids,
    input wire[`NT_M1:0][31:0] in_addr,
    output reg[NB:0][`NT_M1:0] bank_valids
);

    integer bank, lane;

    always @(*) begin
        for (lane = 0; lane <= `NT_M1; lane = lane + 1) begin
            for (bank = 0; bank <= NB; bank = bank + 1) begin
                // Default to "not for this bank"; raise only on a valid match.
                bank_valids[bank][lane] = 1'b0;
                if (in_valids[lane] &&
                    (in_addr[lane][2 +: BITS_PER_BANK] == bank[BITS_PER_BANK-1:0])) begin
                    bank_valids[bank][lane] = 1'b1;
                end
            end
        end
    end

endmodule

View File

@@ -0,0 +1,88 @@
`include "../VX_define.v"
// VX_priority_encoder_sm
//
// Per-bank arbiter for the shared memory. For every bank it selects one
// requesting lane per cycle and forwards that lane's address and data.
// When more than one lane targets the same bank, the remaining lanes are
// remembered in temp_valid and served in later cycles while `stall` is high.
//
// Outputs:
//   out_valid[b]   - bank b has a selected request this cycle
//   out_address[b] - address of the selected lane (0 when none)
//   out_data[b]    - store data of the selected lane (0 when none)
//   req_num[b]     - index of the selected lane for bank b
//   stall          - some bank still has pending (serialized) requests
//   send_data      - req_done is set for every bank
//
// NOTE(review): temp_valid, counter and req_done have no reset or initial
// value in this module; their power-up state is not defined here.
module VX_priority_encoder_sm
#(
    parameter NB = 4,
    parameter BITS_PER_BANK = 3
)
(
    //INPUTS
    input wire clk,
    //input wire reset,
    input wire[`NT_M1:0] in_valid,
    input wire[`NT_M1:0][31:0] in_address,
    input wire[`NT_M1:0][31:0] in_data,
    // OUTPUTS
    // To SM Module
    output reg[NB:0] out_valid,
    output reg[NB:0][31:0] out_address,
    output reg[NB:0][31:0] out_data,
    // To Processor
    output wire[NB:0][1:0] req_num,
    output reg stall,
    output wire send_data // Finished all of the requests
);

    wire[NB:0][`NT_M1:0] bank_valids;       // lanes requesting each bank this cycle
    wire[NB:0][`NT_M1:0] temp_bank_valids;  // candidate set actually fed to the encoder
    reg[NB:0][`NT_M1:0] temp_valid; // State - If there's any ones here, then stall
    wire[NB:0] temp_stall;
    integer counter[NB:0] ;                 // per-bank count of conflict cycles taken so far
    wire[NB:0][`NT_M1:0] mask;              // all ones except the bit of the lane just selected
    wire[NB:0] update_temp_valid;
    reg[NB:0] req_done;

    VX_bank_valids #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_bank_valid(
        .in_valids(in_valid),
        .in_addr(in_address),
        .bank_valids(bank_valids)
    );

    genvar j;
    for(j=0; j <= NB; j++) begin
        // A bank stalls while it still holds remembered requests.
        assign temp_stall[j] = ($countones(temp_valid[j]) != 0);
        // Arbitrate over the remembered set while draining (or once done),
        // otherwise over the fresh per-bank requests.
        assign temp_bank_valids[j] = (temp_stall[j] || req_done[j]) ? temp_valid[j] : bank_valids[j];
        // A conflict exists when more than one lane targets this bank.
        assign update_temp_valid[j] = !req_done[j] && ($countones(bank_valids[j]) > 1);

        VX_generic_priority_encoder #(.N(4)) vx_priority_encoder(
            .valids(temp_bank_valids[j]),
            .index(req_num[j]),
            .found(out_valid[j])
        );

        VX_set_bit vx_set_bit(
            .index(req_num[j]),
            .mask (mask[j])
        );

        assign out_address[j] = out_valid[j] ? in_address[req_num[j]] : 0;
        assign out_data[j] = out_valid[j] ? in_data[req_num[j]] : 0;
    end

    assign stall = |temp_stall;
    assign send_data = &req_done;

    // Procedural loop index. A genvar may only be used in generate loops, so
    // the sequential block below iterates with an ordinary integer.
    integer i;

    always @(posedge clk) begin
        for(i = 0; i <= NB; i = i+1) begin
            if (update_temp_valid[i]) begin
                counter[i] <= counter[i] + 1;
                // First conflict cycle starts from the fresh requests; later
                // cycles continue from the set that was just arbitrated.
                // In both cases the lane served this cycle is removed.
                if(counter[i] == 0) temp_valid[i] <= bank_valids[i] & mask[i];
                else if (counter[i] > 0) temp_valid[i] <= temp_bank_valids[i] & mask[i];
            end

            if(($countones(in_valid) > 0) && ($countones(bank_valids[i]) == 0)) begin
                // Some lane is valid but none of them targets this bank.
                req_done[i] <= 1;
            end
            else if((counter[i][2:0] == ($countones(bank_valids[i])-1))) begin
                req_done[i] <= 1;
                // When the increment above also ran this cycle, this later
                // nonblocking assignment is the one that takes effect.
                counter[i] <= 0;
            end
            else begin
                req_done[i] <= 0;
            end
        end
    end

endmodule

View File

@@ -0,0 +1,21 @@
`include "../VX_define.v"
// VX_set_bit
//
// Builds a lane mask that is 1 everywhere except at the position(s) whose
// low two index bits equal `index`, where it is 0. ANDing a valid vector
// with this mask removes the lane identified by `index`.
//
// Inputs:
//   index - 2-bit lane number to clear
// Outputs:
//   mask  - one bit per lane (`NT_M1:0)
module VX_set_bit (
    input wire[1:0] index,
    output reg[`NT_M1:0] mask
);

    integer some_index;

    always @(*) begin
        for (some_index = 0; some_index <= `NT_M1; some_index = some_index + 1) begin
            // Plain blocking assignments: a procedural continuous `assign`
            // may not target a bit-select and is not synthesizable.
            if (some_index[1:0] == index) begin
                mask[some_index] = 0;
            end
            else begin
                mask[some_index] = 1;
            end
        end
    end

endmodule

View File

@@ -0,0 +1,135 @@
`include "../VX_define.v"
// VX_shared_memory
//
// Banked shared memory with NB+1 banks (indices 0..NB). Lane requests are
// arbitrated per bank by VX_priority_encoder_sm; the request selected for
// each bank is then applied to that bank's VX_shared_memory_block.
//
// Address fields used in this module:
//   [31:24] must equal 8'hFF for the request to be handled here
//   [13:7]  line address inside the bank
//   [6:5]   32-bit word select inside the 4-word line
//
// Only word accesses (`SW_MEM_WRITE / `LW_MEM_READ) are implemented; the
// byte and half-word cases are placeholders.
//
// Inputs:
//   in_valid / in_address / in_data - per-lane request
//   mem_read / mem_write            - access type shared by all lanes
// Outputs:
//   out_valid / out_data - per-lane load result, forced to 0 until the
//                          arbiter asserts send_data
//   stall                - arbiter is still serializing bank conflicts
module VX_shared_memory
#(
    parameter NB = 4,
    parameter BITS_PER_BANK = 3
)
(
    //INPUTS
    input wire clk,
    input wire[`NT_M1:0] in_valid,
    input wire[`NT_M1:0][31:0] in_address,
    input wire[`NT_M1:0][31:0] in_data,
    input wire[2:0] mem_read,
    input wire[2:0] mem_write,
    //OUTPUTS
    output wire[`NT_M1:0] out_valid,
    output wire[`NT_M1:0][31:0] out_data,
    output wire stall
);

    // Per-bank request chosen by the arbiter
    reg[NB:0][31:0] temp_address;
    reg[NB:0][31:0] temp_in_data;
    reg[NB:0] temp_in_valid;

    // Per-lane load results
    reg[`NT_M1:0] temp_out_valid;
    reg[`NT_M1:0][31:0] temp_out_data;

    // Per-bank memory block connections
    reg [NB:0][6:0] block_addr;
    reg [NB:0][3:0][31:0] block_wdata;
    reg [NB:0][3:0][31:0] block_rdata;
    reg [NB:0][1:0] block_we;
    // One write strobe per bank. A bank is written only when its own strobe
    // is set; a single flag shared by all banks would make every bank with
    // no request in a store cycle write its zeroed default data to line 0,
    // word 0.
    reg [NB:0] block_shm_write;

    wire send_data;
    reg[NB:0][1:0] req_num;

    wire [`NT_M1:0] orig_in_valid;

    genvar i;
    for(i = 0; i <= `NT_M1; i = i+1) begin
        assign orig_in_valid[i] = in_valid[i];
    end

    assign out_valid = send_data ? temp_out_valid : 0;
    assign out_data = send_data ? temp_out_data : 0;

    VX_priority_encoder_sm #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm(
        .clk(clk),
        //.reset(reset),
        .in_valid(orig_in_valid),
        .in_address(in_address),
        .in_data(in_data),
        .out_valid(temp_in_valid),
        .out_address(temp_address),
        .out_data(temp_in_data),
        .req_num(req_num),
        .stall(stall),
        .send_data(send_data)
    );

    genvar j;
    generate
        for(j=0; j<= NB; j=j+1) begin
            VX_shared_memory_block vx_shared_memory_block(
                .clk(clk),
                .addr(block_addr[j]),
                .wdata(block_wdata[j]),
                .we(block_we[j]),
                .shm_write(block_shm_write[j]),
                .data_out(block_rdata[j])
            );
        end
    endgenerate

    // Procedural loop index; genvars are restricted to generate loops.
    integer bank;

    always @(*) begin
        block_addr      = 0;
        block_we        = 0;
        block_wdata     = 0;
        block_shm_write = 0;
        // temp_out_data / temp_out_valid are deliberately NOT defaulted here:
        // they keep their previous value on paths that do not assign them.
        // NOTE(review): results gathered over several serialized cycles
        // appear to rely on that retention, and temp_out_valid is never
        // cleared -- confirm the intended behaviour before changing it.
        for(bank = 0; bank <= NB; bank = bank+1) begin
            if(temp_in_valid[bank] == 1'b1) begin
                //1. Check if the request is actually to the shared memory
                if((temp_address[bank][31:24]) == 8'hFF) begin
                    // STORES
                    if(mem_write != `NO_MEM_WRITE) begin
                        if(mem_write == `SB_MEM_WRITE) begin
                            //TODO
                        end
                        else if(mem_write == `SH_MEM_WRITE) begin
                            //TODO
                        end
                        else if(mem_write == `SW_MEM_WRITE) begin
                            block_shm_write[bank] = 1'b1;
                            block_addr[bank] = temp_address[bank][13:7];
                            block_we[bank] = temp_address[bank][6:5];
                            block_wdata[bank][temp_address[bank][6:5]] = temp_in_data[bank];
                        end
                    end
                    //LOADS
                    else if(mem_read != `NO_MEM_READ) begin
                        if(mem_read == `LB_MEM_READ) begin
                            //TODO
                        end
                        else if (mem_read == `LH_MEM_READ)
                        begin
                            //TODO
                        end
                        else if (mem_read == `LW_MEM_READ)
                        begin
                            block_addr[bank] = temp_address[bank][13:7];
                            // Route the selected word back to the lane that
                            // the arbiter picked for this bank.
                            temp_out_data[req_num[bank]] = block_rdata[bank][temp_address[bank][6:5]];
                            temp_out_valid[req_num[bank]] = 1'b1;
                        end
                        else if (mem_read == `LBU_MEM_READ)
                        begin
                            //TODO
                        end
                        else if (mem_read == `LHU_MEM_READ)
                        begin
                            //TODO
                        end
                    end
                end
            end
        end
    end

endmodule

View File

@@ -0,0 +1,81 @@
// VX_shared_memory_block
//
// One shared-memory bank: 128 lines, each holding four 32-bit words.
//
// Inputs:
//   clk       - clock for the write port
//   addr      - line address (0..127), used for both read and write
//   wdata     - four candidate write words; only wdata[we] is stored
//   we        - selects which of the four words in the line is written
//   shm_write - when high, the selected word is written on the clock edge
// Outputs:
//   data_out  - the full addressed line, read combinationally; forced to 0
//               while shm_write is high
module VX_shared_memory_block (
    input clk, // Clock
    input wire[6:0] addr,
    input wire[3:0][31:0] wdata,
    input wire[1:0] we,
    input wire shm_write,
    output wire[3:0][31:0] data_out
);

    logic [3:0][31:0] shared_memory[127:0];

    //wire need_to_write = (|we);

    // Synchronous single-word write.
    always @(posedge clk) begin
        if (shm_write) begin
            case (we)
                2'b00: shared_memory[addr][0][31:0] <= wdata[0][31:0];
                2'b01: shared_memory[addr][1][31:0] <= wdata[1][31:0];
                2'b10: shared_memory[addr][2][31:0] <= wdata[2][31:0];
                2'b11: shared_memory[addr][3][31:0] <= wdata[3][31:0];
                default: ; // unknown select: store nothing
            endcase
        end
    end

    // Combinational line read, blanked during writes.
    assign data_out = shm_write ? 0 : shared_memory[addr];

    // Reference wiring for the ASIC register-file macro (currently disabled).
    // wire cena = 1;
    // wire cenb = shm_write;
    // wire[3:0][31:0] write_bit_mask;
    // assign write_bit_mask[0] = (we == 2'b00) ? 0 : {32{1'b1}};
    // assign write_bit_mask[1] = (we == 2'b01) ? 0 : {32{1'b1}};
    // assign write_bit_mask[2] = (we == 2'b10) ? 0 : {32{1'b1}};
    // assign write_bit_mask[3] = (we == 2'b11) ? 0 : {32{1'b1}};
    // // Using ASIC MEM
    // /* verilator lint_off PINCONNECTEMPTY */
    // rf2_128x128_wm1 first_ram (
    // .CENYA(),
    // .AYA(),
    // .CENYB(),
    // .WENYB(),
    // .AYB(),
    // .QA(data_out),
    // .SOA(),
    // .SOB(),
    // .CLKA(clk),
    // .CENA(cena),
    // .AA(addr),
    // .CLKB(clk),
    // .CENB(cenb),
    // .WENB(write_bit_mask),
    // .AB(addr),
    // .DB(wdata),
    // .EMAA(3'b011),
    // .EMASA(1'b0),
    // .EMAB(3'b011),
    // .TENA(1'b1),
    // .TCENA(1'b0),
    // .TAA(5'b0),
    // .TENB(1'b1),
    // .TCENB(1'b0),
    // .TWENB(128'b0),
    // .TAB(5'b0),
    // .TDB(128'b0),
    // .RET1N(1'b1),
    // .SIA(2'b0),
    // .SEA(1'b0),
    // .DFTRAMBYP(1'b0),
    // .SIB(2'b0),
    // .SEB(1'b0),
    // .COLLDISN(1'b1)
    // );
    // /* verilator lint_on PINCONNECTEMPTY */

endmodule