rtl refactoring

This commit is contained in:
Blaise Tine
2020-05-03 17:10:02 -04:00
parent a1dc90b951
commit 69f607b73e
83 changed files with 30487 additions and 30536 deletions

View File

@@ -2,11 +2,11 @@ all: singlecore
CF += -std=c++11 -fms-extensions
VF += -compiler gcc --language 1800-2009 --assert -Wall -Wpedantic
VF += --language 1800-2009 --assert -Wall -Wpedantic
VF += -exe $(SRCS) $(INCLUDE)
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate
@@ -21,58 +21,59 @@ THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu
build_config:
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./simulate/VX_config.h
gen-singlecore: build_config
gen-s: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG'
gen-singlecore-t: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
gen-singlecore-d: build_config
gen-sd: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
gen-multicore: build_config
gen-st: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
gen-m: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-multicore-t: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
gen-multicore-d: build_config
gen-md: build_config
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
singlecore: gen-singlecore
gen-mt: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
build-s: gen-s
(cd obj_dir && make -j -f VVortex_Socket.mk)
singlecore-t: gen-singlecore-t
build-sd: gen-sd
(cd obj_dir && make -j -f VVortex_Socket.mk)
singlecore-d: gen-singlecore-d
build-st: gen-st
(cd obj_dir && make -j -f VVortex_Socket.mk)
multicore: gen-multicore
build-m: gen-m
(cd obj_dir && make -j -f VVortex_Socket.mk)
multicore-t: gen-multicore-t
build-md: gen-md
(cd obj_dir && make -j -f VVortex_Socket.mk)
multicore-d: gen-multicore-d
build-mt: gen-mt
(cd obj_dir && make -j -f VVortex_Socket.mk)
run: singlecore
run: run-s
run-s: build-s
(cd obj_dir && ./VVortex_Socket)
run-d: singlecore-d
run-sd: build-sd
(cd obj_dir && ./VVortex_Socket)
run-t: singlecore-t
run-st: build-st
(cd obj_dir && ./VVortex_Socket)
run-m: multicore
run-m: build-m
(cd obj_dir && ./VVortex_Socket)
run-md: multicore-d
run-md: build-md
(cd obj_dir && ./VVortex_Socket)
run-mt: multicore-t
run-mt: build-mt
(cd obj_dir && ./VVortex_Socket)
clean:

View File

@@ -28,21 +28,39 @@
`endif
`ifndef NUM_CSRS
`define NUM_CSRS 1024
`endif
`ifndef IO_BUS_ADDR
`define IO_BUS_ADDR 32'h00010000
`define NUM_CSRS 2
`endif
`ifndef STARTUP_ADDR
`define STARTUP_ADDR 32'h80000000
`endif
`ifndef SHARED_MEM_ADDR_MATCH
`define SHARED_MEM_ADDR_MATCH(x) (x[31:24] == 8'hFF)
`ifndef SHARED_MEM_TOP_ADDR
`define SHARED_MEM_TOP_ADDR 8'hFF
`endif
`ifndef IO_BUS_ADDR
`define IO_BUS_ADDR 32'h00010000
`endif
`ifndef STACK_BASE_ADDR
`define STACK_BASE_ADDR 20'h6ffff
`endif
`ifndef L2_ENABLE
`define L2_ENABLE (`NUM_CORES > 1)
`endif
`define CSR_LTID 12'h020
`define CSR_LWID 12'h021
`define CSR_GWID 12'h022
`define CSR_GTID 12'h023
`define CSR_CYCLL 12'hC00
`define CSR_CYCLH 12'hC80
`define CSR_INSTL 12'hC02
`define CSR_INSTH 12'hC82
// ========================= Dcache Configurable Knobs ========================
// Size of cache in bytes

View File

@@ -1,84 +1,57 @@
`include "VX_define.vh"
module VX_csr_data (
module VX_csr_data #(
parameter CORE_ID = 0
) (
input wire clk, // Clock
input wire reset,
input wire[`CSR_ADDR_SIZE-1:0] read_csr_address,
input wire write_valid,
input wire[`CSR_WIDTH-1:0] write_csr_data,
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
output reg[31:0] read_data,
input wire write_enable,
`IGNORE_WARNINGS_BEGIN
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
input wire[`CSR_ADDR_SIZE-1:0] write_csr_address,
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
`IGNORE_WARNINGS_END
output wire[31:0] read_csr_data,
// For instruction retire counting
input wire writeback_valid
input wire[`CSR_WIDTH-1:0] write_data,
input wire[`NW_BITS-1:0] warp_num,
input wire wb_valid
);
// wire[`NUM_THREADS-1:0][31:0] thread_ids;
// wire[`NUM_THREADS-1:0][31:0] warp_ids;
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
// genvar cur_t;
// for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
// assign thread_ids[cur_t] = cur_t;
// end
// genvar cur_tw;
// for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
// assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, in_read_warp_num};
// end
reg [`CSR_WIDTH-1:0] csr[`NUM_CSRS-1:0];
reg [63:0] cycle;
reg [63:0] instret;
wire read_cycle;
wire read_cycleh;
wire read_instret;
wire read_instreth;
assign read_cycle = read_csr_address == `CSR_CYCL_L;
assign read_cycleh = read_csr_address == `CSR_CYCL_H;
assign read_instret = read_csr_address == `CSR_INST_L;
assign read_instreth = read_csr_address == `CSR_INST_H;
wire [$clog2(`NUM_CSRS)-1:0] read_addr, write_addr;
reg [63:0] num_cycles, num_instrs;
// cast address to physical CSR range
assign read_addr = $size(read_addr)'(read_csr_address);
assign write_addr = $size(write_addr)'(write_csr_address);
// wire thread_select = read_csr_address == 12'h20;
// wire warp_select = read_csr_address == 12'h21;
// assign read_csr_data = thread_select ? thread_ids :
// warp_select ? warp_ids :
// 0;
genvar curr_e;
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
assign rd_addr = $size(rd_addr)'(read_addr);
assign wr_addr = $size(wr_addr)'(write_addr);
always @(posedge clk) begin
if (reset) begin
cycle <= 0;
instret <= 0;
if (reset) begin
num_cycles <= 0;
num_instrs <= 0;
end else begin
cycle <= cycle + 1;
if (write_valid) begin
csr[write_addr] <= write_csr_data;
if (write_enable) begin
csr_table[wr_addr] <= write_data;
end
if (writeback_valid) begin
instret <= instret + 1;
num_cycles <= num_cycles + 1;
if (wb_valid) begin
num_instrs <= num_instrs + 1;
end
end
end
assign read_csr_data = read_cycle ? cycle[31:0] :
read_cycleh ? cycle[63:32] :
read_instret ? instret[31:0] :
read_instreth ? instret[63:32] :
{{20{1'b0}}, csr[read_addr]};
endmodule : VX_csr_data
always @(*) begin
case (read_addr)
`CSR_LWID : read_data = 32'(warp_num);
`CSR_GTID ,
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
`CSR_CYCLL : read_data = num_cycles[31:0];
`CSR_CYCLH : read_data = num_cycles[63:32];
`CSR_INSTL : read_data = num_instrs[31:0];
`CSR_INSTH : read_data = num_instrs[63:32];
default: read_data = 32'(csr_table[rd_addr]);
endcase
end
endmodule

View File

@@ -24,24 +24,23 @@ module VX_csr_pipe #(
wire[31:0] csr_read_data_unqual;
wire[31:0] csr_read_data;
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && |(csr_req_if.valid);
assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
wire writeback = |writeback_if.valid;
VX_csr_data csr_data(
.clk (clk),
.reset (reset),
.read_csr_address (csr_req_if.csr_address),
.write_valid (is_csr_s2),
.write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_csr_address (csr_address_s2),
.read_csr_data (csr_read_data_unqual),
.writeback_valid (writeback)
VX_csr_data #(
.CORE_ID(CORE_ID)
) csr_data (
.clk (clk),
.reset (reset),
.read_addr (csr_req_if.csr_address),
.read_data (csr_read_data_unqual),
.write_enable (is_csr_s2),
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_addr (csr_address_s2),
.warp_num (csr_req_if.warp_num),
.wb_valid (| writeback_if.valid)
);
reg [31:0] csr_updated_data;
assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
reg [31:0] csr_updated_data;
always @(*) begin
case (csr_req_if.alu_op)
@@ -52,55 +51,29 @@ module VX_csr_pipe #(
endcase
end
wire zero = 0;
VX_generic_register #(
.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)
) csr_reg_s2 (
.clk (clk),
.reset(reset),
.stall(no_slot_csr),
.flush(zero),
.flush(0),
.in ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_address, csr_read_data , csr_updated_data }),
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
);
wire [`NUM_THREADS-1:0][31:0] final_csr_data;
assign csr_wb_if.valid = valid_s2;
assign csr_wb_if.warp_num = warp_num_s2;
assign csr_wb_if.rd = rd_s2;
assign csr_wb_if.wb = wb_s2;
wire [`NUM_THREADS-1:0][31:0] thread_ids;
wire [`NUM_THREADS-1:0][31:0] warp_ids;
wire [`NUM_THREADS-1:0][31:0] warp_idz;
wire [`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;
genvar i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
assign csr_wb_if.data[i] = (csr_address_s2 == `CSR_LTID) ? i :
(csr_address_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
csr_read_data_s2;
end
genvar cur_t;
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
assign thread_ids[cur_t] = cur_t;
end
genvar cur_tw;
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
assign warp_ids[cur_tw] = 32'(warp_num_s2);
assign warp_idz[cur_tw] = 32'(warp_num_s2) + (CORE_ID * `NUM_WARPS);
end
genvar cur_v;
for (cur_v = 0; cur_v < `NUM_THREADS; cur_v = cur_v + 1) begin
assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
end
wire thread_select = (csr_address_s2 == `CSR_THREAD);
wire warp_select = (csr_address_s2 == `CSR_WARP);
wire warp_id_select = (csr_address_s2 == `CSR_WARP_ID);
assign final_csr_data = thread_select ? thread_ids :
warp_select ? warp_ids :
warp_id_select ? warp_idz :
csr_vec_read_data_s2;
assign csr_wb_if.valid = valid_s2;
assign csr_wb_if.warp_num = warp_num_s2;
assign csr_wb_if.rd = rd_s2;
assign csr_wb_if.wb = wb_s2;
assign csr_wb_if.data = final_csr_data;
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && (| csr_req_if.valid);
endmodule

View File

@@ -50,17 +50,6 @@
`define CSR_WIDTH 12
///////////////////////////////////////////////////////////////////////////////
`define CSR_THREAD 12'h020
`define CSR_WARP 12'h021
`define CSR_WARP_ID 12'h022
`define CSR_CYCL_L 12'hC00;
`define CSR_CYCL_H 12'hC80;
`define CSR_INST_L 12'hC02;
`define CSR_INST_H 12'hC82;
///////////////////////////////////////////////////////////////////////////////
`define R_INST 7'd51
@@ -192,7 +181,7 @@
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
// DRAM request tag bits
`define L2DRAM_TAG_WIDTH ((`NUM_CORES > 1) ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+1))
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
////////////////////////// L3cache Configurable Knobs /////////////////////////

View File

@@ -46,7 +46,7 @@ module VX_dmem_ctrl (
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_rsp_dcache_if();
wire to_shm = `SHARED_MEM_ADDR_MATCH(dcache_core_req_if.core_req_addr[0]);
wire to_shm = (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR);
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_rsp_valid);
// Dcache Request

95
hw/rtl/VX_dram_arb.v Normal file
View File

@@ -0,0 +1,95 @@
`include "VX_define.vh"
// VX_dram_arb
//
// Time-multiplexes NUM_REQUESTS core-side DRAM request ports onto a single
// DRAM port and routes DRAM responses back to the originating port.
//
// Request path : `bus_sel` advances every cycle; only the requester whose
//                index equals `bus_sel` is forwarded to DRAM and observes
//                `dram_req_ready`. Its index is appended to the low bits of
//                the outgoing tag.
// Response path: the low REQ_BITS bits of `dram_rsp_tag` select the target
//                requester; the remaining bits are returned as its tag.
//
// NOTE(review): the tag concatenation assumes
// DRAM_TAG_WIDTH == CORE_TAG_WIDTH + `LOG2UP(NUM_REQUESTS) -- confirm at the
// instantiation site.
// NOTE(review): `DRAM_ADDR_WIDTH / `BANK_LINE_WIDTH come from VX_define.vh and
// presumably depend on BANK_LINE_SIZE -- confirm.
module VX_dram_arb #(
    parameter BANK_LINE_SIZE = 1,
    parameter NUM_REQUESTS   = 1,
    parameter CORE_TAG_WIDTH = 1,
    parameter DRAM_TAG_WIDTH = 1
) (
    input wire clk,
    input wire reset,

    // Core request
    input wire [NUM_REQUESTS-1:0]                       core_req_read,
    input wire [NUM_REQUESTS-1:0]                       core_req_write,
    input wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] core_req_addr,
    input wire [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0] core_req_data,
    input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0]   core_req_tag,
    output reg [NUM_REQUESTS-1:0]                       core_req_ready,

    // Core response
    // Declared `reg` because these outputs are assigned inside always @(*);
    // procedural assignment to a `wire` is not legal.
    output reg [NUM_REQUESTS-1:0]                       core_rsp_valid,
    output reg [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0] core_rsp_data,
    output reg [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0]   core_rsp_tag,
    input wire [NUM_REQUESTS-1:0]                       core_rsp_ready,

    // DRAM request
    output reg                        dram_req_read,
    output reg                        dram_req_write,
    output reg [`DRAM_ADDR_WIDTH-1:0] dram_req_addr,
    output reg [`BANK_LINE_WIDTH-1:0] dram_req_data,
    output reg [DRAM_TAG_WIDTH-1:0]   dram_req_tag,
    input wire                        dram_req_ready,

    // DRAM response
    input wire                        dram_rsp_valid,
    input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data,
    input wire [DRAM_TAG_WIDTH-1:0]   dram_rsp_tag,
    output reg                        dram_rsp_ready
);
    // Number of tag bits used to encode the requester index.
    localparam REQ_BITS = `LOG2UP(NUM_REQUESTS);

    // Index of the requester that currently owns the DRAM request port.
    reg [REQ_BITS-1:0] bus_sel;

    // Rotate ownership every cycle, wrapping at NUM_REQUESTS-1 so that no
    // slot is spent on a non-existent requester when NUM_REQUESTS is not a
    // power of two.
    always @(posedge clk) begin
        if (reset) begin
            bus_sel <= 0;
        end else if (bus_sel == REQ_BITS'(NUM_REQUESTS - 1)) begin
            bus_sel <= 0;
        end else begin
            bus_sel <= bus_sel + 1;
        end
    end

    // Request mux: forward the selected requester, keep everyone else stalled.
    always @(*) begin
        // Idle defaults: drive a defined "no request" instead of high-Z.
        dram_req_read  = 0;
        dram_req_write = 0;
        dram_req_addr  = 0;
        dram_req_data  = 0;
        dram_req_tag   = 0;

        for (integer r = 0; r < NUM_REQUESTS; r++) begin
            if (bus_sel == REQ_BITS'(r)) begin
                dram_req_read     = core_req_read[r];
                dram_req_write    = core_req_write[r];
                dram_req_addr     = core_req_addr[r];
                dram_req_data     = core_req_data[r];
                // Low bits carry the requester index for response routing.
                dram_req_tag      = {core_req_tag[r], REQ_BITS'(r)};
                core_req_ready[r] = dram_req_ready;
            end else begin
                core_req_ready[r] = 0;
            end
        end
    end

    // Requester index recovered from the response tag.
    wire [REQ_BITS-1:0] rsp_sel = dram_rsp_tag[REQ_BITS-1:0];

    // Response demux: data/tag are broadcast, valid is asserted only for the
    // addressed requester, and its ready is reflected back to DRAM.
    always @(*) begin
        dram_rsp_ready = 0;

        for (integer r = 0; r < NUM_REQUESTS; r++) begin
            core_rsp_valid[r] = dram_rsp_valid & (rsp_sel == REQ_BITS'(r));
            core_rsp_data[r]  = dram_rsp_data;
            core_rsp_tag[r]   = dram_rsp_tag[REQ_BITS +: CORE_TAG_WIDTH];
            if (rsp_sel == REQ_BITS'(r)) begin
                dram_rsp_ready = core_rsp_ready[r];
            end
        end
    end

endmodule

View File

@@ -1,54 +0,0 @@
`include "VX_define.vh"
// VX_l1c_to_dram_arb
//
// Shares one DRAM interface between the L1 instruction cache and the L1 data
// cache of a single core.
//
// Request path : `cache_sel` toggles every cycle. The I-cache is forwarded
//                when it has a request and `cache_sel` is 0; in every other
//                case the D-cache is forwarded. A single bit identifying the
//                source (1 = I-cache) is appended as tag bit 0.
// Response path: tag bit 0 steers the response to the I-cache or D-cache; the
//                remaining tag bits are handed back unchanged.
//
// REQQ_SIZE is not used inside this module; it is kept so existing
// instantiations continue to elaborate.
module VX_l1c_to_dram_arb #(
    parameter REQQ_SIZE = 8
) (
    input wire clk,
    input wire reset,

    VX_cache_dram_req_if dcache_dram_req_if,
    VX_cache_dram_rsp_if dcache_dram_rsp_if,

    VX_cache_dram_req_if icache_dram_req_if,
    VX_cache_dram_rsp_if icache_dram_rsp_if,

    VX_cache_dram_req_if dram_req_if,
    VX_cache_dram_rsp_if dram_rsp_if
);
    // Alternating priority bit: 0 gives the slot to the I-cache.
    reg cache_sel;

    always @(posedge clk) begin
        if (reset) begin
            cache_sel <= 0;
        end else begin
            cache_sel <= ~cache_sel;
        end
    end

    // The I-cache owns the DRAM request port only when it actually has a
    // request during its slot.
    wire icache_req_valid = icache_dram_req_if.dram_req_read
                         || icache_dram_req_if.dram_req_write;
    wire icache_sel_out   = icache_req_valid && (cache_sel == 0);

    // Request mux
    assign dram_req_if.dram_req_read  = icache_sel_out ? icache_dram_req_if.dram_req_read
                                                       : dcache_dram_req_if.dram_req_read;
    assign dram_req_if.dram_req_write = icache_sel_out ? icache_dram_req_if.dram_req_write
                                                       : dcache_dram_req_if.dram_req_write;
    assign dram_req_if.dram_req_addr  = icache_sel_out ? icache_dram_req_if.dram_req_addr
                                                       : dcache_dram_req_if.dram_req_addr;
    assign dram_req_if.dram_req_data  = icache_sel_out ? icache_dram_req_if.dram_req_data
                                                       : dcache_dram_req_if.dram_req_data;
    assign dram_req_if.dram_req_tag   = {icache_sel_out ? icache_dram_req_if.dram_req_tag
                                                        : dcache_dram_req_if.dram_req_tag,
                                         icache_sel_out};

    // Ready must follow the same selection as the request mux. The D-cache is
    // forwarded whenever the I-cache is not selected, so it must see `ready`
    // in exactly those cycles; gating it on `cache_sel == 1` alone would let
    // DRAM accept a D-cache request that the D-cache believes was not taken,
    // causing it to be issued a second time.
    assign icache_dram_req_if.dram_req_ready = dram_req_if.dram_req_ready && (cache_sel == 0);
    assign dcache_dram_req_if.dram_req_ready = dram_req_if.dram_req_ready && ~icache_sel_out;

    // Response demux: tag bit 0 is the source bit inserted above.
    wire icache_sel_in = dram_rsp_if.dram_rsp_tag[0];

    assign icache_dram_rsp_if.dram_rsp_valid = dram_rsp_if.dram_rsp_valid && icache_sel_in;
    assign icache_dram_rsp_if.dram_rsp_data  = dram_rsp_if.dram_rsp_data;
    assign icache_dram_rsp_if.dram_rsp_tag   = dram_rsp_if.dram_rsp_tag[1 +: $bits(icache_dram_rsp_if.dram_rsp_tag)];

    assign dcache_dram_rsp_if.dram_rsp_valid = dram_rsp_if.dram_rsp_valid && ~icache_sel_in;
    assign dcache_dram_rsp_if.dram_rsp_data  = dram_rsp_if.dram_rsp_data;
    assign dcache_dram_rsp_if.dram_rsp_tag   = dram_rsp_if.dram_rsp_tag[1 +: $bits(dcache_dram_rsp_if.dram_rsp_tag)];

    // A response is accepted only when both caches can take one.
    assign dram_rsp_if.dram_rsp_ready = icache_dram_rsp_if.dram_rsp_ready
                                     && dcache_dram_rsp_if.dram_rsp_ready;

endmodule

View File

@@ -8,10 +8,6 @@ module Vortex #(
input wire clk,
input wire reset,
// IO
output wire io_valid,
output wire [31:0] io_data,
// DRAM Dcache Req
output wire D_dram_req_read,
output wire D_dram_req_write,
@@ -40,11 +36,17 @@ module Vortex #(
input wire [`IDRAM_TAG_WIDTH-1:0] I_dram_rsp_tag,
output wire I_dram_rsp_ready,
// LLC Snooping
// Cache Snooping
input wire llc_snp_req_valid,
input wire [`DDRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// I/O
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// Debug
output wire ebreak
);
`DEBUG_BEGIN
@@ -98,20 +100,17 @@ module Vortex #(
assign dcache_dram_rsp_if.dram_rsp_tag = D_dram_rsp_tag;
assign D_dram_rsp_ready = dcache_dram_rsp_if.dram_rsp_ready;
assign io_valid = (!memory_delay)
&& (|dcache_core_req_if.core_req_valid)
&& (dcache_core_req_if.core_req_write[0] != `WORD_SEL_NO)
&& (dcache_core_req_if.core_req_addr[0] == `IO_BUS_ADDR);
wire to_io_bus = (dcache_core_req_if.core_req_addr[0] == `IO_BUS_ADDR);
assign io_valid = |dcache_core_req_if.core_req_valid && to_io_bus;
assign io_data = dcache_core_req_if.core_req_data[0];
assign io_data = dcache_core_req_if.core_req_data[0];
assign dcache_core_req_qual_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~io_valid}};
assign dcache_core_req_qual_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~to_io_bus}};
assign dcache_core_req_qual_if.core_req_read = dcache_core_req_if.core_req_read;
assign dcache_core_req_qual_if.core_req_write = dcache_core_req_if.core_req_write;
assign dcache_core_req_qual_if.core_req_addr = dcache_core_req_if.core_req_addr;
assign dcache_core_req_qual_if.core_req_data = dcache_core_req_if.core_req_data;
assign dcache_core_req_qual_if.core_req_tag = dcache_core_req_if.core_req_tag;
assign dcache_core_req_if.core_req_ready = dcache_core_req_qual_if.core_req_ready;
assign dcache_core_req_if.core_req_ready = to_io_bus ? io_ready : dcache_core_req_qual_if.core_req_ready;
// Icache interfaces
VX_cache_core_req_if #(

View File

@@ -8,10 +8,6 @@ module Vortex_Cluster #(
input wire clk,
input wire reset,
// IO
output wire[`NUM_CORES-1:0] io_valid,
output wire[`NUM_CORES-1:0][31:0] io_data,
// DRAM Req
output wire dram_req_read,
output wire dram_req_write,
@@ -26,240 +22,140 @@ module Vortex_Cluster #(
input wire[`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// LLC Snooping
// Cache Snooping
input wire llc_snp_req_valid,
input wire[`L2DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// IO
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// Debug
output wire ebreak
);
if (`NUM_CORES == 1) begin
);
// DRAM Dcache Req
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
wire[`NUM_CORES-1:0] per_core_D_dram_req_write;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_req_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_req_ready;
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) dcache_dram_req_if();
// DRAM Dcache Rsp
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready;
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) dcache_dram_rsp_if();
// DRAM Icache Req
wire[`NUM_CORES-1:0] per_core_I_dram_req_read;
wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_req_ready;
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) icache_dram_req_if();
// DRAM Icache Rsp
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`IDRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`IDRAM_TAG_WIDTH)
) icache_dram_rsp_if();
// Snooping
wire snp_fwd_valid;
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`L2DRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`L2DRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`L2DRAM_TAG_WIDTH)
) dram_req_if();
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CORES-1:0] per_core_io_valid;
wire[`NUM_CORES-1:0][31:0] per_core_io_data;
`IGNORE_WARNINGS_END
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`L2DRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`L2DRAM_TAG_WIDTH)
) dram_rsp_if();
// ebreak
wire[`NUM_CORES-1:0] per_core_ebreak;
assign dram_req_read = dram_req_if.dram_req_read;
assign dram_req_write = dram_req_if.dram_req_write;
assign dram_req_addr = dram_req_if.dram_req_addr;
assign dram_req_data = dram_req_if.dram_req_data;
assign dram_req_tag = dram_req_if.dram_req_tag;
assign dram_req_if.dram_req_ready = dram_req_ready;
assign dram_rsp_if.dram_rsp_valid = dram_rsp_valid;
assign dram_rsp_if.dram_rsp_data = dram_rsp_data;
assign dram_rsp_if.dram_rsp_tag = dram_rsp_tag;
assign dram_rsp_ready = dram_rsp_if.dram_rsp_ready;
VX_l1c_to_dram_arb #(
.REQQ_SIZE(`L2REQQ_SIZE)
) l1c_to_dram_arb (
.clk (clk),
.reset (reset),
.dcache_dram_req_if (dcache_dram_req_if),
.dcache_dram_rsp_if (dcache_dram_rsp_if),
.icache_dram_req_if (icache_dram_req_if),
.icache_dram_rsp_if (icache_dram_rsp_if),
.dram_req_if (dram_req_if),
.dram_rsp_if (dram_rsp_if)
);
assign io_valid = per_core_io_valid[0];
assign io_data = per_core_io_data[0];
assign ebreak = (& per_core_ebreak);
genvar i;
for (i = 0; i < `NUM_CORES; i = i + 1) begin
Vortex #(
.CORE_ID(0)
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) vortex_core (
.clk (clk),
.reset (reset),
.io_valid (io_valid[0]),
.io_data (io_data[0]),
.D_dram_req_read (dcache_dram_req_if.dram_req_read),
.D_dram_req_write (dcache_dram_req_if.dram_req_write),
.D_dram_req_addr (dcache_dram_req_if.dram_req_addr),
.D_dram_req_data (dcache_dram_req_if.dram_req_data),
.D_dram_req_tag (dcache_dram_req_if.dram_req_tag),
.D_dram_req_ready (dcache_dram_req_if.dram_req_ready),
.D_dram_rsp_valid (dcache_dram_rsp_if.dram_rsp_valid),
.D_dram_rsp_data (dcache_dram_rsp_if.dram_rsp_data),
.D_dram_rsp_tag (dcache_dram_rsp_if.dram_rsp_tag),
.D_dram_rsp_ready (dcache_dram_rsp_if.dram_rsp_ready),
.I_dram_req_read (icache_dram_req_if.dram_req_read),
.I_dram_req_write (icache_dram_req_if.dram_req_write),
.I_dram_req_addr (icache_dram_req_if.dram_req_addr),
.I_dram_req_data (icache_dram_req_if.dram_req_data),
.I_dram_req_tag (icache_dram_req_if.dram_req_tag),
.I_dram_req_ready (icache_dram_req_if.dram_req_ready),
.I_dram_rsp_valid (icache_dram_rsp_if.dram_rsp_valid),
.I_dram_rsp_data (icache_dram_rsp_if.dram_rsp_data),
.I_dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready),
.I_dram_rsp_tag (icache_dram_rsp_if.dram_rsp_tag),
.llc_snp_req_valid (llc_snp_req_valid),
.llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_ready (llc_snp_req_ready),
.ebreak (ebreak)
.D_dram_req_read (per_core_D_dram_req_read [i]),
.D_dram_req_write (per_core_D_dram_req_write [i]),
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
.D_dram_req_data (per_core_D_dram_req_data [i]),
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
.D_dram_req_ready (per_core_D_dram_req_ready [i]),
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
.I_dram_req_read (per_core_I_dram_req_read [i]),
`IGNORE_WARNINGS_BEGIN
.I_dram_req_write (),
`IGNORE_WARNINGS_END
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
.I_dram_req_data (per_core_I_dram_req_data [i]),
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
.I_dram_req_ready (per_core_I_dram_req_ready [i]),
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.io_valid (per_core_io_valid [i]),
.io_data (per_core_io_data [i]),
.io_ready (io_ready),
.ebreak (per_core_ebreak [i])
);
end
end else begin
// DRAM Dcache Req
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
wire[`NUM_CORES-1:0] per_core_D_dram_req_write;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_req_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag;
// DRAM Dcache Rsp
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready;
// DRAM Icache Req
wire[`NUM_CORES-1:0] per_core_I_dram_req_read;
wire[`NUM_CORES-1:0] per_core_I_dram_req_write;
wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag;
// DRAM Icache Rsp
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
// Out ebreak
wire[`NUM_CORES-1:0] per_core_ebreak;
wire[`NUM_CORES-1:0] per_core_io_valid;
wire[`NUM_CORES-1:0][31:0] per_core_io_data;
wire l2_core_req_ready;
wire snp_fwd_valid;
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
assign ebreak = (& per_core_ebreak);
genvar i;
for (i = 0; i < `NUM_CORES; i = i + 1) begin
wire [`IDRAM_LINE_WIDTH-1:0] curr_core_D_dram_req_data;
wire [`DDRAM_LINE_WIDTH-1:0] curr_core_I_dram_req_data;
assign io_valid[i] = per_core_io_valid[i];
assign io_data[i] = per_core_io_data[i];
Vortex #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) vortex_core (
.clk (clk),
.reset (reset),
.io_valid (per_core_io_valid [i]),
.io_data (per_core_io_data [i]),
.D_dram_req_read (per_core_D_dram_req_read [i]),
.D_dram_req_write (per_core_D_dram_req_write [i]),
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
.D_dram_req_data (curr_core_D_dram_req_data ),
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
.D_dram_req_ready (l2_core_req_ready ),
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
.I_dram_req_read (per_core_I_dram_req_read [i]),
.I_dram_req_write (per_core_I_dram_req_write [i]),
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
.I_dram_req_data (curr_core_I_dram_req_data ),
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
.I_dram_req_ready (l2_core_req_ready ),
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.ebreak (per_core_ebreak [i])
);
assign per_core_D_dram_req_data [i] = curr_core_D_dram_req_data;
assign per_core_I_dram_req_data [i] = curr_core_I_dram_req_data;
end
if (`L2_ENABLE) begin
// L2 Cache ///////////////////////////////////////////////////////////
wire[`L2NUM_REQUESTS-1:0] l2_core_req_valid;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_mem_write;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_mem_read;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_read;
wire[`L2NUM_REQUESTS-1:0][31:0] l2_core_req_addr;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_req_tag;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_req_data;
wire l2_core_req_ready;
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_valid;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_rsp_data;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_ready;
wire[`DDRAM_LINE_WIDTH-1:0] l2_dram_req_data;
wire[`DDRAM_LINE_WIDTH-1:0] l2_dram_rsp_data;
assign dram_req_data = l2_dram_req_data;
assign l2_dram_rsp_data = dram_rsp_data;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
// Core Request
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
assign l2_core_req_valid [i+1] = (per_core_I_dram_req_read[(i/2)] | per_core_I_dram_req_write[(i/2)]);
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
assign l2_core_req_valid [i+1] = per_core_I_dram_req_read[(i/2)];
assign l2_core_req_read [i] = per_core_D_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_mem_write [i] = per_core_D_dram_req_write[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_mem_write [i+1] = `WORD_SEL_NO;
assign l2_core_req_write [i] = per_core_D_dram_req_write[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_write [i+1] = `WORD_SEL_NO;
assign l2_core_req_mem_read [i] = per_core_D_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_mem_read [i+1] = `WORD_SEL_NO;
assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign l2_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign l2_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign l2_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign per_core_D_dram_req_ready[(i/2)] = l2_core_req_ready;
assign per_core_I_dram_req_ready[(i/2)] = l2_core_req_ready;
assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1];
@@ -302,8 +198,8 @@ module Vortex_Cluster #(
// Core request
.core_req_valid (l2_core_req_valid),
.core_req_read (l2_core_req_mem_read),
.core_req_write (l2_core_req_mem_write),
.core_req_read (l2_core_req_read),
.core_req_write (l2_core_req_write),
.core_req_addr (l2_core_req_addr),
.core_req_data (l2_core_req_data),
.core_req_tag (l2_core_req_tag),
@@ -313,20 +209,20 @@ module Vortex_Cluster #(
.core_rsp_valid (l2_core_rsp_valid),
.core_rsp_data (l2_core_rsp_data),
.core_rsp_tag (l2_core_rsp_tag),
.core_rsp_ready (|l2_core_rsp_ready),
.core_rsp_ready (& l2_core_rsp_ready),
// DRAM request
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
.dram_req_addr (dram_req_addr),
.dram_req_data (l2_dram_req_data),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// L2 Cache DRAM Fill response
// DRAM response
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_data (l2_dram_rsp_data),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_ready (dram_rsp_ready),
// Snoop request
@@ -339,6 +235,95 @@ module Vortex_Cluster #(
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_ready (& per_core_snp_fwd_ready)
);
end else begin
// Flattened request/response buses that feed the VX_dram_arb instance below.
// Every bus has `L2NUM_REQUESTS slots; the generate loop that follows fills
// them two at a time from entry (i/2) of the per_core_D_dram_* and
// per_core_I_dram_* signals.
// NOTE(review): the I-side values are carried on `DDRAM_*-sized wires here;
// confirm the I-side address/tag/line widths match the D-side ones.
wire[`L2NUM_REQUESTS-1:0] per_core_req_read;
wire[`L2NUM_REQUESTS-1:0] per_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_req_addr;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_req_tag;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_req_data;
wire[`L2NUM_REQUESTS-1:0] per_core_req_ready;
wire[`L2NUM_REQUESTS-1:0] per_core_rsp_valid;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_rsp_data;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] per_core_rsp_ready;
// Slot layout: even slot i <- D-side port of entry (i/2),
// odd slot i+1 <- I-side port of entry (i/2).
// The loop variable i is declared outside this span.
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
// Request path (per-core ports -> arbiter slots)
assign per_core_req_read [i] = per_core_D_dram_req_read[(i/2)];
assign per_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)];
assign per_core_req_write [i] = per_core_D_dram_req_write[(i/2)];
// The I-side slot never issues a write: its write strobe is tied low.
assign per_core_req_write [i+1] = 0;
assign per_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
assign per_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
assign per_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign per_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign per_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign per_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
// Request back-pressure (arbiter slots -> per-core ports), one ready bit per slot
assign per_core_D_dram_req_ready[(i/2)] = per_core_req_ready[i];
assign per_core_I_dram_req_ready[(i/2)] = per_core_req_ready[i+1];
// Response path (arbiter slots -> per-core ports)
assign per_core_D_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i+1];
assign per_core_D_dram_rsp_data [(i/2)] = per_core_rsp_data[i];
assign per_core_I_dram_rsp_data [(i/2)] = per_core_rsp_data[i+1];
assign per_core_D_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i];
assign per_core_I_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i+1];
// Response back-pressure (per-core ports -> arbiter slots)
assign per_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
assign per_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
end
// VX_dram_arb instance: takes the `L2NUM_REQUESTS per_core_* request/response
// slots on its core side and drives this module's single dram_req_* /
// dram_rsp_* interface on its DRAM side.
// NOTE(review): the arbitration policy and the way DRAM_TAG_WIDTH relates to
// CORE_TAG_WIDTH live in VX_dram_arb, which is not visible here - confirm
// that `L2DRAM_TAG_WIDTH is wide enough for the arbiter's needs.
VX_dram_arb #(
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
.NUM_REQUESTS (`L2NUM_REQUESTS),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
) dram_arb (
.clk (clk),
.reset (reset),
// Core request (one slot per entry of the per_core_req_* buses)
.core_req_read (per_core_req_read),
.core_req_write (per_core_req_write),
.core_req_addr (per_core_req_addr),
.core_req_data (per_core_req_data),
.core_req_tag (per_core_req_tag),
.core_req_ready (per_core_req_ready),
// Core response (full per-slot ready vector is passed, not a reduction)
.core_rsp_valid (per_core_rsp_valid),
.core_rsp_data (per_core_rsp_data),
.core_rsp_tag (per_core_rsp_tag),
.core_rsp_ready (per_core_rsp_ready),
// DRAM request (wired straight to this module's dram_req_* signals)
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// DRAM response (wired straight to this module's dram_rsp_* signals)
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_ready (dram_rsp_ready)
);
// Cache snooping: in this branch the incoming LLC snoop request is forwarded
// unchanged (valid and address) on snp_fwd_*, and the request is reported
// ready only when every bit of per_core_snp_fwd_ready is set (AND-reduction).
// NOTE(review): presumably this is the configuration without an L2 cache
// in between - confirm against the enclosing generate condition.
assign snp_fwd_valid = llc_snp_req_valid;
assign snp_fwd_addr = llc_snp_req_addr;
assign llc_snp_req_ready = & per_core_snp_fwd_ready;
end
endmodule

View File

@@ -6,10 +6,6 @@ module Vortex_Socket (
input wire clk,
input wire reset,
// IO
output wire io_valid[(`NUM_CORES * `NUM_CLUSTERS)-1:0],
output wire[31:0] io_data [(`NUM_CORES * `NUM_CLUSTERS)-1:0],
// DRAM Req
output wire dram_req_read,
output wire dram_req_write,
@@ -24,32 +20,26 @@ module Vortex_Socket (
input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// LLC Snooping
// Cache Snooping
input wire llc_snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// I/O
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// Debug
output wire ebreak
);
if (`NUM_CLUSTERS == 1) begin
wire[`NUM_CORES-1:0] cluster_io_valid;
wire[`NUM_CORES-1:0][31:0] cluster_io_data;
genvar i;
for (i = 0; i < `NUM_CORES; i=i+1) begin
assign io_valid [i] = cluster_io_valid [i];
assign io_data [i] = cluster_io_data [i];
end
Vortex_Cluster #(
.CLUSTER_ID(0)
) Vortex_Cluster (
.clk (clk),
.reset (reset),
.io_valid (cluster_io_valid),
.io_data (cluster_io_data),
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
@@ -67,51 +57,46 @@ module Vortex_Socket (
.llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_ready (llc_snp_req_ready),
.io_valid (io_valid),
.io_data (io_data),
.io_ready (io_ready),
.ebreak (ebreak)
);
end else begin
wire snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
assign ebreak = (& per_cluster_ebreak);
// // DRAM Dcache Req
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
// DRAM Dcache Req
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data_up;
wire l3_core_req_ready;
// // DRAM Dcache Rsp
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
// DRAM Dcache Rsp
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data_up;
wire snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
wire[`NUM_CLUSTERS-1:0][`NUM_CORES-1:0] per_cluster_io_valid;
wire[`NUM_CLUSTERS-1:0][`NUM_CORES-1:0][31:0] per_cluster_io_data;
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CLUSTERS-1:0] per_cluster_io_valid;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_data;
`IGNORE_WARNINGS_END
genvar i, j;
for (i = 0; i < `NUM_CLUSTERS; i = i + 1) begin
for (j = 0; j < `NUM_CORES; j = j + 1) begin
assign io_valid[j+(i*`NUM_CORES)] = per_cluster_io_valid[i][j];
assign io_data[j+(i*`NUM_CORES)] = per_cluster_io_data[i][j];
end
assign per_cluster_dram_req_data[i] = per_cluster_dram_req_data_up[i];
assign per_cluster_dram_rsp_data_up[i] = per_cluster_dram_rsp_data[i];
end
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
assign io_valid = per_cluster_io_valid[0];
assign io_data = per_cluster_io_data[0];
assign ebreak = (& per_cluster_ebreak);
genvar i;
for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin
Vortex_Cluster #(
.CLUSTER_ID(i)
@@ -119,34 +104,35 @@ module Vortex_Socket (
.clk (clk),
.reset (reset),
.io_valid (per_cluster_io_valid [i]),
.io_data (per_cluster_io_data [i]),
.dram_req_write (per_cluster_dram_req_write [i]),
.dram_req_read (per_cluster_dram_req_read [i]),
.dram_req_addr (per_cluster_dram_req_addr [i]),
.dram_req_data (per_cluster_dram_req_data_up [i]),
.dram_req_tag (per_cluster_dram_req_tag [i]),
.dram_req_write (per_cluster_dram_req_write [i]),
.dram_req_read (per_cluster_dram_req_read [i]),
.dram_req_addr (per_cluster_dram_req_addr [i]),
.dram_req_data (per_cluster_dram_req_data [i]),
.dram_req_tag (per_cluster_dram_req_tag [i]),
.dram_req_ready (l3_core_req_ready),
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
.dram_rsp_data (per_cluster_dram_rsp_data_up [i]),
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
.dram_rsp_data (per_cluster_dram_rsp_data [i]),
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
.ebreak (per_cluster_ebreak [i])
.io_valid (per_cluster_io_valid [i]),
.io_data (per_cluster_io_data [i]),
.io_ready (io_ready),
.ebreak (per_cluster_ebreak [i])
);
end
// L3 Cache ///////////////////////////////////////////////////////////
wire[`L3NUM_REQUESTS-1:0] l3_core_req_valid;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_mem_write;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_mem_read;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_read;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_write;
wire[`L3NUM_REQUESTS-1:0][31:0] l3_core_req_addr;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
@@ -156,23 +142,17 @@ module Vortex_Socket (
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
wire[`L3NUM_REQUESTS-1:0] l3_core_rsp_ready;
wire[`L3DRAM_LINE_WIDTH-1:0] l3_dram_req_data;
wire[`L3DRAM_LINE_WIDTH-1:0] l3_dram_rsp_data;
assign dram_req_data = l3_dram_req_data;
assign l3_dram_rsp_data = dram_rsp_data;
for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin
// Core Request
assign l3_core_req_valid [i] = (per_cluster_dram_req_read[i] | per_cluster_dram_req_write[i]);
assign l3_core_req_mem_read [i] = per_cluster_dram_req_read [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_mem_write [i] = per_cluster_dram_req_write[i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]);
assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
// Core can't accept Response
assign l3_core_rsp_ready [i] = per_cluster_dram_rsp_ready[i];
// Core Response
assign l3_core_rsp_ready [i] = per_cluster_dram_rsp_ready[i];
// Cache Fill Response
assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i];
@@ -208,8 +188,8 @@ module Vortex_Socket (
// Core request
.core_req_valid (l3_core_req_valid),
.core_req_read (l3_core_req_mem_read),
.core_req_write (l3_core_req_mem_write),
.core_req_read (l3_core_req_read),
.core_req_write (l3_core_req_write),
.core_req_addr (l3_core_req_addr),
.core_req_data (l3_core_req_data),
.core_req_tag (l3_core_req_tag),
@@ -219,19 +199,19 @@ module Vortex_Socket (
.core_rsp_valid (l3_core_rsp_valid),
.core_rsp_data (l3_core_rsp_data),
.core_rsp_tag (l3_core_rsp_tag),
.core_rsp_ready (|l3_core_rsp_ready),
.core_rsp_ready (& l3_core_rsp_ready),
// DRAM request
.dram_req_write (dram_req_write),
.dram_req_read (dram_req_read),
.dram_req_addr (dram_req_addr),
.dram_req_data (l3_dram_req_data),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// DRAM response
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_data (l3_dram_rsp_data),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready),

View File

@@ -85,7 +85,7 @@ module VX_cache_core_rsp_merge #(
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];

View File

@@ -98,19 +98,13 @@ void Simulator::dbus_driver() {
vortex_->dram_req_ready = ~dram_stalled_;
}
void Simulator::io_handler() {
bool io_valid = false;
for (int c = 0; c < NUM_CORES; c++) {
if (vortex_->io_valid[c]) {
uint32_t data_write = (uint32_t)vortex_->io_data[c];
char c = (char)data_write;
std::cerr << c;
io_valid = true;
}
}
if (io_valid) {
std::cout << std::flush;
// Services the design's I/O port for the current simulation step.
// If the model asserts io_valid, io_data is narrowed to a single char and
// written to std::cerr. io_ready is then driven high unconditionally, so
// this driver never applies back-pressure to the design.
void Simulator::io_driver() {
if (vortex_->io_valid) {
// Only the bits that fit in a char are emitted; the rest of the
// 32-bit word is discarded by the cast.
uint32_t data_write = (uint32_t)vortex_->io_data;
char c = (char)data_write;
std::cerr << c;
}
// Always accept: the simulator side is ready on every step.
vortex_->io_ready = true;
}
void Simulator::reset() {
@@ -128,7 +122,7 @@ void Simulator::step() {
this->eval();
dbus_driver();
io_handler();
io_driver();
}
void Simulator::eval() {
@@ -149,7 +143,9 @@ bool Simulator::is_busy() {
return (0 == vortex_->ebreak);
}
void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// send snoop requests to the caches
printf("[sim] total cycles: %ld\n", time_stamp/2);
// align address to LLC block boundaries
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
@@ -169,12 +165,6 @@ void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
vortex_->llc_snp_req_valid = true;
}
}
}
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// send snoop requests to the caches
printf("[sim] total cycles: %ld\n", time_stamp/2);
this->send_snoops(mem_addr, size);
this->wait(PIPELINE_FLUSH_LATENCY);
}
@@ -192,12 +182,12 @@ bool Simulator::run() {
// check riscv-tests PASSED/FAILED status
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#else
#if (NUM_CLUSTERS == 1)
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk2__DOT__genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#else
int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk2__BRA__0__KET____DOT__Vortex_Cluster->genblk2__DOT__genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk1__BRA__0__KET____DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#endif
#endif

View File

@@ -35,18 +35,16 @@ public:
bool is_busy();
void reset();
void step();
void wait(uint32_t cycles);
void flush_caches(uint32_t mem_addr, uint32_t size);
bool run();
bool run();
void print_stats(std::ostream& out);
private:
void eval();
void wait(uint32_t cycles);
void eval();
void dbus_driver();
void io_handler();
void send_snoops(uint32_t mem_addr, uint32_t size);
void io_driver();
bool dram_stalled_;
std::vector<dram_req_t> dram_req_vec_;