RTL code refactoring
This commit is contained in:
@@ -23,6 +23,8 @@ SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
|
||||
|
||||
RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/generic_cache -I../../hw/rtl/shared_memory -I../../hw/rtl/pipe_regs -I../../hw/rtl/compat
|
||||
|
||||
VL_FLAGS += --assert -Wall -Wpedantic
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
@@ -10,6 +10,8 @@ EXE += --exe ./simulate/testbench.cpp ./simulate/simulator.cpp
|
||||
|
||||
VF += -compiler gcc --language 1800-2009
|
||||
|
||||
VF += --assert -Wall -Wpedantic
|
||||
|
||||
# LIB=-LDFLAGS '-L/usr/local/systemc/'
|
||||
LIB +=
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,3 @@
|
||||
|
||||
`include "../VX_define.vh"
|
||||
|
||||
//`define NUM_BANKS 8
|
||||
@@ -118,7 +117,7 @@ reg[31:0] io_data;
|
||||
.i_m_readdata_i (i_m_readdata_i),
|
||||
.i_m_ready_i (i_m_ready_i),
|
||||
.out_ebreak (out_ebreak)
|
||||
);
|
||||
);
|
||||
|
||||
always @(negedge clk) begin
|
||||
ibus_driver(clk, o_m_read_addr_i, o_m_evict_addr_i, o_m_valid_i, o_m_writedata_i, o_m_read_or_write_i, `ICACHE_BANKS, `ICACHE_NUM_WORDS_PER_BLOCK, i_m_readdata_i, i_m_ready_i);
|
||||
@@ -138,14 +137,13 @@ reg[31:0] io_data;
|
||||
cycle_num = cycle_num + 1;
|
||||
end
|
||||
|
||||
always @(clk, posedge reset) begin
|
||||
always @(clk) begin
|
||||
if (reset) begin
|
||||
reset = 0;
|
||||
clk = 0;
|
||||
end
|
||||
|
||||
#5 clk <= ~clk;
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -92,7 +92,7 @@ vortex_afu.json
|
||||
../rtl/interfaces/VX_gpu_dcache_dram_req_inter.v
|
||||
../rtl/interfaces/VX_csr_req_inter.v
|
||||
../rtl/interfaces/VX_icache_request_inter.v
|
||||
../rtl/interfaces/VX_gpu_dcache_res_inter.v
|
||||
../rtl/interfaces/VX_gpu_dcache_rsp_inter.v
|
||||
../rtl/interfaces/VX_frE_to_bckE_req_inter.v
|
||||
../rtl/interfaces/VX_dram_req_rsp_inter.v
|
||||
../rtl/interfaces/VX_dcache_request_inter.v
|
||||
@@ -113,7 +113,7 @@ vortex_afu.json
|
||||
../rtl/interfaces/VX_jal_response_inter.v
|
||||
../rtl/interfaces/VX_warp_ctl_inter.v
|
||||
../rtl/interfaces/VX_gpu_dcache_snp_req_inter.v
|
||||
../rtl/interfaces/VX_gpu_dcache_dram_res_inter.v
|
||||
../rtl/interfaces/VX_gpu_dcache_dram_rsp_inter.v
|
||||
../rtl/interfaces/VX_inst_mem_wb_inter.v
|
||||
|
||||
ccip_interface_reg.sv
|
||||
|
||||
@@ -70,16 +70,16 @@ logic vx_dram_req_read;
|
||||
logic vx_dram_req_write;
|
||||
logic [31:0] vx_dram_req_addr;
|
||||
logic [31:0] vx_dram_req_data[15:0];
|
||||
logic vx_dram_req_delay;
|
||||
logic vx_dram_req_full;
|
||||
|
||||
logic vx_dram_fill_accept;
|
||||
logic vx_dram_fill_rsp;
|
||||
logic [31:0] vx_dram_fill_rsp_addr;
|
||||
logic [31:0] vx_dram_fill_rsp_data[15:0];
|
||||
logic vx_dram_rsp_ready;
|
||||
logic vx_dram_rsp_valid;
|
||||
logic [31:0] vx_dram_rsp_addr;
|
||||
logic [31:0] vx_dram_rsp_data[15:0];
|
||||
|
||||
logic vx_snp_req;
|
||||
logic [31:0] vx_snp_req_addr;
|
||||
logic vx_snp_req_delay;
|
||||
logic vx_snp_req_full;
|
||||
|
||||
logic vx_ebreak;
|
||||
|
||||
@@ -316,7 +316,7 @@ begin
|
||||
|
||||
STATE_RUN, STATE_CLFLUSH: begin
|
||||
if (vx_dram_req_read
|
||||
&& !vx_dram_req_delay)
|
||||
&& !vx_dram_req_full)
|
||||
begin
|
||||
avs_address <= (vx_dram_req_addr >> 6);
|
||||
avs_read <= 1;
|
||||
@@ -324,7 +324,7 @@ begin
|
||||
end
|
||||
|
||||
if (vx_dram_req_write
|
||||
&& !vx_dram_req_delay)
|
||||
&& !vx_dram_req_full)
|
||||
begin
|
||||
avs_writedata <= {>>{vx_dram_req_data}};
|
||||
avs_address <= (vx_dram_req_addr >> 6);
|
||||
@@ -348,16 +348,16 @@ logic vortex_enabled;
|
||||
always_comb
|
||||
begin
|
||||
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
vx_dram_req_delay = !vortex_enabled || avs_waitrequest || avs_raq_full || avs_rdq_full;
|
||||
vx_dram_req_full = !vortex_enabled || avs_waitrequest || avs_raq_full || avs_rdq_full;
|
||||
end
|
||||
|
||||
// Vortex DRAM fill response
|
||||
|
||||
always_comb
|
||||
begin
|
||||
vx_dram_fill_rsp = vortex_enabled && !avs_rdq_empty && vx_dram_fill_accept;
|
||||
vx_dram_fill_rsp_addr = (avs_raq_dout << 6);
|
||||
{>>{vx_dram_fill_rsp_data}} = avs_rdq_dout;
|
||||
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
|
||||
vx_dram_rsp_addr = (avs_raq_dout << 6);
|
||||
{>>{vx_dram_rsp_data}} = avs_rdq_dout;
|
||||
end
|
||||
|
||||
// AVS address read request queue /////////////////////////////////////////////
|
||||
@@ -366,7 +366,7 @@ logic cci_write_req;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
avs_raq_pop = vx_dram_fill_rsp || cci_write_req;
|
||||
avs_raq_pop = vx_dram_rsp_valid || cci_write_req;
|
||||
avs_raq_din = avs_address;
|
||||
avs_raq_push = avs_read;
|
||||
end
|
||||
@@ -531,7 +531,7 @@ begin
|
||||
|
||||
if ((STATE_CLFLUSH == state)
|
||||
&& vx_snoop_ctr < csr_data_size
|
||||
&& !vx_snp_req_delay)
|
||||
&& !vx_snp_req_full)
|
||||
begin
|
||||
vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
|
||||
vx_snp_req <= 1;
|
||||
@@ -548,29 +548,29 @@ end
|
||||
// Vortex binding /////////////////////////////////////////////////////////////
|
||||
|
||||
Vortex_Socket #() vx_socket (
|
||||
.clk (clk),
|
||||
.reset (SoftReset || vx_reset),
|
||||
.clk (clk),
|
||||
.reset (SoftReset || vx_reset),
|
||||
|
||||
// DRAM Req
|
||||
.out_dram_req_write (vx_dram_req_write),
|
||||
.out_dram_req_read (vx_dram_req_read),
|
||||
.out_dram_req_addr (vx_dram_req_addr),
|
||||
.out_dram_req_data (vx_dram_req_data),
|
||||
.out_dram_req_delay (vx_dram_req_delay),
|
||||
.dram_req_write (vx_dram_req_write),
|
||||
.dram_req_read (vx_dram_req_read),
|
||||
.dram_req_addr (vx_dram_req_addr),
|
||||
.dram_req_data (vx_dram_req_data),
|
||||
.dram_req_full (vx_dram_req_full),
|
||||
|
||||
// DRAM Rsp
|
||||
.out_dram_fill_accept (vx_dram_fill_accept),
|
||||
.out_dram_fill_rsp (vx_dram_fill_rsp),
|
||||
.out_dram_fill_rsp_addr (vx_dram_fill_rsp_addr),
|
||||
.out_dram_fill_rsp_data (vx_dram_fill_rsp_data),
|
||||
.out_dram_rsp_ready (vx_dram_rsp_ready),
|
||||
.dram_rsp_valid (vx_dram_rsp_valid),
|
||||
.out_dram_rsp_addr (vx_dram_rsp_addr),
|
||||
.out_dram_rsp_data (vx_dram_rsp_data),
|
||||
|
||||
// Cache Snooping Req
|
||||
.llc_snp_req (vx_snp_req),
|
||||
.llc_snp_req_addr (vx_snp_req_addr),
|
||||
.llc_snp_req_delay (vx_snp_req_delay),
|
||||
.llc_snp_req_valid (vx_snp_req),
|
||||
.llc_snp_req_addr (vx_snp_req_addr),
|
||||
.llc_snp_req_full (vx_snp_req_full),
|
||||
|
||||
// program exit signal
|
||||
.out_ebreak (vx_ebreak)
|
||||
.out_ebreak (vx_ebreak)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
onerror {resume}
|
||||
quietly WaveActivateNextPane {} 0
|
||||
add wave -noupdate -label clk /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/clk
|
||||
add wave -noupdate -label reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/SoftReset
|
||||
add wave -noupdate -label state /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/state
|
||||
add wave -noupdate -label cci_write_pending /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_pending
|
||||
add wave -noupdate -label cci_write_ctr -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_ctr
|
||||
add wave -noupdate -label csr_data_size -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/csr_data_size
|
||||
add wave -noupdate -label avs_read_ctr -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_read_ctr
|
||||
add wave -noupdate -label avs_waitrequest /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_waitrequest
|
||||
add wave -noupdate -label avs_address -radix hexadecimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_address
|
||||
add wave -noupdate -label avs_readdata -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_readdata
|
||||
add wave -noupdate -label avs_writedata -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_writedata
|
||||
add wave -noupdate -label avs_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_write
|
||||
add wave -noupdate -label avs_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_read
|
||||
add wave -noupdate -label avs_readdatavalid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_readdatavalid
|
||||
add wave -noupdate -label sRx.c0.rspValid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cp2af_sRxPort.c0.rspValid
|
||||
add wave -noupdate -label sRx.c1.rspValid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cp2af_sRxPort.c1.rspValid
|
||||
add wave -noupdate -label sTx.c0.valid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/af2cp_sTxPort.c0.valid
|
||||
add wave -noupdate -label sTx.c1.valid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/af2cp_sTxPort.c1.valid
|
||||
add wave -noupdate -label cci_write_req /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_req
|
||||
add wave -noupdate -label avs_raq_push /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_push
|
||||
add wave -noupdate -label avs_rdq_push /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_push
|
||||
add wave -noupdate -label avs_raq_pop /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_pop
|
||||
add wave -noupdate -label avs_rdq_pop /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_pop
|
||||
add wave -noupdate -label avs_raq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_full
|
||||
add wave -noupdate -label avs_rdq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_full
|
||||
add wave -noupdate -label avs_raq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_empty
|
||||
add wave -noupdate -label avs_rdq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_empty
|
||||
add wave -noupdate -label vortex_enabled /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vortex_enabled
|
||||
add wave -noupdate -label vx_reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/reset
|
||||
add wave -noupdate -label vx_dram_req_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_read
|
||||
add wave -noupdate -label vx_dram_req_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_write
|
||||
add wave -noupdate -label vx_dram_req_delay /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_delay
|
||||
add wave -noupdate -label vx_dram_req_addr -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_addr
|
||||
add wave -noupdate -label vx_draw_req_data -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_data
|
||||
add wave -noupdate -label out_dram_fill_rsp /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_dram_fill_rsp
|
||||
add wave -noupdate -label out_dram_fill_accept /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_dram_fill_accept
|
||||
add wave -noupdate -label vx_draw_fill_rsp_data -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_fill_rsp_data
|
||||
add wave -noupdate -label vx_dram_fill_rsp_addr -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_fill_rsp_addr
|
||||
add wave -noupdate -label llc_snp_req /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/llc_snp_req
|
||||
add wave -noupdate -label llc_snp_req_delay /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/llc_snp_req_delay
|
||||
add wave -noupdate -label out_break /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_ebreak
|
||||
add wave -noupdate -label warp_pc -radix hexadecimal -radixshowbase 0 {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_pc}
|
||||
add wave -noupdate -label scheduled_warp {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/scheduled_warp}
|
||||
add wave -noupdate -label thread_mask {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/thread_mask}
|
||||
add wave -noupdate -label warp_num {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_num}
|
||||
add wave -noupdate -label warp_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_active}
|
||||
add wave -noupdate -label warp_stalled {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_stalled}
|
||||
add wave -noupdate -label warp_lock {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_lock}
|
||||
add wave -noupdate -label use_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/use_active}
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 2} {360293 ps} 0}
|
||||
quietly wave cursor active 1
|
||||
configure wave -namecolwidth 195
|
||||
configure wave -valuecolwidth 100
|
||||
configure wave -justifyvalue left
|
||||
configure wave -signalnamewidth 0
|
||||
configure wave -snapdistance 10
|
||||
configure wave -datasetprefix 0
|
||||
configure wave -rowmargin 4
|
||||
configure wave -childrowmargin 2
|
||||
configure wave -gridoffset 0
|
||||
configure wave -gridperiod 1
|
||||
configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ps
|
||||
update
|
||||
WaveRestoreZoom {346453 ps} {711141 ps}
|
||||
356
hw/rtl/VX_alu.v
356
hw/rtl/VX_alu.v
@@ -14,196 +14,198 @@ module VX_alu(
|
||||
output reg out_alu_stall
|
||||
);
|
||||
|
||||
localparam div_pipeline_len = 20;
|
||||
localparam mul_pipeline_len = 8;
|
||||
localparam div_pipeline_len = 20;
|
||||
localparam mul_pipeline_len = 8;
|
||||
|
||||
wire[31:0] unsigned_div_result;
|
||||
wire[31:0] unsigned_rem_result;
|
||||
wire[31:0] signed_div_result;
|
||||
wire[31:0] signed_rem_result;
|
||||
wire[31:0] unsigned_div_result;
|
||||
wire[31:0] unsigned_rem_result;
|
||||
wire[31:0] signed_div_result;
|
||||
wire[31:0] signed_rem_result;
|
||||
|
||||
wire[63:0] mul_data_a, mul_data_b;
|
||||
wire[63:0] mul_result;
|
||||
wire[63:0] mul_data_a, mul_data_b;
|
||||
wire[63:0] mul_result;
|
||||
|
||||
wire[31:0] ALU_in1;
|
||||
wire[31:0] ALU_in2;
|
||||
wire[31:0] ALU_in1;
|
||||
wire[31:0] ALU_in2;
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.SPEED("HIGHEST"),
|
||||
.PIPELINE(div_pipeline_len)
|
||||
) unsigned_div (
|
||||
.clock(clk),
|
||||
.aclr(1'b0),
|
||||
.clken(1'b1), // TODO this could be disabled on inactive instructions
|
||||
.numer(ALU_in1),
|
||||
.denom(ALU_in2),
|
||||
.quotient(unsigned_div_result),
|
||||
.remainder(unsigned_rem_result)
|
||||
);
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.SPEED("HIGHEST"),
|
||||
.PIPELINE(div_pipeline_len)
|
||||
) unsigned_div (
|
||||
.clock(clk),
|
||||
.aclr(1'b0),
|
||||
.clken(1'b1), // TODO this could be disabled on inactive instructions
|
||||
.numer(ALU_in1),
|
||||
.denom(ALU_in2),
|
||||
.quotient(unsigned_div_result),
|
||||
.remainder(unsigned_rem_result)
|
||||
);
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.NREP("SIGNED"),
|
||||
.DREP("SIGNED"),
|
||||
.SPEED("HIGHEST"),
|
||||
.PIPELINE(div_pipeline_len)
|
||||
) signed_div (
|
||||
.clock(clk),
|
||||
.aclr(1'b0),
|
||||
.clken(1'b1), // TODO this could be disabled on inactive instructions
|
||||
.numer(ALU_in1),
|
||||
.denom(ALU_in2),
|
||||
.quotient(signed_div_result),
|
||||
.remainder(signed_rem_result)
|
||||
);
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.NREP("SIGNED"),
|
||||
.DREP("SIGNED"),
|
||||
.SPEED("HIGHEST"),
|
||||
.PIPELINE(div_pipeline_len)
|
||||
) signed_div (
|
||||
.clock(clk),
|
||||
.aclr(1'b0),
|
||||
.clken(1'b1), // TODO this could be disabled on inactive instructions
|
||||
.numer(ALU_in1),
|
||||
.denom(ALU_in2),
|
||||
.quotient(signed_div_result),
|
||||
.remainder(signed_rem_result)
|
||||
);
|
||||
|
||||
VX_mult #(
|
||||
.WIDTHA(64),
|
||||
.WIDTHB(64),
|
||||
.WIDTHP(64),
|
||||
.SPEED("HIGHEST"),
|
||||
.FORCE_LE("YES"),
|
||||
.PIPELINE(mul_pipeline_len)
|
||||
) multiplier (
|
||||
.clock(clk),
|
||||
.aclr(1'b0),
|
||||
.clken(1'b1), // TODO this could be disabled on inactive instructions
|
||||
.dataa(mul_data_a),
|
||||
.datab(mul_data_b),
|
||||
.result(mul_result)
|
||||
);
|
||||
VX_mult #(
|
||||
.WIDTHA(64),
|
||||
.WIDTHB(64),
|
||||
.WIDTHP(64),
|
||||
.SPEED("HIGHEST"),
|
||||
.FORCE_LE("YES"),
|
||||
.PIPELINE(mul_pipeline_len)
|
||||
) multiplier (
|
||||
.clock(clk),
|
||||
.aclr(1'b0),
|
||||
.clken(1'b1), // TODO this could be disabled on inactive instructions
|
||||
.dataa(mul_data_a),
|
||||
.datab(mul_data_b),
|
||||
.result(mul_result)
|
||||
);
|
||||
|
||||
// MUL, MULH (signed*signed), MULHSU (signed*unsigned), MULHU (unsigned*unsigned)
|
||||
wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
|
||||
wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
|
||||
assign mul_data_a = (in_alu_op == `MULHU) ? {32'b0, ALU_in1} : alu_in1_signed;
|
||||
assign mul_data_b = (in_alu_op == `MULHU || in_alu_op == `MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed;
|
||||
// MUL, MULH (signed*signed), MULHSU (signed*unsigned), MULHU (unsigned*unsigned)
|
||||
wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
|
||||
wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
|
||||
assign mul_data_a = (in_alu_op == `MULHU) ? {32'b0, ALU_in1} : alu_in1_signed;
|
||||
assign mul_data_b = (in_alu_op == `MULHU || in_alu_op == `MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed;
|
||||
|
||||
|
||||
reg [15:0] curr_inst_delay;
|
||||
reg [15:0] inst_delay;
|
||||
reg inst_was_stalling;
|
||||
reg [15:0] curr_inst_delay;
|
||||
reg [15:0] inst_delay;
|
||||
reg inst_was_stalling;
|
||||
|
||||
wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0;
|
||||
assign out_alu_stall = inst_delay_stall;
|
||||
wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0;
|
||||
assign out_alu_stall = inst_delay_stall;
|
||||
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`DIV,
|
||||
`DIVU,
|
||||
`REM,
|
||||
`REMU: curr_inst_delay = div_pipeline_len;
|
||||
`MUL,
|
||||
`MULH,
|
||||
`MULHSU,
|
||||
`MULHU: curr_inst_delay = mul_pipeline_len;
|
||||
default: curr_inst_delay = 0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`DIV,
|
||||
`DIVU,
|
||||
`REM,
|
||||
`REMU: curr_inst_delay = div_pipeline_len;
|
||||
`MUL,
|
||||
`MULH,
|
||||
`MULHSU,
|
||||
`MULHU: curr_inst_delay = mul_pipeline_len;
|
||||
default: curr_inst_delay = 0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
|
||||
always @(posedge clk or posedge reset) begin
|
||||
if (reset) begin
|
||||
inst_delay <= 0;
|
||||
inst_was_stalling <= 0;
|
||||
end
|
||||
else if (inst_delay_stall) begin
|
||||
if (inst_was_stalling) begin
|
||||
if (inst_delay > 0)
|
||||
inst_delay <= inst_delay - 1;
|
||||
end
|
||||
else begin
|
||||
inst_was_stalling <= 1;
|
||||
inst_delay <= curr_inst_delay - 1;
|
||||
end
|
||||
end
|
||||
else begin
|
||||
inst_was_stalling <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef SYN_FUNC
|
||||
wire which_in2;
|
||||
wire[31:0] upper_immed;
|
||||
|
||||
assign which_in2 = in_rs2_src == `RS2_IMMED;
|
||||
|
||||
assign ALU_in1 = in_1;
|
||||
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
`SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0];
|
||||
`SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
|
||||
`SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
|
||||
`XOR: out_alu_result = ALU_in1 ^ ALU_in2;
|
||||
`SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0];
|
||||
`SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
|
||||
`OR: out_alu_result = ALU_in1 | ALU_in2;
|
||||
`AND: out_alu_result = ALU_in2 & ALU_in1;
|
||||
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
|
||||
`LUI_ALU: out_alu_result = upper_immed;
|
||||
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
|
||||
// TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
|
||||
`MUL: out_alu_result = mul_result[31:0];
|
||||
`MULH: out_alu_result = mul_result[63:32];
|
||||
`MULHSU: out_alu_result = mul_result[63:32];
|
||||
`MULHU: out_alu_result = mul_result[63:32];
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
|
||||
default: out_alu_result = 32'h0;
|
||||
endcase // in_alu_op
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
inst_delay <= 0;
|
||||
inst_was_stalling <= 0;
|
||||
end
|
||||
|
||||
`else
|
||||
wire which_in2;
|
||||
wire[31:0] upper_immed;
|
||||
|
||||
|
||||
assign which_in2 = in_rs2_src == `RS2_IMMED;
|
||||
|
||||
assign ALU_in1 = in_1;
|
||||
|
||||
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
|
||||
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
`SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0];
|
||||
`SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
|
||||
`SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
|
||||
`XOR: out_alu_result = ALU_in1 ^ ALU_in2;
|
||||
`SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0];
|
||||
`SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
|
||||
`OR: out_alu_result = ALU_in1 | ALU_in2;
|
||||
`AND: out_alu_result = ALU_in2 & ALU_in1;
|
||||
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
|
||||
`LUI_ALU: out_alu_result = upper_immed;
|
||||
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
|
||||
// TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
|
||||
`MUL: out_alu_result = mul_result[31:0];
|
||||
`MULH: out_alu_result = mul_result[63:32];
|
||||
`MULHSU: out_alu_result = mul_result[63:32];
|
||||
`MULHU: out_alu_result = mul_result[63:32];
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
|
||||
default: out_alu_result = 32'h0;
|
||||
endcase // in_alu_op
|
||||
else if (inst_delay_stall) begin
|
||||
if (inst_was_stalling) begin
|
||||
if (inst_delay > 0)
|
||||
inst_delay <= inst_delay - 1;
|
||||
end
|
||||
else begin
|
||||
inst_was_stalling <= 1;
|
||||
inst_delay <= curr_inst_delay - 1;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
else begin
|
||||
inst_was_stalling <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef SYN_FUNC
|
||||
wire which_in2;
|
||||
wire[31:0] upper_immed;
|
||||
|
||||
assign which_in2 = in_rs2_src == `RS2_IMMED;
|
||||
|
||||
assign ALU_in1 = in_1;
|
||||
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
`SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0];
|
||||
`SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
|
||||
`SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
|
||||
`XOR: out_alu_result = ALU_in1 ^ ALU_in2;
|
||||
`SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0];
|
||||
`SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
|
||||
`OR: out_alu_result = ALU_in1 | ALU_in2;
|
||||
`AND: out_alu_result = ALU_in2 & ALU_in1;
|
||||
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
|
||||
`LUI_ALU: out_alu_result = upper_immed;
|
||||
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
|
||||
// TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
|
||||
`MUL: out_alu_result = mul_result[31:0];
|
||||
`MULH: out_alu_result = mul_result[63:32];
|
||||
`MULHSU: out_alu_result = mul_result[63:32];
|
||||
`MULHU: out_alu_result = mul_result[63:32];
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
|
||||
default: out_alu_result = 32'h0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
|
||||
`else
|
||||
|
||||
wire which_in2;
|
||||
wire[31:0] upper_immed;
|
||||
|
||||
|
||||
assign which_in2 = in_rs2_src == `RS2_IMMED;
|
||||
|
||||
assign ALU_in1 = in_1;
|
||||
|
||||
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
|
||||
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
`SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0];
|
||||
`SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
|
||||
`SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
|
||||
`XOR: out_alu_result = ALU_in1 ^ ALU_in2;
|
||||
`SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0];
|
||||
`SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
|
||||
`OR: out_alu_result = ALU_in1 | ALU_in2;
|
||||
`AND: out_alu_result = ALU_in2 & ALU_in1;
|
||||
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
|
||||
`LUI_ALU: out_alu_result = upper_immed;
|
||||
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
|
||||
// TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
|
||||
`MUL: out_alu_result = mul_result[31:0];
|
||||
`MULH: out_alu_result = mul_result[63:32];
|
||||
`MULHSU: out_alu_result = mul_result[63:32];
|
||||
`MULHU: out_alu_result = mul_result[63:32];
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
|
||||
default: out_alu_result = 32'h0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
endmodule : VX_alu
|
||||
@@ -9,71 +9,63 @@ module VX_back_end
|
||||
input wire reset,
|
||||
input wire schedule_delay,
|
||||
|
||||
VX_gpu_dcache_res_inter VX_dcache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_dcache_req,
|
||||
VX_gpu_dcache_rsp_inter vx_dcache_rsp,
|
||||
VX_gpu_dcache_req_inter vx_dcache_req,
|
||||
|
||||
output wire out_mem_delay,
|
||||
output wire out_exec_delay,
|
||||
output wire gpr_stage_delay,
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
VX_jal_response_inter vx_jal_rsp,
|
||||
VX_branch_response_inter vx_branch_rsp,
|
||||
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
VX_frE_to_bckE_req_inter vx_bckE_req,
|
||||
VX_wb_inter vx_writeback_inter,
|
||||
|
||||
VX_warp_ctl_inter VX_warp_ctl
|
||||
VX_warp_ctl_inter vx_warp_ctl
|
||||
);
|
||||
|
||||
|
||||
VX_wb_inter VX_writeback_temp();
|
||||
assign VX_writeback_inter.wb = VX_writeback_temp.wb;
|
||||
assign VX_writeback_inter.rd = VX_writeback_temp.rd;
|
||||
assign VX_writeback_inter.write_data = VX_writeback_temp.write_data;
|
||||
assign VX_writeback_inter.wb_valid = VX_writeback_temp.wb_valid;
|
||||
assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num;
|
||||
assign VX_writeback_inter.wb_pc = VX_writeback_temp.wb_pc;
|
||||
VX_wb_inter vx_writeback_temp();
|
||||
assign vx_writeback_inter.wb = vx_writeback_temp.wb;
|
||||
assign vx_writeback_inter.rd = vx_writeback_temp.rd;
|
||||
assign vx_writeback_inter.write_data = vx_writeback_temp.write_data;
|
||||
assign vx_writeback_inter.wb_valid = vx_writeback_temp.wb_valid;
|
||||
assign vx_writeback_inter.wb_warp_num = vx_writeback_temp.wb_warp_num;
|
||||
assign vx_writeback_inter.wb_pc = vx_writeback_temp.wb_pc;
|
||||
|
||||
// assign VX_writeback_inter(VX_writeback_temp);
|
||||
// assign VX_writeback_inter(vx_writeback_temp);
|
||||
|
||||
VX_mw_wb_inter VX_mw_wb();
|
||||
wire no_slot_mem;
|
||||
wire no_slot_exec;
|
||||
|
||||
|
||||
VX_mem_req_inter VX_exe_mem_req();
|
||||
VX_mem_req_inter VX_mem_req();
|
||||
|
||||
|
||||
|
||||
// LSU input + output
|
||||
VX_lsu_req_inter VX_lsu_req();
|
||||
VX_inst_mem_wb_inter VX_mem_wb();
|
||||
VX_lsu_req_inter vx_lsu_req();
|
||||
VX_inst_mem_wb_inter vx_mem_wb();
|
||||
|
||||
// Exec unit input + output
|
||||
VX_exec_unit_req_inter VX_exec_unit_req();
|
||||
VX_inst_exec_wb_inter VX_inst_exec_wb();
|
||||
|
||||
VX_exec_unit_req_inter vx_exec_unit_req();
|
||||
VX_inst_exec_wb_inter vx_inst_exec_wb();
|
||||
|
||||
// GPU unit input
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req();
|
||||
VX_gpu_inst_req_inter vx_gpu_inst_req();
|
||||
|
||||
// CSR unit inputs
|
||||
VX_csr_req_inter VX_csr_req();
|
||||
VX_csr_wb_inter VX_csr_wb();
|
||||
VX_csr_req_inter vx_csr_req();
|
||||
VX_csr_wb_inter vx_csr_wb();
|
||||
wire no_slot_csr;
|
||||
wire stall_gpr_csr;
|
||||
|
||||
VX_gpr_stage VX_gpr_stage(
|
||||
VX_gpr_stage vx_gpr_stage(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.VX_writeback_inter(VX_writeback_temp),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.vx_writeback_inter(vx_writeback_temp),
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
// New
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_lsu_req (VX_lsu_req),
|
||||
.VX_gpu_inst_req (VX_gpu_inst_req),
|
||||
.VX_csr_req (VX_csr_req),
|
||||
.vx_exec_unit_req(vx_exec_unit_req),
|
||||
.vx_lsu_req (vx_lsu_req),
|
||||
.vx_gpu_inst_req (vx_gpu_inst_req),
|
||||
.vx_csr_req (vx_csr_req),
|
||||
.stall_gpr_csr (stall_gpr_csr),
|
||||
// End new
|
||||
.memory_delay (out_mem_delay),
|
||||
@@ -81,62 +73,61 @@ VX_gpr_stage VX_gpr_stage(
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
|
||||
VX_lsu load_store_unit(
|
||||
VX_lsu load_store_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.VX_lsu_req (VX_lsu_req),
|
||||
.VX_mem_wb (VX_mem_wb),
|
||||
.VX_dcache_rsp(VX_dcache_rsp),
|
||||
.VX_dcache_req(VX_dcache_req),
|
||||
.vx_lsu_req (vx_lsu_req),
|
||||
.vx_mem_wb (vx_mem_wb),
|
||||
.vx_dcache_rsp(vx_dcache_rsp),
|
||||
.vx_dcache_req(vx_dcache_req),
|
||||
.out_delay (out_mem_delay),
|
||||
.no_slot_mem (no_slot_mem)
|
||||
);
|
||||
);
|
||||
|
||||
|
||||
VX_execute_unit VX_execUnit(
|
||||
VX_execute_unit vx_execUnit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.vx_exec_unit_req(vx_exec_unit_req),
|
||||
.vx_inst_exec_wb (vx_inst_exec_wb),
|
||||
.vx_jal_rsp (vx_jal_rsp),
|
||||
.vx_branch_rsp (vx_branch_rsp),
|
||||
.out_delay (out_exec_delay),
|
||||
.no_slot_exec (no_slot_exec)
|
||||
);
|
||||
);
|
||||
|
||||
VX_gpgpu_inst vx_gpgpu_inst (
|
||||
.vx_gpu_inst_req(vx_gpu_inst_req),
|
||||
.vx_warp_ctl (vx_warp_ctl)
|
||||
);
|
||||
|
||||
VX_gpgpu_inst VX_gpgpu_inst(
|
||||
.VX_gpu_inst_req(VX_gpu_inst_req),
|
||||
.VX_warp_ctl (VX_warp_ctl)
|
||||
);
|
||||
|
||||
// VX_csr_wrapper VX_csr_wrapper(
|
||||
// .VX_csr_req(VX_csr_req),
|
||||
// .VX_csr_wb (VX_csr_wb)
|
||||
// VX_csr_wrapper vx_csr_wrapper(
|
||||
// .vx_csr_req(vx_csr_req),
|
||||
// .vx_csr_wb (vx_csr_wb)
|
||||
// );
|
||||
|
||||
VX_csr_pipe #(.CORE_ID(CORE_ID)) VX_csr_pipe(
|
||||
VX_csr_pipe #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) vx_csr_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.no_slot_csr (no_slot_csr),
|
||||
.VX_csr_req (VX_csr_req),
|
||||
.VX_writeback(VX_writeback_temp),
|
||||
.VX_csr_wb (VX_csr_wb),
|
||||
.vx_csr_req (vx_csr_req),
|
||||
.vx_writeback(vx_writeback_temp),
|
||||
.vx_csr_wb (vx_csr_wb),
|
||||
.stall_gpr_csr(stall_gpr_csr)
|
||||
);
|
||||
);
|
||||
|
||||
VX_writeback VX_wb(
|
||||
VX_writeback vx_wb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.VX_mem_wb (VX_mem_wb),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
.VX_csr_wb (VX_csr_wb),
|
||||
.vx_mem_wb (vx_mem_wb),
|
||||
.vx_inst_exec_wb (vx_inst_exec_wb),
|
||||
.vx_csr_wb (vx_csr_wb),
|
||||
|
||||
.VX_writeback_inter(VX_writeback_temp),
|
||||
.vx_writeback_inter(vx_writeback_temp),
|
||||
.no_slot_mem (no_slot_mem),
|
||||
.no_slot_exec (no_slot_exec),
|
||||
.no_slot_csr (no_slot_csr)
|
||||
);
|
||||
);
|
||||
|
||||
endmodule
|
||||
@@ -4,21 +4,20 @@ module VX_csr_data (
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
|
||||
input wire[11:0] in_read_csr_address,
|
||||
input wire[`CSR_ADDR_SIZE-1:0] in_read_csr_address,
|
||||
input wire in_write_valid,
|
||||
input wire[`CSR_WIDTH-1:0] in_write_csr_data,
|
||||
|
||||
input wire in_write_valid,
|
||||
input wire[31:0] in_write_csr_data,
|
||||
input wire[11:0] in_write_csr_address,
|
||||
/* verilator lint_off UNUSED */
|
||||
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
|
||||
input wire[`CSR_ADDR_SIZE-1:0] in_write_csr_address,
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
output wire[31:0] out_read_csr_data,
|
||||
|
||||
// For instruction retire counting
|
||||
input wire in_writeback_valid
|
||||
|
||||
);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
|
||||
// wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
// wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
|
||||
@@ -32,45 +31,44 @@ module VX_csr_data (
|
||||
// assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, in_read_warp_num};
|
||||
// end
|
||||
|
||||
reg[11:0] csr[1023:0];
|
||||
reg[63:0] cycle;
|
||||
reg[63:0] instret;
|
||||
reg [`CSR_WIDTH-1:0] csr[`NUM_CSRS-1:0];
|
||||
|
||||
reg [63:0] cycle;
|
||||
reg [63:0] instret;
|
||||
|
||||
wire read_cycle;
|
||||
wire read_cycleh;
|
||||
wire read_instret;
|
||||
wire read_instreth;
|
||||
|
||||
assign read_cycle = in_read_csr_address == 12'hC00;
|
||||
assign read_cycleh = in_read_csr_address == 12'hC80;
|
||||
assign read_instret = in_read_csr_address == 12'hC02;
|
||||
assign read_instreth = in_read_csr_address == 12'hC82;
|
||||
assign read_cycle = in_read_csr_address == `CSR_CYCL_L;
|
||||
assign read_cycleh = in_read_csr_address == `CSR_CYCL_H;
|
||||
assign read_instret = in_read_csr_address == `CSR_INST_L;
|
||||
assign read_instreth = in_read_csr_address == `CSR_INST_H;
|
||||
|
||||
wire [$clog2(`NUM_CSRS)-1:0] read_addr, write_addr;
|
||||
|
||||
// cast address to physical CSR range
|
||||
assign read_addr = $size(read_addr)'(in_read_csr_address);
|
||||
assign write_addr = $size(write_addr)'(in_write_csr_address);
|
||||
|
||||
// wire thread_select = in_read_csr_address == 12'h20;
|
||||
// wire warp_select = in_read_csr_address == 12'h21;
|
||||
|
||||
// assign out_read_csr_data = thread_select ? thread_ids :
|
||||
// assign out_read_csr_data = thread_select ? thread_ids :
|
||||
// warp_select ? warp_ids :
|
||||
// 0;
|
||||
|
||||
integer curr_e;
|
||||
always @(posedge clk or posedge reset) begin
|
||||
genvar curr_e;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (curr_e = 0; curr_e < 1024; curr_e=curr_e+1) begin
|
||||
`ifdef VERILATOR
|
||||
// - Verilator does not support delayed assignment in loops.
|
||||
csr[curr_e] = 0;
|
||||
`else
|
||||
csr[curr_e] <= 0;
|
||||
`endif
|
||||
end
|
||||
cycle <= 0;
|
||||
instret <= 0;
|
||||
end else begin
|
||||
cycle <= cycle + 1;
|
||||
if (in_write_valid) begin
|
||||
csr[in_write_csr_address] <= in_write_csr_data[11:0];
|
||||
csr[write_addr] <= in_write_csr_data;
|
||||
end
|
||||
if (in_writeback_valid) begin
|
||||
instret <= instret + 1;
|
||||
@@ -78,12 +76,9 @@ module VX_csr_data (
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign out_read_csr_data = read_cycle ? cycle[31:0] :
|
||||
read_cycleh ? cycle[63:32] :
|
||||
read_instret ? instret[31:0] :
|
||||
read_instreth ? instret[63:32] :
|
||||
{{20{1'b0}}, csr[in_read_csr_address]};
|
||||
|
||||
/* verilator lint_on WIDTH */
|
||||
assign out_read_csr_data = read_cycle ? cycle[31:0] :
|
||||
read_cycleh ? cycle[63:32] :
|
||||
read_instret ? instret[31:0] :
|
||||
read_instreth ? instret[63:32] :
|
||||
{{20{1'b0}}, csr[read_addr]};
|
||||
endmodule : VX_csr_data
|
||||
|
||||
@@ -1,78 +1,63 @@
|
||||
|
||||
|
||||
module VX_csr_handler (
|
||||
input wire clk,
|
||||
input wire[11:0] in_decode_csr_address, // done
|
||||
VX_csr_write_request_inter VX_csr_w_req,
|
||||
input wire in_wb_valid,
|
||||
output wire[31:0] out_decode_csr_data // done
|
||||
);
|
||||
input wire clk,
|
||||
input wire[`CSR_ADDR_SIZE-1:0] in_decode_csr_address, // done
|
||||
VX_csr_write_request_inter vx_csr_w_req,
|
||||
input wire in_wb_valid,
|
||||
output wire[31:0] out_decode_csr_data // done
|
||||
);
|
||||
wire in_mem_is_csr;
|
||||
wire[`CSR_ADDR_SIZE-1:0] in_mem_csr_address;
|
||||
wire[31:0] in_mem_csr_result;
|
||||
|
||||
wire in_mem_is_csr;
|
||||
wire[11:0] in_mem_csr_address;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] in_mem_csr_result;
|
||||
/* verilator lint_on UNUSED */
|
||||
assign in_mem_is_csr = vx_csr_w_req.is_csr;
|
||||
assign in_mem_csr_address = vx_csr_w_req.csr_address;
|
||||
assign in_mem_csr_result = vx_csr_w_req.csr_result;
|
||||
|
||||
reg [`CSR_WIDTH-1:0] csr [`NUM_CSRS-1:0];
|
||||
|
||||
reg [63:0] cycle;
|
||||
reg [63:0] instret;
|
||||
reg [`CSR_ADDR_SIZE-1:0] decode_csr_address;
|
||||
|
||||
assign in_mem_is_csr = VX_csr_w_req.is_csr;
|
||||
assign in_mem_csr_address = VX_csr_w_req.csr_address;
|
||||
assign in_mem_csr_result = VX_csr_w_req.csr_result;
|
||||
wire read_cycle;
|
||||
wire read_cycleh;
|
||||
wire read_instret;
|
||||
wire read_instreth;
|
||||
|
||||
initial begin
|
||||
cycle = 0;
|
||||
instret = 0;
|
||||
decode_csr_address = 0;
|
||||
end
|
||||
|
||||
reg[1024:0][11:0] csr;
|
||||
reg[63:0] cycle;
|
||||
reg[63:0] instret;
|
||||
reg[11:0] decode_csr_address;
|
||||
|
||||
|
||||
wire read_cycle;
|
||||
wire read_cycleh;
|
||||
wire read_instret;
|
||||
wire read_instreth;
|
||||
|
||||
initial begin
|
||||
cycle = 0;
|
||||
instret = 0;
|
||||
decode_csr_address = 0;
|
||||
always @(posedge clk) begin
|
||||
cycle <= cycle + 1;
|
||||
decode_csr_address <= in_decode_csr_address;
|
||||
if (in_wb_valid) begin
|
||||
instret <= instret + 1;
|
||||
end
|
||||
end
|
||||
|
||||
reg[`CSR_WIDTH-1:0] data_read;
|
||||
|
||||
always @(posedge clk) begin
|
||||
cycle <= cycle + 1;
|
||||
decode_csr_address <= in_decode_csr_address;
|
||||
if (in_wb_valid) begin
|
||||
instret <= instret + 1;
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (in_mem_is_csr) begin
|
||||
csr[in_mem_csr_address] <= in_mem_csr_result[11:0];
|
||||
end
|
||||
end
|
||||
|
||||
reg[11:0] data_read;
|
||||
always @(posedge clk) begin
|
||||
if(in_mem_is_csr) begin
|
||||
csr[in_mem_csr_address] <= in_mem_csr_result[11:0];
|
||||
end
|
||||
end
|
||||
|
||||
assign data_read = csr[decode_csr_address];
|
||||
|
||||
|
||||
assign read_cycle = decode_csr_address == 12'hC00;
|
||||
assign read_cycleh = decode_csr_address == 12'hC80;
|
||||
assign read_instret = decode_csr_address == 12'hC02;
|
||||
assign read_instreth = decode_csr_address == 12'hC82;
|
||||
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
assign out_decode_csr_data = read_cycle ? cycle[31:0] :
|
||||
read_cycleh ? cycle[63:32] :
|
||||
read_instret ? instret[31:0] :
|
||||
read_instreth ? instret[63:32] :
|
||||
{{20{1'b0}}, data_read};
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
|
||||
assign data_read = csr[decode_csr_address];
|
||||
|
||||
assign read_cycle = decode_csr_address == `CSR_CYCL_L;
|
||||
assign read_cycleh = decode_csr_address == `CSR_CYCL_H;
|
||||
assign read_instret = decode_csr_address == `CSR_INST_L;
|
||||
assign read_instreth = decode_csr_address == `CSR_INST_H;
|
||||
|
||||
assign out_decode_csr_data = read_cycle ? cycle[31:0] :
|
||||
read_cycleh ? cycle[63:32] :
|
||||
read_instret ? instret[31:0] :
|
||||
read_instreth ? instret[63:32] :
|
||||
{{20{1'b0}}, data_read};
|
||||
|
||||
endmodule // VX_csr_handler
|
||||
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_pipe
|
||||
#(
|
||||
parameter CORE_ID = 0
|
||||
)
|
||||
(
|
||||
module VX_csr_pipe #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
input wire no_slot_csr,
|
||||
VX_csr_req_inter VX_csr_req,
|
||||
VX_wb_inter VX_writeback,
|
||||
VX_csr_wb_inter VX_csr_wb,
|
||||
VX_csr_req_inter vx_csr_req,
|
||||
VX_wb_inter vx_writeback,
|
||||
VX_csr_wb_inter vx_csr_wb,
|
||||
output wire stall_gpr_csr
|
||||
);
|
||||
|
||||
@@ -18,64 +16,61 @@ module VX_csr_pipe
|
||||
wire[`NW_BITS-1:0] warp_num_s2;
|
||||
wire[4:0] rd_s2;
|
||||
wire[1:0] wb_s2;
|
||||
wire[4:0] alu_op_s2;
|
||||
wire is_csr_s2;
|
||||
wire[11:0] csr_address_s2;
|
||||
wire[`CSR_ADDR_SIZE-1:0] csr_address_s2;
|
||||
wire[31:0] csr_read_data_s2;
|
||||
wire[31:0] csr_updated_data_s2;
|
||||
|
||||
wire[31:0] csr_read_data_unqual;
|
||||
wire[31:0] csr_read_data;
|
||||
|
||||
assign stall_gpr_csr = no_slot_csr && VX_csr_req.is_csr && |(VX_csr_req.valid);
|
||||
assign stall_gpr_csr = no_slot_csr && vx_csr_req.is_csr && |(vx_csr_req.valid);
|
||||
|
||||
assign csr_read_data = (csr_address_s2 == VX_csr_req.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
assign csr_read_data = (csr_address_s2 == vx_csr_req.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
|
||||
wire writeback = |VX_writeback.wb_valid;
|
||||
VX_csr_data VX_csr_data(
|
||||
wire writeback = |vx_writeback.wb_valid;
|
||||
|
||||
VX_csr_data vx_csr_data(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_read_csr_address (VX_csr_req.csr_address),
|
||||
|
||||
.in_read_csr_address (vx_csr_req.csr_address),
|
||||
.in_write_valid (is_csr_s2),
|
||||
.in_write_csr_data (csr_updated_data_s2),
|
||||
.in_write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
||||
.in_write_csr_address(csr_address_s2),
|
||||
|
||||
.out_read_csr_data (csr_read_data_unqual),
|
||||
|
||||
.in_writeback_valid (writeback)
|
||||
);
|
||||
);
|
||||
|
||||
reg [31:0] csr_updated_data;
|
||||
|
||||
|
||||
reg[31:0] csr_updated_data;
|
||||
always @(*) begin
|
||||
case(VX_csr_req.alu_op)
|
||||
`CSR_ALU_RW: csr_updated_data = VX_csr_req.csr_mask;
|
||||
`CSR_ALU_RS: csr_updated_data = csr_read_data | VX_csr_req.csr_mask;
|
||||
`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - VX_csr_req.csr_mask);
|
||||
case (vx_csr_req.alu_op)
|
||||
`CSR_ALU_RW: csr_updated_data = vx_csr_req.csr_mask;
|
||||
`CSR_ALU_RS: csr_updated_data = csr_read_data | vx_csr_req.csr_mask;
|
||||
`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - vx_csr_req.csr_mask);
|
||||
default: csr_updated_data = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
||||
wire zero = 0;
|
||||
|
||||
VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)) csr_reg_s2 (
|
||||
VX_generic_register #(
|
||||
.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)
|
||||
) csr_reg_s2 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(no_slot_csr),
|
||||
.flush(zero),
|
||||
.in ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd, VX_csr_req.wb, VX_csr_req.is_csr, VX_csr_req.csr_address, csr_read_data , csr_updated_data }),
|
||||
.in ({vx_csr_req.valid, vx_csr_req.warp_num, vx_csr_req.rd, vx_csr_req.wb, vx_csr_req.is_csr, vx_csr_req.csr_address, csr_read_data , csr_updated_data }),
|
||||
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
|
||||
);
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] final_csr_data;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] final_csr_data;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
wire[`NUM_THREADS-1:0][31:0] warp_idz;
|
||||
wire[`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;
|
||||
wire [`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
wire [`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
wire [`NUM_THREADS-1:0][31:0] warp_idz;
|
||||
wire [`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;
|
||||
|
||||
genvar cur_t;
|
||||
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
|
||||
@@ -102,10 +97,10 @@ module VX_csr_pipe
|
||||
warp_id_select ? warp_idz :
|
||||
csr_vec_read_data_s2;
|
||||
|
||||
assign VX_csr_wb.valid = valid_s2;
|
||||
assign VX_csr_wb.warp_num = warp_num_s2;
|
||||
assign VX_csr_wb.rd = rd_s2;
|
||||
assign VX_csr_wb.wb = wb_s2;
|
||||
assign VX_csr_wb.csr_result = final_csr_data;
|
||||
assign vx_csr_wb.valid = valid_s2;
|
||||
assign vx_csr_wb.warp_num = warp_num_s2;
|
||||
assign vx_csr_wb.rd = rd_s2;
|
||||
assign vx_csr_wb.wb = wb_s2;
|
||||
assign vx_csr_wb.csr_result = final_csr_data;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -2,9 +2,8 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_wrapper (
|
||||
VX_csr_req_inter VX_csr_req,
|
||||
|
||||
VX_csr_wb_inter VX_csr_wb
|
||||
VX_csr_req_inter vx_csr_req,
|
||||
VX_csr_wb_inter vx_csr_wb
|
||||
);
|
||||
|
||||
|
||||
@@ -18,21 +17,21 @@ module VX_csr_wrapper (
|
||||
end
|
||||
|
||||
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, VX_csr_req.warp_num};
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, vx_csr_req.warp_num};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
assign VX_csr_wb.valid = VX_csr_req.valid;
|
||||
assign VX_csr_wb.warp_num = VX_csr_req.warp_num;
|
||||
assign VX_csr_wb.rd = VX_csr_req.rd;
|
||||
assign VX_csr_wb.wb = VX_csr_req.wb;
|
||||
assign vx_csr_wb.valid = vx_csr_req.valid;
|
||||
assign vx_csr_wb.warp_num = vx_csr_req.warp_num;
|
||||
assign vx_csr_wb.rd = vx_csr_req.rd;
|
||||
assign vx_csr_wb.wb = vx_csr_req.wb;
|
||||
|
||||
|
||||
wire thread_select = VX_csr_req.csr_address == 12'h20;
|
||||
wire warp_select = VX_csr_req.csr_address == 12'h21;
|
||||
wire thread_select = vx_csr_req.csr_address == 12'h20;
|
||||
wire warp_select = vx_csr_req.csr_address == 12'h21;
|
||||
|
||||
assign VX_csr_wb.csr_result = thread_select ? thread_ids :
|
||||
assign vx_csr_wb.csr_result = thread_select ? thread_ids :
|
||||
warp_select ? warp_ids :
|
||||
0;
|
||||
|
||||
|
||||
@@ -6,349 +6,338 @@ module VX_decode(
|
||||
VX_inst_meta_inter fd_inst_meta_de,
|
||||
|
||||
// Outputs
|
||||
VX_frE_to_bckE_req_inter VX_frE_to_bckE_req,
|
||||
VX_wstall_inter VX_wstall,
|
||||
VX_join_inter VX_join,
|
||||
VX_frE_to_bckE_req_inter vx_frE_to_bckE_req,
|
||||
VX_wstall_inter vx_wstall,
|
||||
VX_join_inter vx_join,
|
||||
|
||||
output wire terminate_sim
|
||||
|
||||
);
|
||||
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
|
||||
assign VX_frE_to_bckE_req.curr_PC = in_curr_PC;
|
||||
assign vx_frE_to_bckE_req.curr_PC = in_curr_PC;
|
||||
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
|
||||
wire[6:0] curr_opcode;
|
||||
wire[6:0] curr_opcode;
|
||||
|
||||
wire is_itype;
|
||||
wire is_rtype;
|
||||
wire is_stype;
|
||||
wire is_btype;
|
||||
wire is_linst;
|
||||
wire is_jal;
|
||||
wire is_jalr;
|
||||
wire is_lui;
|
||||
wire is_auipc;
|
||||
wire is_csr;
|
||||
wire is_csr_immed;
|
||||
wire is_e_inst;
|
||||
wire is_itype;
|
||||
wire is_rtype;
|
||||
wire is_stype;
|
||||
wire is_btype;
|
||||
wire is_linst;
|
||||
wire is_jal;
|
||||
wire is_jalr;
|
||||
wire is_lui;
|
||||
wire is_auipc;
|
||||
wire is_csr;
|
||||
wire is_csr_immed;
|
||||
wire is_e_inst;
|
||||
|
||||
wire is_gpgpu;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
wire is_join;
|
||||
wire is_barrier;
|
||||
wire is_gpgpu;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
wire is_join;
|
||||
wire is_barrier;
|
||||
|
||||
wire[2:0] func3;
|
||||
wire[6:0] func7;
|
||||
wire[11:0] u_12;
|
||||
wire[2:0] func3;
|
||||
wire[6:0] func7;
|
||||
wire[11:0] u_12;
|
||||
|
||||
|
||||
wire[7:0] jal_b_19_to_12;
|
||||
wire jal_b_11;
|
||||
wire[9:0] jal_b_10_to_1;
|
||||
wire jal_b_20;
|
||||
wire jal_b_0;
|
||||
wire[20:0] jal_unsigned_offset;
|
||||
wire[31:0] jal_1_offset;
|
||||
wire[7:0] jal_b_19_to_12;
|
||||
wire jal_b_11;
|
||||
wire[9:0] jal_b_10_to_1;
|
||||
wire jal_b_20;
|
||||
wire jal_b_0;
|
||||
wire[20:0] jal_unsigned_offset;
|
||||
wire[31:0] jal_1_offset;
|
||||
|
||||
wire[11:0] jalr_immed;
|
||||
wire[31:0] jal_2_offset;
|
||||
wire[11:0] jalr_immed;
|
||||
wire[31:0] jal_2_offset;
|
||||
|
||||
wire jal_sys_cond1;
|
||||
wire jal_sys_cond2;
|
||||
wire jal_sys_jal;
|
||||
wire[31:0] jal_sys_off;
|
||||
wire jal_sys_cond1;
|
||||
wire jal_sys_cond2;
|
||||
wire jal_sys_jal;
|
||||
wire[31:0] jal_sys_off;
|
||||
|
||||
wire csr_cond1;
|
||||
wire csr_cond2;
|
||||
wire csr_cond1;
|
||||
wire csr_cond2;
|
||||
|
||||
wire[11:0] alu_tempp;
|
||||
wire alu_shift_i;
|
||||
wire[11:0] alu_shift_i_immed;
|
||||
wire[11:0] alu_tempp;
|
||||
wire alu_shift_i;
|
||||
wire[11:0] alu_shift_i_immed;
|
||||
|
||||
wire[1:0] csr_type;
|
||||
wire[1:0] csr_type;
|
||||
|
||||
reg[4:0] csr_alu;
|
||||
reg[4:0] alu_op;
|
||||
reg[4:0] mul_alu;
|
||||
reg[19:0] temp_upper_immed;
|
||||
reg temp_jal;
|
||||
reg[31:0] temp_jal_offset;
|
||||
reg[31:0] temp_itype_immed;
|
||||
reg[2:0] temp_branch_type;
|
||||
reg temp_branch_stall;
|
||||
reg[4:0] csr_alu;
|
||||
reg[4:0] alu_op;
|
||||
reg[4:0] mul_alu;
|
||||
reg[19:0] temp_upper_immed;
|
||||
reg temp_jal;
|
||||
reg[31:0] temp_jal_offset;
|
||||
reg[31:0] temp_itype_immed;
|
||||
reg[2:0] temp_branch_type;
|
||||
reg temp_branch_stall;
|
||||
|
||||
// always @(posedge reset) begin
|
||||
|
||||
// end
|
||||
assign vx_frE_to_bckE_req.valid = fd_inst_meta_de.valid;
|
||||
|
||||
assign VX_frE_to_bckE_req.valid = fd_inst_meta_de.valid;
|
||||
assign vx_frE_to_bckE_req.warp_num = in_warp_num;
|
||||
|
||||
assign VX_frE_to_bckE_req.warp_num = in_warp_num;
|
||||
assign curr_opcode = in_instruction[6:0];
|
||||
|
||||
assign vx_frE_to_bckE_req.rd = in_instruction[11:7];
|
||||
assign vx_frE_to_bckE_req.rs1 = in_instruction[19:15];
|
||||
assign vx_frE_to_bckE_req.rs2 = in_instruction[24:20];
|
||||
assign func3 = in_instruction[14:12];
|
||||
assign func7 = in_instruction[31:25];
|
||||
assign u_12 = in_instruction[31:20];
|
||||
|
||||
assign vx_frE_to_bckE_req.PC_next = in_curr_PC + 32'h4;
|
||||
|
||||
// Write Back sigal
|
||||
assign is_rtype = (curr_opcode == `R_INST);
|
||||
assign is_linst = (curr_opcode == `L_INST);
|
||||
assign is_itype = (curr_opcode == `ALU_INST) || is_linst;
|
||||
assign is_stype = (curr_opcode == `S_INST);
|
||||
assign is_btype = (curr_opcode == `B_INST);
|
||||
assign is_jal = (curr_opcode == `JAL_INST);
|
||||
assign is_jalr = (curr_opcode == `JALR_INST);
|
||||
assign is_lui = (curr_opcode == `LUI_INST);
|
||||
assign is_auipc = (curr_opcode == `AUIPC_INST);
|
||||
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
|
||||
assign is_csr_immed = (is_csr) && (func3[2] == 1);
|
||||
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
|
||||
assign is_e_inst = in_instruction == 32'h00000073;
|
||||
|
||||
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
|
||||
|
||||
assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE
|
||||
assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE
|
||||
assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE
|
||||
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
|
||||
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
|
||||
|
||||
|
||||
assign curr_opcode = in_instruction[6:0];
|
||||
assign vx_join.is_join = is_join;
|
||||
assign vx_join.join_warp_num = in_warp_num;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.rd = in_instruction[11:7];
|
||||
assign VX_frE_to_bckE_req.rs1 = in_instruction[19:15];
|
||||
assign VX_frE_to_bckE_req.rs2 = in_instruction[24:20];
|
||||
assign func3 = in_instruction[14:12];
|
||||
assign func7 = in_instruction[31:25];
|
||||
assign u_12 = in_instruction[31:20];
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.PC_next = in_curr_PC + 32'h4;
|
||||
|
||||
|
||||
// Write Back sigal
|
||||
assign is_rtype = (curr_opcode == `R_INST);
|
||||
assign is_linst = (curr_opcode == `L_INST);
|
||||
assign is_itype = (curr_opcode == `ALU_INST) || is_linst;
|
||||
assign is_stype = (curr_opcode == `S_INST);
|
||||
assign is_btype = (curr_opcode == `B_INST);
|
||||
assign is_jal = (curr_opcode == `JAL_INST);
|
||||
assign is_jalr = (curr_opcode == `JALR_INST);
|
||||
assign is_lui = (curr_opcode == `LUI_INST);
|
||||
assign is_auipc = (curr_opcode == `AUIPC_INST);
|
||||
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
|
||||
assign is_csr_immed = (is_csr) && (func3[2] == 1);
|
||||
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
|
||||
assign is_e_inst = in_instruction == 32'h00000073;
|
||||
|
||||
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
|
||||
|
||||
assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE
|
||||
assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE
|
||||
assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE
|
||||
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
|
||||
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
|
||||
|
||||
|
||||
assign VX_join.is_join = is_join;
|
||||
assign VX_join.join_warp_num = in_warp_num;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.is_wspawn = is_wspawn;
|
||||
assign VX_frE_to_bckE_req.is_tmc = is_tmc;
|
||||
assign VX_frE_to_bckE_req.is_split = is_split;
|
||||
assign VX_frE_to_bckE_req.is_barrier = is_barrier;
|
||||
assign vx_frE_to_bckE_req.is_wspawn = is_wspawn;
|
||||
assign vx_frE_to_bckE_req.is_tmc = is_tmc;
|
||||
assign vx_frE_to_bckE_req.is_split = is_split;
|
||||
assign vx_frE_to_bckE_req.is_barrier = is_barrier;
|
||||
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.csr_immed = is_csr_immed;
|
||||
assign VX_frE_to_bckE_req.is_csr = is_csr;
|
||||
assign vx_frE_to_bckE_req.csr_immed = is_csr_immed;
|
||||
assign vx_frE_to_bckE_req.is_csr = is_csr;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
||||
is_linst ? `WB_MEM :
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
`NO_WB;
|
||||
assign vx_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
||||
is_linst ? `WB_MEM :
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
`NO_WB;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
|
||||
assign vx_frE_to_bckE_req.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
|
||||
|
||||
// MEM signals
|
||||
assign VX_frE_to_bckE_req.mem_read = (is_linst) ? func3 : `NO_MEM_READ;
|
||||
assign VX_frE_to_bckE_req.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;
|
||||
// MEM signals
|
||||
assign vx_frE_to_bckE_req.mem_read = (is_linst) ? func3 : `NO_MEM_READ;
|
||||
assign vx_frE_to_bckE_req.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;
|
||||
|
||||
// UPPER IMMEDIATE
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`LUI_INST: temp_upper_immed = {func7, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.rs1, func3};
|
||||
`AUIPC_INST: temp_upper_immed = {func7, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.rs1, func3};
|
||||
default: temp_upper_immed = 20'h0;
|
||||
endcase // curr_opcode
|
||||
end
|
||||
// UPPER IMMEDIATE
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`LUI_INST: temp_upper_immed = {func7, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.rs1, func3};
|
||||
`AUIPC_INST: temp_upper_immed = {func7, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.rs1, func3};
|
||||
default: temp_upper_immed = 20'h0;
|
||||
endcase // curr_opcode
|
||||
end
|
||||
|
||||
assign VX_frE_to_bckE_req.upper_immed = temp_upper_immed;
|
||||
assign vx_frE_to_bckE_req.upper_immed = temp_upper_immed;
|
||||
|
||||
|
||||
assign jal_b_19_to_12 = in_instruction[19:12];
|
||||
assign jal_b_11 = in_instruction[20];
|
||||
assign jal_b_10_to_1 = in_instruction[30:21];
|
||||
assign jal_b_20 = in_instruction[31];
|
||||
assign jal_b_0 = 1'b0;
|
||||
assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0};
|
||||
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
|
||||
assign jal_b_19_to_12 = in_instruction[19:12];
|
||||
assign jal_b_11 = in_instruction[20];
|
||||
assign jal_b_10_to_1 = in_instruction[30:21];
|
||||
assign jal_b_20 = in_instruction[31];
|
||||
assign jal_b_0 = 1'b0;
|
||||
assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0};
|
||||
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
|
||||
|
||||
|
||||
assign jalr_immed = {func7, VX_frE_to_bckE_req.rs2};
|
||||
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
||||
assign jalr_immed = {func7, vx_frE_to_bckE_req.rs2};
|
||||
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
||||
|
||||
|
||||
assign jal_sys_cond1 = func3 == 3'h0;
|
||||
assign jal_sys_cond2 = u_12 < 12'h2;
|
||||
assign jal_sys_cond1 = func3 == 3'h0;
|
||||
assign jal_sys_cond2 = u_12 < 12'h2;
|
||||
|
||||
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
|
||||
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
|
||||
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
|
||||
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
|
||||
|
||||
// JAL
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`JAL_INST:
|
||||
begin
|
||||
temp_jal = 1'b1 && (|in_valid);
|
||||
temp_jal_offset = jal_1_offset;
|
||||
end
|
||||
`JALR_INST:
|
||||
begin
|
||||
temp_jal = 1'b1 && (|in_valid);
|
||||
temp_jal_offset = jal_2_offset;
|
||||
end
|
||||
`SYS_INST:
|
||||
begin
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) );
|
||||
temp_jal = jal_sys_jal && (|in_valid);
|
||||
temp_jal_offset = jal_sys_off;
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_jal = 1'b0 && (|in_valid);
|
||||
temp_jal_offset = 32'hdeadbeef;
|
||||
end
|
||||
// JAL
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`JAL_INST:
|
||||
begin
|
||||
temp_jal = 1'b1 && (|in_valid);
|
||||
temp_jal_offset = jal_1_offset;
|
||||
end
|
||||
`JALR_INST:
|
||||
begin
|
||||
temp_jal = 1'b1 && (|in_valid);
|
||||
temp_jal_offset = jal_2_offset;
|
||||
end
|
||||
`SYS_INST:
|
||||
begin
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) );
|
||||
temp_jal = jal_sys_jal && (|in_valid);
|
||||
temp_jal_offset = jal_sys_off;
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_jal = 1'b0 && (|in_valid);
|
||||
temp_jal_offset = 32'hdeadbeef;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign vx_frE_to_bckE_req.jalQual = is_jal;
|
||||
assign vx_frE_to_bckE_req.jal = temp_jal;
|
||||
assign vx_frE_to_bckE_req.jal_offset = temp_jal_offset;
|
||||
|
||||
// wire is_ebreak;
|
||||
|
||||
|
||||
// assign is_ebreak = is_e_inst;
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
||||
assign vx_frE_to_bckE_req.ebreak = ebreak;
|
||||
assign terminate_sim = is_e_inst;
|
||||
|
||||
|
||||
// CSR
|
||||
|
||||
assign csr_cond1 = func3 != 3'h0;
|
||||
assign csr_cond2 = u_12 >= 12'h2;
|
||||
|
||||
assign vx_frE_to_bckE_req.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
|
||||
|
||||
|
||||
// ITYPE IMEED
|
||||
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
assign alu_shift_i_immed = {{7{1'b0}}, vx_frE_to_bckE_req.rs2};
|
||||
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
|
||||
|
||||
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
|
||||
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, vx_frE_to_bckE_req.rd};
|
||||
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
|
||||
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
|
||||
default: temp_itype_immed = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign VX_frE_to_bckE_req.jalQual = is_jal;
|
||||
assign VX_frE_to_bckE_req.jal = temp_jal;
|
||||
assign VX_frE_to_bckE_req.jal_offset = temp_jal_offset;
|
||||
assign vx_frE_to_bckE_req.itype_immed = temp_itype_immed;
|
||||
|
||||
// wire is_ebreak;
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`B_INST:
|
||||
begin
|
||||
// $display("BRANCH IN DECODE");
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
case(func3)
|
||||
3'h0: temp_branch_type = `BEQ;
|
||||
3'h1: temp_branch_type = `BNE;
|
||||
3'h4: temp_branch_type = `BLT;
|
||||
3'h5: temp_branch_type = `BGT;
|
||||
3'h6: temp_branch_type = `BLTU;
|
||||
3'h7: temp_branch_type = `BGTU;
|
||||
default: temp_branch_type = `NO_BRANCH;
|
||||
endcase
|
||||
end
|
||||
|
||||
`JAL_INST:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
end
|
||||
`JALR_INST:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b0 && (|in_valid);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// assign is_ebreak = is_e_inst;
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
||||
assign VX_frE_to_bckE_req.ebreak = ebreak;
|
||||
wire out_ebreak = ebreak;
|
||||
assign terminate_sim = is_e_inst;
|
||||
assign vx_frE_to_bckE_req.branch_type = temp_branch_type;
|
||||
|
||||
assign vx_wstall.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
|
||||
assign vx_wstall.warp_num = in_warp_num;
|
||||
|
||||
// CSR
|
||||
always @(*) begin
|
||||
// ALU OP
|
||||
case(func3)
|
||||
3'h0: alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB);
|
||||
3'h1: alu_op = `SLLA;
|
||||
3'h2: alu_op = `SLT;
|
||||
3'h3: alu_op = `SLTU;
|
||||
3'h4: alu_op = `XOR;
|
||||
3'h5: alu_op = (func7 == 7'h0) ? `SRL : `SRA;
|
||||
3'h6: alu_op = `OR;
|
||||
3'h7: alu_op = `AND;
|
||||
default: alu_op = `NO_ALU;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign csr_cond1 = func3 != 3'h0;
|
||||
assign csr_cond2 = u_12 >= 12'h2;
|
||||
always @(*) begin
|
||||
// ALU OP
|
||||
case(func3)
|
||||
3'h0: mul_alu = `MUL;
|
||||
3'h1: mul_alu = `MULH;
|
||||
3'h2: mul_alu = `MULHSU;
|
||||
3'h3: mul_alu = `MULHU;
|
||||
3'h4: mul_alu = `DIV;
|
||||
3'h5: mul_alu = `DIVU;
|
||||
3'h6: mul_alu = `REM;
|
||||
3'h7: mul_alu = `REMU;
|
||||
default: mul_alu = `NO_ALU;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign VX_frE_to_bckE_req.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
|
||||
assign csr_type = func3[1:0];
|
||||
|
||||
always @(*) begin
|
||||
case(csr_type)
|
||||
2'h1: csr_alu = `CSR_ALU_RW;
|
||||
2'h2: csr_alu = `CSR_ALU_RS;
|
||||
2'h3: csr_alu = `CSR_ALU_RC;
|
||||
default: csr_alu = `NO_ALU;
|
||||
endcase
|
||||
end
|
||||
|
||||
// ITYPE IMMED
|
||||
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
assign alu_shift_i_immed = {{7{1'b0}}, VX_frE_to_bckE_req.rs2};
|
||||
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
|
||||
wire[4:0] temp_final_alu;
|
||||
|
||||
assign temp_final_alu = is_btype ? ((vx_frE_to_bckE_req.branch_type < `BLTU) ? `SUB : `SUBU) :
|
||||
is_lui ? `LUI_ALU :
|
||||
is_auipc ? `AUIPC_ALU :
|
||||
is_csr ? csr_alu :
|
||||
(is_stype || is_linst) ? `ADD :
|
||||
alu_op;
|
||||
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
|
||||
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, VX_frE_to_bckE_req.rd};
|
||||
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
|
||||
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
|
||||
default: temp_itype_immed = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign VX_frE_to_bckE_req.itype_immed = temp_itype_immed;
|
||||
|
||||
|
||||
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`B_INST:
|
||||
begin
|
||||
// $display("BRANCH IN DECODE");
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
case(func3)
|
||||
3'h0: temp_branch_type = `BEQ;
|
||||
3'h1: temp_branch_type = `BNE;
|
||||
3'h4: temp_branch_type = `BLT;
|
||||
3'h5: temp_branch_type = `BGT;
|
||||
3'h6: temp_branch_type = `BLTU;
|
||||
3'h7: temp_branch_type = `BGTU;
|
||||
default: temp_branch_type = `NO_BRANCH;
|
||||
endcase
|
||||
end
|
||||
|
||||
`JAL_INST:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
end
|
||||
`JALR_INST:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b0 && (|in_valid);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign VX_frE_to_bckE_req.branch_type = temp_branch_type;
|
||||
|
||||
assign VX_wstall.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
|
||||
assign VX_wstall.warp_num = in_warp_num;
|
||||
|
||||
// Combinational decode of the base ALU operation from func3.
//  - func3 == 0: `ALU_INST opcodes always select `ADD; any other opcode
//    selects `ADD when func7 is zero and `SUB otherwise.
//  - func3 == 5: func7 == 0 selects `SRL, anything else selects `SRA.
// NOTE(review): all eight 3'h values are listed, so the default arm
// presumably only matters for X/Z on func3 - confirm func3 is 3 bits wide.
always @(*) begin
|
||||
// ALU operation select
|
||||
case(func3)
|
||||
3'h0: alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB);
|
||||
3'h1: alu_op = `SLLA;
|
||||
3'h2: alu_op = `SLT;
|
||||
3'h3: alu_op = `SLTU;
|
||||
3'h4: alu_op = `XOR;
|
||||
3'h5: alu_op = (func7 == 7'h0) ? `SRL : `SRA;
|
||||
3'h6: alu_op = `OR;
|
||||
3'h7: alu_op = `AND;
|
||||
default: alu_op = `NO_ALU;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// Combinational decode of the multiply/divide operation from func3.
// The result is only used when the final alu_op assignment selects mul_alu
// (func7[0] set on an R-type instruction).
always @(*) begin
|
||||
// Multiply/divide operation select
|
||||
case(func3)
|
||||
3'h0: mul_alu = `MUL;
|
||||
3'h1: mul_alu = `MULH;
|
||||
3'h2: mul_alu = `MULHSU;
|
||||
3'h3: mul_alu = `MULHU;
|
||||
3'h4: mul_alu = `DIV;
|
||||
3'h5: mul_alu = `DIVU;
|
||||
3'h6: mul_alu = `REM;
|
||||
3'h7: mul_alu = `REMU;
|
||||
default: mul_alu = `NO_ALU;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
assign csr_type = func3[1:0];
|
||||
|
||||
// Combinational decode of the CSR operation from csr_type (func3[1:0]).
// Values 1/2/3 map to `CSR_ALU_RW / `CSR_ALU_RS / `CSR_ALU_RC; every other
// value (including 0) yields `NO_ALU.
always @(*) begin
|
||||
case(csr_type)
|
||||
2'h1: csr_alu = `CSR_ALU_RW;
|
||||
2'h2: csr_alu = `CSR_ALU_RS;
|
||||
2'h3: csr_alu = `CSR_ALU_RC;
|
||||
default: csr_alu = `NO_ALU;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// ALU operation chosen before the multiply/divide override is applied.
// Priority, highest first:
//   is_btype               -> `SUB when branch_type < `BLTU, else `SUBU
//   is_lui                 -> `LUI_ALU
//   is_auipc               -> `AUIPC_ALU
//   is_csr                 -> csr_alu
//   is_stype or is_linst   -> `ADD
//   otherwise              -> alu_op (decoded from func3/func7)
wire[4:0] temp_final_alu;
|
||||
|
||||
assign temp_final_alu = is_btype ? ((VX_frE_to_bckE_req.branch_type < `BLTU) ? `SUB : `SUBU) :
|
||||
is_lui ? `LUI_ALU :
|
||||
is_auipc ? `AUIPC_ALU :
|
||||
is_csr ? csr_alu :
|
||||
(is_stype || is_linst) ? `ADD :
|
||||
alu_op;
|
||||
|
||||
assign VX_frE_to_bckE_req.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||
assign vx_frE_to_bckE_req.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
@@ -9,15 +9,35 @@
|
||||
// `define ASIC 1
|
||||
// `define SYN_FUNC 1
|
||||
|
||||
`define STRINGIFY(x) `"x`"
|
||||
|
||||
// Elaboration-time check: expands to a generate block that calls
// $error(msg) when cond evaluates false.
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
if (!(cond)) $error(msg); \
|
||||
endgenerate
|
||||
|
||||
// Number of bits needed to index x items, never less than 1.
// The argument is parenthesized in the comparison so that expression
// arguments (e.g. `LOG2UP(a | b)) keep their intended precedence.
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
||||
|
||||
`define NUM_CORES_PER_CLUSTER (`NUM_CORES / `NUM_CLUSTERS)
|
||||
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
`define NW_BITS (`LOG2UP(`NUM_WARPS))
|
||||
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
`define NT_BITS (`LOG2UP(`NUM_THREADS))
|
||||
|
||||
`define NC_BITS `LOG2UP(`NUM_CORES)
|
||||
`define NC_BITS (`LOG2UP(`NUM_CORES))
|
||||
|
||||
`define NUM_GPRS 32
|
||||
|
||||
`define CSR_ADDR_SIZE 12
|
||||
|
||||
`define NUM_CSRS 1024
|
||||
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
// CSR addresses; the values match the RISC-V cycle / cycleh / instret /
// instreth counter CSR numbers.
// No trailing ';' in the macro bodies, so they can be used inside
// expressions (comparisons, case items) as well as in assignments.
`define CSR_CYCL_L 12'hC00
|
||||
`define CSR_CYCL_H 12'hC80
|
||||
`define CSR_INST_L 12'hC02
|
||||
`define CSR_INST_H 12'hC82
|
||||
|
||||
`define R_INST 7'd51
|
||||
`define L_INST 7'd3
|
||||
@@ -115,6 +135,9 @@
|
||||
// Bank Number of words in a line
|
||||
`define DBANK_LINE_WORDS (`DBANK_LINE_SIZE_BYTES / `DWORD_SIZE_BYTES)
|
||||
|
||||
// Word size in bits
|
||||
`define DWORD_SIZE_BITS (`DWORD_SIZE_BYTES * 8)
|
||||
|
||||
// ======================= Icache Configurable Knobs ==========================
|
||||
|
||||
// Function ID
|
||||
|
||||
@@ -5,78 +5,69 @@ module VX_dmem_controller (
|
||||
input wire reset,
|
||||
|
||||
// Dram <-> Dcache
|
||||
VX_gpu_dcache_dram_req_inter VX_gpu_dcache_dram_req,
|
||||
VX_gpu_dcache_dram_res_inter VX_gpu_dcache_dram_res,
|
||||
VX_gpu_snp_req_rsp VX_gpu_dcache_snp_req,
|
||||
VX_gpu_dcache_dram_req_inter vx_gpu_dcache_dram_req,
|
||||
VX_gpu_dcache_dram_rsp_inter vx_gpu_dcache_dram_res,
|
||||
VX_gpu_snp_req_rsp vx_gpu_dcache_snp_req,
|
||||
|
||||
// Dram <-> Icache
|
||||
VX_gpu_dcache_dram_req_inter VX_gpu_icache_dram_req,
|
||||
VX_gpu_dcache_dram_res_inter VX_gpu_icache_dram_res,
|
||||
VX_gpu_snp_req_rsp VX_gpu_icache_snp_req,
|
||||
VX_gpu_dcache_dram_req_inter vx_gpu_icache_dram_req,
|
||||
VX_gpu_dcache_dram_rsp_inter vx_gpu_icache_dram_res,
|
||||
VX_gpu_snp_req_rsp vx_gpu_icache_snp_req,
|
||||
|
||||
// Core <-> Dcache
|
||||
VX_gpu_dcache_res_inter VX_dcache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_dcache_req,
|
||||
VX_gpu_dcache_rsp_inter vx_dcache_rsp,
|
||||
VX_gpu_dcache_req_inter vx_dcache_req,
|
||||
|
||||
// Core <-> Icache
|
||||
VX_gpu_dcache_res_inter VX_icache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_icache_req
|
||||
VX_gpu_dcache_rsp_inter vx_icache_rsp,
|
||||
VX_gpu_dcache_req_inter vx_icache_req
|
||||
);
|
||||
|
||||
VX_gpu_dcache_rsp_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp_smem();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_smem();
|
||||
|
||||
VX_gpu_dcache_res_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) VX_dcache_rsp_smem();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) VX_dcache_req_smem();
|
||||
VX_gpu_dcache_rsp_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp_dcache();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_dcache();
|
||||
|
||||
|
||||
VX_gpu_dcache_res_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) VX_dcache_rsp_dcache();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) VX_dcache_req_dcache();
|
||||
|
||||
|
||||
wire to_shm = VX_dcache_req.core_req_addr[0][31:24] == 8'hFF;
|
||||
wire dcache_wants_wb = (|VX_dcache_rsp_dcache.core_wb_valid);
|
||||
wire to_shm = vx_dcache_req.core_req_addr[0][31:24] == 8'hFF;
|
||||
wire dcache_wants_wb = (|vx_dcache_rsp_dcache.core_wb_valid);
|
||||
|
||||
// Dcache Request
|
||||
assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}};
|
||||
assign VX_dcache_req_dcache.core_req_addr = VX_dcache_req.core_req_addr;
|
||||
assign VX_dcache_req_dcache.core_req_writedata = VX_dcache_req.core_req_writedata;
|
||||
assign VX_dcache_req_dcache.core_req_mem_read = VX_dcache_req.core_req_mem_read;
|
||||
assign VX_dcache_req_dcache.core_req_mem_write = VX_dcache_req.core_req_mem_write;
|
||||
assign VX_dcache_req_dcache.core_req_rd = VX_dcache_req.core_req_rd;
|
||||
assign VX_dcache_req_dcache.core_req_wb = VX_dcache_req.core_req_wb;
|
||||
assign VX_dcache_req_dcache.core_req_warp_num = VX_dcache_req.core_req_warp_num;
|
||||
assign VX_dcache_req_dcache.core_req_pc = VX_dcache_req.core_req_pc;
|
||||
assign VX_dcache_req_dcache.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
|
||||
|
||||
assign vx_dcache_req_dcache.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}};
|
||||
assign vx_dcache_req_dcache.core_req_addr = vx_dcache_req.core_req_addr;
|
||||
assign vx_dcache_req_dcache.core_req_writedata = vx_dcache_req.core_req_writedata;
|
||||
assign vx_dcache_req_dcache.core_req_mem_read = vx_dcache_req.core_req_mem_read;
|
||||
assign vx_dcache_req_dcache.core_req_mem_write = vx_dcache_req.core_req_mem_write;
|
||||
assign vx_dcache_req_dcache.core_req_rd = vx_dcache_req.core_req_rd;
|
||||
assign vx_dcache_req_dcache.core_req_wb = vx_dcache_req.core_req_wb;
|
||||
assign vx_dcache_req_dcache.core_req_warp_num = vx_dcache_req.core_req_warp_num;
|
||||
assign vx_dcache_req_dcache.core_req_pc = vx_dcache_req.core_req_pc;
|
||||
assign vx_dcache_req_dcache.core_no_wb_slot = vx_dcache_req.core_no_wb_slot;
|
||||
|
||||
// Shared Memory Request
|
||||
assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}};
|
||||
assign VX_dcache_req_smem.core_req_addr = VX_dcache_req.core_req_addr;
|
||||
assign VX_dcache_req_smem.core_req_writedata = VX_dcache_req.core_req_writedata;
|
||||
assign VX_dcache_req_smem.core_req_mem_read = VX_dcache_req.core_req_mem_read;
|
||||
assign VX_dcache_req_smem.core_req_mem_write = VX_dcache_req.core_req_mem_write;
|
||||
assign VX_dcache_req_smem.core_req_rd = VX_dcache_req.core_req_rd;
|
||||
assign VX_dcache_req_smem.core_req_wb = VX_dcache_req.core_req_wb;
|
||||
assign VX_dcache_req_smem.core_req_warp_num = VX_dcache_req.core_req_warp_num;
|
||||
assign VX_dcache_req_smem.core_req_pc = VX_dcache_req.core_req_pc;
|
||||
assign VX_dcache_req_smem.core_no_wb_slot = VX_dcache_req.core_no_wb_slot || dcache_wants_wb;
|
||||
|
||||
assign vx_dcache_req_smem.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}};
|
||||
assign vx_dcache_req_smem.core_req_addr = vx_dcache_req.core_req_addr;
|
||||
assign vx_dcache_req_smem.core_req_writedata = vx_dcache_req.core_req_writedata;
|
||||
assign vx_dcache_req_smem.core_req_mem_read = vx_dcache_req.core_req_mem_read;
|
||||
assign vx_dcache_req_smem.core_req_mem_write = vx_dcache_req.core_req_mem_write;
|
||||
assign vx_dcache_req_smem.core_req_rd = vx_dcache_req.core_req_rd;
|
||||
assign vx_dcache_req_smem.core_req_wb = vx_dcache_req.core_req_wb;
|
||||
assign vx_dcache_req_smem.core_req_warp_num = vx_dcache_req.core_req_warp_num;
|
||||
assign vx_dcache_req_smem.core_req_pc = vx_dcache_req.core_req_pc;
|
||||
assign vx_dcache_req_smem.core_no_wb_slot = vx_dcache_req.core_no_wb_slot || dcache_wants_wb;
|
||||
|
||||
// Dcache Response
|
||||
assign VX_dcache_rsp.core_wb_valid = dcache_wants_wb ? VX_dcache_rsp_dcache.core_wb_valid : VX_dcache_rsp_smem.core_wb_valid;
|
||||
assign VX_dcache_rsp.core_wb_req_rd = dcache_wants_wb ? VX_dcache_rsp_dcache.core_wb_req_rd : VX_dcache_rsp_smem.core_wb_req_rd;
|
||||
assign VX_dcache_rsp.core_wb_req_wb = dcache_wants_wb ? VX_dcache_rsp_dcache.core_wb_req_wb : VX_dcache_rsp_smem.core_wb_req_wb;
|
||||
assign VX_dcache_rsp.core_wb_warp_num = dcache_wants_wb ? VX_dcache_rsp_dcache.core_wb_warp_num : VX_dcache_rsp_smem.core_wb_warp_num;
|
||||
assign VX_dcache_rsp.core_wb_readdata = dcache_wants_wb ? VX_dcache_rsp_dcache.core_wb_readdata : VX_dcache_rsp_smem.core_wb_readdata;
|
||||
assign VX_dcache_rsp.core_wb_pc = dcache_wants_wb ? VX_dcache_rsp_dcache.core_wb_pc : VX_dcache_rsp_smem.core_wb_pc;
|
||||
|
||||
assign VX_dcache_rsp.delay_req = to_shm ? VX_dcache_rsp_smem.delay_req : VX_dcache_rsp_dcache.delay_req;
|
||||
|
||||
|
||||
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_res();
|
||||
assign vx_dcache_rsp.core_wb_valid = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_valid : vx_dcache_rsp_smem.core_wb_valid;
|
||||
assign vx_dcache_rsp.core_wb_req_rd = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_req_rd : vx_dcache_rsp_smem.core_wb_req_rd;
|
||||
assign vx_dcache_rsp.core_wb_req_wb = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_req_wb : vx_dcache_rsp_smem.core_wb_req_wb;
|
||||
assign vx_dcache_rsp.core_wb_warp_num = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_warp_num : vx_dcache_rsp_smem.core_wb_warp_num;
|
||||
assign vx_dcache_rsp.core_wb_readdata = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_readdata : vx_dcache_rsp_smem.core_wb_readdata;
|
||||
assign vx_dcache_rsp.core_wb_pc = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_pc : vx_dcache_rsp_smem.core_wb_pc;
|
||||
|
||||
assign vx_dcache_rsp.delay_req = to_shm ? vx_dcache_rsp_smem.delay_req : vx_dcache_rsp_dcache.delay_req;
|
||||
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_smem_dram_req();
|
||||
VX_gpu_dcache_dram_rsp_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_smem_dram_res();
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`SCACHE_SIZE_BYTES),
|
||||
@@ -99,69 +90,67 @@ module VX_dmem_controller (
|
||||
.PRFQ_STRIDE (`SPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`SSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
gpu_smem
|
||||
(
|
||||
) gpu_smem (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core req
|
||||
.core_req_valid (VX_dcache_req_smem.core_req_valid),
|
||||
.core_req_addr (VX_dcache_req_smem.core_req_addr),
|
||||
.core_req_writedata(VX_dcache_req_smem.core_req_writedata),
|
||||
.core_req_mem_read (VX_dcache_req_smem.core_req_mem_read),
|
||||
.core_req_mem_write(VX_dcache_req_smem.core_req_mem_write),
|
||||
.core_req_rd (VX_dcache_req_smem.core_req_rd),
|
||||
.core_req_wb (VX_dcache_req_smem.core_req_wb),
|
||||
.core_req_warp_num (VX_dcache_req_smem.core_req_warp_num),
|
||||
.core_req_pc (VX_dcache_req_smem.core_req_pc),
|
||||
.core_req_valid (vx_dcache_req_smem.core_req_valid),
|
||||
.core_req_mem_read (vx_dcache_req_smem.core_req_mem_read),
|
||||
.core_req_mem_write(vx_dcache_req_smem.core_req_mem_write),
|
||||
.core_req_addr (vx_dcache_req_smem.core_req_addr),
|
||||
.core_req_writedata(vx_dcache_req_smem.core_req_writedata),
|
||||
.core_req_rd (vx_dcache_req_smem.core_req_rd),
|
||||
.core_req_wb (vx_dcache_req_smem.core_req_wb),
|
||||
.core_req_warp_num (vx_dcache_req_smem.core_req_warp_num),
|
||||
.core_req_pc (vx_dcache_req_smem.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (VX_dcache_rsp_smem.delay_req),
|
||||
.delay_req (vx_dcache_rsp_smem.delay_req),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (VX_dcache_req_smem.core_no_wb_slot),
|
||||
.core_no_wb_slot (vx_dcache_req_smem.core_no_wb_slot),
|
||||
|
||||
// Cache CWB
|
||||
.core_wb_valid (VX_dcache_rsp_smem.core_wb_valid),
|
||||
.core_wb_req_rd (VX_dcache_rsp_smem.core_wb_req_rd),
|
||||
.core_wb_req_wb (VX_dcache_rsp_smem.core_wb_req_wb),
|
||||
.core_wb_warp_num (VX_dcache_rsp_smem.core_wb_warp_num),
|
||||
.core_wb_readdata (VX_dcache_rsp_smem.core_wb_readdata),
|
||||
.core_wb_pc (VX_dcache_rsp_smem.core_wb_pc),
|
||||
.core_wb_address (),
|
||||
.core_wb_valid (vx_dcache_rsp_smem.core_wb_valid),
|
||||
.core_wb_req_rd (vx_dcache_rsp_smem.core_wb_req_rd),
|
||||
.core_wb_req_wb (vx_dcache_rsp_smem.core_wb_req_wb),
|
||||
.core_wb_warp_num (vx_dcache_rsp_smem.core_wb_warp_num),
|
||||
.core_wb_readdata (vx_dcache_rsp_smem.core_wb_readdata),
|
||||
.core_wb_pc (vx_dcache_rsp_smem.core_wb_pc),
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.core_wb_address (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
// DRAM response
|
||||
.dram_fill_rsp (VX_gpu_smem_dram_res.dram_fill_rsp),
|
||||
.dram_fill_rsp_addr(VX_gpu_smem_dram_res.dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_data(VX_gpu_smem_dram_res.dram_fill_rsp_data),
|
||||
.dram_rsp_valid (vx_gpu_smem_dram_res.dram_rsp_valid),
|
||||
.dram_rsp_addr (vx_gpu_smem_dram_res.dram_rsp_addr),
|
||||
.dram_rsp_data (vx_gpu_smem_dram_res.dram_rsp_data),
|
||||
|
||||
// DRAM accept response
|
||||
.dram_fill_accept (VX_gpu_smem_dram_req.dram_fill_accept),
|
||||
.dram_rsp_ready (vx_gpu_smem_dram_req.dram_rsp_ready),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req (VX_gpu_smem_dram_req.dram_req),
|
||||
.dram_req_write (VX_gpu_smem_dram_req.dram_req_write),
|
||||
.dram_req_read (VX_gpu_smem_dram_req.dram_req_read),
|
||||
.dram_req_addr (VX_gpu_smem_dram_req.dram_req_addr),
|
||||
.dram_req_size (VX_gpu_smem_dram_req.dram_req_size),
|
||||
.dram_req_data (VX_gpu_smem_dram_req.dram_req_data),
|
||||
.dram_req_delay (1),
|
||||
|
||||
// Snoop Response
|
||||
.dram_req_because_of_wb(VX_gpu_smem_dram_req.dram_because_of_snp),
|
||||
.dram_snp_full (VX_gpu_smem_dram_req.dram_snp_full),
|
||||
.dram_req_read (vx_gpu_smem_dram_req.dram_req_read),
|
||||
.dram_req_write (vx_gpu_smem_dram_req.dram_req_write),
|
||||
.dram_req_addr (vx_gpu_smem_dram_req.dram_req_addr),
|
||||
.dram_req_data (vx_gpu_smem_dram_req.dram_req_data),
|
||||
.dram_req_full (1),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req (0),
|
||||
.snp_req_addr (0),
|
||||
.snp_req_delay (),
|
||||
.snp_req_valid (0),
|
||||
.snp_req_addr (0),
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.snp_req_full (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
// Snoop Forward
|
||||
.snp_fwd (),
|
||||
.snp_fwd_addr (),
|
||||
.snp_fwd_delay (0)
|
||||
);
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.snp_fwd_valid (),
|
||||
.snp_fwd_addr (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
.snp_fwd_full (0)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`DCACHE_SIZE_BYTES),
|
||||
@@ -184,72 +173,65 @@ module VX_dmem_controller (
|
||||
.PRFQ_STRIDE (`DPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`DSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
gpu_dcache
|
||||
(
|
||||
) gpu_dcache (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core req
|
||||
.core_req_valid (VX_dcache_req_dcache.core_req_valid),
|
||||
.core_req_addr (VX_dcache_req_dcache.core_req_addr),
|
||||
.core_req_writedata(VX_dcache_req_dcache.core_req_writedata),
|
||||
.core_req_mem_read (VX_dcache_req_dcache.core_req_mem_read),
|
||||
.core_req_mem_write(VX_dcache_req_dcache.core_req_mem_write),
|
||||
.core_req_rd (VX_dcache_req_dcache.core_req_rd),
|
||||
.core_req_wb (VX_dcache_req_dcache.core_req_wb),
|
||||
.core_req_warp_num (VX_dcache_req_dcache.core_req_warp_num),
|
||||
.core_req_pc (VX_dcache_req_dcache.core_req_pc),
|
||||
.core_req_valid (vx_dcache_req_dcache.core_req_valid),
|
||||
.core_req_mem_read (vx_dcache_req_dcache.core_req_mem_read),
|
||||
.core_req_mem_write(vx_dcache_req_dcache.core_req_mem_write),
|
||||
.core_req_addr (vx_dcache_req_dcache.core_req_addr),
|
||||
.core_req_writedata(vx_dcache_req_dcache.core_req_writedata),
|
||||
.core_req_rd (vx_dcache_req_dcache.core_req_rd),
|
||||
.core_req_wb (vx_dcache_req_dcache.core_req_wb),
|
||||
.core_req_warp_num (vx_dcache_req_dcache.core_req_warp_num),
|
||||
.core_req_pc (vx_dcache_req_dcache.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (VX_dcache_rsp_dcache.delay_req),
|
||||
.delay_req (vx_dcache_rsp_dcache.delay_req),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (VX_dcache_req_dcache.core_no_wb_slot),
|
||||
.core_no_wb_slot (vx_dcache_req_dcache.core_no_wb_slot),
|
||||
|
||||
// Cache CWB
|
||||
.core_wb_valid (VX_dcache_rsp_dcache.core_wb_valid),
|
||||
.core_wb_req_rd (VX_dcache_rsp_dcache.core_wb_req_rd),
|
||||
.core_wb_req_wb (VX_dcache_rsp_dcache.core_wb_req_wb),
|
||||
.core_wb_warp_num (VX_dcache_rsp_dcache.core_wb_warp_num),
|
||||
.core_wb_readdata (VX_dcache_rsp_dcache.core_wb_readdata),
|
||||
.core_wb_pc (VX_dcache_rsp_dcache.core_wb_pc),
|
||||
.core_wb_valid (vx_dcache_rsp_dcache.core_wb_valid),
|
||||
.core_wb_req_rd (vx_dcache_rsp_dcache.core_wb_req_rd),
|
||||
.core_wb_req_wb (vx_dcache_rsp_dcache.core_wb_req_wb),
|
||||
.core_wb_warp_num (vx_dcache_rsp_dcache.core_wb_warp_num),
|
||||
.core_wb_readdata (vx_dcache_rsp_dcache.core_wb_readdata),
|
||||
.core_wb_pc (vx_dcache_rsp_dcache.core_wb_pc),
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.core_wb_address (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
// DRAM response
|
||||
.dram_fill_rsp (VX_gpu_dcache_dram_res.dram_fill_rsp),
|
||||
.dram_fill_rsp_addr(VX_gpu_dcache_dram_res.dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_data(VX_gpu_dcache_dram_res.dram_fill_rsp_data),
|
||||
.dram_rsp_valid (vx_gpu_dcache_dram_res.dram_rsp_valid),
|
||||
.dram_rsp_addr (vx_gpu_dcache_dram_res.dram_rsp_addr),
|
||||
.dram_rsp_data (vx_gpu_dcache_dram_res.dram_rsp_data),
|
||||
|
||||
// DRAM accept response
|
||||
.dram_fill_accept (VX_gpu_dcache_dram_req.dram_fill_accept),
|
||||
.dram_rsp_ready (vx_gpu_dcache_dram_req.dram_rsp_ready),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req (VX_gpu_dcache_dram_req.dram_req),
|
||||
.dram_req_write (VX_gpu_dcache_dram_req.dram_req_write),
|
||||
.dram_req_read (VX_gpu_dcache_dram_req.dram_req_read),
|
||||
.dram_req_addr (VX_gpu_dcache_dram_req.dram_req_addr),
|
||||
.dram_req_size (VX_gpu_dcache_dram_req.dram_req_size),
|
||||
.dram_req_data (VX_gpu_dcache_dram_req.dram_req_data),
|
||||
.dram_req_delay (VX_gpu_dcache_dram_req.dram_req_delay),
|
||||
|
||||
// Snoop Response
|
||||
.dram_req_because_of_wb(VX_gpu_dcache_dram_req.dram_because_of_snp),
|
||||
.dram_snp_full (VX_gpu_dcache_dram_req.dram_snp_full),
|
||||
.dram_req_read (vx_gpu_dcache_dram_req.dram_req_read),
|
||||
.dram_req_write (vx_gpu_dcache_dram_req.dram_req_write),
|
||||
.dram_req_addr (vx_gpu_dcache_dram_req.dram_req_addr),
|
||||
.dram_req_data (vx_gpu_dcache_dram_req.dram_req_data),
|
||||
.dram_req_full (vx_gpu_dcache_dram_req.dram_req_full),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req (VX_gpu_dcache_snp_req.snp_req),
|
||||
.snp_req_addr (VX_gpu_dcache_snp_req.snp_req_addr),
|
||||
.snp_req_delay (VX_gpu_dcache_snp_req.snp_delay),
|
||||
|
||||
.snp_req_valid (vx_gpu_dcache_snp_req.snp_req_valid),
|
||||
.snp_req_addr (vx_gpu_dcache_snp_req.snp_req_addr),
|
||||
.snp_req_full (vx_gpu_dcache_snp_req.snp_req_full),
|
||||
|
||||
// Snoop Forward
|
||||
.snp_fwd (),
|
||||
.snp_fwd_addr (),
|
||||
.snp_fwd_delay (0)
|
||||
);
|
||||
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.snp_fwd_valid (),
|
||||
.snp_fwd_addr (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
.snp_fwd_full (0)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`ICACHE_SIZE_BYTES),
|
||||
@@ -272,71 +254,64 @@ module VX_dmem_controller (
|
||||
.PRFQ_STRIDE (`IPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
gpu_icache
|
||||
(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
) gpu_icache (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core req
|
||||
.core_req_valid (VX_icache_req.core_req_valid),
|
||||
.core_req_addr (VX_icache_req.core_req_addr),
|
||||
.core_req_writedata(VX_icache_req.core_req_writedata),
|
||||
.core_req_mem_read (VX_icache_req.core_req_mem_read),
|
||||
.core_req_mem_write(VX_icache_req.core_req_mem_write),
|
||||
.core_req_rd (VX_icache_req.core_req_rd),
|
||||
.core_req_wb (VX_icache_req.core_req_wb),
|
||||
.core_req_warp_num (VX_icache_req.core_req_warp_num),
|
||||
.core_req_pc (VX_icache_req.core_req_pc),
|
||||
.core_req_valid (vx_icache_req.core_req_valid),
|
||||
.core_req_mem_read (vx_icache_req.core_req_mem_read),
|
||||
.core_req_mem_write (vx_icache_req.core_req_mem_write),
|
||||
.core_req_addr (vx_icache_req.core_req_addr),
|
||||
.core_req_writedata (vx_icache_req.core_req_writedata),
|
||||
.core_req_rd (vx_icache_req.core_req_rd),
|
||||
.core_req_wb (vx_icache_req.core_req_wb),
|
||||
.core_req_warp_num (vx_icache_req.core_req_warp_num),
|
||||
.core_req_pc (vx_icache_req.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (VX_icache_rsp.delay_req),
|
||||
.delay_req (vx_icache_rsp.delay_req),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (VX_icache_req.core_no_wb_slot),
|
||||
.core_no_wb_slot (vx_icache_req.core_no_wb_slot),
|
||||
|
||||
// Cache CWB
|
||||
.core_wb_valid (VX_icache_rsp.core_wb_valid),
|
||||
.core_wb_req_rd (VX_icache_rsp.core_wb_req_rd),
|
||||
.core_wb_req_wb (VX_icache_rsp.core_wb_req_wb),
|
||||
.core_wb_warp_num (VX_icache_rsp.core_wb_warp_num),
|
||||
.core_wb_readdata (VX_icache_rsp.core_wb_readdata),
|
||||
.core_wb_pc (VX_icache_rsp.core_wb_pc),
|
||||
.core_wb_address (),
|
||||
.core_wb_valid (vx_icache_rsp.core_wb_valid),
|
||||
.core_wb_req_rd (vx_icache_rsp.core_wb_req_rd),
|
||||
.core_wb_req_wb (vx_icache_rsp.core_wb_req_wb),
|
||||
.core_wb_warp_num (vx_icache_rsp.core_wb_warp_num),
|
||||
.core_wb_readdata (vx_icache_rsp.core_wb_readdata),
|
||||
.core_wb_pc (vx_icache_rsp.core_wb_pc),
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.core_wb_address (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
// DRAM response
|
||||
.dram_fill_rsp (VX_gpu_icache_dram_res.dram_fill_rsp),
|
||||
.dram_fill_rsp_addr(VX_gpu_icache_dram_res.dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_data(VX_gpu_icache_dram_res.dram_fill_rsp_data),
|
||||
.dram_rsp_valid (vx_gpu_icache_dram_res.dram_rsp_valid),
|
||||
.dram_rsp_addr (vx_gpu_icache_dram_res.dram_rsp_addr),
|
||||
.dram_rsp_data (vx_gpu_icache_dram_res.dram_rsp_data),
|
||||
|
||||
// DRAM accept response
|
||||
.dram_fill_accept (VX_gpu_icache_dram_req.dram_fill_accept),
|
||||
.dram_rsp_ready (vx_gpu_icache_dram_req.dram_rsp_ready),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req (VX_gpu_icache_dram_req.dram_req),
|
||||
.dram_req_write (VX_gpu_icache_dram_req.dram_req_write),
|
||||
.dram_req_read (VX_gpu_icache_dram_req.dram_req_read),
|
||||
.dram_req_addr (VX_gpu_icache_dram_req.dram_req_addr),
|
||||
.dram_req_size (VX_gpu_icache_dram_req.dram_req_size),
|
||||
.dram_req_data (VX_gpu_icache_dram_req.dram_req_data),
|
||||
.dram_req_delay (VX_gpu_icache_dram_req.dram_req_delay),
|
||||
|
||||
// Snoop Response
|
||||
.dram_req_because_of_wb(VX_gpu_icache_dram_req.dram_because_of_snp),
|
||||
.dram_snp_full (VX_gpu_icache_dram_req.dram_snp_full),
|
||||
|
||||
.dram_req_read (vx_gpu_icache_dram_req.dram_req_read),
|
||||
.dram_req_write (vx_gpu_icache_dram_req.dram_req_write),
|
||||
.dram_req_addr (vx_gpu_icache_dram_req.dram_req_addr),
|
||||
.dram_req_data (vx_gpu_icache_dram_req.dram_req_data),
|
||||
.dram_req_full (vx_gpu_icache_dram_req.dram_req_full),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req (VX_gpu_icache_snp_req.snp_req),
|
||||
.snp_req_addr (VX_gpu_icache_snp_req.snp_req_addr),
|
||||
.snp_req_delay (VX_gpu_icache_snp_req.snp_delay),
|
||||
.snp_req_valid (vx_gpu_icache_snp_req.snp_req_valid),
|
||||
.snp_req_addr (vx_gpu_icache_snp_req.snp_req_addr),
|
||||
.snp_req_full (vx_gpu_icache_snp_req.snp_req_full),
|
||||
|
||||
// Snoop Forward
|
||||
.snp_fwd (),
|
||||
.snp_fwd_addr (),
|
||||
.snp_fwd_delay (0)
|
||||
);
|
||||
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.snp_fwd_valid (),
|
||||
.snp_fwd_addr (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
.snp_fwd_full (0)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -4,15 +4,15 @@ module VX_execute_unit (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// Request
|
||||
VX_exec_unit_req_inter VX_exec_unit_req,
|
||||
VX_exec_unit_req_inter vx_exec_unit_req,
|
||||
|
||||
// Output
|
||||
// Writeback
|
||||
VX_inst_exec_wb_inter VX_inst_exec_wb,
|
||||
VX_inst_exec_wb_inter vx_inst_exec_wb,
|
||||
// JAL Response
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_jal_response_inter vx_jal_rsp,
|
||||
// Branch Response
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
VX_branch_response_inter vx_branch_rsp,
|
||||
|
||||
input wire no_slot_exec,
|
||||
output wire out_delay
|
||||
@@ -23,23 +23,24 @@ module VX_execute_unit (
|
||||
wire[4:0] in_alu_op;
|
||||
wire in_rs2_src;
|
||||
wire[31:0] in_itype_immed;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[2:0] in_branch_type;
|
||||
/* verilator lint_on UNUSED */
|
||||
wire[19:0] in_upper_immed;
|
||||
wire in_jal;
|
||||
wire[31:0] in_jal_offset;
|
||||
wire[31:0] in_curr_PC;
|
||||
|
||||
assign in_a_reg_data = VX_exec_unit_req.a_reg_data;
|
||||
assign in_b_reg_data = VX_exec_unit_req.b_reg_data;
|
||||
assign in_alu_op = VX_exec_unit_req.alu_op;
|
||||
assign in_rs2_src = VX_exec_unit_req.rs2_src;
|
||||
assign in_itype_immed = VX_exec_unit_req.itype_immed;
|
||||
assign in_branch_type = VX_exec_unit_req.branch_type;
|
||||
assign in_upper_immed = VX_exec_unit_req.upper_immed;
|
||||
assign in_jal = VX_exec_unit_req.jal;
|
||||
assign in_jal_offset = VX_exec_unit_req.jal_offset;
|
||||
assign in_curr_PC = VX_exec_unit_req.curr_PC;
|
||||
|
||||
assign in_a_reg_data = vx_exec_unit_req.a_reg_data;
|
||||
assign in_b_reg_data = vx_exec_unit_req.b_reg_data;
|
||||
assign in_alu_op = vx_exec_unit_req.alu_op;
|
||||
assign in_rs2_src = vx_exec_unit_req.rs2_src;
|
||||
assign in_itype_immed = vx_exec_unit_req.itype_immed;
|
||||
assign in_branch_type = vx_exec_unit_req.branch_type;
|
||||
assign in_upper_immed = vx_exec_unit_req.upper_immed;
|
||||
assign in_jal = vx_exec_unit_req.jal;
|
||||
assign in_jal_offset = vx_exec_unit_req.jal_offset;
|
||||
assign in_curr_PC = vx_exec_unit_req.curr_PC;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[`NUM_THREADS-1:0] alu_stall;
|
||||
@@ -68,11 +69,15 @@ module VX_execute_unit (
|
||||
|
||||
assign out_delay = no_slot_exec || internal_stall;
|
||||
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index;
|
||||
wire jal_branch_found_valid;
|
||||
VX_generic_priority_encoder #(.N(`NUM_THREADS)) choose_alu_result(
|
||||
.valids(VX_exec_unit_req.valid),
|
||||
wire jal_branch_found_valid;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(`NUM_THREADS)
|
||||
) choose_alu_result (
|
||||
.valids(vx_exec_unit_req.valid),
|
||||
.index (jal_branch_use_index),
|
||||
.found (jal_branch_found_valid)
|
||||
);
|
||||
@@ -82,7 +87,7 @@ module VX_execute_unit (
|
||||
reg temp_branch_dir;
|
||||
always @(*)
|
||||
begin
|
||||
case(VX_exec_unit_req.branch_type)
|
||||
case (vx_exec_unit_req.branch_type)
|
||||
`BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN;
|
||||
`BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
`BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
@@ -99,35 +104,35 @@ module VX_execute_unit (
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
|
||||
assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next;
|
||||
assign duplicate_PC_data[i] = vx_exec_unit_req.PC_next;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
// VX_inst_exec_wb_inter VX_inst_exec_wb_temp();
|
||||
// VX_inst_exec_wb_inter vx_inst_exec_wb_temp();
|
||||
// JAL Response
|
||||
VX_jal_response_inter VX_jal_rsp_temp();
|
||||
VX_jal_response_inter vx_jal_rsp_temp();
|
||||
// Branch Response
|
||||
VX_branch_response_inter VX_branch_rsp_temp();
|
||||
VX_branch_response_inter vx_branch_rsp_temp();
|
||||
|
||||
// Actual Writeback
|
||||
assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd;
|
||||
assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb;
|
||||
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}};
|
||||
assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
|
||||
assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
|
||||
assign vx_inst_exec_wb.rd = vx_exec_unit_req.rd;
|
||||
assign vx_inst_exec_wb.wb = vx_exec_unit_req.wb;
|
||||
assign vx_inst_exec_wb.wb_valid = vx_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}};
|
||||
assign vx_inst_exec_wb.wb_warp_num = vx_exec_unit_req.warp_num;
|
||||
assign vx_inst_exec_wb.alu_result = vx_exec_unit_req.jal ? duplicate_PC_data : alu_result;
|
||||
|
||||
assign VX_inst_exec_wb.exec_wb_pc = in_curr_PC;
|
||||
assign vx_inst_exec_wb.exec_wb_pc = in_curr_PC;
|
||||
// Jal rsp
|
||||
assign VX_jal_rsp_temp.jal = in_jal;
|
||||
assign VX_jal_rsp_temp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
|
||||
assign VX_jal_rsp_temp.jal_warp_num = VX_exec_unit_req.warp_num;
|
||||
assign vx_jal_rsp_temp.jal = in_jal;
|
||||
assign vx_jal_rsp_temp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
|
||||
assign vx_jal_rsp_temp.jal_warp_num = vx_exec_unit_req.warp_num;
|
||||
|
||||
// Branch rsp
|
||||
assign VX_branch_rsp_temp.valid_branch = (VX_exec_unit_req.branch_type != `NO_BRANCH) && (|VX_exec_unit_req.valid);
|
||||
assign VX_branch_rsp_temp.branch_dir = temp_branch_dir;
|
||||
assign VX_branch_rsp_temp.branch_warp_num = VX_exec_unit_req.warp_num;
|
||||
assign VX_branch_rsp_temp.branch_dest = $signed(VX_exec_unit_req.curr_PC) + ($signed(VX_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset
|
||||
assign vx_branch_rsp_temp.valid_branch = (vx_exec_unit_req.branch_type != `NO_BRANCH) && (|vx_exec_unit_req.valid);
|
||||
assign vx_branch_rsp_temp.branch_dir = temp_branch_dir;
|
||||
assign vx_branch_rsp_temp.branch_warp_num = vx_exec_unit_req.warp_num;
|
||||
assign vx_branch_rsp_temp.branch_dest = $signed(vx_exec_unit_req.curr_PC) + ($signed(vx_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset
|
||||
|
||||
|
||||
wire zero = 0;
|
||||
@@ -137,27 +142,31 @@ module VX_execute_unit (
|
||||
// .reset(reset),
|
||||
// .stall(zero),
|
||||
// .flush(zero),
|
||||
// .in ({VX_inst_exec_wb_temp.rd, VX_inst_exec_wb_temp.wb, VX_inst_exec_wb_temp.wb_valid, VX_inst_exec_wb_temp.wb_warp_num, VX_inst_exec_wb_temp.alu_result, VX_inst_exec_wb_temp.exec_wb_pc}),
|
||||
// .out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc })
|
||||
// .in ({vx_inst_exec_wb_temp.rd, vx_inst_exec_wb_temp.wb, vx_inst_exec_wb_temp.wb_valid, vx_inst_exec_wb_temp.wb_warp_num, vx_inst_exec_wb_temp.alu_result, vx_inst_exec_wb_temp.exec_wb_pc}),
|
||||
// .out ({vx_inst_exec_wb.rd , vx_inst_exec_wb.wb , vx_inst_exec_wb.wb_valid , vx_inst_exec_wb.wb_warp_num , vx_inst_exec_wb.alu_result , vx_inst_exec_wb.exec_wb_pc })
|
||||
// );
|
||||
|
||||
VX_generic_register #(.N(33 + `NW_BITS-1 + 1)) jal_reg(
|
||||
VX_generic_register #(
|
||||
.N(33 + `NW_BITS-1 + 1)
|
||||
) jal_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(zero),
|
||||
.flush(zero),
|
||||
.in ({VX_jal_rsp_temp.jal, VX_jal_rsp_temp.jal_dest, VX_jal_rsp_temp.jal_warp_num}),
|
||||
.out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num})
|
||||
);
|
||||
.in ({vx_jal_rsp_temp.jal, vx_jal_rsp_temp.jal_dest, vx_jal_rsp_temp.jal_warp_num}),
|
||||
.out ({vx_jal_rsp.jal , vx_jal_rsp.jal_dest , vx_jal_rsp.jal_warp_num})
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(34 + `NW_BITS-1 + 1)) branch_reg(
|
||||
VX_generic_register #(
|
||||
.N(34 + `NW_BITS-1 + 1)
|
||||
) branch_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(zero),
|
||||
.flush(zero),
|
||||
.in ({VX_branch_rsp_temp.valid_branch, VX_branch_rsp_temp.branch_dir, VX_branch_rsp_temp.branch_warp_num, VX_branch_rsp_temp.branch_dest}),
|
||||
.out ({VX_branch_rsp.valid_branch , VX_branch_rsp.branch_dir , VX_branch_rsp.branch_warp_num , VX_branch_rsp.branch_dest })
|
||||
);
|
||||
.in ({vx_branch_rsp_temp.valid_branch, vx_branch_rsp_temp.branch_dir, vx_branch_rsp_temp.branch_warp_num, vx_branch_rsp_temp.branch_dest}),
|
||||
.out ({vx_branch_rsp.valid_branch , vx_branch_rsp.branch_dir , vx_branch_rsp.branch_warp_num , vx_branch_rsp.branch_dest })
|
||||
);
|
||||
|
||||
// always @(*) begin
|
||||
// case(in_alu_op)
|
||||
@@ -169,8 +178,7 @@ module VX_execute_unit (
|
||||
|
||||
// end
|
||||
|
||||
|
||||
// assign out_is_csr = VX_exec_unit_req.is_csr;
|
||||
// assign out_csr_address = VX_exec_unit_req.csr_address;
|
||||
// assign out_is_csr = vx_exec_unit_req.is_csr;
|
||||
// assign out_csr_address = vx_exec_unit_req.csr_address;
|
||||
|
||||
endmodule : VX_execute_unit
|
||||
@@ -3,103 +3,103 @@
|
||||
module VX_fetch (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_wstall_inter VX_wstall,
|
||||
VX_join_inter VX_join,
|
||||
VX_wstall_inter vx_wstall,
|
||||
VX_join_inter vx_join,
|
||||
input wire schedule_delay,
|
||||
input wire icache_stage_delay,
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
|
||||
output wire out_ebreak,
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
VX_jal_response_inter vx_jal_rsp,
|
||||
VX_branch_response_inter vx_branch_rsp,
|
||||
VX_inst_meta_inter fe_inst_meta_fi,
|
||||
VX_warp_ctl_inter VX_warp_ctl
|
||||
VX_warp_ctl_inter vx_warp_ctl
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0] thread_mask;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[31:0] warp_pc;
|
||||
wire scheduled_warp;
|
||||
wire[`NUM_THREADS-1:0] thread_mask;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[31:0] warp_pc;
|
||||
wire scheduled_warp;
|
||||
|
||||
|
||||
wire pipe_stall;
|
||||
wire pipe_stall;
|
||||
|
||||
|
||||
// Only reason this is there is because there is a hidden assumption that decode is exactly after fetch
|
||||
// Only reason this is there is because there is a hidden assumption that decode is exactly after fetch
|
||||
|
||||
// Locals
|
||||
// Locals
|
||||
|
||||
|
||||
assign pipe_stall = schedule_delay || icache_stage_delay;
|
||||
assign pipe_stall = schedule_delay || icache_stage_delay;
|
||||
|
||||
VX_warp_scheduler warp_scheduler(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipe_stall),
|
||||
VX_warp_scheduler warp_scheduler(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipe_stall),
|
||||
|
||||
.is_barrier (VX_warp_ctl.is_barrier),
|
||||
.barrier_id (VX_warp_ctl.barrier_id),
|
||||
.num_warps (VX_warp_ctl.num_warps),
|
||||
.barrier_warp_num (VX_warp_ctl.warp_num),
|
||||
.is_barrier (vx_warp_ctl.is_barrier),
|
||||
.barrier_id (vx_warp_ctl.barrier_id),
|
||||
.num_warps (vx_warp_ctl.num_warps),
|
||||
.barrier_warp_num (vx_warp_ctl.warp_num),
|
||||
|
||||
// Wspawn
|
||||
.wspawn (VX_warp_ctl.wspawn),
|
||||
.wsapwn_pc (VX_warp_ctl.wspawn_pc),
|
||||
.wspawn_new_active(VX_warp_ctl.wspawn_new_active),
|
||||
// CTM
|
||||
.ctm (VX_warp_ctl.change_mask),
|
||||
.ctm_mask (VX_warp_ctl.thread_mask),
|
||||
.ctm_warp_num (VX_warp_ctl.warp_num),
|
||||
// WHALT
|
||||
.whalt (VX_warp_ctl.ebreak),
|
||||
.whalt_warp_num (VX_warp_ctl.warp_num),
|
||||
// Wstall
|
||||
.wstall (VX_wstall.wstall),
|
||||
.wstall_warp_num (VX_wstall.warp_num),
|
||||
// Wspawn
|
||||
.wspawn (vx_warp_ctl.wspawn),
|
||||
.wsapwn_pc (vx_warp_ctl.wspawn_pc),
|
||||
.wspawn_new_active(vx_warp_ctl.wspawn_new_active),
|
||||
// CTM
|
||||
.ctm (vx_warp_ctl.change_mask),
|
||||
.ctm_mask (vx_warp_ctl.thread_mask),
|
||||
.ctm_warp_num (vx_warp_ctl.warp_num),
|
||||
// WHALT
|
||||
.whalt (vx_warp_ctl.ebreak),
|
||||
.whalt_warp_num (vx_warp_ctl.warp_num),
|
||||
// Wstall
|
||||
.wstall (vx_wstall.wstall),
|
||||
.wstall_warp_num (vx_wstall.warp_num),
|
||||
|
||||
// Lock/release Stuff
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
// Lock/release Stuff
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
|
||||
// Join
|
||||
.is_join (VX_join.is_join),
|
||||
.join_warp_num (VX_join.join_warp_num),
|
||||
// Join
|
||||
.is_join (vx_join.is_join),
|
||||
.join_warp_num (vx_join.join_warp_num),
|
||||
|
||||
// Split
|
||||
.is_split (VX_warp_ctl.is_split),
|
||||
.dont_split (VX_warp_ctl.dont_split),
|
||||
.split_new_mask (VX_warp_ctl.split_new_mask),
|
||||
.split_later_mask (VX_warp_ctl.split_later_mask),
|
||||
.split_save_pc (VX_warp_ctl.split_save_pc),
|
||||
.split_warp_num (VX_warp_ctl.warp_num),
|
||||
// Split
|
||||
.is_split (vx_warp_ctl.is_split),
|
||||
.dont_split (vx_warp_ctl.dont_split),
|
||||
.split_new_mask (vx_warp_ctl.split_new_mask),
|
||||
.split_later_mask (vx_warp_ctl.split_later_mask),
|
||||
.split_save_pc (vx_warp_ctl.split_save_pc),
|
||||
.split_warp_num (vx_warp_ctl.warp_num),
|
||||
|
||||
// JAL
|
||||
.jal (VX_jal_rsp.jal),
|
||||
.jal_dest (VX_jal_rsp.jal_dest),
|
||||
.jal_warp_num (VX_jal_rsp.jal_warp_num),
|
||||
// JAL
|
||||
.jal (vx_jal_rsp.jal),
|
||||
.jal_dest (vx_jal_rsp.jal_dest),
|
||||
.jal_warp_num (vx_jal_rsp.jal_warp_num),
|
||||
|
||||
// Branch
|
||||
.branch_valid (VX_branch_rsp.valid_branch),
|
||||
.branch_dir (VX_branch_rsp.branch_dir),
|
||||
.branch_dest (VX_branch_rsp.branch_dest),
|
||||
.branch_warp_num (VX_branch_rsp.branch_warp_num),
|
||||
// Branch
|
||||
.branch_valid (vx_branch_rsp.valid_branch),
|
||||
.branch_dir (vx_branch_rsp.branch_dir),
|
||||
.branch_dest (vx_branch_rsp.branch_dest),
|
||||
.branch_warp_num (vx_branch_rsp.branch_warp_num),
|
||||
|
||||
// Outputs
|
||||
.thread_mask (thread_mask),
|
||||
.warp_num (warp_num),
|
||||
.warp_pc (warp_pc),
|
||||
.out_ebreak (out_ebreak),
|
||||
.scheduled_warp (scheduled_warp)
|
||||
);
|
||||
|
||||
assign fe_inst_meta_fi.warp_num = warp_num;
|
||||
assign fe_inst_meta_fi.valid = thread_mask;
|
||||
assign fe_inst_meta_fi.instruction = 32'h0;
|
||||
assign fe_inst_meta_fi.inst_pc = warp_pc;
|
||||
|
||||
wire start_mat_add = scheduled_warp && (warp_pc == 32'h80000ed8) && (warp_num == 0);
|
||||
wire end_mat_add = scheduled_warp && (warp_pc == 32'h80000fbc) && (warp_num == 0);
|
||||
// Outputs
|
||||
.thread_mask (thread_mask),
|
||||
.warp_num (warp_num),
|
||||
.warp_pc (warp_pc),
|
||||
.out_ebreak (out_ebreak),
|
||||
.scheduled_warp (scheduled_warp)
|
||||
);
|
||||
|
||||
assign fe_inst_meta_fi.warp_num = warp_num;
|
||||
assign fe_inst_meta_fi.valid = thread_mask;
|
||||
assign fe_inst_meta_fi.instruction = 32'h0;
|
||||
assign fe_inst_meta_fi.inst_pc = warp_pc;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire start_mat_add = scheduled_warp && (warp_pc == 32'h80000ed8) && (warp_num == 0);
|
||||
wire end_mat_add = scheduled_warp && (warp_pc == 32'h80000fbc) && (warp_num == 0);
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
endmodule
|
||||
@@ -6,15 +6,15 @@ module VX_front_end (
|
||||
|
||||
input wire schedule_delay,
|
||||
|
||||
VX_warp_ctl_inter VX_warp_ctl,
|
||||
VX_warp_ctl_inter vx_warp_ctl,
|
||||
|
||||
VX_gpu_dcache_res_inter VX_icache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_icache_req,
|
||||
VX_gpu_dcache_rsp_inter vx_icache_rsp,
|
||||
VX_gpu_dcache_req_inter vx_icache_req,
|
||||
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
VX_jal_response_inter vx_jal_rsp,
|
||||
VX_branch_response_inter vx_branch_rsp,
|
||||
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_frE_to_bckE_req_inter vx_bckE_req,
|
||||
|
||||
output wire fetch_ebreak
|
||||
);
|
||||
@@ -24,8 +24,8 @@ VX_inst_meta_inter fe_inst_meta_fi();
|
||||
VX_inst_meta_inter fe_inst_meta_fi2();
|
||||
VX_inst_meta_inter fe_inst_meta_id();
|
||||
|
||||
VX_frE_to_bckE_req_inter VX_frE_to_bckE_req();
|
||||
VX_inst_meta_inter fd_inst_meta_de();
|
||||
VX_frE_to_bckE_req_inter vx_frE_to_bckE_req();
|
||||
VX_inst_meta_inter fd_inst_meta_de();
|
||||
|
||||
wire total_freeze = schedule_delay;
|
||||
wire icache_stage_delay;
|
||||
@@ -52,21 +52,21 @@ end
|
||||
assign fetch_ebreak = vortex_ebreak || terminate_sim || old_ebreak;
|
||||
|
||||
|
||||
VX_wstall_inter VX_wstall();
|
||||
VX_join_inter VX_join();
|
||||
VX_wstall_inter vx_wstall();
|
||||
VX_join_inter vx_join();
|
||||
|
||||
VX_fetch vx_fetch(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.VX_wstall (VX_wstall),
|
||||
.VX_join (VX_join),
|
||||
.vx_wstall (vx_wstall),
|
||||
.vx_join (vx_join),
|
||||
.schedule_delay (schedule_delay),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_warp_ctl (VX_warp_ctl),
|
||||
.vx_jal_rsp (vx_jal_rsp),
|
||||
.vx_warp_ctl (vx_warp_ctl),
|
||||
.icache_stage_delay (icache_stage_delay),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.vx_branch_rsp (vx_branch_rsp),
|
||||
.out_ebreak (vortex_ebreak), // fetch_ebreak
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi)
|
||||
);
|
||||
@@ -84,7 +84,7 @@ VX_f_d_reg vx_f_i_reg(
|
||||
.fd_inst_meta_de(fe_inst_meta_fi2)
|
||||
);
|
||||
|
||||
VX_icache_stage VX_icache_stage(
|
||||
VX_icache_stage vx_icache_stage(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.total_freeze (total_freeze),
|
||||
@@ -93,8 +93,8 @@ VX_icache_stage VX_icache_stage(
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi2),
|
||||
.fe_inst_meta_id (fe_inst_meta_id),
|
||||
.VX_icache_rsp (VX_icache_rsp),
|
||||
.VX_icache_req (VX_icache_req)
|
||||
.vx_icache_rsp (vx_icache_rsp),
|
||||
.vx_icache_req (vx_icache_req)
|
||||
);
|
||||
|
||||
|
||||
@@ -109,9 +109,9 @@ VX_i_d_reg vx_i_d_reg(
|
||||
|
||||
VX_decode vx_decode(
|
||||
.fd_inst_meta_de (fd_inst_meta_de),
|
||||
.VX_frE_to_bckE_req(VX_frE_to_bckE_req),
|
||||
.VX_wstall (VX_wstall),
|
||||
.VX_join (VX_join),
|
||||
.vx_frE_to_bckE_req(vx_frE_to_bckE_req),
|
||||
.vx_wstall (vx_wstall),
|
||||
.vx_join (vx_join),
|
||||
.terminate_sim (terminate_sim)
|
||||
);
|
||||
|
||||
@@ -122,8 +122,8 @@ VX_d_e_reg vx_d_e_reg(
|
||||
.reset (reset),
|
||||
.in_branch_stall(no_br_stall),
|
||||
.in_freeze (total_freeze),
|
||||
.VX_frE_to_bckE_req(VX_frE_to_bckE_req),
|
||||
.VX_bckE_req (VX_bckE_req)
|
||||
.vx_frE_to_bckE_req(vx_frE_to_bckE_req),
|
||||
.vx_bckE_req (vx_bckE_req)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
|
||||
module VX_generic_queue
|
||||
#(
|
||||
parameter DATAW = 4,
|
||||
parameter SIZE = 277
|
||||
)
|
||||
(
|
||||
module VX_generic_queue #(
|
||||
parameter DATAW = 4,
|
||||
parameter SIZE = 277
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire push,
|
||||
@@ -16,31 +13,26 @@ module VX_generic_queue
|
||||
output wire full
|
||||
);
|
||||
|
||||
reg[DATAW-1:0] data[SIZE-1:0];
|
||||
reg[$clog2(SIZE)-1:0] head;
|
||||
reg[$clog2(SIZE)-1:0] tail;
|
||||
reg [DATAW-1:0] data [SIZE-1:0];
|
||||
reg [`LOG2UP(SIZE)-1:0] head;
|
||||
reg [`LOG2UP(SIZE)-1:0] tail;
|
||||
|
||||
assign empty = head == tail;
|
||||
assign full = head == (tail+1);
|
||||
assign empty = (head == tail);
|
||||
assign full = (head == (tail+1));
|
||||
|
||||
integer i;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
head <= 0;
|
||||
tail <= 0;
|
||||
for (i = 0; i < SIZE; i=i+1) begin
|
||||
data[i] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (push && !full) begin
|
||||
data[tail] <= in_data;
|
||||
tail <= tail+1;
|
||||
end
|
||||
|
||||
if (pop && !empty) begin
|
||||
head <= head + 1;
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -1,40 +1,36 @@
|
||||
module VX_generic_queue_ll
|
||||
#(
|
||||
parameter DATAW = 4,
|
||||
parameter SIZE = 277
|
||||
)
|
||||
(
|
||||
module VX_generic_queue_ll #(
|
||||
parameter DATAW,
|
||||
parameter SIZE = 16
|
||||
) (
|
||||
/* verilator lint_off UNUSED */
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire push,
|
||||
input wire [DATAW-1:0] in_data,
|
||||
|
||||
input wire pop,
|
||||
output wire [DATAW-1:0] out_data,
|
||||
input wire pop,
|
||||
output wire empty,
|
||||
output wire full
|
||||
output wire full,
|
||||
/* verilator lint_on UNUSED */
|
||||
input wire [DATAW-1:0] in_data,
|
||||
output wire [DATAW-1:0] out_data
|
||||
);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
|
||||
if (SIZE == 0) begin
|
||||
|
||||
assign empty = 1;
|
||||
assign out_data = 0;
|
||||
assign out_data = in_data;
|
||||
assign full = 0;
|
||||
|
||||
end else begin // (SIZE > 0)
|
||||
|
||||
|
||||
`ifdef QUEUE_FORCE_MLAB
|
||||
(* syn_ramstyle = "mlab" *) reg[DATAW-1:0] data[SIZE-1:0];
|
||||
(* syn_ramstyle = "mlab" *) reg [DATAW-1:0] data [SIZE-1:0];
|
||||
`else
|
||||
reg[ DATAW-1:0] data[SIZE-1:0];
|
||||
reg [DATAW-1:0] data [SIZE-1:0];
|
||||
`endif
|
||||
|
||||
reg [DATAW-1:0] head_r;
|
||||
reg [$clog2(SIZE+1)-1:0] size_r;
|
||||
wire reading;
|
||||
wire writing;
|
||||
reg [DATAW-1:0] head_r;
|
||||
reg [`LOG2UP(SIZE+1)-1:0] size_r;
|
||||
wire reading;
|
||||
wire writing;
|
||||
|
||||
assign reading = pop && !empty;
|
||||
assign writing = push && !full;
|
||||
@@ -65,9 +61,9 @@ module VX_generic_queue_ll
|
||||
end else begin // (SIZE > 1)
|
||||
|
||||
reg [DATAW-1:0] curr_r;
|
||||
reg [$clog2(SIZE)-1:0] wr_ctr_r;
|
||||
reg [$clog2(SIZE)-1:0] rd_ptr_r;
|
||||
reg [$clog2(SIZE)-1:0] rd_next_ptr_r;
|
||||
reg [`LOG2UP(SIZE)-1:0] wr_ctr_r;
|
||||
reg [`LOG2UP(SIZE)-1:0] rd_ptr_r;
|
||||
reg [`LOG2UP(SIZE)-1:0] rd_next_ptr_r;
|
||||
reg empty_r;
|
||||
reg full_r;
|
||||
reg bypass_r;
|
||||
@@ -106,7 +102,7 @@ module VX_generic_queue_ll
|
||||
data[wr_ctr_r] <= in_data;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
curr_r <= 0;
|
||||
@@ -135,7 +131,5 @@ module VX_generic_queue_ll
|
||||
assign full = full_r;
|
||||
end
|
||||
end
|
||||
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
|
||||
endmodule
|
||||
@@ -1,24 +1,24 @@
|
||||
module VX_generic_register #(
|
||||
parameter N,
|
||||
parameter PassThru = 0
|
||||
) (
|
||||
/* verilator lint_off UNUSED */
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall,
|
||||
input wire flush,
|
||||
/* verilator lint_on UNUSED */
|
||||
input wire[N-1:0] in,
|
||||
output wire[N-1:0] out
|
||||
);
|
||||
|
||||
module VX_generic_register
|
||||
#( parameter N = 1, parameter Valid = 1)
|
||||
(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall,
|
||||
input wire flush,
|
||||
input wire[(N-1):0] in,
|
||||
output wire[(N-1):0] out
|
||||
);
|
||||
|
||||
if (Valid == 0) begin
|
||||
|
||||
if (PassThru) begin
|
||||
assign out = in;
|
||||
|
||||
end else begin
|
||||
|
||||
reg[(N-1):0] value;
|
||||
reg [(N-1):0] value;
|
||||
|
||||
always @(posedge clk or posedge reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
value <= 0;
|
||||
end else if (flush) begin
|
||||
@@ -29,7 +29,6 @@ module VX_generic_register
|
||||
end
|
||||
|
||||
assign out = value;
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -2,56 +2,57 @@
|
||||
|
||||
module VX_gpgpu_inst (
|
||||
// Input
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req,
|
||||
VX_gpu_inst_req_inter vx_gpu_inst_req,
|
||||
|
||||
// Output
|
||||
VX_warp_ctl_inter VX_warp_ctl
|
||||
VX_warp_ctl_inter vx_warp_ctl
|
||||
);
|
||||
|
||||
|
||||
wire[`NUM_THREADS-1:0] curr_valids = VX_gpu_inst_req.valid;
|
||||
wire is_split = (VX_gpu_inst_req.is_split);
|
||||
wire[`NUM_THREADS-1:0] curr_valids = vx_gpu_inst_req.valid;
|
||||
wire is_split = (vx_gpu_inst_req.is_split);
|
||||
|
||||
wire[`NUM_THREADS-1:0] tmc_new_mask;
|
||||
wire all_threads = `NUM_THREADS < VX_gpu_inst_req.a_reg_data[0];
|
||||
wire all_threads = `NUM_THREADS < vx_gpu_inst_req.a_reg_data[0];
|
||||
|
||||
genvar curr_t;
|
||||
generate
|
||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
|
||||
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < VX_gpu_inst_req.a_reg_data[0];
|
||||
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < vx_gpu_inst_req.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire valid_inst = (|curr_valids);
|
||||
|
||||
assign VX_warp_ctl.warp_num = VX_gpu_inst_req.warp_num;
|
||||
assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc) && valid_inst;
|
||||
assign VX_warp_ctl.thread_mask = VX_gpu_inst_req.is_tmc ? tmc_new_mask : 0;
|
||||
assign vx_warp_ctl.warp_num = vx_gpu_inst_req.warp_num;
|
||||
assign vx_warp_ctl.change_mask = (vx_gpu_inst_req.is_tmc) && valid_inst;
|
||||
assign vx_warp_ctl.thread_mask = vx_gpu_inst_req.is_tmc ? tmc_new_mask : 0;
|
||||
|
||||
// assign VX_warp_ctl.ebreak = (VX_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
|
||||
assign VX_warp_ctl.ebreak = VX_warp_ctl.change_mask && (VX_warp_ctl.thread_mask == 0);
|
||||
// assign vx_warp_ctl.ebreak = (vx_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
|
||||
assign vx_warp_ctl.ebreak = vx_warp_ctl.change_mask && (vx_warp_ctl.thread_mask == 0);
|
||||
|
||||
|
||||
wire wspawn = VX_gpu_inst_req.is_wspawn;
|
||||
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
|
||||
wire all_active = `NUM_WARPS < VX_gpu_inst_req.a_reg_data[0];
|
||||
wire wspawn = vx_gpu_inst_req.is_wspawn;
|
||||
wire[31:0] wspawn_pc = vx_gpu_inst_req.rd2;
|
||||
wire all_active = `NUM_WARPS < vx_gpu_inst_req.a_reg_data[0];
|
||||
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
genvar curr_w;
|
||||
generate
|
||||
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
|
||||
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < VX_gpu_inst_req.a_reg_data[0];
|
||||
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < vx_gpu_inst_req.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
assign vx_warp_ctl.is_barrier = vx_gpu_inst_req.is_barrier && valid_inst;
|
||||
assign vx_warp_ctl.barrier_id = vx_gpu_inst_req.a_reg_data[0];
|
||||
|
||||
assign VX_warp_ctl.is_barrier = VX_gpu_inst_req.is_barrier && valid_inst;
|
||||
assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0];
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] num_warps_m1 = vx_gpu_inst_req.rd2 - 1;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
wire[31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1;
|
||||
assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
|
||||
assign vx_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
|
||||
|
||||
assign VX_warp_ctl.wspawn = wspawn;
|
||||
assign VX_warp_ctl.wspawn_pc = wspawn_pc;
|
||||
assign VX_warp_ctl.wspawn_new_active = wspawn_new_active;
|
||||
assign vx_warp_ctl.wspawn = wspawn;
|
||||
assign vx_warp_ctl.wspawn_pc = wspawn_pc;
|
||||
assign vx_warp_ctl.wspawn_new_active = wspawn_new_active;
|
||||
|
||||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
||||
@@ -60,7 +61,7 @@ module VX_gpgpu_inst (
|
||||
genvar curr_s_t;
|
||||
generate
|
||||
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
|
||||
wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
|
||||
wire curr_bool = (vx_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
|
||||
|
||||
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
|
||||
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
|
||||
@@ -69,23 +70,24 @@ module VX_gpgpu_inst (
|
||||
|
||||
wire[$clog2(`NUM_THREADS):0] num_valids;
|
||||
|
||||
VX_countones #(.N(`NUM_THREADS)) valids_counter (
|
||||
VX_countones #(
|
||||
.N(`NUM_THREADS)
|
||||
) valids_counter (
|
||||
.valids(curr_valids),
|
||||
.count (num_valids)
|
||||
);
|
||||
);
|
||||
|
||||
// wire[`NW_BITS-1:0] num_valids = $countones(curr_valids);
|
||||
|
||||
|
||||
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
|
||||
assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
|
||||
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
|
||||
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
|
||||
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;
|
||||
assign VX_warp_ctl.split_warp_num = VX_gpu_inst_req.warp_num;
|
||||
assign vx_warp_ctl.is_split = is_split && (num_valids > 1);
|
||||
assign vx_warp_ctl.dont_split = vx_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
|
||||
assign vx_warp_ctl.split_new_mask = split_new_use_mask;
|
||||
assign vx_warp_ctl.split_later_mask = split_new_later_mask;
|
||||
assign vx_warp_ctl.split_save_pc = vx_gpu_inst_req.pc_next;
|
||||
assign vx_warp_ctl.split_warp_num = vx_gpu_inst_req.warp_num;
|
||||
|
||||
// VX_gpu_inst_req.is_wspawn
|
||||
// VX_gpu_inst_req.is_split
|
||||
// VX_gpu_inst_req.is_barrier
|
||||
// vx_gpu_inst_req.is_wspawn
|
||||
// vx_gpu_inst_req.is_split
|
||||
// vx_gpu_inst_req.is_barrier
|
||||
|
||||
endmodule
|
||||
224
hw/rtl/VX_gpr.v
224
hw/rtl/VX_gpr.v
@@ -1,168 +1,150 @@
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire valid_write_request,
|
||||
VX_gpr_read_inter VX_gpr_read,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire valid_write_request,
|
||||
VX_gpr_read_inter vx_gpr_read,
|
||||
VX_wb_inter vx_writeback_inter,
|
||||
|
||||
output reg[`NUM_THREADS-1:0][31:0] out_a_reg_data,
|
||||
output reg[`NUM_THREADS-1:0][31:0] out_b_reg_data
|
||||
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] out_a_reg_data,
|
||||
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] out_b_reg_data
|
||||
);
|
||||
|
||||
|
||||
|
||||
wire write_enable;
|
||||
|
||||
|
||||
`ifndef ASIC
|
||||
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)) && (VX_writeback_inter.rd != 0);
|
||||
assign write_enable = valid_write_request && ((vx_writeback_inter.wb != 0)) && (vx_writeback_inter.rd != 0);
|
||||
|
||||
byte_enabled_simple_dual_port_ram first_ram(
|
||||
.we (write_enable),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.waddr (VX_writeback_inter.rd),
|
||||
.raddr1(VX_gpr_read.rs1),
|
||||
.raddr2(VX_gpr_read.rs2),
|
||||
.be (VX_writeback_inter.wb_valid),
|
||||
.wdata (VX_writeback_inter.write_data),
|
||||
.waddr (vx_writeback_inter.rd),
|
||||
.raddr1(vx_gpr_read.rs1),
|
||||
.raddr2(vx_gpr_read.rs2),
|
||||
.be (vx_writeback_inter.wb_valid),
|
||||
.wdata (vx_writeback_inter.write_data),
|
||||
.q1 (out_a_reg_data),
|
||||
.q2 (out_b_reg_data)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0));
|
||||
|
||||
|
||||
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
|
||||
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] write_bit_mask;
|
||||
assign write_enable = valid_write_request && ((vx_writeback_inter.wb != 0));
|
||||
wire going_to_write = write_enable & (|vx_writeback_inter.wb_valid);
|
||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
|
||||
|
||||
genvar curr_t;
|
||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
|
||||
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
|
||||
assign write_bit_mask[curr_t] = {32{~local_write}};
|
||||
wire local_write = write_enable & vx_writeback_inter.wb_valid[curr_t];
|
||||
assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}};
|
||||
end
|
||||
|
||||
|
||||
|
||||
// wire cenb = !going_to_write;
|
||||
wire cenb = 0;
|
||||
|
||||
// wire cena_1 = (VX_gpr_read.rs1 == 0);
|
||||
// wire cena_2 = (VX_gpr_read.rs2 == 0);
|
||||
// wire cena_1 = (vx_gpr_read.rs1 == 0);
|
||||
// wire cena_2 = (vx_gpr_read.rs2 == 0);
|
||||
wire cena_1 = 0;
|
||||
wire cena_2 = 0;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_a;
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_b;
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_a; // read data from first_ram (QA), one 32-bit word per thread
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_b; // read data from second_ram (QA), one 32-bit word per thread
|
||||
|
||||
|
||||
`ifndef SYN
|
||||
genvar thread;
|
||||
genvar curr_bit;
|
||||
for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
|
||||
`ifndef SYN
|
||||
genvar thread;
|
||||
genvar curr_bit;
|
||||
for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
|
||||
begin
|
||||
for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1) // iterate over the bits of each 32-bit word (bit index, not a register index)
|
||||
begin
|
||||
for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1)
|
||||
begin
|
||||
assign out_a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
|
||||
assign out_b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
|
||||
end
|
||||
assign out_a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
|
||||
assign out_b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
|
||||
end
|
||||
|
||||
`else
|
||||
|
||||
end
|
||||
`else
|
||||
assign out_a_reg_data = temp_a;
|
||||
assign out_b_reg_data = temp_b;
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
|
||||
wire[`NUM_THREADS-1:0][31:0] to_write = (vx_writeback_inter.rd != 0) ? vx_writeback_inter.write_data : 0; // force zero data when rd == 0; 32 is the word width
|
||||
|
||||
genvar curr_base_thread;
|
||||
for (curr_base_thread = 0; curr_base_thread < `NUM_THREADS; curr_base_thread=curr_base_thread+4) // step by 4: each RAM instance below is wired to a 4-thread slice [base+3:base]
|
||||
begin
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(VX_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(vx_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(vx_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 second_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(VX_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
rf2_32x128_wm1 second_ram ( // same cell as first_ram; the name is a literal identifier and must not contain a `NUM_GPRS macro reference
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(vx_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(vx_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
end
|
||||
|
||||
@@ -11,108 +11,100 @@ module VX_gpr_stage (
|
||||
output wire gpr_stage_delay,
|
||||
|
||||
// inputs
|
||||
// Instruction Information
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
|
||||
// WriteBack inputs
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
|
||||
|
||||
// Instruction Information
|
||||
VX_frE_to_bckE_req_inter vx_bckE_req,
|
||||
|
||||
// WriteBack inputs
|
||||
VX_wb_inter vx_writeback_inter,
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_inter VX_exec_unit_req,
|
||||
VX_lsu_req_inter VX_lsu_req,
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req,
|
||||
VX_csr_req_inter VX_csr_req
|
||||
VX_exec_unit_req_inter vx_exec_unit_req,
|
||||
VX_lsu_req_inter vx_lsu_req,
|
||||
VX_gpu_inst_req_inter vx_gpu_inst_req,
|
||||
VX_csr_req_inter vx_csr_req
|
||||
);
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] curr_PC = vx_bckE_req.curr_PC;
|
||||
wire[2:0] branchType = vx_bckE_req.branch_type;
|
||||
wire is_store = (vx_bckE_req.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (vx_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire jalQual = vx_bckE_req.jalQual;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
VX_gpr_read_inter vx_gpr_read();
|
||||
assign vx_gpr_read.rs1 = vx_bckE_req.rs1;
|
||||
assign vx_gpr_read.rs2 = vx_bckE_req.rs2;
|
||||
assign vx_gpr_read.warp_num = vx_bckE_req.warp_num;
|
||||
|
||||
wire[31:0] curr_PC = VX_bckE_req.curr_PC;
|
||||
wire[2:0] branchType = VX_bckE_req.branch_type;
|
||||
`ifndef ASIC
|
||||
VX_gpr_jal_inter vx_gpr_jal();
|
||||
assign vx_gpr_jal.is_jal = vx_bckE_req.jalQual;
|
||||
assign vx_gpr_jal.curr_PC = vx_bckE_req.curr_PC;
|
||||
`else
|
||||
VX_gpr_jal_inter vx_gpr_jal();
|
||||
assign vx_gpr_jal.is_jal = vx_exec_unit_req.jalQual;
|
||||
assign vx_gpr_jal.curr_PC = vx_exec_unit_req.curr_PC;
|
||||
`endif
|
||||
|
||||
wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
|
||||
VX_gpr_data_inter vx_gpr_datf();
|
||||
|
||||
VX_gpr_wrapper vx_grp_wrapper (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.vx_writeback_inter(vx_writeback_inter),
|
||||
.vx_gpr_read (vx_gpr_read),
|
||||
.vx_gpr_jal (vx_gpr_jal),
|
||||
|
||||
wire jalQual = VX_bckE_req.jalQual;
|
||||
.out_a_reg_data (vx_gpr_datf.a_reg_data),
|
||||
.out_b_reg_data (vx_gpr_datf.b_reg_data)
|
||||
);
|
||||
|
||||
VX_gpr_read_inter VX_gpr_read();
|
||||
assign VX_gpr_read.rs1 = VX_bckE_req.rs1;
|
||||
assign VX_gpr_read.rs2 = VX_bckE_req.rs2;
|
||||
assign VX_gpr_read.warp_num = VX_bckE_req.warp_num;
|
||||
|
||||
`ifndef ASIC
|
||||
VX_gpr_jal_inter VX_gpr_jal();
|
||||
assign VX_gpr_jal.is_jal = VX_bckE_req.jalQual;
|
||||
assign VX_gpr_jal.curr_PC = VX_bckE_req.curr_PC;
|
||||
`else
|
||||
VX_gpr_jal_inter VX_gpr_jal();
|
||||
assign VX_gpr_jal.is_jal = VX_exec_unit_req.jalQual;
|
||||
assign VX_gpr_jal.curr_PC = VX_exec_unit_req.curr_PC;
|
||||
`endif
|
||||
|
||||
|
||||
VX_gpr_data_inter VX_gpr_datf();
|
||||
|
||||
|
||||
VX_gpr_wrapper vx_grp_wrapper(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.VX_writeback_inter(VX_writeback_inter),
|
||||
.VX_gpr_read (VX_gpr_read),
|
||||
.VX_gpr_jal (VX_gpr_jal),
|
||||
|
||||
.out_a_reg_data (VX_gpr_datf.a_reg_data),
|
||||
.out_b_reg_data (VX_gpr_datf.b_reg_data)
|
||||
);
|
||||
|
||||
// assign VX_bckE_req.is_csr = is_csr;
|
||||
// assign VX_bckE_req_out.csr_mask = (VX_bckE_req.sr_immed == 1'b1) ? {27'h0, VX_bckE_req.rs1} : VX_gpr_data.a_reg_data[0];
|
||||
// assign vx_bckE_req.is_csr = is_csr;
|
||||
// assign vx_bckE_req_out.csr_mask = (vx_bckE_req.sr_immed == 1'b1) ? {27'h0, vx_bckE_req.rs1} : vx_gpr_data.a_reg_data[0];
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_inter VX_exec_unit_req_temp();
|
||||
VX_lsu_req_inter VX_lsu_req_temp();
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req_temp();
|
||||
VX_csr_req_inter VX_csr_req_temp();
|
||||
|
||||
VX_inst_multiplex VX_inst_mult(
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.VX_gpr_data (VX_gpr_datf),
|
||||
.VX_exec_unit_req(VX_exec_unit_req_temp),
|
||||
.VX_lsu_req (VX_lsu_req_temp),
|
||||
.VX_gpu_inst_req (VX_gpu_inst_req_temp),
|
||||
.VX_csr_req (VX_csr_req_temp)
|
||||
);
|
||||
|
||||
wire is_lsu = (|VX_lsu_req_temp.valid);
|
||||
VX_exec_unit_req_inter vx_exec_unit_req_temp();
|
||||
VX_lsu_req_inter vx_lsu_req_temp();
|
||||
VX_gpu_inst_req_inter vx_gpu_inst_req_temp();
|
||||
VX_csr_req_inter vx_csr_req_temp();
|
||||
|
||||
VX_inst_multiplex vx_inst_mult(
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.vx_gpr_data (vx_gpr_datf),
|
||||
.vx_exec_unit_req(vx_exec_unit_req_temp),
|
||||
.vx_lsu_req (vx_lsu_req_temp),
|
||||
.vx_gpu_inst_req (vx_gpu_inst_req_temp),
|
||||
.vx_csr_req (vx_csr_req_temp)
|
||||
);
|
||||
/* verilator lint_off UNUSED */
|
||||
wire is_lsu = (|vx_lsu_req_temp.valid);
|
||||
/* verilator lint_on UNUSED */
|
||||
wire stall_rest = 0;
|
||||
wire flush_rest = schedule_delay;
|
||||
|
||||
|
||||
wire stall_lsu = memory_delay;
|
||||
wire flush_lsu = schedule_delay && !stall_lsu;
|
||||
|
||||
wire stall_exec = exec_delay;
|
||||
wire flush_exec = schedule_delay && !stall_exec;
|
||||
|
||||
wire stall_csr = stall_gpr_csr && VX_bckE_req.is_csr && (|VX_bckE_req.valid);
|
||||
wire stall_csr = stall_gpr_csr && vx_bckE_req.is_csr && (|vx_bckE_req.valid);
|
||||
|
||||
assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr;
|
||||
|
||||
`ifdef ASIC
|
||||
`ifdef ASIC
|
||||
wire delayed_lsu_last_cycle;
|
||||
|
||||
VX_generic_register #(.N(1)) delayed_reg (
|
||||
VX_generic_register #(
|
||||
.N(1)
|
||||
) delayed_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.flush(stall_rest),
|
||||
.in (stall_lsu),
|
||||
.out (delayed_lsu_last_cycle)
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_store_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data
|
||||
@@ -122,107 +114,120 @@ module VX_gpr_stage (
|
||||
|
||||
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
|
||||
|
||||
VX_generic_register #(.N(`NUM_THREADS*32*2)) lsu_data(
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS*32*2)
|
||||
) lsu_data (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(!store_curr_real),
|
||||
.flush(stall_rest),
|
||||
.in ({real_store_data, real_base_address}),
|
||||
.out ({temp_store_data, temp_base_address})
|
||||
);
|
||||
);
|
||||
|
||||
assign real_store_data = VX_lsu_req_temp.store_data;
|
||||
assign real_base_address = VX_lsu_req_temp.base_address;
|
||||
assign real_store_data = vx_lsu_req_temp.store_data;
|
||||
assign real_base_address = vx_lsu_req_temp.base_address;
|
||||
|
||||
assign vx_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
|
||||
assign vx_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
|
||||
|
||||
assign VX_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
|
||||
assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
|
||||
|
||||
|
||||
VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))) lsu_reg(
|
||||
VX_generic_register #(
|
||||
.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_lsu),
|
||||
.flush(flush_lsu),
|
||||
.in ({VX_lsu_req_temp.valid, VX_lsu_req_temp.lsu_pc, VX_lsu_req_temp.warp_num, VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write, VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}),
|
||||
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
|
||||
);
|
||||
.in ({vx_lsu_req_temp.valid, vx_lsu_req_temp.lsu_pc, vx_lsu_req_temp.warp_num, vx_lsu_req_temp.offset, vx_lsu_req_temp.mem_read, vx_lsu_req_temp.mem_write, vx_lsu_req_temp.rd, vx_lsu_req_temp.wb}),
|
||||
.out ({vx_lsu_req.valid , vx_lsu_req.lsu_pc ,vx_lsu_req.warp_num , vx_lsu_req.offset , vx_lsu_req.mem_read , vx_lsu_req.mem_write , vx_lsu_req.rd , vx_lsu_req.wb })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))) exec_unit_reg(
|
||||
VX_generic_register #(
|
||||
.N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))
|
||||
) exec_unit_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_exec),
|
||||
.flush(flush_exec),
|
||||
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
|
||||
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
|
||||
);
|
||||
.in ({vx_exec_unit_req_temp.valid, vx_exec_unit_req_temp.warp_num, vx_exec_unit_req_temp.curr_PC, vx_exec_unit_req_temp.PC_next, vx_exec_unit_req_temp.rd, vx_exec_unit_req_temp.wb, vx_exec_unit_req_temp.alu_op, vx_exec_unit_req_temp.rs1, vx_exec_unit_req_temp.rs2, vx_exec_unit_req_temp.rs2_src, vx_exec_unit_req_temp.itype_immed, vx_exec_unit_req_temp.upper_immed, vx_exec_unit_req_temp.branch_type, vx_exec_unit_req_temp.jalQual, vx_exec_unit_req_temp.jal, vx_exec_unit_req_temp.jal_offset, vx_exec_unit_req_temp.ebreak, vx_exec_unit_req_temp.wspawn, vx_exec_unit_req_temp.is_csr, vx_exec_unit_req_temp.csr_address, vx_exec_unit_req_temp.csr_immed, vx_exec_unit_req_temp.csr_mask}),
|
||||
.out ({vx_exec_unit_req.valid , vx_exec_unit_req.warp_num , vx_exec_unit_req.curr_PC , vx_exec_unit_req.PC_next , vx_exec_unit_req.rd , vx_exec_unit_req.wb , vx_exec_unit_req.alu_op , vx_exec_unit_req.rs1 , vx_exec_unit_req.rs2 , vx_exec_unit_req.rs2_src , vx_exec_unit_req.itype_immed , vx_exec_unit_req.upper_immed , vx_exec_unit_req.branch_type , vx_exec_unit_req.jalQual , vx_exec_unit_req.jal , vx_exec_unit_req.jal_offset , vx_exec_unit_req.ebreak , vx_exec_unit_req.wspawn , vx_exec_unit_req.is_csr , vx_exec_unit_req.csr_address , vx_exec_unit_req.csr_immed , vx_exec_unit_req.csr_mask })
|
||||
);
|
||||
|
||||
assign VX_exec_unit_req.a_reg_data = real_base_address;
|
||||
assign VX_exec_unit_req.b_reg_data = real_store_data;
|
||||
assign vx_exec_unit_req.a_reg_data = real_base_address;
|
||||
assign vx_exec_unit_req.b_reg_data = real_store_data;
|
||||
|
||||
VX_generic_register #(.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))) gpu_inst_reg(
|
||||
VX_generic_register #(
|
||||
.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))
|
||||
) gpu_inst_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.flush(flush_rest),
|
||||
.in ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num, VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc, VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier, VX_gpu_inst_req_temp.pc_next}),
|
||||
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next })
|
||||
);
|
||||
.in ({vx_gpu_inst_req_temp.valid, vx_gpu_inst_req_temp.warp_num, vx_gpu_inst_req_temp.is_wspawn, vx_gpu_inst_req_temp.is_tmc, vx_gpu_inst_req_temp.is_split, vx_gpu_inst_req_temp.is_barrier, vx_gpu_inst_req_temp.pc_next}),
|
||||
.out ({vx_gpu_inst_req.valid , vx_gpu_inst_req.warp_num , vx_gpu_inst_req.is_wspawn , vx_gpu_inst_req.is_tmc , vx_gpu_inst_req.is_split , vx_gpu_inst_req.is_barrier , vx_gpu_inst_req.pc_next })
|
||||
);
|
||||
|
||||
assign VX_gpu_inst_req.a_reg_data = real_base_address;
|
||||
assign VX_gpu_inst_req.rd2 = real_store_data;
|
||||
assign vx_gpu_inst_req.a_reg_data = real_base_address;
|
||||
assign vx_gpu_inst_req.rd2 = real_store_data;
|
||||
|
||||
VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg(
|
||||
VX_generic_register #(
|
||||
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_gpr_csr),
|
||||
.flush(flush_rest),
|
||||
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
|
||||
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
|
||||
);
|
||||
.in ({vx_csr_req_temp.valid, vx_csr_req_temp.warp_num, vx_csr_req_temp.rd, vx_csr_req_temp.wb, vx_csr_req_temp.alu_op, vx_csr_req_temp.is_csr, vx_csr_req_temp.csr_address, vx_csr_req_temp.csr_immed, vx_csr_req_temp.csr_mask}),
|
||||
.out ({vx_csr_req.valid , vx_csr_req.warp_num , vx_csr_req.rd , vx_csr_req.wb , vx_csr_req.alu_op , vx_csr_req.is_csr , vx_csr_req.csr_address , vx_csr_req.csr_immed , vx_csr_req.csr_mask })
|
||||
);
|
||||
|
||||
|
||||
// assign
|
||||
|
||||
`else
|
||||
`else
|
||||
|
||||
// 341
|
||||
VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) lsu_reg(
|
||||
VX_generic_register #(
|
||||
.N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_lsu),
|
||||
.flush(flush_lsu),
|
||||
.in ({VX_lsu_req_temp.valid, VX_lsu_req_temp.lsu_pc, VX_lsu_req_temp.warp_num, VX_lsu_req_temp.store_data, VX_lsu_req_temp.base_address, VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write, VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}),
|
||||
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
|
||||
);
|
||||
.in ({vx_lsu_req_temp.valid, vx_lsu_req_temp.lsu_pc, vx_lsu_req_temp.warp_num, vx_lsu_req_temp.store_data, vx_lsu_req_temp.base_address, vx_lsu_req_temp.offset, vx_lsu_req_temp.mem_read, vx_lsu_req_temp.mem_write, vx_lsu_req_temp.rd, vx_lsu_req_temp.wb}),
|
||||
.out ({vx_lsu_req.valid , vx_lsu_req.lsu_pc , vx_lsu_req.warp_num , vx_lsu_req.store_data , vx_lsu_req.base_address , vx_lsu_req.offset , vx_lsu_req.mem_read , vx_lsu_req.mem_write , vx_lsu_req.rd , vx_lsu_req.wb })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) exec_unit_reg(
|
||||
VX_generic_register #(
|
||||
.N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))
|
||||
) exec_unit_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_exec),
|
||||
.flush(flush_exec),
|
||||
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.a_reg_data, VX_exec_unit_req_temp.b_reg_data, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
|
||||
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
|
||||
);
|
||||
.in ({vx_exec_unit_req_temp.valid, vx_exec_unit_req_temp.warp_num, vx_exec_unit_req_temp.curr_PC, vx_exec_unit_req_temp.PC_next, vx_exec_unit_req_temp.rd, vx_exec_unit_req_temp.wb, vx_exec_unit_req_temp.a_reg_data, vx_exec_unit_req_temp.b_reg_data, vx_exec_unit_req_temp.alu_op, vx_exec_unit_req_temp.rs1, vx_exec_unit_req_temp.rs2, vx_exec_unit_req_temp.rs2_src, vx_exec_unit_req_temp.itype_immed, vx_exec_unit_req_temp.upper_immed, vx_exec_unit_req_temp.branch_type, vx_exec_unit_req_temp.jalQual, vx_exec_unit_req_temp.jal, vx_exec_unit_req_temp.jal_offset, vx_exec_unit_req_temp.ebreak, vx_exec_unit_req_temp.wspawn, vx_exec_unit_req_temp.is_csr, vx_exec_unit_req_temp.csr_address, vx_exec_unit_req_temp.csr_immed, vx_exec_unit_req_temp.csr_mask}),
|
||||
.out ({vx_exec_unit_req.valid , vx_exec_unit_req.warp_num , vx_exec_unit_req.curr_PC , vx_exec_unit_req.PC_next , vx_exec_unit_req.rd , vx_exec_unit_req.wb , vx_exec_unit_req.a_reg_data , vx_exec_unit_req.b_reg_data , vx_exec_unit_req.alu_op , vx_exec_unit_req.rs1 , vx_exec_unit_req.rs2 , vx_exec_unit_req.rs2_src , vx_exec_unit_req.itype_immed , vx_exec_unit_req.upper_immed , vx_exec_unit_req.branch_type , vx_exec_unit_req.jalQual , vx_exec_unit_req.jal , vx_exec_unit_req.jal_offset , vx_exec_unit_req.ebreak , vx_exec_unit_req.wspawn , vx_exec_unit_req.is_csr , vx_exec_unit_req.csr_address , vx_exec_unit_req.csr_immed , vx_exec_unit_req.csr_mask })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))) gpu_inst_reg(
|
||||
VX_generic_register #(
|
||||
.N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))
|
||||
) gpu_inst_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.flush(flush_rest),
|
||||
.in ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num, VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc, VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier, VX_gpu_inst_req_temp.pc_next, VX_gpu_inst_req_temp.a_reg_data, VX_gpu_inst_req_temp.rd2}),
|
||||
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 })
|
||||
);
|
||||
.in ({vx_gpu_inst_req_temp.valid, vx_gpu_inst_req_temp.warp_num, vx_gpu_inst_req_temp.is_wspawn, vx_gpu_inst_req_temp.is_tmc, vx_gpu_inst_req_temp.is_split, vx_gpu_inst_req_temp.is_barrier, vx_gpu_inst_req_temp.pc_next, vx_gpu_inst_req_temp.a_reg_data, vx_gpu_inst_req_temp.rd2}),
|
||||
.out ({vx_gpu_inst_req.valid , vx_gpu_inst_req.warp_num , vx_gpu_inst_req.is_wspawn , vx_gpu_inst_req.is_tmc , vx_gpu_inst_req.is_split , vx_gpu_inst_req.is_barrier , vx_gpu_inst_req.pc_next , vx_gpu_inst_req.a_reg_data , vx_gpu_inst_req.rd2 })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg(
|
||||
VX_generic_register #(
|
||||
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_gpr_csr),
|
||||
.flush(flush_rest),
|
||||
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
|
||||
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
|
||||
);
|
||||
.in ({vx_csr_req_temp.valid, vx_csr_req_temp.warp_num, vx_csr_req_temp.rd, vx_csr_req_temp.wb, vx_csr_req_temp.alu_op, vx_csr_req_temp.is_csr, vx_csr_req_temp.csr_address, vx_csr_req_temp.csr_immed, vx_csr_req_temp.csr_mask}),
|
||||
.out ({vx_csr_req.valid , vx_csr_req.warp_num , vx_csr_req.rd , vx_csr_req.wb , vx_csr_req.alu_op , vx_csr_req.is_csr , vx_csr_req.csr_address , vx_csr_req.csr_immed , vx_csr_req.csr_mask })
|
||||
);
|
||||
|
||||
`endif
|
||||
`endif
|
||||
|
||||
endmodule : VX_gpr_stage
|
||||
@@ -3,9 +3,9 @@
|
||||
module VX_gpr_wrapper (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_gpr_read_inter VX_gpr_read,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
VX_gpr_jal_inter VX_gpr_jal,
|
||||
VX_gpr_read_inter vx_gpr_read,
|
||||
VX_wb_inter vx_writeback_inter,
|
||||
VX_gpr_jal_inter vx_gpr_jal,
|
||||
|
||||
output wire[`NUM_THREADS-1:0][31:0] out_a_reg_data,
|
||||
output wire[`NUM_THREADS-1:0][31:0] out_b_reg_data
|
||||
@@ -19,28 +19,30 @@ module VX_gpr_wrapper (
|
||||
genvar index;
|
||||
generate
|
||||
for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
|
||||
assign jal_data[index] = VX_gpr_jal.curr_PC;
|
||||
assign jal_data[index] = vx_gpr_jal.curr_PC;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
`ifndef ASIC
|
||||
assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[VX_gpr_read.warp_num]));
|
||||
assign out_b_reg_data = (temp_b_reg_data[VX_gpr_read.warp_num]);
|
||||
assign out_a_reg_data = (vx_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[vx_gpr_read.warp_num]));
|
||||
assign out_b_reg_data = (temp_b_reg_data[vx_gpr_read.warp_num]);
|
||||
`else
|
||||
|
||||
wire zer = 0;
|
||||
|
||||
wire[`NW_BITS-1:0] old_warp_num;
|
||||
VX_generic_register #(`NW_BITS-1+1) store_wn(
|
||||
VX_generic_register #(
|
||||
.N(`NW_BITS-1+1)
|
||||
) store_wn (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(zer),
|
||||
.flush(zer),
|
||||
.in (VX_gpr_read.warp_num),
|
||||
.in (vx_gpr_read.warp_num),
|
||||
.out (old_warp_num)
|
||||
);
|
||||
);
|
||||
|
||||
assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
|
||||
assign out_a_reg_data = (vx_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
|
||||
assign out_b_reg_data = (temp_b_reg_data[old_warp_num]);
|
||||
|
||||
`endif
|
||||
@@ -50,13 +52,13 @@ module VX_gpr_wrapper (
|
||||
|
||||
for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
|
||||
|
||||
wire valid_write_request = warp_index == VX_writeback_inter.wb_warp_num;
|
||||
wire valid_write_request = warp_index == vx_writeback_inter.wb_warp_num;
|
||||
VX_gpr vx_gpr(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_write_request(valid_write_request),
|
||||
.VX_gpr_read (VX_gpr_read),
|
||||
.VX_writeback_inter (VX_writeback_inter),
|
||||
.vx_gpr_read (vx_gpr_read),
|
||||
.vx_writeback_inter (vx_writeback_inter),
|
||||
.out_a_reg_data (temp_a_reg_data[warp_index]),
|
||||
.out_b_reg_data (temp_b_reg_data[warp_index])
|
||||
);
|
||||
@@ -65,7 +67,6 @@ module VX_gpr_wrapper (
|
||||
|
||||
endgenerate
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
@@ -5,61 +5,56 @@ module VX_icache_stage (
|
||||
input wire reset,
|
||||
input wire total_freeze,
|
||||
output wire icache_stage_delay,
|
||||
output wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
output wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
output wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
output wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
VX_inst_meta_inter fe_inst_meta_fi,
|
||||
VX_inst_meta_inter fe_inst_meta_id,
|
||||
|
||||
VX_gpu_dcache_res_inter VX_icache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_icache_req
|
||||
VX_gpu_dcache_rsp_inter vx_icache_rsp,
|
||||
VX_gpu_dcache_req_inter vx_icache_req
|
||||
);
|
||||
|
||||
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
|
||||
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_inst = (|fe_inst_meta_fi.valid);
|
||||
wire valid_inst = (|fe_inst_meta_fi.valid);
|
||||
|
||||
// Icache Request
|
||||
assign VX_icache_req.core_req_valid = valid_inst && !total_freeze;
|
||||
assign VX_icache_req.core_req_addr = fe_inst_meta_fi.inst_pc;
|
||||
assign VX_icache_req.core_req_writedata = 32'b0;
|
||||
assign VX_icache_req.core_req_mem_read = `LW_MEM_READ;
|
||||
assign VX_icache_req.core_req_mem_write = `NO_MEM_WRITE;
|
||||
assign VX_icache_req.core_req_rd = 5'b0;
|
||||
assign VX_icache_req.core_req_wb = {1{2'b1}};
|
||||
assign VX_icache_req.core_req_warp_num = fe_inst_meta_fi.warp_num;
|
||||
assign VX_icache_req.core_req_pc = fe_inst_meta_fi.inst_pc;
|
||||
// Icache Request
|
||||
assign vx_icache_req.core_req_valid = valid_inst && !total_freeze;
|
||||
assign vx_icache_req.core_req_addr = fe_inst_meta_fi.inst_pc;
|
||||
assign vx_icache_req.core_req_writedata = 32'b0;
|
||||
assign vx_icache_req.core_req_mem_read = `LW_MEM_READ;
|
||||
assign vx_icache_req.core_req_mem_write = `NO_MEM_WRITE;
|
||||
assign vx_icache_req.core_req_rd = 5'b0;
|
||||
assign vx_icache_req.core_req_wb = {1{2'b1}};
|
||||
assign vx_icache_req.core_req_warp_num = fe_inst_meta_fi.warp_num;
|
||||
assign vx_icache_req.core_req_pc = fe_inst_meta_fi.inst_pc;
|
||||
|
||||
assign fe_inst_meta_id.instruction = vx_icache_rsp.core_wb_readdata[0][31:0];
|
||||
assign fe_inst_meta_id.inst_pc = vx_icache_rsp.core_wb_pc[0];
|
||||
assign fe_inst_meta_id.warp_num = vx_icache_rsp.core_wb_warp_num;
|
||||
|
||||
assign fe_inst_meta_id.valid = vx_icache_rsp.core_wb_valid ? threads_active[vx_icache_rsp.core_wb_warp_num] : 0;
|
||||
|
||||
assign fe_inst_meta_id.instruction = VX_icache_rsp.core_wb_readdata[0][31:0];
|
||||
assign fe_inst_meta_id.inst_pc = VX_icache_rsp.core_wb_pc[0];
|
||||
assign fe_inst_meta_id.warp_num = VX_icache_rsp.core_wb_warp_num;
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
assign fe_inst_meta_id.valid = VX_icache_rsp.core_wb_valid ? threads_active[VX_icache_rsp.core_wb_warp_num] : 0;
|
||||
/* verilator lint_on WIDTH */ // re-enable WIDTH lint after the waived threads_active[] index above
|
||||
assign icache_stage_wid = fe_inst_meta_id.warp_num;
|
||||
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
|
||||
|
||||
assign icache_stage_wid = fe_inst_meta_id.warp_num;
|
||||
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
|
||||
// Cache can't accept request
|
||||
assign icache_stage_delay = vx_icache_rsp.delay_req;
|
||||
|
||||
// Cache can't accept request
|
||||
assign icache_stage_delay = VX_icache_rsp.delay_req;
|
||||
// Core can't accept response
|
||||
assign vx_icache_req.core_no_wb_slot = total_freeze;
|
||||
|
||||
// Core can't accept response
|
||||
assign VX_icache_req.core_no_wb_slot = total_freeze;
|
||||
|
||||
integer curr_w;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) threads_active[curr_w] <= 0;
|
||||
end else begin
|
||||
if (valid_inst && !icache_stage_delay) begin
|
||||
/* verilator lint_off WIDTH */
|
||||
threads_active[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
|
||||
/* verilator lint_on WIDTH */
|
||||
end
|
||||
integer curr_w;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin
|
||||
threads_active[curr_w] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (valid_inst && !icache_stage_delay) begin
|
||||
threads_active[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -2,23 +2,23 @@
|
||||
|
||||
module VX_inst_multiplex (
|
||||
// Inputs
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_gpr_data_inter VX_gpr_data,
|
||||
VX_frE_to_bckE_req_inter vx_bckE_req,
|
||||
VX_gpr_data_inter vx_gpr_data,
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_inter VX_exec_unit_req,
|
||||
VX_lsu_req_inter VX_lsu_req,
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req,
|
||||
VX_csr_req_inter VX_csr_req
|
||||
VX_exec_unit_req_inter vx_exec_unit_req,
|
||||
VX_lsu_req_inter vx_lsu_req,
|
||||
VX_gpu_inst_req_inter vx_gpu_inst_req,
|
||||
VX_csr_req_inter vx_csr_req
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0] is_mem_mask;
|
||||
wire[`NUM_THREADS-1:0] is_gpu_mask;
|
||||
wire[`NUM_THREADS-1:0] is_csr_mask;
|
||||
|
||||
wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
|
||||
wire is_csr = VX_bckE_req.is_csr;
|
||||
wire is_mem = (vx_bckE_req.mem_write != `NO_MEM_WRITE) || (vx_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire is_gpu = (vx_bckE_req.is_wspawn || vx_bckE_req.is_tmc || vx_bckE_req.is_barrier || vx_bckE_req.is_split);
|
||||
wire is_csr = vx_bckE_req.is_csr;
|
||||
// wire is_gpu = 0;
|
||||
|
||||
genvar currT;
|
||||
@@ -31,64 +31,64 @@ module VX_inst_multiplex (
|
||||
endgenerate
|
||||
|
||||
// LSU Unit
|
||||
assign VX_lsu_req.valid = VX_bckE_req.valid & is_mem_mask;
|
||||
assign VX_lsu_req.warp_num = VX_bckE_req.warp_num;
|
||||
assign VX_lsu_req.base_address = VX_gpr_data.a_reg_data;
|
||||
assign VX_lsu_req.store_data = VX_gpr_data.b_reg_data;
|
||||
assign vx_lsu_req.valid = vx_bckE_req.valid & is_mem_mask;
|
||||
assign vx_lsu_req.warp_num = vx_bckE_req.warp_num;
|
||||
assign vx_lsu_req.base_address = vx_gpr_data.a_reg_data;
|
||||
assign vx_lsu_req.store_data = vx_gpr_data.b_reg_data;
|
||||
|
||||
assign VX_lsu_req.offset = VX_bckE_req.itype_immed;
|
||||
assign vx_lsu_req.offset = vx_bckE_req.itype_immed;
|
||||
|
||||
assign VX_lsu_req.mem_read = VX_bckE_req.mem_read;
|
||||
assign VX_lsu_req.mem_write = VX_bckE_req.mem_write;
|
||||
assign VX_lsu_req.rd = VX_bckE_req.rd;
|
||||
assign VX_lsu_req.wb = VX_bckE_req.wb;
|
||||
assign VX_lsu_req.lsu_pc = VX_bckE_req.curr_PC;
|
||||
assign vx_lsu_req.mem_read = vx_bckE_req.mem_read;
|
||||
assign vx_lsu_req.mem_write = vx_bckE_req.mem_write;
|
||||
assign vx_lsu_req.rd = vx_bckE_req.rd;
|
||||
assign vx_lsu_req.wb = vx_bckE_req.wb;
|
||||
assign vx_lsu_req.lsu_pc = vx_bckE_req.curr_PC;
|
||||
|
||||
|
||||
// Execute Unit
|
||||
assign VX_exec_unit_req.valid = VX_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
|
||||
assign VX_exec_unit_req.warp_num = VX_bckE_req.warp_num;
|
||||
assign VX_exec_unit_req.curr_PC = VX_bckE_req.curr_PC;
|
||||
assign VX_exec_unit_req.PC_next = VX_bckE_req.PC_next;
|
||||
assign VX_exec_unit_req.rd = VX_bckE_req.rd;
|
||||
assign VX_exec_unit_req.wb = VX_bckE_req.wb;
|
||||
assign VX_exec_unit_req.a_reg_data = VX_gpr_data.a_reg_data;
|
||||
assign VX_exec_unit_req.b_reg_data = VX_gpr_data.b_reg_data;
|
||||
assign VX_exec_unit_req.alu_op = VX_bckE_req.alu_op;
|
||||
assign VX_exec_unit_req.rs1 = VX_bckE_req.rs1;
|
||||
assign VX_exec_unit_req.rs2 = VX_bckE_req.rs2;
|
||||
assign VX_exec_unit_req.rs2_src = VX_bckE_req.rs2_src;
|
||||
assign VX_exec_unit_req.itype_immed = VX_bckE_req.itype_immed;
|
||||
assign VX_exec_unit_req.upper_immed = VX_bckE_req.upper_immed;
|
||||
assign VX_exec_unit_req.branch_type = VX_bckE_req.branch_type;
|
||||
assign VX_exec_unit_req.jalQual = VX_bckE_req.jalQual;
|
||||
assign VX_exec_unit_req.jal = VX_bckE_req.jal;
|
||||
assign VX_exec_unit_req.jal_offset = VX_bckE_req.jal_offset;
|
||||
assign VX_exec_unit_req.ebreak = VX_bckE_req.ebreak;
|
||||
assign vx_exec_unit_req.valid = vx_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
|
||||
assign vx_exec_unit_req.warp_num = vx_bckE_req.warp_num;
|
||||
assign vx_exec_unit_req.curr_PC = vx_bckE_req.curr_PC;
|
||||
assign vx_exec_unit_req.PC_next = vx_bckE_req.PC_next;
|
||||
assign vx_exec_unit_req.rd = vx_bckE_req.rd;
|
||||
assign vx_exec_unit_req.wb = vx_bckE_req.wb;
|
||||
assign vx_exec_unit_req.a_reg_data = vx_gpr_data.a_reg_data;
|
||||
assign vx_exec_unit_req.b_reg_data = vx_gpr_data.b_reg_data;
|
||||
assign vx_exec_unit_req.alu_op = vx_bckE_req.alu_op;
|
||||
assign vx_exec_unit_req.rs1 = vx_bckE_req.rs1;
|
||||
assign vx_exec_unit_req.rs2 = vx_bckE_req.rs2;
|
||||
assign vx_exec_unit_req.rs2_src = vx_bckE_req.rs2_src;
|
||||
assign vx_exec_unit_req.itype_immed = vx_bckE_req.itype_immed;
|
||||
assign vx_exec_unit_req.upper_immed = vx_bckE_req.upper_immed;
|
||||
assign vx_exec_unit_req.branch_type = vx_bckE_req.branch_type;
|
||||
assign vx_exec_unit_req.jalQual = vx_bckE_req.jalQual;
|
||||
assign vx_exec_unit_req.jal = vx_bckE_req.jal;
|
||||
assign vx_exec_unit_req.jal_offset = vx_bckE_req.jal_offset;
|
||||
assign vx_exec_unit_req.ebreak = vx_bckE_req.ebreak;
|
||||
|
||||
|
||||
// GPU Instruction Req
|
||||
assign VX_gpu_inst_req.valid = VX_bckE_req.valid & is_gpu_mask;
|
||||
assign VX_gpu_inst_req.warp_num = VX_bckE_req.warp_num;
|
||||
assign VX_gpu_inst_req.is_wspawn = VX_bckE_req.is_wspawn;
|
||||
assign VX_gpu_inst_req.is_tmc = VX_bckE_req.is_tmc;
|
||||
assign VX_gpu_inst_req.is_split = VX_bckE_req.is_split;
|
||||
assign VX_gpu_inst_req.is_barrier = VX_bckE_req.is_barrier;
|
||||
assign VX_gpu_inst_req.a_reg_data = VX_gpr_data.a_reg_data;
|
||||
assign VX_gpu_inst_req.rd2 = VX_gpr_data.b_reg_data[0];
|
||||
assign VX_gpu_inst_req.pc_next = VX_bckE_req.PC_next;
|
||||
assign vx_gpu_inst_req.valid = vx_bckE_req.valid & is_gpu_mask;
|
||||
assign vx_gpu_inst_req.warp_num = vx_bckE_req.warp_num;
|
||||
assign vx_gpu_inst_req.is_wspawn = vx_bckE_req.is_wspawn;
|
||||
assign vx_gpu_inst_req.is_tmc = vx_bckE_req.is_tmc;
|
||||
assign vx_gpu_inst_req.is_split = vx_bckE_req.is_split;
|
||||
assign vx_gpu_inst_req.is_barrier = vx_bckE_req.is_barrier;
|
||||
assign vx_gpu_inst_req.a_reg_data = vx_gpr_data.a_reg_data;
|
||||
assign vx_gpu_inst_req.rd2 = vx_gpr_data.b_reg_data[0];
|
||||
assign vx_gpu_inst_req.pc_next = vx_bckE_req.PC_next;
|
||||
|
||||
|
||||
// CSR Req
|
||||
assign VX_csr_req.valid = VX_bckE_req.valid & is_csr_mask;
|
||||
assign VX_csr_req.warp_num = VX_bckE_req.warp_num;
|
||||
assign VX_csr_req.rd = VX_bckE_req.rd;
|
||||
assign VX_csr_req.wb = VX_bckE_req.wb;
|
||||
assign VX_csr_req.alu_op = VX_bckE_req.alu_op;
|
||||
assign VX_csr_req.is_csr = VX_bckE_req.is_csr;
|
||||
assign VX_csr_req.csr_address = VX_bckE_req.csr_address;
|
||||
assign VX_csr_req.csr_immed = VX_bckE_req.csr_immed;
|
||||
assign VX_csr_req.csr_mask = VX_bckE_req.csr_mask;
|
||||
assign vx_csr_req.valid = vx_bckE_req.valid & is_csr_mask;
|
||||
assign vx_csr_req.warp_num = vx_bckE_req.warp_num;
|
||||
assign vx_csr_req.rd = vx_bckE_req.rd;
|
||||
assign vx_csr_req.wb = vx_bckE_req.wb;
|
||||
assign vx_csr_req.alu_op = vx_bckE_req.alu_op;
|
||||
assign vx_csr_req.is_csr = vx_bckE_req.is_csr;
|
||||
assign vx_csr_req.csr_address = vx_bckE_req.csr_address;
|
||||
assign vx_csr_req.csr_immed = vx_bckE_req.csr_immed;
|
||||
assign vx_csr_req.csr_mask = vx_bckE_req.csr_mask;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
@@ -1,89 +1,87 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire no_slot_mem,
|
||||
VX_lsu_req_inter VX_lsu_req,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire no_slot_mem,
|
||||
VX_lsu_req_inter vx_lsu_req,
|
||||
|
||||
// Write back to GPR
|
||||
VX_inst_mem_wb_inter VX_mem_wb,
|
||||
|
||||
VX_gpu_dcache_res_inter VX_dcache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_dcache_req,
|
||||
output wire out_delay
|
||||
);
|
||||
// Write back to GPR
|
||||
VX_inst_mem_wb_inter vx_mem_wb,
|
||||
|
||||
VX_gpu_dcache_rsp_inter vx_dcache_rsp,
|
||||
VX_gpu_dcache_req_inter vx_dcache_req,
|
||||
output wire out_delay
|
||||
);
|
||||
// Generate Addresses
|
||||
wire[`NUM_THREADS-1:0][31:0] address;
|
||||
VX_lsu_addr_gen VX_lsu_addr_gen
|
||||
(
|
||||
.base_address(VX_lsu_req.base_address),
|
||||
.offset (VX_lsu_req.offset),
|
||||
.address (address)
|
||||
VX_lsu_addr_gen VX_lsu_addr_gen (
|
||||
.base_address (vx_lsu_req.base_address),
|
||||
.offset (vx_lsu_req.offset),
|
||||
.address (address)
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] use_address;
|
||||
wire[`NUM_THREADS-1:0][31:0] use_store_data;
|
||||
wire[`NUM_THREADS-1:0] use_valid;
|
||||
wire[`NUM_THREADS-1:0][31:0] use_address;
|
||||
wire[`NUM_THREADS-1:0][31:0] use_store_data;
|
||||
wire[`NUM_THREADS-1:0] use_valid;
|
||||
wire[2:0] use_mem_read;
|
||||
wire[2:0] use_mem_write;
|
||||
wire[4:0] use_rd;
|
||||
wire[`NW_BITS-1:0] use_warp_num;
|
||||
wire[`NW_BITS-1:0] use_warp_num;
|
||||
wire[1:0] use_wb;
|
||||
wire[31:0] use_pc;
|
||||
|
||||
wire zero = 0;
|
||||
|
||||
VX_generic_register #(.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)) lsu_buffer(
|
||||
VX_generic_register #(
|
||||
.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)
|
||||
) lsu_buffer(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(out_delay),
|
||||
.flush(zero),
|
||||
.in ({address , VX_lsu_req.store_data, VX_lsu_req.valid, VX_lsu_req.mem_read, VX_lsu_req.mem_write, VX_lsu_req.rd, VX_lsu_req.warp_num, VX_lsu_req.wb, VX_lsu_req.lsu_pc}),
|
||||
.in ({address , vx_lsu_req.store_data, vx_lsu_req.valid, vx_lsu_req.mem_read, vx_lsu_req.mem_write, vx_lsu_req.rd, vx_lsu_req.warp_num, vx_lsu_req.wb, vx_lsu_req.lsu_pc}),
|
||||
.out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc })
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
// Core Request
|
||||
assign VX_dcache_req.core_req_valid = use_valid;
|
||||
assign VX_dcache_req.core_req_addr = use_address;
|
||||
assign VX_dcache_req.core_req_writedata = use_store_data;
|
||||
assign VX_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
|
||||
assign VX_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
|
||||
assign VX_dcache_req.core_req_rd = use_rd;
|
||||
assign VX_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}};
|
||||
assign VX_dcache_req.core_req_warp_num = use_warp_num;
|
||||
assign VX_dcache_req.core_req_pc = use_pc;
|
||||
assign vx_dcache_req.core_req_valid = use_valid;
|
||||
assign vx_dcache_req.core_req_addr = use_address;
|
||||
assign vx_dcache_req.core_req_writedata = use_store_data;
|
||||
assign vx_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
|
||||
assign vx_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
|
||||
assign vx_dcache_req.core_req_rd = use_rd;
|
||||
assign vx_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}};
|
||||
assign vx_dcache_req.core_req_warp_num = use_warp_num;
|
||||
assign vx_dcache_req.core_req_pc = use_pc;
|
||||
|
||||
// Core can't accept response
|
||||
assign VX_dcache_req.core_no_wb_slot = no_slot_mem;
|
||||
|
||||
assign vx_dcache_req.core_no_wb_slot = no_slot_mem;
|
||||
|
||||
// Cache can't accept request
|
||||
assign out_delay = VX_dcache_rsp.delay_req;
|
||||
assign out_delay = vx_dcache_rsp.delay_req;
|
||||
|
||||
// Core Response
|
||||
assign VX_mem_wb.rd = VX_dcache_rsp.core_wb_req_rd;
|
||||
assign VX_mem_wb.wb = VX_dcache_rsp.core_wb_req_wb;
|
||||
assign VX_mem_wb.wb_valid = VX_dcache_rsp.core_wb_valid;
|
||||
assign VX_mem_wb.wb_warp_num = VX_dcache_rsp.core_wb_warp_num;
|
||||
assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata;
|
||||
assign vx_mem_wb.rd = vx_dcache_rsp.core_wb_req_rd;
|
||||
assign vx_mem_wb.wb = vx_dcache_rsp.core_wb_req_wb;
|
||||
assign vx_mem_wb.wb_valid = vx_dcache_rsp.core_wb_valid;
|
||||
assign vx_mem_wb.wb_warp_num = vx_dcache_rsp.core_wb_warp_num;
|
||||
assign vx_mem_wb.loaded_data = vx_dcache_rsp.core_wb_readdata;
|
||||
|
||||
wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire found;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
VX_generic_priority_encoder #(.N(`NUM_THREADS)) pick_first_pc(
|
||||
.valids(VX_dcache_rsp.core_wb_valid),
|
||||
.valids(vx_dcache_rsp.core_wb_valid),
|
||||
.index (use_pc_index),
|
||||
.found (found)
|
||||
);
|
||||
|
||||
assign VX_mem_wb.mem_wb_pc = VX_dcache_rsp.core_wb_pc[use_pc_index];
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
assign vx_mem_wb.mem_wb_pc = vx_dcache_rsp.core_wb_pc[use_pc_index];
|
||||
|
||||
endmodule // Memory
|
||||
|
||||
|
||||
|
||||
@@ -6,73 +6,75 @@ module VX_scheduler (
|
||||
input wire memory_delay,
|
||||
input wire exec_delay,
|
||||
input wire gpr_stage_delay,
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
VX_frE_to_bckE_req_inter vx_bckE_req,
|
||||
VX_wb_inter vx_writeback_inter,
|
||||
|
||||
output wire schedule_delay,
|
||||
output wire is_empty
|
||||
);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
reg[31:0] count_valid;
|
||||
|
||||
assign is_empty = count_valid == 0;
|
||||
|
||||
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0);
|
||||
wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0);
|
||||
wire valid_wb = (vx_writeback_inter.wb != 0) && (|vx_writeback_inter.wb_valid) && (vx_writeback_inter.rd != 0);
|
||||
wire wb_inc = (vx_bckE_req.wb != 0) && (vx_bckE_req.rd != 0);
|
||||
|
||||
wire rs1_rename = rename_table[VX_bckE_req.warp_num][VX_bckE_req.rs1] != 0;
|
||||
wire rs2_rename = rename_table[VX_bckE_req.warp_num][VX_bckE_req.rs2] != 0;
|
||||
wire rd_rename = rename_table[VX_bckE_req.warp_num][VX_bckE_req.rd ] != 0;
|
||||
wire rs1_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rs1] != 0;
|
||||
wire rs2_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rs2] != 0;
|
||||
wire rd_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rd ] != 0;
|
||||
|
||||
wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire is_store = (vx_bckE_req.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (vx_bckE_req.mem_read != `NO_MEM_READ);
|
||||
|
||||
// classify our next instruction.
|
||||
wire is_mem = is_store || is_load;
|
||||
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
|
||||
wire is_csr = VX_bckE_req.is_csr;
|
||||
wire is_gpu = (vx_bckE_req.is_wspawn || vx_bckE_req.is_tmc || vx_bckE_req.is_barrier || vx_bckE_req.is_split);
|
||||
wire is_csr = vx_bckE_req.is_csr;
|
||||
wire is_exec = !is_mem && !is_gpu && !is_csr;
|
||||
|
||||
// wire rs1_pass = 0;
|
||||
// wire rs2_pass = 0;
|
||||
wire using_rs2 = (vx_bckE_req.rs2_src == `RS2_REG) || is_store || vx_bckE_req.is_barrier || vx_bckE_req.is_wspawn;
|
||||
|
||||
wire using_rs2 = (VX_bckE_req.rs2_src == `RS2_REG) || is_store || VX_bckE_req.is_barrier || VX_bckE_req.is_wspawn;
|
||||
|
||||
wire rs1_rename_qual = ((rs1_rename) && (VX_bckE_req.rs1 != 0));
|
||||
wire rs2_rename_qual = ((rs2_rename) && (VX_bckE_req.rs2 != 0 && using_rs2));
|
||||
wire rd_rename_qual = ((rd_rename ) && (VX_bckE_req.rd != 0));
|
||||
wire rs1_rename_qual = ((rs1_rename) && (vx_bckE_req.rs1 != 0));
|
||||
wire rs2_rename_qual = ((rs2_rename) && (vx_bckE_req.rs2 != 0 && using_rs2));
|
||||
wire rd_rename_qual = ((rd_rename ) && (vx_bckE_req.rd != 0));
|
||||
|
||||
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
|
||||
|
||||
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid))
|
||||
|| (memory_delay && is_mem)
|
||||
|| (gpr_stage_delay && (is_mem || is_exec))
|
||||
|| (exec_delay && is_exec);
|
||||
assign schedule_delay = ((rename_valid) && (|vx_bckE_req.valid))
|
||||
|| (memory_delay && is_mem)
|
||||
|| (gpr_stage_delay && (is_mem || is_exec))
|
||||
|| (exec_delay && is_exec);
|
||||
|
||||
integer i;
|
||||
integer w;
|
||||
always @(posedge clk or posedge reset) begin
|
||||
always @(posedge clk) begin
|
||||
|
||||
if (reset) begin
|
||||
for (w = 0; w < `NUM_WARPS; w=w+1)
|
||||
begin
|
||||
for (i = 0; i < 32; i = i + 1)
|
||||
begin
|
||||
rename_table[w][i] <= 0;
|
||||
for (w = 0; w < `NUM_WARPS; w=w+1) begin
|
||||
for (i = 0; i < 32; i = i + 1) begin
|
||||
rename_table[w][i] <= 0;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
if (valid_wb ) rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] <= rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid);
|
||||
if (!schedule_delay && wb_inc) rename_table[VX_bckE_req.warp_num ][VX_bckE_req.rd ] <= VX_bckE_req.valid;
|
||||
if (valid_wb) begin
|
||||
rename_table[vx_writeback_inter.wb_warp_num][vx_writeback_inter.rd] <= rename_table[vx_writeback_inter.wb_warp_num][vx_writeback_inter.rd] & (~vx_writeback_inter.wb_valid);
|
||||
end
|
||||
|
||||
if (!schedule_delay && wb_inc) begin
|
||||
rename_table[vx_bckE_req.warp_num][vx_bckE_req.rd] <= vx_bckE_req.valid;
|
||||
end
|
||||
|
||||
if (valid_wb && ((rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid)) == 0)) count_valid = count_valid - 1;
|
||||
if (!schedule_delay && wb_inc) count_valid = count_valid + 1;
|
||||
// Maintain count_valid, which drives is_empty (count_valid == 0).
//   - retire: a valid write-back that clears the last pending bit of its
//             rename_table entry decrements the counter;
//   - issue : an un-stalled instruction with a destination register
//             (wb_inc) increments it.
//
// Both events can happen in the same cycle. Two independent non-blocking
// assignments would both read the pre-edge count_valid and the later one
// would win, silently dropping the decrement (the pre-refactor blocking
// form netted to zero). The events are therefore resolved explicitly so a
// simultaneous retire + issue leaves the counter unchanged.
if (valid_wb
 && (0 == (rename_table[vx_writeback_inter.wb_warp_num][vx_writeback_inter.rd] & ~vx_writeback_inter.wb_valid))) begin
    if (schedule_delay || !wb_inc) begin
        // Retire only: no instruction is issued this cycle.
        count_valid <= count_valid - 1;
    end
    // Retire and issue together: net change is zero, keep count_valid.
end else if (!schedule_delay && wb_inc) begin
    // Issue only.
    count_valid <= count_valid + 1;
end
|
||||
end
|
||||
end
|
||||
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
endmodule
|
||||
@@ -38,7 +38,7 @@ module VX_warp (
|
||||
end
|
||||
|
||||
|
||||
always @(posedge clk, posedge reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (remove) begin
|
||||
valid <= valid_zero;
|
||||
end else if (in_change_mask) begin
|
||||
@@ -69,7 +69,7 @@ module VX_warp (
|
||||
assign use_PC = temp_PC;
|
||||
assign out_PC = temp_PC;
|
||||
|
||||
always @(posedge clk or posedge reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
real_PC <= 0;
|
||||
end else if (in_wspawn == 1'b1) begin
|
||||
|
||||
@@ -19,7 +19,9 @@ module VX_warp_scheduler (
|
||||
input wire[`NW_BITS-1:0] whalt_warp_num,
|
||||
|
||||
input wire is_barrier,
|
||||
/* verilator lint_off UNUSED */
|
||||
input wire[31:0] barrier_id,
|
||||
/* verilator lint_on UNUSED */
|
||||
input wire[$clog2(`NUM_WARPS):0] num_warps,
|
||||
input wire[`NW_BITS-1:0] barrier_warp_num,
|
||||
|
||||
@@ -60,10 +62,7 @@ module VX_warp_scheduler (
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids
|
||||
|
||||
);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
wire update_use_wspawn;
|
||||
|
||||
wire update_visible_active;
|
||||
|
||||
wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
|
||||
@@ -72,10 +71,12 @@ module VX_warp_scheduler (
|
||||
wire[31:0] join_pc;
|
||||
wire[`NUM_THREADS-1:0] join_tm;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire in_wspawn = wspawn;
|
||||
wire in_ctm = ctm;
|
||||
wire in_whalt = whalt;
|
||||
wire in_wstall = wstall;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
reg[`NUM_WARPS-1:0] warp_active;
|
||||
reg[`NUM_WARPS-1:0] warp_stalled;
|
||||
@@ -114,13 +115,12 @@ module VX_warp_scheduler (
|
||||
|
||||
reg didnt_split;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
// wire[$clog2(`NUM_WARPS):0] num_active;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
integer curr_w_help;
|
||||
integer curr_barrier;
|
||||
always @(posedge clk or posedge reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (curr_barrier = 0; curr_barrier < `NUM_BARRIERS; curr_barrier=curr_barrier+1) begin
|
||||
barrier_stall_mask[curr_barrier] <= 0;
|
||||
|
||||
@@ -4,61 +4,61 @@ module VX_writeback (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// Mem WB info
|
||||
VX_inst_mem_wb_inter VX_mem_wb,
|
||||
VX_inst_mem_wb_inter vx_mem_wb,
|
||||
// EXEC Unit WB info
|
||||
VX_inst_exec_wb_inter VX_inst_exec_wb,
|
||||
VX_inst_exec_wb_inter vx_inst_exec_wb,
|
||||
// CSR Unit WB info
|
||||
VX_csr_wb_inter VX_csr_wb,
|
||||
VX_csr_wb_inter vx_csr_wb,
|
||||
|
||||
// Actual WB to GPR
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
VX_wb_inter vx_writeback_inter,
|
||||
output wire no_slot_mem,
|
||||
output wire no_slot_exec,
|
||||
output wire no_slot_csr
|
||||
);
|
||||
|
||||
VX_wb_inter VX_writeback_tempp();
|
||||
VX_wb_inter vx_writeback_tempp();
|
||||
|
||||
wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
|
||||
wire mem_wb = (VX_mem_wb.wb != 0) && (|VX_mem_wb.wb_valid);
|
||||
wire csr_wb = (VX_csr_wb.wb != 0) && (|VX_csr_wb.valid);
|
||||
wire exec_wb = (vx_inst_exec_wb.wb != 0) && (|vx_inst_exec_wb.wb_valid);
|
||||
wire mem_wb = (vx_mem_wb.wb != 0) && (|vx_mem_wb.wb_valid);
|
||||
wire csr_wb = (vx_csr_wb.wb != 0) && (|vx_csr_wb.valid);
|
||||
|
||||
|
||||
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
|
||||
assign no_slot_csr = csr_wb && (exec_wb);
|
||||
assign no_slot_exec = 0;
|
||||
|
||||
assign VX_writeback_tempp.write_data = exec_wb ? VX_inst_exec_wb.alu_result :
|
||||
csr_wb ? VX_csr_wb.csr_result :
|
||||
mem_wb ? VX_mem_wb.loaded_data :
|
||||
assign vx_writeback_tempp.write_data = exec_wb ? vx_inst_exec_wb.alu_result :
|
||||
csr_wb ? vx_csr_wb.csr_result :
|
||||
mem_wb ? vx_mem_wb.loaded_data :
|
||||
0;
|
||||
|
||||
|
||||
assign VX_writeback_tempp.wb_valid = exec_wb ? VX_inst_exec_wb.wb_valid :
|
||||
csr_wb ? VX_csr_wb.valid :
|
||||
mem_wb ? VX_mem_wb.wb_valid :
|
||||
assign vx_writeback_tempp.wb_valid = exec_wb ? vx_inst_exec_wb.wb_valid :
|
||||
csr_wb ? vx_csr_wb.valid :
|
||||
mem_wb ? vx_mem_wb.wb_valid :
|
||||
0;
|
||||
|
||||
assign VX_writeback_tempp.rd = exec_wb ? VX_inst_exec_wb.rd :
|
||||
csr_wb ? VX_csr_wb.rd :
|
||||
mem_wb ? VX_mem_wb.rd :
|
||||
assign vx_writeback_tempp.rd = exec_wb ? vx_inst_exec_wb.rd :
|
||||
csr_wb ? vx_csr_wb.rd :
|
||||
mem_wb ? vx_mem_wb.rd :
|
||||
0;
|
||||
|
||||
assign VX_writeback_tempp.wb = exec_wb ? VX_inst_exec_wb.wb :
|
||||
csr_wb ? VX_csr_wb.wb :
|
||||
mem_wb ? VX_mem_wb.wb :
|
||||
assign vx_writeback_tempp.wb = exec_wb ? vx_inst_exec_wb.wb :
|
||||
csr_wb ? vx_csr_wb.wb :
|
||||
mem_wb ? vx_mem_wb.wb :
|
||||
0;
|
||||
|
||||
assign VX_writeback_tempp.wb_warp_num = exec_wb ? VX_inst_exec_wb.wb_warp_num :
|
||||
csr_wb ? VX_csr_wb.warp_num :
|
||||
mem_wb ? VX_mem_wb.wb_warp_num :
|
||||
assign vx_writeback_tempp.wb_warp_num = exec_wb ? vx_inst_exec_wb.wb_warp_num :
|
||||
csr_wb ? vx_csr_wb.warp_num :
|
||||
mem_wb ? vx_mem_wb.wb_warp_num :
|
||||
0;
|
||||
|
||||
|
||||
|
||||
assign VX_writeback_tempp.wb_pc = exec_wb ? VX_inst_exec_wb.exec_wb_pc :
|
||||
assign vx_writeback_tempp.wb_pc = exec_wb ? vx_inst_exec_wb.exec_wb_pc :
|
||||
csr_wb ? 32'hdeadbeef :
|
||||
mem_wb ? VX_mem_wb.mem_wb_pc :
|
||||
mem_wb ? vx_mem_wb.mem_wb_pc :
|
||||
32'hdeadbeef;
|
||||
|
||||
|
||||
@@ -71,19 +71,19 @@ module VX_writeback (
|
||||
.reset(reset),
|
||||
.stall(zero),
|
||||
.flush(zero),
|
||||
.in ({VX_writeback_tempp.write_data, VX_writeback_tempp.wb_valid, VX_writeback_tempp.rd, VX_writeback_tempp.wb, VX_writeback_tempp.wb_warp_num, VX_writeback_tempp.wb_pc}),
|
||||
.out ({use_wb_data , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc})
|
||||
.in ({vx_writeback_tempp.write_data, vx_writeback_tempp.wb_valid, vx_writeback_tempp.rd, vx_writeback_tempp.wb, vx_writeback_tempp.wb_warp_num, vx_writeback_tempp.wb_pc}),
|
||||
.out ({use_wb_data , vx_writeback_inter.wb_valid, vx_writeback_inter.rd, vx_writeback_inter.wb, vx_writeback_inter.wb_warp_num, vx_writeback_inter.wb_pc})
|
||||
);
|
||||
|
||||
|
||||
reg[31:0] last_data_wb /* verilator public */ ;
|
||||
always @(posedge clk) begin
|
||||
if ((|VX_writeback_inter.wb_valid) && (VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd == 28)) begin
|
||||
if ((|vx_writeback_inter.wb_valid) && (vx_writeback_inter.wb != 0) && (vx_writeback_inter.rd == 28)) begin
|
||||
last_data_wb <= use_wb_data[0];
|
||||
end
|
||||
end
|
||||
|
||||
assign VX_writeback_inter.write_data = use_wb_data;
|
||||
assign vx_writeback_inter.write_data = use_wb_data;
|
||||
|
||||
endmodule : VX_writeback
|
||||
|
||||
|
||||
320
hw/rtl/Vortex.v
320
hw/rtl/Vortex.v
@@ -16,105 +16,82 @@ module Vortex
|
||||
output wire [31:0] io_data,
|
||||
|
||||
// DRAM Dcache Req
|
||||
output wire dram_req,
|
||||
output wire dram_req_write,
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
output wire [31:0] dram_expected_lat,
|
||||
input wire dram_req_full,
|
||||
|
||||
input wire dram_req_delay,
|
||||
|
||||
// DRAM Dcache Res
|
||||
output wire dram_fill_accept,
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data,
|
||||
// DRAM Dcache Rsp
|
||||
input wire dram_rsp_valid,
|
||||
input wire [31:0] dram_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// DRAM Icache Req
|
||||
output wire I_dram_req,
|
||||
output wire I_dram_req_write,
|
||||
output wire I_dram_req_read,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [31:0] I_dram_req_size,
|
||||
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
||||
output wire [31:0] I_dram_expected_lat,
|
||||
output wire I_dram_req_read,
|
||||
output wire I_dram_req_write,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
||||
input wire I_dram_req_full,
|
||||
|
||||
// DRAM Icache Res
|
||||
output wire I_dram_fill_accept,
|
||||
input wire I_dram_fill_rsp,
|
||||
input wire [31:0] I_dram_fill_rsp_addr,
|
||||
input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data,
|
||||
// DRAM Icache Rsp
|
||||
input wire I_dram_rsp_valid,
|
||||
input wire [31:0] I_dram_rsp_addr,
|
||||
input wire [`IBANK_LINE_SIZE-1:0] I_dram_rsp_data,
|
||||
output wire I_dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
input wire snp_req,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_delay,
|
||||
input wire snp_req_valid,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_full,
|
||||
|
||||
input wire I_snp_req,
|
||||
input wire [31:0] I_snp_req_addr,
|
||||
output wire I_snp_req_delay,
|
||||
|
||||
output wire out_ebreak
|
||||
output wire out_ebreak
|
||||
|
||||
`else
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// IO
|
||||
output wire io_valid,
|
||||
output wire[31:0] io_data,
|
||||
output wire io_valid,
|
||||
output wire[31:0] io_data,
|
||||
|
||||
// DRAM Dcache Req
|
||||
output wire dram_req,
|
||||
output wire dram_req_write,
|
||||
output wire dram_req_read,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
output wire [31:0] dram_expected_lat,
|
||||
|
||||
// DRAM Dcache Res
|
||||
output wire dram_fill_accept,
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data,
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
input wire dram_req_full,
|
||||
|
||||
// DRAM Dcache Rsp
|
||||
input wire dram_rsp_valid,
|
||||
input wire [31:0] dram_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// DRAM Icache Req
|
||||
output wire I_dram_req,
|
||||
output wire I_dram_req_write,
|
||||
output wire I_dram_req_read,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [31:0] I_dram_req_size,
|
||||
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
||||
output wire [31:0] I_dram_expected_lat,
|
||||
output wire I_dram_req_read,
|
||||
output wire I_dram_req_write,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
||||
input wire I_dram_req_full,
|
||||
|
||||
// DRAM Icache Res
|
||||
output wire I_dram_fill_accept,
|
||||
input wire I_dram_fill_rsp,
|
||||
input wire [31:0] I_dram_fill_rsp_addr,
|
||||
input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data,
|
||||
// DRAM Icache Rsp
|
||||
output wire I_dram_rsp_ready,
|
||||
input wire I_dram_rsp_valid,
|
||||
input wire [31:0] I_dram_rsp_addr,
|
||||
input wire [`IBANK_LINE_SIZE-1:0] I_dram_rsp_data,
|
||||
|
||||
input wire dram_req_delay,
|
||||
input wire snp_req_valid,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_full,
|
||||
|
||||
input wire snp_req,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_delay,
|
||||
|
||||
input wire I_snp_req,
|
||||
input wire [31:0] I_snp_req_addr,
|
||||
output wire I_snp_req_delay,
|
||||
|
||||
output wire out_ebreak
|
||||
output wire out_ebreak
|
||||
`endif
|
||||
);
|
||||
/* verilator lint_off UNUSED */
|
||||
wire scheduler_empty;
|
||||
wire out_ebreak_unqual;
|
||||
|
||||
// assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1);
|
||||
assign out_ebreak = out_ebreak_unqual;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
wire memory_delay;
|
||||
wire exec_delay;
|
||||
@@ -122,184 +99,165 @@ module Vortex
|
||||
wire schedule_delay;
|
||||
|
||||
// Dcache Interface
|
||||
VX_gpu_dcache_res_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) VX_dcache_rsp();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) VX_dcache_req();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) VX_dcache_req_qual();
|
||||
VX_gpu_dcache_rsp_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_qual();
|
||||
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_res();
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_req();
|
||||
VX_gpu_dcache_dram_rsp_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_res();
|
||||
|
||||
assign vx_gpu_dcache_dram_res.dram_rsp_valid = dram_rsp_valid;
|
||||
assign vx_gpu_dcache_dram_res.dram_rsp_addr = dram_rsp_addr;
|
||||
|
||||
assign VX_gpu_dcache_dram_res.dram_fill_rsp = dram_fill_rsp;
|
||||
assign VX_gpu_dcache_dram_res.dram_fill_rsp_addr = dram_fill_rsp_addr;
|
||||
assign dram_req_write = vx_gpu_dcache_dram_req.dram_req_write;
|
||||
assign dram_req_read = vx_gpu_dcache_dram_req.dram_req_read;
|
||||
assign dram_req_addr = vx_gpu_dcache_dram_req.dram_req_addr;
|
||||
assign dram_rsp_ready = vx_gpu_dcache_dram_req.dram_rsp_ready;
|
||||
|
||||
assign dram_req = VX_gpu_dcache_dram_req.dram_req;
|
||||
assign dram_req_write = VX_gpu_dcache_dram_req.dram_req_write;
|
||||
assign dram_req_read = VX_gpu_dcache_dram_req.dram_req_read;
|
||||
assign dram_req_addr = VX_gpu_dcache_dram_req.dram_req_addr;
|
||||
assign dram_req_size = VX_gpu_dcache_dram_req.dram_req_size;
|
||||
assign dram_expected_lat = `DSIMULATED_DRAM_LATENCY_CYCLES;
|
||||
assign dram_fill_accept = VX_gpu_dcache_dram_req.dram_fill_accept;
|
||||
|
||||
assign VX_gpu_dcache_dram_req.dram_req_delay = dram_req_delay;
|
||||
assign vx_gpu_dcache_dram_req.dram_req_full = dram_req_full;
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
|
||||
assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[i] = dram_fill_rsp_data[i * 32 +: 32];
|
||||
assign dram_req_data[i * 32 +: 32] = VX_gpu_dcache_dram_req.dram_req_data[i];
|
||||
assign vx_gpu_dcache_dram_res.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
|
||||
assign dram_req_data[i * 32 +: 32] = vx_gpu_dcache_dram_req.dram_req_data[i];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire temp_io_valid = (!memory_delay)
|
||||
&& (|VX_dcache_req.core_req_valid)
|
||||
&& (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE)
|
||||
&& (VX_dcache_req.core_req_addr[0] == 32'h00010000);
|
||||
&& (|vx_dcache_req.core_req_valid)
|
||||
&& (vx_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE)
|
||||
&& (vx_dcache_req.core_req_addr[0] == 32'h00010000);
|
||||
|
||||
wire[31:0] temp_io_data = VX_dcache_req.core_req_writedata[0];
|
||||
wire[31:0] temp_io_data = vx_dcache_req.core_req_writedata[0];
|
||||
assign io_valid = temp_io_valid;
|
||||
assign io_data = temp_io_data;
|
||||
|
||||
assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}};
|
||||
assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr;
|
||||
assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata;
|
||||
assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read;
|
||||
assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write;
|
||||
assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd;
|
||||
assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb;
|
||||
assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num;
|
||||
assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc;
|
||||
assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
|
||||
assign vx_dcache_req_qual.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}};
|
||||
assign vx_dcache_req_qual.core_req_addr = vx_dcache_req.core_req_addr;
|
||||
assign vx_dcache_req_qual.core_req_writedata = vx_dcache_req.core_req_writedata;
|
||||
assign vx_dcache_req_qual.core_req_mem_read = vx_dcache_req.core_req_mem_read;
|
||||
assign vx_dcache_req_qual.core_req_mem_write = vx_dcache_req.core_req_mem_write;
|
||||
assign vx_dcache_req_qual.core_req_rd = vx_dcache_req.core_req_rd;
|
||||
assign vx_dcache_req_qual.core_req_wb = vx_dcache_req.core_req_wb;
|
||||
assign vx_dcache_req_qual.core_req_warp_num = vx_dcache_req.core_req_warp_num;
|
||||
assign vx_dcache_req_qual.core_req_pc = vx_dcache_req.core_req_pc;
|
||||
assign vx_dcache_req_qual.core_no_wb_slot = vx_dcache_req.core_no_wb_slot;
|
||||
|
||||
VX_gpu_dcache_rsp_inter #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_rsp();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_req();
|
||||
|
||||
VX_gpu_dcache_res_inter #(.NUM_REQUESTS(`INUM_REQUESTS)) VX_icache_rsp();
|
||||
VX_gpu_dcache_req_inter #(.NUM_REQUESTS(`INUM_REQUESTS)) VX_icache_req();
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_req();
|
||||
VX_gpu_dcache_dram_rsp_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_res();
|
||||
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_res();
|
||||
assign vx_gpu_icache_dram_res.dram_rsp_valid = I_dram_rsp_valid;
|
||||
assign vx_gpu_icache_dram_res.dram_rsp_addr = I_dram_rsp_addr;
|
||||
|
||||
assign I_dram_req_write = vx_gpu_icache_dram_req.dram_req_write;
|
||||
assign I_dram_req_read = vx_gpu_icache_dram_req.dram_req_read;
|
||||
assign I_dram_req_addr = vx_gpu_icache_dram_req.dram_req_addr;
|
||||
assign I_dram_rsp_ready = vx_gpu_icache_dram_req.dram_rsp_ready;
|
||||
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp = I_dram_fill_rsp;
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp_addr = I_dram_fill_rsp_addr;
|
||||
|
||||
assign I_dram_req = VX_gpu_icache_dram_req.dram_req;
|
||||
assign I_dram_req_write = VX_gpu_icache_dram_req.dram_req_write;
|
||||
assign I_dram_req_read = VX_gpu_icache_dram_req.dram_req_read;
|
||||
assign I_dram_req_addr = VX_gpu_icache_dram_req.dram_req_addr;
|
||||
assign I_dram_req_size = VX_gpu_icache_dram_req.dram_req_size;
|
||||
assign I_dram_expected_lat = `ISIMULATED_DRAM_LATENCY_CYCLES;
|
||||
assign I_dram_fill_accept = VX_gpu_icache_dram_req.dram_fill_accept;
|
||||
|
||||
assign VX_gpu_icache_dram_req.dram_req_delay = dram_req_delay;
|
||||
assign vx_gpu_icache_dram_req.dram_req_full = I_dram_req_full;
|
||||
|
||||
genvar j;
|
||||
generate
|
||||
for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp_data[j] = I_dram_fill_rsp_data[j * 32 +: 32];
|
||||
assign I_dram_req_data[j * 32 +: 32] = VX_gpu_icache_dram_req.dram_req_data[j];
|
||||
assign vx_gpu_icache_dram_res.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32];
|
||||
assign I_dram_req_data[j * 32 +: 32] = vx_gpu_icache_dram_req.dram_req_data[j];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Front-end to Back-end
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req(); // New instruction request to EXE/MEM
|
||||
VX_frE_to_bckE_req_inter vx_bckE_req(); // New instruction request to EXE/MEM
|
||||
|
||||
// Back-end to Front-end
|
||||
VX_wb_inter VX_writeback_inter(); // Writeback to GPRs
|
||||
VX_branch_response_inter VX_branch_rsp(); // Branch Resolution to Fetch
|
||||
VX_jal_response_inter VX_jal_rsp(); // Jump resolution to Fetch
|
||||
VX_wb_inter vx_writeback_inter(); // Writeback to GPRs
|
||||
VX_branch_response_inter vx_branch_rsp(); // Branch Resolution to Fetch
|
||||
VX_jal_response_inter vx_jal_rsp(); // Jump resolution to Fetch
|
||||
|
||||
// CSR Buses
|
||||
// VX_csr_write_request_inter VX_csr_w_req();
|
||||
// VX_csr_write_request_inter vx_csr_w_req();
|
||||
|
||||
VX_warp_ctl_inter vx_warp_ctl();
|
||||
VX_gpu_snp_req_rsp vx_gpu_icache_snp_req();
|
||||
VX_gpu_snp_req_rsp vx_gpu_dcache_snp_req();
|
||||
|
||||
VX_warp_ctl_inter VX_warp_ctl();
|
||||
|
||||
|
||||
VX_gpu_snp_req_rsp VX_gpu_icache_snp_req();
|
||||
VX_gpu_snp_req_rsp VX_gpu_dcache_snp_req();
|
||||
|
||||
assign VX_gpu_icache_snp_req.snp_req = I_snp_req;
|
||||
assign VX_gpu_icache_snp_req.snp_req_addr = I_snp_req_addr;
|
||||
assign I_snp_req_delay = VX_gpu_icache_snp_req.snp_delay;
|
||||
|
||||
assign VX_gpu_dcache_snp_req.snp_req = snp_req;
|
||||
assign VX_gpu_dcache_snp_req.snp_req_addr = snp_req_addr;
|
||||
assign snp_req_delay = VX_gpu_dcache_snp_req.snp_delay;
|
||||
assign vx_gpu_dcache_snp_req.snp_req_valid = snp_req_valid;
|
||||
assign vx_gpu_dcache_snp_req.snp_req_addr = snp_req_addr;
|
||||
assign snp_req_full = vx_gpu_dcache_snp_req.snp_req_full;
|
||||
|
||||
VX_front_end vx_front_end(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.VX_warp_ctl (VX_warp_ctl),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.vx_warp_ctl (vx_warp_ctl),
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.schedule_delay (schedule_delay),
|
||||
.VX_icache_rsp (VX_icache_rsp),
|
||||
.VX_icache_req (VX_icache_req),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.fetch_ebreak (out_ebreak_unqual)
|
||||
.vx_icache_rsp (vx_icache_rsp),
|
||||
.vx_icache_req (vx_icache_req),
|
||||
.vx_jal_rsp (vx_jal_rsp),
|
||||
.vx_branch_rsp (vx_branch_rsp),
|
||||
.fetch_ebreak (out_ebreak)
|
||||
);
|
||||
|
||||
VX_scheduler schedule(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.memory_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.VX_writeback_inter(VX_writeback_inter),
|
||||
.schedule_delay (schedule_delay),
|
||||
.is_empty (scheduler_empty)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.memory_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay),
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.vx_writeback_inter (vx_writeback_inter),
|
||||
.schedule_delay (schedule_delay),
|
||||
.is_empty (scheduler_empty)
|
||||
);
|
||||
|
||||
VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.VX_warp_ctl (VX_warp_ctl),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.VX_dcache_rsp (VX_dcache_rsp),
|
||||
.VX_dcache_req (VX_dcache_req),
|
||||
.VX_writeback_inter (VX_writeback_inter),
|
||||
.vx_warp_ctl (vx_warp_ctl),
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.vx_jal_rsp (vx_jal_rsp),
|
||||
.vx_branch_rsp (vx_branch_rsp),
|
||||
.vx_dcache_rsp (vx_dcache_rsp),
|
||||
.vx_dcache_req (vx_dcache_req),
|
||||
.vx_writeback_inter (vx_writeback_inter),
|
||||
.out_mem_delay (memory_delay),
|
||||
.out_exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
|
||||
VX_dmem_controller VX_dmem_controller(
|
||||
VX_dmem_controller vx_dmem_controller(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Dram <-> Dcache
|
||||
.VX_gpu_dcache_dram_req (VX_gpu_dcache_dram_req),
|
||||
.VX_gpu_dcache_dram_res (VX_gpu_dcache_dram_res),
|
||||
.VX_gpu_dcache_snp_req (VX_gpu_dcache_snp_req),
|
||||
.vx_gpu_dcache_dram_req (vx_gpu_dcache_dram_req),
|
||||
.vx_gpu_dcache_dram_res (vx_gpu_dcache_dram_res),
|
||||
.vx_gpu_dcache_snp_req (vx_gpu_dcache_snp_req),
|
||||
|
||||
// Dram <-> Icache
|
||||
.VX_gpu_icache_dram_req (VX_gpu_icache_dram_req),
|
||||
.VX_gpu_icache_dram_res (VX_gpu_icache_dram_res),
|
||||
.VX_gpu_icache_snp_req (VX_gpu_icache_snp_req),
|
||||
.vx_gpu_icache_dram_req (vx_gpu_icache_dram_req),
|
||||
.vx_gpu_icache_dram_res (vx_gpu_icache_dram_res),
|
||||
.vx_gpu_icache_snp_req (vx_gpu_icache_snp_req),
|
||||
|
||||
// Core <-> Icache
|
||||
.VX_icache_req (VX_icache_req),
|
||||
.VX_icache_rsp (VX_icache_rsp),
|
||||
.vx_icache_req (vx_icache_req),
|
||||
.vx_icache_rsp (vx_icache_rsp),
|
||||
|
||||
// Core <-> Dcache
|
||||
.VX_dcache_req (VX_dcache_req_qual),
|
||||
.VX_dcache_rsp (VX_dcache_rsp)
|
||||
.vx_dcache_req (vx_dcache_req_qual),
|
||||
.vx_dcache_rsp (vx_dcache_rsp)
|
||||
);
|
||||
|
||||
// VX_csr_handler vx_csr_handler(
|
||||
// .clk (clk),
|
||||
// .in_decode_csr_address(decode_csr_address),
|
||||
// .VX_csr_w_req (VX_csr_w_req),
|
||||
// .in_wb_valid (VX_writeback_inter.wb_valid[0]),
|
||||
|
||||
// .vx_csr_w_req (vx_csr_w_req),
|
||||
// .in_wb_valid (vx_writeback_inter.wb_valid[0]),
|
||||
// .out_decode_csr_data (csr_decode_csr_data)
|
||||
// );
|
||||
|
||||
|
||||
@@ -15,57 +15,48 @@ module Vortex_Cluster
|
||||
output wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] io_data,
|
||||
|
||||
// DRAM Req
|
||||
output wire out_dram_req,
|
||||
output wire out_dram_req_write,
|
||||
output wire out_dram_req_read,
|
||||
output wire [31:0] out_dram_req_addr,
|
||||
output wire [31:0] out_dram_req_size,
|
||||
output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0],
|
||||
output wire [31:0] out_dram_expected_lat,
|
||||
input wire out_dram_req_delay,
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
input wire dram_req_full,
|
||||
|
||||
// DRAM Res
|
||||
output wire out_dram_fill_accept,
|
||||
input wire out_dram_fill_rsp,
|
||||
input wire [31:0] out_dram_fill_rsp_addr,
|
||||
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0],
|
||||
// DRAM Rsp
|
||||
input wire dram_rsp_valid,
|
||||
input wire [31:0] dram_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
input wire llc_snp_req,
|
||||
input wire llc_snp_req_valid,
|
||||
input wire[31:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_delay,
|
||||
output wire llc_snp_req_full,
|
||||
|
||||
output wire out_ebreak
|
||||
);
|
||||
// DRAM Dcache Req
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_write;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_read;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_write;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_req_data;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat;
|
||||
|
||||
// DRAM Dcache Res
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_fill_rsp_data;
|
||||
// DRAM Dcache Rsp
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_rsp_valid;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_rsp_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_rsp_data;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_rsp_ready;
|
||||
|
||||
// DRAM Icache Req
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_req_data;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat;
|
||||
|
||||
// DRAM Icache Res
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_fill_rsp_data;
|
||||
// DRAM Icache Rsp
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_rsp_valid;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_rsp_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_rsp_data;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_rsp_ready;
|
||||
|
||||
// Out ebreak
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_out_ebreak;
|
||||
@@ -75,9 +66,9 @@ module Vortex_Cluster
|
||||
|
||||
wire l2c_core_accept;
|
||||
|
||||
wire snp_fwd;
|
||||
wire snp_fwd_valid;
|
||||
wire[31:0] snp_fwd_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] snp_fwd_delay;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] snp_fwd_full;
|
||||
|
||||
assign out_ebreak = (&per_core_out_ebreak);
|
||||
|
||||
@@ -99,36 +90,28 @@ module Vortex_Cluster
|
||||
.reset (reset),
|
||||
.io_valid (per_core_io_valid [curr_core]),
|
||||
.io_data (per_core_io_data [curr_core]),
|
||||
.dram_req (per_core_dram_req [curr_core]),
|
||||
.dram_req_write (per_core_dram_req_write [curr_core]),
|
||||
.dram_req_read (per_core_dram_req_read [curr_core]),
|
||||
.dram_req_write (per_core_dram_req_write [curr_core]),
|
||||
.dram_req_addr (per_core_dram_req_addr [curr_core]),
|
||||
.dram_req_size (per_core_dram_req_size [curr_core]),
|
||||
.dram_req_data (curr_core_dram_req_data ),
|
||||
.dram_expected_lat (per_core_dram_expected_lat [curr_core]),
|
||||
.dram_fill_accept (per_core_dram_fill_accept [curr_core]),
|
||||
.dram_fill_rsp (per_core_dram_fill_rsp [curr_core]),
|
||||
.dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]),
|
||||
.dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]),
|
||||
.I_dram_req (per_core_I_dram_req [curr_core]),
|
||||
.I_dram_req_write (per_core_I_dram_req_write [curr_core]),
|
||||
.dram_req_full (l2c_core_accept ),
|
||||
.dram_rsp_valid (per_core_dram_rsp_valid [curr_core]),
|
||||
.dram_rsp_addr (per_core_dram_rsp_addr [curr_core]),
|
||||
.dram_rsp_data (per_core_dram_rsp_data [curr_core]),
|
||||
.dram_rsp_ready (per_core_dram_rsp_ready [curr_core]),
|
||||
.I_dram_req_read (per_core_I_dram_req_read [curr_core]),
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [curr_core]),
|
||||
.I_dram_req_size (per_core_I_dram_req_size [curr_core]),
|
||||
.I_dram_req_write (per_core_I_dram_req_write [curr_core]),
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [curr_core]),
|
||||
.I_dram_req_data (curr_core_I_dram_req_data ),
|
||||
.I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]),
|
||||
.I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]),
|
||||
.I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]),
|
||||
.I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]),
|
||||
.I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]),
|
||||
.dram_req_delay (l2c_core_accept ),
|
||||
.out_ebreak (per_core_out_ebreak [curr_core]),
|
||||
.snp_req (snp_fwd),
|
||||
.I_dram_req_full (l2c_core_accept ),
|
||||
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [curr_core]),
|
||||
.I_dram_rsp_addr (per_core_I_dram_rsp_addr [curr_core]),
|
||||
.I_dram_rsp_data (per_core_I_dram_rsp_data [curr_core]),
|
||||
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [curr_core]),
|
||||
.snp_req_valid (snp_fwd_valid),
|
||||
.snp_req_addr (snp_fwd_addr),
|
||||
.snp_req_delay (snp_fwd_delay[curr_core]),
|
||||
.I_snp_req (0),
|
||||
.I_snp_req_addr (),
|
||||
.I_snp_req_delay ()
|
||||
.snp_req_full (snp_fwd_full [curr_core]),
|
||||
.out_ebreak (per_core_out_ebreak [curr_core])
|
||||
);
|
||||
|
||||
assign per_core_dram_req_data [curr_core] = curr_core_dram_req_data;
|
||||
@@ -137,27 +120,28 @@ module Vortex_Cluster
|
||||
endgenerate
|
||||
|
||||
//////////////////// L2 Cache ////////////////////
|
||||
wire[`L2NUM_REQUESTS-1:0] l2c_core_req;
|
||||
wire[`L2NUM_REQUESTS-1:0][2:0] l2c_core_req_mem_write;
|
||||
wire[`L2NUM_REQUESTS-1:0][2:0] l2c_core_req_mem_read;
|
||||
wire[`L2NUM_REQUESTS-1:0][31:0] l2c_core_req_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_core_req_data;
|
||||
wire[`L2NUM_REQUESTS-1:0][1:0] l2c_core_req_wb;
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] l2c_core_req_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0][2:0] l2c_core_req_mem_write;
|
||||
wire[`L2NUM_REQUESTS-1:0][2:0] l2c_core_req_mem_read;
|
||||
wire[`L2NUM_REQUESTS-1:0][31:0] l2c_core_req_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_core_req_data;
|
||||
wire[`L2NUM_REQUESTS-1:0][1:0] l2c_core_req_wb;
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] l2c_core_no_wb_slot;
|
||||
wire[`L2NUM_REQUESTS-1:0] l2c_core_no_wb_slot;
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] l2c_wb;
|
||||
wire[`L2NUM_REQUESTS-1:0] [31:0] l2c_wb_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_wb_data;
|
||||
wire[`L2NUM_REQUESTS-1:0] l2c_wb;
|
||||
wire[`L2NUM_REQUESTS-1:0] [31:0] l2c_wb_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_wb_data;
|
||||
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_rsp_data_port;
|
||||
|
||||
genvar llb_index;
|
||||
generate
|
||||
for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
|
||||
assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index];
|
||||
assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index];
|
||||
assign dram_req_data [llb_index * `DWORD_SIZE_BITS +: `DWORD_SIZE_BITS] = dram_req_data_port[llb_index];
|
||||
assign dram_rsp_data_port [llb_index] = dram_rsp_data[llb_index * `DWORD_SIZE_BITS +: `DWORD_SIZE_BITS];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
@@ -165,9 +149,9 @@ module Vortex_Cluster
|
||||
generate
|
||||
for (l2c_curr_core = 0; l2c_curr_core < `L2NUM_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin
|
||||
// Core Request
|
||||
assign l2c_core_req [l2c_curr_core] = per_core_dram_req [(l2c_curr_core/2)];
|
||||
assign l2c_core_req [l2c_curr_core+1] = per_core_I_dram_req[(l2c_curr_core/2)];
|
||||
|
||||
assign l2c_core_req_valid [l2c_curr_core] = (per_core_dram_req_read[(l2c_curr_core/2)] | per_core_dram_req_write[(l2c_curr_core/2)]);
|
||||
assign l2c_core_req_valid [l2c_curr_core+1] = (per_core_I_dram_req_read[(l2c_curr_core/2)] | per_core_I_dram_req_write[(l2c_curr_core/2)]);
|
||||
|
||||
assign l2c_core_req_mem_write [l2c_curr_core] = per_core_dram_req_write[(l2c_curr_core/2)] ? `SW_MEM_WRITE : `NO_MEM_WRITE;
|
||||
assign l2c_core_req_mem_write [l2c_curr_core+1] = `NO_MEM_WRITE; // I caches don't write
|
||||
|
||||
@@ -184,23 +168,21 @@ module Vortex_Cluster
|
||||
assign l2c_core_req_data [l2c_curr_core+1] = per_core_I_dram_req_data[(l2c_curr_core/2)];
|
||||
|
||||
// Core can't accept Response
|
||||
assign l2c_core_no_wb_slot [l2c_curr_core] = ~per_core_dram_fill_accept [(l2c_curr_core/2)];
|
||||
assign l2c_core_no_wb_slot [l2c_curr_core+1] = ~per_core_I_dram_fill_accept[(l2c_curr_core/2)];
|
||||
assign l2c_core_no_wb_slot [l2c_curr_core] = ~per_core_dram_rsp_ready [(l2c_curr_core/2)];
|
||||
assign l2c_core_no_wb_slot [l2c_curr_core+1] = ~per_core_I_dram_rsp_ready[(l2c_curr_core/2)];
|
||||
|
||||
// Cache Fill Response
|
||||
assign per_core_dram_fill_rsp [(l2c_curr_core/2)] = l2c_wb[l2c_curr_core];
|
||||
assign per_core_I_dram_fill_rsp [(l2c_curr_core/2)] = l2c_wb[l2c_curr_core+1];
|
||||
assign per_core_dram_rsp_valid [(l2c_curr_core/2)] = l2c_wb[l2c_curr_core];
|
||||
assign per_core_I_dram_rsp_valid [(l2c_curr_core/2)] = l2c_wb[l2c_curr_core+1];
|
||||
|
||||
assign per_core_dram_fill_rsp_data[(l2c_curr_core/2)] = l2c_wb_data[l2c_curr_core];
|
||||
assign per_core_I_dram_fill_rsp_data[(l2c_curr_core/2)] = l2c_wb_data[l2c_curr_core+1];
|
||||
assign per_core_dram_rsp_data [(l2c_curr_core/2)] = l2c_wb_data[l2c_curr_core];
|
||||
assign per_core_I_dram_rsp_data [(l2c_curr_core/2)] = l2c_wb_data[l2c_curr_core+1];
|
||||
|
||||
assign per_core_dram_fill_rsp_addr[(l2c_curr_core/2)] = l2c_wb_addr[l2c_curr_core];
|
||||
assign per_core_I_dram_fill_rsp_addr[(l2c_curr_core/2)] = l2c_wb_addr[l2c_curr_core+1];
|
||||
assign per_core_dram_rsp_addr [(l2c_curr_core/2)] = l2c_wb_addr[l2c_curr_core];
|
||||
assign per_core_I_dram_rsp_addr [(l2c_curr_core/2)] = l2c_wb_addr[l2c_curr_core+1];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire dram_snp_full;
|
||||
wire dram_req_because_of_wb;
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`L2CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (`L2BANK_LINE_SIZE_BYTES),
|
||||
@@ -223,64 +205,60 @@ module Vortex_Cluster
|
||||
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`L2SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) gpu_l2cache (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core Req (DRAM Fills/WB) To L2 Request
|
||||
.core_req_valid (l2c_core_req),
|
||||
.core_req_addr (l2c_core_req_addr),
|
||||
.core_req_writedata({l2c_core_req_data}),
|
||||
.core_req_mem_read (l2c_core_req_mem_read),
|
||||
.core_req_mem_write(l2c_core_req_mem_write),
|
||||
.core_req_rd (0),
|
||||
.core_req_wb (l2c_core_req_wb),
|
||||
.core_req_warp_num (0),
|
||||
.core_req_pc (0),
|
||||
.core_req_valid (l2c_core_req_valid),
|
||||
.core_req_mem_read (l2c_core_req_mem_read),
|
||||
.core_req_mem_write (l2c_core_req_mem_write),
|
||||
.core_req_addr (l2c_core_req_addr),
|
||||
.core_req_writedata ({l2c_core_req_data}),
|
||||
.core_req_rd (0),
|
||||
.core_req_wb (l2c_core_req_wb),
|
||||
.core_req_warp_num (0),
|
||||
.core_req_pc (0),
|
||||
|
||||
// L2 can't accept Core Request
|
||||
.delay_req (l2c_core_accept),
|
||||
.delay_req (l2c_core_accept),
|
||||
|
||||
// Core can't accept L2 Request
|
||||
.core_no_wb_slot (|l2c_core_no_wb_slot),
|
||||
.core_no_wb_slot (|l2c_core_no_wb_slot),
|
||||
|
||||
// Core Writeback
|
||||
.core_wb_valid (l2c_wb),
|
||||
.core_wb_req_rd (),
|
||||
.core_wb_req_wb (),
|
||||
.core_wb_warp_num (),
|
||||
.core_wb_readdata ({l2c_wb_data}),
|
||||
.core_wb_address (l2c_wb_addr),
|
||||
.core_wb_pc (),
|
||||
|
||||
.core_wb_valid (l2c_wb),
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.core_wb_req_rd (),
|
||||
.core_wb_req_wb (),
|
||||
.core_wb_warp_num (),
|
||||
.core_wb_pc (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
.core_wb_readdata ({l2c_wb_data}),
|
||||
.core_wb_address (l2c_wb_addr),
|
||||
|
||||
// L2 Cache DRAM Fill response
|
||||
.dram_fill_rsp (out_dram_fill_rsp),
|
||||
.dram_fill_rsp_addr(out_dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_data({dram_fill_rsp_data_port}),
|
||||
.dram_rsp_valid (dram_rsp_valid),
|
||||
.dram_rsp_addr (dram_rsp_addr),
|
||||
.dram_rsp_data ({dram_rsp_data_port}),
|
||||
|
||||
// L2 Cache can't accept Fill Response
|
||||
.dram_fill_accept (out_dram_fill_accept),
|
||||
.dram_rsp_ready (dram_rsp_ready),
|
||||
|
||||
// L2 Cache DRAM Fill Request
|
||||
.dram_req (out_dram_req),
|
||||
.dram_req_write (out_dram_req_write),
|
||||
.dram_req_read (out_dram_req_read),
|
||||
.dram_req_addr (out_dram_req_addr),
|
||||
.dram_req_size (out_dram_req_size),
|
||||
.dram_req_data ({dram_req_data_port}),
|
||||
.dram_req_delay (out_dram_req_delay),
|
||||
|
||||
// Snoop Response
|
||||
.dram_req_because_of_wb(dram_req_because_of_wb),
|
||||
.dram_snp_full (dram_snp_full),
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_write (dram_req_write),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data ({dram_req_data_port}),
|
||||
.dram_req_full (dram_req_full),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req (llc_snp_req),
|
||||
.snp_req_addr (llc_snp_req_addr),
|
||||
.snp_req_delay (llc_snp_req_delay),
|
||||
.snp_req_valid (llc_snp_req_valid),
|
||||
.snp_req_addr (llc_snp_req_addr),
|
||||
.snp_req_full (llc_snp_req_full),
|
||||
|
||||
.snp_fwd (snp_fwd),
|
||||
.snp_fwd_addr (snp_fwd_addr),
|
||||
.snp_fwd_delay (|snp_fwd_delay)
|
||||
.snp_fwd_valid (snp_fwd_valid),
|
||||
.snp_fwd_addr (snp_fwd_addr),
|
||||
.snp_fwd_full (|snp_fwd_full)
|
||||
);
|
||||
|
||||
endmodule
|
||||
@@ -11,33 +11,26 @@ module Vortex_Socket (
|
||||
output wire io_valid[`NUM_CORES-1:0],
|
||||
output wire[31:0] io_data [`NUM_CORES-1:0],
|
||||
|
||||
output wire[31:0] number_cores,
|
||||
|
||||
// DRAM Req
|
||||
output wire out_dram_req,
|
||||
output wire out_dram_req_write,
|
||||
output wire out_dram_req_read,
|
||||
output wire [31:0] out_dram_req_addr,
|
||||
output wire [31:0] out_dram_req_size,
|
||||
output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0],
|
||||
output wire [31:0] out_dram_expected_lat,
|
||||
input wire out_dram_req_delay,
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
input wire dram_req_full,
|
||||
|
||||
// DRAM Res
|
||||
output wire out_dram_fill_accept,
|
||||
input wire out_dram_fill_rsp,
|
||||
input wire [31:0] out_dram_fill_rsp_addr,
|
||||
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0],
|
||||
// DRAM Rsp
|
||||
input wire dram_rsp_valid,
|
||||
input wire [31:0] dram_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
input wire llc_snp_req,
|
||||
input wire llc_snp_req_valid,
|
||||
input wire[31:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_delay,
|
||||
output wire llc_snp_req_full,
|
||||
|
||||
output wire out_ebreak
|
||||
);
|
||||
assign number_cores = `NUM_CORES;
|
||||
|
||||
if (`NUM_CLUSTERS == 1) begin
|
||||
|
||||
wire[`NUM_CORES-1:0] cluster_io_valid;
|
||||
@@ -51,59 +44,55 @@ module Vortex_Socket (
|
||||
end
|
||||
|
||||
Vortex_Cluster #(.CLUSTER_ID(0)) Vortex_Cluster(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.io_valid (cluster_io_valid),
|
||||
.io_data (cluster_io_data),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.io_valid (cluster_io_valid),
|
||||
.io_data (cluster_io_data),
|
||||
|
||||
.out_dram_req (out_dram_req),
|
||||
.out_dram_req_write (out_dram_req_write),
|
||||
.out_dram_req_read (out_dram_req_read),
|
||||
.out_dram_req_addr (out_dram_req_addr),
|
||||
.out_dram_req_size (out_dram_req_size),
|
||||
.out_dram_req_data (out_dram_req_data),
|
||||
.out_dram_expected_lat (out_dram_expected_lat),
|
||||
.out_dram_req_delay (out_dram_req_delay),
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_write (dram_req_write),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_full (dram_req_full),
|
||||
|
||||
.out_dram_fill_accept (out_dram_fill_accept),
|
||||
.out_dram_fill_rsp (out_dram_fill_rsp),
|
||||
.out_dram_fill_rsp_addr(out_dram_fill_rsp_addr),
|
||||
.out_dram_fill_rsp_data(out_dram_fill_rsp_data),
|
||||
.dram_rsp_valid (dram_rsp_valid),
|
||||
.dram_rsp_addr (dram_rsp_addr),
|
||||
.dram_rsp_data (dram_rsp_data),
|
||||
.dram_rsp_ready (dram_rsp_ready),
|
||||
|
||||
.llc_snp_req (llc_snp_req),
|
||||
.llc_snp_req_addr (llc_snp_req_addr),
|
||||
.llc_snp_req_delay (llc_snp_req_delay),
|
||||
.out_ebreak (out_ebreak)
|
||||
.llc_snp_req_valid (llc_snp_req_valid),
|
||||
.llc_snp_req_addr (llc_snp_req_addr),
|
||||
.llc_snp_req_full (llc_snp_req_full),
|
||||
|
||||
.out_ebreak (out_ebreak)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
wire snp_fwd;
|
||||
wire[31:0] snp_fwd_addr;
|
||||
wire[`NUM_CLUSTERS-1:0] snp_fwd_delay;
|
||||
wire snp_fwd_valid;
|
||||
wire[31:0] snp_fwd_addr;
|
||||
wire[`NUM_CLUSTERS-1:0] snp_fwd_full;
|
||||
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_out_ebreak;
|
||||
|
||||
assign out_ebreak = (&per_cluster_out_ebreak);
|
||||
|
||||
// // DRAM Dcache Req
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat;
|
||||
wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_req_data;
|
||||
wire[31:0] per_cluster_dram_req_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
|
||||
|
||||
wire l3c_core_accept;
|
||||
wire l3c_core_req_full;
|
||||
|
||||
// // DRAM Dcache Res
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_accept;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_rsp;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr;
|
||||
wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_fill_rsp_data;
|
||||
wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
|
||||
// // DRAM Dcache Rsp
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_rsp_addr;
|
||||
wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_rsp_data;
|
||||
wire[31:0] per_cluster_dram_rsp_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
|
||||
|
||||
wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0] per_cluster_io_valid;
|
||||
wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data;
|
||||
@@ -115,96 +104,83 @@ module Vortex_Socket (
|
||||
assign io_data [curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc];
|
||||
end
|
||||
|
||||
|
||||
for (curr_word = 0; curr_word < `DBANK_LINE_WORDS; curr_word = curr_word+1) begin
|
||||
assign per_cluster_dram_req_data [curr_c][curr_word] = per_cluster_dram_req_data_up [curr_c][curr_word];
|
||||
assign per_cluster_dram_fill_rsp_data_up[curr_c][curr_word] = per_cluster_dram_fill_rsp_data[curr_c][curr_word];
|
||||
assign per_cluster_dram_rsp_data_up[curr_c][curr_word] = per_cluster_dram_rsp_data[curr_c][curr_word];
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
genvar curr_cluster;
|
||||
for (curr_cluster = 0; curr_cluster < `NUM_CLUSTERS; curr_cluster=curr_cluster+1) begin
|
||||
|
||||
Vortex_Cluster #(.CLUSTER_ID(curr_cluster)) Vortex_Cluster(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.io_valid (per_cluster_io_valid [curr_cluster]),
|
||||
.io_data (per_cluster_io_data [curr_cluster]),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.io_valid (per_cluster_io_valid [curr_cluster]),
|
||||
.io_data (per_cluster_io_data [curr_cluster]),
|
||||
|
||||
.out_dram_req (per_cluster_dram_req [curr_cluster]),
|
||||
.out_dram_req_write (per_cluster_dram_req_write [curr_cluster]),
|
||||
.out_dram_req_read (per_cluster_dram_req_read [curr_cluster]),
|
||||
.out_dram_req_addr (per_cluster_dram_req_addr [curr_cluster]),
|
||||
.out_dram_req_size (per_cluster_dram_req_size [curr_cluster]),
|
||||
.out_dram_req_data (per_cluster_dram_req_data_up [curr_cluster]),
|
||||
.out_dram_expected_lat (per_cluster_dram_expected_lat [curr_cluster]),
|
||||
.out_dram_req_delay (l3c_core_accept),
|
||||
.dram_req_write (per_cluster_dram_req_write [curr_cluster]),
|
||||
.dram_req_read (per_cluster_dram_req_read [curr_cluster]),
|
||||
.dram_req_addr (per_cluster_dram_req_addr [curr_cluster]),
|
||||
.dram_req_data (per_cluster_dram_req_data_up [curr_cluster]),
|
||||
.dram_req_full (l3c_core_req_full),
|
||||
|
||||
.out_dram_fill_accept (per_cluster_dram_fill_accept [curr_cluster]),
|
||||
.out_dram_fill_rsp (per_cluster_dram_fill_rsp [curr_cluster]),
|
||||
.out_dram_fill_rsp_addr(per_cluster_dram_fill_rsp_addr [curr_cluster]),
|
||||
.out_dram_fill_rsp_data(per_cluster_dram_fill_rsp_data_up[curr_cluster]),
|
||||
.dram_rsp_valid (per_cluster_dram_rsp_valid [curr_cluster]),
|
||||
.dram_rsp_addr (per_cluster_dram_rsp_addr [curr_cluster]),
|
||||
.dram_rsp_data (per_cluster_dram_rsp_data_up [curr_cluster]),
|
||||
.dram_rsp_ready (per_cluster_dram_rsp_ready [curr_cluster]),
|
||||
|
||||
.llc_snp_req (snp_fwd),
|
||||
.llc_snp_req_addr (snp_fwd_addr),
|
||||
.llc_snp_req_delay (snp_fwd_delay[curr_cluster]),
|
||||
.llc_snp_req_valid (snp_fwd_valid),
|
||||
.llc_snp_req_addr (snp_fwd_addr),
|
||||
.llc_snp_req_full (snp_fwd_full[curr_cluster]),
|
||||
|
||||
.out_ebreak (per_cluster_out_ebreak [curr_cluster])
|
||||
.out_ebreak (per_cluster_out_ebreak [curr_cluster])
|
||||
);
|
||||
end
|
||||
|
||||
//////////////////// L3 Cache ////////////////////
|
||||
wire[`L3NUM_REQUESTS-1:0] l3c_core_req;
|
||||
wire[`L3NUM_REQUESTS-1:0][2:0] l3c_core_req_mem_write;
|
||||
wire[`L3NUM_REQUESTS-1:0][2:0] l3c_core_req_mem_read;
|
||||
wire[`L3NUM_REQUESTS-1:0][31:0] l3c_core_req_addr;
|
||||
wire[`L3NUM_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_core_req_data;
|
||||
wire[`L3NUM_REQUESTS-1:0][1:0] l3c_core_req_wb;
|
||||
wire[`L3NUM_REQUESTS-1:0] l3c_core_req_valid;
|
||||
wire[`L3NUM_REQUESTS-1:0][2:0] l3c_core_req_mem_write;
|
||||
wire[`L3NUM_REQUESTS-1:0][2:0] l3c_core_req_mem_read;
|
||||
wire[`L3NUM_REQUESTS-1:0][31:0] l3c_core_req_addr;
|
||||
wire[`L3NUM_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_core_req_data;
|
||||
wire[`L3NUM_REQUESTS-1:0][1:0] l3c_core_req_wb;
|
||||
|
||||
wire[`L3NUM_REQUESTS-1:0] l3c_core_no_wb_slot;
|
||||
wire[`L3NUM_REQUESTS-1:0] l3c_core_no_wb_slot;
|
||||
|
||||
wire[`L3NUM_REQUESTS-1:0] l3c_wb;
|
||||
wire[`L3NUM_REQUESTS-1:0] [31:0] l3c_wb_addr;
|
||||
wire[`L3NUM_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_wb_data;
|
||||
wire[`L3NUM_REQUESTS-1:0] l3c_wb;
|
||||
wire[`L3NUM_REQUESTS-1:0] [31:0] l3c_wb_addr;
|
||||
wire[`L3NUM_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_wb_data;
|
||||
|
||||
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_rsp_data_port;
|
||||
|
||||
genvar llb_index;
|
||||
for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
|
||||
assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index];
|
||||
assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index];
|
||||
end
|
||||
for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
|
||||
assign dram_req_data [llb_index] = dram_req_data_port[llb_index];
|
||||
assign dram_rsp_data_port[llb_index] = dram_rsp_data[llb_index];
|
||||
end
|
||||
|
||||
//
|
||||
genvar l3c_curr_cluster;
|
||||
for (l3c_curr_cluster = 0; l3c_curr_cluster < `L3NUM_REQUESTS; l3c_curr_cluster=l3c_curr_cluster+1) begin
|
||||
// Core Request
|
||||
assign l3c_core_req [l3c_curr_cluster] = per_cluster_dram_req [l3c_curr_cluster];
|
||||
|
||||
assign l3c_core_req_mem_write [l3c_curr_cluster] = per_cluster_dram_req_write[l3c_curr_cluster] ? `SW_MEM_WRITE : `NO_MEM_WRITE;
|
||||
|
||||
assign l3c_core_req_valid [l3c_curr_cluster] = per_cluster_dram_req_valid[l3c_curr_cluster];
|
||||
assign l3c_core_req_mem_read [l3c_curr_cluster] = per_cluster_dram_req_read [l3c_curr_cluster] ? `LW_MEM_READ : `NO_MEM_READ;
|
||||
|
||||
assign l3c_core_req_mem_write [l3c_curr_cluster] = per_cluster_dram_req_write[l3c_curr_cluster] ? `SW_MEM_WRITE : `NO_MEM_WRITE;
|
||||
assign l3c_core_req_wb [l3c_curr_cluster] = per_cluster_dram_req_read [l3c_curr_cluster] ? 1 : 0;
|
||||
|
||||
assign l3c_core_req_addr [l3c_curr_cluster] = per_cluster_dram_req_addr [l3c_curr_cluster];
|
||||
|
||||
assign l3c_core_req_data [l3c_curr_cluster] = per_cluster_dram_req_data [l3c_curr_cluster];
|
||||
|
||||
// Core can't accept Response
|
||||
assign l3c_core_no_wb_slot [l3c_curr_cluster] = ~per_cluster_dram_fill_accept[l3c_curr_cluster];
|
||||
assign l3c_core_no_wb_slot [l3c_curr_cluster] = ~per_cluster_dram_rsp_ready[l3c_curr_cluster];
|
||||
|
||||
// Cache Fill Response
|
||||
assign per_cluster_dram_fill_rsp [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster];
|
||||
assign per_cluster_dram_fill_rsp_data[l3c_curr_cluster] = l3c_wb_data[l3c_curr_cluster];
|
||||
assign per_cluster_dram_fill_rsp_addr[l3c_curr_cluster] = l3c_wb_addr[l3c_curr_cluster];
|
||||
assign per_cluster_dram_rsp_valid [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster];
|
||||
assign per_cluster_dram_rsp_data [l3c_curr_cluster] = l3c_wb_data [l3c_curr_cluster];
|
||||
assign per_cluster_dram_rsp_addr [l3c_curr_cluster] = l3c_wb_addr [l3c_curr_cluster];
|
||||
end
|
||||
|
||||
wire dram_snp_full;
|
||||
wire dram_req_because_of_wb;
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`L3CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (`L3BANK_LINE_SIZE_BYTES),
|
||||
@@ -230,62 +206,58 @@ module Vortex_Socket (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core Req (DRAM Fills/WB) To L3 Request
|
||||
.core_req_valid (l3c_core_req),
|
||||
.core_req_addr (l3c_core_req_addr),
|
||||
.core_req_writedata({l3c_core_req_data}),
|
||||
.core_req_mem_read (l3c_core_req_mem_read),
|
||||
.core_req_mem_write(l3c_core_req_mem_write),
|
||||
.core_req_rd (0),
|
||||
.core_req_wb (l3c_core_req_wb),
|
||||
.core_req_warp_num (0),
|
||||
.core_req_pc (0),
|
||||
// Core Req (DRAM Fills/WB) To L3 Request
|
||||
.core_req_valid (l3c_core_req_valid),
|
||||
.core_req_mem_read (l3c_core_req_mem_read),
|
||||
.core_req_mem_write (l3c_core_req_mem_write),
|
||||
.core_req_addr (l3c_core_req_addr),
|
||||
.core_req_writedata ({l3c_core_req_data}),
|
||||
.core_req_rd (0),
|
||||
.core_req_wb (l3c_core_req_wb),
|
||||
.core_req_warp_num (0),
|
||||
.core_req_pc (0),
|
||||
|
||||
// L3 can't accept Core Request
|
||||
.delay_req (l3c_core_accept),
|
||||
.delay_req (l3c_core_req_full),
|
||||
|
||||
// Core can't accept L3 Request
|
||||
.core_no_wb_slot (|l3c_core_no_wb_slot),
|
||||
.core_no_wb_slot (|l3c_core_no_wb_slot),
|
||||
|
||||
// Core Writeback
|
||||
.core_wb_valid (l3c_wb),
|
||||
.core_wb_req_rd (),
|
||||
.core_wb_req_wb (),
|
||||
.core_wb_warp_num (),
|
||||
.core_wb_readdata ({l3c_wb_data}),
|
||||
.core_wb_address (l3c_wb_addr),
|
||||
.core_wb_pc (),
|
||||
.core_wb_valid (l3c_wb),
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
.core_wb_req_rd (),
|
||||
.core_wb_req_wb (),
|
||||
.core_wb_warp_num (),
|
||||
.core_wb_pc (),
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
.core_wb_readdata ({l3c_wb_data}),
|
||||
.core_wb_address (l3c_wb_addr),
|
||||
|
||||
// L3 Cache DRAM Fill response
|
||||
.dram_fill_rsp (out_dram_fill_rsp),
|
||||
.dram_fill_rsp_addr(out_dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_data({dram_fill_rsp_data_port}),
|
||||
.dram_rsp_valid (dram_rsp_valid),
|
||||
.dram_rsp_addr (dram_rsp_addr),
|
||||
.dram_rsp_data ({dram_rsp_data_port}),
|
||||
|
||||
// L3 Cache can't accept Fill Response
|
||||
.dram_fill_accept (out_dram_fill_accept),
|
||||
.dram_rsp_ready (dram_rsp_ready),
|
||||
|
||||
// L3 Cache DRAM Fill Request
|
||||
.dram_req (out_dram_req),
|
||||
.dram_req_write (out_dram_req_write),
|
||||
.dram_req_read (out_dram_req_read),
|
||||
.dram_req_addr (out_dram_req_addr),
|
||||
.dram_req_size (out_dram_req_size),
|
||||
.dram_req_data ({dram_req_data_port}),
|
||||
.dram_req_delay (out_dram_req_delay),
|
||||
|
||||
// Snoop Response
|
||||
.dram_req_because_of_wb(dram_req_because_of_wb),
|
||||
.dram_snp_full (dram_snp_full),
|
||||
.dram_req_write (dram_req_write),
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data ({dram_req_data_port}),
|
||||
.dram_req_full (dram_req_full),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req (llc_snp_req),
|
||||
.snp_req_addr (llc_snp_req_addr),
|
||||
.snp_req_delay (llc_snp_req_delay),
|
||||
.snp_req_valid (llc_snp_req_valid),
|
||||
.snp_req_addr (llc_snp_req_addr),
|
||||
.snp_req_full (llc_snp_req_full),
|
||||
|
||||
// Snoop Forward
|
||||
.snp_fwd (snp_fwd),
|
||||
.snp_fwd_addr (snp_fwd_addr),
|
||||
.snp_fwd_delay (|snp_fwd_delay)
|
||||
.snp_fwd_valid (snp_fwd_valid),
|
||||
.snp_fwd_addr (snp_fwd_addr),
|
||||
.snp_fwd_full (|snp_fwd_full)
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
@@ -17,29 +17,28 @@ module byte_enabled_simple_dual_port_ram
|
||||
// Thread Byte Bit
|
||||
logic [`NUM_THREADS-1:0][3:0][7:0] GPR[31:0];
|
||||
|
||||
// initial begin
|
||||
// for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] = 0;
|
||||
// end
|
||||
|
||||
integer ini;
|
||||
always @(posedge clk) begin
|
||||
if (we) begin
|
||||
integer thread_ind;
|
||||
for (thread_ind = 0; thread_ind < `NUM_THREADS; thread_ind = thread_ind + 1) begin
|
||||
if (be[thread_ind]) begin
|
||||
GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0];
|
||||
GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8];
|
||||
GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16];
|
||||
GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24];
|
||||
if (reset) begin
|
||||
//--
|
||||
end else begin
|
||||
if (we) begin
|
||||
integer thread_ind;
|
||||
for (thread_ind = 0; thread_ind < `NUM_THREADS; thread_ind = thread_ind + 1) begin
|
||||
if (be[thread_ind]) begin
|
||||
GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0];
|
||||
GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8];
|
||||
GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16];
|
||||
GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
// $display("^^^^^^^^^^^^^^^^^^^^^^^");
|
||||
// for (regi = 0; regi <= 31; regi = regi + 1) begin
|
||||
// for (threadi = 0; threadi < `NUM_THREADS; threadi = threadi + 1) begin
|
||||
// if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]);
|
||||
// end
|
||||
// end
|
||||
end
|
||||
// $display("^^^^^^^^^^^^^^^^^^^^^^^");
|
||||
// for (regi = 0; regi <= 31; regi = regi + 1) begin
|
||||
// for (threadi = 0; threadi < `NUM_THREADS; threadi = threadi + 1) begin
|
||||
// if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]);
|
||||
// end
|
||||
// end
|
||||
end
|
||||
end
|
||||
|
||||
assign q1 = GPR[raddr1];
|
||||
|
||||
383
hw/rtl/cache/VX_cache_data.v
vendored
383
hw/rtl/cache/VX_cache_data.v
vendored
@@ -1,51 +1,46 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_cache_data
|
||||
#(
|
||||
parameter NUM_IND = 8,
|
||||
parameter NUM_WORDS_PER_BLOCK = 4,
|
||||
parameter TAG_SIZE_START = 0,
|
||||
parameter TAG_SIZE_END = 16,
|
||||
parameter IND_SIZE_START = 0,
|
||||
parameter IND_SIZE_END = 7
|
||||
)
|
||||
(
|
||||
module VX_cache_data #(
|
||||
parameter NUM_IND = 8,
|
||||
parameter NUM_WORDS_PER_BLOCK = 4,
|
||||
parameter TAG_SIZE_START = 0,
|
||||
parameter TAG_SIZE_END = 16,
|
||||
parameter IND_SIZE_START = 0,
|
||||
parameter IND_SIZE_END = 7
|
||||
) (
|
||||
input wire clk, rst, // Clock
|
||||
|
||||
// `ifdef PARAM
|
||||
// Addr
|
||||
input wire[IND_SIZE_END:IND_SIZE_START] addr,
|
||||
// WE
|
||||
input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we,
|
||||
input wire evict,
|
||||
// Data
|
||||
input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write,
|
||||
input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write,
|
||||
// `ifdef PARAM
|
||||
// Addr
|
||||
input wire[IND_SIZE_END:IND_SIZE_START] addr,
|
||||
// WE
|
||||
input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we,
|
||||
input wire evict,
|
||||
// Data
|
||||
input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write,
|
||||
input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write,
|
||||
|
||||
output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use,
|
||||
output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
|
||||
output wire valid_use,
|
||||
output wire dirty_use
|
||||
// `else
|
||||
// // Addr
|
||||
// input wire[7:0] addr,
|
||||
// // WE
|
||||
// input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we,
|
||||
// input wire evict,
|
||||
// // Data
|
||||
// input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data
|
||||
// input wire[16:0] tag_write,
|
||||
|
||||
|
||||
output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use,
|
||||
output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
|
||||
output wire valid_use,
|
||||
output wire dirty_use
|
||||
// `else
|
||||
// // Addr
|
||||
// input wire[7:0] addr,
|
||||
// // WE
|
||||
// input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we,
|
||||
// input wire evict,
|
||||
// // Data
|
||||
// input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data
|
||||
// input wire[16:0] tag_write,
|
||||
|
||||
|
||||
// output wire[16:0] tag_use,
|
||||
// output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
|
||||
// output wire valid_use,
|
||||
// output wire dirty_use
|
||||
// `endif
|
||||
|
||||
// output wire[16:0] tag_use,
|
||||
// output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
|
||||
// output wire valid_use,
|
||||
// output wire dirty_use
|
||||
// `endif
|
||||
);
|
||||
|
||||
//localparam NUM_BANKS = CACHE_BANKS;
|
||||
//localparam CACHE_BLOCK_PER_BANK = (CACHE_BLOCK / CACHE_BANKS);
|
||||
// localparam NUM_WORDS_PER_BLOCK = CACHE_BLOCK / (CACHE_BANKS*4);
|
||||
@@ -53,179 +48,165 @@ module VX_cache_data
|
||||
|
||||
wire currently_writing = (|we);
|
||||
wire update_dirty = ((!dirty_use) && currently_writing) || (evict);
|
||||
|
||||
wire dirt_new = evict ? 0 : (|we);
|
||||
|
||||
`ifndef SYN
|
||||
// (3:0) 4 bytes
|
||||
reg[NUM_WORDS_PER_BLOCK-1:0][3:0][7:0] data[NUM_IND-1:0]; // Actual Data
|
||||
reg[TAG_SIZE_END:TAG_SIZE_START] tag[NUM_IND-1:0];
|
||||
reg valid[NUM_IND-1:0];
|
||||
reg dirty[NUM_IND-1:0];
|
||||
|
||||
`ifndef SYN
|
||||
// 16 bytes
|
||||
assign data_use = data[addr]; // Read Port
|
||||
assign tag_use = tag[addr];
|
||||
assign valid_use = valid[addr];
|
||||
assign dirty_use = dirty[addr];
|
||||
|
||||
// (3:0) 4 bytes
|
||||
reg[NUM_WORDS_PER_BLOCK-1:0][3:0][7:0] data[NUM_IND-1:0]; // Actual Data
|
||||
reg[TAG_SIZE_END:TAG_SIZE_START] tag[NUM_IND-1:0];
|
||||
reg valid[NUM_IND-1:0];
|
||||
reg dirty[NUM_IND-1:0];
|
||||
integer f;
|
||||
integer ini_ind;
|
||||
always @(posedge clk, posedge rst) begin : update_all
|
||||
if (rst) begin
|
||||
for (ini_ind = 0; ini_ind < NUM_IND; ini_ind=ini_ind+1) begin
|
||||
//data[ini_ind] <= 0;
|
||||
//tag[ini_ind] <= 0;
|
||||
valid[ini_ind] <= 0;
|
||||
//dirty[ini_ind] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (update_dirty) dirty[addr] <= dirt_new; // WRite Port
|
||||
if (evict) tag[addr] <= tag_write;
|
||||
if (evict) valid[addr] <= 1;
|
||||
|
||||
|
||||
// 16 bytes
|
||||
assign data_use = data[addr]; // Read Port
|
||||
assign tag_use = tag[addr];
|
||||
assign valid_use = valid[addr];
|
||||
assign dirty_use = dirty[addr];
|
||||
|
||||
integer f;
|
||||
integer ini_ind;
|
||||
always @(posedge clk, posedge rst) begin : update_all
|
||||
if (rst) begin
|
||||
for (ini_ind = 0; ini_ind < NUM_IND; ini_ind=ini_ind+1) begin
|
||||
//data[ini_ind] <= 0;
|
||||
//tag[ini_ind] <= 0;
|
||||
valid[ini_ind] <= 0;
|
||||
//dirty[ini_ind] <= 0;
|
||||
for (f = 0; f < NUM_WORDS_PER_BLOCK; f = f + 1) begin
|
||||
if (we[f][0]) data[addr][f][0] <= data_write[f][7 :0 ];
|
||||
if (we[f][1]) data[addr][f][1] <= data_write[f][15:8 ];
|
||||
if (we[f][2]) data[addr][f][2] <= data_write[f][23:16];
|
||||
if (we[f][3]) data[addr][f][3] <= data_write[f][31:24];
|
||||
end
|
||||
end else begin
|
||||
if (update_dirty) dirty[addr] <= dirt_new; // WRite Port
|
||||
if (evict) tag[addr] <= tag_write;
|
||||
if (evict) valid[addr] <= 1;
|
||||
|
||||
for (f = 0; f < NUM_WORDS_PER_BLOCK; f = f + 1) begin
|
||||
if (we[f][0]) data[addr][f][0] <= data_write[f][7 :0 ];
|
||||
if (we[f][1]) data[addr][f][1] <= data_write[f][15:8 ];
|
||||
if (we[f][2]) data[addr][f][2] <= data_write[f][23:16];
|
||||
if (we[f][3]) data[addr][f][3] <= data_write[f][31:24];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`else
|
||||
`else
|
||||
|
||||
wire[IND_SIZE_END:IND_SIZE_START] use_addr = addr;
|
||||
wire[IND_SIZE_END:IND_SIZE_START] use_addr = addr;
|
||||
|
||||
wire cena = 1;
|
||||
wire cena = 1;
|
||||
|
||||
wire cenb_d = (|we);
|
||||
wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_d = data_write;
|
||||
wire[NUM_WORDS_PER_BLOCK-1:0][31:0] write_bit_mask_d;
|
||||
wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_d;
|
||||
genvar cur_b;
|
||||
for (cur_b = 0; cur_b < NUM_WORDS_PER_BLOCK; cur_b=cur_b+1) begin
|
||||
assign write_bit_mask_d[cur_b] = {32{~we[cur_b]}};
|
||||
end
|
||||
assign data_use = data_out_d;
|
||||
wire cenb_d = (|we);
|
||||
wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_d = data_write;
|
||||
wire[NUM_WORDS_PER_BLOCK-1:0][31:0] write_bit_mask_d;
|
||||
wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_d;
|
||||
genvar cur_b;
|
||||
for (cur_b = 0; cur_b < NUM_WORDS_PER_BLOCK; cur_b=cur_b+1) begin
|
||||
assign write_bit_mask_d[cur_b] = {32{~we[cur_b]}};
|
||||
end
|
||||
assign data_use = data_out_d;
|
||||
|
||||
// Using ASIC MEM
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 data (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(data_out_d),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena),
|
||||
.AA(use_addr),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb_d),
|
||||
.WENB(write_bit_mask_d),
|
||||
.AB(use_addr),
|
||||
.DB(wdata_d),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
wire[16:0] old_tag;
|
||||
wire old_valid;
|
||||
wire old_dirty;
|
||||
|
||||
wire[16:0] new_tag = evict ? tag_write : old_tag;
|
||||
wire new_valid = evict ? 1 : old_valid;
|
||||
wire new_dirty = update_dirty ? dirt_new : old_dirty;
|
||||
|
||||
wire cenb_m = (evict || update_dirty);
|
||||
wire[19-1:0][31:0] write_bit_mask_m = cenb_m ? 19'b0 : 19'b1;
|
||||
|
||||
// Try to fix the error in memory connection, modified by Lingjun Zhu on Oct. 28 2019
|
||||
// wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_m = {new_tag, new_dirty, new_valid};
|
||||
// wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_m;
|
||||
|
||||
wire[19-1:0] wdata_m = {new_tag, new_dirty, new_valid};
|
||||
|
||||
wire[19-1:0] data_out_m;
|
||||
|
||||
assign {old_tag, old_dirty, old_valid} = data_out_m;
|
||||
|
||||
|
||||
// Using ASIC MEM
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 data (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(data_out_d),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena),
|
||||
.AA(use_addr),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb_d),
|
||||
.WENB(write_bit_mask_d),
|
||||
.AB(use_addr),
|
||||
.DB(wdata_d),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
assign dirty_use = old_dirty;
|
||||
assign valid_use = old_valid;
|
||||
assign tag_use = old_tag;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
wire[16:0] old_tag;
|
||||
wire old_valid;
|
||||
wire old_dirty;
|
||||
|
||||
wire[16:0] new_tag = evict ? tag_write : old_tag;
|
||||
wire new_valid = evict ? 1 : old_valid;
|
||||
wire new_dirty = update_dirty ? dirt_new : old_dirty;
|
||||
|
||||
|
||||
wire cenb_m = (evict || update_dirty);
|
||||
wire[19-1:0][31:0] write_bit_mask_m = cenb_m ? 19'b0 : 19'b1;
|
||||
|
||||
|
||||
|
||||
// Try to fix the error in memory connection, modified by Lingjun Zhu on Oct. 28 2019
|
||||
// wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_m = {new_tag, new_dirty, new_valid};
|
||||
// wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_m;
|
||||
|
||||
wire[19-1:0] wdata_m = {new_tag, new_dirty, new_valid};
|
||||
|
||||
wire[19-1:0] data_out_m;
|
||||
|
||||
assign {old_tag, old_dirty, old_valid} = data_out_m;
|
||||
|
||||
|
||||
assign dirty_use = old_dirty;
|
||||
assign valid_use = old_valid;
|
||||
assign tag_use = old_tag;
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x19_wm0 meta (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
// .WENYB(),
|
||||
.AYB(),
|
||||
.QA(data_out_m),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena),
|
||||
.AA(use_addr),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb_m),
|
||||
// .WENB(write_bit_mask_m),
|
||||
.AB(use_addr),
|
||||
.DB(wdata_m),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
// .TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(19'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
|
||||
`endif
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x19_wm0 meta (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
// .WENYB(),
|
||||
.AYB(),
|
||||
.QA(data_out_m),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena),
|
||||
.AA(use_addr),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb_m),
|
||||
// .WENB(write_bit_mask_m),
|
||||
.AB(use_addr),
|
||||
.DB(wdata_m),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
// .TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(19'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -1,21 +1,19 @@
|
||||
module VX_divide
|
||||
#(
|
||||
parameter WIDTHN=1,
|
||||
parameter WIDTHD=1,
|
||||
parameter NREP="UNSIGNED",
|
||||
parameter DREP="UNSIGNED",
|
||||
parameter SPEED="MIXED", // "MIXED" or "HIGHEST"
|
||||
parameter PIPELINE=0
|
||||
)
|
||||
(
|
||||
input clock, aclr, clken,
|
||||
module VX_divide #(
|
||||
parameter WIDTHN=1,
|
||||
parameter WIDTHD=1,
|
||||
parameter NREP="UNSIGNED",
|
||||
parameter DREP="UNSIGNED",
|
||||
parameter SPEED="MIXED", // "MIXED" or "HIGHEST"
|
||||
parameter PIPELINE=0
|
||||
) (
|
||||
input clock, aclr, clken,
|
||||
|
||||
input [WIDTHN-1:0] numer,
|
||||
input [WIDTHD-1:0] denom,
|
||||
input [WIDTHN-1:0] numer,
|
||||
input [WIDTHD-1:0] denom,
|
||||
|
||||
output reg [WIDTHN-1:0] quotient,
|
||||
output reg [WIDTHD-1:0] remainder
|
||||
);
|
||||
output reg [WIDTHN-1:0] quotient,
|
||||
output reg [WIDTHD-1:0] remainder
|
||||
);
|
||||
|
||||
// synthesis read_comments_as_HDL on
|
||||
// localparam IMPL = "quartus";
|
||||
@@ -27,14 +25,16 @@ module VX_divide
|
||||
|
||||
generate
|
||||
if (NREP != DREP) begin
|
||||
/* verilator lint_off DECLFILENAME */
|
||||
different_nrep_drep_not_yet_supported non_existing_module();
|
||||
/* verilator lint_on DECLFILENAME */
|
||||
end
|
||||
|
||||
if (IMPL == "quartus") begin
|
||||
|
||||
localparam lpm_speed=SPEED == "HIGHEST" ? 9:5;
|
||||
|
||||
lpm_divide#(
|
||||
lpm_divide #(
|
||||
.LPM_WIDTHN(WIDTHN),
|
||||
.LPM_WIDTHD(WIDTHD),
|
||||
.LPM_NREPRESENTATION(NREP),
|
||||
@@ -42,7 +42,7 @@ module VX_divide
|
||||
.LPM_PIPELINE(PIPELINE),
|
||||
.LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator
|
||||
.MAXIMIZE_SPEED(lpm_speed)
|
||||
) quartus_divider(
|
||||
) quartus_divider (
|
||||
.clock(clock),
|
||||
.aclr(aclr),
|
||||
.clken(clken),
|
||||
@@ -51,7 +51,6 @@ module VX_divide
|
||||
.quotient(quotient),
|
||||
.remain(remainder)
|
||||
);
|
||||
|
||||
end
|
||||
else begin
|
||||
|
||||
|
||||
@@ -1,21 +1,19 @@
|
||||
module VX_mult
|
||||
#(
|
||||
parameter WIDTHA=1,
|
||||
parameter WIDTHB=1,
|
||||
parameter WIDTHP=1,
|
||||
parameter REP="UNSIGNED",
|
||||
parameter SPEED="MIXED", // "MIXED" or "HIGHEST"
|
||||
parameter PIPELINE=0,
|
||||
parameter FORCE_LE="NO"
|
||||
)
|
||||
(
|
||||
input clock, aclr, clken,
|
||||
module VX_mult #(
|
||||
parameter WIDTHA=1,
|
||||
parameter WIDTHB=1,
|
||||
parameter WIDTHP=1,
|
||||
parameter REP="UNSIGNED",
|
||||
parameter SPEED="MIXED", // "MIXED" or "HIGHEST"
|
||||
parameter PIPELINE=0,
|
||||
parameter FORCE_LE="NO"
|
||||
) (
|
||||
input clock, aclr, clken,
|
||||
|
||||
input [WIDTHA-1:0] dataa,
|
||||
input [WIDTHB-1:0] datab,
|
||||
input [WIDTHA-1:0] dataa,
|
||||
input [WIDTHB-1:0] datab,
|
||||
|
||||
output reg [WIDTHP-1:0] result
|
||||
);
|
||||
output reg [WIDTHP-1:0] result
|
||||
);
|
||||
|
||||
// synthesis read_comments_as_HDL on
|
||||
// localparam IMPL = "quartus";
|
||||
@@ -29,10 +27,11 @@ module VX_mult
|
||||
|
||||
if (IMPL == "quartus") begin
|
||||
|
||||
localparam lpm_speed=SPEED == "HIGHEST" ? 10:5;
|
||||
localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5;
|
||||
|
||||
if (FORCE_LE == "YES") begin
|
||||
lpm_mult#(
|
||||
/* verilator lint_off DECLFILENAME */
|
||||
lpm_mult #(
|
||||
.LPM_WIDTHA(WIDTHA),
|
||||
.LPM_WIDTHB(WIDTHB),
|
||||
.LPM_WIDTHP(WIDTHP),
|
||||
@@ -40,7 +39,7 @@ module VX_mult
|
||||
.LPM_PIPELINE(PIPELINE),
|
||||
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
|
||||
.MAXIMIZE_SPEED(lpm_speed)
|
||||
) quartus_mult(
|
||||
) quartus_mult (
|
||||
.clock(clock),
|
||||
.aclr(aclr),
|
||||
.clken(clken),
|
||||
@@ -48,6 +47,7 @@ module VX_mult
|
||||
.datab(datab),
|
||||
.result(result)
|
||||
);
|
||||
/* verilator lint_on DECLFILENAME */
|
||||
end
|
||||
else begin
|
||||
lpm_mult#(
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
`include "VX_define.vh"
|
||||
module VX_bank
|
||||
#(
|
||||
module VX_bank #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -17,8 +16,7 @@ module VX_bank
|
||||
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
|
||||
parameter FUNC_ID = 0,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -28,7 +26,7 @@ module VX_bank
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -43,12 +41,9 @@ module VX_bank
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
@@ -77,30 +72,29 @@ module VX_bank
|
||||
output wire [31:0] bank_wb_address,
|
||||
|
||||
// Dram Fill Requests
|
||||
output wire dram_fill_req,
|
||||
output wire dram_fill_req_valid,
|
||||
output wire[31:0] dram_fill_req_addr,
|
||||
output wire dram_because_of_snp,
|
||||
output wire dram_snp_full,
|
||||
output wire dram_fill_req_is_snp,
|
||||
input wire dram_fill_req_queue_full,
|
||||
|
||||
// Dram Fill Response
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_addr,
|
||||
input wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_fill_rsp_data,
|
||||
output wire dram_fill_accept,
|
||||
input wire dram_rsp_valid,
|
||||
input wire [31:0] dram_rsp_addr,
|
||||
input wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// Dram WB Requests
|
||||
input wire dram_wb_queue_pop,
|
||||
output wire dram_wb_req,
|
||||
output wire dram_wb_req_valid,
|
||||
output wire[31:0] dram_wb_req_addr,
|
||||
output wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_wb_req_data,
|
||||
|
||||
// Snp Request
|
||||
input wire snp_req,
|
||||
input wire snp_req_valid,
|
||||
input wire[31:0] snp_req_addr,
|
||||
output wire snrq_full,
|
||||
output wire snp_req_full,
|
||||
|
||||
output wire snp_fwd,
|
||||
output wire snp_fwd_valid,
|
||||
output wire[31:0] snp_fwd_addr,
|
||||
input wire snp_fwd_pop
|
||||
);
|
||||
@@ -111,7 +105,7 @@ module VX_bank
|
||||
if (reset) begin
|
||||
snoop_state <= 0;
|
||||
end else begin
|
||||
snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID));
|
||||
snoop_state <= (snoop_state | snp_req_valid) && ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID));
|
||||
end
|
||||
end
|
||||
|
||||
@@ -123,16 +117,20 @@ module VX_bank
|
||||
wire[31:0] snrq_addr_st0;
|
||||
|
||||
assign snrq_valid_st0 = !snrq_empty;
|
||||
VX_generic_queue_ll #(.DATAW(32), .SIZE(SNRQ_SIZE)) snr_queue(
|
||||
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW(32),
|
||||
.SIZE(SNRQ_SIZE)
|
||||
) snr_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (snp_req),
|
||||
.push (snp_req_valid),
|
||||
.in_data (snp_req_addr),
|
||||
.pop (snrq_pop),
|
||||
.out_data(snrq_addr_st0),
|
||||
.empty (snrq_empty),
|
||||
.full (snrq_full)
|
||||
);
|
||||
.full (snp_req_full)
|
||||
);
|
||||
|
||||
wire dfpq_pop;
|
||||
wire dfpq_empty;
|
||||
@@ -140,13 +138,16 @@ module VX_bank
|
||||
wire[31:0] dfpq_addr_st0;
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dfpq_filldata_st0;
|
||||
|
||||
assign dram_fill_accept = !dfpq_full;
|
||||
assign dram_rsp_ready = !dfpq_full;
|
||||
|
||||
VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue(
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW(32+(`BANK_LINE_WORDS*`WORD_SIZE)),
|
||||
.SIZE(DFPQ_SIZE)
|
||||
) dfp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (dram_fill_rsp),
|
||||
.in_data ({dram_fill_addr, dram_fill_rsp_data}),
|
||||
.push (dram_rsp_valid),
|
||||
.in_data ({dram_rsp_addr, dram_rsp_data}),
|
||||
.pop (dfpq_pop),
|
||||
.out_data({dfpq_addr_st0, dfpq_filldata_st0}),
|
||||
.empty (dfpq_empty),
|
||||
@@ -186,9 +187,7 @@ module VX_bank
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
req_queue
|
||||
(
|
||||
) req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
// Enqueue
|
||||
@@ -217,7 +216,7 @@ module VX_bank
|
||||
.reqq_req_pc_st0 (reqq_req_pc_st0),
|
||||
.reqq_empty (reqq_empty),
|
||||
.reqq_full (reqq_full)
|
||||
);
|
||||
);
|
||||
|
||||
wire mrvq_pop;
|
||||
wire mrvq_full;
|
||||
@@ -265,9 +264,7 @@ module VX_bank
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
mrvq_queue
|
||||
(
|
||||
) mrvq_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
// Enqueue
|
||||
@@ -300,35 +297,39 @@ module VX_bank
|
||||
.miss_resrv_warp_num_st0 (mrvq_warp_num_st0),
|
||||
.miss_resrv_mem_read_st0 (mrvq_mem_read_st0),
|
||||
.miss_resrv_mem_write_st0(mrvq_mem_write_st0)
|
||||
);
|
||||
);
|
||||
|
||||
wire stall_bank_pipe;
|
||||
reg is_fill_in_pipe;
|
||||
|
||||
wire valid_st1 [STAGE_1_CYCLES-1:0];
|
||||
wire is_fill_st1 [STAGE_1_CYCLES-1:0];
|
||||
/* verilator lint_off UNUSED */
|
||||
wire going_to_write_st1[STAGE_1_CYCLES-1:0];
|
||||
/* verilator lint_on UNUSED */
|
||||
wire [31:0] addr_st1 [STAGE_1_CYCLES-1:0];
|
||||
|
||||
integer p_stage;
|
||||
always @(*) begin
|
||||
is_fill_in_pipe = 0;
|
||||
for (p_stage = 0; p_stage < STAGE_1_CYCLES; p_stage=p_stage+1) begin
|
||||
if (is_fill_st1[p_stage]) is_fill_in_pipe = 1;
|
||||
if (is_fill_st1[p_stage]) begin
|
||||
is_fill_in_pipe = 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (is_fill_st2) is_fill_in_pipe = 1;
|
||||
if (is_fill_st2) begin
|
||||
is_fill_in_pipe = 1;
|
||||
end
|
||||
end
|
||||
|
||||
// assign is_fill_in_pipe = (|is_fill_st1) || is_fill_st2;
|
||||
// assign is_fill_in_pipe = (|is_fill_st1) || is_fill_st2;
|
||||
|
||||
assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe;
|
||||
assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe;
|
||||
assign reqq_pop = !mrvq_stop && !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !is_fill_in_pipe;
|
||||
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe;
|
||||
|
||||
integer st1_cycle;
|
||||
|
||||
wire qual_is_fill_st0;
|
||||
wire qual_valid_st0;
|
||||
wire [31:0] qual_addr_st0;
|
||||
@@ -384,13 +385,15 @@ module VX_bank
|
||||
reqq_pop ? reqq_req_writeword_st0 :
|
||||
0;
|
||||
|
||||
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_bank_pipe),
|
||||
.flush(0),
|
||||
.in ({qual_is_snp , qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0, qual_pc_st0 }),
|
||||
.out ({is_snp_st1[0], going_to_write_st1[0] , valid_st1[0] , addr_st1[0] , writeword_st1[0] , inst_meta_st1[0] , is_fill_st1[0] , writedata_st1[0] , pc_st1[0]})
|
||||
VX_generic_register #(
|
||||
.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)
|
||||
) s0_1_c0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
.flush (0),
|
||||
.in ({qual_is_snp , qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0, qual_pc_st0 }),
|
||||
.out ({is_snp_st1[0], going_to_write_st1[0] , valid_st1[0] , addr_st1[0] , writeword_st1[0] , inst_meta_st1[0] , is_fill_st1[0] , writedata_st1[0] , pc_st1[0]})
|
||||
);
|
||||
|
||||
genvar curr_stage;
|
||||
@@ -414,14 +417,14 @@ module VX_bank
|
||||
wire miss_st1e;
|
||||
wire dirty_st1e;
|
||||
wire[31:0] pc_st1e;
|
||||
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire [4:0] rd_st1e;
|
||||
wire [1:0] wb_st1e;
|
||||
wire [`NW_BITS-1:0] warp_num_st1e;
|
||||
wire [2:0] mem_read_st1e;
|
||||
wire [2:0] mem_write_st1e;
|
||||
wire [`LOG2UP(NUM_REQUESTS)-1:0] tid_st1e;
|
||||
/* verilator lint_on UNUSED */
|
||||
wire [2:0] mem_read_st1e;
|
||||
wire [2:0] mem_write_st1e;
|
||||
wire fill_saw_dirty_st1e;
|
||||
wire is_snp_st1e;
|
||||
|
||||
@@ -429,7 +432,6 @@ module VX_bank
|
||||
assign pc_st1e = pc_st1[STAGE_1_CYCLES-1];
|
||||
assign {rd_st1e, wb_st1e, warp_num_st1e, mem_read_st1e, mem_write_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
|
||||
|
||||
VX_tag_data_access #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
@@ -448,9 +450,7 @@ module VX_bank
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
VX_tag_data_access
|
||||
(
|
||||
) vx_tag_data_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
@@ -494,16 +494,16 @@ module VX_bank
|
||||
wire is_snp_st2;
|
||||
wire [31:0] pc_st2;
|
||||
|
||||
|
||||
VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 (
|
||||
VX_generic_register #(
|
||||
.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)
|
||||
) st_1e_2 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_bank_pipe),
|
||||
.flush(0),
|
||||
.in ({is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, pc_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
|
||||
.out ({is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , pc_st2 , inst_meta_st2 })
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
wire should_flush;
|
||||
wire dwbq_push;
|
||||
@@ -520,7 +520,6 @@ module VX_bank
|
||||
assign miss_add_data = writeword_st2;
|
||||
assign {miss_add_rd, miss_add_wb, miss_add_warp_num, miss_add_mem_read, miss_add_mem_write, miss_add_tid} = inst_meta_st2;
|
||||
|
||||
|
||||
// Enqueue to CWB Queue
|
||||
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `L2FUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire [`WORD_SIZE_RNG] cwbq_data = readword_st2;
|
||||
@@ -532,7 +531,10 @@ module VX_bank
|
||||
|
||||
wire cwbq_empty;
|
||||
assign bank_wb_valid = !cwbq_empty;
|
||||
VX_generic_queue_ll #(.DATAW( `LOG2UP(NUM_REQUESTS) + 5 + 2 + (`NW_BITS-1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue(
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW( `LOG2UP(NUM_REQUESTS) + 5 + 2 + (`NW_BITS-1+1) + `WORD_SIZE + 32 + 32),
|
||||
.SIZE(CWBQ_SIZE)
|
||||
) cwb_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -543,13 +545,13 @@ module VX_bank
|
||||
.out_data({bank_wb_tid, bank_wb_rd, bank_wb_wb, bank_wb_warp_num, bank_wb_data, bank_wb_pc, bank_wb_address}),
|
||||
.empty (cwbq_empty),
|
||||
.full (cwbq_full)
|
||||
);
|
||||
);
|
||||
|
||||
assign should_flush = snoop_state && valid_st2 && (miss_add_mem_write != `NO_MEM_WRITE) && !is_snp_st2 && !is_fill_st2;
|
||||
// Enqueue to DWB Queue
|
||||
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire[31:0] dwbq_req_addr;
|
||||
wire dwbq_empty;
|
||||
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire[31:0] dwbq_req_addr;
|
||||
wire dwbq_empty;
|
||||
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dwbq_req_data;
|
||||
if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
@@ -560,10 +562,9 @@ module VX_bank
|
||||
assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK;
|
||||
end
|
||||
|
||||
|
||||
|
||||
wire possible_fill = valid_st2 && miss_st2 && !dram_fill_req_queue_full && !is_snp_st2;
|
||||
wire[31:0] fill_invalidator_addr = addr_st2 & `BASE_ADDR_MASK;
|
||||
|
||||
VX_fill_invalidator #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
@@ -581,9 +582,7 @@ module VX_bank
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
VX_fill_invalidator
|
||||
(
|
||||
) vx_fill_invalidator (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.possible_fill (possible_fill),
|
||||
@@ -591,16 +590,19 @@ module VX_bank
|
||||
.fill_addr (fill_invalidator_addr),
|
||||
|
||||
.invalidate_fill (invalidate_fill)
|
||||
);
|
||||
);
|
||||
|
||||
// Enqueu in dram_fill_req
|
||||
assign dram_fill_req = possible_fill && !invalidate_fill;
|
||||
assign dram_because_of_snp = is_snp_st2 && valid_st2 && miss_st2;
|
||||
assign dram_snp_full = snrq_full && snp_req;
|
||||
assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK;
|
||||
// Enqueue in dram_fill_req
|
||||
assign dram_fill_req_valid = possible_fill && !invalidate_fill;
|
||||
assign dram_fill_req_is_snp = is_snp_st2 && valid_st2 && miss_st2;
|
||||
assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK;
|
||||
|
||||
assign dram_wb_req = !dwbq_empty;
|
||||
VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue(
|
||||
assign dram_wb_req_valid = !dwbq_empty;
|
||||
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW( 32 + (`BANK_LINE_WORDS * `WORD_SIZE)),
|
||||
.SIZE(DWBQ_SIZE)
|
||||
) dwb_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -611,14 +613,18 @@ module VX_bank
|
||||
.out_data({dram_wb_req_addr, dram_wb_req_data}),
|
||||
.empty (dwbq_empty),
|
||||
.full (dwbq_full)
|
||||
);
|
||||
);
|
||||
|
||||
wire snp_fwd_push;
|
||||
wire ffsq_empty;
|
||||
|
||||
assign snp_fwd_push = is_snp_st2 && valid_st2 && !ffsq_full && !(((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
assign snp_fwd = !ffsq_empty;
|
||||
VX_generic_queue_ll #(.DATAW(32), .SIZE(FFSQ_SIZE)) ffs_queue(
|
||||
assign snp_fwd_push = is_snp_st2 && valid_st2 && !ffsq_full && !(((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
assign snp_fwd_valid = !ffsq_empty;
|
||||
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW(32),
|
||||
.SIZE(FFSQ_SIZE)
|
||||
) ffs_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (snp_fwd_push),
|
||||
@@ -627,7 +633,7 @@ module VX_bank
|
||||
.out_data({snp_fwd_addr}),
|
||||
.empty (ffsq_empty),
|
||||
.full (ffsq_full)
|
||||
);
|
||||
);
|
||||
|
||||
assign stall_bank_pipe = (is_snp_st2 && valid_st2 && ffsq_full) || ((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full);
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache
|
||||
#(
|
||||
module VX_cache #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -17,7 +16,7 @@ module VX_cache
|
||||
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
|
||||
parameter FUNC_ID = 3,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
@@ -28,7 +27,7 @@ module VX_cache
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -47,21 +46,18 @@ module VX_cache
|
||||
parameter PRFQ_SIZE = 64,
|
||||
parameter PRFQ_STRIDE = 0,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Req Info
|
||||
// Req Info
|
||||
input wire [NUM_REQUESTS-1:0] core_req_valid,
|
||||
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] core_req_writedata,
|
||||
input wire[NUM_REQUESTS-1:0][2:0] core_req_mem_read,
|
||||
input wire[NUM_REQUESTS-1:0][2:0] core_req_mem_write,
|
||||
input wire [NUM_REQUESTS-1:0][2:0] core_req_mem_read,
|
||||
input wire [NUM_REQUESTS-1:0][2:0] core_req_mem_write,
|
||||
|
||||
// Req meta
|
||||
input wire [4:0] core_req_rd,
|
||||
@@ -80,39 +76,31 @@ module VX_cache
|
||||
output wire [NUM_REQUESTS-1:0][31:0] core_wb_pc,
|
||||
output wire [NUM_REQUESTS-1:0][31:0] core_wb_address,
|
||||
|
||||
|
||||
// Dram Fill Response
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_rsp_addr,
|
||||
input wire [`IBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data,
|
||||
output wire dram_fill_accept,
|
||||
input wire dram_rsp_valid,
|
||||
input wire [31:0] dram_rsp_addr,
|
||||
input wire [`IBANK_LINE_WORDS-1:0][31:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// Dram request
|
||||
output wire dram_req,
|
||||
output wire dram_req_write,
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
|
||||
output wire dram_req_because_of_wb,
|
||||
input wire dram_req_delay,
|
||||
|
||||
output wire dram_snp_full,
|
||||
|
||||
input wire dram_req_full,
|
||||
|
||||
|
||||
// Snoop Req
|
||||
input wire snp_req,
|
||||
input wire[31:0] snp_req_addr,
|
||||
output wire snp_req_delay,
|
||||
input wire snp_req_valid,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_full,
|
||||
|
||||
// Snoop Forward
|
||||
output wire snp_fwd,
|
||||
output wire[31:0] snp_fwd_addr,
|
||||
input wire snp_fwd_delay
|
||||
|
||||
output wire snp_fwd_valid,
|
||||
output wire [31:0] snp_fwd_addr,
|
||||
input wire snp_fwd_full
|
||||
);
|
||||
|
||||
|
||||
wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids;
|
||||
wire [NUM_BANKS-1:0] per_bank_wb_pop;
|
||||
wire [NUM_BANKS-1:0] per_bank_wb_valid;
|
||||
@@ -124,104 +112,90 @@ module VX_cache
|
||||
wire [NUM_BANKS-1:0][31:0] per_bank_wb_pc;
|
||||
wire [NUM_BANKS-1:0][31:0] per_bank_wb_address;
|
||||
|
||||
|
||||
wire dfqq_full;
|
||||
wire[NUM_BANKS-1:0] per_bank_dram_fill_req;
|
||||
wire[NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr;
|
||||
wire[NUM_BANKS-1:0] per_bank_dram_fill_accept;
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid;
|
||||
wire [NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_fill_req_is_snp;
|
||||
/* verilator lint_on UNUSED */
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_rsp_ready;
|
||||
|
||||
wire[NUM_BANKS-1:0] per_bank_dram_wb_queue_pop;
|
||||
wire[NUM_BANKS-1:0] per_bank_dram_wb_req;
|
||||
wire[NUM_BANKS-1:0] per_bank_dram_because_of_snp;
|
||||
wire[NUM_BANKS-1:0][31:0] per_bank_dram_wb_req_addr;
|
||||
wire[NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data;
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop;
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid;
|
||||
wire [NUM_BANKS-1:0][31:0] per_bank_dram_wb_req_addr;
|
||||
wire [NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data;
|
||||
|
||||
wire[NUM_BANKS-1:0] per_bank_reqq_full;
|
||||
|
||||
wire[NUM_BANKS-1:0] per_bank_snrq_full;
|
||||
|
||||
wire[NUM_BANKS-1:0] per_bank_snp_fwd;
|
||||
wire[NUM_BANKS-1:0][31:0] per_bank_snp_fwd_addr;
|
||||
wire[NUM_BANKS-1:0] per_bank_snp_fwd_pop;
|
||||
wire [NUM_BANKS-1:0] per_bank_reqq_full;
|
||||
wire [NUM_BANKS-1:0] per_bank_snrq_full;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_snp_fwd;
|
||||
wire [NUM_BANKS-1:0][31:0] per_bank_snp_fwd_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_snp_fwd_pop;
|
||||
|
||||
assign delay_req = (|per_bank_reqq_full);
|
||||
assign snp_req_full = (|per_bank_snrq_full);
|
||||
|
||||
|
||||
assign snp_req_delay = (|per_bank_snrq_full);
|
||||
|
||||
|
||||
// assign dram_fill_accept = (NUM_BANKS == 1) ? per_bank_dram_fill_accept[0] : per_bank_dram_fill_accept[dram_fill_rsp_addr[`BANK_SELECT_ADDR_RNG]];
|
||||
assign dram_fill_accept = (|per_bank_dram_fill_accept);
|
||||
// assign dram_rsp_ready = (NUM_BANKS == 1) ? per_bank_dram_rsp_ready[0] : per_bank_dram_rsp_ready[dram_rsp_addr[`BANK_SELECT_ADDR_RNG]];
|
||||
assign dram_rsp_ready = (|per_bank_dram_rsp_ready);
|
||||
|
||||
VX_cache_dram_req_arb #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE_BYTES (WORD_SIZE_BYTES),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.PRFQ_SIZE (PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (PRFQ_STRIDE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
VX_cache_dram_req_arb
|
||||
(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dfqq_full (dfqq_full),
|
||||
.per_bank_dram_fill_req (per_bank_dram_fill_req),
|
||||
.per_bank_dram_fill_req_addr(per_bank_dram_fill_req_addr),
|
||||
.per_bank_dram_wb_queue_pop (per_bank_dram_wb_queue_pop),
|
||||
.per_bank_dram_wb_req (per_bank_dram_wb_req),
|
||||
.per_bank_dram_because_of_snp(per_bank_dram_because_of_snp),
|
||||
.per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr),
|
||||
.per_bank_dram_wb_req_data (per_bank_dram_wb_req_data),
|
||||
.dram_req (dram_req),
|
||||
.dram_req_write (dram_req_write),
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_size (dram_req_size),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_because_of_wb (dram_req_because_of_wb),
|
||||
.dram_req_delay (dram_req_delay)
|
||||
);
|
||||
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE_BYTES (WORD_SIZE_BYTES),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.PRFQ_SIZE (PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (PRFQ_STRIDE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES (SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) vx_cache_dram_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dfqq_full (dfqq_full),
|
||||
.per_bank_dram_fill_req_valid(per_bank_dram_fill_req_valid),
|
||||
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
|
||||
.per_bank_dram_wb_queue_pop (per_bank_dram_wb_queue_pop),
|
||||
.per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid),
|
||||
.per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr),
|
||||
.per_bank_dram_wb_req_data (per_bank_dram_wb_req_data),
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_write (dram_req_write),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_full (dram_req_full)
|
||||
);
|
||||
|
||||
VX_cache_core_req_bank_sel #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE_BYTES (WORD_SIZE_BYTES),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
VX_cache_core_req_bank_sell
|
||||
(
|
||||
.core_req_valid (core_req_valid),
|
||||
.core_req_addr (core_req_addr),
|
||||
.per_bank_valids(per_bank_valids)
|
||||
);
|
||||
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE_BYTES (WORD_SIZE_BYTES),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES (SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) vx_cache_core_req_bank_sell (
|
||||
.core_req_valid (core_req_valid),
|
||||
.core_req_addr (core_req_addr),
|
||||
.per_bank_valids (per_bank_valids)
|
||||
);
|
||||
|
||||
VX_cache_wb_sel_merge #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
@@ -241,9 +215,7 @@ module VX_cache
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
VX_cache_core_wb_sel_merge
|
||||
(
|
||||
) vx_cache_core_wb_sel_merge (
|
||||
.per_bank_wb_valid (per_bank_wb_valid),
|
||||
.per_bank_wb_tid (per_bank_wb_tid),
|
||||
.per_bank_wb_rd (per_bank_wb_rd),
|
||||
@@ -262,28 +234,27 @@ module VX_cache
|
||||
.core_wb_readdata (core_wb_readdata),
|
||||
.core_wb_address (core_wb_address),
|
||||
.core_wb_pc (core_wb_pc)
|
||||
);
|
||||
|
||||
|
||||
|
||||
);
|
||||
|
||||
// Snoop Forward Logic
|
||||
VX_snp_fwd_arb #(.NUM_BANKS(NUM_BANKS)) VX_snp_fwd_arb(
|
||||
VX_snp_fwd_arb #(
|
||||
.NUM_BANKS(NUM_BANKS)
|
||||
) vx_snp_fwd_arb(
|
||||
.per_bank_snp_fwd (per_bank_snp_fwd),
|
||||
.per_bank_snp_fwd_addr(per_bank_snp_fwd_addr),
|
||||
.per_bank_snp_fwd_pop (per_bank_snp_fwd_pop),
|
||||
.snp_fwd (snp_fwd),
|
||||
.snp_fwd_valid (snp_fwd_valid),
|
||||
.snp_fwd_addr (snp_fwd_addr),
|
||||
.snp_fwd_delay (snp_fwd_delay)
|
||||
);
|
||||
.snp_fwd_full (snp_fwd_full)
|
||||
);
|
||||
|
||||
// Snoop Forward Logic
|
||||
|
||||
genvar curr_bank;
|
||||
generate
|
||||
for (curr_bank = 0; curr_bank < NUM_BANKS; curr_bank=curr_bank+1) begin
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_valids;
|
||||
wire [NUM_REQUESTS-1:0][31:0] curr_bank_addr;
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_valids;
|
||||
wire [NUM_REQUESTS-1:0][31:0] curr_bank_addr;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] curr_bank_writedata;
|
||||
wire [4:0] curr_bank_rd;
|
||||
wire [NUM_REQUESTS-1:0][1:0] curr_bank_wb;
|
||||
@@ -294,7 +265,7 @@ module VX_cache
|
||||
|
||||
wire curr_bank_wb_pop;
|
||||
wire curr_bank_wb_valid;
|
||||
wire [`LOG2UP(NUM_REQUESTS)-1:0] curr_bank_wb_tid;
|
||||
wire [`LOG2UP(NUM_REQUESTS)-1:0] curr_bank_wb_tid;
|
||||
wire [31:0] curr_bank_wb_pc;
|
||||
wire [4:0] curr_bank_wb_rd;
|
||||
wire [1:0] curr_bank_wb_wb;
|
||||
@@ -302,19 +273,18 @@ module VX_cache
|
||||
wire [`WORD_SIZE_RNG] curr_bank_wb_data;
|
||||
wire [31:0] curr_bank_wb_address;
|
||||
|
||||
wire curr_bank_dram_fill_rsp;
|
||||
wire [31:0] curr_bank_dram_fill_rsp_addr;
|
||||
wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data;
|
||||
wire curr_bank_dram_fill_accept;
|
||||
wire curr_bank_dram_rsp_valid;
|
||||
wire [31:0] curr_bank_dram_rsp_addr;
|
||||
wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_rsp_data;
|
||||
wire curr_bank_dram_rsp_ready;
|
||||
|
||||
wire curr_bank_dfqq_full;
|
||||
wire curr_bank_dram_fill_req;
|
||||
wire curr_bank_dram_because_of_snp;
|
||||
wire curr_bank_dram_snp_full;
|
||||
wire curr_bank_dram_fill_req_valid;
|
||||
wire curr_bank_dram_fill_req_is_snp;
|
||||
wire[31:0] curr_bank_dram_fill_req_addr;
|
||||
|
||||
wire curr_bank_dram_wb_queue_pop;
|
||||
wire curr_bank_dram_wb_req;
|
||||
wire curr_bank_dram_wb_req_valid;
|
||||
wire[31:0] curr_bank_dram_wb_req_addr;
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data;
|
||||
|
||||
@@ -326,9 +296,7 @@ module VX_cache
|
||||
wire curr_bank_snp_fwd;
|
||||
wire[31:0] curr_bank_snp_fwd_addr;
|
||||
wire curr_bank_snp_fwd_pop;
|
||||
wire curr_bank_snrq_full;
|
||||
|
||||
|
||||
wire curr_bank_snp_req_full;
|
||||
|
||||
// Core Req
|
||||
assign curr_bank_valids = per_bank_valids[curr_bank];
|
||||
@@ -354,56 +322,53 @@ module VX_cache
|
||||
assign per_bank_wb_address [curr_bank] = curr_bank_wb_address;
|
||||
|
||||
// Dram fill request
|
||||
assign curr_bank_dfqq_full = dfqq_full;
|
||||
assign per_bank_dram_fill_req[curr_bank] = curr_bank_dram_fill_req;
|
||||
assign per_bank_dram_fill_req_addr[curr_bank] = curr_bank_dram_fill_req_addr;
|
||||
assign curr_bank_dfqq_full = dfqq_full;
|
||||
assign per_bank_dram_fill_req_valid[curr_bank] = curr_bank_dram_fill_req_valid;
|
||||
assign per_bank_dram_fill_req_addr[curr_bank] = curr_bank_dram_fill_req_addr;
|
||||
assign per_bank_dram_fill_req_is_snp[curr_bank] = curr_bank_dram_fill_req_is_snp;
|
||||
|
||||
// Dram fill response
|
||||
assign curr_bank_dram_fill_rsp = (NUM_BANKS == 1) || (dram_fill_rsp && (curr_bank_dram_fill_rsp_addr[`BANK_SELECT_ADDR_RNG] == curr_bank));
|
||||
assign curr_bank_dram_fill_rsp_addr = dram_fill_rsp_addr;
|
||||
assign curr_bank_dram_fill_rsp_data = dram_fill_rsp_data;
|
||||
assign per_bank_dram_fill_accept[curr_bank] = curr_bank_dram_fill_accept;
|
||||
assign curr_bank_dram_rsp_valid = (NUM_BANKS == 1) || (dram_rsp_valid && (curr_bank_dram_rsp_addr[`BANK_SELECT_ADDR_RNG] == curr_bank));
|
||||
assign curr_bank_dram_rsp_addr = dram_rsp_addr;
|
||||
assign curr_bank_dram_rsp_data = dram_rsp_data;
|
||||
assign per_bank_dram_rsp_ready[curr_bank] = curr_bank_dram_rsp_ready;
|
||||
|
||||
// Dram writeback request
|
||||
assign curr_bank_dram_wb_queue_pop = per_bank_dram_wb_queue_pop[curr_bank];
|
||||
assign per_bank_dram_wb_req[curr_bank] = curr_bank_dram_wb_req;
|
||||
assign per_bank_dram_because_of_snp[curr_bank] = curr_bank_dram_because_of_snp;
|
||||
assign per_bank_dram_wb_req_valid[curr_bank] = curr_bank_dram_wb_req_valid;
|
||||
assign per_bank_dram_wb_req_addr[curr_bank] = curr_bank_dram_wb_req_addr;
|
||||
assign per_bank_dram_wb_req_data[curr_bank] = curr_bank_dram_wb_req_data;
|
||||
|
||||
// Snoop Request
|
||||
assign curr_bank_snp_req = snp_req && (snp_req_addr[`BANK_SELECT_ADDR_RNG] == curr_bank);
|
||||
assign curr_bank_snp_req_addr = snp_req_addr;
|
||||
assign per_bank_snrq_full[curr_bank] = curr_bank_snrq_full;
|
||||
assign curr_bank_snp_req = snp_req_valid && (snp_req_addr[`BANK_SELECT_ADDR_RNG] == curr_bank);
|
||||
assign curr_bank_snp_req_addr = snp_req_addr;
|
||||
assign per_bank_snrq_full[curr_bank] = curr_bank_snp_req_full;
|
||||
|
||||
// Snoop Fwd
|
||||
assign curr_bank_snp_fwd_pop = per_bank_snp_fwd_pop[curr_bank];
|
||||
assign per_bank_snp_fwd[curr_bank] = curr_bank_snp_fwd;
|
||||
assign per_bank_snp_fwd_addr[curr_bank] = curr_bank_snp_fwd_addr;
|
||||
|
||||
|
||||
VX_bank #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE_BYTES (WORD_SIZE_BYTES),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.FUNC_ID (FUNC_ID),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FFSQ_SIZE (FFSQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
bank
|
||||
(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE_BYTES (WORD_SIZE_BYTES),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.FUNC_ID (FUNC_ID),
|
||||
.REQQ_SIZE (REQQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FFSQ_SIZE (FFSQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) bank (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
// Core req
|
||||
@@ -431,39 +396,35 @@ module VX_cache
|
||||
.bank_wb_address (curr_bank_wb_address),
|
||||
|
||||
// Dram fill req
|
||||
.dram_fill_req (curr_bank_dram_fill_req),
|
||||
.dram_fill_req_valid (curr_bank_dram_fill_req_valid),
|
||||
.dram_fill_req_addr (curr_bank_dram_fill_req_addr),
|
||||
.dram_fill_req_is_snp (curr_bank_dram_fill_req_is_snp),
|
||||
.dram_fill_req_queue_full(curr_bank_dfqq_full),
|
||||
|
||||
// Dram fill rsp
|
||||
.dram_fill_rsp (curr_bank_dram_fill_rsp),
|
||||
.dram_fill_addr (curr_bank_dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_data (curr_bank_dram_fill_rsp_data),
|
||||
.dram_fill_accept (curr_bank_dram_fill_accept),
|
||||
.dram_rsp_valid (curr_bank_dram_rsp_valid),
|
||||
.dram_rsp_addr (curr_bank_dram_rsp_addr),
|
||||
.dram_rsp_data (curr_bank_dram_rsp_data),
|
||||
.dram_rsp_ready (curr_bank_dram_rsp_ready),
|
||||
|
||||
// Dram writeback
|
||||
.dram_wb_queue_pop (curr_bank_dram_wb_queue_pop),
|
||||
.dram_wb_req (curr_bank_dram_wb_req),
|
||||
.dram_wb_req_valid (curr_bank_dram_wb_req_valid),
|
||||
.dram_wb_req_addr (curr_bank_dram_wb_req_addr),
|
||||
.dram_wb_req_data (curr_bank_dram_wb_req_data),
|
||||
.dram_because_of_snp (curr_bank_dram_because_of_snp),
|
||||
.dram_snp_full (curr_bank_dram_snp_full),
|
||||
.dram_wb_req_data (curr_bank_dram_wb_req_data),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req (curr_bank_snp_req),
|
||||
.snp_req_valid (curr_bank_snp_req),
|
||||
.snp_req_addr (curr_bank_snp_req_addr),
|
||||
.snrq_full (curr_bank_snrq_full),
|
||||
.snp_req_full (curr_bank_snp_req_full),
|
||||
|
||||
// Snoop Fwd
|
||||
.snp_fwd (curr_bank_snp_fwd),
|
||||
.snp_fwd_valid (curr_bank_snp_fwd),
|
||||
.snp_fwd_addr (curr_bank_snp_fwd_addr),
|
||||
.snp_fwd_pop (curr_bank_snp_fwd_pop)
|
||||
|
||||
);
|
||||
|
||||
);
|
||||
end
|
||||
|
||||
endgenerate
|
||||
|
||||
|
||||
|
||||
|
||||
endmodule
|
||||
@@ -9,24 +9,24 @@
|
||||
// 5 + 2 + 4 + 3 + 3 + 1
|
||||
`define REQ_INST_META_SIZE (5 + 2 + (`NW_BITS-1+1) + 3 + 3 + `LOG2UP(NUM_REQUESTS))
|
||||
|
||||
// Cache geometry macros. Each is derived from module parameters
// (WORD_SIZE_BYTES, CACHE_SIZE_BYTES, NUM_BANKS, BANK_LINE_SIZE_BYTES), so they
// only expand correctly inside a module that declares those parameters.
// Several macros appear twice in this span; the two forms differ only in
// spacing or parenthesization.

// Word width in bits: 8 bits per byte of WORD_SIZE_BYTES.
`define WORD_SIZE (8*WORD_SIZE_BYTES)
|
||||
`define WORD_SIZE (8 * WORD_SIZE_BYTES)
|
||||
// Bit range [WORD_SIZE-1:0] for declaring one word.
`define WORD_SIZE_RNG (`WORD_SIZE)-1:0
|
||||
|
||||
// Bytes of storage per bank: the cache size divided by the number of banks.
// The trailing number comments (128, 8, 4) appear to be example values for one
// configuration -- confirm against the parameter defaults.
// 128
|
||||
// Unparenthesized form: expands textually, so operator precedence at the use
// site applies to the bare division.
`define BANK_SIZE_BYTES CACHE_SIZE_BYTES/NUM_BANKS
|
||||
// Parenthesized form: safe to embed in a larger expression.
`define BANK_SIZE_BYTES (CACHE_SIZE_BYTES / NUM_BANKS)
|
||||
|
||||
// Number of lines per bank: bank bytes divided by line bytes.
// 8
|
||||
`define BANK_LINE_COUNT (`BANK_SIZE_BYTES/BANK_LINE_SIZE_BYTES)
|
||||
`define BANK_LINE_COUNT (`BANK_SIZE_BYTES / BANK_LINE_SIZE_BYTES)
|
||||
// Number of words per line: line bytes divided by word bytes.
// 4
|
||||
`define BANK_LINE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES)
|
||||
|
||||
// Offset is fixed
|
||||
`define OFFSET_ADDR_NUM_BITS 2
|
||||
`define OFFSET_SIZE_END 1
|
||||
`define OFFSET_ADDR_START 0
|
||||
`define OFFSET_ADDR_END 1
|
||||
`define OFFSET_ADDR_RNG `OFFSET_ADDR_END:`OFFSET_ADDR_START
|
||||
`define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0
|
||||
`define OFFSET_ADDR_NUM_BITS 2
|
||||
`define OFFSET_SIZE_END 1
|
||||
`define OFFSET_ADDR_START 0
|
||||
`define OFFSET_ADDR_END 1
|
||||
`define OFFSET_ADDR_RNG `OFFSET_ADDR_END:`OFFSET_ADDR_START
|
||||
`define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0
|
||||
|
||||
// 2
|
||||
`define WORD_SELECT_NUM_BITS (`LOG2UP(`BANK_LINE_WORDS))
|
||||
@@ -55,17 +55,14 @@
|
||||
|
||||
// 3
|
||||
`define LINE_SELECT_NUM_BITS (`LOG2UP(`BANK_LINE_COUNT))
|
||||
// 3
|
||||
`define LINE_SELECT_SIZE_END (`LINE_SELECT_NUM_BITS)
|
||||
// 7
|
||||
`define LINE_SELECT_ADDR_START (1+`BANK_SELECT_ADDR_END)
|
||||
// 9
|
||||
`define LINE_SELECT_ADDR_END (`LINE_SELECT_SIZE_END+`LINE_SELECT_ADDR_START-1)
|
||||
`define LINE_SELECT_ADDR_END (`LINE_SELECT_NUM_BITS+`LINE_SELECT_ADDR_START-1)
|
||||
// 9:7
|
||||
`define LINE_SELECT_ADDR_RNG `LINE_SELECT_ADDR_END:`LINE_SELECT_ADDR_START
|
||||
// 2:0
|
||||
`define LINE_SELECT_SIZE_RNG `LINE_SELECT_SIZE_END-1:0
|
||||
|
||||
`define LINE_SELECT_SIZE_RNG `LINE_SELECT_NUM_BITS-1:0
|
||||
|
||||
// 10
|
||||
`define TAG_SELECT_ADDR_START (1+`LINE_SELECT_ADDR_END)
|
||||
@@ -76,9 +73,10 @@
|
||||
// 22
|
||||
`define TAG_SELECT_SIZE_END (`TAG_SELECT_NUM_BITS)
|
||||
// 21:0
|
||||
`define TAG_SELECT_SIZE_RNG `TAG_SELECT_SIZE_END-1:0
|
||||
`define TAG_SELECT_SIZE_RNG `TAG_SELECT_NUM_BITS-1:0
|
||||
|
||||
// Combined width of the tag-select and line-select address fields.
`define TAG_LINE_SELECT_BITS (`TAG_SELECT_NUM_BITS+`LINE_SELECT_NUM_BITS)
|
||||
|
||||
// Address mask with bits [WORD_SELECT_ADDR_END:0] cleared and all higher bits
// set: (1 << (END+1)) - 1 sets the low END+1 bits, and `~` inverts that.
// ANDing an address with it zeroes the byte-offset and word-select bits.
`define BASE_ADDR_MASK (~((1<<(`WORD_SELECT_ADDR_END+1))-1))
|
||||
|
||||
`endif
|
||||
|
||||
|
||||
@@ -54,8 +54,6 @@ module VX_cache_core_req_bank_sel
|
||||
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids
|
||||
);
|
||||
|
||||
wire[31:0] req_address;
|
||||
|
||||
generate
|
||||
integer curr_req;
|
||||
always @(*) begin
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_dfq_queue
|
||||
#(
|
||||
module VX_cache_dfq_queue #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -15,8 +14,7 @@ module VX_cache_dfq_queue
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -26,7 +24,7 @@ module VX_cache_dfq_queue
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -39,16 +37,13 @@ module VX_cache_dfq_queue
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire dfqq_push,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_fill_req,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
|
||||
|
||||
input wire dfqq_pop,
|
||||
@@ -61,17 +56,14 @@ module VX_cache_dfq_queue
|
||||
wire[NUM_BANKS-1:0] out_per_bank_dram_fill_req;
|
||||
wire[NUM_BANKS-1:0][31:0] out_per_bank_dram_fill_req_addr;
|
||||
|
||||
|
||||
reg [NUM_BANKS-1:0] use_per_bank_dram_fill_req;
|
||||
reg [NUM_BANKS-1:0][31:0] use_per_bank_dram_fill_req_addr;
|
||||
|
||||
|
||||
wire[NUM_BANKS-1:0] qual_bank_dram_fill_req;
|
||||
wire[NUM_BANKS-1:0][31:0] qual_bank_dram_fill_req_addr;
|
||||
|
||||
wire[NUM_BANKS-1:0] updated_bank_dram_fill_req;
|
||||
|
||||
|
||||
wire o_empty;
|
||||
|
||||
wire use_empty = !(|use_per_bank_dram_fill_req);
|
||||
@@ -79,27 +71,34 @@ module VX_cache_dfq_queue
|
||||
|
||||
wire push_qual = dfqq_push && !dfqq_full;
|
||||
wire pop_qual = dfqq_pop && use_empty && !out_empty;
|
||||
VX_generic_queue_ll #(.DATAW(NUM_BANKS * (1+32)), .SIZE(DFQQ_SIZE)) dfqq_queue(
|
||||
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW(NUM_BANKS * (1+32)),
|
||||
.SIZE(DFQQ_SIZE)
|
||||
) dfqq_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push_qual),
|
||||
.in_data ({per_bank_dram_fill_req, per_bank_dram_fill_req_addr}),
|
||||
.in_data ({per_bank_dram_fill_req_valid, per_bank_dram_fill_req_addr}),
|
||||
.pop (pop_qual),
|
||||
.out_data({out_per_bank_dram_fill_req, out_per_bank_dram_fill_req_addr}),
|
||||
.empty (o_empty),
|
||||
.full (dfqq_full)
|
||||
);
|
||||
);
|
||||
|
||||
// Choose the source of the pending per-bank fill requests.
// When use_per_bank_dram_fill_req has no bits set (use_empty), take the queue
// output, zeroed if the queue reports empty (o_empty). Otherwise take
// use_per_bank_dram_fill_req; in that branch !use_empty is true, so the
// replicated mask is all ones and has no effect.
assign qual_bank_dram_fill_req = use_empty ? (out_per_bank_dram_fill_req & {NUM_BANKS{!o_empty}}) : (use_per_bank_dram_fill_req & {NUM_BANKS{!use_empty}});
|
||||
// Addresses come from the same source as the request bits above.
assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr;
|
||||
|
||||
wire[`LOG2UP(NUM_BANKS)-1:0] qual_request_index;
|
||||
wire qual_has_request;
|
||||
VX_generic_priority_encoder #(.N(NUM_BANKS)) VX_sel_bank(
|
||||
wire qual_has_request;
|
||||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) vx_sel_bank (
|
||||
.valids(qual_bank_dram_fill_req),
|
||||
.index (qual_request_index),
|
||||
.found (qual_has_request)
|
||||
);
|
||||
);
|
||||
|
||||
assign dfqq_empty = !qual_has_request;
|
||||
assign dfqq_req = qual_bank_dram_fill_req [qual_request_index];
|
||||
@@ -119,5 +118,4 @@ module VX_cache_dfq_queue
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
@@ -1,7 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_dram_req_arb
|
||||
#(
|
||||
module VX_cache_dram_req_arb #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -15,7 +14,7 @@ module VX_cache_dram_req_arb
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
@@ -26,7 +25,7 @@ module VX_cache_dram_req_arb
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -45,39 +44,29 @@ module VX_cache_dram_req_arb
|
||||
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
||||
// Fill Request
|
||||
output wire dfqq_full,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_fill_req,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
|
||||
|
||||
// Dram writeback (DWB) request
|
||||
output wire[NUM_BANKS-1:0] per_bank_dram_wb_queue_pop,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_wb_req,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_dram_wb_req_addr,
|
||||
input wire[NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_because_of_snp,
|
||||
|
||||
// real Dram request
|
||||
output wire dram_req,
|
||||
output wire dram_req_write,
|
||||
output wire dram_req_read,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
|
||||
output wire dram_req_because_of_wb,
|
||||
|
||||
input wire dram_req_delay
|
||||
output wire dfqq_full,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
|
||||
|
||||
);
|
||||
// Dram writeback (DWB) request
|
||||
output wire[NUM_BANKS-1:0] per_bank_dram_wb_queue_pop,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_wb_req_valid,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_dram_wb_req_addr,
|
||||
input wire[NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
|
||||
|
||||
// real Dram request
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
|
||||
|
||||
input wire dram_req_full
|
||||
);
|
||||
|
||||
wire pref_pop;
|
||||
wire pref_valid;
|
||||
@@ -86,66 +75,62 @@ module VX_cache_dram_req_arb
|
||||
wire dwb_valid;
|
||||
wire dfqq_req;
|
||||
|
||||
// Pop a prefetch address only when the prefetcher has one (pref_valid), no
// writeback (dwb_valid) or fill request (dfqq_req) is pending, and the DRAM
// request path is not stalled. The statement appears twice; the two forms
// differ only in the name of the stall input (dram_req_delay / dram_req_full).
assign pref_pop = !dwb_valid && !dfqq_req && !dram_req_delay && pref_valid;
|
||||
assign pref_pop = !dwb_valid && !dfqq_req && !dram_req_full && pref_valid;
|
||||
VX_prefetcher #(
|
||||
.PRFQ_SIZE (PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (PRFQ_STRIDE),
|
||||
.BANK_LINE_SIZE_BYTES(BANK_LINE_SIZE_BYTES),
|
||||
.WORD_SIZE_BYTES (WORD_SIZE_BYTES)
|
||||
)
|
||||
prfqq
|
||||
(
|
||||
) prfqq (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.dram_req (dram_req && dram_req_read),
|
||||
.dram_req (dram_req_read),
|
||||
.dram_req_addr(dram_req_addr),
|
||||
|
||||
.pref_pop (pref_pop),
|
||||
.pref_valid (pref_valid),
|
||||
.pref_addr (pref_addr)
|
||||
|
||||
|
||||
);
|
||||
);
|
||||
|
||||
wire[31:0] dfqq_req_addr;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire dfqq_empty;
|
||||
wire dfqq_pop = !dwb_valid && dfqq_req && !dram_req_delay; // If no dwb, and dfqq has valids, then pop
|
||||
wire dfqq_push = (|per_bank_dram_fill_req);
|
||||
/* verilator lint_on UNUSED */
|
||||
wire dfqq_pop = !dwb_valid && dfqq_req && !dram_req_full; // If no dwb, and dfqq has valids, then pop
|
||||
wire dfqq_push = (|per_bank_dram_fill_req_valid);
|
||||
|
||||
VX_cache_dfq_queue VX_cache_dfq_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dfqq_push (dfqq_push),
|
||||
.per_bank_dram_fill_req (per_bank_dram_fill_req),
|
||||
.per_bank_dram_fill_req_addr(per_bank_dram_fill_req_addr),
|
||||
.dfqq_pop (dfqq_pop),
|
||||
.dfqq_req (dfqq_req),
|
||||
.dfqq_req_addr (dfqq_req_addr),
|
||||
.dfqq_empty (dfqq_empty),
|
||||
.dfqq_full (dfqq_full)
|
||||
);
|
||||
VX_cache_dfq_queue vx_cache_dfq_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dfqq_push (dfqq_push),
|
||||
.per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid),
|
||||
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
|
||||
.dfqq_pop (dfqq_pop),
|
||||
.dfqq_req (dfqq_req),
|
||||
.dfqq_req_addr (dfqq_req_addr),
|
||||
.dfqq_empty (dfqq_empty),
|
||||
.dfqq_full (dfqq_full)
|
||||
);
|
||||
|
||||
wire[`LOG2UP(NUM_BANKS)-1:0] dwb_bank;
|
||||
// wire[NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req | per_bank_dram_because_of_snp;
|
||||
wire[NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req;
|
||||
VX_generic_priority_encoder #(.N(NUM_BANKS)) VX_sel_dwb(
|
||||
|
||||
wire[NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req_valid;
|
||||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) vx_sel_dwb (
|
||||
.valids(use_wb_valid),
|
||||
.index (dwb_bank),
|
||||
.found (dwb_valid)
|
||||
);
|
||||
);
|
||||
|
||||
// Per-bank writeback-queue pop. (1 << dwb_bank) is a one-hot for the bank index
// produced by the priority encoder; ANDing with use_wb_valid pops at most that
// one bank. All pops are suppressed while the DRAM request path is stalled.
// The statement appears twice; the two forms differ only in the name of the
// stall input (dram_req_full / dram_req_delay).
assign per_bank_dram_wb_queue_pop = dram_req_full ? 0 : use_wb_valid & ((1 << dwb_bank));
|
||||
|
||||
assign per_bank_dram_wb_queue_pop = dram_req_delay ? 0 : use_wb_valid & ((1 << dwb_bank));
|
||||
|
||||
|
||||
assign dram_req = dwb_valid || dfqq_req || pref_pop;
|
||||
assign dram_req_write = dwb_valid && dram_req;
|
||||
assign dram_req_read = ((dfqq_req && !dwb_valid) || pref_pop) && dram_req;
|
||||
assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr)) & `BASE_ADDR_MASK;
|
||||
assign dram_req_size = BANK_LINE_SIZE_BYTES;
|
||||
assign {dram_req_data} = dwb_valid ? {per_bank_dram_wb_req_data[dwb_bank] }: 0;
|
||||
// assign dram_req_because_of_wb = dwb_valid ? per_bank_dram_because_of_snp[dwb_bank] : 0;
|
||||
assign dram_req_because_of_wb = 0;
|
||||
// Final DRAM request selection. The priority visible in the expressions below
// is: writeback (dwb_valid) first, then fill (dfqq_req), then prefetch.

// A request is issued when any of the three sources has one.
wire dram_req = dwb_valid || dfqq_req || pref_pop;
|
||||
// Read: a fill request with no writeback pending, or a prefetch pop.
assign dram_req_read = ((dfqq_req && !dwb_valid) || pref_pop) && dram_req;
|
||||
// Write: any pending writeback.
assign dram_req_write = dwb_valid && dram_req;
|
||||
// Address comes from the winning source and is ANDed with `BASE_ADDR_MASK.
assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr)) & `BASE_ADDR_MASK;
|
||||
// Data is the selected bank's writeback line, or zero when not writing back.
assign {dram_req_data} = dwb_valid ? {per_bank_dram_wb_req_data[dwb_bank] }: 0;
|
||||
|
||||
endmodule
|
||||
@@ -1,8 +1,7 @@
|
||||
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_miss_resrv
|
||||
#(
|
||||
module VX_cache_miss_resrv #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -16,8 +15,7 @@ module VX_cache_miss_resrv
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -27,7 +25,7 @@ module VX_cache_miss_resrv
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -40,12 +38,9 @@ module VX_cache_miss_resrv
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
@@ -65,7 +60,11 @@ module VX_cache_miss_resrv
|
||||
|
||||
// Broadcast Fill
|
||||
input wire is_fill_st1,
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
// TODO: should fix this
|
||||
input wire[31:0] fill_addr_st1,
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
// Miss dequeue
|
||||
input wire miss_resrv_pop,
|
||||
@@ -81,96 +80,91 @@ module VX_cache_miss_resrv
|
||||
output wire[2:0] miss_resrv_mem_write_st0
|
||||
|
||||
);
|
||||
// Size of metadata = 32 + `LOG2UP(NUM_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1)
|
||||
reg [`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0];
|
||||
reg [MRVQ_SIZE-1:0][31:0] addr_table;
|
||||
reg [MRVQ_SIZE-1:0][31:0] pc_table;
|
||||
reg [MRVQ_SIZE-1:0] valid_table;
|
||||
reg [MRVQ_SIZE-1:0] ready_table;
|
||||
reg [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr;
|
||||
reg [`LOG2UP(MRVQ_SIZE)-1:0] tail_ptr;
|
||||
|
||||
// Size of metadata = 32 + `LOG2UP(NUM_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1)
|
||||
reg[`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0];
|
||||
reg[MRVQ_SIZE-1:0][31:0] addr_table;
|
||||
reg[MRVQ_SIZE-1:0][31:0] pc_table;
|
||||
reg[MRVQ_SIZE-1:0] valid_table;
|
||||
reg[MRVQ_SIZE-1:0] ready_table;
|
||||
reg[`LOG2UP(MRVQ_SIZE)-1:0] head_ptr;
|
||||
reg[`LOG2UP(MRVQ_SIZE)-1:0] tail_ptr;
|
||||
reg [31:0] size;
|
||||
|
||||
reg[31:0] size;
|
||||
// Occupancy flags for the miss reserve queue. The full/stop pair appears twice
// in this span with identical expressions.
//
// Earlier pointer-based full test, kept commented out:
// assign miss_resrv_full = (MRVQ_SIZE != 2) && (tail_ptr+1) == head_ptr;
|
||||
// Full when the `size` counter equals MRVQ_SIZE. Both flags are forced low
// when MRVQ_SIZE == 2.
assign miss_resrv_full = (MRVQ_SIZE != 2) && (size == MRVQ_SIZE );
|
||||
// Stop is raised once size exceeds MRVQ_SIZE-5, i.e. before the queue is full.
// NOTE(review): presumably headroom for requests already in flight -- confirm.
assign miss_resrv_stop = (MRVQ_SIZE != 2) && (size > (MRVQ_SIZE-5));
|
||||
|
||||
// A new entry can be enqueued whenever the queue is not full.
wire enqueue_possible = !miss_resrv_full;
|
||||
// New entries are written at the tail pointer.
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
|
||||
// assign miss_resrv_full = (MRVQ_SIZE != 2) && (tail_ptr+1) == head_ptr;
|
||||
assign miss_resrv_full = (MRVQ_SIZE != 2) && (size == MRVQ_SIZE );
|
||||
assign miss_resrv_stop = (MRVQ_SIZE != 2) && (size > (MRVQ_SIZE-5));
|
||||
// One bit per queue entry, set when the current fill matches that entry.
reg [MRVQ_SIZE-1:0] make_ready;
|
||||
genvar curr_e;
|
||||
// For every entry: make_ready is high when a fill is present (is_fill_st1), the
// entry is valid, and the entry's address equals fill_addr_st1 on bits
// [31:`LINE_SELECT_ADDR_START]. Address bits below `LINE_SELECT_ADDR_START are
// not compared.
generate
|
||||
for (curr_e = 0; curr_e < MRVQ_SIZE; curr_e=curr_e+1) begin
|
||||
assign make_ready[curr_e] = is_fill_st1 && valid_table[curr_e]
|
||||
&& addr_table[curr_e][31:`LINE_SELECT_ADDR_START] == fill_addr_st1[31:`LINE_SELECT_ADDR_START];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire enqueue_possible = !miss_resrv_full;
|
||||
wire[`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
wire dequeue_possible = valid_table[head_ptr] && ready_table[head_ptr];
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = head_ptr;
|
||||
|
||||
reg[MRVQ_SIZE-1:0] make_ready;
|
||||
genvar curr_e;
|
||||
generate
|
||||
for (curr_e = 0; curr_e < MRVQ_SIZE; curr_e=curr_e+1) begin
|
||||
assign make_ready[curr_e] = is_fill_st1 && valid_table[curr_e]
|
||||
&& addr_table[curr_e][31:`LINE_SELECT_ADDR_START] == fill_addr_st1[31:`LINE_SELECT_ADDR_START];
|
||||
// Dequeue-side outputs: all are read from the entry at dequeue_index.
// The valid output is forced low when MRVQ_SIZE == 2.
assign miss_resrv_valid_st0 = (MRVQ_SIZE != 2) && dequeue_possible;
|
||||
assign miss_resrv_pc_st0 = pc_table[dequeue_index];
|
||||
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
|
||||
// Unpack the stored metadata word into the individual request fields; the
// field order must match the concatenation used when the entry is written.
assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_rd_st0, miss_resrv_wb_st0, miss_resrv_warp_num_st0, miss_resrv_mem_read_st0, miss_resrv_mem_write_st0} = metadata_table[dequeue_index];
|
||||
|
||||
// Push only when a miss is being added, the queue has room, and
// MRVQ_SIZE != 2.
wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2);
|
||||
// Pop only when requested and dequeue_possible is high.
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
|
||||
|
||||
// High when at least one entry matched the current fill.
wire update_ready = (|make_ready);
|
||||
integer i;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (i = 0; i < MRVQ_SIZE; i=i+1) begin
|
||||
metadata_table[i] <= 0;
|
||||
end
|
||||
valid_table <= 0;
|
||||
ready_table <= 0;
|
||||
addr_table <= 0;
|
||||
pc_table <= 0;
|
||||
size <= 0;
|
||||
head_ptr <= 0;
|
||||
tail_ptr <= 0;
|
||||
end else begin
|
||||
if (mrvq_push) begin
|
||||
valid_table[enqueue_index] <= 1;
|
||||
ready_table[enqueue_index] <= 0;
|
||||
pc_table[enqueue_index] <= miss_add_pc;
|
||||
addr_table[enqueue_index] <= miss_add_addr;
|
||||
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_rd, miss_add_wb, miss_add_warp_num, miss_add_mem_read, miss_add_mem_write};
|
||||
tail_ptr <= tail_ptr + 1;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
if (update_ready) begin
|
||||
ready_table <= ready_table | make_ready;
|
||||
end
|
||||
|
||||
wire dequeue_possible = valid_table[head_ptr] && ready_table[head_ptr];
|
||||
wire[`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = head_ptr;
|
||||
if (mrvq_pop) begin
|
||||
valid_table[dequeue_index] <= 0;
|
||||
ready_table[dequeue_index] <= 0;
|
||||
addr_table[dequeue_index] <= 0;
|
||||
metadata_table[dequeue_index] <= 0;
|
||||
pc_table[dequeue_index] <= 0;
|
||||
head_ptr <= head_ptr + 1;
|
||||
end
|
||||
|
||||
assign miss_resrv_valid_st0 = (MRVQ_SIZE != 2) && dequeue_possible;
|
||||
assign miss_resrv_pc_st0 = pc_table[dequeue_index];
|
||||
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
|
||||
assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_rd_st0, miss_resrv_wb_st0, miss_resrv_warp_num_st0, miss_resrv_mem_read_st0, miss_resrv_mem_write_st0} = metadata_table[dequeue_index];
|
||||
|
||||
|
||||
wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2);
|
||||
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
|
||||
|
||||
wire update_ready = (|make_ready);
|
||||
integer i;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (i = 0; i < MRVQ_SIZE; i=i+1) metadata_table[i] <= 0;
|
||||
valid_table <= 0;
|
||||
ready_table <= 0;
|
||||
addr_table <= 0;
|
||||
pc_table <= 0;
|
||||
size <= 0;
|
||||
head_ptr <= 0;
|
||||
tail_ptr <= 0;
|
||||
end else begin
|
||||
if (!(mrvq_push && mrvq_pop)) begin
|
||||
if (mrvq_push) begin
|
||||
valid_table[enqueue_index] <= 1;
|
||||
ready_table[enqueue_index] <= 0;
|
||||
pc_table[enqueue_index] <= miss_add_pc;
|
||||
addr_table[enqueue_index] <= miss_add_addr;
|
||||
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_rd, miss_add_wb, miss_add_warp_num, miss_add_mem_read, miss_add_mem_write};
|
||||
tail_ptr <= tail_ptr + 1;
|
||||
end
|
||||
|
||||
if (update_ready) begin
|
||||
ready_table <= ready_table | make_ready;
|
||||
size <= size + 1;
|
||||
end
|
||||
|
||||
if (mrvq_pop) begin
|
||||
valid_table[dequeue_index] <= 0;
|
||||
ready_table[dequeue_index] <= 0;
|
||||
addr_table[dequeue_index] <= 0;
|
||||
metadata_table[dequeue_index] <= 0;
|
||||
pc_table[dequeue_index] <= 0;
|
||||
head_ptr <= head_ptr + 1;
|
||||
size <= size - 1;
|
||||
end
|
||||
|
||||
if (!(mrvq_push && mrvq_pop)) begin
|
||||
if (mrvq_push) begin
|
||||
size <= size + 1;
|
||||
end
|
||||
|
||||
if (mrvq_pop) begin
|
||||
size <= size - 1;
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -1,7 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_req_queue
|
||||
#(
|
||||
module VX_cache_req_queue #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -15,8 +14,7 @@ module VX_cache_req_queue
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -26,7 +24,7 @@ module VX_cache_req_queue
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -39,12 +37,9 @@ module VX_cache_req_queue
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
@@ -88,7 +83,6 @@ module VX_cache_req_queue
|
||||
wire [NUM_REQUESTS-1:0][2:0] out_per_mem_write;
|
||||
wire [31:0] out_per_pc;
|
||||
|
||||
|
||||
reg [NUM_REQUESTS-1:0] use_per_valids;
|
||||
reg [NUM_REQUESTS-1:0][31:0] use_per_addr;
|
||||
reg [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] use_per_writedata;
|
||||
@@ -99,7 +93,6 @@ module VX_cache_req_queue
|
||||
reg [NUM_REQUESTS-1:0][2:0] use_per_mem_read;
|
||||
reg [NUM_REQUESTS-1:0][2:0] use_per_mem_write;
|
||||
|
||||
|
||||
wire [NUM_REQUESTS-1:0] qual_valids;
|
||||
wire [NUM_REQUESTS-1:0][31:0] qual_addr;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] qual_writedata;
|
||||
@@ -110,7 +103,9 @@ module VX_cache_req_queue
|
||||
wire [NUM_REQUESTS-1:0][2:0] qual_mem_write;
|
||||
wire [31:0] qual_pc;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
reg [NUM_REQUESTS-1:0] updated_valids;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
wire o_empty;
|
||||
|
||||
@@ -120,17 +115,19 @@ module VX_cache_req_queue
|
||||
wire push_qual = reqq_push && !reqq_full;
|
||||
wire pop_qual = !out_empty && use_empty;
|
||||
|
||||
VX_generic_queue_ll #(.DATAW( (NUM_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUM_REQUESTS*2) + (`NW_BITS-1+1) + (NUM_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push_qual),
|
||||
.in_data ({bank_valids , bank_addr , bank_writedata , bank_rd , bank_wb , bank_warp_num , bank_mem_read , bank_mem_write , bank_pc}),
|
||||
.pop (pop_qual),
|
||||
.out_data({out_per_valids, out_per_addr, out_per_writedata, out_per_rd, out_per_wb, out_per_warp_num, out_per_mem_read, out_per_mem_write, out_per_pc}),
|
||||
.empty (o_empty),
|
||||
.full (reqq_full)
|
||||
);
|
||||
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW( (NUM_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUM_REQUESTS*2) + (`NW_BITS-1+1) + (NUM_REQUESTS * (3 + 3)) + 32 ),
|
||||
.SIZE(REQQ_SIZE)
|
||||
) reqq_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push_qual),
|
||||
.in_data ({bank_valids , bank_addr , bank_writedata , bank_rd , bank_wb , bank_warp_num , bank_mem_read , bank_mem_write , bank_pc}),
|
||||
.pop (pop_qual),
|
||||
.out_data ({out_per_valids, out_per_addr, out_per_writedata, out_per_rd, out_per_wb, out_per_warp_num, out_per_mem_read, out_per_mem_write, out_per_pc}),
|
||||
.empty (o_empty),
|
||||
.full (reqq_full)
|
||||
);
|
||||
|
||||
wire[NUM_REQUESTS-1:0] real_out_per_valids = out_per_valids & {NUM_REQUESTS{~out_empty}};
|
||||
|
||||
@@ -146,11 +143,13 @@ module VX_cache_req_queue
|
||||
|
||||
wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index;
|
||||
wire qual_has_request;
|
||||
VX_generic_priority_encoder #(.N(NUM_REQUESTS)) VX_sel_bank(
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_REQUESTS)
|
||||
) vx_sel_bank (
|
||||
.valids(qual_valids),
|
||||
.index (qual_request_index),
|
||||
.found (qual_has_request)
|
||||
);
|
||||
);
|
||||
|
||||
assign reqq_empty = !qual_has_request;
|
||||
assign reqq_req_st0 = qual_has_request;
|
||||
@@ -164,7 +163,6 @@ module VX_cache_req_queue
|
||||
assign reqq_req_mem_write_st0 = qual_mem_write[qual_request_index];
|
||||
assign reqq_req_pc_st0 = qual_pc;
|
||||
|
||||
|
||||
always @(*) begin
|
||||
updated_valids = qual_valids;
|
||||
if (qual_has_request) begin
|
||||
@@ -172,7 +170,6 @@ module VX_cache_req_queue
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
use_per_valids <= 0;
|
||||
@@ -204,5 +201,4 @@ module VX_cache_req_queue
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
@@ -1,7 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_wb_sel_merge
|
||||
#(
|
||||
module VX_cache_wb_sel_merge #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -17,8 +16,7 @@ module VX_cache_wb_sel_merge
|
||||
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
|
||||
parameter FUNC_ID = 0,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -28,7 +26,7 @@ module VX_cache_wb_sel_merge
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -41,35 +39,29 @@ module VX_cache_wb_sel_merge
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
|
||||
) (
|
||||
// Per Bank WB
|
||||
input wire [NUM_BANKS-1:0] per_bank_wb_valid,
|
||||
input wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_wb_tid,
|
||||
input wire [NUM_BANKS-1:0][4:0] per_bank_wb_rd,
|
||||
input wire [NUM_BANKS-1:0][1:0] per_bank_wb_wb,
|
||||
input wire [NUM_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num,
|
||||
input wire [NUM_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data,
|
||||
input wire [NUM_BANKS-1:0][31:0] per_bank_wb_pc,
|
||||
input wire [NUM_BANKS-1:0][31:0] per_bank_wb_address,
|
||||
output wire [NUM_BANKS-1:0] per_bank_wb_pop,
|
||||
|
||||
input wire [NUM_BANKS-1:0] per_bank_wb_valid,
|
||||
input wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_wb_tid,
|
||||
input wire [NUM_BANKS-1:0][4:0] per_bank_wb_rd,
|
||||
input wire [NUM_BANKS-1:0][1:0] per_bank_wb_wb,
|
||||
input wire [NUM_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num,
|
||||
input wire [NUM_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data,
|
||||
input wire [NUM_BANKS-1:0][31:0] per_bank_wb_pc,
|
||||
input wire [NUM_BANKS-1:0][31:0] per_bank_wb_address,
|
||||
output wire [NUM_BANKS-1:0] per_bank_wb_pop,
|
||||
|
||||
// Core Writeback
|
||||
input wire core_no_wb_slot,
|
||||
output reg [NUM_REQUESTS-1:0] core_wb_valid,
|
||||
output reg [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata,
|
||||
output reg [NUM_REQUESTS-1:0][31:0] core_wb_pc,
|
||||
output wire [4:0] core_wb_req_rd,
|
||||
output wire [1:0] core_wb_req_wb,
|
||||
output wire [`NW_BITS-1:0] core_wb_warp_num,
|
||||
output reg [NUM_REQUESTS-1:0][31:0] core_wb_address
|
||||
|
||||
input wire core_no_wb_slot,
|
||||
output reg [NUM_REQUESTS-1:0] core_wb_valid,
|
||||
output reg [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata,
|
||||
output reg [NUM_REQUESTS-1:0][31:0] core_wb_pc,
|
||||
output wire [4:0] core_wb_req_rd,
|
||||
output wire [1:0] core_wb_req_wb,
|
||||
output wire [`NW_BITS-1:0] core_wb_warp_num,
|
||||
output reg [NUM_REQUESTS-1:0][31:0] core_wb_address
|
||||
);
|
||||
|
||||
reg [NUM_BANKS-1:0] per_bank_wb_pop_unqual;
|
||||
@@ -83,15 +75,16 @@ module VX_cache_wb_sel_merge
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
|
||||
wire [`LOG2UP(NUM_BANKS)-1:0] main_bank_index;
|
||||
wire found_bank;
|
||||
wire found_bank;
|
||||
|
||||
VX_generic_priority_encoder #(.N(NUM_BANKS)) VX_sel_bank(
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) vx_sel_bank (
|
||||
.valids(per_bank_wb_valid),
|
||||
.index (main_bank_index),
|
||||
.found (found_bank)
|
||||
);
|
||||
);
|
||||
|
||||
assign core_wb_req_rd = per_bank_wb_rd[main_bank_index];
|
||||
assign core_wb_req_wb = per_bank_wb_wb[main_bank_index];
|
||||
@@ -106,42 +99,36 @@ module VX_cache_wb_sel_merge
|
||||
core_wb_address = 0;
|
||||
for (this_bank = 0; this_bank < NUM_BANKS; this_bank = this_bank + 1) begin
|
||||
if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
|
||||
if (found_bank
|
||||
&& !core_wb_valid[per_bank_wb_tid[this_bank]]
|
||||
&& per_bank_wb_valid[this_bank]
|
||||
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(this_bank))
|
||||
|| (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
end
|
||||
|
||||
if (found_bank
|
||||
&& !core_wb_valid[per_bank_wb_tid[this_bank]]
|
||||
&& per_bank_wb_valid[this_bank]
|
||||
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(this_bank))
|
||||
|| (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
end
|
||||
end else begin
|
||||
|
||||
if (((main_bank_index == `LOG2UP(NUM_BANKS)'(this_bank))
|
||||
|| (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))
|
||||
&& found_bank
|
||||
&& !core_wb_valid[per_bank_wb_tid[this_bank]]
|
||||
&& (per_bank_wb_valid[this_bank])
|
||||
&& (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index])
|
||||
&& (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
|
||||
end
|
||||
|
||||
if (((main_bank_index == `LOG2UP(NUM_BANKS)'(this_bank))
|
||||
|| (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))
|
||||
&& found_bank
|
||||
&& !core_wb_valid[per_bank_wb_tid[this_bank]]
|
||||
&& (per_bank_wb_valid[this_bank])
|
||||
&& (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index])
|
||||
&& (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
@@ -47,14 +47,14 @@ module VX_dcache_llv_resp_bank_sel
|
||||
(
|
||||
output reg [NUM_BANKS-1:0] per_bank_llvq_pop,
|
||||
input wire[NUM_BANKS-1:0] per_bank_llvq_valid,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_llvq_res_addr,
|
||||
input wire[NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][31:0] per_bank_llvq_res_data,
|
||||
input wire[NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_llvq_res_tid,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_llvq_rsp_addr,
|
||||
input wire[NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][31:0] per_bank_llvq_rsp_data,
|
||||
input wire[NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_llvq_rsp_tid,
|
||||
|
||||
input wire llvq_pop,
|
||||
output reg[NUM_REQUESTS-1:0] llvq_valid,
|
||||
output reg[NUM_REQUESTS-1:0][31:0] llvq_res_addr,
|
||||
output reg[NUM_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_res_data
|
||||
output reg[NUM_REQUESTS-1:0][31:0] llvq_rsp_addr,
|
||||
output reg[NUM_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_rsp_data
|
||||
|
||||
|
||||
);
|
||||
@@ -62,7 +62,7 @@ module VX_dcache_llv_resp_bank_sel
|
||||
wire [(`LOG2UP(NUM_BANKS))-1:0] main_bank_index;
|
||||
wire found_bank;
|
||||
|
||||
VX_generic_priority_encoder #(.N(NUM_BANKS)) VX_sel_bank(
|
||||
VX_generic_priority_encoder #(.N(NUM_BANKS)) vx_sel_bank(
|
||||
.valids(per_bank_llvq_valid),
|
||||
.index (main_bank_index),
|
||||
.found (found_bank)
|
||||
@@ -71,13 +71,13 @@ module VX_dcache_llv_resp_bank_sel
|
||||
|
||||
always @(*) begin
|
||||
llvq_valid = 0;
|
||||
llvq_res_addr = 0;
|
||||
llvq_res_data = 0;
|
||||
llvq_rsp_addr = 0;
|
||||
llvq_rsp_data = 0;
|
||||
per_bank_llvq_pop = 0;
|
||||
if (found_bank && llvq_pop) begin
|
||||
llvq_valid [per_bank_llvq_res_tid[main_bank_index]] = 1'b1;
|
||||
llvq_res_addr[per_bank_llvq_res_tid[main_bank_index]] = per_bank_llvq_res_addr[main_bank_index];
|
||||
llvq_res_data[per_bank_llvq_res_tid[main_bank_index]] = per_bank_llvq_res_data[main_bank_index];
|
||||
llvq_valid [per_bank_llvq_rsp_tid[main_bank_index]] = 1'b1;
|
||||
llvq_rsp_addr[per_bank_llvq_rsp_tid[main_bank_index]] = per_bank_llvq_rsp_addr[main_bank_index];
|
||||
llvq_rsp_data[per_bank_llvq_rsp_tid[main_bank_index]] = per_bank_llvq_rsp_data[main_bank_index];
|
||||
per_bank_llvq_pop[main_bank_index] = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
@@ -82,17 +82,18 @@ module VX_fill_invalidator
|
||||
|
||||
|
||||
wire [(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
|
||||
wire enqueue_found;
|
||||
VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) VX_sel_bank(
|
||||
wire enqueue_found;
|
||||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(FILL_INVALIDAOR_SIZE)
|
||||
) vx_sel_bank (
|
||||
.valids(~fills_active),
|
||||
.index (enqueue_index),
|
||||
.found (enqueue_found)
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
assign invalidate_fill = possible_fill && matched;
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
fills_active <= 0;
|
||||
@@ -109,7 +110,6 @@ module VX_fill_invalidator
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// reg success_found;
|
||||
// reg[(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] success_index;
|
||||
|
||||
@@ -133,21 +133,15 @@ module VX_fill_invalidator
|
||||
// end
|
||||
// end
|
||||
|
||||
|
||||
|
||||
|
||||
// wire [(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
|
||||
// wire enqueue_found;
|
||||
|
||||
// VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) VX_sel_bank(
|
||||
// VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) vx_sel_bank(
|
||||
// .valids(~fills_active),
|
||||
// .index (enqueue_index),
|
||||
// .found (enqueue_found)
|
||||
// );
|
||||
|
||||
|
||||
|
||||
|
||||
// always @(posedge clk) begin
|
||||
// if (reset) begin
|
||||
// fills_active <= 0;
|
||||
@@ -165,8 +159,6 @@ module VX_fill_invalidator
|
||||
// end
|
||||
// end
|
||||
|
||||
|
||||
end
|
||||
|
||||
|
||||
endmodule
|
||||
@@ -1,15 +1,13 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_prefetcher
|
||||
#(
|
||||
parameter PRFQ_SIZE = 64,
|
||||
parameter PRFQ_STRIDE = 2,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE_BYTES = 16,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE_BYTES = 4
|
||||
)
|
||||
(
|
||||
module VX_prefetcher #(
|
||||
parameter PRFQ_SIZE = 64,
|
||||
parameter PRFQ_STRIDE = 2,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE_BYTES = 16,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE_BYTES = 4
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
@@ -21,24 +19,23 @@ module VX_prefetcher
|
||||
output wire[31:0] pref_addr
|
||||
|
||||
);
|
||||
|
||||
|
||||
reg[`LOG2UP(PRFQ_STRIDE):0] use_valid;
|
||||
reg[31:0] use_addr;
|
||||
|
||||
|
||||
wire current_valid;
|
||||
wire[31:0] current_addr;
|
||||
|
||||
wire current_full;
|
||||
wire current_empty;
|
||||
|
||||
|
||||
assign current_valid = ~current_empty;
|
||||
|
||||
wire update_use = ((use_valid == 0) || ((use_valid-1) == 0)) && current_valid;
|
||||
|
||||
VX_generic_queue_ll #(.DATAW(32), .SIZE(PRFQ_SIZE)) pfq_queue(
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW(32),
|
||||
.SIZE(PRFQ_SIZE)
|
||||
) pfq_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -50,14 +47,11 @@ module VX_prefetcher
|
||||
|
||||
.empty (current_empty),
|
||||
.full (current_full)
|
||||
);
|
||||
|
||||
|
||||
);
|
||||
|
||||
assign pref_valid = use_valid != 0;
|
||||
assign pref_addr = use_addr;
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
use_valid <= 0;
|
||||
@@ -70,7 +64,6 @@ module VX_prefetcher
|
||||
use_valid <= use_valid - 1;
|
||||
use_addr <= use_addr + BANK_LINE_SIZE_BYTES;
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -5,28 +5,30 @@ module VX_snp_fwd_arb
|
||||
parameter NUM_BANKS = 8
|
||||
)
|
||||
(
|
||||
input wire[NUM_BANKS-1:0] per_bank_snp_fwd,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_snp_fwd_addr,
|
||||
output reg[NUM_BANKS-1:0] per_bank_snp_fwd_pop,
|
||||
input wire [NUM_BANKS-1:0] per_bank_snp_fwd,
|
||||
input wire [NUM_BANKS-1:0][31:0] per_bank_snp_fwd_addr,
|
||||
output reg [NUM_BANKS-1:0] per_bank_snp_fwd_pop,
|
||||
|
||||
output wire snp_fwd,
|
||||
output wire[31:0] snp_fwd_addr,
|
||||
input wire snp_fwd_delay
|
||||
output wire snp_fwd_valid,
|
||||
output wire [31:0] snp_fwd_addr,
|
||||
input wire snp_fwd_full
|
||||
|
||||
);
|
||||
|
||||
wire[NUM_BANKS-1:0] qual_per_bank_snp_fwd = per_bank_snp_fwd & {NUM_BANKS{!snp_fwd_delay}};
|
||||
wire[NUM_BANKS-1:0] qual_per_bank_snp_fwd = per_bank_snp_fwd & {NUM_BANKS{!snp_fwd_full}};
|
||||
|
||||
wire[`LOG2UP(NUM_BANKS)-1:0] fsq_bank;
|
||||
wire fsq_valid;
|
||||
wire fsq_valid;
|
||||
|
||||
VX_generic_priority_encoder #(.N(NUM_BANKS)) VX_sel_ffsq(
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) vx_sel_ffsq(
|
||||
.valids(qual_per_bank_snp_fwd),
|
||||
.index (fsq_bank),
|
||||
.found (fsq_valid)
|
||||
);
|
||||
);
|
||||
|
||||
assign snp_fwd = fsq_valid;
|
||||
assign snp_fwd_valid = fsq_valid;
|
||||
assign snp_fwd_addr = per_bank_snp_fwd_addr[fsq_bank];
|
||||
|
||||
always @(*) begin
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_tag_data_access
|
||||
#(
|
||||
module VX_tag_data_access #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -17,8 +16,7 @@ module VX_tag_data_access
|
||||
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
|
||||
parameter FUNC_ID = 0,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -28,7 +26,7 @@ module VX_tag_data_access
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -41,24 +39,26 @@ module VX_tag_data_access
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall,
|
||||
input wire is_snp_st1e,
|
||||
input wire stall_bank_pipe,
|
||||
// Initial Reading
|
||||
/* verilator lint_off UNUSED */
|
||||
// TODO:
|
||||
input wire[31:0] readaddr_st10,
|
||||
|
||||
/* verilator lint_on UNUSED */
|
||||
// Write/Read Logic
|
||||
input wire valid_req_st1e,
|
||||
input wire writefill_st1e,
|
||||
/* verilator lint_off UNUSED */
|
||||
// TODO:
|
||||
input wire[31:0] writeaddr_st1e,
|
||||
/* verilator lint_on UNUSED */
|
||||
input wire[`WORD_SIZE_RNG] writeword_st1e,
|
||||
input wire[`DBANK_LINE_WORDS-1:0][31:0] writedata_st1e,
|
||||
input wire[2:0] mem_write_st1e,
|
||||
@@ -69,19 +69,14 @@ module VX_tag_data_access
|
||||
output wire[`TAG_SELECT_SIZE_RNG] readtag_st1e,
|
||||
output wire miss_st1e,
|
||||
output wire dirty_st1e,
|
||||
output wire fill_saw_dirty_st1e
|
||||
|
||||
output wire fill_saw_dirty_st1e
|
||||
);
|
||||
|
||||
|
||||
reg[`DBANK_LINE_WORDS-1:0][31:0] readdata_st[STAGE_1_CYCLES-1:0];
|
||||
|
||||
reg read_valid_st1c[STAGE_1_CYCLES-1:0];
|
||||
reg read_dirty_st1c[STAGE_1_CYCLES-1:0];
|
||||
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0];
|
||||
reg[`DBANK_LINE_WORDS-1:0][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
|
||||
|
||||
|
||||
wire qual_read_valid_st1;
|
||||
wire qual_read_dirty_st1;
|
||||
wire[`TAG_SELECT_SIZE_RNG] qual_read_tag_st1;
|
||||
@@ -98,9 +93,9 @@ module VX_tag_data_access
|
||||
|
||||
wire real_writefill = writefill_st1e && ((valid_req_st1e && !use_read_valid_st1e) || (valid_req_st1e && use_read_valid_st1e && (writeaddr_st1e[`TAG_SELECT_ADDR_RNG] != use_read_tag_st1e)));
|
||||
|
||||
|
||||
wire fill_sent;
|
||||
wire invalidate_line;
|
||||
|
||||
VX_tag_data_structure #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
|
||||
@@ -119,14 +114,12 @@ module VX_tag_data_access
|
||||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
VX_tag_data_structure
|
||||
(
|
||||
) vx_tag_data_structure (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall_bank_pipe(stall_bank_pipe),
|
||||
|
||||
.read_addr (readaddr_st10),
|
||||
.read_addr (readaddr_st10[`LINE_SELECT_ADDR_RNG]),
|
||||
.read_valid (qual_read_valid_st1),
|
||||
.read_dirty (qual_read_dirty_st1),
|
||||
.read_tag (qual_read_tag_st1),
|
||||
@@ -135,13 +128,17 @@ module VX_tag_data_access
|
||||
.invalidate (invalidate_line),
|
||||
.write_enable(use_write_enable),
|
||||
.write_fill (real_writefill),
|
||||
.write_addr (writeaddr_st1e),
|
||||
.write_addr (writeaddr_st1e[`LINE_SELECT_ADDR_RNG]),
|
||||
.tag_index (writeaddr_st1e[`TAG_SELECT_ADDR_RNG]),
|
||||
.write_data (use_write_data),
|
||||
.fill_sent (fill_sent)
|
||||
);
|
||||
);
|
||||
|
||||
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_c0 (
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) ), .Valid(0)) s0_1_c0 (
|
||||
VX_generic_register #(
|
||||
.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) ),
|
||||
.PassThru(1)
|
||||
) s0_1_c0 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
@@ -153,7 +150,9 @@ module VX_tag_data_access
|
||||
genvar curr_stage;
|
||||
generate
|
||||
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-1; curr_stage = curr_stage + 1) begin
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_cc (
|
||||
VX_generic_register #(
|
||||
.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32))
|
||||
) s0_1_cc (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
@@ -164,7 +163,6 @@ module VX_tag_data_access
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid
|
||||
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && (FUNC_ID != `SFUNC_ID); // Dirty only applies in Dcache
|
||||
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM
|
||||
@@ -178,6 +176,7 @@ module VX_tag_data_access
|
||||
wire[`OFFSET_SIZE_RNG] byte_select = writeaddr_st1e[`OFFSET_ADDR_RNG];
|
||||
wire[`WORD_SELECT_SIZE_RNG] block_offset = writeaddr_st1e[`WORD_SELECT_ADDR_RNG];
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire lw = valid_req_st1e && (mem_read_st1e == `LW_MEM_READ);
|
||||
wire lb = valid_req_st1e && (mem_read_st1e == `LB_MEM_READ);
|
||||
wire lh = valid_req_st1e && (mem_read_st1e == `LH_MEM_READ);
|
||||
@@ -187,49 +186,15 @@ module VX_tag_data_access
|
||||
wire b0 = (byte_select == 0);
|
||||
wire b1 = (byte_select == 1);
|
||||
wire b2 = (byte_select == 2);
|
||||
wire b3 = (byte_select == 3);
|
||||
wire b3 = (byte_select == 3);
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] w0 = read_data_st1c[STAGE_1_CYCLES-1][0][31:0];
|
||||
wire[31:0] w1 = read_data_st1c[STAGE_1_CYCLES-1][1][31:0];
|
||||
wire[31:0] w2 = read_data_st1c[STAGE_1_CYCLES-1][2][31:0];
|
||||
wire[31:0] w3 = read_data_st1c[STAGE_1_CYCLES-1][3][31:0];
|
||||
|
||||
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset][31:0];
|
||||
|
||||
wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) :
|
||||
b1 ? (data_unmod >> 8) :
|
||||
b2 ? (data_unmod >> 16) :
|
||||
(data_unmod >> 24);
|
||||
|
||||
|
||||
wire[31:0] lb_data = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF);
|
||||
wire[31:0] lh_data = (data_unQual[15]) ? (data_unQual | 32'hFFFF0000) : (data_unQual & 32'hFFFF);
|
||||
wire[31:0] lbu_data = (data_unQual & 32'hFF);
|
||||
wire[31:0] lhu_data = (data_unQual & 32'hFFFF);
|
||||
wire[31:0] lw_data = (data_unQual);
|
||||
|
||||
|
||||
wire[31:0] sw_data = writeword_st1e[31:0];
|
||||
|
||||
wire[31:0] sb_data = b1 ? {{16{1'b0}}, writeword_st1e[7:0], { 8{1'b0}}} :
|
||||
b2 ? {{ 8{1'b0}}, writeword_st1e[7:0], {16{1'b0}}} :
|
||||
b3 ? {{ 0{1'b0}}, writeword_st1e[7:0], {24{1'b0}}} :
|
||||
writeword_st1e[31:0];
|
||||
|
||||
wire[31:0] sh_data = b2 ? {writeword_st1e[15:0], {16{1'b0}}} : writeword_st1e[31:0];
|
||||
|
||||
|
||||
|
||||
wire[31:0] use_write_dat = sb ? sb_data :
|
||||
sh ? sh_data :
|
||||
sw_data;
|
||||
|
||||
|
||||
wire[31:0] data_Qual = lb ? lb_data :
|
||||
lh ? lh_data :
|
||||
lhu ? lhu_data :
|
||||
lbu ? lbu_data :
|
||||
lw_data;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
/////////////////////// STORE LOGIC ///////////////////
|
||||
|
||||
@@ -245,6 +210,7 @@ module VX_tag_data_access
|
||||
|
||||
wire[`DBANK_LINE_WORDS-1:0][3:0] we;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] data_write;
|
||||
|
||||
genvar g;
|
||||
generate
|
||||
for (g = 0; g < `DBANK_LINE_WORDS; g = g + 1) begin : write_enables
|
||||
@@ -257,9 +223,18 @@ module VX_tag_data_access
|
||||
(normal_write && sh) ? sh_mask :
|
||||
4'b0000;
|
||||
|
||||
if (!(FUNC_ID == `L2FUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
|
||||
if (FUNC_ID != `L2FUNC_ID) begin
|
||||
wire[31:0] sb_data = b1 ? {{16{1'b0}}, writeword_st1e[7:0], { 8{1'b0}}} :
|
||||
b2 ? {{ 8{1'b0}}, writeword_st1e[7:0], {16{1'b0}}} :
|
||||
b3 ? {{ 0{1'b0}}, writeword_st1e[7:0], {24{1'b0}}} :
|
||||
writeword_st1e[31:0];
|
||||
wire[31:0] sw_data = writeword_st1e[31:0];
|
||||
wire[31:0] sh_data = b2 ? {writeword_st1e[15:0], {16{1'b0}}} : writeword_st1e[31:0];
|
||||
wire[31:0] use_write_dat = sb ? sb_data : sh ? sh_data : sw_data;
|
||||
assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
|
||||
end
|
||||
end
|
||||
if ((FUNC_ID == `L2FUNC_ID)) begin
|
||||
if (FUNC_ID == `L2FUNC_ID) begin
|
||||
assign data_write = force_write ? writedata_st1e : writeword_st1e;
|
||||
end
|
||||
endgenerate
|
||||
@@ -268,13 +243,29 @@ module VX_tag_data_access
|
||||
assign use_write_data = data_write;
|
||||
|
||||
///////////////////////
|
||||
|
||||
if (FUNC_ID == `L2FUNC_ID) begin
|
||||
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
|
||||
end else begin
|
||||
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset][31:0];
|
||||
wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) :
|
||||
b1 ? (data_unmod >> 8) :
|
||||
b2 ? (data_unmod >> 16) :
|
||||
(data_unmod >> 24);
|
||||
wire[31:0] lb_data = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF);
|
||||
wire[31:0] lh_data = (data_unQual[15]) ? (data_unQual | 32'hFFFF0000) : (data_unQual & 32'hFFFF);
|
||||
wire[31:0] lbu_data = (data_unQual & 32'hFF);
|
||||
wire[31:0] lhu_data = (data_unQual & 32'hFFFF);
|
||||
wire[31:0] lw_data = (data_unQual);
|
||||
wire[31:0] data_Qual = lb ? lb_data :
|
||||
lh ? lh_data :
|
||||
lhu ? lhu_data :
|
||||
lbu ? lbu_data :
|
||||
lw_data;
|
||||
|
||||
assign readword_st1e = data_Qual;
|
||||
end
|
||||
|
||||
|
||||
wire[`TAG_SELECT_ADDR_RNG] writeaddr_tag = writeaddr_st1e[`TAG_SELECT_ADDR_RNG];
|
||||
|
||||
wire tags_mismatch = writeaddr_tag != use_read_tag_st1e;
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_tag_data_structure
|
||||
#(
|
||||
module VX_tag_data_structure #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
@@ -17,8 +16,7 @@ module VX_tag_data_structure
|
||||
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
|
||||
parameter FUNC_ID = 0,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -28,7 +26,7 @@ module VX_tag_data_structure
|
||||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
@@ -41,44 +39,37 @@ module VX_tag_data_structure
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall_bank_pipe,
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall_bank_pipe,
|
||||
|
||||
input wire[31:0] read_addr,
|
||||
output wire read_valid,
|
||||
output wire read_dirty,
|
||||
output wire[`TAG_SELECT_SIZE_RNG] read_tag,
|
||||
input wire[`LINE_SELECT_SIZE_RNG] read_addr,
|
||||
output wire read_valid,
|
||||
output wire read_dirty,
|
||||
output wire[`TAG_SELECT_SIZE_RNG] read_tag,
|
||||
output wire[`DBANK_LINE_WORDS-1:0][31:0] read_data,
|
||||
|
||||
input wire invalidate,
|
||||
input wire[`DBANK_LINE_WORDS-1:0][3:0] write_enable,
|
||||
input wire write_fill,
|
||||
input wire[31:0] write_addr,
|
||||
input wire invalidate,
|
||||
input wire[`DBANK_LINE_WORDS-1:0][3:0] write_enable,
|
||||
input wire write_fill,
|
||||
input wire[`LINE_SELECT_SIZE_RNG] write_addr,
|
||||
input wire[`TAG_SELECT_SIZE_RNG] tag_index,
|
||||
input wire[`DBANK_LINE_WORDS-1:0][31:0] write_data,
|
||||
input wire fill_sent
|
||||
|
||||
input wire fill_sent
|
||||
);
|
||||
|
||||
reg[`DBANK_LINE_WORDS-1:0][3:0][7:0] data [`BANK_LINE_COUNT-1:0];
|
||||
reg[`TAG_SELECT_SIZE_RNG] tag [`BANK_LINE_COUNT-1:0];
|
||||
reg valid[`BANK_LINE_COUNT-1:0];
|
||||
reg dirty[`BANK_LINE_COUNT-1:0];
|
||||
reg [`DBANK_LINE_WORDS-1:0][3:0][7:0] data [`BANK_LINE_COUNT-1:0];
|
||||
reg [`TAG_SELECT_SIZE_RNG] tag [`BANK_LINE_COUNT-1:0];
|
||||
reg valid [`BANK_LINE_COUNT-1:0];
|
||||
reg dirty [`BANK_LINE_COUNT-1:0];
|
||||
|
||||
|
||||
wire[`TAG_SELECT_ADDR_RNG] curr_tag = write_addr[`TAG_SELECT_ADDR_RNG];
|
||||
wire[`LINE_SELECT_ADDR_RNG] curr_inx = write_addr[`LINE_SELECT_ADDR_RNG];
|
||||
|
||||
assign read_valid = valid[read_addr[`LINE_SELECT_ADDR_RNG]];
|
||||
assign read_dirty = dirty[read_addr[`LINE_SELECT_ADDR_RNG]];
|
||||
assign read_tag = tag [read_addr[`LINE_SELECT_ADDR_RNG]];
|
||||
assign read_data = data [read_addr[`LINE_SELECT_ADDR_RNG]];
|
||||
assign read_valid = valid [read_addr];
|
||||
assign read_dirty = dirty [read_addr];
|
||||
assign read_tag = tag [read_addr];
|
||||
assign read_data = data [read_addr];
|
||||
|
||||
wire going_to_write = (|write_enable);
|
||||
|
||||
@@ -94,27 +85,27 @@ module VX_tag_data_structure
|
||||
end
|
||||
end else if (!stall_bank_pipe) begin
|
||||
if (going_to_write) begin
|
||||
valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 1;
|
||||
tag [write_addr[`LINE_SELECT_ADDR_RNG]] <= write_addr[`TAG_SELECT_ADDR_RNG];
|
||||
valid[write_addr] <= 1;
|
||||
tag [write_addr] <= tag_index;
|
||||
if (write_fill) begin
|
||||
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||
dirty[write_addr] <= 0;
|
||||
end else begin
|
||||
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 1;
|
||||
dirty[write_addr] <= 1;
|
||||
end
|
||||
end else if (fill_sent) begin
|
||||
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||
// valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||
dirty[write_addr] <= 0;
|
||||
// valid[write_addr] <= 0;
|
||||
end
|
||||
|
||||
if (invalidate) begin
|
||||
valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||
valid[write_addr] <= 0;
|
||||
end
|
||||
|
||||
for (f = 0; f < `DBANK_LINE_WORDS; f = f + 1) begin
|
||||
if (write_enable[f][0]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][0] <= write_data[f][7 :0 ];
|
||||
if (write_enable[f][1]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][1] <= write_data[f][15:8 ];
|
||||
if (write_enable[f][2]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][2] <= write_data[f][23:16];
|
||||
if (write_enable[f][3]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][3] <= write_data[f][31:24];
|
||||
if (write_enable[f][0]) data[write_addr][f][0] <= write_data[f][7 :0 ];
|
||||
if (write_enable[f][1]) data[write_addr][f][1] <= write_data[f][15:8 ];
|
||||
if (write_enable[f][2]) data[write_addr][f][2] <= write_data[f][23:16];
|
||||
if (write_enable[f][3]) data[write_addr][f][3] <= write_data[f][31:24];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -4,10 +4,11 @@
|
||||
`include "../VX_define.vh"
|
||||
|
||||
interface VX_branch_response_inter ();
|
||||
wire valid_branch;
|
||||
wire branch_dir;
|
||||
wire[31:0] branch_dest;
|
||||
wire[`NW_BITS-1:0] branch_warp_num;
|
||||
|
||||
wire valid_branch;
|
||||
wire branch_dir;
|
||||
wire [31:0] branch_dest;
|
||||
wire [`NW_BITS-1:0] branch_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,15 +5,15 @@
|
||||
|
||||
interface VX_csr_req_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[4:0] alu_op;
|
||||
wire is_csr;
|
||||
wire[11:0] csr_address;
|
||||
wire csr_immed;
|
||||
wire[31:0] csr_mask;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
wire [4:0] alu_op;
|
||||
wire is_csr;
|
||||
wire [11:0] csr_address;
|
||||
wire csr_immed;
|
||||
wire [31:0] csr_mask;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
|
||||
interface VX_csr_wb_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] csr_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] csr_result;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,11 +5,11 @@
|
||||
|
||||
interface VX_dcache_request_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_address;
|
||||
wire[2:0] out_cache_driver_in_mem_read;
|
||||
wire[2:0] out_cache_driver_in_mem_write;
|
||||
wire[`NUM_THREADS-1:0] out_cache_driver_in_valid;
|
||||
wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] out_cache_driver_in_address;
|
||||
wire [2:0] out_cache_driver_in_mem_read;
|
||||
wire [2:0] out_cache_driver_in_mem_write;
|
||||
wire [`NUM_THREADS-1:0] out_cache_driver_in_valid;
|
||||
wire [`NUM_THREADS-1:0][31:0] out_cache_driver_in_data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
interface VX_dcache_response_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] in_cache_driver_out_data;
|
||||
wire delay;
|
||||
wire [`NUM_THREADS-1:0][31:0] in_cache_driver_out_data;
|
||||
wire delay;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,18 +6,19 @@
|
||||
|
||||
interface VX_dram_req_rsp_inter #(
|
||||
parameter NUM_BANKS = 8,
|
||||
parameter NUM_WORDS_PER_BLOCK = 4) ();
|
||||
parameter NUM_WORDS_PER_BLOCK = 4
|
||||
) ();
|
||||
|
||||
// Req
|
||||
wire [31:0] o_m_evict_addr;
|
||||
wire [31:0] o_m_read_addr;
|
||||
wire o_m_valid;
|
||||
wire[NUM_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata;
|
||||
wire o_m_read_or_write;
|
||||
wire [31:0] o_m_evict_addr;
|
||||
wire [31:0] o_m_read_addr;
|
||||
wire o_m_valid;
|
||||
wire [NUM_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata;
|
||||
wire o_m_read_or_write;
|
||||
|
||||
// Rsp
|
||||
wire[NUM_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata;
|
||||
wire i_m_ready;
|
||||
wire [NUM_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata;
|
||||
wire i_m_ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,43 +6,43 @@
|
||||
interface VX_exec_unit_req_inter ();
|
||||
|
||||
// Meta
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[31:0] curr_PC;
|
||||
wire[31:0] PC_next;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] PC_next;
|
||||
|
||||
// Write Back Info
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
|
||||
// Data and alu op
|
||||
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] b_reg_data;
|
||||
wire[4:0] alu_op;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire rs2_src;
|
||||
wire[31:0] itype_immed;
|
||||
wire[19:0] upper_immed;
|
||||
wire [`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] b_reg_data;
|
||||
wire [4:0] alu_op;
|
||||
wire [4:0] rs1;
|
||||
wire [4:0] rs2;
|
||||
wire rs2_src;
|
||||
wire [31:0] itype_immed;
|
||||
wire [19:0] upper_immed;
|
||||
|
||||
// Branch type
|
||||
wire[2:0] branch_type;
|
||||
wire [2:0] branch_type;
|
||||
|
||||
// Jal info
|
||||
wire jalQual;
|
||||
wire jal;
|
||||
wire[31:0] jal_offset;
|
||||
wire jalQual;
|
||||
wire jal;
|
||||
wire [31:0] jal_offset;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire ebreak;
|
||||
wire wspawn;
|
||||
/* verilator lint_on UNUSED */
|
||||
/* verilator lint_off UNUSED */
|
||||
wire ebreak;
|
||||
wire wspawn;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
// CSR info
|
||||
wire is_csr;
|
||||
wire[11:0] csr_address;
|
||||
wire csr_immed;
|
||||
wire[31:0] csr_mask;
|
||||
wire is_csr;
|
||||
wire [11:0] csr_address;
|
||||
wire csr_immed;
|
||||
wire [31:0] csr_mask;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,37 +5,37 @@
|
||||
|
||||
interface VX_frE_to_bckE_req_inter ();
|
||||
|
||||
wire[11:0] csr_address;
|
||||
wire is_csr;
|
||||
wire csr_immed;
|
||||
wire[31:0] csr_mask;
|
||||
wire[4:0] rd;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire[4:0] alu_op;
|
||||
wire[1:0] wb;
|
||||
wire rs2_src;
|
||||
wire[31:0] itype_immed;
|
||||
wire[2:0] mem_read;
|
||||
wire[2:0] mem_write;
|
||||
wire[2:0] branch_type;
|
||||
wire[19:0] upper_immed;
|
||||
wire[31:0] curr_PC;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire ebreak;
|
||||
/* verilator lint_on UNUSED */
|
||||
wire jalQual;
|
||||
wire jal;
|
||||
wire[31:0] jal_offset;
|
||||
wire[31:0] PC_next;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire [11:0] csr_address;
|
||||
wire is_csr;
|
||||
wire csr_immed;
|
||||
wire [31:0] csr_mask;
|
||||
wire [4:0] rd;
|
||||
wire [4:0] rs1;
|
||||
wire [4:0] rs2;
|
||||
wire [4:0] alu_op;
|
||||
wire [1:0] wb;
|
||||
wire rs2_src;
|
||||
wire [31:0] itype_immed;
|
||||
wire [2:0] mem_read;
|
||||
wire [2:0] mem_write;
|
||||
wire [2:0] branch_type;
|
||||
wire [19:0] upper_immed;
|
||||
wire [31:0] curr_PC;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire ebreak;
|
||||
/* verilator lint_on UNUSED */
|
||||
wire jalQual;
|
||||
wire jal;
|
||||
wire [31:0] jal_offset;
|
||||
wire [31:0] PC_next;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
// GPGPU stuff
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
wire is_barrier;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
wire is_barrier;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,12 +5,10 @@
|
||||
`include "../VX_define.vh"
|
||||
|
||||
interface VX_gpr_clone_inter ();
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire is_clone;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire is_clone;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -6,8 +6,8 @@
|
||||
|
||||
interface VX_gpr_data_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] b_reg_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] b_reg_data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
|
||||
interface VX_gpr_read_inter ();
|
||||
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire [4:0] rs1;
|
||||
wire [4:0] rs2;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,10 +6,9 @@
|
||||
interface VX_gpr_wspawn_inter ();
|
||||
/* verilator lint_off UNUSED */
|
||||
wire is_wspawn;
|
||||
wire[`NW_BITS-1:0] which_wspawn;
|
||||
wire [`NW_BITS-1:0] which_wspawn;
|
||||
// wire[`NW_BITS-1:0] warp_num;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -1,33 +1,20 @@
|
||||
|
||||
|
||||
`ifndef VX_GPU_DRAM_DCACHE_REQ
|
||||
`define VX_GPU_DRAM_DCACHE_REQ
|
||||
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
interface VX_gpu_dcache_dram_req_inter
|
||||
#(
|
||||
parameter BANK_LINE_WORDS = 2
|
||||
)
|
||||
();
|
||||
interface VX_gpu_dcache_dram_req_inter #(
|
||||
parameter BANK_LINE_WORDS = 2
|
||||
) ();
|
||||
|
||||
// DRAM Request
|
||||
wire dram_req;
|
||||
wire dram_req_write;
|
||||
wire dram_req_read;
|
||||
wire [31:0] dram_req_addr;
|
||||
wire [31:0] dram_req_size;
|
||||
wire [BANK_LINE_WORDS-1:0][31:0] dram_req_data;
|
||||
wire [BANK_LINE_WORDS-1:0][31:0] dram_req_data;
|
||||
wire dram_req_full;
|
||||
|
||||
// Snoop
|
||||
wire dram_because_of_snp;
|
||||
wire dram_snp_full;
|
||||
|
||||
// DRAM Cache can't accept response
|
||||
wire dram_fill_accept;
|
||||
|
||||
// DRAM Cache can't accept request
|
||||
wire dram_req_delay;
|
||||
wire dram_rsp_ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
`ifndef VX_GPU_DRAM_DCACHE_RES
|
||||
`define VX_GPU_DRAM_DCACHE_RES
|
||||
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
interface VX_gpu_dcache_dram_res_inter
|
||||
#(
|
||||
parameter BANK_LINE_WORDS = 2
|
||||
)
|
||||
();
|
||||
// DRAM Rsponse
|
||||
wire dram_fill_rsp;
|
||||
wire [31:0] dram_fill_rsp_addr;
|
||||
wire [BANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
16
hw/rtl/interfaces/VX_gpu_dcache_dram_rsp_inter.v
Normal file
16
hw/rtl/interfaces/VX_gpu_dcache_dram_rsp_inter.v
Normal file
@@ -0,0 +1,16 @@
|
||||
`ifndef VX_GPU_DRAM_DCACHE_RSP
|
||||
`define VX_GPU_DRAM_DCACHE_RSP
|
||||
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
interface VX_gpu_dcache_dram_rsp_inter #(
|
||||
parameter BANK_LINE_WORDS = 2
|
||||
) ();
|
||||
// DRAM Response
|
||||
wire dram_rsp_valid;
|
||||
wire [31:0] dram_rsp_addr;
|
||||
wire [BANK_LINE_WORDS-1:0][31:0] dram_rsp_data;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -1,15 +1,11 @@
|
||||
|
||||
|
||||
`ifndef VX_GPU_DCACHE_REQ
|
||||
`define VX_GPU_DCACHE_REQ
|
||||
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
interface VX_gpu_dcache_req_inter
|
||||
#(
|
||||
parameter NUM_REQUESTS = 32
|
||||
)
|
||||
();
|
||||
interface VX_gpu_dcache_req_inter #(
|
||||
parameter NUM_REQUESTS = 32
|
||||
) ();
|
||||
|
||||
// Core Request
|
||||
wire [NUM_REQUESTS-1:0] core_req_valid;
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
`ifndef VX_GPU_DCACHE_RES
|
||||
`define VX_GPU_DCACHE_RES
|
||||
`ifndef VX_GPU_DCACHE_RSP
|
||||
`define VX_GPU_DCACHE_RSP
|
||||
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
interface VX_gpu_dcache_res_inter
|
||||
#(
|
||||
parameter NUM_REQUESTS = 32
|
||||
) ();
|
||||
interface VX_gpu_dcache_rsp_inter #(
|
||||
parameter NUM_REQUESTS = 32
|
||||
) ();
|
||||
|
||||
// Cache WB
|
||||
wire [NUM_REQUESTS-1:0] core_wb_valid;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire [4:0] core_wb_req_rd;
|
||||
wire [1:0] core_wb_req_wb;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire [`NW_BITS-1:0] core_wb_warp_num;
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_wb_readdata;
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_wb_pc;
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
interface VX_gpu_dcache_snp_req_inter ();
|
||||
// Snoop Req
|
||||
wire snp_req;
|
||||
wire snp_req_valid;
|
||||
wire [31:0] snp_req_addr;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
interface VX_gpu_inst_req_inter();
|
||||
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
@@ -15,8 +15,8 @@ interface VX_gpu_inst_req_inter();
|
||||
|
||||
wire[31:0] pc_next;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire[31:0] rd2;
|
||||
wire [`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire [31:0] rd2;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,11 +6,12 @@
|
||||
interface VX_gpu_snp_req_rsp ();
|
||||
|
||||
// Snoop request
|
||||
wire snp_req;
|
||||
wire[31:0] snp_req_addr;
|
||||
wire snp_req_valid;
|
||||
wire [31:0] snp_req_addr;
|
||||
wire snp_req_full;
|
||||
|
||||
// Snoop Response
|
||||
wire snp_delay;
|
||||
// TODO:
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,11 +6,11 @@
|
||||
|
||||
interface VX_icache_request_inter ();
|
||||
|
||||
wire[31:0] pc_address;
|
||||
wire[2:0] out_cache_driver_in_mem_read;
|
||||
wire[2:0] out_cache_driver_in_mem_write;
|
||||
wire out_cache_driver_in_valid;
|
||||
wire[31:0] out_cache_driver_in_data;
|
||||
wire [31:0] pc_address;
|
||||
wire [2:0] out_cache_driver_in_mem_read;
|
||||
wire [2:0] out_cache_driver_in_mem_write;
|
||||
wire out_cache_driver_in_valid;
|
||||
wire [31:0] out_cache_driver_in_data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -7,8 +7,8 @@ interface VX_icache_response_inter ();
|
||||
|
||||
// wire ready;
|
||||
// wire stall;
|
||||
wire[31:0] instruction;
|
||||
wire delay;
|
||||
wire [31:0] instruction;
|
||||
wire delay;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,12 +6,12 @@
|
||||
|
||||
interface VX_inst_exec_wb_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[31:0] exec_wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NUM_THREADS-1:0] wb_valid;
|
||||
wire[`NW_BITS-1:0] wb_warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [31:0] exec_wb_pc;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
wire [`NUM_THREADS-1:0] wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,12 +6,12 @@
|
||||
|
||||
interface VX_inst_mem_wb_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] loaded_data;
|
||||
wire[31:0] mem_wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NUM_THREADS-1:0] wb_valid;
|
||||
wire[`NW_BITS-1:0] wb_warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] loaded_data;
|
||||
wire [31:0] mem_wb_pc;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
wire [`NUM_THREADS-1:0] wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,10 +5,10 @@
|
||||
|
||||
interface VX_inst_meta_inter ();
|
||||
|
||||
wire[31:0] instruction;
|
||||
wire[31:0] inst_pc;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire [31:0] instruction;
|
||||
wire [31:0] inst_pc;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -7,8 +7,8 @@
|
||||
interface VX_jal_response_inter ();
|
||||
|
||||
wire jal;
|
||||
wire[31:0] jal_dest;
|
||||
wire[`NW_BITS-1:0] jal_warp_num;
|
||||
wire [31:0] jal_dest;
|
||||
wire [`NW_BITS-1:0] jal_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
|
||||
interface VX_join_inter ();
|
||||
|
||||
wire is_join;
|
||||
wire[`NW_BITS-1:0] join_warp_num;
|
||||
wire is_join;
|
||||
wire [`NW_BITS-1:0] join_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,16 +6,16 @@
|
||||
|
||||
interface VX_lsu_req_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[31:0] lsu_pc;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] base_address; // A reg data
|
||||
wire[31:0] offset; // itype_immed
|
||||
wire[2:0] mem_read;
|
||||
wire[2:0] mem_write;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [31:0] lsu_pc;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] base_address; // A reg data
|
||||
wire [31:0] offset; // itype_immed
|
||||
wire [2:0] mem_read;
|
||||
wire [2:0] mem_write;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,20 +5,20 @@
|
||||
|
||||
interface VX_mem_req_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[2:0] mem_read;
|
||||
wire[2:0] mem_write;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire[`NUM_THREADS-1:0][31:0] rd2;
|
||||
wire[31:0] PC_next;
|
||||
wire[31:0] curr_PC;
|
||||
wire[31:0] branch_offset;
|
||||
wire[2:0] branch_type;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [2:0] mem_read;
|
||||
wire [2:0] mem_write;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
wire [4:0] rs1;
|
||||
wire [4:0] rs2;
|
||||
wire [`NUM_THREADS-1:0][31:0] rd2;
|
||||
wire [31:0] PC_next;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] branch_offset;
|
||||
wire [2:0] branch_type;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,13 +6,13 @@
|
||||
|
||||
interface VX_mw_wb_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[`NUM_THREADS-1:0][31:0] mem_result;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[31:0] PC_next;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] mem_result;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
wire [31:0] PC_next;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -6,27 +6,29 @@
|
||||
|
||||
interface VX_warp_ctl_inter ();
|
||||
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire change_mask;
|
||||
wire[`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire change_mask;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
|
||||
wire wspawn;
|
||||
wire[31:0] wspawn_pc;
|
||||
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
||||
wire wspawn;
|
||||
wire [31:0] wspawn_pc;
|
||||
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
wire ebreak;
|
||||
wire ebreak;
|
||||
|
||||
// barrier
|
||||
wire is_barrier;
|
||||
wire[31:0] barrier_id;
|
||||
wire[$clog2(`NUM_WARPS):0] num_warps;
|
||||
wire is_barrier;
|
||||
wire [31:0] barrier_id;
|
||||
wire [$clog2(`NUM_WARPS):0] num_warps;
|
||||
|
||||
wire is_split;
|
||||
wire dont_split;
|
||||
wire[`NW_BITS-1:0] split_warp_num;
|
||||
wire[`NUM_THREADS-1:0] split_new_mask;
|
||||
wire[`NUM_THREADS-1:0] split_later_mask;
|
||||
wire[31:0] split_save_pc;
|
||||
wire is_split;
|
||||
wire dont_split;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire [`NW_BITS-1:0] split_warp_num;
|
||||
/* verilator lint_on UNUSED */
|
||||
wire [`NUM_THREADS-1:0] split_new_mask;
|
||||
wire [`NUM_THREADS-1:0] split_later_mask;
|
||||
wire [31:0] split_save_pc;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
|
||||
interface VX_wb_inter ();
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] write_data;
|
||||
wire[31:0] wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NUM_THREADS-1:0] wb_valid;
|
||||
wire[`NW_BITS-1:0] wb_warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] write_data;
|
||||
wire [31:0] wb_pc;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
wire [`NUM_THREADS-1:0] wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
interface VX_wstall_inter();
|
||||
|
||||
wire wstall;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire wstall;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -1,32 +1,28 @@
|
||||
`include "../VX_define.vh"
|
||||
|
||||
module VX_d_e_reg (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire in_branch_stall,
|
||||
input wire in_freeze,
|
||||
VX_frE_to_bckE_req_inter VX_frE_to_bckE_req,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire in_branch_stall,
|
||||
input wire in_freeze,
|
||||
VX_frE_to_bckE_req_inter vx_frE_to_bckE_req,
|
||||
VX_frE_to_bckE_req_inter vx_bckE_req
|
||||
);
|
||||
|
||||
wire stall = in_freeze;
|
||||
wire flush = (in_branch_stall == `STALL);
|
||||
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req
|
||||
VX_generic_register #(
|
||||
.N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)
|
||||
) d_e_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({vx_frE_to_bckE_req.csr_address, vx_frE_to_bckE_req.jalQual, vx_frE_to_bckE_req.ebreak, vx_frE_to_bckE_req.is_csr, vx_frE_to_bckE_req.csr_immed, vx_frE_to_bckE_req.csr_mask, vx_frE_to_bckE_req.rd, vx_frE_to_bckE_req.rs1, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.alu_op, vx_frE_to_bckE_req.wb, vx_frE_to_bckE_req.rs2_src, vx_frE_to_bckE_req.itype_immed, vx_frE_to_bckE_req.mem_read, vx_frE_to_bckE_req.mem_write, vx_frE_to_bckE_req.branch_type, vx_frE_to_bckE_req.upper_immed, vx_frE_to_bckE_req.curr_PC, vx_frE_to_bckE_req.jal, vx_frE_to_bckE_req.jal_offset, vx_frE_to_bckE_req.PC_next, vx_frE_to_bckE_req.valid, vx_frE_to_bckE_req.warp_num, vx_frE_to_bckE_req.is_wspawn, vx_frE_to_bckE_req.is_tmc, vx_frE_to_bckE_req.is_split, vx_frE_to_bckE_req.is_barrier}),
|
||||
.out ({vx_bckE_req.csr_address , vx_bckE_req.jalQual , vx_bckE_req.ebreak ,vx_bckE_req.is_csr , vx_bckE_req.csr_immed , vx_bckE_req.csr_mask , vx_bckE_req.rd , vx_bckE_req.rs1 , vx_bckE_req.rs2 , vx_bckE_req.alu_op , vx_bckE_req.wb , vx_bckE_req.rs2_src , vx_bckE_req.itype_immed , vx_bckE_req.mem_read , vx_bckE_req.mem_write , vx_bckE_req.branch_type , vx_bckE_req.upper_immed , vx_bckE_req.curr_PC , vx_bckE_req.jal , vx_bckE_req.jal_offset , vx_bckE_req.PC_next , vx_bckE_req.valid , vx_bckE_req.warp_num , vx_bckE_req.is_wspawn , vx_bckE_req.is_tmc , vx_bckE_req.is_split , vx_bckE_req.is_barrier })
|
||||
);
|
||||
|
||||
|
||||
wire stall = in_freeze;
|
||||
wire flush = (in_branch_stall == `STALL);
|
||||
|
||||
|
||||
VX_generic_register #(.N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)) d_e_reg
|
||||
(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
.flush(flush),
|
||||
.in ({VX_frE_to_bckE_req.csr_address, VX_frE_to_bckE_req.jalQual, VX_frE_to_bckE_req.ebreak, VX_frE_to_bckE_req.is_csr, VX_frE_to_bckE_req.csr_immed, VX_frE_to_bckE_req.csr_mask, VX_frE_to_bckE_req.rd, VX_frE_to_bckE_req.rs1, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.alu_op, VX_frE_to_bckE_req.wb, VX_frE_to_bckE_req.rs2_src, VX_frE_to_bckE_req.itype_immed, VX_frE_to_bckE_req.mem_read, VX_frE_to_bckE_req.mem_write, VX_frE_to_bckE_req.branch_type, VX_frE_to_bckE_req.upper_immed, VX_frE_to_bckE_req.curr_PC, VX_frE_to_bckE_req.jal, VX_frE_to_bckE_req.jal_offset, VX_frE_to_bckE_req.PC_next, VX_frE_to_bckE_req.valid, VX_frE_to_bckE_req.warp_num, VX_frE_to_bckE_req.is_wspawn, VX_frE_to_bckE_req.is_tmc, VX_frE_to_bckE_req.is_split, VX_frE_to_bckE_req.is_barrier}),
|
||||
.out ({VX_bckE_req.csr_address , VX_bckE_req.jalQual , VX_bckE_req.ebreak ,VX_bckE_req.is_csr , VX_bckE_req.csr_immed , VX_bckE_req.csr_mask , VX_bckE_req.rd , VX_bckE_req.rs1 , VX_bckE_req.rs2 , VX_bckE_req.alu_op , VX_bckE_req.wb , VX_bckE_req.rs2_src , VX_bckE_req.itype_immed , VX_bckE_req.mem_read , VX_bckE_req.mem_write , VX_bckE_req.branch_type , VX_bckE_req.upper_immed , VX_bckE_req.curr_PC , VX_bckE_req.jal , VX_bckE_req.jal_offset , VX_bckE_req.PC_next , VX_bckE_req.valid , VX_bckE_req.warp_num , VX_bckE_req.is_wspawn , VX_bckE_req.is_tmc , VX_bckE_req.is_split , VX_bckE_req.is_barrier })
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ module VX_priority_encoder_sm
|
||||
|
||||
// wire[`NUM_THREADS-1:0] new_left_requests = left_requests & ~(serviced_qual);
|
||||
|
||||
always @(posedge clk, posedge reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
left_requests <= 0;
|
||||
// serviced = 0;
|
||||
|
||||
@@ -8,8 +8,8 @@ module VX_shared_memory_block
|
||||
parameter BITS_PER_BANK = 3
|
||||
)
|
||||
(
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
//input wire[6:0] addr,
|
||||
//input wire[3:0][31:0] wdata,
|
||||
//input wire[1:0] we,
|
||||
@@ -22,28 +22,16 @@ module VX_shared_memory_block
|
||||
input wire shm_write,
|
||||
|
||||
output wire[SMB_WORDS_PER_READ-1:0][31:0] data_out
|
||||
|
||||
);
|
||||
|
||||
|
||||
`ifndef SYN
|
||||
|
||||
reg[SMB_WORDS_PER_READ-1:0][3:0][7:0] shared_memory[SMB_HEIGHT-1:0];
|
||||
|
||||
wire [$clog2(SMB_HEIGHT) - 1:0]reg_addr;
|
||||
reg [SMB_WORDS_PER_READ-1:0][3:0][7:0] shared_memory[SMB_HEIGHT-1:0];
|
||||
wire [$clog2(SMB_HEIGHT) - 1:0] reg_addr;
|
||||
|
||||
//wire need_to_write = (|we);
|
||||
integer curr_ind;
|
||||
// initial begin
|
||||
// for (curr_ind = 0; curr_ind < SMB_HEIGHT; curr_ind = curr_ind + 1)
|
||||
// begin
|
||||
// shared_memory[curr_ind] = 0;
|
||||
// end
|
||||
// end
|
||||
always @(posedge clk, posedge reset) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
//for (curr_ind = 0; curr_ind < 128; curr_ind = curr_ind + 1)
|
||||
end else if(shm_write) begin
|
||||
//--
|
||||
end else if (shm_write) begin
|
||||
if (we == 2'b00) shared_memory[reg_addr][0] <= wdata[0];
|
||||
if (we == 2'b01) shared_memory[reg_addr][1] <= wdata[1];
|
||||
if (we == 2'b10) shared_memory[reg_addr][2] <= wdata[2];
|
||||
@@ -52,10 +40,6 @@ module VX_shared_memory_block
|
||||
end
|
||||
|
||||
assign reg_addr = addr;
|
||||
// always @(posedge clk)
|
||||
// reg_addr <= addr;
|
||||
|
||||
|
||||
assign data_out = shm_write ? 0 : shared_memory[reg_addr];
|
||||
|
||||
`else
|
||||
@@ -69,6 +53,7 @@ module VX_shared_memory_block
|
||||
//assign write_bit_mask[1] = (we == 2'b01) ? {32{1'b1}} : {32{1'b0}};
|
||||
//assign write_bit_mask[2] = (we == 2'b10) ? {32{1'b1}} : {32{1'b0}};
|
||||
//assign write_bit_mask[3] = (we == 2'b11) ? {32{1'b1}} : {32{1'b0}};
|
||||
|
||||
genvar curr_word;
|
||||
for (curr_word = 0; curr_word < SMB_WORDS_PER_READ; curr_word = curr_word + 1)
|
||||
begin
|
||||
@@ -115,7 +100,6 @@ module VX_shared_memory_block
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -51,18 +51,17 @@ void Simulator::ibus_driver() {
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex_->I_dram_req && !I_dram_stalled_) {
|
||||
if (!I_dram_stalled_) {
|
||||
// std::cout << "Icache Dram Request received!\n";
|
||||
if (vortex_->I_dram_req_read) {
|
||||
// std::cout << "Icache Dram Request is read!\n";
|
||||
// Need to add an element
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.data_length = vortex_->I_dram_req_size / 4;
|
||||
dram_req.base_addr = vortex_->I_dram_req_addr;
|
||||
dram_req.data = (unsigned *)malloc(dram_req.data_length * sizeof(unsigned));
|
||||
dram_req.data = (unsigned *)malloc(GLOBAL_BLOCK_SIZE_BYTES);
|
||||
|
||||
for (int i = 0; i < dram_req.data_length; i++) {
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = dram_req.base_addr + (i * 4);
|
||||
unsigned data_rd;
|
||||
ram_->getWord(curr_addr, &data_rd);
|
||||
@@ -74,9 +73,8 @@ void Simulator::ibus_driver() {
|
||||
|
||||
if (vortex_->I_dram_req_write) {
|
||||
unsigned base_addr = vortex_->I_dram_req_addr;
|
||||
unsigned data_length = vortex_->I_dram_req_size / 4;
|
||||
|
||||
for (int i = 0; i < data_length; i++) {
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = base_addr + (i * 4);
|
||||
unsigned data_wr = vortex_->I_dram_req_data[i];
|
||||
ram_->writeWord(curr_addr, &data_wr);
|
||||
@@ -84,22 +82,22 @@ void Simulator::ibus_driver() {
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex_->I_dram_fill_accept && dequeue_valid) {
|
||||
if (vortex_->I_dram_rsp_ready && dequeue_valid) {
|
||||
// std::cout << "Icache Dram Response Sending...!\n";
|
||||
|
||||
vortex_->I_dram_fill_rsp = 1;
|
||||
vortex_->I_dram_fill_rsp_addr = I_dram_req_vec_[dequeue_index].base_addr;
|
||||
vortex_->I_dram_rsp_valid = 1;
|
||||
vortex_->I_dram_rsp_addr = I_dram_req_vec_[dequeue_index].base_addr;
|
||||
// std::cout << "Fill Rsp -> Addr: " << std::hex << (I_dram_req_vec_[dequeue_index].base_addr) << std::dec << "\n";
|
||||
|
||||
for (int i = 0; i < I_dram_req_vec_[dequeue_index].data_length; i++) {
|
||||
vortex_->I_dram_fill_rsp_data[i] = I_dram_req_vec_[dequeue_index].data[i];
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
vortex_->I_dram_rsp_data[i] = I_dram_req_vec_[dequeue_index].data[i];
|
||||
}
|
||||
free(I_dram_req_vec_[dequeue_index].data);
|
||||
|
||||
I_dram_req_vec_.erase(I_dram_req_vec_.begin() + dequeue_index);
|
||||
} else {
|
||||
vortex_->I_dram_fill_rsp = 0;
|
||||
vortex_->I_dram_fill_rsp_addr = 0;
|
||||
vortex_->I_dram_rsp_valid = 0;
|
||||
vortex_->I_dram_rsp_addr = 0;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DRAM_STALLS
|
||||
@@ -112,7 +110,7 @@ void Simulator::ibus_driver() {
|
||||
}
|
||||
#endif
|
||||
|
||||
vortex_->dram_req_delay = I_dram_stalled_;
|
||||
vortex_->dram_req_full = I_dram_stalled_;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -144,63 +142,15 @@ void Simulator::dbus_driver() {
|
||||
|
||||
#ifdef USE_MULTICORE
|
||||
|
||||
if (vortex_->out_dram_req && !dram_stalled_) {
|
||||
if (vortex_->out_dram_req_read) {
|
||||
// Need to add an element
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.data_length = vortex_->out_dram_req_size / 4;
|
||||
dram_req.base_addr = vortex_->out_dram_req_addr;
|
||||
dram_req.data = (unsigned *)malloc(dram_req.data_length * sizeof(unsigned));
|
||||
|
||||
for (int i = 0; i < dram_req.data_length; i++) {
|
||||
unsigned curr_addr = dram_req.base_addr + (i * 4);
|
||||
unsigned data_rd;
|
||||
ram_->getWord(curr_addr, &data_rd);
|
||||
dram_req.data[i] = data_rd;
|
||||
}
|
||||
dram_req_vec_.push_back(dram_req);
|
||||
}
|
||||
|
||||
if (vortex_->out_dram_req_write) {
|
||||
unsigned base_addr = vortex_->out_dram_req_addr;
|
||||
unsigned data_length = vortex_->out_dram_req_size / 4;
|
||||
|
||||
for (int i = 0; i < data_length; i++) {
|
||||
unsigned curr_addr = base_addr + (i * 4);
|
||||
unsigned data_wr = vortex_->out_dram_req_data[i];
|
||||
ram_->writeWord(curr_addr, &data_wr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex_->out_dram_fill_accept && dequeue_valid) {
|
||||
vortex_->out_dram_fill_rsp = 1;
|
||||
vortex_->out_dram_fill_rsp_addr = dram_req_vec_[dequeue_index].base_addr;
|
||||
|
||||
for (int i = 0; i < dram_req_vec_[dequeue_index].data_length; i++) {
|
||||
vortex_->out_dram_fill_rsp_data[i] = dram_req_vec_[dequeue_index].data[i];
|
||||
}
|
||||
free(dram_req_vec_[dequeue_index].data);
|
||||
|
||||
dram_req_vec_.erase(dram_req_vec_.begin() + dequeue_index);
|
||||
} else {
|
||||
vortex_->out_dram_fill_rsp = 0;
|
||||
vortex_->out_dram_fill_rsp_addr = 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if (vortex_->dram_req && !dram_stalled_) {
|
||||
if (!dram_stalled_) {
|
||||
if (vortex_->dram_req_read) {
|
||||
// Need to add an element
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.data_length = vortex_->dram_req_size / 4;
|
||||
dram_req.base_addr = vortex_->dram_req_addr;
|
||||
dram_req.data = (unsigned *)malloc(dram_req.data_length * sizeof(unsigned));
|
||||
dram_req.data = (unsigned *)malloc(GLOBAL_BLOCK_SIZE_BYTES);
|
||||
|
||||
for (int i = 0; i < dram_req.data_length; i++) {
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = dram_req.base_addr + (i * 4);
|
||||
unsigned data_rd;
|
||||
ram_->getWord(curr_addr, &data_rd);
|
||||
@@ -211,9 +161,8 @@ void Simulator::dbus_driver() {
|
||||
|
||||
if (vortex_->dram_req_write) {
|
||||
unsigned base_addr = vortex_->dram_req_addr;
|
||||
unsigned data_length = vortex_->dram_req_size / 4;
|
||||
|
||||
for (int i = 0; i < data_length; i++) {
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = base_addr + (i * 4);
|
||||
unsigned data_wr = vortex_->dram_req_data[i];
|
||||
ram_->writeWord(curr_addr, &data_wr);
|
||||
@@ -221,34 +170,79 @@ void Simulator::dbus_driver() {
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex_->dram_fill_accept && dequeue_valid) {
|
||||
vortex_->dram_fill_rsp = 1;
|
||||
vortex_->dram_fill_rsp_addr = dram_req_vec_[dequeue_index].base_addr;
|
||||
if (vortex_->dram_rsp_ready && dequeue_valid) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
vortex_->dram_rsp_addr = dram_req_vec_[dequeue_index].base_addr;
|
||||
|
||||
for (int i = 0; i < dram_req_vec_[dequeue_index].data_length; i++) {
|
||||
vortex_->dram_fill_rsp_data[i] = dram_req_vec_[dequeue_index].data[i];
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
vortex_->dram_rsp_data[i] = dram_req_vec_[dequeue_index].data[i];
|
||||
}
|
||||
free(dram_req_vec_[dequeue_index].data);
|
||||
|
||||
dram_req_vec_.erase(dram_req_vec_.begin() + dequeue_index);
|
||||
} else {
|
||||
vortex_->dram_fill_rsp = 0;
|
||||
vortex_->dram_fill_rsp_addr = 0;
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
vortex_->dram_rsp_addr = 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if (!dram_stalled_) {
|
||||
if (vortex_->dram_req_read) {
|
||||
// Need to add an element
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.base_addr = vortex_->dram_req_addr;
|
||||
dram_req.data = (unsigned *)malloc(GLOBAL_BLOCK_SIZE_BYTES);
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = dram_req.base_addr + (i * 4);
|
||||
unsigned data_rd;
|
||||
ram_->getWord(curr_addr, &data_rd);
|
||||
dram_req.data[i] = data_rd;
|
||||
}
|
||||
dram_req_vec_.push_back(dram_req);
|
||||
}
|
||||
|
||||
if (vortex_->dram_req_write) {
|
||||
unsigned base_addr = vortex_->dram_req_addr;
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = base_addr + (i * 4);
|
||||
unsigned data_wr = vortex_->dram_req_data[i];
|
||||
ram_->writeWord(curr_addr, &data_wr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex_->dram_rsp_ready && dequeue_valid) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
vortex_->dram_rsp_addr = dram_req_vec_[dequeue_index].base_addr;
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
vortex_->dram_rsp_data[i] = dram_req_vec_[dequeue_index].data[i];
|
||||
}
|
||||
free(dram_req_vec_[dequeue_index].data);
|
||||
|
||||
dram_req_vec_.erase(dram_req_vec_.begin() + dequeue_index);
|
||||
} else {
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
vortex_->dram_rsp_addr = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef USE_MULTICORE
|
||||
vortex_->out_dram_req_delay = dram_stalled_;
|
||||
vortex_->dram_req_full = dram_stalled_;
|
||||
#else
|
||||
vortex_->dram_req_delay = dram_stalled_;
|
||||
vortex_->dram_req_full = dram_stalled_;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Simulator::io_handler() {
|
||||
#ifdef USE_MULTICORE
|
||||
bool io_valid = false;
|
||||
for (int c = 0; c < vortex_->number_cores; c++) {
|
||||
for (int c = 0; c < NUM_CORES; c++) {
|
||||
if (vortex_->io_valid[c]) {
|
||||
uint32_t data_write = (uint32_t)vortex_->io_data[c];
|
||||
char c = (char)data_write;
|
||||
@@ -318,33 +312,33 @@ void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
|
||||
#ifdef USE_MULTICORE
|
||||
// submit snoop requests for the needed blocks
|
||||
vortex_->llc_snp_req_addr = aligned_addr_start;
|
||||
vortex_->llc_snp_req = false;
|
||||
vortex_->llc_snp_req_valid = false;
|
||||
for (;;) {
|
||||
this->step();
|
||||
if (vortex_->llc_snp_req) {
|
||||
vortex_->llc_snp_req = false;
|
||||
if (vortex_->llc_snp_req_valid) {
|
||||
vortex_->llc_snp_req_valid = false;
|
||||
if (vortex_->llc_snp_req_addr >= aligned_addr_end)
|
||||
break;
|
||||
vortex_->llc_snp_req_addr += GLOBAL_BLOCK_SIZE_BYTES;
|
||||
}
|
||||
if (!vortex_->llc_snp_req_delay) {
|
||||
vortex_->llc_snp_req = true;
|
||||
if (!vortex_->llc_snp_req_full) {
|
||||
vortex_->llc_snp_req_valid = true;
|
||||
}
|
||||
}
|
||||
#else
|
||||
// submit snoop requests for the needed blocks
|
||||
vortex_->snp_req_addr = aligned_addr_start;
|
||||
vortex_->snp_req = false;
|
||||
vortex_->snp_req_valid = false;
|
||||
for (;;) {
|
||||
this->step();
|
||||
if (vortex_->snp_req) {
|
||||
vortex_->snp_req = false;
|
||||
if (vortex_->snp_req_valid) {
|
||||
vortex_->snp_req_valid = false;
|
||||
if (vortex_->snp_req_addr >= aligned_addr_end)
|
||||
break;
|
||||
vortex_->snp_req_addr += GLOBAL_BLOCK_SIZE_BYTES;
|
||||
}
|
||||
if (!vortex_->snp_req_delay) {
|
||||
vortex_->snp_req = true;
|
||||
if (!vortex_->snp_req_full) {
|
||||
vortex_->snp_req_valid = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -362,7 +356,6 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
// this->send_snoops(mem_addr, size);
|
||||
// this->wait(PIPELINE_FLUSH_LATENCY);
|
||||
// #endif
|
||||
|
||||
}
|
||||
|
||||
bool Simulator::run() {
|
||||
@@ -381,7 +374,7 @@ bool Simulator::run() {
|
||||
int status = 0;
|
||||
#else
|
||||
// check riscv-tests PASSED/FAILED status
|
||||
int status = (int)vortex_->Vortex->vx_back_end->VX_wb->last_data_wb & 0xf;
|
||||
int status = (int)vortex_->Vortex->vx_back_end->vx_wb->last_data_wb & 0xf;
|
||||
#endif
|
||||
|
||||
return (status == 1);
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
int data_length;
|
||||
unsigned base_addr;
|
||||
unsigned *data;
|
||||
} dram_req_t;
|
||||
|
||||
@@ -12,7 +12,7 @@ int main(int argc, char **argv)
|
||||
|
||||
Verilated::commandArgs(argc, argv);
|
||||
|
||||
//#define ALL_TESTS
|
||||
#define ALL_TESTS
|
||||
#ifdef ALL_TESTS
|
||||
bool passed = true;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user