add fpnew

This commit is contained in:
Blaise Tine
2020-07-23 06:23:05 -04:00
46 changed files with 867 additions and 341 deletions

View File

@@ -16,12 +16,12 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO DBG_FLAGS += -DDBG_CORE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
DEBUG=1 #DEBUG=1
#AFU=1 #AFU=1
CFLAGS += -fPIC CFLAGS += -fPIC

View File

@@ -4,13 +4,16 @@ FPGA_BUILD_DIR=build_fpga
all: ase-1c all: ase-1c
ase-1c: setup-ase-1c sources.txt:
./gen_sources.sh
ase-1c: setup-ase-1c sources.txt
make -C $(ASE_BUILD_DIR)_1c make -C $(ASE_BUILD_DIR)_1c
ase-2c: setup-ase-2c ase-2c: setup-ase-2c sources.txt
make -C $(ASE_BUILD_DIR)_2c make -C $(ASE_BUILD_DIR)_2c
ase-4c: setup-ase-4c ase-4c: setup-ase-4c sources.txt
make -C $(ASE_BUILD_DIR)_4c make -C $(ASE_BUILD_DIR)_4c
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile
@@ -28,13 +31,13 @@ $(ASE_BUILD_DIR)_2c/Makefile:
$(ASE_BUILD_DIR)_4c/Makefile: $(ASE_BUILD_DIR)_4c/Makefile:
afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c
fpga-1c: setup-fpga-1c fpga-1c: setup-fpga-1c sources.txt
cd $(FPGA_BUILD_DIR)_1c && qsub-synth cd $(FPGA_BUILD_DIR)_1c && qsub-synth
fpga-2c: setup-fpga-2c fpga-2c: setup-fpga-2c sources.txt
cd $(FPGA_BUILD_DIR)_2c && qsub-synth cd $(FPGA_BUILD_DIR)_2c && qsub-synth
fpga-4c: setup-fpga-4c fpga-4c: setup-fpga-4c sources.txt
cd $(FPGA_BUILD_DIR)_4c && qsub-synth cd $(FPGA_BUILD_DIR)_4c && qsub-synth
setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf

View File

@@ -60,8 +60,8 @@ qsub-sim
make ase make ase
# tests # tests
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic ./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo ./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd ./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
# modify "vsim_run.tcl" to dump VCD trace # modify "vsim_run.tcl" to dump VCD trace

21
hw/opae/gen_sources.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
#
# Generate sources.txt, the list of RTL sources for the build.
#
# For every directory in dir_list this writes one "+incdir+<dir>" line
# followed by the paths of the Verilog (*.v) and SystemVerilog (*.sv) files
# found directly inside that directory (no recursion).
#
# Paths are relative, so the script must be run from the directory it
# lives in. sources.txt is overwritten on every run.

# Space-separated list of RTL directories, in the order they are emitted.
dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl'

# Build the matching "-I<dir>" include flags; they are only printed to
# stdout for reference and are not written to sources.txt.
inc_list=""
for dir in $dir_list; do
    inc_list="$inc_list -I$dir"
done

echo "inc_list=$inc_list"

{
    # read design sources
    for dir in $dir_list; do
        echo "+incdir+$dir"
        # The -name tests are grouped so that -type f applies to both
        # patterns: find's implicit AND binds tighter than -o, so without
        # the parentheses -type f would only constrain the '*.sv' branch.
        # Letting find print the paths itself (instead of looping over
        # $(find ...)) avoids word-splitting and globbing of the results.
        find "$dir" -maxdepth 1 -type f \( -name '*.v' -o -name '*.sv' \)
    done
} > sources.txt

View File

@@ -1,46 +1,34 @@
vortex_afu.json
QI:vortex_afu.qsf
#+define+SCOPE
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_SCOPE
+incdir+.
+incdir+../rtl
+incdir+../rtl/interfaces
+incdir+../rtl/pipe_regs
+incdir+../rtl/cache
+incdir+../rtl/libs +incdir+../rtl/libs
../rtl/libs/VX_countones.v
../rtl/VX_user_config.vh ../rtl/libs/VX_divide.v
../rtl/VX_config.vh ../rtl/libs/VX_fair_arbiter.v
../rtl/VX_define.vh ../rtl/libs/VX_fixed_arbiter.v
../rtl/libs/VX_generic_queue.v
../rtl/cache/VX_cache_config.vh ../rtl/libs/VX_generic_register.v
../rtl/cache/VX_cache.v ../rtl/libs/VX_generic_stack.v
../rtl/cache/VX_cache_core_rsp_merge.v ../rtl/libs/VX_index_queue.v
../rtl/cache/VX_cache_core_req_bank_sel.v ../rtl/libs/VX_matrix_arbiter.v
../rtl/cache/VX_cache_dram_req_arb.v ../rtl/libs/VX_mult.v
../rtl/cache/VX_cache_dram_fill_arb.v ../rtl/libs/VX_priority_encoder.v
../rtl/cache/VX_cache_miss_resrv.v ../rtl/libs/VX_rr_arbiter.v
../rtl/libs/VX_onehot_encooder.v
+incdir+../rtl/cache
../rtl/cache/VX_bank.v ../rtl/cache/VX_bank.v
../rtl/cache/VX_bank_core_req_arb.v ../rtl/cache/VX_bank_core_req_arb.v
../rtl/cache/VX_cache.v
../rtl/cache/VX_cache_core_req_bank_sel.v
../rtl/cache/VX_cache_core_rsp_merge.v
../rtl/cache/VX_cache_dram_fill_arb.v
../rtl/cache/VX_cache_dram_req_arb.v
../rtl/cache/VX_cache_miss_resrv.v
../rtl/cache/VX_prefetcher.v
../rtl/cache/VX_snp_forwarder.v
../rtl/cache/VX_snp_rsp_arb.v ../rtl/cache/VX_snp_rsp_arb.v
../rtl/cache/VX_tag_data_access.v ../rtl/cache/VX_tag_data_access.v
../rtl/cache/VX_tag_data_structure.v ../rtl/cache/VX_tag_data_structure.v
../rtl/cache/VX_snp_forwarder.v +incdir+../rtl/interfaces
../rtl/cache/VX_prefetcher.v ../rtl/interfaces/VX_alu_req_if.v
../rtl/interfaces/VX_branch_ctl_if.v
../rtl/interfaces/VX_branch_rsp_if.v
../rtl/interfaces/VX_cache_core_req_if.v ../rtl/interfaces/VX_cache_core_req_if.v
../rtl/interfaces/VX_cache_core_rsp_if.v ../rtl/interfaces/VX_cache_core_rsp_if.v
../rtl/interfaces/VX_cache_dram_req_if.v ../rtl/interfaces/VX_cache_dram_req_if.v
@@ -48,65 +36,46 @@ QI:vortex_afu.qsf
../rtl/interfaces/VX_cache_snp_req_if.v ../rtl/interfaces/VX_cache_snp_req_if.v
../rtl/interfaces/VX_cache_snp_rsp_if.v ../rtl/interfaces/VX_cache_snp_rsp_if.v
../rtl/interfaces/VX_csr_req_if.v ../rtl/interfaces/VX_csr_req_if.v
../rtl/interfaces/VX_commit_if.v
../rtl/interfaces/VX_csr_io_req_if.v ../rtl/interfaces/VX_csr_io_req_if.v
../rtl/interfaces/VX_csr_io_rsp_if.v ../rtl/interfaces/VX_decode_if.v
../rtl/interfaces/VX_exec_unit_req_if.v ../rtl/interfaces/VX_gpr_data_if.v
../rtl/interfaces/VX_backend_req_if.v ../rtl/interfaces/VX_gpu_req_if.v
../rtl/interfaces/VX_gpr_read_if.v
../rtl/interfaces/VX_gpu_inst_req_if.v
../rtl/interfaces/VX_inst_meta_if.v
../rtl/interfaces/VX_jal_rsp_if.v
../rtl/interfaces/VX_join_if.v ../rtl/interfaces/VX_join_if.v
../rtl/interfaces/VX_lsu_req_if.v ../rtl/interfaces/VX_lsu_req_if.v
../rtl/interfaces/VX_warp_ctl_if.v ../rtl/interfaces/VX_warp_ctl_if.v
../rtl/interfaces/VX_wb_if.v ../rtl/interfaces/VX_wb_if.v
../rtl/interfaces/VX_wstall_if.v ../rtl/interfaces/VX_wstall_if.v
../rtl/interfaces/VX_csr_io_rsp_if.v
../rtl/libs/VX_generic_register.v ../rtl/interfaces/VX_ifetch_req_if.v
../rtl/libs/VX_mult.v ../rtl/interfaces/VX_ifetch_rsp_if.v
../rtl/libs/VX_divide.v ../rtl/interfaces/VX_mul_req_if.v
../rtl/libs/VX_generic_stack.v ../rtl/interfaces/VX_perf_cntrs_if.v
../rtl/libs/VX_priority_encoder.v +incdir+../rtl
../rtl/libs/VX_generic_queue.v ../rtl/VX_alu_unit.v
../rtl/libs/VX_indexable_queue.v ../rtl/VX_commit.v
../rtl/libs/VX_fair_arbiter.v
../rtl/libs/VX_fixed_arbiter.v
../rtl/libs/VX_rr_arbiter.v
../rtl/libs/VX_countones.v
../rtl/libs/VX_scope.v
../rtl/Vortex.v
../rtl/VX_cluster.v ../rtl/VX_cluster.v
../rtl/VX_core.v ../rtl/VX_core.v
../rtl/VX_mem_unit.v
../rtl/VX_pipeline.v
../rtl/VX_front_end.v
../rtl/VX_back_end.v
../rtl/VX_fetch.v
../rtl/VX_scheduler.v
../rtl/VX_exec_unit.v
../rtl/VX_warp.v
../rtl/VX_icache_stage.v
../rtl/VX_gpr_wrapper.v
../rtl/VX_gpu_inst.v
../rtl/VX_writeback.v
../rtl/VX_csr_pipe.v
../rtl/VX_csr_data.v ../rtl/VX_csr_data.v
../rtl/VX_csr_arb.v ../rtl/VX_csr_arb.v
../rtl/VX_dcache_arb.v
../rtl/VX_decode.v
../rtl/VX_csr_io_arb.v ../rtl/VX_csr_io_arb.v
../rtl/VX_warp_sched.v ../rtl/VX_fetch.v
../rtl/VX_csr_unit.v
../rtl/VX_gpr_ram.v ../rtl/VX_gpr_ram.v
../rtl/VX_gpr_stage.v ../rtl/VX_gpr_stage.v
../rtl/VX_alu_unit.v ../rtl/VX_execute.v
../rtl/VX_gpu_unit.v
../rtl/VX_icache_stage.v
../rtl/VX_issue.v
../rtl/VX_lsu_unit.v ../rtl/VX_lsu_unit.v
../rtl/VX_decode.v
../rtl/VX_inst_multiplex.v
../rtl/VX_dcache_arb.v
../rtl/VX_mem_arb.v ../rtl/VX_mem_arb.v
../rtl/VX_f_d_reg.v ../rtl/VX_mem_unit.v
../rtl/VX_i_d_reg.v ../rtl/VX_pipeline.v
../rtl/VX_d_e_reg.v ../rtl/VX_scheduler.v
../rtl/VX_issue_mux.v
ccip_interface_reg.sv ../rtl/VX_warp_sched.v
ccip_std_afu.sv ../rtl/VX_writeback.v
vortex_afu.sv ../rtl/Vortex.v
../rtl/VX_mul_unit.v

View File

@@ -1,3 +1,21 @@
+define+NUM_CORES=1 +define+NUM_CORES=1
#+define+SCOPE
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_SCOPE
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt C:sources.txt

View File

@@ -1,4 +1,10 @@
+define+NUM_CORES=2 +define+NUM_CORES=2
+define+L2_ENABLE=0 +define+L2_ENABLE=0
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt C:sources.txt

View File

@@ -1,4 +1,10 @@
+define+NUM_CORES=4 +define+NUM_CORES=4
+define+L2_ENABLE=0 +define+L2_ENABLE=0
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt C:sources.txt

View File

@@ -1,4 +1,6 @@
`include "VX_define.vh" `include "VX_define.vh"
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
module VX_alu_unit #( module VX_alu_unit #(
parameter CORE_ID = 0 parameter CORE_ID = 0
@@ -13,7 +15,7 @@ module VX_alu_unit #(
VX_branch_ctl_if branch_ctl_if, VX_branch_ctl_if branch_ctl_if,
VX_commit_if alu_commit_if VX_commit_if alu_commit_if
); );
wire [`NUM_THREADS-1:0][31:0] alu_result; reg [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][32:0] sub_result; wire [`NUM_THREADS-1:0][32:0] sub_result;
wire [`NUM_THREADS-1:0][32:0] shift_result; wire [`NUM_THREADS-1:0][32:0] shift_result;
@@ -99,7 +101,7 @@ module VX_alu_unit #(
); );
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)), .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
) alu_reg ( ) alu_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -11,6 +11,7 @@ module VX_commit #(
VX_commit_if lsu_commit_if, VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if, VX_commit_if mul_commit_if,
VX_commit_if csr_commit_if, VX_commit_if csr_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if, VX_commit_if gpu_commit_if,
// outputs // outputs
@@ -21,8 +22,9 @@ module VX_commit #(
wire [`NUM_EXS-1:0] commited_mask; wire [`NUM_EXS-1:0] commited_mask;
assign commited_mask = {((| alu_commit_if.valid) && alu_commit_if.ready), assign commited_mask = {((| alu_commit_if.valid) && alu_commit_if.ready),
((| lsu_commit_if.valid) && lsu_commit_if.ready), ((| lsu_commit_if.valid) && lsu_commit_if.ready),
((| mul_commit_if.valid) && mul_commit_if.ready),
((| csr_commit_if.valid) && csr_commit_if.ready), ((| csr_commit_if.valid) && csr_commit_if.ready),
((| mul_commit_if.valid) && mul_commit_if.ready),
((| fpu_commit_if.valid) && fpu_commit_if.ready),
((| gpu_commit_if.valid) && gpu_commit_if.ready)}; ((| gpu_commit_if.valid) && gpu_commit_if.ready)};
wire [`NE_BITS:0] num_commits; wire [`NE_BITS:0] num_commits;
@@ -65,6 +67,7 @@ module VX_commit #(
.lsu_commit_if (lsu_commit_if), .lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if), .csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if), .mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.writeback_if (writeback_if) .writeback_if (writeback_if)
); );
@@ -77,11 +80,14 @@ module VX_commit #(
if ((| lsu_commit_if.valid) && lsu_commit_if.ready) begin if ((| lsu_commit_if.valid) && lsu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data); $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
end end
if ((| csr_commit_if.valid) && csr_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
end
if ((| mul_commit_if.valid) && mul_commit_if.ready) begin if ((| mul_commit_if.valid) && mul_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data); $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
end end
if ((| csr_commit_if.valid) && csr_commit_if.ready) begin if ((| fpu_commit_if.valid) && fpu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data); $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.warp_num, fpu_commit_if.curr_PC, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
end end
if ((| gpu_commit_if.valid) && gpu_commit_if.ready) begin if ((| gpu_commit_if.valid) && gpu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data); $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);

View File

@@ -15,40 +15,41 @@ module VX_csr_arb (
VX_commit_if csr_rsp_if, VX_commit_if csr_rsp_if,
// outputs // outputs
VX_commit_if csr_commit_if,
VX_csr_io_rsp_if csr_io_rsp_if, VX_csr_io_rsp_if csr_io_rsp_if,
VX_commit_if csr_commit_if
input wire select_io_req,
input wire select_io_rsp
); );
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
wire core_select = ~(| csr_io_req_if.valid);
// requests // requests
assign csr_req_if.valid = core_select ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}}; assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
assign csr_req_if.warp_num = core_select ? csr_core_req_if.warp_num : 0; assign csr_req_if.warp_num = (~select_io_req) ? csr_core_req_if.warp_num : 0;
assign csr_req_if.curr_PC = core_select ? csr_core_req_if.curr_PC : 0; assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
assign csr_req_if.csr_op = core_select ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS); assign csr_req_if.csr_op = (~select_io_req) ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_req_if.csr_addr = core_select ? csr_core_req_if.csr_addr : csr_io_req_if.addr; assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_req_if.csr_mask = core_select ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0); assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_req_if.rd = core_select ? csr_core_req_if.rd : 0; assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
assign csr_req_if.wb = core_select ? csr_core_req_if.wb : 0; assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
assign csr_req_if.is_io = ~core_select; assign csr_req_if.is_io = select_io_req;
assign csr_core_req_if.ready = csr_req_if.ready && core_select; assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req);
assign csr_io_req_if.ready = csr_req_if.ready && ~core_select; assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
// responses // responses
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io; assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & select_io_rsp;
assign csr_io_rsp_if.data = csr_rsp_if.data[0]; assign csr_io_rsp_if.data = csr_rsp_if.data[0];
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}}; assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~select_io_rsp}};
assign csr_commit_if.warp_num = csr_rsp_if.warp_num; assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC; assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
assign csr_commit_if.data = csr_rsp_if.data; assign csr_commit_if.data = csr_rsp_if.data;
assign csr_commit_if.rd = csr_rsp_if.rd; assign csr_commit_if.rd = csr_rsp_if.rd;
assign csr_commit_if.wb = csr_rsp_if.wb; assign csr_commit_if.wb = csr_rsp_if.wb;
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_commit_if.ready; assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
endmodule endmodule

View File

@@ -7,6 +7,7 @@ module VX_csr_unit #(
input wire reset, input wire reset,
VX_perf_cntrs_if perf_cntrs_if, VX_perf_cntrs_if perf_cntrs_if,
VX_fpu_to_csr_if fpu_to_csr_if,
VX_csr_io_req_if csr_io_req_if, VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if, VX_csr_io_rsp_if csr_io_rsp_if,
@@ -17,15 +18,23 @@ module VX_csr_unit #(
VX_csr_req_if csr_pipe_req_if(); VX_csr_req_if csr_pipe_req_if();
VX_commit_if csr_pipe_commit_if(); VX_commit_if csr_pipe_commit_if();
wire select_io_req = (| csr_io_req_if.valid);
wire select_io_rsp;
VX_csr_arb csr_arb ( VX_csr_arb csr_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.csr_core_req_if (csr_req_if), .csr_core_req_if (csr_req_if),
.csr_io_req_if (csr_io_req_if), .csr_io_req_if (csr_io_req_if),
.csr_req_if (csr_pipe_req_if), .csr_req_if (csr_pipe_req_if),
.csr_rsp_if (csr_pipe_commit_if), .csr_rsp_if (csr_pipe_commit_if),
.csr_io_rsp_if (csr_io_rsp_if), .csr_io_rsp_if (csr_io_rsp_if),
.csr_commit_if (csr_commit_if) .csr_commit_if (csr_commit_if),
.select_io_req (select_io_req),
.select_io_rsp (select_io_rsp)
); );
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2; wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
@@ -68,14 +77,14 @@ module VX_csr_unit #(
wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid); wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid);
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32) .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + `CSR_ADDR_SIZE + 1 + 32 + 32)
) csr_reg ( ) csr_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (0), .flush (0),
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}), .in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, csr_pipe_commit_if.is_io, csr_read_data_s2, csr_updated_data_s2}) .out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, select_io_rsp, csr_read_data_s2, csr_updated_data_s2})
); );
genvar i; genvar i;

View File

@@ -20,9 +20,10 @@ module VX_decode #(
reg [`ALU_BITS-1:0] alu_op; reg [`ALU_BITS-1:0] alu_op;
reg [`BR_BITS-1:0] br_op; reg [`BR_BITS-1:0] br_op;
reg [`MUL_BITS-1:0] mul_op;
wire [`LSU_BITS-1:0] lsu_op; wire [`LSU_BITS-1:0] lsu_op;
reg [`CSR_BITS-1:0] csr_op; reg [`CSR_BITS-1:0] csr_op;
reg [`MUL_BITS-1:0] mul_op;
reg [`FPU_BITS-1:0] fpu_op;
reg [`GPU_BITS-1:0] gpu_op; reg [`GPU_BITS-1:0] gpu_op;
reg [19:0] upper_imm; reg [19:0] upper_imm;
@@ -37,6 +38,7 @@ module VX_decode #(
wire [`NR_BITS-1:0] rd = instr[11:7]; wire [`NR_BITS-1:0] rd = instr[11:7];
wire [`NR_BITS-1:0] rs1 = instr[19:15]; wire [`NR_BITS-1:0] rs1 = instr[19:15];
wire [`NR_BITS-1:0] rs2 = instr[24:20]; wire [`NR_BITS-1:0] rs2 = instr[24:20];
wire [`NR_BITS-1:0] rs3 = instr[31:27];
// opcode types // opcode types
wire is_rtype = (opcode == `INST_R); wire is_rtype = (opcode == `INST_R);
@@ -51,10 +53,9 @@ module VX_decode #(
wire is_jals = (opcode == `INST_SYS) && (func3 == 0); wire is_jals = (opcode == `INST_SYS) && (func3 == 0);
wire is_csr = (opcode == `INST_SYS) && (func3 != 0); wire is_csr = (opcode == `INST_SYS) && (func3 != 0);
wire is_gpu = (opcode == `INST_GPU); wire is_gpu = (opcode == `INST_GPU);
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
wire is_mul = is_rtype && (func7 == 7'h1);
// upper immediate // upper immediate
always @(*) begin always @(*) begin
case (opcode) case (opcode)
`INST_LUI: upper_imm = {func7, rs2, rs1, func3}; `INST_LUI: upper_imm = {func7, rs2, rs1, func3};
@@ -63,20 +64,8 @@ module VX_decode #(
endcase endcase
end end
// JAL
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
wire [11:0] jalr_imm = {func7, rs2};
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
always @(*) begin
case (opcode)
`INST_JAL: jalx_offset = jal_offset;
`INST_JALR: jalx_offset = jalr_offset;
default: jalx_offset = 32'd4;
endcase
end
// I-type immediate // I-type immediate
wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5); wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2}; wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2};
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12; wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
@@ -90,7 +79,24 @@ module VX_decode #(
endcase endcase
end end
// JAL
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
wire [11:0] jalr_imm = {func7, rs2};
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
always @(*) begin
case (opcode)
`INST_JAL: jalx_offset = jal_offset;
`INST_JALR: jalx_offset = jalr_offset;
default: jalx_offset = 32'd4;
endcase
end
// BRANCH // BRANCH
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
always @(*) begin always @(*) begin
br_op = `BR_EQ; br_op = `BR_EQ;
case (opcode) case (opcode)
@@ -119,6 +125,7 @@ module VX_decode #(
end end
// ALU // ALU
always @(*) begin always @(*) begin
alu_op = `ALU_OTHER; alu_op = `ALU_OTHER;
if (is_lui) begin if (is_lui) begin
@@ -140,7 +147,29 @@ module VX_decode #(
end end
end end
// LSU
wire is_lsu = (is_ltype || is_stype);
assign lsu_op = {is_stype, func3};
// CSR
wire is_csr_imm = is_csr && (func3[2] == 1);
always @(*) begin
csr_op = `CSR_OTHER;
case (func3[1:0])
2'h1: csr_op = `CSR_RW;
2'h2: csr_op = `CSR_RS;
2'h3: csr_op = `CSR_RC;
default:;
endcase
end
// MUL // MUL
wire is_mul = is_rtype && (func7 == 7'h1);
always @(*) begin always @(*) begin
mul_op = `MUL_MUL; mul_op = `MUL_MUL;
case (func3) case (func3)
@@ -156,23 +185,50 @@ module VX_decode #(
endcase endcase
end end
// LSU // FPU
wire is_lsu = (is_ltype || is_stype);
assign lsu_op = {is_stype, func3}; wire is_fl = (opcode == `INST_FL) && ((func3 == 2));
wire is_fs = (opcode == `INST_FS) && ((func3 == 2));
wire is_fci = (opcode == `INST_FCI);
wire is_fmadd = (opcode == `INST_FMADD);
wire is_fmsub = (opcode == `INST_FMSUB);
wire is_fnmsub = (opcode == `INST_FNMSUB);
wire is_fnmadd = (opcode == `INST_FNMADD);
wire is_fr4 = is_fmadd || is_fmsub || is_fnmsub || is_fnmadd;
wire is_fpu = (is_fl || is_fs || is_fci || is_fr4);
// CSR
wire is_csr_imm = is_csr && (func3[2] == 1);
always @(*) begin always @(*) begin
csr_op = `CSR_OTHER; fpu_op = `FPU_OTHER;
case (func3[1:0]) if (is_fr4) begin
2'h1: csr_op = `CSR_RW; case ({is_fmadd, is_fmsub, is_fnmsub, is_fnmadd})
2'h2: csr_op = `CSR_RS; 4'b1000: fpu_op = `FPU_MADD;
2'h3: csr_op = `CSR_RC; 4'b0100: fpu_op = `FPU_MSUB;
default:; 4'b0010: fpu_op = `FPU_NMSUB;
endcase 4'b0001: fpu_op = `FPU_NMADD;
default:;
endcase
end
else begin
case (func7)
7'h00: fpu_op = `FPU_ADD;
7'h04: fpu_op = `FPU_SUB;
7'h08: fpu_op = `FPU_MUL;
7'h0C: fpu_op = `FPU_DIV;
7'h2C: fpu_op = `FPU_SQRT;
7'h14: fpu_op = (func3 == 3'h0) ? `FPU_MIN : `FPU_MAX;
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
7'h70: fpu_op = (func3 == 3'h0) ? `FPU_MVXW : `FPU_CLASS; // both wb to intReg
7'h78: fpu_op = `FPU_MVWX;
7'h50: fpu_op = `FPU_CMP; // wb to intReg
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
default:;
endcase
end
end end
// GPU // GPU
always @(*) begin always @(*) begin
gpu_op = `GPU_OTHER; gpu_op = `GPU_OTHER;
case (func3) case (func3)
@@ -195,23 +251,23 @@ module VX_decode #(
assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU : assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU :
is_csr ? `EX_CSR : is_csr ? `EX_CSR :
is_mul ? `EX_MUL : is_mul ? `EX_MUL :
is_gpu ? `EX_GPU : is_fpu ? `EX_FPU :
is_br ? `EX_ALU : is_gpu ? `EX_GPU :
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU : is_br ? `EX_ALU :
`EX_NOP; (is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
`EX_NOP;
assign decode_tmp_if.instr_op = is_lsu ? `OP_BITS'(lsu_op) : assign decode_tmp_if.instr_op = is_lsu ? `OP_BITS'(lsu_op) :
is_csr ? `OP_BITS'(csr_op) : is_csr ? `OP_BITS'(csr_op) :
is_mul ? `OP_BITS'(mul_op) : is_mul ? `OP_BITS'(mul_op) :
is_gpu ? `OP_BITS'(gpu_op) : is_fpu ? `OP_BITS'(fpu_op) :
is_br ? `OP_BITS'({1'b1, br_op}) : is_gpu ? `OP_BITS'(gpu_op) :
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) : is_br ? `OP_BITS'({1'b1, br_op}) :
0; (is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
0;
assign decode_tmp_if.rd = rd; assign decode_tmp_if.rd = rd;
assign decode_tmp_if.rs1 = is_lui ? `NR_BITS'(0) : rs1; assign decode_tmp_if.rs1 = is_lui ? `NR_BITS'(0) : rs1;
assign decode_tmp_if.rs2 = rs2; assign decode_tmp_if.rs2 = rs2;
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} : assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
@@ -220,7 +276,6 @@ module VX_decode #(
src2_imm; src2_imm;
assign decode_tmp_if.rs1_is_PC = is_auipc; assign decode_tmp_if.rs1_is_PC = is_auipc;
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm; assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
assign decode_tmp_if.use_rs1 = (decode_tmp_if.rs1 != 0) assign decode_tmp_if.use_rs1 = (decode_tmp_if.rs1 != 0)
@@ -229,11 +284,14 @@ module VX_decode #(
assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0) assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0)
&& (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN))); && (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN)));
assign decode_tmp_if.wb = (rd == 0) ? `WB_NO : // disable writeback to r0 assign decode_tmp_if.rs1_is_fp = (is_fci && ((func7 != 7'h68) && (fpu_op != `FPU_MVWX)) || is_fr4);
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU : assign decode_tmp_if.rs2_is_fp = is_fs || (is_fci && ((func7 != 7'h60) && (func7 != 7'h68)) || is_fr4);
(is_jal || is_jalr || is_jals) ? `WB_JAL : assign decode_tmp_if.rs3 = rs3;
is_ltype ? `WB_MEM : assign decode_tmp_if.use_rs3 = is_fr4;
`WB_NO; assign decode_tmp_if.frm = func3;
// Writeback enable (single bit).
//  - FPU instructions: asserted for is_fl, for is_fr4, and for is_fci
//    subject to the func7 test below.
//  - Non-FPU instructions: asserted when rd != 0 and the instruction is one
//    of itype/rtype/lui/auipc/csr/jal/jalr/jals/ltype.
// NOTE(review): the three (func7 != ...) comparisons are OR-ed, so that
// sub-expression is true for every func7 value (func7 cannot equal 7'h50,
// 7'h70 and 7'h60 at once) and every is_fci instruction gets wb = 1. If the
// intent was to exclude those three encodings, the comparisons would need to
// be AND-ed -- confirm the intended behavior before changing.
assign decode_tmp_if.wb = (is_fpu && (is_fl || (is_fci && ((func7 != 7'h50) || (func7 != 7'h70) || (func7 != 7'h60))) || is_fr4))
|| (~is_fpu && (rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN); assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
assign join_if.warp_num = ifetch_rsp_if.warp_num; assign join_if.warp_num = ifetch_rsp_if.warp_num;
@@ -244,14 +302,14 @@ module VX_decode #(
wire stall = ~decode_if.ready && (| decode_if.valid); wire stall = ~decode_if.ready && (| decode_if.valid);
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS) .N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + 1 + `FRM_BITS)
) decode_reg ( ) decode_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (0), .flush (0),
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb}), .in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm}),
.out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb}) .out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm})
); );
assign ifetch_rsp_if.ready = ~stall; assign ifetch_rsp_if.ready = ~stall;
@@ -263,9 +321,7 @@ module VX_decode #(
print_ex_type(decode_tmp_if.ex_type); print_ex_type(decode_tmp_if.ex_type);
$write(", op="); $write(", op=");
print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op); print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op);
$write(", wb="); $write(", wb=%b, rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n", decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);
print_wb(decode_tmp_if.wb);
$write(", rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n", decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);
// trap unsupported instructions // trap unsupported instructions
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER)); assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER));

View File

@@ -76,7 +76,7 @@
`define CSR_WIDTH 12 `define CSR_WIDTH 12
`define DIV_LATENCY 2 `define DIV_LATENCY 21
`define MUL_LATENCY 2 `define MUL_LATENCY 2
@@ -390,6 +390,8 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
task print_ex_type; task print_ex_type;
input [`EX_BITS-1:0] ex; input [`EX_BITS-1:0] ex;
begin begin

View File

@@ -25,6 +25,7 @@ module VX_execute #(
VX_lsu_req_if lsu_req_if, VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if, VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if, VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if, VX_gpu_req_if gpu_req_if,
// outputs // outputs
@@ -34,10 +35,13 @@ module VX_execute #(
VX_commit_if lsu_commit_if, VX_commit_if lsu_commit_if,
VX_commit_if csr_commit_if, VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if, VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if, VX_commit_if gpu_commit_if,
output wire ebreak output wire ebreak
); );
VX_fpu_to_csr_if fpu_to_csr_if();
VX_fpu_from_csr_if fpu_from_csr_if();
VX_alu_unit #( VX_alu_unit #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
@@ -67,6 +71,7 @@ module VX_execute #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.perf_cntrs_if (perf_cntrs_if), .perf_cntrs_if (perf_cntrs_if),
.fpu_to_csr_if (fpu_to_csr_if),
.csr_io_req_if (csr_io_req_if), .csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if), .csr_io_rsp_if (csr_io_rsp_if),
.csr_req_if (csr_req_if), .csr_req_if (csr_req_if),
@@ -82,6 +87,17 @@ module VX_execute #(
.mul_commit_if (mul_commit_if) .mul_commit_if (mul_commit_if)
); );
VX_fpu_unit #(
.CORE_ID(CORE_ID)
) fpu_unit (
.clk (clk),
.reset (reset),
.fpu_req_if (fpu_req_if),
.fpu_from_csr_if(fpu_from_csr_if),
.fpu_to_csr_if (fpu_to_csr_if),
.fpu_commit_if (fpu_commit_if)
);
VX_gpu_unit #( VX_gpu_unit #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) gpu_unit ( ) gpu_unit (

140
hw/rtl/VX_fpu_unit.v Normal file
View File

@@ -0,0 +1,140 @@
`include "VX_define.vh"
// Floating-point execution unit.
//
// Wraps a single vectorial FPnew core (`fpnew_top`) that processes one FP32
// lane per thread. A request from `fpu_req_if` is translated into an FPnew
// operation (op / op_mod / rounding-mode triple), the result is forwarded on
// `fpu_commit_if`, and the exception flags are forwarded on `fpu_to_csr_if`.
//
// Parameters:
//   CORE_ID          - core identifier (not referenced inside this module).
// Ports:
//   clk, reset       - clock and synchronous reset (reset is routed to the
//                      FPnew `flush_i` input).
//   fpu_req_if       - incoming request: valid mask, op, operands, frm, rd/wb.
//   fpu_from_csr_if  - CSR-to-FPU interface (currently not referenced here).
//   fpu_commit_if    - result / write-back information for the commit stage.
//   fpu_to_csr_if    - accrued exception flags for the CSR unit.
module VX_fpu_unit #(
    parameter CORE_ID = 0
) (
    // inputs
    input wire clk,
    input wire reset,

    // inputs
    VX_fpu_req_if       fpu_req_if,
    VX_fpu_from_csr_if  fpu_from_csr_if,

    // outputs
    VX_commit_if        fpu_commit_if,
    VX_fpu_to_csr_if    fpu_to_csr_if
);
    localparam FOP_BITS  = fpnew_pkg::OP_BITS;
    localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
    localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);

    // One 32-bit lane per thread, packed into a single vectorial datapath.
    localparam int FPU_DPATHW = `NUM_THREADS * 32;

    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         FPU_DPATHW,
        EnableVectors: 1,
        EnableNanBox:  1,
        FpFmtMask:     5'b10000,
        IntFmtMask:    4'b0010
    };

    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
        PipeRegs:'{'{`LATENCY_FMULADD, 0, 0, 0, 0},   // ADDMUL
                   '{default: `LATENCY_FDIVSQRT},     // DIVSQRT
                   '{default: `LATENCY_FNONCOMP},     // NONCOMP
                   '{default: `LATENCY_FCONV}},       // CONV
        UnitTypes:'{'{default: fpnew_pkg::PARALLEL},  // ADDMUL
                    '{default: fpnew_pkg::MERGED},    // DIVSQRT
                    '{default: fpnew_pkg::PARALLEL},  // NONCOMP
                    '{default: fpnew_pkg::MERGED}},   // CONV
        PipeConfig: fpnew_pkg::DISTRIBUTED
    };

    wire fpu_in_ready;
    wire fpu_in_valid;
    wire fpu_out_ready;
    wire fpu_out_valid;

    wire [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;

    wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
    wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
    wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;

    // FPnew computes ADD as op[1] + op[2] (operand 0 is not used), so for
    // FADD/FSUB the two source registers are shifted up by one operand slot.
    wire fpu_is_addsub = (fpu_req_if.fpu_op == `FPU_ADD)
                      || (fpu_req_if.fpu_op == `FPU_SUB);

    assign fpu_in_valid    = (| fpu_req_if.valid);
    assign fpu_operands[0] = fpu_req_if.rs1_data;
    assign fpu_operands[1] = fpu_is_addsub ? fpu_req_if.rs1_data : fpu_req_if.rs2_data;
    assign fpu_operands[2] = fpu_is_addsub ? fpu_req_if.rs2_data : fpu_req_if.rs3_data;

    assign fpu_req_if.ready = fpu_in_ready;

    wire [`NUM_THREADS-1:0][31:0] fpu_result;
    fpnew_pkg::status_t fpu_status;

    reg [FOP_BITS-1:0]  fpu_op;
    reg [`FRM_BITS-1:0] fpu_rnd;
    reg                 fpu_op_mod;

    // Translate the internal FPU opcode into FPnew's (op, op_mod, rnd_mode).
    // Several FPnew operations reuse the rounding-mode field as a sub-opcode
    // (SGNJ, MINMAX), which is why fpu_rnd is overridden for those cases.
    always @(*) begin
        fpu_op     = fpnew_pkg::SGNJ;
        fpu_op_mod = 0;
        fpu_rnd    = fpu_req_if.frm;

        case (fpu_req_if.fpu_op)
            `FPU_ADD:    fpu_op = fpnew_pkg::ADD;
            `FPU_SUB:    begin fpu_op = fpnew_pkg::ADD; fpu_op_mod = 1; end
            `FPU_MUL:    fpu_op = fpnew_pkg::MUL;
            `FPU_DIV:    fpu_op = fpnew_pkg::DIV;
            `FPU_SQRT:   fpu_op = fpnew_pkg::SQRT;
            `FPU_MADD:   fpu_op = fpnew_pkg::FMADD;
            `FPU_MSUB:   begin fpu_op = fpnew_pkg::FMADD;  fpu_op_mod = 1; end
            `FPU_NMSUB:  fpu_op = fpnew_pkg::FNMSUB;
            `FPU_NMADD:  begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
            // sign injection: rounding mode selects SGNJ / SGNJN / SGNJX
            `FPU_SGNJ:   begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RNE; end
            `FPU_SGNJN:  begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RTZ; end
            `FPU_SGNJX:  begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RDN; end
            // min/max: rounding mode selects MIN (RNE) or MAX (RTZ)
            `FPU_MIN:    begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
            `FPU_MAX:    begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
            // float <-> int conversions: op_mod selects the unsigned variant
            `FPU_CVTWS:  fpu_op = fpnew_pkg::F2I;
            `FPU_CVTWUS: begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
            `FPU_CVTSW:  fpu_op = fpnew_pkg::I2F;
            `FPU_CVTSWU: begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
            // register moves: SGNJ with RUP passes operand 0 through unchanged
            `FPU_MVXW:   begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RUP; end
            `FPU_MVWX:   begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RUP; end
            `FPU_CLASS:  fpu_op = fpnew_pkg::CLASSIFY;
            // NOTE(review): CMP relies on frm already encoding LE/LT/EQ - confirm in decode
            `FPU_CMP:    fpu_op = fpnew_pkg::CMP;
            default:;
        endcase
    end

    fpnew_top #(
        .Features       (FPU_FEATURES),
        .Implementation (FPU_IMPLEMENTATION),
        .TagType        (logic)
    ) fpnew_core (
        .clk_i          (clk),
        .rst_ni         (1'b1),     // tied inactive; reset is applied through flush_i below
        .operands_i     (fpu_operands),
        .rnd_mode_i     (fpu_rnd),
        .op_i           (fpu_op),
        .op_mod_i       (fpu_op_mod),
        .src_fmt_i      (fpu_src_fmt),
        .dst_fmt_i      (fpu_dst_fmt),
        .int_fmt_i      (fpu_int_fmt),
        .vectorial_op_i (1'b1),
        .tag_i          (1'b0),
        .in_valid_i     (fpu_in_valid),
        .in_ready_o     (fpu_in_ready),
        .flush_i        (reset),
        .result_o       (fpu_result),
        .status_o       (fpu_status),
        `UNUSED_PIN     (tag_o),
        .out_valid_o    (fpu_out_valid),
        .out_ready_i    (fpu_out_ready),
        `UNUSED_PIN     (busy_o)
    );

    // NOTE(review): the commit metadata below is taken from the request that is
    // currently presented on fpu_req_if, not from a tag carried through the
    // FPnew pipeline - confirm the issue stage holds the request stable until
    // out_valid_o is asserted.
    assign fpu_commit_if.valid    = fpu_req_if.valid & {`NUM_THREADS{fpu_out_valid}};
    assign fpu_commit_if.warp_num = fpu_req_if.warp_num;
    assign fpu_commit_if.curr_PC  = fpu_req_if.curr_PC;
    assign fpu_commit_if.data     = fpu_result;
    assign fpu_commit_if.wb       = fpu_req_if.wb;
    assign fpu_commit_if.rd       = fpu_req_if.rd;

    assign fpu_out_ready = fpu_commit_if.ready;

    // Forward the FPnew status flags to the CSR unit.
    assign fpu_to_csr_if.valid     = fpu_out_valid;
    assign fpu_to_csr_if.warp_num  = fpu_req_if.warp_num;
    assign fpu_to_csr_if.fflags_NV = fpu_status.NV;
    assign fpu_to_csr_if.fflags_DZ = fpu_status.DZ;
    assign fpu_to_csr_if.fflags_OF = fpu_status.OF;
    assign fpu_to_csr_if.fflags_UF = fpu_status.UF;
    assign fpu_to_csr_if.fflags_NX = fpu_status.NX;

endmodule

94
hw/rtl/VX_gpr_fp_ctrl.v Normal file
View File

@@ -0,0 +1,94 @@
`include "VX_define.vh"
// Register-file read controller supporting three-operand (rs3) FP instructions.
//
// The register RAMs expose two read ports. For an instruction with
// `use_rs3` set, this module spreads the read over two cycles:
//   state 0: raddr1 = rs1, raddr2 = rs2; the selected rs1/rs2 values are
//            captured into staging registers and `gpr_delay` is asserted.
//   state 1: raddr1 = rs3; rs3 is taken from the FP rs1 read port while
//            rs1/rs2 come from the staging registers.
// For all other instructions the operands are forwarded combinationally.
//
// Operand selection (both paths): FP register data when rsN_is_fp is set,
// otherwise the current PC (rs1_is_PC) / immediate (rs2_is_imm) replicated
// across threads, otherwise the integer register data.
//
// Ports:
//   decode_if        - decoded instruction (register indices and select flags).
//   rs*_int_data     - read data from the integer register RAM.
//   rs*_fp_data      - read data from the FP register RAM.
//   raddr1, raddr2   - read addresses driven to both register RAMs.
//   gpr_data_if      - selected rs1/rs2/rs3 operands.
//   schedule_delay   - scheduler stall indication.
//   gpr_delay        - asserted during the first cycle of a two-cycle read.
module VX_gpr_fp_ctrl (
    input wire      clk,
    input wire      reset,

    VX_decode_if    decode_if,

    input wire [`NUM_THREADS-1:0][31:0] rs1_int_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_int_data,
    input wire [`NUM_THREADS-1:0][31:0] rs1_fp_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_fp_data,

    // outputs
    output wire [`NR_BITS-1:0] raddr1,
    output wire [`NR_BITS-1:0] raddr2,
    VX_gpr_data_if  gpr_data_if,

    input wire      schedule_delay,
    output wire     gpr_delay
);
    // param
    localparam GPR_DELAY_WID = 1;

    reg [GPR_DELAY_WID-1:0] multi_cyc_state;

    reg [`NUM_THREADS-1:0][31:0] tmp_rs1_data;
    reg [`NUM_THREADS-1:0][31:0] tmp_rs2_data;

    reg [`NUM_THREADS-1:0][31:0] rs1_data;
    reg [`NUM_THREADS-1:0][31:0] rs2_data;
    reg [`NUM_THREADS-1:0][31:0] rs3_data;

    // Operand selection shared by the staged (rs3) and direct paths, so that
    // PC / immediate substitution is applied identically in both.
    wire [`NUM_THREADS-1:0][31:0] rs1_sel_data =
        decode_if.rs1_is_fp ? rs1_fp_data :
        decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} :
                              rs1_int_data;

    wire [`NUM_THREADS-1:0][31:0] rs2_sel_data =
        decode_if.rs2_is_fp  ? rs2_fp_data :
        decode_if.rs2_is_imm ? {`NUM_THREADS{decode_if.imm}} :
                               rs2_int_data;

    // Two-cycle read sequencing. The state advances to 1 only for an rs3
    // instruction that is not stalled, and always falls back to 0 afterwards.
    // NOTE(review): gpr_delay feeds the scheduler as gpr_busy; if that is
    // reflected back on schedule_delay in the same cycle, this state can never
    // leave 0 for an rs3 instruction - confirm against VX_scheduler.
    always @(posedge clk) begin
        if (reset) begin
            multi_cyc_state <= 0;
        end else if (!schedule_delay) begin
            multi_cyc_state <= decode_if.use_rs3 && (multi_cyc_state == 0);
        end else begin
            multi_cyc_state <= 0;
        end
    end

    // stage the selected rs1 data for use in the second read cycle
    always @(posedge clk) begin
        if (reset) begin
            tmp_rs1_data <= 0;
        end else begin
            tmp_rs1_data <= rs1_sel_data;
        end
    end

    // stage the selected rs2 data for use in the second read cycle
    always @(posedge clk) begin
        if (reset) begin
            tmp_rs2_data <= 0;
        end else begin
            tmp_rs2_data <= rs2_sel_data;
        end
    end

    // outputs
    assign gpr_delay = (multi_cyc_state == 0) && decode_if.use_rs3;
    assign raddr1    = multi_cyc_state ? decode_if.rs3 : decode_if.rs1;
    assign raddr2    = decode_if.rs2;

    always @(*) begin
        if (decode_if.use_rs3) begin
            // second cycle: port 1 now reads rs3, rs1/rs2 come from staging
            rs1_data = tmp_rs1_data;
            rs2_data = tmp_rs2_data;
            rs3_data = rs1_fp_data;
        end else begin
            // single-cycle read: forward the selected operands directly
            rs1_data = rs1_sel_data;
            rs2_data = rs2_sel_data;
            rs3_data = {`NUM_THREADS{32'h8000_0000}}; // default value: -0 in single fp
        end
    end

    assign gpr_data_if.rs1_data = rs1_data;
    assign gpr_data_if.rs2_data = rs2_data;
    assign gpr_data_if.rs3_data = rs3_data;

endmodule

View File

@@ -4,42 +4,76 @@ module VX_gpr_stage #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset,
// inputs // inputs
VX_wb_if writeback_if, VX_wb_if writeback_if,
VX_decode_if decode_if, VX_decode_if decode_if,
// outputs // outputs
VX_gpr_data_if gpr_data_if VX_gpr_data_if gpr_data_if,
input wire schedule_delay,
output wire gpr_delay
); );
wire [`NUM_THREADS-1:0][31:0] rs1_data_all [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs2_data_all [`NUM_WARPS-1:0]; wire [`NUM_THREADS-1:0][31:0] rs1_int_data [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs1_PC; wire [`NUM_THREADS-1:0][31:0] rs2_int_data [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs2_imm; wire [`NUM_THREADS-1:0][31:0] rs1_fp_data [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs2_fp_data [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0]; wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
wire [`NR_BITS-1:0] raddr1;
wire [`NR_BITS-1:0] raddr2;
genvar i; genvar i;
for (i = 0; i < `NUM_THREADS; i++) begin
assign rs1_PC[i] = decode_if.curr_PC;
assign rs2_imm[i] = decode_if.imm;
end
assign gpr_data_if.rs1_data = decode_if.rs1_is_PC ? rs1_PC : rs1_data_all[decode_if.warp_num];
assign gpr_data_if.rs2_data = decode_if.rs2_is_imm ? rs2_imm : rs2_data_all[decode_if.warp_num];
for (i = 0; i < `NUM_WARPS; i++) begin for (i = 0; i < `NUM_WARPS; i++) begin
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}}; assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
VX_gpr_ram gpr_ram (
// Int GPRs
VX_gpr_ram gpr_int_ram (
.clk (clk), .clk (clk),
.we (we[i]), .we (we[i] & {`NUM_THREADS{~writeback_if.is_fp}}),
.waddr (writeback_if.rd), .waddr (writeback_if.rd),
.wdata (writeback_if.data), .wdata (writeback_if.data),
.rs1 (decode_if.rs1), .rs1 (raddr1),
.rs2 (decode_if.rs2), .rs2 (raddr2),
.rs1_data (rs1_data_all[i]), .rs1_data (rs1_int_data[i]),
.rs2_data (rs2_data_all[i]) .rs2_data (rs2_int_data[i])
); );
// FP GPRs
VX_gpr_ram gpr_fp_ram (
.clk (clk),
.we (we[i] & {`NUM_THREADS{writeback_if.is_fp}}),
.waddr (writeback_if.rd),
.wdata (writeback_if.data),
.rs1 (raddr1),
.rs2 (raddr2),
.rs1_data (rs1_fp_data[i]),
.rs2_data (rs2_fp_data[i])
);
// controller for multi-cycle read
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
.clk (clk),
.reset (reset),
//inputs
.decode_if (decode_if),
.rs1_int_data (rs1_int_data[i]),
.rs2_int_data (rs2_int_data[i]),
.rs1_fp_data (rs1_fp_data[i]),
.rs2_fp_data (rs2_fp_data[i]),
// outputs
.raddr1 (raddr1),
.raddr2 (raddr2),
.gpr_data_if (gpr_data_if),
.schedule_delay (schedule_delay),
.gpr_delay (gpr_delay)
);
end end
assign writeback_if.ready = 1'b1; assign writeback_if.ready = 1'b1;

View File

@@ -79,7 +79,7 @@ module VX_gpu_unit #(
assign gpu_commit_if.valid = gpu_req_if.valid; assign gpu_commit_if.valid = gpu_req_if.valid;
assign gpu_commit_if.warp_num = gpu_req_if.warp_num; assign gpu_commit_if.warp_num = gpu_req_if.warp_num;
assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC; assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC;
assign gpu_commit_if.wb = `WB_NO; assign gpu_commit_if.wb = 0;
assign gpu_commit_if.rd = 0; assign gpu_commit_if.rd = 0;
assign gpu_commit_if.data = 0; assign gpu_commit_if.data = 0;

View File

@@ -13,16 +13,19 @@ module VX_issue #(
VX_lsu_req_if lsu_req_if, VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if, VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if, VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if VX_gpu_req_if gpu_req_if
); );
VX_gpr_data_if gpr_data_if(); VX_gpr_data_if gpr_data_if();
wire schedule_delay; wire schedule_delay;
wire gpr_delay;
wire alu_busy = ~alu_req_if.ready/* && (| alu_req_if.valid)*/; wire alu_busy = ~alu_req_if.ready;
wire lsu_busy = ~lsu_req_if.ready/* && (| lsu_req_if.valid)*/; wire lsu_busy = ~lsu_req_if.ready;
wire csr_busy = ~csr_req_if.ready/* && (| csr_req_if.valid)*/; wire csr_busy = ~csr_req_if.ready;
wire mul_busy = ~mul_req_if.ready/* && (| mul_req_if.valid)*/; wire mul_busy = ~mul_req_if.ready;
wire gpu_busy = ~gpu_req_if.ready/* && (| gpu_req_if.valid)*/; wire fpu_busy = ~fpu_req_if.ready; // FPU back-pressure comes from the FPU request port, not the MUL one
wire gpu_busy = ~gpu_req_if.ready;
VX_scheduler #( VX_scheduler #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
@@ -31,10 +34,12 @@ module VX_issue #(
.reset (reset), .reset (reset),
.decode_if (decode_if), .decode_if (decode_if),
.writeback_if (writeback_if), .writeback_if (writeback_if),
.gpr_busy (gpr_delay),
.alu_busy (alu_busy), .alu_busy (alu_busy),
.lsu_busy (lsu_busy), .lsu_busy (lsu_busy),
.csr_busy (csr_busy), .csr_busy (csr_busy),
.mul_busy (mul_busy), .mul_busy (mul_busy),
.fpu_busy (fpu_busy),
.gpu_busy (gpu_busy), .gpu_busy (gpu_busy),
.schedule_delay (schedule_delay), .schedule_delay (schedule_delay),
`UNUSED_PIN (is_empty) `UNUSED_PIN (is_empty)
@@ -44,15 +49,19 @@ module VX_issue #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) gpr_stage ( ) gpr_stage (
.clk (clk), .clk (clk),
.reset (reset),
.decode_if (decode_if), .decode_if (decode_if),
.writeback_if (writeback_if), .writeback_if (writeback_if),
.gpr_data_if (gpr_data_if) .gpr_data_if (gpr_data_if),
.schedule_delay (schedule_delay),
.gpr_delay (gpr_delay)
); );
VX_alu_req_if alu_req_tmp_if(); VX_alu_req_if alu_req_tmp_if();
VX_lsu_req_if lsu_req_tmp_if(); VX_lsu_req_if lsu_req_tmp_if();
VX_csr_req_if csr_req_tmp_if(); VX_csr_req_if csr_req_tmp_if();
VX_mul_req_if mul_req_tmp_if(); VX_mul_req_if mul_req_tmp_if();
VX_fpu_req_if fpu_req_tmp_if();
VX_gpu_req_if gpu_req_tmp_if(); VX_gpu_req_if gpu_req_tmp_if();
VX_issue_mux issue_mux ( VX_issue_mux issue_mux (
@@ -62,6 +71,7 @@ module VX_issue #(
.lsu_req_if (lsu_req_tmp_if), .lsu_req_if (lsu_req_tmp_if),
.csr_req_if (csr_req_tmp_if), .csr_req_if (csr_req_tmp_if),
.mul_req_if (mul_req_tmp_if), .mul_req_if (mul_req_tmp_if),
.fpu_req_if (fpu_req_tmp_if),
.gpu_req_if (gpu_req_tmp_if) .gpu_req_if (gpu_req_tmp_if)
); );
@@ -69,16 +79,18 @@ module VX_issue #(
wire stall_lsu = ~lsu_req_if.ready || schedule_delay; wire stall_lsu = ~lsu_req_if.ready || schedule_delay;
wire stall_csr = ~csr_req_if.ready || schedule_delay; wire stall_csr = ~csr_req_if.ready || schedule_delay;
wire stall_mul = ~mul_req_if.ready || schedule_delay; wire stall_mul = ~mul_req_if.ready || schedule_delay;
wire stall_fpu = ~fpu_req_if.ready || schedule_delay;
wire stall_gpu = ~gpu_req_if.ready || schedule_delay; wire stall_gpu = ~gpu_req_if.ready || schedule_delay;
wire flush_alu = alu_req_if.ready && schedule_delay; wire flush_alu = alu_req_if.ready && schedule_delay;
wire flush_lsu = lsu_req_if.ready && schedule_delay; wire flush_lsu = lsu_req_if.ready && schedule_delay;
wire flush_csr = csr_req_if.ready && schedule_delay; wire flush_csr = csr_req_if.ready && schedule_delay;
wire flush_mul = mul_req_if.ready && schedule_delay; wire flush_mul = mul_req_if.ready && schedule_delay;
wire flush_fpu = fpu_req_if.ready && schedule_delay;
wire flush_gpu = gpu_req_if.ready && schedule_delay; wire flush_gpu = gpu_req_if.ready && schedule_delay;
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32) .N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
) alu_reg ( ) alu_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -89,7 +101,7 @@ module VX_issue #(
); );
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32) .N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
) lsu_reg ( ) lsu_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -100,7 +112,7 @@ module VX_issue #(
); );
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + `WB_BITS + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1) .N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + 1 + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
) csr_reg ( ) csr_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -110,8 +122,8 @@ module VX_issue #(
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io}) .out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
); );
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32)) .N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
) mul_reg ( ) mul_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -121,6 +133,17 @@ module VX_issue #(
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data}) .out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data})
); );
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
) fpu_reg (
.clk (clk),
.reset (reset),
.stall (stall_fpu),
.flush (flush_fpu),
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
);
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32) .N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
) gpu_reg ( ) gpu_reg (
@@ -140,6 +163,9 @@ module VX_issue #(
if ((| mul_req_tmp_if.valid) && ~stall_mul) begin if ((| mul_req_tmp_if.valid) && ~stall_mul) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data); $display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data);
end end
// trace FPU instructions as they leave the issue stage (label is FPU, and rs3 is included for fused ops)
if ((| fpu_req_tmp_if.valid) && ~stall_fpu) begin
    $display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h, rs3=%0h", $time, CORE_ID, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data);
end
if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset); $display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset);
end end

View File

@@ -10,6 +10,7 @@ module VX_issue_mux (
VX_lsu_req_if lsu_req_if, VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if, VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if, VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if VX_gpu_req_if gpu_req_if
); );
@@ -17,6 +18,7 @@ module VX_issue_mux (
wire[`NUM_THREADS-1:0] is_lsu = {`NUM_THREADS{decode_if.ex_type == `EX_LSU}}; wire[`NUM_THREADS-1:0] is_lsu = {`NUM_THREADS{decode_if.ex_type == `EX_LSU}};
wire[`NUM_THREADS-1:0] is_csr = {`NUM_THREADS{decode_if.ex_type == `EX_CSR}}; wire[`NUM_THREADS-1:0] is_csr = {`NUM_THREADS{decode_if.ex_type == `EX_CSR}};
wire[`NUM_THREADS-1:0] is_mul = {`NUM_THREADS{decode_if.ex_type == `EX_MUL}}; wire[`NUM_THREADS-1:0] is_mul = {`NUM_THREADS{decode_if.ex_type == `EX_MUL}};
wire[`NUM_THREADS-1:0] is_fpu = {`NUM_THREADS{decode_if.ex_type == `EX_FPU}};
wire[`NUM_THREADS-1:0] is_gpu = {`NUM_THREADS{decode_if.ex_type == `EX_GPU}}; wire[`NUM_THREADS-1:0] is_gpu = {`NUM_THREADS{decode_if.ex_type == `EX_GPU}};
// ALU unit // ALU unit
@@ -64,6 +66,18 @@ module VX_issue_mux (
assign mul_req_if.rd = decode_if.rd; assign mul_req_if.rd = decode_if.rd;
assign mul_req_if.wb = decode_if.wb; assign mul_req_if.wb = decode_if.wb;
// FPU unit
assign fpu_req_if.valid = decode_if.valid & is_fpu;
assign fpu_req_if.warp_num = decode_if.warp_num;
assign fpu_req_if.curr_PC = decode_if.curr_PC;
assign fpu_req_if.fpu_op = `FPU_OP(decode_if.instr_op);
assign fpu_req_if.rs1_data = gpr_data_if.rs1_data;
assign fpu_req_if.rs2_data = gpr_data_if.rs2_data;
assign fpu_req_if.rs3_data = gpr_data_if.rs3_data;
assign fpu_req_if.frm = decode_if.frm;
assign fpu_req_if.rd = decode_if.rd;
assign fpu_req_if.wb = decode_if.wb;
// GPU unit // GPU unit
assign gpu_req_if.valid = decode_if.valid & is_gpu; assign gpu_req_if.valid = decode_if.valid & is_gpu;
assign gpu_req_if.warp_num = decode_if.warp_num; assign gpu_req_if.warp_num = decode_if.warp_num;

View File

@@ -28,8 +28,9 @@ module VX_lsu_unit #(
wire [`BYTEEN_BITS-1:0] mem_byteen; wire [`BYTEEN_BITS-1:0] mem_byteen;
wire [`NR_BITS-1:0] use_rd; wire [`NR_BITS-1:0] use_rd;
wire [`NW_BITS-1:0] use_warp_num; wire [`NW_BITS-1:0] use_warp_num;
wire [`WB_BITS-1:0] use_wb; wire use_wb;
wire [31:0] use_pc; wire [31:0] use_pc;
wire mrq_full;
genvar i; genvar i;
@@ -68,7 +69,7 @@ module VX_lsu_unit #(
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + `WB_BITS + 32) .N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + 1 + 32)
) mem_req_reg ( ) mem_req_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -83,7 +84,6 @@ module VX_lsu_unit #(
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr; wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr;
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset; wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read; wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
wire mrq_full;
wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready
&& (0 == use_req_rw); // only push read requests && (0 == use_req_rw); // only push read requests
@@ -97,7 +97,7 @@ module VX_lsu_unit #(
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd); wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
VX_index_queue #( VX_index_queue #(
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + `WB_BITS + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS), .DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 1 + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
.SIZE (`DCREQ_SIZE) .SIZE (`DCREQ_SIZE)
) mem_req_queue ( ) mem_req_queue (
.clk (clk), .clk (clk),

View File

@@ -12,7 +12,7 @@ module VX_mul_unit #(
// Outputs // Outputs
VX_commit_if mul_commit_if VX_commit_if mul_commit_if
); );
wire [`NUM_THREADS-1:0][31:0] alu_result; reg [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][63:0] mul_result; wire [`NUM_THREADS-1:0][63:0] mul_result;
wire [`NUM_THREADS-1:0][31:0] div_result; wire [`NUM_THREADS-1:0][31:0] div_result;
wire [`NUM_THREADS-1:0][31:0] rem_result; wire [`NUM_THREADS-1:0][31:0] rem_result;
@@ -36,7 +36,7 @@ module VX_mul_unit #(
.WIDTHB(33), .WIDTHB(33),
.WIDTHP(64), .WIDTHP(64),
.SIGNED(1), .SIGNED(1),
.PIPELINE(`MUL_LATENCY) .PIPELINE(`LATENCY_IMUL)
) multiplier ( ) multiplier (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
@@ -52,7 +52,7 @@ module VX_mul_unit #(
.WIDTHR(32), .WIDTHR(32),
.NSIGNED(1), .NSIGNED(1),
.DSIGNED(1), .DSIGNED(1),
.PIPELINE(`DIV_LATENCY) .PIPELINE(`LATENCY_IDIV)
) sdiv ( ) sdiv (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
@@ -77,9 +77,11 @@ module VX_mul_unit #(
end end
end end
wire stall;
reg result_avail; reg result_avail;
reg [4:0] pending_ctr; reg [4:0] pending_ctr;
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `DIV_LATENCY : `MUL_LATENCY; wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `LATENCY_IDIV : `LATENCY_IMUL;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
@@ -104,13 +106,13 @@ module VX_mul_unit #(
wire pipeline_stall = ~result_avail && (| mul_req_if.valid); wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid)) assign stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|| pipeline_stall; || pipeline_stall;
wire flush = mul_commit_if.ready && pipeline_stall; wire flush = mul_commit_if.ready && pipeline_stall;
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)), .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
) mul_reg ( ) mul_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -110,6 +110,7 @@ module VX_pipeline #(
VX_lsu_req_if lsu_req_if(); VX_lsu_req_if lsu_req_if();
VX_csr_req_if csr_req_if(); VX_csr_req_if csr_req_if();
VX_mul_req_if mul_req_if(); VX_mul_req_if mul_req_if();
VX_fpu_req_if fpu_req_if();
VX_gpu_req_if gpu_req_if(); VX_gpu_req_if gpu_req_if();
VX_wb_if writeback_if(); VX_wb_if writeback_if();
VX_wstall_if wstall_if(); VX_wstall_if wstall_if();
@@ -118,6 +119,7 @@ module VX_pipeline #(
VX_commit_if lsu_commit_if(); VX_commit_if lsu_commit_if();
VX_commit_if csr_commit_if(); VX_commit_if csr_commit_if();
VX_commit_if mul_commit_if(); VX_commit_if mul_commit_if();
VX_commit_if fpu_commit_if();
VX_commit_if gpu_commit_if(); VX_commit_if gpu_commit_if();
VX_fetch #( VX_fetch #(
@@ -159,6 +161,7 @@ module VX_pipeline #(
.lsu_req_if (lsu_req_if), .lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if), .csr_req_if (csr_req_if),
.mul_req_if (mul_req_if), .mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if) .gpu_req_if (gpu_req_if)
); );
@@ -181,6 +184,7 @@ module VX_pipeline #(
.lsu_req_if (lsu_req_if), .lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if), .csr_req_if (csr_req_if),
.mul_req_if (mul_req_if), .mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if), .gpu_req_if (gpu_req_if),
.warp_ctl_if (warp_ctl_if), .warp_ctl_if (warp_ctl_if),
@@ -189,6 +193,7 @@ module VX_pipeline #(
.lsu_commit_if (lsu_commit_if), .lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if), .csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if), .mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if), .gpu_commit_if (gpu_commit_if),
.ebreak (ebreak) .ebreak (ebreak)
@@ -204,6 +209,7 @@ module VX_pipeline #(
.lsu_commit_if (lsu_commit_if), .lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if), .csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if), .mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if), .gpu_commit_if (gpu_commit_if),
.writeback_if (writeback_if), .writeback_if (writeback_if),

View File

@@ -8,37 +8,43 @@ module VX_scheduler #(
VX_decode_if decode_if, VX_decode_if decode_if,
VX_wb_if writeback_if, VX_wb_if writeback_if,
input wire gpr_busy,
input wire alu_busy, input wire alu_busy,
input wire lsu_busy, input wire lsu_busy,
input wire csr_busy, input wire csr_busy,
input wire mul_busy, input wire mul_busy,
input wire fpu_busy,
input wire gpu_busy, input wire gpu_busy,
output wire schedule_delay, output wire schedule_delay,
output wire is_empty output wire is_empty
); );
localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1); localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; reg [`NUM_REGS-1:0][`NUM_THREADS-1:0] rename_table [`NUM_WARPS-1:0];
reg [`NUM_REGS-1:0] busy_table [`NUM_WARPS-1:0];
reg [CTVW-1:0] count_valid; reg [CTVW-1:0] count_valid;
wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0); wire rs1_rename = busy_table[decode_if.warp_num][decode_if.rs1];
wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0); wire rs2_rename = busy_table[decode_if.warp_num][decode_if.rs2];
wire rd_rename = (rename_table[decode_if.warp_num][decode_if.rd ] != 0); wire rs3_rename = busy_table[decode_if.warp_num][decode_if.rs3];
wire rd_rename = busy_table[decode_if.warp_num][decode_if.rd];
wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1); wire rs1_rename_qual = rs1_rename && decode_if.use_rs1;
wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2); wire rs2_rename_qual = rs2_rename && decode_if.use_rs2;
wire rd_rename_qual = (rd_rename) && (decode_if.wb != 0); wire rs3_rename_qual = rs3_rename && decode_if.use_rs3;
wire rd_rename_qual = rd_rename && decode_if.wb;
wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual); wire rename_valid = (rs1_rename_qual || rs2_rename_qual || rs3_rename_qual || rd_rename_qual);
wire ex_stalled = (| decode_if.valid) wire ex_stalled = ((gpr_busy)
&& ((alu_busy && (decode_if.ex_type == `EX_ALU)) || (alu_busy && (decode_if.ex_type == `EX_ALU))
|| (lsu_busy && (decode_if.ex_type == `EX_LSU)) || (lsu_busy && (decode_if.ex_type == `EX_LSU))
|| (csr_busy && (decode_if.ex_type == `EX_CSR)) || (csr_busy && (decode_if.ex_type == `EX_CSR))
|| (mul_busy && (decode_if.ex_type == `EX_MUL)) || (mul_busy && (decode_if.ex_type == `EX_MUL))
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|| (gpu_busy && (decode_if.ex_type == `EX_GPU))); || (gpu_busy && (decode_if.ex_type == `EX_GPU)));
wire stall = ex_stalled || rename_valid; wire stall = (ex_stalled || rename_valid) && (| decode_if.valid);
wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall; wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall;
@@ -49,23 +55,25 @@ module VX_scheduler #(
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) : reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) : (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
count_valid; count_valid;
integer i, w; always @(posedge clk) begin
always @(posedge clk) begin
if (reset) begin if (reset) begin
integer i, w;
for (w = 0; w < `NUM_WARPS; w++) begin for (w = 0; w < `NUM_WARPS; w++) begin
for (i = 0; i < 32; i++) begin for (i = 0; i < 32; i++) begin
rename_table[w][i] <= 0; rename_table[w][i] <= 0;
busy_table[w][i] <= 0;
end end
end end
count_valid <= 0; count_valid <= 0;
end else begin end else begin
if (acquire_rd) begin if (acquire_rd) begin
rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid; rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
busy_table[decode_if.warp_num][decode_if.rd] <= 1;
end end
if (release_rd) begin if (release_rd) begin
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0); assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
busy_table[writeback_if.warp_num][writeback_if.rd] <= (| valid_wb_new_mask);
end end
count_valid <= count_valid_next; count_valid <= count_valid_next;
end end
@@ -80,7 +88,7 @@ module VX_scheduler #(
`ifdef DBG_PRINT_PIPELINE `ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin always @(posedge clk) begin
if (stall) begin if (stall) begin
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, gpu_busy); $display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
end end
end end
`endif `endif

View File

@@ -18,7 +18,6 @@ module VX_warp_sched #(
); );
wire update_use_wspawn; wire update_use_wspawn;
wire update_visible_active; wire update_visible_active;
wire scheduled_warp;
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0]; wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];

View File

@@ -10,16 +10,18 @@ module VX_writeback #(
VX_commit_if alu_commit_if, VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if, VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if, VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if csr_commit_if, VX_commit_if csr_commit_if,
// outputs // outputs
VX_wb_if writeback_if VX_wb_if writeback_if
); );
wire lsu_valid = (| lsu_commit_if.valid) && (lsu_commit_if.wb != `WB_NO); wire alu_valid = (| alu_commit_if.valid) && alu_commit_if.wb;
wire mul_valid = (| mul_commit_if.valid) && (mul_commit_if.wb != `WB_NO); wire lsu_valid = (| lsu_commit_if.valid) && lsu_commit_if.wb;
wire alu_valid = (| alu_commit_if.valid) && (alu_commit_if.wb != `WB_NO); wire csr_valid = (| csr_commit_if.valid) && csr_commit_if.wb;
wire csr_valid = (| csr_commit_if.valid) && (csr_commit_if.wb != `WB_NO); wire mul_valid = (| mul_commit_if.valid) && mul_commit_if.wb;
wire fpu_valid = (| fpu_commit_if.valid) && fpu_commit_if.wb;
VX_wb_if writeback_tmp_if(); VX_wb_if writeback_tmp_if();
@@ -47,23 +49,26 @@ module VX_writeback #(
csr_valid ? csr_commit_if.rd : csr_valid ? csr_commit_if.rd :
0; 0;
assign writeback_tmp_if.is_fp = fpu_valid && fpu_commit_if.ready;
wire stall = ~writeback_if.ready && (| writeback_if.valid); wire stall = ~writeback_if.ready && (| writeback_if.valid);
VX_generic_register #( VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32)) .N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32) + 1)
) wb_reg ( ) wb_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (0), .flush (0),
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data}), .in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.is_fp}),
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data}) .out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data, writeback_if.is_fp})
); );
assign lsu_commit_if.ready = !stall; assign lsu_commit_if.ready = !stall;
assign mul_commit_if.ready = !stall && !lsu_valid; assign fpu_commit_if.ready = !stall && !lsu_valid;
assign alu_commit_if.ready = !stall && !lsu_valid && !mul_valid; assign mul_commit_if.ready = !stall && !lsu_valid && !fpu_valid;
assign csr_commit_if.ready = !stall && !lsu_valid && !mul_valid && !alu_valid; assign alu_commit_if.ready = !stall && !lsu_valid && !fpu_valid && !mul_valid;
assign csr_commit_if.ready = !stall && !lsu_valid && !fpu_valid && !mul_valid && !alu_valid;
// special workaround to control RISC-V benchmarks termination on Verilator // special workaround to control RISC-V benchmarks termination on Verilator
reg [31:0] last_data_wb /* verilator public */; reg [31:0] last_data_wb /* verilator public */;

View File

@@ -139,54 +139,54 @@ module Vortex (
end else begin end else begin
wire per_cluster_dram_req_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
wire per_cluster_dram_req_rw [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
wire [`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire l3_core_req_ready; wire l3_core_req_ready;
wire per_cluster_dram_rsp_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire per_cluster_dram_rsp_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire per_cluster_snp_req_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
wire per_cluster_snp_req_invalidate [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate;
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
wire per_cluster_snp_req_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
wire per_cluster_snp_rsp_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire per_cluster_snp_rsp_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
wire per_cluster_io_req_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_valid;
wire per_cluster_io_req_rw [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
wire [3:0] per_cluster_io_req_byteen [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen;
wire [29:0] per_cluster_io_req_addr [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr;
wire [31:0] per_cluster_io_req_data [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire per_cluster_io_req_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
wire per_cluster_io_rsp_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
wire [31:0] per_cluster_io_rsp_data [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
wire per_cluster_io_rsp_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
wire per_cluster_csr_io_req_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
wire [11:0] per_cluster_csr_io_req_addr [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
wire per_cluster_csr_io_req_rw [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
wire [31:0] per_cluster_csr_io_req_data [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data;
wire per_cluster_csr_io_req_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready;
wire per_cluster_csr_io_rsp_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid;
wire [31:0] per_cluster_csr_io_rsp_data [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data;
wire per_cluster_csr_io_rsp_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready;
wire per_cluster_busy [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
wire per_cluster_ebreak [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak;
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS)); wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid); wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
@@ -336,27 +336,27 @@ module Vortex (
// L3 Cache /////////////////////////////////////////////////////////// // L3 Cache ///////////////////////////////////////////////////////////
wire l3_core_req_valid [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid;
wire l3_core_req_rw [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw;
wire [`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen;
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr;
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
wire l3_core_rsp_valid [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid;
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data;
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag [`L3NUM_REQUESTS-1:0]; wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
wire l3_core_rsp_ready; wire l3_core_rsp_ready;
wire l3_snp_fwdout_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
wire l3_snp_fwdout_invalidate [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate;
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
wire l3_snp_fwdout_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
wire l3_snp_fwdin_valid [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid;
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
wire l3_snp_fwdin_ready [`NUM_CLUSTERS-1:0]; wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
for (i = 0; i < `L3NUM_REQUESTS; i++) begin for (i = 0; i < `L3NUM_REQUESTS; i++) begin
// Core Request // Core Request

Submodule hw/rtl/fp_cores/fpu_div_sqrt_mvp added at d9a27f3c4e

View File

@@ -11,7 +11,7 @@ interface VX_alu_req_if ();
wire [`ALU_BITS-1:0] alu_op; wire [`ALU_BITS-1:0] alu_op;
wire [`WB_BITS-1:0] wb; wire wb;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs1_data;

View File

@@ -10,8 +10,7 @@ interface VX_commit_if ();
wire [31:0] curr_PC; wire [31:0] curr_PC;
wire [`NUM_THREADS-1:0][31:0] data; wire [`NUM_THREADS-1:0][31:0] data;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire [`WB_BITS-1:0] wb; wire wb;
wire is_io;
wire ready; wire ready;
endinterface endinterface

View File

@@ -15,7 +15,7 @@ interface VX_csr_req_if ();
wire [31:0] csr_mask; wire [31:0] csr_mask;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire [`WB_BITS-1:0] wb; wire wb;
wire is_io; wire is_io;
wire ready; wire ready;

View File

@@ -24,7 +24,14 @@ interface VX_decode_if ();
wire use_rs1; wire use_rs1;
wire use_rs2; wire use_rs2;
wire [`WB_BITS-1:0] wb; // FP states
wire [`NR_BITS-1:0] rs3;
wire use_rs3;
wire rs1_is_fp;
wire rs2_is_fp;
wire [`FRM_BITS-1:0] frm;
wire wb;
wire ready; wire ready;

View File

@@ -0,0 +1,16 @@
`ifndef VX_FPU_FROM_CSR_IF
`define VX_FPU_FROM_CSR_IF
`include "VX_define.vh"
// Signal bundle exposing a per-warp `frm` value.
// The interface declares no ports or modports; it only groups the wire below.
// NOTE(review): by its name this carries CSR state towards the FPU, and `frm`
// is presumably the floating-point rounding mode -- confirm against the CSR
// unit that drives it.
interface VX_fpu_from_csr_if ();
// NOTE(review): these macros presumably suppress lint warnings (e.g. for
// unused signals) around the declaration -- see VX_define.vh to confirm.
`IGNORE_WARNINGS_BEGIN
// Packed 2-D array: NUM_WARPS entries, each FRM_BITS wide (one entry per warp).
wire [`NUM_WARPS-1:0][`FRM_BITS-1:0] frm;
`IGNORE_WARNINGS_END
endinterface
`endif

View File

@@ -0,0 +1,26 @@
`ifndef VX_FPU_REQ_IF
`define VX_FPU_REQ_IF
`include "VX_define.vh"
// Signal bundle for a floating-point unit request.
// The interface declares no ports or modports; it only groups the wires below.
// NOTE(review): signal roles are inferred from their names and widths and
// from the sibling *_req_if interfaces -- confirm against the FPU unit and
// the issue logic that connect to this interface.
interface VX_fpu_req_if ();
// One bit per thread (NUM_THREADS wide); presumably the active-thread mask.
wire [`NUM_THREADS-1:0] valid;
// Warp identifier, NW_BITS wide.
wire [`NW_BITS-1:0] warp_num;
// 32-bit program counter value accompanying the request.
wire [31:0] curr_PC;
// Operation selector, FPU_BITS wide.
wire [`FPU_BITS-1:0] fpu_op;
// FRM_BITS-wide field; presumably the rounding mode -- TODO confirm.
wire [`FRM_BITS-1:0] frm;
// Single-bit flag; presumably set when the result is written back to `rd`.
wire wb;
// Register index, NR_BITS wide; presumably the destination register.
wire [`NR_BITS-1:0] rd;
// Three operand buses, each holding one 32-bit value per thread.
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
// Single-bit handshake signal; presumably driven by the consumer as
// back-pressure -- TODO confirm direction.
wire ready;
endinterface
endinterface
`endif

View File

@@ -0,0 +1,23 @@
`ifndef VX_FPU_TO_CSR_IF
`define VX_FPU_TO_CSR_IF
`include "VX_define.vh"
// Signal bundle reporting floating-point status flags for one warp.
// The interface declares no ports or modports; it only groups the wires below.
// NOTE(review): by its name this carries FPU results towards the CSR unit;
// the fflags_* suffixes match the RISC-V `fflags` accrued-exception bits
// (NV, DZ, OF, UF, NX) -- confirm against the CSR unit that consumes them.
interface VX_fpu_to_csr_if ();
// NOTE(review): these macros presumably suppress lint warnings (e.g. for
// unused signals) around the declarations -- see VX_define.vh to confirm.
`IGNORE_WARNINGS_BEGIN
// Single-bit qualifier; presumably marks the cycle in which the flags apply.
wire valid;
// Warp identifier, NW_BITS wide.
wire [`NW_BITS-1:0] warp_num;
// One wire per flag; in RISC-V naming: NV = invalid operation,
// DZ = divide by zero, OF = overflow, UF = underflow, NX = inexact.
wire fflags_NV;
wire fflags_DZ;
wire fflags_OF;
wire fflags_UF;
wire fflags_NX;
`IGNORE_WARNINGS_END
endinterface
endinterface
`endif

View File

@@ -7,6 +7,7 @@ interface VX_gpr_data_if ();
wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
endinterface endinterface

View File

@@ -12,7 +12,7 @@ interface VX_lsu_req_if ();
wire rw; wire rw;
wire [`BYTEEN_BITS-1:0] byteen; wire [`BYTEEN_BITS-1:0] byteen;
wire [`WB_BITS-1:0] wb; wire wb;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] store_data; wire [`NUM_THREADS-1:0][31:0] store_data;

View File

@@ -11,7 +11,7 @@ interface VX_mul_req_if ();
wire [`MUL_BITS-1:0] mul_op; wire [`MUL_BITS-1:0] mul_op;
wire [`WB_BITS-1:0] wb; wire wb;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs1_data;

View File

@@ -9,6 +9,7 @@ interface VX_wb_if ();
wire [`NW_BITS-1:0] warp_num; wire [`NW_BITS-1:0] warp_num;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] data; wire [`NUM_THREADS-1:0][31:0] data;
wire is_fp;
wire ready; wire ready;
endinterface endinterface

View File

@@ -2,7 +2,7 @@
module VX_tex_mgr ( module VX_tex_mgr (
input wire clk, input wire clk,
input wire reset, input wire reset
); );
//-- //--

View File

@@ -11,7 +11,7 @@ module VX_tex_unit #(
parameter MAXAMW = 2, parameter MAXAMW = 2,
parameter TAGW = 16, parameter TAGW = 16,
parameter NUMCRQS = 32, parameter NUMCRQS = 32
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,

View File

@@ -11,7 +11,7 @@ double sc_time_stamp() {
Simulator::Simulator() { Simulator::Simulator() {
// force random values for unitialized signals // force random values for unitialized signals
Verilated::randReset(2); Verilated::randReset(1);
// Turn off assertion before reset // Turn off assertion before reset
Verilated::assertOn(false); Verilated::assertOn(false);
@@ -105,9 +105,8 @@ void Simulator::eval_dram_bus() {
if (!dram_rsp_active_) { if (!dram_rsp_active_) {
if (dequeue_index != -1) { if (dequeue_index != -1) {
vortex_->dram_rsp_valid = 1; vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE); memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
free(dram_rsp_vec_[dequeue_index].data);
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
dram_rsp_active_ = true; dram_rsp_active_ = true;
} else { } else {
@@ -141,9 +140,8 @@ void Simulator::eval_dram_bus() {
} else { } else {
dram_req_t dram_req; dram_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY; dram_req.cycles_left = DRAM_LATENCY;
dram_req.data = (uint8_t*)malloc(GLOBAL_BLOCK_SIZE);
dram_req.tag = vortex_->dram_req_tag; dram_req.tag = vortex_->dram_req_tag;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.data); ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
dram_rsp_vec_.push_back(dram_req); dram_rsp_vec_.push_back(dram_req);
} }
} }

View File

@@ -21,7 +21,7 @@
typedef struct { typedef struct {
int cycles_left; int cycles_left;
uint8_t *data; std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
unsigned tag; unsigned tag;
} dram_req_t; } dram_req_t;

11
hw/simulate/verilator.vlt Normal file
View File

@@ -0,0 +1,11 @@
`verilator_config
// Lint waivers scoped to the fpnew sources: each rule below is switched off
// only for files matching "../rtl/fp_cores/fpnew/*", so the rest of the
// design is still linted with these rules enabled.
lint_off -rule BLKANDNBLK -file "../rtl/fp_cores/fpnew/*"
lint_off -rule UNOPTFLAT -file "../rtl/fp_cores/fpnew/*"
lint_off -rule WIDTH -file "../rtl/fp_cores/fpnew/*"
lint_off -rule UNUSED -file "../rtl/fp_cores/fpnew/*"
lint_off -rule LITENDIAN -file "../rtl/fp_cores/fpnew/*"
lint_off -rule IMPORTSTAR -file "../rtl/fp_cores/fpnew/*"
lint_off -rule PINCONNECTEMPTY -file "../rtl/fp_cores/fpnew/*"
// CASEINCOMPLETE is deliberately left enabled; the waiver is kept here
// commented out for reference.
//lint_off -rule CASEINCOMPLETE -file "../rtl/fp_cores/fpnew/*"

View File

@@ -12,7 +12,7 @@ echo "inc_list=$inc_list"
{ {
# read design sources # read design sources
for dir in $dir_list; do for dir in $dir_list; do
for file in $(find $dir -name '*.v' -o -name '*.sv' -type f) for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
do do
echo "read_verilog -sv $inc_list $file" echo "read_verilog -sv $inc_list $file"
done done