merged fpu_port branch
This commit is contained in:
@@ -1,4 +1,7 @@
|
||||
.PHONY: build_config
|
||||
|
||||
build_config:
|
||||
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
|
||||
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
|
||||
|
||||
clean:
|
||||
rm ./rtl/VX_user_config.vh ./VX_config.h
|
||||
@@ -4,13 +4,18 @@ FPGA_BUILD_DIR=build_fpga
|
||||
|
||||
all: ase-1c
|
||||
|
||||
ase-1c: setup-ase-1c
|
||||
sources.txt:
|
||||
./gen_sources.sh > sources.txt
|
||||
|
||||
gen_sources: sources.txt
|
||||
|
||||
ase-1c: setup-ase-1c gen_sources
|
||||
make -C $(ASE_BUILD_DIR)_1c
|
||||
|
||||
ase-2c: setup-ase-2c
|
||||
ase-2c: setup-ase-2c gen_sources
|
||||
make -C $(ASE_BUILD_DIR)_2c
|
||||
|
||||
ase-4c: setup-ase-4c
|
||||
ase-4c: setup-ase-4c gen_sources
|
||||
make -C $(ASE_BUILD_DIR)_4c
|
||||
|
||||
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile
|
||||
@@ -19,22 +24,22 @@ setup-ase-2c: $(ASE_BUILD_DIR)_2c/Makefile
|
||||
|
||||
setup-ase-4c: $(ASE_BUILD_DIR)_4c/Makefile
|
||||
|
||||
$(ASE_BUILD_DIR)_1c/Makefile:
|
||||
$(ASE_BUILD_DIR)_1c/Makefile: sources.txt
|
||||
afu_sim_setup -s sources_1c.txt $(ASE_BUILD_DIR)_1c
|
||||
|
||||
$(ASE_BUILD_DIR)_2c/Makefile:
|
||||
$(ASE_BUILD_DIR)_2c/Makefile: sources.txt
|
||||
afu_sim_setup -s sources_2c.txt $(ASE_BUILD_DIR)_2c
|
||||
|
||||
$(ASE_BUILD_DIR)_4c/Makefile:
|
||||
$(ASE_BUILD_DIR)_4c/Makefile: sources.txt
|
||||
afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c
|
||||
|
||||
fpga-1c: setup-fpga-1c
|
||||
fpga-1c: setup-fpga-1c gen_sources
|
||||
cd $(FPGA_BUILD_DIR)_1c && qsub-synth
|
||||
|
||||
fpga-2c: setup-fpga-2c
|
||||
fpga-2c: setup-fpga-2c gen_sources
|
||||
cd $(FPGA_BUILD_DIR)_2c && qsub-synth
|
||||
|
||||
fpga-4c: setup-fpga-4c
|
||||
fpga-4c: setup-fpga-4c gen_sources
|
||||
cd $(FPGA_BUILD_DIR)_4c && qsub-synth
|
||||
|
||||
setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf
|
||||
@@ -62,20 +67,20 @@ run-ase-4c:
|
||||
cd $(ASE_BUILD_DIR)_4c && make sim
|
||||
|
||||
clean-ase-1c:
|
||||
rm -rf $(ASE_BUILD_DIR)_1c
|
||||
rm -rf $(ASE_BUILD_DIR)_1c sources.txt
|
||||
|
||||
clean-ase-2c:
|
||||
rm -rf $(ASE_BUILD_DIR)_2c
|
||||
rm -rf $(ASE_BUILD_DIR)_2c sources.txt
|
||||
|
||||
clean-ase-4c:
|
||||
rm -rf $(ASE_BUILD_DIR)_4c
|
||||
rm -rf $(ASE_BUILD_DIR)_4c sources.txt
|
||||
|
||||
clean-fpga-1c:
|
||||
rm -rf $(FPGA_BUILD_DIR)_1c
|
||||
rm -rf $(FPGA_BUILD_DIR)_1c sources.txt
|
||||
|
||||
clean-fpga-2c:
|
||||
rm -rf $(FPGA_BUILD_DIR)_2c
|
||||
rm -rf $(FPGA_BUILD_DIR)_2c sources.txt
|
||||
|
||||
clean-fpga-4c:
|
||||
rm -rf $(FPGA_BUILD_DIR)_4c
|
||||
rm -rf $(FPGA_BUILD_DIR)_4c sources.txt
|
||||
|
||||
|
||||
@@ -60,8 +60,8 @@ qsub-sim
|
||||
make ase
|
||||
|
||||
# tests
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
||||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||
|
||||
# modify "vsim_run.tcl" to dump VCD trace
|
||||
|
||||
17
hw/opae/gen_sources.sh
Executable file
17
hw/opae/gen_sources.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
#
# Print the RTL source list on stdout (the Makefile redirects the output of
# this script into sources.txt).
#
# For every directory in dir_list the script emits:
#   1. a "+incdir+<dir>" line, so the directory is on the include path;
#   2. one line per Verilog (*.v) / SystemVerilog (*.sv) file located directly
#      inside that directory (sub-directories are not descended into).

# Directories to scan, in the order they must appear in the generated list.
dir_list=(
    ../rtl/libs
    ../rtl/cache
    ../rtl/interfaces
    ../rtl
    ../rtl/fp_cores/fpnew/src/common_cells/include
    ../rtl/fp_cores
    ../rtl/fp_cores/altera
    ../rtl/fp_cores/fpnew/src/common_cells/src
    ../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl
    ../rtl/fp_cores/fpnew/src
)

# read design sources
for dir in "${dir_list[@]}"; do
    echo "+incdir+$dir"
    # The two -name tests must be grouped: find's implicit AND binds tighter
    # than -o, so without the parentheses "-type f" would only constrain the
    # '*.sv' test and any non-regular entry named '*.v' would be listed too.
    # find prints one path per line itself, which also avoids word-splitting
    # the paths through an unquoted $(...) expansion.
    find "$dir" -maxdepth 1 -type f \( -name '*.v' -o -name '*.sv' \)
done
|
||||
@@ -1,112 +0,0 @@
|
||||
vortex_afu.json
|
||||
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
#+define+SCOPE
|
||||
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
#+define+DBG_PRINT_CACHE_BANK
|
||||
#+define+DBG_PRINT_CACHE_SNP
|
||||
#+define+DBG_PRINT_CACHE_MSRQ
|
||||
#+define+DBG_PRINT_DRAM
|
||||
#+define+DBG_PRINT_PIPELINE
|
||||
#+define+DBG_PRINT_OPAE
|
||||
#+define+DBG_PRINT_SCOPE
|
||||
|
||||
+incdir+.
|
||||
+incdir+../rtl
|
||||
+incdir+../rtl/interfaces
|
||||
+incdir+../rtl/pipe_regs
|
||||
+incdir+../rtl/cache
|
||||
+incdir+../rtl/libs
|
||||
|
||||
../rtl/VX_user_config.vh
|
||||
../rtl/VX_config.vh
|
||||
../rtl/VX_define.vh
|
||||
|
||||
../rtl/cache/VX_cache_config.vh
|
||||
../rtl/cache/VX_cache.v
|
||||
../rtl/cache/VX_cache_core_rsp_merge.v
|
||||
../rtl/cache/VX_cache_core_req_bank_sel.v
|
||||
../rtl/cache/VX_cache_dram_req_arb.v
|
||||
../rtl/cache/VX_cache_dram_fill_arb.v
|
||||
../rtl/cache/VX_cache_miss_resrv.v
|
||||
../rtl/cache/VX_bank.v
|
||||
../rtl/cache/VX_bank_core_req_arb.v
|
||||
../rtl/cache/VX_snp_rsp_arb.v
|
||||
../rtl/cache/VX_tag_data_access.v
|
||||
../rtl/cache/VX_tag_data_structure.v
|
||||
../rtl/cache/VX_snp_forwarder.v
|
||||
../rtl/cache/VX_prefetcher.v
|
||||
|
||||
../rtl/interfaces/VX_branch_rsp_if.v
|
||||
../rtl/interfaces/VX_cache_core_req_if.v
|
||||
../rtl/interfaces/VX_cache_core_rsp_if.v
|
||||
../rtl/interfaces/VX_cache_dram_req_if.v
|
||||
../rtl/interfaces/VX_cache_dram_rsp_if.v
|
||||
../rtl/interfaces/VX_cache_snp_req_if.v
|
||||
../rtl/interfaces/VX_cache_snp_rsp_if.v
|
||||
../rtl/interfaces/VX_csr_req_if.v
|
||||
../rtl/interfaces/VX_csr_io_req_if.v
|
||||
../rtl/interfaces/VX_csr_io_rsp_if.v
|
||||
../rtl/interfaces/VX_exec_unit_req_if.v
|
||||
../rtl/interfaces/VX_backend_req_if.v
|
||||
../rtl/interfaces/VX_gpr_read_if.v
|
||||
../rtl/interfaces/VX_gpu_inst_req_if.v
|
||||
../rtl/interfaces/VX_inst_meta_if.v
|
||||
../rtl/interfaces/VX_jal_rsp_if.v
|
||||
../rtl/interfaces/VX_join_if.v
|
||||
../rtl/interfaces/VX_lsu_req_if.v
|
||||
../rtl/interfaces/VX_warp_ctl_if.v
|
||||
../rtl/interfaces/VX_wb_if.v
|
||||
../rtl/interfaces/VX_wstall_if.v
|
||||
|
||||
../rtl/libs/VX_generic_register.v
|
||||
../rtl/libs/VX_mult.v
|
||||
../rtl/libs/VX_divide.v
|
||||
../rtl/libs/VX_generic_stack.v
|
||||
../rtl/libs/VX_priority_encoder.v
|
||||
../rtl/libs/VX_generic_queue.v
|
||||
../rtl/libs/VX_indexable_queue.v
|
||||
../rtl/libs/VX_fair_arbiter.v
|
||||
../rtl/libs/VX_fixed_arbiter.v
|
||||
../rtl/libs/VX_rr_arbiter.v
|
||||
../rtl/libs/VX_countones.v
|
||||
../rtl/libs/VX_scope.v
|
||||
|
||||
../rtl/Vortex.v
|
||||
../rtl/VX_cluster.v
|
||||
../rtl/VX_core.v
|
||||
../rtl/VX_mem_unit.v
|
||||
../rtl/VX_pipeline.v
|
||||
../rtl/VX_front_end.v
|
||||
../rtl/VX_back_end.v
|
||||
../rtl/VX_fetch.v
|
||||
../rtl/VX_scheduler.v
|
||||
../rtl/VX_exec_unit.v
|
||||
../rtl/VX_warp.v
|
||||
../rtl/VX_icache_stage.v
|
||||
../rtl/VX_gpr_wrapper.v
|
||||
../rtl/VX_gpu_inst.v
|
||||
../rtl/VX_writeback.v
|
||||
../rtl/VX_csr_pipe.v
|
||||
../rtl/VX_csr_data.v
|
||||
../rtl/VX_csr_arb.v
|
||||
../rtl/VX_csr_io_arb.v
|
||||
../rtl/VX_warp_sched.v
|
||||
../rtl/VX_gpr_ram.v
|
||||
../rtl/VX_gpr_stage.v
|
||||
../rtl/VX_alu_unit.v
|
||||
../rtl/VX_lsu_unit.v
|
||||
../rtl/VX_decode.v
|
||||
../rtl/VX_inst_multiplex.v
|
||||
../rtl/VX_dcache_arb.v
|
||||
../rtl/VX_mem_arb.v
|
||||
../rtl/VX_f_d_reg.v
|
||||
../rtl/VX_i_d_reg.v
|
||||
../rtl/VX_d_e_reg.v
|
||||
|
||||
ccip_interface_reg.sv
|
||||
ccip_std_afu.sv
|
||||
vortex_afu.sv
|
||||
@@ -1,3 +1,21 @@
|
||||
+define+NUM_CORES=1
|
||||
|
||||
#+define+SCOPE
|
||||
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
#+define+DBG_PRINT_CACHE_BANK
|
||||
#+define+DBG_PRINT_CACHE_SNP
|
||||
#+define+DBG_PRINT_CACHE_MSRQ
|
||||
#+define+DBG_PRINT_DRAM
|
||||
#+define+DBG_PRINT_PIPELINE
|
||||
#+define+DBG_PRINT_OPAE
|
||||
#+define+DBG_PRINT_SCOPE
|
||||
|
||||
vortex_afu.json
|
||||
QI:vortex_afu.qsf
|
||||
ccip_interface_reg.sv
|
||||
ccip_std_afu.sv
|
||||
vortex_afu.sv
|
||||
|
||||
C:sources.txt
|
||||
@@ -1,4 +1,10 @@
|
||||
+define+NUM_CORES=2
|
||||
+define+L2_ENABLE=0
|
||||
|
||||
vortex_afu.json
|
||||
QI:vortex_afu.qsf
|
||||
ccip_interface_reg.sv
|
||||
ccip_std_afu.sv
|
||||
vortex_afu.sv
|
||||
|
||||
C:sources.txt
|
||||
@@ -1,4 +1,10 @@
|
||||
+define+NUM_CORES=4
|
||||
+define+L2_ENABLE=0
|
||||
|
||||
vortex_afu.json
|
||||
QI:vortex_afu.qsf
|
||||
ccip_interface_reg.sv
|
||||
ccip_std_afu.sv
|
||||
vortex_afu.sv
|
||||
|
||||
C:sources.txt
|
||||
@@ -1017,8 +1017,8 @@ localparam SCOPE_SR_DEPTH = 2;
|
||||
|
||||
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|
||||
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
|
||||
|| ((| scope_dcache_req_valid) && scope_dcache_req_ready)
|
||||
|| ((| scope_dcache_rsp_valid) && scope_dcache_rsp_ready)
|
||||
|| (scope_dcache_req_valid && scope_dcache_req_ready)
|
||||
|| (scope_dcache_rsp_valid && scope_dcache_rsp_ready)
|
||||
|| (scope_dram_req_valid && scope_dram_req_ready)
|
||||
|| (scope_dram_rsp_valid && scope_dram_rsp_ready)
|
||||
|| (scope_snp_req_valid && scope_snp_req_ready)
|
||||
|
||||
@@ -11,9 +11,9 @@ module VX_alu_unit #(
|
||||
|
||||
// Outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_commit_if alu_commit_if
|
||||
VX_exu_to_cmt_if alu_commit_if
|
||||
);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||
wire [`NUM_THREADS-1:0][32:0] shift_result;
|
||||
|
||||
@@ -46,14 +46,14 @@ module VX_alu_unit #(
|
||||
default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`NT_BITS-1:0] br_result_index;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N(`NUM_THREADS)
|
||||
) choose_alu_result (
|
||||
.data_in (alu_req_if.valid),
|
||||
.data_in (alu_req_if.thread_mask),
|
||||
.data_out (br_result_index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
@@ -61,53 +61,46 @@ module VX_alu_unit #(
|
||||
wire [32:0] br_result = sub_result[br_result_index];
|
||||
wire br_sign = br_result[32];
|
||||
wire br_nzero = (| br_result[31:0]);
|
||||
|
||||
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.alu_op);
|
||||
|
||||
reg br_taken;
|
||||
always @(*) begin
|
||||
case (br_op)
|
||||
`BR_NE: br_taken = br_nzero;
|
||||
`BR_EQ: br_taken = ~br_nzero;
|
||||
`BR_LT,
|
||||
`BR_LTU: br_taken = br_sign;
|
||||
`BR_GE,
|
||||
`BR_GEU: br_taken = ~br_sign;
|
||||
default: br_taken = 1'b1;
|
||||
endcase
|
||||
end
|
||||
wire br_sign_s1;
|
||||
wire br_nzero_s1;
|
||||
|
||||
wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : `BR_NO;
|
||||
wire [`BR_BITS-1:0] br_op_s1;
|
||||
|
||||
wire [31:0] br_addr = (br_op == `BR_JALR) ? alu_req_if.rs1_data[br_result_index] : alu_req_if.curr_PC;
|
||||
wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset);
|
||||
|
||||
wire is_jal = (alu_op == `ALU_JAL || alu_op == `ALU_JALR);
|
||||
wire is_br_valid = `IS_BR_OP(alu_op) && (| alu_req_if.valid);
|
||||
|
||||
wire is_jal = (alu_op == `ALU_JAL || alu_op == `ALU_JALR);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
||||
|
||||
wire stall = ~alu_commit_if.ready && (| alu_commit_if.valid);
|
||||
wire stall = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + 1 + 32)
|
||||
) branch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({is_br_valid, alu_req_if.warp_num, br_taken, br_dest}),
|
||||
.out ({branch_ctl_if.valid, branch_ctl_if.warp_num, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
|
||||
.N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + 1 + 1)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result}),
|
||||
.out ({alu_commit_if.valid, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data})
|
||||
);
|
||||
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_sign, br_nzero}),
|
||||
.out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_s1, branch_ctl_if.dest, br_sign_s1, br_nzero_s1})
|
||||
);
|
||||
|
||||
reg br_taken;
|
||||
always @(*) begin
|
||||
case (br_op_s1)
|
||||
`BR_NE: br_taken = br_nzero_s1;
|
||||
`BR_EQ: br_taken = ~br_nzero_s1;
|
||||
`BR_LT,
|
||||
`BR_LTU: br_taken = br_sign_s1;
|
||||
`BR_GE,
|
||||
`BR_GEU: br_taken = ~br_sign_s1;
|
||||
default: br_taken = 1'b1;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign branch_ctl_if.valid = alu_commit_if.valid && (br_op_s1 != `BR_NO);
|
||||
assign branch_ctl_if.taken = br_taken;
|
||||
|
||||
assign alu_req_if.ready = ~stall;
|
||||
|
||||
|
||||
@@ -3,57 +3,79 @@
|
||||
module VX_commit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
VX_fpu_to_cmt_if fpu_commit_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_wb_if writeback_if,
|
||||
VX_perf_cntrs_if perf_cntrs_if
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_csr_if cmt_to_csr_if
|
||||
);
|
||||
// update CSRs
|
||||
|
||||
wire [`NUM_EXS-1:0] commited_mask;
|
||||
assign commited_mask = {((| alu_commit_if.valid) && alu_commit_if.ready),
|
||||
((| lsu_commit_if.valid) && lsu_commit_if.ready),
|
||||
((| mul_commit_if.valid) && mul_commit_if.ready),
|
||||
((| csr_commit_if.valid) && csr_commit_if.ready),
|
||||
((| gpu_commit_if.valid) && gpu_commit_if.ready)};
|
||||
assign commited_mask = {(alu_commit_if.valid && alu_commit_if.ready),
|
||||
(lsu_commit_if.valid && lsu_commit_if.ready),
|
||||
(csr_commit_if.valid && csr_commit_if.ready),
|
||||
(mul_commit_if.valid && mul_commit_if.ready),
|
||||
(fpu_commit_if.valid && fpu_commit_if.ready),
|
||||
(gpu_commit_if.valid && gpu_commit_if.ready)};
|
||||
|
||||
wire [`NE_BITS:0] num_commits;
|
||||
|
||||
VX_countones #(
|
||||
VX_countones #(
|
||||
.N(`NUM_EXS)
|
||||
) valids_counter (
|
||||
.valids(commited_mask),
|
||||
.count (num_commits)
|
||||
);
|
||||
|
||||
wire has_committed = (| commited_mask);
|
||||
|
||||
reg [63:0] total_cycles, total_instrs;
|
||||
assign cmt_to_csr_if.valid = (| commited_mask);
|
||||
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
|
||||
assign cmt_to_csr_if.num_commits = num_commits;
|
||||
|
||||
assign cmt_to_csr_if.has_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.has_fflags;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
total_cycles <= 0;
|
||||
total_instrs <= 0;
|
||||
end else begin
|
||||
total_cycles <= total_cycles + 1;
|
||||
if (has_committed) begin
|
||||
total_instrs <= total_instrs + 64'(num_commits);
|
||||
integer i;
|
||||
|
||||
reg [`FFG_BITS-1:0] fflags;
|
||||
always @(*) begin
|
||||
fflags = 0;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
||||
fflags[0] |= fpu_commit_if.fflags[i][0];
|
||||
fflags[1] |= fpu_commit_if.fflags[i][1];
|
||||
fflags[2] |= fpu_commit_if.fflags[i][2];
|
||||
fflags[3] |= fpu_commit_if.fflags[i][3];
|
||||
fflags[4] |= fpu_commit_if.fflags[i][4];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign cmt_to_csr_if.fflags = fflags;
|
||||
|
||||
assign perf_cntrs_if.total_cycles = total_cycles;
|
||||
assign perf_cntrs_if.total_instrs = total_instrs;
|
||||
// Notify issue stage
|
||||
|
||||
assign gpu_commit_if.ready = 1'b1; // doesn't writeback
|
||||
assign cmt_to_issue_if.alu_valid = alu_commit_if.valid && alu_commit_if.ready;
|
||||
assign cmt_to_issue_if.lsu_valid = lsu_commit_if.valid && lsu_commit_if.ready;
|
||||
assign cmt_to_issue_if.csr_valid = csr_commit_if.valid && csr_commit_if.ready;
|
||||
assign cmt_to_issue_if.mul_valid = mul_commit_if.valid && mul_commit_if.ready;
|
||||
assign cmt_to_issue_if.fpu_valid = fpu_commit_if.valid && fpu_commit_if.ready;
|
||||
assign cmt_to_issue_if.gpu_valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
|
||||
assign cmt_to_issue_if.alu_tag = alu_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.lsu_tag = lsu_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.csr_tag = csr_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.mul_tag = mul_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.fpu_tag = fpu_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.gpu_tag = gpu_commit_if.issue_tag;
|
||||
|
||||
VX_writeback #(
|
||||
.CORE_ID(CORE_ID)
|
||||
@@ -65,26 +87,32 @@ module VX_commit #(
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| alu_commit_if.valid) && alu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
|
||||
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.alu_data.warp_num, cmt_to_issue_if.alu_data.curr_PC, alu_commit_if.issue_tag, cmt_to_issue_if.alu_data.thread_mask, cmt_to_issue_if.alu_data.wb, cmt_to_issue_if.alu_data.rd, alu_commit_if.data);
|
||||
end
|
||||
if ((| lsu_commit_if.valid) && lsu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
|
||||
if (lsu_commit_if.valid && lsu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.lsu_data.warp_num, cmt_to_issue_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, cmt_to_issue_if.lsu_data.thread_mask, cmt_to_issue_if.lsu_data.wb, cmt_to_issue_if.lsu_data.rd, lsu_commit_if.data);
|
||||
end
|
||||
if ((| mul_commit_if.valid) && mul_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.csr_data.warp_num, cmt_to_issue_if.csr_data.curr_PC, csr_commit_if.issue_tag, cmt_to_issue_if.csr_data.thread_mask, cmt_to_issue_if.csr_data.wb, cmt_to_issue_if.csr_data.rd, csr_commit_if.data);
|
||||
end
|
||||
if (mul_commit_if.valid && mul_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.mul_data.warp_num, cmt_to_issue_if.mul_data.curr_PC, mul_commit_if.issue_tag, cmt_to_issue_if.mul_data.thread_mask, cmt_to_issue_if.mul_data.wb, cmt_to_issue_if.mul_data.rd, mul_commit_if.data);
|
||||
end
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.fpu_data.warp_num, cmt_to_issue_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, cmt_to_issue_if.fpu_data.thread_mask, cmt_to_issue_if.fpu_data.wb, cmt_to_issue_if.fpu_data.rd, fpu_commit_if.data);
|
||||
end
|
||||
if ((| csr_commit_if.valid) && csr_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
|
||||
end
|
||||
if ((| gpu_commit_if.valid) && gpu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
|
||||
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.gpu_data.warp_num, cmt_to_issue_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, cmt_to_issue_if.gpu_data.thread_mask, cmt_to_issue_if.gpu_data.wb, cmt_to_issue_if.gpu_data.rd, gpu_commit_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -27,10 +27,6 @@
|
||||
`define GLOBAL_BLOCK_SIZE 16
|
||||
`endif
|
||||
|
||||
`ifndef NUM_CSRS
|
||||
`define NUM_CSRS 1024
|
||||
`endif
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 32'h80000000
|
||||
`endif
|
||||
@@ -39,10 +35,6 @@
|
||||
`define SHARED_MEM_BASE_ADDR 32'h6FFFF000
|
||||
`endif
|
||||
|
||||
`ifndef STACK_BASE_ADDR
|
||||
`define STACK_BASE_ADDR 20'h6FFFF
|
||||
`endif
|
||||
|
||||
`ifndef IO_BUS_BASE_ADDR
|
||||
`define IO_BUS_BASE_ADDR 32'hFFFFFF00
|
||||
`endif
|
||||
@@ -59,37 +51,68 @@
|
||||
`define L3_ENABLE (`NUM_CLUSTERS > 1)
|
||||
`endif
|
||||
|
||||
`ifndef EXT_M_ENABLE
|
||||
`define EXT_M_ENABLE 1
|
||||
`endif
|
||||
`define EXT_M_ENABLE
|
||||
|
||||
// Configuration Values =======================================================
|
||||
`define EXT_F_ENABLE
|
||||
|
||||
// Device identification
|
||||
`define VENDOR_ID 0
|
||||
`define ARCHITECTURE_ID 0
|
||||
`define IMPLEMENTATION_ID 0
|
||||
|
||||
// CSR Addresses ==============================================================
|
||||
// MUL Request Queue Size
|
||||
`ifndef MULRQ_SIZE
|
||||
`define MULRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
`define CSR_VEND_ID 12'hF11
|
||||
`define CSR_ARCH_ID 12'hF12
|
||||
`define CSR_IMPL_ID 12'hF13
|
||||
`define CSR_GTID 12'hF14
|
||||
// FPU Request Queue Size
|
||||
`ifndef FPURQ_SIZE
|
||||
`define FPURQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Size of issue queue
|
||||
`ifndef ISSUEQ_SIZE
|
||||
`define ISSUEQ_SIZE (8 + `NUM_WARPS)
|
||||
`endif
|
||||
|
||||
// CSR Addresses //////////////////////////////////////////////////////////////
|
||||
|
||||
`define CSR_FFLAGS 12'h001
|
||||
`define CSR_FRM 12'h002
|
||||
`define CSR_FCSR 12'h003
|
||||
|
||||
`define CSR_LTID 12'h020
|
||||
`define CSR_LWID 12'h021
|
||||
`define CSR_GTID 12'h022
|
||||
`define CSR_GWID 12'h023
|
||||
`define CSR_GCID 12'h024
|
||||
`define CSR_NT 12'h025
|
||||
`define CSR_NW 12'h026
|
||||
`define CSR_NC 12'h027
|
||||
|
||||
`define CSR_CYCLE_L 12'hC00
|
||||
`define CSR_CYCLE_H 12'hC80
|
||||
`define CSR_INSTR_L 12'hC02
|
||||
`define CSR_INSTR_H 12'hC82
|
||||
`define CSR_SATP 12'h180
|
||||
|
||||
`define CSR_PMPCFG0 12'h3A0
|
||||
`define CSR_PMPADDR0 12'h3B0
|
||||
|
||||
`define CSR_MSTATUS 12'h300
|
||||
`define CSR_MISA 12'h301
|
||||
`define CSR_MEDELEG 12'h302
|
||||
`define CSR_MIDELEG 12'h303
|
||||
`define CSR_MIE 12'h304
|
||||
`define CSR_MTVEC 12'h305
|
||||
|
||||
`define CSR_MEPC 12'h341
|
||||
|
||||
`define CSR_CYCLE 12'hC00
|
||||
`define CSR_CYCLE_H 12'hC80
|
||||
`define CSR_INSTRET 12'hC02
|
||||
`define CSR_INSTRET_H 12'hC82
|
||||
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
`define CSR_MARCHID 12'hF12
|
||||
`define CSR_MIMPID 12'hF13
|
||||
`define CSR_MHARTID 12'hF14
|
||||
|
||||
// Dcache Configurable Knobs ==================================================
|
||||
|
||||
@@ -130,12 +153,12 @@
|
||||
|
||||
// Dram Fill Rsp Queue Size
|
||||
`ifndef DDFPQ_SIZE
|
||||
`define DDFPQ_SIZE 16
|
||||
`define DDFPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
`ifndef DSNRQ_SIZE
|
||||
`define DSNRQ_SIZE 16
|
||||
`define DSNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Core Writeback Queue Size
|
||||
@@ -155,7 +178,7 @@
|
||||
|
||||
// Prefetcher
|
||||
`ifndef DPRFQ_SIZE
|
||||
`define DPRFQ_SIZE 16
|
||||
`define DPRFQ_SIZE 8
|
||||
`endif
|
||||
|
||||
`ifndef DPRFQ_STRIDE
|
||||
@@ -201,7 +224,7 @@
|
||||
|
||||
// Dram Fill Rsp Queue Size
|
||||
`ifndef IDFPQ_SIZE
|
||||
`define IDFPQ_SIZE 16
|
||||
`define IDFPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Core Writeback Queue Size
|
||||
@@ -211,7 +234,7 @@
|
||||
|
||||
// Dram Writeback Queue Size
|
||||
`ifndef IDWBQ_SIZE
|
||||
`define IDWBQ_SIZE 16
|
||||
`define IDWBQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dram Fill Req Queue Size
|
||||
@@ -221,7 +244,7 @@
|
||||
|
||||
// Prefetcher
|
||||
`ifndef IPRFQ_SIZE
|
||||
`define IPRFQ_SIZE 16
|
||||
`define IPRFQ_SIZE 8
|
||||
`endif
|
||||
|
||||
`ifndef IPRFQ_STRIDE
|
||||
@@ -294,7 +317,7 @@
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L2CREQ_SIZE
|
||||
`define L2CREQ_SIZE 16
|
||||
`define L2CREQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -304,12 +327,12 @@
|
||||
|
||||
// Dram Fill Rsp Queue Size
|
||||
`ifndef L2DFPQ_SIZE
|
||||
`define L2DFPQ_SIZE 16
|
||||
`define L2DFPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
`ifndef L2SNRQ_SIZE
|
||||
`define L2SNRQ_SIZE 16
|
||||
`define L2SNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Core Writeback Queue Size
|
||||
@@ -319,7 +342,7 @@
|
||||
|
||||
// Dram Writeback Queue Size
|
||||
`ifndef L2DWBQ_SIZE
|
||||
`define L2DWBQ_SIZE 16
|
||||
`define L2DWBQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dram Fill Req Queue Size
|
||||
@@ -329,7 +352,7 @@
|
||||
|
||||
// Prefetcher
|
||||
`ifndef L2PRFQ_SIZE
|
||||
`define L2PRFQ_SIZE 16
|
||||
`define L2PRFQ_SIZE 8
|
||||
`endif
|
||||
|
||||
`ifndef L2PRFQ_STRIDE
|
||||
@@ -365,7 +388,7 @@
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L3CREQ_SIZE
|
||||
`define L3CREQ_SIZE 16
|
||||
`define L3CREQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Miss Reserv Queue Knob
|
||||
@@ -375,12 +398,12 @@
|
||||
|
||||
// Dram Fill Rsp Queue Size
|
||||
`ifndef L3DFPQ_SIZE
|
||||
`define L3DFPQ_SIZE 16
|
||||
`define L3DFPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
`ifndef L3SNRQ_SIZE
|
||||
`define L3SNRQ_SIZE 16
|
||||
`define L3SNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Core Writeback Queue Size
|
||||
@@ -390,7 +413,7 @@
|
||||
|
||||
// Dram Writeback Queue Size
|
||||
`ifndef L3DWBQ_SIZE
|
||||
`define L3DWBQ_SIZE 16
|
||||
`define L3DWBQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dram Fill Req Queue Size
|
||||
@@ -400,12 +423,11 @@
|
||||
|
||||
// Prefetcher
|
||||
`ifndef L3PRFQ_SIZE
|
||||
`define L3PRFQ_SIZE 16
|
||||
`define L3PRFQ_SIZE 8
|
||||
`endif
|
||||
|
||||
`ifndef L3PRFQ_STRIDE
|
||||
`define L3PRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// VX_CONFIG
|
||||
`endif
|
||||
|
||||
@@ -166,15 +166,15 @@ module VX_core #(
|
||||
VX_cache_core_req_if #(
|
||||
.NUM_REQUESTS(`INUM_REQUESTS),
|
||||
.WORD_SIZE(`IWORD_SIZE),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
|
||||
.CORE_TAG_WIDTH(`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS)
|
||||
) core_icache_req_if();
|
||||
|
||||
VX_cache_core_rsp_if #(
|
||||
.NUM_REQUESTS(`INUM_REQUESTS),
|
||||
.WORD_SIZE(`IWORD_SIZE),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
|
||||
.CORE_TAG_WIDTH(`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS)
|
||||
) core_icache_rsp_if();
|
||||
|
||||
VX_pipeline #(
|
||||
|
||||
@@ -12,43 +12,42 @@ module VX_csr_arb (
|
||||
VX_csr_req_if csr_req_if,
|
||||
|
||||
// input
|
||||
VX_commit_if csr_rsp_if,
|
||||
VX_exu_to_cmt_if csr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
VX_commit_if csr_commit_if
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
||||
input wire select_io_req,
|
||||
input wire select_io_rsp
|
||||
);
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire core_select = ~(| csr_io_req_if.valid);
|
||||
|
||||
// requests
|
||||
assign csr_req_if.valid = core_select ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
|
||||
assign csr_req_if.warp_num = core_select ? csr_core_req_if.warp_num : 0;
|
||||
assign csr_req_if.curr_PC = core_select ? csr_core_req_if.curr_PC : 0;
|
||||
assign csr_req_if.csr_op = core_select ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = core_select ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = core_select ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = core_select ? csr_core_req_if.rd : 0;
|
||||
assign csr_req_if.wb = core_select ? csr_core_req_if.wb : 0;
|
||||
assign csr_req_if.is_io = ~core_select;
|
||||
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
||||
assign csr_req_if.issue_tag = (~select_io_req) ? csr_core_req_if.issue_tag : 0;
|
||||
assign csr_req_if.warp_num = (~select_io_req) ? csr_core_req_if.warp_num : 0;
|
||||
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
||||
assign csr_req_if.csr_op = (~select_io_req) ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
||||
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
|
||||
assign csr_req_if.is_io = select_io_req;
|
||||
|
||||
assign csr_core_req_if.ready = csr_req_if.ready && core_select;
|
||||
assign csr_io_req_if.ready = csr_req_if.ready && ~core_select;
|
||||
assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req);
|
||||
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
|
||||
|
||||
// responses
|
||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io;
|
||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp;
|
||||
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
||||
|
||||
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}};
|
||||
assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
|
||||
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
|
||||
assign csr_commit_if.data = csr_rsp_if.data;
|
||||
assign csr_commit_if.rd = csr_rsp_if.rd;
|
||||
assign csr_commit_if.wb = csr_rsp_if.wb;
|
||||
assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp;
|
||||
assign csr_commit_if.issue_tag= csr_rsp_if.issue_tag;
|
||||
assign csr_commit_if.data = csr_rsp_if.data;
|
||||
|
||||
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -4,50 +4,136 @@ module VX_csr_data #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||
|
||||
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
|
||||
output reg[31:0] read_data,
|
||||
input wire write_enable,
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
|
||||
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
|
||||
`IGNORE_WARNINGS_END
|
||||
input wire[`CSR_WIDTH-1:0] write_data,
|
||||
input wire[`NW_BITS-1:0] warp_num,
|
||||
VX_perf_cntrs_if perf_cntrs_if
|
||||
);
|
||||
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
|
||||
|
||||
// cast address to physical CSR range
|
||||
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
|
||||
assign rd_addr = $size(rd_addr)'(read_addr);
|
||||
assign wr_addr = $size(wr_addr)'(write_addr);
|
||||
input wire read_enable,
|
||||
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
||||
output reg[31:0] read_data,
|
||||
|
||||
input wire write_enable,
|
||||
input wire[`CSR_ADDR_BITS-1:0] write_addr,
|
||||
input wire[`CSR_WIDTH-1:0] write_data
|
||||
);
|
||||
|
||||
reg [`CSR_WIDTH-1:0] csr_satp;
|
||||
reg [`CSR_WIDTH-1:0] csr_mstatus;
|
||||
reg [`CSR_WIDTH-1:0] csr_medeleg;
|
||||
reg [`CSR_WIDTH-1:0] csr_mideleg;
|
||||
reg [`CSR_WIDTH-1:0] csr_mie;
|
||||
reg [`CSR_WIDTH-1:0] csr_mtvec;
|
||||
reg [`CSR_WIDTH-1:0] csr_mepc;
|
||||
reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0];
|
||||
reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0];
|
||||
reg [63:0] csr_cycle;
|
||||
reg [63:0] csr_instret;
|
||||
|
||||
reg [`FFG_BITS-1:0] csr_fflags [`NUM_WARPS-1:0];
|
||||
reg [`FRM_BITS-1:0] csr_frm [`NUM_WARPS-1:0];
|
||||
reg [`FRM_BITS+`FFG_BITS-1:0] csr_fcsr [`NUM_WARPS-1:0]; // fflags + frm
|
||||
|
||||
// CSR write process (clocked).
// Two update sources are handled on each rising clock edge:
//   1. floating-point exception flags reported through cmt_to_csr_if, and
//   2. an explicit CSR write (write_enable / write_addr / write_data).
// The explicit write is coded after the flag update, so for non-blocking
// assignments to the same entry in the same cycle the explicit write is the
// one that takes effect.
always @(posedge clk) begin
|
||||
// Flags reported by the commit interface are stored per warp, both in the
// stand-alone fflags register and in the low bits of fcsr.
// NOTE(review): the reported flags replace the stored value instead of being
// OR-ed into it; RISC-V fflags are accrued (sticky) flags - confirm whether
// accumulation is done upstream or is missing here.
if (cmt_to_csr_if.has_fflags) begin
|
||||
csr_fflags[cmt_to_csr_if.warp_num] <= cmt_to_csr_if.fflags;
|
||||
csr_fcsr[cmt_to_csr_if.warp_num][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
|
||||
end
|
||||
|
||||
if (write_enable) begin
|
||||
// Stores the written value in the generic csr_table entry selected by wr_addr.
csr_table[wr_addr] <= write_data;
|
||||
case (write_addr)
|
||||
// fflags, frm and fcsr are kept as three separate per-warp arrays; each
// write below updates every array that overlaps the written field so the
// views stay consistent (fcsr = {frm, fflags}).
`CSR_FFLAGS: begin
|
||||
csr_fcsr[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
|
||||
end
|
||||
`CSR_FRM: begin
|
||||
csr_fcsr[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||
csr_frm[warp_num] <= write_data[`FRM_BITS-1:0];
|
||||
end
|
||||
`CSR_FCSR: begin
|
||||
csr_fcsr[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||
csr_frm[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
|
||||
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
|
||||
end
|
||||
// The remaining CSRs are single registers (not indexed by warp_num) that
// simply latch write_data.
`CSR_SATP: csr_satp <= write_data;
|
||||
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data;
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data;
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data;
|
||||
`CSR_MIE: csr_mie <= write_data;
|
||||
`CSR_MTVEC: csr_mtvec <= write_data;
|
||||
|
||||
`CSR_MEPC: csr_mepc <= write_data;
|
||||
|
||||
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
||||
|
||||
// Any other address is reported as an error: this branch is only reached
// while write_enable is set, so the assertion condition is false here.
default: begin
|
||||
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// Cycle and retired-instruction counters (clocked).
// Both 64-bit counters are cleared while reset is asserted (sampled on the
// clock edge). Otherwise csr_cycle advances by one every clock, and
// csr_instret advances by the number of commits reported on cmt_to_csr_if.
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
csr_cycle <= 0;
|
||||
csr_instret <= 0;
|
||||
end else begin
|
||||
csr_cycle <= csr_cycle + 1;
|
||||
// Only count commits on cycles where the commit interface flags them valid;
// num_commits is widened to 64 bits before the addition.
if (cmt_to_csr_if.valid) begin
|
||||
csr_instret <= csr_instret + 64'(cmt_to_csr_if.num_commits);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
case (read_addr)
|
||||
`CSR_FFLAGS : read_data = 32'(csr_fflags[warp_num]);
|
||||
`CSR_FRM : read_data = 32'(csr_frm[warp_num]);
|
||||
`CSR_FCSR : read_data = 32'(csr_fcsr[warp_num]);
|
||||
|
||||
`CSR_LWID : read_data = 32'(warp_num);
|
||||
`CSR_LTID ,
|
||||
`CSR_GTID ,
|
||||
`CSR_MHARTID ,
|
||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
||||
`CSR_GCID : read_data = CORE_ID;
|
||||
`CSR_NT : read_data = `NUM_THREADS;
|
||||
`CSR_NW : read_data = `NUM_WARPS;
|
||||
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
|
||||
`CSR_CYCLE_L : read_data = perf_cntrs_if.total_cycles[31:0];
|
||||
`CSR_CYCLE_H : read_data = perf_cntrs_if.total_cycles[63:32];
|
||||
`CSR_INSTR_L : read_data = perf_cntrs_if.total_instrs[31:0];
|
||||
`CSR_INSTR_H : read_data = perf_cntrs_if.total_instrs[63:32];
|
||||
`CSR_VEND_ID : read_data = `VENDOR_ID;
|
||||
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
|
||||
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
|
||||
|
||||
`CSR_SATP : read_data = 32'(csr_satp);
|
||||
|
||||
`CSR_MSTATUS : read_data = 32'(csr_mstatus);
|
||||
`CSR_MISA : read_data = `ISA_CODE;
|
||||
default : read_data = 32'(csr_table[rd_addr]);
|
||||
endcase
|
||||
end
|
||||
`CSR_MEDELEG : read_data = 32'(csr_medeleg);
|
||||
`CSR_MIDELEG : read_data = 32'(csr_mideleg);
|
||||
`CSR_MIE : read_data = 32'(csr_mie);
|
||||
`CSR_MTVEC : read_data = 32'(csr_mtvec);
|
||||
|
||||
endmodule
|
||||
`CSR_MEPC : read_data = 32'(csr_mepc);
|
||||
|
||||
`CSR_PMPCFG0 : read_data = 32'(csr_pmpcfg[0]);
|
||||
`CSR_PMPADDR0: read_data = 32'(csr_pmpaddr[0]);
|
||||
|
||||
`CSR_CYCLE : read_data = csr_cycle[31:0];
|
||||
`CSR_CYCLE_H : read_data = csr_cycle[63:32];
|
||||
`CSR_INSTRET : read_data = csr_instret[31:0];
|
||||
`CSR_INSTRET_H:read_data = csr_instret[63:32];
|
||||
|
||||
`CSR_MVENDORID:read_data = `VENDOR_ID;
|
||||
`CSR_MARCHID : read_data = `ARCHITECTURE_ID;
|
||||
`CSR_MIMPID : read_data = `IMPLEMENTATION_ID;
|
||||
|
||||
default: begin
|
||||
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.warp_num];
|
||||
|
||||
endmodule
|
||||
@@ -6,83 +6,108 @@ module VX_csr_unit #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_perf_cntrs_if perf_cntrs_if,
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_commit_if csr_commit_if
|
||||
VX_exu_to_cmt_if csr_commit_if
|
||||
);
|
||||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_commit_if csr_pipe_commit_if();
|
||||
VX_exu_to_cmt_if csr_pipe_commit_if();
|
||||
|
||||
wire select_io_req = csr_io_req_if.valid;
|
||||
wire select_io_rsp;
|
||||
|
||||
VX_csr_arb csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.csr_core_req_if (csr_req_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_req_if (csr_pipe_req_if),
|
||||
|
||||
.csr_rsp_if (csr_pipe_commit_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_commit_if (csr_commit_if)
|
||||
.csr_commit_if (csr_commit_if),
|
||||
|
||||
.select_io_req (select_io_req),
|
||||
.select_io_rsp (select_io_rsp)
|
||||
);
|
||||
|
||||
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
|
||||
wire [31:0] csr_read_data_s2;
|
||||
wire [31:0] csr_updated_data_s2;
|
||||
wire [31:0] csr_read_data_unqual;
|
||||
|
||||
wire is_csr_s2 = (| csr_pipe_commit_if.valid);
|
||||
wire csr_we_s1;
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||
wire [31:0] csr_read_data, csr_read_data_s1;
|
||||
wire [31:0] csr_updated_data_s1;
|
||||
wire [`NW_BITS-1:0] warp_num_s1;
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.csr_to_fpu_if (csr_to_fpu_if),
|
||||
.read_enable (csr_pipe_req_if.valid),
|
||||
.read_addr (csr_pipe_req_if.csr_addr),
|
||||
.read_data (csr_read_data_unqual),
|
||||
.write_enable (is_csr_s2),
|
||||
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
||||
.write_addr (csr_addr_s2),
|
||||
.warp_num (csr_pipe_req_if.warp_num),
|
||||
.perf_cntrs_if (perf_cntrs_if)
|
||||
);
|
||||
.read_data (csr_read_data),
|
||||
.write_enable (csr_we_s1),
|
||||
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
|
||||
.write_addr (csr_addr_s1),
|
||||
.warp_num (csr_pipe_req_if.warp_num)
|
||||
);
|
||||
|
||||
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
|
||||
&& (csr_pipe_commit_if.warp_num == csr_pipe_req_if.warp_num)
|
||||
&& is_csr_s2;
|
||||
wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr)
|
||||
&& (warp_num_s1 == csr_pipe_req_if.warp_num)
|
||||
&& csr_pipe_commit_if.valid;
|
||||
|
||||
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data;
|
||||
|
||||
reg [31:0] csr_updated_data;
|
||||
|
||||
reg csr_we_s0_unqual;
|
||||
|
||||
always @(*) begin
|
||||
csr_we_s0_unqual = 0;
|
||||
case (csr_pipe_req_if.csr_op)
|
||||
`CSR_RW: csr_updated_data = csr_pipe_req_if.csr_mask;
|
||||
`CSR_RS: csr_updated_data = csr_read_data | csr_pipe_req_if.csr_mask;
|
||||
`CSR_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask);
|
||||
`CSR_RW: begin
|
||||
csr_updated_data = csr_pipe_req_if.csr_mask;
|
||||
csr_we_s0_unqual = 1;
|
||||
end
|
||||
`CSR_RS: begin
|
||||
csr_updated_data = csr_read_data_qual | csr_pipe_req_if.csr_mask;
|
||||
csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0);
|
||||
end
|
||||
`CSR_RC: begin
|
||||
csr_updated_data = csr_read_data_qual & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask);
|
||||
csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0);
|
||||
end
|
||||
default: csr_updated_data = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid);
|
||||
wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;
|
||||
|
||||
wire stall = ~csr_pipe_commit_if.ready && csr_pipe_commit_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
||||
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, csr_pipe_commit_if.is_io, csr_read_data_s2, csr_updated_data_s2})
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||
);
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign csr_pipe_commit_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
|
||||
(csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
|
||||
csr_read_data_s2;
|
||||
assign csr_pipe_commit_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i :
|
||||
(csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
|
||||
csr_read_data_s1;
|
||||
end
|
||||
|
||||
assign csr_pipe_req_if.ready = ~stall;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_print_instr.vh"
|
||||
|
||||
module VX_decode #(
|
||||
parameter CORE_ID = 0
|
||||
@@ -15,14 +15,15 @@ module VX_decode #(
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if
|
||||
);
|
||||
wire in_valid = (| ifetch_rsp_if.valid);
|
||||
wire in_valid = ifetch_rsp_if.valid;
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
wire [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`CSR_BITS-1:0] csr_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
reg [`FPU_BITS-1:0] fpu_op;
|
||||
reg [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
reg [19:0] upper_imm;
|
||||
@@ -34,9 +35,10 @@ module VX_decode #(
|
||||
wire [6:0] func7 = instr[31:25];
|
||||
wire [11:0] u_12 = instr[31:20];
|
||||
|
||||
wire [`NR_BITS-1:0] rd = instr[11:7];
|
||||
wire [`NR_BITS-1:0] rs1 = instr[19:15];
|
||||
wire [`NR_BITS-1:0] rs2 = instr[24:20];
|
||||
wire [4:0] rd = instr[11:7];
|
||||
wire [4:0] rs1 = instr[19:15];
|
||||
wire [4:0] rs2 = instr[24:20];
|
||||
wire [4:0] rs3 = instr[31:27];
|
||||
|
||||
// opcode types
|
||||
wire is_rtype = (opcode == `INST_R);
|
||||
@@ -51,10 +53,9 @@ module VX_decode #(
|
||||
wire is_jals = (opcode == `INST_SYS) && (func3 == 0);
|
||||
wire is_csr = (opcode == `INST_SYS) && (func3 != 0);
|
||||
wire is_gpu = (opcode == `INST_GPU);
|
||||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||
|
||||
|
||||
// upper immediate
|
||||
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_LUI: upper_imm = {func7, rs2, rs1, func3};
|
||||
@@ -63,7 +64,25 @@ module VX_decode #(
|
||||
endcase
|
||||
end
|
||||
|
||||
// I-type immediate
|
||||
|
||||
wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2};
|
||||
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
|
||||
// Immediate selection (combinational).
// Builds the 32-bit sign-extended immediate src2_imm from the instruction
// fields according to the opcode. Integer and floating-point loads share one
// encoding, as do integer and floating-point stores.
always @(*) begin
|
||||
case (opcode)
|
||||
// ALU immediate: alu_imm already holds either the 12-bit immediate or the
// zero-extended shift amount; bit 11 is replicated into the upper 20 bits.
`INST_I: src2_imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
// Stores: the immediate is split across func7 (upper 7 bits) and the rd
// field (lower 5 bits).
`INST_S,
|
||||
`INST_FS: src2_imm = {{20{func7[6]}}, func7, rd};
|
||||
// Loads: the immediate is instr[31:20] (u_12), sign-extended.
`INST_L,
|
||||
`INST_FL: src2_imm = {{20{u_12[11]}}, u_12};
|
||||
// Branches: offset bits are scattered in the instruction word and the
// least-significant bit is forced to zero.
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
// Recognisable filler value for opcodes that have no immediate of this kind.
default: src2_imm = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
||||
// JAL
|
||||
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
@@ -74,23 +93,12 @@ module VX_decode #(
|
||||
`INST_JALR: jalx_offset = jalr_offset;
|
||||
default: jalx_offset = 32'd4;
|
||||
endcase
|
||||
end
|
||||
|
||||
// I-type immediate
|
||||
wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2};
|
||||
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_I: src2_imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
`INST_S: src2_imm = {{20{func7[6]}}, func7, rd};
|
||||
`INST_L: src2_imm = {{20{u_12[11]}}, u_12};
|
||||
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
default: src2_imm = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// BRANCH
|
||||
|
||||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||
|
||||
always @(*) begin
|
||||
br_op = `BR_EQ;
|
||||
case (opcode)
|
||||
@@ -119,6 +127,7 @@ module VX_decode #(
|
||||
end
|
||||
|
||||
// ALU
|
||||
|
||||
always @(*) begin
|
||||
alu_op = `ALU_OTHER;
|
||||
if (is_lui) begin
|
||||
@@ -140,7 +149,23 @@ module VX_decode #(
|
||||
end
|
||||
end
|
||||
|
||||
// MUL
|
||||
// CSR
|
||||
|
||||
wire is_csr_imm = is_csr && (func3[2] == 1);
|
||||
|
||||
// CSR operation decode (combinational).
// Maps the two low bits of func3 to the CSR operation code. Bit 2 of func3 is
// not examined here; it is used separately (is_csr_imm) to mark the
// immediate forms.
always @(*) begin
|
||||
// Default covers func3[1:0] == 0, which has no matching case item below.
csr_op = `CSR_OTHER;
|
||||
case (func3[1:0])
|
||||
2'h1: csr_op = `CSR_RW;
|
||||
2'h2: csr_op = `CSR_RS;
|
||||
2'h3: csr_op = `CSR_RC;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
// MUL
|
||||
`ifdef EXT_M_ENABLE
|
||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||
always @(*) begin
|
||||
mul_op = `MUL_MUL;
|
||||
case (func3)
|
||||
@@ -155,24 +180,86 @@ module VX_decode #(
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`else
|
||||
wire is_mul = 0;
|
||||
always @(*) begin
|
||||
mul_op = `MUL_MUL;
|
||||
end
|
||||
`endif
|
||||
|
||||
// FPU
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire is_fl = (opcode == `INST_FL) && ((func3 == 2));
|
||||
wire is_fs = (opcode == `INST_FS) && ((func3 == 2));
|
||||
wire is_fci = (opcode == `INST_FCI);
|
||||
wire is_fmadd = (opcode == `INST_FMADD);
|
||||
wire is_fmsub = (opcode == `INST_FMSUB);
|
||||
wire is_fnmsub = (opcode == `INST_FNMSUB);
|
||||
wire is_fnmadd = (opcode == `INST_FNMADD);
|
||||
|
||||
wire is_fcmp = is_fci && (func7 == 7'h50); // compare
|
||||
wire is_fcvti = is_fci && (func7 == 7'h60); // convert to int
|
||||
wire is_fcvtf = is_fci && (func7 == 7'h68); // convert to float
|
||||
wire is_fmvcls = is_fci && (func7 == 7'h70 || func7 == 7'h78); // move + class
|
||||
wire is_fr4 = is_fmadd || is_fmsub || is_fnmsub || is_fnmadd;
|
||||
wire is_fpu = (is_fl || is_fs || is_fci || is_fr4);
|
||||
|
||||
// FPU operation decode (combinational).
// fpu_op defaults to FPU_OTHER and is overridden when a known encoding is
// matched. Fused multiply-add forms are identified by their opcode flags;
// everything else is decoded from func7 (with func3 / instr[20] selecting
// between variants that share a func7 value).
// Note: the func7 branch is evaluated whenever is_fr4 is false, independent
// of the opcode, so fpu_op is only meaningful when the instruction is
// actually classified as an FPU instruction elsewhere.
always @(*) begin
|
||||
fpu_op = `FPU_OTHER;
|
||||
if (is_fr4) begin
|
||||
// One-hot selection over the four fused multiply-add opcode flags.
case ({is_fmadd, is_fmsub, is_fnmsub, is_fnmadd})
|
||||
4'b1000: fpu_op = `FPU_MADD;
|
||||
4'b0100: fpu_op = `FPU_MSUB;
|
||||
4'b0010: fpu_op = `FPU_NMSUB;
|
||||
4'b0001: fpu_op = `FPU_NMADD;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
else begin
|
||||
case (func7)
|
||||
7'h00: fpu_op = `FPU_ADD;
|
||||
7'h04: fpu_op = `FPU_SUB;
|
||||
7'h08: fpu_op = `FPU_MUL;
|
||||
7'h0C: fpu_op = `FPU_DIV;
|
||||
// Sign-injection: func3[1] selects SGNJX, otherwise func3[0] selects SGNJN,
// otherwise plain SGNJ.
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
|
||||
// Min/max: func3 == 0 is MIN, any other func3 value is treated as MAX.
7'h14: fpu_op = (func3 == 3'h0) ? `FPU_MIN : `FPU_MAX;
|
||||
7'h2C: fpu_op = `FPU_SQRT;
|
||||
7'h50: fpu_op = `FPU_CMP; // wb to intReg
|
||||
// Conversions: instr[20] picks the unsigned variant.
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
|
||||
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
|
||||
7'h70: fpu_op = (func3 == 3'h0) ? `FPU_MVXW : `FPU_CLASS; // both wb to intReg
|
||||
7'h78: fpu_op = `FPU_MVWX;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire is_fl = 0;
|
||||
wire is_fs = 0;
|
||||
wire is_fci = 0;
|
||||
wire is_fcmp = 0;
|
||||
wire is_fcvti = 0;
|
||||
wire is_fcvtf = 0;
|
||||
wire is_fmvcls = 0;
|
||||
wire is_fr4 = 0;
|
||||
wire is_fpu = 0;
|
||||
|
||||
always @(*) begin
|
||||
fpu_op = `FPU_OTHER;
|
||||
end
|
||||
`endif
|
||||
|
||||
// LSU
|
||||
wire is_lsu = (is_ltype || is_stype);
|
||||
assign lsu_op = {is_stype, func3};
|
||||
|
||||
// CSR
|
||||
wire is_csr_imm = is_csr && (func3[2] == 1);
|
||||
wire is_lsu = (is_ltype || is_stype || is_fl || is_fs);
|
||||
always @(*) begin
|
||||
csr_op = `CSR_OTHER;
|
||||
case (func3[1:0])
|
||||
2'h1: csr_op = `CSR_RW;
|
||||
2'h2: csr_op = `CSR_RS;
|
||||
2'h3: csr_op = `CSR_RC;
|
||||
default:;
|
||||
endcase
|
||||
lsu_op = {is_stype, func3};
|
||||
if (is_fl) lsu_op = `LSU_LW;
|
||||
if (is_fs) lsu_op = `LSU_SW;
|
||||
end
|
||||
|
||||
// GPU
|
||||
|
||||
always @(*) begin
|
||||
gpu_op = `GPU_OTHER;
|
||||
case (func3)
|
||||
@@ -185,34 +272,75 @@ module VX_decode #(
|
||||
endcase
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire use_rd = (is_fl || is_fci || is_fr4)
|
||||
|| ((rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
|
||||
|
||||
wire use_rs1 = is_fpu
|
||||
|| is_gpu
|
||||
|| ((is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || ~is_csr_imm || is_gpu) && (rs1 != 0));
|
||||
|
||||
wire use_rs2 = (is_fpu && ~(is_fl || (fpu_op == `FPU_SQRT) || is_fcvti || is_fcvtf || is_fmvcls))
|
||||
|| (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN))
|
||||
|| ((is_btype || is_stype || is_rtype) && (rs2 != 0));
|
||||
|
||||
wire use_rs3 = is_fr4;
|
||||
|
||||
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || (fpu_op == `FPU_MVXW || fpu_op == `FPU_CLASS));
|
||||
wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || (fpu_op == `FPU_MVWX)));
|
||||
wire rs2_is_fp = is_fs || is_fr4 || is_fci;
|
||||
|
||||
wire [4:0] rs1_qual = is_lui ? 5'h0 : rs1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_decode_if decode_tmp_if();
|
||||
|
||||
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_tmp_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||
assign decode_tmp_if.next_PC = ifetch_rsp_if.curr_PC + 32'h4;
|
||||
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_tmp_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
assign decode_tmp_if.thread_mask= ifetch_rsp_if.thread_mask;
|
||||
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||
assign decode_tmp_if.next_PC = ifetch_rsp_if.curr_PC + 32'h4;
|
||||
|
||||
assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU :
|
||||
is_csr ? `EX_CSR :
|
||||
is_mul ? `EX_MUL :
|
||||
is_gpu ? `EX_GPU :
|
||||
is_br ? `EX_ALU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
is_fpu ? `EX_FPU :
|
||||
is_gpu ? `EX_GPU :
|
||||
is_br ? `EX_ALU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
|
||||
assign decode_tmp_if.instr_op = is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
assign decode_tmp_if.ex_op = is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_fpu ? `OP_BITS'(fpu_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_br ? `OP_BITS'({1'b1, br_op}) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
|
||||
assign decode_tmp_if.rd = rd;
|
||||
assign decode_tmp_if.wb = use_rd;
|
||||
|
||||
assign decode_tmp_if.rs1 = is_lui ? `NR_BITS'(0) : rs1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign decode_tmp_if.rd = {rd_is_fp, rd};
|
||||
assign decode_tmp_if.rs1 = {rs1_is_fp, rs1_qual};
|
||||
assign decode_tmp_if.rs2 = {rs2_is_fp, rs2};
|
||||
assign decode_tmp_if.rs3 = {1'b1, rs3};
|
||||
`else
|
||||
assign decode_tmp_if.rd = rd;
|
||||
assign decode_tmp_if.rs1 = rs1_qual;
|
||||
assign decode_tmp_if.rs2 = rs2;
|
||||
assign decode_tmp_if.rs3 = rs3;
|
||||
`endif
|
||||
|
||||
assign decode_tmp_if.rs2 = rs2;
|
||||
assign decode_tmp_if.use_rs3 = use_rs3;
|
||||
|
||||
assign decode_tmp_if.reg_use_mask = ((`NUM_REGS)'(use_rd) << decode_tmp_if.rd)
|
||||
| ((`NUM_REGS)'(use_rs1) << decode_tmp_if.rs1)
|
||||
| ((`NUM_REGS)'(use_rs2) << decode_tmp_if.rs2)
|
||||
| ((`NUM_REGS)'(use_rs3) << decode_tmp_if.rs3);
|
||||
|
||||
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
||||
@@ -220,20 +348,9 @@ module VX_decode #(
|
||||
src2_imm;
|
||||
|
||||
assign decode_tmp_if.rs1_is_PC = is_auipc;
|
||||
|
||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||
|
||||
assign decode_tmp_if.use_rs1 = (decode_tmp_if.rs1 != 0)
|
||||
&& (is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || ~is_csr_imm || is_gpu);
|
||||
|
||||
assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0)
|
||||
&& (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN)));
|
||||
|
||||
assign decode_tmp_if.wb = (rd == 0) ? `WB_NO : // disable writeback to r0
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
(is_jal || is_jalr || is_jals) ? `WB_JAL :
|
||||
is_ltype ? `WB_MEM :
|
||||
`WB_NO;
|
||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||
|
||||
assign decode_tmp_if.frm = func3;
|
||||
|
||||
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
@@ -241,36 +358,36 @@ module VX_decode #(
|
||||
assign wstall_if.wstall = in_valid && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
wire stall = ~decode_if.ready && (| decode_if.valid);
|
||||
|
||||
wire stall = ~decode_if.ready && decode_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS)
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + `FRM_BITS + `NUM_REGS)
|
||||
) decode_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb}),
|
||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb})
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, decode_tmp_if.reg_use_mask}),
|
||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, decode_if.reg_use_mask})
|
||||
);
|
||||
|
||||
assign ifetch_rsp_if.ready = ~stall;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| decode_tmp_if.valid) && ~stall) begin
|
||||
if (decode_tmp_if.valid && ~stall) begin
|
||||
$write("%t: Core%0d-Decode: warp=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC);
|
||||
print_ex_type(decode_tmp_if.ex_type);
|
||||
$write(", op=");
|
||||
print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op);
|
||||
$write(", wb=");
|
||||
print_wb(decode_tmp_if.wb);
|
||||
$write(", rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n", decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);
|
||||
print_ex_op(decode_tmp_if.ex_type, decode_tmp_if.ex_op);
|
||||
$write(", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, frm=", decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.rs3, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm);
|
||||
print_frm(decode_tmp_if.frm);
|
||||
$write("\n");
|
||||
|
||||
// trap unsupported instructions
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.instr_op) == `CSR_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.instr_op) == `GPU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.ex_op) == `ALU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.ex_op) == `CSR_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.ex_op) == `GPU_OTHER));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -1,63 +1,17 @@
|
||||
`ifndef VX_DEFINE
|
||||
`define VX_DEFINE
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`include "VX_config.vh"
|
||||
`include "VX_scope.vh"
|
||||
|
||||
`define QUEUE_FORCE_MLAB 1
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// `define SYNTHESIS 1
|
||||
// `define ASIC 1
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
/* verilator lint_on UNUSED */
|
||||
`else
|
||||
`define DEBUG_BLOCK(x)
|
||||
`endif
|
||||
|
||||
`define DEBUG_BEGIN /* verilator lint_off UNUSED */
|
||||
|
||||
`define DEBUG_END /* verilator lint_on UNUSED */
|
||||
|
||||
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
|
||||
/* verilator lint_off PINCONNECTEMPTY */ \
|
||||
/* verilator lint_off DECLFILENAME */
|
||||
|
||||
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
|
||||
/* verilator lint_on PINCONNECTEMPTY */ \
|
||||
/* verilator lint_on DECLFILENAME */
|
||||
|
||||
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
|
||||
wire [$bits(x)-1:0] __``x``__ = x; \
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
|
||||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
`define STRINGIFY(x) `"x`"
|
||||
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
if (!(cond)) $error(msg); \
|
||||
endgenerate
|
||||
|
||||
`define CLOG2(x) $clog2(x)
|
||||
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
|
||||
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
||||
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
|
||||
|
||||
`define MIN(x, y) ((x < y) ? (x) : (y))
|
||||
`define MAX(x, y) ((x > y) ? (x) : (y))
|
||||
|
||||
`define UP(x) (((x) > 0) ? x : 1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
@@ -68,17 +22,29 @@
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define NUM_REGS 64
|
||||
`else
|
||||
`define NUM_REGS 32
|
||||
`endif
|
||||
|
||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||
|
||||
`define CSR_ADDR_SIZE 12
|
||||
`define CSR_ADDR_BITS 12
|
||||
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
`define DIV_LATENCY 2
|
||||
`define ISTAG_BITS `LOG2UP(`ISSUEQ_SIZE)
|
||||
|
||||
`define MUL_LATENCY 2
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define LATENCY_IDIV 24
|
||||
`define LATENCY_IMUL 2
|
||||
|
||||
`define LATENCY_FMULADD 2
|
||||
`define LATENCY_FDIVSQRT 2
|
||||
`define LATENCY_FCONV 2
|
||||
`define LATENCY_FNONCOMP 1
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -86,13 +52,22 @@
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
`define INST_JALR 7'b1100111
|
||||
`define INST_B 7'b1100011
|
||||
`define INST_L 7'b0000011
|
||||
`define INST_S 7'b0100011
|
||||
`define INST_I 7'b0010011
|
||||
`define INST_R 7'b0110011
|
||||
`define INST_F 7'b0001111
|
||||
`define INST_SYS 7'b1110011
|
||||
`define INST_B 7'b1100011 // branch instructions
|
||||
`define INST_L 7'b0000011 // load instructions
|
||||
`define INST_S 7'b0100011 // store instructions
|
||||
`define INST_I 7'b0010011 // immediate instructions
|
||||
`define INST_R 7'b0110011 // register instructions
|
||||
`define INST_F 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
`define INST_GPU 7'b1101011
|
||||
|
||||
`define BYTEEN_SB 3'h0
|
||||
@@ -101,6 +76,7 @@
|
||||
`define BYTEEN_UB 3'h4
|
||||
`define BYTEEN_UH 3'h5
|
||||
`define BYTEEN_BITS 3
|
||||
`define BYTEEN_TYPE(x) x[1:0]
|
||||
|
||||
`define BR_EQ 4'h0
|
||||
`define BR_NE 4'h1
|
||||
@@ -115,6 +91,7 @@
|
||||
`define BR_MRET 4'hA
|
||||
`define BR_SRET 4'hB
|
||||
`define BR_DRET 4'hC
|
||||
`define BR_NO 4'hF
|
||||
`define BR_BITS 4
|
||||
|
||||
`define OP_BITS 5
|
||||
@@ -150,18 +127,6 @@
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define IS_BR_OP(x) x[4]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
|
||||
`define LSU_LB {1'b0, `BYTEEN_SB}
|
||||
`define LSU_LH {1'b0, `BYTEEN_SH}
|
||||
`define LSU_LW {1'b0, `BYTEEN_SW}
|
||||
@@ -183,6 +148,53 @@
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
|
||||
`define FPU_ADD 5'h00
|
||||
`define FPU_SUB 5'h01
|
||||
`define FPU_MUL 5'h02
|
||||
`define FPU_DIV 5'h03
|
||||
`define FPU_SQRT 5'h04
|
||||
`define FPU_MADD 5'h05
|
||||
`define FPU_MSUB 5'h06
|
||||
`define FPU_NMSUB 5'h07
|
||||
`define FPU_NMADD 5'h08
|
||||
`define FPU_SGNJ 5'h09 // FSGNJ
|
||||
`define FPU_SGNJN 5'h0A // FSGNJN
|
||||
`define FPU_SGNJX 5'h0B // FSGNJX
|
||||
`define FPU_MIN 5'h0C // FMIN.S
|
||||
`define FPU_MAX 5'h0D // FMAX.S
|
||||
`define FPU_CVTWS 5'h0E // FCVT.W.S
|
||||
`define FPU_CVTWUS 5'h0F // FCVT.WU.S
|
||||
`define FPU_CVTSW 5'h10 // FCVT.S.W
|
||||
`define FPU_CVTSWU 5'h11 // FCVT.S.WU
|
||||
`define FPU_MVXW 5'h12 // MOV FP from fpReg to integer reg
|
||||
`define FPU_MVWX 5'h13 // MOV FP from integer reg to fpReg
|
||||
`define FPU_CLASS 5'h14
|
||||
`define FPU_CMP 5'h15
|
||||
`define FPU_OTHER 5'h1f
|
||||
`define FPU_BITS 5
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
`define FRM_RTZ 3'b001 // round to zero
|
||||
`define FRM_RDN 3'b010 // round to -inf
|
||||
`define FRM_RUP 3'b011 // round to +inf
|
||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
`define FFG_BITS 5
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
@@ -194,36 +206,43 @@
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_MUL 3'h2
|
||||
`define EX_LSU 3'h3
|
||||
`define EX_CSR 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 5
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
|
||||
`define WB_NO 2'h0
|
||||
`define WB_ALU 2'h1
|
||||
`define WB_MEM 2'h2
|
||||
`define WB_JAL 2'h3
|
||||
`define WB_BITS 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
`define ISA_EXT_M (1 << 12)
|
||||
`else
|
||||
`define ISA_EXT_M 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define ISA_EXT_F (1 << 5)
|
||||
`else
|
||||
`define ISA_EXT_F 0
|
||||
`endif
|
||||
|
||||
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
|
||||
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
|
||||
| (0 << 2) // C - Compressed extension \
|
||||
| (0 << 3) // D - Double precision floating-point extension \
|
||||
| (0 << 4) // E - RV32E base ISA \
|
||||
| (0 << 5) // F - Single precision floating-point extension \
|
||||
|`ISA_EXT_F // F - Single precision floating-point extension \
|
||||
| (0 << 6) // G - Additional standard extensions present \
|
||||
| (0 << 7) // H - Hypervisor mode implemented \
|
||||
| (1 << 8) // I - RV32I/64I/128I base ISA \
|
||||
| (0 << 9) // J - Reserved \
|
||||
| (0 << 10) // K - Reserved \
|
||||
| (0 << 11) // L - Tentatively reserved for Decimal Floating-Point extension \
|
||||
| (1 << 12) // M - Integer Multiply/Divide extension \
|
||||
|`ISA_EXT_M // M - Integer Multiply/Divide extension \
|
||||
| (0 << 13) // N - User level interrupts supported \
|
||||
| (0 << 14) // O - Reserved \
|
||||
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
|
||||
@@ -240,8 +259,8 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + `WB_BITS + `NR_BITS + `NW_BITS)
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 1 + `NR_BITS + `NW_BITS)
|
||||
`else
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
|
||||
`endif
|
||||
@@ -252,7 +271,7 @@
|
||||
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
|
||||
|
||||
// TAG sharing enable
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`DCREQ_SIZE)
|
||||
`define DCORE_TAG_ID_BITS `ISTAG_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `DCORE_TAG_ID_BITS)
|
||||
@@ -287,7 +306,7 @@
|
||||
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
|
||||
|
||||
// TAG sharing enable
|
||||
`define ICORE_TAG_ID_BITS `LOG2UP(`ICREQ_SIZE)
|
||||
`define ICORE_TAG_ID_BITS `NW_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define ICORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `ICORE_TAG_ID_BITS)
|
||||
@@ -390,116 +409,12 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
task print_ex_type;
|
||||
input [`EX_BITS-1:0] ex;
|
||||
begin
|
||||
case (ex)
|
||||
`EX_ALU: $write("ALU");
|
||||
`EX_LSU: $write("LSU");
|
||||
`EX_CSR: $write("CSR");
|
||||
`EX_MUL: $write("MUL");
|
||||
`EX_GPU: $write("GPU");
|
||||
default: $write("NOP");
|
||||
endcase
|
||||
end
|
||||
endtask
|
||||
|
||||
task print_instr_op;
|
||||
input [`EX_BITS-1:0] ex;
|
||||
input [`OP_BITS-1:0] op;
|
||||
begin
|
||||
case (ex)
|
||||
`EX_ALU: begin
|
||||
case (`ALU_BITS'(op))
|
||||
`ALU_ADD: $write("ADD");
|
||||
`ALU_SUB: $write("SUB");
|
||||
`ALU_SLL: $write("SLL");
|
||||
`ALU_SRL: $write("SRL");
|
||||
`ALU_SRA: $write("SRA");
|
||||
`ALU_SLT: $write("SLT");
|
||||
`ALU_SLTU: $write("SLTU");
|
||||
`ALU_XOR: $write("XOR");
|
||||
`ALU_OR: $write("OR");
|
||||
`ALU_AND: $write("AND");
|
||||
`ALU_LUI: $write("LUI");
|
||||
`ALU_AUIPC: $write("AUIPC");
|
||||
`ALU_BEQ: $write("EQ");
|
||||
`ALU_BNE: $write("NE");
|
||||
`ALU_BLT: $write("LT");
|
||||
`ALU_BGE: $write("GE");
|
||||
`ALU_BLTU: $write("LTU");
|
||||
`ALU_BGEU: $write("GEU");
|
||||
`ALU_JAL: $write("JAL");
|
||||
`ALU_JALR: $write("JALR");
|
||||
`ALU_ECALL: $write("ECALL");
|
||||
`ALU_EBREAK:$write("EBREAK");
|
||||
`ALU_MRET: $write("MRET");
|
||||
`ALU_SRET: $write("SRET");
|
||||
`ALU_DRET: $write("DRET");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
`EX_MUL: begin
|
||||
case (`MUL_BITS'(op))
|
||||
`MUL_MUL: $write("MUL");
|
||||
`MUL_MULH: $write("MULH");
|
||||
`MUL_MULHSU:$write("MULHSU");
|
||||
`MUL_MULHU: $write("MULHU");
|
||||
`MUL_DIV: $write("DIV");
|
||||
`MUL_DIVU: $write("DIVU");
|
||||
`MUL_REM: $write("REM");
|
||||
`MUL_REMU: $write("REMU");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
`EX_LSU: begin
|
||||
case (`LSU_BITS'(op))
|
||||
`LSU_LB: $write("LB");
|
||||
`LSU_LH: $write("LH");
|
||||
`LSU_LW: $write("LW");
|
||||
`LSU_LBU: $write("LBU");
|
||||
`LSU_LHU: $write("LHU");
|
||||
`LSU_SB: $write("SB");
|
||||
`LSU_SH: $write("SH");
|
||||
`LSU_SW: $write("SW");
|
||||
`LSU_SBU: $write("SBU");
|
||||
`LSU_SHU: $write("SHU");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
`EX_CSR: begin
|
||||
case (`CSR_BITS'(op))
|
||||
`CSR_RW: $write("CSRW");
|
||||
`CSR_RS: $write("CSRS");
|
||||
`CSR_RC: $write("CSRC");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
`EX_GPU: begin
|
||||
case (`GPU_BITS'(op))
|
||||
`GPU_TMC: $write("TMC");
|
||||
`GPU_WSPAWN:$write("WSPAWN");
|
||||
`GPU_SPLIT: $write("SPLIT");
|
||||
`GPU_JOIN: $write("JOIN");
|
||||
`GPU_BAR: $write("BAR");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
endtask
|
||||
|
||||
task print_wb;
|
||||
input [`WB_BITS-1:0] wb;
|
||||
begin
|
||||
case (wb)
|
||||
`WB_ALU: $write("ALU");
|
||||
`WB_MEM: $write("MEM");
|
||||
`WB_JAL: $write("JAL");
|
||||
default: $write("NO");
|
||||
endcase
|
||||
end
|
||||
endtask
|
||||
// Packed record of per-instruction metadata: warp id, thread mask, PC,
// destination register and a writeback bit.
// NOTE(review): presumably the payload stored per issued instruction - confirm against users.
// Field order defines the packed bit layout (first field is most significant).
typedef struct packed {
|
||||
logic [`NW_BITS-1:0] warp_num; // warp identifier, `NW_BITS wide
|
||||
logic [`NUM_THREADS-1:0] thread_mask; // one bit per thread
|
||||
logic [31:0] curr_PC; // 32-bit program counter value
|
||||
logic [`NR_BITS-1:0] rd; // destination register index, `NR_BITS wide
|
||||
logic wb; // single-bit writeback flag (presumably 1 = rd is written; confirm)
|
||||
} is_data_t;
|
||||
|
||||
`endif
|
||||
|
||||
@@ -18,26 +18,30 @@ module VX_execute #(
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
|
||||
// perf
|
||||
VX_perf_cntrs_if perf_cntrs_if,
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
VX_fpu_to_cmt_if fpu_commit_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if,
|
||||
|
||||
output wire ebreak
|
||||
);
|
||||
|
||||
VX_csr_to_fpu_if csr_to_fpu_if();
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
@@ -66,21 +70,52 @@ module VX_execute #(
|
||||
) csr_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.perf_cntrs_if (perf_cntrs_if),
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.csr_to_fpu_if (csr_to_fpu_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.csr_commit_if (csr_commit_if)
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
VX_mul_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) mul_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.mul_req_if (mul_req_if),
|
||||
.mul_commit_if (mul_commit_if)
|
||||
.alu_req_if (mul_req_if),
|
||||
.alu_commit_if (mul_commit_if)
|
||||
);
|
||||
`else
|
||||
assign mul_req_if.ready = 0;
|
||||
assign mul_commit_if.valid = 0;
|
||||
assign mul_commit_if.issue_tag = 0;
|
||||
assign mul_commit_if.data = 0;
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.csr_to_fpu_if (csr_to_fpu_if),
|
||||
.fpu_commit_if (fpu_commit_if)
|
||||
);
|
||||
`else
|
||||
assign fpu_req_if.ready = 0;
|
||||
assign fpu_commit_if.valid = 0;
|
||||
assign fpu_commit_if.issue_tag = 0;
|
||||
assign fpu_commit_if.data = 0;
|
||||
assign fpu_commit_if.has_fflags = 0;
|
||||
assign fpu_commit_if.fflags_NV = 0;
|
||||
assign fpu_commit_if.fflags_DZ = 0;
|
||||
assign fpu_commit_if.fflags_OF = 0;
|
||||
assign fpu_commit_if.fflags_UF = 0;
|
||||
assign fpu_commit_if.fflags_NX = 0;
|
||||
`endif
|
||||
|
||||
VX_gpu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
@@ -90,7 +125,7 @@ module VX_execute #(
|
||||
.gpu_commit_if (gpu_commit_if)
|
||||
);
|
||||
|
||||
assign ebreak = (| alu_req_if.valid) && (alu_req_if.alu_op == `ALU_EBREAK || alu_req_if.alu_op == `ALU_ECALL);
|
||||
assign ebreak = alu_req_if.valid && (alu_req_if.alu_op == `ALU_EBREAK || alu_req_if.alu_op == `ALU_ECALL);
|
||||
|
||||
`SCOPE_ASSIGN(scope_decode_valid, decode_if.valid);
|
||||
`SCOPE_ASSIGN(scope_decode_warp_num, decode_if.warp_num);
|
||||
|
||||
84
hw/rtl/VX_fpu_unit.v
Normal file
84
hw/rtl/VX_fpu_unit.v
Normal file
@@ -0,0 +1,84 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Floating-point execution unit wrapper.
// Resolves the effective rounding mode for the incoming request and hands the
// request to the floating-point core: VX_fp_fpga when SYNTHESIS is defined,
// VX_fpnew otherwise. Both cores are instantiated as `fp_core` with the same
// port connections.
module VX_fpu_unit #(
    parameter CORE_ID = 0
) (
    // clock and reset
    input wire clk,
    input wire reset,

    // FPU request and CSR rounding-mode interface
    VX_fpu_req_if       fpu_req_if,
    VX_csr_to_fpu_if    csr_to_fpu_if,

    // result interface towards commit
    VX_fpu_to_cmt_if    fpu_commit_if
);

    // Forward the requesting warp id on the CSR interface.
    assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num;

    // Effective rounding mode: a request carrying `FRM_DYN uses the value
    // supplied by csr_to_fpu_if.frm, any other encoding is used as-is.
    wire                 frm_is_dynamic = (fpu_req_if.frm == `FRM_DYN);
    wire [`FRM_BITS-1:0] round_mode;
    assign round_mode = frm_is_dynamic ? csr_to_fpu_if.frm : fpu_req_if.frm;

`ifdef SYNTHESIS

    VX_fp_fpga fp_core (
        .clk        (clk),
        .reset      (reset),

        // request handshake and tag
        .in_valid   (fpu_req_if.valid),
        .in_ready   (fpu_req_if.ready),
        .in_tag     (fpu_req_if.issue_tag),

        // operation, rounding mode and operands
        .op         (fpu_req_if.fpu_op),
        .frm        (round_mode),
        .dataa      (fpu_req_if.rs1_data),
        .datab      (fpu_req_if.rs2_data),
        .datac      (fpu_req_if.rs3_data),

        // result, flags and tag
        .result     (fpu_commit_if.data),
        .has_fflags (fpu_commit_if.has_fflags),
        .fflags     (fpu_commit_if.fflags),
        .out_tag    (fpu_commit_if.issue_tag),

        // response handshake
        .out_valid  (fpu_commit_if.valid),
        .out_ready  (fpu_commit_if.ready)
    );

`else

    VX_fpnew #(
        .FMULADD  (1),
        .FDIVSQRT (1),
        .FNONCOMP (1),
        .FCONV    (1)
    ) fp_core (
        .clk        (clk),
        .reset      (reset),

        // request handshake and tag
        .in_valid   (fpu_req_if.valid),
        .in_ready   (fpu_req_if.ready),
        .in_tag     (fpu_req_if.issue_tag),

        // operation, rounding mode and operands
        .op         (fpu_req_if.fpu_op),
        .frm        (round_mode),
        .dataa      (fpu_req_if.rs1_data),
        .datab      (fpu_req_if.rs2_data),
        .datac      (fpu_req_if.rs3_data),

        // result, flags and tag
        .result     (fpu_commit_if.data),
        .has_fflags (fpu_commit_if.has_fflags),
        .fflags     (fpu_commit_if.fflags),
        .out_tag    (fpu_commit_if.issue_tag),

        // response handshake
        .out_valid  (fpu_commit_if.valid),
        .out_ready  (fpu_commit_if.ready)
    );

`endif

endmodule
|
||||
51
hw/rtl/VX_gpr_fp_ctrl.v
Normal file
51
hw/rtl/VX_gpr_fp_ctrl.v
Normal file
@@ -0,0 +1,51 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// control module to support multi-cycle read for fp register
|
||||
|
||||
// Read sequencer for register requests that need a third operand (rs3).
// The rs1 read address output is reused for rs3: on a request with use_rs3 set,
// the first cycle captures the rs1 data and stalls the requester (ready low),
// the following cycle drives raddr1 with rs3 so the rs1 read data input carries
// the rs3 value.
module VX_gpr_fp_ctrl (
    input wire clk,
    input wire reset,

    // data returned by the register file read ports
    input wire [`NUM_THREADS-1:0][31:0] rs1_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_data,

    // address driven onto the first read port
    output wire [`NR_BITS-1:0] raddr1,

    VX_gpr_read_if gpr_read_if
);

    reg [`NUM_THREADS-1:0][31:0] rs1_data_saved; // rs1 value captured during the stall cycle
    reg                          rs3_phase;      // 1 while the first read port is addressed with rs3

    // A request needing rs3 that has not yet entered the rs3 phase must wait.
    wire rs3_pending = gpr_read_if.valid && gpr_read_if.use_rs3 && !rs3_phase;

    // Request accepted this cycle.
    wire read_fire = gpr_read_if.valid && gpr_read_if.ready;

    always @(posedge clk) begin
        // phase tracking: enter the rs3 phase on a pending request, leave it on accept
        if (reset) begin
            rs3_phase <= 1'b0;
        end else begin
            if (rs3_pending) begin
                rs3_phase <= 1'b1;
            end else if (read_fire) begin
                rs3_phase <= 1'b0;
            end
        end

        // keep the original rs1 data before the port is redirected to rs3
        if (rs3_pending) begin
            rs1_data_saved <= rs1_data;
        end
    end

    // outputs

    assign raddr1 = rs3_phase ? gpr_read_if.rs3 : gpr_read_if.rs1;

    assign gpr_read_if.ready    = !rs3_pending;
    assign gpr_read_if.rs1_data = gpr_read_if.use_rs3 ? rs1_data_saved : rs1_data;
    assign gpr_read_if.rs2_data = rs2_data;
    assign gpr_read_if.rs3_data = rs1_data;

endmodule
|
||||
@@ -35,8 +35,6 @@ module VX_gpr_ram (
|
||||
ram[waddr][i][3] <= wdata[i][31:24];
|
||||
end
|
||||
end
|
||||
assert(~(|we) || (waddr != 0)); // ensure r0 is never written!
|
||||
assert(0 == ram[0]);
|
||||
end
|
||||
|
||||
assign rs1_data = ram[rs1];
|
||||
|
||||
@@ -4,43 +4,65 @@ module VX_gpr_stage #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_wb_if writeback_if,
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
// outputs
|
||||
VX_gpr_data_if gpr_data_if
|
||||
VX_gpr_read_if gpr_read_if
|
||||
);
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data_all [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data_all [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_imm;
|
||||
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NR_BITS-1:0] raddr1;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign rs1_PC[i] = decode_if.curr_PC;
|
||||
assign rs2_imm[i] = decode_if.imm;
|
||||
end
|
||||
|
||||
assign gpr_data_if.rs1_data = decode_if.rs1_is_PC ? rs1_PC : rs1_data_all[decode_if.warp_num];
|
||||
assign gpr_data_if.rs2_data = decode_if.rs2_is_imm ? rs2_imm : rs2_data_all[decode_if.warp_num];
|
||||
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
|
||||
VX_gpr_ram gpr_ram (
|
||||
wire [`NUM_THREADS-1:0] we = writeback_if.thread_mask
|
||||
& {`NUM_THREADS{writeback_if.valid && (i == writeback_if.warp_num)}};
|
||||
VX_gpr_ram gpr_int_ram (
|
||||
.clk (clk),
|
||||
.we (we[i]),
|
||||
.we (we),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (decode_if.rs1),
|
||||
.rs2 (decode_if.rs2),
|
||||
.rs1_data (rs1_data_all[i]),
|
||||
.rs2_data (rs2_data_all[i])
|
||||
.rs1 (raddr1),
|
||||
.rs2 (gpr_read_if.rs2),
|
||||
.rs1_data (rs1_data[i]),
|
||||
.rs2_data (rs2_data[i])
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
//inputs
|
||||
.rs1_data (rs1_data[gpr_read_if.warp_num]),
|
||||
.rs2_data (rs2_data[gpr_read_if.warp_num]),
|
||||
|
||||
// outputs
|
||||
.raddr1 (raddr1),
|
||||
.gpr_read_if(gpr_read_if)
|
||||
);
|
||||
`else
|
||||
assign raddr1 = gpr_read_if.rs1;
|
||||
assign gpr_read_if.rs1_data = rs1_data[gpr_read_if.warp_num];
|
||||
assign gpr_read_if.rs2_data = rs2_data[gpr_read_if.warp_num];
|
||||
assign gpr_read_if.rs3_data = 0;
|
||||
assign gpr_read_if.ready = 1;
|
||||
|
||||
wire valid = gpr_read_if.valid;
|
||||
wire use_rs3 = gpr_read_if.use_rs3;
|
||||
wire [`NR_BITS-1:0] rs3 = gpr_read_if.rs3;
|
||||
`UNUSED_VAR (valid);
|
||||
`UNUSED_VAR (use_rs3);
|
||||
`UNUSED_VAR (rs3);
|
||||
`endif
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
|
||||
@@ -4,58 +4,59 @@ module VX_gpu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
// Input
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
|
||||
// Output
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_commit_if gpu_commit_if
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if
|
||||
);
|
||||
wire [`NUM_THREADS-1:0] curr_valids = gpu_req_if.valid;
|
||||
|
||||
wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.gpu_op == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.gpu_op == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.gpu_op == `GPU_BAR);
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
wire gpu_req_fire = gpu_req_if.valid && gpu_commit_if.ready;
|
||||
|
||||
assign warp_ctl_if.warp_num = gpu_req_if.warp_num;
|
||||
|
||||
// tmc
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
end
|
||||
assign warp_ctl_if.change_mask = is_tmc && gpu_req_fire;
|
||||
assign warp_ctl_if.thread_mask = tmc_new_mask;
|
||||
|
||||
wire valid_inst = (| curr_valids);
|
||||
|
||||
assign warp_ctl_if.warp_num = gpu_req_if.warp_num;
|
||||
// barrier
|
||||
|
||||
assign warp_ctl_if.change_mask = is_tmc && valid_inst;
|
||||
assign warp_ctl_if.thread_mask = is_tmc ? tmc_new_mask : 0;
|
||||
|
||||
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (0 == warp_ctl_if.thread_mask);
|
||||
|
||||
wire wspawn = is_wspawn && valid_inst;
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init
|
||||
assign wspawn_new_active[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
|
||||
assign warp_ctl_if.is_barrier = is_bar && valid_inst;
|
||||
assign warp_ctl_if.is_barrier = is_bar && gpu_req_fire;
|
||||
assign warp_ctl_if.barrier_id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign warp_ctl_if.barrier_num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
||||
|
||||
assign warp_ctl_if.num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
||||
// wspawn
|
||||
|
||||
assign warp_ctl_if.wspawn = wspawn;
|
||||
assign warp_ctl_if.wspawn_pc = wspawn_pc;
|
||||
assign warp_ctl_if.wspawn_new_active = wspawn_new_active;
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
assign warp_ctl_if.wspawn = is_wspawn && gpu_req_fire;
|
||||
assign warp_ctl_if.wspawn_pc = wspawn_pc;
|
||||
assign warp_ctl_if.wspawn_wmask = wspawn_wmask;
|
||||
|
||||
// split
|
||||
|
||||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin : masks_init
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire curr_bool = (gpu_req_if.rs1_data[i] == 32'b1);
|
||||
assign split_new_use_mask[i] = curr_valids[i] & (curr_bool);
|
||||
assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool);
|
||||
assign split_new_use_mask[i] = gpu_req_if.thread_mask[i] & (curr_bool);
|
||||
assign split_new_later_mask[i] = gpu_req_if.thread_mask[i] & (!curr_bool);
|
||||
end
|
||||
|
||||
wire [`NT_BITS:0] num_valids;
|
||||
@@ -63,24 +64,20 @@ module VX_gpu_unit #(
|
||||
VX_countones #(
|
||||
.N(`NUM_THREADS)
|
||||
) valids_counter (
|
||||
.valids(curr_valids),
|
||||
.valids(gpu_req_if.thread_mask),
|
||||
.count (num_valids)
|
||||
);
|
||||
|
||||
assign warp_ctl_if.is_split = is_split && (num_valids > 1);
|
||||
assign warp_ctl_if.is_split = is_split && (num_valids > 1) && gpu_req_fire;
|
||||
assign warp_ctl_if.do_split = (split_new_use_mask != 0) && (split_new_use_mask != {`NUM_THREADS{1'b1}});
|
||||
assign warp_ctl_if.split_new_mask = split_new_use_mask;
|
||||
assign warp_ctl_if.split_later_mask = split_new_later_mask;
|
||||
assign warp_ctl_if.split_save_pc = gpu_req_if.next_PC;
|
||||
|
||||
// commit
|
||||
assign gpu_commit_if.valid = gpu_req_if.valid;
|
||||
assign gpu_commit_if.issue_tag = gpu_req_if.issue_tag;
|
||||
assign gpu_commit_if.data = 0;
|
||||
assign gpu_req_if.ready = gpu_commit_if.ready;
|
||||
|
||||
// commit
|
||||
assign gpu_commit_if.valid = gpu_req_if.valid;
|
||||
assign gpu_commit_if.warp_num = gpu_req_if.warp_num;
|
||||
assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC;
|
||||
assign gpu_commit_if.wb = `WB_NO;
|
||||
assign gpu_commit_if.rd = 0;
|
||||
assign gpu_commit_if.data = 0;
|
||||
|
||||
endmodule
|
||||
@@ -18,64 +18,46 @@ module VX_icache_stage #(
|
||||
// response
|
||||
VX_ifetch_rsp_if ifetch_rsp_if
|
||||
);
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`NUM_THREADS-1:0] valid_threads [`NUM_WARPS-1:0];
|
||||
reg [31:0] rsp_curr_PC_buf [`NUM_WARPS-1:0];
|
||||
reg [`NUM_THREADS-1:0] rsp_thread_mask_buf [`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_inst = (| ifetch_req_if.valid);
|
||||
|
||||
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
|
||||
wire mrq_full;
|
||||
|
||||
wire mrq_push = icache_req_if.valid && icache_req_if.ready;
|
||||
wire mrq_pop = icache_rsp_if.valid && icache_rsp_if.ready;
|
||||
|
||||
assign mrq_read_addr = icache_rsp_if.tag[0][`LOG2UP(`ICREQ_SIZE)-1:0];
|
||||
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(`ICREQ_SIZE) + 32 + `NW_BITS),
|
||||
.SIZE (`ICREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({mrq_write_addr, ifetch_req_if.curr_PC, ifetch_req_if.warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({dbg_mrq_write_addr, ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num}),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||
|
||||
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.warp_num;
|
||||
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mrq_push) begin
|
||||
valid_threads[ifetch_req_if.warp_num] <= ifetch_req_if.valid;
|
||||
end
|
||||
if (mrq_pop) begin
|
||||
assert(mrq_read_addr == dbg_mrq_write_addr);
|
||||
end
|
||||
end
|
||||
if (icache_req_fire) begin
|
||||
rsp_curr_PC_buf[req_tag] <= ifetch_req_if.curr_PC;
|
||||
rsp_thread_mask_buf[req_tag] <= ifetch_req_if.thread_mask;
|
||||
end
|
||||
end
|
||||
|
||||
// Icache Request
|
||||
assign icache_req_if.valid = valid_inst && !mrq_full;
|
||||
assign icache_req_if.valid = ifetch_req_if.valid;
|
||||
assign icache_req_if.rw = 0;
|
||||
assign icache_req_if.byteen = 4'b1111;
|
||||
assign icache_req_if.addr = ifetch_req_if.curr_PC[31:2];
|
||||
assign icache_req_if.data = 0;
|
||||
|
||||
// Can't accept new request
|
||||
assign ifetch_req_if.ready = !mrq_full && icache_req_if.ready;
|
||||
// Can accept new request?
|
||||
assign ifetch_req_if.ready = icache_req_if.ready;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 2'b1, 5'b0, ifetch_req_if.warp_num, mrq_write_addr};
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.warp_num, req_tag};
|
||||
`else
|
||||
assign icache_req_if.tag = mrq_write_addr;
|
||||
assign icache_req_if.tag = req_tag;
|
||||
`endif
|
||||
|
||||
assign ifetch_rsp_if.valid = icache_rsp_if.valid ? valid_threads[ifetch_rsp_if.warp_num] : 0;
|
||||
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
|
||||
assign ifetch_rsp_if.valid = icache_rsp_if.valid;
|
||||
assign ifetch_rsp_if.warp_num = rsp_tag;
|
||||
assign ifetch_rsp_if.thread_mask = rsp_thread_mask_buf[rsp_tag];
|
||||
assign ifetch_rsp_if.curr_PC = rsp_curr_PC_buf[rsp_tag];
|
||||
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
|
||||
|
||||
// Can't accept new response
|
||||
// Can accept new response?
|
||||
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
||||
|
||||
`SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.valid);
|
||||
@@ -92,10 +74,10 @@ module VX_icache_stage #(
|
||||
`ifdef DBG_PRINT_CORE_ICACHE
|
||||
always @(posedge clk) begin
|
||||
if (icache_req_if.valid && icache_req_if.ready) begin
|
||||
$display("%t: I$%0d req: tag=%0h, PC=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, ifetch_req_if.curr_PC, ifetch_req_if.warp_num);
|
||||
$display("%t: I$%0d req: warp=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.warp_num, ifetch_req_if.curr_PC);
|
||||
end
|
||||
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
|
||||
$display("%t: I$%0d rsp: tag=%0h, PC=%0h, warp=%0d, instr=%0h", $time, CORE_ID, mrq_read_addr, ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num, ifetch_rsp_if.instr);
|
||||
$display("%t: I$%0d rsp: warp=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.warp_num, ifetch_rsp_if.curr_PC, ifetch_rsp_if.instr);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
47
hw/rtl/VX_ipdom_stack.v
Normal file
47
hw/rtl/VX_ipdom_stack.v
Normal file
@@ -0,0 +1,47 @@
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Two-entry-per-slot stack.
//
// Each push stores the pair (q1, q2) in one slot. Each slot is popped twice:
// while the slot's is_part flag is 0 the output `d` presents the q2 value and
// the first pop only sets the flag; with the flag set `d` presents the q1
// value and the next pop releases the slot (both pointers step back by one).
// A push takes priority over a pop in the same cycle.
//
// Parameters:
//   WIDTH - width of each stored value
//   DEPTH - log2 of the number of slots (STACK_SIZE = 2 ** DEPTH)
//
// Ports:
//   q1, q2 - values written on push
//   d      - value selected from the slot addressed by rd_ptr
//   empty  - high when wr_ptr is 0
//   full   - high when wr_ptr equals STACK_SIZE-1
module VX_ipdom_stack #(
    parameter WIDTH = 1,
    parameter DEPTH = 1
) (
    input  wire               clk,
    input  wire               reset,
    // plain net inputs: `input reg` is not accepted by every Verilog tool
    input  wire [WIDTH - 1:0] q1,
    input  wire [WIDTH - 1:0] q2,
    output wire [WIDTH - 1:0] d,
    input  wire               push,
    input  wire               pop,
    output wire               empty,
    output wire               full
);
    localparam STACK_SIZE = 2 ** DEPTH;

    `USE_FAST_BRAM reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
    `USE_FAST_BRAM reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
    `USE_FAST_BRAM reg is_part [0:STACK_SIZE-1];

    reg [DEPTH-1:0] rd_ptr, wr_ptr;

    always @(posedge clk) begin
        if (reset) begin
            // reset both pointers so the read index is defined before the first push
            rd_ptr <= 0;
            wr_ptr <= 0;
        end else if (push) begin
            stack_1[wr_ptr] <= q1;
            stack_2[wr_ptr] <= q2;
            is_part[wr_ptr] <= 0;       // new slot has not been popped yet
            rd_ptr <= wr_ptr;           // top of stack is the slot just written
            wr_ptr <= wr_ptr + 1;
        end else if (pop) begin
            // pointers only move on the second pop of a slot (is_part already set)
            wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
            rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
            is_part[rd_ptr] <= 1;       // mark the slot as popped once
        end
    end

    assign d = is_part[rd_ptr] ? stack_1[rd_ptr] : stack_2[rd_ptr];

    assign empty = (0 == wr_ptr);
    assign full  = ((STACK_SIZE-1) == wr_ptr);

endmodule
|
||||
@@ -8,21 +8,32 @@ module VX_issue #(
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
VX_gpr_data_if gpr_data_if();
|
||||
wire schedule_delay;
|
||||
VX_gpr_read_if gpr_read_if();
|
||||
assign gpr_read_if.valid = decode_if.valid;
|
||||
assign gpr_read_if.warp_num = decode_if.warp_num;
|
||||
assign gpr_read_if.rs1 = decode_if.rs1;
|
||||
assign gpr_read_if.rs2 = decode_if.rs2;
|
||||
assign gpr_read_if.rs3 = decode_if.rs3;
|
||||
assign gpr_read_if.use_rs3 = decode_if.use_rs3;
|
||||
|
||||
wire alu_busy = ~alu_req_if.ready/* && (| alu_req_if.valid)*/;
|
||||
wire lsu_busy = ~lsu_req_if.ready/* && (| lsu_req_if.valid)*/;
|
||||
wire csr_busy = ~csr_req_if.ready/* && (| csr_req_if.valid)*/;
|
||||
wire mul_busy = ~mul_req_if.ready/* && (| mul_req_if.valid)*/;
|
||||
wire gpu_busy = ~gpu_req_if.ready/* && (| gpu_req_if.valid)*/;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
|
||||
|
||||
wire gpr_busy = ~gpr_read_if.ready;
|
||||
wire alu_busy = ~alu_req_if.ready;
|
||||
wire lsu_busy = ~lsu_req_if.ready;
|
||||
wire csr_busy = ~csr_req_if.ready;
|
||||
wire mul_busy = ~mul_req_if.ready;
|
||||
// FPU back-pressure comes from the FPU request interface, not the MUL one
wire fpu_busy = ~fpu_req_if.ready;
|
||||
wire gpu_busy = ~gpu_req_if.ready;
|
||||
|
||||
VX_scheduler #(
|
||||
.CORE_ID(CORE_ID)
|
||||
@@ -30,124 +41,75 @@ module VX_issue #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
.gpr_busy (gpr_busy),
|
||||
.alu_busy (alu_busy),
|
||||
.lsu_busy (lsu_busy),
|
||||
.csr_busy (csr_busy),
|
||||
.mul_busy (mul_busy),
|
||||
.gpu_busy (gpu_busy),
|
||||
.schedule_delay (schedule_delay),
|
||||
`UNUSED_PIN (is_empty)
|
||||
.fpu_busy (fpu_busy),
|
||||
.gpu_busy (gpu_busy),
|
||||
.issue_tag (issue_tag)
|
||||
);
|
||||
|
||||
VX_gpr_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) gpr_stage (
|
||||
.clk (clk),
|
||||
.decode_if (decode_if),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_data_if (gpr_data_if)
|
||||
.gpr_read_if (gpr_read_if)
|
||||
);
|
||||
|
||||
VX_alu_req_if alu_req_tmp_if();
|
||||
VX_lsu_req_if lsu_req_tmp_if();
|
||||
VX_csr_req_if csr_req_tmp_if();
|
||||
VX_mul_req_if mul_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
VX_decode_if decode_tmp_if();
|
||||
VX_gpr_read_if gpr_data_tmp_if();
|
||||
|
||||
VX_issue_mux issue_mux (
|
||||
.decode_if (decode_if),
|
||||
.gpr_data_if (gpr_data_if),
|
||||
.alu_req_if (alu_req_tmp_if),
|
||||
.lsu_req_if (lsu_req_tmp_if),
|
||||
.csr_req_if (csr_req_tmp_if),
|
||||
.mul_req_if (mul_req_tmp_if),
|
||||
.gpu_req_if (gpu_req_tmp_if)
|
||||
);
|
||||
|
||||
wire stall_alu = ~alu_req_if.ready || schedule_delay;
|
||||
wire stall_lsu = ~lsu_req_if.ready || schedule_delay;
|
||||
wire stall_csr = ~csr_req_if.ready || schedule_delay;
|
||||
wire stall_mul = ~mul_req_if.ready || schedule_delay;
|
||||
wire stall_gpu = ~gpu_req_if.ready || schedule_delay;
|
||||
|
||||
wire flush_alu = alu_req_if.ready && schedule_delay;
|
||||
wire flush_lsu = lsu_req_if.ready && schedule_delay;
|
||||
wire flush_csr = csr_req_if.ready && schedule_delay;
|
||||
wire flush_mul = mul_req_if.ready && schedule_delay;
|
||||
wire flush_gpu = gpu_req_if.ready && schedule_delay;
|
||||
wire stall = ~alu_req_if.ready || ~decode_if.ready;
|
||||
wire flush = alu_req_if.ready && ~decode_if.ready;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) alu_reg (
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) issue_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_alu),
|
||||
.flush (flush_alu),
|
||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.wb, alu_req_tmp_if.rd, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC}),
|
||||
.out ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.alu_op, alu_req_if.wb, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC})
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}),
|
||||
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_data_tmp_if.rs1_data, gpr_data_tmp_if.rs2_data, gpr_data_tmp_if.rs3_data})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_lsu),
|
||||
.flush (flush_lsu),
|
||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.wb, lsu_req_tmp_if.rd, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset, lsu_req_tmp_if.store_data}),
|
||||
.out ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.wb, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data})
|
||||
VX_issue_demux issue_demux (
|
||||
.decode_if (decode_tmp_if),
|
||||
.gpr_read_if (gpr_data_tmp_if),
|
||||
.issue_tag (issue_tmp_tag),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + `WB_BITS + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_csr),
|
||||
.flush (flush_csr),
|
||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.wb, csr_req_tmp_if.rd, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io}),
|
||||
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_mul),
|
||||
.flush (flush_mul),
|
||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data}),
|
||||
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_gpu),
|
||||
.flush (flush_gpu),
|
||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data, gpu_req_tmp_if.next_PC}),
|
||||
.out ({gpu_req_if.valid, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.next_PC})
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| alu_req_tmp_if.valid) && ~stall_alu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.wb, alu_req_tmp_if.rd, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC);
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
||||
end
|
||||
if ((| mul_req_tmp_if.valid) && ~stall_mul) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data);
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
end
|
||||
if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset);
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
end
|
||||
if ((| csr_req_tmp_if.valid) && ~stall_csr) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, op=%0d, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.wb, csr_req_tmp_if.rd, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask);
|
||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
end
|
||||
if ((| gpu_req_tmp_if.valid) && ~stall_gpu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, op=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data);
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%d, rd=%0d, frm=%0h, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, fpu_req_if.frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
end
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
84
hw/rtl/VX_issue_demux.v
Normal file
84
hw/rtl/VX_issue_demux.v
Normal file
@@ -0,0 +1,84 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_issue_demux: purely combinational fan-out of one decoded instruction
// (decode_if), its register operands (gpr_read_if) and its issue tag to the
// per-unit request interfaces (ALU, LSU, CSR, MUL, FPU, GPU).
// Each unit's valid is asserted only when decode_if.valid is set and
// decode_if.ex_type selects that unit; the other request fields are driven
// regardless of which unit is selected.
// The MUL and FPU sections are compiled only when EXT_M_ENABLE / EXT_F_ENABLE
// are defined.
module VX_issue_demux (
|
||||
// inputs
|
||||
VX_decode_if decode_if,
|
||||
VX_gpr_read_if gpr_read_if,
|
||||
input wire [`ISTAG_BITS-1:0] issue_tag,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
// ALU unit
|
||||
assign alu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_ALU);
|
||||
assign alu_req_if.thread_mask = decode_if.thread_mask;
|
||||
assign alu_req_if.issue_tag = issue_tag;
|
||||
assign alu_req_if.warp_num = decode_if.warp_num;
|
||||
assign alu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign alu_req_if.alu_op = `ALU_OP(decode_if.ex_op);
|
||||
// Operand 1: curr_PC replicated NUM_THREADS times when rs1_is_PC, else the GPR rs1 data.
assign alu_req_if.rs1_data = decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} : gpr_read_if.rs1_data;
|
||||
// Operand 2: imm replicated NUM_THREADS times when rs2_is_imm, else the GPR rs2 data.
assign alu_req_if.rs2_data = decode_if.rs2_is_imm ? {`NUM_THREADS{decode_if.imm}} : gpr_read_if.rs2_data;
|
||||
assign alu_req_if.offset = decode_if.imm;
|
||||
assign alu_req_if.next_PC = decode_if.next_PC;
|
||||
|
||||
// LSU unit
|
||||
assign lsu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_LSU);
|
||||
assign lsu_req_if.thread_mask = decode_if.thread_mask;
|
||||
assign lsu_req_if.issue_tag = issue_tag;
|
||||
assign lsu_req_if.warp_num = decode_if.warp_num;
|
||||
assign lsu_req_if.curr_PC = decode_if.curr_PC;
|
||||
// rs1 data is the base address, rs2 data is the store data, imm is the offset.
assign lsu_req_if.base_addr = gpr_read_if.rs1_data;
|
||||
assign lsu_req_if.store_data = gpr_read_if.rs2_data;
|
||||
assign lsu_req_if.offset = decode_if.imm;
|
||||
// Read/write select and byte-enable are both extracted from ex_op by macros.
assign lsu_req_if.rw = `LSU_RW(decode_if.ex_op);
|
||||
assign lsu_req_if.byteen = `LSU_BE(decode_if.ex_op);
|
||||
assign lsu_req_if.rd = decode_if.rd;
|
||||
assign lsu_req_if.wb = decode_if.wb;
|
||||
|
||||
// CSR unit
|
||||
assign csr_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_CSR);
|
||||
assign csr_req_if.issue_tag = issue_tag;
|
||||
assign csr_req_if.warp_num = decode_if.warp_num;
|
||||
assign csr_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign csr_req_if.csr_op = `CSR_OP(decode_if.ex_op);
|
||||
// CSR address is the low CSR_ADDR_BITS bits of the immediate.
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
// Mask: the rs1 field itself cast to 32 bits when rs2_is_imm, otherwise
// element 0 of the GPR rs1 data (presumably thread 0's value - TODO confirm).
assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_read_if.rs1_data[0];
|
||||
// is_io is tied low for requests produced here.
assign csr_req_if.is_io = 1'b0;
|
||||
|
||||
// MUL unit
|
||||
`ifdef EXT_M_ENABLE
|
||||
assign mul_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_MUL);
|
||||
assign mul_req_if.issue_tag = issue_tag;
|
||||
assign mul_req_if.mul_op = `MUL_OP(decode_if.ex_op);
|
||||
assign mul_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign mul_req_if.rs2_data = gpr_read_if.rs2_data;
|
||||
`endif
|
||||
|
||||
// FPU unit
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_FPU);
|
||||
assign fpu_req_if.issue_tag = issue_tag;
|
||||
assign fpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign fpu_req_if.fpu_op = `FPU_OP(decode_if.ex_op);
|
||||
// All three GPR operands are forwarded, together with the decoded frm field.
assign fpu_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign fpu_req_if.rs2_data = gpr_read_if.rs2_data;
|
||||
assign fpu_req_if.rs3_data = gpr_read_if.rs3_data;
|
||||
assign fpu_req_if.frm = decode_if.frm;
|
||||
`endif
|
||||
|
||||
// GPU unit
|
||||
assign gpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_GPU);
|
||||
assign gpu_req_if.thread_mask = decode_if.thread_mask;
|
||||
assign gpu_req_if.issue_tag = issue_tag;
|
||||
assign gpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign gpu_req_if.gpu_op = `GPU_OP(decode_if.ex_op);
|
||||
assign gpu_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
// Only element 0 of the GPR rs2 data is forwarded.
assign gpu_req_if.rs2_data = gpr_read_if.rs2_data[0];
|
||||
assign gpu_req_if.next_PC = decode_if.next_PC;
|
||||
|
||||
endmodule
|
||||
@@ -1,76 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_issue_mux: purely combinational fan-out of one decoded instruction
// (decode_if) and its register operands (gpr_data_if) to the ALU, LSU, CSR,
// MUL and GPU request interfaces.
// Each unit's valid is decode_if.valid ANDed bitwise with a NUM_THREADS-wide
// replication of (decode_if.ex_type == that unit); the other request fields
// are driven regardless of which unit is selected.
module VX_issue_mux (
|
||||
// inputs
|
||||
VX_decode_if decode_if,
|
||||
VX_gpr_data_if gpr_data_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
|
||||
// Per-unit select, replicated NUM_THREADS times so it can mask decode_if.valid.
wire[`NUM_THREADS-1:0] is_alu = {`NUM_THREADS{decode_if.ex_type == `EX_ALU}};
|
||||
wire[`NUM_THREADS-1:0] is_lsu = {`NUM_THREADS{decode_if.ex_type == `EX_LSU}};
|
||||
wire[`NUM_THREADS-1:0] is_csr = {`NUM_THREADS{decode_if.ex_type == `EX_CSR}};
|
||||
wire[`NUM_THREADS-1:0] is_mul = {`NUM_THREADS{decode_if.ex_type == `EX_MUL}};
|
||||
wire[`NUM_THREADS-1:0] is_gpu = {`NUM_THREADS{decode_if.ex_type == `EX_GPU}};
|
||||
|
||||
// ALU unit
|
||||
assign alu_req_if.valid = decode_if.valid & is_alu;
|
||||
assign alu_req_if.warp_num = decode_if.warp_num;
|
||||
assign alu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign alu_req_if.alu_op = `ALU_OP(decode_if.instr_op);
|
||||
assign alu_req_if.rd = decode_if.rd;
|
||||
assign alu_req_if.wb = decode_if.wb;
|
||||
assign alu_req_if.rs1_data = gpr_data_if.rs1_data;
|
||||
assign alu_req_if.rs2_data = gpr_data_if.rs2_data;
|
||||
assign alu_req_if.offset = decode_if.imm;
|
||||
assign alu_req_if.next_PC = decode_if.next_PC;
|
||||
|
||||
// LSU unit
|
||||
assign lsu_req_if.valid = decode_if.valid & is_lsu;
|
||||
assign lsu_req_if.warp_num = decode_if.warp_num;
|
||||
assign lsu_req_if.curr_PC = decode_if.curr_PC;
|
||||
// rs1 data is the base address, rs2 data is the store data, imm is the offset.
assign lsu_req_if.base_addr = gpr_data_if.rs1_data;
|
||||
assign lsu_req_if.store_data = gpr_data_if.rs2_data;
|
||||
assign lsu_req_if.offset = decode_if.imm;
|
||||
// Read/write select and byte-enable are both extracted from instr_op by macros.
assign lsu_req_if.rw = `LSU_RW(decode_if.instr_op);
|
||||
assign lsu_req_if.byteen = `LSU_BE(decode_if.instr_op);
|
||||
assign lsu_req_if.rd = decode_if.rd;
|
||||
assign lsu_req_if.wb = decode_if.wb;
|
||||
|
||||
// CSR unit
|
||||
assign csr_req_if.valid = decode_if.valid & is_csr;
|
||||
assign csr_req_if.warp_num = decode_if.warp_num;
|
||||
assign csr_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign csr_req_if.csr_op = `CSR_OP(decode_if.instr_op);
|
||||
// CSR address is the low CSR_ADDR_SIZE bits of the immediate.
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_SIZE-1:0];
|
||||
// Mask: the rs1 field itself cast to 32 bits when rs2_is_imm, otherwise
// element 0 of the GPR rs1 data (presumably thread 0's value - TODO confirm).
assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_data_if.rs1_data[0];
|
||||
assign csr_req_if.rd = decode_if.rd;
|
||||
assign csr_req_if.wb = decode_if.wb;
|
||||
// is_io is tied low for requests produced here.
assign csr_req_if.is_io = 1'b0;
|
||||
|
||||
// MUL unit
|
||||
assign mul_req_if.valid = decode_if.valid & is_mul;
|
||||
assign mul_req_if.warp_num = decode_if.warp_num;
|
||||
assign mul_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign mul_req_if.mul_op = `MUL_OP(decode_if.instr_op);
|
||||
assign mul_req_if.rs1_data = gpr_data_if.rs1_data;
|
||||
assign mul_req_if.rs2_data = gpr_data_if.rs2_data;
|
||||
assign mul_req_if.rd = decode_if.rd;
|
||||
assign mul_req_if.wb = decode_if.wb;
|
||||
|
||||
// GPU unit
|
||||
assign gpu_req_if.valid = decode_if.valid & is_gpu;
|
||||
assign gpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign gpu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign gpu_req_if.gpu_op = `GPU_OP(decode_if.instr_op);
|
||||
assign gpu_req_if.rs1_data = gpr_data_if.rs1_data;
|
||||
// Only element 0 of the GPR rs2 data is forwarded.
assign gpu_req_if.rs2_data = gpr_data_if.rs2_data[0];
|
||||
assign gpu_req_if.next_PC = decode_if.next_PC;
|
||||
|
||||
endmodule
|
||||
@@ -9,26 +9,28 @@ module VX_lsu_unit #(
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_cache_core_req_if dcache_req_if,
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
VX_cache_core_req_if dcache_req_if,
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
|
||||
// inputs
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_commit_if lsu_commit_if
|
||||
VX_exu_to_cmt_if lsu_commit_if
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0] use_valid;
|
||||
wire use_valid;
|
||||
wire [`NUM_THREADS-1:0] use_thread_mask;
|
||||
wire use_req_rw;
|
||||
wire [`NUM_THREADS-1:0][29:0] use_req_addr;
|
||||
wire [`NUM_THREADS-1:0][1:0] use_req_offset;
|
||||
wire [`NUM_THREADS-1:0][3:0] use_req_byteen;
|
||||
wire [`NUM_THREADS-1:0][31:0] use_req_data;
|
||||
wire [`BYTEEN_BITS-1:0] mem_byteen;
|
||||
wire [1:0] use_req_sext;
|
||||
wire [`NR_BITS-1:0] use_rd;
|
||||
wire [`NW_BITS-1:0] use_warp_num;
|
||||
wire [`WB_BITS-1:0] use_wb;
|
||||
wire [`ISTAG_BITS-1:0] use_issue_tag;
|
||||
wire use_wb;
|
||||
wire [31:0] use_pc;
|
||||
|
||||
genvar i;
|
||||
@@ -38,20 +40,29 @@ module VX_lsu_unit #(
|
||||
assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
||||
end
|
||||
|
||||
reg [3:0] wmask;
|
||||
reg [1:0] mem_req_sext;
|
||||
always @(*) begin
|
||||
case (lsu_req_if.byteen)
|
||||
0: wmask = 4'b0001;
|
||||
1: wmask = 4'b0011;
|
||||
default: wmask = 4'b1111;
|
||||
`BYTEEN_SB: mem_req_sext = 2'h1;
|
||||
`BYTEEN_SH: mem_req_sext = 2'h2;
|
||||
default: mem_req_sext = 2'h0;
|
||||
endcase
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0][29:0] mem_req_addr;
|
||||
wire [`NUM_THREADS-1:0][1:0] mem_req_offset;
|
||||
wire [`NUM_THREADS-1:0][3:0] mem_req_byteen;
|
||||
wire [`NUM_THREADS-1:0][3:0] mem_req_byteen;
|
||||
wire [`NUM_THREADS-1:0][31:0] mem_req_data;
|
||||
|
||||
reg [3:0] wmask;
|
||||
always @(*) begin
|
||||
case (`BYTEEN_TYPE(lsu_req_if.byteen))
|
||||
0: wmask = 4'b0001;
|
||||
1: wmask = 4'b0011;
|
||||
default: wmask = 4'b1111;
|
||||
endcase
|
||||
end
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign mem_req_addr[i] = full_address[i][31:2];
|
||||
assign mem_req_offset[i] = full_address[i][1:0];
|
||||
@@ -59,128 +70,132 @@ module VX_lsu_unit #(
|
||||
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
|
||||
end
|
||||
|
||||
// Can accept new request
|
||||
wire stall = ~dcache_req_if.ready || mrq_full;
|
||||
assign lsu_req_if.ready = ~stall;
|
||||
wire stall_in = ~dcache_req_if.ready;
|
||||
|
||||
// Can accept new request?
|
||||
assign lsu_req_if.ready = ~stall_in;
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [`NUM_THREADS-1:0][31:0] use_address;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + `WB_BITS + 32)
|
||||
) mem_req_reg (
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + `ISTAG_BITS + (`NUM_THREADS * 32) + 2 + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + 1 + 32)
|
||||
) lsu_req_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.stall (stall_in),
|
||||
.flush (0),
|
||||
.in ({lsu_req_if.valid, full_address, lsu_req_if.byteen, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||
.out ({use_valid , use_address, mem_byteen , use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd , use_warp_num , use_wb , use_pc})
|
||||
.in ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.thread_mask, lsu_req_if.issue_tag, full_address, mem_req_sext, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||
.out ({use_valid, use_warp_num, use_thread_mask, use_issue_tag, use_address, use_req_sext, use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd, use_wb, use_pc})
|
||||
);
|
||||
|
||||
reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0];
|
||||
reg [`NUM_THREADS-1:0] mem_rsp_mask_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NUM_THREADS-1:0][1:0] mem_rsp_offset_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [1:0] mem_rsp_sext_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] mem_rsp_data_all_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NW_BITS-1:0] mem_rsp_warp_num_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [31:0] mem_rsp_curr_PC_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NR_BITS-1:0] mem_rsp_rd_buf [`ISSUEQ_SIZE-1:0];
|
||||
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr;
|
||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
|
||||
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
|
||||
wire mrq_full;
|
||||
reg [`NUM_THREADS-1:0][31:0] mem_rsp_data_curr;
|
||||
|
||||
wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready
|
||||
&& (0 == use_req_rw); // only push read requests
|
||||
wire [`ISTAG_BITS-1:0] rsp_issue_tag = dcache_rsp_if.tag[0][`ISTAG_BITS-1:0];
|
||||
|
||||
wire mrq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_read_addr = dcache_rsp_if.tag[0][`LOG2UP(`DCREQ_SIZE)-1:0];
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask = mem_rsp_mask_buf [rsp_issue_tag];
|
||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset = mem_rsp_offset_buf [rsp_issue_tag];
|
||||
wire [1:0] mem_rsp_sext = mem_rsp_sext_buf [rsp_issue_tag];
|
||||
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data_all = mem_rsp_data_all_buf [rsp_issue_tag];
|
||||
wire [`NW_BITS-1:0] mem_rsp_warp_num = mem_rsp_warp_num_buf [rsp_issue_tag];
|
||||
wire [31:0] mem_rsp_curr_PC = mem_rsp_curr_PC_buf [rsp_issue_tag];
|
||||
wire [`NR_BITS-1:0] mem_rsp_rd = mem_rsp_rd_buf [rsp_issue_tag];
|
||||
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_upd = mem_rsp_mask[mrq_read_addr] & ~dcache_rsp_if.valid;
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask & ~dcache_rsp_if.valid;
|
||||
|
||||
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
|
||||
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + `WB_BITS + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
|
||||
.SIZE (`DCREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({mrq_write_addr, use_pc, use_wb, use_req_offset, mem_byteen, use_rd, use_warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({dbg_mrq_write_addr, lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
wire dcache_req_fire = (| dcache_req_if.valid) && dcache_req_if.ready;
|
||||
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mrq_push) begin
|
||||
mem_rsp_mask[mrq_write_addr] <= use_valid;
|
||||
if (dcache_req_fire && (0 == use_req_rw)) begin
|
||||
mem_rsp_mask_buf [use_issue_tag] <= use_thread_mask;
|
||||
mem_rsp_offset_buf [use_issue_tag] <= use_req_offset;
|
||||
mem_rsp_sext_buf [use_issue_tag] <= use_req_sext;
|
||||
mem_rsp_data_all_buf [use_issue_tag] <= 0;
|
||||
mem_rsp_warp_num_buf [use_issue_tag] <= use_warp_num;
|
||||
mem_rsp_curr_PC_buf [use_issue_tag] <= use_pc;
|
||||
mem_rsp_rd_buf [use_issue_tag] <= use_rd;
|
||||
end
|
||||
if (mrq_pop_part) begin
|
||||
mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_upd;
|
||||
assert(($time < 2) || mrq_read_addr == dbg_mrq_write_addr);
|
||||
if (dcache_rsp_fire) begin
|
||||
mem_rsp_mask_buf [rsp_issue_tag] <= mem_rsp_mask_n;
|
||||
mem_rsp_data_all_buf [rsp_issue_tag] <= mem_rsp_data_all | mem_rsp_data_curr;
|
||||
end
|
||||
end
|
||||
|
||||
// Core Request
|
||||
assign dcache_req_if.valid = use_valid & {`NUM_THREADS{~mrq_full}};
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{use_valid}} & use_thread_mask;
|
||||
assign dcache_req_if.rw = {`NUM_THREADS{use_req_rw}};
|
||||
assign dcache_req_if.byteen = use_req_byteen;
|
||||
assign dcache_req_if.addr = use_req_addr;
|
||||
assign dcache_req_if.data = use_req_data;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign dcache_req_if.tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr};
|
||||
assign dcache_req_if.tag = {use_pc, use_wb, use_rd, use_warp_num, use_issue_tag};
|
||||
`else
|
||||
assign dcache_req_if.tag = mrq_write_addr;
|
||||
assign dcache_req_if.tag = use_issue_tag;
|
||||
`endif
|
||||
|
||||
// Core Response
|
||||
reg [`NUM_THREADS-1:0][31:0] core_rsp_data;
|
||||
|
||||
// Core Response
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [15:0] rsp_data_shifted = 16'(dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0});
|
||||
wire [31:0] rsp_data_shifted = dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0};
|
||||
always @(*) begin
|
||||
case (core_rsp_mem_read)
|
||||
`BYTEEN_SB: core_rsp_data[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]};
|
||||
`BYTEEN_UB: core_rsp_data[i] = 32'(rsp_data_shifted[7:0]);
|
||||
`BYTEEN_SH: core_rsp_data[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]};
|
||||
`BYTEEN_UH: core_rsp_data[i] = 32'(rsp_data_shifted[15:0]);
|
||||
default: core_rsp_data[i] = dcache_rsp_if.data[i];
|
||||
case (mem_rsp_sext)
|
||||
1: mem_rsp_data_curr[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]};
|
||||
2: mem_rsp_data_curr[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]};
|
||||
default: mem_rsp_data_curr[i] = rsp_data_shifted;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign lsu_commit_if.valid = dcache_rsp_if.valid;
|
||||
assign lsu_commit_if.data = core_rsp_data;
|
||||
wire is_store_rsp = dcache_req_fire && use_req_rw;
|
||||
wire is_load_rsp = (| dcache_rsp_if.valid) && (0 == mem_rsp_mask_n);
|
||||
|
||||
// Can accept new cache response
|
||||
assign dcache_rsp_if.ready = lsu_commit_if.ready;
|
||||
assign lsu_commit_if.valid = is_load_rsp || is_store_rsp;
|
||||
assign lsu_commit_if.issue_tag = is_store_rsp ? use_issue_tag : rsp_issue_tag;
|
||||
assign lsu_commit_if.data = mem_rsp_data_curr | mem_rsp_data_all;
|
||||
|
||||
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_curr_PC, use_pc);
|
||||
// Can accept new cache response?
|
||||
assign dcache_rsp_if.ready = lsu_commit_if.ready && ~is_store_rsp; // STORE has priority
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_addr, use_address);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_rw, core_req_rw);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_rw, dcache_req_if.rw );
|
||||
`SCOPE_ASSIGN(scope_dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.tag);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.ready);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.ready);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_curr_PC, use_pc);
|
||||
|
||||
`SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
||||
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
||||
|
||||
`UNUSED_VAR (mem_rsp_warp_num)
|
||||
`UNUSED_VAR (mem_rsp_curr_PC)
|
||||
`UNUSED_VAR (mem_rsp_rd)
|
||||
`UNUSED_VAR (use_wb)
|
||||
|
||||
`ifdef DBG_PRINT_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
||||
$display("%t: D$%0d req: valid=%b, warp=%0d, PC=%0h, addr=%0h, tag=%0h, rw=%0b, rd=%0d, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, use_valid, use_warp_num, use_pc, use_address, mrq_write_addr, use_req_rw, use_rd, use_req_byteen, use_req_data);
|
||||
$display("%t: D$%0d req: warp=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, use_warp_num, use_pc, dcache_req_if.valid, use_address, dcache_req_if.tag, use_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
|
||||
end
|
||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||
$time, CORE_ID, lsu_commit_if.valid, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, mrq_read_addr, lsu_commit_if.rd, lsu_commit_if.data);
|
||||
$time, CORE_ID, dcache_rsp_if.valid, mem_rsp_warp_num, mem_rsp_curr_PC, dcache_rsp_if.tag, mem_rsp_rd, dcache_rsp_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -245,8 +245,8 @@ module VX_mem_unit # (
|
||||
.SNOOP_FORWARDING (0),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
||||
) icache (
|
||||
`SCOPE_SIGNALS_CACHE_UNBIND
|
||||
|
||||
@@ -7,42 +7,70 @@ module VX_mul_unit #(
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_mul_req_if alu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_commit_if mul_commit_if
|
||||
VX_exu_to_cmt_if alu_commit_if
|
||||
);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [`NUM_THREADS-1:0][63:0] mul_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] div_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] rem_result;
|
||||
|
||||
wire [`MUL_BITS-1:0] alu_op = alu_req_if.mul_op;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
|
||||
wire [`MUL_BITS-1:0] alu_op = mul_req_if.mul_op;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result, div_result;
|
||||
|
||||
genvar i;
|
||||
wire stall_mul, stall_div;
|
||||
|
||||
wire is_mul_op = (alu_op == `MUL_MUL);
|
||||
wire is_div_op = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
||||
|
||||
reg [`NUM_THREADS-1:0] is_div_op_in;
|
||||
wire [`NUM_THREADS-1:0] is_div_op_out;
|
||||
wire is_mul_op_out;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};
|
||||
|
||||
wire [32:0] div_in1 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] div_in2 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in2[i][31], alu_in2[i]};
|
||||
|
||||
VX_mult #(
|
||||
reg [32:0] div_in1, div_in2;
|
||||
|
||||
// handle divide by zero
|
||||
always @(*) begin
|
||||
is_div_op_in[i] = is_div_op;
|
||||
div_in1 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in1[i][31], alu_in1[i]};
|
||||
div_in2 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in2[i][31], alu_in2[i]};
|
||||
|
||||
if (0 == alu_in2[i]) begin
|
||||
if (is_div_op) begin
|
||||
div_in1 = {1'b0, 32'hFFFFFFFF}; // quotient = (0xFFFFFFFF / 1)
|
||||
div_in2 = 1;
|
||||
end else begin
|
||||
is_div_op_in[i] = 1; // remainder = (in1 / 1)
|
||||
div_in2 = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [63:0] mul_result_tmp;
|
||||
wire [31:0] div_result_tmp;
|
||||
wire [31:0] rem_result_tmp;
|
||||
|
||||
VX_multiplier #(
|
||||
.WIDTHA(33),
|
||||
.WIDTHB(33),
|
||||
.WIDTHP(64),
|
||||
.SIGNED(1),
|
||||
.PIPELINE(`MUL_LATENCY)
|
||||
.PIPELINE(`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.clk_en(~stall_mul),
|
||||
.dataa(mul_in1),
|
||||
.datab(mul_in2),
|
||||
.result(mul_result[i])
|
||||
.result(mul_result_tmp)
|
||||
);
|
||||
|
||||
VX_divide #(
|
||||
@@ -52,74 +80,59 @@ module VX_mul_unit #(
|
||||
.WIDTHR(32),
|
||||
.NSIGNED(1),
|
||||
.DSIGNED(1),
|
||||
.PIPELINE(`DIV_LATENCY)
|
||||
) sdiv (
|
||||
.PIPELINE(`LATENCY_IDIV)
|
||||
) divide (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.clk_en(~stall_div),
|
||||
.numer(div_in1),
|
||||
.denom(div_in2),
|
||||
.quotient(div_result[i]),
|
||||
.remainder(rem_result[i])
|
||||
.quotient(div_result_tmp),
|
||||
.remainder(rem_result_tmp)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`MUL_MUL: alu_result[i] = mul_result[i][31:0];
|
||||
`MUL_MULH,
|
||||
`MUL_MULHSU,
|
||||
`MUL_MULHU: alu_result[i] = mul_result[i][63:32];
|
||||
`MUL_DIV,
|
||||
`MUL_DIVU: alu_result[i] = (alu_in2[i] == 0) ? 32'hffffffff : div_result[i];
|
||||
`MUL_REM,
|
||||
`MUL_REMU: alu_result[i] = (alu_in2[i] == 0) ? alu_in1[i] : rem_result[i];
|
||||
default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC, FENCE
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
reg result_avail;
|
||||
reg [4:0] pending_ctr;
|
||||
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `DIV_LATENCY : `MUL_LATENCY;
|
||||
assign mul_result[i] = is_mul_op_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
||||
assign div_result[i] = is_div_op_out[i] ? div_result_tmp : rem_result_tmp;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
result_avail <= 0;
|
||||
pending_ctr <= 0;
|
||||
end else begin
|
||||
if (result_avail && !stall) begin
|
||||
result_avail <= 0;
|
||||
pending_ctr <= 0;
|
||||
end
|
||||
if ((| mul_req_if.valid) && (pending_ctr == 0)) begin
|
||||
pending_ctr <= instr_delay - 1;
|
||||
if (instr_delay == 1)
|
||||
result_avail <= 1;
|
||||
end else if (pending_ctr != 0) begin
|
||||
pending_ctr <= pending_ctr - 1;
|
||||
if (pending_ctr == 1)
|
||||
result_avail <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
wire mul_valid_out;
|
||||
wire div_valid_out;
|
||||
|
||||
wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
|
||||
wire [`ISTAG_BITS-1:0] mul_issue_tag;
|
||||
wire [`ISTAG_BITS-1:0] div_issue_tag;
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1 + `ISTAG_BITS + 1),
|
||||
.DEPTH(`LATENCY_IMUL)
|
||||
) mul_delay (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall_mul),
|
||||
.in({alu_req_if.valid && ~`IS_DIV_OP(alu_op), alu_req_if.issue_tag, is_mul_op}),
|
||||
.out({mul_valid_out, mul_issue_tag, is_mul_op_out})
|
||||
);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1 + `ISTAG_BITS + `NUM_THREADS),
|
||||
.DEPTH(`LATENCY_IDIV)
|
||||
) div_delay (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall_div),
|
||||
.in({alu_req_if.valid && `IS_DIV_OP(alu_op), alu_req_if.issue_tag, is_div_op_in}),
|
||||
.out({div_valid_out, div_issue_tag, is_div_op_out})
|
||||
);
|
||||
|
||||
wire stall_out = (~alu_commit_if.ready && alu_commit_if.valid);
|
||||
assign stall_mul = stall_out;
|
||||
assign stall_div = stall_out
|
||||
|| (mul_valid_out && div_valid_out); // arbitration prioritizes MUL
|
||||
|
||||
// can accept new request?
|
||||
assign alu_req_if.ready = ~(stall_mul || stall_div);
|
||||
|
||||
assign alu_commit_if.valid = mul_valid_out || div_valid_out;
|
||||
assign alu_commit_if.issue_tag = mul_valid_out ? mul_issue_tag : div_issue_tag;
|
||||
assign alu_commit_if.data = mul_valid_out ? mul_result : div_result;
|
||||
|
||||
wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|
||||
|| pipeline_stall;
|
||||
|
||||
wire flush = mul_commit_if.ready && pipeline_stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb, alu_result}),
|
||||
.out ({mul_commit_if.valid, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
||||
);
|
||||
|
||||
assign mul_req_if.ready = ~stall;
|
||||
|
||||
endmodule
|
||||
@@ -101,7 +101,7 @@ module VX_pipeline #(
|
||||
assign csr_io_rsp_data = csr_io_rsp_if.data;
|
||||
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
|
||||
|
||||
VX_perf_cntrs_if perf_cntrs_if();
|
||||
VX_cmt_to_csr_if cmt_to_csr_if();
|
||||
VX_decode_if decode_if();
|
||||
VX_branch_ctl_if branch_ctl_if();
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
@@ -110,15 +110,18 @@ module VX_pipeline #(
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_mul_req_if mul_req_if();
|
||||
VX_fpu_req_if fpu_req_if();
|
||||
VX_gpu_req_if gpu_req_if();
|
||||
VX_wb_if writeback_if();
|
||||
VX_cmt_to_issue_if cmt_to_issue_if();
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
VX_commit_if alu_commit_if();
|
||||
VX_commit_if lsu_commit_if();
|
||||
VX_commit_if csr_commit_if();
|
||||
VX_commit_if mul_commit_if();
|
||||
VX_commit_if gpu_commit_if();
|
||||
VX_exu_to_cmt_if alu_commit_if();
|
||||
VX_exu_to_cmt_if lsu_commit_if();
|
||||
VX_exu_to_cmt_if csr_commit_if();
|
||||
VX_exu_to_cmt_if mul_commit_if();
|
||||
VX_fpu_to_cmt_if fpu_commit_if();
|
||||
VX_exu_to_cmt_if gpu_commit_if();
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID(CORE_ID)
|
||||
@@ -154,11 +157,13 @@ module VX_pipeline #(
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_issue_if (cmt_to_issue_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
@@ -175,12 +180,13 @@ module VX_pipeline #(
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
|
||||
.perf_cntrs_if (perf_cntrs_if),
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
@@ -189,6 +195,7 @@ module VX_pipeline #(
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.ebreak (ebreak)
|
||||
@@ -204,10 +211,12 @@ module VX_pipeline #(
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
.writeback_if (writeback_if),
|
||||
.perf_cntrs_if (perf_cntrs_if)
|
||||
.cmt_to_csr_if (cmt_to_csr_if)
|
||||
);
|
||||
|
||||
assign dcache_req_valid = core_dcache_req_if.valid;
|
||||
|
||||
66
hw/rtl/VX_platform.vh
Normal file
66
hw/rtl/VX_platform.vh
Normal file
@@ -0,0 +1,66 @@
|
||||
// VX_platform.vh
// Tool/platform helper macros: Verilator lint and tracing pragmas, a synthesis
// attribute shortcut, a static assertion, and small integer math helpers.
// Include-guarded by VX_PLATFORM.
`ifndef VX_PLATFORM
`define VX_PLATFORM

///////////////////////////////////////////////////////////////////////////////

// DEBUG_BLOCK(x): emits x wrapped in UNUSED lint off/on pragmas unless NDEBUG
// is defined, in which case it expands to nothing.
`ifndef NDEBUG
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
    x \
    /* verilator lint_on UNUSED */
`else
`define DEBUG_BLOCK(x)
`endif

// DEBUG_BEGIN / DEBUG_END: bracket a region with UNUSED lint off/on pragmas.
`define DEBUG_BEGIN /* verilator lint_off UNUSED */

`define DEBUG_END /* verilator lint_on UNUSED */

// IGNORE_WARNINGS_BEGIN / IGNORE_WARNINGS_END: bracket a region in which the
// UNUSED, PINCONNECTEMPTY, WIDTH, UNOPTFLAT, UNDRIVEN and DECLFILENAME
// Verilator lint checks are switched off and then back on.
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
    /* verilator lint_off PINCONNECTEMPTY */ \
    /* verilator lint_off WIDTH */ \
    /* verilator lint_off UNOPTFLAT */ \
    /* verilator lint_off UNDRIVEN */ \
    /* verilator lint_off DECLFILENAME */

`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
    /* verilator lint_on PINCONNECTEMPTY */ \
    /* verilator lint_on WIDTH */ \
    /* verilator lint_on UNOPTFLAT */ \
    /* verilator lint_on UNDRIVEN */ \
    /* verilator lint_on DECLFILENAME */

// UNUSED_VAR(x): declares a wire named __x__, as wide as x, driven by x, with
// the UNUSED lint check disabled around the declaration. The statement already
// ends with ';', so call sites do not add one. x must be a plain identifier
// because it is pasted into the wire name.
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
    wire [$bits(x)-1:0] __``x``__ = x; \
    /* verilator lint_on UNUSED */

// UNUSED_PIN(x): expands to an empty named port connection for port x, with
// the PINCONNECTEMPTY lint check disabled around it.
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
    . x () \
    /* verilator lint_on PINCONNECTEMPTY */

// STRINGIFY(x): turns the macro argument text into a string literal.
`define STRINGIFY(x) `"x`"

// STATIC_ASSERT(cond, msg): generate block that calls $error(msg) when cond
// is false.
`define STATIC_ASSERT(cond, msg) \
    generate \
        if (!(cond)) $error(msg); \
    endgenerate

// ENABLE_TRACING / DISABLE_TRACING: Verilator tracing_on / tracing_off pragmas.
`define ENABLE_TRACING /* verilator tracing_on */
`define DISABLE_TRACING /* verilator tracing_off */

///////////////////////////////////////////////////////////////////////////////

// USE_FAST_BRAM: expands to the attribute (* syn_ramstyle = "mlab" *).
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)

///////////////////////////////////////////////////////////////////////////////

// CLOG2(x): ceiling of log2(x), via $clog2.
`define CLOG2(x) $clog2(x)
// FLOG2(x): $clog2(x), minus one when (1 << $clog2(x)) exceeds x, i.e. the
// floor of log2(x) for x >= 1.
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
// LOG2UP(x): $clog2(x) when x > 1, otherwise 1 (never returns 0).
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
// ISPOW2(x): true when x is non-zero and has a single bit set.
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))

// MIN / MAX: smaller / larger of the two arguments. Every argument use is
// parenthesized so that arguments containing lower-precedence operators
// (&, |, ^, ?:) are compared as a whole. Each argument is expanded twice.
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`define MAX(x, y) (((x) > (y)) ? (x) : (y))

// UP(x): x when x > 0, otherwise 1.
`define UP(x) (((x) > 0) ? (x) : 1)

`endif
|
||||
148
hw/rtl/VX_print_instr.vh
Normal file
148
hw/rtl/VX_print_instr.vh
Normal file
@@ -0,0 +1,148 @@
|
||||
`ifndef VX_PRINT_INSTR
|
||||
`define VX_PRINT_INSTR
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Writes the name of the execution unit encoded by ex ("ALU", "LSU", "CSR",
// "MUL", "FPU" or "GPU") using $write, so no newline is appended.
// Any other encoding is written as "NOP".
task print_ex_type (
    input [`EX_BITS-1:0] ex
);
    case (ex)
        `EX_ALU: $write("ALU");
        `EX_LSU: $write("LSU");
        `EX_CSR: $write("CSR");
        `EX_MUL: $write("MUL");
        `EX_FPU: $write("FPU");
        `EX_GPU: $write("GPU");
        default: $write("NOP");
    endcase
endtask
|
||||
|
||||
// Writes the mnemonic of operation op using $write (no newline appended).
//   ex - execution unit selector; chooses which opcode table is used.
//   op - operation code; cast down to the selected unit's opcode width
//        before it is matched.
// An op that is not listed for the selected unit is written as "?".
// An ex value that matches none of the units writes nothing.
task print_ex_op (
    input [`EX_BITS-1:0] ex,
    input [`OP_BITS-1:0] op
);
    case (ex)
        // Integer ALU, branch/jump and system operations.
        `EX_ALU:
            case (`ALU_BITS'(op))
                `ALU_ADD:    $write("ADD");
                `ALU_SUB:    $write("SUB");
                `ALU_SLL:    $write("SLL");
                `ALU_SRL:    $write("SRL");
                `ALU_SRA:    $write("SRA");
                `ALU_SLT:    $write("SLT");
                `ALU_SLTU:   $write("SLTU");
                `ALU_XOR:    $write("XOR");
                `ALU_OR:     $write("OR");
                `ALU_AND:    $write("AND");
                `ALU_LUI:    $write("LUI");
                `ALU_AUIPC:  $write("AUIPC");
                `ALU_BEQ:    $write("BEQ");
                `ALU_BNE:    $write("BNE");
                `ALU_BLT:    $write("BLT");
                `ALU_BGE:    $write("BGE");
                `ALU_BLTU:   $write("BLTU");
                `ALU_BGEU:   $write("BGEU");
                `ALU_JAL:    $write("JAL");
                `ALU_JALR:   $write("JALR");
                `ALU_ECALL:  $write("ECALL");
                `ALU_EBREAK: $write("EBREAK");
                `ALU_MRET:   $write("MRET");
                `ALU_SRET:   $write("SRET");
                `ALU_DRET:   $write("DRET");
                default:     $write("?");
            endcase

        // Loads and stores.
        `EX_LSU:
            case (`LSU_BITS'(op))
                `LSU_LB:  $write("LB");
                `LSU_LH:  $write("LH");
                `LSU_LW:  $write("LW");
                `LSU_LBU: $write("LBU");
                `LSU_LHU: $write("LHU");
                `LSU_SB:  $write("SB");
                `LSU_SH:  $write("SH");
                `LSU_SW:  $write("SW");
                `LSU_SBU: $write("SBU");
                `LSU_SHU: $write("SHU");
                default:  $write("?");
            endcase

        // CSR accesses.
        `EX_CSR:
            case (`CSR_BITS'(op))
                `CSR_RW: $write("CSRW");
                `CSR_RS: $write("CSRS");
                `CSR_RC: $write("CSRC");
                default: $write("?");
            endcase

        // Integer multiply / divide.
        `EX_MUL:
            case (`MUL_BITS'(op))
                `MUL_MUL:    $write("MUL");
                `MUL_MULH:   $write("MULH");
                `MUL_MULHSU: $write("MULHSU");
                `MUL_MULHU:  $write("MULHU");
                `MUL_DIV:    $write("DIV");
                `MUL_DIVU:   $write("DIVU");
                `MUL_REM:    $write("REM");
                `MUL_REMU:   $write("REMU");
                default:     $write("?");
            endcase

        // Floating-point operations.
        `EX_FPU:
            case (`FPU_BITS'(op))
                `FPU_ADD:    $write("ADD");
                `FPU_SUB:    $write("SUB");
                `FPU_MUL:    $write("MUL");
                `FPU_DIV:    $write("DIV");
                `FPU_SQRT:   $write("SQRT");
                `FPU_MADD:   $write("MADD");
                `FPU_NMSUB:  $write("NMSUB");
                `FPU_NMADD:  $write("NMADD");
                `FPU_SGNJ:   $write("SGNJ");
                `FPU_SGNJN:  $write("SGNJN");
                `FPU_SGNJX:  $write("SGNJX");
                `FPU_MIN:    $write("MIN");
                `FPU_MAX:    $write("MAX");
                `FPU_CVTWS:  $write("CVTWS");
                `FPU_CVTWUS: $write("CVTWUS");
                `FPU_CVTSW:  $write("CVTSW");
                `FPU_CVTSWU: $write("CVTSWU");
                `FPU_MVXW:   $write("MVXW");
                `FPU_MVWX:   $write("MVWX");
                `FPU_CLASS:  $write("CLASS");
                `FPU_CMP:    $write("CMP");
                default:     $write("?");
            endcase

        // Warp / thread control operations.
        `EX_GPU:
            case (`GPU_BITS'(op))
                `GPU_TMC:    $write("TMC");
                `GPU_WSPAWN: $write("WSPAWN");
                `GPU_SPLIT:  $write("SPLIT");
                `GPU_JOIN:   $write("JOIN");
                `GPU_BAR:    $write("BAR");
                default:     $write("?");
            endcase

        // Unknown unit: print nothing.
        default: ;
    endcase
endtask
|
||||
|
||||
// Writes the name of the rounding-mode encoding frm ("RNE", "RTZ", "RDN",
// "RUP", "RMM" or "DYN") using $write, so no newline is appended.
// Any other encoding is written as "?".
task print_frm (
    input [`FRM_BITS-1:0] frm
);
    case (frm)
        `FRM_RNE: $write("RNE");
        `FRM_RTZ: $write("RTZ");
        `FRM_RDN: $write("RDN");
        `FRM_RUP: $write("RUP");
        `FRM_RMM: $write("RMM");
        `FRM_DYN: $write("DYN");
        default:  $write("?");
    endcase
endtask
|
||||
|
||||
`endif
|
||||
@@ -3,84 +3,93 @@
|
||||
module VX_scheduler #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
input wire alu_busy,
|
||||
input wire lsu_busy,
|
||||
input wire csr_busy,
|
||||
input wire mul_busy,
|
||||
input wire gpu_busy,
|
||||
output wire schedule_delay,
|
||||
output wire is_empty
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
input wire gpr_busy,
|
||||
input wire alu_busy,
|
||||
input wire lsu_busy,
|
||||
input wire csr_busy,
|
||||
input wire mul_busy,
|
||||
input wire fpu_busy,
|
||||
input wire gpu_busy,
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag
|
||||
);
|
||||
localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);
|
||||
|
||||
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||
reg [CTVW-1:0] count_valid;
|
||||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||
reg [`NUM_THREADS-1:0] inuse_registers [`NUM_WARPS-1:0][`NUM_REGS-1:0];
|
||||
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||
|
||||
wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0);
|
||||
wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0);
|
||||
wire rd_rename = (rename_table[decode_if.warp_num][decode_if.rd ] != 0);
|
||||
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.warp_num] & decode_if.reg_use_mask;
|
||||
wire inuse_hazard = (inuse_mask != 0);
|
||||
|
||||
wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1);
|
||||
wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2);
|
||||
wire rd_rename_qual = (rd_rename) && (decode_if.wb != 0);
|
||||
|
||||
wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);
|
||||
|
||||
wire ex_stalled = (| decode_if.valid)
|
||||
&& ((alu_busy && (decode_if.ex_type == `EX_ALU))
|
||||
wire exu_stalled = (alu_busy && (decode_if.ex_type == `EX_ALU))
|
||||
|| (lsu_busy && (decode_if.ex_type == `EX_LSU))
|
||||
|| (csr_busy && (decode_if.ex_type == `EX_CSR))
|
||||
|| (mul_busy && (decode_if.ex_type == `EX_MUL))
|
||||
|| (gpu_busy && (decode_if.ex_type == `EX_GPU)));
|
||||
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|
||||
|| (gpu_busy && (decode_if.ex_type == `EX_GPU));
|
||||
|
||||
wire stall = ex_stalled || rename_valid;
|
||||
wire issue_buf_full;
|
||||
|
||||
wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall;
|
||||
wire stall = (gpr_busy || exu_stalled || inuse_hazard || issue_buf_full) && decode_if.valid;
|
||||
|
||||
wire acquire_rd = decode_if.valid && (decode_if.wb != 0) && ~stall;
|
||||
|
||||
wire release_rd = (| writeback_if.valid);
|
||||
wire release_rd = writeback_if.valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;
|
||||
|
||||
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
|
||||
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
|
||||
count_valid;
|
||||
integer i, w;
|
||||
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.thread_mask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
integer i, w;
|
||||
for (w = 0; w < `NUM_WARPS; w++) begin
|
||||
for (i = 0; i < 32; i++) begin
|
||||
rename_table[w][i] <= 0;
|
||||
for (i = 0; i < `NUM_REGS; i++) begin
|
||||
inuse_registers[w][i] <= 0;
|
||||
end
|
||||
end
|
||||
count_valid <= 0;
|
||||
inuse_reg_mask[w] <= `NUM_REGS'(0);
|
||||
end
|
||||
end else begin
|
||||
if (acquire_rd) begin
|
||||
rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
|
||||
inuse_registers[decode_if.warp_num][decode_if.rd] <= decode_if.thread_mask;
|
||||
inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1;
|
||||
end
|
||||
if (release_rd) begin
|
||||
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
||||
assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
inuse_registers[writeback_if.warp_num][writeback_if.rd] <= inuse_registers_n;
|
||||
inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n);
|
||||
end
|
||||
count_valid <= count_valid_next;
|
||||
end
|
||||
end
|
||||
|
||||
wire issue_fire = decode_if.valid && ~stall;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW ($bits(is_data_t)),
|
||||
.SIZE (`ISSUEQ_SIZE),
|
||||
.RPORTS (`NUM_EXS)
|
||||
) issue_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.wb}),
|
||||
.write_addr (issue_tag),
|
||||
.acquire_slot (issue_fire),
|
||||
.release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}),
|
||||
.read_addr ({cmt_to_issue_if.alu_tag, cmt_to_issue_if.lsu_tag, cmt_to_issue_if.csr_tag, cmt_to_issue_if.mul_tag, cmt_to_issue_if.fpu_tag, cmt_to_issue_if.gpu_tag}),
|
||||
.read_data ({cmt_to_issue_if.alu_data, cmt_to_issue_if.lsu_data, cmt_to_issue_if.csr_data, cmt_to_issue_if.mul_data, cmt_to_issue_if.fpu_data, cmt_to_issue_if.gpu_data}),
|
||||
.full (issue_buf_full)
|
||||
);
|
||||
|
||||
assign decode_if.ready = ~stall;
|
||||
|
||||
assign schedule_delay = stall;
|
||||
|
||||
assign is_empty = (0 == count_valid);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (stall) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, gpu_busy);
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, gpr=%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b",
|
||||
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full, inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1],
|
||||
inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], gpr_busy, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -145,7 +145,7 @@
|
||||
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
|
||||
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
|
||||
wire [31:0] scope_writeback_curr_PC; \
|
||||
wire [`WB_BITS-1:0] scope_writeback_wb; \
|
||||
wire scope_writeback_wb; \
|
||||
wire [`NR_BITS-1:0] scope_writeback_rd; \
|
||||
wire [63:0] scope_writeback_data; \
|
||||
wire scope_bank_valid_st0; \
|
||||
@@ -224,7 +224,7 @@
|
||||
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
|
||||
output wire [31:0] scope_writeback_curr_PC, \
|
||||
output wire [`WB_BITS-1:0] scope_writeback_wb, \
|
||||
output wire scope_writeback_wb, \
|
||||
output wire [`NR_BITS-1:0] scope_writeback_rd, \
|
||||
output wire [63:0] scope_writeback_data,
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ module VX_warp_sched #(
|
||||
);
|
||||
wire update_use_wspawn;
|
||||
wire update_visible_active;
|
||||
wire scheduled_warp;
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
||||
|
||||
@@ -98,9 +97,9 @@ module VX_warp_sched #(
|
||||
end else begin
|
||||
|
||||
if (warp_ctl_if.wspawn) begin
|
||||
warp_active <= warp_ctl_if.wspawn_new_active;
|
||||
warp_active <= warp_ctl_if.wspawn_wmask;
|
||||
use_wspawn <= warp_ctl_if.wspawn_wmask & (~`NUM_WARPS'(1));
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
||||
use_wspawn <= warp_ctl_if.wspawn_new_active & (~`NUM_WARPS'(1));
|
||||
end
|
||||
|
||||
if (warp_ctl_if.is_barrier) begin
|
||||
@@ -113,6 +112,10 @@ module VX_warp_sched #(
|
||||
end else if (warp_ctl_if.change_mask) begin
|
||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.thread_mask;
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
if (0 == warp_ctl_if.thread_mask) begin
|
||||
warp_active[warp_ctl_if.warp_num] <= 0;
|
||||
visible_active[warp_ctl_if.warp_num] <= 0;
|
||||
end
|
||||
end else if (join_if.is_join && !didnt_split) begin
|
||||
if (!join_fall) begin
|
||||
warp_pcs[join_if.warp_num] <= join_pc;
|
||||
@@ -127,12 +130,7 @@ module VX_warp_sched #(
|
||||
end else begin
|
||||
didnt_split <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (warp_ctl_if.whalt) begin
|
||||
warp_active[warp_ctl_if.warp_num] <= 0;
|
||||
visible_active[warp_ctl_if.warp_num] <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (update_use_wspawn) begin
|
||||
use_wspawn[warp_to_schedule] <= 0;
|
||||
@@ -168,7 +166,7 @@ module VX_warp_sched #(
|
||||
if (scheduled_warp && !stall) begin
|
||||
warp_lock[warp_num] <= 1;
|
||||
end
|
||||
if ((| ifetch_rsp_if.valid) && ifetch_rsp_if.ready) begin
|
||||
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
||||
warp_lock[ifetch_rsp_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
@@ -193,7 +191,7 @@ module VX_warp_sched #(
|
||||
|
||||
assign b_mask = barrier_stall_mask[warp_ctl_if.barrier_id][`NUM_WARPS-1:0];
|
||||
|
||||
assign reached_barrier_limit = (b_count == warp_ctl_if.num_warps);
|
||||
assign reached_barrier_limit = (b_count == warp_ctl_if.barrier_num_warps);
|
||||
|
||||
assign wstall_this_cycle = wstall_if.wstall && (wstall_if.warp_num == warp_to_schedule); // Maybe bug
|
||||
|
||||
@@ -207,24 +205,26 @@ module VX_warp_sched #(
|
||||
assign {join_fall, join_pc, join_tm} = ipdom[join_if.warp_num];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin : stacks
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire correct_warp_s = (i == warp_ctl_if.warp_num);
|
||||
wire correct_warp_j = (i == join_if.warp_num);
|
||||
|
||||
wire push = (warp_ctl_if.is_split && warp_ctl_if.do_split) && correct_warp_s;
|
||||
wire pop = join_if.is_join && correct_warp_j;
|
||||
|
||||
VX_generic_stack #(
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH(1+32+`NUM_THREADS),
|
||||
.DEPTH($clog2(`NUM_THREADS)+1)
|
||||
) ipdom_stack(
|
||||
.DEPTH(`NT_BITS+1)
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.d (ipdom[i]),
|
||||
.q1 (q1),
|
||||
.q2 (q2)
|
||||
.q2 (q2),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full)
|
||||
);
|
||||
end
|
||||
|
||||
@@ -264,17 +264,17 @@ module VX_warp_sched #(
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign stall = ~ifetch_req_if.ready && (| ifetch_req_if.valid);
|
||||
assign stall = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + 32 + `NW_BITS)
|
||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||
) fetch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({thread_mask, warp_pc, warp_num}),
|
||||
.out ({ifetch_req_if.valid, ifetch_req_if.curr_PC, ifetch_req_if.warp_num})
|
||||
.in ({(| thread_mask), thread_mask, warp_pc, warp_num}),
|
||||
.out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.warp_num})
|
||||
);
|
||||
|
||||
assign busy = (warp_active != 0);
|
||||
|
||||
@@ -3,73 +3,137 @@
|
||||
module VX_writeback #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
VX_fpu_to_cmt_if fpu_commit_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
|
||||
// outputs
|
||||
VX_wb_if writeback_if
|
||||
VX_wb_if writeback_if
|
||||
);
|
||||
|
||||
wire lsu_valid = (| lsu_commit_if.valid) && (lsu_commit_if.wb != `WB_NO);
|
||||
wire mul_valid = (| mul_commit_if.valid) && (mul_commit_if.wb != `WB_NO);
|
||||
wire alu_valid = (| alu_commit_if.valid) && (alu_commit_if.wb != `WB_NO);
|
||||
wire csr_valid = (| csr_commit_if.valid) && (csr_commit_if.wb != `WB_NO);
|
||||
reg [`NUM_THREADS-1:0][31:0] wb_data [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NW_BITS-1:0] wb_warp_num [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NUM_THREADS-1:0] wb_thread_mask [`ISSUEQ_SIZE-1:0];
|
||||
reg [31:0] wb_curr_PC [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NR_BITS-1:0] wb_rd [`ISSUEQ_SIZE-1:0];
|
||||
|
||||
VX_wb_if writeback_tmp_if();
|
||||
reg [`ISSUEQ_SIZE-1:0] wb_pending;
|
||||
reg [`ISSUEQ_SIZE-1:0] wb_pending_n;
|
||||
reg [`ISTAG_BITS-1:0] wb_index;
|
||||
wire [`ISTAG_BITS-1:0] wb_index_n;
|
||||
|
||||
reg wb_valid;
|
||||
wire wb_valid_n;
|
||||
|
||||
assign writeback_tmp_if.valid = lsu_valid ? lsu_commit_if.valid :
|
||||
mul_valid ? mul_commit_if.valid :
|
||||
alu_valid ? alu_commit_if.valid :
|
||||
csr_valid ? csr_commit_if.valid :
|
||||
0;
|
||||
always @(*) begin
|
||||
wb_pending_n = wb_pending;
|
||||
|
||||
assign writeback_tmp_if.warp_num = lsu_valid ? lsu_commit_if.warp_num :
|
||||
mul_valid ? mul_commit_if.warp_num :
|
||||
alu_valid ? alu_commit_if.warp_num :
|
||||
csr_valid ? csr_commit_if.warp_num :
|
||||
0;
|
||||
if (wb_valid) begin
|
||||
wb_pending_n[wb_index] = 0;
|
||||
end
|
||||
|
||||
assign writeback_tmp_if.data = lsu_valid ? lsu_commit_if.data :
|
||||
mul_valid ? mul_commit_if.data :
|
||||
alu_valid ? alu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
0;
|
||||
if (alu_commit_if.valid) begin
|
||||
wb_pending_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
||||
end
|
||||
if (lsu_commit_if.valid) begin
|
||||
wb_pending_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
||||
end
|
||||
if (csr_commit_if.valid) begin
|
||||
wb_pending_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
||||
end
|
||||
if (mul_commit_if.valid) begin
|
||||
wb_pending_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
||||
end
|
||||
if (fpu_commit_if.valid) begin
|
||||
wb_pending_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
||||
end
|
||||
end
|
||||
|
||||
assign writeback_tmp_if.rd = lsu_valid ? lsu_commit_if.rd :
|
||||
mul_valid ? mul_commit_if.rd :
|
||||
alu_valid ? alu_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
0;
|
||||
|
||||
wire stall = ~writeback_if.ready && (| writeback_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32))
|
||||
) wb_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data}),
|
||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data})
|
||||
VX_priority_encoder #(
|
||||
.N(`ISSUEQ_SIZE)
|
||||
) wb_select (
|
||||
.data_in (wb_pending_n),
|
||||
.data_out (wb_index_n),
|
||||
.valid_out (wb_valid_n)
|
||||
);
|
||||
|
||||
assign lsu_commit_if.ready = !stall;
|
||||
assign mul_commit_if.ready = !stall && !lsu_valid;
|
||||
assign alu_commit_if.ready = !stall && !lsu_valid && !mul_valid;
|
||||
assign csr_commit_if.ready = !stall && !lsu_valid && !mul_valid && !alu_valid;
|
||||
|
||||
// special workaround to control RISC-V benchmarks termination on Verilator
|
||||
reg [31:0] last_data_wb /* verilator public */;
|
||||
always @(posedge clk) begin
|
||||
if ((| writeback_tmp_if.valid) && ~stall && (writeback_tmp_if.rd == 28)) begin
|
||||
last_data_wb <= writeback_tmp_if.data[0];
|
||||
if (reset) begin
|
||||
wb_pending <= 0;
|
||||
wb_index <= 0;
|
||||
wb_valid <= 0;
|
||||
end else begin
|
||||
if (alu_commit_if.valid) begin
|
||||
wb_data [alu_commit_if.issue_tag] <= alu_commit_if.data;
|
||||
wb_warp_num [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num;
|
||||
wb_thread_mask [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask;
|
||||
wb_curr_PC [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC;
|
||||
wb_rd [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd;
|
||||
end
|
||||
if (lsu_commit_if.valid) begin
|
||||
wb_data [lsu_commit_if.issue_tag] <= lsu_commit_if.data;
|
||||
wb_warp_num [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num;
|
||||
wb_thread_mask [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask;
|
||||
wb_curr_PC [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC;
|
||||
wb_rd [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd;
|
||||
end
|
||||
if (csr_commit_if.valid) begin
|
||||
wb_data [csr_commit_if.issue_tag] <= csr_commit_if.data;
|
||||
wb_warp_num [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num;
|
||||
wb_thread_mask [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask;
|
||||
wb_curr_PC [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC;
|
||||
wb_rd [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd;
|
||||
end
|
||||
if (mul_commit_if.valid) begin
|
||||
wb_data [mul_commit_if.issue_tag] <= mul_commit_if.data;
|
||||
wb_warp_num [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num;
|
||||
wb_thread_mask [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask;
|
||||
wb_curr_PC [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC;
|
||||
wb_rd [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd;
|
||||
end
|
||||
if (fpu_commit_if.valid) begin
|
||||
wb_data [fpu_commit_if.issue_tag] <= fpu_commit_if.data;
|
||||
wb_warp_num [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num;
|
||||
wb_thread_mask [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask;
|
||||
wb_curr_PC [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC;
|
||||
wb_rd [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd;
|
||||
end
|
||||
|
||||
wb_pending <= wb_pending_n;
|
||||
wb_index <= wb_index_n;
|
||||
wb_valid <= wb_valid_n && writeback_if.ready;
|
||||
end
|
||||
end
|
||||
|
||||
// writeback request
|
||||
assign writeback_if.valid = wb_valid;
|
||||
assign writeback_if.warp_num = wb_warp_num [wb_index];
|
||||
assign writeback_if.thread_mask = wb_thread_mask [wb_index];
|
||||
assign writeback_if.curr_PC = wb_curr_PC [wb_index];
|
||||
assign writeback_if.rd = wb_rd [wb_index];
|
||||
assign writeback_if.data = wb_data [wb_index];
|
||||
|
||||
// commit back-pressure
|
||||
assign alu_commit_if.ready = 1'b1;
|
||||
assign lsu_commit_if.ready = 1'b1;
|
||||
assign csr_commit_if.ready = 1'b1;
|
||||
assign mul_commit_if.ready = 1'b1;
|
||||
assign fpu_commit_if.ready = 1'b1;
|
||||
assign gpu_commit_if.ready = 1'b1;
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if.valid) begin
|
||||
last_wb_value[writeback_if.rd] <= writeback_if.data[0];
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
116
hw/rtl/Vortex.v
116
hw/rtl/Vortex.v
@@ -139,54 +139,54 @@ module Vortex (
|
||||
|
||||
end else begin
|
||||
|
||||
wire per_cluster_dram_req_valid [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_dram_req_rw [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag [`NUM_CLUSTERS-1:0];
|
||||
wire l3_core_req_ready;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
|
||||
wire l3_core_req_ready;
|
||||
|
||||
wire per_cluster_dram_rsp_valid [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_dram_rsp_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
|
||||
|
||||
wire per_cluster_snp_req_valid [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_snp_req_invalidate [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_snp_req_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
|
||||
|
||||
wire per_cluster_snp_rsp_valid [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_snp_rsp_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
|
||||
|
||||
wire per_cluster_io_req_valid [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_io_req_rw [`NUM_CLUSTERS-1:0];
|
||||
wire [3:0] per_cluster_io_req_byteen [`NUM_CLUSTERS-1:0];
|
||||
wire [29:0] per_cluster_io_req_addr [`NUM_CLUSTERS-1:0];
|
||||
wire [31:0] per_cluster_io_req_data [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_io_req_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
|
||||
|
||||
wire per_cluster_io_rsp_valid [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag [`NUM_CLUSTERS-1:0];
|
||||
wire [31:0] per_cluster_io_rsp_data [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_io_rsp_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
|
||||
|
||||
wire per_cluster_csr_io_req_valid [`NUM_CLUSTERS-1:0];
|
||||
wire [11:0] per_cluster_csr_io_req_addr [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_csr_io_req_rw [`NUM_CLUSTERS-1:0];
|
||||
wire [31:0] per_cluster_csr_io_req_data [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_csr_io_req_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready;
|
||||
|
||||
wire per_cluster_csr_io_rsp_valid [`NUM_CLUSTERS-1:0];
|
||||
wire [31:0] per_cluster_csr_io_rsp_data [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_csr_io_rsp_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready;
|
||||
|
||||
wire per_cluster_busy [`NUM_CLUSTERS-1:0];
|
||||
wire per_cluster_ebreak [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak;
|
||||
|
||||
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
|
||||
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
|
||||
@@ -336,27 +336,27 @@ module Vortex (
|
||||
|
||||
// L3 Cache ///////////////////////////////////////////////////////////
|
||||
|
||||
wire l3_core_req_valid [`L3NUM_REQUESTS-1:0];
|
||||
wire l3_core_req_rw [`L3NUM_REQUESTS-1:0];
|
||||
wire [`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen [`L3NUM_REQUESTS-1:0];
|
||||
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr [`L3NUM_REQUESTS-1:0];
|
||||
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data [`L3NUM_REQUESTS-1:0];
|
||||
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag [`L3NUM_REQUESTS-1:0];
|
||||
wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid;
|
||||
wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
|
||||
|
||||
wire l3_core_rsp_valid [`L3NUM_REQUESTS-1:0];
|
||||
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data [`L3NUM_REQUESTS-1:0];
|
||||
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag [`L3NUM_REQUESTS-1:0];
|
||||
wire l3_core_rsp_ready;
|
||||
wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
|
||||
wire l3_core_rsp_ready;
|
||||
|
||||
wire l3_snp_fwdout_valid [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr [`NUM_CLUSTERS-1:0];
|
||||
wire l3_snp_fwdout_invalidate [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag [`NUM_CLUSTERS-1:0];
|
||||
wire l3_snp_fwdout_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
|
||||
|
||||
wire l3_snp_fwdin_valid [`NUM_CLUSTERS-1:0];
|
||||
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag [`NUM_CLUSTERS-1:0];
|
||||
wire l3_snp_fwdin_ready [`NUM_CLUSTERS-1:0];
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
|
||||
|
||||
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||
// Core Request
|
||||
|
||||
36
hw/rtl/cache/VX_bank.v
vendored
36
hw/rtl/cache/VX_bank.v
vendored
@@ -105,8 +105,8 @@ module VX_bank #(
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_use_pc_st0;
|
||||
wire[`WB_BITS-1:0] debug_wb_st0;
|
||||
wire[31:0] debug_pc_st0;
|
||||
wire debug_wb_st0;
|
||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st0;
|
||||
wire debug_rw_st0;
|
||||
@@ -114,8 +114,8 @@ module VX_bank #(
|
||||
wire[`REQS_BITS-1:0] debug_tid_st0;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
|
||||
|
||||
wire[31:0] debug_use_pc_st1e;
|
||||
wire[`WB_BITS-1:0] debug_wb_st1e;
|
||||
wire[31:0] debug_pc_st1e;
|
||||
wire debug_wb_st1e;
|
||||
wire[`NR_BITS-1:0] debug_rd_st1e;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st1e;
|
||||
wire debug_rw_st1e;
|
||||
@@ -123,8 +123,8 @@ module VX_bank #(
|
||||
wire[`REQS_BITS-1:0] debug_tid_st1e;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
|
||||
|
||||
wire[31:0] debug_use_pc_st2;
|
||||
wire[`WB_BITS-1:0] debug_wb_st2;
|
||||
wire[31:0] debug_pc_st2;
|
||||
wire debug_wb_st2;
|
||||
wire[`NR_BITS-1:0] debug_rd_st2;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st2;
|
||||
wire debug_rw_st2;
|
||||
@@ -360,7 +360,7 @@ module VX_bank #(
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -432,6 +432,9 @@ module VX_bank #(
|
||||
&& (addr_st2 == addr_st1e);
|
||||
|
||||
VX_tag_data_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CORE_TAG_ID_BITS(CORE_TAG_ID_BITS),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
@@ -442,6 +445,15 @@ module VX_bank #(
|
||||
) tag_data_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
.debug_pc_st1e(debug_pc_st1e),
|
||||
.debug_wb_st1e(debug_wb_st1e),
|
||||
.debug_rd_st1e(debug_rd_st1e),
|
||||
.debug_warp_num_st1e(debug_warp_num_st1e),
|
||||
.debug_tagid_st1e(debug_tagid_st1e),
|
||||
`endif
|
||||
|
||||
.stall (stall_bank_pipe),
|
||||
.stall_bank_pipe(stall_bank_pipe),
|
||||
|
||||
@@ -478,7 +490,7 @@ module VX_bank #(
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -513,13 +525,13 @@ module VX_bank #(
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
.flush (0),
|
||||
.in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
|
||||
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
|
||||
.in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e, snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
|
||||
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
|
||||
);
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -587,7 +599,7 @@ module VX_bank #(
|
||||
// Broadcast
|
||||
.is_fill_st1 (is_fill_st1[STAGE_1_CYCLES-1]),
|
||||
.fill_addr_st1 (addr_st1e),
|
||||
.pending_hazard (mrvq_pending_hazard_st1e),
|
||||
.pending_hazard_st1 (mrvq_pending_hazard_st1e),
|
||||
|
||||
// Dequeue
|
||||
.miss_resrv_pop (mrvq_pop),
|
||||
|
||||
4
hw/rtl/cache/VX_cache.v
vendored
4
hw/rtl/cache/VX_cache.v
vendored
@@ -130,10 +130,10 @@ module VX_cache #(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_core_req_use_pc;
|
||||
wire[`WB_BITS-1:0] debug_core_req_wb;
|
||||
wire debug_core_req_wb;
|
||||
wire[`NR_BITS-1:0] debug_core_req_rd;
|
||||
wire[`NW_BITS-1:0] debug_core_req_warp_num;
|
||||
wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
|
||||
5
hw/rtl/cache/VX_cache_config.vh
vendored
5
hw/rtl/cache/VX_cache_config.vh
vendored
@@ -1,10 +1,13 @@
|
||||
`ifndef VX_CACHE_CONFIG
|
||||
`define VX_CACHE_CONFIG
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
`include "VX_scope.vh"
|
||||
|
||||
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_REQ_TAG_WIDTH)
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
|
||||
|
||||
// tag rw byteen tid
|
||||
`define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS)
|
||||
|
||||
|
||||
1
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
1
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
@@ -1,4 +1,3 @@
|
||||
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_core_req_bank_sel #(
|
||||
|
||||
4
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
4
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -41,7 +41,7 @@ module VX_cache_miss_resrv #(
|
||||
input wire is_fill_st1,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st1,
|
||||
|
||||
output wire pending_hazard,
|
||||
output wire pending_hazard_st1,
|
||||
|
||||
// Miss dequeue
|
||||
input wire miss_resrv_pop,
|
||||
@@ -84,7 +84,7 @@ module VX_cache_miss_resrv #(
|
||||
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
||||
end
|
||||
|
||||
assign pending_hazard = |(valid_address_match);
|
||||
assign pending_hazard_st1 = |(valid_address_match);
|
||||
|
||||
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
|
||||
|
||||
36
hw/rtl/cache/VX_snp_forwarder.v
vendored
36
hw/rtl/cache/VX_snp_forwarder.v
vendored
@@ -41,8 +41,8 @@ module VX_snp_forwarder #(
|
||||
|
||||
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
|
||||
|
||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr, dbg_sfq_write_addr;
|
||||
wire sfq_push, sfq_pop, sfq_full;
|
||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr;
|
||||
wire sfq_acquire, sfq_release, sfq_full;
|
||||
|
||||
wire fwdin_valid;
|
||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag;
|
||||
@@ -56,32 +56,30 @@ module VX_snp_forwarder #(
|
||||
|
||||
assign sfq_read_addr = fwdin_tag;
|
||||
|
||||
assign sfq_push = snp_req_valid && !sfq_full && fwdout_ready;
|
||||
assign sfq_pop = snp_rsp_valid;
|
||||
assign sfq_acquire = snp_req_valid && !sfq_full && fwdout_ready;
|
||||
assign sfq_release = snp_rsp_valid;
|
||||
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(SNRQ_SIZE) + 1 +`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
|
||||
VX_cam_buffer #(
|
||||
.DATAW (`DRAM_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
|
||||
.SIZE (SNRQ_SIZE)
|
||||
) snp_fwd_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({sfq_write_addr, snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||
.write_addr (sfq_write_addr),
|
||||
.push (sfq_push),
|
||||
.pop (sfq_pop),
|
||||
.full (sfq_full),
|
||||
.read_addr (sfq_read_addr),
|
||||
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
|
||||
`UNUSED_PIN (empty)
|
||||
) snp_fwd_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||
.write_addr (sfq_write_addr),
|
||||
.acquire_slot (sfq_acquire),
|
||||
.release_slot (sfq_release),
|
||||
.read_addr (sfq_read_addr),
|
||||
.read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
|
||||
.full (sfq_full)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (sfq_push) begin
|
||||
if (sfq_acquire) begin
|
||||
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
|
||||
end
|
||||
if (fwdin_fire) begin
|
||||
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
|
||||
assert(sfq_read_addr == dbg_sfq_write_addr);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
93
hw/rtl/cache/VX_tag_data_access.v
vendored
93
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -1,26 +1,38 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_tag_data_access #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
parameter CORE_TAG_ID_BITS = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 0,
|
||||
parameter CACHE_SIZE = 0,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 0,
|
||||
parameter NUM_BANKS = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 0,
|
||||
parameter WORD_SIZE = 0,
|
||||
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 0,
|
||||
parameter STAGE_1_CYCLES = 0,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 0,
|
||||
parameter WRITE_ENABLE = 0,
|
||||
|
||||
// Enable dram update
|
||||
parameter DRAM_ENABLE = 0
|
||||
parameter DRAM_ENABLE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
input wire[31:0] debug_pc_st1e,
|
||||
input wire debug_wb_st1e,
|
||||
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
||||
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
|
||||
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
||||
`endif
|
||||
|
||||
input wire stall,
|
||||
input wire is_snp_st1e,
|
||||
input wire snp_invalidate_st1e,
|
||||
@@ -78,17 +90,17 @@ module VX_tag_data_access #(
|
||||
wire tags_match;
|
||||
|
||||
wire real_writefill = valid_req_st1e && writefill_st1e
|
||||
&& ((!use_read_valid_st1e) || (use_read_valid_st1e && !tags_match));
|
||||
&& ((~use_read_valid_st1e) || (use_read_valid_st1e && ~tags_match));
|
||||
|
||||
wire[`TAG_SELECT_BITS-1:0] writetag_st1e = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
|
||||
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
|
||||
|
||||
VX_tag_data_structure #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE)
|
||||
) tag_data_structure (
|
||||
VX_tag_data_store #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE)
|
||||
) tag_data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall_bank_pipe(stall_bank_pipe),
|
||||
@@ -124,7 +136,7 @@ module VX_tag_data_access #(
|
||||
genvar i;
|
||||
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
|
||||
VX_generic_register #(
|
||||
.N( 1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
|
||||
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
|
||||
) s0_1_cc (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -140,11 +152,16 @@ module VX_tag_data_access #(
|
||||
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM
|
||||
assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1];
|
||||
assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1];
|
||||
|
||||
|
||||
if (`WORD_SELECT_WIDTH != 0) begin
|
||||
assign readword_st1e = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
wire [`WORD_WIDTH-1:0] readword = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
for (i = 0; i < WORD_SIZE; i++) begin
|
||||
assign readword_st1e[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
||||
end
|
||||
end else begin
|
||||
assign readword_st1e = use_read_data_st1e;
|
||||
for (i = 0; i < WORD_SIZE; i++) begin
|
||||
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
||||
end
|
||||
end
|
||||
|
||||
wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we;
|
||||
@@ -153,9 +170,9 @@ module VX_tag_data_access #(
|
||||
wire should_write = mem_rw_st1e
|
||||
&& valid_req_st1e
|
||||
&& use_read_valid_st1e
|
||||
&& !miss_st1e
|
||||
&& !is_snp_st1e
|
||||
&& !real_writefill;
|
||||
&& ~miss_st1e
|
||||
&& ~is_snp_st1e
|
||||
&& ~real_writefill;
|
||||
|
||||
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
|
||||
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
|
||||
@@ -168,22 +185,22 @@ module VX_tag_data_access #(
|
||||
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1e;
|
||||
end
|
||||
|
||||
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;
|
||||
assign use_write_enable = (writefill_st1e && ~real_writefill) ? 0 : we;
|
||||
assign use_write_data = data_write;
|
||||
|
||||
// use "case equality" to handle uninitialized tag when block entry is not valid
|
||||
assign tags_match = (writetag_st1e === use_read_tag_st1e);
|
||||
|
||||
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && !force_request_miss_st1e;
|
||||
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;
|
||||
wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && !tags_match;
|
||||
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && ~force_request_miss_st1e;
|
||||
wire req_invalid = valid_req_st1e && ~is_snp_st1e && ~use_read_valid_st1e && ~writefill_st1e;
|
||||
wire req_miss = valid_req_st1e && ~is_snp_st1e && use_read_valid_st1e && ~writefill_st1e && ~tags_match;
|
||||
wire real_miss = req_invalid || req_miss;
|
||||
wire force_core_miss = (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e && !real_miss);
|
||||
wire force_core_miss = (force_request_miss_st1e && ~is_snp_st1e && ~writefill_st1e && valid_req_st1e && ~real_miss);
|
||||
assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e;
|
||||
|
||||
// The second term is basically saying: always make an entry ready if there's already another entry waiting, even if you yourself see a miss
|
||||
assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e
|
||||
|| (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e);
|
||||
|| (force_request_miss_st1e && ~is_snp_st1e && ~writefill_st1e && valid_req_st1e);
|
||||
|
||||
assign miss_st1e = real_miss || snoop_hit_no_pending || force_core_miss;
|
||||
assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;
|
||||
@@ -194,7 +211,23 @@ module VX_tag_data_access #(
|
||||
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
|
||||
assign invalidate_line = snoop_hit_no_pending;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
always @(posedge clk) begin
|
||||
if (valid_req_st1e) begin
|
||||
if ((| use_write_enable)) begin
|
||||
if (writefill_st1e) begin
|
||||
$display("%t: bank%0d:%0d store-fill: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
|
||||
end else begin
|
||||
$display("%t: bank%0d:%0d store-write: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
|
||||
end
|
||||
end else
|
||||
if (miss_st1e) begin
|
||||
$display("%t: bank%0d:%0d store-miss: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
|
||||
end else begin
|
||||
$display("%t: bank%0d:%0d store-read: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
@@ -1,6 +1,6 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_tag_data_structure #(
|
||||
module VX_tag_data_store #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 0,
|
||||
// Size of line inside a bank in bytes
|
||||
87
hw/rtl/fp_cores/VX_fp_fpga.v
Normal file
87
hw/rtl/fp_cores/VX_fp_fpga.v
Normal file
@@ -0,0 +1,87 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// FPGA floating-point unit wrapper.
//
// Instantiates VX_fpnew with its FMULADD unit disabled (FDIVSQRT, FNONCOMP and
// FCONV enabled) alongside an acl_fp_add adder. The input/output handshake,
// tag and fflags outputs are taken from the VX_fpnew instance; `result`
// selects the VX_fpnew result while it is valid and the adder result otherwise.
module VX_fp_fpga (
    input wire clk,
    input wire reset,

    // input handshake
    output wire in_ready,
    input wire  in_valid,

    // issue tag forwarded to the VX_fpnew instance
    input wire [`ISTAG_BITS-1:0] in_tag,

    // operation and rounding mode
    input wire [`FPU_BITS-1:0] op,
    input wire [`FRM_BITS-1:0] frm,

    // per-thread operands and result
    input wire [`NUM_THREADS-1:0][31:0]  dataa,
    input wire [`NUM_THREADS-1:0][31:0]  datab,
    input wire [`NUM_THREADS-1:0][31:0]  datac,
    output wire [`NUM_THREADS-1:0][31:0] result,

    // exception flags
    output wire has_fflags,
    output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,

    // tag returned with the result
    output wire [`ISTAG_BITS-1:0] out_tag,

    // output handshake
    input wire  out_ready,
    output wire out_valid
);
    // VX_fpnew instance signals
    wire fpnew_in_ready;
    wire [`NUM_THREADS-1:0][31:0] fpnew_result;
    wire fpnew_has_fflags;
    wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fpnew_fflags;
    wire [`ISTAG_BITS-1:0] fpnew_out_tag;
    wire fpnew_out_ready;
    wire fpnew_out_valid;

    // adder signals
    wire [`NUM_THREADS-1:0][31:0] add_result;
    // NOTE(review): add_out_ready is used as the adder enable below but has no
    // driver in this module - confirm the intended source before relying on fp_add.
    wire add_out_ready;

    VX_fpnew #(
        .FMULADD  (0),
        .FDIVSQRT (1),
        .FNONCOMP (1),
        .FCONV    (1)
    ) fp_core (
        .clk        (clk),
        .reset      (reset),

        .in_valid   (in_valid),
        .in_ready   (fpnew_in_ready),

        .in_tag     (in_tag),

        .op         (op),
        .frm        (frm),

        .dataa      (dataa),
        .datab      (datab),
        .datac      (datac),
        .result     (fpnew_result),

        .has_fflags (fpnew_has_fflags),
        .fflags     (fpnew_fflags),

        .out_tag    (fpnew_out_tag),

        .out_ready  (fpnew_out_ready),
        .out_valid  (fpnew_out_valid)
    );

    acl_fp_add fp_add (
        .clock  (clk),
        .dataa  (dataa),
        .datab  (datab),
        .enable (add_out_ready),
        .result (add_result)
    );

    // Drive the declared in_ready port. A misspelled target here would only
    // create an implicit net and leave the port itself undriven.
    assign in_ready        = fpnew_in_ready;
    assign has_fflags      = fpnew_has_fflags;
    assign fflags          = fpnew_fflags;
    assign out_tag         = fpnew_out_tag;
    assign fpnew_out_ready = out_ready;

    // out_valid follows VX_fpnew only, so add_result is presented on `result`
    // solely while out_valid is low.
    assign result    = fpnew_out_valid ? fpnew_result : add_result;
    assign out_valid = fpnew_out_valid;

endmodule
|
||||
217
hw/rtl/fp_cores/VX_fpnew.v
Normal file
217
hw/rtl/fp_cores/VX_fpnew.v
Normal file
@@ -0,0 +1,217 @@
|
||||
`include "VX_define.vh"
|
||||
`include "fpnew_pkg.sv"
|
||||
`include "defs_div_sqrt_mvp.sv"
|
||||
|
||||
// VX_fpnew: instantiates one fpnew_top core per thread lane and adapts it to the
// ready/valid + tag handshake exposed on this module's ports.
//
// Parameters: a non-zero value enables the corresponding fpnew operation group,
// a zero value maps the group to fpnew_pkg::DISABLED.
//   FMULADD  -> ADDMUL  group as fpnew_pkg::PARALLEL
//   FDIVSQRT -> DIVSQRT group as fpnew_pkg::MERGED
//   FNONCOMP -> NONCOMP group as fpnew_pkg::PARALLEL
//   FCONV    -> CONV    group as fpnew_pkg::MERGED
//
// Ports:
//   in_valid / in_ready   request handshake (in_ready comes from lane 0 only)
//   in_tag                request tag, carried through the lane-0 core
//   op, frm               operation selector and rounding mode
//   dataa/datab/datac     per-thread 32-bit operands
//   result                per-thread 32-bit results
//   has_fflags, fflags    flag-enable bit recovered from the lane-0 tag, and the
//                         per-thread status bits {NV, DZ, OF, UF, NX} = fflags[4:0]
//   out_tag               tag returned by the lane-0 core
//   out_valid / out_ready response handshake (out_valid comes from lane 0 only)
//
// All lanes receive the same in_valid/out_ready and the same decoded operation;
// the handshake outputs of lanes other than 0 are left unconnected.
module VX_fpnew #(
    parameter FMULADD  = 1,
    parameter FDIVSQRT = 1,
    parameter FNONCOMP = 1,
    parameter FCONV    = 1
) (
    input wire clk,
    input wire reset,

    output wire in_ready,
    input wire  in_valid,

    input wire [`ISTAG_BITS-1:0] in_tag,

    input wire [`FPU_BITS-1:0] op,
    input wire [`FRM_BITS-1:0] frm,

    input wire [`NUM_THREADS-1:0][31:0]  dataa,
    input wire [`NUM_THREADS-1:0][31:0]  datab,
    input wire [`NUM_THREADS-1:0][31:0]  datac,
    output wire [`NUM_THREADS-1:0][31:0] result,

    output wire has_fflags,
    output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,

    output wire [`ISTAG_BITS-1:0] out_tag,

    input wire  out_ready,
    output wire out_valid
);
    // Unit type chosen for each operation group, or DISABLED when the parameter is 0.
    localparam UNIT_FMULADD  = FMULADD  ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
    localparam UNIT_FDIVSQRT = FDIVSQRT ? fpnew_pkg::MERGED   : fpnew_pkg::DISABLED;
    localparam UNIT_FNONCOMP = FNONCOMP ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
    localparam UNIT_FCONV    = FCONV    ? fpnew_pkg::MERGED   : fpnew_pkg::DISABLED;

    // Field widths derived from the fpnew package.
    localparam FOP_BITS  = fpnew_pkg::OP_BITS;
    localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
    localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);

    localparam FPU_DPATHW = 32'd32;

    // 32-bit datapath, no vector support, NaN-boxing enabled, a single FP format
    // and a single integer format selected by the masks below.
    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         FPU_DPATHW,
        EnableVectors: 1'b0,
        EnableNanBox:  1'b1,
        FpFmtMask:     5'b10000,
        IntFmtMask:    4'b0010
    };

    // Pipeline depth and unit type for each operation group.
    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
        PipeRegs:'{'{`LATENCY_FMULADD, 0, 0, 0, 0}, // ADDMUL
                   '{default: `LATENCY_FDIVSQRT},   // DIVSQRT
                   '{default: `LATENCY_FNONCOMP},   // NONCOMP
                   '{default: `LATENCY_FCONV}},     // CONV
        UnitTypes:'{'{default: UNIT_FMULADD},       // ADDMUL
                    '{default: UNIT_FDIVSQRT},      // DIVSQRT
                    '{default: UNIT_FNONCOMP},      // NONCOMP
                    '{default: UNIT_FCONV}},        // CONV
        PipeConfig: fpnew_pkg::DISTRIBUTED
    };

    wire fpu_in_ready, fpu_in_valid;
    wire fpu_out_ready, fpu_out_valid;

    // Driven by a continuous assign (fpu_in_tag) and by the lane-0 tag_o pin
    // (fpu_out_tag), so these are nets rather than procedural variables.
    // NOTE(review): their width is LOG2UP(FPURQ_SIZE) while in_tag/out_tag are
    // ISTAG_BITS wide - confirm the two widths agree.
    wire [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag;

    // Operand slots 0..2 presented to every lane, filled by the decode block below.
    reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;

    wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
    wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
    wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;

    wire [`NUM_THREADS-1:0][31:0] fpu_result;
    fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1];

    // is_class_op_o travels through the lane-0 tag but is not consumed in this module.
    wire is_class_op_i, is_class_op_o;
    assign is_class_op_i = (op == `FPU_CLASS);

    reg [FOP_BITS-1:0]  fpu_op;
    reg [`FRM_BITS-1:0] fpu_rnd;
    reg                 fpu_op_mod;
    reg                 fflags_en;    // assigned in the decode block below
    wire                fflags_en_o;  // driven by the lane-0 tag_o pin

    // Decode the request opcode into the fpnew operation, modifier, rounding-mode
    // field and operand placement. Defaults: SGNJ, caller rounding mode, no
    // modifier, flags enabled, operands passed straight through.
    always @(*) begin
        fpu_op          = fpnew_pkg::SGNJ;
        fpu_rnd         = frm;
        fpu_op_mod      = 0;
        fflags_en       = 1;
        fpu_operands[0] = dataa;
        fpu_operands[1] = datab;
        fpu_operands[2] = datac;
        case (op)
            // ADD/SUB place the two sources in operand slots 1 and 2;
            // SUB is ADD with the modifier bit set.
            `FPU_ADD: begin
                fpu_op          = fpnew_pkg::ADD;
                fpu_operands[1] = dataa;
                fpu_operands[2] = datab;
            end
            `FPU_SUB: begin
                fpu_op          = fpnew_pkg::ADD;
                fpu_operands[1] = dataa;
                fpu_operands[2] = datab;
                fpu_op_mod      = 1;
            end
            `FPU_MUL:   begin fpu_op = fpnew_pkg::MUL; end
            `FPU_DIV:   begin fpu_op = fpnew_pkg::DIV; end
            `FPU_SQRT:  begin fpu_op = fpnew_pkg::SQRT; end
            `FPU_MADD:  begin fpu_op = fpnew_pkg::FMADD; end
            `FPU_MSUB:  begin fpu_op = fpnew_pkg::FMADD;  fpu_op_mod = 1; end
            `FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
            `FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
            // For the SGNJ and MINMAX families the rounding-mode field is overridden
            // with a fixed value per variant instead of the caller's frm.
            `FPU_SGNJ:  begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RNE; fflags_en = 0; end
            `FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RTZ; fflags_en = 0; end
            `FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RDN; fflags_en = 0; end
            `FPU_MIN:   begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
            `FPU_MAX:   begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
            // Conversions: the modifier bit is set for the CVTWUS / CVTSWU forms.
            `FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
            `FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
            `FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
            `FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
            // Both move opcodes map to SGNJ with the RUP rounding-mode value.
            `FPU_MVXW:  begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
            `FPU_MVWX:  begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
            `FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fflags_en = 0; end
            `FPU_CMP:   begin fpu_op = fpnew_pkg::CMP; end
            default:;
        endcase
    end

    genvar i;

`DISABLE_TRACING

    // One core per thread lane. rst_ni is tied high on every core; the module's
    // reset input is applied through flush_i instead.
    for (i = 0; i < `NUM_THREADS; i++) begin
        if (0 == i) begin
            // Lane 0 carries the tag ({request tag, fflags_en, is_class_op}) and
            // provides the handshake outputs for the whole module.
            fpnew_top #(
                .Features       (FPU_FEATURES),
                .Implementation (FPU_IMPLEMENTATION),
                .TagType        (logic[`LOG2UP(`FPURQ_SIZE)+1+1-1:0])
            ) fpnew_core (
                .clk_i          (clk),
                .rst_ni         (1'b1),
                .operands_i     ({fpu_operands[2][0], fpu_operands[1][0], fpu_operands[0][0]}),
                .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
                .op_i           (fpnew_pkg::operation_e'(fpu_op)),
                .op_mod_i       (fpu_op_mod),
                .src_fmt_i      (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
                .dst_fmt_i      (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
                .int_fmt_i      (fpnew_pkg::int_format_e'(fpu_int_fmt)),
                .vectorial_op_i (1'b0),
                .tag_i          ({fpu_in_tag, fflags_en, is_class_op_i}),
                .in_valid_i     (fpu_in_valid),
                .in_ready_o     (fpu_in_ready),
                .flush_i        (reset),
                .result_o       (fpu_result[0]),
                .status_o       (fpu_status[0]),
                .tag_o          ({fpu_out_tag, fflags_en_o, is_class_op_o}),
                .out_valid_o    (fpu_out_valid),
                .out_ready_i    (fpu_out_ready),
                `UNUSED_PIN     (busy_o)
            );
        end else begin
            // Remaining lanes: same control inputs, a 1-bit dummy tag, and their
            // ready/valid/tag outputs left unconnected.
            fpnew_top #(
                .Features       (FPU_FEATURES),
                .Implementation (FPU_IMPLEMENTATION),
                .TagType        (logic)
            ) fpnew_core (
                .clk_i          (clk),
                .rst_ni         (1'b1),
                .operands_i     ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
                .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
                .op_i           (fpnew_pkg::operation_e'(fpu_op)),
                .op_mod_i       (fpu_op_mod),
                .src_fmt_i      (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
                .dst_fmt_i      (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
                .int_fmt_i      (fpnew_pkg::int_format_e'(fpu_int_fmt)),
                .vectorial_op_i (1'b0),
                .tag_i          (1'b0),
                .in_valid_i     (fpu_in_valid),
                `UNUSED_PIN     (in_ready_o),
                .flush_i        (reset),
                .result_o       (fpu_result[i]),
                .status_o       (fpu_status[i]),
                `UNUSED_PIN     (tag_o),
                `UNUSED_PIN     (out_valid_o),
                .out_ready_i    (fpu_out_ready),
                `UNUSED_PIN     (busy_o)
            );
        end
    end

`ENABLE_TRACING

    // Request side.
    assign fpu_in_valid = in_valid;
    assign in_ready     = fpu_in_ready;

    // Tag pass-through.
    assign fpu_in_tag = in_tag;
    assign out_tag    = fpu_out_tag;

    assign result = fpu_result;

    // has_fflags is the fflags_en bit that was packed into the lane-0 tag on issue.
    assign has_fflags = fflags_en_o;

    // Unpack each lane's status struct into the flat fflags vector.
    for (i = 0; i < `NUM_THREADS; i++) begin
        assign fflags[i][0] = fpu_status[i].NX;
        assign fflags[i][1] = fpu_status[i].UF;
        assign fflags[i][2] = fpu_status[i].OF;
        assign fflags[i][3] = fpu_status[i].DZ;
        assign fflags[i][4] = fpu_status[i].NV;
    end

    // Response side.
    assign out_valid     = fpu_out_valid;
    assign fpu_out_ready = out_ready;

endmodule
|
||||
67
hw/rtl/fp_cores/altera/acl_fp_add.v
Normal file
67
hw/rtl/fp_cores/altera/acl_fp_add.v
Normal file
@@ -0,0 +1,67 @@
|
||||
// (C) 1992-2016 Intel Corporation.
|
||||
// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
|
||||
// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
|
||||
// and/or other countries. Other marks and brands may be claimed as the property
|
||||
// of others. See Trademarks on intel.com for full list of Intel trademarks or
|
||||
// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
|
||||
// Your use of Intel Corporation's design tools, logic functions and other
|
||||
// software and tools, and its AMPP partner logic functions, and any output
|
||||
// files any of the foregoing (including device programming or simulation
|
||||
// files), and any associated documentation or information are expressly subject
|
||||
// to the terms and conditions of the Altera Program License Subscription
|
||||
// Agreement, Intel MegaCore Function License Agreement, or other applicable
|
||||
// license agreement, including, without limitation, that your use is for the
|
||||
// sole purpose of programming logic devices manufactured by Intel and sold by
|
||||
// Intel or its authorized distributors. Please refer to the applicable
|
||||
// agreement for further details.
|
||||
|
||||
// Single-precision adder wrapped around the twentynm_fp_mac WYSIWYG primitive.
//   dataa, datab : 32-bit operands, wired to the primitive's ax and ay inputs
//   clock        : bit 0 of the primitive's 3-bit clk bus (upper bits tied to 0)
//   enable       : bit 0 of the primitive's 3-bit ena bus (upper bits tied to 1)
//   result       : 32-bit value from the primitive's resulta output
module acl_fp_add (
    input  [31:0] dataa,
    input  [31:0] datab,
    input         clock,
    input         enable,
    output [31:0] result
);

    // Add mode, no subtraction, no chain-in. The ax, ay, adder-input and output
    // stages use clock "0"; every other stage is set to "none".
    twentynm_fp_mac #(
        .operation_mode       ("sp_add"),
        .use_chainin          ("false"),
        .adder_subtract       ("false"),
        .ax_clock             ("0"),
        .ay_clock             ("0"),
        .az_clock             ("none"),
        .output_clock         ("0"),
        .accumulate_clock     ("none"),
        .ax_chainin_pl_clock  ("none"),
        .accum_pipeline_clock ("none"),
        .mult_pipeline_clock  ("none"),
        .adder_input_clock    ("0"),
        .accum_adder_clock    ("none")
    ) mac_fp_wys (
        // inputs
        .accumulate         (),
        .chainin_overflow   (),
        .chainin_invalid    (),
        .chainin_underflow  (),
        .chainin_inexact    (),
        .ax                 (dataa),
        .ay                 (datab),
        .az                 (),
        .clk                ({2'b00,clock}),
        .ena                ({2'b11,enable}),
        .aclr               (2'b00),
        .chainin            (),
        // outputs (exception flags and chain-out are left unconnected)
        .overflow           (),
        .invalid            (),
        .underflow          (),
        .inexact            (),
        .chainout_overflow  (),
        .chainout_invalid   (),
        .chainout_underflow (),
        .chainout_inexact   (),
        .resulta            (result),
        .chainout           ()
    );

endmodule
|
||||
63
hw/rtl/fp_cores/altera/acl_fp_msub.v
Normal file
63
hw/rtl/fp_cores/altera/acl_fp_msub.v
Normal file
@@ -0,0 +1,63 @@
|
||||
// (C) 1992-2014 Altera Corporation. All rights reserved.
|
||||
// Your use of Altera Corporation's design tools, logic functions and other
|
||||
// software and tools, and its AMPP partner logic functions, and any output
|
||||
// files any of the foregoing (including device programming or simulation
|
||||
// files), and any associated documentation or information are expressly subject
|
||||
// to the terms and conditions of the Altera Program License Subscription
|
||||
// Agreement, Altera MegaCore Function License Agreement, or other applicable
|
||||
// license agreement, including, without limitation, that your use is for the
|
||||
// sole purpose of programming logic devices manufactured by Altera and sold by
|
||||
// Altera or its authorized distributors. Please refer to the applicable
|
||||
// agreement for further details.
|
||||
|
||||
// Single-precision multiply-subtract wrapped around the twentynm_fp_mac primitive.
// Named acl_fp_msub to match its file (acl_fp_msub.v); it previously reused the
// name acl_fp_multadd, which is also defined in acl_fp_nmadd.v, and its header
// comment said "a*b + c" even though adder_subtract is "true".
// Presumably computes a*b - c - TODO confirm the operand order of the primitive's
// multiply-subtract mode against the Intel documentation.
//   dataa, datab : 32-bit multiplicands, wired to az and ay
//   datac        : 32-bit addend, wired to ax
//   clock        : bit 0 of the primitive's 3-bit clk bus (upper bits tied to 0)
//   enable       : bit 0 of the primitive's 3-bit ena bus (upper bits tied to 1)
//   result       : 32-bit value from the primitive's resulta output
module acl_fp_msub (
    input  [31:0] dataa,
    input  [31:0] datab,
    input  [31:0] datac,
    input         clock,
    input         enable,
    output [31:0] result
);

    // Multiply-add mode with adder_subtract enabled and no chain-in. All operand,
    // multiplier-pipeline, adder-input and output stages use clock "0".
    twentynm_fp_mac #(
        .operation_mode       ("sp_mult_add"),
        .use_chainin          ("false"),
        .adder_subtract       ("true"),
        .ax_clock             ("0"),
        .ay_clock             ("0"),
        .az_clock             ("0"),
        .output_clock         ("0"),
        .accumulate_clock     ("none"),
        .ax_chainin_pl_clock  ("0"),
        .accum_pipeline_clock ("none"),
        .mult_pipeline_clock  ("0"),
        .adder_input_clock    ("0"),
        .accum_adder_clock    ("none")
    ) mac_fp_wys (
        // inputs
        .accumulate         (),
        .chainin_overflow   (),
        .chainin_invalid    (),
        .chainin_underflow  (),
        .chainin_inexact    (),
        .ax                 (datac),
        .ay                 (datab),
        .az                 (dataa),
        .clk                ({2'b00,clock}),
        .ena                ({2'b11,enable}),
        .aclr               (2'b00),
        .chainin            (),
        // outputs (exception flags and chain-out are left unconnected)
        .overflow           (),
        .invalid            (),
        .underflow          (),
        .inexact            (),
        .chainout_overflow  (),
        .chainout_invalid   (),
        .chainout_underflow (),
        .chainout_inexact   (),
        .resulta            (result),
        .chainout           ()
    );

endmodule
|
||||
67
hw/rtl/fp_cores/altera/acl_fp_mul.v
Normal file
67
hw/rtl/fp_cores/altera/acl_fp_mul.v
Normal file
@@ -0,0 +1,67 @@
|
||||
// (C) 1992-2016 Intel Corporation.
|
||||
// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
|
||||
// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
|
||||
// and/or other countries. Other marks and brands may be claimed as the property
|
||||
// of others. See Trademarks on intel.com for full list of Intel trademarks or
|
||||
// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
|
||||
// Your use of Intel Corporation's design tools, logic functions and other
|
||||
// software and tools, and its AMPP partner logic functions, and any output
|
||||
// files any of the foregoing (including device programming or simulation
|
||||
// files), and any associated documentation or information are expressly subject
|
||||
// to the terms and conditions of the Altera Program License Subscription
|
||||
// Agreement, Intel MegaCore Function License Agreement, or other applicable
|
||||
// license agreement, including, without limitation, that your use is for the
|
||||
// sole purpose of programming logic devices manufactured by Intel and sold by
|
||||
// Intel or its authorized distributors. Please refer to the applicable
|
||||
// agreement for further details.
|
||||
|
||||
// Single-precision multiplier wrapped around the twentynm_fp_mac WYSIWYG primitive.
//   dataa, datab : 32-bit operands, wired to the primitive's az and ay inputs
//   clock        : bit 0 of the primitive's 3-bit clk bus (upper bits tied to 0)
//   enable       : bit 0 of the primitive's 3-bit ena bus (upper bits tied to 1)
//   result       : 32-bit value from the primitive's resulta output
module acl_fp_mul (
    input  [31:0] dataa,
    input  [31:0] datab,
    input         clock,
    input         enable,
    output [31:0] result
);

    // Multiply mode, no chain-in. The ay, az, multiplier-pipeline and output
    // stages use clock "0"; the unused ax and adder stages are set to "none".
    twentynm_fp_mac #(
        .operation_mode       ("sp_mult"),
        .use_chainin          ("false"),
        .adder_subtract       ("false"),
        .ax_clock             ("none"),
        .ay_clock             ("0"),
        .az_clock             ("0"),
        .output_clock         ("0"),
        .accumulate_clock     ("none"),
        .ax_chainin_pl_clock  ("none"),
        .accum_pipeline_clock ("none"),
        .mult_pipeline_clock  ("0"),
        .adder_input_clock    ("none"),
        .accum_adder_clock    ("none")
    ) mac_fp_wys (
        // inputs
        .accumulate         (),
        .chainin_overflow   (),
        .chainin_invalid    (),
        .chainin_underflow  (),
        .chainin_inexact    (),
        .ax                 (),
        .ay                 (datab),
        .az                 (dataa),
        .clk                ({2'b00,clock}),
        .ena                ({2'b11,enable}),
        .aclr               (2'b00),
        .chainin            (),
        // outputs (exception flags and chain-out are left unconnected)
        .overflow           (),
        .invalid            (),
        .underflow          (),
        .inexact            (),
        .chainout_overflow  (),
        .chainout_invalid   (),
        .chainout_underflow (),
        .chainout_inexact   (),
        .resulta            (result),
        .chainout           ()
    );

endmodule
|
||||
63
hw/rtl/fp_cores/altera/acl_fp_nmadd.v
Normal file
63
hw/rtl/fp_cores/altera/acl_fp_nmadd.v
Normal file
@@ -0,0 +1,63 @@
|
||||
// (C) 1992-2014 Altera Corporation. All rights reserved.
|
||||
// Your use of Altera Corporation's design tools, logic functions and other
|
||||
// software and tools, and its AMPP partner logic functions, and any output
|
||||
// files any of the foregoing (including device programming or simulation
|
||||
// files), and any associated documentation or information are expressly subject
|
||||
// to the terms and conditions of the Altera Program License Subscription
|
||||
// Agreement, Altera MegaCore Function License Agreement, or other applicable
|
||||
// license agreement, including, without limitation, that your use is for the
|
||||
// sole purpose of programming logic devices manufactured by Altera and sold by
|
||||
// Altera or its authorized distributors. Please refer to the applicable
|
||||
// agreement for further details.
|
||||
|
||||
// Single-precision multiply-add (a*b + c) wrapped around the twentynm_fp_mac primitive.
// NOTE(review): this definition sits in acl_fp_nmadd.v, yet adder_subtract is
// "false" and no operand is negated here, so as written it looks like a plain
// multiply-add rather than a negated one - confirm the intent.
//   dataa, datab : 32-bit multiplicands, wired to az and ay
//   datac        : 32-bit addend, wired to ax
//   clock        : bit 0 of the primitive's 3-bit clk bus (upper bits tied to 0)
//   enable       : bit 0 of the primitive's 3-bit ena bus (upper bits tied to 1)
//   result       : 32-bit value from the primitive's resulta output
module acl_fp_multadd (
    input  [31:0] dataa,
    input  [31:0] datab,
    input  [31:0] datac,
    input         clock,
    input         enable,
    output [31:0] result
);

    // Multiply-add mode, no subtraction, no chain-in. All operand,
    // multiplier-pipeline, adder-input and output stages use clock "0".
    twentynm_fp_mac #(
        .operation_mode       ("sp_mult_add"),
        .use_chainin          ("false"),
        .adder_subtract       ("false"),
        .ax_clock             ("0"),
        .ay_clock             ("0"),
        .az_clock             ("0"),
        .output_clock         ("0"),
        .accumulate_clock     ("none"),
        .ax_chainin_pl_clock  ("0"),
        .accum_pipeline_clock ("none"),
        .mult_pipeline_clock  ("0"),
        .adder_input_clock    ("0"),
        .accum_adder_clock    ("none")
    ) mac_fp_wys (
        // inputs
        .accumulate         (),
        .chainin_overflow   (),
        .chainin_invalid    (),
        .chainin_underflow  (),
        .chainin_inexact    (),
        .ax                 (datac),
        .ay                 (datab),
        .az                 (dataa),
        .clk                ({2'b00,clock}),
        .ena                ({2'b11,enable}),
        .aclr               (2'b00),
        .chainin            (),
        // outputs (exception flags and chain-out are left unconnected)
        .overflow           (),
        .invalid            (),
        .underflow          (),
        .inexact            (),
        .chainout_overflow  (),
        .chainout_invalid   (),
        .chainout_underflow (),
        .chainout_inexact   (),
        .resulta            (result),
        .chainout           ()
    );

endmodule
|
||||
67
hw/rtl/fp_cores/altera/acl_fp_sub.v
Normal file
67
hw/rtl/fp_cores/altera/acl_fp_sub.v
Normal file
@@ -0,0 +1,67 @@
|
||||
// (C) 1992-2016 Intel Corporation.
|
||||
// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
|
||||
// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
|
||||
// and/or other countries. Other marks and brands may be claimed as the property
|
||||
// of others. See Trademarks on intel.com for full list of Intel trademarks or
|
||||
// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
|
||||
// Your use of Intel Corporation's design tools, logic functions and other
|
||||
// software and tools, and its AMPP partner logic functions, and any output
|
||||
// files any of the foregoing (including device programming or simulation
|
||||
// files), and any associated documentation or information are expressly subject
|
||||
// to the terms and conditions of the Altera Program License Subscription
|
||||
// Agreement, Intel MegaCore Function License Agreement, or other applicable
|
||||
// license agreement, including, without limitation, that your use is for the
|
||||
// sole purpose of programming logic devices manufactured by Intel and sold by
|
||||
// Intel or its authorized distributors. Please refer to the applicable
|
||||
// agreement for further details.
|
||||
|
||||
// Single-precision subtractor wrapped around the twentynm_fp_mac WYSIWYG primitive.
// Named acl_fp_sub to match its file (acl_fp_sub.v); it previously reused the name
// acl_fp_add, colliding with the adder defined in acl_fp_add.v while configuring
// the primitive for subtraction (adder_subtract = "true").
// NOTE(review): confirm which operand the primitive subtracts in this mode
// (ax = dataa, ay = datab) so the result has the intended sign.
//   dataa, datab : 32-bit operands, wired to the primitive's ax and ay inputs
//   clock        : bit 0 of the primitive's 3-bit clk bus (upper bits tied to 0)
//   enable       : bit 0 of the primitive's 3-bit ena bus (upper bits tied to 1)
//   result       : 32-bit value from the primitive's resulta output
module acl_fp_sub (
    input  [31:0] dataa,
    input  [31:0] datab,
    input         clock,
    input         enable,
    output [31:0] result
);

    // Add mode with adder_subtract enabled, no chain-in. The ax, ay, adder-input
    // and output stages use clock "0"; every other stage is set to "none".
    twentynm_fp_mac #(
        .operation_mode       ("sp_add"),
        .use_chainin          ("false"),
        .adder_subtract       ("true"),
        .ax_clock             ("0"),
        .ay_clock             ("0"),
        .az_clock             ("none"),
        .output_clock         ("0"),
        .accumulate_clock     ("none"),
        .ax_chainin_pl_clock  ("none"),
        .accum_pipeline_clock ("none"),
        .mult_pipeline_clock  ("none"),
        .adder_input_clock    ("0"),
        .accum_adder_clock    ("none")
    ) mac_fp_wys (
        // inputs
        .accumulate         (),
        .chainin_overflow   (),
        .chainin_invalid    (),
        .chainin_underflow  (),
        .chainin_inexact    (),
        .ax                 (dataa),
        .ay                 (datab),
        .az                 (),
        .clk                ({2'b00,clock}),
        .ena                ({2'b11,enable}),
        .aclr               (2'b00),
        .chainin            (),
        // outputs (exception flags and chain-out are left unconnected)
        .overflow           (),
        .invalid            (),
        .underflow          (),
        .inexact            (),
        .chainout_overflow  (),
        .chainout_invalid   (),
        .chainout_underflow (),
        .chainout_inexact   (),
        .resulta            (result),
        .chainout           ()
    );

endmodule
|
||||
1
hw/rtl/fp_cores/fpnew
Submodule
1
hw/rtl/fp_cores/fpnew
Submodule
Submodule hw/rtl/fp_cores/fpnew added at 1def7bb630
@@ -5,18 +5,17 @@
|
||||
|
||||
interface VX_alu_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`ALU_BITS-1:0] alu_op;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`ALU_BITS-1:0] alu_op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
|
||||
wire [31:0] offset;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
|
||||
19
hw/rtl/interfaces/VX_cmt_to_csr_if.v
Normal file
19
hw/rtl/interfaces/VX_cmt_to_csr_if.v
Normal file
@@ -0,0 +1,19 @@
|
||||
`ifndef VX_CMT_TO_CSR_IF
|
||||
`define VX_CMT_TO_CSR_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for the interface named VX_cmt_to_csr_if.
interface VX_cmt_to_csr_if ();

    wire                    valid;        // qualifier for the fields below
    wire [`NW_BITS-1:0]     warp_num;     // warp index
    wire [`NE_BITS:0]       num_commits;  // NE_BITS+1 bits wide
    wire                    has_fflags;   // presumably marks fflags as meaningful - confirm with the driver
    wire [`FFG_BITS-1:0]    fflags;       // single FFG_BITS-wide flag vector (not per-thread)

endinterface
|
||||
|
||||
`endif
|
||||
33
hw/rtl/interfaces/VX_cmt_to_issue_if.v
Normal file
33
hw/rtl/interfaces/VX_cmt_to_issue_if.v
Normal file
@@ -0,0 +1,33 @@
|
||||
`ifndef VX_CMT_TO_ISSUE_IF
|
||||
`define VX_CMT_TO_ISSUE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for the interface named VX_cmt_to_issue_if: one valid flag, one
// issue tag and one is_data_t record for each of the alu, lsu, csr, mul, fpu and
// gpu prefixes.
interface VX_cmt_to_issue_if ();

    // Valid flag per unit.
    wire alu_valid, lsu_valid, csr_valid, mul_valid, fpu_valid, gpu_valid;

    // Issue tag per unit, ISTAG_BITS wide each.
    wire [`ISTAG_BITS-1:0] alu_tag, lsu_tag, csr_tag, mul_tag, fpu_tag, gpu_tag;

    // Data record per unit; warnings are suppressed around these declarations only.
`IGNORE_WARNINGS_BEGIN
    is_data_t alu_data, lsu_data, csr_data, mul_data, fpu_data, gpu_data;
`IGNORE_WARNINGS_END

endinterface
|
||||
|
||||
`endif
|
||||
@@ -1,19 +0,0 @@
|
||||
`ifndef VX_COMMIT_IF
|
||||
`define VX_COMMIT_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_commit_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire is_io;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -5,11 +5,11 @@
|
||||
|
||||
interface VX_csr_io_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`CSR_ADDR_SIZE-1:0] addr;
|
||||
wire rw;
|
||||
wire [31:0] data;
|
||||
wire ready;
|
||||
wire valid;
|
||||
wire [`CSR_ADDR_BITS-1:0] addr;
|
||||
wire rw;
|
||||
wire [31:0] data;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,17 +5,18 @@
|
||||
|
||||
interface VX_csr_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`CSR_BITS-1:0] csr_op;
|
||||
|
||||
wire [`CSR_ADDR_SIZE-1:0] csr_addr;
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr;
|
||||
wire [31:0] csr_mask;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire wb;
|
||||
wire is_io;
|
||||
|
||||
wire ready;
|
||||
|
||||
17
hw/rtl/interfaces/VX_csr_to_fpu_if.v
Normal file
17
hw/rtl/interfaces/VX_csr_to_fpu_if.v
Normal file
@@ -0,0 +1,17 @@
|
||||
`ifndef VX_CSR_TO_FPU_IF
|
||||
`define VX_CSR_TO_FPU_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// When EXTF_F_ENABLE is not defined, warnings are suppressed for the interface
// declaration below.
// NOTE(review): the M-extension guard elsewhere is spelled EXT_M_ENABLE; confirm
// that EXTF_F_ENABLE (not EXT_F_ENABLE) is the macro the configuration defines.
`ifndef EXTF_F_ENABLE
`IGNORE_WARNINGS_BEGIN
`endif

// Signal bundle for the interface named VX_csr_to_fpu_if.
interface VX_csr_to_fpu_if ();

    wire [`NW_BITS-1:0]  warp_num;  // warp index
    wire [`FRM_BITS-1:0] frm;       // rounding-mode field, FRM_BITS wide

endinterface

// Close the suppression opened above so it does not extend past this interface
// (it was previously left open).
`ifndef EXTF_F_ENABLE
`IGNORE_WARNINGS_END
`endif
|
||||
|
||||
`endif
|
||||
@@ -5,13 +5,14 @@
|
||||
|
||||
interface VX_decode_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] instr_op;
|
||||
wire [`OP_BITS-1:0] ex_op;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
@@ -19,12 +20,16 @@ interface VX_decode_if ();
|
||||
wire [31:0] imm;
|
||||
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
wire rs2_is_imm;
|
||||
|
||||
wire [`NUM_REGS-1:0] reg_use_mask;
|
||||
|
||||
wire use_rs1;
|
||||
wire use_rs2;
|
||||
// FP states
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire use_rs3;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
||||
15
hw/rtl/interfaces/VX_exu_to_cmt_if.v
Normal file
15
hw/rtl/interfaces/VX_exu_to_cmt_if.v
Normal file
@@ -0,0 +1,15 @@
|
||||
`ifndef VX_EXU_TO_CMT_IF
|
||||
`define VX_EXU_TO_CMT_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for the interface named VX_exu_to_cmt_if: a valid/ready pair
// plus an issue tag and one 32-bit data word per thread.
interface VX_exu_to_cmt_if ();

    wire                          valid;
    wire [`ISTAG_BITS-1:0]        issue_tag;
    wire [`NUM_THREADS-1:0][31:0] data;
    wire                          ready;

endinterface
|
||||
|
||||
`endif
|
||||
27
hw/rtl/interfaces/VX_fpu_req_if.v
Normal file
27
hw/rtl/interfaces/VX_fpu_req_if.v
Normal file
@@ -0,0 +1,27 @@
|
||||
`ifndef VX_FPU_REQ_IF
|
||||
`define VX_FPU_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// When EXTF_F_ENABLE is not defined, warnings are suppressed for the interface
// declaration below.
// NOTE(review): the M-extension guard elsewhere is spelled EXT_M_ENABLE; confirm
// that EXTF_F_ENABLE (not EXT_F_ENABLE) is the macro the configuration defines.
`ifndef EXTF_F_ENABLE
`IGNORE_WARNINGS_BEGIN
`endif

// Signal bundle for the interface named VX_fpu_req_if: a valid/ready pair, the
// issue tag and warp index, the FPU opcode and rounding mode, and three 32-bit
// source operands per thread.
interface VX_fpu_req_if ();

    wire                          valid;
    wire [`ISTAG_BITS-1:0]        issue_tag;
    wire [`NW_BITS-1:0]           warp_num;

    wire [`FPU_BITS-1:0]          fpu_op;
    wire [`FRM_BITS-1:0]          frm;

    wire [`NUM_THREADS-1:0][31:0] rs1_data;
    wire [`NUM_THREADS-1:0][31:0] rs2_data;
    wire [`NUM_THREADS-1:0][31:0] rs3_data;

    wire                          ready;

endinterface

// Close the suppression opened above so it does not extend past this interface
// (it was previously left open).
`ifndef EXTF_F_ENABLE
`IGNORE_WARNINGS_END
`endif
|
||||
|
||||
`endif
|
||||
17
hw/rtl/interfaces/VX_fpu_to_cmt_if.v
Normal file
17
hw/rtl/interfaces/VX_fpu_to_cmt_if.v
Normal file
@@ -0,0 +1,17 @@
|
||||
`ifndef VX_FPU_TO_CMT_IF
|
||||
`define VX_FPU_TO_CMT_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for the interface named VX_fpu_to_cmt_if: a valid/ready pair,
// an issue tag, one 32-bit data word per thread, and per-thread flag vectors
// accompanied by a single has_fflags bit.
interface VX_fpu_to_cmt_if ();

    wire                                   valid;
    wire [`ISTAG_BITS-1:0]                 issue_tag;
    wire [`NUM_THREADS-1:0][31:0]          data;
    wire                                   has_fflags;
    wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags;
    wire                                   ready;

endinterface
|
||||
|
||||
`endif
|
||||
24
hw/rtl/interfaces/VX_fpu_to_csr_if.v
Normal file
24
hw/rtl/interfaces/VX_fpu_to_csr_if.v
Normal file
@@ -0,0 +1,24 @@
|
||||
`ifndef VX_FPU_TO_CSR_IF
|
||||
`define VX_FPU_TO_CSR_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// When EXTF_F_ENABLE is not defined, warnings are suppressed for the interface
// declaration below.
// NOTE(review): the M-extension guard elsewhere is spelled EXT_M_ENABLE; confirm
// that EXTF_F_ENABLE (not EXT_F_ENABLE) is the macro the configuration defines.
`ifndef EXTF_F_ENABLE
`IGNORE_WARNINGS_BEGIN
`endif

// Signal bundle for the interface named VX_fpu_to_csr_if: a valid flag, a warp
// index, and five individual single-bit flags (NV, DZ, OF, UF, NX).
interface VX_fpu_to_csr_if ();

    wire                valid;

    wire [`NW_BITS-1:0] warp_num;

    wire                fflags_NV;
    wire                fflags_DZ;
    wire                fflags_OF;
    wire                fflags_UF;
    wire                fflags_NX;

endinterface

// Close the suppression opened above so it does not extend past this interface
// (it was previously left open).
`ifndef EXTF_F_ENABLE
`IGNORE_WARNINGS_END
`endif
|
||||
|
||||
`endif
|
||||
@@ -1,13 +0,0 @@
|
||||
`ifndef VX_GPR_DATA_IF
|
||||
`define VX_GPR_DATA_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_gpr_data_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
26
hw/rtl/interfaces/VX_gpr_read_if.v
Normal file
26
hw/rtl/interfaces/VX_gpr_read_if.v
Normal file
@@ -0,0 +1,26 @@
|
||||
`ifndef VX_GPR_READ_IF
|
||||
`define VX_GPR_READ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for the interface named VX_gpr_read_if: a valid/ready pair, the
// warp index, three register indices with a use_rs3 flag, and three 32-bit data
// words per thread.
interface VX_gpr_read_if ();

    wire                          valid;

    wire [`NW_BITS-1:0]           warp_num;

    // Register indices, NR_BITS wide each.
    wire [`NR_BITS-1:0]           rs1;
    wire [`NR_BITS-1:0]           rs2;
    wire [`NR_BITS-1:0]           rs3;

    wire                          use_rs3;

    // Per-thread data words matching rs1/rs2/rs3.
    wire [`NUM_THREADS-1:0][31:0] rs1_data;
    wire [`NUM_THREADS-1:0][31:0] rs2_data;
    wire [`NUM_THREADS-1:0][31:0] rs3_data;

    wire                          ready;

endinterface
|
||||
|
||||
`endif
|
||||
@@ -5,17 +5,18 @@
|
||||
|
||||
interface VX_gpu_req_if();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
wire [`GPU_BITS-1:0] gpu_op;
|
||||
wire [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
wire [31:0] next_PC;
|
||||
wire [31:0] rs2_data;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire ready;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,10 +5,11 @@
|
||||
|
||||
interface VX_ifetch_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire ready;
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,11 +5,12 @@
|
||||
|
||||
interface VX_ifetch_rsp_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] instr;
|
||||
wire ready;
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] instr;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,20 +5,22 @@
|
||||
|
||||
interface VX_lsu_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire rw;
|
||||
wire [`BYTEEN_BITS-1:0] byteen;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||
wire [31:0] offset;
|
||||
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -3,21 +3,21 @@
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef EXT_M_ENABLE
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`endif
|
||||
|
||||
interface VX_mul_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
|
||||
wire [`MUL_BITS-1:0] mul_op;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`MUL_BITS-1:0] mul_op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
wire ready;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
`ifndef VX_PERF_CNTRS_IF
|
||||
`define VX_PERF_CNTRS_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_cntrs_if ();
|
||||
|
||||
wire [63:0] total_cycles;
|
||||
wire [63:0] total_instrs;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -12,17 +12,14 @@ interface VX_warp_ctl_if ();
|
||||
|
||||
wire wspawn;
|
||||
wire [31:0] wspawn_pc;
|
||||
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
wire whalt;
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
|
||||
wire is_barrier;
|
||||
wire [`NB_BITS-1:0] barrier_id;
|
||||
wire [`NW_BITS:0] num_warps;
|
||||
wire [`NW_BITS:0] barrier_num_warps;
|
||||
|
||||
wire is_split;
|
||||
wire do_split;
|
||||
|
||||
wire [`NUM_THREADS-1:0] split_new_mask;
|
||||
wire [`NUM_THREADS-1:0] split_later_mask;
|
||||
wire [31:0] split_save_pc;
|
||||
|
||||
@@ -5,10 +5,17 @@
|
||||
|
||||
interface VX_wb_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [31:0] curr_PC;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
74
hw/rtl/libs/VX_cam_buffer.v
Normal file
74
hw/rtl/libs/VX_cam_buffer.v
Normal file
@@ -0,0 +1,74 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Slot-allocating buffer with one write port and RPORTS read/release ports.
//
// A slot is handed out through `write_addr`; asserting `acquire_slot` stores
// `write_data` into that slot and marks it busy. Each read port returns the
// entry addressed by `read_addr[i]` combinationally, and asserting
// `release_slot[i]` marks that same slot free again.
//
// Parameters:
//   DATAW  - width of one entry
//   SIZE   - number of slots
//   RPORTS - number of read/release ports
//   ADDRW  - slot index width (defaults to `LOG2UP(SIZE))
//
// `write_addr` and `full` are registered: they reflect the free map computed
// at the previous clock edge.
module VX_cam_buffer #(
    parameter DATAW  = 1,
    parameter SIZE   = 1,
    parameter RPORTS = 1,
    parameter ADDRW  = `LOG2UP(SIZE)
) (
    input  wire                         clk,
    input  wire                         reset,
    input  wire [DATAW-1:0]             write_data,
    output wire [ADDRW-1:0]             write_addr,
    input  wire                         acquire_slot,
    input  wire [RPORTS-1:0][ADDRW-1:0] read_addr,
    output reg  [RPORTS-1:0][DATAW-1:0] read_data,
    input  wire [RPORTS-1:0]            release_slot,
    output wire                         full
);
    // Entry storage plus a one-bit-per-slot free map (1 = slot is free).
    reg [DATAW-1:0] entries [SIZE-1:0];
    reg [SIZE-1:0]  free_slots;
    reg [SIZE-1:0]  free_slots_n;

    // Registered allocation state exposed on the outputs.
    reg [ADDRW-1:0] write_addr_r;
    reg             full_r;

    wire [ADDRW-1:0] free_index;
    wire             free_valid;

    integer p;

    assign write_addr = write_addr_r;
    assign full       = full_r;

    // Next-state free map and combinational read ports.
    // Releases are applied after the acquire, so a release that targets the
    // slot being acquired in the same cycle leaves that slot marked free.
    always @(*) begin
        free_slots_n = free_slots;
        if (acquire_slot) begin
            free_slots_n[write_addr_r] = 0;
        end
        for (p = 0; p < RPORTS; p++) begin
            read_data[p] = entries[read_addr[p]];
            if (release_slot[p]) begin
                free_slots_n[read_addr[p]] = 1;
            end
        end
    end

    // Selects the slot to hand out next from the next-state free map.
    // NOTE(review): which free slot wins (lowest or highest index) depends on
    // VX_priority_encoder, which is defined elsewhere - confirm there.
    VX_priority_encoder #(
        .N (SIZE)
    ) free_slots_encoder (
        .data_in   (free_slots_n),
        .data_out  (free_index),
        .valid_out (free_valid)
    );

    always @(posedge clk) begin
        if (reset) begin
            // All slots free, slot 0 offered first, not full.
            free_slots   <= {SIZE{1'b1}};
            full_r       <= 1'b0;
            write_addr_r <= ADDRW'(1'b0);
        end else begin
            if (acquire_slot) begin
                // The slot being handed out must currently be free.
                assert(1 == free_slots[write_addr]);
                entries[write_addr] <= write_data;
            end
            for (p = 0; p < RPORTS; p++) begin
                if (release_slot[p]) begin
                    // Only a busy slot may be released.
                    assert(0 == free_slots[read_addr[p]]);
                end
            end
            free_slots   <= free_slots_n;
            write_addr_r <= free_index;
            full_r       <= ~free_valid;
        end
    end

endmodule
|
||||
@@ -1,3 +1,6 @@
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_countones #(
|
||||
parameter N = 10
|
||||
) (
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_divide #(
|
||||
parameter WIDTHN = 1,
|
||||
@@ -12,6 +12,7 @@ module VX_divide #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire clk_en,
|
||||
input wire [WIDTHN-1:0] numer,
|
||||
input wire [WIDTHD-1:0] denom,
|
||||
|
||||
@@ -31,7 +32,7 @@ module VX_divide #(
|
||||
.quotient (quotient_unqual),
|
||||
.remain (remainder_unqual),
|
||||
.aclr (1'b0),
|
||||
.clken (1'b1)
|
||||
.clken (clk_en)
|
||||
);
|
||||
|
||||
defparam
|
||||
@@ -43,8 +44,8 @@ module VX_divide #(
|
||||
quartus_div.lpm_hint = "MAXIMIZE_SPEED=6,LPM_REMAINDERPOSITIVE=FALSE",
|
||||
quartus_div.lpm_pipeline = PIPELINE;
|
||||
|
||||
assign quotient = quotient_unqual[WIDTHQ-1:0];
|
||||
assign remainder = remainder_unqual[WIDTHR-1:0];
|
||||
assign quotient = quotient_unqual [WIDTHQ-1:0];
|
||||
assign remainder = remainder_unqual [WIDTHR-1:0];
|
||||
|
||||
`else
|
||||
|
||||
@@ -82,8 +83,8 @@ module VX_divide #(
|
||||
end
|
||||
|
||||
if (PIPELINE == 0) begin
|
||||
assign quotient = quotient_unqual[WIDTHQ-1:0];
|
||||
assign remainder = remainder_unqual[WIDTHR-1:0];
|
||||
assign quotient = quotient_unqual [WIDTHQ-1:0];
|
||||
assign remainder = remainder_unqual [WIDTHR-1:0];
|
||||
end else begin
|
||||
reg [WIDTHN-1:0] quotient_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHD-1:0] remainder_pipe [0:PIPELINE-1];
|
||||
@@ -95,14 +96,14 @@ module VX_divide #(
|
||||
quotient_pipe[i] <= 0;
|
||||
remainder_pipe[i] <= 0;
|
||||
end
|
||||
else begin
|
||||
else if (clk_en) begin
|
||||
if (i == 0) begin
|
||||
quotient_pipe[0] <= quotient_unqual;
|
||||
remainder_pipe[0] <= remainder_unqual;
|
||||
quotient_pipe[i] <= quotient_unqual;
|
||||
remainder_pipe[i] <= remainder_unqual;
|
||||
end else begin
|
||||
quotient_pipe[i] <= quotient_pipe[i-1];
|
||||
remainder_pipe[i] <= remainder_pipe[i-1];
|
||||
end
|
||||
remainder_pipe[i] <= remainder_pipe[i-1];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_fair_arbiter #(
|
||||
parameter N = 1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_fixed_arbiter #(
|
||||
parameter N = 1
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_generic_queue #(
|
||||
parameter DATAW = 0,
|
||||
parameter SIZE = 1,
|
||||
parameter BUFFERED_OUTPUT = 1
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 16,
|
||||
parameter BUFFERED = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -52,13 +52,9 @@ module VX_generic_queue #(
|
||||
|
||||
end else begin // (SIZE > 1)
|
||||
|
||||
`ifdef QUEUE_FORCE_MLAB
|
||||
(* syn_ramstyle = "mlab" *) reg [DATAW-1:0] data [SIZE-1:0];
|
||||
`else
|
||||
reg [DATAW-1:0] data [SIZE-1:0];
|
||||
`endif
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
|
||||
|
||||
if (0 == BUFFERED_OUTPUT) begin
|
||||
if (0 == BUFFERED) begin
|
||||
|
||||
reg [`LOG2UP(SIZE):0] rd_ptr_r;
|
||||
reg [`LOG2UP(SIZE):0] wr_ptr_r;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_generic_register #(
|
||||
parameter N = 1,
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
|
||||
module VX_generic_stack #(
|
||||
parameter WIDTH = 1,
|
||||
parameter DEPTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
input reg [WIDTH - 1:0] q1,
|
||||
input reg [WIDTH - 1:0] q2,
|
||||
output wire[WIDTH - 1:0] d
|
||||
);
|
||||
|
||||
reg [DEPTH - 1:0] ptr;
|
||||
reg [WIDTH - 1:0] stack [0:(1 << DEPTH) - 1];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
ptr <= 0;
|
||||
end else if (push) begin
|
||||
stack[ptr] <= q1;
|
||||
stack[ptr+1] <= q2;
|
||||
ptr <= ptr + 2;
|
||||
end else if (pop) begin
|
||||
ptr <= ptr - 1;
|
||||
end
|
||||
end
|
||||
|
||||
assign d = stack[ptr - 1];
|
||||
|
||||
endmodule
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_index_queue #(
|
||||
parameter DATAW = 1,
|
||||
@@ -15,7 +15,7 @@ module VX_index_queue #(
|
||||
input wire [`LOG2UP(SIZE)-1:0] read_addr,
|
||||
output wire [DATAW-1:0] read_data
|
||||
);
|
||||
reg [DATAW-1:0] data [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
|
||||
reg [SIZE-1:0] valid;
|
||||
reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_matrix_arbiter #(
|
||||
parameter N = 1
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_mult #(
|
||||
module VX_multiplier #(
|
||||
parameter WIDTHA = 1,
|
||||
parameter WIDTHB = 1,
|
||||
parameter WIDTHP = 1,
|
||||
@@ -10,6 +10,7 @@ module VX_mult #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire clk_en,
|
||||
input wire [WIDTHA-1:0] dataa,
|
||||
input wire [WIDTHB-1:0] datab,
|
||||
output wire [WIDTHP-1:0] result
|
||||
@@ -24,7 +25,7 @@ module VX_mult #(
|
||||
.result (result),
|
||||
.sclr (reset),
|
||||
.aclr (1'b0),
|
||||
.clken (1'b1),
|
||||
.clken (clk_en),
|
||||
.sum (1'b0)
|
||||
);
|
||||
|
||||
@@ -49,7 +50,7 @@ module VX_mult #(
|
||||
assign result = result_unqual;
|
||||
end else begin
|
||||
|
||||
reg [WIDTHP-1:0] result_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHP-1:0] result_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE; i++) begin
|
||||
@@ -57,12 +58,12 @@ module VX_mult #(
|
||||
if (reset) begin
|
||||
result_pipe[i] <= 0;
|
||||
end
|
||||
else begin
|
||||
else if (clk_en) begin
|
||||
if (i == 0) begin
|
||||
result_pipe[0] <= result_unqual;
|
||||
result_pipe[i] <= result_unqual;
|
||||
end else begin
|
||||
result_pipe[i] <= result_pipe[i-1];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_onehot_encoder #(
|
||||
parameter N = 6
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_priority_encoder #(
|
||||
parameter N = 1
|
||||
@@ -8,6 +8,7 @@ module VX_priority_encoder #(
|
||||
output reg valid_out
|
||||
);
|
||||
integer i;
|
||||
|
||||
always @(*) begin
|
||||
data_out = 0;
|
||||
valid_out = 0;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_rr_arbiter #(
|
||||
parameter N = 1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_scope #(
|
||||
parameter DATAW = 64,
|
||||
|
||||
50
hw/rtl/libs/VX_shift_register.v
Normal file
50
hw/rtl/libs/VX_shift_register.v
Normal file
@@ -0,0 +1,50 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Fixed-latency delay line: `out` is `in` delayed by DEPTH enabled clock edges.
//
// Parameters:
//   DATAW - width of the data word
//   DEPTH - number of register stages; 0 makes the module a plain wire
//
// Ports:
//   clk    - clock; stages update on the rising edge
//   reset  - synchronous reset, clears every stage to zero
//   enable - stages shift only while this is high; otherwise they hold
//   in     - data entering the first stage
//   out    - data leaving the last stage (or `in` itself when DEPTH == 0)
module VX_shift_register #(
    parameter DATAW = 1,
    parameter DEPTH = 0
) (
    input  wire             clk,
    input  wire             reset,
    input  wire             enable,
    input  wire [DATAW-1:0] in,
    output wire [DATAW-1:0] out
);
    // The three cases must be mutually exclusive. Without the `else` on the
    // second condition, DEPTH == 0 would elaborate both the pass-through and
    // the multi-stage branch, driving `out` twice and creating negative
    // vector ranges.
    if (0 == DEPTH) begin

        // No stages: combinational pass-through.
        assign out = in;

    end else if (1 == DEPTH) begin

        // Single stage: one register, no shifting needed.
        reg [DATAW-1:0] ram;

        always @(posedge clk) begin
            if (reset) begin
                ram <= '0;
            end else begin
                if (enable) begin
                    ram <= in;
                end
            end
        end

        assign out = ram;

    end else begin

        // DEPTH >= 2: stage 0 is the newest word, stage DEPTH-1 the oldest.
        reg [DEPTH-1:0][DATAW-1:0] ram;

        always @(posedge clk) begin
            if (reset) begin
                ram <= '0;
            end else begin
                if (enable) begin
                    // Shift every word up by one stage and insert `in` at stage 0.
                    ram <= {ram[DEPTH-2:0], in};
                end
            end
        end

        assign out = ram[DEPTH-1];

    end

endmodule
|
||||
@@ -1,8 +1,8 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_tex_mgr (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire reset
|
||||
);
|
||||
|
||||
//--
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_tex_unit #(
|
||||
parameter TADDRW = 32,
|
||||
@@ -11,7 +11,7 @@ module VX_tex_unit #(
|
||||
parameter MAXAMW = 2,
|
||||
parameter TAGW = 16,
|
||||
|
||||
parameter NUMCRQS = 32,
|
||||
parameter NUMCRQS = 32
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
@@ -17,7 +17,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/simulate
|
||||
FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
|
||||
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate $(FPU_INCLUDE)
|
||||
|
||||
SRCS = simulator.cpp testbench.cpp
|
||||
|
||||
@@ -28,30 +29,31 @@ CF += -std=c++11 -fms-extensions -I../..
|
||||
VF += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VF += -Wno-DECLFILENAME
|
||||
VF += --x-initial unique --x-assign unique
|
||||
VF += -exe $(SRCS) $(INCLUDE)
|
||||
VF += --exe $(SRCS) $(INCLUDE)
|
||||
VF += --cc Vortex.v --top-module Vortex
|
||||
VF += verilator.vlt
|
||||
|
||||
DBG += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
DBG += -DDBG_CORE_REQ_INFO
|
||||
|
||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
|
||||
gen-s:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
|
||||
|
||||
gen-sd:
|
||||
verilator $(VF) -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(SINGLECORE)' --trace $(DBG)
|
||||
verilator $(VF) $(SINGLECORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(SINGLECORE)' --trace $(DBG)
|
||||
|
||||
gen-st:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
|
||||
|
||||
gen-m:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
|
||||
gen-md:
|
||||
verilator $(VF) -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(MULTICORE)' --trace $(DBG)
|
||||
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(MULTICORE)' --trace $(DBG)
|
||||
|
||||
gen-mt:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
|
||||
build-s: gen-s
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
||||
@@ -24,7 +24,8 @@ Simulator::Simulator() {
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
trace_ = new VerilatedVcdC;
|
||||
trace_ = new VerilatedVcdC();
|
||||
trace_->set_time_unit("1ns");
|
||||
vortex_->trace(trace_, 99);
|
||||
trace_->open("trace.vcd");
|
||||
#endif
|
||||
@@ -105,9 +106,8 @@ void Simulator::eval_dram_bus() {
|
||||
if (!dram_rsp_active_) {
|
||||
if (dequeue_index != -1) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
|
||||
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
|
||||
free(dram_rsp_vec_[dequeue_index].data);
|
||||
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE);
|
||||
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
|
||||
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
|
||||
dram_rsp_active_ = true;
|
||||
} else {
|
||||
@@ -141,9 +141,8 @@ void Simulator::eval_dram_bus() {
|
||||
} else {
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.data = (uint8_t*)malloc(GLOBAL_BLOCK_SIZE);
|
||||
dram_req.tag = vortex_->dram_req_tag;
|
||||
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.data);
|
||||
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
|
||||
dram_rsp_vec_.push_back(dram_req);
|
||||
}
|
||||
}
|
||||
@@ -211,7 +210,7 @@ void Simulator::wait(uint32_t cycles) {
|
||||
}
|
||||
}
|
||||
|
||||
bool Simulator::is_busy() {
|
||||
bool Simulator::is_busy() const {
|
||||
return vortex_->busy || snp_req_active_;
|
||||
}
|
||||
|
||||
@@ -238,7 +237,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
#endif
|
||||
}
|
||||
|
||||
bool Simulator::run() {
|
||||
void Simulator::run() {
|
||||
#ifndef NDEBUG
|
||||
std::cout << timestamp << ": [sim] run()" << std::endl;
|
||||
#endif
|
||||
@@ -253,20 +252,15 @@ bool Simulator::run() {
|
||||
}
|
||||
|
||||
// wait 5 cycles to flush the pipeline
|
||||
this->wait(5);
|
||||
this->wait(5);
|
||||
}
|
||||
|
||||
// check riscv-tests PASSED/FAILED status
|
||||
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
|
||||
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
|
||||
#else
|
||||
#if (NUM_CLUSTERS == 1)
|
||||
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
|
||||
#else
|
||||
int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return (status == 1);
|
||||
int Simulator::get_last_wb_value(int reg) const {
|
||||
#if (NUM_CLUSTERS != 1)
|
||||
return (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
#else
|
||||
return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
#endif
|
||||
}
|
||||
|
||||
void Simulator::load_bin(const char* program_file) {
|
||||
|
||||
@@ -15,13 +15,13 @@
|
||||
#include <vector>
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 100
|
||||
#define DRAM_LATENCY 4
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
uint8_t *data;
|
||||
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
||||
unsigned tag;
|
||||
} dram_req_t;
|
||||
|
||||
@@ -34,7 +34,7 @@ public:
|
||||
void load_bin(const char* program_file);
|
||||
void load_ihex(const char* program_file);
|
||||
|
||||
bool is_busy();
|
||||
bool is_busy() const;
|
||||
|
||||
void reset();
|
||||
void step();
|
||||
@@ -43,7 +43,8 @@ public:
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool run();
|
||||
void run();
|
||||
int get_last_wb_value(int reg) const;
|
||||
void print_stats(std::ostream& out);
|
||||
|
||||
private:
|
||||
|
||||
@@ -3,100 +3,162 @@
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if(argc == 1) {
|
||||
bool passed = true;
|
||||
int main(int argc, char **argv) {
|
||||
if (argc == 1) {
|
||||
#define ALL_TESTS
|
||||
#ifdef ALL_TESTS
|
||||
bool passed = true;
|
||||
|
||||
std::string tests[] = {
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-add.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-addi.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-and.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-andi.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-auipc.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-beq.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-bge.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-bgeu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-blt.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-bltu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-bne.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-jal.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-jalr.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-lb.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-lbu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-lh.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-lhu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-lui.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-lw.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-or.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-ori.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sb.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sh.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-simple.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sll.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-slli.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-slt.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-slti.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sltiu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sltu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sra.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-srai.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-srl.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-srli.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sub.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sw.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-xor.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-xori.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-div.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-divu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-mul.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-mulh.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-mulhsu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-mulhu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-rem.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-remu.hex"
|
||||
};
|
||||
std::string tests[] = {
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-add.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-addi.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-and.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-andi.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-auipc.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-beq.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bge.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bgeu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-blt.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bltu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bne.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-jal.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-jalr.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lb.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lbu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lh.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lhu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lui.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lw.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-or.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-ori.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sb.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sh.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-simple.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sll.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slli.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slt.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slti.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sltiu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sltu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sra.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srai.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srl.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srli.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sub.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sw.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-xor.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-xori.hex",
|
||||
#ifdef EXT_M_ENABLE
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-div.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-divu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-mul.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulh.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulhsu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulhu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-rem.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-remu.hex",
|
||||
#endif
|
||||
};
|
||||
|
||||
for (std::string test : tests) {
|
||||
std::cerr << DEFAULT << "\n---------------------------------------\n";
|
||||
std::string tests_fp[] = {
|
||||
#ifdef EXT_F_ENABLE
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fadd.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fdiv.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fmadd.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fmin.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcmp.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-ldst.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcvt.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcvt_w.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fclass.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-move.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-recoding.hex",
|
||||
#endif
|
||||
};
|
||||
|
||||
std::cerr << test << std::endl;
|
||||
for (std::string test : tests) {
|
||||
std::cerr << DEFAULT << "\n---------------------------------------\n";
|
||||
|
||||
RAM ram;
|
||||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
simulator.load_ihex(test.c_str());
|
||||
bool curr = simulator.run();
|
||||
std::cerr << test << std::endl;
|
||||
|
||||
if (curr) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!curr) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
std::cerr << DEFAULT;
|
||||
passed = passed && curr;
|
||||
}
|
||||
RAM ram;
|
||||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
simulator.load_ihex(test.c_str());
|
||||
simulator.run();
|
||||
|
||||
std::cerr << DEFAULT << "\n***************************************\n";
|
||||
bool status = (1 == simulator.get_last_wb_value(3));
|
||||
|
||||
if (passed) std::cerr << DEFAULT << "PASSED ALL TESTS\n";
|
||||
if (!passed) std::cerr << DEFAULT << "Failed one or more tests\n";
|
||||
|
||||
return !passed;
|
||||
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
std::cerr << DEFAULT;
|
||||
passed = passed && status;
|
||||
if (!passed)
|
||||
break;
|
||||
}
|
||||
|
||||
else {
|
||||
char* test = argv[2];
|
||||
for (std::string test : tests_fp) {
|
||||
std::cerr << DEFAULT << "\n---------------------------------------\n";
|
||||
|
||||
std::cerr << test << std::endl;
|
||||
|
||||
RAM ram;
|
||||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
simulator.load_ihex(test.c_str());
|
||||
simulator.run();
|
||||
|
||||
bool status = (1 == simulator.get_last_wb_value(3));
|
||||
|
||||
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
std::cerr << DEFAULT;
|
||||
passed = passed && status;
|
||||
if (!passed)
|
||||
break;
|
||||
}
|
||||
|
||||
std::cerr << DEFAULT << "\n***************************************\n";
|
||||
|
||||
if (passed) std::cerr << DEFAULT << "PASSED ALL TESTS\n";
|
||||
if (!passed) std::cerr << DEFAULT << "Failed one or more tests\n";
|
||||
|
||||
return !passed;
|
||||
|
||||
#else
|
||||
|
||||
char test[] = "../../../runtime/tests/simple/vx_simple.hex";
|
||||
|
||||
std::cerr << test << std::endl;
|
||||
|
||||
RAM ram;
|
||||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
simulator.load_ihex(test);
|
||||
simulator.run();
|
||||
|
||||
bool status = (1 == simulator.get_last_wb_value(3));
|
||||
|
||||
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
|
||||
return !status;
|
||||
|
||||
#endif
|
||||
|
||||
} else {
|
||||
|
||||
char* test = argv[2];
|
||||
|
||||
std::cerr << test << std::endl;
|
||||
std::cerr << test << std::endl;
|
||||
|
||||
RAM ram;
|
||||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
simulator.load_ihex(test);
|
||||
bool curr = simulator.run();
|
||||
|
||||
if (curr) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!curr) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
|
||||
return !curr;
|
||||
}
|
||||
RAM ram;
|
||||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
simulator.load_ihex(test);
|
||||
simulator.run();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user