diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 38fe3d60..60061f46 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -16,12 +16,12 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO -#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 +CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DEBUG=1 +#DEBUG=1 #AFU=1 CFLAGS += -fPIC diff --git a/hw/opae/Makefile b/hw/opae/Makefile index eddd6849..339f9d37 100644 --- a/hw/opae/Makefile +++ b/hw/opae/Makefile @@ -4,13 +4,16 @@ FPGA_BUILD_DIR=build_fpga all: ase-1c -ase-1c: setup-ase-1c +sources.txt: gen_sources.sh + ./gen_sources.sh + +ase-1c: sources.txt setup-ase-1c make -C $(ASE_BUILD_DIR)_1c -ase-2c: setup-ase-2c +ase-2c: sources.txt setup-ase-2c make -C $(ASE_BUILD_DIR)_2c -ase-4c: setup-ase-4c +ase-4c: sources.txt setup-ase-4c make -C $(ASE_BUILD_DIR)_4c setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile @@ -28,13 +31,13 @@ $(ASE_BUILD_DIR)_2c/Makefile: $(ASE_BUILD_DIR)_4c/Makefile: afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c -fpga-1c: setup-fpga-1c +fpga-1c: sources.txt setup-fpga-1c cd $(FPGA_BUILD_DIR)_1c && qsub-synth -fpga-2c: setup-fpga-2c +fpga-2c: sources.txt setup-fpga-2c cd $(FPGA_BUILD_DIR)_2c && qsub-synth -fpga-4c: setup-fpga-4c +fpga-4c: sources.txt setup-fpga-4c cd $(FPGA_BUILD_DIR)_4c && qsub-synth setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf diff --git a/hw/opae/README b/hw/opae/README index 86e6f862..f480cc92 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -60,8 +60,8 @@ qsub-sim make ase # tests -./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -./run_ase.sh build_ase_1c ../../driver/tests/demo/demo +./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256 +./run_ase.sh build_ase_1c 
../../driver/tests/demo/demo -n 16 ./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd # modify "vsim_run.tcl" to dump VCD trace diff --git a/hw/opae/gen_sources.sh b/hw/opae/gen_sources.sh new file mode 100755 index 00000000..deb8cdbb --- /dev/null +++ b/hw/opae/gen_sources.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl' + +inc_list="" +for dir in $dir_list; do + inc_list="$inc_list -I$dir" +done + +echo "inc_list=$inc_list" + +{ + # read design sources: regular *.v and *.sv files located directly inside each directory + for dir in $dir_list; do + echo "+incdir+$dir" + for file in $(find $dir -maxdepth 1 -type f \( -name '*.v' -o -name '*.sv' \)) + do + echo $file + done + done +} > sources.txt \ No newline at end of file diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 0b448e88..ae14d127 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -1,46 +1,34 @@ -vortex_afu.json - -QI:vortex_afu.qsf - -#+define+SCOPE - -#+define+DBG_PRINT_CORE_ICACHE -#+define+DBG_PRINT_CORE_DCACHE -#+define+DBG_PRINT_CACHE_BANK -#+define+DBG_PRINT_CACHE_SNP -#+define+DBG_PRINT_CACHE_MSRQ -#+define+DBG_PRINT_DRAM -#+define+DBG_PRINT_PIPELINE -#+define+DBG_PRINT_OPAE -#+define+DBG_PRINT_SCOPE - -+incdir+. 
-+incdir+../rtl -+incdir+../rtl/interfaces -+incdir+../rtl/pipe_regs -+incdir+../rtl/cache +incdir+../rtl/libs - -../rtl/VX_user_config.vh -../rtl/VX_config.vh -../rtl/VX_define.vh - -../rtl/cache/VX_cache_config.vh -../rtl/cache/VX_cache.v -../rtl/cache/VX_cache_core_rsp_merge.v -../rtl/cache/VX_cache_core_req_bank_sel.v -../rtl/cache/VX_cache_dram_req_arb.v -../rtl/cache/VX_cache_dram_fill_arb.v -../rtl/cache/VX_cache_miss_resrv.v +../rtl/libs/VX_countones.v +../rtl/libs/VX_divide.v +../rtl/libs/VX_fair_arbiter.v +../rtl/libs/VX_fixed_arbiter.v +../rtl/libs/VX_generic_queue.v +../rtl/libs/VX_generic_register.v +../rtl/libs/VX_generic_stack.v +../rtl/libs/VX_index_queue.v +../rtl/libs/VX_matrix_arbiter.v +../rtl/libs/VX_mult.v +../rtl/libs/VX_priority_encoder.v +../rtl/libs/VX_rr_arbiter.v +../rtl/libs/VX_onehot_encooder.v ++incdir+../rtl/cache ../rtl/cache/VX_bank.v ../rtl/cache/VX_bank_core_req_arb.v +../rtl/cache/VX_cache.v +../rtl/cache/VX_cache_core_req_bank_sel.v +../rtl/cache/VX_cache_core_rsp_merge.v +../rtl/cache/VX_cache_dram_fill_arb.v +../rtl/cache/VX_cache_dram_req_arb.v +../rtl/cache/VX_cache_miss_resrv.v +../rtl/cache/VX_prefetcher.v +../rtl/cache/VX_snp_forwarder.v ../rtl/cache/VX_snp_rsp_arb.v ../rtl/cache/VX_tag_data_access.v ../rtl/cache/VX_tag_data_structure.v -../rtl/cache/VX_snp_forwarder.v -../rtl/cache/VX_prefetcher.v - -../rtl/interfaces/VX_branch_rsp_if.v ++incdir+../rtl/interfaces +../rtl/interfaces/VX_alu_req_if.v +../rtl/interfaces/VX_branch_ctl_if.v ../rtl/interfaces/VX_cache_core_req_if.v ../rtl/interfaces/VX_cache_core_rsp_if.v ../rtl/interfaces/VX_cache_dram_req_if.v @@ -48,65 +36,46 @@ QI:vortex_afu.qsf ../rtl/interfaces/VX_cache_snp_req_if.v ../rtl/interfaces/VX_cache_snp_rsp_if.v ../rtl/interfaces/VX_csr_req_if.v +../rtl/interfaces/VX_commit_if.v ../rtl/interfaces/VX_csr_io_req_if.v -../rtl/interfaces/VX_csr_io_rsp_if.v -../rtl/interfaces/VX_exec_unit_req_if.v -../rtl/interfaces/VX_backend_req_if.v 
-../rtl/interfaces/VX_gpr_read_if.v -../rtl/interfaces/VX_gpu_inst_req_if.v -../rtl/interfaces/VX_inst_meta_if.v -../rtl/interfaces/VX_jal_rsp_if.v +../rtl/interfaces/VX_decode_if.v +../rtl/interfaces/VX_gpr_data_if.v +../rtl/interfaces/VX_gpu_req_if.v ../rtl/interfaces/VX_join_if.v ../rtl/interfaces/VX_lsu_req_if.v ../rtl/interfaces/VX_warp_ctl_if.v ../rtl/interfaces/VX_wb_if.v ../rtl/interfaces/VX_wstall_if.v - -../rtl/libs/VX_generic_register.v -../rtl/libs/VX_mult.v -../rtl/libs/VX_divide.v -../rtl/libs/VX_generic_stack.v -../rtl/libs/VX_priority_encoder.v -../rtl/libs/VX_generic_queue.v -../rtl/libs/VX_indexable_queue.v -../rtl/libs/VX_fair_arbiter.v -../rtl/libs/VX_fixed_arbiter.v -../rtl/libs/VX_rr_arbiter.v -../rtl/libs/VX_countones.v -../rtl/libs/VX_scope.v - -../rtl/Vortex.v +../rtl/interfaces/VX_csr_io_rsp_if.v +../rtl/interfaces/VX_ifetch_req_if.v +../rtl/interfaces/VX_ifetch_rsp_if.v +../rtl/interfaces/VX_mul_req_if.v +../rtl/interfaces/VX_perf_cntrs_if.v ++incdir+../rtl +../rtl/VX_alu_unit.v +../rtl/VX_commit.v ../rtl/VX_cluster.v ../rtl/VX_core.v -../rtl/VX_mem_unit.v -../rtl/VX_pipeline.v -../rtl/VX_front_end.v -../rtl/VX_back_end.v -../rtl/VX_fetch.v -../rtl/VX_scheduler.v -../rtl/VX_exec_unit.v -../rtl/VX_warp.v -../rtl/VX_icache_stage.v -../rtl/VX_gpr_wrapper.v -../rtl/VX_gpu_inst.v -../rtl/VX_writeback.v -../rtl/VX_csr_pipe.v ../rtl/VX_csr_data.v ../rtl/VX_csr_arb.v +../rtl/VX_dcache_arb.v +../rtl/VX_decode.v ../rtl/VX_csr_io_arb.v -../rtl/VX_warp_sched.v +../rtl/VX_fetch.v +../rtl/VX_csr_unit.v ../rtl/VX_gpr_ram.v ../rtl/VX_gpr_stage.v -../rtl/VX_alu_unit.v +../rtl/VX_execute.v +../rtl/VX_gpu_unit.v +../rtl/VX_icache_stage.v +../rtl/VX_issue.v ../rtl/VX_lsu_unit.v -../rtl/VX_decode.v -../rtl/VX_inst_multiplex.v -../rtl/VX_dcache_arb.v ../rtl/VX_mem_arb.v -../rtl/VX_f_d_reg.v -../rtl/VX_i_d_reg.v -../rtl/VX_d_e_reg.v - -ccip_interface_reg.sv -ccip_std_afu.sv -vortex_afu.sv \ No newline at end of file +../rtl/VX_mem_unit.v +../rtl/VX_pipeline.v 
+../rtl/VX_scheduler.v +../rtl/VX_issue_mux.v +../rtl/VX_warp_sched.v +../rtl/VX_writeback.v +../rtl/Vortex.v +../rtl/VX_mul_unit.v diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index 8cbd9b8f..b40f7162 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -1,3 +1,21 @@ +define+NUM_CORES=1 +#+define+SCOPE + +#+define+DBG_PRINT_CORE_ICACHE +#+define+DBG_PRINT_CORE_DCACHE +#+define+DBG_PRINT_CACHE_BANK +#+define+DBG_PRINT_CACHE_SNP +#+define+DBG_PRINT_CACHE_MSRQ +#+define+DBG_PRINT_DRAM +#+define+DBG_PRINT_PIPELINE +#+define+DBG_PRINT_OPAE +#+define+DBG_PRINT_SCOPE + +vortex_afu.json +QI:vortex_afu.qsf +ccip_interface_reg.sv +ccip_std_afu.sv +vortex_afu.sv + C:sources.txt \ No newline at end of file diff --git a/hw/opae/sources_2c.txt b/hw/opae/sources_2c.txt index d32f448f..ca991ef9 100644 --- a/hw/opae/sources_2c.txt +++ b/hw/opae/sources_2c.txt @@ -1,4 +1,10 @@ +define+NUM_CORES=2 +define+L2_ENABLE=0 +vortex_afu.json +QI:vortex_afu.qsf +ccip_interface_reg.sv +ccip_std_afu.sv +vortex_afu.sv + C:sources.txt \ No newline at end of file diff --git a/hw/opae/sources_4c.txt b/hw/opae/sources_4c.txt index 03959c74..6ee3aa06 100644 --- a/hw/opae/sources_4c.txt +++ b/hw/opae/sources_4c.txt @@ -1,4 +1,10 @@ +define+NUM_CORES=4 +define+L2_ENABLE=0 +vortex_afu.json +QI:vortex_afu.qsf +ccip_interface_reg.sv +ccip_std_afu.sv +vortex_afu.sv + C:sources.txt \ No newline at end of file diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index ff4c2b05..2f58a8bc 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -13,7 +13,7 @@ module VX_alu_unit #( VX_branch_ctl_if branch_ctl_if, VX_commit_if alu_commit_if ); - wire [`NUM_THREADS-1:0][31:0] alu_result; + reg [`NUM_THREADS-1:0][31:0] alu_result; wire [`NUM_THREADS-1:0][32:0] sub_result; wire [`NUM_THREADS-1:0][32:0] shift_result; @@ -99,7 +99,7 @@ module VX_alu_unit #( ); VX_generic_register #( - .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)), + 
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)) ) alu_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index b1b8759d..8751847e 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -76,7 +76,7 @@ `define CSR_WIDTH 12 -`define DIV_LATENCY 2 +`define DIV_LATENCY 21 `define MUL_LATENCY 2 @@ -390,6 +390,8 @@ /////////////////////////////////////////////////////////////////////////////// + + task print_ex_type; input [`EX_BITS-1:0] ex; begin diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index fb0fe514..7cb84c7e 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -30,6 +30,7 @@ module VX_lsu_unit #( wire [`NW_BITS-1:0] use_warp_num; wire [`WB_BITS-1:0] use_wb; wire [31:0] use_pc; + wire mrq_full; genvar i; @@ -83,8 +84,7 @@ module VX_lsu_unit #( wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr; wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset; wire [`BYTEEN_BITS-1:0] core_rsp_mem_read; - wire mrq_full; - + wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready && (0 == use_req_rw); // only push read requests diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index f6a6976d..4ebe1cbd 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -12,7 +12,7 @@ module VX_mul_unit #( // Outputs VX_commit_if mul_commit_if ); - wire [`NUM_THREADS-1:0][31:0] alu_result; + reg [`NUM_THREADS-1:0][31:0] alu_result; wire [`NUM_THREADS-1:0][63:0] mul_result; wire [`NUM_THREADS-1:0][31:0] div_result; wire [`NUM_THREADS-1:0][31:0] rem_result; @@ -77,6 +77,8 @@ module VX_mul_unit #( end end + wire stall; + reg result_avail; reg [4:0] pending_ctr; wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? 
`DIV_LATENCY : `MUL_LATENCY; @@ -104,13 +106,13 @@ module VX_mul_unit #( wire pipeline_stall = ~result_avail && (| mul_req_if.valid); - wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid)) - || pipeline_stall; + assign stall = (~mul_commit_if.ready && (| mul_commit_if.valid)) + || pipeline_stall; wire flush = mul_commit_if.ready && pipeline_stall; VX_generic_register #( - .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)), + .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)) ) mul_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index a0040a5b..571244d8 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -18,7 +18,6 @@ module VX_warp_sched #( ); wire update_use_wspawn; wire update_visible_active; - wire scheduled_warp; wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0]; diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index dfbf0e4a..a606c15f 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -139,54 +139,54 @@ module Vortex ( end else begin - wire per_cluster_dram_req_valid [`NUM_CLUSTERS-1:0]; - wire per_cluster_dram_req_rw [`NUM_CLUSTERS-1:0]; - wire [`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen [`NUM_CLUSTERS-1:0]; - wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr [`NUM_CLUSTERS-1:0]; - wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data [`NUM_CLUSTERS-1:0]; - wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag [`NUM_CLUSTERS-1:0]; - wire l3_core_req_ready; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag; + wire l3_core_req_ready; - wire 
per_cluster_dram_rsp_valid [`NUM_CLUSTERS-1:0]; - wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data [`NUM_CLUSTERS-1:0]; - wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag [`NUM_CLUSTERS-1:0]; - wire per_cluster_dram_rsp_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; - wire per_cluster_snp_req_valid [`NUM_CLUSTERS-1:0]; - wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr [`NUM_CLUSTERS-1:0]; - wire per_cluster_snp_req_invalidate [`NUM_CLUSTERS-1:0]; - wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag [`NUM_CLUSTERS-1:0]; - wire per_cluster_snp_req_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; - wire per_cluster_snp_rsp_valid [`NUM_CLUSTERS-1:0]; - wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag [`NUM_CLUSTERS-1:0]; - wire per_cluster_snp_rsp_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready; - wire per_cluster_io_req_valid [`NUM_CLUSTERS-1:0]; - wire per_cluster_io_req_rw [`NUM_CLUSTERS-1:0]; - wire [3:0] per_cluster_io_req_byteen [`NUM_CLUSTERS-1:0]; - wire [29:0] per_cluster_io_req_addr [`NUM_CLUSTERS-1:0]; - wire [31:0] per_cluster_io_req_data [`NUM_CLUSTERS-1:0]; - wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag [`NUM_CLUSTERS-1:0]; - wire per_cluster_io_req_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_valid; + wire 
[`NUM_CLUSTERS-1:0] per_cluster_io_req_rw; + wire [`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen; + wire [`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data; + wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready; - wire per_cluster_io_rsp_valid [`NUM_CLUSTERS-1:0]; - wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag [`NUM_CLUSTERS-1:0]; - wire [31:0] per_cluster_io_rsp_data [`NUM_CLUSTERS-1:0]; - wire per_cluster_io_rsp_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready; - wire per_cluster_csr_io_req_valid [`NUM_CLUSTERS-1:0]; - wire [11:0] per_cluster_csr_io_req_addr [`NUM_CLUSTERS-1:0]; - wire per_cluster_csr_io_req_rw [`NUM_CLUSTERS-1:0]; - wire [31:0] per_cluster_csr_io_req_data [`NUM_CLUSTERS-1:0]; - wire per_cluster_csr_io_req_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid; + wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready; - wire per_cluster_csr_io_rsp_valid [`NUM_CLUSTERS-1:0]; - wire [31:0] per_cluster_csr_io_rsp_data [`NUM_CLUSTERS-1:0]; - wire per_cluster_csr_io_rsp_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready; - wire per_cluster_busy [`NUM_CLUSTERS-1:0]; - wire per_cluster_ebreak [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] per_cluster_busy; + wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak; wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = 
`CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS)); wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid); @@ -336,27 +336,27 @@ module Vortex ( // L3 Cache /////////////////////////////////////////////////////////// - wire l3_core_req_valid [`L3NUM_REQUESTS-1:0]; - wire l3_core_req_rw [`L3NUM_REQUESTS-1:0]; - wire [`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen [`L3NUM_REQUESTS-1:0]; - wire [`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr [`L3NUM_REQUESTS-1:0]; - wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data [`L3NUM_REQUESTS-1:0]; - wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag [`L3NUM_REQUESTS-1:0]; + wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid; + wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag; - wire l3_core_rsp_valid [`L3NUM_REQUESTS-1:0]; - wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data [`L3NUM_REQUESTS-1:0]; - wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag [`L3NUM_REQUESTS-1:0]; - wire l3_core_rsp_ready; + wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag; + wire l3_core_rsp_ready; - wire l3_snp_fwdout_valid [`NUM_CLUSTERS-1:0]; - wire [`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr [`NUM_CLUSTERS-1:0]; - wire l3_snp_fwdout_invalidate [`NUM_CLUSTERS-1:0]; - wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag [`NUM_CLUSTERS-1:0]; - wire l3_snp_fwdout_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag; + wire 
[`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready; - wire l3_snp_fwdin_valid [`NUM_CLUSTERS-1:0]; - wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag [`NUM_CLUSTERS-1:0]; - wire l3_snp_fwdin_ready [`NUM_CLUSTERS-1:0]; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready; for (i = 0; i < `L3NUM_REQUESTS; i++) begin // Core Request diff --git a/hw/rtl/tex_unit/VX_tex_mgr.v b/hw/rtl/tex_unit/VX_tex_mgr.v index 42184037..0452e00f 100644 --- a/hw/rtl/tex_unit/VX_tex_mgr.v +++ b/hw/rtl/tex_unit/VX_tex_mgr.v @@ -2,7 +2,7 @@ module VX_tex_mgr ( input wire clk, - input wire reset, + input wire reset ); //-- diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index b7eef8d8..f400ad63 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -11,7 +11,7 @@ module VX_tex_unit #( parameter MAXAMW = 2, parameter TAGW = 16, - parameter NUMCRQS = 32, + parameter NUMCRQS = 32 ) ( input wire clk, input wire reset, diff --git a/hw/syn/yosys/synth.sh b/hw/syn/yosys/synth.sh index 621866f1..544bbad0 100755 --- a/hw/syn/yosys/synth.sh +++ b/hw/syn/yosys/synth.sh @@ -12,7 +12,7 @@ echo "inc_list=$inc_list" { # read design sources for dir in $dir_list; do - for file in $(find $dir -name '*.v' -o -name '*.sv' -type f) + for file in $(find $dir -maxdepth 1 -type f \( -name '*.v' -o -name '*.sv' \)) do echo "read_verilog -sv $inc_list $file" done