From 134dd4eb59ec5195cf4b76ec3a3ca0a9066a3036 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 15 Nov 2023 21:58:40 -0800 Subject: [PATCH 01/36] Update BlackBox to include Vortex 2.0 --- src/main/scala/rocket/VortexCore.scala | 293 +++++++++++++++---------- 1 file changed, 178 insertions(+), 115 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index bfc3cbf..2c13e61 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -42,12 +42,12 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle // conditionally instantiate ports depending on whether we want to use VX_cache or not val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { - val a = Decoupled(new VortexBundleA(sourceWidth = 10, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 10, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(sourceWidth = 46, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 46, dataWidth = 32))) })) else None val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(sourceWidth = 10, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 10, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(sourceWidth = 47, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 47, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { val a = Decoupled(new VortexBundleA(sourceWidth = 15, dataWidth = 128)) @@ -90,87 +90,153 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) // addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.v") // addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v") // addResource("/vsrc/vortex/hw/syn/modelsim/vortex_tb.v") - 
addResource("/vsrc/vortex/hw/rtl/VX_dispatch.sv") - addResource("/vsrc/vortex/hw/rtl/VX_issue.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh") - - addResource("/vsrc/vortex/hw/rtl/VX_warp_sched.sv") - // addResource("/vsrc/vortex/hw/rtl/Vortex.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_lerp.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_addr.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_mem.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_format.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sampler.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_unit.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv") - addResource("/vsrc/vortex/hw/rtl/VX_scope.vh") - addResource("/vsrc/vortex/hw/rtl/VX_fpu_unit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_scoreboard.sv") - addResource("/vsrc/vortex/hw/rtl/VX_writeback.sv") - addResource("/vsrc/vortex/hw/rtl/VX_muldiv.sv") - addResource("/vsrc/vortex/hw/rtl/VX_decode.sv") - addResource("/vsrc/vortex/hw/rtl/VX_ibuffer.sv") + addResource("/vsrc/vortex/hw/rtl/VX_gpu_pkg.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_cluster.sv") - addResource("/vsrc/vortex/hw/rtl/VX_icache_stage.sv") - addResource("/vsrc/vortex/hw/rtl/VX_gpu_unit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_trace_instr.vh") - addResource("/vsrc/vortex/hw/rtl/VX_gpu_types.vh") addResource("/vsrc/vortex/hw/rtl/VX_config.vh") - // unused addResource("/vsrc/vortex/hw/rtl/libs/VX_mux.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_lzc.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_fifo_queue.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_scan.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_find_first.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_multiplier.sv") - 
addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_remove.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv") - // unused addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_mux.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_reset_relay.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv") + addResource("/vsrc/vortex/hw/VX_config.h") + addResource("/vsrc/vortex/hw/rtl/VX_define.vh") + addResource("/vsrc/vortex/hw/rtl/VX_platform.vh") + addResource("/vsrc/vortex/hw/rtl/VX_scope.vh") + // addResource("/vsrc/vortex/hw/rtl/VX_socket.sv") + addResource("/vsrc/vortex/hw/rtl/VX_types.vh") + // addResource("/vsrc/vortex/hw/rtl/Vortex.sv") + + addResource("/vsrc/vortex/hw/rtl/core/VX_alu_unit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_commit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_core.sv") + // These are top modules used for unittests + // addResource("/vsrc/vortex/hw/rtl/core/VX_core_top.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_top.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_cluster_top.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_csr_data.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_csr_unit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_dcr_data.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_decode.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch_unit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_execute.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_fetch.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_gather_unit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_ibuffer.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_int_unit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_ipdom_stack.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_issue.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_lsu_unit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_muldiv_unit.sv") + 
addResource("/vsrc/vortex/hw/rtl/core/VX_operands.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_pending_instr.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_schedule.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_scoreboard.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_sfu_unit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_smem_unit.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_split_join.sv") + addResource("/vsrc/vortex/hw/rtl/core/VX_trace.vh") + addResource("/vsrc/vortex/hw/rtl/core/VX_wctl_unit.sv") + + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bank.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bypass.sv") + // need to disable this if VX_cache_cluster_top is disabled, otherwise causes + // unconnected port error + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_cluster.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_data.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_init.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_mshr.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_tags.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_wrap.sv") + + // gbar is only used in the socket/cluster hierarchy + // addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_arb.sv") + // addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_bus_if.sv") + // addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_unit.sv") + // Only used for caches + // addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_arb.sv") + addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_bus_if.sv") + // addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_perf_if.sv") + addResource("/vsrc/vortex/hw/rtl/mem/VX_shared_mem.sv") + addResource("/vsrc/vortex/hw/rtl/mem/VX_smem_switch.sv") + + + // tex_unit missing in Vortex 2.0 + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv") + // 
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_lerp.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_addr.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_mem.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_format.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sampler.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_unit.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv") + + addResource("/vsrc/vortex/hw/rtl/libs/VX_allocator.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_avs_adapter.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv") addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_insert.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_skid_buffer.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_fixed_arbiter.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_shift_register.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_index_buffer.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_encoder.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_matrix_arbiter.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_remove.sv") + // unused addResource("/vsrc/vortex/hw/rtl/libs/VX_bypass_buffer.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_cyclic_arbiter.sv") // unused addResource("/vsrc/vortex/hw/rtl/libs/VX_divider.sv") addResource("/vsrc/vortex/hw/rtl/libs/VX_dp_ram.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_adapter.sv") addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_buffer.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_rr_arbiter.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_arbiter.sv") - // unused addResource("/vsrc/vortex/hw/rtl/libs/VX_bypass_buffer.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_sp_ram.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_demux.sv") - - // unused 
addResource("/vsrc/vortex/hw/rtl/libs/VX_index_queue.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_div.sv") addResource("/vsrc/vortex/hw/rtl/libs/VX_fair_arbiter.sv") - addResource("/vsrc/vortex/hw/rtl/VX_define.vh") - addResource("/vsrc/vortex/hw/rtl/VX_csr_data.sv") - addResource("/vsrc/vortex/hw/rtl/VX_cache_arb.sv") - addResource("/vsrc/vortex/hw/rtl/VX_ipdom_stack.sv") - addResource("/vsrc/vortex/hw/rtl/VX_gpr_stage.sv") - addResource("/vsrc/vortex/hw/rtl/VX_execute.sv") - addResource("/vsrc/vortex/hw/rtl/VX_fetch.sv") - addResource("/vsrc/vortex/hw/rtl/VX_alu_unit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_platform.vh") - addResource("/vsrc/vortex/hw/rtl/VX_commit.sv") - - addResource("/vsrc/vortex/hw/rtl/VX_pipeline.sv") - addResource("/vsrc/vortex/hw/rtl/VX_lsu_unit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_csr_unit.sv") - // addResource("/vsrc/vortex/hw/rtl/Vortex_axi.sv") - // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_div.sv") - addResource("/vsrc/vortex/hw/VX_config.h") - addResource("/vsrc/vortex/sim/common/rvfloats.h") + addResource("/vsrc/vortex/hw/rtl/libs/VX_fifo_queue.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_find_first.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_generic_arbiter.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_index_buffer.sv") + // unused addResource("/vsrc/vortex/hw/rtl/libs/VX_index_queue.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_lzc.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_matrix_arbiter.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_mem_adapter.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_mem_rsp_sel.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_mem_scheduler.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_multiplier.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_mux.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_encoder.sv") + // unused addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_mux.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_pending_size.sv") + 
addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_arbiter.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_reduce.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_reset_relay.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_rr_arbiter.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_scan.sv") + // These VX_scope_* seems to be used for FPGA debugging; if we leave them in, + // they cause elaboration errors + // addResource("/vsrc/vortex/hw/rtl/libs/VX_scope_switch.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_scope_tap.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_div.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_mul.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_shift_register.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_skid_buffer.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_sp_ram.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_arb.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_switch.sv") + addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_xbar.sv") + + addResource("/vsrc/vortex/hw/dpi/float_dpi.cpp") + addResource("/vsrc/vortex/hw/dpi/float_dpi.vh") + addResource("/vsrc/vortex/hw/dpi/util_dpi.cpp") + addResource("/vsrc/vortex/hw/dpi/util_dpi.vh") + // needed dpi cpp files + addResource("/vsrc/vortex/sim/common/bitmanip.h") + addResource("/vsrc/vortex/sim/common/mem.cpp") + addResource("/vsrc/vortex/sim/common/mem.h") + addResource("/vsrc/vortex/sim/common/mempool.h") addResource("/vsrc/vortex/sim/common/rvfloats.cpp") + addResource("/vsrc/vortex/sim/common/rvfloats.h") + addResource("/vsrc/vortex/sim/common/simobject.h") + addResource("/vsrc/vortex/sim/common/stringutil.h") + addResource("/vsrc/vortex/sim/common/util.cpp") + addResource("/vsrc/vortex/sim/common/util.h") + addResource("/vsrc/vortex/sim/common/uuid_gen.h") + // 
addResource("/csrc/softfloat_archive.a") addResource("/csrc/softfloat/include/internals.h") addResource("/csrc/softfloat/include/primitives.h") @@ -178,11 +244,22 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) addResource("/csrc/softfloat/include/softfloat.h") addResource("/csrc/softfloat/include/softfloat_types.h") addResource("/csrc/softfloat/RISCV/specialize.h") - addResource("/vsrc/vortex/hw/dpi/float_dpi.cpp") - addResource("/vsrc/vortex/hw/dpi/float_dpi.vh") - addResource("/vsrc/vortex/hw/dpi/util_dpi.cpp") - addResource("/vsrc/vortex/hw/dpi/util_dpi.vh") - addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_dpi.sv") + + // Vortex 2.0: fp_cores/ renamed to fpu/ + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_class.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_cvt.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_define.vh") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_div.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_dpi.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_dsp.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_fma.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_fpnew.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_ncomp.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_pkg.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_rounding.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_sqrt.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_to_csr_if.sv") + addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_unit.sv") // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_rounding.sv") // addResource("/vsrc/vortex/hw/rtl/fp_cores/altera/stratix10/dspba_delay_ver.sv") // addResource("/vsrc/vortex/hw/rtl/fp_cores/altera/stratix10/acl_fsqrt.sv") @@ -195,46 +272,32 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_class.sv") // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_fpnew.sv") // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_cvt.sv") - 
addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_define.vh") // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_fma.sv") // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_ncomp.sv") // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_fpga.sv") - addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_types.vh") + // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_types.vh") // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_sqrt.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_csr_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_join_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_cache_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_memsys_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_writeback_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpu_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_pipeline_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_cmt_to_csr_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_to_alu_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_alu_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ibuffer_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_branch_ctl_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_lsu_req_if.sv") - 
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_wstall_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_to_csr_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_csr_if.sv") addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_req_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_sched_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcr_bus_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_sched_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dispatch_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_execute_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fetch_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ibuffer_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_operands_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_pipeline_perf_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_sched_csr_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_schedule_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_sfu_csr_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_sfu_perf_if.sv") addResource("/vsrc/vortex/hw/rtl/interfaces/VX_warp_ctl_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fetch_to_csr_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_tex_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_req_if.sv") + addResource("/vsrc/vortex/hw/rtl/interfaces/VX_writeback_if.sv") + // addResource("/vsrc/vortex/hw/rtl/afu/vortex_afu.sv") // addResource("/vsrc/vortex/hw/rtl/afu/ccip_std_afu.sv") // addResource("/vsrc/vortex/hw/rtl/afu/vortex_afu.vh") @@ -262,7 +325,7 @@ class 
Vortex(tile: VortexTile)(implicit p: Parameters) addResource("/vsrc/vortex/hw/rtl/VX_core.sv") addResource("/vsrc/vortex/hw/rtl/VX_core_wrapper.sv") } else { - addResource("/vsrc/vortex/hw/rtl/VX_pipeline_wrapper.sv") + addResource("/vsrc/vortex/hw/rtl/VX_core_wrapper.sv") } val nTotalRoCCCSRs = 0 From dca74eface696fdf0782354b47346ae9dbdc5384 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 15 Nov 2023 22:06:17 -0800 Subject: [PATCH 02/36] Bump vortex to 2.0 --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index e7a3db6..963c276 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit e7a3db6909e72b67e6d2327b46d2ac81ca6bb149 +Subproject commit 963c2765d99bcf58e95eeca11b3b82fa8491cc08 From 65f4264d5728ba84f6ea00abe869d71cbda85183 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 16 Nov 2023 18:00:40 -0800 Subject: [PATCH 03/36] Pass hang100 address to wrapper verilog --- src/main/scala/rocket/VortexCore.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 2c13e61..a82b631 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -70,7 +70,11 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) // Each Vortex core gets tied-off hartId of 0, 1, 2, 3, ... // The actual MHARTID read by the program is different by warp, not core; // see VX_csr_data that implements the read logic for CSR_MHARTID/GWID. - Map("CORE_ID" -> tile.tileParams.hartId) + Map( + "CORE_ID" -> tile.tileParams.hartId, + // TODO: can we get this as a parameter? 
+ "BOOTROM_HANG100" -> 0x10100, + ) ) with HasBlackBoxResource { // addResource("/vsrc/vortex/hw/unit_tests/generic_queue/testbench.v") @@ -159,7 +163,6 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) addResource("/vsrc/vortex/hw/rtl/mem/VX_shared_mem.sv") addResource("/vsrc/vortex/hw/rtl/mem/VX_smem_switch.sv") - // tex_unit missing in Vortex 2.0 // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv") // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv") From 05ffa884a680fe312ee6d53735c25033b4b4fc4d Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 16 Nov 2023 18:00:56 -0800 Subject: [PATCH 04/36] Bump vortex with DCR fix --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index 963c276..e2d3d93 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit 963c2765d99bcf58e95eeca11b3b82fa8491cc08 +Subproject commit e2d3d93deab42b516c7e2725d667333d473a31e7 From 6802d2359825bdfaf6ab02551f9aa26b9c489d34 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 17 Nov 2023 19:12:03 -0800 Subject: [PATCH 05/36] Change dcache sourceWidth constant to match DCACHE_NOSM_TAG_WIDTH --- src/main/scala/rocket/VortexCore.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index a82b631..daac52f 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -46,8 +46,8 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 46, dataWidth = 32))) })) else None val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(sourceWidth = 47, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 
47, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(sourceWidth = 46, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 46, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { val a = Decoupled(new VortexBundleA(sourceWidth = 15, dataWidth = 128)) @@ -163,6 +163,7 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) addResource("/vsrc/vortex/hw/rtl/mem/VX_shared_mem.sv") addResource("/vsrc/vortex/hw/rtl/mem/VX_smem_switch.sv") + // tex_unit missing in Vortex 2.0 // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv") // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv") From 765c8ef1b095df6c72ba8f2c8b93b7940f15ffb7 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 17 Nov 2023 19:12:35 -0800 Subject: [PATCH 06/36] Remove unnecessary write ack filtering logic in VortexTLAdapter --- src/main/scala/tile/VortexTile.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 27ce62a..d4c56de 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -593,8 +593,8 @@ class VortexTLAdapter( io.outReq.bits.corrupt := 0.U io.inReq.ready := io.outReq.ready // VortexBundleD <> TLBundleD - // Do not reply to write requests; Vortex core does not expect ack on writes - io.inResp.valid := io.outResp.valid && edge.hasData(io.outResp.bits) + // Filtering out write requests is handled inside the wrapper Verilog + io.inResp.valid := io.outResp.valid io.inResp.bits.opcode := io.outResp.bits.opcode io.inResp.bits.size := io.outResp.bits.size io.inResp.bits.source := io.outResp.bits.source From 1346f74210d981e807aaeda9c9946de4eb3f845a Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 17 Nov 2023 19:13:48 -0800 Subject: [PATCH 07/36] Bump vortex with tag width fix --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index e2d3d93..faf5fe3 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit e2d3d93deab42b516c7e2725d667333d473a31e7 +Subproject commit faf5fe3838b28ba85cc1dac4d79fdca0c8334a46 From d7cbf4916afd2936d879aff685b9cc466dde566b Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 19 Nov 2023 17:49:47 -0800 Subject: [PATCH 08/36] Rename sourceWidth -> tagWidth --- src/main/scala/rocket/VortexCore.scala | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index daac52f..f956523 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -10,26 +10,26 @@ import org.chipsalliance.cde.config.Parameters import freechips.rocketchip.tile._ class VortexBundleA( - sourceWidth: Int, + tagWidth: Int, dataWidth: Int ) extends Bundle { assert(dataWidth % 8 == 0) val opcode = UInt(3.W) // FIXME: hardcoded val size = UInt(4.W) // FIXME: hardcoded - val source = UInt(sourceWidth.W) // FIXME: hardcoded + val source = UInt(tagWidth.W) // FIXME: hardcoded val address = UInt(32.W) // FIXME: hardcoded val mask = UInt((dataWidth / 8).W) // FIXME: hardcoded val data = UInt(dataWidth.W) // FIXME: hardcoded } class VortexBundleD( - sourceWidth: Int, + tagWidth: Int, dataWidth: Int ) extends Bundle { assert(dataWidth % 8 == 0) val opcode = UInt(3.W) // FIXME: hardcoded val size = UInt(4.W) // FIXME: hardcoded - val source = UInt(sourceWidth.W) // FIXME: hardcoded + val source = UInt(tagWidth.W) // FIXME: hardcoded val data = UInt(dataWidth.W) // FIXME: hardcoded } @@ -42,16 +42,16 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle // conditionally instantiate ports depending on whether we want to use VX_cache or not val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new 
Bundle { - val a = Decoupled(new VortexBundleA(sourceWidth = 46, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 46, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(tagWidth = 46, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = 46, dataWidth = 32))) })) else None val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(sourceWidth = 46, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 46, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(tagWidth = 46, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = 46, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { - val a = Decoupled(new VortexBundleA(sourceWidth = 15, dataWidth = 128)) - val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 15, dataWidth = 128))) + val a = Decoupled(new VortexBundleA(tagWidth = 15, dataWidth = 128)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = 15, dataWidth = 128))) // val a = tile.memNode.out.head._1.a.cloneType // val d = Flipped(tile.memNode.out.head._1.d.cloneType) }) else None From ccd658299187d0a0abf6929990aaa88274ca6c0e Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 19 Nov 2023 17:54:08 -0800 Subject: [PATCH 09/36] Set correct mask for PutPartial for core writes Previously byte-partial writes such as `sh` would not work correctly. 
--- src/main/scala/tile/VortexTile.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index d4c56de..1279d87 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -566,7 +566,7 @@ class VortexTLAdapter( val inResp = Decoupled(inRespT) val outResp = chiselTypeOf(outTL._1.d) }) - val edge = outTL._2 + val (bundle, edge) = outTL val sourceGen = Module( new SourceGenerator( newSourceWidth, @@ -587,8 +587,15 @@ class VortexTLAdapter( io.outReq.bits.size := io.inReq.bits.size io.outReq.bits.source := io.inReq.bits.source io.outReq.bits.address := io.inReq.bits.address - // generate TL-correct mask - io.outReq.bits.mask := edge.mask(io.inReq.bits.address, io.inReq.bits.size) + // this is just to double-check TLWidthWidget is in place + require(io.inReq.bits.size.getWidth == bundle.params.sizeBits) + // Get requires contiguous mask; only copy core's potentially-partial mask + // when writing + io.outReq.bits.mask := Mux(edge.hasData(io.outReq.bits), + io.inReq.bits.mask, + // generate TL-correct mask + edge.mask(io.inReq.bits.address, io.inReq.bits.size) + ) io.outReq.bits.data := io.inReq.bits.data io.outReq.bits.corrupt := 0.U io.inReq.ready := io.outReq.ready From dafacf9873fa280e3acc479581b773f0c11ef81b Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 19 Nov 2023 17:55:23 -0800 Subject: [PATCH 10/36] Bump vortex --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index faf5fe3..99207c8 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit faf5fe3838b28ba85cc1dac4d79fdca0c8334a46 +Subproject commit 99207c862c104ee90490ad8d6c50755409fcf029 From 8ed82e8261e7ec4d5bf5d2c95944383488459d0b Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 27 Nov 
2023 16:42:07 -0800 Subject: [PATCH 11/36] Remove unclear size width requirement in tl adapter --- src/main/scala/tile/VortexTile.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 1279d87..5eefadd 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -286,6 +286,7 @@ class VortexTile private ( // coalToVxCacheNode is a bad naming, it really means up steam of vxBank in whihc it takes input // imemNodes.foreach { l1cache.icache_bank.coalToVxCacheNode := TLWidthWidget(4) := _ } imemNodes.foreach { l1cache.coresideNode := TLWidthWidget(4) := _ } + // dmemNodes go through coalescerNode l1cache.coresideNode :=* coalescerNode l1cache.masterNode } @@ -587,8 +588,6 @@ class VortexTLAdapter( io.outReq.bits.size := io.inReq.bits.size io.outReq.bits.source := io.inReq.bits.source io.outReq.bits.address := io.inReq.bits.address - // this is just to double-check TLWidthWidget is in place - require(io.inReq.bits.size.getWidth == bundle.params.sizeBits) // Get requires contiguous mask; only copy core's potentially-partial mask // when writing io.outReq.bits.mask := Mux(edge.hasData(io.outReq.bits), From f187291a9c6fcbb49f0c8ce8620fe0fbb1856f12 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 12:51:34 -0800 Subject: [PATCH 12/36] VortexBank: Update addResource for vortex2; WIP fix params --- src/main/scala/rocket/VortexBank.scala | 305 +++++++++++++------------ 1 file changed, 154 insertions(+), 151 deletions(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index 1c622b3..7c7ee41 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -203,7 +203,7 @@ class VortexBankImp( config: VortexL1Config ) extends LazyModuleImp(outer) { val vxCache = Module( - new VX_cache( + new VX_cache_top( WORD_SIZE = config.wordSize, CACHE_LINE_SIZE = 
config.cacheLineSize, CORE_TAG_WIDTH = config.coreTagPlusSizeWidth, @@ -389,70 +389,63 @@ class VortexBankImp( VXReq2TLReq } -class VX_cache( - CACHE_ID: Int = 0, // seems to be only used for debug trace prints +class VX_cache_top( + // TODO: INSTANCE_ID CACHE_SIZE: Int = 16384 / 4, // CACHE_ID, "NUM_REQS" -> 1, // force to instantiate single bank by setting NUM_REQS to 1 "CACHE_SIZE" -> CACHE_SIZE, - "CACHE_LINE_SIZE" -> CACHE_LINE_SIZE, - "NUM_PORTS" -> NUM_PORTS, + "LINE_SIZE" -> CACHE_LINE_SIZE, + "NUM_BANKS" -> NUM_BANKS, + "NUM_WAYS" -> NUM_WAYS, "WORD_SIZE" -> WORD_SIZE, - "CREQ_SIZE" -> CREQ_SIZE, "CRSQ_SIZE" -> CRSQ_SIZE, "MSHR_SIZE" -> MSHR_SIZE, "MRSQ_SIZE" -> MRSQ_SIZE, "MREQ_SIZE" -> MREQ_SIZE, "WRITE_ENABLE" -> WRITE_ENABLE, - "CORE_TAG_WIDTH" -> CORE_TAG_WIDTH, - "CORE_TAG_ID_BITS" -> CORE_TAG_ID_BITS, - "MEM_TAG_WIDTH" -> MEM_TAG_WIDTH, - "BANK_ADDR_OFFSET" -> BANK_ADDR_OFFSET, - "NC_ENABLE" -> NC_ENABLE + "UUID_WIDTH" -> UUID_WIDTH, + "TAG_WIDTH" -> CORE_TAG_WIDTH, + // "MEM_TAG_WIDTH" -> MEM_TAG_WIDTH, ) ) with HasBlackBoxResource { + // require(MEM_) + val io = IO(new Bundle { val clk = Input(Clock()) val reset = Input(Reset()) - // We should be able to turn the following into TileLink easily - // CACHE <> CORE val core_req_valid = Input(Bool()) val core_req_rw = Input(Bool()) - val core_req_addr = Input(UInt(WORD_ADDR_WIDTH.W)) val core_req_byteen = Input(UInt(WORD_SIZE.W)) + val core_req_addr = Input(UInt(WORD_ADDR_WIDTH.W)) val core_req_data = Input(UInt((WORD_SIZE * 8).W)) val core_req_tag = Input(UInt(CORE_TAG_WIDTH.W)) val core_req_ready = Output(Bool()) val core_rsp_valid = Output(Bool()) // 1 bit wide - val core_rsp_tmask = - Output(Bool()) // 1 bit wide, probably can ignore (check waveform) val core_rsp_data = Output(UInt((WORD_SIZE * 8).W)) val core_rsp_tag = Output(UInt(CORE_TAG_WIDTH.W)) val core_rsp_ready = Input(Bool()) @@ -472,132 +465,142 @@ class VX_cache( val mem_rsp_ready = Output(Bool()) }) - 
addResource("/vsrc/vortex/hw/rtl/VX_dispatch.sv") - addResource("/vsrc/vortex/hw/rtl/VX_issue.sv") + addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bank.sv") + addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bypass.sv") + addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_data.sv") addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh") - addResource("/vsrc/vortex/hw/rtl/VX_warp_sched.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_lerp.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_addr.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_mem.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_format.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sampler.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_unit.sv") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh") - addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv") - addResource("/vsrc/vortex/hw/rtl/VX_scope.vh") - addResource("/vsrc/vortex/hw/rtl/VX_fpu_unit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_scoreboard.sv") - addResource("/vsrc/vortex/hw/rtl/VX_writeback.sv") - addResource("/vsrc/vortex/hw/rtl/VX_muldiv.sv") - addResource("/vsrc/vortex/hw/rtl/VX_decode.sv") - addResource("/vsrc/vortex/hw/rtl/VX_ibuffer.sv") - addResource("/vsrc/vortex/hw/rtl/VX_icache_stage.sv") - addResource("/vsrc/vortex/hw/rtl/VX_gpu_unit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_trace_instr.vh") - addResource("/vsrc/vortex/hw/rtl/VX_gpu_types.vh") - addResource("/vsrc/vortex/hw/rtl/VX_config.vh") - addResource("/vsrc/vortex/hw/rtl/libs/VX_lzc.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_fifo_queue.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_scan.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_find_first.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_multiplier.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_remove.sv") - 
addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_reset_relay.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_insert.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_skid_buffer.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_fixed_arbiter.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_shift_register.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_index_buffer.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_encoder.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_matrix_arbiter.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_dp_ram.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_buffer.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_rr_arbiter.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_arbiter.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_sp_ram.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_demux.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_div.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_fair_arbiter.sv") - addResource("/vsrc/vortex/hw/rtl/libs/VX_pending_size.sv") - addResource("/vsrc/vortex/hw/rtl/VX_define.vh") - addResource("/vsrc/vortex/hw/rtl/VX_csr_data.sv") - addResource("/vsrc/vortex/hw/rtl/VX_cache_arb.sv") - addResource("/vsrc/vortex/hw/rtl/VX_ipdom_stack.sv") - addResource("/vsrc/vortex/hw/rtl/VX_gpr_stage.sv") - addResource("/vsrc/vortex/hw/rtl/VX_execute.sv") - addResource("/vsrc/vortex/hw/rtl/VX_fetch.sv") - addResource("/vsrc/vortex/hw/rtl/VX_alu_unit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_platform.vh") - addResource("/vsrc/vortex/hw/rtl/VX_commit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_pipeline.sv") - addResource("/vsrc/vortex/hw/rtl/VX_lsu_unit.sv") - addResource("/vsrc/vortex/hw/rtl/VX_csr_unit.sv") - addResource("/vsrc/vortex/hw/VX_config.h") - 
addResource("/vsrc/vortex/sim/common/rvfloats.h") - addResource("/vsrc/vortex/sim/common/rvfloats.cpp") - addResource("/csrc/softfloat/include/internals.h") - addResource("/csrc/softfloat/include/primitives.h") - addResource("/csrc/softfloat/include/primitiveTypes.h") - addResource("/csrc/softfloat/include/softfloat.h") - addResource("/csrc/softfloat/include/softfloat_types.h") - addResource("/csrc/softfloat/RISCV/specialize.h") - addResource("/vsrc/vortex/hw/dpi/float_dpi.cpp") - addResource("/vsrc/vortex/hw/dpi/float_dpi.vh") - addResource("/vsrc/vortex/hw/dpi/util_dpi.cpp") - addResource("/vsrc/vortex/hw/dpi/util_dpi.vh") - addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_dpi.sv") - addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_define.vh") - addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_types.vh") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_csr_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_join_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_cache_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_memsys_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_writeback_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpu_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_pipeline_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_cmt_to_csr_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_to_alu_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_alu_req_if.sv") - 
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ibuffer_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_branch_ctl_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_lsu_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_wstall_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_to_csr_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_warp_ctl_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_rsp_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fetch_to_csr_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_tex_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_req_if.sv") - addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_shared_mem.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_core_rsp_merge.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_tag_access.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_core_req_bank_sel.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_bank.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_data_access.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_flush_ctrl.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_nc_bypass.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_miss_resrv.sv") + addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_init.sv") + addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_mshr.sv") addResource("/vsrc/vortex/hw/rtl/cache/VX_cache.sv") + addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_tags.sv") + addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_top.sv") + + // 
addResource("/vsrc/vortex/hw/rtl/VX_dispatch.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_issue.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh") + // addResource("/vsrc/vortex/hw/rtl/VX_warp_sched.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_lerp.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_addr.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_mem.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_format.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sampler.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_unit.sv") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh") + // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_scope.vh") + // addResource("/vsrc/vortex/hw/rtl/VX_fpu_unit.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_scoreboard.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_writeback.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_muldiv.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_decode.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_ibuffer.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_icache_stage.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_gpu_unit.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_trace_instr.vh") + // addResource("/vsrc/vortex/hw/rtl/VX_gpu_types.vh") + // addResource("/vsrc/vortex/hw/rtl/VX_config.vh") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_lzc.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_fifo_queue.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_scan.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_find_first.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_multiplier.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_remove.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv") + // 
addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_reset_relay.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_insert.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_skid_buffer.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_fixed_arbiter.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_shift_register.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_index_buffer.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_encoder.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_matrix_arbiter.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_dp_ram.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_buffer.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_rr_arbiter.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_arbiter.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_sp_ram.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_demux.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_div.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_fair_arbiter.sv") + // addResource("/vsrc/vortex/hw/rtl/libs/VX_pending_size.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_define.vh") + // addResource("/vsrc/vortex/hw/rtl/VX_csr_data.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_cache_arb.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_ipdom_stack.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_gpr_stage.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_execute.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_fetch.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_alu_unit.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_platform.vh") + // addResource("/vsrc/vortex/hw/rtl/VX_commit.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_pipeline.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_lsu_unit.sv") + // addResource("/vsrc/vortex/hw/rtl/VX_csr_unit.sv") + // 
addResource("/vsrc/vortex/hw/VX_config.h") + // addResource("/vsrc/vortex/sim/common/rvfloats.h") + // addResource("/vsrc/vortex/sim/common/rvfloats.cpp") + // addResource("/csrc/softfloat/include/internals.h") + // addResource("/csrc/softfloat/include/primitives.h") + // addResource("/csrc/softfloat/include/primitiveTypes.h") + // addResource("/csrc/softfloat/include/softfloat.h") + // addResource("/csrc/softfloat/include/softfloat_types.h") + // addResource("/csrc/softfloat/RISCV/specialize.h") + // addResource("/vsrc/vortex/hw/dpi/float_dpi.cpp") + // addResource("/vsrc/vortex/hw/dpi/float_dpi.vh") + // addResource("/vsrc/vortex/hw/dpi/util_dpi.cpp") + // addResource("/vsrc/vortex/hw/dpi/util_dpi.vh") + // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_dpi.sv") + // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_define.vh") + // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_types.vh") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_rsp_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_csr_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_join_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_cache_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_memsys_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_writeback_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpu_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_pipeline_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_rsp_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_cmt_to_csr_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_to_alu_if.sv") + // 
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_rsp_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_alu_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ibuffer_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_branch_ctl_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_rsp_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_lsu_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_wstall_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_rsp_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_to_csr_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_warp_ctl_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_rsp_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fetch_to_csr_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_tex_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_req_if.sv") + // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_req_if.sv") + // // addResource("/vsrc/vortex/hw/rtl/cache/VX_shared_mem.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_core_rsp_merge.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_tag_access.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_core_req_bank_sel.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_bank.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_data_access.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_flush_ctrl.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_nc_bypass.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_miss_resrv.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache.sv") } From bd1aaaccfeddc82d39aa2d0a0024234e8a4b92d4 Mon Sep 17 
00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 12:52:23 -0800 Subject: [PATCH 13/36] Bump vortex with trace and CSR fix --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index 99207c8..5e5c625 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit 99207c862c104ee90490ad8d6c50755409fcf029 +Subproject commit 5e5c625759175bbed0e92c57602345d897918518 From c5e37dd3b8ce0f5a1ed5752a2a039c8e63e386da Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 14:55:52 -0800 Subject: [PATCH 14/36] Rename l2ReqSourceGenSize -> memSideSourceIds --- src/main/scala/rocket/VortexBank.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index 7c7ee41..a0b9e3f 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -16,7 +16,7 @@ case class VortexL1Config( coreTagWidth: Int, writeInfoReqQSize: Int, mshrSize: Int, - l2ReqSourceGenSize: Int, + memSideSourceIds: Int, uncachedAddrSets: Seq[AddressSet], icacheInstAddrSets: Seq[AddressSet] ) { @@ -24,7 +24,7 @@ case class VortexL1Config( log2Ceil(wordSize) + coreTagWidth } require( - mshrSize == l2ReqSourceGenSize, + mshrSize == memSideSourceIds, "MSHR size must match the number of sourceIds to downstream." 
) } @@ -37,7 +37,7 @@ object defaultVortexL1Config coreTagWidth = 8, writeInfoReqQSize = 16, mshrSize = 8, - l2ReqSourceGenSize = 8, + memSideSourceIds = 8, uncachedAddrSets = Seq(AddressSet(0x2000000L, 0xffL)), icacheInstAddrSets = Seq(AddressSet(0x80000000L, 0xfffffffL)) ) @@ -101,7 +101,7 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters) clients = Seq( TLMasterParameters.v1( name = "VortexBank", - sourceId = IdRange(0, 1 << (log2Ceil(config.l2ReqSourceGenSize) + 5)), + sourceId = IdRange(0, 1 << (log2Ceil(config.memSideSourceIds) + 5 /*FIXME: why is this here?*/)), supportsProbe = TransferSizes(1, config.wordSize), supportsGet = TransferSizes(1, config.wordSize), supportsPutFull = TransferSizes(1, config.wordSize), @@ -177,7 +177,7 @@ class VortexBank( clients = Seq( TLMasterParameters.v1( name = "VortexBank", - sourceId = IdRange(0, config.l2ReqSourceGenSize), + sourceId = IdRange(0, config.memSideSourceIds), supportsProbe = TransferSizes(1, config.wordSize), supportsGet = TransferSizes(1, config.wordSize), supportsPutFull = TransferSizes(1, config.wordSize), @@ -332,7 +332,7 @@ class VortexBankImp( // separate source ID allocator to solve this. 
val sourceGen = Module( new NewSourceGenerator( - log2Ceil(config.l2ReqSourceGenSize), + log2Ceil(config.memSideSourceIds), metadata = Some(UInt(32.W)), ignoreInUse = false ) @@ -425,7 +425,7 @@ class VX_cache_top( "WRITE_ENABLE" -> WRITE_ENABLE, "UUID_WIDTH" -> UUID_WIDTH, "TAG_WIDTH" -> CORE_TAG_WIDTH, - // "MEM_TAG_WIDTH" -> MEM_TAG_WIDTH, + "MEM_TAG_WIDTH" -> MEM_TAG_WIDTH, ) ) with HasBlackBoxResource { From b66be6c3ae7edddf133bcded9173dbe0d158c73d Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 16:53:41 -0800 Subject: [PATCH 15/36] Respect VX_cache's MEM_TAG_WIDTH; rename coalToVxCacheNode --- src/main/scala/rocket/VortexBank.scala | 46 +++++++++++++++++--------- src/main/scala/tile/VortexTile.scala | 3 +- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index a0b9e3f..e642415 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -49,8 +49,8 @@ class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters) // dcache banks val dcache_banks = Seq.tabulate(config.numBanks) { bankId => - val bank = LazyModule(new VortexBank(config, bankId)) - bank + val dcache_bank = LazyModule(new VortexBank(config, bankId)) + dcache_bank } // passthrough val passThrough = LazyModule(new VortexBankPassThrough(config)) @@ -61,9 +61,9 @@ class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters) // core-side crossbar that arbitrates core requests to banks protected val bankXbar = LazyModule(new TLXbar) bankXbar.node :=* coresideNode - dcache_banks.foreach { _.coalToVxCacheNode :=* bankXbar.node } - passThrough.coalToVxCacheNode :=* bankXbar.node - icache_bank.coalToVxCacheNode :=* bankXbar.node + dcache_banks.foreach { _.coresideNode :=* bankXbar.node } + passThrough.coresideNode :=* bankXbar.node + icache_bank.coresideNode :=* bankXbar.node // master node that exposes to and drives the downstream val 
masterNode = TLIdentityNode() @@ -101,7 +101,12 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters) clients = Seq( TLMasterParameters.v1( name = "VortexBank", - sourceId = IdRange(0, 1 << (log2Ceil(config.memSideSourceIds) + 5 /*FIXME: why is this here?*/)), + sourceId = IdRange( + 0, + 1 << (log2Ceil( + config.memSideSourceIds + ) + 5 /*FIXME: give more sourceId so that passthrough doesn't block; hacky*/ ) + ), supportsProbe = TransferSizes(1, config.wordSize), supportsGet = TransferSizes(1, config.wordSize), supportsPutFull = TransferSizes(1, config.wordSize), @@ -111,14 +116,14 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters) ) ) - val coalToVxCacheNode = TLManagerNode(managerParam) + val coresideNode = TLManagerNode(managerParam) val vxCacheFetchNode = TLClientNode(clientParam) val vxCacheToL2Node = TLIdentityNode() vxCacheToL2Node := TLWidthWidget(config.cacheLineSize) := vxCacheFetchNode // the implementation to make everything a pass through lazy val module = new LazyModuleImp(this) { - val (upstream, _) = coalToVxCacheNode.in(0) + val (upstream, _) = coresideNode.in(0) val (downstream, _) = vxCacheFetchNode.out(0) downstream.a <> upstream.a @@ -187,7 +192,8 @@ class VortexBank( ) ) - val coalToVxCacheNode = TLManagerNode(managerParam) + // Core -> VxCache + val coresideNode = TLManagerNode(managerParam) val vxCacheToL2Node = TLIdentityNode() val vxCacheFetchNode = TLClientNode(clientParam) @@ -207,7 +213,9 @@ class VortexBankImp( WORD_SIZE = config.wordSize, CACHE_LINE_SIZE = config.cacheLineSize, CORE_TAG_WIDTH = config.coreTagPlusSizeWidth, - MSHR_SIZE = config.mshrSize + // MSHR_SIZE = config.mshrSize + // NUM_BANKS is set to 1 to treat a whole VX_cache_top instance as a + // single bank ) ); @@ -250,7 +258,7 @@ class VortexBankImp( // Translate TL request from Coalescer to requests for VX_cache def TLReq2VXReq = { - val (tlInFromCoal, _) = outer.coalToVxCacheNode.in.head + val (tlInFromCoal, _) = 
outer.coresideNode.in.head // coal -> vxCache tlInFromCoal.a.ready := @@ -407,8 +415,6 @@ class VX_cache_top( CORE_TAG_WIDTH: Int = 16, // source ID ranges from 0 to 1 << 10, we need to allocate upper bits to save size WORD_ADDR_WIDTH: Int = 28, // 16 byte "word" = 4 bits - MEM_TAG_WIDTH: Int = - 14, // Elaborated value is also completely different from (32 - log2Ceil(CACHE_LINE_SIZE)). This should match with sourceIds on client node associated with this cache MEM_ADDR_WIDTH: Int = 28 // 16 byte cache line = 4 bits ) extends BlackBox( Map( @@ -425,12 +431,20 @@ class VX_cache_top( "WRITE_ENABLE" -> WRITE_ENABLE, "UUID_WIDTH" -> UUID_WIDTH, "TAG_WIDTH" -> CORE_TAG_WIDTH, - "MEM_TAG_WIDTH" -> MEM_TAG_WIDTH, + // Although VX_cache_top exposes it as a parameter, MEM_TAG_WIDTH is + // not really configurable -- it is set to be a concatenation of the + // MSHR id and cache bank id. Instead of trying to configure it from + // Chisel side, we try to figure out its value that's elaborated in the + // Verilog side and configure the Chisel io width correspondingly. 
+ // "MEM_TAG_WIDTH" -> MEM_TAG_WIDTH ) ) with HasBlackBoxResource { - // require(MEM_) + def memTagWidth(mshrSize: Int, numBanks: Int): Int = + log2Ceil(mshrSize) + log2Ceil(numBanks) + val MEM_TAG_WIDTH = memTagWidth(MSHR_SIZE, NUM_BANKS) + println(s"====== VortexBank: MEM_TAG_WIDTH = ${MEM_TAG_WIDTH}") val io = IO(new Bundle { val clk = Input(Clock()) @@ -466,7 +480,7 @@ class VX_cache_top( }) addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bank.sv") - addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bypass.sv") + // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bypass.sv") addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_data.sv") addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh") addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_init.sv") diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 5eefadd..86f33c5 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -283,8 +283,7 @@ class VortexTile private ( val l1cache = LazyModule(new VortexL1Cache(vortexL1Config)) // Connect L1 with imem_fetch_interface without XBar - // coalToVxCacheNode is a bad naming, it really means up steam of vxBank in whihc it takes input - // imemNodes.foreach { l1cache.icache_bank.coalToVxCacheNode := TLWidthWidget(4) := _ } + // imemNodes.foreach { l1cache.icache_bank.coresideNode := TLWidthWidget(4) := _ } imemNodes.foreach { l1cache.coresideNode := TLWidthWidget(4) := _ } // dmemNodes go through coalescerNode l1cache.coresideNode :=* coalescerNode From d45cf835cf521bd7ccab8f8f029ac7deefcb08be Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 18:42:58 -0800 Subject: [PATCH 16/36] Remove dedicated icache bank from VortexBank --- src/main/scala/rocket/VortexBank.scala | 15 ++++++++------- src/main/scala/tile/VortexTile.scala | 15 +++++---------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/main/scala/rocket/VortexBank.scala 
b/src/main/scala/rocket/VortexBank.scala index e642415..a606c58 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -38,14 +38,15 @@ object defaultVortexL1Config writeInfoReqQSize = 16, mshrSize = 8, memSideSourceIds = 8, - uncachedAddrSets = Seq(AddressSet(0x2000000L, 0xffL)), - icacheInstAddrSets = Seq(AddressSet(0x80000000L, 0xfffffffL)) + // Don't cache CLINT region to ensure coherent access + uncachedAddrSets = Seq(AddressSet(0x2000000L, 0xffffL)), + icacheInstAddrSets = Seq(AddressSet(0x80000000L, 0x0fffffffL)) ) class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters) extends LazyModule { // icache bank - val icache_bank = LazyModule(new VortexBank(config, 0, isICache = true)) + // val icache_bank = LazyModule(new VortexBank(config, 0, isICache = true)) // dcache banks val dcache_banks = Seq.tabulate(config.numBanks) { bankId => @@ -63,13 +64,13 @@ class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters) bankXbar.node :=* coresideNode dcache_banks.foreach { _.coresideNode :=* bankXbar.node } passThrough.coresideNode :=* bankXbar.node - icache_bank.coresideNode :=* bankXbar.node + // icache_bank.coresideNode :=* bankXbar.node // master node that exposes to and drives the downstream val masterNode = TLIdentityNode() dcache_banks.foreach { masterNode := _.vxCacheToL2Node } masterNode := passThrough.vxCacheToL2Node - masterNode := icache_bank.vxCacheToL2Node + // masterNode := icache_bank.vxCacheToL2Node lazy val module = new LazyModuleImp(this) } @@ -141,7 +142,6 @@ class VortexBank( def generateAddressSets(): Seq[AddressSet] = { if (isICache) { config.icacheInstAddrSets - // Seq(AddressSet(0x00000000L, 0xFFFFFFFFL)) } else { // suppose have 4 bank // base for bank 1: ...000000|01|0000 @@ -149,7 +149,8 @@ class VortexBank( val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize) val base = 0x00000000L | (bankId * config.wordSize) - val excludeSets = (config.uncachedAddrSets ++ 
config.icacheInstAddrSets) + // val excludeSets = (config.uncachedAddrSets ++ config.icacheInstAddrSets) + val excludeSets = config.uncachedAddrSets var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask)) for (excludeSet <- excludeSets) { remainingSets = remainingSets.flatMap(_.subtract(excludeSet)) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 86f33c5..ab29009 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -282,25 +282,20 @@ class VortexTile private ( ) val l1cache = LazyModule(new VortexL1Cache(vortexL1Config)) - // Connect L1 with imem_fetch_interface without XBar - // imemNodes.foreach { l1cache.icache_bank.coresideNode := TLWidthWidget(4) := _ } - imemNodes.foreach { l1cache.coresideNode := TLWidthWidget(4) := _ } + // // Connect L1 with imem_fetch_interface without XBar + // // imemNodes.foreach { l1cache.icache_bank.coresideNode := TLWidthWidget(4) := _ } + // imemNodes.foreach { l1cache.coresideNode := TLWidthWidget(4) := _ } // dmemNodes go through coalescerNode l1cache.coresideNode :=* coalescerNode l1cache.masterNode } - case None => { - // Regardless of using coalescer or not, if we're not using L1, imemNode - // goes directly to tile exit xbar - // FIXME: unnatural, have L1 just handle dmem - imemNodes.foreach { tlMasterXbar.node := TLWidthWidget(4) := _ } - coalescerNode - } + case None => coalescerNode } if (vortexParams.useVxCache) { tlMasterXbar.node := TLWidthWidget(16) := memNode } else { + imemNodes.foreach { tlMasterXbar.node := TLWidthWidget(4) := _ } tlMasterXbar.node :=* l1Node } From 0d60180d0d9c97f9e77bbf6bd9f287f3b725e4b0 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 18:43:25 -0800 Subject: [PATCH 17/36] Change NUM_WAYS from 1 to 4 NUM_WAYS = 1 seems to be broken in Vortex.
This makes sgemm test pass --- src/main/scala/rocket/VortexBank.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index a606c58..f588cdd 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -403,7 +403,7 @@ class VX_cache_top( CACHE_SIZE: Int = 16384 / 4, // Date: Tue, 28 Nov 2023 19:22:11 -0800 Subject: [PATCH 18/36] Instantiate separate VortexL1Cache for imem and dmem --- src/main/scala/tile/VortexTile.scala | 30 ++++++++++++++++++---------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index ab29009..f978b26 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -271,7 +271,7 @@ class VortexTile private ( } // Conditionally instantiate L1 cache - val l1Node = p(VortexL1Key) match { + val (icacheNode, dcacheNode): (TLNode, TLNode) = p(VortexL1Key) match { case Some(vortexL1Config) => { println( s"============ Using Vortex L1 cache =================" @@ -281,22 +281,29 @@ class VortexTile private ( "Vortex L1 configuration currently only works when coalescer is also enabled." 
) - val l1cache = LazyModule(new VortexL1Cache(vortexL1Config)) - // // Connect L1 with imem_fetch_interface without XBar - // // imemNodes.foreach { l1cache.icache_bank.coresideNode := TLWidthWidget(4) := _ } - // imemNodes.foreach { l1cache.coresideNode := TLWidthWidget(4) := _ } + val icache = LazyModule(new VortexL1Cache(vortexL1Config)) + val dcache = LazyModule(new VortexL1Cache(vortexL1Config)) + // imemNodes.foreach { icache.coresideNode := TLWidthWidget(4) := _ } + assert(imemNodes.length == 1) // FIXME + icache.coresideNode := TLWidthWidget(4) := imemNodes(0) // dmemNodes go through coalescerNode - l1cache.coresideNode :=* coalescerNode - l1cache.masterNode + dcache.coresideNode :=* coalescerNode + (icache.masterNode, dcache.masterNode) + } + case None => { + val imemWideNode = TLIdentityNode() + assert(imemNodes.length == 1) // FIXME + imemWideNode := TLWidthWidget(4) := imemNodes(0) + (imemWideNode, coalescerNode) } - case None => coalescerNode } if (vortexParams.useVxCache) { tlMasterXbar.node := TLWidthWidget(16) := memNode } else { - imemNodes.foreach { tlMasterXbar.node := TLWidthWidget(4) := _ } - tlMasterXbar.node :=* l1Node + // imemNodes.foreach { tlMasterXbar.node := TLWidthWidget(4) := _ } + tlMasterXbar.node :=* icacheNode + tlMasterXbar.node :=* dcacheNode } /* below are copied from rocket */ @@ -584,7 +591,8 @@ class VortexTLAdapter( io.outReq.bits.address := io.inReq.bits.address // Get requires contiguous mask; only copy core's potentially-partial mask // when writing - io.outReq.bits.mask := Mux(edge.hasData(io.outReq.bits), + io.outReq.bits.mask := Mux( + edge.hasData(io.outReq.bits), io.inReq.bits.mask, // generate TL-correct mask edge.mask(io.inReq.bits.address, io.inReq.bits.size) From 4f274af36375d96d9441688b5dc843475087dcdf Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 19:34:32 -0800 Subject: [PATCH 19/36] Bump vortex with way_idx revert --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index 5e5c625..5825680 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit 5e5c625759175bbed0e92c57602345d897918518 +Subproject commit 5825680303c0bb2bddc475b83e40f2d11d1178a2 From f8d7169d194ac11e72e31fcbe7515bdcac15e87c Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 19:44:02 -0800 Subject: [PATCH 20/36] Delete old addResource for vortex v1 --- src/main/scala/rocket/VortexBank.scala | 128 ------------------------- 1 file changed, 128 deletions(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index f588cdd..0775ed0 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -489,134 +489,6 @@ class VX_cache_top( addResource("/vsrc/vortex/hw/rtl/cache/VX_cache.sv") addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_tags.sv") addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_top.sv") - - // addResource("/vsrc/vortex/hw/rtl/VX_dispatch.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_issue.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh") - // addResource("/vsrc/vortex/hw/rtl/VX_warp_sched.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_lerp.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_addr.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_mem.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_format.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sampler.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_unit.sv") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh") - // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_scope.vh") - // 
addResource("/vsrc/vortex/hw/rtl/VX_fpu_unit.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_scoreboard.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_writeback.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_muldiv.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_decode.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_ibuffer.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_icache_stage.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_gpu_unit.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_trace_instr.vh") - // addResource("/vsrc/vortex/hw/rtl/VX_gpu_types.vh") - // addResource("/vsrc/vortex/hw/rtl/VX_config.vh") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_lzc.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_fifo_queue.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_scan.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_find_first.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_multiplier.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_remove.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_reset_relay.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_insert.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_skid_buffer.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_fixed_arbiter.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_shift_register.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_index_buffer.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_encoder.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_matrix_arbiter.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_dp_ram.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_buffer.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_rr_arbiter.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_arbiter.sv") - // 
addResource("/vsrc/vortex/hw/rtl/libs/VX_sp_ram.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_demux.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_div.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_fair_arbiter.sv") - // addResource("/vsrc/vortex/hw/rtl/libs/VX_pending_size.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_define.vh") - // addResource("/vsrc/vortex/hw/rtl/VX_csr_data.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_cache_arb.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_ipdom_stack.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_gpr_stage.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_execute.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_fetch.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_alu_unit.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_platform.vh") - // addResource("/vsrc/vortex/hw/rtl/VX_commit.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_pipeline.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_lsu_unit.sv") - // addResource("/vsrc/vortex/hw/rtl/VX_csr_unit.sv") - // addResource("/vsrc/vortex/hw/VX_config.h") - // addResource("/vsrc/vortex/sim/common/rvfloats.h") - // addResource("/vsrc/vortex/sim/common/rvfloats.cpp") - // addResource("/csrc/softfloat/include/internals.h") - // addResource("/csrc/softfloat/include/primitives.h") - // addResource("/csrc/softfloat/include/primitiveTypes.h") - // addResource("/csrc/softfloat/include/softfloat.h") - // addResource("/csrc/softfloat/include/softfloat_types.h") - // addResource("/csrc/softfloat/RISCV/specialize.h") - // addResource("/vsrc/vortex/hw/dpi/float_dpi.cpp") - // addResource("/vsrc/vortex/hw/dpi/float_dpi.vh") - // addResource("/vsrc/vortex/hw/dpi/util_dpi.cpp") - // addResource("/vsrc/vortex/hw/dpi/util_dpi.vh") - // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_dpi.sv") - // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_define.vh") - // addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_types.vh") - // 
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_rsp_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_csr_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_join_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_cache_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_memsys_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_writeback_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpu_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_pipeline_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_rsp_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_cmt_to_csr_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_to_alu_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_rsp_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_alu_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ibuffer_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_branch_ctl_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_rsp_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_lsu_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_wstall_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_rsp_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_to_csr_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_req_if.sv") - // 
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_warp_ctl_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_rsp_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fetch_to_csr_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_tex_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_req_if.sv") - // addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_req_if.sv") - // // addResource("/vsrc/vortex/hw/rtl/cache/VX_shared_mem.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_core_rsp_merge.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_tag_access.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_core_req_bank_sel.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_bank.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_data_access.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_flush_ctrl.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_nc_bypass.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_miss_resrv.sv") - // addResource("/vsrc/vortex/hw/rtl/cache/VX_cache.sv") - } // Delete the following NewSourceGenerator when merging with origin/graphics From 74fe53010517fa3103c66d5a3dff0e515bb196f2 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 19:55:08 -0800 Subject: [PATCH 21/36] Enable configuring MSHR size from Chisel --- src/main/scala/rocket/VortexBank.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index 0775ed0..7dae4fc 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -23,6 +23,10 @@ case class VortexL1Config( def coreTagPlusSizeWidth: Int = { log2Ceil(wordSize) + coreTagWidth } + // NOTE: This assertion depends on the fact that the Vortex cache is + // configured to have 1 bank, and that it uses MSHR id as the tag of + // memory-side requests. Otherwise, it will append bank id to the tag as + // well and break this requirement. 
require( mshrSize == memSideSourceIds, "MSHR size must match the number of sourceIds to downstream." @@ -214,7 +218,7 @@ class VortexBankImp( WORD_SIZE = config.wordSize, CACHE_LINE_SIZE = config.cacheLineSize, CORE_TAG_WIDTH = config.coreTagPlusSizeWidth, - // MSHR_SIZE = config.mshrSize + MSHR_SIZE = config.mshrSize // NUM_BANKS is set to 1 to treat a whole VX_cache_top instance as a // single bank ) From 6248926b471d957f4ce5a5b78a10f3b82c78a757 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 20:08:46 -0800 Subject: [PATCH 22/36] Remove icache-specific address set and naming --- src/main/scala/rocket/VortexBank.scala | 58 ++++++++++---------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index 7dae4fc..a5f96f2 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -17,8 +17,7 @@ case class VortexL1Config( writeInfoReqQSize: Int, mshrSize: Int, memSideSourceIds: Int, - uncachedAddrSets: Seq[AddressSet], - icacheInstAddrSets: Seq[AddressSet] + uncachedAddrSets: Seq[AddressSet] ) { def coreTagPlusSizeWidth: Int = { log2Ceil(wordSize) + coreTagWidth @@ -43,19 +42,15 @@ object defaultVortexL1Config mshrSize = 8, memSideSourceIds = 8, // Don't cache CLINT region to ensure coherent access - uncachedAddrSets = Seq(AddressSet(0x2000000L, 0xffffL)), - icacheInstAddrSets = Seq(AddressSet(0x80000000L, 0x0fffffffL)) + uncachedAddrSets = Seq(AddressSet(0x2000000L, 0xffffL)) ) class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters) extends LazyModule { - // icache bank - // val icache_bank = LazyModule(new VortexBank(config, 0, isICache = true)) - - // dcache banks - val dcache_banks = Seq.tabulate(config.numBanks) { bankId => - val dcache_bank = LazyModule(new VortexBank(config, bankId)) - dcache_bank + val banks = Seq.tabulate(config.numBanks) { bankId => + // helps with name mangling in Verilog + val 
bank = LazyModule(new VortexBank(config, bankId)) + bank } // passthrough val passThrough = LazyModule(new VortexBankPassThrough(config)) @@ -66,15 +61,13 @@ class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters) // core-side crossbar that arbitrates core requests to banks protected val bankXbar = LazyModule(new TLXbar) bankXbar.node :=* coresideNode - dcache_banks.foreach { _.coresideNode :=* bankXbar.node } + banks.foreach { _.coresideNode :=* bankXbar.node } passThrough.coresideNode :=* bankXbar.node - // icache_bank.coresideNode :=* bankXbar.node // master node that exposes to and drives the downstream val masterNode = TLIdentityNode() - dcache_banks.foreach { masterNode := _.vxCacheToL2Node } + banks.foreach { masterNode := _.vxCacheToL2Node } masterNode := passThrough.vxCacheToL2Node - // masterNode := icache_bank.vxCacheToL2Node lazy val module = new LazyModuleImp(this) } @@ -139,28 +132,22 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters) class VortexBank( config: VortexL1Config, bankId: Int, - isICache: Boolean = false )(implicit p: Parameters) extends LazyModule { // Generate AddressSet by excluding Addr we don't want def generateAddressSets(): Seq[AddressSet] = { - if (isICache) { - config.icacheInstAddrSets - } else { - // suppose have 4 bank - // base for bank 1: ...000000|01|0000 - // mask for bank 1; 111111|00|1111 - val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize) - val base = 0x00000000L | (bankId * config.wordSize) + // suppose have 4 bank + // base for bank 1: ...000000|01|0000 + // mask for bank 1; 111111|00|1111 + val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize) + val base = 0x00000000L | (bankId * config.wordSize) - // val excludeSets = (config.uncachedAddrSets ++ config.icacheInstAddrSets) - val excludeSets = config.uncachedAddrSets - var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask)) - for (excludeSet <- excludeSets) { - remainingSets = 
remainingSets.flatMap(_.subtract(excludeSet)) - } - remainingSets + val excludeSets = config.uncachedAddrSets + var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask)) + for (excludeSet <- excludeSets) { + remainingSets = remainingSets.flatMap(_.subtract(excludeSet)) } + remainingSets } // Slave node to upstream @@ -340,9 +327,9 @@ class VortexBankImp( tlInFromCoal.d.bits.data := vxCache.io.core_rsp_data } - // Since Vortex L1 is a write-through cache, it doesn't bookkeep writes and - // therefore doesn't allocate a new UUID for write requests. We use a - // separate source ID allocator to solve this. + // Since Vortex L1 is a write-through cache, it doesn't bookkeep writes in + // its MSHR and therefore doesn't allocate a new tag id for write requests. + // We use a separate source ID allocator to solve this. val sourceGen = Module( new NewSourceGenerator( log2Ceil(config.memSideSourceIds), @@ -435,7 +422,7 @@ class VX_cache_top( "MREQ_SIZE" -> MREQ_SIZE, "WRITE_ENABLE" -> WRITE_ENABLE, "UUID_WIDTH" -> UUID_WIDTH, - "TAG_WIDTH" -> CORE_TAG_WIDTH, + "TAG_WIDTH" -> CORE_TAG_WIDTH // Although VX_cache_top exposes it as a parameter, MEM_TAG_WIDTH is // not really configurable -- it is set to be a concatenation of the // MSHR id and cache bank id. 
Instead of trying to configure it from @@ -449,7 +436,6 @@ class VX_cache_top( def memTagWidth(mshrSize: Int, numBanks: Int): Int = log2Ceil(mshrSize) + log2Ceil(numBanks) val MEM_TAG_WIDTH = memTagWidth(MSHR_SIZE, NUM_BANKS) - println(s"====== VortexBank: MEM_TAG_WIDTH = ${MEM_TAG_WIDTH}") val io = IO(new Bundle { val clk = Input(Clock()) From 0589b310f18c01414fc40b3955c55e9d0a3ce374 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 20:32:49 -0800 Subject: [PATCH 23/36] Add missing parameters for VX_cache_top --- src/main/scala/rocket/VortexBank.scala | 27 ++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index a5f96f2..0443ad4 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -119,7 +119,7 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters) val vxCacheToL2Node = TLIdentityNode() vxCacheToL2Node := TLWidthWidget(config.cacheLineSize) := vxCacheFetchNode - // the implementation to make everything a pass through + // passthrough logic lazy val module = new LazyModuleImp(this) { val (upstream, _) = coresideNode.in(0) val (downstream, _) = vxCacheFetchNode.out(0) @@ -206,8 +206,6 @@ class VortexBankImp( CACHE_LINE_SIZE = config.cacheLineSize, CORE_TAG_WIDTH = config.coreTagPlusSizeWidth, MSHR_SIZE = config.mshrSize - // NUM_BANKS is set to 1 to treat a whole VX_cache_top instance as a - // single bank ) ); @@ -390,10 +388,10 @@ class VortexBankImp( } class VX_cache_top( + // these values should match the default settings in Verilog // TODO: INSTANCE_ID CACHE_SIZE: Int = 16384 / 4, // 1, // force to instantiate single bank by setting NUM_REQS to 1 + // NOTE: NUM_REQS is analogous to SIMD width, whereas NUM_BANKS is the + // actual number of banks. VX_cache.sv instantiates VX_stream_xbar + // that arbitrates the higher NUM_REQS into NUM_BANKS. 
Since we do + // that logic ourselves using TL units, fix those params to 1 for the + // Verilog side. + "NUM_REQS" -> 1, "CACHE_SIZE" -> CACHE_SIZE, "LINE_SIZE" -> CACHE_LINE_SIZE, - "NUM_BANKS" -> NUM_BANKS, + // NUM_BANKS is set to 1 to treat a whole VX_cache_top instance as a + // single bank + "NUM_BANKS" -> 1, "NUM_WAYS" -> NUM_WAYS, "WORD_SIZE" -> WORD_SIZE, "CRSQ_SIZE" -> CRSQ_SIZE, @@ -422,7 +427,9 @@ class VX_cache_top( "MREQ_SIZE" -> MREQ_SIZE, "WRITE_ENABLE" -> WRITE_ENABLE, "UUID_WIDTH" -> UUID_WIDTH, - "TAG_WIDTH" -> CORE_TAG_WIDTH + "TAG_WIDTH" -> CORE_TAG_WIDTH, + "CORE_OUT_REG" -> CORE_OUT_REG, + "MEM_OUT_REG" -> MEM_OUT_REG, // Although VX_cache_top exposes it as a parameter, MEM_TAG_WIDTH is // not really configurable -- it is set to be a concatenation of the // MSHR id and cache bank id. Instead of trying to configure it from @@ -435,7 +442,7 @@ class VX_cache_top( def memTagWidth(mshrSize: Int, numBanks: Int): Int = log2Ceil(mshrSize) + log2Ceil(numBanks) - val MEM_TAG_WIDTH = memTagWidth(MSHR_SIZE, NUM_BANKS) + val MEM_TAG_WIDTH = memTagWidth(MSHR_SIZE, 1/* NUM_BANKS */) val io = IO(new Bundle { val clk = Input(Clock()) From 2bdaf3a0a866b614a6afa42978de3aa5b8204792 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 28 Nov 2023 22:49:48 -0800 Subject: [PATCH 24/36] Fix undefined {MEM,WORD}_ADDR_SIZE --- src/main/scala/rocket/VortexBank.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexBank.scala index 0443ad4..5d82c0e 100644 --- a/src/main/scala/rocket/VortexBank.scala +++ b/src/main/scala/rocket/VortexBank.scala @@ -444,6 +444,11 @@ class VX_cache_top( log2Ceil(mshrSize) + log2Ceil(numBanks) val MEM_TAG_WIDTH = memTagWidth(MSHR_SIZE, 1/* NUM_BANKS */) + // These logic is fixed in VX_cache_define.vh + val memAddrWidth = 32 // FIXME hardcoded + val cacheWordAddrWidth = 32 - log2Ceil(WORD_SIZE) + val cacheMemAddrWidth = 32 - 
log2Ceil(CACHE_LINE_SIZE) + val io = IO(new Bundle { val clk = Input(Clock()) val reset = Input(Reset()) @@ -452,7 +457,7 @@ class VX_cache_top( val core_req_valid = Input(Bool()) val core_req_rw = Input(Bool()) val core_req_byteen = Input(UInt(WORD_SIZE.W)) - val core_req_addr = Input(UInt(WORD_ADDR_WIDTH.W)) + val core_req_addr = Input(UInt(cacheWordAddrWidth.W)) val core_req_data = Input(UInt((WORD_SIZE * 8).W)) val core_req_tag = Input(UInt(CORE_TAG_WIDTH.W)) val core_req_ready = Output(Bool()) @@ -466,7 +471,7 @@ class VX_cache_top( val mem_req_valid = Output(Bool()) val mem_req_rw = Output(Bool()) val mem_req_byteen = Output(UInt(CACHE_LINE_SIZE.W)) - val mem_req_addr = Output(UInt(MEM_ADDR_WIDTH.W)) + val mem_req_addr = Output(UInt(cacheMemAddrWidth.W)) val mem_req_data = Output(UInt((CACHE_LINE_SIZE * 8).W)) val mem_req_tag = Output(UInt(MEM_TAG_WIDTH.W)) val mem_req_ready = Input(Bool()) From 4eb9973b2c90d2b4b4cf168fb5a8c12986f4151c Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Wed, 29 Nov 2023 15:13:17 -0800 Subject: [PATCH 25/36] Attempt to replicate bitwidth logic for dmem/imem tag --- src/main/scala/rocket/VortexCore.scala | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index f956523..efb4c3e 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -40,14 +40,20 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val reset_vector = Input(UInt(resetVectorLen.W)) val interrupts = Input(new CoreInterrupts()) + // TODO: parametrize + val NW_WIDTH = 1 + val uuidWidth = 44 + val imemTagWidth = uuidWidth + NW_WIDTH + val dmemTagWidth = 46 // FIXME: hardcoded; see gpu_pkg.sv + // conditionally instantiate ports depending on whether we want to use VX_cache or not val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { - val a = Decoupled(new 
VortexBundleA(tagWidth = 46, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = 46, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(tagWidth = imemTagWidth, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = imemTagWidth, dataWidth = 32))) })) else None val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(tagWidth = 46, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = 46, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(tagWidth = dmemTagWidth, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { val a = Decoupled(new VortexBundleA(tagWidth = 15, dataWidth = 128)) From 287910880413f406d49922a4b68894610f5c709b Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 1 Dec 2023 19:01:06 -0800 Subject: [PATCH 26/36] Accept coalescer enable at WithCoalescer config --- src/main/scala/tile/VortexTile.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index f978b26..74619fe 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -262,7 +262,7 @@ class VortexTile private ( val coalescerNode = p(CoalescerKey) match { case Some(coalescerParam) => { val coal = LazyModule( - new CoalescingUnit(coalescerParam.copy(enable = true)) + new CoalescingUnit(coalescerParam) ) coal.cpuNode :=* dmemAggregateNode coal.aggregateNode // N+1 lanes @@ -276,10 +276,10 @@ class VortexTile private ( println( s"============ Using Vortex L1 cache =================" ) - require( - p(CoalescerKey).isDefined, - "Vortex L1 configuration currently only works when coalescer is also enabled." 
- ) + // require( + // p(CoalescerKey).isDefined, + // "Vortex L1 configuration currently only works when coalescer is also enabled." + // ) val icache = LazyModule(new VortexL1Cache(vortexL1Config)) val dcache = LazyModule(new VortexL1Cache(vortexL1Config)) From efac9b7d0b9e5bb45a80db0bbf0420598de68989 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 10 Dec 2023 05:58:00 -0800 Subject: [PATCH 27/36] Better logic for {imem,dmem}TagWidth --- src/main/scala/rocket/VortexCore.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index efb4c3e..06f7055 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -41,10 +41,12 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val interrupts = Input(new CoreInterrupts()) // TODO: parametrize - val NW_WIDTH = 1 - val uuidWidth = 44 - val imemTagWidth = uuidWidth + NW_WIDTH - val dmemTagWidth = 46 // FIXME: hardcoded; see gpu_pkg.sv + val numWarps = 4 + val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps)) + val UUID_WIDTH = 44 + val imemTagWidth = UUID_WIDTH + NW_WIDTH + val LSUQ_TAG_BITS = 4 + val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS // conditionally instantiate ports depending on whether we want to use VX_cache or not val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { From 65446946be5a2050e5b20eda09f62d865a2faabd Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 10 Dec 2023 05:58:21 -0800 Subject: [PATCH 28/36] Bump vortex --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index 5825680..c2cba37 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit 5825680303c0bb2bddc475b83e40f2d11d1178a2 +Subproject commit 
c2cba3728a061c76d95123e5b264ee9592683cab From cb2bc8cc0aa67d9da54e6d976c0ddd29058dde8e Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 1 Jan 2024 00:08:25 -0800 Subject: [PATCH 29/36] Rename VortexBank -> VortexCache --- src/main/scala/rocket/{VortexBank.scala => VortexCache.scala} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/scala/rocket/{VortexBank.scala => VortexCache.scala} (100%) diff --git a/src/main/scala/rocket/VortexBank.scala b/src/main/scala/rocket/VortexCache.scala similarity index 100% rename from src/main/scala/rocket/VortexBank.scala rename to src/main/scala/rocket/VortexCache.scala From 15c3c55cb63a336d297c5c7b98ee7a67479e13b7 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 1 Jan 2024 00:46:01 -0800 Subject: [PATCH 30/36] Make empty sharedmem diplomacy nodes --- src/main/scala/rocket/VortexCore.scala | 4 ++-- src/main/scala/tile/VortexTile.scala | 33 ++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 06f7055..75195f7 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -103,6 +103,7 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) // addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v") // addResource("/vsrc/vortex/hw/syn/modelsim/vortex_tb.v") + addResource("/vsrc/vortex/hw/rtl/VX_gpu_pkg.sv") // addResource("/vsrc/vortex/hw/rtl/VX_cluster.sv") @@ -164,14 +165,13 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) // addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_arb.sv") // addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_bus_if.sv") // addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_unit.sv") - // Only used for caches + // mem_arb is used in VX_socket or VX_cache_cluster // addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_arb.sv") addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_bus_if.sv") // 
addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_perf_if.sv") addResource("/vsrc/vortex/hw/rtl/mem/VX_shared_mem.sv") addResource("/vsrc/vortex/hw/rtl/mem/VX_smem_switch.sv") - // tex_unit missing in Vortex 2.0 // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv") // addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv") diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 74619fe..e10e88e 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -187,6 +187,8 @@ class VortexTile private ( "We recommend setting nSrcIds to at least 16." ) + val smemSourceWidth = 4 // FIXME: hardcoded + val imemNodes = Seq.tabulate(1) { i => TLClientNode( Seq( @@ -228,6 +230,30 @@ class VortexTile private ( ) ) } + + val smemNodes = Seq.tabulate(numLanes) { i => + TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = Seq( + TLMasterParameters.v1( + sourceId = IdRange(0, 1 << smemSourceWidth), + name = s"Vortex Core ${vortexParams.hartId} SharedMem Lane $i", + requestFifo = true, + supportsProbe = + TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsPutFull = + TransferSizes(1, lazyCoreParamsView.coreDataBytes), + supportsPutPartial = + TransferSizes(1, lazyCoreParamsView.coreDataBytes) + ) + ) + ) + ) + ) + } + // combine outgoing per-lane dmemNode into 1 idenity node // // NOTE: We need TLWidthWidget here because there might be a data width @@ -298,6 +324,13 @@ class VortexTile private ( } } + // Instantiate sharedmem + // TODO: parametrize + val sharedmem = LazyModule(new TLRAM(AddressSet(0xff000000L, 0x00ffffffL), beatBytes = 4 /*FIXME*/)) + val smemXbar = LazyModule(new TLXbar) + smemNodes.foreach(smemXbar.node := _) + sharedmem.node :=* smemXbar.node + if (vortexParams.useVxCache) { tlMasterXbar.node := TLWidthWidget(16) := memNode } else { From 95e05f54576f12de9b182adc196b40a7af5c06bb Mon Sep 17 00:00:00 
2001 From: Hansung Kim Date: Mon, 1 Jan 2024 02:24:57 -0800 Subject: [PATCH 31/36] Connect smem core IO to TL with translation --- src/main/scala/rocket/VortexCore.scala | 10 +- src/main/scala/tile/VortexTile.scala | 199 ++++++++++++++----------- 2 files changed, 123 insertions(+), 86 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 75195f7..6e93633 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -47,6 +47,8 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val imemTagWidth = UUID_WIDTH + NW_WIDTH val LSUQ_TAG_BITS = 4 val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS + // dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH + val smemTagWidth = dmemTagWidth // conditionally instantiate ports depending on whether we want to use VX_cache or not val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { @@ -57,6 +59,10 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val a = Decoupled(new VortexBundleA(tagWidth = dmemTagWidth, dataWidth = 32)) val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32))) })) else None + val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { + val a = Decoupled(new VortexBundleA(tagWidth = smemTagWidth, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = smemTagWidth, dataWidth = 32))) + })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { val a = Decoupled(new VortexBundleA(tagWidth = 15, dataWidth = 128)) val d = Flipped(Decoupled(new VortexBundleD(tagWidth = 15, dataWidth = 128))) @@ -103,7 +109,6 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) // addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v") // addResource("/vsrc/vortex/hw/syn/modelsim/vortex_tb.v") - 
addResource("/vsrc/vortex/hw/rtl/VX_gpu_pkg.sv") // addResource("/vsrc/vortex/hw/rtl/VX_cluster.sv") @@ -341,6 +346,5 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) } val nTotalRoCCCSRs = 0 - val coreBundle = new VortexBundle(tile) - val io = IO(coreBundle) + val io = IO(new VortexBundle(tile)) } diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index e10e88e..03ad530 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -259,9 +259,8 @@ class VortexTile private ( // NOTE: We need TLWidthWidget here because there might be a data width // mismatch between Vortex's per-lane response and the system bus when we // don't instantiate either L1 or the coalescer. This _should_ be optimized - // out when we instantiate coalescer which should handle data width conversion - // internally (which it does by... using TLWidthWidget), but probably not - // the cleanest way to do this. + // out when we instantiate either which should handle data width conversion + // internally (which it does by... using TLWidthWidget). 
val dmemAggregateNode = TLIdentityNode() dmemNodes.foreach { dmemAggregateNode := TLWidthWidget(4) := _ } @@ -326,7 +325,8 @@ class VortexTile private ( // Instantiate sharedmem // TODO: parametrize - val sharedmem = LazyModule(new TLRAM(AddressSet(0xff000000L, 0x00ffffffL), beatBytes = 4 /*FIXME*/)) + // FIXME: beatBytes should be wordSize + val sharedmem = LazyModule(new TLRAM(AddressSet(0xff000000L, 0x00ffffffL), beatBytes = 4)) val smemXbar = LazyModule(new TLXbar) smemNodes.foreach(smemXbar.node := _) sharedmem.node :=* smemXbar.node @@ -492,95 +492,128 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { outer.memNode.out(0)._1.a <> memTLAdapter.io.outReq memTLAdapter.io.outResp <> outer.memNode.out(0)._1.d } else { - val imemTLAdapter = Module( - new VortexTLAdapter( - outer.imemSourceWidth, - chiselTypeOf(core.io.imem.get(0).a.bits), - chiselTypeOf(core.io.imem.get(0).d.bits), - outer.imemNodes.head.out.head - ) - ) - // TODO: make imemNodes not a vector - imemTLAdapter.io.inReq <> core.io.imem.get(0).a - core.io.imem.get(0).d <> imemTLAdapter.io.inResp - outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq - imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d - - // @perf: this would duplicate SourceGenerator table for every lane and eat - // up some area - val dmemTLBundles = outer.dmemNodes.map(_.out.head._1) - val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ => - Module( + def connectImem = { + val imemTLAdapter = Module( new VortexTLAdapter( - outer.dmemSourceWidth, - chiselTypeOf(core.io.dmem.get(0).a.bits), - chiselTypeOf(core.io.dmem.get(0).d.bits), - outer.dmemNodes(0).out.head + outer.imemSourceWidth, + chiselTypeOf(core.io.imem.get(0).a.bits), + chiselTypeOf(core.io.imem.get(0).d.bits), + outer.imemNodes.head.out.head ) ) + // TODO: make imemNodes not a vector + imemTLAdapter.io.inReq <> core.io.imem.get(0).a + core.io.imem.get(0).d <> imemTLAdapter.io.inResp + outer.imemNodes(0).out(0)._1.a <> 
imemTLAdapter.io.outReq + imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d } - // Since the individual per-lane TL requests might come back out-of-sync between - // the lanes, but Vortex core expects the per-lane responses to be synced, - // we need to selectively fire responses that have the same source, and - // delay others. - // - // In order to do that, we pick a source from one of the valid lanes using e.g. - // an arbiter. Then using the chosen source id, we - // - lie to core that response is not valid if source doesn't match picked, and - // - lie to downstream that core is not ready if source doesn't match picked. - // - // Note that we cannot do this filtering logic using TileLink source ID, because - // we're allocating source for each lane independently. In that case, it's - // possible that lane 0's source matches lane 1/2/3's source by chance, - // even when they originated from different warps. Using Vortex's dcache req tag - // solves this issue because they use a UUID that is unique across all requests - // in the program. - // - // TODO: A cleaner solution would be to simply do a synchronized allocation - // of a same source id for all lanes. - val arb = Module( - new RRArbiter( - core.io.dmem.get.head.d.bits.source.cloneType, - outer.numLanes + def connectDmem = { + // @perf: this would duplicate SourceGenerator table for every lane and eat + // up some area + val dmemTLBundles = outer.dmemNodes.map(_.out.head._1) + val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ => + Module( + new VortexTLAdapter( + outer.dmemSourceWidth, + chiselTypeOf(core.io.dmem.get(0).a.bits), + chiselTypeOf(core.io.dmem.get(0).d.bits), + outer.dmemNodes(0).out.head + ) + ) + } + + // Since the individual per-lane TL requests might come back out-of-sync between + // the lanes, but Vortex core expects the per-lane responses to be synced, + // we need to selectively fire responses that have the same source, and + // delay others. 
+ // + // In order to do that, we pick a source from one of the valid lanes using e.g. + // an arbiter. Then using the chosen source id, we + // - lie to core that response is not valid if source doesn't match picked, and + // - lie to downstream that core is not ready if source doesn't match picked. + // + // Note that we cannot do this filtering logic using TileLink source ID, because + // we're allocating source for each lane independently. In that case, it's + // possible that lane 0's source matches lane 1/2/3's source by chance, + // even when they originated from different warps. Using Vortex's dcache req tag + // solves this issue because they use a UUID that is unique across all requests + // in the program. + // + // TODO: A cleaner solution would be to simply do a synchronized allocation + // of a same source id for all lanes. + val arb = Module( + new RRArbiter( + core.io.dmem.get.head.d.bits.source.cloneType, + outer.numLanes + ) ) - ) - arb.io.out.ready := true.B - val dmemBundles = dmemTLAdapters.map(_.io.inResp) - (arb.io.in zip dmemBundles).foreach { case (arbIn, vxDmem) => - arbIn.valid := vxDmem.valid - arbIn.bits := vxDmem.bits.source - } - val matchingSources = Wire(UInt(outer.numLanes.W)) - matchingSources := dmemBundles - .map(b => - // If there is no valid response pending across all lanes, - // matchingSources should not filter out upstream ready signals, so - // set it to all-1 - !arb.io.out.valid || (b.bits.source === arb.io.out.bits) - ) - .asUInt + arb.io.out.ready := true.B + val dmemBundles = dmemTLAdapters.map(_.io.inResp) + (arb.io.in zip dmemBundles).foreach { case (arbIn, vxDmem) => + arbIn.valid := vxDmem.valid + arbIn.bits := vxDmem.bits.source + } + val matchingSources = Wire(UInt(outer.numLanes.W)) + matchingSources := dmemBundles + .map(b => + // If there is no valid response pending across all lanes, + // matchingSources should not filter out upstream ready signals, so + // set it to all-1 + !arb.io.out.valid || 
(b.bits.source === arb.io.out.bits) + ) + .asUInt - // make connection: - // VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes - (core.io.dmem.get zip dmemTLAdapters) foreach { case (coreMem, tlAdapter) => - tlAdapter.io.inReq <> coreMem.a - coreMem.d <> tlAdapter.io.inResp - } - (core.io.dmem.get zip dmemTLAdapters).zipWithIndex.foreach { - case ((coreMem, tlAdapter), i) => - coreMem.d.valid := tlAdapter.io.inResp.valid && matchingSources(i) - tlAdapter.io.inResp.ready := coreMem.d.ready && matchingSources(i) - } - (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) => - tlOut.a <> tlAdapter.io.outReq - tlAdapter.io.outResp <> tlOut.d + // make connection: + // VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes + (core.io.dmem.get zip dmemTLAdapters) foreach { case (coreMem, tlAdapter) => + tlAdapter.io.inReq <> coreMem.a + coreMem.d <> tlAdapter.io.inResp + } + // override response channel with matchingSources + (core.io.dmem.get zip dmemTLAdapters).zipWithIndex.foreach { + case ((coreMem, tlAdapter), i) => + coreMem.d.valid := tlAdapter.io.inResp.valid && matchingSources(i) + tlAdapter.io.inResp.ready := coreMem.d.ready && matchingSources(i) + } + (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) => + tlOut.a <> tlAdapter.io.outReq + tlAdapter.io.outResp <> tlOut.d + } + + outer.dmemAggregateNode.out.foreach { bo => + dontTouch(bo._1.a) + dontTouch(bo._1.d) + } } - outer.dmemAggregateNode.out.foreach { bo => - dontTouch(bo._1.a) - dontTouch(bo._1.d) + def connectSmem = { + // @perf: this would duplicate SourceGenerator table for every lane and eat + // up some area + val smemTLBundles = outer.smemNodes.map(_.out.head._1) + val smemTLAdapters = Seq.tabulate(outer.numLanes) { _ => + Module( + new VortexTLAdapter( + outer.smemSourceWidth, + chiselTypeOf(core.io.smem.get(0).a.bits), + chiselTypeOf(core.io.smem.get(0).d.bits), + outer.smemNodes(0).out.head + ) + ) + } + (core.io.smem.get zip 
smemTLAdapters) foreach { case (coreMem, tlAdapter) => + tlAdapter.io.inReq <> coreMem.a + coreMem.d <> tlAdapter.io.inResp + } + (smemTLAdapters zip smemTLBundles) foreach { case (tlAdapter, tlOut) => + tlOut.a <> tlAdapter.io.outReq + tlAdapter.io.outResp <> tlOut.d + } } + + connectImem + connectDmem + connectSmem } // TODO: generalize for useVxCache From 8c12c7af16d0da14a7f7081982e3c372114c19bc Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 1 Jan 2024 12:49:23 -0800 Subject: [PATCH 32/36] Instantiate multiple TLRAMs as sharedmem banks --- src/main/scala/rocket/VortexCache.scala | 2 +- src/main/scala/tile/VortexTile.scala | 41 ++++++++++++++++--------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/main/scala/rocket/VortexCache.scala b/src/main/scala/rocket/VortexCache.scala index 5d82c0e..cb59115 100644 --- a/src/main/scala/rocket/VortexCache.scala +++ b/src/main/scala/rocket/VortexCache.scala @@ -139,8 +139,8 @@ class VortexBank( // suppose have 4 bank // base for bank 1: ...000000|01|0000 // mask for bank 1; 111111|00|1111 - val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize) val base = 0x00000000L | (bankId * config.wordSize) + val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize) val excludeSets = config.uncachedAddrSets var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask)) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 03ad530..5dd2aa0 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -323,13 +323,22 @@ class VortexTile private ( } } - // Instantiate sharedmem + // Instantiate sharedmem banks + // + // Instantiate the same number of banks as there are lanes. 
// TODO: parametrize - // FIXME: beatBytes should be wordSize - val sharedmem = LazyModule(new TLRAM(AddressSet(0xff000000L, 0x00ffffffL), beatBytes = 4)) + val smemBanks = Seq.tabulate(numLanes) { bankId => + // Banked-by-word (4 bytes) + // base for bank 1: ff...000000|01|00 + // mask for bank 1; 00...111111|00|11 + val base = 0xff000000L | (bankId * 4 /*wordSize*/ ) + val mask = 0x00ffffffL ^ ((numLanes - 1) * 4 /*wordSize*/ ) + LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ )) + } + // smem lanes-to-banks crossbar val smemXbar = LazyModule(new TLXbar) smemNodes.foreach(smemXbar.node := _) - sharedmem.node :=* smemXbar.node + smemBanks.foreach(_.node := smemXbar.node) if (vortexParams.useVxCache) { tlMasterXbar.node := TLWidthWidget(16) := memNode @@ -557,18 +566,19 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { val matchingSources = Wire(UInt(outer.numLanes.W)) matchingSources := dmemBundles .map(b => - // If there is no valid response pending across all lanes, - // matchingSources should not filter out upstream ready signals, so - // set it to all-1 - !arb.io.out.valid || (b.bits.source === arb.io.out.bits) - ) + // If there is no valid response pending across all lanes, + // matchingSources should not filter out upstream ready signals, so + // set it to all-1 + !arb.io.out.valid || (b.bits.source === arb.io.out.bits) + ) .asUInt // make connection: // VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes - (core.io.dmem.get zip dmemTLAdapters) foreach { case (coreMem, tlAdapter) => - tlAdapter.io.inReq <> coreMem.a - coreMem.d <> tlAdapter.io.inResp + (core.io.dmem.get zip dmemTLAdapters) foreach { + case (coreMem, tlAdapter) => + tlAdapter.io.inReq <> coreMem.a + coreMem.d <> tlAdapter.io.inResp } // override response channel with matchingSources (core.io.dmem.get zip dmemTLAdapters).zipWithIndex.foreach { @@ -601,9 +611,10 @@ class VortexTileModuleImp(outer: VortexTile) extends 
BaseTileModuleImp(outer) { ) ) } - (core.io.smem.get zip smemTLAdapters) foreach { case (coreMem, tlAdapter) => - tlAdapter.io.inReq <> coreMem.a - coreMem.d <> tlAdapter.io.inResp + (core.io.smem.get zip smemTLAdapters) foreach { + case (coreMem, tlAdapter) => + tlAdapter.io.inReq <> coreMem.a + coreMem.d <> tlAdapter.io.inResp } (smemTLAdapters zip smemTLBundles) foreach { case (tlAdapter, tlOut) => tlOut.a <> tlAdapter.io.outReq From 773cfcbd6e6fcd1bbcbcce6b63db503fe1014bd0 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 1 Jan 2024 14:27:37 -0800 Subject: [PATCH 33/36] Bump vortex for external smem --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index c2cba37..06fed1e 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit c2cba3728a061c76d95123e5b264ee9592683cab +Subproject commit 06fed1e437db1a9d95cc8b9ac40909118eae7f3f From 60cd72a9d6ced562372e6221aa2dec1d86a0d7f7 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 4 Jan 2024 00:17:00 -0800 Subject: [PATCH 34/36] Flatten dmem bundle of Vortex core IO into 1-D arrays --- src/main/scala/rocket/VortexCore.scala | 44 ++++++++++++++-------- src/main/scala/tile/VortexTile.scala | 52 ++++++++++++++++++++------ 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 6e93633..5f4caa5 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -40,28 +40,18 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val reset_vector = Input(UInt(resetVectorLen.W)) val interrupts = Input(new CoreInterrupts()) - // TODO: parametrize - val numWarps = 4 - val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps)) - val UUID_WIDTH = 44 - val imemTagWidth = UUID_WIDTH + NW_WIDTH - val 
LSUQ_TAG_BITS = 4 - val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS - // dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH - val smemTagWidth = dmemTagWidth - // conditionally instantiate ports depending on whether we want to use VX_cache or not val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { - val a = Decoupled(new VortexBundleA(tagWidth = imemTagWidth, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = imemTagWidth, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(tagWidth = tile.imemTagWidth, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.imemTagWidth, dataWidth = 32))) })) else None val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(tagWidth = dmemTagWidth, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32))) + // val a = Decoupled(new VortexBundleA(tagWidth = tile.dmemTagWidth, dataWidth = 32)) + // val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32))) })) else None val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(tagWidth = smemTagWidth, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = smemTagWidth, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { val a = Decoupled(new VortexBundleA(tagWidth = 15, dataWidth = 128)) @@ -70,6 +60,26 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle // val d = Flipped(tile.memNode.out.head._1.d.cloneType) }) else None + // Chisel doesn't support 2-D array in BlackBox interface to Verilog, so + // everything needs to be 1-D flattened 
UInt with their widths configurable by numLanes. + // + // FIXME: hardcoded bitwidths + val dmem_a_ready = Input(UInt((tile.numLanes * 1).W)) + val dmem_a_valid = Output(UInt((tile.numLanes * 1).W)) + val dmem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W)) + val dmem_a_bits_size = Output(UInt((tile.numLanes * 4).W)) + val dmem_a_bits_source = Output(UInt((tile.numLanes * tile.dmemTagWidth).W)) + val dmem_a_bits_address = Output(UInt((tile.numLanes * 32).W)) + val dmem_a_bits_mask = Output(UInt((tile.numLanes * 4).W)) + val dmem_a_bits_data = Output(UInt((tile.numLanes * 32).W)) + + val dmem_d_valid = Input(UInt((tile.numLanes * 1).W)) + val dmem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W)) + val dmem_d_bits_size = Input(UInt((tile.numLanes * 4).W)) + val dmem_d_bits_source = Input(UInt((tile.numLanes * tile.dmemTagWidth).W)) + val dmem_d_bits_data = Input(UInt((tile.numLanes * 32).W)) + val dmem_d_ready = Output(UInt((tile.numLanes * 1).W)) + // val fpu = Flipped(new FPUCoreIO()) //val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs)) //val trace = Output(new TraceBundle) @@ -88,6 +98,7 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) "CORE_ID" -> tile.tileParams.hartId, // TODO: can we get this as a parameter? 
"BOOTROM_HANG100" -> 0x10100, + "NUM_THREADS" -> tile.numLanes ) ) with HasBlackBoxResource { @@ -109,6 +120,7 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) // addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v") // addResource("/vsrc/vortex/hw/syn/modelsim/vortex_tb.v") + addResource("/vsrc/vortex/hw/rtl/VX_gpu_pkg.sv") // addResource("/vsrc/vortex/hw/rtl/VX_cluster.sv") diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 5dd2aa0..482ba7e 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -189,6 +189,16 @@ class VortexTile private ( val smemSourceWidth = 4 // FIXME: hardcoded + // TODO: parametrize + val numWarps = 4 + val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps)) + val UUID_WIDTH = 44 + val imemTagWidth = UUID_WIDTH + NW_WIDTH + val LSUQ_TAG_BITS = 4 + val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS + // dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH + val smemTagWidth = dmemTagWidth + val imemNodes = Seq.tabulate(1) { i => TLClientNode( Seq( @@ -525,8 +535,8 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { Module( new VortexTLAdapter( outer.dmemSourceWidth, - chiselTypeOf(core.io.dmem.get(0).a.bits), - chiselTypeOf(core.io.dmem.get(0).d.bits), + new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32), + new VortexBundleD(tagWidth = outer.dmemTagWidth, dataWidth = 32), outer.dmemNodes(0).out.head ) ) @@ -553,7 +563,8 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // of a same source id for all lanes. 
val arb = Module( new RRArbiter( - core.io.dmem.get.head.d.bits.source.cloneType, + // FIXME: should really be source on D channel + new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32).source.cloneType, outer.numLanes ) ) @@ -575,17 +586,36 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // make connection: // VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes - (core.io.dmem.get zip dmemTLAdapters) foreach { - case (coreMem, tlAdapter) => - tlAdapter.io.inReq <> coreMem.a - coreMem.d <> tlAdapter.io.inResp + // + // Chisel doesn't support 2-D array in BlackBox interface to Verilog, so + // need to flatten everything. + dmemTLAdapters.zipWithIndex.foreach { + case (tlAdapter, i) => + // tlAdapter.io.inReq <> coreMem.a + tlAdapter.io.inReq.valid := core.io.dmem_a_valid(i) + tlAdapter.io.inReq.bits.opcode := core.io.dmem_a_bits_opcode(3 * (i + 1) - 1, 3 * i) + tlAdapter.io.inReq.bits.size := core.io.dmem_a_bits_size(4 * (i + 1) - 1, 4 * i) + tlAdapter.io.inReq.bits.source := core.io.dmem_a_bits_source(outer.dmemTagWidth * (i + 1) - 1, outer.dmemTagWidth * i) + tlAdapter.io.inReq.bits.address := core.io.dmem_a_bits_address(32 * (i + 1) - 1, 32 * i) + tlAdapter.io.inReq.bits.mask := core.io.dmem_a_bits_mask(4 * (i + 1) - 1, 4 * i) + tlAdapter.io.inReq.bits.data := core.io.dmem_a_bits_data(32 * (i + 1) - 1, 32 * i) } + core.io.dmem_a_ready := dmemTLAdapters.map(_.io.inReq.ready).asUInt + + core.io.dmem_d_valid := dmemTLAdapters.map(_.io.inResp.valid).asUInt + core.io.dmem_d_bits_opcode := dmemTLAdapters.map(_.io.inResp.bits.opcode).asUInt + core.io.dmem_d_bits_size := dmemTLAdapters.map(_.io.inResp.bits.size).asUInt + core.io.dmem_d_bits_source := dmemTLAdapters.map(_.io.inResp.bits.source).asUInt + core.io.dmem_d_bits_data := dmemTLAdapters.map(_.io.inResp.bits.data).asUInt + // override response channel with matchingSources - (core.io.dmem.get zip dmemTLAdapters).zipWithIndex.foreach { - case 
((coreMem, tlAdapter), i) => - coreMem.d.valid := tlAdapter.io.inResp.valid && matchingSources(i) - tlAdapter.io.inResp.ready := coreMem.d.ready && matchingSources(i) + val dmem_d_valid_vec = Wire(Vec(outer.numLanes, Bool())) + dmemTLAdapters.zipWithIndex.foreach { + case (tlAdapter, i) => + dmem_d_valid_vec(i) := tlAdapter.io.inResp.valid && matchingSources(i) + tlAdapter.io.inResp.ready := core.io.dmem_d_ready(i) && matchingSources(i) } + core.io.dmem_d_valid := dmem_d_valid_vec.asUInt (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) => tlOut.a <> tlAdapter.io.outReq tlAdapter.io.outResp <> tlOut.d From 51e17e709bc251f90b52d6249370287e4135672a Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 4 Jan 2024 00:53:18 -0800 Subject: [PATCH 35/36] Flatten smem bundle of Vortex core IO into 1-D arrays --- src/main/scala/rocket/VortexCore.scala | 20 ++++++++++++++-- src/main/scala/tile/VortexTile.scala | 32 +++++++++++++++++++++----- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 5f4caa5..54518f5 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -50,8 +50,8 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle // val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32))) })) else None val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32))) + // val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32)) + // val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { val a = Decoupled(new VortexBundleA(tagWidth = 
15, dataWidth = 128)) @@ -80,6 +80,22 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val dmem_d_bits_data = Input(UInt((tile.numLanes * 32).W)) val dmem_d_ready = Output(UInt((tile.numLanes * 1).W)) + val smem_a_ready = Input(UInt((tile.numLanes * 1).W)) + val smem_a_valid = Output(UInt((tile.numLanes * 1).W)) + val smem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W)) + val smem_a_bits_size = Output(UInt((tile.numLanes * 4).W)) + val smem_a_bits_source = Output(UInt((tile.numLanes * tile.smemTagWidth).W)) + val smem_a_bits_address = Output(UInt((tile.numLanes * 32).W)) + val smem_a_bits_mask = Output(UInt((tile.numLanes * 4).W)) + val smem_a_bits_data = Output(UInt((tile.numLanes * 32).W)) + + val smem_d_valid = Input(UInt((tile.numLanes * 1).W)) + val smem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W)) + val smem_d_bits_size = Input(UInt((tile.numLanes * 4).W)) + val smem_d_bits_source = Input(UInt((tile.numLanes * tile.smemTagWidth).W)) + val smem_d_bits_data = Input(UInt((tile.numLanes * 32).W)) + val smem_d_ready = Output(UInt((tile.numLanes * 1).W)) + // val fpu = Flipped(new FPUCoreIO()) //val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs)) //val trace = Output(new TraceBundle) diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 482ba7e..8863ea0 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -616,6 +616,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { tlAdapter.io.inResp.ready := core.io.dmem_d_ready(i) && matchingSources(i) } core.io.dmem_d_valid := dmem_d_valid_vec.asUInt + (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) => tlOut.a <> tlAdapter.io.outReq tlAdapter.io.outResp <> tlOut.d @@ -635,17 +636,36 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { Module( new VortexTLAdapter( outer.smemSourceWidth, - 
chiselTypeOf(core.io.smem.get(0).a.bits), - chiselTypeOf(core.io.smem.get(0).d.bits), + new VortexBundleA(tagWidth = outer.smemTagWidth, dataWidth = 32), + new VortexBundleD(tagWidth = outer.smemTagWidth, dataWidth = 32), outer.smemNodes(0).out.head ) ) } - (core.io.smem.get zip smemTLAdapters) foreach { - case (coreMem, tlAdapter) => - tlAdapter.io.inReq <> coreMem.a - coreMem.d <> tlAdapter.io.inResp + + smemTLAdapters.zipWithIndex.foreach { + case (tlAdapter, i) => + // tlAdapter.io.inReq <> coreMem.a + tlAdapter.io.inReq.valid := core.io.smem_a_valid(i) + tlAdapter.io.inReq.bits.opcode := core.io.smem_a_bits_opcode(3 * (i + 1) - 1, 3 * i) + tlAdapter.io.inReq.bits.size := core.io.smem_a_bits_size(4 * (i + 1) - 1, 4 * i) + tlAdapter.io.inReq.bits.source := core.io.smem_a_bits_source(outer.smemTagWidth * (i + 1) - 1, outer.smemTagWidth * i) + tlAdapter.io.inReq.bits.address := core.io.smem_a_bits_address(32 * (i + 1) - 1, 32 * i) + tlAdapter.io.inReq.bits.mask := core.io.smem_a_bits_mask(4 * (i + 1) - 1, 4 * i) + tlAdapter.io.inReq.bits.data := core.io.smem_a_bits_data(32 * (i + 1) - 1, 32 * i) } + core.io.smem_a_ready := smemTLAdapters.map(_.io.inReq.ready).asUInt + + core.io.smem_d_valid := smemTLAdapters.map(_.io.inResp.valid).asUInt + core.io.smem_d_bits_opcode := smemTLAdapters.map(_.io.inResp.bits.opcode).asUInt + core.io.smem_d_bits_size := smemTLAdapters.map(_.io.inResp.bits.size).asUInt + core.io.smem_d_bits_source := smemTLAdapters.map(_.io.inResp.bits.source).asUInt + core.io.smem_d_bits_data := smemTLAdapters.map(_.io.inResp.bits.data).asUInt + smemTLAdapters.zipWithIndex.foreach { + case (tlAdapter, i) => + tlAdapter.io.inResp.ready := core.io.smem_d_ready(i) + } + (smemTLAdapters zip smemTLBundles) foreach { case (tlAdapter, tlOut) => tlOut.a <> tlAdapter.io.outReq tlAdapter.io.outResp <> tlOut.d From 9e1ddfaeb97b2221472c183d5a481ef89dab333c Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 4 Jan 2024 01:35:30 -0800 Subject: [PATCH 36/36] Bump 
vortex with IO flattening --- src/main/resources/vsrc/vortex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/vsrc/vortex b/src/main/resources/vsrc/vortex index 06fed1e..62171c0 160000 --- a/src/main/resources/vsrc/vortex +++ b/src/main/resources/vsrc/vortex @@ -1 +1 @@ -Subproject commit 06fed1e437db1a9d95cc8b9ac40909118eae7f3f +Subproject commit 62171c0788b5ba0bba0fcf10f05ff75f14572bde