basic test timing + scope tracing ccip
This commit is contained in:
@@ -17,14 +17,10 @@ install:
|
||||
- ci/toolchain_install.sh -all
|
||||
- export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
|
||||
- export VERILATOR_ROOT=/opt/verilator
|
||||
- export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
|
||||
# VORTEX
|
||||
- git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
- cd vortex
|
||||
- make -j`nproc`
|
||||
- export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
|
||||
script:
|
||||
- make -j`nproc`
|
||||
- ci/test_runtime.sh
|
||||
- ci/test_driver.sh
|
||||
- ci/test_riscv_isa.sh
|
||||
|
||||
@@ -23,8 +23,8 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
#DEBUG=1
|
||||
#SCOPE=1
|
||||
DEBUG=1
|
||||
SCOPE=1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
|
||||
@@ -2,14 +2,17 @@ all:
|
||||
$(MAKE) -C basic
|
||||
$(MAKE) -C demo
|
||||
$(MAKE) -C dogfood
|
||||
$(MAKE) -C graphics
|
||||
|
||||
run:
|
||||
$(MAKE) -C basic run-rtlsim
|
||||
$(MAKE) -C demo run-rtlsim
|
||||
$(MAKE) -C dogfood run-rtlsim
|
||||
$(MAKE) -C graphics run-rtlsim
|
||||
|
||||
clean:
|
||||
$(MAKE) -C basic clean
|
||||
$(MAKE) -C demo clean
|
||||
$(MAKE) -C dogfood clean
|
||||
$(MAKE) -C graphics clean
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vortex.h>
|
||||
#include <chrono>
|
||||
#include "common.h"
|
||||
|
||||
#define RT_CHECK(_expr) \
|
||||
@@ -68,6 +69,9 @@ uint64_t shuffle(int i, uint64_t value) {
|
||||
|
||||
int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) {
|
||||
int errors = 0;
|
||||
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
int num_blocks_8 = (64 * num_blocks) / 8;
|
||||
|
||||
// update source buffer
|
||||
@@ -85,7 +89,9 @@ int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) {
|
||||
|
||||
// write buffer to local memory
|
||||
std::cout << "write buffer to local memory" << std::endl;
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_copy_to_dev(buffer, dev_addr, 64 * num_blocks, 0));
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// clear destination buffer
|
||||
for (int i = 0; i < num_blocks_8; ++i) {
|
||||
@@ -94,7 +100,9 @@ int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) {
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_copy_from_dev(buffer, dev_addr, 64 * num_blocks, 0));
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
@@ -114,6 +122,16 @@ int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
double elapsed;
|
||||
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
|
||||
printf("upload time: %lg ms\n", elapsed);
|
||||
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
|
||||
printf("download time: %lg ms\n", elapsed);
|
||||
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
printf("Total elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -121,6 +139,8 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t num_points) {
|
||||
int errors = 0;
|
||||
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// update source buffer
|
||||
{
|
||||
@@ -130,7 +150,9 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, buf_size, 0));
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
@@ -143,21 +165,25 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
std::cout << "start execution" << std::endl;
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_start(device));
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// flush the caches
|
||||
std::cout << "flush the caches" << std::endl;
|
||||
auto t4 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
|
||||
auto t5 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
auto t6 = std::chrono::high_resolution_clock::now();
|
||||
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
auto t7 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
@@ -176,6 +202,20 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
double elapsed;
|
||||
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
|
||||
printf("upload time: %lg ms\n", elapsed);
|
||||
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
|
||||
printf("execute time: %lg ms\n", elapsed);
|
||||
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t5 - t4).count();
|
||||
printf("flush time: %lg ms\n", elapsed);
|
||||
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t7 - t6).count();
|
||||
printf("download time: %lg ms\n", elapsed);
|
||||
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
printf("Total elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -196,7 +236,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
unsigned max_cores;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
uint32_t num_points = max_cores * count;
|
||||
uint32_t num_points = 1 * count;
|
||||
uint32_t num_blocks = (num_points * sizeof(uint32_t) + 63) / 64;
|
||||
uint32_t buf_size = num_blocks * 64;
|
||||
|
||||
@@ -222,9 +262,7 @@ int main(int argc, char *argv[]) {
|
||||
// run tests
|
||||
if (0 == test || -1 == test) {
|
||||
std::cout << "run memcopy test" << std::endl;
|
||||
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, 1));
|
||||
if (num_blocks >= 4) RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, num_blocks/2));
|
||||
if (num_blocks >= 2) RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks));
|
||||
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks));
|
||||
}
|
||||
|
||||
if (1 == test || -1 == test) {
|
||||
@@ -251,4 +289,4 @@ int main(int argc, char *argv[]) {
|
||||
std::cout << "Test PASSED" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,6 @@
|
||||
`include "platform_if.vh"
|
||||
import local_mem_cfg_pkg::*;
|
||||
`include "afu_json_info.vh"
|
||||
`include "VX_define.vh"
|
||||
`else
|
||||
`include "vortex_afu.vh"
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
@@ -992,7 +991,7 @@ end
|
||||
assign cmd_run_done = !vx_busy;
|
||||
|
||||
Vortex #() vortex (
|
||||
`SCOPE_BIND_top_vortex
|
||||
`SCOPE_BIND_afu_vortex
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset | vx_reset),
|
||||
@@ -1068,36 +1067,19 @@ end
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
`SCOPE_ASSIGN (scope_reset, vx_reset);
|
||||
|
||||
`SCOPE_ASSIGN (scope_dram_req_valid, vx_dram_req_valid);
|
||||
`SCOPE_ASSIGN (scope_dram_req_addr, {vx_dram_req_addr, 4'b0});
|
||||
`SCOPE_ASSIGN (scope_dram_req_rw, vx_dram_req_rw);
|
||||
`SCOPE_ASSIGN (scope_dram_req_byteen,vx_dram_req_byteen);
|
||||
`SCOPE_ASSIGN (scope_dram_req_data, vx_dram_req_data);
|
||||
`SCOPE_ASSIGN (scope_dram_req_tag, vx_dram_req_tag);
|
||||
`SCOPE_ASSIGN (scope_dram_req_ready, vx_dram_req_ready);
|
||||
|
||||
`SCOPE_ASSIGN (scope_dram_rsp_valid, vx_dram_rsp_valid);
|
||||
`SCOPE_ASSIGN (scope_dram_rsp_data, vx_dram_rsp_data);
|
||||
`SCOPE_ASSIGN (scope_dram_rsp_tag, vx_dram_rsp_tag);
|
||||
`SCOPE_ASSIGN (scope_dram_rsp_ready, vx_dram_rsp_ready);
|
||||
|
||||
`SCOPE_ASSIGN (scope_snp_req_valid, vx_snp_req_valid);
|
||||
`SCOPE_ASSIGN (scope_snp_req_addr, {vx_snp_req_addr, 4'b0});
|
||||
`SCOPE_ASSIGN (scope_snp_req_invalidate, vx_snp_req_invalidate);
|
||||
`SCOPE_ASSIGN (scope_snp_req_tag, vx_snp_req_tag);
|
||||
`SCOPE_ASSIGN (scope_snp_req_ready, vx_snp_req_ready);
|
||||
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_valid, vx_snp_rsp_valid);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready);
|
||||
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_valid, vx_snp_rsp_valid);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready);
|
||||
|
||||
`SCOPE_ASSIGN (scope_busy, vx_busy);
|
||||
`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioRdValid, cp2af_sRxPort.c0.mmioRdValid);
|
||||
`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioWrValid, cp2af_sRxPort.c0.mmioWrValid);
|
||||
`SCOPE_ASSIGN (mmio_hdr_address, mmio_hdr.address);
|
||||
`SCOPE_ASSIGN (mmio_hdr_length, mmio_hdr.length);
|
||||
`SCOPE_ASSIGN (ccip_sRxPort_c0_hdr_mdata, cp2af_sRxPort.c0.hdr.mdata);
|
||||
`SCOPE_ASSIGN (ccip_sRxPort_c0_rspValid, cp2af_sRxPort.c0.rspValid);
|
||||
`SCOPE_ASSIGN (ccip_sRxPort_c1_rspValid, cp2af_sRxPort.c1.rspValid);
|
||||
`SCOPE_ASSIGN (ccip_sTxPort_c0_fire, af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull);
|
||||
`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_address, af2cp_sTxPort.c0.hdr.address);
|
||||
`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_mdata, af2cp_sTxPort.c0.hdr.mdata);
|
||||
`SCOPE_ASSIGN (ccip_sTxPort_c1_fire, af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull);
|
||||
`SCOPE_ASSIGN (ccip_sTxPort_c1_hdr_address, af2cp_sTxPort.c1.hdr.address);
|
||||
`SCOPE_ASSIGN (ccip_sTxPort_c2_mmioRdValid, af2cp_sTxPort.c2.mmioRdValid);
|
||||
|
||||
wire scope_changed = `SCOPE_TRIGGER;
|
||||
|
||||
|
||||
@@ -36,13 +36,27 @@
|
||||
`endif
|
||||
|
||||
`ifndef IO_BUS_BASE_ADDR
|
||||
`define IO_BUS_BASE_ADDR 32'hFFFFFF00
|
||||
`define IO_BUS_BASE_ADDR 32'hFF000000
|
||||
`endif
|
||||
|
||||
`ifndef IO_BUS_ADDR_COUT
|
||||
`define IO_BUS_ADDR_COUT 32'hFFFFFFFC
|
||||
`endif
|
||||
|
||||
`ifndef FRAME_BUFFER_BASE_ADDR
|
||||
`define FRAME_BUFFER_BASE_ADDR 32'hFF000000
|
||||
`endif
|
||||
|
||||
`ifndef FRAME_BUFFER_WIDTH
|
||||
`define FRAME_BUFFER_WIDTH 16'd1920
|
||||
`endif
|
||||
|
||||
`ifndef FRAME_BUFFER_HEIGHT
|
||||
`define FRAME_BUFFER_HEIGHT 16'd1080
|
||||
`endif
|
||||
|
||||
`define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT)
|
||||
|
||||
`ifndef L2_ENABLE
|
||||
`define L2_ENABLE 0
|
||||
`endif
|
||||
|
||||
@@ -90,18 +90,18 @@ module VX_gpu_unit #(
|
||||
// can accept new request?
|
||||
assign gpu_req_if.ready = gpu_commit_if.ready;
|
||||
|
||||
`SCOPE_ASSIGN (scope_gpu_req_valid, gpu_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_gpu_req_wid, gpu_req_if.wid);
|
||||
`SCOPE_ASSIGN (scope_gpu_req_tmask, gpu_req_if.tmask);
|
||||
`SCOPE_ASSIGN (scope_gpu_req_op_type, gpu_req_if.op_type);
|
||||
`SCOPE_ASSIGN (scope_gpu_req_rs1, gpu_req_if.rs1_data[0]);
|
||||
`SCOPE_ASSIGN (scope_gpu_req_rs2, gpu_req_if.rs2_data);
|
||||
`SCOPE_ASSIGN (scope_gpu_req_ready, gpu_req_if.ready);
|
||||
`SCOPE_ASSIGN (scope_gpu_rsp_valid, warp_ctl_if.valid);
|
||||
`SCOPE_ASSIGN (scope_gpu_rsp_wid, warp_ctl_if.wid);
|
||||
`SCOPE_ASSIGN (scope_gpu_rsp_tmc, warp_ctl_if.tmc);
|
||||
`SCOPE_ASSIGN (scope_gpu_rsp_wspawn, warp_ctl_if.wspawn);
|
||||
`SCOPE_ASSIGN (scope_gpu_rsp_split, warp_ctl_if.split);
|
||||
`SCOPE_ASSIGN (scope_gpu_rsp_barrier, warp_ctl_if.barrier);
|
||||
`SCOPE_ASSIGN (gpu_req_fire, gpu_req_if.valid && gpu_req_if.ready);
|
||||
`SCOPE_ASSIGN (gpu_req_wid, gpu_req_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask);
|
||||
`SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type);
|
||||
`SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]);
|
||||
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data);
|
||||
|
||||
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wspawn, warp_ctl_if.wspawn);
|
||||
`SCOPE_ASSIGN (gpu_rsp_split, warp_ctl_if.split);
|
||||
`SCOPE_ASSIGN (gpu_rsp_barrier, warp_ctl_if.barrier);
|
||||
|
||||
endmodule
|
||||
@@ -60,16 +60,14 @@ module VX_icache_stage #(
|
||||
// Can accept new response?
|
||||
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
||||
|
||||
`SCOPE_ASSIGN (scope_icache_req_valid, icache_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_icache_req_wid, ifetch_req_if.wid);
|
||||
`SCOPE_ASSIGN (scope_icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||
`SCOPE_ASSIGN (scope_icache_req_tag, req_tag);
|
||||
`SCOPE_ASSIGN (scope_icache_req_ready, icache_req_if.ready);
|
||||
`SCOPE_ASSIGN (icache_req_fire, icache_req_fire);
|
||||
`SCOPE_ASSIGN (icache_req_wid, ifetch_req_if.wid);
|
||||
`SCOPE_ASSIGN (icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||
`SCOPE_ASSIGN (icache_req_tag, req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (scope_icache_rsp_valid, icache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (scope_icache_rsp_data, icache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (scope_icache_rsp_tag, rsp_tag);
|
||||
`SCOPE_ASSIGN (scope_icache_rsp_ready, icache_rsp_if.ready);
|
||||
`SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready);
|
||||
`SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (icache_rsp_tag, rsp_tag);
|
||||
|
||||
`ifdef DBG_PRINT_CORE_ICACHE
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -100,38 +100,38 @@ module VX_issue #(
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
`SCOPE_ASSIGN (scope_issue_valid, ibuf_deq_if.valid);
|
||||
`SCOPE_ASSIGN (scope_issue_wid, ibuf_deq_if.wid);
|
||||
`SCOPE_ASSIGN (scope_issue_tmask, ibuf_deq_if.tmask);
|
||||
`SCOPE_ASSIGN (scope_issue_pc, ibuf_deq_if.PC);
|
||||
`SCOPE_ASSIGN (scope_issue_ex_type, ibuf_deq_if.ex_type);
|
||||
`SCOPE_ASSIGN (scope_issue_op_type, ibuf_deq_if.op_type);
|
||||
`SCOPE_ASSIGN (scope_issue_op_mod, ibuf_deq_if.op_mod);
|
||||
`SCOPE_ASSIGN (scope_issue_wb, ibuf_deq_if.wb);
|
||||
`SCOPE_ASSIGN (scope_issue_rd, ibuf_deq_if.rd);
|
||||
`SCOPE_ASSIGN (scope_issue_rs1, ibuf_deq_if.rs1);
|
||||
`SCOPE_ASSIGN (scope_issue_rs2, ibuf_deq_if.rs2);
|
||||
`SCOPE_ASSIGN (scope_issue_rs3, ibuf_deq_if.rs3);
|
||||
`SCOPE_ASSIGN (scope_issue_imm, ibuf_deq_if.imm);
|
||||
`SCOPE_ASSIGN (scope_issue_rs1_is_pc, ibuf_deq_if.rs1_is_PC);
|
||||
`SCOPE_ASSIGN (scope_issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm);
|
||||
`SCOPE_ASSIGN (scope_issue_ready, ibuf_deq_if.ready);
|
||||
`SCOPE_ASSIGN (scope_scoreboard_delay, scoreboard_delay);
|
||||
`SCOPE_ASSIGN (scope_gpr_delay, ~gpr_req_if.ready);
|
||||
`SCOPE_ASSIGN (scope_execute_delay, ~execute_if.ready);
|
||||
`SCOPE_ASSIGN (issue_fire, ibuf_deq_if.valid && ibuf_deq_if.ready);
|
||||
`SCOPE_ASSIGN (issue_wid, ibuf_deq_if.wid);
|
||||
`SCOPE_ASSIGN (issue_tmask, ibuf_deq_if.tmask);
|
||||
`SCOPE_ASSIGN (issue_pc, ibuf_deq_if.PC);
|
||||
`SCOPE_ASSIGN (issue_ex_type, ibuf_deq_if.ex_type);
|
||||
`SCOPE_ASSIGN (issue_op_type, ibuf_deq_if.op_type);
|
||||
`SCOPE_ASSIGN (issue_op_mod, ibuf_deq_if.op_mod);
|
||||
`SCOPE_ASSIGN (issue_wb, ibuf_deq_if.wb);
|
||||
`SCOPE_ASSIGN (issue_rd, ibuf_deq_if.rd);
|
||||
`SCOPE_ASSIGN (issue_rs1, ibuf_deq_if.rs1);
|
||||
`SCOPE_ASSIGN (issue_rs2, ibuf_deq_if.rs2);
|
||||
`SCOPE_ASSIGN (issue_rs3, ibuf_deq_if.rs3);
|
||||
`SCOPE_ASSIGN (issue_imm, ibuf_deq_if.imm);
|
||||
`SCOPE_ASSIGN (issue_rs1_is_pc, ibuf_deq_if.rs1_is_PC);
|
||||
`SCOPE_ASSIGN (issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm);
|
||||
|
||||
`SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay);
|
||||
`SCOPE_ASSIGN (gpr_delay, ~gpr_req_if.ready);
|
||||
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
|
||||
|
||||
`SCOPE_ASSIGN (scope_gpr_rsp_valid, gpr_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (scope_gpr_rsp_wid, gpr_rsp_if.wid);
|
||||
`SCOPE_ASSIGN (scope_gpr_rsp_pc, gpr_rsp_if.PC);
|
||||
`SCOPE_ASSIGN (scope_gpr_rsp_a, gpr_rsp_if.rs1_data);
|
||||
`SCOPE_ASSIGN (scope_gpr_rsp_b, gpr_rsp_if.rs2_data);
|
||||
`SCOPE_ASSIGN (scope_gpr_rsp_c, gpr_rsp_if.rs3_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_valid, gpr_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (gpr_rsp_wid, gpr_rsp_if.wid);
|
||||
`SCOPE_ASSIGN (gpr_rsp_pc, gpr_rsp_if.PC);
|
||||
`SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data);
|
||||
|
||||
`SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN (scope_writeback_wid, writeback_if.wid);
|
||||
`SCOPE_ASSIGN (scope_writeback_pc, writeback_if.PC);
|
||||
`SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN (scope_writeback_data, writeback_if.data);
|
||||
`SCOPE_ASSIGN (writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN (writeback_wid, writeback_if.wid);
|
||||
`SCOPE_ASSIGN (writeback_pc, writeback_if.PC);
|
||||
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -192,20 +192,18 @@ module VX_lsu_unit #(
|
||||
assign dcache_rsp_if.ready = ~(stall_out || mem_rsp_stall);
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN (scope_dcache_req_valid, dcache_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_addr, req_address);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_rw, req_rw);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_tag, req_tag);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_wid, req_wid);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_pc, req_pc);
|
||||
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}});
|
||||
`SCOPE_ASSIGN (dcache_req_wid, req_wid);
|
||||
`SCOPE_ASSIGN (dcache_req_pc, req_pc);
|
||||
`SCOPE_ASSIGN (dcache_req_addr, req_address);
|
||||
`SCOPE_ASSIGN (dcache_req_rw, req_rw);
|
||||
`SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN (dcache_req_tag, req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_tag, rsp_tag);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
||||
`SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}});
|
||||
`SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (dcache_rsp_tag, rsp_tag);
|
||||
|
||||
`ifdef DBG_PRINT_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
`include "scope-defs.vh"
|
||||
|
||||
`define SCOPE_ASSIGN(d,s) assign d = s
|
||||
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
|
||||
|
||||
`else
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
|
||||
`define SCOPE_BIND_Vortex_cluster(__i__)
|
||||
|
||||
`define SCOPE_BIND_top_vortex
|
||||
`define SCOPE_BIND_afu_vortex
|
||||
|
||||
`define SCOPE_IO_VX_lsu_unit
|
||||
|
||||
|
||||
@@ -250,11 +250,11 @@ module VX_warp_sched #(
|
||||
|
||||
assign busy = (active_warps != 0);
|
||||
|
||||
`SCOPE_ASSIGN (scope_wsched_scheduled_warp, scheduled_warp);
|
||||
`SCOPE_ASSIGN (scope_wsched_active_warps, active_warps);
|
||||
`SCOPE_ASSIGN (scope_wsched_schedule_table, schedule_table);
|
||||
`SCOPE_ASSIGN (scope_wsched_schedule_ready, schedule_ready);
|
||||
`SCOPE_ASSIGN (scope_wsched_warp_to_schedule, warp_to_schedule);
|
||||
`SCOPE_ASSIGN (scope_wsched_warp_pc, warp_pc);
|
||||
`SCOPE_ASSIGN (wsched_scheduled_warp, scheduled_warp);
|
||||
`SCOPE_ASSIGN (wsched_active_warps, active_warps);
|
||||
`SCOPE_ASSIGN (wsched_schedule_table, schedule_table);
|
||||
`SCOPE_ASSIGN (wsched_schedule_ready, schedule_ready);
|
||||
`SCOPE_ASSIGN (wsched_warp_to_schedule, warp_to_schedule);
|
||||
`SCOPE_ASSIGN (wsched_warp_pc, warp_pc);
|
||||
|
||||
endmodule
|
||||
@@ -456,6 +456,32 @@ module Vortex (
|
||||
);
|
||||
end
|
||||
|
||||
`SCOPE_ASSIGN (reset, reset);
|
||||
|
||||
`SCOPE_ASSIGN (dram_req_fire, dram_req_valid && dram_req_ready);
|
||||
`SCOPE_ASSIGN (dram_req_addr, {dram_req_addr, 4'b0});
|
||||
`SCOPE_ASSIGN (dram_req_rw, dram_req_rw);
|
||||
`SCOPE_ASSIGN (dram_req_byteen,dram_req_byteen);
|
||||
`SCOPE_ASSIGN (dram_req_data, dram_req_data);
|
||||
`SCOPE_ASSIGN (dram_req_tag, dram_req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (dram_rsp_fire, dram_rsp_valid && dram_rsp_ready);
|
||||
`SCOPE_ASSIGN (dram_rsp_data, dram_rsp_data);
|
||||
`SCOPE_ASSIGN (dram_rsp_tag, dram_rsp_tag);
|
||||
|
||||
`SCOPE_ASSIGN (snp_req_fire, snp_req_valid && snp_req_ready);
|
||||
`SCOPE_ASSIGN (snp_req_addr, {snp_req_addr, 4'b0});
|
||||
`SCOPE_ASSIGN (snp_req_invalidate, snp_req_invalidate);
|
||||
`SCOPE_ASSIGN (snp_req_tag, snp_req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready);
|
||||
`SCOPE_ASSIGN (snp_rsp_tag, snp_rsp_tag);
|
||||
|
||||
`SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready);
|
||||
`SCOPE_ASSIGN (snp_rsp_tag, snp_rsp_tag);
|
||||
|
||||
`SCOPE_ASSIGN (busy, busy);
|
||||
|
||||
`ifdef DBG_PRINT_DRAM
|
||||
always @(posedge clk) begin
|
||||
if (dram_req_valid && dram_req_ready) begin
|
||||
|
||||
22
hw/rtl/cache/VX_bank.v
vendored
22
hw/rtl/cache/VX_bank.v
vendored
@@ -737,18 +737,18 @@ module VX_bank #(
|
||||
end
|
||||
`endif
|
||||
|
||||
`SCOPE_ASSIGN (scope_valid_st0, qual_valid_st0);
|
||||
`SCOPE_ASSIGN (scope_valid_st1, valid_st1);
|
||||
`SCOPE_ASSIGN (scope_valid_st2, valid_st2);
|
||||
`SCOPE_ASSIGN (valid_st0, qual_valid_st0);
|
||||
`SCOPE_ASSIGN (valid_st1, valid_st1);
|
||||
`SCOPE_ASSIGN (valid_st2, valid_st2);
|
||||
|
||||
`SCOPE_ASSIGN (scope_is_mrvq_st1, is_mrvq_st1);
|
||||
`SCOPE_ASSIGN (scope_miss_st1, miss_st1);
|
||||
`SCOPE_ASSIGN (scope_dirty_st1, dirty_st1);
|
||||
`SCOPE_ASSIGN (scope_force_miss_st1, force_request_miss_st1);
|
||||
`SCOPE_ASSIGN (scope_stall_pipe, stall_bank_pipe);
|
||||
`SCOPE_ASSIGN (is_mrvq_st1, is_mrvq_st1);
|
||||
`SCOPE_ASSIGN (miss_st1, miss_st1);
|
||||
`SCOPE_ASSIGN (dirty_st1, dirty_st1);
|
||||
`SCOPE_ASSIGN (force_miss_st1, force_request_miss_st1);
|
||||
`SCOPE_ASSIGN (stall_pipe, stall_bank_pipe);
|
||||
|
||||
`SCOPE_ASSIGN (scope_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (scope_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
`SCOPE_ASSIGN (scope_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
"../rtl/cache/VX_cache_config.vh"
|
||||
],
|
||||
"modules": {
|
||||
"top": {
|
||||
"afu": {
|
||||
"submodules": {
|
||||
"vortex": {"type":"Vortex"}
|
||||
"vortex": {"type":"Vortex", "enabled":false}
|
||||
}
|
||||
},
|
||||
"Vortex": {
|
||||
@@ -73,41 +73,50 @@
|
||||
"VX_bank": {}
|
||||
},
|
||||
"taps": {
|
||||
"top": {
|
||||
"afu": {
|
||||
"?ccip_sRxPort_c0_mmioRdValid":1,
|
||||
"?ccip_sRxPort_c0_mmioWrValid":1,
|
||||
"mmio_hdr_address":16,
|
||||
"mmio_hdr_length":2,
|
||||
"ccip_sRxPort_c0_hdr_mdata":16,
|
||||
"?ccip_sRxPort_c0_rspValid":1,
|
||||
"?ccip_sRxPort_c1_rspValid":1,
|
||||
"?ccip_sTxPort_c0_fire":1,
|
||||
"ccip_sTxPort_c0_hdr_address":42,
|
||||
"ccip_sTxPort_c0_hdr_mdata":16,
|
||||
"?ccip_sTxPort_c1_fire":1,
|
||||
"ccip_sTxPort_c1_hdr_address":42,
|
||||
"ccip_sTxPort_c2_mmioRdValid":1
|
||||
},
|
||||
"afu/vortex": {
|
||||
"!reset": 1,
|
||||
"?dram_req_valid": 1,
|
||||
"?dram_req_fire": 1,
|
||||
"dram_req_addr": 32,
|
||||
"dram_req_rw": 1,
|
||||
"dram_req_byteen":"`VX_DRAM_BYTEEN_WIDTH",
|
||||
"dram_req_data":"`VX_DRAM_LINE_WIDTH",
|
||||
"dram_req_tag":"`VX_DRAM_TAG_WIDTH",
|
||||
"?dram_req_ready": 1,
|
||||
"?dram_rsp_valid": 1,
|
||||
"?dram_rsp_fire": 1,
|
||||
"dram_rsp_data":"`VX_DRAM_LINE_WIDTH",
|
||||
"dram_rsp_tag":"`VX_DRAM_TAG_WIDTH",
|
||||
"?dram_rsp_ready": 1,
|
||||
"?snp_req_valid": 1,
|
||||
"?snp_req_fire": 1,
|
||||
"snp_req_addr": 32,
|
||||
"snp_req_invalidate": 1,
|
||||
"snp_req_tag":"`VX_SNP_TAG_WIDTH",
|
||||
"?snp_req_ready": 1,
|
||||
"?snp_rsp_valid": 1,
|
||||
"snp_rsp_tag":"`VX_SNP_TAG_WIDTH",
|
||||
"?snp_rsp_ready": 1,
|
||||
"?snp_rsp_fire": 1,
|
||||
"snp_rsp_tag":"`VX_SNP_TAG_WIDTH",
|
||||
"busy": 1
|
||||
},
|
||||
"top/vortex/cluster/core/pipeline/fetch/icache_stage": {
|
||||
"?icache_req_valid": 1,
|
||||
"afu/vortex/cluster/core/pipeline/fetch/icache_stage": {
|
||||
"?icache_req_fire": 1,
|
||||
"icache_req_wid":"`NW_BITS",
|
||||
"icache_req_addr": 32,
|
||||
"icache_req_tag":"`ICORE_TAG_ID_BITS",
|
||||
"?icache_req_ready": 1,
|
||||
"?icache_rsp_valid": 1,
|
||||
"?icache_rsp_fire": 1,
|
||||
"icache_rsp_data": 32,
|
||||
"icache_rsp_tag":"`ICORE_TAG_ID_BITS",
|
||||
"?icache_rsp_ready": 1
|
||||
"icache_rsp_tag":"`ICORE_TAG_ID_BITS"
|
||||
},
|
||||
"top/vortex/cluster/core/pipeline/fetch/warp_sched": {
|
||||
"afu/vortex/cluster/core/pipeline/fetch/warp_sched": {
|
||||
"?wsched_scheduled_warp": 1,
|
||||
"wsched_active_warps": "`NUM_WARPS",
|
||||
"wsched_schedule_table": "`NUM_WARPS",
|
||||
@@ -115,14 +124,13 @@
|
||||
"wsched_warp_to_schedule": "`NW_BITS",
|
||||
"wsched_warp_pc": "32"
|
||||
},
|
||||
"top/vortex/cluster/core/pipeline/execute/gpu_unit": {
|
||||
"?gpu_req_valid": 1,
|
||||
"afu/vortex/cluster/core/pipeline/execute/gpu_unit": {
|
||||
"?gpu_req_fire": 1,
|
||||
"gpu_req_wid": "`NW_BITS",
|
||||
"gpu_req_tmask": "`NUM_THREADS",
|
||||
"gpu_req_op_type": "`GPU_BITS",
|
||||
"gpu_req_rs1": "32",
|
||||
"gpu_req_rs2": "32",
|
||||
"?gpu_req_ready": 1,
|
||||
"gpu_req_rs2": "32",
|
||||
"?gpu_rsp_valid": 1,
|
||||
"gpu_rsp_wid": "`NW_BITS",
|
||||
"gpu_rsp_tmc": "`GPU_TMC_SIZE",
|
||||
@@ -130,8 +138,8 @@
|
||||
"gpu_rsp_split": "`GPU_SPLIT_SIZE",
|
||||
"gpu_rsp_barrier": "`GPU_BARRIER_SIZE"
|
||||
},
|
||||
"top/vortex/cluster/core/pipeline/execute/lsu_unit": {
|
||||
"?dcache_req_valid":"`NUM_THREADS",
|
||||
"afu/vortex/cluster/core/pipeline/execute/lsu_unit": {
|
||||
"?dcache_req_fire":"`NUM_THREADS",
|
||||
"dcache_req_wid":"`NW_BITS",
|
||||
"dcache_req_pc": 32,
|
||||
"dcache_req_addr":"`NUM_THREADS * 32",
|
||||
@@ -139,14 +147,12 @@
|
||||
"dcache_req_byteen":"`NUM_THREADS * 4",
|
||||
"dcache_req_data": "`NUM_THREADS * 32",
|
||||
"dcache_req_tag":"`DCORE_TAG_ID_BITS",
|
||||
"?dcache_req_ready": 1,
|
||||
"?dcache_rsp_valid":"`NUM_THREADS",
|
||||
"?dcache_rsp_fire":"`NUM_THREADS",
|
||||
"dcache_rsp_data":"`NUM_THREADS * 32",
|
||||
"dcache_rsp_tag":"`DCORE_TAG_ID_BITS",
|
||||
"?dcache_rsp_ready": 1
|
||||
"dcache_rsp_tag":"`DCORE_TAG_ID_BITS"
|
||||
},
|
||||
"top/vortex/cluster/core/pipeline/issue": {
|
||||
"?issue_valid": 1,
|
||||
"afu/vortex/cluster/core/pipeline/issue": {
|
||||
"?issue_fire": 1,
|
||||
"issue_wid":"`NW_BITS",
|
||||
"issue_tmask":"`NUM_THREADS",
|
||||
"issue_pc": 32,
|
||||
@@ -161,7 +167,6 @@
|
||||
"issue_imm": 32,
|
||||
"issue_rs1_is_pc": 1,
|
||||
"issue_rs2_is_imm": 1,
|
||||
"?issue_ready": 1,
|
||||
"?gpr_rsp_valid": 1,
|
||||
"gpr_rsp_wid":"`NW_BITS",
|
||||
"gpr_rsp_pc": 32,
|
||||
@@ -177,7 +182,7 @@
|
||||
"!scoreboard_delay": 1,
|
||||
"!execute_delay": 1
|
||||
},
|
||||
"top/vortex/l3cache/bank, top/vortex/cluster/l2cache/bank, top/vortex/cluster/core/mem_unit/dcache/bank, top/vortex/cluster/core/mem_unit/icache/bank, top/vortex/cluster/core/mem_unit/smem/bank": {
|
||||
"afu/vortex/l3cache/bank, afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank, afu/vortex/cluster/core/mem_unit/smem/bank": {
|
||||
"?valid_st0": 1,
|
||||
"?valid_st1": 1,
|
||||
"?valid_st2": 1,
|
||||
|
||||
@@ -638,10 +638,7 @@ def gen_vl_header(file, modules, taps):
|
||||
|
||||
print("`define SCOPE_TRIGGER \\", file=f)
|
||||
i = 0
|
||||
excluded_list = []
|
||||
for key in toptaps:
|
||||
if key in excluded_list:
|
||||
continue
|
||||
tap = toptaps[key]
|
||||
if tap[2] != 2:
|
||||
continue
|
||||
@@ -653,16 +650,7 @@ def gen_vl_header(file, modules, taps):
|
||||
print(" | \\", file=f)
|
||||
print("\t(", file=f, end='')
|
||||
name = trigger_name("scope_" + key.replace('/', '_') + su, size)
|
||||
if key.endswith("_valid"):
|
||||
ready_signal = key[:-6] + "_ready"
|
||||
if ready_signal in toptaps:
|
||||
rname = trigger_name("scope_" + ready_signal.replace('/', '_') + su, size)
|
||||
print(name + " && " + rname, file=f, end='')
|
||||
excluded_list.append(ready_signal)
|
||||
else:
|
||||
print(name, file=f, end='')
|
||||
else:
|
||||
print(name, file=f, end='')
|
||||
print(name, file=f, end='')
|
||||
print(")", file=f, end='')
|
||||
i += 1
|
||||
print("", file=f)
|
||||
|
||||
Reference in New Issue
Block a user