diff --git a/driver/opae/Makefile b/driver/opae/Makefile index 367d8833..d7be0475 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -17,7 +17,7 @@ CXXFLAGS +=-fstack-protector # Position independent code CXXFLAGS += -fPIC -CXXFLAGS += -DGLOBAL_BLOCK_SIZE_BYTES=64 +CXXFLAGS += -DSCOPE LDFLAGS += -luuid diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index d6f018df..75d17c8d 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -4,6 +4,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include #include @@ -21,16 +27,18 @@ /////////////////////////////////////////////////////////////////////////////// -#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ -#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE -#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN -#define CMD_TYPE_CLFLUSH AFU_IMAGE_CMD_TYPE_CLFLUSH +#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ +#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE +#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN +#define CMD_TYPE_CLFLUSH AFU_IMAGE_CMD_TYPE_CLFLUSH -#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4) -#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4) -#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4) -#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4) -#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4) +#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4) +#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4) +#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4) +#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4) +#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4) +#define MMIO_CSR_SCOPE_CMD (AFU_IMAGE_MMIO_CSR_SCOPE_CMD * 4) +#define MMIO_CSR_SCOPE_DATA (AFU_IMAGE_MMIO_CSR_SCOPE_DATA * 4) /////////////////////////////////////////////////////////////////////////////// @@ -57,6 +65,10 @@ inline bool is_aligned(size_t addr, size_t alignment) { return 0 == (addr & (alignment - 1)); } +#ifdef SCOPE +std::future future_scope; +#endif + /////////////////////////////////////////////////////////////////////////////// extern int vx_dev_open(vx_device_h* hdevice) { @@ -122,6 +134,10 @@ extern int vx_dev_close(vx_device_h hdevice) { if (nullptr == hdevice) return -1; +#ifdef SCOPE + future_scope.wait(); +#endif + vx_device_t *device = ((vx_device_t*)hdevice); fpgaClose(device->fpga); @@ -377,6 +393,171 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) { return 0; } +static int vx_scope_trace(vx_device_h hdevice) { + if (nullptr == hdevice) + return -1; + + vx_device_t *device = ((vx_device_t*)hdevice); + + std::ofstream ofs("vx_scope.vcd"); + + ofs << "$timescale 1 ns $end" << std::endl; + + int fwidth = 0; + + ofs << "$var reg 1 0 clk $end" << std::endl; + + fwidth += 1; + + ofs << "$var reg 1 1 icache_req_valid $end" << std::endl; + ofs << "$var reg 2 2 icache_req_tag $end" << std::endl; + ofs << "$var reg 1 3 icache_req_ready $end" << std::endl; + ofs << "$var reg 1 4 icache_rsp_valid $end" << std::endl; + ofs << "$var reg 2 5 icache_rsp_tag $end" << std::endl; + ofs << "$var reg 1 6 icache_rsp_ready $end" << std::endl; + + fwidth += 8; + + ofs << "$var reg 4 7 dcache_req_valid $end" << std::endl; + ofs << "$var reg 2 8 dcache_req_tag $end" << std::endl; + ofs << "$var reg 1 9 dcache_req_ready $end" << std::endl; + ofs << "$var reg 4 10 dcache_rsp_valid $end" << std::endl; + ofs << "$var reg 2 11 dcache_rsp_tag $end" << std::endl; + ofs << "$var reg 1 12 dcache_rsp_ready $end" << std::endl; + + fwidth += 14; + + ofs << "$var reg 1 13 dram_req_valid $end" << std::endl; + ofs << "$var reg 29 14 dram_req_tag $end" << std::endl; + ofs << "$var reg 1 15 dram_req_ready $end" << std::endl; + ofs << "$var reg 1 16 dram_rsp_valid $end" << std::endl; + ofs << "$var reg 29 17 dram_rsp_tag $end" << std::endl; + ofs << "$var reg 1 18 dram_rsp_ready $end" << std::endl; + + fwidth += 62; + + ofs << "$var reg 1 19 schedule_delay $end" << std::endl; + + fwidth += 1; + + const int num_signals = 20; + + ofs << "enddefinitions $end" << std::endl; + + uint64_t frame_width, max_frames, data_valid; + + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 2)); + CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width)); + std::cout << "scope::frame_width=" << frame_width << std::endl; + + assert((fwidth-1)== (int)frame_width); + + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 3)); + CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames)); + std::cout << "scope::max_frames=" << max_frames << std::endl; + + do { + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 0)); + CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid)); + if (data_valid) + break; + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } while (true); + + std::cout << "scope trace dump begin..." << std::endl; + + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 1)); + + std::vector signa_data(frame_width+1); + + uint64_t frame_offset = 0, frame_no = 0; + + int signal_id = 0; + int signal_offset = 0; + + auto print_signal = [&] (uint64_t word, int signal_width) { + + int word_offset = frame_offset % 64; + + signa_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0'; + + ++signal_offset; + ++frame_offset; + + if (signal_offset == signal_width) { + signa_data[signal_width] = 0; // string null termination + ofs << 'b' << signa_data.data() << ' ' << (num_signals - signal_id) << std::endl; + signal_offset = 0; + ++signal_id; + } + + if (frame_offset == frame_width) { + assert(0 == signal_offset); + signal_id = 0; + frame_offset = 0; + ++frame_no; + if (frame_no != max_frames) { + ofs << '#' << (frame_no * 2 + 0) << std::endl; + ofs << "b0 0" << std::endl; + ofs << '#' << (frame_no * 2 + 1) << std::endl; + ofs << "b1 0" << std::endl; + ++signal_id; + } + } + }; + + ofs << '#' << (frame_no * 2 + 0) << std::endl; + ofs << "b0 0" << std::endl; + ofs << '#' << (frame_no * 2 + 1) << std::endl; + ofs << "b1 0" << std::endl; + ++signal_id; + + do { + if (frame_no == max_frames-1) { + // verify last frame is valid + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 0)); + CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid)); + assert(data_valid == 1); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 1)); + } + + uint64_t word; + CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &word)); + + do { + switch (num_signals - signal_id) { + case 2: + case 5: + case 8: + case 11: + print_signal(word, 2); + break; + case 7: + case 10: + print_signal(word, 4); + break; + case 14: + case 17: + print_signal(word, 29); + break; + default: + print_signal(word, 1); + break; + } + } while ((frame_offset % 64) != 0); + + } while (frame_no != max_frames); + + std::cout << "scope trace dump done!" << std::endl; + + // verify data not valid + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 0)); + CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &data_valid)); + assert(data_valid == 0); + + return 0; +} + extern int vx_start(vx_device_h hdevice) { if (nullptr == hdevice) return -1; @@ -387,6 +568,12 @@ extern int vx_start(vx_device_h hdevice) { if (vx_ready_wait(hdevice, -1) != 0) return -1; +#ifdef SCOPE + // launch logic scope + future_scope = std::async(std::launch::async, [&]{ vx_scope_trace(hdevice); }); +#endif + + // start execution CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN)); return 0; diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 685fd89f..510aa7b5 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -175,11 +175,11 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_alloc_shared_mem(device, 4096, &dbuf)); // run tests - if (0 == test || -1 == test) { + /*if (0 == test || -1 == test) { std::cout << "run memcopy test" << std::endl; RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1)); RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d40ff40ff, 64)); - } + }*/ if (1 == test || -1 == test) { std::cout << "run kernel test" << std::endl; diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index ff10bf62..8481da68 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -1,12 +1,16 @@ vortex_afu.json -#+define+DBG_PRINT_CORE_ICACHE -#+define+DBG_PRINT_CORE_DCACHE -#+define+DBG_PRINT_CACHE_BANK -#+define+DBG_PRINT_CACHE_SNP -#+define+DBG_PRINT_CACHE_MSRQ -#+define+DBG_PRINT_DRAM -#+define+DBG_PRINT_OPAE ++define+NDEBUG + ++define+DBG_PRINT_CORE_ICACHE ++define+DBG_PRINT_CORE_DCACHE ++define+DBG_PRINT_CACHE_BANK ++define+DBG_PRINT_CACHE_SNP ++define+DBG_PRINT_CACHE_MSRQ ++define+DBG_PRINT_DRAM ++define+DBG_PRINT_OPAE + ++define+SCOPE +incdir+. +incdir+../rtl @@ -65,10 +69,12 @@ vortex_afu.json ../rtl/libs/VX_generic_queue.v ../rtl/libs/VX_indexable_queue.v ../rtl/libs/VX_countones.v +../rtl/libs/VX_scope.v ../rtl/Vortex_Socket.v ../rtl/Vortex_Cluster.v ../rtl/Vortex.v +../rtl/VX_pipeline.v ../rtl/VX_front_end.v ../rtl/VX_back_end.v ../rtl/VX_fetch.v diff --git a/hw/opae/vortex_afu.json b/hw/opae/vortex_afu.json index 82d2d082..98dcecfd 100644 --- a/hw/opae/vortex_afu.json +++ b/hw/opae/vortex_afu.json @@ -10,7 +10,7 @@ "mmio-csr-mem-addr": 14, "mmio-csr-data-size": 16, "mmio-csr-status": 18, - "mmio-csr-scope-delay": 20, + "mmio-csr-scope-cmd": 20, "mmio-csr-scope-data": 22, "cmd-type-read": 1, diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index ca420ed0..6795583f 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -66,8 +66,8 @@ localparam MMIO_CSR_MEM_ADDR = `AFU_IMAGE_MMIO_CSR_MEM_ADDR; localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE; localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS; -localparam MMIO_CSR_SCOPE_DELAY = `AFU_IMAGE_MMIO_CSR_SCOPE_DELAY; -localparam MMIO_CSR_SCOPE_DATA = `AFU_IMAGE_MMIO_CSR_SCOPE_DATA; +localparam MMIO_CSR_SCOPE_CMD = `AFU_IMAGE_MMIO_CSR_SCOPE_CMD; +localparam MMIO_CSR_SCOPE_DATA= `AFU_IMAGE_MMIO_CSR_SCOPE_DATA; logic [127:0] afu_id = `AFU_ACCEL_UUID; @@ -138,7 +138,7 @@ t_ccip_clAddr csr_io_addr; logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr; logic[DRAM_ADDR_WIDTH-1:0] csr_data_size; -logic [63:0] csr_scope_delay; +logic [63:0] csr_scope_cmd; logic [63:0] csr_scope_data; logic csr_scope_read; logic csr_scope_write; @@ -153,8 +153,8 @@ assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); t_if_ccip_c2_Tx mmio_tx; assign af2cp_sTxPort.c2 = mmio_tx; -assign csr_scope_delay = 64'(cp2af_sRxPort.c0.data); -assign csr_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CSR_SCOPE_DELAY == mmio_hdr.address); +assign csr_scope_cmd = 64'(cp2af_sRxPort.c0.data); +assign csr_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CSR_SCOPE_CMD == mmio_hdr.address); assign csr_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_CSR_SCOPE_DATA == mmio_hdr.address); always_ff @(posedge clk) @@ -201,6 +201,11 @@ begin $display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data)); `endif end + MMIO_CSR_SCOPE_CMD: begin + `ifdef DBG_PRINT_OPAE + $display("%t: CSR_SCOPE_CMD: %0d", $time, 64'(cp2af_sRxPort.c0.data)); + `endif + end default: begin // user-defined CSRs //if (mmio_hdr.addres >= MMIO_CSR_USER) begin @@ -238,9 +243,9 @@ begin mmio_tx.data <= 64'(state); end MMIO_CSR_SCOPE_DATA: begin - mmio_tx.data <= csr_scope_data; + mmio_tx.data <= csr_scope_data; `ifdef DBG_PRINT_OPAE - $display("%t: scope: data=%0d", $time, csr_scope_data); + $display("%t: SCOPE: data=%0d", $time, csr_scope_data); `endif end default: mmio_tx.data <= 64'h0; @@ -790,18 +795,20 @@ end `ifdef SCOPE -`SCOPE_SIGNALS_DECL() +`SCOPE_SIGNALS_DECL + +`STATIC_ASSERT($bits({`SCOPE_SIGNALS_LIST}) == 85, "oops!") VX_scope #( - .DATAW ($bits({`SCOPE_SIGNALS_LIST()})), + .DATAW ($bits({`SCOPE_SIGNALS_LIST})), .BUSW (64), - .SIZE (1024) + .SIZE (256) ) scope ( .clk (clk), .reset (SoftReset), .start (vx_reset), - .data_in ({`SCOPE_SIGNALS_LIST()}), - .bus_in (csr_scope_delay), + .data_in ({`SCOPE_SIGNALS_LIST}), + .bus_in (csr_scope_cmd), .bus_out (csr_scope_data), .bus_read (csr_scope_read), .bus_write(csr_scope_write) @@ -814,7 +821,7 @@ VX_scope #( assign cmd_run_done = !vx_busy; Vortex_Socket #() vx_socket ( - `SCOPE_SIGNALS_ATTACH(), + `SCOPE_SIGNALS_ATTACH .clk (clk), .reset (vx_reset), diff --git a/hw/opae/vortex_afu.vh b/hw/opae/vortex_afu.vh new file mode 100644 index 00000000..8d437c38 --- /dev/null +++ b/hw/opae/vortex_afu.vh @@ -0,0 +1,29 @@ +`ifndef __VORTEX_AFU__ +`define __VORTEX_AFU__ + +`include "ccip_if_pkg.sv" + +`define PLATFORM_PROVIDES_LOCAL_MEMORY +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 27 +`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH 512 +`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4 + +`include "local_mem_cfg_pkg.sv" + +`define AFU_ACCEL_NAME "vortex_afu" +`define AFU_ACCEL_UUID 128'h35f9452b_25c2_434c_93d5_6f8c60db361c +`define AFU_IMAGE_CMD_TYPE_CLFLUSH 4 +`define AFU_IMAGE_CMD_TYPE_READ 1 +`define AFU_IMAGE_CMD_TYPE_RUN 3 +`define AFU_IMAGE_CMD_TYPE_WRITE 2 +`define AFU_IMAGE_MMIO_CSR_CMD 10 +`define AFU_IMAGE_MMIO_CSR_DATA_SIZE 12 +`define AFU_IMAGE_MMIO_CSR_IO_ADDR 14 +`define AFU_IMAGE_MMIO_CSR_MEM_ADDR 16 +`define AFU_IMAGE_MMIO_CSR_STATUS 18 +`define AFU_IMAGE_MMIO_CSR_SCOPE_CMD 20 +`define AFU_IMAGE_MMIO_CSR_SCOPE_DATA 22 +`define AFU_IMAGE_POWER 0 +`define AFU_TOP_IFC "ccip_std_afu_avalon_mm" + +`endif \ No newline at end of file diff --git a/hw/opae/vortex_afu_sim.v b/hw/opae/vortex_afu_sim.v new file mode 100644 index 00000000..d3f9d8e1 --- /dev/null +++ b/hw/opae/vortex_afu_sim.v @@ -0,0 +1,149 @@ +`include "vortex_afu.vh" + +/* verilator lint_off IMPORTSTAR */ +import ccip_if_pkg::*; +import local_mem_cfg_pkg::*; +/* verilator lint_on IMPORTSTAR */ + +module vortex_afu_sim #( + parameter NUM_LOCAL_MEM_BANKS = 2 +) ( + // global signals + input clk, + input reset, + + // IF signals between CCI and AFU + input logic vcp2af_sRxPort_c0_TxAlmFull, + input logic vcp2af_sRxPort_c1_TxAlmFull, + + input t_ccip_vc vcp2af_sRxPort_c0_hdr_vc_used, + input logic vcp2af_sRxPort_c0_hdr_rsvd1, + input logic vcp2af_sRxPort_c0_hdr_hit_miss, + input logic [1:0] vcp2af_sRxPort_c0_hdr_rsvd0, + input t_ccip_clNum vcp2af_sRxPort_c0_hdr_cl_num, + input t_ccip_c0_rsp vcp2af_sRxPort_c0_hdr_resp_type, + input t_ccip_mdata vcp2af_sRxPort_c0_hdr_mdata, + input t_ccip_clData vcp2af_sRxPort_c0_data, + input logic vcp2af_sRxPort_c0_rspValid, + input logic vcp2af_sRxPort_c0_mmioRdValid, + input logic vcp2af_sRxPort_c0_mmioWrValid, + + input t_ccip_vc vcp2af_sRxPort_c1_hdr_vc_used, + input logic vcp2af_sRxPort_c1_hdr_rsvd1, + input logic vcp2af_sRxPort_c1_hdr_hit_miss, + input logic vcp2af_sRxPort_c1_hdr_format, + input logic vcp2af_sRxPort_c1_hdr_rsvd0, + input t_ccip_clNum vcp2af_sRxPort_c1_hdr_cl_num, + input t_ccip_c1_rsp vcp2af_sRxPort_c1_hdr_resp_type, + input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata, + input logic vcp2af_sRxPort_c1_rspValid, + + output t_ccip_vc af2cp_sTxPort_c0_hdr_vc_sel, + output logic [1:0] af2cp_sTxPort_c0_hdr_rsvd1, + output t_ccip_clLen af2cp_sTxPort_c0_hdr_cl_len, + output t_ccip_c0_req af2cp_sTxPort_c0_hdr_req_type, + output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0, + output t_ccip_clAddr af2cp_sTxPort_c0_hdr_address, + output t_ccip_mdata af2cp_sTxPort_c0_hdr_mdata, + output logic af2cp_sTxPort_c0_valid, + + output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd2, + output t_ccip_vc af2cp_sTxPort_c1_hdr_vc_sel, + output logic af2cp_sTxPort_c1_hdr_sop, + output logic af2cp_sTxPort_c1_hdr_rsvd1, + output t_ccip_clLen af2cp_sTxPort_c1_hdr_cl_len, + output t_ccip_c1_req af2cp_sTxPort_c1_hdr_req_type, + output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0, + output t_ccip_clAddr af2cp_sTxPort_c1_hdr_address, + output t_ccip_mdata af2cp_sTxPort_c1_hdr_mdata, + output t_ccip_clData af2cp_sTxPort_c1_data, + output logic af2cp_sTxPort_c1_valid, + + output t_ccip_tid af2cp_sTxPort_c2_hdr_tid, + output logic af2cp_sTxPort_c2_mmioRdValid, + output t_ccip_mmioData af2cp_sTxPort_c2_data, + + // Avalon signals for local memory access + output t_local_mem_data avs_writedata, + input t_local_mem_data avs_readdata, + output t_local_mem_addr avs_address, + input logic avs_waitrequest, + output logic avs_write, + output logic avs_read, + output t_local_mem_byte_mask avs_byteenable, + output t_local_mem_burst_cnt avs_burstcount, + input avs_readdatavalid, + + output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select +); + +vortex_afu #( + .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) +) vortex_afu ( + .clk(clk), + .SoftReset(reset), + .cp2af_sRxPort({ + vcp2af_sRxPort_c0_TxAlmFull, + vcp2af_sRxPort_c1_TxAlmFull, + + vcp2af_sRxPort_c0_hdr_vc_used, + vcp2af_sRxPort_c0_hdr_rsvd1, + vcp2af_sRxPort_c0_hdr_hit_miss, + vcp2af_sRxPort_c0_hdr_rsvd0, + vcp2af_sRxPort_c0_hdr_cl_num, + vcp2af_sRxPort_c0_hdr_resp_type, + vcp2af_sRxPort_c0_hdr_mdata, + vcp2af_sRxPort_c0_data, + vcp2af_sRxPort_c0_rspValid, + vcp2af_sRxPort_c0_mmioRdValid, + vcp2af_sRxPort_c0_mmioWrValid, + + vcp2af_sRxPort_c1_hdr_vc_used, + vcp2af_sRxPort_c1_hdr_rsvd1, + vcp2af_sRxPort_c1_hdr_hit_miss, + vcp2af_sRxPort_c1_hdr_format, + vcp2af_sRxPort_c1_hdr_rsvd0, + vcp2af_sRxPort_c1_hdr_cl_num, + vcp2af_sRxPort_c1_hdr_resp_type, + vcp2af_sRxPort_c1_hdr_mdata, + vcp2af_sRxPort_c1_rspValid} + ), + .af2cp_sTxPort({ + af2cp_sTxPort_c0_hdr_vc_sel, + af2cp_sTxPort_c0_hdr_rsvd1, + af2cp_sTxPort_c0_hdr_cl_len, + af2cp_sTxPort_c0_hdr_req_type, + af2cp_sTxPort_c0_hdr_rsvd0, + af2cp_sTxPort_c0_hdr_address, + af2cp_sTxPort_c0_hdr_mdata, + af2cp_sTxPort_c0_valid, + + af2cp_sTxPort_c1_hdr_rsvd2, + af2cp_sTxPort_c1_hdr_vc_sel, + af2cp_sTxPort_c1_hdr_sop, + af2cp_sTxPort_c1_hdr_rsvd1, + af2cp_sTxPort_c1_hdr_cl_len, + af2cp_sTxPort_c1_hdr_req_type, + af2cp_sTxPort_c1_hdr_rsvd0, + af2cp_sTxPort_c1_hdr_address, + af2cp_sTxPort_c1_hdr_mdata, + af2cp_sTxPort_c1_data, + af2cp_sTxPort_c1_valid, + + af2cp_sTxPort_c2_hdr_tid, + af2cp_sTxPort_c2_mmioRdValid, + af2cp_sTxPort_c2_data + }), + .avs_writedata(avs_writedata), + .avs_readdata(avs_readdata), + .avs_address(avs_address), + .avs_waitrequest(avs_waitrequest), + .avs_write(avs_write), + .avs_read(avs_read), + .avs_byteenable(avs_byteenable), + .avs_burstcount(avs_burstcount), + .avs_readdatavalid(avs_readdatavalid), + .mem_bank_select(mem_bank_select) +); + +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 16f59ade..4cc8b8fc 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -283,7 +283,7 @@ /////////////////////////////////////////////////////////////////////////////// `ifdef SCOPE - `define SCOPE_SIGNALS_LIST() \ + `define SCOPE_SIGNALS_LIST \ scope_icache_req_valid, \ scope_icache_req_tag, \ scope_icache_req_ready, \ @@ -304,7 +304,7 @@ scope_dram_rsp_ready, \ scope_schedule_delay - `define SCOPE_SIGNALS_DECL() \ + `define SCOPE_SIGNALS_DECL \ wire scope_icache_req_valid; \ wire [`DCORE_TAG_WIDTH-1:0] scope_icache_req_tag; \ wire scope_icache_req_ready; \ @@ -325,7 +325,7 @@ wire scope_dram_rsp_ready; \ wire scope_schedule_delay; - `define SCOPE_SIGNALS_IO() \ + `define SCOPE_SIGNALS_IO \ /* verilator lint_off UNDRIVEN */ \ output wire scope_icache_req_valid, \ output wire [`DCORE_TAG_WIDTH-1:0] scope_icache_req_tag, \ @@ -345,10 +345,10 @@ output wire scope_dram_rsp_valid, \ output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag, \ output wire scope_dram_rsp_ready, \ - output wire scope_schedule_delay \ + output wire scope_schedule_delay, \ /* verilator lint_on UNDRIVEN */ - `define SCOPE_SIGNALS_ATTACH() \ + `define SCOPE_SIGNALS_ATTACH \ .scope_icache_req_valid (scope_icache_req_valid), \ .scope_icache_req_tag (scope_icache_req_tag), \ .scope_icache_req_ready (scope_icache_req_ready), \ @@ -361,18 +361,18 @@ .scope_dcache_rsp_valid (scope_dcache_rsp_valid), \ .scope_dcache_rsp_tag (scope_dcache_rsp_tag), \ .scope_dcache_rsp_ready (scope_dcache_rsp_ready), \ - .scope_dram_req_valid (scope_dram_req_valid), \ - .scope_dram_req_tag (scope_dram_req_tag), \ - .scope_dram_req_ready (scope_dram_req_ready), \ - .scope_dram_rsp_valid (scope_dram_rsp_valid), \ - .scope_dram_rsp_tag (scope_dram_rsp_tag), \ - .scope_dram_rsp_ready (scope_dram_rsp_ready), \ - .scope_schedule_delay (scope_schedule_delay) + .scope_dram_req_valid (scope_dram_req_valid), \ + .scope_dram_req_tag (scope_dram_req_tag), \ + .scope_dram_req_ready (scope_dram_req_ready), \ + .scope_dram_rsp_valid (scope_dram_rsp_valid), \ + .scope_dram_rsp_tag (scope_dram_rsp_tag), \ + .scope_dram_rsp_ready (scope_dram_rsp_ready), \ + .scope_schedule_delay (scope_schedule_delay), `define SCOPE_ASSIGN(d,s) assign d = s `else - `define SCOPE_SIGNALS_IO() - `define SCOPE_SIGNALS_ATTACH() + `define SCOPE_SIGNALS_IO + `define SCOPE_SIGNALS_ATTACH `define SCOPE_ASSIGN(d,s) `endif diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index d72e0151..3e03024d 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -3,7 +3,7 @@ module VX_pipeline #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_IO(), + `SCOPE_SIGNALS_IO // Clock input wire clk, diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 95767873..bd827351 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -3,7 +3,7 @@ module Vortex #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_IO(), + `SCOPE_SIGNALS_IO // Clock input wire clk, @@ -165,7 +165,7 @@ module Vortex #( VX_pipeline #( .CORE_ID(CORE_ID) ) pipeline ( - `SCOPE_SIGNALS_ATTACH(), + `SCOPE_SIGNALS_ATTACH .clk(clk), .reset(reset), diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index 71719b0e..abb07b4d 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -3,7 +3,7 @@ module Vortex_Cluster #( parameter CLUSTER_ID = 0 ) ( - `SCOPE_SIGNALS_IO(), + `SCOPE_SIGNALS_IO // Clock input wire clk, @@ -108,7 +108,7 @@ module Vortex_Cluster #( Vortex #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) vortex_core ( - `SCOPE_SIGNALS_ATTACH(), + `SCOPE_SIGNALS_ATTACH .clk (clk), .reset (reset), diff --git a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index 5ab00a69..a5e122a6 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -1,7 +1,7 @@ `include "VX_define.vh" module Vortex_Socket ( - `SCOPE_SIGNALS_IO(), + `SCOPE_SIGNALS_IO // Clock input wire clk, @@ -64,7 +64,7 @@ module Vortex_Socket ( Vortex_Cluster #( .CLUSTER_ID(`L3CACHE_ID) ) Vortex_Cluster ( - `SCOPE_SIGNALS_ATTACH(), + `SCOPE_SIGNALS_ATTACH .clk (clk), .reset (reset), @@ -151,7 +151,7 @@ module Vortex_Socket ( Vortex_Cluster #( .CLUSTER_ID(i) ) Vortex_Cluster ( - `SCOPE_SIGNALS_ATTACH(), + `SCOPE_SIGNALS_ATTACH .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 234cdfed..428c5057 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -1,87 +1,146 @@ +`include "VX_define.vh" + module VX_scope #( parameter DATAW = 64, parameter BUSW = 64, - parameter SIZE = 1024 + parameter SIZE = 256 ) ( input wire clk, input wire reset, input wire start, input wire [DATAW-1:0] data_in, - input wire [BUSW-1:0] bus_in, - output wire [BUSW-1:0] bus_out, + input wire [BUSW-1:0] bus_in, + output reg [BUSW-1:0] bus_out, input wire bus_write, input wire bus_read ); + typedef enum logic[2:0] { + CMD_GET_VALID, + CMD_GET_DATA, + CMD_GET_WIDTH, + CMD_GET_DEPTH, + CMD_SET_DELAY, + CMD_SET_DURATION, + CMD_SET_RESERVED1, + CMD_SET_RESERVED2 + } cmd_t; + + typedef enum logic[1:0] { + GET_VALID, + GET_DATA, + GET_WIDTH, + GET_DEPTH + } cmd_get_t; + reg [DATAW-1:0] mem [SIZE-1:0]; + //reg [63:0] offsets [SIZE-1:0]; - reg [`CLOG2(SIZE)-1:0] raddr, waddr; - - reg started, running, done; - - reg [BUSW-1:0] delay_cntr; - - reg data_valid, data_end; + reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end; reg [`LOG2UP(DATAW)-1:0] read_offset; - wire [BUSW-3:0] data_part; + reg start_wait, recording, data_valid; + + reg [BUSW-3:0] delay_val, delay_cntr; + + reg [1:0] out_cmd; + + wire [2:0] cmd_type; + wire [BUSW-4:0] cmd_data; + assign {cmd_data, cmd_type} = bus_in; always @(posedge clk) begin if (reset) begin raddr <= 0; waddr <= 0; - started <= 0; - running <= 0; - done <= 0; + start_wait <= 0; + recording <= 0; delay_cntr <= 0; read_offset <= 0; + data_valid <= 0; + out_cmd <= $bits(out_cmd)'(CMD_GET_VALID); + delay_val <= 0; + waddr_end <= $bits(waddr)'(SIZE-1); end else begin if (bus_write) begin - delay_cntr <= bus_in; + case (cmd_type) + CMD_GET_VALID, + CMD_GET_DATA, + CMD_GET_WIDTH, + CMD_GET_DEPTH: out_cmd <= $bits(out_cmd)'(cmd_type); + CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data); + CMD_SET_DURATION: waddr_end <= $bits(waddr)'(cmd_data); + default:; + endcase end - if (start) begin - started <= 1; - end - - if (start || started) begin - if (0 == delay_cntr) begin - running <= 1; + if (start) begin + waddr <= 0; + if (0 == delay_val) begin + start_wait <= 0; + recording <= 1; + delay_cntr <= 0; end else begin - delay_cntr <= delay_cntr - 1; + start_wait <= 1; + recording <= 0; + delay_cntr <= delay_val; end end - if (running && !done) begin + if (start_wait) begin + delay_cntr <= delay_cntr - 1; + if (1 == delay_cntr) begin + start_wait <= 0; + recording <= 1; + end + end + + if (recording) begin mem[waddr] <= data_in; waddr <= waddr + 1; - if (waddr == $bits(waddr)'(SIZE-1)) begin - done <= 1; + if (waddr == waddr_end) begin + recording <= 0; + data_valid <= 1; end end - if (bus_read) begin - if (DATAW > (BUSW-2)) begin - if (read_offset < $bits(read_offset)'(DATAW-(BUSW-2))) begin - read_offset <= read_offset + $bits(read_offset)'(BUSW-2); + if (bus_read + && (out_cmd == GET_DATA) + && data_valid) begin + if (DATAW > BUSW) begin + if (read_offset < $bits(read_offset)'(DATAW-BUSW)) begin + read_offset <= read_offset + $bits(read_offset)'(BUSW); end else begin - read_offset <= 0; raddr <= raddr + 1; - end + read_offset <= 0; + if (raddr == waddr_end) begin + data_valid <= 0; + end + end end else begin - raddr <= raddr + 1; - end + raddr <= raddr + 1; + if (raddr == waddr_end) begin + data_valid <= 0; + end + end end end end - assign data_valid = (waddr != 0) && (raddr <= waddr); + always @(*) begin + case (out_cmd) + GET_VALID : bus_out = BUSW'(data_valid); + GET_WIDTH : bus_out = BUSW'(DATAW); + GET_DEPTH : bus_out = BUSW'(waddr_end) + BUSW'(1); + default : bus_out = (BUSW)'(mem[raddr] >> read_offset); + endcase + end - assign data_end = (0 == read_offset) || (raddr == waddr); - - assign data_part = (BUSW-2)'(mem[raddr] >> read_offset); - - assign bus_out = {data_valid, data_end, data_part}; + always_ff @(posedge clk) begin + if (bus_read) begin + $display("%t: read: cmd=%0d, out=0x%0h, addr=%0d, off=%0d", $time, out_cmd, bus_out, raddr, read_offset); + end + end endmodule \ No newline at end of file