opae fixes
This commit is contained in:
@@ -15,7 +15,7 @@ extern int vx_dev_caps(int caps_id) {
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
return NUM_THREADS;
|
||||
case VX_CAPS_CACHE_LINESIZE:
|
||||
return GLOBAL_BLOCK_SIZE;
|
||||
return 64;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
return 0xffffffff;
|
||||
case VX_CAPS_ALLOC_BASE_ADDR:
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <cmath>
|
||||
#include <uuid/uuid.h>
|
||||
#include <opae/fpga.h>
|
||||
#include <vortex.h>
|
||||
@@ -46,9 +47,14 @@ typedef struct vx_buffer_ {
|
||||
size_t size;
|
||||
} vx_buffer_t;
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
// Rounds `size` up to the nearest multiple of `alignment`.
//
// `alignment` must be a non-zero power of two; this is checked by assert
// (debug builds only). The mask arithmetic below is only valid under that
// condition.
inline size_t align_size(size_t size, size_t alignment) {
    // Zero also satisfies (x & (x - 1)) == 0, so it must be rejected explicitly;
    // otherwise every size would silently round to 0.
    assert(0 != alignment && 0 == (alignment & (alignment - 1)));
    return (size + alignment - 1) & ~(alignment - 1);
}
|
||||
|
||||
// Returns true when `addr` is a multiple of `alignment`.
//
// `alignment` must be a non-zero power of two; this is checked by assert
// (debug builds only).
inline bool is_aligned(size_t addr, size_t alignment) {
    // Zero also satisfies (x & (x - 1)) == 0, so it must be rejected explicitly.
    assert(0 != alignment && 0 == (alignment & (alignment - 1)));
    return 0 == (addr & (alignment - 1));
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -132,9 +138,11 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
size_t asize = align_size(size);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
if (device->mem_allocation + asize > dev_mem_size)
|
||||
return -1;
|
||||
|
||||
@@ -158,7 +166,9 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
size_t asize = align_size(size);
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
|
||||
size_t asize = align_size(size, line_size);
|
||||
|
||||
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
|
||||
if (FPGA_OK != res) {
|
||||
@@ -249,20 +259,35 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(size, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(buffer->io_addr + src_offset, line_size))
|
||||
return -1;
|
||||
|
||||
// bound checking
|
||||
if (size + src_offset > buffer->size)
|
||||
return -1;
|
||||
if (dev_maddr + size > dev_mem_size)
|
||||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + src_offset));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
||||
auto ls_shift = (int)std::log2(line_size);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr >> ls_shift) ));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
|
||||
|
||||
// Wait for the write operation to finish
|
||||
@@ -277,20 +302,35 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(size, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(buffer->io_addr + dest_offset, line_size))
|
||||
return -1;
|
||||
|
||||
// bound checking
|
||||
if (size + dest_offset > buffer->size)
|
||||
return -1;
|
||||
if (dev_maddr + size > dev_mem_size)
|
||||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + dest_offset));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
||||
auto ls_shift = (int)std::log2(line_size);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr) >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
|
||||
|
||||
// Wait for the read operation to finish
|
||||
@@ -307,12 +347,22 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
||||
|
||||
vx_device_t* device = ((vx_device_t*)hdevice);
|
||||
|
||||
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
|
||||
// check alignment
|
||||
if (!is_aligned(dev_maddr, line_size))
|
||||
return -1;
|
||||
if (!is_aligned(size, line_size))
|
||||
return -1;
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
||||
auto ls_shift = (int)std::log2(line_size);
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
|
||||
|
||||
// Wait for the flush operation to finish
|
||||
|
||||
@@ -38,13 +38,20 @@ make clean
|
||||
make
|
||||
./basic
|
||||
|
||||
#ASE build instructions
|
||||
#
|
||||
# ASE build instructions
|
||||
#
|
||||
|
||||
# Acquire a server node for running ASE simulations
|
||||
qsub-sim
|
||||
|
||||
# modify "vsim_run.tcl" to dump VCD trace
|
||||
vcd file vortex.vcd
|
||||
vcd add -r /*/Vortex/hw/rtl/*
|
||||
run -all
|
||||
|
||||
#compress
|
||||
# compress VCD trace
|
||||
tar -zcvf vortex.vcd.tar.gz work/vortex.vcd
|
||||
|
||||
# decompress
|
||||
# decompress VCD trace
|
||||
tar -zxvf vortex.vcd.tar.gz vortex.vcd
|
||||
30
hw/opae/run_ase.sh
Executable file
30
hw/opae/run_ase.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash

# Starts the ASE simulator in the background, waits until it signals
# readiness, then runs the given application.
#
# Usage: run_ase.sh <program> [program arguments...]

if [ $# -lt 1 ]; then
    echo "usage: $0 <program> [args...]" >&2
    exit 1
fi

SCRIPT_DIR=$PWD
PROGRAM=$(basename "$1")
PROGRAM_DIR=$(dirname "$1")

# Export ASE_WORKDIR variable
export ASE_WORKDIR="$SCRIPT_DIR/build_ase/work"

# Drop the program path; the remaining arguments are forwarded to the application
shift 1

# cleanup incomplete runs
rm -rf "$ASE_WORKDIR/.app_lock.pid" "$ASE_WORKDIR/.ase_ready.pid"

# Start Simulator in background
pushd "$SCRIPT_DIR/build_ase"
make sim &
popd

# Wait for simulator readiness
# When .ase_ready.pid is created in $ASE_WORKDIR, ASE is ready for simulation
# (note: '[' is a command, so it needs spaces around it and before ']')
while [ ! -f "$ASE_WORKDIR/.ase_ready.pid" ]
do
    sleep 1
done

# run application
pushd "$PROGRAM_DIR"
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$LD_LIBRARY_PATH ./"$PROGRAM" "$@"
popd
|
||||
@@ -1,6 +1,6 @@
|
||||
vortex_afu.json
|
||||
|
||||
+define+GLOBAL_BLOCK_SIZE_BYTES=64
|
||||
+define+GLOBAL_BLOCK_SIZE=64
|
||||
|
||||
+incdir+.
|
||||
+incdir+../rtl
|
||||
@@ -12,31 +12,42 @@ vortex_afu.json
|
||||
../rtl/VX_user_config.vh
|
||||
../rtl/VX_config.vh
|
||||
../rtl/VX_define.vh
|
||||
../rtl/cache/VX_cache_config.vh
|
||||
|
||||
../rtl/interfaces/VX_exec_unit_req_if.v
|
||||
../rtl/cache/VX_cache_config.vh
|
||||
../rtl/cache/VX_cache.v
|
||||
../rtl/cache/VX_bank.v
|
||||
../rtl/cache/VX_cache_core_rsp_merge.v
|
||||
../rtl/cache/VX_cache_core_req_bank_sel.v
|
||||
../rtl/cache/VX_cache_dram_req_arb.v
|
||||
../rtl/cache/VX_cache_dfq_queue.v
|
||||
../rtl/cache/VX_cache_req_queue.v
|
||||
../rtl/cache/VX_cache_miss_resrv.v
|
||||
../rtl/cache/VX_fill_invalidator.v
|
||||
../rtl/cache/VX_snp_fwd_arb.v
|
||||
../rtl/cache/VX_tag_data_access.v
|
||||
../rtl/cache/VX_tag_data_structure.v
|
||||
../rtl/cache/VX_prefetcher.v
|
||||
|
||||
../rtl/interfaces/VX_branch_rsp_if.v
|
||||
../rtl/interfaces/VX_inst_meta_if.v
|
||||
../rtl/interfaces/VX_join_if.v
|
||||
../rtl/interfaces/VX_inst_exec_wb_if.v
|
||||
../rtl/interfaces/VX_cache_dram_req_if.v
|
||||
../rtl/interfaces/VX_cache_dram_rsp_if.v
|
||||
../rtl/interfaces/VX_cache_core_req_if.v
|
||||
../rtl/interfaces/VX_cache_core_rsp_if.v
|
||||
../rtl/interfaces/VX_cache_dram_req_if.v
|
||||
../rtl/interfaces/VX_cache_dram_rsp_if.v
|
||||
../rtl/interfaces/VX_cache_snp_req_if.v
|
||||
../rtl/interfaces/VX_csr_req_if.v
|
||||
../rtl/interfaces/VX_exec_unit_req_if.v
|
||||
../rtl/interfaces/VX_frE_to_bckE_req_if.v
|
||||
../rtl/interfaces/VX_gpr_data_if.v
|
||||
../rtl/interfaces/VX_csr_wb_if.v
|
||||
../rtl/interfaces/VX_csr_req_if.v
|
||||
../rtl/interfaces/VX_lsu_req_if.v
|
||||
../rtl/interfaces/VX_cache_snp_req_rsp_if.v
|
||||
../rtl/interfaces/VX_gpr_jal_if.v
|
||||
../rtl/interfaces/VX_gpgpu_inst_req_if.v
|
||||
../rtl/interfaces/VX_wstall_if.v
|
||||
../rtl/interfaces/VX_wb_if.v
|
||||
../rtl/interfaces/VX_gpr_read_if.v
|
||||
../rtl/interfaces/VX_gpu_inst_req_if.v
|
||||
../rtl/interfaces/VX_inst_meta_if.v
|
||||
../rtl/interfaces/VX_jal_rsp_if.v
|
||||
../rtl/interfaces/VX_join_if.v
|
||||
../rtl/interfaces/VX_lsu_req_if.v
|
||||
../rtl/interfaces/VX_warp_ctl_if.v
|
||||
../rtl/interfaces/VX_inst_mem_wb_if.v
|
||||
../rtl/interfaces/VX_wb_if.v
|
||||
../rtl/interfaces/VX_wstall_if.v
|
||||
|
||||
../rtl/libs/VX_generic_register.v
|
||||
../rtl/libs/VX_mult.v
|
||||
@@ -58,40 +69,28 @@ vortex_afu.json
|
||||
../rtl/VX_warp.v
|
||||
../rtl/VX_icache_stage.v
|
||||
../rtl/VX_gpr_wrapper.v
|
||||
../rtl/VX_gpgpu_inst.v
|
||||
../rtl/VX_gpu_inst.v
|
||||
../rtl/VX_writeback.v
|
||||
../rtl/VX_csr_pipe.v
|
||||
../rtl/VX_csr_data.v
|
||||
../rtl/VX_csr_wrapper.v
|
||||
../rtl/VX_warp_sched.v
|
||||
../rtl/VX_gpr.v
|
||||
../rtl/VX_gpr_ram.v
|
||||
../rtl/VX_gpr_stage.v
|
||||
../rtl/VX_dmem_ctrl.v
|
||||
../rtl/VX_alu_unit.v
|
||||
../rtl/VX_csr_data.v
|
||||
../rtl/VX_lsu_unit.v
|
||||
../rtl/VX_decode.v
|
||||
../rtl/VX_inst_multiplex.v
|
||||
../rtl/VX_csr_wrapper.v
|
||||
../rtl/VX_lsu_addr_gen.v
|
||||
../rtl/VX_dcache_io_arb.v
|
||||
../rtl/VX_dram_arb.v
|
||||
|
||||
../rtl/pipe_regs/VX_f_d_reg.v
|
||||
../rtl/pipe_regs/VX_i_d_reg.v
|
||||
../rtl/pipe_regs/VX_d_e_reg.v
|
||||
|
||||
../rtl/cache/VX_snp_fwd_arb.v
|
||||
../rtl/cache/VX_cache_dram_req_arb.v
|
||||
../rtl/cache/VX_cache_dfq_queue.v
|
||||
../rtl/cache/VX_cache_wb_sel_merge.v
|
||||
../rtl/cache/VX_tag_data_access.v
|
||||
../rtl/cache/VX_cache.v
|
||||
../rtl/cache/VX_cache_core_req_bank_sel.v
|
||||
../rtl/cache/VX_cache_req_queue.v
|
||||
../rtl/cache/VX_bank.v
|
||||
../rtl/cache/VX_cache_miss_resrv.v
|
||||
../rtl/cache/VX_fill_invalidator.v
|
||||
../rtl/cache/VX_tag_data_structure.v
|
||||
../rtl/cache/VX_prefetcher.v
|
||||
|
||||
ccip_interface_reg.sv
|
||||
ccip_std_afu.sv
|
||||
vortex_afu.sv
|
||||
@@ -28,12 +28,14 @@ module vortex_afu #(
|
||||
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
|
||||
);
|
||||
|
||||
localparam DRAM_ADDR_WIDTH = (32 - `CLOG2(`GLOBAL_BLOCK_SIZE));
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 16;
|
||||
|
||||
localparam CCI_RD_WINDOW_SIZE = 8;
|
||||
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
|
||||
|
||||
localparam VX_SNOOP_DELAY = 300;
|
||||
localparam VX_SNOOP_DELAY = 1000;
|
||||
localparam VX_SNOOP_LEVELS = 2;
|
||||
|
||||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
@@ -60,38 +62,39 @@ typedef enum logic[3:0] {
|
||||
STATE_CLFLUSH
|
||||
} state_t;
|
||||
|
||||
typedef logic [`LOG2UP(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
|
||||
typedef logic [`LOG2UP(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
|
||||
typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
|
||||
|
||||
state_t state;
|
||||
|
||||
// Vortex signals /////////////////////////////////////////////////////////////
|
||||
// Vortex ports ///////////////////////////////////////////////////////////////
|
||||
|
||||
logic vx_dram_req_read;
|
||||
logic vx_dram_req_write;
|
||||
logic [31:0] vx_dram_req_addr;
|
||||
logic vx_dram_req_read;
|
||||
logic vx_dram_req_write;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
|
||||
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_req_data;
|
||||
logic vx_dram_req_ready;
|
||||
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
|
||||
logic vx_dram_req_ready;
|
||||
|
||||
logic vx_dram_rsp_ready;
|
||||
logic vx_dram_rsp_valid;
|
||||
logic [31:0] vx_dram_rsp_addr;
|
||||
logic vx_dram_rsp_valid;
|
||||
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_rsp_data;
|
||||
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
|
||||
logic vx_dram_rsp_ready;
|
||||
|
||||
logic vx_snp_req;
|
||||
logic [31:0] vx_snp_req_addr;
|
||||
logic vx_snp_req_ready;
|
||||
logic vx_snp_req_valid;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
|
||||
logic vx_snp_req_ready;
|
||||
|
||||
logic vx_ebreak;
|
||||
logic vx_busy;
|
||||
|
||||
// AVS Queues /////////////////////////////////////////////////////////////////
|
||||
|
||||
logic avs_raq_push;
|
||||
t_local_mem_addr avs_raq_din;
|
||||
logic avs_raq_pop;
|
||||
t_local_mem_addr avs_raq_dout;
|
||||
logic avs_raq_empty;
|
||||
logic avs_raq_full;
|
||||
logic avs_rtq_push;
|
||||
t_local_mem_addr avs_rtq_din;
|
||||
logic avs_rtq_pop;
|
||||
t_local_mem_addr avs_rtq_dout;
|
||||
logic avs_rtq_empty;
|
||||
logic avs_rtq_full;
|
||||
|
||||
logic avs_rdq_push;
|
||||
t_local_mem_data avs_rdq_din;
|
||||
@@ -105,7 +108,7 @@ logic avs_rdq_full;
|
||||
logic [2:0] csr_cmd;
|
||||
t_ccip_clAddr csr_io_addr;
|
||||
t_local_mem_addr csr_mem_addr;
|
||||
logic [31:0] csr_data_size;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] csr_data_size;
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -137,16 +140,16 @@ begin
|
||||
begin
|
||||
case (mmioHdr.address)
|
||||
MMIO_CSR_IO_ADDR: begin
|
||||
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6);
|
||||
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6));
|
||||
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
|
||||
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
|
||||
end
|
||||
MMIO_CSR_MEM_ADDR: begin
|
||||
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6);
|
||||
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6));
|
||||
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
|
||||
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
|
||||
end
|
||||
MMIO_CSR_DATA_SIZE: begin
|
||||
csr_data_size <= $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6);
|
||||
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6));
|
||||
csr_data_size <= $bits(csr_data_size)'(cp2af_sRxPort.c0.data);
|
||||
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'(cp2af_sRxPort.c0.data));
|
||||
end
|
||||
MMIO_CSR_CMD: begin
|
||||
csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
|
||||
@@ -195,12 +198,12 @@ end
|
||||
|
||||
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
||||
|
||||
logic [31:0] cci_write_ctr;
|
||||
logic [31:0] avs_read_ctr;
|
||||
logic [31:0] avs_write_ctr;
|
||||
logic [31:0] vx_snoop_ctr;
|
||||
logic [9:0] vx_snoop_delay;
|
||||
logic vx_reset;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] cci_write_ctr;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] avs_read_ctr;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] avs_write_ctr;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] snp_req_ctr;
|
||||
logic [9:0] snp_req_delay;
|
||||
logic vx_reset;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
@@ -248,13 +251,13 @@ begin
|
||||
end
|
||||
|
||||
STATE_RUN: begin
|
||||
if (vx_ebreak) begin
|
||||
if (!vx_busy) begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_CLFLUSH: begin
|
||||
if (vx_snoop_delay >= VX_SNOOP_DELAY) begin
|
||||
if (snp_req_delay >= VX_SNOOP_DELAY) begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
@@ -268,11 +271,12 @@ end
|
||||
logic cci_rdq_empty;
|
||||
t_cci_rdq_data cci_rdq_dout;
|
||||
logic cci_rdq_pop;
|
||||
logic [`L3DRAM_TAG_WIDTH-1:0] dram_req_tag;
|
||||
|
||||
t_ccip_clAddr next_avs_address;
|
||||
always_comb
|
||||
begin
|
||||
next_avs_address = csr_mem_addr + {avs_write_ctr[31:$bits(t_cci_rdq_tag)], t_cci_rdq_tag'(cci_rdq_dout)};
|
||||
next_avs_address = csr_mem_addr + {avs_write_ctr[DRAM_ADDR_WIDTH-1:$bits(t_cci_rdq_tag)], t_cci_rdq_tag'(cci_rdq_dout)};
|
||||
cci_rdq_pop = (state == STATE_WRITE
|
||||
&& !cci_rdq_empty
|
||||
&& !avs_waitrequest
|
||||
@@ -285,9 +289,7 @@ begin
|
||||
begin
|
||||
mem_bank_select <= 0;
|
||||
avs_burstcount <= 1;
|
||||
avs_byteenable <= 64'hffffffffffffffff;
|
||||
avs_address <= 0;
|
||||
avs_writedata <= 0;
|
||||
avs_byteenable <= 64'hffffffffffffffff;
|
||||
avs_read <= 0;
|
||||
avs_write <= 0;
|
||||
avs_read_ctr <= 0;
|
||||
@@ -305,7 +307,7 @@ begin
|
||||
end
|
||||
|
||||
STATE_READ: begin
|
||||
if (!avs_raq_full
|
||||
if (!avs_rtq_full
|
||||
&& !avs_rdq_full
|
||||
&& !avs_waitrequest
|
||||
&& avs_read_ctr < csr_data_size)
|
||||
@@ -332,18 +334,19 @@ begin
|
||||
if (vx_dram_req_read
|
||||
&& vx_dram_req_ready)
|
||||
begin
|
||||
avs_address <= (vx_dram_req_addr >> 6);
|
||||
avs_read <= 1;
|
||||
$display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr >> 6);
|
||||
avs_address <= vx_dram_req_addr;
|
||||
dram_req_tag <= vx_dram_req_tag;
|
||||
avs_read <= 1;
|
||||
$display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr);
|
||||
end
|
||||
|
||||
if (vx_dram_req_write
|
||||
&& vx_dram_req_ready)
|
||||
begin
|
||||
avs_writedata <= vx_dram_req_data;
|
||||
avs_address <= (vx_dram_req_addr >> 6);
|
||||
avs_address <= vx_dram_req_addr;
|
||||
avs_writedata <= vx_dram_req_data;
|
||||
avs_write <= 1;
|
||||
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr >> 6);
|
||||
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr);
|
||||
end
|
||||
end
|
||||
endcase
|
||||
@@ -362,7 +365,7 @@ logic vortex_enabled;
|
||||
always_comb
|
||||
begin
|
||||
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
vx_dram_req_ready = vortex_enabled && !avs_waitrequest && !avs_raq_full && !avs_rdq_full;
|
||||
vx_dram_req_ready = vortex_enabled && !avs_waitrequest && !avs_rtq_full && !avs_rdq_full;
|
||||
end
|
||||
|
||||
// Vortex DRAM fill response
|
||||
@@ -370,7 +373,7 @@ end
|
||||
always_comb
|
||||
begin
|
||||
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
|
||||
vx_dram_rsp_addr = (avs_raq_dout << 6);
|
||||
vx_dram_rsp_tag = avs_rtq_dout;
|
||||
vx_dram_rsp_data = avs_rdq_dout;
|
||||
end
|
||||
|
||||
@@ -380,9 +383,9 @@ logic cci_wr_req;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
avs_raq_pop = vx_dram_rsp_valid || cci_wr_req;
|
||||
avs_raq_din = avs_address;
|
||||
avs_raq_push = avs_read;
|
||||
avs_rtq_pop = vx_dram_rsp_valid || cci_wr_req;
|
||||
avs_rtq_din = dram_req_tag;
|
||||
avs_rtq_push = avs_read;
|
||||
end
|
||||
|
||||
VX_generic_queue #(
|
||||
@@ -391,19 +394,19 @@ VX_generic_queue #(
|
||||
) avs_rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (SoftReset),
|
||||
.push (avs_raq_push),
|
||||
.data_in (avs_raq_din),
|
||||
.pop (avs_raq_pop),
|
||||
.data_out (avs_raq_dout),
|
||||
.empty (avs_raq_empty),
|
||||
.full (avs_raq_full)
|
||||
.push (avs_rtq_push),
|
||||
.data_in (avs_rtq_din),
|
||||
.pop (avs_rtq_pop),
|
||||
.data_out (avs_rtq_dout),
|
||||
.empty (avs_rtq_empty),
|
||||
.full (avs_rtq_full)
|
||||
);
|
||||
|
||||
// AVS data read response queue ///////////////////////////////////////////////
|
||||
|
||||
always_comb
|
||||
begin
|
||||
avs_rdq_pop = avs_raq_pop;
|
||||
avs_rdq_pop = avs_rtq_pop;
|
||||
avs_rdq_din = avs_readdata;
|
||||
avs_rdq_push = avs_readdatavalid;
|
||||
end
|
||||
@@ -426,7 +429,7 @@ VX_generic_queue #(
|
||||
|
||||
t_ccip_c0_ReqMemHdr cci_read_hdr;
|
||||
|
||||
logic [31:0] cci_read_ctr;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] cci_read_ctr;
|
||||
t_cci_rdq_tag cci_rdq_ctr;
|
||||
|
||||
logic cci_rdq_full;
|
||||
@@ -562,29 +565,29 @@ end
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset) begin
|
||||
vx_snp_req <= 0;
|
||||
vx_snoop_ctr <= 0;
|
||||
vx_snoop_delay <= 0;
|
||||
vx_snp_req_valid <= 0;
|
||||
snp_req_ctr <= 0;
|
||||
snp_req_delay <= 0;
|
||||
end
|
||||
else begin
|
||||
if (STATE_IDLE == state) begin
|
||||
vx_snoop_ctr <= 0;
|
||||
vx_snoop_delay <= 0;
|
||||
snp_req_ctr <= 0;
|
||||
snp_req_delay <= 0;
|
||||
end
|
||||
|
||||
vx_snp_req <= 0;
|
||||
vx_snp_req_valid <= 0;
|
||||
|
||||
if ((STATE_CLFLUSH == state)
|
||||
&& vx_snoop_ctr < csr_data_size
|
||||
&& (snp_req_ctr < csr_data_size)
|
||||
&& vx_snp_req_ready)
|
||||
begin
|
||||
vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
|
||||
vx_snp_req <= 1;
|
||||
vx_snoop_ctr <= vx_snoop_ctr + 1;
|
||||
begin
|
||||
vx_snp_req_addr <= csr_mem_addr + snp_req_ctr;
|
||||
vx_snp_req_valid <= 1;
|
||||
snp_req_ctr <= snp_req_ctr + 1;
|
||||
end
|
||||
|
||||
if (vx_snoop_ctr == csr_data_size) begin
|
||||
vx_snoop_delay <= vx_snoop_delay + 1;
|
||||
if (snp_req_ctr == csr_data_size) begin
|
||||
snp_req_delay <= snp_req_delay + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -600,21 +603,22 @@ Vortex_Socket #() vx_socket (
|
||||
.dram_req_read (vx_dram_req_read),
|
||||
.dram_req_addr (vx_dram_req_addr),
|
||||
.dram_req_data (vx_dram_req_data),
|
||||
.dram_req_tag (vx_dram_req_tag),
|
||||
.dram_req_ready (vx_dram_req_ready),
|
||||
|
||||
// DRAM Rsp
|
||||
.dram_rsp_valid (vx_dram_rsp_valid),
|
||||
.dram_rsp_addr (vx_dram_rsp_addr),
|
||||
.dram_rsp_data (vx_dram_rsp_data),
|
||||
.dram_rsp_tag (vx_dram_rsp_tag),
|
||||
.dram_rsp_ready (vx_dram_rsp_ready),
|
||||
|
||||
// Cache Snooping Req
|
||||
.llc_snp_req_valid (vx_snp_req),
|
||||
.llc_snp_req_addr (vx_snp_req_addr),
|
||||
.llc_snp_req_ready (vx_snp_req_ready),
|
||||
.snp_req_valid (vx_snp_req_valid),
|
||||
.snp_req_addr (vx_snp_req_addr),
|
||||
.snp_req_ready (vx_snp_req_ready),
|
||||
|
||||
// program exit signal
|
||||
.ebreak (vx_ebreak)
|
||||
// status
|
||||
.busy (vx_busy)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -52,6 +52,16 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define BYTE_EN_NO 3'h7
|
||||
`define BYTE_EN_LB 3'h0
|
||||
`define BYTE_EN_LH 3'h1
|
||||
`define BYTE_EN_LW 3'h2
|
||||
`define BYTE_EN_HB 3'h4
|
||||
`define BYTE_EN_HH 3'h5
|
||||
`define BYTE_EN_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_R 7'd051
|
||||
`define INST_L 7'd003
|
||||
`define INST_ALU 7'd019
|
||||
@@ -62,7 +72,7 @@
|
||||
`define INST_JAL 7'd111
|
||||
`define INST_JALR 7'd103
|
||||
`define INST_SYS 7'd115
|
||||
`define INST_GPGPU 7'h06b
|
||||
`define INST_GPGPU 7'd107
|
||||
|
||||
`define RS2_IMMED 1
|
||||
`define RS2_REG 0
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_dram_arb #(
|
||||
parameter BANK_LINE_SIZE = 1,
|
||||
|
||||
@@ -19,26 +19,16 @@ module VX_warp (
|
||||
output wire[`NUM_THREADS-1:0] valid
|
||||
);
|
||||
|
||||
reg [31:0] real_PC;
|
||||
logic [31:0] temp_PC;
|
||||
logic [31:0] use_PC;
|
||||
reg [`NUM_THREADS-1:0] valid_t;
|
||||
reg [`NUM_THREADS-1:0] valid_zero;
|
||||
|
||||
integer i;
|
||||
initial begin
|
||||
real_PC = 0;
|
||||
for (i = 1; i < `NUM_THREADS; i=i+1) begin
|
||||
valid_t[i] = 0; // Thread 1 active
|
||||
valid_zero[i] = 0;
|
||||
end
|
||||
valid_t = 1;
|
||||
valid_zero[0] = 0;
|
||||
end
|
||||
reg [31:0] real_PC;
|
||||
reg [31:0] temp_PC;
|
||||
reg [31:0] use_PC;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (remove) begin
|
||||
valid_t <= valid_zero;
|
||||
if (reset) begin
|
||||
valid_t <= {{(`NUM_THREADS-1){1'b0}},1'b1}; // Thread 1 active
|
||||
end else if (remove) begin
|
||||
valid_t <= 0;
|
||||
end else if (change_mask) begin
|
||||
valid_t <= thread_mask;
|
||||
end
|
||||
@@ -46,7 +36,7 @@ module VX_warp (
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `NUM_THREADS; i = i+1) begin : valid_assign
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin : valid_assign
|
||||
assign valid[i] = change_mask ? thread_mask[i] : stall ? 1'b0 : valid_t[i];
|
||||
end
|
||||
endgenerate
|
||||
@@ -54,8 +44,7 @@ module VX_warp (
|
||||
always @(*) begin
|
||||
if (jal == 1'b1) begin
|
||||
temp_PC = jal_dest;
|
||||
// $display("LINKING TO %h", temp_PC);
|
||||
end else if (branch_dir == 1'b1) begin
|
||||
end else if (branch_dir) begin
|
||||
temp_PC = branch_dest;
|
||||
end else begin
|
||||
temp_PC = real_PC;
|
||||
@@ -68,8 +57,7 @@ module VX_warp (
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
real_PC <= 0;
|
||||
end else if (wspawn == 1'b1) begin
|
||||
// $display("Inside warp ***** Spawn @ %H",wspawn_pc);
|
||||
end else if (wspawn) begin
|
||||
real_PC <= wspawn_pc;
|
||||
end else if (!stall) begin
|
||||
real_PC <= use_PC + 32'h4;
|
||||
|
||||
8
hw/rtl/cache/VX_cache_config.vh
vendored
8
hw/rtl/cache/VX_cache_config.vh
vendored
@@ -3,14 +3,6 @@
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define BYTE_EN_NO 3'h7
|
||||
`define BYTE_EN_LB 3'h0
|
||||
`define BYTE_EN_LH 3'h1
|
||||
`define BYTE_EN_LW 3'h2
|
||||
`define BYTE_EN_HB 3'h4
|
||||
`define BYTE_EN_HH 3'h5
|
||||
`define BYTE_EN_BITS 3
|
||||
|
||||
// data tid tag read write base addr
|
||||
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS)
|
||||
|
||||
|
||||
7
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
7
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -53,9 +53,9 @@ module VX_cache_core_rsp_merge #(
|
||||
output wire [NUM_BANKS-1:0] per_bank_core_rsp_pop,
|
||||
|
||||
// Core Writeback
|
||||
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
|
||||
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
|
||||
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire core_rsp_ready
|
||||
);
|
||||
|
||||
@@ -81,7 +81,6 @@ module VX_cache_core_rsp_merge #(
|
||||
always @(*) begin
|
||||
core_rsp_valid = 0;
|
||||
core_rsp_data = 0;
|
||||
core_rsp_tag = 0;
|
||||
for (i = 0; i < NUM_BANKS; i = i + 1) begin
|
||||
if (found_bank
|
||||
&& per_bank_core_rsp_valid[i]
|
||||
|
||||
@@ -66,7 +66,7 @@ module VX_divide #(
|
||||
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
|
||||
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
numer_pipe[i+1] <= 0;
|
||||
|
||||
@@ -84,7 +84,7 @@ module VX_mult #(
|
||||
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
|
||||
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
dataa_pipe[i+1] <= 0;
|
||||
|
||||
@@ -19,7 +19,7 @@ module VX_tb_divide();
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < 8; i = i+1) begin : div_loop
|
||||
for (i = 0; i < 8; i++) begin : div_loop
|
||||
VX_divide#(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
|
||||
Reference in New Issue
Block a user