opae fixes
This commit is contained in:
@@ -15,7 +15,7 @@ extern int vx_dev_caps(int caps_id) {
|
|||||||
case VX_CAPS_MAX_THREADS:
|
case VX_CAPS_MAX_THREADS:
|
||||||
return NUM_THREADS;
|
return NUM_THREADS;
|
||||||
case VX_CAPS_CACHE_LINESIZE:
|
case VX_CAPS_CACHE_LINESIZE:
|
||||||
return GLOBAL_BLOCK_SIZE;
|
return 64;
|
||||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||||
return 0xffffffff;
|
return 0xffffffff;
|
||||||
case VX_CAPS_ALLOC_BASE_ADDR:
|
case VX_CAPS_ALLOC_BASE_ADDR:
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <cmath>
|
||||||
#include <uuid/uuid.h>
|
#include <uuid/uuid.h>
|
||||||
#include <opae/fpga.h>
|
#include <opae/fpga.h>
|
||||||
#include <vortex.h>
|
#include <vortex.h>
|
||||||
@@ -46,9 +47,14 @@ typedef struct vx_buffer_ {
|
|||||||
size_t size;
|
size_t size;
|
||||||
} vx_buffer_t;
|
} vx_buffer_t;
|
||||||
|
|
||||||
static size_t align_size(size_t size) {
|
inline size_t align_size(size_t size, size_t alignment) {
|
||||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
assert(0 == (alignment & (alignment - 1)));
|
||||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
return (size + alignment - 1) & ~(alignment - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool is_aligned(size_t addr, size_t alignment) {
|
||||||
|
assert(0 == (alignment & (alignment - 1)));
|
||||||
|
return 0 == (addr & (alignment - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -133,8 +139,10 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
|||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||||
|
|
||||||
size_t asize = align_size(size);
|
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||||
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||||
|
|
||||||
|
size_t asize = align_size(size, line_size);
|
||||||
if (device->mem_allocation + asize > dev_mem_size)
|
if (device->mem_allocation + asize > dev_mem_size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -158,7 +166,9 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
|
|||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||||
|
|
||||||
size_t asize = align_size(size);
|
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||||
|
|
||||||
|
size_t asize = align_size(size, line_size);
|
||||||
|
|
||||||
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
|
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
|
||||||
if (FPGA_OK != res) {
|
if (FPGA_OK != res) {
|
||||||
@@ -249,20 +259,35 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|
|||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||||
|
|
||||||
|
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||||
|
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||||
|
|
||||||
|
// check alignment
|
||||||
|
if (!is_aligned(dev_maddr, line_size))
|
||||||
|
return -1;
|
||||||
|
if (!is_aligned(size, line_size))
|
||||||
|
return -1;
|
||||||
|
if (!is_aligned(buffer->io_addr + src_offset, line_size))
|
||||||
|
return -1;
|
||||||
|
|
||||||
// bound checking
|
// bound checking
|
||||||
if (size + src_offset > buffer->size)
|
if (size + src_offset > buffer->size)
|
||||||
return -1;
|
return -1;
|
||||||
|
if (dev_maddr + size > dev_mem_size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
// Ensure ready for new command
|
// Ensure ready for new command
|
||||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + src_offset));
|
auto ls_shift = (int)std::log2(line_size);
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr >> ls_shift) ));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
|
||||||
|
|
||||||
// Wait for the write operation to finish
|
// Wait for the write operation to finish
|
||||||
@@ -277,20 +302,35 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|
|||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
|
||||||
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||||
|
|
||||||
|
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||||
|
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||||
|
|
||||||
|
// check alignment
|
||||||
|
if (!is_aligned(dev_maddr, line_size))
|
||||||
|
return -1;
|
||||||
|
if (!is_aligned(size, line_size))
|
||||||
|
return -1;
|
||||||
|
if (!is_aligned(buffer->io_addr + dest_offset, line_size))
|
||||||
|
return -1;
|
||||||
|
|
||||||
// bound checking
|
// bound checking
|
||||||
if (size + dest_offset > buffer->size)
|
if (size + dest_offset > buffer->size)
|
||||||
return -1;
|
return -1;
|
||||||
|
if (dev_maddr + size > dev_mem_size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
// Ensure ready for new command
|
// Ensure ready for new command
|
||||||
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + dest_offset));
|
auto ls_shift = (int)std::log2(line_size);
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr) >> ls_shift));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
|
||||||
|
|
||||||
// Wait for the write operation to finish
|
// Wait for the write operation to finish
|
||||||
@@ -307,12 +347,22 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
|||||||
|
|
||||||
vx_device_t* device = ((vx_device_t*)hdevice);
|
vx_device_t* device = ((vx_device_t*)hdevice);
|
||||||
|
|
||||||
|
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||||
|
|
||||||
|
// check alignment
|
||||||
|
if (!is_aligned(dev_maddr, line_size))
|
||||||
|
return -1;
|
||||||
|
if (!is_aligned(size, line_size))
|
||||||
|
return -1;
|
||||||
|
|
||||||
// Ensure ready for new command
|
// Ensure ready for new command
|
||||||
if (vx_ready_wait(hdevice, -1) != 0)
|
if (vx_ready_wait(hdevice, -1) != 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
auto ls_shift = (int)std::log2(line_size);
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
|
||||||
|
|
||||||
// Wait for the write operation to finish
|
// Wait for the write operation to finish
|
||||||
|
|||||||
@@ -38,13 +38,20 @@ make clean
|
|||||||
make
|
make
|
||||||
./basic
|
./basic
|
||||||
|
|
||||||
#ASE build instructions
|
#
|
||||||
|
# ASE build instructions
|
||||||
|
#
|
||||||
|
|
||||||
|
# Acquire a server node for running ASE simulations
|
||||||
|
qsub-sim
|
||||||
|
|
||||||
|
# modify "vsim_run.tcl" to dump VCD trace
|
||||||
vcd file vortex.vcd
|
vcd file vortex.vcd
|
||||||
vcd add -r /*/Vortex/hw/rtl/*
|
vcd add -r /*/Vortex/hw/rtl/*
|
||||||
run -all
|
run -all
|
||||||
|
|
||||||
#compress
|
# compress VCD trace
|
||||||
tar -zcvf vortex.vcd.tar.gz work/vortex.vcd
|
tar -zcvf vortex.vcd.tar.gz work/vortex.vcd
|
||||||
|
|
||||||
# decompress
|
# decompress VCD trace
|
||||||
tar -zxvf vortex.vcd.tar.gz vortex.vcd
|
tar -zxvf vortex.vcd.tar.gz vortex.vcd
|
||||||
30
hw/opae/run_ase.sh
Executable file
30
hw/opae/run_ase.sh
Executable file
@@ -0,0 +1,30 @@
|
|||||||
|
#!/bin/bash
#
# Start the ASE simulator in the background, wait until it signals
# readiness, then run an application against it.
#
# Usage: run_ase.sh <program> [program arguments...]
#
# The script uses the current working directory as its base: the simulator
# is started from ./build_ase and ASE_WORKDIR is set to ./build_ase/work.

if [ "$#" -lt 1 ]; then
    echo "usage: $0 <program> [args...]" >&2
    exit 1
fi

SCRIPT_DIR=$PWD
PROGRAM=$(basename "$1")
PROGRAM_DIR=$(dirname "$1")

# Export ASE_WORKDIR variable
export ASE_WORKDIR="$SCRIPT_DIR/build_ase/work"

# Drop the program path; the remaining arguments are passed to the application
shift 1

# cleanup incomplete runs
rm -rf "$ASE_WORKDIR/.app_lock.pid" "$ASE_WORKDIR/.ase_ready.pid"

# Start Simulator in background
pushd "$SCRIPT_DIR/build_ase" || exit 1
make sim &
popd

# Wait for simulator readiness
# When .ase_ready.pid is created in $ASE_WORKDIR, ASE is ready for simulation.
# Note: '[' is a command, so it needs spaces around it and before ']'.
while [ ! -f "$ASE_WORKDIR/.ase_ready.pid" ]
do
    sleep 1
done

# run application
pushd "$PROGRAM_DIR" || exit 1
# "$@" keeps each application argument intact (unlike $*, which re-splits them)
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$LD_LIBRARY_PATH ./"$PROGRAM" "$@"
STATUS=$?
popd

# Report the application's exit status to the caller
exit $STATUS
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
vortex_afu.json
|
vortex_afu.json
|
||||||
|
|
||||||
+define+GLOBAL_BLOCK_SIZE_BYTES=64
|
+define+GLOBAL_BLOCK_SIZE=64
|
||||||
|
|
||||||
+incdir+.
|
+incdir+.
|
||||||
+incdir+../rtl
|
+incdir+../rtl
|
||||||
@@ -12,31 +12,42 @@ vortex_afu.json
|
|||||||
../rtl/VX_user_config.vh
|
../rtl/VX_user_config.vh
|
||||||
../rtl/VX_config.vh
|
../rtl/VX_config.vh
|
||||||
../rtl/VX_define.vh
|
../rtl/VX_define.vh
|
||||||
../rtl/cache/VX_cache_config.vh
|
|
||||||
|
|
||||||
../rtl/interfaces/VX_exec_unit_req_if.v
|
../rtl/cache/VX_cache_config.vh
|
||||||
|
../rtl/cache/VX_cache.v
|
||||||
|
../rtl/cache/VX_bank.v
|
||||||
|
../rtl/cache/VX_cache_core_rsp_merge.v
|
||||||
|
../rtl/cache/VX_cache_core_req_bank_sel.v
|
||||||
|
../rtl/cache/VX_cache_dram_req_arb.v
|
||||||
|
../rtl/cache/VX_cache_dfq_queue.v
|
||||||
|
../rtl/cache/VX_cache_req_queue.v
|
||||||
|
../rtl/cache/VX_cache_miss_resrv.v
|
||||||
|
../rtl/cache/VX_fill_invalidator.v
|
||||||
|
../rtl/cache/VX_snp_fwd_arb.v
|
||||||
|
../rtl/cache/VX_tag_data_access.v
|
||||||
|
../rtl/cache/VX_tag_data_structure.v
|
||||||
|
../rtl/cache/VX_prefetcher.v
|
||||||
|
|
||||||
../rtl/interfaces/VX_branch_rsp_if.v
|
../rtl/interfaces/VX_branch_rsp_if.v
|
||||||
../rtl/interfaces/VX_inst_meta_if.v
|
|
||||||
../rtl/interfaces/VX_join_if.v
|
|
||||||
../rtl/interfaces/VX_inst_exec_wb_if.v
|
|
||||||
../rtl/interfaces/VX_cache_dram_req_if.v
|
|
||||||
../rtl/interfaces/VX_cache_dram_rsp_if.v
|
|
||||||
../rtl/interfaces/VX_cache_core_req_if.v
|
../rtl/interfaces/VX_cache_core_req_if.v
|
||||||
../rtl/interfaces/VX_cache_core_rsp_if.v
|
../rtl/interfaces/VX_cache_core_rsp_if.v
|
||||||
|
../rtl/interfaces/VX_cache_dram_req_if.v
|
||||||
|
../rtl/interfaces/VX_cache_dram_rsp_if.v
|
||||||
|
../rtl/interfaces/VX_cache_snp_req_if.v
|
||||||
|
../rtl/interfaces/VX_csr_req_if.v
|
||||||
|
../rtl/interfaces/VX_exec_unit_req_if.v
|
||||||
../rtl/interfaces/VX_frE_to_bckE_req_if.v
|
../rtl/interfaces/VX_frE_to_bckE_req_if.v
|
||||||
../rtl/interfaces/VX_gpr_data_if.v
|
../rtl/interfaces/VX_gpr_data_if.v
|
||||||
../rtl/interfaces/VX_csr_wb_if.v
|
|
||||||
../rtl/interfaces/VX_csr_req_if.v
|
|
||||||
../rtl/interfaces/VX_lsu_req_if.v
|
|
||||||
../rtl/interfaces/VX_cache_snp_req_rsp_if.v
|
|
||||||
../rtl/interfaces/VX_gpr_jal_if.v
|
../rtl/interfaces/VX_gpr_jal_if.v
|
||||||
../rtl/interfaces/VX_gpgpu_inst_req_if.v
|
|
||||||
../rtl/interfaces/VX_wstall_if.v
|
|
||||||
../rtl/interfaces/VX_wb_if.v
|
|
||||||
../rtl/interfaces/VX_gpr_read_if.v
|
../rtl/interfaces/VX_gpr_read_if.v
|
||||||
|
../rtl/interfaces/VX_gpu_inst_req_if.v
|
||||||
|
../rtl/interfaces/VX_inst_meta_if.v
|
||||||
../rtl/interfaces/VX_jal_rsp_if.v
|
../rtl/interfaces/VX_jal_rsp_if.v
|
||||||
|
../rtl/interfaces/VX_join_if.v
|
||||||
|
../rtl/interfaces/VX_lsu_req_if.v
|
||||||
../rtl/interfaces/VX_warp_ctl_if.v
|
../rtl/interfaces/VX_warp_ctl_if.v
|
||||||
../rtl/interfaces/VX_inst_mem_wb_if.v
|
../rtl/interfaces/VX_wb_if.v
|
||||||
|
../rtl/interfaces/VX_wstall_if.v
|
||||||
|
|
||||||
../rtl/libs/VX_generic_register.v
|
../rtl/libs/VX_generic_register.v
|
||||||
../rtl/libs/VX_mult.v
|
../rtl/libs/VX_mult.v
|
||||||
@@ -58,40 +69,28 @@ vortex_afu.json
|
|||||||
../rtl/VX_warp.v
|
../rtl/VX_warp.v
|
||||||
../rtl/VX_icache_stage.v
|
../rtl/VX_icache_stage.v
|
||||||
../rtl/VX_gpr_wrapper.v
|
../rtl/VX_gpr_wrapper.v
|
||||||
../rtl/VX_gpgpu_inst.v
|
../rtl/VX_gpu_inst.v
|
||||||
../rtl/VX_writeback.v
|
../rtl/VX_writeback.v
|
||||||
../rtl/VX_csr_pipe.v
|
../rtl/VX_csr_pipe.v
|
||||||
|
../rtl/VX_csr_data.v
|
||||||
|
../rtl/VX_csr_wrapper.v
|
||||||
../rtl/VX_warp_sched.v
|
../rtl/VX_warp_sched.v
|
||||||
../rtl/VX_gpr.v
|
../rtl/VX_gpr.v
|
||||||
../rtl/VX_gpr_ram.v
|
../rtl/VX_gpr_ram.v
|
||||||
../rtl/VX_gpr_stage.v
|
../rtl/VX_gpr_stage.v
|
||||||
../rtl/VX_dmem_ctrl.v
|
../rtl/VX_dmem_ctrl.v
|
||||||
../rtl/VX_alu_unit.v
|
../rtl/VX_alu_unit.v
|
||||||
../rtl/VX_csr_data.v
|
|
||||||
../rtl/VX_lsu_unit.v
|
../rtl/VX_lsu_unit.v
|
||||||
../rtl/VX_decode.v
|
../rtl/VX_decode.v
|
||||||
../rtl/VX_inst_multiplex.v
|
../rtl/VX_inst_multiplex.v
|
||||||
../rtl/VX_csr_wrapper.v
|
|
||||||
../rtl/VX_lsu_addr_gen.v
|
../rtl/VX_lsu_addr_gen.v
|
||||||
|
../rtl/VX_dcache_io_arb.v
|
||||||
|
../rtl/VX_dram_arb.v
|
||||||
|
|
||||||
../rtl/pipe_regs/VX_f_d_reg.v
|
../rtl/pipe_regs/VX_f_d_reg.v
|
||||||
../rtl/pipe_regs/VX_i_d_reg.v
|
../rtl/pipe_regs/VX_i_d_reg.v
|
||||||
../rtl/pipe_regs/VX_d_e_reg.v
|
../rtl/pipe_regs/VX_d_e_reg.v
|
||||||
|
|
||||||
../rtl/cache/VX_snp_fwd_arb.v
|
|
||||||
../rtl/cache/VX_cache_dram_req_arb.v
|
|
||||||
../rtl/cache/VX_cache_dfq_queue.v
|
|
||||||
../rtl/cache/VX_cache_wb_sel_merge.v
|
|
||||||
../rtl/cache/VX_tag_data_access.v
|
|
||||||
../rtl/cache/VX_cache.v
|
|
||||||
../rtl/cache/VX_cache_core_req_bank_sel.v
|
|
||||||
../rtl/cache/VX_cache_req_queue.v
|
|
||||||
../rtl/cache/VX_bank.v
|
|
||||||
../rtl/cache/VX_cache_miss_resrv.v
|
|
||||||
../rtl/cache/VX_fill_invalidator.v
|
|
||||||
../rtl/cache/VX_tag_data_structure.v
|
|
||||||
../rtl/cache/VX_prefetcher.v
|
|
||||||
|
|
||||||
ccip_interface_reg.sv
|
ccip_interface_reg.sv
|
||||||
ccip_std_afu.sv
|
ccip_std_afu.sv
|
||||||
vortex_afu.sv
|
vortex_afu.sv
|
||||||
@@ -28,12 +28,14 @@ module vortex_afu #(
|
|||||||
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
|
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
|
||||||
);
|
);
|
||||||
|
|
||||||
|
localparam DRAM_ADDR_WIDTH = (32 - `CLOG2(`GLOBAL_BLOCK_SIZE));
|
||||||
|
|
||||||
localparam AVS_RD_QUEUE_SIZE = 16;
|
localparam AVS_RD_QUEUE_SIZE = 16;
|
||||||
|
|
||||||
localparam CCI_RD_WINDOW_SIZE = 8;
|
localparam CCI_RD_WINDOW_SIZE = 8;
|
||||||
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
|
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
|
||||||
|
|
||||||
localparam VX_SNOOP_DELAY = 300;
|
localparam VX_SNOOP_DELAY = 1000;
|
||||||
localparam VX_SNOOP_LEVELS = 2;
|
localparam VX_SNOOP_LEVELS = 2;
|
||||||
|
|
||||||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||||
@@ -60,38 +62,39 @@ typedef enum logic[3:0] {
|
|||||||
STATE_CLFLUSH
|
STATE_CLFLUSH
|
||||||
} state_t;
|
} state_t;
|
||||||
|
|
||||||
typedef logic [`LOG2UP(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
|
typedef logic [`LOG2UP(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
|
||||||
typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
|
typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
|
||||||
|
|
||||||
state_t state;
|
state_t state;
|
||||||
|
|
||||||
// Vortex signals /////////////////////////////////////////////////////////////
|
// Vortex ports ///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
logic vx_dram_req_read;
|
logic vx_dram_req_read;
|
||||||
logic vx_dram_req_write;
|
logic vx_dram_req_write;
|
||||||
logic [31:0] vx_dram_req_addr;
|
logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
|
||||||
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_req_data;
|
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_req_data;
|
||||||
logic vx_dram_req_ready;
|
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
|
||||||
|
logic vx_dram_req_ready;
|
||||||
|
|
||||||
logic vx_dram_rsp_ready;
|
logic vx_dram_rsp_valid;
|
||||||
logic vx_dram_rsp_valid;
|
|
||||||
logic [31:0] vx_dram_rsp_addr;
|
|
||||||
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_rsp_data;
|
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_rsp_data;
|
||||||
|
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
|
||||||
|
logic vx_dram_rsp_ready;
|
||||||
|
|
||||||
logic vx_snp_req;
|
logic vx_snp_req_valid;
|
||||||
logic [31:0] vx_snp_req_addr;
|
logic [DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
|
||||||
logic vx_snp_req_ready;
|
logic vx_snp_req_ready;
|
||||||
|
|
||||||
logic vx_ebreak;
|
logic vx_busy;
|
||||||
|
|
||||||
// AVS Queues /////////////////////////////////////////////////////////////////
|
// AVS Queues /////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
logic avs_raq_push;
|
logic avs_rtq_push;
|
||||||
t_local_mem_addr avs_raq_din;
|
t_local_mem_addr avs_rtq_din;
|
||||||
logic avs_raq_pop;
|
logic avs_rtq_pop;
|
||||||
t_local_mem_addr avs_raq_dout;
|
t_local_mem_addr avs_rtq_dout;
|
||||||
logic avs_raq_empty;
|
logic avs_rtq_empty;
|
||||||
logic avs_raq_full;
|
logic avs_rtq_full;
|
||||||
|
|
||||||
logic avs_rdq_push;
|
logic avs_rdq_push;
|
||||||
t_local_mem_data avs_rdq_din;
|
t_local_mem_data avs_rdq_din;
|
||||||
@@ -105,7 +108,7 @@ logic avs_rdq_full;
|
|||||||
logic [2:0] csr_cmd;
|
logic [2:0] csr_cmd;
|
||||||
t_ccip_clAddr csr_io_addr;
|
t_ccip_clAddr csr_io_addr;
|
||||||
t_local_mem_addr csr_mem_addr;
|
t_local_mem_addr csr_mem_addr;
|
||||||
logic [31:0] csr_data_size;
|
logic [DRAM_ADDR_WIDTH-1:0] csr_data_size;
|
||||||
|
|
||||||
// MMIO controller ////////////////////////////////////////////////////////////
|
// MMIO controller ////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
@@ -137,16 +140,16 @@ begin
|
|||||||
begin
|
begin
|
||||||
case (mmioHdr.address)
|
case (mmioHdr.address)
|
||||||
MMIO_CSR_IO_ADDR: begin
|
MMIO_CSR_IO_ADDR: begin
|
||||||
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6);
|
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
|
||||||
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6));
|
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
|
||||||
end
|
end
|
||||||
MMIO_CSR_MEM_ADDR: begin
|
MMIO_CSR_MEM_ADDR: begin
|
||||||
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6);
|
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
|
||||||
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6));
|
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
|
||||||
end
|
end
|
||||||
MMIO_CSR_DATA_SIZE: begin
|
MMIO_CSR_DATA_SIZE: begin
|
||||||
csr_data_size <= $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6);
|
csr_data_size <= $bits(csr_data_size)'(cp2af_sRxPort.c0.data);
|
||||||
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6));
|
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'(cp2af_sRxPort.c0.data));
|
||||||
end
|
end
|
||||||
MMIO_CSR_CMD: begin
|
MMIO_CSR_CMD: begin
|
||||||
csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
|
csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
|
||||||
@@ -195,12 +198,12 @@ end
|
|||||||
|
|
||||||
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
logic [31:0] cci_write_ctr;
|
logic [DRAM_ADDR_WIDTH-1:0] cci_write_ctr;
|
||||||
logic [31:0] avs_read_ctr;
|
logic [DRAM_ADDR_WIDTH-1:0] avs_read_ctr;
|
||||||
logic [31:0] avs_write_ctr;
|
logic [DRAM_ADDR_WIDTH-1:0] avs_write_ctr;
|
||||||
logic [31:0] vx_snoop_ctr;
|
logic [DRAM_ADDR_WIDTH-1:0] snp_req_ctr;
|
||||||
logic [9:0] vx_snoop_delay;
|
logic [9:0] snp_req_delay;
|
||||||
logic vx_reset;
|
logic vx_reset;
|
||||||
|
|
||||||
always_ff @(posedge clk)
|
always_ff @(posedge clk)
|
||||||
begin
|
begin
|
||||||
@@ -248,13 +251,13 @@ begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
STATE_RUN: begin
|
STATE_RUN: begin
|
||||||
if (vx_ebreak) begin
|
if (!vx_busy) begin
|
||||||
state <= STATE_IDLE;
|
state <= STATE_IDLE;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
STATE_CLFLUSH: begin
|
STATE_CLFLUSH: begin
|
||||||
if (vx_snoop_delay >= VX_SNOOP_DELAY) begin
|
if (snp_req_delay >= VX_SNOOP_DELAY) begin
|
||||||
state <= STATE_IDLE;
|
state <= STATE_IDLE;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -268,11 +271,12 @@ end
|
|||||||
logic cci_rdq_empty;
|
logic cci_rdq_empty;
|
||||||
t_cci_rdq_data cci_rdq_dout;
|
t_cci_rdq_data cci_rdq_dout;
|
||||||
logic cci_rdq_pop;
|
logic cci_rdq_pop;
|
||||||
|
logic [`L3DRAM_TAG_WIDTH-1:0] dram_req_tag;
|
||||||
|
|
||||||
t_ccip_clAddr next_avs_address;
|
t_ccip_clAddr next_avs_address;
|
||||||
always_comb
|
always_comb
|
||||||
begin
|
begin
|
||||||
next_avs_address = csr_mem_addr + {avs_write_ctr[31:$bits(t_cci_rdq_tag)], t_cci_rdq_tag'(cci_rdq_dout)};
|
next_avs_address = csr_mem_addr + {avs_write_ctr[DRAM_ADDR_WIDTH-1:$bits(t_cci_rdq_tag)], t_cci_rdq_tag'(cci_rdq_dout)};
|
||||||
cci_rdq_pop = (state == STATE_WRITE
|
cci_rdq_pop = (state == STATE_WRITE
|
||||||
&& !cci_rdq_empty
|
&& !cci_rdq_empty
|
||||||
&& !avs_waitrequest
|
&& !avs_waitrequest
|
||||||
@@ -286,8 +290,6 @@ begin
|
|||||||
mem_bank_select <= 0;
|
mem_bank_select <= 0;
|
||||||
avs_burstcount <= 1;
|
avs_burstcount <= 1;
|
||||||
avs_byteenable <= 64'hffffffffffffffff;
|
avs_byteenable <= 64'hffffffffffffffff;
|
||||||
avs_address <= 0;
|
|
||||||
avs_writedata <= 0;
|
|
||||||
avs_read <= 0;
|
avs_read <= 0;
|
||||||
avs_write <= 0;
|
avs_write <= 0;
|
||||||
avs_read_ctr <= 0;
|
avs_read_ctr <= 0;
|
||||||
@@ -305,7 +307,7 @@ begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
STATE_READ: begin
|
STATE_READ: begin
|
||||||
if (!avs_raq_full
|
if (!avs_rtq_full
|
||||||
&& !avs_rdq_full
|
&& !avs_rdq_full
|
||||||
&& !avs_waitrequest
|
&& !avs_waitrequest
|
||||||
&& avs_read_ctr < csr_data_size)
|
&& avs_read_ctr < csr_data_size)
|
||||||
@@ -332,18 +334,19 @@ begin
|
|||||||
if (vx_dram_req_read
|
if (vx_dram_req_read
|
||||||
&& vx_dram_req_ready)
|
&& vx_dram_req_ready)
|
||||||
begin
|
begin
|
||||||
avs_address <= (vx_dram_req_addr >> 6);
|
avs_address <= vx_dram_req_addr;
|
||||||
avs_read <= 1;
|
dram_req_tag <= vx_dram_req_tag;
|
||||||
$display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr >> 6);
|
avs_read <= 1;
|
||||||
|
$display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr);
|
||||||
end
|
end
|
||||||
|
|
||||||
if (vx_dram_req_write
|
if (vx_dram_req_write
|
||||||
&& vx_dram_req_ready)
|
&& vx_dram_req_ready)
|
||||||
begin
|
begin
|
||||||
|
avs_address <= vx_dram_req_addr;
|
||||||
avs_writedata <= vx_dram_req_data;
|
avs_writedata <= vx_dram_req_data;
|
||||||
avs_address <= (vx_dram_req_addr >> 6);
|
|
||||||
avs_write <= 1;
|
avs_write <= 1;
|
||||||
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr >> 6);
|
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
endcase
|
endcase
|
||||||
@@ -362,7 +365,7 @@ logic vortex_enabled;
|
|||||||
always_comb
|
always_comb
|
||||||
begin
|
begin
|
||||||
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||||
vx_dram_req_ready = vortex_enabled && !avs_waitrequest && !avs_raq_full && !avs_rdq_full;
|
vx_dram_req_ready = vortex_enabled && !avs_waitrequest && !avs_rtq_full && !avs_rdq_full;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Vortex DRAM fill response
|
// Vortex DRAM fill response
|
||||||
@@ -370,7 +373,7 @@ end
|
|||||||
always_comb
|
always_comb
|
||||||
begin
|
begin
|
||||||
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
|
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
|
||||||
vx_dram_rsp_addr = (avs_raq_dout << 6);
|
vx_dram_rsp_tag = avs_rtq_dout;
|
||||||
vx_dram_rsp_data = avs_rdq_dout;
|
vx_dram_rsp_data = avs_rdq_dout;
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -380,9 +383,9 @@ logic cci_wr_req;
|
|||||||
|
|
||||||
always_comb
|
always_comb
|
||||||
begin
|
begin
|
||||||
avs_raq_pop = vx_dram_rsp_valid || cci_wr_req;
|
avs_rtq_pop = vx_dram_rsp_valid || cci_wr_req;
|
||||||
avs_raq_din = avs_address;
|
avs_rtq_din = dram_req_tag;
|
||||||
avs_raq_push = avs_read;
|
avs_rtq_push = avs_read;
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_generic_queue #(
|
VX_generic_queue #(
|
||||||
@@ -391,19 +394,19 @@ VX_generic_queue #(
|
|||||||
) avs_rd_req_queue (
|
) avs_rd_req_queue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (SoftReset),
|
.reset (SoftReset),
|
||||||
.push (avs_raq_push),
|
.push (avs_rtq_push),
|
||||||
.data_in (avs_raq_din),
|
.data_in (avs_rtq_din),
|
||||||
.pop (avs_raq_pop),
|
.pop (avs_rtq_pop),
|
||||||
.data_out (avs_raq_dout),
|
.data_out (avs_rtq_dout),
|
||||||
.empty (avs_raq_empty),
|
.empty (avs_rtq_empty),
|
||||||
.full (avs_raq_full)
|
.full (avs_rtq_full)
|
||||||
);
|
);
|
||||||
|
|
||||||
// AVS data read response queue ///////////////////////////////////////////////
|
// AVS data read response queue ///////////////////////////////////////////////
|
||||||
|
|
||||||
always_comb
|
always_comb
|
||||||
begin
|
begin
|
||||||
avs_rdq_pop = avs_raq_pop;
|
avs_rdq_pop = avs_rtq_pop;
|
||||||
avs_rdq_din = avs_readdata;
|
avs_rdq_din = avs_readdata;
|
||||||
avs_rdq_push = avs_readdatavalid;
|
avs_rdq_push = avs_readdatavalid;
|
||||||
end
|
end
|
||||||
@@ -426,7 +429,7 @@ VX_generic_queue #(
|
|||||||
|
|
||||||
t_ccip_c0_ReqMemHdr cci_read_hdr;
|
t_ccip_c0_ReqMemHdr cci_read_hdr;
|
||||||
|
|
||||||
logic [31:0] cci_read_ctr;
|
logic [DRAM_ADDR_WIDTH-1:0] cci_read_ctr;
|
||||||
t_cci_rdq_tag cci_rdq_ctr;
|
t_cci_rdq_tag cci_rdq_ctr;
|
||||||
|
|
||||||
logic cci_rdq_full;
|
logic cci_rdq_full;
|
||||||
@@ -562,29 +565,29 @@ end
|
|||||||
always_ff @(posedge clk)
|
always_ff @(posedge clk)
|
||||||
begin
|
begin
|
||||||
if (SoftReset) begin
|
if (SoftReset) begin
|
||||||
vx_snp_req <= 0;
|
vx_snp_req_valid <= 0;
|
||||||
vx_snoop_ctr <= 0;
|
snp_req_ctr <= 0;
|
||||||
vx_snoop_delay <= 0;
|
snp_req_delay <= 0;
|
||||||
end
|
end
|
||||||
else begin
|
else begin
|
||||||
if (STATE_IDLE == state) begin
|
if (STATE_IDLE == state) begin
|
||||||
vx_snoop_ctr <= 0;
|
snp_req_ctr <= 0;
|
||||||
vx_snoop_delay <= 0;
|
snp_req_delay <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
vx_snp_req <= 0;
|
vx_snp_req_valid <= 0;
|
||||||
|
|
||||||
if ((STATE_CLFLUSH == state)
|
if ((STATE_CLFLUSH == state)
|
||||||
&& vx_snoop_ctr < csr_data_size
|
&& (snp_req_ctr < csr_data_size)
|
||||||
&& vx_snp_req_ready)
|
&& vx_snp_req_ready)
|
||||||
begin
|
begin
|
||||||
vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
|
vx_snp_req_addr <= csr_mem_addr + snp_req_ctr;
|
||||||
vx_snp_req <= 1;
|
vx_snp_req_valid <= 1;
|
||||||
vx_snoop_ctr <= vx_snoop_ctr + 1;
|
snp_req_ctr <= snp_req_ctr + 1;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (vx_snoop_ctr == csr_data_size) begin
|
if (snp_req_ctr == csr_data_size) begin
|
||||||
vx_snoop_delay <= vx_snoop_delay + 1;
|
snp_req_delay <= snp_req_delay + 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -600,21 +603,22 @@ Vortex_Socket #() vx_socket (
|
|||||||
.dram_req_read (vx_dram_req_read),
|
.dram_req_read (vx_dram_req_read),
|
||||||
.dram_req_addr (vx_dram_req_addr),
|
.dram_req_addr (vx_dram_req_addr),
|
||||||
.dram_req_data (vx_dram_req_data),
|
.dram_req_data (vx_dram_req_data),
|
||||||
|
.dram_req_tag (vx_dram_req_tag),
|
||||||
.dram_req_ready (vx_dram_req_ready),
|
.dram_req_ready (vx_dram_req_ready),
|
||||||
|
|
||||||
// DRAM Rsp
|
// DRAM Rsp
|
||||||
.dram_rsp_valid (vx_dram_rsp_valid),
|
.dram_rsp_valid (vx_dram_rsp_valid),
|
||||||
.dram_rsp_addr (vx_dram_rsp_addr),
|
|
||||||
.dram_rsp_data (vx_dram_rsp_data),
|
.dram_rsp_data (vx_dram_rsp_data),
|
||||||
|
.dram_rsp_tag (vx_dram_rsp_tag),
|
||||||
.dram_rsp_ready (vx_dram_rsp_ready),
|
.dram_rsp_ready (vx_dram_rsp_ready),
|
||||||
|
|
||||||
// Cache Snooping Req
|
// Cache Snooping Req
|
||||||
.llc_snp_req_valid (vx_snp_req),
|
.snp_req_valid (vx_snp_req_valid),
|
||||||
.llc_snp_req_addr (vx_snp_req_addr),
|
.snp_req_addr (vx_snp_req_addr),
|
||||||
.llc_snp_req_ready (vx_snp_req_ready),
|
.snp_req_ready (vx_snp_req_ready),
|
||||||
|
|
||||||
// program exit signal
|
// status
|
||||||
.ebreak (vx_ebreak)
|
.busy (vx_busy)
|
||||||
);
|
);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -52,6 +52,16 @@
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
`define BYTE_EN_NO 3'h7
|
||||||
|
`define BYTE_EN_LB 3'h0
|
||||||
|
`define BYTE_EN_LH 3'h1
|
||||||
|
`define BYTE_EN_LW 3'h2
|
||||||
|
`define BYTE_EN_HB 3'h4
|
||||||
|
`define BYTE_EN_HH 3'h5
|
||||||
|
`define BYTE_EN_BITS 3
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`define INST_R 7'd051
|
`define INST_R 7'd051
|
||||||
`define INST_L 7'd003
|
`define INST_L 7'd003
|
||||||
`define INST_ALU 7'd019
|
`define INST_ALU 7'd019
|
||||||
@@ -62,7 +72,7 @@
|
|||||||
`define INST_JAL 7'd111
|
`define INST_JAL 7'd111
|
||||||
`define INST_JALR 7'd103
|
`define INST_JALR 7'd103
|
||||||
`define INST_SYS 7'd115
|
`define INST_SYS 7'd115
|
||||||
`define INST_GPGPU 7'h06b
|
`define INST_GPGPU 7'd107
|
||||||
|
|
||||||
`define RS2_IMMED 1
|
`define RS2_IMMED 1
|
||||||
`define RS2_REG 0
|
`define RS2_REG 0
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_cache_config.vh"
|
||||||
|
|
||||||
module VX_dram_arb #(
|
module VX_dram_arb #(
|
||||||
parameter BANK_LINE_SIZE = 1,
|
parameter BANK_LINE_SIZE = 1,
|
||||||
|
|||||||
@@ -19,26 +19,16 @@ module VX_warp (
|
|||||||
output wire[`NUM_THREADS-1:0] valid
|
output wire[`NUM_THREADS-1:0] valid
|
||||||
);
|
);
|
||||||
|
|
||||||
reg [31:0] real_PC;
|
|
||||||
logic [31:0] temp_PC;
|
|
||||||
logic [31:0] use_PC;
|
|
||||||
reg [`NUM_THREADS-1:0] valid_t;
|
reg [`NUM_THREADS-1:0] valid_t;
|
||||||
reg [`NUM_THREADS-1:0] valid_zero;
|
reg [31:0] real_PC;
|
||||||
|
reg [31:0] temp_PC;
|
||||||
integer i;
|
reg [31:0] use_PC;
|
||||||
initial begin
|
|
||||||
real_PC = 0;
|
|
||||||
for (i = 1; i < `NUM_THREADS; i=i+1) begin
|
|
||||||
valid_t[i] = 0; // Thread 1 active
|
|
||||||
valid_zero[i] = 0;
|
|
||||||
end
|
|
||||||
valid_t = 1;
|
|
||||||
valid_zero[0] = 0;
|
|
||||||
end
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (remove) begin
|
if (reset) begin
|
||||||
valid_t <= valid_zero;
|
valid_t <= {{(`NUM_THREADS-1){1'b0}},1'b1}; // Thread 1 active
|
||||||
|
end else if (remove) begin
|
||||||
|
valid_t <= 0;
|
||||||
end else if (change_mask) begin
|
end else if (change_mask) begin
|
||||||
valid_t <= thread_mask;
|
valid_t <= thread_mask;
|
||||||
end
|
end
|
||||||
@@ -46,7 +36,7 @@ module VX_warp (
|
|||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (i = 0; i < `NUM_THREADS; i = i+1) begin : valid_assign
|
for (i = 0; i < `NUM_THREADS; i++) begin : valid_assign
|
||||||
assign valid[i] = change_mask ? thread_mask[i] : stall ? 1'b0 : valid_t[i];
|
assign valid[i] = change_mask ? thread_mask[i] : stall ? 1'b0 : valid_t[i];
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
@@ -54,8 +44,7 @@ module VX_warp (
|
|||||||
always @(*) begin
|
always @(*) begin
|
||||||
if (jal == 1'b1) begin
|
if (jal == 1'b1) begin
|
||||||
temp_PC = jal_dest;
|
temp_PC = jal_dest;
|
||||||
// $display("LINKING TO %h", temp_PC);
|
end else if (branch_dir) begin
|
||||||
end else if (branch_dir == 1'b1) begin
|
|
||||||
temp_PC = branch_dest;
|
temp_PC = branch_dest;
|
||||||
end else begin
|
end else begin
|
||||||
temp_PC = real_PC;
|
temp_PC = real_PC;
|
||||||
@@ -68,8 +57,7 @@ module VX_warp (
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
real_PC <= 0;
|
real_PC <= 0;
|
||||||
end else if (wspawn == 1'b1) begin
|
end else if (wspawn) begin
|
||||||
// $display("Inside warp ***** Spawn @ %H",wspawn_pc);
|
|
||||||
real_PC <= wspawn_pc;
|
real_PC <= wspawn_pc;
|
||||||
end else if (!stall) begin
|
end else if (!stall) begin
|
||||||
real_PC <= use_PC + 32'h4;
|
real_PC <= use_PC + 32'h4;
|
||||||
|
|||||||
8
hw/rtl/cache/VX_cache_config.vh
vendored
8
hw/rtl/cache/VX_cache_config.vh
vendored
@@ -3,14 +3,6 @@
|
|||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
`define BYTE_EN_NO 3'h7
|
|
||||||
`define BYTE_EN_LB 3'h0
|
|
||||||
`define BYTE_EN_LH 3'h1
|
|
||||||
`define BYTE_EN_LW 3'h2
|
|
||||||
`define BYTE_EN_HB 3'h4
|
|
||||||
`define BYTE_EN_HH 3'h5
|
|
||||||
`define BYTE_EN_BITS 3
|
|
||||||
|
|
||||||
// data tid tag read write base addr
|
// data tid tag read write base addr
|
||||||
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS)
|
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS)
|
||||||
|
|
||||||
|
|||||||
7
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
7
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -53,9 +53,9 @@ module VX_cache_core_rsp_merge #(
|
|||||||
output wire [NUM_BANKS-1:0] per_bank_core_rsp_pop,
|
output wire [NUM_BANKS-1:0] per_bank_core_rsp_pop,
|
||||||
|
|
||||||
// Core Writeback
|
// Core Writeback
|
||||||
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
|
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
|
||||||
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||||
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||||
input wire core_rsp_ready
|
input wire core_rsp_ready
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -81,7 +81,6 @@ module VX_cache_core_rsp_merge #(
|
|||||||
always @(*) begin
|
always @(*) begin
|
||||||
core_rsp_valid = 0;
|
core_rsp_valid = 0;
|
||||||
core_rsp_data = 0;
|
core_rsp_data = 0;
|
||||||
core_rsp_tag = 0;
|
|
||||||
for (i = 0; i < NUM_BANKS; i = i + 1) begin
|
for (i = 0; i < NUM_BANKS; i = i + 1) begin
|
||||||
if (found_bank
|
if (found_bank
|
||||||
&& per_bank_core_rsp_valid[i]
|
&& per_bank_core_rsp_valid[i]
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ module VX_divide #(
|
|||||||
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
|
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
|
||||||
always @(posedge clock or posedge aclr) begin
|
always @(posedge clock or posedge aclr) begin
|
||||||
if (aclr) begin
|
if (aclr) begin
|
||||||
numer_pipe[i+1] <= 0;
|
numer_pipe[i+1] <= 0;
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ module VX_mult #(
|
|||||||
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
|
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
|
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
|
||||||
always @(posedge clock or posedge aclr) begin
|
always @(posedge clock or posedge aclr) begin
|
||||||
if (aclr) begin
|
if (aclr) begin
|
||||||
dataa_pipe[i+1] <= 0;
|
dataa_pipe[i+1] <= 0;
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ module VX_tb_divide();
|
|||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (i = 0; i < 8; i = i+1) begin : div_loop
|
for (i = 0; i < 8; i++) begin : div_loop
|
||||||
VX_divide#(
|
VX_divide#(
|
||||||
.WIDTHN(32),
|
.WIDTHN(32),
|
||||||
.WIDTHD(32),
|
.WIDTHD(32),
|
||||||
|
|||||||
Reference in New Issue
Block a user