opae fixes

This commit is contained in:
Blaise Tine
2020-05-06 21:14:53 -07:00
parent 59cc0d5be9
commit de9fc68ccc
14 changed files with 249 additions and 170 deletions

View File

@@ -15,7 +15,7 @@ extern int vx_dev_caps(int caps_id) {
case VX_CAPS_MAX_THREADS:
return NUM_THREADS;
case VX_CAPS_CACHE_LINESIZE:
return GLOBAL_BLOCK_SIZE;
return 64;
case VX_CAPS_LOCAL_MEM_SIZE:
return 0xffffffff;
case VX_CAPS_ALLOC_BASE_ADDR:

View File

@@ -3,6 +3,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <cmath>
#include <uuid/uuid.h>
#include <opae/fpga.h>
#include <vortex.h>
@@ -46,9 +47,14 @@ typedef struct vx_buffer_ {
size_t size;
} vx_buffer_t;
static size_t align_size(size_t size) {
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
// Rounds `size` up to the next multiple of `alignment`.
// The assert rejects any `alignment` with more than one bit set, so the
// rounding can be done with a bit mask instead of a divide/multiply.
inline size_t align_size(size_t size, size_t alignment) {
  const size_t low_mask = alignment - 1;
  assert((alignment & low_mask) == 0);
  return (size + low_mask) & ~low_mask;
}
// Reports whether `addr` is a multiple of `alignment`.
// The assert rejects any `alignment` with more than one bit set, which lets
// the check be a simple test of the low bits of `addr`.
inline bool is_aligned(size_t addr, size_t alignment) {
  const size_t low_mask = alignment - 1;
  assert((alignment & low_mask) == 0);
  return (addr & low_mask) == 0;
}
///////////////////////////////////////////////////////////////////////////////
@@ -132,9 +138,11 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
size_t asize = align_size(size);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
if (device->mem_allocation + asize > dev_mem_size)
return -1;
@@ -158,7 +166,9 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
vx_device_t *device = ((vx_device_t*)hdevice);
size_t asize = align_size(size);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t asize = align_size(size, line_size);
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
if (FPGA_OK != res) {
@@ -249,20 +259,35 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
// check alignment
if (!is_aligned(dev_maddr, line_size))
return -1;
if (!is_aligned(size, line_size))
return -1;
if (!is_aligned(buffer->io_addr + src_offset, line_size))
return -1;
// bound checking
if (size + src_offset > buffer->size)
return -1;
if (dev_maddr + size > dev_mem_size)
return -1;
// Ensure ready for new command
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + src_offset));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
auto ls_shift = (int)std::log2(line_size);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr >> ls_shift) ));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
// Wait for the write operation to finish
@@ -277,20 +302,35 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
// check alignment
if (!is_aligned(dev_maddr, line_size))
return -1;
if (!is_aligned(size, line_size))
return -1;
if (!is_aligned(buffer->io_addr + dest_offset, line_size))
return -1;
// bound checking
if (size + dest_offset > buffer->size)
return -1;
if (dev_maddr + size > dev_mem_size)
return -1;
// Ensure ready for new command
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + dest_offset));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
auto ls_shift = (int)std::log2(line_size);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
// Wait for the read operation to finish
@@ -307,12 +347,22 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
vx_device_t* device = ((vx_device_t*)hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
// check alignment
if (!is_aligned(dev_maddr, line_size))
return -1;
if (!is_aligned(size, line_size))
return -1;
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
auto ls_shift = (int)std::log2(line_size);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
// Wait for the flush operation to finish

View File

@@ -38,13 +38,20 @@ make clean
make
./basic
#ASE build instructions
#
# ASE build instructions
#
# Acquire a server node for running ASE simulations
qsub-sim
# modify "vsim_run.tcl" to dump VCD trace
vcd file vortex.vcd
vcd add -r /*/Vortex/hw/rtl/*
run -all
#compress
# compress VCD trace
tar -zcvf vortex.vcd.tar.gz work/vortex.vcd
# decompress
# decompress VCD trace
tar -zxvf vortex.vcd.tar.gz vortex.vcd

30
hw/opae/run_ase.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# Starts the ASE simulator in the background, waits until it signals
# readiness, then runs the given program from its own directory.
# Usage: run_ase.sh <program> [program args...]
if [ $# -lt 1 ]; then
    echo "usage: $0 <program> [args...]" >&2
    exit 1
fi
SCRIPT_DIR=$PWD
PROGRAM=$(basename "$1")
PROGRAM_DIR=$(dirname "$1")
# Export ASE_WORKDIR variable
export ASE_WORKDIR="$SCRIPT_DIR/build_ase/work"
# Drop the program path so that "$@" holds only the program's own arguments
shift 1
# cleanup incomplete runs
rm -rf "$ASE_WORKDIR/.app_lock.pid" "$ASE_WORKDIR/.ase_ready.pid"
# Start Simulator in background
# Bail out if the build directory is missing; otherwise the wait loop below
# would spin forever waiting for a simulator that never started.
pushd "$SCRIPT_DIR/build_ase" || exit 1
make sim &
popd
# Wait for simulator readiness
# When .ase_ready.pid is created in $ASE_WORKDIR, ASE is ready for simulation
while [ ! -f "$ASE_WORKDIR/.ase_ready.pid" ]
do
    sleep 1
done
# run application
pushd "$PROGRAM_DIR" || exit 1
# "$@" (not $*) keeps each argument intact even when it contains whitespace
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$LD_LIBRARY_PATH ./"$PROGRAM" "$@"
popd

View File

@@ -1,6 +1,6 @@
vortex_afu.json
+define+GLOBAL_BLOCK_SIZE_BYTES=64
+define+GLOBAL_BLOCK_SIZE=64
+incdir+.
+incdir+../rtl
@@ -12,31 +12,42 @@ vortex_afu.json
../rtl/VX_user_config.vh
../rtl/VX_config.vh
../rtl/VX_define.vh
../rtl/cache/VX_cache_config.vh
../rtl/interfaces/VX_exec_unit_req_if.v
../rtl/cache/VX_cache_config.vh
../rtl/cache/VX_cache.v
../rtl/cache/VX_bank.v
../rtl/cache/VX_cache_core_rsp_merge.v
../rtl/cache/VX_cache_core_req_bank_sel.v
../rtl/cache/VX_cache_dram_req_arb.v
../rtl/cache/VX_cache_dfq_queue.v
../rtl/cache/VX_cache_req_queue.v
../rtl/cache/VX_cache_miss_resrv.v
../rtl/cache/VX_fill_invalidator.v
../rtl/cache/VX_snp_fwd_arb.v
../rtl/cache/VX_tag_data_access.v
../rtl/cache/VX_tag_data_structure.v
../rtl/cache/VX_prefetcher.v
../rtl/interfaces/VX_branch_rsp_if.v
../rtl/interfaces/VX_inst_meta_if.v
../rtl/interfaces/VX_join_if.v
../rtl/interfaces/VX_inst_exec_wb_if.v
../rtl/interfaces/VX_cache_dram_req_if.v
../rtl/interfaces/VX_cache_dram_rsp_if.v
../rtl/interfaces/VX_cache_core_req_if.v
../rtl/interfaces/VX_cache_core_rsp_if.v
../rtl/interfaces/VX_cache_dram_req_if.v
../rtl/interfaces/VX_cache_dram_rsp_if.v
../rtl/interfaces/VX_cache_snp_req_if.v
../rtl/interfaces/VX_csr_req_if.v
../rtl/interfaces/VX_exec_unit_req_if.v
../rtl/interfaces/VX_frE_to_bckE_req_if.v
../rtl/interfaces/VX_gpr_data_if.v
../rtl/interfaces/VX_csr_wb_if.v
../rtl/interfaces/VX_csr_req_if.v
../rtl/interfaces/VX_lsu_req_if.v
../rtl/interfaces/VX_cache_snp_req_rsp_if.v
../rtl/interfaces/VX_gpr_jal_if.v
../rtl/interfaces/VX_gpgpu_inst_req_if.v
../rtl/interfaces/VX_wstall_if.v
../rtl/interfaces/VX_wb_if.v
../rtl/interfaces/VX_gpr_read_if.v
../rtl/interfaces/VX_gpu_inst_req_if.v
../rtl/interfaces/VX_inst_meta_if.v
../rtl/interfaces/VX_jal_rsp_if.v
../rtl/interfaces/VX_join_if.v
../rtl/interfaces/VX_lsu_req_if.v
../rtl/interfaces/VX_warp_ctl_if.v
../rtl/interfaces/VX_inst_mem_wb_if.v
../rtl/interfaces/VX_wb_if.v
../rtl/interfaces/VX_wstall_if.v
../rtl/libs/VX_generic_register.v
../rtl/libs/VX_mult.v
@@ -58,40 +69,28 @@ vortex_afu.json
../rtl/VX_warp.v
../rtl/VX_icache_stage.v
../rtl/VX_gpr_wrapper.v
../rtl/VX_gpgpu_inst.v
../rtl/VX_gpu_inst.v
../rtl/VX_writeback.v
../rtl/VX_csr_pipe.v
../rtl/VX_csr_data.v
../rtl/VX_csr_wrapper.v
../rtl/VX_warp_sched.v
../rtl/VX_gpr.v
../rtl/VX_gpr_ram.v
../rtl/VX_gpr_stage.v
../rtl/VX_dmem_ctrl.v
../rtl/VX_alu_unit.v
../rtl/VX_csr_data.v
../rtl/VX_lsu_unit.v
../rtl/VX_decode.v
../rtl/VX_inst_multiplex.v
../rtl/VX_csr_wrapper.v
../rtl/VX_lsu_addr_gen.v
../rtl/VX_dcache_io_arb.v
../rtl/VX_dram_arb.v
../rtl/pipe_regs/VX_f_d_reg.v
../rtl/pipe_regs/VX_i_d_reg.v
../rtl/pipe_regs/VX_d_e_reg.v
../rtl/cache/VX_snp_fwd_arb.v
../rtl/cache/VX_cache_dram_req_arb.v
../rtl/cache/VX_cache_dfq_queue.v
../rtl/cache/VX_cache_wb_sel_merge.v
../rtl/cache/VX_tag_data_access.v
../rtl/cache/VX_cache.v
../rtl/cache/VX_cache_core_req_bank_sel.v
../rtl/cache/VX_cache_req_queue.v
../rtl/cache/VX_bank.v
../rtl/cache/VX_cache_miss_resrv.v
../rtl/cache/VX_fill_invalidator.v
../rtl/cache/VX_tag_data_structure.v
../rtl/cache/VX_prefetcher.v
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv

View File

@@ -28,12 +28,14 @@ module vortex_afu #(
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
);
localparam DRAM_ADDR_WIDTH = (32 - `CLOG2(`GLOBAL_BLOCK_SIZE));
localparam AVS_RD_QUEUE_SIZE = 16;
localparam CCI_RD_WINDOW_SIZE = 8;
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
localparam VX_SNOOP_DELAY = 300;
localparam VX_SNOOP_DELAY = 1000;
localparam VX_SNOOP_LEVELS = 2;
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
@@ -60,38 +62,39 @@ typedef enum logic[3:0] {
STATE_CLFLUSH
} state_t;
typedef logic [`LOG2UP(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
typedef logic [`LOG2UP(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
state_t state;
// Vortex signals /////////////////////////////////////////////////////////////
// Vortex ports ///////////////////////////////////////////////////////////////
logic vx_dram_req_read;
logic vx_dram_req_write;
logic [31:0] vx_dram_req_addr;
logic vx_dram_req_read;
logic vx_dram_req_write;
logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_req_data;
logic vx_dram_req_ready;
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
logic vx_dram_req_ready;
logic vx_dram_rsp_ready;
logic vx_dram_rsp_valid;
logic [31:0] vx_dram_rsp_addr;
logic vx_dram_rsp_valid;
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_rsp_data;
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
logic vx_dram_rsp_ready;
logic vx_snp_req;
logic [31:0] vx_snp_req_addr;
logic vx_snp_req_ready;
logic vx_snp_req_valid;
logic [DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
logic vx_snp_req_ready;
logic vx_ebreak;
logic vx_busy;
// AVS Queues /////////////////////////////////////////////////////////////////
logic avs_raq_push;
t_local_mem_addr avs_raq_din;
logic avs_raq_pop;
t_local_mem_addr avs_raq_dout;
logic avs_raq_empty;
logic avs_raq_full;
logic avs_rtq_push;
t_local_mem_addr avs_rtq_din;
logic avs_rtq_pop;
t_local_mem_addr avs_rtq_dout;
logic avs_rtq_empty;
logic avs_rtq_full;
logic avs_rdq_push;
t_local_mem_data avs_rdq_din;
@@ -105,7 +108,7 @@ logic avs_rdq_full;
logic [2:0] csr_cmd;
t_ccip_clAddr csr_io_addr;
t_local_mem_addr csr_mem_addr;
logic [31:0] csr_data_size;
logic [DRAM_ADDR_WIDTH-1:0] csr_data_size;
// MMIO controller ////////////////////////////////////////////////////////////
@@ -137,16 +140,16 @@ begin
begin
case (mmioHdr.address)
MMIO_CSR_IO_ADDR: begin
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6);
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6));
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
end
MMIO_CSR_MEM_ADDR: begin
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6);
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6));
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
end
MMIO_CSR_DATA_SIZE: begin
csr_data_size <= $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6);
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6));
csr_data_size <= $bits(csr_data_size)'(cp2af_sRxPort.c0.data);
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'(cp2af_sRxPort.c0.data));
end
MMIO_CSR_CMD: begin
csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
@@ -195,12 +198,12 @@ end
// COMMAND FSM ////////////////////////////////////////////////////////////////
logic [31:0] cci_write_ctr;
logic [31:0] avs_read_ctr;
logic [31:0] avs_write_ctr;
logic [31:0] vx_snoop_ctr;
logic [9:0] vx_snoop_delay;
logic vx_reset;
logic [DRAM_ADDR_WIDTH-1:0] cci_write_ctr;
logic [DRAM_ADDR_WIDTH-1:0] avs_read_ctr;
logic [DRAM_ADDR_WIDTH-1:0] avs_write_ctr;
logic [DRAM_ADDR_WIDTH-1:0] snp_req_ctr;
logic [9:0] snp_req_delay;
logic vx_reset;
always_ff @(posedge clk)
begin
@@ -248,13 +251,13 @@ begin
end
STATE_RUN: begin
if (vx_ebreak) begin
if (!vx_busy) begin
state <= STATE_IDLE;
end
end
STATE_CLFLUSH: begin
if (vx_snoop_delay >= VX_SNOOP_DELAY) begin
if (snp_req_delay >= VX_SNOOP_DELAY) begin
state <= STATE_IDLE;
end
end
@@ -268,11 +271,12 @@ end
logic cci_rdq_empty;
t_cci_rdq_data cci_rdq_dout;
logic cci_rdq_pop;
logic [`L3DRAM_TAG_WIDTH-1:0] dram_req_tag;
t_ccip_clAddr next_avs_address;
always_comb
begin
next_avs_address = csr_mem_addr + {avs_write_ctr[31:$bits(t_cci_rdq_tag)], t_cci_rdq_tag'(cci_rdq_dout)};
next_avs_address = csr_mem_addr + {avs_write_ctr[DRAM_ADDR_WIDTH-1:$bits(t_cci_rdq_tag)], t_cci_rdq_tag'(cci_rdq_dout)};
cci_rdq_pop = (state == STATE_WRITE
&& !cci_rdq_empty
&& !avs_waitrequest
@@ -285,9 +289,7 @@ begin
begin
mem_bank_select <= 0;
avs_burstcount <= 1;
avs_byteenable <= 64'hffffffffffffffff;
avs_address <= 0;
avs_writedata <= 0;
avs_byteenable <= 64'hffffffffffffffff;
avs_read <= 0;
avs_write <= 0;
avs_read_ctr <= 0;
@@ -305,7 +307,7 @@ begin
end
STATE_READ: begin
if (!avs_raq_full
if (!avs_rtq_full
&& !avs_rdq_full
&& !avs_waitrequest
&& avs_read_ctr < csr_data_size)
@@ -332,18 +334,19 @@ begin
if (vx_dram_req_read
&& vx_dram_req_ready)
begin
avs_address <= (vx_dram_req_addr >> 6);
avs_read <= 1;
$display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr >> 6);
avs_address <= vx_dram_req_addr;
dram_req_tag <= vx_dram_req_tag;
avs_read <= 1;
$display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr);
end
if (vx_dram_req_write
&& vx_dram_req_ready)
begin
avs_writedata <= vx_dram_req_data;
avs_address <= (vx_dram_req_addr >> 6);
avs_address <= vx_dram_req_addr;
avs_writedata <= vx_dram_req_data;
avs_write <= 1;
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr >> 6);
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr);
end
end
endcase
@@ -362,7 +365,7 @@ logic vortex_enabled;
always_comb
begin
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
vx_dram_req_ready = vortex_enabled && !avs_waitrequest && !avs_raq_full && !avs_rdq_full;
vx_dram_req_ready = vortex_enabled && !avs_waitrequest && !avs_rtq_full && !avs_rdq_full;
end
// Vortex DRAM fill response
@@ -370,7 +373,7 @@ end
always_comb
begin
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
vx_dram_rsp_addr = (avs_raq_dout << 6);
vx_dram_rsp_tag = avs_rtq_dout;
vx_dram_rsp_data = avs_rdq_dout;
end
@@ -380,9 +383,9 @@ logic cci_wr_req;
always_comb
begin
avs_raq_pop = vx_dram_rsp_valid || cci_wr_req;
avs_raq_din = avs_address;
avs_raq_push = avs_read;
avs_rtq_pop = vx_dram_rsp_valid || cci_wr_req;
avs_rtq_din = dram_req_tag;
avs_rtq_push = avs_read;
end
VX_generic_queue #(
@@ -391,19 +394,19 @@ VX_generic_queue #(
) avs_rd_req_queue (
.clk (clk),
.reset (SoftReset),
.push (avs_raq_push),
.data_in (avs_raq_din),
.pop (avs_raq_pop),
.data_out (avs_raq_dout),
.empty (avs_raq_empty),
.full (avs_raq_full)
.push (avs_rtq_push),
.data_in (avs_rtq_din),
.pop (avs_rtq_pop),
.data_out (avs_rtq_dout),
.empty (avs_rtq_empty),
.full (avs_rtq_full)
);
// AVS data read response queue ///////////////////////////////////////////////
always_comb
begin
avs_rdq_pop = avs_raq_pop;
avs_rdq_pop = avs_rtq_pop;
avs_rdq_din = avs_readdata;
avs_rdq_push = avs_readdatavalid;
end
@@ -426,7 +429,7 @@ VX_generic_queue #(
t_ccip_c0_ReqMemHdr cci_read_hdr;
logic [31:0] cci_read_ctr;
logic [DRAM_ADDR_WIDTH-1:0] cci_read_ctr;
t_cci_rdq_tag cci_rdq_ctr;
logic cci_rdq_full;
@@ -562,29 +565,29 @@ end
always_ff @(posedge clk)
begin
if (SoftReset) begin
vx_snp_req <= 0;
vx_snoop_ctr <= 0;
vx_snoop_delay <= 0;
vx_snp_req_valid <= 0;
snp_req_ctr <= 0;
snp_req_delay <= 0;
end
else begin
if (STATE_IDLE == state) begin
vx_snoop_ctr <= 0;
vx_snoop_delay <= 0;
snp_req_ctr <= 0;
snp_req_delay <= 0;
end
vx_snp_req <= 0;
vx_snp_req_valid <= 0;
if ((STATE_CLFLUSH == state)
&& vx_snoop_ctr < csr_data_size
&& (snp_req_ctr < csr_data_size)
&& vx_snp_req_ready)
begin
vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
vx_snp_req <= 1;
vx_snoop_ctr <= vx_snoop_ctr + 1;
begin
vx_snp_req_addr <= csr_mem_addr + snp_req_ctr;
vx_snp_req_valid <= 1;
snp_req_ctr <= snp_req_ctr + 1;
end
if (vx_snoop_ctr == csr_data_size) begin
vx_snoop_delay <= vx_snoop_delay + 1;
if (snp_req_ctr == csr_data_size) begin
snp_req_delay <= snp_req_delay + 1;
end
end
end
@@ -600,21 +603,22 @@ Vortex_Socket #() vx_socket (
.dram_req_read (vx_dram_req_read),
.dram_req_addr (vx_dram_req_addr),
.dram_req_data (vx_dram_req_data),
.dram_req_tag (vx_dram_req_tag),
.dram_req_ready (vx_dram_req_ready),
// DRAM Rsp
.dram_rsp_valid (vx_dram_rsp_valid),
.dram_rsp_addr (vx_dram_rsp_addr),
.dram_rsp_data (vx_dram_rsp_data),
.dram_rsp_tag (vx_dram_rsp_tag),
.dram_rsp_ready (vx_dram_rsp_ready),
// Cache Snooping Req
.llc_snp_req_valid (vx_snp_req),
.llc_snp_req_addr (vx_snp_req_addr),
.llc_snp_req_ready (vx_snp_req_ready),
.snp_req_valid (vx_snp_req_valid),
.snp_req_addr (vx_snp_req_addr),
.snp_req_ready (vx_snp_req_ready),
// program exit signal
.ebreak (vx_ebreak)
// status
.busy (vx_busy)
);
endmodule

View File

@@ -52,6 +52,16 @@
///////////////////////////////////////////////////////////////////////////////
`define BYTE_EN_NO 3'h7
`define BYTE_EN_LB 3'h0
`define BYTE_EN_LH 3'h1
`define BYTE_EN_LW 3'h2
`define BYTE_EN_HB 3'h4
`define BYTE_EN_HH 3'h5
`define BYTE_EN_BITS 3
///////////////////////////////////////////////////////////////////////////////
`define INST_R 7'd051
`define INST_L 7'd003
`define INST_ALU 7'd019
@@ -62,7 +72,7 @@
`define INST_JAL 7'd111
`define INST_JALR 7'd103
`define INST_SYS 7'd115
`define INST_GPGPU 7'h06b
`define INST_GPGPU 7'd107
`define RS2_IMMED 1
`define RS2_REG 0

View File

@@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_cache_config.vh"
module VX_dram_arb #(
parameter BANK_LINE_SIZE = 1,

View File

@@ -19,26 +19,16 @@ module VX_warp (
output wire[`NUM_THREADS-1:0] valid
);
reg [31:0] real_PC;
logic [31:0] temp_PC;
logic [31:0] use_PC;
reg [`NUM_THREADS-1:0] valid_t;
reg [`NUM_THREADS-1:0] valid_zero;
integer i;
initial begin
real_PC = 0;
for (i = 1; i < `NUM_THREADS; i=i+1) begin
valid_t[i] = 0; // Thread 1 active
valid_zero[i] = 0;
end
valid_t = 1;
valid_zero[0] = 0;
end
reg [31:0] real_PC;
reg [31:0] temp_PC;
reg [31:0] use_PC;
always @(posedge clk) begin
if (remove) begin
valid_t <= valid_zero;
if (reset) begin
valid_t <= {{(`NUM_THREADS-1){1'b0}},1'b1}; // Thread 1 active
end else if (remove) begin
valid_t <= 0;
end else if (change_mask) begin
valid_t <= thread_mask;
end
@@ -46,7 +36,7 @@ module VX_warp (
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i = i+1) begin : valid_assign
for (i = 0; i < `NUM_THREADS; i++) begin : valid_assign
assign valid[i] = change_mask ? thread_mask[i] : stall ? 1'b0 : valid_t[i];
end
endgenerate
@@ -54,8 +44,7 @@ module VX_warp (
always @(*) begin
if (jal == 1'b1) begin
temp_PC = jal_dest;
// $display("LINKING TO %h", temp_PC);
end else if (branch_dir == 1'b1) begin
end else if (branch_dir) begin
temp_PC = branch_dest;
end else begin
temp_PC = real_PC;
@@ -68,8 +57,7 @@ module VX_warp (
always @(posedge clk) begin
if (reset) begin
real_PC <= 0;
end else if (wspawn == 1'b1) begin
// $display("Inside warp ***** Spawn @ %H",wspawn_pc);
end else if (wspawn) begin
real_PC <= wspawn_pc;
end else if (!stall) begin
real_PC <= use_PC + 32'h4;

View File

@@ -3,14 +3,6 @@
`include "VX_define.vh"
`define BYTE_EN_NO 3'h7
`define BYTE_EN_LB 3'h0
`define BYTE_EN_LH 3'h1
`define BYTE_EN_LW 3'h2
`define BYTE_EN_HB 3'h4
`define BYTE_EN_HH 3'h5
`define BYTE_EN_BITS 3
// data tid tag read write base addr
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS)

View File

@@ -53,9 +53,9 @@ module VX_cache_core_rsp_merge #(
output wire [NUM_BANKS-1:0] per_bank_core_rsp_pop,
// Core Writeback
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready
);
@@ -81,7 +81,6 @@ module VX_cache_core_rsp_merge #(
always @(*) begin
core_rsp_valid = 0;
core_rsp_data = 0;
core_rsp_tag = 0;
for (i = 0; i < NUM_BANKS; i = i + 1) begin
if (found_bank
&& per_bank_core_rsp_valid[i]

View File

@@ -66,7 +66,7 @@ module VX_divide #(
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
genvar i;
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
numer_pipe[i+1] <= 0;

View File

@@ -84,7 +84,7 @@ module VX_mult #(
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
genvar i;
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
dataa_pipe[i+1] <= 0;

View File

@@ -19,7 +19,7 @@ module VX_tb_divide();
genvar i;
generate
for (i = 0; i < 8; i = i+1) begin : div_loop
for (i = 0; i < 8; i++) begin : div_loop
VX_divide#(
.WIDTHN(32),
.WIDTHD(32),