merging changes from OPAE branch making this branch

This commit is contained in:
Blaise Tine
2020-03-27 20:19:16 -04:00
parent 614797e52f
commit 5a5c9f3981
267 changed files with 498191 additions and 166 deletions

24
driver/hw/Makefile Normal file
View File

@@ -0,0 +1,24 @@
# Build/run helper for the Vortex AFU hardware driver.
#
#   all       - build both the ase and fpga targets
#   ase       - create the build directory (if needed) and build inside it
#   fpga      - placeholder, not implemented yet
#   run-ase   - run `make sim` inside the build directory
#   run-fpga  - placeholder, not implemented yet
#   clean     - remove the build directory
BUILD_DIR=build_sim

# None of these targets produce a file of the same name; declaring them phony
# keeps them working even if such a file appears in this directory.
.PHONY: all ase fpga build-setup run-ase run-fpga clean

all: ase fpga

# $(MAKE) (rather than a literal `make`) forwards -j/-n/-k and command-line
# variable overrides to the sub-make.
ase: build-setup
	$(MAKE) -C $(BUILD_DIR)

fpga: build-setup
	# TODO

build-setup: $(BUILD_DIR)/Makefile

# The build directory is generated once by afu_sim_setup from sources.txt;
# its Makefile is used as the "already set up" marker.
$(BUILD_DIR)/Makefile:
	afu_sim_setup --sources=sources.txt --platform discrete_pcie3 $(BUILD_DIR) -f

run-ase:
	cd $(BUILD_DIR) && MENT_VSIM_OPT="-dpicpppath /usr/bin/gcc" $(MAKE) sim

run-fpga:
	# TODO

clean:
	rm -rf $(BUILD_DIR)

View File

@@ -0,0 +1,48 @@
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
// Register all interface signals
import ccip_if_pkg::*;
module ccip_interface_reg(
    // CCI-P Clocks and Resets
    input  logic        pClk,                    // 400MHz - CCI-P clock domain. Primary Clock
    input  logic        pck_cp2af_softReset_T0,  // CCI-P ACTIVE HIGH Soft Reset
    input  logic [1:0]  pck_cp2af_pwrState_T0,   // CCI-P AFU Power State
    input  logic        pck_cp2af_error_T0,      // CCI-P Protocol Error Detected
    // Interface structures
    input  t_if_ccip_Rx pck_cp2af_sRx_T0,        // CCI-P Rx Port
    input  t_if_ccip_Tx pck_af2cp_sTx_T0,        // CCI-P Tx Port
    // Registered (one pClk cycle later) copies of the inputs above
    output logic        pck_cp2af_softReset_T1,
    output logic [1:0]  pck_cp2af_pwrState_T1,
    output logic        pck_cp2af_error_T1,
    output t_if_ccip_Rx pck_cp2af_sRx_T1,
    output t_if_ccip_Tx pck_af2cp_sTx_T1
);

    // One register per interface signal. The synthesis attribute asks the
    // tool to keep each register.
    (* preserve *) logic        pck_cp2af_softReset_T0_q;
    (* preserve *) logic [1:0]  pck_cp2af_pwrState_T0_q;
    (* preserve *) logic        pck_cp2af_error_T0_q;
    (* preserve *) t_if_ccip_Rx pck_cp2af_sRx_T0_q;
    (* preserve *) t_if_ccip_Tx pck_af2cp_sTx_T0_q;

    // Capture stage: sample every T0 input on the rising edge of pClk.
    // There is no reset; the registers simply follow their inputs.
    always_ff @(posedge pClk) begin
        pck_af2cp_sTx_T0_q       <= pck_af2cp_sTx_T0;
        pck_cp2af_sRx_T0_q       <= pck_cp2af_sRx_T0;
        pck_cp2af_error_T0_q     <= pck_cp2af_error_T0;
        pck_cp2af_pwrState_T0_q  <= pck_cp2af_pwrState_T0;
        pck_cp2af_softReset_T0_q <= pck_cp2af_softReset_T0;
    end

    // Output stage: the T1 ports are the registered values, unmodified.
    assign pck_cp2af_softReset_T1 = pck_cp2af_softReset_T0_q;
    assign pck_cp2af_pwrState_T1  = pck_cp2af_pwrState_T0_q;
    assign pck_cp2af_error_T1     = pck_cp2af_error_T0_q;
    assign pck_cp2af_sRx_T1       = pck_cp2af_sRx_T0_q;
    assign pck_af2cp_sTx_T1       = pck_af2cp_sTx_T0_q;

endmodule

172
driver/hw/ccip_std_afu.sv Normal file
View File

@@ -0,0 +1,172 @@
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
// Top Level Vortex Driver
// To be done:
// Check how to run this with OPAE. Looks like setup issue
`include "platform_if.vh"
import local_mem_cfg_pkg::*;
module ccip_std_afu
#(
    parameter NUM_LOCAL_MEM_BANKS = 2
)
(
    // CCI-P Clocks and Resets
    input  logic        pClk,                 // Primary CCI-P interface clock.
    input  logic        pClkDiv2,             // Aligned, pClk divided by 2.
    input  logic        pClkDiv4,             // Aligned, pClk divided by 4.
    input  logic        uClk_usr,             // User clock domain. Refer to clock programming guide.
    input  logic        uClk_usrDiv2,         // Aligned, user clock divided by 2.
    input  logic        pck_cp2af_softReset,  // CCI-P ACTIVE HIGH Soft Reset
    input  logic [1:0]  pck_cp2af_pwrState,   // CCI-P AFU Power State
    input  logic        pck_cp2af_error,      // CCI-P Protocol Error Detected

    // CCI-P structures
    input  t_if_ccip_Rx pck_cp2af_sRx,        // CCI-P Rx Port
    output t_if_ccip_Tx pck_af2cp_sTx,        // CCI-P Tx Port

    // Local memory interface
    avalon_mem_if.to_fiu local_mem[NUM_LOCAL_MEM_BANKS]
);

    // ====================================================================
    // Clock and reset selection
    // ====================================================================
    // The platform macros resolve to the clock/reset that the AFU's JSON
    // file selected, which may differ from pClk.
    logic clk;
    logic reset;
    assign clk   = `PLATFORM_PARAM_CCI_P_CLOCK;
    assign reset = `PLATFORM_PARAM_CCI_P_RESET;

    // ====================================================================
    // Interface registering
    // ====================================================================
    // Everything crossing the CCI-P boundary goes through one register
    // stage (ccip_interface_reg) before the AFU consumes or drives it.
    (* noprune *) logic [1:0]  cp2af_pwrState_T1;
    (* noprune *) logic        cp2af_error_T1;
    logic        reset_T1;
    t_if_ccip_Rx cp2af_sRx_T1;
    t_if_ccip_Tx af2cp_sTx_T0;

    ccip_interface_reg inst_green_ccip_interface_reg
    (
        .pClk                   (clk),
        // T0 side: raw signals
        .pck_cp2af_softReset_T0 (reset),
        .pck_cp2af_pwrState_T0  (pck_cp2af_pwrState),
        .pck_cp2af_error_T0     (pck_cp2af_error),
        .pck_cp2af_sRx_T0       (pck_cp2af_sRx),
        .pck_af2cp_sTx_T0       (af2cp_sTx_T0),
        // T1 side: registered signals
        .pck_cp2af_softReset_T1 (reset_T1),
        .pck_cp2af_pwrState_T1  (cp2af_pwrState_T1),
        .pck_cp2af_error_T1     (cp2af_error_T1),
        .pck_cp2af_sRx_T1       (cp2af_sRx_T1),
        .pck_af2cp_sTx_T1       (pck_af2cp_sTx)
    );

    // ====================================================================
    // User AFU
    // ====================================================================
    //
    // vortex_afu depends on CCI-P and local memory being in the same
    // clock domain. This is accomplished by choosing a common clock
    // in the AFU's JSON description. The platform instantiates clock-
    // crossing shims automatically, as needed.
    //
    // Memory banks are used very simply here. Only one bank is active at
    // a time, selected by mem_bank_select, which vortex_afu drives.
    //

    // Single Avalon-MM master port exposed by vortex_afu
    t_local_mem_data      avs_writedata;
    t_local_mem_data      avs_readdata;
    t_local_mem_addr      avs_address;
    t_local_mem_burst_cnt avs_burstcount;
    t_local_mem_byte_mask avs_byteenable;
    logic                 avs_write;
    logic                 avs_read;
    logic                 avs_waitrequest;
    logic                 avs_readdatavalid;

    // Index of the bank the master port is currently routed to
    logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;

    vortex_afu
    #(
        .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
    )
    hello_mem_afu_inst
    (
        .clk               (clk),
        .SoftReset         (reset_T1),

        .avs_writedata     (avs_writedata),
        .avs_readdata      (avs_readdata),
        .avs_address       (avs_address),
        .avs_waitrequest   (avs_waitrequest),
        .avs_write         (avs_write),
        .avs_read          (avs_read),
        .avs_byteenable    (avs_byteenable),
        .avs_burstcount    (avs_burstcount),
        .avs_readdatavalid (avs_readdatavalid),
        .mem_bank_select   (mem_bank_select),

        .cp2af_sRxPort     (cp2af_sRx_T1),
        .af2cp_sTxPort     (af2cp_sTx_T0)
    );

    //
    // Per-bank copies of the response signals, held in arrays so the
    // selected bank can be picked with a plain array index below.
    //
    logic            avs_waitrequest_v[NUM_LOCAL_MEM_BANKS];
    t_local_mem_data avs_readdata_v[NUM_LOCAL_MEM_BANKS];
    logic            avs_readdatavalid_v[NUM_LOCAL_MEM_BANKS];

    genvar bank;
    generate
        for (bank = 0; bank < NUM_LOCAL_MEM_BANKS; bank = bank + 1)
        begin : lmb
            // High when the master port is routed to this bank
            logic bank_selected;
            assign bank_selected = ($bits(mem_bank_select)'(bank) == mem_bank_select);

            // Local memory to AFU signals
            assign avs_waitrequest_v[bank]   = local_mem[bank].waitrequest;
            assign avs_readdata_v[bank]      = local_mem[bank].readdata;
            assign avs_readdatavalid_v[bank] = local_mem[bank].readdatavalid;

            // Address, data, byte enables and burst count are broadcast
            // to every bank; only the request strobes are bank-specific.
            assign local_mem[bank].burstcount = avs_burstcount;
            assign local_mem[bank].writedata  = avs_writedata;
            assign local_mem[bank].address    = avs_address;
            assign local_mem[bank].byteenable = avs_byteenable;

            // Request strobes reach only the selected bank
            assign local_mem[bank].write = avs_write && bank_selected;
            assign local_mem[bank].read  = avs_read && bank_selected;
        end
    endgenerate

    // Route the selected bank's responses back to the master port
    assign avs_waitrequest   = avs_waitrequest_v[mem_bank_select];
    assign avs_readdata      = avs_readdata_v[mem_bank_select];
    assign avs_readdatavalid = avs_readdatavalid_v[mem_bank_select];

endmodule

116
driver/hw/sources.txt Normal file
View File

@@ -0,0 +1,116 @@
vortex_afu.json
+incdir+.
+incdir+../../rtl
+incdir+../../rtl/shared_memory
+incdir+../../rtl/cache
+incdir+../../rtl/VX_cache
+incdir+../../rtl/interfaces
+incdir+../../rtl/pipe_regs
+incdir+../../rtl/compat
../../rtl/VX_define_synth.v
../../rtl/VX_define.v
../../rtl/VX_cache/VX_cache_config.v
../../rtl/Vortex_SOC.v
../../rtl/Vortex.v
../../rtl/VX_front_end.v
../../rtl/VX_back_end.v
../../rtl/VX_fetch.v
../../rtl/VX_scheduler.v
../../rtl/VX_execute_unit.v
../../rtl/VX_warp.v
../../rtl/VX_icache_stage.v
../../rtl/VX_gpr_wrapper.v
../../rtl/byte_enabled_simple_dual_port_ram.v
../../rtl/VX_gpgpu_inst.v
../../rtl/VX_writeback.v
../../rtl/VX_countones.v
../../rtl/VX_csr_handler.v
../../rtl/VX_csr_pipe.v
../../rtl/VX_generic_queue_ll.v
../../rtl/VX_warp_scheduler.v
../../rtl/VX_priority_encoder.v
../../rtl/VX_generic_queue.v
../../rtl/pipe_regs/VX_f_d_reg.v
../../rtl/pipe_regs/VX_i_d_reg.v
../../rtl/pipe_regs/VX_d_e_reg.v
../../rtl/VX_gpr.v
../../rtl/VX_gpr_stage.v
../../rtl/VX_dmem_controller.v
../../rtl/VX_alu.v
../../rtl/VX_generic_stack.v
../../rtl/VX_generic_priority_encoder.v
../../rtl/VX_csr_data.v
../../rtl/VX_lsu.v
../../rtl/VX_decode.v
../../rtl/VX_inst_multiplex.v
../../rtl/VX_csr_wrapper.v
../../rtl/VX_priority_encoder_w_mask.v
../../rtl/VX_generic_register.v
../../rtl/VX_lsu_addr_gen.v
../../rtl/compat/VX_mult.v
../../rtl/compat/VX_divide.v
../../rtl/VX_cache/VX_snp_fwd_arb.v
../../rtl/VX_cache/VX_cache_dram_req_arb.v
../../rtl/VX_cache/VX_cache_dfq_queue.v
../../rtl/VX_cache/VX_cache_wb_sel_merge.v
../../rtl/VX_cache/VX_mrv_queue.v
../../rtl/VX_cache/VX_dcache_llv_resp_bank_sel.v
../../rtl/VX_cache/VX_tag_data_access.v
../../rtl/VX_cache/VX_cache.v
../../rtl/VX_cache/VX_cache_core_req_bank_sel.v
../../rtl/VX_cache/VX_cache_req_queue.v
../../rtl/VX_cache/VX_bank.v
../../rtl/VX_cache/VX_cache_miss_resrv.v
../../rtl/VX_cache/VX_fill_invalidator.v
../../rtl/VX_cache/VX_tag_data_structure.v
../../rtl/cache/VX_generic_pe.v
../../rtl/cache/cache_set.v
../../rtl/cache/VX_d_cache.v
../../rtl/cache/VX_Cache_Bank.v
../../rtl/cache/VX_cache_data_per_index.v
../../rtl/cache/VX_d_cache_encapsulate.v
../../rtl/cache/VX_cache_bank_valid.v
../../rtl/cache/VX_cache_data.v
../../rtl/shared_memory/VX_shared_memory_block.v
../../rtl/shared_memory/VX_priority_encoder_sm.v
../../rtl/shared_memory/VX_shared_memory.v
../../rtl/shared_memory/VX_bank_valids.v
../../rtl/interfaces/VX_exec_unit_req_inter.v
../../rtl/interfaces/VX_branch_response_inter.v
../../rtl/interfaces/VX_inst_meta_inter.v
../../rtl/interfaces/VX_join_inter.v
../../rtl/interfaces/VX_icache_response_inter.v
../../rtl/interfaces/VX_gpr_wspawn_inter.v
../../rtl/interfaces/VX_inst_exec_wb_inter.v
../../rtl/interfaces/VX_gpu_dcache_dram_req_inter.v
../../rtl/interfaces/VX_csr_req_inter.v
../../rtl/interfaces/VX_icache_request_inter.v
../../rtl/interfaces/VX_gpu_dcache_res_inter.v
../../rtl/interfaces/VX_frE_to_bckE_req_inter.v
../../rtl/interfaces/VX_dram_req_rsp_inter.v
../../rtl/interfaces/VX_dcache_request_inter.v
../../rtl/interfaces/VX_gpr_data_inter.v
../../rtl/interfaces/VX_dcache_response_inter.v
../../rtl/interfaces/VX_csr_wb_inter.v
../../rtl/interfaces/VX_gpu_dcache_req_inter.v
../../rtl/interfaces/VX_lsu_req_inter.v
../../rtl/interfaces/VX_gpu_snp_req_rsp.v
../../rtl/interfaces/VX_mw_wb_inter.v
../../rtl/interfaces/VX_gpr_jal_inter.v
../../rtl/interfaces/VX_gpu_inst_req_inter.v
../../rtl/interfaces/VX_wstall_inter.v
../../rtl/interfaces/VX_wb_inter.v
../../rtl/interfaces/VX_gpr_clone_inter.v
../../rtl/interfaces/VX_gpr_read_inter.v
../../rtl/interfaces/VX_mem_req_inter.v
../../rtl/interfaces/VX_jal_response_inter.v
../../rtl/interfaces/VX_warp_ctl_inter.v
../../rtl/interfaces/VX_gpu_dcache_snp_req_inter.v
../../rtl/interfaces/VX_gpu_dcache_dram_res_inter.v
../../rtl/interfaces/VX_inst_mem_wb_inter.v
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv

37
driver/hw/vortex_afu.json Normal file
View File

@@ -0,0 +1,37 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto",
"clock-frequency-low": "auto",
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

638
driver/hw/vortex_afu.sv Normal file
View File

@@ -0,0 +1,638 @@
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
// Interface between CSR and FSM
// All the MMIOs read/write are done from CSR and passed to the FSM for state transitions
// To be done:
// Change address size to buffer's address size and data size based on IO address size. Check from hello_world
`include "platform_if.vh"
import local_mem_cfg_pkg::*;
`include "afu_json_info.vh"
// vortex_afu
//
// Glue between the host (CCI-P), FPGA local memory (Avalon-MM master port)
// and the Vortex_SOC instance.
//
//   * MMIO writes from the host program the CSRs below (local-memory address,
//     burst count, command, bank select, transfer size, shared-buffer address).
//   * MMIO reads return the AFU header/UUID and the CSR values.
//   * A command written to MEM_RDWR starts one of three operations, carried
//     out by the FSM:
//       - write (bit0=1, bit1=0): read lines from the shared buffer over
//         CCI-P channel c0 and write them to local memory;
//       - read  (bits[2:0]=3'b011): read lines from local memory and write
//         them to the shared buffer over CCI-P channel c1;
//       - run   (bits[2:0]=3'b111): pulse the Vortex reset and service its
//         DRAM requests from local memory until out_ebreak is seen.
//
// Parameters:
//   NUM_LOCAL_MEM_BANKS - number of local memory banks; sizes mem_bank_select.
//
// NOTE(review): several registers (avs_write, avs_read, avm_byteenable, count,
// count_rsp, local_address, start_read, start_write, init_avs_read, vx_count,
// vx_dram_fill_rsp, af2cp_sTxPort.c0.valid, af2cp_sTxPort.c1.valid) are
// assigned from more than one always_ff block. That structure is preserved
// here as-is; it should be folded into a single driver per register before
// synthesis sign-off.
module vortex_afu #(
    parameter NUM_LOCAL_MEM_BANKS = 2
) (
    // global signals
    input clk,
    input SoftReset,

    // IF signals between CCI and AFU
    input  t_if_ccip_Rx cp2af_sRxPort,
    output t_if_ccip_Tx af2cp_sTxPort,

    // Avalon signals for local memory access
    output t_local_mem_data      avs_writedata,
    input  t_local_mem_data      avs_readdata,
    output t_local_mem_addr      avs_address,
    input  logic                 avs_waitrequest,
    output logic                 avs_write,
    output logic                 avs_read,
    output t_local_mem_byte_mask avs_byteenable,
    output t_local_mem_burst_cnt avs_burstcount,
    input                        avs_readdatavalid,

    // Bank that the Avalon port above is routed to
    output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
);

    // MMIO register addresses (compared against mmioHdr.address)
    localparam AFU_ID_L         = 16'h0002; // AFU ID Lower
    localparam AFU_ID_H         = 16'h0004; // AFU ID Higher
    localparam MEM_ADDRESS      = 16'h0040; // AVMM Master Address
    localparam MEM_BURSTCOUNT   = 16'h0042; // AVMM Master Burst Count
    localparam MEM_RDWR         = 16'h0044; // AVMM Master Read/Write
    localparam MEM_BANK_SELECT  = 16'h0064; // Memory bank selection register
    localparam READY_FOR_SW_CMD = 16'h0066; // "Ready for sw cmd" register. S/w must poll this register before issuing a read/write command to fsm
    localparam MEM_BYTEENABLE   = 16'h0068; // Test byteenable

    // Added by Apurve to support read and write buffers. Change address size to buffer's address size
    localparam DATA_SIZE         = 16'h0046; // MMIO set by SW to denote the size of data to read/write
    localparam BUFFER_IO_ADDRESS = 16'h0048; // MMIO set by SW to denote the buffer address space

    logic [127:0] afu_id = `AFU_ACCEL_UUID;

    // cast c0 header into ReqMmioHdr
    t_ccip_c0_ReqMmioHdr mmioHdr;
    assign mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);

    // Command register: [0] enable, [1] 0-WR/1-RD, [2] run Vortex
    logic [2:0] mem_RDWR = '0;

    //--
    logic                 ready_for_sw_cmd;
    logic                 run_vortex;
    logic [15:0]          avm_data_size;
    t_ccip_clAddr         avm_write_buffer_address;
    t_ccip_clAddr         avm_read_buffer_address;
    logic                 avm_read;
    logic                 avm_write;
    t_local_mem_addr      avm_address;
    t_local_mem_burst_cnt avm_burstcount;
    t_local_mem_byte_mask avm_byteenable;

    // Vortex signals
    logic        vx_reset;
    logic        vx_dram_req;
    logic        vx_dram_req_write;
    logic        vx_dram_req_read;
    logic        vx_ebreak;
    logic [31:0] vx_dram_req_addr;
    logic [31:0] vx_local_addr;
    logic [31:0] vx_dram_req_size;
    logic [31:0] vx_count;
    logic        vx_dram_fill_rsp;
    logic [31:0] vx_dram_req_data[15:0];
    logic [31:0] vx_dram_fill_rsp_data[15:0];
    logic        vx_dram_fill_accept;
    logic [31:0] vx_dram_fill_rsp_addr;
    logic [31:0] vx_dram_expected_lat;

    //
    // MMIO control threads
    //
    always@(posedge clk) begin
        if(SoftReset) begin
            af2cp_sTxPort.c2.hdr         <= '0;
            af2cp_sTxPort.c2.data        <= '0;
            af2cp_sTxPort.c2.mmioRdValid <= '0;
            avm_address                  <= '0;
            avm_read                     <= '0;
            avm_write                    <= '0;
            avm_burstcount               <= 12'd1;
            mem_RDWR                     <= '0;
            mem_bank_select              <= 1'b1;
            // Change address size to buffer's address size
            avm_data_size                <= '0;
            avm_write_buffer_address     <= '0;
            avm_read_buffer_address      <= '0;
            run_vortex                   <= '0;
        end
        else begin
            af2cp_sTxPort.c2.mmioRdValid <= 0;

            // Decode the command register into one-hot start requests.
            // [0] enable [1] 0-WR,1-RD [2] run Vortex.
            // avm_read is masked with !mem_RDWR[2]: the "run" encoding (7)
            // also has bits 0 and 1 set, and the FSM's IDLE state tests
            // avm_read before run_vortex, so without the mask a run command
            // would always be taken as a read.
            avm_read  <= mem_RDWR[0] & mem_RDWR[1] & !mem_RDWR[2];
            avm_write <= mem_RDWR[0] & !mem_RDWR[1];
            // Added by Apurve. Run vortex when RDWR is 7
            run_vortex <= mem_RDWR[0] & mem_RDWR[1] & mem_RDWR[2];

            // set the registers on MMIO write request
            // (user-defined AFU registers; addresses are the localparams above)
            if(cp2af_sRxPort.c0.mmioWrValid == 1)
            begin
                case(mmioHdr.address)
                    MEM_ADDRESS:     avm_address     <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
                    MEM_BURSTCOUNT:  avm_burstcount  <= cp2af_sRxPort.c0.data[11:0];
                    MEM_RDWR:        mem_RDWR        <= cp2af_sRxPort.c0.data[2:0];
                    MEM_BANK_SELECT: mem_bank_select <= $bits(mem_bank_select)'(cp2af_sRxPort.c0.data);
                    // Added by Apurve to support read and write buffers. Change address size to buffer's address size
                    DATA_SIZE:       avm_data_size   <= cp2af_sRxPort.c0.data[15:0];
                    // A single buffer address is used for both directions
                    BUFFER_IO_ADDRESS: begin
                        avm_write_buffer_address <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
                        avm_read_buffer_address  <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
                    end
                endcase
            end

            // serve MMIO read requests
            if(cp2af_sRxPort.c0.mmioRdValid == 1)
            begin
                af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID
                case(mmioHdr.address)
                    // AFU header
                    16'h0000: af2cp_sTxPort.c2.data <= {
                        4'b0001, // Feature type = AFU
                        8'b0,    // reserved
                        4'b0,    // afu minor revision = 0
                        7'b0,    // reserved
                        1'b1,    // end of DFH list = 1
                        24'b0,   // next DFH offset = 0
                        4'b0,    // afu major revision = 0
                        12'b0    // feature ID = 0
                    };
                    AFU_ID_L:         af2cp_sTxPort.c2.data <= afu_id[63:0];   // afu id low
                    AFU_ID_H:         af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi
                    16'h0006:         af2cp_sTxPort.c2.data <= 64'h0; // next AFU
                    16'h0008:         af2cp_sTxPort.c2.data <= 64'h0; // reserved
                    MEM_ADDRESS:      af2cp_sTxPort.c2.data <= 64'(avm_address);
                    MEM_BURSTCOUNT:   af2cp_sTxPort.c2.data <= 64'(avm_burstcount);
                    MEM_RDWR:         af2cp_sTxPort.c2.data <= {62'd0, mem_RDWR};
                    READY_FOR_SW_CMD: af2cp_sTxPort.c2.data <= ready_for_sw_cmd;
                    MEM_BANK_SELECT:  af2cp_sTxPort.c2.data <= 64'(mem_bank_select);
                    default:          af2cp_sTxPort.c2.data <= 64'h0;
                endcase
                af2cp_sTxPort.c2.mmioRdValid <= 1; // post response
            end else
            begin
                // Self-clear the enable bit once the command has been decoded,
                // so each MMIO write of MEM_RDWR produces a single start pulse.
                if (avm_read | avm_write | run_vortex) mem_RDWR[0] <= 0;
            end
        end
    end

    // FSM
    // Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
    // Interface between CSR and FSM
    // All the MMIOs read/write passed from csr are used for state transitions
    // Read: local memory to shared buffer
    // Write: shared buffer to local memory
    // To be done:
    // Review the FSM and implement read/write to shared buffer
    // Vortex on/off signal
    // check on byteenable and burst signals
    //cp2af_sRxPort -> sRx
    //af2cp_sTxPort -> sTx
    typedef enum logic[3:0] { IDLE,
                              VX_REQ,     // Vortex running; waiting for a DRAM request or ebreak
                              VX_WR_REQ,  // servicing a Vortex DRAM write
                              VX_RD_REQ,  // servicing a Vortex DRAM read
                              VX_RSP,     // Vortex finished (ebreak seen)
                              RD_REQ,     // host "read": local memory -> shared buffer
                              RD_RSP,
                              WR_REQ,     // host "write": shared buffer -> local memory
                              WR_RSP } state_t;

    // Added by Apurve for shared memory space write/read
    t_ccip_clAddr    wr_addr;        // next shared-buffer line to write (RD path)
    t_ccip_clAddr    rd_addr;        // next shared-buffer line to read (WR path)
    logic [15:0]     count;          // bytes requested so far
    logic [15:0]     count_rsp;      // bytes acknowledged so far
    logic            start_read;     // WR path: allowed to issue the next c0 read
    logic            start_write;    // RD path: allowed to issue the next c1 write
    t_local_mem_addr local_address;  // running local-memory address
    logic            init_avs_read;  // RD path: request one avs_read pulse
    parameter ADDRESS_MAX_BIT = 10;
    state_t state;

    assign avs_burstcount = avm_burstcount;
    t_local_mem_burst_cnt burstcount;
    assign avs_byteenable = avm_byteenable;

    always_ff @(posedge clk) begin
        if(SoftReset) begin
            local_address    <= '0;
            avs_write        <= '0;
            avs_read         <= '0;
            state            <= IDLE;
            burstcount       <= 1;
            ready_for_sw_cmd <= 0;
            count            <= 0;
            count_rsp        <= 0;
            vx_reset         <= 1'b0;
            vx_count         <= 0;
        end
        else begin
            case(state)
                IDLE: begin
                    // Priority: write, then read, then run.
                    ready_for_sw_cmd <= 1;
                    if (avm_write) begin
                        state            <= WR_REQ;
                        ready_for_sw_cmd <= 0;
                        count            <= 0;
                        count_rsp        <= 0;
                    end else if (avm_read) begin
                        init_avs_read    <= 1;
                        state            <= RD_REQ;
                        ready_for_sw_cmd <= 0;
                        count            <= 0;
                        count_rsp        <= 0;
                    end else if (run_vortex) begin
                        state            <= VX_REQ;
                        vx_reset         <= 1'b1;
                        ready_for_sw_cmd <= 0;
                    end
                end

                WR_REQ: begin //AVL MM Posted Write
                    af2cp_sTxPort.c0.valid <= 1'b0;
                    avs_write              <= 0;
                    if (~avs_waitrequest)
                    begin
                        // Done once every requested byte has been answered
                        if (count_rsp >= avm_data_size)
                        begin
                            state     <= WR_RSP;
                            avs_write <= 0;
                        end
                    end
                end

                WR_RSP: begin // wait for write response
                    avm_byteenable <= 64'hffffffffffffffff;
                    state          <= IDLE;
                end

                RD_REQ: begin // AVL MM Read non-posted
                    af2cp_sTxPort.c1.valid <= 1'b0;
                    if (~avs_waitrequest) begin
                        if (count_rsp >= avm_data_size)
                        begin
                            state    <= RD_RSP;
                            avs_read <= 0;
                        end
                    end
                end

                RD_RSP: begin
                    state <= IDLE;
                end

                VX_REQ: begin
                    // vx_reset was raised on entry from IDLE; drop it here
                    vx_reset <= 1'b0;
                    if (vx_dram_req_write) begin
                        vx_count  <= 0;
                        avs_write <= 1'b1;
                        state     <= VX_WR_REQ;
                    end
                    if (vx_dram_req_read) begin
                        vx_count <= 0;
                        avs_read <= 1'b1;
                        state    <= VX_RD_REQ;
                    end
                    // Checked last, so ebreak wins over a simultaneous request
                    if (vx_ebreak) begin
                        state <= VX_RSP;
                    end
                end

                VX_WR_REQ: begin
                    avs_write <= 1'b0;
                    if (vx_count >= vx_dram_req_size)
                    begin
                        state    <= VX_REQ;
                        vx_count <= 0;
                    end
                end

                VX_RD_REQ: begin
                    avs_read         <= 1'b0;
                    vx_dram_fill_rsp <= 1'b0;
                    if (vx_count >= vx_dram_req_size)
                    begin
                        state    <= VX_REQ;
                        vx_count <= 0;
                    end
                end

                VX_RSP: begin
                    vx_count <= 0;
                    state    <= IDLE;
                end
            endcase
        end // end else reset
    end // posedge clk

    // Vortex call
    Vortex_SOC #()
    vx_soc (
        .clk                    (clk),
        .reset                  (vx_reset),
        // IO
        //.io_valid[`NUMBER_CORES-1:0] (),
        //.io_data [`NUMBER_CORES-1:0] (),
        //.number_cores (),
        // DRAM Dcache Req
        .out_dram_req           (vx_dram_req),
        .out_dram_req_write     (vx_dram_req_write),
        .out_dram_req_read      (vx_dram_req_read),
        .out_dram_req_addr      (vx_dram_req_addr),
        .out_dram_req_size      (vx_dram_req_size),
        .out_dram_req_data      (vx_dram_req_data),
        .out_dram_expected_lat  (vx_dram_expected_lat),
        // DRAM Dcache Res
        .out_dram_fill_accept   (vx_dram_fill_accept),
        .out_dram_fill_rsp      (vx_dram_fill_rsp),
        .out_dram_fill_rsp_addr (vx_dram_fill_rsp_addr),
        .out_dram_fill_rsp_data (vx_dram_fill_rsp_data),
        //.l3c_snp_req (),
        //.l3c_snp_req_addr (),
        //.l3c_snp_req_delay (),
        .out_ebreak             (vx_ebreak)
    );

    // Local memory read/write address
    // While servicing a Vortex request use the Vortex address (first beat)
    // or its running copy (later beats); otherwise use the host-programmed
    // address (first beat) or its running copy.
    //assign avs_address = (vx_dram_req ? (vx_count ? vx_local_addr : vx_dram_req_addr) : (count ? local_address : avm_address));
    assign avs_address = (((state == VX_WR_REQ) || (state == VX_RD_REQ)) ? (vx_count ? vx_local_addr : vx_dram_req_addr) : (count ? local_address : avm_address));

    // Vortex DRAM requests and responses
    // Handling of read/write data and vx_dram_req_size
    // Is vx_dram_fill_accept for backpressure?

    // Vortex write: pack the sixteen 32-bit request words into one 512-bit
    // local-memory line and issue it.
    always_ff @(posedge clk) begin
        if (state == VX_WR_REQ) begin
            if (!avs_waitrequest & (vx_count < vx_dram_req_size)) begin
                avs_write <= 1'b1;
                //avs_writedata <= vx_dram_req_data;
                // Non-blocking, like every other register in this module:
                // avs_writedata is also written with <= in the CCI-P read
                // response block, and mixing = and <= on one register in
                // clocked logic creates a simulation race.
                avs_writedata[31:0]    <= vx_dram_req_data[0];
                avs_writedata[63:32]   <= vx_dram_req_data[1];
                avs_writedata[95:64]   <= vx_dram_req_data[2];
                avs_writedata[127:96]  <= vx_dram_req_data[3];
                avs_writedata[159:128] <= vx_dram_req_data[4];
                avs_writedata[191:160] <= vx_dram_req_data[5];
                avs_writedata[223:192] <= vx_dram_req_data[6];
                avs_writedata[255:224] <= vx_dram_req_data[7];
                avs_writedata[287:256] <= vx_dram_req_data[8];
                avs_writedata[319:288] <= vx_dram_req_data[9];
                avs_writedata[351:320] <= vx_dram_req_data[10];
                avs_writedata[383:352] <= vx_dram_req_data[11];
                avs_writedata[415:384] <= vx_dram_req_data[12];
                avs_writedata[447:416] <= vx_dram_req_data[13];
                avs_writedata[479:448] <= vx_dram_req_data[14];
                avs_writedata[511:480] <= vx_dram_req_data[15];
                vx_local_addr <= (vx_count ? vx_local_addr + 1 : vx_dram_req_addr + 1);
                // Update the count value based on the number of bytes written
                vx_count <= vx_count + 64;
                // Partial final line: enable only the remaining low bytes
                if ((vx_dram_req_size - vx_count) < 64)
                begin
                    avm_byteenable <= 64'hffffffffffffffff >> (64 - (vx_dram_req_size - vx_count));
                end else
                begin
                    avm_byteenable <= 64'hffffffffffffffff;
                end
            end
        end
    end

    // Vortex read: unpack each returned 512-bit line into sixteen 32-bit
    // fill-response words.
    always_ff @(posedge clk) begin
        //if (SoftReset) begin
        if (vx_reset) begin
            vx_dram_fill_rsp <= 1'b0;
            //vx_dram_fill_rsp_data <= 0;
            vx_dram_fill_rsp_data[0]  <= 0;
            vx_dram_fill_rsp_data[1]  <= 0;
            vx_dram_fill_rsp_data[2]  <= 0;
            vx_dram_fill_rsp_data[3]  <= 0;
            vx_dram_fill_rsp_data[4]  <= 0;
            vx_dram_fill_rsp_data[5]  <= 0;
            vx_dram_fill_rsp_data[6]  <= 0;
            vx_dram_fill_rsp_data[7]  <= 0;
            vx_dram_fill_rsp_data[8]  <= 0;
            vx_dram_fill_rsp_data[9]  <= 0;
            vx_dram_fill_rsp_data[10] <= 0;
            vx_dram_fill_rsp_data[11] <= 0;
            vx_dram_fill_rsp_data[12] <= 0;
            vx_dram_fill_rsp_data[13] <= 0;
            vx_dram_fill_rsp_data[14] <= 0;
            vx_dram_fill_rsp_data[15] <= 0;
        end
        if (state == VX_RD_REQ) begin
            if (avs_readdatavalid & vx_dram_fill_accept) begin
                avs_read         <= 1'b1;
                vx_dram_fill_rsp <= 1'b1;
                //vx_dram_fill_rsp_data <= avs_readdata;
                vx_dram_fill_rsp_data[0]  <= avs_readdata[31:0];
                vx_dram_fill_rsp_data[1]  <= avs_readdata[63:32];
                vx_dram_fill_rsp_data[2]  <= avs_readdata[95:64];
                vx_dram_fill_rsp_data[3]  <= avs_readdata[127:96];
                vx_dram_fill_rsp_data[4]  <= avs_readdata[159:128];
                vx_dram_fill_rsp_data[5]  <= avs_readdata[191:160];
                vx_dram_fill_rsp_data[6]  <= avs_readdata[223:192];
                vx_dram_fill_rsp_data[7]  <= avs_readdata[255:224];
                vx_dram_fill_rsp_data[8]  <= avs_readdata[287:256];
                vx_dram_fill_rsp_data[9]  <= avs_readdata[319:288];
                vx_dram_fill_rsp_data[10] <= avs_readdata[351:320];
                vx_dram_fill_rsp_data[11] <= avs_readdata[383:352];
                vx_dram_fill_rsp_data[12] <= avs_readdata[415:384];
                vx_dram_fill_rsp_data[13] <= avs_readdata[447:416];
                vx_dram_fill_rsp_data[14] <= avs_readdata[479:448];
                vx_dram_fill_rsp_data[15] <= avs_readdata[511:480];
                vx_local_addr <= (vx_count ? vx_local_addr + 1 : vx_dram_req_addr + 1);
                vx_dram_fill_rsp_addr <= vx_local_addr;
                // Update the count value based on the number of bytes read
                vx_count <= vx_count + 64;
            end
        end
    end

    // Read from local memory (avs_readdata) and write to shared space
    // Advance the shared-buffer and local-memory addresses each time a line
    // is forwarded (same condition as the write request below).
    always_ff @(posedge clk) begin
        if (state == RD_REQ & avs_readdatavalid & !cp2af_sRxPort.c1TxAlmFull & count < avm_data_size & !avs_waitrequest & start_write)
        begin
            wr_addr       <= (count? wr_addr + 1 : avm_write_buffer_address + 1);
            local_address <= (count? local_address + 1 : avm_address + 1);
            start_write   <= 1'b0;
        end
    end

    // Write header defines the request to the FIU
    t_ccip_c1_ReqMemHdr wr_hdr;
    always_comb
    begin
        wr_hdr = t_ccip_c1_ReqMemHdr'(0);
        // Virtual address (MPF virtual addressing is enabled)
        wr_hdr.address = (count? wr_addr: avm_write_buffer_address);
        // Start of packet is true (single line write)
        wr_hdr.sop = 1'b1;
    end

    // Send write requests to the FIU
    always_ff @(posedge clk)
    begin
        if (SoftReset)
        begin
            af2cp_sTxPort.c1.hdr   <= '0;
            af2cp_sTxPort.c1.data  <= '0;
            af2cp_sTxPort.c1.valid <= '0;
        end
        // Generate a write request when needed and the FIU isn't full
        if (state == RD_REQ & avs_readdatavalid & !cp2af_sRxPort.c1TxAlmFull & count < avm_data_size & !avs_waitrequest & start_write)
        begin
            af2cp_sTxPort.c1.hdr   <= wr_hdr;
            af2cp_sTxPort.c1.data  <= t_ccip_clData'(avs_readdata);
            af2cp_sTxPort.c1.valid <= 1'b1;
            start_write            <= 1'b0;
            count                  <= count + 64;
        end
    end

    // Write response
    always_ff @(posedge clk)
    begin
        if (SoftReset)
        begin
            start_write <= 1'b1;
        end
        // On each c1 write response: account for the line, allow the next
        // write and request the next local-memory read.
        if (state == RD_REQ & cp2af_sRxPort.c1.rspValid)
        begin
            count_rsp     <= count_rsp + 64;
            start_write   <= 1'b1;
            init_avs_read <= 1'b1;
        end
    end

    // avs_read control
    // Turns a pending init_avs_read into a one-cycle avs_read pulse.
    always_ff @(posedge clk)
    begin
        if (SoftReset)
        begin
            init_avs_read <= 1'b0;
        end
        // Equality, not "<=": the relational form was true for IDLE and all
        // VX_* states too (they enumerate below RD_REQ), letting a leftover
        // init_avs_read fire a read outside the RD_REQ state.
        if (init_avs_read & (state == RD_REQ))
        begin
            avs_read      <= 1'b1;
            init_avs_read <= 1'b0;
        end else
        begin
            avs_read <= 1'b0;
        end
    end

    // Write to local memory (avs_writedata) and read from shared space
    // Advance the shared-buffer and local-memory addresses each time a read
    // request is issued (same condition as the read request below).
    always_ff @(posedge clk) begin
        if (SoftReset)
        begin
            rd_addr       <= 0;
            local_address <= 0;
        end
        if (state == WR_REQ & !cp2af_sRxPort.c0TxAlmFull & count < avm_data_size & !avs_waitrequest & start_read)
        begin
            // Read address + 1 gives address for next block. Each block is 64B
            rd_addr       <= (count? rd_addr + 1 : avm_read_buffer_address + 1);
            local_address <= (count? local_address + 1 : avm_address);
            start_read    <= 1'b0;
        end
    end

    // Read header defines the request to the FIU
    t_ccip_c0_ReqMemHdr rd_hdr;
    always_comb
    begin
        rd_hdr = t_ccip_c0_ReqMemHdr'(0);
        rd_hdr.address = (count? rd_addr : avm_read_buffer_address);
    end

    // Send read requests to the FIU
    always_ff @(posedge clk)
    begin
        if (SoftReset)
        begin
            af2cp_sTxPort.c0.hdr   <= '0;
            af2cp_sTxPort.c0.valid <= '0;
        end
        // Generate a read request when needed and the FIU isn't full
        if (state == WR_REQ & !cp2af_sRxPort.c0TxAlmFull & count < avm_data_size & !avs_waitrequest & start_read)
        begin
            af2cp_sTxPort.c0.hdr   <= rd_hdr;
            af2cp_sTxPort.c0.valid <= 1'b1;
            start_read             <= 1'b0;
            count                  <= count + 64;
        end
    end

    // Read response
    always_ff @(posedge clk)
    begin
        if (SoftReset)
        begin
            start_read     <= 1'b1;
            avm_byteenable <= 64'hffffffffffffffff;
        end
        // On each c0 read response: forward the returned line to local
        // memory and allow the next read request.
        if (state == WR_REQ & cp2af_sRxPort.c0.rspValid)
        begin
            // Partial final line: enable only the remaining low bytes
            if ((avm_data_size - count_rsp) < 64)
            begin
                avm_byteenable <= 64'hffffffffffffffff >> (64 - (avm_data_size - count_rsp));
            end else
            begin
                avm_byteenable <= 64'hffffffffffffffff;
            end
            avs_writedata <= cp2af_sRxPort.c0.data;
            avs_write     <= 1;
            count_rsp     <= count_rsp + 64;
            start_read    <= 1'b1;
        end
    end

endmodule