project directories reorganization
This commit is contained in:
@@ -1,603 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//
|
||||
// Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// Neither the name of the Intel Corporation nor the names of its contributors
|
||||
// may be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
// Read from the memory locations first and then write to the memory locations
|
||||
|
||||
`include "platform_if.vh"
|
||||
`include "afu_json_info.vh"
|
||||
|
||||
|
||||
module ccip_std_afu
   (
    // Clocks
    input  logic pClk,                     // Primary CCI-P interface clock, 400MHz
    input  logic pClkDiv2,                 // CCI-P clock domain, 200MHz
    input  logic pClkDiv4,                 // CCI-P clock domain, 100MHz
    input  logic uClk_usr,                 // User clock domain (see the clock programming guide); currently a fixed 300MHz clock
    input  logic uClk_usrDiv2,             // User clock domain at half the programmed frequency; currently a fixed 150MHz clock

    // Reset, power and error status
    input  logic pck_cp2af_softReset,      // Active-high CCI-P soft reset
    input  logic [1:0] pck_cp2af_pwrState, // AFU power state
    input  logic pck_cp2af_error,          // CCI-P protocol error detected

    // CCI-P request/response structures
    input  t_if_ccip_Rx pck_cp2af_sRx,     // Rx port
    output t_if_ccip_Tx pck_af2cp_sTx      // Tx port
    );
|
||||
|
||||
|
||||
//
// Every process in this module is clocked by pClk, the standard CCI-P
// clock (400 MHz).
//
logic clk;
assign clk = pClk;

// Module-wide reset: a copy of the active-high CCI-P soft reset.
logic reset;
assign reset = pck_cp2af_softReset;

// 512-bit data registers: rd_data captures sRx.c0.data and wr_data is the
// value sent as the c1 write payload.
logic [511:0] wr_data, rd_data;

// Control flags shared between the CSR logic, the state machine and the
// read/write request logic below.
logic get_write_addr;
logic do_update;
logic rd_end_of_list;
logic rd_needed, wr_needed;

// Number of read requests issued so far.
logic [15:0] cnt_list_length;
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Register requests.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
//
// Both pck_cp2af_sRx (incoming) and pck_af2cp_sTx (outgoing) have to be
// registered. sRx is the registered copy of the incoming port. sTx is
// wired straight to the outgoing port; that is sufficient as long as the
// logic below only ever writes sTx from clocked processes.
//
t_if_ccip_Rx sRx;
t_if_ccip_Tx sTx;

// Input register stage.
always_ff @(posedge clk)
    sRx <= pck_cp2af_sRx;

// Output port follows sTx directly.
assign pck_af2cp_sTx = sTx;
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// CSR (MMIO) handling.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
// The AFU ID uniquely identifies this program. It was generated with
// "uuidgen" and lives in the AFU's JSON file; the ASE and synthesis setup
// scripts run afu_json_mgr to export it into afu_json_info.vh as
// AFU_ACCEL_UUID.
logic [127:0] afu_id = `AFU_ACCEL_UUID;

//
// A valid AFU exposes a device feature list starting at MMIO address 0.
// Each entry in the list starts with five 64-bit words: a device feature
// header, two AFU UUID words and two reserved words.
//

// The MMIO request header is overlaid on the normal c0 memory read
// response header, so the c0 Rx header is cast to the MMIO request type.
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);

// High while a CSR (MMIO) read request is present on the registered Rx port.
logic is_csr_read;
assign is_csr_read = sRx.c0.mmioRdValid;

// High while a CSR (MMIO) write request is present on the registered Rx port.
logic is_csr_write;
assign is_csr_write = sRx.c0.mmioWrValid;
|
||||
|
||||
|
||||
//
// Device feature list: answer every MMIO read request.
//
// A response is produced for each read request, tagged with the request's
// transaction ID. Unknown addresses read back as zero.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c2.mmioRdValid <= 1'b0;
    end
    else
    begin
        // One response per request; the tid pairs the response with it.
        sTx.c2.hdr.tid <= mmio_req_hdr.tid;
        sTx.c2.mmioRdValid <= is_csr_read;

        // MMIO addresses count 32-bit objects, so 64-bit registers sit at
        // even addresses.
        case (mmio_req_hdr.address)
            // AFU DFH (device feature header). This AFU is the only entry
            // in the feature list: bits [63:60] = 1 mark the feature type
            // as AFU and bit 40 marks the end of the list. All other bits
            // are zero.
            0: sTx.c2.data <= t_ccip_mmioData'({ 4'h1, 19'h0, 1'b1, 40'h0 });

            // AFU_ID_L
            2: sTx.c2.data <= afu_id[63:0];

            // AFU_ID_H
            4: sTx.c2.data <= afu_id[127:64];

            // DFH_RSVD0 and DFH_RSVD1
            6, 8: sTx.c2.data <= t_ccip_mmioData'(0);

            default: sTx.c2.data <= t_ccip_mmioData'(0);
        endcase
    end
end
|
||||
|
||||
|
||||
//
// CSR write handling: the host tells the AFU which memory address to write
// to by writing a CSR.
//

// MMIO address 0 is used for this. Read and write MMIO spaces are
// logically separate, so reusing address 0 (also the DFH on reads) is
// legal, even if it is not the tidiest way to lay out the MMIO space.
//
// Only a write that arrives while get_write_addr is still set is treated
// as the write address.
logic is_mem_addr_csr_write;
assign is_mem_addr_csr_write =
    is_csr_write && get_write_addr &&
    (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

// Memory address this AFU writes to.
t_ccip_clAddr write_mem_addr;

// get_write_addr is set by reset and cleared once the write address has
// been captured.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        get_write_addr <= 1'b1;
    end
    else if (is_mem_addr_csr_write)
    begin
        get_write_addr <= 1'b0;
        write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
    end
end
|
||||
|
||||
|
||||
//
// States of the read / update / write-back sequence.
//
typedef enum logic [1:0]
{
    STATE_IDLE,
    STATE_READ,
    STATE_UPDATE,
    STATE_WRITE
}
t_state;

t_state state;


// The read address is also set through MMIO address 0: once get_write_addr
// has been cleared, a CSR write to address 0 is taken as the read address.
logic is_mem_addr_csr_read;
assign is_mem_addr_csr_read = !get_write_addr && is_csr_write &&
                              (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

// Memory address from which this AFU will read, and the flag that starts
// the sequence once that address has been received.
logic start_read;
t_ccip_clAddr read_mem_addr;

//
// start_read is written by this process only (a variable assigned in an
// always_ff must not be assigned by any other process). It is set when
// the read address CSR is written and cleared under the same condition
// that moves the state machine from STATE_READ to STATE_UPDATE.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        start_read <= 1'b0;
    end
    else if (is_mem_addr_csr_read)
    begin
        read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
        start_read <= 1'b1;
    end
    else if ((state == STATE_READ) && rd_needed)
    begin
        start_read <= 1'b0;
    end
end


// =========================================================================
//
//   Main AFU logic
//
// =========================================================================

//
// State machine
//
// rd_end_of_list is only read here. It is written every clock by the
// process that compares cnt_list_length, so it is not reset in this
// process.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        state <= STATE_IDLE;
    end
    else
    begin
        case (state)
            STATE_IDLE:
            begin
                // The sequence begins once start_read has been set by the
                // read address CSR write.
                if (start_read)
                begin
                    state <= STATE_READ;
                    $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                end
            end

            STATE_READ:
            begin
                // Move on once a read has been flagged as needed.
                if (rd_needed)
                begin
                    state <= STATE_UPDATE;
                    $display("AFU reading data and pointing to next read address...");
                end
            end

            STATE_UPDATE:
            begin
                // Wait for the update of the read value to be flagged.
                if (do_update)
                begin
                    state <= STATE_WRITE;
                    $display("AFU performing comutations on the read values...");
                end
            end

            STATE_WRITE:
            begin
                // Return to IDLE at the end of the list; otherwise go back
                // to READ once a write has been flagged as needed.
                if (rd_end_of_list)
                begin
                    state <= STATE_IDLE;
                    $display("AFU done...");
                end
                else if (wr_needed)
                begin
                    state <= STATE_READ;
                    $display("AFU reading again from read address...");
                end
            end
        endcase
    end
end
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Read logic.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
//
// READ REQUEST
//

// Registered copy of sRx.c1.rspValid (a write response was seen).
logic addr_next_valid;

// Next read address
t_ccip_clAddr addr_next;

always_ff @(posedge clk)
begin
    // The next read address is considered valid once the write response
    // has come back.
    addr_next_valid <= sRx.c1.rspValid;

    // NOTE(review): placeholder. The address stride has not been filled
    // in yet, so addr_next is never advanced (and never initialised).
    addr_next <= addr_next + 0;

    // End of list is reached after 10 reads. The count is compared as a
    // decimal value; the previous literal 'h10 is 16, not 10.
    rd_end_of_list <= (cnt_list_length == 16'd10);
end
|
||||
|
||||
//
// Back pressure can stop a read request from being issued right away, so
// the need for a read is latched in rd_needed and held until the request
// can be handed to the FIU.
//
t_ccip_clAddr rd_addr;

always_ff @(posedge clk)
begin
    if (reset)
    begin
        rd_needed <= 1'b0;
    end
    else if (rd_needed)
    begin
        // A pending read stays pending only while the c0 Tx channel is
        // almost full; otherwise it is issued this cycle and the flag can
        // drop.
        rd_needed <= sRx.c0TxAlmFull;
    end
    else
    begin
        // A read becomes needed either because a new sequence is starting
        // (start_read) or because addr_next_valid is set and the end of
        // the list has not been reached.
        rd_needed <= start_read || (addr_next_valid && !rd_end_of_list);

        // The first read uses the CSR-supplied address, later ones use
        // addr_next.
        rd_addr <= start_read ? read_mem_addr : addr_next;
    end
end
|
||||
|
||||
//
// Emit read requests to the FIU.
//

// Read request header sent to the FIU. The type name uses the t_ccip_
// prefix, matching the other CCI-P types used in this module.
t_ccip_c0_ReqMemHdr rd_hdr;

always_comb
begin
    // Start from an all-zero header so unspecified fields are defined.
    rd_hdr = t_ccip_c0_ReqMemHdr'(0);

    // Read request type
    rd_hdr.req_type = eREQ_RDLINE_I;
    // Address of the line(s) to read
    rd_hdr.address = rd_addr;
    // Let the FIU pick the channel
    rd_hdr.vc_sel = eVC_VA;
    // Read 4 lines (the size of an entry in the list)
    rd_hdr.cl_len = eCL_LEN_4;
end
|
||||
|
||||
// Drive the c0 (read request) Tx channel and count issued reads.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        cnt_list_length <= 0;
        sTx.c0.valid <= 1'b0;
    end
    else
    begin
        // The header is refreshed every cycle; it only matters when
        // valid is set.
        sTx.c0.hdr <= rd_hdr;

        // A request is issued when a read is pending and the FIU's c0
        // channel is not almost full.
        sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull);

        // Count each issued request.
        if (rd_needed && !sRx.c0TxAlmFull)
        begin
            $display("Incrementing read count...");
            cnt_list_length <= cnt_list_length + 1;
        end
    end
end
|
||||
|
||||
//
// READ RESPONSE HANDLING
//

//
// Capture read data and produce the value to write back.
//
// NOTE(review): the capture is gated on the state only, not on
// sRx.c0.rspValid -- confirm that rd_data is meant to be sampled before a
// read response has been seen.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        do_update <= 1'b0;
    end
    else
    begin
        case (state)
            STATE_READ:
            begin
                // Sample the c0 Rx data and flag that an update is due.
                do_update <= 1'b1;
                rd_data <= sRx.c0.data;
            end

            STATE_UPDATE:
            begin
                // The value written back is the read value plus one.
                do_update <= 1'b0;
                wr_data <= rd_data + 1;
            end

            default: ;
        endcase
    end
end
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Write logic.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
|
||||
//
// WRITE REQUEST
//

// Registered copy of sRx.c0.rspValid (a read response was seen).
logic wr_addr_next_valid;

// Next write address
t_ccip_clAddr wr_addr_next;

always_ff @(posedge clk)
begin
    // NOTE(review): placeholder. The address stride has not been filled
    // in yet, so wr_addr_next is never advanced (and never initialised).
    wr_addr_next <= wr_addr_next + 0;

    // The next write address is considered valid once the read response
    // has come back.
    wr_addr_next_valid <= sRx.c0.rspValid;
end
|
||||
|
||||
//
// Back pressure can stop a write request from being issued right away, so
// the need for a write is latched in wr_needed and held until the request
// can be handed to the FIU.
//
// NOTE(review): start_write is used below but no declaration or driver
// for it is visible in this module -- confirm where it is meant to come
// from.
//
t_ccip_clAddr wr_addr;

always_ff @(posedge clk)
begin
    if (reset)
    begin
        wr_needed <= 1'b0;
    end
    else if (wr_needed)
    begin
        // A pending write stays pending only while the c1 Tx channel is
        // almost full; otherwise it is issued this cycle and the flag can
        // drop.
        wr_needed <= sRx.c1TxAlmFull;
    end
    else
    begin
        // A write becomes needed either when start_write is set or when
        // wr_addr_next_valid is set (a read response was seen).
        wr_needed <= (start_write || wr_addr_next_valid);

        // The first write uses the CSR-supplied address, later ones use
        // wr_addr_next.
        wr_addr <= start_write ? write_mem_addr : wr_addr_next;
    end
end
|
||||
|
||||
//
// Emit write requests to the FIU.
//

// Write request header sent to the FIU on channel c1.
t_ccip_c1_ReqMemHdr wr_hdr;

always_comb
begin
    // Start from an all-zero header so unspecified fields are defined.
    // The cast uses the same type as the declaration above.
    wr_hdr = t_ccip_c1_ReqMemHdr'(0);

    // Write request type. This is a c1 (write channel) header, so it
    // carries a write opcode rather than the read opcode eREQ_RDLINE_I.
    wr_hdr.req_type = eREQ_WRLINE_I;
    // Address of the line(s) to write
    wr_hdr.address = wr_addr;
    // Let the FIU pick the channel
    wr_hdr.vc_sel = eVC_VA;
    // Write 4 lines (the size of an entry in the list)
    // NOTE(review): cl_len asks for a 4-line write, but the request logic
    // sends one 512-bit payload with sop always set -- confirm whether
    // eCL_LEN_1 was intended.
    wr_hdr.cl_len = eCL_LEN_4;
    // Start of packet
    wr_hdr.sop = 1'b1;
end
|
||||
|
||||
// Drive the c1 (write request) Tx channel.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c1.valid <= 1'b0;
    end
    else
    begin
        // A request is issued when a write is pending and the FIU's c1
        // channel is not almost full.
        sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull);

        // Header and payload are refreshed every cycle; they only matter
        // when valid is set. The payload uses a non-blocking assignment
        // like every other sTx field, so sTx is never written with a mix
        // of blocking and non-blocking assignments.
        sTx.c1.hdr <= wr_hdr;
        sTx.c1.data <= t_ccip_clData'(wr_data);
    end
end
|
||||
|
||||
//
|
||||
// WRITE RESPONSE HANDLING
|
||||
//
|
||||
|
||||
// Apurve: Check if a signal is to be sent to read to start reading in case
|
||||
// write response does not work
|
||||
//
|
||||
// Send data (write requests).
|
||||
//
|
||||
//always_ff @(posedge clk)
|
||||
//begin
|
||||
// if (state == STATE_WRITE)
|
||||
// begin
|
||||
// rd_data <= sRx.c0.data;
|
||||
// end
|
||||
// if (state == STATE_UPDATE)
|
||||
// begin
|
||||
// // Update the write data and put it in the write data to be written
|
||||
// wr_data <= rd_data + 1;
|
||||
// end
|
||||
//end
|
||||
|
||||
endmodule
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"afu-top-interface":
|
||||
{
|
||||
"name": "ccip_std_afu"
|
||||
},
|
||||
"accelerator-clusters":
|
||||
[
|
||||
{
|
||||
"name": "cci_hello",
|
||||
"total-contexts": 1,
|
||||
"accelerator-type-uuid": "c6aa954a-9b91-4a37-abc1-1d9f0709dcc3"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,653 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//
|
||||
// Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// Neither the name of the Intel Corporation nor the names of its contributors
|
||||
// may be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
// Read from the memory locations first and then write to the memory locations
|
||||
|
||||
`include "platform_if.vh"
|
||||
`include "afu_json_info.vh"
|
||||
|
||||
|
||||
module ccip_std_afu
|
||||
(
|
||||
// CCI-P Clocks and Resets
|
||||
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
|
||||
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
|
||||
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
|
||||
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
|
||||
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
|
||||
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
|
||||
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
|
||||
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
|
||||
|
||||
// Interface structures
|
||||
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
|
||||
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
|
||||
);
|
||||
|
||||
|
||||
//
|
||||
// Run the entire design at the standard CCI-P frequency (400 MHz).
|
||||
//
|
||||
logic clk;
|
||||
assign clk = pClk;
|
||||
|
||||
logic reset;
|
||||
assign reset = pck_cp2af_softReset;
|
||||
|
||||
logic [511:0] wr_data;
|
||||
logic [511:0] rd_data;
|
||||
|
||||
logic do_update;
|
||||
logic start_read;
|
||||
logic start_write;
|
||||
logic wr_addr_next_valid;
|
||||
logic addr_next_valid;
|
||||
logic rd_end_of_list;
|
||||
logic rd_needed;
|
||||
logic wr_needed;
|
||||
logic read_req;
|
||||
logic write_req;
|
||||
logic [15:0] cnt_list_length;
|
||||
t_ccip_clAddr rd_addr;
|
||||
t_ccip_clAddr wr_addr;
|
||||
t_ccip_clAddr addr_next;
|
||||
t_ccip_clAddr wr_addr_next;
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Register requests.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
//
|
||||
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
|
||||
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
|
||||
// We also assign pck_af2cp_sTx to sTx here but don't register it.
|
||||
// The code below never uses combinational logic to write sTx.
|
||||
//
|
||||
|
||||
t_if_ccip_Rx sRx;
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
sRx <= pck_cp2af_sRx;
|
||||
end
|
||||
|
||||
t_if_ccip_Tx sTx;
|
||||
assign pck_af2cp_sTx = sTx;
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// CSR (MMIO) handling.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
// The AFU ID is a unique ID for a given program. Here we generated
|
||||
// one with the "uuidgen" program and stored it in the AFU's JSON file.
|
||||
// ASE and synthesis setup scripts automatically invoke afu_json_mgr
|
||||
// to extract the UUID into afu_json_info.vh.
|
||||
logic [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
//
|
||||
// A valid AFU must implement a device feature list, starting at MMIO
|
||||
// address 0. Every entry in the feature list begins with 5 64-bit
|
||||
// words: a device feature header, two AFU UUID words and two reserved
|
||||
// words.
|
||||
//
|
||||
|
||||
// Is a CSR read request active this cycle?
|
||||
logic is_csr_read;
|
||||
assign is_csr_read = sRx.c0.mmioRdValid;
|
||||
|
||||
// Is a CSR write request active this cycle?
|
||||
logic is_csr_write;
|
||||
assign is_csr_write = sRx.c0.mmioWrValid;
|
||||
|
||||
// The MMIO request header is overlaid on the normal c0 memory read
|
||||
// response data structure. Cast the c0Rx header to an MMIO request
|
||||
// header.
|
||||
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
|
||||
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
|
||||
|
||||
|
||||
//
|
||||
// Implement the device feature list by responding to MMIO reads.
|
||||
//
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (reset)
|
||||
begin
|
||||
sTx.c2.mmioRdValid <= 1'b0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
// Always respond with something for every read request
|
||||
sTx.c2.mmioRdValid <= is_csr_read;
|
||||
|
||||
// The unique transaction ID matches responses to requests
|
||||
sTx.c2.hdr.tid <= mmio_req_hdr.tid;
|
||||
|
||||
// Addresses are of 32-bit objects in MMIO space. Addresses
|
||||
// of 64-bit objects are thus multiples of 2.
|
||||
case (mmio_req_hdr.address)
|
||||
0: // AFU DFH (device feature header)
|
||||
begin
|
||||
// Here we define a trivial feature list. In this
|
||||
// example, our AFU is the only entry in this list.
|
||||
sTx.c2.data <= t_ccip_mmioData'(0);
|
||||
// Feature type is AFU
|
||||
sTx.c2.data[63:60] <= 4'h1;
|
||||
// End of list (last entry in list)
|
||||
sTx.c2.data[40] <= 1'b1;
|
||||
end
|
||||
|
||||
// AFU_ID_L
|
||||
2: sTx.c2.data <= afu_id[63:0];
|
||||
|
||||
// AFU_ID_H
|
||||
4: sTx.c2.data <= afu_id[127:64];
|
||||
|
||||
// DFH_RSVD0
|
||||
6: sTx.c2.data <= t_ccip_mmioData'(0);
|
||||
|
||||
// DFH_RSVD1
|
||||
8: sTx.c2.data <= t_ccip_mmioData'(0);
|
||||
|
||||
// Updated by apurve to check fpgaReadMMIO
|
||||
10: sTx.c2.data <= t_ccip_mmioData'(start_read);
|
||||
|
||||
default: sTx.c2.data <= t_ccip_mmioData'(0);
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
//
|
||||
// CSR write handling. Host software must tell the AFU the memory address
|
||||
// to which it should be writing. The address is set by writing a CSR.
|
||||
//
|
||||
|
||||
// We use MMIO address 0 to set the memory address. The read and
|
||||
// write MMIO spaces are logically separate so we are free to use
|
||||
// whatever we like. This may not be good practice for cleanly
|
||||
// organizing the MMIO address space, but it is legal.
|
||||
logic is_mem_addr_csr_write;
|
||||
assign is_mem_addr_csr_write = is_csr_write &&
|
||||
(mmio_req_hdr.address == t_ccip_mmioAddr'(0));
|
||||
|
||||
// Memory address to which this AFU will write.
|
||||
t_ccip_clAddr write_mem_addr;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (reset)
|
||||
begin
|
||||
start_write <= 1'b0;
|
||||
end
|
||||
else if (is_mem_addr_csr_write)
|
||||
begin
|
||||
write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
|
||||
start_write <= 1'b1;
|
||||
//$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr));
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// We use MMIO byte address 8 (32-bit word index 2) to set the memory address for reading data.
|
||||
logic is_mem_addr_csr_read;
|
||||
assign is_mem_addr_csr_read = is_csr_write &&
|
||||
(mmio_req_hdr.address == t_ccip_mmioAddr'(2));
|
||||
|
||||
// Memory address from which this AFU will read.
|
||||
t_ccip_clAddr read_mem_addr;
|
||||
|
||||
//logic start_traversal = 'b0;
|
||||
//t_ccip_clAddr start_traversal_addr;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (reset)
|
||||
begin
|
||||
start_read <= 1'b0;
|
||||
end
|
||||
else if (is_mem_addr_csr_read)
|
||||
begin
|
||||
read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
|
||||
start_read <= 1'b1;
|
||||
//$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr));
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Main AFU logic
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
//
|
||||
// States in our simple example.
|
||||
//
|
||||
//typedef enum logic [0:0]
|
||||
typedef enum logic [1:0]
|
||||
{
|
||||
STATE_IDLE,
|
||||
STATE_READ,
|
||||
STATE_UPDATE,
|
||||
STATE_WRITE
|
||||
}
|
||||
t_state;
|
||||
|
||||
t_state state;
|
||||
|
||||
//
|
||||
// State machine
|
||||
//
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (reset)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
rd_end_of_list <= 1'b0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
case (state)
|
||||
STATE_IDLE:
|
||||
begin
|
||||
// Traversal begins once start_read is set by the read address CSR write
|
||||
if (start_read)
|
||||
begin
|
||||
state <= STATE_READ;
|
||||
$display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
|
||||
end
|
||||
end
|
||||
|
||||
STATE_READ:
|
||||
begin
|
||||
$display("AFU in READ...");
|
||||
$display("do_update is %d...",do_update);
|
||||
$display("addr_next_valid is %d...",addr_next_valid);
|
||||
$display("rd_needed is %d...",rd_needed);
|
||||
if (!rd_needed && do_update)
|
||||
begin
|
||||
state <= STATE_UPDATE;
|
||||
$display("AFU moving to UPDATE...");
|
||||
end
|
||||
end
|
||||
|
||||
STATE_UPDATE:
|
||||
begin
|
||||
// Update the read value to be written back
|
||||
$display("AFU in UPDATE...");
|
||||
if (!do_update)
|
||||
begin
|
||||
state <= STATE_WRITE;
|
||||
wr_needed <= 1'b1;
|
||||
$display("AFU moving to WRITE...");
|
||||
end
|
||||
end
|
||||
|
||||
STATE_WRITE:
|
||||
begin
|
||||
// Write the updated value to the address
|
||||
// Point to new address after that
|
||||
// if done then point to IDLE; else read new values
|
||||
$display("AFU in WRITE...");
|
||||
if (rd_end_of_list)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
$display("AFU done...");
|
||||
end
|
||||
else if (!wr_needed)
|
||||
begin
|
||||
state <= STATE_READ;
|
||||
$display("AFU moving to READ from WRITE...");
|
||||
start_write <= 1'b0;
|
||||
write_req <= 1'b0;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Read logic.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
//
|
||||
// READ REQUEST
|
||||
//
|
||||
|
||||
// Did a write response just arrive
|
||||
|
||||
// Next read address
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
// Next read address is valid when we have got the write response back
|
||||
if (sRx.c1.rspValid)
|
||||
begin
|
||||
addr_next_valid <= sRx.c1.rspValid;
|
||||
|
||||
//if (state == STATE_READ && !rd_needed)
|
||||
//begin
|
||||
// Apurve: Next address is current address plus address length
|
||||
//addr_next <= addr_next + addr_size;
|
||||
addr_next <= (addr_next_valid ? rd_addr + 0 : rd_addr);
|
||||
|
||||
// End of list reached if we have read 5 times
|
||||
rd_end_of_list <= (cnt_list_length == 'h5);
|
||||
//end
|
||||
end
|
||||
end
|
||||
|
||||
//
|
||||
// Since back pressure may prevent an immediate read request, we must
|
||||
// record whether a read is needed and hold it until the request can
|
||||
// be sent to the FIU.
|
||||
//
|
||||
|
||||
// Records that a read is wanted and holds the request across cycles.
// Once rd_needed is set it stays set until a read response (c0) is seen.
always_ff @(posedge clk) begin
    if (reset) begin
        rd_needed <= 1'b0;
    end
    else if (rd_needed) begin
        // Request outstanding: drop it on the cycle the response arrives.
        rd_needed <= !sRx.c0.rspValid;
    end
    else if (state == STATE_READ) begin
        // The first read of a walk uses the CSR-supplied address; later
        // reads use the address prepared by the next-address process.
        rd_addr <= (start_read ? read_mem_addr : addr_next);

        // A read is needed when a walk starts, or when a next address is
        // valid, the list has not ended and c0 is not almost full.
        rd_needed <= (start_read ||
                      (!sRx.c0TxAlmFull && (addr_next_valid && !rd_end_of_list)));
    end
end
|
||||
|
||||
//
|
||||
// Emit read requests to the FIU.
|
||||
//
|
||||
|
||||
// Read header defines the request to the FIU
|
||||
// Header used for every read request sent to the FIU. Only the address is
// filled in; every other field keeps the zero written by the cast below.
t_ccip_c0_ReqMemHdr rd_hdr;

always_comb begin
    // Zero-fill first, so req_type, vc_sel and cl_len are all 0.
    rd_hdr = t_ccip_c0_ReqMemHdr'(0);

    // Cache-line address selected by the rd_needed process.
    rd_hdr.address = rd_addr;
end
|
||||
|
||||
// Send read requests to the FIU
|
||||
// Send read requests to the FIU on channel c0.
//
// A request is issued when the FSM is in STATE_READ, a read is pending
// (rd_needed), the channel is not almost full and no request has been sent
// for the current step (read_req).
always_ff @(posedge clk) begin
    if (reset) begin
        sTx.c0.valid <= 1'b0;
        cnt_list_length <= 0;
        read_req <= 1'b0;
    end
    else begin
        // Deassert by default. Previously the strobe was only rewritten
        // while state == STATE_READ, so it could stay latched high if the
        // FSM left that state on the cycle after a request.
        sTx.c0.valid <= 1'b0;

        if ((state == STATE_READ) && rd_needed && !sRx.c0TxAlmFull && !read_req) begin
            sTx.c0.valid <= 1'b1;
            sTx.c0.hdr <= rd_hdr;

            // Count issued reads; the counter is compared against 5
            // elsewhere to detect the end of the list.
            cnt_list_length <= cnt_list_length + 1;

            // Block further requests until the update step clears read_req.
            read_req <= 1'b1;

            $display("Incrementing read count...%d",cnt_list_length);
            $display("Read address is 0x%x...",rd_hdr.address);

            // The prepared next address has now been consumed.
            addr_next_valid <= 1'b0;
        end
    end
end
|
||||
|
||||
//
|
||||
// READ RESPONSE HANDLING
|
||||
//
|
||||
|
||||
//
|
||||
// Receive data (read responses).
|
||||
//
|
||||
// Receive read responses and produce the value to be written back.
//
// do_update is set when a response is captured and cleared when the FSM,
// in STATE_UPDATE, turns the captured data into wr_data.
always_ff @(posedge clk) begin
    if (reset) begin
        do_update <= 1'b0;
    end
    else begin
        // Capture a response only when no earlier one is waiting.
        if (!do_update && sRx.c0.rspValid) begin
            rd_data <= sRx.c0.data;
            do_update <= 1'b1;
            // Print the incoming data. rd_data still holds the previous
            // value here because the assignment above is nonblocking.
            $display("rd data is %d...",sRx.c0.data);
        end

        if ((state == STATE_UPDATE) && (do_update == 1'b1)) begin
            // Write-back value is the read value plus 2.
            wr_data <= rd_data + 2;
            do_update <= 1'b0;
            read_req <= 1'b0;
            // Print the value being assigned, not the stale wr_data.
            $display("write data is %d...",rd_data + 2);

            // First read done. Later reads use the updated addresses.
            start_read <= 1'b0;
        end
    end
end
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Write logic.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
|
||||
//
|
||||
// WRITE REQUEST
|
||||
//
|
||||
|
||||
// Did a write response just arrive
|
||||
|
||||
// Next write address
|
||||
|
||||
// Next-write bookkeeping, refreshed only on cycles in which a read
// response (sRx.c0.rspValid) is present.
always_ff @(posedge clk) begin
    if (sRx.c0.rspValid) begin
        // Placeholder stepping: the offset added is 0, so both arms of
        // the select produce wr_addr.
        wr_addr_next <= (wr_addr_next_valid ? wr_addr + 0 : wr_addr);

        // Mirror the response strobe into the "next write address valid"
        // flag.
        wr_addr_next_valid <= sRx.c0.rspValid;
    end
end
|
||||
|
||||
//
|
||||
// Since back pressure may prevent an immediate write request, we must
|
||||
// record whether a write is needed and hold it until the request can
|
||||
// be sent to the FIU.
|
||||
//
|
||||
|
||||
// Records that a write is wanted and holds the request across cycles.
// Once wr_needed is set it stays set until a write response (c1) is seen.
always_ff @(posedge clk) begin
    if (reset) begin
        wr_needed <= 1'b0;
    end
    else if (wr_needed) begin
        // Request outstanding: drop it on the cycle the response arrives.
        wr_needed <= !sRx.c1.rspValid;
    end
    else begin
        // The first write uses the CSR-supplied address; later writes use
        // the prepared next address.
        wr_addr <= (start_write ? write_mem_addr : wr_addr_next);

        // A write is needed when one was started by the host, or when a
        // next address is valid and c1 is not almost full.
        wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
    end
end
|
||||
|
||||
//
|
||||
// Emit write requests to the FIU.
|
||||
//
|
||||
|
||||
// Write header defines the request to the FIU
|
||||
// Header used for every write request sent to the FIU. Only address and
// sop are set; every other field keeps the zero written by the cast below.
t_ccip_c1_ReqMemHdr wr_hdr;

always_comb begin
    // Zero-fill first, so req_type, vc_sel and cl_len are all 0.
    wr_hdr = t_ccip_c1_ReqMemHdr'(0);

    // Single-line write: start of packet is always asserted.
    wr_hdr.sop = 1'b1;

    // Cache-line address selected by the wr_needed process.
    wr_hdr.address = wr_addr;
end
|
||||
|
||||
// Send write requests to the FIU
|
||||
// Send write requests to the FIU on channel c1.
//
// A request is issued when the FSM is in STATE_WRITE, a write is pending
// (wr_needed), the channel is not almost full and no request has been sent
// for the current step (write_req).
always_ff @(posedge clk) begin
    if (reset) begin
        sTx.c1.valid <= 1'b0;
        write_req <= 1'b0;
    end
    else begin
        // Deassert by default. Previously the strobe was only rewritten
        // while state == STATE_WRITE, so it could stay latched high if the
        // FSM left that state on the cycle after a request.
        sTx.c1.valid <= 1'b0;

        if ((state == STATE_WRITE) && wr_needed && !sRx.c1TxAlmFull && !write_req) begin
            sTx.c1.valid <= 1'b1;
            sTx.c1.hdr <= wr_hdr;
            sTx.c1.data <= t_ccip_clData'(wr_data);

            // Block further requests until write_req is cleared elsewhere.
            write_req <= 1'b1;

            // The prepared next write address has now been consumed.
            wr_addr_next_valid <= 1'b0;
            $display("Write address is 0x%x...", wr_hdr.address);
        end
    end
end
|
||||
|
||||
|
||||
//
|
||||
// WRITE RESPONSE HANDLING
|
||||
//
|
||||
|
||||
// Apurve: Check if a signal is to be sent to read to start reading in case
|
||||
// write response does not work
|
||||
//
|
||||
// Send data (write requests).
|
||||
//
|
||||
//always_ff @(posedge clk)
|
||||
//begin
|
||||
// if (state == STATE_WRITE)
|
||||
// begin
|
||||
// rd_data <= sRx.c0.data;
|
||||
// end
|
||||
// if (state == STATE_UPDATE)
|
||||
// begin
|
||||
// // Update the write data and put it in the write data to be written
|
||||
// wr_data <= rd_data + 1;
|
||||
// end
|
||||
//end
|
||||
|
||||
endmodule
|
||||
@@ -1,621 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//
|
||||
// Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// Neither the name of the Intel Corporation nor the names of its contributors
|
||||
// may be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
// Read from the memory locations first and then write to the memory locations
|
||||
|
||||
`include "platform_if.vh"
|
||||
`include "afu_json_info.vh"
|
||||
|
||||
|
||||
module ccip_std_afu
|
||||
(
|
||||
// CCI-P Clocks and Resets
|
||||
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
|
||||
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
|
||||
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
|
||||
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
|
||||
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
|
||||
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
|
||||
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
|
||||
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
|
||||
|
||||
// Interface structures
|
||||
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
|
||||
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
|
||||
);
|
||||
|
||||
|
||||
//
|
||||
// Run the entire design at the standard CCI-P frequency (400 MHz).
|
||||
//
|
||||
logic clk;
|
||||
assign clk = pClk;
|
||||
|
||||
logic reset;
|
||||
assign reset = pck_cp2af_softReset;
|
||||
|
||||
logic [511:0] wr_data;
|
||||
logic [511:0] rd_data;
|
||||
|
||||
logic do_update;
|
||||
logic start_read;
|
||||
logic start_write;
|
||||
logic wr_addr_next_valid;
|
||||
logic addr_next_valid;
|
||||
logic rd_end_of_list;
|
||||
logic rd_needed;
|
||||
logic wr_needed;
|
||||
logic [15:0] cnt_list_length;
|
||||
t_ccip_clAddr rd_addr;
|
||||
t_ccip_clAddr wr_addr;
|
||||
t_ccip_clAddr addr_next;
|
||||
t_ccip_clAddr wr_addr_next;
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Register requests.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
//
|
||||
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
|
||||
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
|
||||
// We also assign pck_af2cp_sTx to sTx here but don't register it.
|
||||
// The code below never uses combinational logic to write sTx.
|
||||
//
|
||||
|
||||
// Registered copy of the incoming CCI-P Rx port; the rest of the module
// reads sRx rather than the raw port.
t_if_ccip_Rx sRx;
always_ff @(posedge clk) sRx <= pck_cp2af_sRx;

// Outgoing Tx structure, wired straight to the output port.
t_if_ccip_Tx sTx;
assign pck_af2cp_sTx = sTx;
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// CSR (MMIO) handling.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
// The AFU ID is a unique ID for a given program. Here we generated
|
||||
// one with the "uuidgen" program and stored it in the AFU's JSON file.
|
||||
// ASE and synthesis setup scripts automatically invoke afu_json_mgr
|
||||
// to extract the UUID into afu_json_info.vh.
|
||||
logic [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
//
|
||||
// A valid AFU must implement a device feature list, starting at MMIO
|
||||
// address 0. Every entry in the feature list begins with 5 64-bit
|
||||
// words: a device feature header, two AFU UUID words and two reserved
|
||||
// words.
|
||||
//
|
||||
|
||||
// Is a CSR read request active this cycle?
|
||||
logic is_csr_read;
|
||||
assign is_csr_read = sRx.c0.mmioRdValid;
|
||||
|
||||
// Is a CSR write request active this cycle?
|
||||
logic is_csr_write;
|
||||
assign is_csr_write = sRx.c0.mmioWrValid;
|
||||
|
||||
// The MMIO request header is overlaid on the normal c0 memory read
|
||||
// response data structure. Cast the c0Rx header to an MMIO request
|
||||
// header.
|
||||
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
|
||||
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
|
||||
|
||||
|
||||
//
|
||||
// Implement the device feature list by responding to MMIO reads.
|
||||
//
|
||||
|
||||
// MMIO read responder: implements the device feature list and one debug
// register. A response is produced for every CSR read request.
always_ff @(posedge clk) begin
    if (reset) begin
        sTx.c2.mmioRdValid <= 1'b0;
    end
    else begin
        // Response strobe follows the request strobe; the transaction ID
        // is echoed so the response can be matched to its request.
        sTx.c2.mmioRdValid <= is_csr_read;
        sTx.c2.hdr.tid <= mmio_req_hdr.tid;

        // MMIO addresses count 32-bit objects, so 64-bit registers sit at
        // even addresses.
        case (mmio_req_hdr.address)
            // AFU DFH (device feature header): a one-entry feature list.
            0: begin
                sTx.c2.data <= t_ccip_mmioData'(0);
                sTx.c2.data[63:60] <= 4'h1; // feature type is AFU
                sTx.c2.data[40] <= 1'b1;    // end of list
            end

            // AFU_ID_L and AFU_ID_H
            2: sTx.c2.data <= afu_id[63:0];
            4: sTx.c2.data <= afu_id[127:64];

            // DFH_RSVD0 and DFH_RSVD1
            6, 8: sTx.c2.data <= t_ccip_mmioData'(0);

            // Debug register: exposes start_read so the host can check it
            // with fpgaReadMMIO.
            10: sTx.c2.data <= t_ccip_mmioData'(start_read);

            default: sTx.c2.data <= t_ccip_mmioData'(0);
        endcase
    end
end
|
||||
|
||||
|
||||
//
|
||||
// CSR write handling. Host software must tell the AFU the memory address
|
||||
// to which it should be writing. The address is set by writing a CSR.
|
||||
//
|
||||
|
||||
// We use MMIO address 0 to set the memory address. The read and
|
||||
// write MMIO spaces are logically separate so we are free to use
|
||||
// whatever we like. This may not be good practice for cleanly
|
||||
// organizing the MMIO address space, but it is legal.
|
||||
// Memory (cache-line) address to which this AFU will write.
t_ccip_clAddr write_mem_addr;

// High on a CSR write that targets MMIO address 0. MMIO read and write
// spaces are separate, so address 0 is reused here.
logic is_mem_addr_csr_write;
assign is_mem_addr_csr_write =
    (mmio_req_hdr.address == t_ccip_mmioAddr'(0)) && is_csr_write;

// Capture the host-supplied write address and raise start_write.
always_ff @(posedge clk) begin
    if (reset) begin
        start_write <= 1'b0;
    end
    else if (is_mem_addr_csr_write) begin
        start_write <= 1'b1;
        write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
    end
end
|
||||
|
||||
|
||||
// We use MMIO byte offset 8 (32-bit word address 2) to set the memory address for reading data.
|
||||
// Memory (cache-line) address from which this AFU will read.
t_ccip_clAddr read_mem_addr;

// High on a CSR write that targets MMIO address 2.
logic is_mem_addr_csr_read;
assign is_mem_addr_csr_read =
    (mmio_req_hdr.address == t_ccip_mmioAddr'(2)) && is_csr_write;

// Capture the host-supplied read address and raise start_read.
always_ff @(posedge clk) begin
    if (reset) begin
        start_read <= 1'b0;
    end
    else if (is_mem_addr_csr_read) begin
        start_read <= 1'b1;
        read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
    end
end
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Main AFU logic
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
//
|
||||
// States in our simple example.
|
||||
//
|
||||
//typedef enum logic [0:0]
|
||||
// FSM states. The encodings are written out; they equal the values the
// enum would assign implicitly (0, 1, 2, 3 in declaration order).
typedef enum logic [1:0] {
    STATE_IDLE   = 2'd0,
    STATE_READ   = 2'd1,
    STATE_UPDATE = 2'd2,
    STATE_WRITE  = 2'd3
} t_state;
|
||||
|
||||
t_state state;
|
||||
|
||||
//
|
||||
// State machine
|
||||
//
|
||||
// Main FSM: IDLE -> READ -> UPDATE -> WRITE, then back to READ, or to IDLE
// once rd_end_of_list is set.
always_ff @(posedge clk) begin
    if (reset) begin
        state <= STATE_IDLE;
        rd_end_of_list <= 1'b0;
    end
    else begin
        case (state)
            // Wait until the host has supplied a read address.
            STATE_IDLE: begin
                if (start_read) begin
                    state <= STATE_READ;
                    $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                end
            end

            // Stay here until no read is pending and a response has been
            // captured (do_update).
            STATE_READ: begin
                $display("AFU in READ...");
                if (!rd_needed && do_update) begin
                    state <= STATE_UPDATE;
                    $display("AFU moving to UPDATE...");
                end
            end

            // The response process clears do_update when it produces
            // wr_data; then move on and request the write.
            STATE_UPDATE: begin
                $display("AFU in UPDATE...");
                if (!do_update) begin
                    state <= STATE_WRITE;
                    wr_needed <= 1'b1;
                    $display("AFU moving to WRITE...");
                end
            end

            // Finish when the list has ended; otherwise start the next
            // read once the write is no longer pending.
            STATE_WRITE: begin
                $display("AFU in WRITE...");
                if (rd_end_of_list) begin
                    state <= STATE_IDLE;
                    $display("AFU done...");
                end
                else if (!wr_needed) begin
                    state <= STATE_READ;
                    $display("AFU moving to READ from WRITE...");
                    start_write <= 1'b0;
                end
            end
        endcase
    end
end
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Read logic.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
//
|
||||
// READ REQUEST
|
||||
//
|
||||
|
||||
// Did a write response just arrive
|
||||
|
||||
// Next read address
|
||||
|
||||
// Next-read bookkeeping, recomputed on every clock edge.
always_ff @(posedge clk) begin
    // The list is considered finished once the read counter equals 5.
    rd_end_of_list <= (cnt_list_length == 'h5);

    // Placeholder stepping: the offset added to the current address is 0.
    addr_next <= rd_addr + 0;

    // A next read address is flagged valid on the cycle after a write
    // response (c1) is seen.
    addr_next_valid <= sRx.c1.rspValid;
end
|
||||
|
||||
//
|
||||
// Since back pressure may prevent an immediate read request, we must
|
||||
// record whether a read is needed and hold it until the request can
|
||||
// be sent to the FIU.
|
||||
//
|
||||
|
||||
// Records that a read is wanted and holds it while back pressure on c0
// prevents the request from being sent.
always_ff @(posedge clk) begin
    if (reset) begin
        rd_needed <= 1'b0;
    end
    else if (rd_needed) begin
        // Keep the request only while the channel is almost full.
        rd_needed <= sRx.c0TxAlmFull;
    end
    else begin
        // The first read of a walk uses the CSR-supplied address; later
        // reads use the prepared next address.
        rd_addr <= (start_read ? read_mem_addr : addr_next);

        // A read is needed when a walk starts, or when a next address is
        // valid, the list has not ended and c0 is not almost full.
        rd_needed <= (start_read ||
                      (!sRx.c0TxAlmFull && (addr_next_valid && !rd_end_of_list)));
    end
end
|
||||
|
||||
//
|
||||
// Emit read requests to the FIU.
|
||||
//
|
||||
|
||||
// Read header defines the request to the FIU
|
||||
// Header used for every read request sent to the FIU. Only the address is
// filled in; every other field keeps the zero written by the cast below.
t_ccip_c0_ReqMemHdr rd_hdr;

always_comb begin
    // Zero-fill first, so req_type, vc_sel and cl_len are all 0.
    rd_hdr = t_ccip_c0_ReqMemHdr'(0);

    // Cache-line address selected by the rd_needed process.
    rd_hdr.address = rd_addr;
end
|
||||
|
||||
// Send read requests to the FIU
|
||||
// Send read requests to the FIU on channel c0.
//
// A request is issued when the FSM is in STATE_READ, a read is pending
// (rd_needed) and the channel is not almost full.
always_ff @(posedge clk) begin
    if (reset) begin
        sTx.c0.valid <= 1'b0;
        cnt_list_length <= 0;
    end
    else begin
        // Deassert by default. Previously the strobe was only rewritten
        // while state == STATE_READ, so clearing it depended on the FSM
        // staying in that state for one more cycle.
        sTx.c0.valid <= 1'b0;

        if ((state == STATE_READ) && rd_needed && !sRx.c0TxAlmFull) begin
            sTx.c0.valid <= 1'b1;
            sTx.c0.hdr <= rd_hdr;

            // Count issued reads; the counter is compared against 5
            // elsewhere to detect the end of the list.
            cnt_list_length <= cnt_list_length + 1;

            $display("Incrementing read count...%d",cnt_list_length);
            $display("Read address is 0x%x...",rd_hdr.address);
        end
    end
end
|
||||
|
||||
//
|
||||
// READ RESPONSE HANDLING
|
||||
//
|
||||
|
||||
//
|
||||
// Receive data (read responses).
|
||||
//
|
||||
// Receive read responses and produce the value to be written back.
//
// do_update is set when a response is captured and cleared while the FSM
// is in STATE_UPDATE. If both happen in the same cycle the clear wins,
// because it is the later nonblocking assignment.
always_ff @(posedge clk) begin
    if (reset) begin
        do_update <= 1'b0;
    end
    else begin
        if (sRx.c0.rspValid) begin
            rd_data <= sRx.c0.data;
            do_update <= 1'b1;
        end

        if (state == STATE_UPDATE) begin
            // Write-back value is the read value plus 2.
            wr_data <= rd_data + 2;
            do_update <= 1'b0;
            // Print the value being assigned. wr_data still holds the
            // previous value here because the assignment is nonblocking.
            $display("write data is %d...",rd_data + 2);

            // First read done. Later reads use the updated addresses.
            start_read <= 1'b0;
        end
    end
end
|
||||
|
||||
|
||||
// =========================================================================
|
||||
//
|
||||
// Write logic.
|
||||
//
|
||||
// =========================================================================
|
||||
|
||||
|
||||
//
|
||||
// WRITE REQUEST
|
||||
//
|
||||
|
||||
// Did a write response just arrive
|
||||
|
||||
// Next write address
|
||||
|
||||
// Next-write bookkeeping, recomputed on every clock edge.
always_ff @(posedge clk) begin
    // Placeholder stepping: the offset added to the current address is 0.
    wr_addr_next <= wr_addr + 0;

    // A next write address is flagged valid on the cycle after a read
    // response (c0) is seen.
    wr_addr_next_valid <= sRx.c0.rspValid;
end
|
||||
|
||||
//
|
||||
// Since back pressure may prevent an immediate write request, we must
|
||||
// record whether a write is needed and hold it until the request can
|
||||
// be sent to the FIU.
|
||||
//
|
||||
|
||||
// Records that a write is wanted and holds it while back pressure on c1
// prevents the request from being sent.
always_ff @(posedge clk) begin
    if (reset) begin
        wr_needed <= 1'b0;
    end
    else if (wr_needed) begin
        // Keep the request only while the channel is almost full.
        wr_needed <= sRx.c1TxAlmFull;
    end
    else begin
        // The first write uses the CSR-supplied address; later writes use
        // the prepared next address.
        wr_addr <= (start_write ? write_mem_addr : wr_addr_next);

        // A write is needed when one was started by the host, or when a
        // next address is valid and c1 is not almost full.
        wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
    end
end
|
||||
|
||||
//
|
||||
// Emit write requests to the FIU.
|
||||
//
|
||||
|
||||
// Write header defines the request to the FIU
|
||||
// Header used for every write request sent to the FIU. Only address and
// sop are set; every other field keeps the zero written by the cast below.
t_ccip_c1_ReqMemHdr wr_hdr;

always_comb begin
    // Zero-fill first, so req_type, vc_sel and cl_len are all 0.
    wr_hdr = t_ccip_c1_ReqMemHdr'(0);

    // Single-line write: start of packet is always asserted.
    wr_hdr.sop = 1'b1;

    // Cache-line address selected by the wr_needed process.
    wr_hdr.address = wr_addr;
end
|
||||
|
||||
// Send write requests to the FIU
|
||||
// Send write requests to the FIU on channel c1.
//
// A request is issued when the FSM is in STATE_WRITE, a write is pending
// (wr_needed) and the channel is not almost full.
always_ff @(posedge clk) begin
    if (reset) begin
        sTx.c1.valid <= 1'b0;
    end
    else begin
        // Deassert by default. Previously the strobe was only rewritten
        // while state == STATE_WRITE, so it could stay latched high if the
        // FSM left that state on the cycle after a request.
        sTx.c1.valid <= 1'b0;

        if ((state == STATE_WRITE) && wr_needed && !sRx.c1TxAlmFull) begin
            sTx.c1.valid <= 1'b1;
            sTx.c1.hdr <= wr_hdr;
            sTx.c1.data <= t_ccip_clData'(wr_data);
        end
    end
end
|
||||
|
||||
|
||||
//
|
||||
// WRITE RESPONSE HANDLING
|
||||
//
|
||||
|
||||
// Apurve: Check if a signal is to be sent to read to start reading in case
|
||||
// write response does not work
|
||||
//
|
||||
// Send data (write requests).
|
||||
//
|
||||
//always_ff @(posedge clk)
|
||||
//begin
|
||||
// if (state == STATE_WRITE)
|
||||
// begin
|
||||
// rd_data <= sRx.c0.data;
|
||||
// end
|
||||
// if (state == STATE_UPDATE)
|
||||
// begin
|
||||
// // Update the write data and put it in the write data to be written
|
||||
// wr_data <= rd_data + 1;
|
||||
// end
|
||||
//end
|
||||
|
||||
endmodule
|
||||
@@ -1,2 +0,0 @@
|
||||
cci_hello.json
|
||||
cci_hello_afu.sv
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/bin/sh

##
## Setup ASE environment using ../rtl/sources.txt.
##

# Absolute path to this script
SCRIPT=$(readlink -f "$0")
SCRIPT_PATH=$(dirname "$SCRIPT")

# Forward the caller's arguments unchanged. "$@" is quoted so arguments
# containing spaces or glob characters are not split or expanded.
afu_sim_setup --sources="${SCRIPT_PATH}/../rtl/sources.txt" "$@"
|
||||
@@ -1,41 +0,0 @@
|
||||
include ../../common/sw/common_include.mk

# Primary test name
TEST = cci_hello

# Build directory
OBJDIR = obj
CFLAGS += -I./$(OBJDIR)
CPPFLAGS += -I./$(OBJDIR)

# Files and folders
SRCS = $(TEST).c
OBJS = $(addprefix $(OBJDIR)/,$(patsubst %.c,%.o,$(SRCS)))

# Targets (both $(TEST) and $(TEST)_ase are built by default)
all: $(TEST) $(TEST)_ase

# AFU info from JSON file, including AFU UUID
AFU_JSON_INFO = $(OBJDIR)/afu_json_info.h

# objdir is an order-only prerequisite: the directory must exist first,
# but it never forces the header to be regenerated.
$(AFU_JSON_INFO): ../hw/rtl/$(TEST).json | objdir
	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@

# Objects include the generated header, so it must be built first.
$(OBJS): $(AFU_JSON_INFO)

$(TEST): $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS) $(FPGA_LIBS)

$(TEST)_ase: $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS) $(ASE_LIBS)

$(OBJDIR)/%.o: %.c | objdir
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -rf $(TEST) $(TEST)_ase $(OBJDIR)

objdir:
	@mkdir -p $(OBJDIR)

# objdir names an action, not a file. Declaring it phony keeps a stray file
# called "objdir" from suppressing the mkdir.
.PHONY: all clean objdir
|
||||
@@ -1,210 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//
|
||||
// Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// Neither the name of the Intel Corporation nor the names of its contributors
|
||||
// may be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <uuid/uuid.h>
|
||||
|
||||
#include <opae/fpga.h>
|
||||
|
||||
// State from the AFU's JSON file, extracted using OPAE's afu_json_mgr script
|
||||
#include "afu_json_info.h"
|
||||
|
||||
#define CACHELINE_BYTES 64
|
||||
#define CL(x) ((x) * CACHELINE_BYTES)
|
||||
|
||||
|
||||
//
|
||||
// Search for an accelerator matching the requested UUID and connect to it.
|
||||
//
|
||||
static fpga_handle connect_to_accel(const char *accel_uuid)
|
||||
{
|
||||
fpga_properties filter = NULL;
|
||||
fpga_guid guid;
|
||||
fpga_token accel_token;
|
||||
uint32_t num_matches;
|
||||
fpga_handle accel_handle;
|
||||
fpga_result r;
|
||||
|
||||
// Don't print verbose messages in ASE by default
|
||||
//setenv("ASE_LOG", "0", 0);
|
||||
|
||||
// Set up a filter that will search for an accelerator
|
||||
fpgaGetProperties(NULL, &filter);
|
||||
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
|
||||
|
||||
// Add the desired UUID to the filter
|
||||
uuid_parse(accel_uuid, guid);
|
||||
fpgaPropertiesSetGUID(filter, guid);
|
||||
|
||||
// Do the search across the available FPGA contexts
|
||||
num_matches = 1;
|
||||
fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
|
||||
|
||||
// Not needed anymore
|
||||
fpgaDestroyProperties(&filter);
|
||||
|
||||
if (num_matches < 1)
|
||||
{
|
||||
fprintf(stderr, "Accelerator %s not found!\n", accel_uuid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Open accelerator
|
||||
r = fpgaOpen(accel_token, &accel_handle, 0);
|
||||
assert(FPGA_OK == r);
|
||||
|
||||
// Done with token
|
||||
fpgaDestroyToken(&accel_token);
|
||||
|
||||
return accel_handle;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Allocate a buffer in I/O memory, shared with the FPGA.
|
||||
//
|
||||
//
// Allocate a buffer in I/O memory, shared with the FPGA.
//
// accel_handle: open accelerator handle.
// size:         requested buffer size in bytes; must be positive.
// wsid:         receives the workspace ID of the prepared buffer.
// io_addr:      receives the buffer's I/O address as seen by the
//               accelerator.
// Returns the buffer's virtual address, or NULL if the arguments are
// invalid or the buffer could not be prepared.
//
static volatile void* alloc_buffer(fpga_handle accel_handle,
                                   ssize_t size,
                                   uint64_t *wsid,
                                   uint64_t *io_addr)
{
    fpga_result r;
    void *raw = NULL;
    volatile void* buf;

    // A negative ssize_t would turn into a huge unsigned length when it
    // is passed on, so reject it here along with NULL outputs.
    if ((size <= 0) || (NULL == wsid) || (NULL == io_addr)) return NULL;

    // Pass a real void** instead of casting the address of a volatile
    // pointer through void*.
    r = fpgaPrepareBuffer(accel_handle, (uint64_t)size, &raw, wsid, 0);
    if (FPGA_OK != r) return NULL;
    buf = raw;

    // Get the physical address of the buffer in the accelerator
    r = fpgaGetIOAddress(accel_handle, *wsid, io_addr);
    assert(FPGA_OK == r);

    return buf;
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
fpga_handle accel_handle;
|
||||
volatile char *buf;
|
||||
volatile char *buf_r;
|
||||
uint64_t wsid1;
|
||||
uint64_t wsid2;
|
||||
uint64_t buf_pa;
|
||||
uint64_t ret_buf_pa;
|
||||
uint64_t buf_rpa;
|
||||
uint64_t ret_buf_rpa;
|
||||
fpga_result r;
|
||||
|
||||
// Find and connect to the accelerator
|
||||
accel_handle = connect_to_accel(AFU_ACCEL_UUID);
|
||||
|
||||
// Allocate a single page memory buffer for write
|
||||
buf = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
|
||||
&wsid1, &buf_pa);
|
||||
// Allocate a single page memory buffer for read
|
||||
buf_r = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
|
||||
&wsid2, &buf_rpa);
|
||||
assert(NULL != buf);
|
||||
|
||||
//// Set the low byte of the shared buffer to 0. The FPGA will write
|
||||
//// a non-zero value to it.
|
||||
//buf[0] = 0;
|
||||
|
||||
// Set the low byte of the shared buffer buf_r to 0. The FPGA will read
|
||||
// the values and write to buf address
|
||||
buf[0] = 5;
|
||||
buf_r[0] = 5;
|
||||
|
||||
// Tell the accelerator the address of the buffer using cache line
|
||||
// addresses. The accelerator will respond by writing to the buffer.
|
||||
r = fpgaWriteMMIO64(accel_handle, 0, 0, buf_pa / CL(1));
|
||||
printf("Write address is %08lx\n", buf_pa);
|
||||
printf("Write address div 64 is %08lx\n", buf_pa/ CL(1));
|
||||
assert(FPGA_OK == r);
|
||||
|
||||
// Wait for response from FPGA. Check using fpgaReadMMIO
|
||||
//r = fpgaReadMMIO64(accel_handle, 0, 0, &ret_buf_pa);
|
||||
//printf("Returned write is %08lx\n", ret_buf_pa);
|
||||
//assert(FPGA_OK == r);
|
||||
|
||||
///////////////////// Added to check fpgaRead
|
||||
// Wait for response from FPGA. Check using fpgaReadMMIO
|
||||
r = fpgaReadMMIO64(accel_handle, 0, 5 * sizeof(uint64_t), &ret_buf_rpa);
|
||||
printf("Returned read at 10 is %08lx\n", ret_buf_rpa);
|
||||
assert(FPGA_OK == r);
|
||||
///////////////////////////////////////////////
|
||||
|
||||
|
||||
// Tell the accelerator the address of the buffer using cache line
|
||||
// addresses. The accelerator will read from the buffer.
|
||||
// Write the address to MMIO 1
|
||||
r = fpgaWriteMMIO64(accel_handle, 0, sizeof(uint64_t), buf_rpa / CL(1));
|
||||
printf("Read address is %08lx\n", buf_rpa);
|
||||
printf("Read address div64 is %08lx\n", buf_rpa / CL(1));
|
||||
assert(FPGA_OK == r);
|
||||
|
||||
// Wait for response from FPGA. Check using fpgaReadMMIO
|
||||
//r = fpgaReadMMIO64(accel_handle, 0, sizeof(uint64_t), &ret_buf_rpa);
|
||||
//printf("Returned write is %08lx\n", ret_buf_rpa);
|
||||
//assert(FPGA_OK == r);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Update this
|
||||
// Spin, waiting for the value in memory to change to something non-zero.
|
||||
while (5 == buf[0])
|
||||
{
|
||||
// A well-behaved program would use _mm_pause(), nanosleep() or
|
||||
// equivalent to save power here.
|
||||
};
|
||||
|
||||
// Print the string written by the FPGA
|
||||
printf("%d\n", buf[0]);
|
||||
|
||||
do {
|
||||
//printf("%d\n", buf[0]);
|
||||
} while (10 != buf[0]);
|
||||
|
||||
// Done
|
||||
fpgaReleaseBuffer(accel_handle, wsid1);
|
||||
fpgaReleaseBuffer(accel_handle, wsid2);
|
||||
fpgaClose(accel_handle);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
//
|
||||
// Generated by afu_json_mgr from ../hw/rtl/cci_hello.json
|
||||
//
|
||||
|
||||
#ifndef __AFU_JSON_INFO__
|
||||
#define __AFU_JSON_INFO__
|
||||
|
||||
#define AFU_ACCEL_NAME "cci_hello"
|
||||
#define AFU_ACCEL_UUID "C6AA954A-9B91-4A37-ABC1-1D9F0709DCC3"
|
||||
#define AFU_IMAGE_POWER 0
|
||||
#define AFU_TOP_IFC "ccip_std_afu"
|
||||
|
||||
#endif // __AFU_JSON_INFO__
|
||||
Binary file not shown.
@@ -1,36 +0,0 @@
|
||||
|
||||
# Build directories for the ASE simulation flow and the FPGA synthesis flow.
ASE_BUILD_DIR=build_ase
FPGA_BUILD_DIR=build_fpga

# None of these targets produce a file of the same name.
.PHONY: all ase fpga setup-ase setup-fpga run-ase wave run-fpga clean-ase clean-fpga

all: ase fpga

# Use $(MAKE) for recursive invocations so flags such as -j and -n propagate.
ase: setup-ase
	$(MAKE) -C $(ASE_BUILD_DIR)

fpga: setup-fpga
	cd $(FPGA_BUILD_DIR) && qsub-synth

setup-ase: $(ASE_BUILD_DIR)/Makefile

setup-fpga: $(FPGA_BUILD_DIR)/build/dcp.qpf

# The build areas are generated once from sources.txt.
$(ASE_BUILD_DIR)/Makefile:
	afu_sim_setup -s sources.txt $(ASE_BUILD_DIR)

$(FPGA_BUILD_DIR)/build/dcp.qpf:
	afu_synth_setup -s sources.txt $(FPGA_BUILD_DIR)

run-ase:
	cd $(ASE_BUILD_DIR) && $(MAKE) sim

wave:
	vsim -view $(ASE_BUILD_DIR)/work/vsim.wlf -do wave.do

run-fpga:
	# TODO

clean-ase:
	rm -rf $(ASE_BUILD_DIR)

clean-fpga:
	rm -rf $(FPGA_BUILD_DIR)
|
||||
@@ -1,39 +0,0 @@
|
||||
Use the following steps to build Vortex and run it on an FPGA on the Intel cloud server using OPAE.
|
||||
This script is also present at ~/dev/runVortex
|
||||
|
||||
## To configure quartus and opae. Run this after logging in.
|
||||
source /export/fpga/bin/setup-fpga-env fpga-pac-a10
|
||||
#########################
|
||||
## Vortex Run commands ##
|
||||
#########################
|
||||
## Synthesis
|
||||
cd ~/dev/Vortex/driver/hw/
|
||||
# Configure a Quartus build area
|
||||
afu_synth_setup -s sources.txt build_fpga
|
||||
cd build_fpga
|
||||
# Run Quartus in the vLab batch queue
|
||||
qsub-synth
|
||||
# Check if the job is submitted to the queue and running. Status should be R
|
||||
qstat | grep tinebp
|
||||
# Constantly monitoring the job submitted to the queue. Stop this using Ctrl+C
|
||||
watch 'qstat | grep tinebp'
|
||||
## Executing on FPGA
|
||||
# From the build_fpga directory, acquire an FPGA node
|
||||
qsub-fpga
|
||||
# Go to the directory where qsub-synth was run above
|
||||
cd $PBS_O_WORKDIR
|
||||
# Load the image onto an FPGA
|
||||
fpgaconf vortex_afu.gbs
|
||||
# If this reports multiple ports, pass --bus to fpgaconf. The bus number can be found with `fpgainfo port`.
|
||||
#fpgaconf --bus 0xaf vortex_afu.gbs
|
||||
## Running the Test case
|
||||
cd ../../sw/opae
|
||||
make clean
|
||||
make
|
||||
# For shared library
|
||||
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
|
||||
# Run the program
|
||||
cd ../../tests/basic
|
||||
make clean
|
||||
make
|
||||
./basic
|
||||
@@ -1,48 +0,0 @@
|
||||
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
|
||||
|
||||
// Register all interface signals
|
||||
|
||||
import ccip_if_pkg::*;
|
||||
// Adds one register stage, clocked by pClk, to every CCI-P signal that
// crosses the AFU boundary: each *_T0 input is captured in a *_T0_q flop and
// driven out unchanged on the matching *_T1 output one clock later.
module ccip_interface_reg(
    // CCI-P Clocks and Resets
    input  logic        pClk,                    // 400MHz - CCI-P clock domain. Primary Clock
    input  logic        pck_cp2af_softReset_T0,  // CCI-P ACTIVE HIGH Soft Reset
    input  logic [1:0]  pck_cp2af_pwrState_T0,   // CCI-P AFU Power State
    input  logic        pck_cp2af_error_T0,      // CCI-P Protocol Error Detected
    // Interface structures
    input  t_if_ccip_Rx pck_cp2af_sRx_T0,        // CCI-P Rx Port
    input  t_if_ccip_Tx pck_af2cp_sTx_T0,        // CCI-P Tx Port

    // Registered copies of the inputs above
    output logic        pck_cp2af_softReset_T1,
    output logic [1:0]  pck_cp2af_pwrState_T1,
    output logic        pck_cp2af_error_T1,

    output t_if_ccip_Rx pck_cp2af_sRx_T1,
    output t_if_ccip_Tx pck_af2cp_sTx_T1
);

    // Pipeline flops; the preserve attribute is carried over from the
    // original declarations.
    (* preserve *) logic        pck_cp2af_softReset_T0_q;
    (* preserve *) logic [1:0]  pck_cp2af_pwrState_T0_q;
    (* preserve *) logic        pck_cp2af_error_T0_q;
    (* preserve *) t_if_ccip_Rx pck_cp2af_sRx_T0_q;
    (* preserve *) t_if_ccip_Tx pck_af2cp_sTx_T0_q;

    // Capture stage. There is no reset: the soft reset itself is one of the
    // signals being pipelined.
    always_ff @(posedge pClk)
    begin
        pck_af2cp_sTx_T0_q       <= pck_af2cp_sTx_T0;
        pck_cp2af_sRx_T0_q       <= pck_cp2af_sRx_T0;
        pck_cp2af_error_T0_q     <= pck_cp2af_error_T0;
        pck_cp2af_pwrState_T0_q  <= pck_cp2af_pwrState_T0;
        pck_cp2af_softReset_T0_q <= pck_cp2af_softReset_T0;
    end

    // Drive the outputs straight from the flops.
    assign pck_cp2af_softReset_T1 = pck_cp2af_softReset_T0_q;
    assign pck_cp2af_pwrState_T1  = pck_cp2af_pwrState_T0_q;
    assign pck_cp2af_error_T1     = pck_cp2af_error_T0_q;
    assign pck_cp2af_sRx_T1       = pck_cp2af_sRx_T0_q;
    assign pck_af2cp_sTx_T1       = pck_af2cp_sTx_T0_q;

endmodule
|
||||
@@ -1,172 +0,0 @@
|
||||
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
|
||||
|
||||
// Top Level Vortex Driver
|
||||
|
||||
// To be done:
|
||||
// Check how to run this with OPAE. Looks like setup issue
|
||||
|
||||
|
||||
`include "platform_if.vh"
|
||||
|
||||
import local_mem_cfg_pkg::*;
|
||||
|
||||
// Top-level AFU wrapper: selects the platform clock/reset, registers the
// CCI-P interface, instantiates vortex_afu and fans its single Avalon memory
// port out to the local memory banks.
module ccip_std_afu
  #(
    parameter NUM_LOCAL_MEM_BANKS = 2
    )
   (
    // CCI-P Clocks and Resets
    input  logic        pClk,                 // Primary CCI-P interface clock.
    input  logic        pClkDiv2,             // Aligned, pClk divided by 2.
    input  logic        pClkDiv4,             // Aligned, pClk divided by 4.
    input  logic        uClk_usr,             // User clock domain. Refer to clock programming guide.
    input  logic        uClk_usrDiv2,         // Aligned, user clock divided by 2.
    input  logic        pck_cp2af_softReset,  // CCI-P ACTIVE HIGH Soft Reset

    input  logic [1:0]  pck_cp2af_pwrState,   // CCI-P AFU Power State
    input  logic        pck_cp2af_error,      // CCI-P Protocol Error Detected

    // CCI-P structures
    input  t_if_ccip_Rx pck_cp2af_sRx,        // CCI-P Rx Port
    output t_if_ccip_Tx pck_af2cp_sTx,        // CCI-P Tx Port

    // Local memory interface
    avalon_mem_if.to_fiu local_mem[NUM_LOCAL_MEM_BANKS]
    );

    // ====================================================================
    // Clock and reset, as selected through the AFU's JSON file. The
    // platform may substitute a different clock for pClk.
    // ====================================================================

    logic clk;
    logic reset;

    assign clk   = `PLATFORM_PARAM_CCI_P_CLOCK;
    assign reset = `PLATFORM_PARAM_CCI_P_RESET;

    // ====================================================================
    // One register stage on the CCI-P interface before it is consumed
    // ====================================================================

    (* noprune *) logic [1:0] cp2af_pwrState_T1;
    (* noprune *) logic       cp2af_error_T1;

    logic        reset_T1;
    t_if_ccip_Rx cp2af_sRx_T1;
    t_if_ccip_Tx af2cp_sTx_T0;

    ccip_interface_reg inst_green_ccip_interface_reg
      (
       .pClk                   (clk),

       // Unregistered side
       .pck_cp2af_softReset_T0 (reset),
       .pck_cp2af_pwrState_T0  (pck_cp2af_pwrState),
       .pck_cp2af_error_T0     (pck_cp2af_error),
       .pck_cp2af_sRx_T0       (pck_cp2af_sRx),
       .pck_af2cp_sTx_T0       (af2cp_sTx_T0),

       // Registered side
       .pck_cp2af_softReset_T1 (reset_T1),
       .pck_cp2af_pwrState_T1  (cp2af_pwrState_T1),
       .pck_cp2af_error_T1     (cp2af_error_T1),
       .pck_cp2af_sRx_T1       (cp2af_sRx_T1),
       .pck_af2cp_sTx_T1       (pck_af2cp_sTx)
       );

    // ====================================================================
    // User AFU
    // ====================================================================

    //
    // vortex_afu depends on CCI-P and local memory being in the same
    // clock domain. This is accomplished by choosing a common clock
    // in the AFU's JSON description. The platform instantiates clock-
    // crossing shims automatically, as needed.
    //
    // Memory banks are used very simply here. Only one bank is active at
    // a time, selected by mem_bank_select, which vortex_afu drives.
    //

    // Single Avalon port exposed by vortex_afu
    logic                 avs_read;
    logic                 avs_write;
    t_local_mem_addr      avs_address;
    t_local_mem_burst_cnt avs_burstcount;
    t_local_mem_byte_mask avs_byteenable;
    t_local_mem_data      avs_writedata;
    t_local_mem_data      avs_readdata;
    logic                 avs_readdatavalid;
    logic                 avs_waitrequest;

    // Index of the bank currently addressed
    logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;

    vortex_afu
      #(
        .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
        )
      vortex_afu_inst
       (
        .clk               (clk),
        .SoftReset         (reset_T1),

        .cp2af_sRxPort     (cp2af_sRx_T1),
        .af2cp_sTxPort     (af2cp_sTx_T0),

        .avs_read          (avs_read),
        .avs_write         (avs_write),
        .avs_address       (avs_address),
        .avs_burstcount    (avs_burstcount),
        .avs_byteenable    (avs_byteenable),
        .avs_writedata     (avs_writedata),
        .avs_readdata      (avs_readdata),
        .avs_readdatavalid (avs_readdatavalid),
        .avs_waitrequest   (avs_waitrequest),

        .mem_bank_select   (mem_bank_select)
        );

    //
    // Per-bank copies of the memory-to-AFU signals, held in arrays so the
    // active bank can be picked by indexing with mem_bank_select.
    //
    logic            avs_waitrequest_v[NUM_LOCAL_MEM_BANKS];
    t_local_mem_data avs_readdata_v[NUM_LOCAL_MEM_BANKS];
    logic            avs_readdatavalid_v[NUM_LOCAL_MEM_BANKS];

    genvar b;
    generate
        for (b = 0; b < NUM_LOCAL_MEM_BANKS; b = b + 1)
        begin : lmb
            // High when this bank is the one selected by mem_bank_select
            logic bank_selected;
            assign bank_selected = ($bits(mem_bank_select)'(b) == mem_bank_select);

            // Local memory to AFU signals
            assign avs_waitrequest_v[b]   = local_mem[b].waitrequest;
            assign avs_readdata_v[b]      = local_mem[b].readdata;
            assign avs_readdatavalid_v[b] = local_mem[b].readdatavalid;

            // Address, burst count, byte enables and write data are
            // broadcast to every bank; only the request strobes are
            // bank-specific.
            assign local_mem[b].address    = avs_address;
            assign local_mem[b].burstcount = avs_burstcount;
            assign local_mem[b].byteenable = avs_byteenable;
            assign local_mem[b].writedata  = avs_writedata;

            // Request strobes reach the selected bank only
            assign local_mem[b].write = avs_write && bank_selected;
            assign local_mem[b].read  = avs_read && bank_selected;
        end
    endgenerate

    // Return path: forward the responses of the selected bank
    assign avs_waitrequest   = avs_waitrequest_v[mem_bank_select];
    assign avs_readdata      = avs_readdata_v[mem_bank_select];
    assign avs_readdatavalid = avs_readdatavalid_v[mem_bank_select];

endmodule
|
||||
@@ -1,120 +0,0 @@
|
||||
vortex_afu.json
|
||||
|
||||
+define+GLOBAL_BLOCK_SIZE_BYTES=64
|
||||
|
||||
+incdir+.
|
||||
+incdir+../../rtl
|
||||
+incdir+../../rtl/shared_memory
|
||||
+incdir+../../rtl/cache
|
||||
+incdir+../../rtl/VX_cache
|
||||
+incdir+../../rtl/interfaces
|
||||
+incdir+../../rtl/pipe_regs
|
||||
+incdir+../../rtl/compat
|
||||
|
||||
../../rtl/VX_define_synth.v
|
||||
../../rtl/VX_define.v
|
||||
../../rtl/VX_cache/VX_cache_config.v
|
||||
../../rtl/Vortex_SOC.v
|
||||
../../rtl/Vortex_Cluster.v
|
||||
../../rtl/Vortex.v
|
||||
../../rtl/VX_front_end.v
|
||||
../../rtl/VX_back_end.v
|
||||
../../rtl/VX_fetch.v
|
||||
../../rtl/VX_scheduler.v
|
||||
../../rtl/VX_execute_unit.v
|
||||
../../rtl/VX_warp.v
|
||||
../../rtl/VX_icache_stage.v
|
||||
../../rtl/VX_gpr_wrapper.v
|
||||
../../rtl/byte_enabled_simple_dual_port_ram.v
|
||||
../../rtl/VX_gpgpu_inst.v
|
||||
../../rtl/VX_writeback.v
|
||||
../../rtl/VX_countones.v
|
||||
../../rtl/VX_csr_handler.v
|
||||
../../rtl/VX_csr_pipe.v
|
||||
../../rtl/VX_generic_queue_ll.v
|
||||
../../rtl/VX_warp_scheduler.v
|
||||
../../rtl/VX_priority_encoder.v
|
||||
../../rtl/VX_generic_queue.v
|
||||
../../rtl/pipe_regs/VX_f_d_reg.v
|
||||
../../rtl/pipe_regs/VX_i_d_reg.v
|
||||
../../rtl/pipe_regs/VX_d_e_reg.v
|
||||
../../rtl/VX_gpr.v
|
||||
../../rtl/VX_gpr_stage.v
|
||||
../../rtl/VX_dmem_controller.v
|
||||
../../rtl/VX_alu.v
|
||||
../../rtl/VX_generic_stack.v
|
||||
../../rtl/VX_generic_priority_encoder.v
|
||||
../../rtl/VX_csr_data.v
|
||||
../../rtl/VX_lsu.v
|
||||
../../rtl/VX_decode.v
|
||||
../../rtl/VX_inst_multiplex.v
|
||||
../../rtl/VX_csr_wrapper.v
|
||||
../../rtl/VX_priority_encoder_w_mask.v
|
||||
../../rtl/VX_generic_register.v
|
||||
../../rtl/VX_lsu_addr_gen.v
|
||||
../../rtl/compat/VX_mult.v
|
||||
../../rtl/compat/VX_divide.v
|
||||
../../rtl/VX_cache/VX_snp_fwd_arb.v
|
||||
../../rtl/VX_cache/VX_cache_dram_req_arb.v
|
||||
../../rtl/VX_cache/VX_cache_dfq_queue.v
|
||||
../../rtl/VX_cache/VX_cache_wb_sel_merge.v
|
||||
../../rtl/VX_cache/VX_mrv_queue.v
|
||||
../../rtl/VX_cache/VX_dcache_llv_resp_bank_sel.v
|
||||
../../rtl/VX_cache/VX_tag_data_access.v
|
||||
../../rtl/VX_cache/VX_cache.v
|
||||
../../rtl/VX_cache/VX_cache_core_req_bank_sel.v
|
||||
../../rtl/VX_cache/VX_cache_req_queue.v
|
||||
../../rtl/VX_cache/VX_bank.v
|
||||
../../rtl/VX_cache/VX_cache_miss_resrv.v
|
||||
../../rtl/VX_cache/VX_fill_invalidator.v
|
||||
../../rtl/VX_cache/VX_tag_data_structure.v
|
||||
../../rtl/VX_cache/VX_prefetcher.v
|
||||
../../rtl/cache/VX_generic_pe.v
|
||||
../../rtl/cache/cache_set.v
|
||||
../../rtl/cache/VX_d_cache.v
|
||||
../../rtl/cache/VX_Cache_Bank.v
|
||||
../../rtl/cache/VX_cache_data_per_index.v
|
||||
../../rtl/cache/VX_d_cache_encapsulate.v
|
||||
../../rtl/cache/VX_cache_bank_valid.v
|
||||
../../rtl/cache/VX_cache_data.v
|
||||
../../rtl/shared_memory/VX_shared_memory_block.v
|
||||
../../rtl/shared_memory/VX_priority_encoder_sm.v
|
||||
../../rtl/shared_memory/VX_shared_memory.v
|
||||
../../rtl/shared_memory/VX_bank_valids.v
|
||||
../../rtl/interfaces/VX_exec_unit_req_inter.v
|
||||
../../rtl/interfaces/VX_branch_response_inter.v
|
||||
../../rtl/interfaces/VX_inst_meta_inter.v
|
||||
../../rtl/interfaces/VX_join_inter.v
|
||||
../../rtl/interfaces/VX_icache_response_inter.v
|
||||
../../rtl/interfaces/VX_gpr_wspawn_inter.v
|
||||
../../rtl/interfaces/VX_inst_exec_wb_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_dram_req_inter.v
|
||||
../../rtl/interfaces/VX_csr_req_inter.v
|
||||
../../rtl/interfaces/VX_icache_request_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_res_inter.v
|
||||
../../rtl/interfaces/VX_frE_to_bckE_req_inter.v
|
||||
../../rtl/interfaces/VX_dram_req_rsp_inter.v
|
||||
../../rtl/interfaces/VX_dcache_request_inter.v
|
||||
../../rtl/interfaces/VX_gpr_data_inter.v
|
||||
../../rtl/interfaces/VX_dcache_response_inter.v
|
||||
../../rtl/interfaces/VX_csr_wb_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_req_inter.v
|
||||
../../rtl/interfaces/VX_lsu_req_inter.v
|
||||
../../rtl/interfaces/VX_gpu_snp_req_rsp.v
|
||||
../../rtl/interfaces/VX_mw_wb_inter.v
|
||||
../../rtl/interfaces/VX_gpr_jal_inter.v
|
||||
../../rtl/interfaces/VX_gpu_inst_req_inter.v
|
||||
../../rtl/interfaces/VX_wstall_inter.v
|
||||
../../rtl/interfaces/VX_wb_inter.v
|
||||
../../rtl/interfaces/VX_gpr_clone_inter.v
|
||||
../../rtl/interfaces/VX_gpr_read_inter.v
|
||||
../../rtl/interfaces/VX_mem_req_inter.v
|
||||
../../rtl/interfaces/VX_jal_response_inter.v
|
||||
../../rtl/interfaces/VX_warp_ctl_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_snp_req_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_dram_res_inter.v
|
||||
../../rtl/interfaces/VX_inst_mem_wb_inter.v
|
||||
|
||||
ccip_interface_reg.sv
|
||||
ccip_std_afu.sv
|
||||
vortex_afu.sv
|
||||
@@ -1,49 +0,0 @@
|
||||
{
|
||||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"clock-frequency-high": "auto",
|
||||
"clock-frequency-low": "auto",
|
||||
|
||||
"mmio-csr-cmd": 10,
|
||||
"mmio-csr-status": 12,
|
||||
"mmio-csr-io-addr": 14,
|
||||
"mmio-csr-mem-addr": 16,
|
||||
"mmio-csr-data-size": 18,
|
||||
|
||||
"cmd-type-read": 1,
|
||||
"cmd-type-write": 2,
|
||||
"cmd-type-run": 3,
|
||||
"cmd-type-clflush": 4,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
"class": "ccip_std_afu_avalon_mm",
|
||||
"module-ports" :
|
||||
[
|
||||
{
|
||||
"class": "cci-p",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"class": "local-memory",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"accelerator-clusters":
|
||||
[
|
||||
{
|
||||
"name": "vortex_afu",
|
||||
"total-contexts": 1,
|
||||
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,576 +0,0 @@
|
||||
// Interface between CSR and FSM
|
||||
// All MMIO reads and writes are handled by the CSR logic and passed to the FSM for state transitions
|
||||
|
||||
// To be done:
|
||||
// Change address size to buffer's address size and data size based on IO address size. Check from hello_world
|
||||
|
||||
`include "platform_if.vh"
|
||||
import local_mem_cfg_pkg::*;
|
||||
`include "afu_json_info.vh"
|
||||
|
||||
module vortex_afu #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
) (
|
||||
// global signals
|
||||
input clk,
|
||||
input SoftReset,
|
||||
|
||||
// IF signals between CCI and AFU
|
||||
input t_if_ccip_Rx cp2af_sRxPort,
|
||||
output t_if_ccip_Tx af2cp_sTxPort,
|
||||
|
||||
// Avalon signals for local memory access
|
||||
output t_local_mem_data avs_writedata,
|
||||
input t_local_mem_data avs_readdata,
|
||||
output t_local_mem_addr avs_address,
|
||||
input logic avs_waitrequest,
|
||||
output logic avs_write,
|
||||
output logic avs_read,
|
||||
output t_local_mem_byte_mask avs_byteenable,
|
||||
output t_local_mem_burst_cnt avs_burstcount,
|
||||
input avs_readdatavalid,
|
||||
|
||||
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
|
||||
);
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 16;
|
||||
|
||||
localparam VX_SNOOP_DELAY = 300;
|
||||
localparam VX_SNOOP_LEVELS = 2;
|
||||
|
||||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
|
||||
localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ;
|
||||
localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE;
|
||||
localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN;
|
||||
localparam CMD_TYPE_CLFLUSH = `AFU_IMAGE_CMD_TYPE_CLFLUSH;
|
||||
|
||||
localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD;
|
||||
localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS;
|
||||
localparam MMIO_CSR_IO_ADDR = `AFU_IMAGE_MMIO_CSR_IO_ADDR;
|
||||
localparam MMIO_CSR_MEM_ADDR = `AFU_IMAGE_MMIO_CSR_MEM_ADDR;
|
||||
localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE;
|
||||
|
||||
logic [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
typedef enum logic[3:0] {
|
||||
STATE_IDLE,
|
||||
STATE_READ,
|
||||
STATE_WRITE,
|
||||
STATE_RUN,
|
||||
STATE_CLFLUSH
|
||||
} state_t;
|
||||
|
||||
state_t state;
|
||||
|
||||
// Vortex signals /////////////////////////////////////////////////////////////
|
||||
|
||||
logic vx_dram_req_read;
|
||||
logic vx_dram_req_write;
|
||||
logic [31:0] vx_dram_req_addr;
|
||||
logic [31:0] vx_dram_req_data[15:0];
|
||||
logic vx_dram_req_delay;
|
||||
|
||||
logic vx_dram_fill_accept;
|
||||
logic vx_dram_fill_rsp;
|
||||
logic [31:0] vx_dram_fill_rsp_addr;
|
||||
logic [31:0] vx_dram_fill_rsp_data[15:0];
|
||||
|
||||
logic vx_snp_req;
|
||||
logic [31:0] vx_snp_req_addr;
|
||||
logic vx_snp_req_delay;
|
||||
|
||||
logic vx_ebreak;
|
||||
|
||||
// AVS Queues /////////////////////////////////////////////////////////////////
|
||||
|
||||
logic avs_raq_push;
|
||||
t_local_mem_addr avs_raq_din;
|
||||
logic avs_raq_pop;
|
||||
t_local_mem_addr avs_raq_dout;
|
||||
logic avs_raq_empty;
|
||||
logic avs_raq_full;
|
||||
|
||||
logic avs_rdq_push;
|
||||
t_local_mem_data avs_rdq_din;
|
||||
logic avs_rdq_pop;
|
||||
t_local_mem_data avs_rdq_dout;
|
||||
logic avs_rdq_empty;
|
||||
logic avs_rdq_full;
|
||||
|
||||
// CSR variables //////////////////////////////////////////////////////////////
|
||||
|
||||
logic [2:0] csr_cmd;
|
||||
t_ccip_clAddr csr_io_addr;
|
||||
t_local_mem_addr csr_mem_addr;
|
||||
logic [31:0] csr_data_size;
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_c0_ReqMmioHdr mmioHdr;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
||||
end
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
af2cp_sTxPort.c2.hdr <= 0;
|
||||
af2cp_sTxPort.c2.data <= 0;
|
||||
af2cp_sTxPort.c2.mmioRdValid <= 0;
|
||||
csr_cmd <= 0;
|
||||
csr_io_addr <= 0;
|
||||
csr_mem_addr <= 0;
|
||||
csr_data_size <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
csr_cmd <= 0;
|
||||
af2cp_sTxPort.c2.mmioRdValid <= 0;
|
||||
|
||||
// serve MMIO write request
|
||||
if (cp2af_sRxPort.c0.mmioWrValid)
|
||||
begin
|
||||
case (mmioHdr.address)
|
||||
MMIO_CSR_IO_ADDR: begin
|
||||
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6);
|
||||
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6));
|
||||
end
|
||||
MMIO_CSR_MEM_ADDR: begin
|
||||
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6);
|
||||
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6));
|
||||
end
|
||||
MMIO_CSR_DATA_SIZE: begin
|
||||
csr_data_size <= $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6);
|
||||
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6));
|
||||
end
|
||||
MMIO_CSR_CMD: begin
|
||||
csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
|
||||
$display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// serve MMIO read requests
|
||||
if (cp2af_sRxPort.c0.mmioRdValid)
|
||||
begin
|
||||
af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID
|
||||
case (mmioHdr.address)
|
||||
// AFU header
|
||||
16'h0000: af2cp_sTxPort.c2.data <= {
|
||||
4'b0001, // Feature type = AFU
|
||||
8'b0, // reserved
|
||||
4'b0, // afu minor revision = 0
|
||||
7'b0, // reserved
|
||||
1'b1, // end of DFH list = 1
|
||||
24'b0, // next DFH offset = 0
|
||||
4'b0, // afu major revision = 0
|
||||
12'b0 // feature ID = 0
|
||||
};
|
||||
AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0]; // afu id low
|
||||
AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi
|
||||
16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU
|
||||
16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved
|
||||
MMIO_CSR_STATUS: begin
|
||||
if (state != af2cp_sTxPort.c2.data)
|
||||
$display("%t: STATUS: state=%0d", $time, state);
|
||||
af2cp_sTxPort.c2.data <= state;
|
||||
end
|
||||
default: af2cp_sTxPort.c2.data <= 64'h0;
|
||||
endcase
|
||||
af2cp_sTxPort.c2.mmioRdValid <= 1; // post response
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
||||
|
||||
logic [31:0] cci_write_ctr;
|
||||
logic [31:0] avs_read_ctr;
|
||||
logic [31:0] avs_write_ctr;
|
||||
logic [31:0] vx_snoop_ctr;
|
||||
logic [9:0] vx_snoop_delay;
|
||||
logic vx_reset;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
vx_reset <= 0;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
case (csr_cmd)
|
||||
CMD_TYPE_READ: begin
|
||||
$display("%t: STATE READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_READ;
|
||||
end
|
||||
CMD_TYPE_WRITE: begin
|
||||
$display("%t: STATE WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_WRITE;
|
||||
end
|
||||
CMD_TYPE_RUN: begin
|
||||
$display("%t: STATE START", $time);
|
||||
vx_reset <= 1;
|
||||
state <= STATE_RUN;
|
||||
end
|
||||
CMD_TYPE_CLFLUSH: begin
|
||||
$display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_CLFLUSH;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
STATE_READ: begin
|
||||
if (cci_write_ctr >= csr_data_size)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_WRITE: begin
|
||||
if (avs_write_ctr >= csr_data_size)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_RUN: begin
|
||||
if (vx_ebreak)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_CLFLUSH: begin
|
||||
if (vx_snoop_delay >= VX_SNOOP_DELAY)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// AVS Controller /////////////////////////////////////////////////////////////
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
mem_bank_select <= 0;
|
||||
avs_burstcount <= 1;
|
||||
avs_byteenable <= 64'hffffffffffffffff;
|
||||
avs_address <= 0;
|
||||
avs_writedata <= 0;
|
||||
avs_read <= 0;
|
||||
avs_write <= 0;
|
||||
avs_read_ctr <= 0;
|
||||
avs_write_ctr <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
avs_read <= 0;
|
||||
avs_write <= 0;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
avs_read_ctr <= 0;
|
||||
avs_write_ctr <= 0;
|
||||
end
|
||||
|
||||
STATE_READ: begin
|
||||
if (!avs_raq_full
|
||||
&& !avs_rdq_full
|
||||
&& !avs_waitrequest
|
||||
&& avs_read_ctr < csr_data_size)
|
||||
begin
|
||||
avs_address <= csr_mem_addr + avs_read_ctr;
|
||||
avs_read <= 1;
|
||||
avs_read_ctr <= avs_read_ctr + 1;
|
||||
$display("%t: AVS Rd Req: addr=%h", $time, csr_mem_addr + avs_read_ctr);
|
||||
end
|
||||
end
|
||||
|
||||
STATE_WRITE: begin
|
||||
if (cp2af_sRxPort.c0.rspValid
|
||||
&& avs_write_ctr < csr_data_size)
|
||||
begin
|
||||
avs_writedata <= cp2af_sRxPort.c0.data;
|
||||
avs_address <= csr_mem_addr + avs_write_ctr;
|
||||
avs_write <= 1;
|
||||
avs_write_ctr <= avs_write_ctr + 1;
|
||||
$display("%t: AVS Wr Req: addr=%h (%0d/%0d)", $time, csr_mem_addr + avs_write_ctr, avs_write_ctr + 1, csr_data_size);
|
||||
end
|
||||
end
|
||||
|
||||
STATE_RUN, STATE_CLFLUSH: begin
|
||||
if (vx_dram_req_read
|
||||
&& !vx_dram_req_delay)
|
||||
begin
|
||||
avs_address <= (vx_dram_req_addr >> 6);
|
||||
avs_read <= 1;
|
||||
$display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr >> 6);
|
||||
end
|
||||
|
||||
if (vx_dram_req_write
|
||||
&& !vx_dram_req_delay)
|
||||
begin
|
||||
avs_writedata <= {>>{vx_dram_req_data}};
|
||||
avs_address <= (vx_dram_req_addr >> 6);
|
||||
avs_write <= 1;
|
||||
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr >> 6);
|
||||
end
|
||||
end
|
||||
endcase
|
||||
|
||||
if (avs_readdatavalid)
|
||||
begin
|
||||
$display("%t: AVS Rd Rsp", $time);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Vortex DRAM requests stalling
|
||||
|
||||
logic vortex_enabled;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
vx_dram_req_delay = !vortex_enabled || avs_waitrequest || avs_raq_full || avs_rdq_full;
|
||||
end
|
||||
|
||||
// Vortex DRAM fill response
|
||||
|
||||
always_comb
|
||||
begin
|
||||
vx_dram_fill_rsp = vortex_enabled && !avs_rdq_empty && vx_dram_fill_accept;
|
||||
vx_dram_fill_rsp_addr = (avs_raq_dout << 6);
|
||||
{>>{vx_dram_fill_rsp_data}} = avs_rdq_dout;
|
||||
end
|
||||
|
||||
// AVS address read request queue /////////////////////////////////////////////
|
||||
|
||||
logic cci_write_req;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
avs_raq_pop = vx_dram_fill_rsp || cci_write_req;
|
||||
avs_raq_din = avs_address;
|
||||
avs_raq_push = avs_read;
|
||||
end
|
||||
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW($bits(t_local_mem_addr)),
|
||||
.SIZE(AVS_RD_QUEUE_SIZE)
|
||||
) vx_rd_addr_queue (
|
||||
.clk (clk),
|
||||
.reset (SoftReset),
|
||||
.push (avs_raq_push),
|
||||
.in_data (avs_raq_din),
|
||||
.pop (avs_raq_pop),
|
||||
.out_data (avs_raq_dout),
|
||||
.empty (avs_raq_empty),
|
||||
.full (avs_raq_full)
|
||||
);
|
||||
|
||||
// AVS data read response queue ///////////////////////////////////////////////
|
||||
|
||||
always_comb
|
||||
begin
|
||||
avs_rdq_pop = avs_raq_pop;
|
||||
avs_rdq_din = avs_readdata;
|
||||
avs_rdq_push = avs_readdatavalid;
|
||||
end
|
||||
|
||||
VX_generic_queue_ll #(
|
||||
.DATAW($bits(t_local_mem_data)),
|
||||
.SIZE(AVS_RD_QUEUE_SIZE)
|
||||
) vx_rd_data_queue (
|
||||
.clk (clk),
|
||||
.reset (SoftReset),
|
||||
.push (avs_rdq_push),
|
||||
.in_data (avs_rdq_din),
|
||||
.pop (avs_rdq_pop),
|
||||
.out_data (avs_rdq_dout),
|
||||
.empty (avs_rdq_empty),
|
||||
.full (avs_rdq_full)
|
||||
);
|
||||
|
||||
// CCI Read Request ///////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_c0_ReqMemHdr rd_hdr;
|
||||
|
||||
logic cci_read_pending;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
rd_hdr = t_ccip_c0_ReqMemHdr'(0);
|
||||
rd_hdr.address = csr_io_addr + avs_write_ctr;
|
||||
end
|
||||
|
||||
// Send read requests to CCI
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
af2cp_sTxPort.c0.hdr <= 0;
|
||||
af2cp_sTxPort.c0.valid <= 0;
|
||||
cci_read_pending <= 0;
|
||||
end
|
||||
else begin
|
||||
af2cp_sTxPort.c0.valid <= 0;
|
||||
|
||||
if (STATE_WRITE == state
|
||||
&& !cp2af_sRxPort.c0TxAlmFull // ensure read queue not full
|
||||
&& !avs_waitrequest // ensure AVS write queue not full
|
||||
&& !cci_read_pending // ensure no read pending
|
||||
&& avs_write_ctr < csr_data_size) // ensure not done
|
||||
begin
|
||||
af2cp_sTxPort.c0.hdr <= rd_hdr;
|
||||
af2cp_sTxPort.c0.valid <= 1;
|
||||
cci_read_pending <= 1;
|
||||
$display("%t: CCI Rd Req: addr=%h", $time, rd_hdr.address);
|
||||
end
|
||||
|
||||
if (cci_read_pending
|
||||
&& cp2af_sRxPort.c0.rspValid)
|
||||
begin
|
||||
$display("%t: CCI Rd Rsp", $time);
|
||||
cci_read_pending <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// CCI Write Request //////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_c1_ReqMemHdr wr_hdr;
|
||||
|
||||
logic cci_write_pending;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
cci_write_req = (STATE_READ == state)
|
||||
&& !avs_rdq_empty
|
||||
&& !cp2af_sRxPort.c1TxAlmFull
|
||||
&& !cci_write_pending
|
||||
&& cci_write_ctr < csr_data_size;
|
||||
|
||||
wr_hdr = t_ccip_c1_ReqMemHdr'(0);
|
||||
wr_hdr.address = csr_io_addr + cci_write_ctr;
|
||||
wr_hdr.sop = 1; // single line write mode
|
||||
end
|
||||
|
||||
// Send write requests to CCI
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
af2cp_sTxPort.c1.hdr <= 0;
|
||||
af2cp_sTxPort.c1.data <= 0;
|
||||
af2cp_sTxPort.c1.valid <= 0;
|
||||
cci_write_ctr <= 0;
|
||||
cci_write_pending <= 0;
|
||||
end
|
||||
else begin
|
||||
af2cp_sTxPort.c1.valid <= 0;
|
||||
|
||||
if (STATE_IDLE == state)
|
||||
begin
|
||||
cci_write_ctr <= 0;
|
||||
end
|
||||
|
||||
if (cci_write_req)
|
||||
begin
|
||||
af2cp_sTxPort.c1.hdr <= wr_hdr;
|
||||
af2cp_sTxPort.c1.data <= t_ccip_clData'(avs_rdq_dout);
|
||||
af2cp_sTxPort.c1.valid <= 1;
|
||||
cci_write_pending <= 1;
|
||||
$display("%t: CCI Wr Req: addr=%h", $time, wr_hdr.address);
|
||||
end
|
||||
|
||||
if (cci_write_pending
|
||||
&& cp2af_sRxPort.c1.rspValid)
|
||||
begin
|
||||
cci_write_ctr <= cci_write_ctr + 1;
|
||||
cci_write_pending <= 0;
|
||||
$display("%t: CCI Wr Rsp (%0d/%0d)", $time, cci_write_ctr + 1, csr_data_size);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Vortex cache snooping //////////////////////////////////////////////////////
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
vx_snp_req <= 0;
|
||||
vx_snoop_ctr <= 0;
|
||||
vx_snoop_delay <= 0;
|
||||
end
|
||||
else begin
|
||||
if (STATE_IDLE == state)
|
||||
begin
|
||||
vx_snoop_ctr <= 0;
|
||||
vx_snoop_delay <= 0;
|
||||
end
|
||||
|
||||
vx_snp_req <= 0;
|
||||
|
||||
if ((STATE_CLFLUSH == state)
|
||||
&& vx_snoop_ctr < csr_data_size
|
||||
&& !vx_snp_req_delay)
|
||||
begin
|
||||
vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
|
||||
vx_snp_req <= 1;
|
||||
vx_snoop_ctr <= vx_snoop_ctr + 1;
|
||||
end
|
||||
|
||||
if (vx_snoop_ctr == csr_data_size)
|
||||
begin
|
||||
vx_snoop_delay <= vx_snoop_delay + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Vortex binding /////////////////////////////////////////////////////////////
|
||||
|
||||
Vortex_SOC #() vx_soc (
|
||||
.clk (clk),
|
||||
.reset (SoftReset || vx_reset),
|
||||
|
||||
// DRAM Req
|
||||
.out_dram_req_write (vx_dram_req_write),
|
||||
.out_dram_req_read (vx_dram_req_read),
|
||||
.out_dram_req_addr (vx_dram_req_addr),
|
||||
.out_dram_req_data (vx_dram_req_data),
|
||||
.out_dram_req_delay (vx_dram_req_delay),
|
||||
|
||||
// DRAM Rsp
|
||||
.out_dram_fill_accept (vx_dram_fill_accept),
|
||||
.out_dram_fill_rsp (vx_dram_fill_rsp),
|
||||
.out_dram_fill_rsp_addr (vx_dram_fill_rsp_addr),
|
||||
.out_dram_fill_rsp_data (vx_dram_fill_rsp_data),
|
||||
|
||||
// Cache Snooping Req
|
||||
.llc_snp_req (vx_snp_req),
|
||||
.llc_snp_req_addr (vx_snp_req_addr),
|
||||
.llc_snp_req_delay (vx_snp_req_delay),
|
||||
|
||||
// program exit signal
|
||||
.out_ebreak (vx_ebreak)
|
||||
);
|
||||
|
||||
endmodule
|
||||
@@ -1,69 +0,0 @@
|
||||
onerror {resume}
|
||||
quietly WaveActivateNextPane {} 0
|
||||
add wave -noupdate -label clk /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/clk
|
||||
add wave -noupdate -label reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/SoftReset
|
||||
add wave -noupdate -label state /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/state
|
||||
add wave -noupdate -label cci_write_pending /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_pending
|
||||
add wave -noupdate -label cci_write_ctr -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_ctr
|
||||
add wave -noupdate -label csr_data_size -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/csr_data_size
|
||||
add wave -noupdate -label avs_read_ctr -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_read_ctr
|
||||
add wave -noupdate -label avs_waitrequest /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_waitrequest
|
||||
add wave -noupdate -label avs_address -radix hexadecimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_address
|
||||
add wave -noupdate -label avs_readdata -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_readdata
|
||||
add wave -noupdate -label avs_writedata -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_writedata
|
||||
add wave -noupdate -label avs_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_write
|
||||
add wave -noupdate -label avs_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_read
|
||||
add wave -noupdate -label avs_readdatavalid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_readdatavalid
|
||||
add wave -noupdate -label sRx.c0.rspValid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cp2af_sRxPort.c0.rspValid
|
||||
add wave -noupdate -label sRx.c1.rspValid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cp2af_sRxPort.c1.rspValid
|
||||
add wave -noupdate -label sTx.c0.valid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/af2cp_sTxPort.c0.valid
|
||||
add wave -noupdate -label sTx.c1.valid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/af2cp_sTxPort.c1.valid
|
||||
add wave -noupdate -label cci_write_req /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_req
|
||||
add wave -noupdate -label avs_raq_push /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_push
|
||||
add wave -noupdate -label avs_rdq_push /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_push
|
||||
add wave -noupdate -label avs_raq_pop /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_pop
|
||||
add wave -noupdate -label avs_rdq_pop /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_pop
|
||||
add wave -noupdate -label avs_raq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_full
|
||||
add wave -noupdate -label avs_rdq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_full
|
||||
add wave -noupdate -label avs_raq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_empty
|
||||
add wave -noupdate -label avs_rdq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_empty
|
||||
add wave -noupdate -label vortex_enabled /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vortex_enabled
|
||||
add wave -noupdate -label vx_reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/reset
|
||||
add wave -noupdate -label vx_dram_req_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_read
|
||||
add wave -noupdate -label vx_dram_req_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_write
|
||||
add wave -noupdate -label vx_dram_req_delay /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_delay
|
||||
add wave -noupdate -label vx_dram_req_addr -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_addr
|
||||
# Label corrected from "vx_draw_req_data" so it matches the displayed signal name.
add wave -noupdate -label vx_dram_req_data -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_data
|
||||
add wave -noupdate -label out_dram_fill_rsp /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_dram_fill_rsp
|
||||
add wave -noupdate -label out_dram_fill_accept /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_dram_fill_accept
|
||||
# Label corrected from "vx_draw_fill_rsp_data" so it matches the displayed signal name.
add wave -noupdate -label vx_dram_fill_rsp_data -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_fill_rsp_data
|
||||
add wave -noupdate -label vx_dram_fill_rsp_addr -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_fill_rsp_addr
|
||||
add wave -noupdate -label llc_snp_req /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/llc_snp_req
|
||||
add wave -noupdate -label llc_snp_req_delay /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/llc_snp_req_delay
|
||||
add wave -noupdate -label out_break /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_ebreak
|
||||
add wave -noupdate -label warp_pc -radix hexadecimal -radixshowbase 0 {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_pc}
|
||||
add wave -noupdate -label scheduled_warp {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/scheduled_warp}
|
||||
add wave -noupdate -label thread_mask {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/thread_mask}
|
||||
add wave -noupdate -label warp_num {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_num}
|
||||
add wave -noupdate -label warp_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_active}
|
||||
add wave -noupdate -label warp_stalled {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_stalled}
|
||||
add wave -noupdate -label warp_lock {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_lock}
|
||||
add wave -noupdate -label use_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/use_active}
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 2} {360293 ps} 0}
|
||||
quietly wave cursor active 1
|
||||
configure wave -namecolwidth 195
|
||||
configure wave -valuecolwidth 100
|
||||
configure wave -justifyvalue left
|
||||
configure wave -signalnamewidth 0
|
||||
configure wave -snapdistance 10
|
||||
configure wave -datasetprefix 0
|
||||
configure wave -rowmargin 4
|
||||
configure wave -childrowmargin 2
|
||||
configure wave -gridoffset 0
|
||||
configure wave -gridperiod 1
|
||||
configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ps
|
||||
update
|
||||
WaveRestoreZoom {346453 ps} {711141 ps}
|
||||
@@ -1,97 +0,0 @@
|
||||
|
||||
|
||||
## Required tools
|
||||
# gcc (>4.9)
|
||||
# libjson
|
||||
# python
|
||||
# Quartus
|
||||
# RTL Simulator (VCS or ModelSim or QuestaSim)
|
||||
|
||||
|
||||
|
||||
## Download OPAE SDK from https://github.com/OPAE/opae-sdk/archive/1.4.0-1.tar.gz
|
||||
cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/
|
||||
|
||||
## Update the following file based on /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh
|
||||
# ./opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh
|
||||
|
||||
|
||||
|
||||
###################################################################################################
|
||||
################################### TO BE DONE EVERY TIME #########################################
|
||||
###################################################################################################
|
||||
## Change the shell to bash before running
|
||||
bash
|
||||
|
||||
## Setup Environment
|
||||
## Running the default script results in multiple versions of libcurl during cmake.
|
||||
#source /nethome/achawda6/specialProblem/rg_intel_fpga_end_19.3.sh
|
||||
source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh
|
||||
|
||||
## Setup the variables for using the Quartus modelsim
|
||||
source /nethome/achawda6/specialProblem/modelsim_env.sh
|
||||
|
||||
## Run this to setup the environment variables
|
||||
source /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh
|
||||
|
||||
## gcc version should be greater than 4.9 to support c++14
|
||||
source /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/env_check.sh
|
||||
|
||||
export PATH=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall/bin:${PATH}
|
||||
export FPGA_BBB_CCI_SRC=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb
|
||||
####################################################################################################
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Setup OPAE
|
||||
mkdir mybuild
|
||||
cd mybuild
|
||||
|
||||
## Update the directory path where you want to install OPAE
|
||||
cmake .. -DBUILD_ASE=1 -DCMAKE_INSTALL_PREFIX=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall
|
||||
make
|
||||
make install
|
||||
|
||||
|
||||
|
||||
|
||||
## Setup ASE
|
||||
## Add the installed OPAE path in PATH
|
||||
export PATH=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall/bin:${PATH}
|
||||
|
||||
## Use this version of HDL files
|
||||
/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/afu_sim_setup --sources=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/rtl/sources_ase_server.txt run1Build
|
||||
cd run1Build/
|
||||
python scripts/ipc_clean.py
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Running Sample
|
||||
## Download opae-bbb from https://github.com/OPAE/intel-fpga-bbb
|
||||
cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1
|
||||
git clone https://github.com/OPAE/intel-fpga-bbb
|
||||
cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb
|
||||
mkdir mybuild
|
||||
cd mybuild
|
||||
cmake .. -DCMAKE_INSTALL_PREFIX=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall
|
||||
make
|
||||
make install
|
||||
|
||||
export FPGA_BBB_CCI_SRC=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Running hello world
|
||||
cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb/samples/tutorial/01_hello_world
|
||||
afu_sim_setup --source hw/rtl/sources.txt build_sim
|
||||
cd build_sim
|
||||
## Update libstdc++6 if it errors out
|
||||
make
|
||||
make sim
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh
|
||||
export PATH=/tools/opae/1.4.0/bin:/tools/reconfig/intel/19.3/modelsim_ase/bin:$PATH
|
||||
# Prepend the OPAE libraries to the existing library search path. The previous
# value appended $PATH (executable directories) and discarded any
# LD_LIBRARY_PATH already set by the sourced Quartus environment script.
export LD_LIBRARY_PATH=/tools/opae/1.4.0/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
|
||||
export QUARTUS_HOME=$QUARTUS_ROOTDIR
|
||||
export MTI_HOME=/tools/reconfig/intel/19.3/modelsim_ase
|
||||
export FPGA_FAMILY=arria10
|
||||
@@ -1,23 +0,0 @@
|
||||
|
||||
|
||||
all: stub
|
||||
|
||||
stub:
|
||||
$(MAKE) -C stub
|
||||
|
||||
opae:
|
||||
$(MAKE) -C opae
|
||||
|
||||
rtlsim:
|
||||
$(MAKE) -C rtlsim
|
||||
|
||||
simx:
|
||||
$(MAKE) -C simx
|
||||
|
||||
# Clean every driver flavour. The first sub-directory is "stub" to match the
# directory built by the `stub` target (it previously referenced "dummy").
clean:
	$(MAKE) clean -C stub
	$(MAKE) clean -C opae
	$(MAKE) clean -C rtlsim
	$(MAKE) clean -C simx
|
||||
|
||||
# Every target here shares its name with a sub-directory (or is a pure action),
# so all of them must be phony. `stub` was missing, which made `make all` a
# no-op whenever the stub/ directory already existed.
.PHONY: all stub opae rtlsim simx clean
|
||||
@@ -1,72 +0,0 @@
|
||||
#ifndef __VX_DRIVER_H__
|
||||
#define __VX_DRIVER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void* vx_device_h;
|
||||
|
||||
typedef void* vx_buffer_h;
|
||||
|
||||
// device caps ids
|
||||
#define VX_CAPS_VERSION 0x0
|
||||
#define VX_CAPS_MAX_CORES 0x1
|
||||
#define VX_CAPS_MAX_WARPS 0x2
|
||||
#define VX_CAPS_MAX_THREADS 0x3
|
||||
#define VX_CAPS_CACHE_LINESIZE 0x4
|
||||
#define VX_CAPS_LOCAL_MEM_SIZE 0x5
|
||||
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
|
||||
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
|
||||
|
||||
// return device configurations
|
||||
int vx_dev_caps(int caps_id);
|
||||
|
||||
// open the device and connect to it
|
||||
int vx_dev_open(vx_device_h* hdevice);
|
||||
|
||||
// Close the device when all the operations are done
|
||||
int vx_dev_close(vx_device_h hdevice);
|
||||
|
||||
// Allocate shared buffer with device
|
||||
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
|
||||
|
||||
// Get host pointer address
|
||||
volatile void* vx_host_ptr(vx_buffer_h hbuffer);
|
||||
|
||||
// release buffer
|
||||
int vx_buf_release(vx_buffer_h hbuffer);
|
||||
|
||||
// allocate device memory and return address
|
||||
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
|
||||
|
||||
// Flush the device caches for the given device memory range
|
||||
int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size);
|
||||
|
||||
// Copy bytes from buffer to device local memory
|
||||
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);
|
||||
|
||||
// Copy bytes from device local memory to buffer
|
||||
int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dst_offset);
|
||||
|
||||
// Start device execution
|
||||
int vx_start(vx_device_h hdevice);
|
||||
|
||||
// Wait for device ready with milliseconds timeout
|
||||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
|
||||
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
|
||||
|
||||
// upload kernel bytes to device
|
||||
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size);
|
||||
|
||||
// upload kernel file to device
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __VX_DRIVER_H__
|
||||
@@ -1,68 +0,0 @@
|
||||
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I/tools/opae/1.4.0/include -I../../../runtime
|
||||
|
||||
LDFLAGS += -L/tools/opae/1.4.0/lib
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
|
||||
# data relocation and protection
|
||||
LDFLAGS +=-z relro -z now
|
||||
|
||||
# stack buffer overrun detection
|
||||
CXXFLAGS +=-fstack-protector
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
CXXFLAGS += -DGLOBAL_BLOCK_SIZE_BYTES=64
|
||||
|
||||
LDFLAGS += -luuid
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
FPGA_LIBS += -lopae-c
|
||||
|
||||
ASE_LIBS += -lopae-c-ase
|
||||
|
||||
LIB_DIR=../lib
|
||||
|
||||
ASE_DIR = ase
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
PROJECT_ASE = $(ASE_DIR)/libvortex.so
|
||||
|
||||
AFU_JSON_INFO = vortex_afu.h
|
||||
|
||||
SRCS = vortex.cpp ../vx_utils.cpp
|
||||
|
||||
all: $(PROJECT) $(PROJECT_ASE)
|
||||
|
||||
# AFU info from JSON file, including AFU UUID
|
||||
$(AFU_JSON_INFO): ../../hw/vortex_afu.json
|
||||
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $@
|
||||
|
||||
# ASE (simulation) flavour of the driver library.
# The output directory is an order-only prerequisite: it must exist before
# linking, but its timestamp (which changes whenever a file is written into it)
# must not force the library to be rebuilt on every invocation.
$(PROJECT_ASE): $(SRCS) | $(ASE_DIR)
	$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@
|
||||
|
||||
vortex.o: vortex.cpp $(AFU_JSON_INFO)
|
||||
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
|
||||
|
||||
$(ASE_DIR):
|
||||
mkdir -p ase
|
||||
|
||||
.depend: $(SRCS) $(AFU_JSON_INFO)
|
||||
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) $(PROJECT_ASE) $(AFU_JSON_INFO) *.o .depend
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
||||
@@ -1,338 +0,0 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <uuid/uuid.h>
|
||||
#include <opae/fpga.h>
|
||||
#include <vortex.h>
|
||||
#include "vortex_afu.h"
|
||||
|
||||
// Evaluates an OPAE call once. If the result is not FPGA_OK, prints the
// stringified expression, the numeric result and its fpgaErrStr() text, then
// returns -1 from the enclosing function. Usable as a single statement.
#define CHECK_RES(_expr)                                    \
    do {                                                    \
        fpga_result res = _expr;                            \
        if (res != FPGA_OK) {                               \
            printf("OPAE Error: '%s' returned %d, %s!\n",   \
                   #_expr, (int)res, fpgaErrStr(res));      \
            return -1;                                      \
        }                                                   \
    } while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ
|
||||
#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE
|
||||
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
|
||||
#define CMD_TYPE_CLFLUSH AFU_IMAGE_CMD_TYPE_CLFLUSH
|
||||
|
||||
#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4)
|
||||
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
|
||||
#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4)
|
||||
#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4)
|
||||
#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct vx_device_ {
|
||||
fpga_handle fpga;
|
||||
size_t mem_allocation;
|
||||
} vx_device_t;
|
||||
|
||||
typedef struct vx_buffer_ {
|
||||
uint64_t wsid;
|
||||
volatile void* host_ptr;
|
||||
uint64_t io_addr;
|
||||
vx_device_h hdevice;
|
||||
size_t size;
|
||||
} vx_buffer_t;
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Opens the accelerator whose GUID is AFU_ACCEL_UUID and returns a device
// handle through `hdevice`.
//
// Returns 0 on success, -1 if `hdevice` is null, if any OPAE call fails, if no
// matching accelerator is found, or if the device object cannot be allocated.
// On every failure path the OPAE filter, token and handle acquired so far are
// released.
extern int vx_dev_open(vx_device_h* hdevice) {
    if (nullptr == hdevice)
        return -1;

    // ensure that the block size is 64
    assert(64 == vx_dev_caps(VX_CAPS_CACHE_LINESIZE));

    // Set up a filter that will search for an accelerator
    fpga_properties filter = nullptr;
    if (FPGA_OK != fpgaGetProperties(nullptr, &filter))
        return -1;

    // Restrict the filter to accelerators carrying the desired UUID
    fpga_guid guid;
    if (FPGA_OK != fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR)
     || 0 != uuid_parse(AFU_ACCEL_UUID, guid)
     || FPGA_OK != fpgaPropertiesSetGUID(filter, guid)) {
        fpgaDestroyProperties(&filter);
        return -1;
    }

    // Do the search across the available FPGA contexts. num_matches starts at
    // zero so that a failed enumeration can never be mistaken for a match.
    fpga_token accel_token;
    uint32_t num_matches = 0;
    fpga_result res = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);

    // The filter is not needed anymore, whatever the outcome
    fpgaDestroyProperties(&filter);

    if (FPGA_OK != res || num_matches < 1) {
        fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
        return -1;
    }

    // Open accelerator
    fpga_handle accel_handle;
    res = fpgaOpen(accel_token, &accel_handle, 0);

    // Done with the token; destroy it on the failure path as well so it
    // does not leak when fpgaOpen() fails.
    fpgaDestroyToken(&accel_token);

    if (FPGA_OK != res)
        return -1;

    // allocate device object
    vx_device_t* device = (vx_device_t*)malloc(sizeof(vx_device_t));
    if (nullptr == device) {
        fpgaClose(accel_handle);
        return -1;
    }

    device->fpga = accel_handle;
    // device memory allocations start at the configured base address
    device->mem_allocation = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);

    *hdevice = device;

    return 0;
}
|
||||
|
||||
// Closes the accelerator handle held by `hdevice` and frees the device object.
// Returns -1 when `hdevice` is null, 0 otherwise.
extern int vx_dev_close(vx_device_h hdevice) {
    vx_device_t* const dev = static_cast<vx_device_t*>(hdevice);
    if (dev == nullptr)
        return -1;

    fpgaClose(dev->fpga);
    free(dev);
    return 0;
}
|
||||
|
||||
// Reserves `size` bytes (rounded up to a cache line multiple) of device local
// memory and stores the start address in `*dev_maddr`.
// Allocation is a simple forward-moving cursor; nothing is ever given back.
// Returns -1 on a null/zero argument or when the request does not fit below
// VX_CAPS_LOCAL_MEM_SIZE, 0 on success.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
    if (hdevice == nullptr)
        return -1;
    if (dev_maddr == nullptr)
        return -1;
    if (size == 0)
        return -1;

    vx_device_t* const dev = static_cast<vx_device_t*>(hdevice);

    const size_t aligned = align_size(size);
    const size_t capacity = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
    const size_t base = dev->mem_allocation;

    if (base + aligned > capacity)
        return -1;

    *dev_maddr = base;
    dev->mem_allocation = base + aligned;
    return 0;
}
|
||||
|
||||
// Allocates a host buffer shared with the accelerator and returns a buffer
// handle through `hbuffer`.
// The OPAE buffer is prepared with the cache-line-aligned size, while the
// buffer object records the size the caller asked for.
// Returns -1 on a null/zero argument or on any OPAE/allocation failure
// (releasing the OPAE buffer if it was already prepared), 0 on success.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
    if (hdevice == nullptr || size == 0 || hbuffer == nullptr)
        return -1;

    vx_device_t* const dev = static_cast<vx_device_t*>(hdevice);
    const size_t aligned = align_size(size);

    void* mapped;
    uint64_t workspace_id;
    if (fpgaPrepareBuffer(dev->fpga, aligned, &mapped, &workspace_id, 0) != FPGA_OK)
        return -1;

    // Get the address of the buffer as seen by the accelerator
    uint64_t bus_addr;
    if (fpgaGetIOAddress(dev->fpga, workspace_id, &bus_addr) != FPGA_OK) {
        fpgaReleaseBuffer(dev->fpga, workspace_id);
        return -1;
    }

    // allocate and fill the buffer object
    vx_buffer_t* const buf = static_cast<vx_buffer_t*>(malloc(sizeof(vx_buffer_t)));
    if (buf == nullptr) {
        fpgaReleaseBuffer(dev->fpga, workspace_id);
        return -1;
    }

    buf->wsid     = workspace_id;
    buf->host_ptr = mapped;
    buf->io_addr  = bus_addr;
    buf->hdevice  = hdevice;
    buf->size     = size;

    *hbuffer = buf;
    return 0;
}
|
||||
|
||||
// Returns the host pointer of a shared buffer, or nullptr for a null handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
    vx_buffer_t* const buf = static_cast<vx_buffer_t*>(hbuffer);
    if (buf == nullptr)
        return nullptr;
    return buf->host_ptr;
}
|
||||
|
||||
// Releases the OPAE buffer behind `hbuffer` and frees the buffer object.
// Returns -1 when `hbuffer` is null, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
    vx_buffer_t* const buf = static_cast<vx_buffer_t*>(hbuffer);
    if (buf == nullptr)
        return -1;

    vx_device_t* const owner = static_cast<vx_device_t*>(buf->hdevice);
    fpgaReleaseBuffer(owner->fpga, buf->wsid);
    free(buf);
    return 0;
}
|
||||
|
||||
// Polls the status CSR until it reads 0 or the timeout expires.
//
// `timeout` is in milliseconds. A negative value waits indefinitely (the
// callers in this file pass -1), and 0 polls exactly once.
// The poll interval is 1 s when built with USE_ASE and 1 ms otherwise.
//
// Returns -1 if `hdevice` is null or the MMIO read fails, 0 otherwise.
// NOTE: as before, 0 is also returned when the timeout expires while the
// status CSR is still non-zero.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
    if (nullptr == hdevice)
        return -1;

    vx_device_t *device = ((vx_device_t*)hdevice);

    struct timespec sleep_time;

#if defined(USE_ASE)
    sleep_time.tv_sec = 1;
    sleep_time.tv_nsec = 0;
#else
    sleep_time.tv_sec = 0;
    sleep_time.tv_nsec = 1000000;
#endif

    // poll interval in milliseconds
    const long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);

    for (;;) {
        uint64_t data = 0;
        CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data));
        if (0 == data || 0 == timeout)
            break;
        nanosleep(&sleep_time, nullptr);
        // Count a finite timeout down and clamp it at zero. Without the clamp,
        // a timeout that is not a multiple of the poll interval stepped past
        // zero and the loop never terminated. Negative (infinite) timeouts are
        // left untouched.
        if (timeout > 0) {
            timeout = (timeout > sleep_time_ms) ? (timeout - sleep_time_ms) : 0;
        }
    }

    return 0;
}
|
||||
|
||||
// Copies `size` bytes from the shared buffer (starting at `src_offset`) to
// device local memory at `dev_maddr` by issuing a CMD_TYPE_WRITE command.
// Blocks until the device reports ready both before and after the command.
// Returns -1 on a null handle, zero size, out-of-bounds range, or MMIO
// failure; 0 on success.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
    if (hbuffer == nullptr)
        return -1;
    if (size == 0)
        return -1;

    vx_buffer_t* const buffer = static_cast<vx_buffer_t*>(hbuffer);
    vx_device_t* const device = static_cast<vx_device_t*>(buffer->hdevice);

    // the requested range must lie inside the shared buffer
    if (size + src_offset > buffer->size)
        return -1;

    // the device must be idle before a new command is programmed
    if (0 != vx_ready_wait(buffer->hdevice, -1))
        return -1;

    // program the transfer parameters, then trigger the command
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + src_offset));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));

    // block until the write command has completed
    return (0 != vx_ready_wait(buffer->hdevice, -1)) ? -1 : 0;
}
|
||||
|
||||
// Copies `size` bytes from device local memory at `dev_maddr` into the shared
// buffer (starting at `dest_offset`) by issuing a CMD_TYPE_READ command.
// Blocks until the device reports ready both before and after the command.
// Returns -1 on a null handle, zero size, out-of-bounds range, or MMIO
// failure; 0 on success.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
    if (hbuffer == nullptr)
        return -1;
    if (size == 0)
        return -1;

    vx_buffer_t* const buffer = static_cast<vx_buffer_t*>(hbuffer);
    vx_device_t* const device = static_cast<vx_device_t*>(buffer->hdevice);

    // the requested range must lie inside the shared buffer
    if (size + dest_offset > buffer->size)
        return -1;

    // the device must be idle before a new command is programmed
    if (0 != vx_ready_wait(buffer->hdevice, -1))
        return -1;

    // program the transfer parameters, then trigger the command
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + dest_offset));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));

    // block until the read command has completed
    return (0 != vx_ready_wait(buffer->hdevice, -1)) ? -1 : 0;
}
|
||||
|
||||
// Issues a CMD_TYPE_CLFLUSH command for the device memory range
// [dev_maddr, dev_maddr + size).
// Blocks until the device reports ready both before and after the command.
// Returns -1 on a null handle, zero size, or MMIO failure; 0 on success.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
    if (hdevice == nullptr)
        return -1;
    if (size == 0)
        return -1;

    vx_device_t* const device = static_cast<vx_device_t*>(hdevice);

    // the device must be idle before a new command is programmed
    if (0 != vx_ready_wait(hdevice, -1))
        return -1;

    // program the range, then trigger the flush
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
    CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));

    // block until the flush command has completed
    return (0 != vx_ready_wait(hdevice, -1)) ? -1 : 0;
}
|
||||
|
||||
/// Waits for the device to become ready and then writes the RUN command.
/// Does not wait for the run to finish. Returns 0 on success, -1 on a null
/// handle or a failed wait.
extern int vx_start(vx_device_h hdevice) {
  if (!hdevice) {
    return -1;
  }

  vx_device_t* const dev = static_cast<vx_device_t*>(hdevice);

  // The device must be idle before a new command is programmed.
  if (0 != vx_ready_wait(hdevice, -1)) {
    return -1;
  }

  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));

  return 0;
}
|
||||
2
driver/sw/rtlsim/.gitignore
vendored
2
driver/sw/rtlsim/.gitignore
vendored
@@ -1,2 +0,0 @@
|
||||
obj_dir
|
||||
*.so
|
||||
@@ -1,50 +0,0 @@
|
||||
# Builds libvortex.so backed by the Verilator RTL simulation.

# CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
CFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
# CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

USE_MULTICORE=1

CFLAGS += -I../../include -I../../../../rtl/simulate -I../../../../runtime

CFLAGS += -fPIC

CFLAGS += -DUSE_RTLSIM

LDFLAGS += -shared -pthread

ifdef USE_MULTICORE
  CFLAGS += -DUSE_MULTICORE
  RTL_TOP = Vortex_SOC
else
  VL_FLAGS += -DSINGLE_CORE_BENCH
  RTL_TOP = Vortex
endif

SRCS = vortex.cpp ../vx_utils.cpp ../../../rtl/simulate/simulator.cpp

RTL_INCLUDE = -I../../../rtl -I../../../rtl/interfaces -I../../../rtl/cache -I../../../rtl/VX_cache -I../../../rtl/shared_memory -I../../../rtl/pipe_regs -I../../../rtl/compat

# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
#VL_FLAGS += --threads $(THREADS)

VL_FLAGS += -Wno-UNDRIVEN --Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH -Wno-UNSIGNED -Wno-UNOPTFLAT -Wno-LITENDIAN -Wno-BLKLOOPINIT

# Debugging
#VL_FLAGS += --trace -DVL_DEBUG=1
#CFLAGS += -DVCD_OUTPUT

PROJECT = libvortex.so

# None of these targets produce a file of the same name.
.PHONY: all clean build_config

all: $(PROJECT)

build_config:
	(cd ../../../rtl && ./gen_config.py --rtl_locations)

$(PROJECT): $(SRCS) build_config
	verilator --exe --cc $(RTL_TOP).v $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	$(MAKE) -j -C obj_dir -f V$(RTL_TOP).mk

clean:
	rm -rf $(PROJECT) obj_dir
|
||||
@@ -1,310 +0,0 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include <ram.h>
|
||||
#include <simulator.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
~vx_buffer() {
|
||||
if (data_) {
|
||||
free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
void* data() const {
|
||||
return data_;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
vx_device* device() const {
|
||||
return device_;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: is_done_(false)
|
||||
, simulator_(&ram_) {
|
||||
simulator_.reset();
|
||||
thread_ = new std::thread(__thread_proc__, this);
|
||||
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
if (thread_) {
|
||||
mutex_.lock();
|
||||
is_done_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
thread_->join();
|
||||
delete thread_;
|
||||
}
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
size_t asize = align_size(size);
|
||||
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
mem_allocation_ += asize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
size_t asize = align_size(size);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
|
||||
}*/
|
||||
|
||||
ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
|
||||
}*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int flush_caches(size_t dev_maddr, size_t size) {
|
||||
|
||||
mutex_.lock();
|
||||
simulator_.flush_caches(dev_maddr, size);
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start() {
|
||||
|
||||
mutex_.lock();
|
||||
simulator_.reset();
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_busy = simulator_.is_busy();
|
||||
mutex_.unlock();
|
||||
|
||||
if (!is_busy || 0 == timeout_sec--)
|
||||
break;
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void thread_proc() {
|
||||
std::cout << "Device ready..." << std::endl;
|
||||
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_done = is_done_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (is_done)
|
||||
break;
|
||||
|
||||
mutex_.lock();
|
||||
simulator_.step();
|
||||
mutex_.unlock();
|
||||
}
|
||||
|
||||
std::cout << "Device shutdown..." << std::endl;
|
||||
}
|
||||
|
||||
static void __thread_proc__(vx_device* device) {
|
||||
device->thread_proc();
|
||||
}
|
||||
|
||||
bool is_done_;
|
||||
size_t mem_allocation_;
|
||||
RAM ram_;
|
||||
Simulator simulator_;
|
||||
std::thread* thread_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Creates a simulated device and stores its handle in *hdevice.
/// Returns 0 on success, -1 when `hdevice` is null.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (!hdevice) {
    return -1;
  }

  vx_device* const created = new vx_device();
  *hdevice = created;
  return 0;
}
|
||||
|
||||
/// Destroys a device created by vx_dev_open().
/// Returns 0 on success, -1 when the handle is null.
extern int vx_dev_close(vx_device_h hdevice) {
  if (!hdevice) {
    return -1;
  }

  delete static_cast<vx_device*>(hdevice);
  return 0;
}
|
||||
|
||||
/// Reserves `size` bytes of device-local memory and reports the address in
/// *dev_maddr. Returns -1 on a null handle, null output pointer or zero
/// size; otherwise forwards the result of vx_device::alloc_local_mem().
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool args_ok = hdevice && dev_maddr && (size != 0);
  if (!args_ok) {
    return -1;
  }

  return static_cast<vx_device*>(hdevice)->alloc_local_mem(size, dev_maddr);
}
|
||||
|
||||
/// Asks the simulated device to flush its caches for the given range.
/// Returns -1 on a null handle or zero size; otherwise forwards the result
/// of vx_device::flush_caches().
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
  if (!hdevice || size == 0) {
    return -1;
  }

  return static_cast<vx_device*>(hdevice)->flush_caches(dev_maddr, size);
}
|
||||
|
||||
|
||||
/// Allocates a host buffer of `size` bytes tied to `hdevice` and returns its
/// handle through *hbuffer. Returns -1 on invalid arguments or when the
/// backing storage could not be allocated; *hbuffer is untouched on failure.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (!hdevice || size == 0 || !hbuffer) {
    return -1;
  }

  vx_buffer* const created = new vx_buffer(size, static_cast<vx_device*>(hdevice));
  if (created->data() != nullptr) {
    *hbuffer = created;
    return 0;
  }

  // Storage allocation failed inside the buffer; drop the empty shell.
  delete created;
  return -1;
}
|
||||
|
||||
/// Returns the host address of the buffer's storage, or null for a null handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
  if (!hbuffer) {
    return nullptr;
  }

  return static_cast<vx_buffer*>(hbuffer)->data();
}
|
||||
|
||||
/// Destroys a buffer created by vx_alloc_shared_mem().
/// Returns 0 on success, -1 when the handle is null.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  if (!hbuffer) {
    return -1;
  }

  delete static_cast<vx_buffer*>(hbuffer);
  return 0;
}
|
||||
|
||||
/// Copies `size` bytes, starting `src_offset` bytes into the host buffer, to
/// device memory at `dev_maddr`.
/// Returns -1 on a null handle, zero size or a range outside the buffer;
/// otherwise forwards the result of vx_device::upload().
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;

  auto buffer = static_cast<vx_buffer*>(hbuffer);

  // Subtraction form: `size + src_offset` could wrap and pass the check.
  if (src_offset > buffer->size()
   || size > buffer->size() - src_offset)
    return -1;

  return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
|
||||
|
||||
/// Copies `size` bytes from device memory at `dev_maddr` into the host
/// buffer, starting `dest_offset` bytes into it.
/// Returns -1 on a null handle, zero size or a range outside the buffer;
/// otherwise forwards the result of vx_device::download().
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;

  auto buffer = static_cast<vx_buffer*>(hbuffer);

  // Subtraction form: `size + dest_offset` could wrap and pass the check.
  if (dest_offset > buffer->size()
   || size > buffer->size() - dest_offset)
    return -1;

  return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
|
||||
|
||||
/// Starts execution on the simulated device.
/// Returns -1 for a null handle; otherwise forwards vx_device::start().
extern int vx_start(vx_device_h hdevice) {
  if (!hdevice) {
    return -1;
  }

  return static_cast<vx_device*>(hdevice)->start();
}
|
||||
|
||||
/// Waits for the simulated device, passing `timeout` through unchanged.
/// Returns -1 for a null handle; otherwise forwards vx_device::wait().
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  if (!hdevice) {
    return -1;
  }

  return static_cast<vx_device*>(hdevice)->wait(timeout);
}
|
||||
2
driver/sw/simx/.gitignore
vendored
2
driver/sw/simx/.gitignore
vendored
@@ -1,2 +0,0 @@
|
||||
obj_dir
|
||||
libvortex.so
|
||||
@@ -1,32 +0,0 @@
|
||||
# Builds libvortex.so backed by the simX simulator.

CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime

CFLAGS += -fPIC

CFLAGS += -DUSE_SIMX

LDFLAGS += -shared -pthread

SRCS = vortex.cpp ../vx_utils.cpp ../../../simX/args.cpp ../../../simX/mem.cpp ../../../simX/core.cpp ../../../simX/instruction.cpp ../../../simX/enc.cpp ../../../simX/util.cpp

RTL_TOP = ../../../simX/cache_simX.v

RTL_INCLUDE = -I../../../old_rtl -I../../../old_rtl/interfaces -I../../../old_rtl/cache -I../../../old_rtl/shared_memory

THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
VL_FLAGS += --threads $(THREADS)

VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH

PROJECT = libvortex.so

# Neither target produces a file of the same name.
.PHONY: all clean

all: $(PROJECT)

$(PROJECT): $(SRCS)
	verilator --exe --cc $(RTL_TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	$(MAKE) -j -C obj_dir -f Vcache_simX.mk

clean:
	rm -rf $(PROJECT) obj_dir
|
||||
@@ -1,318 +0,0 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include <core.h>
|
||||
#include <config.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static size_t align_size(size_t size) {
|
||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
~vx_buffer() {
|
||||
if (data_) {
|
||||
free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
void* data() const {
|
||||
return data_;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
vx_device* device() const {
|
||||
return device_;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: is_done_(false)
|
||||
, is_running_(false)
|
||||
, thread_(__thread_proc__, this) {
|
||||
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
mutex_.lock();
|
||||
is_done_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
thread_.join();
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
auto asize = align_size(size);
|
||||
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
mem_allocation_ += asize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
auto asize = align_size(size);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
|
||||
}*/
|
||||
|
||||
ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
|
||||
}*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start() {
|
||||
|
||||
mutex_.lock();
|
||||
is_running_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_running = is_running_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (!is_running || 0 == timeout_sec--)
|
||||
break;
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void run() {
|
||||
Harp::ArchDef arch("rv32i", NW, NT);
|
||||
Harp::WordDecoder dec(arch);
|
||||
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
Harp::Core core(arch, dec, mu);
|
||||
mu.attach(ram_, 0);
|
||||
|
||||
while (core.running()) {
|
||||
core.step();
|
||||
}
|
||||
core.printStats();
|
||||
}
|
||||
|
||||
void thread_proc() {
|
||||
std::cout << "Device ready..." << std::endl;
|
||||
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_done = is_done_;
|
||||
bool is_running = is_running_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (is_done)
|
||||
break;
|
||||
|
||||
if (is_running) {
|
||||
std::cout << "Device running..." << std::endl;
|
||||
|
||||
this->run();
|
||||
|
||||
mutex_.lock();
|
||||
is_running_ = false;
|
||||
mutex_.unlock();
|
||||
|
||||
std::cout << "Device ready..." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Device shutdown..." << std::endl;
|
||||
}
|
||||
|
||||
static void __thread_proc__(vx_device* device) {
|
||||
device->thread_proc();
|
||||
}
|
||||
|
||||
bool is_done_;
|
||||
bool is_running_;
|
||||
size_t mem_allocation_;
|
||||
std::thread thread_;
|
||||
Harp::RAM ram_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Creates a simX-backed device and stores its handle in *hdevice.
/// Returns 0 on success, -1 when `hdevice` is null.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (!hdevice) {
    return -1;
  }

  vx_device* const created = new vx_device();
  *hdevice = created;
  return 0;
}
|
||||
|
||||
/// Destroys a device created by vx_dev_open().
/// Returns 0 on success, -1 when the handle is null.
extern int vx_dev_close(vx_device_h hdevice) {
  if (!hdevice) {
    return -1;
  }

  delete static_cast<vx_device*>(hdevice);
  return 0;
}
|
||||
|
||||
/// Reserves `size` bytes of device-local memory and reports the address in
/// *dev_maddr. Returns -1 on a null handle, null output pointer or zero
/// size; otherwise forwards the result of vx_device::alloc_local_mem().
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool args_ok = hdevice && dev_maddr && (size != 0);
  if (!args_ok) {
    return -1;
  }

  return static_cast<vx_device*>(hdevice)->alloc_local_mem(size, dev_maddr);
}
|
||||
|
||||
/// Cache flushing is not needed by simX, so this only validates its
/// arguments: -1 for a null handle or zero size, 0 otherwise.
extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) {
  const bool args_ok = (hdevice != nullptr) && (size != 0);
  return args_ok ? 0 : -1;
}
|
||||
|
||||
/// Allocates a host buffer of `size` bytes tied to `hdevice` and returns its
/// handle through *hbuffer. Returns -1 on invalid arguments or when the
/// backing storage could not be allocated; *hbuffer is untouched on failure.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (!hdevice || size == 0 || !hbuffer) {
    return -1;
  }

  vx_buffer* const created = new vx_buffer(size, static_cast<vx_device*>(hdevice));
  if (created->data() != nullptr) {
    *hbuffer = created;
    return 0;
  }

  // Storage allocation failed inside the buffer; drop the empty shell.
  delete created;
  return -1;
}
|
||||
|
||||
/// Returns the host address of the buffer's storage, or null for a null handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
  if (!hbuffer) {
    return nullptr;
  }

  return static_cast<vx_buffer*>(hbuffer)->data();
}
|
||||
|
||||
/// Destroys a buffer created by vx_alloc_shared_mem().
/// Returns 0 on success, -1 when the handle is null.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  if (!hbuffer) {
    return -1;
  }

  delete static_cast<vx_buffer*>(hbuffer);
  return 0;
}
|
||||
|
||||
/// Copies `size` bytes, starting `src_offset` bytes into the host buffer, to
/// device memory at `dev_maddr`.
/// Returns -1 on a null handle, zero size or a range outside the buffer;
/// otherwise forwards the result of vx_device::upload().
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;

  auto buffer = static_cast<vx_buffer*>(hbuffer);

  // Subtraction form: `size + src_offset` could wrap and pass the check.
  if (src_offset > buffer->size()
   || size > buffer->size() - src_offset)
    return -1;

  return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
|
||||
|
||||
/// Copies `size` bytes from device memory at `dev_maddr` into the host
/// buffer, starting `dest_offset` bytes into it.
/// Returns -1 on a null handle, zero size or a range outside the buffer;
/// otherwise forwards the result of vx_device::download().
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;

  auto buffer = static_cast<vx_buffer*>(hbuffer);

  // Subtraction form: `size + dest_offset` could wrap and pass the check.
  if (dest_offset > buffer->size()
   || size > buffer->size() - dest_offset)
    return -1;

  return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
|
||||
|
||||
/// Starts execution on the simX device.
/// Returns -1 for a null handle; otherwise forwards vx_device::start().
extern int vx_start(vx_device_h hdevice) {
  if (!hdevice) {
    return -1;
  }

  return static_cast<vx_device*>(hdevice)->start();
}
|
||||
|
||||
/// Waits for the simX device, passing `timeout` through unchanged.
/// Returns -1 for a null handle; otherwise forwards vx_device::wait().
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  if (!hdevice) {
    return -1;
  }

  return static_cast<vx_device*>(hdevice)->wait(timeout);
}
|
||||
@@ -1,20 +0,0 @@
|
||||
# Builds the stub libvortex.so used to link applications without a device.

CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../include -I../../../runtime

CXXFLAGS += -fPIC

LDFLAGS += -shared -pthread

SRCS = vortex.cpp ../vx_utils.cpp

PROJECT = libvortex.so

# Neither target produces a file of the same name.
.PHONY: all clean

all: $(PROJECT)

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

clean:
	rm -rf $(PROJECT) obj_dir
|
||||
@@ -1,45 +0,0 @@
|
||||
#include <vortex.h>
|
||||
|
||||
extern int vx_dev_open(vx_device_h* /*hdevice*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_dev_close(vx_device_h /*hdevice*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_flush_caches(vx_device_h /*hdevice*/, size_t /*dev_maddr*/, size_t /*size*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern volatile void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*src_offset*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*dest_offset*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_start(vx_device_h /*hdevice*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
||||
return -1;
|
||||
}
|
||||
@@ -1,118 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <vortex.h>
|
||||
#include <config.h>
|
||||
|
||||
extern int vx_dev_caps(int caps_id) {
|
||||
switch (caps_id) {
|
||||
case VX_CAPS_VERSION:
|
||||
return 0;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
return NUMBER_CORES;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
return NW;
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
return NT;
|
||||
case VX_CAPS_CACHE_LINESIZE:
|
||||
return GLOBAL_BLOCK_SIZE_BYTES;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
return 0xffffffff;
|
||||
case VX_CAPS_ALLOC_BASE_ADDR:
|
||||
return 0x10000000;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
return 0x80000000;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Uploads `size` bytes of kernel image from `content` to the device,
/// starting at the address reported by VX_CAPS_KERNEL_BASE_ADDR.
///
/// The image is staged through a temporary 64 KiB shared buffer and copied
/// chunk by chunk. When built with USE_SIMX, a five-word startup stub is
/// first written to address 0 and a one-word stub to 0x70000000.
///
/// Returns 0 on success, -1 on null/empty content or when the staging
/// buffer cannot be allocated, or the error of the first failed copy.
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
  if (nullptr == content || 0 == size) {
    return -1;
  }

  const uint32_t staging_size = 65536;
  const uint32_t kernel_base = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);

  // Temporary staging buffer shared with the device.
  vx_buffer_h staging;
  if (vx_alloc_shared_mem(device, staging_size, &staging) != 0) {
    return -1;
  }

  auto host = (uint8_t*)vx_host_ptr(staging);
  int status = 0;

#if defined(USE_SIMX)
  {
    auto words = (uint32_t*)host;

    // default startup routine
    words[0] = 0xf1401073;
    words[1] = 0xf1401073;
    words[2] = 0x30101073;
    words[3] = 0x800000b7;
    words[4] = 0x000080e7;
    status = vx_copy_to_dev(staging, 0, 5 * 4, 0);

    if (0 == status) {
      // newlib io simulator trap
      words[0] = 0x00008067;
      status = vx_copy_to_dev(staging, 0x70000000, 4, 0);
    }
  }
#endif

  // Stream the image through the staging buffer; stop at the first failure.
  for (size_t offset = 0; 0 == status && offset < size; ) {
    const size_t chunk = std::min<size_t>(staging_size, size - offset);
    std::memcpy(host, (const uint8_t*)content + offset, chunk);
    status = vx_copy_to_dev(staging, kernel_base + offset, chunk, 0);
    offset += chunk;
  }

  vx_buf_release(staging);

  return status;
}
|
||||
|
||||
/// Reads the whole file `filename` and uploads it with
/// vx_upload_kernel_bytes().
///
/// Returns -1 when the name is null, the file cannot be opened, is empty or
/// cannot be read completely; otherwise the result of the upload.
extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
  if (nullptr == filename)
    return -1;

  // Kernel images are raw binaries: never let the stream translate bytes.
  std::ifstream ifs(filename, std::ios::binary);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    return -1;
  }

  // get length of file:
  ifs.seekg(0, ifs.end);
  const std::streamoff length = ifs.tellg(); // -1 when the stream failed
  ifs.seekg(0, ifs.beg);
  if (!ifs || length <= 0) {
    std::cout << "error: " << filename << " is empty or unreadable" << std::endl;
    return -1;
  }
  const size_t size = static_cast<size_t>(length);

  // allocate buffer
  auto content = new char [size];

  // read file content; only upload when every byte arrived
  int err = -1;
  if (ifs.read(content, length)) {
    err = vx_upload_kernel_bytes(device, content, size);
  } else {
    std::cout << "error: failed to read " << filename << std::endl;
  }

  // release buffer
  delete[] content;

  return err;
}
|
||||
@@ -1,67 +0,0 @@
|
||||
# Builds the `basic` host test and, on request, its RISC-V kernel image.

RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VX_RT_PATH ?= $(wildcard ../../../runtime)

VX_CC  = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy

VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s

VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections

VX_SRCS = kernel.c

CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../sw/include

LDFLAGS +=

PROJECT = basic

SRCS = basic.cpp

# None of these targets produce a file of the same name.
.PHONY: all clean run-fpga run-ase run-rtlsim run-simx

all: $(PROJECT)

kernel.dump: kernel.elf
	$(VX_DMP) -D kernel.elf > kernel.dump

kernel.hex: kernel.elf
	$(VX_CPY) -O ihex kernel.elf kernel.hex

kernel.bin: kernel.elf
	$(VX_CPY) -O binary kernel.elf kernel.bin

# The kernel is built from VX_SRCS, so that is what it must depend on
# (it previously depended on the host sources in SRCS).
kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/dummy -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o .depend

ifneq ($(MAKECMDGOALS),clean)
  -include .depend
endif
|
||||
Binary file not shown.
@@ -1,233 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <vortex.h>
|
||||
|
||||
// Selected test number; -1 (the default) is set by nothing but -t.
int test = -1;

/// Parses the command line with getopt.
///   -t <n>   store atoi(<n>) in `test`
///   -h, -?   print usage and exit(0)
/// Any other value returned by getopt terminates with exit(-1).
static void parse_args(int argc, char **argv) {
  static const char* const kOptions = "t:h?";

  for (int opt = getopt(argc, argv, kOptions);
       opt != -1;
       opt = getopt(argc, argv, kOptions)) {
    if ('t' == opt) {
      test = atoi(optarg);
      continue;
    }

    if ('h' == opt || '?' == opt) {
      std::cout << "Test." << std::endl;
      std::cout << "Usage: [-t testno][-h: help]" << std::endl;
      exit(0);
    }

    exit(-1);
  }
}
|
||||
|
||||
/// Derives the i-th test pattern from `value`: the value shifted left by
/// `i` bits, OR-ed with the low `i` bits of the original value.
///
/// Precondition: 0 <= i < 64 (callers pass word indices up to 63).
uint64_t shuffle(int i, uint64_t value) {
  // The mask must be built in 64 bits: `1 << i` is an int shift and is
  // undefined for i >= 31.
  const uint64_t low_mask = (uint64_t(1) << i) - 1;
  return (value << i) | (value & low_mask);
}
|
||||
|
||||
/// Round-trips `num_blocks` 64-byte blocks through device memory at
/// `address`: fills `sbuf` with shuffle() patterns derived from `value`,
/// copies it to the device, reads it back into `dbuf` and compares.
///
/// Returns 0 when every word matches, 1 on a mismatch, or the error code of
/// the copy that failed.
int run_memcopy_test(vx_buffer_h sbuf,
                     vx_buffer_h dbuf,
                     uint32_t address,
                     uint64_t value,
                     int num_blocks) {
  int ret;
  int errors = 0;

  // Each 64-byte block holds eight 64-bit words.
  const int num_words = 8 * num_blocks;

  // write sbuf data
  auto src = (uint64_t*)vx_host_ptr(sbuf);
  for (int i = 0; i < num_words; ++i) {
    src[i] = shuffle(i, value);
  }

  // write buffer to local memory
  std::cout << "write buffer to local memory" << std::endl;
  ret = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
  if (ret != 0)
    return ret;

  // read buffer from local memory
  std::cout << "read buffer from local memory" << std::endl;
  ret = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
  if (ret != 0)
    return ret;

  // verify result
  std::cout << "verify result" << std::endl;
  auto dst = (uint64_t*)vx_host_ptr(dbuf);
  for (int i = 0; i < num_words; ++i) {
    auto curr = dst[i];
    auto ref = shuffle(i, value);
    if (curr != ref) {
      // Words are 8 bytes apart; std::dec undoes the sticky std::hex so the
      // error count below is printed in decimal.
      std::cout << "error @ " << std::hex << (address + 8 * i)
                << ": actual " << curr << ", expected " << ref
                << std::dec << std::endl;
      ++errors;
    }
  }

  if (errors != 0) {
    std::cout << "Found " << errors << " errors!" << std::endl;
    std::cout << "FAILED!" << std::endl;
    return 1;
  }

  return 0;
}
|
||||
|
||||
/// Runs the kernel image `program` on the device and checks its output.
///
/// Four 64-byte blocks of shuffle() patterns are written to device address
/// 0x10000000; after the kernel has run and the caches were flushed, the
/// same number of bytes is read back from 0x20000000 and expected to match
/// the patterns.
///
/// Returns 0 when every word matches, 1 on a mismatch, or the error code of
/// the first driver call that failed.
int run_kernel_test(vx_device_h device,
                    vx_buffer_h sbuf,
                    vx_buffer_h dbuf,
                    const char* program) {
  int ret;
  int errors = 0;

  uint64_t seed = 0x0badf00d40ff40ff;
  int num_blocks = 4;

  unsigned src_dev_addr  = 0x10000000;
  unsigned dest_dev_addr = 0x20000000;

  // Each 64-byte block holds eight 64-bit words.
  const int num_words = 8 * num_blocks;

  // write sbuf data
  auto src = (uint64_t*)vx_host_ptr(sbuf);
  for (int i = 0; i < num_words; ++i) {
    src[i] = shuffle(i, seed);
  }

  // write buffer to local memory
  std::cout << "write buffer to local memory" << std::endl;
  ret = vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0);
  if (ret != 0)
    return ret;

  // upload program
  std::cout << "upload program" << std::endl;
  ret = vx_upload_kernel_file(device, program);
  if (ret != 0)
    return ret;

  // start device
  std::cout << "start device" << std::endl;
  ret = vx_start(device);
  if (ret != 0)
    return ret;

  // wait for completion
  std::cout << "wait for completion" << std::endl;
  ret = vx_ready_wait(device, -1);
  if (ret != 0)
    return ret;

  // flush the caches
  std::cout << "flush the caches" << std::endl;
  ret = vx_flush_caches(device, dest_dev_addr, 64 * num_blocks);
  if (ret != 0)
    return ret;

  // read buffer from local memory
  std::cout << "read buffer from local memory" << std::endl;
  ret = vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0);
  if (ret != 0)
    return ret;

  // verify result
  std::cout << "verify result" << std::endl;
  auto dst = (uint64_t*)vx_host_ptr(dbuf);
  for (int i = 0; i < num_words; ++i) {
    auto curr = dst[i];
    auto ref = shuffle(i, seed);
    if (curr != ref) {
      // Words are 8 bytes apart; std::dec undoes the sticky std::hex so the
      // error count below is printed in decimal.
      std::cout << "error @ " << std::hex << (dest_dev_addr + 8 * i)
                << ": actual " << curr << ", expected " << ref
                << std::dec << std::endl;
      ++errors;
    }
  }

  if (errors != 0) {
    std::cout << "Found " << errors << " errors!" << std::endl;
    std::cout << "FAILED!" << std::endl;
    return 1;
  }

  return 0;
}
|
||||
|
||||
// File-scope handles, initially null. cleanup() releases each one that is
// non-null at the time it is called.
vx_device_h device = nullptr;
|
||||
vx_buffer_h sbuf = nullptr;
|
||||
vx_buffer_h dbuf = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (sbuf) {
|
||||
vx_buf_release(sbuf);
|
||||
}
|
||||
if (dbuf) {
|
||||
vx_buf_release(dbuf);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int ret;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
vx_device_h device;
|
||||
ret = vx_dev_open(&device);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// create source buffer
|
||||
std::cout << "create source buffer" << std::endl;
|
||||
ret = vx_alloc_shared_mem(device, 4096, &sbuf);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// create destination buffer
|
||||
std::cout << "create destination buffer" << std::endl;
|
||||
ret = vx_alloc_shared_mem(device, 4096, &dbuf);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// run tests
|
||||
if (0 == test || -1 == test) {
|
||||
std::cout << "run memcopy test" << std::endl;
|
||||
|
||||
ret = run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (1 == test || -1 == test) {
|
||||
std::cout << "run kernel test" << std::endl;
|
||||
ret = run_kernel_test(device, sbuf, dbuf, "kernel.bin");
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "Test PASSED" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
Binary file not shown.
@@ -1,9 +0,0 @@
|
||||
#include <stdint.h>
|
||||
|
||||
/* Copies 8 * 4 64-bit words from address 0x10000000 to address 0x20000000. */
void main() {
  int64_t* src = (int64_t*)0x10000000;
  int64_t* dst = (int64_t*)0x20000000;
  int k = 0;
  while (k < 8 * 4) {
    dst[k] = src[k];
    ++k;
  }
}
|
||||
@@ -1,65 +0,0 @@
|
||||
# Toolchain and runtime locations (assigned with ?= so they can be overridden
# from the environment or the make command line).
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VX_RT_PATH ?= $(wildcard ../../../runtime)

# RISC-V cross tools used to build the device kernel.
VX_CC  = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy

# Runtime sources compiled into the kernel image.
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s

VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections

# Device kernel sources.
VX_SRCS = kernel.c

# Host program build settings.
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../sw/include

PROJECT = demo

SRCS = demo.cpp

# These targets do not produce files of the same name.
.PHONY: all run-fpga run-ase run-rtlsim run-simx clean

all: $(PROJECT)

kernel.dump: kernel.elf
	$(VX_DMP) -D kernel.elf > kernel.dump

kernel.hex: kernel.elf
	$(VX_CPY) -O ihex kernel.elf kernel.hex

kernel.bin: kernel.elf
	$(VX_CPY) -O binary kernel.elf kernel.bin

# The kernel image is built from the kernel sources, so it depends on
# $(VX_SRCS) rather than on the host program's $(SRCS).
kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/stub -lvortex -o $@

# Each run target selects a driver library directory through LD_LIBRARY_PATH.
run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

run-ase: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

# Skip dependency generation when only cleaning.
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif
|
||||
@@ -1,15 +0,0 @@
|
||||
#ifndef _COMMON_H_
#define _COMMON_H_

/* Needed for uint32_t so this header can be included on its own. */
#include <stdint.h>

/* Device address where the host writes the kernel_arg_t block and from which
   the kernel reads it. */
#define KERNEL_ARG_DEV_MEM_ADDR 0x7fffff00

/* Argument block shared between the host program and the device kernel. */
struct kernel_arg_t {
  uint32_t num_warps;   /* warp count handed to vx_spawnWarps */
  uint32_t num_threads; /* thread count handed to vx_spawnWarps */
  uint32_t stride;      /* consecutive elements processed by each thread */
  uint32_t src0_ptr;    /* device address of the first source buffer */
  uint32_t src1_ptr;    /* device address of the second source buffer */
  uint32_t dst_ptr;     /* device address of the destination buffer */
};

#endif
|
||||
Binary file not shown.
@@ -1,241 +0,0 @@
|
||||
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <iostream>
#include <vortex.h>
#include "common.h"
|
||||
|
||||
const char* program_file = "kernel.bin";
|
||||
uint32_t data_stride = 0xffffffff;
|
||||
|
||||
// Writes the program banner followed by the list of accepted options to
// standard output.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-f: program] [-n stride] [-h: help]" << std::endl;
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:f:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
data_stride = atoi(optarg);
|
||||
break;
|
||||
case 'f':
|
||||
program_file = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (nullptr == program_file) {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
// Starts the device, waits for it to finish, reads the destination buffer back
// and checks that element i equals i + i for the first `num_points` elements.
//
// Returns 0 on success, the first non-zero status from a vx_* call, or 1 when
// at least one element mismatches.
int run_test(vx_device_h device,
             vx_buffer_h buffer,
             const kernel_arg_t& kernel_arg,
             uint32_t buf_size,
             uint32_t num_points) {
  // start device
  std::cout << "start device" << std::endl;
  if (int status = vx_start(device))
    return status;

  // wait for completion
  std::cout << "wait for completion" << std::endl;
  if (int status = vx_ready_wait(device, -1))
    return status;

  // flush the destination buffer caches
  std::cout << "flush the destination buffer caches" << std::endl;
  if (int status = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size))
    return status;

  // download destination buffer
  std::cout << "download destination buffer" << std::endl;
  if (int status = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0))
    return status;

  // verify result
  std::cout << "verify result" << std::endl;
  auto results = (int*)vx_host_ptr(buffer);
  int mismatches = 0;
  for (uint32_t idx = 0; idx != num_points; ++idx) {
    int expected = idx + idx;
    int actual = results[idx];
    if (actual != expected)
      ++mismatches;
  }

  if (mismatches == 0)
    return 0;

  std::cout << "Found " << mismatches << " errors!" << std::endl;
  std::cout << "FAILED!" << std::endl;
  return 1;
}
|
||||
|
||||
// File-scope handles, initially null. cleanup() releases each one that is
// non-null at the time it is called.
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int ret;
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
||||
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
|
||||
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
|
||||
|
||||
if (data_stride == 0xffffffff) {
|
||||
data_stride = block_size / sizeof(uint32_t);
|
||||
}
|
||||
|
||||
uint32_t num_points = max_cores * max_warps * max_threads * data_stride;
|
||||
uint32_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of workitems: " << num_points << std::endl;
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
ret = vx_dev_open(&device);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
ret = vx_upload_kernel_file(device, program_file);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
kernel_arg.src0_ptr = value;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
kernel_arg.src1_ptr = value;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
ret = vx_alloc_shared_mem(device, alloc_size, &buffer);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// populate source buffer values
|
||||
std::cout << "populate source buffer values" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// upload source buffers
|
||||
std::cout << "upload source buffers" << std::endl;
|
||||
|
||||
ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
kernel_arg.num_warps = max_warps;
|
||||
kernel_arg.num_threads = max_threads;
|
||||
kernel_arg.stride = data_stride;
|
||||
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "PASSED!" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
Binary file not shown.
@@ -1,32 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "intrinsics/vx_intrinsics.h"
|
||||
#include "vx_api/vx_api.h"
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
int* x = (int*)_arg->src0_ptr;
|
||||
int* y = (int*)_arg->src1_ptr;
|
||||
int* z = (int*)_arg->dst_ptr;
|
||||
|
||||
unsigned wNo = vx_warpNum();
|
||||
unsigned tid = vx_threadID();
|
||||
|
||||
unsigned i = ((wNo * _arg->num_threads) + tid) * _arg->stride;
|
||||
|
||||
for (unsigned j = 0; j < _arg->stride; ++j) {
|
||||
z[i+j] = x[i+j] + y[i+j];
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
/*printf("num_warps=%d\n", arg->num_warps);
|
||||
printf("num_threads=%d\n", arg->num_threads);
|
||||
printf("stride=%d\n", arg->stride);
|
||||
printf("src0_ptr=0x%x\n", arg->src0_ptr);
|
||||
printf("src1_ptr=0x%x\n", arg->src1_ptr);
|
||||
printf("dst_ptr=0x%x\n", arg->dst_ptr);*/
|
||||
vx_spawnWarps(arg->num_warps, arg->num_threads, kernel_body, arg);
|
||||
}
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user