merging changes from OPAE branch making this branch

This commit is contained in:
Blaise Tine
2020-03-27 20:19:16 -04:00
parent 614797e52f
commit 5a5c9f3981
267 changed files with 498191 additions and 166 deletions

View File

@@ -0,0 +1,603 @@
//
// Copyright (c) 2017, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// Neither the name of the Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// Read from the memory locations first and then write to the memory locations
`include "platform_if.vh"
`include "afu_json_info.vh"
//
// ccip_std_afu
//
// CCI-P AFU that is configured through two MMIO writes to address 0 (first
// the write address, then the read address) and then cycles through
// IDLE -> READ -> UPDATE -> WRITE, issuing one read request on channel c0
// and one write request on channel c1 per iteration.
//
// Fixes relative to the previous revision of this module:
//   * start_write is now declared and driven (it was referenced but never
//     declared).
//   * Request header types use the t_ccip_* names that the rest of the file
//     uses (the t_cci_* spellings are not declared by anything visible here).
//   * The write header uses the c1 write request type and a one-line length,
//     matching the single data beat that is actually sent.
//   * sTx.c1.data is assigned with a non-blocking assignment.
//   * start_read and rd_end_of_list are each written by exactly one
//     always_ff process.
//   * addr_next / wr_addr_next follow the current request address instead of
//     adding zero to a register that was never given a value.
//
module ccip_std_afu
   (
    // CCI-P Clocks and Resets
    input  logic        pClk,                 // 400MHz - CCI-P clock domain. Primary interface clock
    input  logic        pClkDiv2,             // 200MHz - CCI-P clock domain.
    input  logic        pClkDiv4,             // 100MHz - CCI-P clock domain.
    input  logic        uClk_usr,             // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
    input  logic        uClk_usrDiv2,         // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
    input  logic        pck_cp2af_softReset,  // CCI-P ACTIVE HIGH Soft Reset
    input  logic [1:0]  pck_cp2af_pwrState,   // CCI-P AFU Power State
    input  logic        pck_cp2af_error,      // CCI-P Protocol Error Detected

    // Interface structures
    input  t_if_ccip_Rx pck_cp2af_sRx,        // CCI-P Rx Port
    output t_if_ccip_Tx pck_af2cp_sTx         // CCI-P Tx Port
    );

    //
    // Run the entire design at the standard CCI-P frequency (400 MHz).
    //
    logic clk;
    assign clk = pClk;

    logic reset;
    assign reset = pck_cp2af_softReset;

    logic [511:0] wr_data;
    logic [511:0] rd_data;
    logic         get_write_addr;
    logic         do_update;
    logic         rd_end_of_list;
    logic         rd_needed;
    logic         wr_needed;
    logic         start_read;
    logic         start_write;
    logic [15:0]  cnt_list_length;

    //
    // FSM states. Declared ahead of the CSR logic because the start flags
    // are retired on FSM transitions.
    //
    typedef enum logic [1:0]
    {
        STATE_IDLE,
        STATE_READ,
        STATE_UPDATE,
        STATE_WRITE
    }
    t_state;

    t_state state;

    // The FSM is about to leave STATE_READ for STATE_UPDATE.
    logic leaving_read;
    assign leaving_read = (state == STATE_READ) && rd_needed;

    // The FSM is about to leave STATE_WRITE for STATE_READ.
    logic leaving_write;
    assign leaving_write = (state == STATE_WRITE) && !rd_end_of_list && wr_needed;


    // =========================================================================
    //
    //   Register requests.
    //
    // =========================================================================

    //
    // The incoming pck_cp2af_sRx is registered into sRx. pck_af2cp_sTx is
    // wired straight to sTx; every field of sTx is written from an always_ff
    // process below, never from combinational logic.
    //
    t_if_ccip_Rx sRx;
    always_ff @(posedge clk)
    begin
        sRx <= pck_cp2af_sRx;
    end

    t_if_ccip_Tx sTx;
    assign pck_af2cp_sTx = sTx;


    // =========================================================================
    //
    //   CSR (MMIO) handling.
    //
    // =========================================================================

    // The AFU ID comes from afu_json_info.vh.
    logic [127:0] afu_id = `AFU_ACCEL_UUID;

    // Is a CSR read request active this cycle?
    logic is_csr_read;
    assign is_csr_read = sRx.c0.mmioRdValid;

    // Is a CSR write request active this cycle?
    logic is_csr_write;
    assign is_csr_write = sRx.c0.mmioWrValid;

    // The MMIO request header is overlaid on the c0 response header.
    t_ccip_c0_ReqMmioHdr mmio_req_hdr;
    assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);

    //
    // Implement the device feature list by responding to MMIO reads.
    // Every read request gets a response; unknown addresses return zero.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            sTx.c2.mmioRdValid <= 1'b0;
        end
        else
        begin
            sTx.c2.mmioRdValid <= is_csr_read;

            // The transaction ID matches the response to its request.
            sTx.c2.hdr.tid <= mmio_req_hdr.tid;

            // Addresses index 32-bit objects, so 64-bit registers sit at
            // multiples of 2.
            case (mmio_req_hdr.address)
              0: // AFU DFH (device feature header)
                begin
                    sTx.c2.data <= t_ccip_mmioData'(0);
                    // Feature type is AFU
                    sTx.c2.data[63:60] <= 4'h1;
                    // End of list (last entry in list)
                    sTx.c2.data[40] <= 1'b1;
                end

              // AFU_ID_L
              2: sTx.c2.data <= afu_id[63:0];

              // AFU_ID_H
              4: sTx.c2.data <= afu_id[127:64];

              // DFH_RSVD0
              6: sTx.c2.data <= t_ccip_mmioData'(0);

              // DFH_RSVD1
              8: sTx.c2.data <= t_ccip_mmioData'(0);

              default: sTx.c2.data <= t_ccip_mmioData'(0);
            endcase
        end
    end

    //
    // CSR write handling. Both memory addresses are delivered through MMIO
    // address 0: the first write after reset carries the write address, any
    // later write carries the read address. get_write_addr selects between
    // the two.
    //
    logic is_mem_addr_csr_write;
    assign is_mem_addr_csr_write = get_write_addr && is_csr_write &&
                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

    logic is_mem_addr_csr_read;
    assign is_mem_addr_csr_read = !get_write_addr && is_csr_write &&
                                  (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

    // Memory address to which this AFU will write.
    t_ccip_clAddr write_mem_addr;

    // start_write marks that the next write must use write_mem_addr. It is
    // raised together with the address and dropped when the FSM leaves
    // STATE_WRITE for STATE_READ, the same set/clear points the later
    // revision of this module uses.
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            get_write_addr <= 1'b1;
            start_write <= 1'b0;
        end
        else if (is_mem_addr_csr_write)
        begin
            write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
            get_write_addr <= 1'b0;
            start_write <= 1'b1;
        end
        else if (leaving_write)
        begin
            start_write <= 1'b0;
        end
    end

    // Memory address from which this AFU will read.
    t_ccip_clAddr read_mem_addr;

    // start_read is raised when the read address arrives and dropped when
    // the FSM leaves STATE_READ. The clear used to live in the FSM process,
    // which made start_read a multiply-driven always_ff variable.
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            start_read <= 1'b0;
        end
        else if (is_mem_addr_csr_read)
        begin
            read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
            start_read <= 1'b1;
        end
        else if (leaving_read)
        begin
            start_read <= 1'b0;
        end
    end


    // =========================================================================
    //
    //   Main AFU logic
    //
    // =========================================================================

    //
    // State machine. This process owns `state` only.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            state <= STATE_IDLE;
        end
        else
        begin
            case (state)
              STATE_IDLE:
                begin
                    // Start once the read address CSR has been written.
                    if (start_read)
                    begin
                        state <= STATE_READ;
                        $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                    end
                end

              STATE_READ:
                begin
                    // Move on as soon as a read request is pending.
                    if (rd_needed)
                    begin
                        state <= STATE_UPDATE;
                        $display("AFU reading data and pointing to next read address...");
                    end
                end

              STATE_UPDATE:
                begin
                    if (do_update)
                    begin
                        state <= STATE_WRITE;
                        $display("AFU performing comutations on the read values...");
                    end
                end

              STATE_WRITE:
                begin
                    // Finish when the read budget is used up, otherwise go
                    // back to READ once a write request is pending.
                    if (rd_end_of_list)
                    begin
                        state <= STATE_IDLE;
                        $display("AFU done...");
                    end
                    else if (wr_needed)
                    begin
                        state <= STATE_READ;
                        $display("AFU reading again from read address...");
                    end
                end
            endcase
        end
    end


    // =========================================================================
    //
    //   Read logic.
    //
    // =========================================================================

    // One-cycle pulse: a write response arrived in the previous cycle.
    logic addr_next_valid;
    // Address used for every read after the first one.
    t_ccip_clAddr addr_next;
    // Address of the read request currently being issued.
    t_ccip_clAddr rd_addr;

    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            addr_next_valid <= 1'b0;
            rd_end_of_list <= 1'b0;
        end
        else
        begin
            addr_next_valid <= sRx.c1.rspValid;

            // NOTE(review): 'h10 is 16 decimal, while the original comment
            // said "read 10 times". The comparison value is left unchanged;
            // confirm which count is intended.
            rd_end_of_list <= (cnt_list_length == 'h10);
        end

        // The stride is still a placeholder of zero, so the next address is
        // the current one. Following rd_addr replaces the old
        // "addr_next + 0", which never received a defined value.
        addr_next <= rd_addr;
    end

    //
    // Back pressure may prevent an immediate read request, so rd_needed is
    // held until the request can be sent to the FIU.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            rd_needed <= 1'b0;
        end
        else if (rd_needed)
        begin
            // The request goes out in any cycle where c0 is not almost
            // full; keep the flag only while it is.
            rd_needed <= sRx.c0TxAlmFull;
        end
        else
        begin
            // A read is needed when a walk starts or when a write response
            // has just arrived and the read budget is not exhausted.
            rd_needed <= (start_read || (addr_next_valid && ! rd_end_of_list));
            rd_addr <= (start_read ? read_mem_addr : addr_next);
        end
    end

    //
    // Read request header.
    //
    t_ccip_c0_ReqMemHdr rd_hdr;

    always_comb
    begin
        rd_hdr = t_ccip_c0_ReqMemHdr'(0);

        // Read request type
        rd_hdr.req_type = eREQ_RDLINE_I;

        // NOTE(review): the original comment said MPF virtual addressing is
        // enabled, but no MPF instance is visible in this module. Confirm
        // what kind of address the host supplies.
        rd_hdr.address = rd_addr;

        // Let the FIU pick the channel
        rd_hdr.vc_sel = eVC_VA;

        // NOTE(review): a 4-line read is requested, but the response logic
        // below keeps a single 512-bit line. Left unchanged; confirm.
        rd_hdr.cl_len = eCL_LEN_4;
    end

    // Send read requests to the FIU and count them.
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            sTx.c0.valid <= 1'b0;
            cnt_list_length <= 0;
        end
        else
        begin
            // Issue a request when one is needed and the FIU is not full.
            sTx.c0.valid <= (rd_needed && ! sRx.c0TxAlmFull);
            sTx.c0.hdr <= rd_hdr;

            if (rd_needed && ! sRx.c0TxAlmFull)
            begin
                cnt_list_length <= cnt_list_length + 1;
                $display("Incrementing read count...");
            end
        end
    end

    //
    // Read data capture and update.
    //
    // NOTE(review): rd_data is sampled on every cycle spent in STATE_READ
    // without checking sRx.c0.rspValid, and the FSM leaves STATE_READ as
    // soon as a request is pending. The value written back is therefore not
    // guaranteed to come from the read response. Fixing this needs a change
    // to the FSM hand-shake and is left for a follow-up.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            do_update <= 1'b0;
        end
        else
        begin
            if (state == STATE_READ)
            begin
                rd_data <= sRx.c0.data;
                do_update <= 1'b1;
            end

            if (state == STATE_UPDATE)
            begin
                // The "update": increment the captured line by one.
                wr_data <= rd_data + 1;
                do_update <= 1'b0;
            end
        end
    end


    // =========================================================================
    //
    //   Write logic.
    //
    // =========================================================================

    // One-cycle pulse: a read response arrived in the previous cycle.
    logic wr_addr_next_valid;
    // Address used for every write after the first one.
    t_ccip_clAddr wr_addr_next;
    // Address of the write request currently being issued.
    t_ccip_clAddr wr_addr;

    always_ff @(posedge clk)
    begin
        wr_addr_next_valid <= sRx.c0.rspValid;

        // Zero stride placeholder, same as the read side.
        wr_addr_next <= wr_addr;
    end

    //
    // Back pressure may prevent an immediate write request, so wr_needed is
    // held until the request can be sent to the FIU.
    //
    // A new write is only raised while the FSM is in STATE_WRITE. Without
    // that qualifier start_write would launch a write as soon as the write
    // address CSR is written, before anything has been read.
    //
    // NOTE(review): the first write is raised on entry to STATE_WRITE by
    // start_write and does not wait for a read response. Confirm this is
    // the intended ordering.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            wr_needed <= 1'b0;
        end
        else if (wr_needed)
        begin
            // The request goes out in any cycle where c1 is not almost
            // full; keep the flag only while it is.
            wr_needed <= sRx.c1TxAlmFull;
        end
        else if (state == STATE_WRITE)
        begin
            wr_needed <= (start_write || wr_addr_next_valid);
            wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
        end
    end

    //
    // Write request header.
    //
    t_ccip_c1_ReqMemHdr wr_hdr;

    always_comb
    begin
        wr_hdr = t_ccip_c1_ReqMemHdr'(0);

        // Write request type (was the c0 read enum eREQ_RDLINE_I)
        wr_hdr.req_type = eREQ_WRLINE_I;

        wr_hdr.address = wr_addr;

        // Let the FIU pick the channel
        wr_hdr.vc_sel = eVC_VA;

        // One line per request: exactly one data beat is sent below, with
        // sop set, so a multi-line length would not match the payload.
        wr_hdr.cl_len = eCL_LEN_1;

        // Start of packet is true (single line write)
        wr_hdr.sop = 1'b1;
    end

    // Send write requests to the FIU.
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            sTx.c1.valid <= 1'b0;
        end
        else
        begin
            // Issue a request when one is needed and the FIU is not full.
            sTx.c1.valid <= (wr_needed && ! sRx.c1TxAlmFull);
            sTx.c1.hdr <= wr_hdr;
            // Non-blocking, like every other register written here.
            sTx.c1.data <= t_ccip_clData'(wr_data);
        end
    end

endmodule

View File

@@ -0,0 +1,18 @@
{
"version": 1,
"afu-image": {
"power": 0,
"afu-top-interface":
{
"name": "ccip_std_afu"
},
"accelerator-clusters":
[
{
"name": "cci_hello",
"total-contexts": 1,
"accelerator-type-uuid": "c6aa954a-9b91-4a37-abc1-1d9f0709dcc3"
}
]
}
}

View File

@@ -0,0 +1,653 @@
//
// Copyright (c) 2017, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// Neither the name of the Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// Read from the memory locations first and then write to the memory locations
`include "platform_if.vh"
`include "afu_json_info.vh"
//
// ccip_std_afu
//
// CCI-P AFU that receives a write address (MMIO word address 0) and a read
// address (MMIO word address 2) from the host and then cycles through
// IDLE -> READ -> UPDATE -> WRITE. Each iteration reads one line, adds 2 to
// it and writes it back, until cnt_list_length reaches 5.
//
// Fixes relative to the previous revision of this module:
//   * Every register is written by exactly one always_ff process. The
//     set/clear events that used to be spread across processes are named
//     wires (issue_read, issue_write, update_done, update_to_write,
//     write_done) and consumed by the process that owns the register.
//   * addr_next_valid and wr_addr_next_valid are reset.
//   * sTx.c0.valid / sTx.c1.valid are updated every cycle, so a request
//     strobe cannot stay high after the FSM leaves the issuing state.
//   * The data $display calls print the value being stored, not the old
//     register contents.
//
module ccip_std_afu
   (
    // CCI-P Clocks and Resets
    input  logic        pClk,                 // 400MHz - CCI-P clock domain. Primary interface clock
    input  logic        pClkDiv2,             // 200MHz - CCI-P clock domain.
    input  logic        pClkDiv4,             // 100MHz - CCI-P clock domain.
    input  logic        uClk_usr,             // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
    input  logic        uClk_usrDiv2,         // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
    input  logic        pck_cp2af_softReset,  // CCI-P ACTIVE HIGH Soft Reset
    input  logic [1:0]  pck_cp2af_pwrState,   // CCI-P AFU Power State
    input  logic        pck_cp2af_error,      // CCI-P Protocol Error Detected

    // Interface structures
    input  t_if_ccip_Rx pck_cp2af_sRx,        // CCI-P Rx Port
    output t_if_ccip_Tx pck_af2cp_sTx         // CCI-P Tx Port
    );

    //
    // Run the entire design at the standard CCI-P frequency (400 MHz).
    //
    logic clk;
    assign clk = pClk;

    logic reset;
    assign reset = pck_cp2af_softReset;

    logic [511:0] wr_data;
    logic [511:0] rd_data;
    logic         do_update;
    logic         start_read;
    logic         start_write;
    logic         wr_addr_next_valid;
    logic         addr_next_valid;
    logic         rd_end_of_list;
    logic         rd_needed;
    logic         wr_needed;
    logic         read_req;
    logic         write_req;
    logic [15:0]  cnt_list_length;

    t_ccip_clAddr rd_addr;
    t_ccip_clAddr wr_addr;
    t_ccip_clAddr addr_next;
    t_ccip_clAddr wr_addr_next;

    //
    // FSM states. Declared ahead of the CSR logic because several registers
    // are set or cleared on FSM events.
    //
    typedef enum logic [1:0]
    {
        STATE_IDLE,
        STATE_READ,
        STATE_UPDATE,
        STATE_WRITE
    }
    t_state;

    t_state state;


    // =========================================================================
    //
    //   Register requests.
    //
    // =========================================================================

    //
    // The incoming pck_cp2af_sRx is registered into sRx. pck_af2cp_sTx is
    // wired straight to sTx; every field of sTx is written from an always_ff
    // process below, never from combinational logic.
    //
    t_if_ccip_Rx sRx;
    always_ff @(posedge clk)
    begin
        sRx <= pck_cp2af_sRx;
    end

    t_if_ccip_Tx sTx;
    assign pck_af2cp_sTx = sTx;


    // =========================================================================
    //
    //   Shared events.
    //
    // =========================================================================

    // A read request is placed on c0 this cycle.
    logic issue_read;
    assign issue_read = (state == STATE_READ) && rd_needed &&
                        !sRx.c0TxAlmFull && !read_req;

    // A write request is placed on c1 this cycle.
    logic issue_write;
    assign issue_write = (state == STATE_WRITE) && wr_needed &&
                         !sRx.c1TxAlmFull && !write_req;

    // The captured read data is turned into write data this cycle.
    logic update_done;
    assign update_done = (state == STATE_UPDATE) && do_update;

    // The FSM moves from STATE_UPDATE to STATE_WRITE this cycle.
    logic update_to_write;
    assign update_to_write = (state == STATE_UPDATE) && !do_update;

    // The FSM moves from STATE_WRITE back to STATE_READ this cycle.
    logic write_done;
    assign write_done = (state == STATE_WRITE) && !rd_end_of_list && !wr_needed;


    // =========================================================================
    //
    //   CSR (MMIO) handling.
    //
    // =========================================================================

    // The AFU ID comes from afu_json_info.vh.
    logic [127:0] afu_id = `AFU_ACCEL_UUID;

    // Is a CSR read request active this cycle?
    logic is_csr_read;
    assign is_csr_read = sRx.c0.mmioRdValid;

    // Is a CSR write request active this cycle?
    logic is_csr_write;
    assign is_csr_write = sRx.c0.mmioWrValid;

    // The MMIO request header is overlaid on the c0 response header.
    t_ccip_c0_ReqMmioHdr mmio_req_hdr;
    assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);

    //
    // Implement the device feature list by responding to MMIO reads.
    // Every read request gets a response; unknown addresses return zero.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            sTx.c2.mmioRdValid <= 1'b0;
        end
        else
        begin
            sTx.c2.mmioRdValid <= is_csr_read;

            // The transaction ID matches the response to its request.
            sTx.c2.hdr.tid <= mmio_req_hdr.tid;

            // Addresses index 32-bit objects, so 64-bit registers sit at
            // multiples of 2.
            case (mmio_req_hdr.address)
              0: // AFU DFH (device feature header)
                begin
                    sTx.c2.data <= t_ccip_mmioData'(0);
                    // Feature type is AFU
                    sTx.c2.data[63:60] <= 4'h1;
                    // End of list (last entry in list)
                    sTx.c2.data[40] <= 1'b1;
                end

              // AFU_ID_L
              2: sTx.c2.data <= afu_id[63:0];

              // AFU_ID_H
              4: sTx.c2.data <= afu_id[127:64];

              // DFH_RSVD0
              6: sTx.c2.data <= t_ccip_mmioData'(0);

              // DFH_RSVD1
              8: sTx.c2.data <= t_ccip_mmioData'(0);

              // Debug register: lets the host read back start_read.
              10: sTx.c2.data <= t_ccip_mmioData'(start_read);

              default: sTx.c2.data <= t_ccip_mmioData'(0);
            endcase
        end
    end

    //
    // CSR write handling. MMIO word address 0 carries the memory address
    // this AFU writes to. Read and write MMIO spaces are logically separate,
    // so reusing address 0 does not clash with the DFH read above.
    //
    logic is_mem_addr_csr_write;
    assign is_mem_addr_csr_write = is_csr_write &&
                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

    // Memory address to which this AFU will write.
    t_ccip_clAddr write_mem_addr;

    // start_write is raised with the address and dropped when the FSM
    // returns from STATE_WRITE to STATE_READ. A new CSR write wins if both
    // happen in the same cycle.
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            start_write <= 1'b0;
        end
        else if (is_mem_addr_csr_write)
        begin
            write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
            start_write <= 1'b1;
        end
        else if (write_done)
        begin
            start_write <= 1'b0;
        end
    end

    //
    // MMIO word address 2 (byte offset 8) carries the memory address this
    // AFU reads from.
    //
    logic is_mem_addr_csr_read;
    assign is_mem_addr_csr_read = is_csr_write &&
                                  (mmio_req_hdr.address == t_ccip_mmioAddr'(2));

    // Memory address from which this AFU will read.
    t_ccip_clAddr read_mem_addr;

    // start_read is raised with the address and dropped once the first read
    // has been consumed by the update step. A new CSR write wins if both
    // happen in the same cycle.
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            start_read <= 1'b0;
        end
        else if (is_mem_addr_csr_read)
        begin
            read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
            start_read <= 1'b1;
        end
        else if (update_done)
        begin
            start_read <= 1'b0;
        end
    end


    // =========================================================================
    //
    //   Main AFU logic
    //
    // =========================================================================

    //
    // State machine. This process owns `state` only; the side effects that
    // used to be written here are applied in the owning processes.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            state <= STATE_IDLE;
        end
        else
        begin
            case (state)
              STATE_IDLE:
                begin
                    // Start once the read address CSR has been written.
                    if (start_read)
                    begin
                        state <= STATE_READ;
                        $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                    end
                end

              STATE_READ:
                begin
                    $display("AFU in READ...");
                    $display("do_update is %d...",do_update);
                    $display("addr_next_valid is %d...",addr_next_valid);
                    $display("rd_needed is %d...",rd_needed);

                    // The read has completed and its data is captured.
                    if (!rd_needed && do_update)
                    begin
                        state <= STATE_UPDATE;
                        $display("AFU moving to UPDATE...");
                    end
                end

              STATE_UPDATE:
                begin
                    $display("AFU in UPDATE...");

                    // wr_data has been produced (do_update dropped again).
                    if (!do_update)
                    begin
                        state <= STATE_WRITE;
                        $display("AFU moving to WRITE...");
                    end
                end

              STATE_WRITE:
                begin
                    $display("AFU in WRITE...");

                    // Finish when the read budget is used up, otherwise
                    // return to READ once the write has completed.
                    if (rd_end_of_list)
                    begin
                        state <= STATE_IDLE;
                        $display("AFU done...");
                    end
                    else if (!wr_needed)
                    begin
                        state <= STATE_READ;
                        $display("AFU moving to READ from WRITE...");
                    end
                end
            endcase
        end
    end


    // =========================================================================
    //
    //   Read logic.
    //
    // =========================================================================

    //
    // Next read address bookkeeping. addr_next_valid is raised by a write
    // response and dropped when the following read is issued. The response
    // wins if both happen in the same cycle.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            addr_next_valid <= 1'b0;
            rd_end_of_list <= 1'b0;
        end
        else
        begin
            if (issue_read)
            begin
                addr_next_valid <= 1'b0;
            end

            if (sRx.c1.rspValid)
            begin
                addr_next_valid <= 1'b1;

                // The stride is still a placeholder of zero, so the next
                // read goes to the current address again.
                addr_next <= rd_addr;

                // End of list reached once 5 reads have been issued.
                rd_end_of_list <= (cnt_list_length == 'h5);
            end
        end
    end

    //
    // rd_needed stays set from the moment a read is wanted until its
    // response arrives.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            rd_needed <= 1'b0;
        end
        else if (rd_needed)
        begin
            rd_needed <= !sRx.c0.rspValid;
        end
        else if (state == STATE_READ)
        begin
            // A read is needed when a walk starts, or when a write response
            // has arrived, the list is not finished and c0 has room.
            rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
            rd_addr <= (start_read ? read_mem_addr : addr_next);
        end
    end

    //
    // Read request header. Only the address is set; req_type, vc_sel and
    // cl_len keep the zero value from the cast.
    //
    t_ccip_c0_ReqMemHdr rd_hdr;

    always_comb
    begin
        rd_hdr = t_ccip_c0_ReqMemHdr'(0);
        rd_hdr.address = rd_addr;
    end

    //
    // Send read requests to the FIU. read_req blocks a second request until
    // the update step has consumed the response.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            sTx.c0.valid <= 1'b0;
            cnt_list_length <= 0;
            read_req <= 1'b0;
        end
        else
        begin
            // One-cycle strobe, driven every cycle so it can never be left
            // high when the FSM changes state.
            sTx.c0.valid <= issue_read;

            if (issue_read)
            begin
                sTx.c0.hdr <= rd_hdr;
                cnt_list_length <= cnt_list_length + 1;
                read_req <= 1'b1;
                $display("Incrementing read count...%d",cnt_list_length);
                $display("Read address is 0x%x...",rd_hdr.address);
            end
            else if (update_done)
            begin
                read_req <= 1'b0;
            end
        end
    end

    //
    // Read response capture and update step.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            do_update <= 1'b0;
        end
        else
        begin
            // Capture a response unless one is still waiting to be used.
            if (!do_update && sRx.c0.rspValid)
            begin
                rd_data <= sRx.c0.data;
                do_update <= 1'b1;
                // Print the incoming value; rd_data still holds the old one.
                $display("rd data is %d...",sRx.c0.data);
            end

            if (update_done)
            begin
                // The "update": add 2 to the captured line.
                wr_data <= rd_data + 2;
                do_update <= 1'b0;
                // Print the value being stored; wr_data still holds the old one.
                $display("write data is %d...",rd_data + 2);
            end
        end
    end


    // =========================================================================
    //
    //   Write logic.
    //
    // =========================================================================

    //
    // Next write address bookkeeping. wr_addr_next_valid is raised by a read
    // response and dropped when the following write is issued. The response
    // wins if both happen in the same cycle.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            wr_addr_next_valid <= 1'b0;
        end
        else
        begin
            if (issue_write)
            begin
                wr_addr_next_valid <= 1'b0;
            end

            if (sRx.c0.rspValid)
            begin
                wr_addr_next_valid <= 1'b1;

                // Zero stride placeholder, same as the read side.
                wr_addr_next <= wr_addr;
            end
        end
    end

    //
    // wr_needed stays set from the moment a write is wanted until its
    // response arrives. Entering STATE_WRITE always forces it on.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            wr_needed <= 1'b0;
        end
        else
        begin
            if (wr_needed)
            begin
                wr_needed <= !sRx.c1.rspValid;
            end
            else
            begin
                // A write is needed when a walk starts, or when a read
                // response has arrived and c1 has room.
                wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
                wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
            end

            // Last assignment wins: the UPDATE -> WRITE transition requests
            // a write regardless of the bookkeeping above.
            if (update_to_write)
            begin
                wr_needed <= 1'b1;
            end
        end
    end

    //
    // Write request header. Only the address and sop are set; req_type,
    // vc_sel and cl_len keep the zero value from the cast.
    //
    t_ccip_c1_ReqMemHdr wr_hdr;

    always_comb
    begin
        wr_hdr = t_ccip_c1_ReqMemHdr'(0);
        wr_hdr.address = wr_addr;
        // Start of packet is true (single line write)
        wr_hdr.sop = 1'b1;
    end

    //
    // Send write requests to the FIU. write_req blocks a second request
    // until the FSM has returned to STATE_READ.
    //
    always_ff @(posedge clk)
    begin
        if (reset)
        begin
            sTx.c1.valid <= 1'b0;
            write_req <= 1'b0;
        end
        else
        begin
            // One-cycle strobe, driven every cycle so it can never be left
            // high when the FSM changes state.
            sTx.c1.valid <= issue_write;

            if (issue_write)
            begin
                sTx.c1.hdr <= wr_hdr;
                sTx.c1.data <= t_ccip_clData'(wr_data);
                write_req <= 1'b1;
                $display("Write address is 0x%x...", wr_hdr.address);
            end
            else if (write_done)
            begin
                write_req <= 1'b0;
            end
        end
    end

endmodule

View File

@@ -0,0 +1,621 @@
//
// Copyright (c) 2017, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// Neither the name of the Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// Read from the memory locations first and then write to the memory locations
`include "platform_if.vh"
`include "afu_json_info.vh"
module ccip_std_afu
(
// CCI-P Clocks and Resets
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
// Interface structures
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
);
//
// Run the entire design at the standard CCI-P frequency (400 MHz).
//
logic clk;
assign clk = pClk;
logic reset;
assign reset = pck_cp2af_softReset;
logic [511:0] wr_data;
logic [511:0] rd_data;
logic do_update;
logic start_read;
logic start_write;
logic wr_addr_next_valid;
logic addr_next_valid;
logic rd_end_of_list;
logic rd_needed;
logic wr_needed;
logic [15:0] cnt_list_length;
t_ccip_clAddr rd_addr;
t_ccip_clAddr wr_addr;
t_ccip_clAddr addr_next;
t_ccip_clAddr wr_addr_next;
// =========================================================================
//
// Register requests.
//
// =========================================================================
//
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
// We also assign pck_af2cp_sTx to sTx here but don't register it.
// The code below never uses combinational logic to write sTx.
//
// One-cycle registered copy of the incoming CCI-P Rx port. All logic in
// this module reads sRx rather than pck_cp2af_sRx directly.
t_if_ccip_Rx sRx;
always_ff @(posedge clk) sRx <= pck_cp2af_sRx;

// The Tx port is driven straight from sTx. No extra register stage is
// added here because sTx fields are only ever written from clocked
// processes.
t_if_ccip_Tx sTx;
assign pck_af2cp_sTx = sTx;
// =========================================================================
//
// CSR (MMIO) handling.
//
// =========================================================================
// The AFU ID is a unique ID for a given program. Here we generated
// one with the "uuidgen" program and stored it in the AFU's JSON file.
// ASE and synthesis setup scripts automatically invoke afu_json_mgr
// to extract the UUID into afu_json_info.vh.
logic [127:0] afu_id = `AFU_ACCEL_UUID;
//
// A valid AFU must implement a device feature list, starting at MMIO
// address 0. Every entry in the feature list begins with 5 64-bit
// words: a device feature header, two AFU UUID words and two reserved
// words.
//
// Is a CSR read request active this cycle?
logic is_csr_read;
assign is_csr_read = sRx.c0.mmioRdValid;
// Is a CSR write request active this cycle?
logic is_csr_write;
assign is_csr_write = sRx.c0.mmioWrValid;
// The MMIO request header is overlaid on the normal c0 memory read
// response data structure. Cast the c0Rx header to an MMIO request
// header.
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
//
// Implement the device feature list by responding to MMIO reads.
//
// MMIO read responder (channel c2).
//
// A response is produced one cycle after every MMIO read request seen on
// sRx. The response carries the request's transaction ID and the data word
// selected by the request address. Addresses count 32-bit words, so 64-bit
// registers sit at even addresses.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c2.mmioRdValid <= 1'b0;
    end
    else
    begin
        // The transaction ID pairs this response with its request.
        sTx.c2.hdr.tid <= mmio_req_hdr.tid;

        // Every read request is answered, including reads of unmapped
        // addresses (they return zero).
        sTx.c2.mmioRdValid <= is_csr_read;

        case (mmio_req_hdr.address)
            // AFU DFH (device feature header). This AFU is the only
            // entry in the feature list.
            0:
                begin
                    sTx.c2.data <= t_ccip_mmioData'(0);
                    // End of list (last entry in list)
                    sTx.c2.data[40] <= 1'b1;
                    // Feature type is AFU
                    sTx.c2.data[63:60] <= 4'h1;
                end

            // AFU_ID_L
            2:  sTx.c2.data <= afu_id[63:0];

            // AFU_ID_H
            4:  sTx.c2.data <= afu_id[127:64];

            // Debug register: reads back start_read so host software can
            // exercise fpgaReadMMIO.
            10: sTx.c2.data <= t_ccip_mmioData'(start_read);

            // DFH_RSVD0 (6), DFH_RSVD1 (8) and all other addresses read
            // as zero.
            default: sTx.c2.data <= t_ccip_mmioData'(0);
        endcase
    end
end
//
// CSR write handling. Host software must tell the AFU the memory address
// to which it should be writing. The address is set by writing a CSR.
//
// We use MMIO address 0 to set the memory address. The read and
// write MMIO spaces are logically separate so we are free to use
// whatever we like. This may not be good practice for cleanly
// organizing the MMIO address space, but it is legal.
// -------------------------------------------------------------------------
// Address CSRs, control FSM, read path and write-request bookkeeping.
//
// Host software programs two cache-line addresses through MMIO writes.
// MMIO addresses in the request header count 32-bit words:
//   word address 0 (byte offset 0x0) : line the AFU writes to
//   word address 2 (byte offset 0x8) : line the AFU reads from
//
// Each register in this section is written by exactly ONE always_ff
// process. start_write, start_read, wr_needed and rd_end_of_list used to
// be assigned from two processes each, which always_ff does not permit
// and which makes the simulated result depend on process ordering. The
// set/clear conditions are unchanged; they are merged into the owning
// process with an explicit priority.
// -------------------------------------------------------------------------

logic is_mem_addr_csr_write;
assign is_mem_addr_csr_write = is_csr_write &&
                               (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

logic is_mem_addr_csr_read;
assign is_mem_addr_csr_read = is_csr_write &&
                              (mmio_req_hdr.address == t_ccip_mmioAddr'(2));

// Memory address to which this AFU will write.
t_ccip_clAddr write_mem_addr;
// Memory address from which this AFU will read.
t_ccip_clAddr read_mem_addr;

//
// FSM states. Declared ahead of the CSR processes because those processes
// now test `state` when clearing the start flags.
//
typedef enum logic [1:0]
{
    STATE_IDLE,
    STATE_READ,
    STATE_UPDATE,
    STATE_WRITE
}
t_state;

t_state state;

//
// start_write / write_mem_addr (sole owner of both).
//   set   : host writes the write-address CSR
//   clear : FSM leaves STATE_WRITE for STATE_READ (write was accepted and
//           the list has not ended)
// A CSR write in the same cycle as the clear wins, so a new host command
// is never dropped.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        start_write <= 1'b0;
    end
    else if (is_mem_addr_csr_write)
    begin
        write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
        start_write <= 1'b1;
    end
    else if ((state == STATE_WRITE) && !rd_end_of_list && !wr_needed)
    begin
        start_write <= 1'b0;
    end
end

//
// start_read / read_mem_addr (sole owner of both).
//   set   : host writes the read-address CSR
//   clear : FSM is in STATE_UPDATE, i.e. the first read has completed and
//           later reads use addr_next instead of read_mem_addr
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        start_read <= 1'b0;
    end
    else if (is_mem_addr_csr_read)
    begin
        read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
        start_read <= 1'b1;
    end
    else if (state == STATE_UPDATE)
    begin
        start_read <= 1'b0;
    end
end

// =========================================================================
//
//   Main AFU logic
//
// =========================================================================

//
// State machine. This process owns `state` only.
//
//   IDLE   -> READ   : start_read is set
//   READ   -> UPDATE : no read request pending and read data has arrived
//   UPDATE -> WRITE  : do_update has been consumed by the data path
//   WRITE  -> IDLE   : rd_end_of_list
//   WRITE  -> READ   : write request no longer pending
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        state <= STATE_IDLE;
    end
    else
    begin
        case (state)
            STATE_IDLE:
                begin
                    // Traversal begins once the read-address CSR has
                    // been written.
                    if (start_read)
                    begin
                        state <= STATE_READ;
                        $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                    end
                end

            STATE_READ:
                begin
                    $display("AFU in READ...");
                    if (!rd_needed && do_update)
                    begin
                        state <= STATE_UPDATE;
                        $display("AFU moving to UPDATE...");
                    end
                end

            STATE_UPDATE:
                begin
                    // The data path computes wr_data while in this state.
                    // The matching wr_needed request is raised by the
                    // wr_needed process on the same condition.
                    $display("AFU in UPDATE...");
                    if (!do_update)
                    begin
                        state <= STATE_WRITE;
                        $display("AFU moving to WRITE...");
                    end
                end

            STATE_WRITE:
                begin
                    // Write the updated value, then either finish or go
                    // back for the next read.
                    $display("AFU in WRITE...");
                    if (rd_end_of_list)
                    begin
                        state <= STATE_IDLE;
                        $display("AFU done...");
                    end
                    else if (!wr_needed)
                    begin
                        state <= STATE_READ;
                        $display("AFU moving to READ from WRITE...");
                    end
                end
        endcase
    end
end

// =========================================================================
//
//   Read logic.
//
// =========================================================================

//
// READ REQUEST
//

// Next read address and end-of-list detection (sole owner of
// rd_end_of_list).
always_ff @(posedge clk)
begin
    // The next read address becomes valid when a write response (c1)
    // comes back.
    addr_next_valid <= sRx.c1.rspValid;

    // TODO (from original author): the next address should be the current
    // address plus the element size. For now the same line is re-read.
    addr_next <= rd_addr + 0;

    // End of list is flagged while exactly 5 read requests have been
    // counted.
    if (reset)
    begin
        rd_end_of_list <= 1'b0;
    end
    else
    begin
        rd_end_of_list <= (cnt_list_length == 'h5);
    end
end

//
// Since back pressure may prevent an immediate read request, we must
// record whether a read is needed and hold it until the request can
// be sent to the FIU.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        rd_needed <= 1'b0;
    end
    else
    begin
        if (rd_needed)
        begin
            // A pending request stays pending only while the FIU reports
            // the c0 channel as almost full.
            rd_needed <= sRx.c0TxAlmFull;
        end
        else
        begin
            // Need a read under two conditions:
            //   - Starting a new walk
            //   - The previous write completed and the list has not ended
            rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
            rd_addr <= (start_read ? read_mem_addr : addr_next);
        end
    end
end

//
// Emit read requests to the FIU.
//

// Read request header. Every field other than the address keeps the
// all-zero value produced by the cast.
t_ccip_c0_ReqMemHdr rd_hdr;

always_comb
begin
    rd_hdr = t_ccip_c0_ReqMemHdr'(0);
    rd_hdr.address = rd_addr;
end

// Send read requests to the FIU and count them.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c0.valid <= 1'b0;
        cnt_list_length <= 0;
    end
    else
    begin
        // Generate a read request when needed and the FIU isn't full
        if (state == STATE_READ)
        begin
            sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull);

            if (rd_needed && !sRx.c0TxAlmFull)
            begin
                sTx.c0.hdr <= rd_hdr;
                cnt_list_length <= cnt_list_length + 1;
                $display("Incrementing read count...%d",cnt_list_length);
                $display("Read address is 0x%x...",rd_hdr.address);
            end
        end
    end
end

//
// READ RESPONSE HANDLING
//

//
// Receive data (read responses) and derive the data to write back.
// do_update is raised by a read response and dropped once the FSM has
// reached STATE_UPDATE. Clearing start_read now lives in the start_read
// process.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        do_update <= 1'b0;
    end
    else
    begin
        if (sRx.c0.rspValid)
        begin
            rd_data <= sRx.c0.data;
            do_update <= 1'b1;
        end

        if (state == STATE_UPDATE)
        begin
            // Write-back value is the read line plus 2.
            wr_data <= rd_data + 2;
            do_update <= 1'b0;
            // Non-blocking update: this prints the previous wr_data.
            $display("write data is %d...",wr_data);
        end
    end
end

// =========================================================================
//
//   Write logic.
//
// =========================================================================

//
// WRITE REQUEST
//

// Next write address
always_ff @(posedge clk)
begin
    // The next write address becomes valid when a read response (c0)
    // comes back.
    wr_addr_next_valid <= sRx.c0.rspValid;

    // TODO (from original author): advance by the element size. For now
    // the same line is rewritten.
    wr_addr_next <= wr_addr + 0;
end

//
// Since back pressure may prevent an immediate write request, we must
// record whether a write is needed and hold it until the request can
// be sent to the FIU. This process is the sole owner of wr_needed.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        wr_needed <= 1'b0;
    end
    else
    begin
        if (wr_needed)
        begin
            // A pending request stays pending only while the FIU reports
            // the c1 channel as almost full.
            wr_needed <= sRx.c1TxAlmFull;
        end
        else
        begin
            // Need a write under two conditions:
            //   - The host has programmed a write address (start_write)
            //   - A read response just arrived
            wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
            wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
        end

        // FSM request, formerly assigned inside the state machine: the
        // UPDATE -> WRITE transition always enters STATE_WRITE with a
        // write pending. It comes last so it overrides the default above.
        if ((state == STATE_UPDATE) && !do_update)
        begin
            wr_needed <= 1'b1;
        end
    end
end
//
// Emit write requests to the FIU.
//
// Write header defines the request to the FIU
// Write request header. Fields other than the address and sop keep the
// all-zero value produced by the cast.
t_ccip_c1_ReqMemHdr wr_hdr;

always_comb
begin
    wr_hdr = t_ccip_c1_ReqMemHdr'(0);

    // Target cache-line address
    wr_hdr.address = wr_addr;

    // Start of packet is true (single line write)
    wr_hdr.sop = 1'b1;
end

//
// Send write requests to the FIU.
//
// sTx.c1.valid is recomputed every cycle and is high only while the FSM is
// in STATE_WRITE with a pending request and no c1 back pressure.
// Previously it was updated only inside STATE_WRITE. The WRITE -> IDLE
// exit on rd_end_of_list does not wait for wr_needed to drop, so valid
// could be left asserted after the FSM had moved on, re-issuing the same
// write on every following cycle.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c1.valid <= 1'b0;
    end
    else
    begin
        sTx.c1.valid <= ((state == STATE_WRITE) && wr_needed && !sRx.c1TxAlmFull);

        // Header and data are refreshed only when a request is issued.
        if ((state == STATE_WRITE) && wr_needed && !sRx.c1TxAlmFull)
        begin
            sTx.c1.hdr <= wr_hdr;
            sTx.c1.data <= t_ccip_clData'(wr_data);
        end
    end
end
//
// WRITE RESPONSE HANDLING
//
// Apurve: Check if a signal is to be sent to read to start reading in case
// write response does not work
//
// Send data (write requests).
//
//always_ff @(posedge clk)
//begin
// if (state == STATE_WRITE)
// begin
// rd_data <= sRx.c0.data;
// end
// if (state == STATE_UPDATE)
// begin
// // Update the write data and put it in the write data to be written
// wr_data <= rd_data + 1;
// end
//end
endmodule

View File

@@ -0,0 +1,2 @@
cci_hello.json
cci_hello_afu.sv

View File

@@ -0,0 +1,11 @@
#!/bin/sh
##
## Setup ASE environment using ../rtl/sources.txt.
##
## All command-line arguments are forwarded unchanged to afu_sim_setup.
##

# Absolute path to this script
SCRIPT=$(readlink -f "$0")
SCRIPT_PATH=$(dirname "$SCRIPT")

# "$@" must be quoted: unquoted, an argument containing whitespace (for
# example a build directory with a space in its name) is split into
# several words before afu_sim_setup sees it.
afu_sim_setup --sources="${SCRIPT_PATH}/../rtl/sources.txt" "$@"

View File

@@ -0,0 +1,41 @@
include ../../common/sw/common_include.mk

# Primary test name
TEST = cci_hello

# Build directory
OBJDIR = obj
CFLAGS += -I./$(OBJDIR)
CPPFLAGS += -I./$(OBJDIR)

# Files and folders
SRCS = $(TEST).c
OBJS = $(addprefix $(OBJDIR)/,$(patsubst %.c,%.o,$(SRCS)))

# Targets: the default goal builds both the hardware binary ($(TEST)) and
# the ASE simulation binary ($(TEST)_ase).
all: $(TEST) $(TEST)_ase

# AFU info from JSON file, including AFU UUID
AFU_JSON_INFO = $(OBJDIR)/afu_json_info.h
$(AFU_JSON_INFO): ../hw/rtl/$(TEST).json | objdir
	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@

# Every object may include the generated header, so build it first.
$(OBJS): $(AFU_JSON_INFO)

$(TEST): $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS) $(FPGA_LIBS)

$(TEST)_ase: $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS) $(ASE_LIBS)

$(OBJDIR)/%.o: %.c | objdir
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -rf $(TEST) $(TEST)_ase $(OBJDIR)

# Order-only helper that creates the build directory.
objdir:
	@mkdir -p $(OBJDIR)

# objdir is listed so that a stray file or directory named "objdir" cannot
# stop the build directory from being created.
.PHONY: all clean objdir

View File

@@ -0,0 +1,210 @@
//
// Copyright (c) 2017, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// Neither the name of the Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <uuid/uuid.h>
#include <opae/fpga.h>
// State from the AFU's JSON file, extracted using OPAE's afu_json_mgr script
#include "afu_json_info.h"
#define CACHELINE_BYTES 64
#define CL(x) ((x) * CACHELINE_BYTES)
//
// Search for an accelerator matching the requested UUID and connect to it.
//
// accel_uuid: textual UUID of the AFU.
//
// Returns an open accelerator handle, or NULL when the UUID cannot be
// parsed, the search fails or finds nothing, or the accelerator cannot be
// opened. A diagnostic is printed to stderr in every failure case.
//
static fpga_handle connect_to_accel(const char *accel_uuid)
{
    fpga_properties filter = NULL;
    fpga_guid guid;
    fpga_token accel_token;
    // Starts at 0 so a failed enumeration cannot be mistaken for a match
    // (accel_token would be uninitialized in that case).
    uint32_t num_matches = 0;
    fpga_handle accel_handle = NULL;
    fpga_result r;

    // Don't print verbose messages in ASE by default
    //setenv("ASE_LOG", "0", 0);

    // Reject a malformed UUID instead of searching with garbage
    if (uuid_parse(accel_uuid, guid) != 0)
    {
        fprintf(stderr, "Invalid accelerator UUID: %s\n", accel_uuid);
        return NULL;
    }

    // Set up a filter that will search for an accelerator
    r = fpgaGetProperties(NULL, &filter);
    if (FPGA_OK != r)
    {
        fprintf(stderr, "Failed to create accelerator search filter\n");
        return NULL;
    }
    fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);

    // Add the desired UUID to the filter
    fpgaPropertiesSetGUID(filter, guid);

    // Do the search across the available FPGA contexts
    r = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);

    // Not needed anymore
    fpgaDestroyProperties(&filter);

    if (FPGA_OK != r || num_matches < 1)
    {
        fprintf(stderr, "Accelerator %s not found!\n", accel_uuid);
        return NULL;
    }

    // Open accelerator
    r = fpgaOpen(accel_token, &accel_handle, 0);

    // Done with token, whether or not the open succeeded
    fpgaDestroyToken(&accel_token);

    if (FPGA_OK != r)
    {
        fprintf(stderr, "Failed to open accelerator %s\n", accel_uuid);
        return NULL;
    }

    return accel_handle;
}
//
// Allocate a buffer in I/O memory, shared with the FPGA.
//
// accel_handle: open accelerator handle
// size:         requested buffer size in bytes (must be positive)
// wsid:         out - workspace ID, needed later to release the buffer
// io_addr:      out - address of the buffer as seen by the accelerator
//
// Returns the host pointer to the buffer, or NULL on any failure. On
// failure no buffer remains allocated.
//
static volatile void* alloc_buffer(fpga_handle accel_handle,
                                   ssize_t size,
                                   uint64_t *wsid,
                                   uint64_t *io_addr)
{
    fpga_result r;
    void *buf = NULL;

    // A non-positive size would turn into a huge unsigned length
    if (size <= 0) return NULL;

    r = fpgaPrepareBuffer(accel_handle, size, &buf, wsid, 0);
    if (FPGA_OK != r) return NULL;

    // Get the physical address of the buffer in the accelerator
    r = fpgaGetIOAddress(accel_handle, *wsid, io_addr);
    if (FPGA_OK != r)
    {
        // Checked explicitly (not with assert) so the failure is handled
        // under NDEBUG too and the buffer is not leaked.
        fpgaReleaseBuffer(accel_handle, *wsid);
        return NULL;
    }

    return buf;
}
//
// Host side of the cci_hello test:
//   1. connect to the AFU and allocate one write buffer and one read buffer
//   2. pass both buffer addresses (as cache-line addresses) to the AFU
//      through MMIO writes
//   3. spin until the AFU changes the first byte of the write buffer
//
// Returns 0 on success, EXIT_FAILURE if the accelerator or a buffer could
// not be obtained.
//
int main(int argc, char *argv[])
{
    fpga_handle accel_handle;
    volatile char *buf;
    volatile char *buf_r;
    uint64_t wsid1;
    uint64_t wsid2;
    uint64_t buf_pa;
    uint64_t buf_rpa;
    uint64_t ret_buf_rpa;
    fpga_result r;

    (void)argc;
    (void)argv;

    // Find and connect to the accelerator
    accel_handle = connect_to_accel(AFU_ACCEL_UUID);
    if (!accel_handle)
    {
        // connect_to_accel() has already reported the reason
        return EXIT_FAILURE;
    }

    // Allocate a four-page memory buffer the accelerator writes to
    buf = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
                                       &wsid1, &buf_pa);
    if (NULL == buf)
    {
        fprintf(stderr, "Failed to allocate the write buffer\n");
        fpgaClose(accel_handle);
        return EXIT_FAILURE;
    }

    // Allocate a four-page memory buffer the accelerator reads from
    buf_r = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
                                         &wsid2, &buf_rpa);
    if (NULL == buf_r)
    {
        fprintf(stderr, "Failed to allocate the read buffer\n");
        fpgaReleaseBuffer(accel_handle, wsid1);
        fpgaClose(accel_handle);
        return EXIT_FAILURE;
    }

    // Seed the low byte of both buffers with 5. The FPGA reads from buf_r
    // and writes a derived value to buf, so a change of buf[0] away from 5
    // shows that the FPGA has written.
    buf[0] = 5;
    buf_r[0] = 5;

    // Tell the accelerator the address of the write buffer using cache
    // line addresses (MMIO byte offset 0).
    r = fpgaWriteMMIO64(accel_handle, 0, 0, buf_pa / CL(1));
    printf("Write address is %08lx\n", buf_pa);
    printf("Write address div 64 is %08lx\n", buf_pa/ CL(1));
    assert(FPGA_OK == r);

    // Read back the debug CSR at MMIO byte offset 5 * 8 to check that
    // fpgaReadMMIO64 works.
    r = fpgaReadMMIO64(accel_handle, 0, 5 * sizeof(uint64_t), &ret_buf_rpa);
    printf("Returned read at 10 is %08lx\n", ret_buf_rpa);
    assert(FPGA_OK == r);

    // Tell the accelerator the address of the read buffer using cache
    // line addresses (MMIO byte offset 8). The accelerator will read from
    // the buffer.
    r = fpgaWriteMMIO64(accel_handle, 0, sizeof(uint64_t), buf_rpa / CL(1));
    printf("Read address is %08lx\n", buf_rpa);
    printf("Read address div64 is %08lx\n", buf_rpa / CL(1));
    assert(FPGA_OK == r);

    // Spin, waiting for the value in memory to change from the seed.
    while (5 == buf[0])
    {
        // A well-behaved program would use _mm_pause(), nanosleep() or
        // equivalent to save power here.
    };

    // Print the value written by the FPGA
    printf("%d\n", buf[0]);

    // Spin until the FPGA has written 10.
    // NOTE(review): this loop never ends if the AFU does not eventually
    // write 10 to buf[0] - confirm against the AFU's update logic.
    do {
        //printf("%d\n", buf[0]);
    } while (10 != buf[0]);

    // Done
    fpgaReleaseBuffer(accel_handle, wsid1);
    fpgaReleaseBuffer(accel_handle, wsid2);
    fpgaClose(accel_handle);

    return 0;
}

View File

@@ -0,0 +1,13 @@
//
// Generated by afu_json_mgr from ../hw/rtl/cci_hello.json
//
#ifndef __AFU_JSON_INFO__
#define __AFU_JSON_INFO__
#define AFU_ACCEL_NAME "cci_hello"
#define AFU_ACCEL_UUID "C6AA954A-9B91-4A37-ABC1-1D9F0709DCC3"
#define AFU_IMAGE_POWER 0
#define AFU_TOP_IFC "ccip_std_afu"
#endif // __AFU_JSON_INFO__

Binary file not shown.

24
driver/hw/Makefile Normal file
View File

@@ -0,0 +1,24 @@
# Build directory created by afu_sim_setup
BUILD_DIR=build_sim

all: ase fpga

# Build the ASE simulation. $(MAKE) (not a bare "make") passes -j, -n and
# other flags of the parent invocation to the sub-make.
ase: build-setup
	$(MAKE) -C $(BUILD_DIR)

fpga: build-setup
# TODO

build-setup: $(BUILD_DIR)/Makefile

# Generate the simulation build tree from sources.txt
$(BUILD_DIR)/Makefile:
	afu_sim_setup --sources=sources.txt --platform discrete_pcie3 $(BUILD_DIR) -f

run-ase:
	cd $(BUILD_DIR) && MENT_VSIM_OPT="-dpicpppath /usr/bin/gcc" $(MAKE) sim

run-fpga:
# TODO

clean:
	rm -rf $(BUILD_DIR)

# None of these targets names a file it produces.
.PHONY: all ase fpga build-setup run-ase run-fpga clean

View File

@@ -0,0 +1,48 @@
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
// Register all interface signals
import ccip_if_pkg::*;
// One-stage register slice for the CCI-P interface.
//
// Every *_T0 input is sampled on the rising edge of pClk and presented on
// the matching *_T1 output one cycle later. Both directions (Rx towards
// the AFU, Tx towards the FIU) pass through the same kind of stage.
module ccip_interface_reg(
    // CCI-P Clocks and Resets
    input  logic        pClk,                     // 400MHz - CC-P clock domain. Primary Clock
    input  logic        pck_cp2af_softReset_T0,   // CCI-P ACTIVE HIGH Soft Reset
    input  logic [1:0]  pck_cp2af_pwrState_T0,    // CCI-P AFU Power State
    input  logic        pck_cp2af_error_T0,       // CCI-P Protocol Error Detected
    // Interface structures
    input  t_if_ccip_Rx pck_cp2af_sRx_T0,         // CCI-P Rx Port
    input  t_if_ccip_Tx pck_af2cp_sTx_T0,         // CCI-P Tx Port

    // Registered (one cycle delayed) copies of the inputs above
    output logic        pck_cp2af_softReset_T1,
    output logic [1:0]  pck_cp2af_pwrState_T1,
    output logic        pck_cp2af_error_T1,
    output t_if_ccip_Rx pck_cp2af_sRx_T1,
    output t_if_ccip_Tx pck_af2cp_sTx_T1
);

    // Pipeline registers, each tagged with the (* preserve *) attribute.
    (* preserve *) logic        pck_cp2af_softReset_T0_q;
    (* preserve *) logic [1:0]  pck_cp2af_pwrState_T0_q;
    (* preserve *) logic        pck_cp2af_error_T0_q;
    (* preserve *) t_if_ccip_Rx pck_cp2af_sRx_T0_q;
    (* preserve *) t_if_ccip_Tx pck_af2cp_sTx_T0_q;

    // Capture stage: no reset, the registers simply follow their inputs.
    always_ff @(posedge pClk)
    begin
        pck_af2cp_sTx_T0_q       <= pck_af2cp_sTx_T0;
        pck_cp2af_sRx_T0_q       <= pck_cp2af_sRx_T0;
        pck_cp2af_error_T0_q     <= pck_cp2af_error_T0;
        pck_cp2af_pwrState_T0_q  <= pck_cp2af_pwrState_T0;
        pck_cp2af_softReset_T0_q <= pck_cp2af_softReset_T0;
    end

    // Outputs are the register contents, with no further logic.
    assign pck_cp2af_softReset_T1 = pck_cp2af_softReset_T0_q;
    assign pck_cp2af_pwrState_T1  = pck_cp2af_pwrState_T0_q;
    assign pck_cp2af_error_T1     = pck_cp2af_error_T0_q;
    assign pck_cp2af_sRx_T1       = pck_cp2af_sRx_T0_q;
    assign pck_af2cp_sTx_T1       = pck_af2cp_sTx_T0_q;

endmodule

172
driver/hw/ccip_std_afu.sv Normal file
View File

@@ -0,0 +1,172 @@
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
// Top Level Vortex Driver
// To be done:
// Check how to run this with OPAE. Looks like setup issue
`include "platform_if.vh"
import local_mem_cfg_pkg::*;
module ccip_std_afu
#(
parameter NUM_LOCAL_MEM_BANKS = 2
)
(
// CCI-P Clocks and Resets
input logic pClk, // Primary CCI-P interface clock.
input logic pClkDiv2, // Aligned, pClk divided by 2.
input logic pClkDiv4, // Aligned, pClk divided by 4.
input logic uClk_usr, // User clock domain. Refer to clock programming guide.
input logic uClk_usrDiv2, // Aligned, user clock divided by 2.
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
// CCI-P structures
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
output t_if_ccip_Tx pck_af2cp_sTx, // CCI-P Tx Port
// Local memory interface
avalon_mem_if.to_fiu local_mem[NUM_LOCAL_MEM_BANKS]
);
// ====================================================================
// Pick the proper clk and reset, as chosen by the AFU's JSON file
// ====================================================================
// The platform may transform the CCI-P clock from pClk to a clock
// chosen in the AFU's JSON file.
logic clk;
assign clk = `PLATFORM_PARAM_CCI_P_CLOCK;
logic reset;
assign reset = `PLATFORM_PARAM_CCI_P_RESET;
// ====================================================================
// Register signals at interface before consuming them
// ====================================================================
(* noprune *) logic [1:0] cp2af_pwrState_T1;
(* noprune *) logic cp2af_error_T1;
logic reset_T1;
t_if_ccip_Rx cp2af_sRx_T1;
t_if_ccip_Tx af2cp_sTx_T0;
ccip_interface_reg inst_green_ccip_interface_reg
(
.pClk (clk),
.pck_cp2af_softReset_T0 (reset),
.pck_cp2af_pwrState_T0 (pck_cp2af_pwrState),
.pck_cp2af_error_T0 (pck_cp2af_error),
.pck_cp2af_sRx_T0 (pck_cp2af_sRx),
.pck_af2cp_sTx_T0 (af2cp_sTx_T0),
.pck_cp2af_softReset_T1 (reset_T1),
.pck_cp2af_pwrState_T1 (cp2af_pwrState_T1),
.pck_cp2af_error_T1 (cp2af_error_T1),
.pck_cp2af_sRx_T1 (cp2af_sRx_T1),
.pck_af2cp_sTx_T1 (pck_af2cp_sTx)
);
// ====================================================================
// User AFU goes here
// ====================================================================
//
// vortex_afu depends on CCI-P and local memory being in the same
// clock domain. This is accomplished by choosing a common clock
// in the AFU's JSON description. The platform instantiates clock-
// crossing shims automatically, as needed.
//
//
// Memory banks are used very simply here. Only one bank is active at
// a time, selected by mem_bank_select. mem_bank_select is set
// by a CSR from the host.
//
t_local_mem_byte_mask avs_byteenable;
logic avs_waitrequest;
t_local_mem_data avs_readdata;
logic avs_readdatavalid;
t_local_mem_burst_cnt avs_burstcount;
t_local_mem_data avs_writedata;
t_local_mem_addr avs_address;
logic avs_write;
logic avs_read;
// choose which memory bank to test
logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;
vortex_afu
#(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
)
hello_mem_afu_inst
(
.clk (clk),
.SoftReset (reset_T1),
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
.avs_waitrequest (avs_waitrequest),
.avs_write (avs_write),
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid (avs_readdatavalid),
.mem_bank_select (mem_bank_select),
.cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0)
);
//
// Export the local memory interface signals as vectors so that bank
// selection can use array syntax.
//
// Per-bank copies of the memory-to-AFU signals, kept as arrays so the
// active bank can be picked with a plain array index.
logic avs_waitrequest_v[NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_readdata_v[NUM_LOCAL_MEM_BANKS];
logic avs_readdatavalid_v[NUM_LOCAL_MEM_BANKS];

genvar b;
generate
    for (b = 0; b < NUM_LOCAL_MEM_BANKS; b = b + 1)
    begin : lmb
        // True when the host-selected bank is this bank.
        logic bank_hit;
        assign bank_hit = ($bits(mem_bank_select)'(b) == mem_bank_select);

        // Local memory to AFU signals
        assign avs_waitrequest_v[b]   = local_mem[b].waitrequest;
        assign avs_readdata_v[b]      = local_mem[b].readdata;
        assign avs_readdatavalid_v[b] = local_mem[b].readdatavalid;

        // Address, burst count, byte enables and write data are
        // broadcast to every bank. Only the request strobes are
        // bank-specific.
        assign local_mem[b].burstcount = avs_burstcount;
        assign local_mem[b].writedata  = avs_writedata;
        assign local_mem[b].address    = avs_address;
        assign local_mem[b].byteenable = avs_byteenable;

        // Read and write strobes reach only the selected bank.
        assign local_mem[b].write = avs_write && bank_hit;
        assign local_mem[b].read  = avs_read && bank_hit;
    end
endgenerate

// Responses are taken from the selected bank only.
assign avs_waitrequest = avs_waitrequest_v[mem_bank_select];
assign avs_readdata = avs_readdata_v[mem_bank_select];
assign avs_readdatavalid = avs_readdatavalid_v[mem_bank_select];
endmodule

116
driver/hw/sources.txt Normal file
View File

@@ -0,0 +1,116 @@
vortex_afu.json
+incdir+.
+incdir+../../rtl
+incdir+../../rtl/shared_memory
+incdir+../../rtl/cache
+incdir+../../rtl/VX_cache
+incdir+../../rtl/interfaces
+incdir+../../rtl/pipe_regs
+incdir+../../rtl/compat
../../rtl/VX_define_synth.v
../../rtl/VX_define.v
../../rtl/VX_cache/VX_cache_config.v
../../rtl/Vortex_SOC.v
../../rtl/Vortex.v
../../rtl/VX_front_end.v
../../rtl/VX_back_end.v
../../rtl/VX_fetch.v
../../rtl/VX_scheduler.v
../../rtl/VX_execute_unit.v
../../rtl/VX_warp.v
../../rtl/VX_icache_stage.v
../../rtl/VX_gpr_wrapper.v
../../rtl/byte_enabled_simple_dual_port_ram.v
../../rtl/VX_gpgpu_inst.v
../../rtl/VX_writeback.v
../../rtl/VX_countones.v
../../rtl/VX_csr_handler.v
../../rtl/VX_csr_pipe.v
../../rtl/VX_generic_queue_ll.v
../../rtl/VX_warp_scheduler.v
../../rtl/VX_priority_encoder.v
../../rtl/VX_generic_queue.v
../../rtl/pipe_regs/VX_f_d_reg.v
../../rtl/pipe_regs/VX_i_d_reg.v
../../rtl/pipe_regs/VX_d_e_reg.v
../../rtl/VX_gpr.v
../../rtl/VX_gpr_stage.v
../../rtl/VX_dmem_controller.v
../../rtl/VX_alu.v
../../rtl/VX_generic_stack.v
../../rtl/VX_generic_priority_encoder.v
../../rtl/VX_csr_data.v
../../rtl/VX_lsu.v
../../rtl/VX_decode.v
../../rtl/VX_inst_multiplex.v
../../rtl/VX_csr_wrapper.v
../../rtl/VX_priority_encoder_w_mask.v
../../rtl/VX_generic_register.v
../../rtl/VX_lsu_addr_gen.v
../../rtl/compat/VX_mult.v
../../rtl/compat/VX_divide.v
../../rtl/VX_cache/VX_snp_fwd_arb.v
../../rtl/VX_cache/VX_cache_dram_req_arb.v
../../rtl/VX_cache/VX_cache_dfq_queue.v
../../rtl/VX_cache/VX_cache_wb_sel_merge.v
../../rtl/VX_cache/VX_mrv_queue.v
../../rtl/VX_cache/VX_dcache_llv_resp_bank_sel.v
../../rtl/VX_cache/VX_tag_data_access.v
../../rtl/VX_cache/VX_cache.v
../../rtl/VX_cache/VX_cache_core_req_bank_sel.v
../../rtl/VX_cache/VX_cache_req_queue.v
../../rtl/VX_cache/VX_bank.v
../../rtl/VX_cache/VX_cache_miss_resrv.v
../../rtl/VX_cache/VX_fill_invalidator.v
../../rtl/VX_cache/VX_tag_data_structure.v
../../rtl/cache/VX_generic_pe.v
../../rtl/cache/cache_set.v
../../rtl/cache/VX_d_cache.v
../../rtl/cache/VX_Cache_Bank.v
../../rtl/cache/VX_cache_data_per_index.v
../../rtl/cache/VX_d_cache_encapsulate.v
../../rtl/cache/VX_cache_bank_valid.v
../../rtl/cache/VX_cache_data.v
../../rtl/shared_memory/VX_shared_memory_block.v
../../rtl/shared_memory/VX_priority_encoder_sm.v
../../rtl/shared_memory/VX_shared_memory.v
../../rtl/shared_memory/VX_bank_valids.v
../../rtl/interfaces/VX_exec_unit_req_inter.v
../../rtl/interfaces/VX_branch_response_inter.v
../../rtl/interfaces/VX_inst_meta_inter.v
../../rtl/interfaces/VX_join_inter.v
../../rtl/interfaces/VX_icache_response_inter.v
../../rtl/interfaces/VX_gpr_wspawn_inter.v
../../rtl/interfaces/VX_inst_exec_wb_inter.v
../../rtl/interfaces/VX_gpu_dcache_dram_req_inter.v
../../rtl/interfaces/VX_csr_req_inter.v
../../rtl/interfaces/VX_icache_request_inter.v
../../rtl/interfaces/VX_gpu_dcache_res_inter.v
../../rtl/interfaces/VX_frE_to_bckE_req_inter.v
../../rtl/interfaces/VX_dram_req_rsp_inter.v
../../rtl/interfaces/VX_dcache_request_inter.v
../../rtl/interfaces/VX_gpr_data_inter.v
../../rtl/interfaces/VX_dcache_response_inter.v
../../rtl/interfaces/VX_csr_wb_inter.v
../../rtl/interfaces/VX_gpu_dcache_req_inter.v
../../rtl/interfaces/VX_lsu_req_inter.v
../../rtl/interfaces/VX_gpu_snp_req_rsp.v
../../rtl/interfaces/VX_mw_wb_inter.v
../../rtl/interfaces/VX_gpr_jal_inter.v
../../rtl/interfaces/VX_gpu_inst_req_inter.v
../../rtl/interfaces/VX_wstall_inter.v
../../rtl/interfaces/VX_wb_inter.v
../../rtl/interfaces/VX_gpr_clone_inter.v
../../rtl/interfaces/VX_gpr_read_inter.v
../../rtl/interfaces/VX_mem_req_inter.v
../../rtl/interfaces/VX_jal_response_inter.v
../../rtl/interfaces/VX_warp_ctl_inter.v
../../rtl/interfaces/VX_gpu_dcache_snp_req_inter.v
../../rtl/interfaces/VX_gpu_dcache_dram_res_inter.v
../../rtl/interfaces/VX_inst_mem_wb_inter.v
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv

37
driver/hw/vortex_afu.json Normal file
View File

@@ -0,0 +1,37 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto",
"clock-frequency-low": "auto",
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

638
driver/hw/vortex_afu.sv Normal file
View File

@@ -0,0 +1,638 @@
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
// Interface between CSR and FSM
// All the MMIOs read/write are done from CSR and passed to the FSM for state transitions
// To be done:
// Change address size to buffer's address size and data size based on IO address size. Check from hello_world
`include "platform_if.vh"
import local_mem_cfg_pkg::*;
`include "afu_json_info.vh"
module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2
) (
// global signals
input clk,
input SoftReset,
// IF signals between CCI and AFU
input t_if_ccip_Rx cp2af_sRxPort,
output t_if_ccip_Tx af2cp_sTxPort,
// Avalong signals for local memory access
output t_local_mem_data avs_writedata,
input t_local_mem_data avs_readdata,
output t_local_mem_addr avs_address,
input logic avs_waitrequest,
output logic avs_write,
output logic avs_read,
output t_local_mem_byte_mask avs_byteenable,
output t_local_mem_burst_cnt avs_burstcount,
input avs_readdatavalid,
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
);
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
localparam MEM_ADDRESS = 16'h0040; // AVMM Master Address
localparam MEM_BURSTCOUNT = 16'h0042; // AVMM Master Burst Count
localparam MEM_RDWR = 16'h0044; // AVMM Master Read/Write
localparam MEM_BANK_SELECT = 16'h0064; // Memory bank selection register
localparam READY_FOR_SW_CMD = 16'h0066; // "Ready for sw cmd" register. S/w must poll this register before issuing a read/write command to fsm
localparam MEM_BYTEENABLE = 16'h0068; // Test byteenable
// Added by Apurve to supporead and writeChange address size to buffer's address size
localparam DATA_SIZE = 16'h0046; // MMIO set by SW to denote the size od data to read/write
localparam BUFFER_IO_ADDRESS = 16'h0048; // MMIO set by SW to denote the buffer address space
logic [127:0] afu_id = `AFU_ACCEL_UUID;
// cast c0 header into ReqMmioHdr
t_ccip_c0_ReqMmioHdr mmioHdr;
assign mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
logic [2:0] mem_RDWR = '0;
//--
logic ready_for_sw_cmd;
logic run_vortex;
logic [15:0] avm_data_size;
t_ccip_clAddr avm_write_buffer_address;
t_ccip_clAddr avm_read_buffer_address;
logic avm_read;
logic avm_write;
t_local_mem_addr avm_address;
t_local_mem_burst_cnt avm_burstcount;
t_local_mem_byte_mask avm_byteenable;
// Vortex signals
logic vx_reset;
logic vx_dram_req;
logic vx_dram_req_write;
logic vx_dram_req_read;
logic vx_ebreak;
logic [31:0] vx_dram_req_addr;
logic [31:0] vx_local_addr;
logic [31:0] vx_dram_req_size;
logic [31:0] vx_count;
logic vx_dram_fill_rsp;
logic [31:0] vx_dram_req_data[15:0];
logic [31:0] vx_dram_fill_rsp_data[15:0];
logic vx_dram_fill_accept;
logic [31:0] vx_dram_fill_rsp_addr;
logic [31:0] vx_dram_expected_lat;
//
// MMIO control threads
//
// MMIO register block.
// Reset: clears the MMIO read-response channel (c2) and the command registers.
// Otherwise, every cycle it
//   * derives the avm_read / avm_write / run_vortex strobes from mem_RDWR,
//   * latches MMIO writes into the command registers,
//   * answers MMIO reads (DFH header, AFU UUID, command/status registers).
always@(posedge clk) begin
if(SoftReset) begin
af2cp_sTxPort.c2.hdr <= '0;
af2cp_sTxPort.c2.data <= '0;
af2cp_sTxPort.c2.mmioRdValid <= '0;
avm_address <= '0;
avm_read <= '0;
avm_write <= '0;
avm_burstcount <= 12'd1;
mem_RDWR <= '0;
mem_bank_select <= 1'b1;
// Change address size to buffer's address size
avm_data_size <= '0;
avm_write_buffer_address <= '0;
avm_read_buffer_address <= '0;
run_vortex <= '0;
end
else begin
// mmioRdValid defaults to 0 and is raised for one cycle when a read is served below
af2cp_sTxPort.c2.mmioRdValid <= 0;
avm_read <= mem_RDWR[0] & mem_RDWR[1]; //[0] enable [1] 0-WR,1-RD
avm_write <= mem_RDWR[0] & !mem_RDWR[1];
// Run Vortex when mem_RDWR is 7 (all three bits set).
// NOTE(review): mem_RDWR == 7 also asserts avm_read above, and the FSM's IDLE
// state tests avm_read before run_vortex - confirm this is intended.
run_vortex <= mem_RDWR[0] & mem_RDWR[1] & mem_RDWR[2];
// set the registers on MMIO write request
// the user-defined AFU registers are selected by the localparam offsets
// (MEM_ADDRESS, MEM_BURSTCOUNT, MEM_RDWR, MEM_BANK_SELECT, DATA_SIZE, BUFFER_IO_ADDRESS)
if(cp2af_sRxPort.c0.mmioWrValid == 1)
begin
case(mmioHdr.address)
MEM_ADDRESS: avm_address <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
MEM_BURSTCOUNT: avm_burstcount <= cp2af_sRxPort.c0.data[11:0];
MEM_RDWR: mem_RDWR <= cp2af_sRxPort.c0.data[2:0];
MEM_BANK_SELECT: mem_bank_select <= $bits(mem_bank_select)'(cp2af_sRxPort.c0.data);
// Read and write buffers currently share one MMIO register, so both
// buffer addresses are loaded with the same value.
DATA_SIZE:avm_data_size <= cp2af_sRxPort.c0.data[15:0];
BUFFER_IO_ADDRESS: begin
avm_write_buffer_address <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
avm_read_buffer_address <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
end
endcase
end
// serve MMIO read requests
// (DATA_SIZE, BUFFER_IO_ADDRESS and MEM_BYTEENABLE have no read case and
// therefore read back as 0 through the default branch)
if(cp2af_sRxPort.c0.mmioRdValid == 1)
begin
af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID
case(mmioHdr.address)
// AFU header
16'h0000: af2cp_sTxPort.c2.data <= {
4'b0001, // Feature type = AFU
8'b0, // reserved
4'b0, // afu minor revision = 0
7'b0, // reserved
1'b1, // end of DFH list = 1
24'b0, // next DFH offset = 0
4'b0, // afu major revision = 0
12'b0 // feature ID = 0
};
AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0]; // afu id low
AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi
16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU
16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved
MEM_ADDRESS: af2cp_sTxPort.c2.data <= 64'(avm_address);
MEM_BURSTCOUNT: af2cp_sTxPort.c2.data <= 64'(avm_burstcount);
MEM_RDWR: af2cp_sTxPort.c2.data <= {62'd0, mem_RDWR};
READY_FOR_SW_CMD: af2cp_sTxPort.c2.data <= ready_for_sw_cmd;
MEM_BANK_SELECT: af2cp_sTxPort.c2.data <= 64'(mem_bank_select);
default: af2cp_sTxPort.c2.data <= 64'h0;
endcase
af2cp_sTxPort.c2.mmioRdValid <= 1; // post response
end else
begin
// Clear the enable bit once a strobe has been generated. This only happens
// in cycles where no MMIO read is being served (else branch).
if (avm_read | avm_write | run_vortex) mem_RDWR[0] <= 0;
end
end
end
// FSM
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
// Interface between CSR and FSM
// All the MMIOs read/write passed from csr are used for state transitions
// Read: local memory to shared buffer
// Write: shared buffer to local memory
// To be done:
// Review the FSM and implement read/write to shared buffer
// Vortex on/off signal
// check on byteenable and burst signals
//cp2af_sRxPort -> sRx
//af2cp_sTxPort -> sTx
typedef enum logic[3:0] { IDLE,
VX_REQ,
VX_WR_REQ,
VX_RD_REQ,
VX_RSP,
RD_REQ,
RD_RSP,
WR_REQ,
WR_RSP } state_t;
// Added by Apurve for shared memory space write/read
t_ccip_clAddr wr_addr;
t_ccip_clAddr rd_addr;
logic [15:0] count;
logic [15:0] count_rsp;
logic start_read;
logic start_write;
t_local_mem_addr local_address;
logic init_avs_read;
parameter ADDRESS_MAX_BIT = 10;
state_t state;
assign avs_burstcount = avm_burstcount;
t_local_mem_burst_cnt burstcount;
assign avs_byteenable = avm_byteenable;
// Main control FSM.
// IDLE waits for a command strobe and dispatches, in priority order:
//   avm_write  -> WR_REQ -> WR_RSP -> IDLE
//   avm_read   -> RD_REQ -> RD_RSP -> IDLE
//   run_vortex -> VX_REQ <-> VX_WR_REQ / VX_RD_REQ, then VX_RSP -> IDLE
// ready_for_sw_cmd is set in IDLE and cleared in the same cycle a command is accepted.
// NOTE(review): avs_write, avs_read, count, count_rsp, vx_count, init_avs_read,
// avm_byteenable, vx_dram_fill_rsp and af2cp_sTxPort.c0.valid / c1.valid are also
// assigned by other always_ff blocks in this module; always_ff expects a single
// writer per variable - confirm tool behaviour.
always_ff @(posedge clk) begin
if(SoftReset) begin
local_address <= '0;
avs_write <= '0;
avs_read <= '0;
state <= IDLE;
// burstcount is only reset here; avs_burstcount is driven from avm_burstcount
burstcount <= 1;
ready_for_sw_cmd <= 0;
count <= 0;
count_rsp <= 0;
vx_reset <= 1'b0;
vx_count <= 0;
end
else begin
case(state)
IDLE: begin
ready_for_sw_cmd <= 1;
if (avm_write) begin
state <= WR_REQ;
ready_for_sw_cmd <= 0;
count <= 0;
count_rsp <= 0;
end else if (avm_read) begin
// request the first local-memory read of the transfer
init_avs_read <= 1;
state <= RD_REQ;
ready_for_sw_cmd <= 0;
count <= 0;
count_rsp <= 0;
end else if (run_vortex) begin
state <= VX_REQ;
// vx_reset is raised here and lowered again on entry to VX_REQ
vx_reset <= 1'b1;
ready_for_sw_cmd <= 0;
end
end
WR_REQ: begin //AVL MM Posted Write
// default both strobes low; other blocks raise them when a request/response occurs
af2cp_sTxPort.c0.valid <= 1'b0;
avs_write <= 0;
if (~avs_waitrequest)
begin
// leave once responses covering avm_data_size bytes have been counted
if (count_rsp >= avm_data_size)
begin
state <= WR_RSP;
avs_write <= 0;
end
end
end
WR_RSP: begin // wait for write response
avm_byteenable <= 64'hffffffffffffffff;
state <= IDLE;
end
RD_REQ: begin // AVL MM Read non-posted
af2cp_sTxPort.c1.valid <= 1'b0;
if (~avs_waitrequest) begin
// leave once responses covering avm_data_size bytes have been counted
if (count_rsp >= avm_data_size)
begin
state <= RD_RSP;
avs_read <= 0;
end
end
end
RD_RSP: begin
state <= IDLE;
end
VX_REQ: begin
vx_reset <= 1'b0;
// The three tests below are independent ifs: if several are true in the same
// cycle the last assignment to state wins (vx_ebreak, then read, then write).
if (vx_dram_req_write) begin
vx_count <= 0;
avs_write <= 1'b1;
state <= VX_WR_REQ;
end
if (vx_dram_req_read) begin
vx_count <= 0;
avs_read <= 1'b1;
state <= VX_RD_REQ;
end
if (vx_ebreak) begin
state <= VX_RSP;
end
end
VX_WR_REQ: begin
avs_write <= 1'b0;
// return to VX_REQ once vx_count has reached the requested size
if (vx_count >= vx_dram_req_size)
begin
state <= VX_REQ;
vx_count <= 0;
end
end
VX_RD_REQ: begin
avs_read <= 1'b0;
vx_dram_fill_rsp <= 1'b0;
// return to VX_REQ once vx_count has reached the requested size
if (vx_count >= vx_dram_req_size)
begin
state <= VX_REQ;
vx_count <= 0;
end
end
VX_RSP: begin
vx_count <= 0;
state <= IDLE;
end
endcase
end // end else reset
end // posedge clk
// Vortex call
Vortex_SOC #()
vx_soc (
.clk (clk),
.reset (vx_reset),
// IO
//.io_valid[`NUMBER_CORES-1:0] (),
//.io_data [`NUMBER_CORES-1:0] (),
//.number_cores (),
// DRAM Dcache Req
.out_dram_req (vx_dram_req),
.out_dram_req_write (vx_dram_req_write),
.out_dram_req_read (vx_dram_req_read),
.out_dram_req_addr (vx_dram_req_addr),
.out_dram_req_size (vx_dram_req_size),
.out_dram_req_data (vx_dram_req_data),
.out_dram_expected_lat (vx_dram_expected_lat),
// DRAM Dcache Res
.out_dram_fill_accept (vx_dram_fill_accept),
.out_dram_fill_rsp (vx_dram_fill_rsp),
.out_dram_fill_rsp_addr (vx_dram_fill_rsp_addr),
.out_dram_fill_rsp_data (vx_dram_fill_rsp_data),
//.l3c_snp_req (),
//.l3c_snp_req_addr (),
//.l3c_snp_req_delay (),
.out_ebreak (vx_ebreak)
);
// Local memory read/write address
//assign avs_address = (vx_dram_req ? (vx_count ? vx_local_addr : vx_dram_req_addr) : (count ? local_address : avm_address));
assign avs_address = (((state == VX_WR_REQ) || (state == VX_RD_REQ)) ? (vx_count ? vx_local_addr : vx_dram_req_addr) : (count ? local_address : avm_address));
// Vortex DRAM requests and responses
// Handling of read/write data and vx_dram_req_size
// Is vx_dram_fill_accept for backpressure?
// Vortex DRAM write path.
// While the FSM is in VX_WR_REQ and local memory is not stalling, issue one
// 64-byte write per cycle until vx_count reaches vx_dram_req_size.
always_ff @(posedge clk) begin
    if (state == VX_WR_REQ) begin
        if (!avs_waitrequest & (vx_count < vx_dram_req_size)) begin
            avs_write <= 1'b1;
            // Pack the sixteen 32-bit words into the 512-bit write data, word 0 in
            // the low bits. Non-blocking assignment is used so that avs_writedata is
            // written the same way here as in the CCI-P read-response block;
            // mixing '=' and '<=' on one register inside always_ff is not allowed.
            for (int i = 0; i < 16; i++) begin
                avs_writedata[32*i +: 32] <= vx_dram_req_data[i];
            end
            // First beat uses vx_dram_req_addr directly, so the stored address is
            // always the one for the *next* beat.
            vx_local_addr <= (vx_count ? vx_local_addr + 1 : vx_dram_req_addr + 1);
            // Update the count value based on the number of bytes written
            vx_count <= vx_count + 64;
            // Partial last line: enable only the remaining low-order bytes
            if ((vx_dram_req_size - vx_count) < 64)
            begin
                avm_byteenable <= 64'hffffffffffffffff >> (64 - (vx_dram_req_size - vx_count));
            end else
            begin
                avm_byteenable <= 64'hffffffffffffffff;
            end
        end
    end
end
always_ff @(posedge clk) begin
//if (SoftReset) begin
if (vx_reset) begin
vx_dram_fill_rsp <= 1'b0;
//vx_dram_fill_rsp_data <= 0;
vx_dram_fill_rsp_data[0] <= 0;
vx_dram_fill_rsp_data[1] <= 0;
vx_dram_fill_rsp_data[2] <= 0;
vx_dram_fill_rsp_data[3] <= 0;
vx_dram_fill_rsp_data[4] <= 0;
vx_dram_fill_rsp_data[5] <= 0;
vx_dram_fill_rsp_data[6] <= 0;
vx_dram_fill_rsp_data[7] <= 0;
vx_dram_fill_rsp_data[8] <= 0;
vx_dram_fill_rsp_data[9] <= 0;
vx_dram_fill_rsp_data[10] <= 0;
vx_dram_fill_rsp_data[11] <= 0;
vx_dram_fill_rsp_data[12] <= 0;
vx_dram_fill_rsp_data[13] <= 0;
vx_dram_fill_rsp_data[14] <= 0;
vx_dram_fill_rsp_data[15] <= 0;
end
if (state == VX_RD_REQ) begin
if (avs_readdatavalid & vx_dram_fill_accept) begin
avs_read <= 1'b1;
vx_dram_fill_rsp <= 1'b1;
//vx_dram_fill_rsp_data <= avs_readdata;
vx_dram_fill_rsp_data[0] <= avs_readdata[31:0];
vx_dram_fill_rsp_data[1] <= avs_readdata[63:32];
vx_dram_fill_rsp_data[2] <= avs_readdata[95:64];
vx_dram_fill_rsp_data[3] <= avs_readdata[127:96];
vx_dram_fill_rsp_data[4] <= avs_readdata[159:128];
vx_dram_fill_rsp_data[5] <= avs_readdata[191:160];
vx_dram_fill_rsp_data[6] <= avs_readdata[223:192];
vx_dram_fill_rsp_data[7] <= avs_readdata[255:224];
vx_dram_fill_rsp_data[8] <= avs_readdata[287:256];
vx_dram_fill_rsp_data[9] <= avs_readdata[319:288];
vx_dram_fill_rsp_data[10] <= avs_readdata[351:320];
vx_dram_fill_rsp_data[11] <= avs_readdata[383:352];
vx_dram_fill_rsp_data[12] <= avs_readdata[415:384];
vx_dram_fill_rsp_data[13] <= avs_readdata[447:416];
vx_dram_fill_rsp_data[14] <= avs_readdata[479:448];
vx_dram_fill_rsp_data[15] <= avs_readdata[511:480];
vx_local_addr <= (vx_count ? vx_local_addr + 1 : vx_dram_req_addr + 1);
vx_dram_fill_rsp_addr <= vx_local_addr;
// Update the count value based on the number of bytes written
vx_count <= vx_count + 64;
end
end
end
// Read from local memory (avs_readdata) and write to shared space
// Implement write header
always_ff @(posedge clk) begin
if (state == RD_REQ & avs_readdatavalid & !cp2af_sRxPort.c1TxAlmFull & count < avm_data_size & !avs_waitrequest & start_write)
begin
wr_addr <= (count? wr_addr + 1 : avm_write_buffer_address + 1);
local_address <= (count? local_address + 1 : avm_address + 1);
start_write <= 1'b0;
end
end
// Write header defines the request to the FIU
t_ccip_c1_ReqMemHdr wr_hdr;
always_comb
begin
wr_hdr = t_ccip_c1_ReqMemHdr'(0);
// Virtual address (MPF virtual addressing is enabled)
wr_hdr.address = (count? wr_addr: avm_write_buffer_address);
// Start of packet is true (single line write)
wr_hdr.sop = 1'b1;
end
// Send write requests to the FIU
always_ff @(posedge clk)
begin
if (SoftReset)
begin
af2cp_sTxPort.c1.hdr <= '0;
af2cp_sTxPort.c1.data <= '0;
af2cp_sTxPort.c1.valid <= '0;
end
// Generate a write request when needed and the FIU isn't full
if (state == RD_REQ & avs_readdatavalid & !cp2af_sRxPort.c1TxAlmFull & count < avm_data_size & !avs_waitrequest & start_write)
begin
af2cp_sTxPort.c1.hdr <= wr_hdr;
af2cp_sTxPort.c1.data <= t_ccip_clData'(avs_readdata);
af2cp_sTxPort.c1.valid <= 1'b1;
start_write <= 1'b0;
count <= count + 64;
end
end
// Write response
always_ff @(posedge clk)
begin
if (SoftReset)
begin
start_write <= 1'b1;
end
// Generate a read request when needed and the FIU isn't full
if (state == RD_REQ & cp2af_sRxPort.c1.rspValid)
begin
count_rsp <= count_rsp + 64;
start_write <= 1'b1;
init_avs_read <= 1'b1;
end
end
// avs_read control
// Turns a pending init_avs_read request into a single-cycle avs_read pulse while
// the FSM is in RD_REQ; in every other cycle avs_read is driven low by this block.
always_ff @(posedge clk)
begin
    if (SoftReset)
    begin
        init_avs_read <= 1'b0;
    end
    // The state test must be an equality: the previous "state <= RD_REQ" was a
    // less-than-or-equal comparison that is also true in IDLE and the VX_* states,
    // so a request left pending at the end of a transfer fired a read in IDLE.
    if (init_avs_read && (state == RD_REQ))
    begin
        avs_read <= 1'b1;
        init_avs_read <= 1'b0;
    end else
    begin
        avs_read <= 1'b0;
    end
end
// Write to local memory (avs_writedata) and read from shared space
// Implement read header
always_ff @(posedge clk) begin
if (SoftReset)
begin
rd_addr <= 0;
local_address <= 0;
end
if (state == WR_REQ & !cp2af_sRxPort.c0TxAlmFull & count < avm_data_size & !avs_waitrequest & start_read)
begin
// Read address + 1 gives address for next block. Each block is 64B
rd_addr <= (count? rd_addr + 1 : avm_read_buffer_address + 1);
local_address <= (count? local_address + 1 : avm_address);
start_read <= 1'b0;
end
end
// Read header defines the request to the FIU
t_ccip_c0_ReqMemHdr rd_hdr;
always_comb
begin
rd_hdr = t_ccip_c0_ReqMemHdr'(0);
rd_hdr.address = (count? rd_addr : avm_read_buffer_address);
end
// Send read requests to the FIU
always_ff @(posedge clk)
begin
if (SoftReset)
begin
af2cp_sTxPort.c0.hdr <= '0;
af2cp_sTxPort.c0.valid <= '0;
end
// Generate a read request when needed and the FIU isn't full
if (state == WR_REQ & !cp2af_sRxPort.c0TxAlmFull & count < avm_data_size & !avs_waitrequest & start_read)
begin
af2cp_sTxPort.c0.hdr <= rd_hdr;
af2cp_sTxPort.c0.valid <= 1'b1;
start_read <= 1'b0;
count <= count + 64;
end
end
// Read response
always_ff @(posedge clk)
begin
if (SoftReset)
begin
start_read <= 1'b1;
avm_byteenable <= 64'hffffffffffffffff;
end
// Generate a read request when needed and the FIU isn't full
if (state == WR_REQ & cp2af_sRxPort.c0.rspValid)
begin
if ((avm_data_size - count_rsp) < 64)
begin
avm_byteenable <= 64'hffffffffffffffff >> (64 - (avm_data_size - count_rsp));
end else
begin
avm_byteenable <= 64'hffffffffffffffff;
end
avs_writedata <= cp2af_sRxPort.c0.data;
avs_write <= 1;
count_rsp <= count_rsp + 64;
start_read <= 1'b1;
end
end
endmodule

97
driver/opae_setup.sh Normal file
View File

@@ -0,0 +1,97 @@
## Step-by-step notes for building the OPAE SDK 1.4.0 with ASE, setting up an
## ASE simulation directory, and running the intel-fpga-bbb hello-world sample.
## NOTE(review): every path below is hard-coded to one user's home directory, and
## the bare `bash` command starts a new shell in the middle of the file, so this
## looks meant to be followed by hand rather than executed as a script - confirm.
## Required tools
# gcc (>4.9)
# libjson
# python
# Quartus
# RTL Simulator (VCS or ModelSim or QuestaSim)
## Download OPAE SDK from https://github.com/OPAE/opae-sdk/archive/1.4.0-1.tar.gz
cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/
## Update the following file based on /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh
# ./opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh
###################################################################################################
################################### TO BE DONE EVERY TIME #########################################
###################################################################################################
## Change the shell to bash before running
bash
## Setup Environment
## Running the default script results in multiple versions of libcurl during cmake.
#source /nethome/achawda6/specialProblem/rg_intel_fpga_end_19.3.sh
source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh
## Setup the variables for using the Quartus modelsim
source /nethome/achawda6/specialProblem/modelsim_env.sh
## Run this to setup the environment variables
source /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh
## gcc version should be greater than 4.9 to support c++14
source /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/env_check.sh
export PATH=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall/bin:${PATH}
export FPGA_BBB_CCI_SRC=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb
####################################################################################################
## Setup OPAE
mkdir mybuild
cd mybuild
## Update the directory path where you want to install OPAE
cmake .. -DBUILD_ASE=1 -DCMAKE_INSTALL_PREFIX=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall
make
make install
## Setup ASE
## Add the installed OPAE path in PATH
export PATH=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall/bin:${PATH}
## Use this version of HDL files
/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/afu_sim_setup --sources=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/rtl/sources_ase_server.txt run1Build
cd run1Build/
python scripts/ipc_clean.py
## Running Sample
## Download opae-bbb from https://github.com/OPAE/intel-fpga-bbb
cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1
git clone https://github.com/OPAE/intel-fpga-bbb
cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb
## Separate build directory for the BBB tree (not the SDK's mybuild created above)
mkdir mybuild
cd mybuild
cmake .. -DCMAKE_INSTALL_PREFIX=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall
make
make install
export FPGA_BBB_CCI_SRC=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb
## Running hello world
cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb/samples/tutorial/01_hello_world
afu_sim_setup --source hw/rtl/sources.txt build_sim
cd build_sim
## Update libstdc++6 if it errors out
make
make sim

6
driver/set_env.sh Normal file
View File

@@ -0,0 +1,6 @@
# Environment for building and simulating the Vortex OPAE driver.
# Meant to be sourced from a bash shell so the exports reach the caller.
source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh
export PATH=/tools/opae/1.4.0/bin:/tools/reconfig/intel/19.3/modelsim_ase/bin:$PATH
# Prepend the OPAE libraries to the existing library search path. The previous
# version appended $PATH here, which put executable directories on the library
# path and dropped any LD_LIBRARY_PATH that was already set. The ${VAR:+...} form
# avoids a trailing ':' when the variable is empty or unset.
export LD_LIBRARY_PATH=/tools/opae/1.4.0/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
export QUARTUS_HOME=$QUARTUS_ROOTDIR
export MTI_HOME=/tools/reconfig/intel/19.3/modelsim_ase
export FPGA_FAMILY=arria10

19
driver/sw/Makefile Normal file
View File

@@ -0,0 +1,19 @@
all: opae rtlsim simx
opae:
$(MAKE) -C opae
rtlsim:
$(MAKE) -C rtlsim
simx:
$(MAKE) -C simx
clean:
$(MAKE) clean -C opae
$(MAKE) clean -C rtlsim
$(MAKE) clean -C simx
.PHONY: all opae rtlsim simx clean

View File

@@ -0,0 +1,67 @@
#ifndef __VX_DRIVER_H__
#define __VX_DRIVER_H__
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef void* vx_device_h;
typedef void* vx_buffer_h;
#define VX_LOCAL_MEM_SIZE 0xffffffff
#define VX_ALLOC_BASE_ADDR 0x10000000
#define VX_KERNEL_BASE_ADDR 0x80000000
#define VX_CACHE_LINESIZE 64
// open the device and connect to it
int vx_dev_open(vx_device_h* hdevice);
// Close the device when all the operations are done
int vx_dev_close(vx_device_h hdevice);
// Allocate shared buffer with device
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
// Get host pointer address
volatile void* vx_host_ptr(vx_buffer_h hbuffer);
// release buffer
int vx_buf_release(vx_buffer_h hbuffer);
// allocate device memory and return address
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
// Flush the device caches for the given device memory range
int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size);
// Copy bytes from buffer to device local memory
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);
// Copy bytes from device local memory to buffer
int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dst_offset);
// Start device execution
int vx_start(vx_device_h hdevice);
// Wait for device ready with milliseconds timeout
int vx_ready_wait(vx_device_h hdevice, long long timeout);
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
// upload kernel bytes to device
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size);
// upload kernel file to device
int vx_upload_kernel_file(vx_device_h device, const char* filename);
#ifdef __cplusplus
}
#endif
#endif // __VX_DRIVER_H__

66
driver/sw/opae/Makefile Normal file
View File

@@ -0,0 +1,66 @@
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I/tools/opae/1.4.0/include
LDFLAGS += -L/tools/opae/1.4.0/lib
# stack execution protection
LDFLAGS +=-z noexecstack
# data relocation and projection
LDFLAGS +=-z relro -z now
# stack buffer overrun detection
CXXFLAGS +=-fstack-protector
# Position independent code
CXXFLAGS += -fPIC
LDFLAGS += -luuid
LDFLAGS += -shared
FPGA_LIBS += -lopae-c
ASE_LIBS += -lopae-c-ase
LIB_DIR=../lib
ASE_DIR = ase
PROJECT = libvortex.so
PROJECT_ASE = $(ASE_DIR)/libvortex.so
AFU_JSON_INFO = vortex_afu.h
SRCS = vortex.cpp ../vx_utils.cpp
all: $(PROJECT) $(PROJECT_ASE)
# AFU info from JSON file, including AFU UUID
$(AFU_JSON_INFO): ../../hw/vortex_afu.json
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
# FPGA library. vortex.cpp includes the generated AFU header, so it is listed as
# a prerequisite; $(SRCS) is passed explicitly so the header is not handed to the
# compiler as an input file.
$(PROJECT): $(SRCS) $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) $(FPGA_LIBS) -o $@

# ASE (simulation) library. The output directory is an order-only prerequisite:
# it must exist, but its timestamp must not trigger a relink.
$(PROJECT_ASE): $(SRCS) $(AFU_JSON_INFO) | $(ASE_DIR)
	$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@

# Object file built with the C++ driver, matching the C++ flags it is given.
vortex.o: vortex.cpp $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@

$(ASE_DIR):
	mkdir -p $(ASE_DIR)
.depend: $(SRCS) $(AFU_JSON_INFO)
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
clean:
rm -rf $(PROJECT) $(PROJECT_ASE) $(AFU_JSON_INFO) *.o .depend
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

349
driver/sw/opae/vortex.cpp Executable file
View File

@@ -0,0 +1,349 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <uuid/uuid.h>
#include <opae/fpga.h>
#include <vortex.h>
#include "vortex_afu.h"
// MMIO Address Mappings
#define MMIO_COPY_IO_ADDRESS 0X120
#define MMIO_COPY_AVM_ADDRESS 0x100
#define MMIO_COPY_DATA_SIZE 0X118
#define MMIO_CMD_TYPE 0X110
#define MMIO_READY_FOR_CMD 0X198
#define MMIO_CMD_TYPE_READ 0
#define MMIO_CMD_TYPE_WRITE 1
#define MMIO_CMD_TYPE_START 2
#define MMIO_CMD_TYPE_SNOOP 3
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
typedef struct vx_device_ {
fpga_handle fpga;
size_t mem_allocation;
} vx_device_t;
typedef struct vx_buffer_ {
uint64_t wsid;
volatile void* host_ptr;
uint64_t io_addr;
fpga_handle fpga;
size_t size;
} vx_buffer_t;
// Round size up to the next multiple of the cache line size.
static size_t align_size(size_t size) {
  const size_t num_lines = (size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE;
  return num_lines * VX_CACHE_LINESIZE;
}
///////////////////////////////////////////////////////////////////////////////
// Search for an accelerator matching the requested UUID and connect to it
// Convert this to void if required as storing the fpga_handle to params variable
// Find the accelerator whose UUID is AFU_ACCEL_UUID, open it and return a newly
// allocated device object through *hdevice.
// Returns 0 on success and -1 on any failure (null argument, bad UUID, OPAE
// error, accelerator not found, out of memory). On failure *hdevice is not
// written and every OPAE object acquired along the way is released.
extern int vx_dev_open(vx_device_h* hdevice) {
  fpga_properties filter = NULL;
  fpga_result res;
  fpga_guid guid;
  fpga_token accel_token;
  uint32_t num_matches = 0;
  fpga_handle accel_handle;
  vx_device_t* device;

  if (NULL == hdevice)
    return -1;

  // Parse the UUID first: nothing has been acquired yet if it is malformed
  if (uuid_parse(AFU_ACCEL_UUID, guid) != 0) {
    fprintf(stderr, "Invalid accelerator UUID %s!\n", AFU_ACCEL_UUID);
    return -1;
  }

  // Set up a filter that will search for an accelerator with the desired UUID
  res = fpgaGetProperties(NULL, &filter);
  if (FPGA_OK != res)
    return -1;

  res = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
  if (FPGA_OK == res)
    res = fpgaPropertiesSetGUID(filter, guid);

  // Do the search across the available FPGA contexts
  if (FPGA_OK == res)
    res = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);

  // Not needed anymore
  fpgaDestroyProperties(&filter);

  if (FPGA_OK != res) {
    fprintf(stderr, "OPAE Error: accelerator search returned %d!\n", (int)res);
    return -1;
  }

  // accel_token is only valid when at least one match was reported
  if (num_matches < 1) {
    fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
    return -1;
  }

  // Open accelerator
  res = fpgaOpen(accel_token, &accel_handle, 0);

  // Done with token, whether or not the open succeeded
  fpgaDestroyToken(&accel_token);

  if (FPGA_OK != res)
    return -1;

  // allocate device object
  device = (vx_device_t*)malloc(sizeof(vx_device_t));
  if (NULL == device) {
    fpgaClose(accel_handle);
    return -1;
  }

  device->fpga = accel_handle;
  device->mem_allocation = VX_ALLOC_BASE_ADDR;

  *hdevice = device;

  return 0;
}
// Close the fpga when all the operations are done
extern int vx_dev_close(vx_device_h hdevice) {
if (NULL == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
fpgaClose(device->fpga);
free(device);
return 0;
}
// Reserve size bytes (rounded up to a whole number of cache lines) of device
// local memory and return the start address through *dev_maddr.
// Allocation is a simple bump pointer that starts at VX_ALLOC_BASE_ADDR; memory
// is never returned. Returns 0 on success, -1 on a bad argument or when the
// request does not fit below VX_LOCAL_MEM_SIZE.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  if (NULL == hdevice
   || NULL == dev_maddr
   || 0 == size)
    return -1;

  vx_device_t *device = ((vx_device_t*)hdevice);

  size_t asize = align_size(size);

  // The limit is the end of local memory, not the allocation base: comparing
  // against VX_ALLOC_BASE_ADDR rejected every request because the bump pointer
  // starts there. Written as a subtraction so the check itself cannot wrap.
  if (asize > VX_LOCAL_MEM_SIZE - device->mem_allocation)
    return -1;

  *dev_maddr = device->mem_allocation;
  device->mem_allocation += asize;

  return 0;
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
fpga_result res;
void* host_ptr;
uint64_t wsid;
uint64_t io_addr;
vx_buffer_t* buffer;
if (NULL == hdevice
|| 0 >= size
|| NULL == hbuffer)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
size_t asize = align_size(size);
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
if (FPGA_OK != res) {
return -1;
}
// Get the physical address of the buffer in the accelerator
res = fpgaGetIOAddress(device->fpga, wsid, &io_addr);
if (FPGA_OK != res) {
fpgaReleaseBuffer(device->fpga, wsid);
return -1;
}
// allocate buffer object
buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
if (NULL == buffer) {
fpgaReleaseBuffer(device->fpga, wsid);
return -1;
}
buffer->wsid = wsid;
buffer->host_ptr = host_ptr;
buffer->io_addr = io_addr;
buffer->fpga = device->fpga;
buffer->size = size;
*hbuffer = buffer;
return 0;
}
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
if (NULL == buffer)
return NULL;
return buffer->host_ptr;
}
extern int vx_buf_release(vx_buffer_h hbuffer) {
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
if (NULL == buffer)
return -1;
fpgaReleaseBuffer(buffer->fpga, buffer->wsid);
free(buffer);
return 0;
}
// Check if HW is ready for SW
// Poll the MMIO_READY_FOR_CMD register until it reads 1.
// Returns 0 once the device reports ready, or -1 (via CHECK_RES) if an MMIO
// read fails. There is no timeout: the loop runs until the register reads 1.
static int ready_for_sw(fpga_handle hdevice) {
uint64_t data = 0;
struct timespec sleep_time;
// Poll interval: 1 s under ASE, 1 ms otherwise
#ifdef USE_ASE
sleep_time.tv_sec = 1;
sleep_time.tv_nsec = 0;
#else
sleep_time.tv_sec = 0;
sleep_time.tv_nsec = 1000000;
#endif
do {
CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data));
// The sleep follows every read, including the one that observes ready, so
// each call takes at least one poll interval.
// NOTE(review): callers poll right after writing a command; this delay may
// be masking the time the hardware needs to drop its ready flag - confirm
// before removing it.
nanosleep(&sleep_time, NULL);
} while (data != 0x1);
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
if (NULL == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
// bound checking
if (size + src_offset > buffer->size)
return -1;
// Ensure ready for new command
if (ready_for_sw(buffer->fpga) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_WRITE));
// Wait for the write operation to finish
return ready_for_sw(buffer->fpga);
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
if (NULL == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
// bound checking
if (size + dest_offset > buffer->size)
return -1;
// Ensure ready for new command
if (ready_for_sw(buffer->fpga) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + dest_offset)/VX_CACHE_LINESIZE));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_READ));
// Wait for the read operation to finish
return ready_for_sw(buffer->fpga);
}
// Ask the device to flush its caches for size bytes starting at device address
// dev_maddr, and wait for the command to complete.
// Returns 0 on success, -1 on a bad argument or OPAE error.
// The previous body was copied from vx_copy_to_dev and referred to a buffer
// handle and source offset that this function does not receive, so it could not
// compile. A flush involves no host buffer, so only the device address, the
// size and the command type are programmed.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
  if (NULL == hdevice
   || 0 == size)
    return -1;

  vx_device_t *device = ((vx_device_t*)hdevice);

  // Ensure ready for new command
  if (ready_for_sw(device->fpga) != 0)
    return -1;

  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_COPY_DATA_SIZE, size));
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_SNOOP));

  // Wait for the flush operation to finish
  return ready_for_sw(device->fpga);
}
extern int vx_start(vx_device_h hdevice) {
if (NULL == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
// Ensure ready for new command
if (ready_for_sw(device->fpga) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_START));
return 0;
}
// Wait until the device reports ready, polling MMIO_READY_FOR_CMD.
// timeout is in milliseconds; the device is always polled at least once.
// Returns 0 when the device is ready, -1 on a null handle, an MMIO read error,
// or when the timeout expires before the device becomes ready.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  if (NULL == hdevice)
    return -1;

  vx_device_t *device = ((vx_device_t*)hdevice);

  uint64_t data = 0;
  struct timespec sleep_time;

  // Poll interval: 1 s under ASE, 1 ms otherwise
#ifdef USE_ASE
  sleep_time.tv_sec = 1;
  sleep_time.tv_nsec = 0;
#else
  sleep_time.tv_sec = 0;
  sleep_time.tv_nsec = 1000000;
#endif

  // to milliseconds
  const long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);

  for (;;) {
    CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_READY_FOR_CMD, &data));
    nanosleep(&sleep_time, NULL);
    if (0x1 == data)
      return 0;
    // Charge the elapsed interval against the caller's budget. The previous
    // code subtracted sleep_time_ms from itself, so the budget never shrank and
    // any positive timeout waited forever.
    timeout -= sleep_time_ms;
    if (timeout <= 0)
      return -1;
  }
}

49
driver/sw/rtlsim/Makefile Normal file
View File

@@ -0,0 +1,49 @@
#CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
#USE_MULTICORE=1
CFLAGS += -I../../include -I../../../../rtl/simulate
CFLAGS += -fPIC
CFLAGS += -DUSE_RTLSIM
LDFLAGS += -shared -pthread
ifdef USE_MULTICORE
CFLAGS += -DUSE_MULTICORE
RTL_TOP = Vortex_SOC
else
RTL_TOP = Vortex
endif
SRCS = vortex.cpp ../vx_utils.cpp ../../../rtl/simulate/$(RTL_TOP).cpp
RTL_INCLUDE = -I../../../rtl -I../../../rtl/interfaces -I../../../rtl/cache -I../../../rtl/VX_cache -I../../../rtl/shared_memory -I../../../rtl/pipe_regs -I../../../rtl/compat
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
VL_FLAGS += --threads $(THREADS)
VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH
VL_FLAGS += -Wno-UNDRIVEN --Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH -Wno-UNSIGNED -Wno-UNOPTFLAT -Wno-LITENDIAN
# Debugging
VL_FLAGS += --trace -DVL_DEBUG=1
CFLAGS += -DVCD_OUTPUT
PROJECT = libvortex.so
all: $(PROJECT)
.PHONY: build_config
build_config:
(cd ../../../rtl && ./gen_config.py --rtl_locations)
$(PROJECT): $(SRCS) build_config
verilator --exe --cc $(RTL_TOP).v $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make -j -C obj_dir -f V$(RTL_TOP).mk
clean:
rm -rf $(PROJECT) obj_dir

327
driver/sw/rtlsim/vortex.cpp Normal file
View File

@@ -0,0 +1,327 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <thread>
#include <mutex>
#include <chrono>
#include <vortex.h>
#include <ram.h>
#ifdef USE_MULTICORE
#include <Vortex_SOC.h>
#else
#include <Vortex.h>
#endif
#define PAGE_SIZE 4096
// Evaluates `_expr` as an fpga_result. If it is not FPGA_OK, prints the
// failing expression with its result code and makes the enclosing function
// return -1. The do/while(false) wrapper lets it be used as one statement.
// NOTE(review): nothing in the visible part of this file uses this macro, and
// fpga_result/FPGA_OK look like OPAE names that the includes above do not
// obviously provide - presumably a leftover from the OPAE driver; confirm.
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
// Rounds `size` up to the next multiple of VX_CACHE_LINESIZE.
static size_t align_size(size_t size) {
  const size_t num_lines = (size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE;
  return VX_CACHE_LINESIZE * num_lines;
}
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
void* data() const {
return data_;
}
size_t size() const {
return size_;
}
vx_device* device() const {
return device_;
}
private:
size_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
// Simulated device: owns the RAM and the Verilated Vortex model, and steps
// the model continuously on a background thread until destroyed.
//
// mutex_ serialises every access to vortex_ and is_done_.
// NOTE(review): upload()/download() touch ram_ without taking mutex_ while
// the worker thread is stepping the model - presumably the model reads the
// same RAM; confirm whether these transfers need the lock.
class vx_device {
public:
  vx_device()
    : is_done_(false)
    , mem_allocation_(VX_ALLOC_BASE_ADDR)
    , vortex_(&ram_) {
    // Started from the body so every member (mutex_ included) already exists.
    thread_ = std::thread(thread_entry, this);
  }

  ~vx_device() {
    {
      std::lock_guard<std::mutex> guard(mutex_);
      is_done_ = true;
    }
    if (thread_.joinable()) {
      thread_.join();
    }
  }

  // Reserves `size` bytes (rounded up by align_size) of device memory and
  // stores the start address in *dev_maddr. Memory is handed out linearly and
  // never released. Returns 0 on success, -1 when the request does not fit
  // below VX_LOCAL_MEM_SIZE.
  int alloc_local_mem(size_t size, size_t* dev_maddr) {
    size_t asize = align_size(size);
    if (mem_allocation_ + asize > VX_LOCAL_MEM_SIZE)
      return -1;
    *dev_maddr = mem_allocation_;
    mem_allocation_ += asize;
    return 0;
  }

  // Copies from host memory at src + src_offset into simulated RAM at
  // dest_addr. The transfer length is `size` rounded up by align_size, so up
  // to one cache line more than `size` is read from the host buffer.
  // Returns -1 if the transfer would run past the end of RAM.
  int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
    size_t asize = align_size(size);
    if (dest_addr + asize > ram_.size())
      return -1;
    ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
    return 0;
  }

  // Copies from simulated RAM at src_addr into host memory at
  // dest + dest_offset, using the same rounded-up length as upload().
  // `dest` is written despite being declared const; the signature is kept as
  // is for existing callers.
  int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
    size_t asize = align_size(size);
    if (src_addr + asize > ram_.size())
      return -1;
    ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
    return 0;
  }

  // Asks the model to flush its caches for the given device address range.
  int flush_caches(size_t dev_maddr, size_t size) {
    std::lock_guard<std::mutex> guard(mutex_);
    vortex_.flush_caches(dev_maddr, size);
    return 0;
  }

  // Resets the model; the worker thread keeps stepping it afterwards.
  int start() {
    std::lock_guard<std::mutex> guard(mutex_);
    vortex_.reset();
    return 0;
  }

  // Polls once per second until the model is no longer busy or the timeout
  // (milliseconds, truncated to whole seconds) runs out. A negative timeout
  // waits indefinitely. Always returns 0, including on timeout.
  int wait(long long timeout) {
    auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
    for (;;) {
      bool is_busy;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_busy = vortex_.is_busy();
      }
      if (!is_busy || 0 == timeout_sec--)
        break;
      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    return 0;
  }

private:

  // Worker loop: steps the model until the destructor sets is_done_.
  void thread_proc() {
    std::cout << "Device ready..." << std::endl;
    for (;;) {
      bool is_done;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_done = is_done_;
      }
      if (is_done)
        break;
      // Separate critical section, so callers get a chance to take the lock
      // between the check and the step.
      std::lock_guard<std::mutex> guard(mutex_);
      vortex_.step();
    }
    std::cout << "Device shutdown..." << std::endl;
  }

  static void thread_entry(vx_device* device) {
    device->thread_proc();
  }

  bool is_done_;
  size_t mem_allocation_;
  RAM ram_;
#ifdef USE_MULTICORE
  Vortex_SOC vortex_;
#else
  Vortex vortex_;
#endif
  std::mutex mutex_;
  std::thread thread_; // declared last: destroyed first, started last
};
///////////////////////////////////////////////////////////////////////////////
// Creates a simulated device and hands its handle back through `hdevice`.
// Returns 0 on success, -1 when `hdevice` is null.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (hdevice != nullptr) {
    *hdevice = new vx_device();
    return 0;
  }
  return -1;
}
// Destroys the device behind `hdevice`. Returns -1 for a null handle.
extern int vx_dev_close(vx_device_h hdevice) {
  auto dev = (vx_device*)hdevice;
  if (!dev) {
    return -1;
  }
  delete dev;
  return 0;
}
// Allocates `size` bytes of device memory; the address is stored in
// *dev_maddr. Returns -1 for a null handle, null output pointer or zero size,
// otherwise the result of vx_device::alloc_local_mem.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool args_ok = (hdevice != nullptr)
                    && (dev_maddr != nullptr)
                    && (size != 0);
  if (!args_ok) {
    return -1;
  }
  auto dev = (vx_device*)hdevice;
  return dev->alloc_local_mem(size, dev_maddr);
}
// Flushes the device caches for [dev_maddr, dev_maddr + size).
// Returns -1 for a null handle or zero size.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
  if (!hdevice || 0 == size) {
    return -1;
  }
  auto dev = (vx_device*)hdevice;
  return dev->flush_caches(dev_maddr, size);
}
// Creates a host staging buffer of `size` bytes tied to the device and
// returns its handle through `hbuffer`. Returns -1 on invalid arguments or
// when the buffer's storage could not be allocated.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (!hdevice || 0 == size || !hbuffer) {
    return -1;
  }
  vx_buffer* buf = new vx_buffer(size, (vx_device*)hdevice);
  if (buf->data() != nullptr) {
    *hbuffer = buf;
    return 0;
  }
  // storage allocation failed
  delete buf;
  return -1;
}
// Returns the host address of the buffer's storage, or nullptr for a null
// handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
  auto buf = (vx_buffer*)hbuffer;
  if (buf) {
    return buf->data();
  }
  return nullptr;
}
// Destroys the buffer behind `hbuffer`. Returns -1 for a null handle.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  auto buf = (vx_buffer*)hbuffer;
  if (!buf) {
    return -1;
  }
  delete buf;
  return 0;
}
// Copies `size` bytes, starting `src_offset` bytes into the host buffer, to
// device address `dev_maddr`.
// Returns -1 for a null handle, zero size, or a range that does not lie
// entirely inside the buffer; otherwise the result of vx_device::upload.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;

  auto buffer = (vx_buffer*)hbuffer;

  // Written without computing size + src_offset, which could wrap around and
  // let an out-of-range request through.
  const size_t buf_size = buffer->size();
  if (src_offset > buf_size
   || size > buf_size - src_offset)
    return -1;

  return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
// Copies `size` bytes from device address `dev_maddr` into the host buffer,
// starting `dest_offset` bytes in.
// Returns -1 for a null handle, zero size, or a range that does not lie
// entirely inside the buffer; otherwise the result of vx_device::download.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;

  auto buffer = (vx_buffer*)hbuffer;

  // Written without computing size + dest_offset, which could wrap around and
  // let an out-of-range request through.
  const size_t buf_size = buffer->size();
  if (dest_offset > buf_size
   || size > buf_size - dest_offset)
    return -1;

  return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
// Starts execution on the device. Returns -1 for a null handle, otherwise
// the result of vx_device::start.
extern int vx_start(vx_device_h hdevice) {
  auto dev = (vx_device*)hdevice;
  return dev ? dev->start() : -1;
}
// Waits for the device to finish, passing `timeout` through to
// vx_device::wait. Returns -1 for a null handle.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  auto dev = (vx_device*)hdevice;
  return dev ? dev->wait(timeout) : -1;
}

37
driver/sw/simx/Makefile Normal file
View File

@@ -0,0 +1,37 @@
# Builds libvortex.so, the Vortex driver library backed by the simX simulator
# sources; the Verilator top level is simX's cache_simX.v.

CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

MAX_WARPS ?= 8
MAX_THREADS ?= 4

CFLAGS += -I../../include -I../../../../simX/include
CFLAGS += -fPIC
CFLAGS += -DUSE_SIMX
CFLAGS += -DMAX_WARPS=$(MAX_WARPS) -DMAX_THREADS=$(MAX_THREADS)

LDFLAGS += -shared -pthread

SRCS = vortex.cpp ../vx_utils.cpp ../../../simX/args.cpp ../../../simX/mem.cpp ../../../simX/core.cpp ../../../simX/instruction.cpp ../../../simX/enc.cpp ../../../simX/util.cpp

RTL_TOP = ../../../simX/cache_simX.v
RTL_INCLUDE = -I../../../old_rtl -I../../../old_rtl/interfaces -I../../../old_rtl/cache -I../../../old_rtl/shared_memory

# Default to half of the host's CPUs for the Verilated model.
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
VL_FLAGS += --threads $(THREADS)

VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH

PROJECT = libvortex.so

# Neither target produces a file of the same name.
.PHONY: all clean

all: $(PROJECT)

$(PROJECT): $(SRCS)
	verilator --exe --cc $(RTL_TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	make -j -C obj_dir -f Vcache_simX.mk

clean:
	rm -rf $(PROJECT) obj_dir

BIN
driver/sw/simx/libvortex.so Executable file

Binary file not shown.

324
driver/sw/simx/vortex.cpp Normal file
View File

@@ -0,0 +1,324 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <thread>
#include <mutex>
#include <chrono>
#include <vortex.h>
#include "core.h"
#define PAGE_SIZE 4096
// Evaluates `_expr` as an fpga_result. If it is not FPGA_OK, prints the
// failing expression with its result code and makes the enclosing function
// return -1. The do/while(false) wrapper lets it be used as one statement.
// NOTE(review): nothing in the visible part of this file uses this macro, and
// fpga_result/FPGA_OK look like OPAE names that the includes above do not
// obviously provide - presumably a leftover from the OPAE driver; confirm.
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
// Rounds `size` up to the next multiple of VX_CACHE_LINESIZE.
static size_t align_size(size_t size) {
  const size_t num_lines = (size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE;
  return VX_CACHE_LINESIZE * num_lines;
}
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
void* data() const {
return data_;
}
size_t size() const {
return size_;
}
vx_device* device() const {
return device_;
}
private:
size_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
// Simulated device backed by simX: owns the RAM and a worker thread that
// runs a freshly constructed Harp core to completion each time start() is
// called.
//
// mutex_ protects is_done_ and is_running_.
// NOTE(review): upload()/download() touch ram_ without taking mutex_ while
// run() may be executing on the worker thread; confirm that callers only
// transfer data while the device is idle.
class vx_device {
public:
  vx_device()
    : is_done_(false)
    , is_running_(false)
    , mem_allocation_(VX_ALLOC_BASE_ADDR)
    , thread_(thread_entry, this)
  {}

  ~vx_device() {
    {
      std::lock_guard<std::mutex> guard(mutex_);
      is_done_ = true;
    }
    thread_.join();
  }

  // Reserves `size` bytes (rounded up by align_size) of device memory and
  // stores the start address in *dev_maddr. Memory is handed out linearly and
  // never released. Returns 0 on success, -1 when the request does not fit
  // below VX_LOCAL_MEM_SIZE.
  int alloc_local_mem(size_t size, size_t* dev_maddr) {
    size_t asize = align_size(size);
    if (mem_allocation_ + asize > VX_LOCAL_MEM_SIZE)
      return -1;
    *dev_maddr = mem_allocation_;
    mem_allocation_ += asize;
    return 0;
  }

  // Copies from host memory at src + src_offset into simulated RAM at
  // dest_addr. The transfer length is `size` rounded up by align_size, so up
  // to one cache line more than `size` is read from the host buffer.
  // Returns -1 if the transfer would run past the end of RAM.
  int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
    size_t asize = align_size(size);
    if (dest_addr + asize > ram_.size())
      return -1;
    ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
    return 0;
  }

  // Copies from simulated RAM at src_addr into host memory at
  // dest + dest_offset, using the same rounded-up length as upload().
  // `dest` is written despite being declared const; the signature is kept as
  // is for existing callers.
  int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) {
    size_t asize = align_size(size);
    if (src_addr + asize > ram_.size())
      return -1;
    ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
    return 0;
  }

  // Flags the device as running; the worker thread picks this up and
  // executes run().
  int start() {
    std::lock_guard<std::mutex> guard(mutex_);
    is_running_ = true;
    return 0;
  }

  // Polls once per second until the worker clears is_running_ or the timeout
  // (milliseconds, truncated to whole seconds) runs out. A negative timeout
  // waits indefinitely. Always returns 0, including on timeout.
  int wait(long long timeout) {
    auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
    for (;;) {
      bool is_running;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_running = is_running_;
      }
      if (!is_running || 0 == timeout_sec--)
        break;
      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    return 0;
  }

private:

  // Builds a core attached to ram_ at address 0 and steps it until it
  // reports it is no longer running, then prints its statistics.
  void run() {
    Harp::ArchDef arch("rv32i", false, MAX_WARPS, MAX_THREADS);
    Harp::WordDecoder dec(arch);
    Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
    Harp::Core core(arch, dec, mu);
    mu.attach(ram_, 0);

    while (core.running()) {
      core.step();
    }
    core.printStats();
  }

  // Worker loop: waits for start(), runs the core, clears is_running_, and
  // repeats until the destructor sets is_done_.
  void thread_proc() {
    std::cout << "Device ready..." << std::endl;

    for (;;) {
      bool is_done, is_running;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_done = is_done_;
        is_running = is_running_;
      }

      if (is_done)
        break;

      if (is_running) {
        std::cout << "Device running..." << std::endl;

        this->run();

        {
          std::lock_guard<std::mutex> guard(mutex_);
          is_running_ = false;
        }

        std::cout << "Device ready..." << std::endl;
      }
    }

    std::cout << "Device shutdown..." << std::endl;
  }

  static void thread_entry(vx_device* device) {
    device->thread_proc();
  }

  bool is_done_;
  bool is_running_;
  size_t mem_allocation_;
  Harp::RAM ram_;
  std::mutex mutex_;
  // Must stay the last member: members are constructed in declaration order
  // and the worker locks mutex_ (and later uses ram_) as soon as it starts.
  std::thread thread_;
};
///////////////////////////////////////////////////////////////////////////////
// Creates a simulated device and hands its handle back through `hdevice`.
// Returns 0 on success, -1 when `hdevice` is null.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (hdevice != nullptr) {
    *hdevice = new vx_device();
    return 0;
  }
  return -1;
}
// Destroys the device behind `hdevice`. Returns -1 for a null handle.
extern int vx_dev_close(vx_device_h hdevice) {
  auto dev = (vx_device*)hdevice;
  if (!dev) {
    return -1;
  }
  delete dev;
  return 0;
}
// Allocates `size` bytes of device memory; the address is stored in
// *dev_maddr. Returns -1 for a null handle, null output pointer or zero size,
// otherwise the result of vx_device::alloc_local_mem.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool args_ok = (hdevice != nullptr)
                    && (dev_maddr != nullptr)
                    && (size != 0);
  if (!args_ok) {
    return -1;
  }
  auto dev = (vx_device*)hdevice;
  return dev->alloc_local_mem(size, dev_maddr);
}
// Validates the arguments only: this functionality is not needed by simX.
// Returns -1 for a null handle or zero size, 0 otherwise.
extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) {
  const bool args_ok = (hdevice != nullptr) && (size != 0);
  return args_ok ? 0 : -1;
}
// Creates a host staging buffer of `size` bytes tied to the device and
// returns its handle through `hbuffer`. Returns -1 on invalid arguments or
// when the buffer's storage could not be allocated.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (!hdevice || 0 == size || !hbuffer) {
    return -1;
  }
  vx_buffer* buf = new vx_buffer(size, (vx_device*)hdevice);
  if (buf->data() != nullptr) {
    *hbuffer = buf;
    return 0;
  }
  // storage allocation failed
  delete buf;
  return -1;
}
// Returns the host address of the buffer's storage, or nullptr for a null
// handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
  auto buf = (vx_buffer*)hbuffer;
  if (buf) {
    return buf->data();
  }
  return nullptr;
}
// Destroys the buffer behind `hbuffer`. Returns -1 for a null handle.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  auto buf = (vx_buffer*)hbuffer;
  if (!buf) {
    return -1;
  }
  delete buf;
  return 0;
}
// Copies `size` bytes, starting `src_offset` bytes into the host buffer, to
// device address `dev_maddr`.
// Returns -1 for a null handle, zero size, or a range that does not lie
// entirely inside the buffer; otherwise the result of vx_device::upload.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;

  auto buffer = (vx_buffer*)hbuffer;

  // Written without computing size + src_offset, which could wrap around and
  // let an out-of-range request through.
  const size_t buf_size = buffer->size();
  if (src_offset > buf_size
   || size > buf_size - src_offset)
    return -1;

  return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
// Copies `size` bytes from device address `dev_maddr` into the host buffer,
// starting `dest_offset` bytes in.
// Returns -1 for a null handle, zero size, or a range that does not lie
// entirely inside the buffer; otherwise the result of vx_device::download.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;

  auto buffer = (vx_buffer*)hbuffer;

  // Written without computing size + dest_offset, which could wrap around and
  // let an out-of-range request through.
  const size_t buf_size = buffer->size();
  if (dest_offset > buf_size
   || size > buf_size - dest_offset)
    return -1;

  return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
// Starts execution on the device. Returns -1 for a null handle, otherwise
// the result of vx_device::start.
extern int vx_start(vx_device_h hdevice) {
  auto dev = (vx_device*)hdevice;
  return dev ? dev->start() : -1;
}
// Waits for the device to finish, passing `timeout` through to
// vx_device::wait. Returns -1 for a null handle.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  auto dev = (vx_device*)hdevice;
  return dev ? dev->wait(timeout) : -1;
}

91
driver/sw/vx_utils.cpp Normal file
View File

@@ -0,0 +1,91 @@
#include <iostream>
#include <fstream>
#include <cstring>
#include <vortex.h>
// Uploads `size` bytes of kernel image from `content` to the device at
// VX_KERNEL_BASE_ADDR, staged through a temporary shared buffer in chunks of
// at most TRANSFER_SIZE bytes. With USE_SIMX defined, a small startup routine
// and an I/O trap stub are written first.
// Returns 0 on success, -1 on invalid arguments or allocation failure, or the
// error code of the first failed copy.
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
  if (NULL == content || 0 == size)
    return -1;

  static constexpr uint32_t TRANSFER_SIZE = 4096;

  // staging buffer shared with the device
  vx_buffer_h staging;
  if (vx_alloc_shared_mem(device, TRANSFER_SIZE, &staging) != 0)
    return -1;

  auto host_mem = (uint8_t*)vx_host_ptr(staging);
  int status = 0;

#if defined(USE_SIMX)
  {
    auto words = (uint32_t*)host_mem;

    // default startup routine
    words[0] = 0xf1401073;
    words[1] = 0xf1401073;
    words[2] = 0x30101073;
    words[3] = 0x800000b7;
    words[4] = 0x000080e7;
    status = vx_copy_to_dev(staging, 0, 5 * 4, 0);

    if (0 == status) {
      // newlib io simulator trap
      words[0] = 0x00008067;
      status = vx_copy_to_dev(staging, 0x70000000, 4, 0);
    }
  }
#endif

  // upload content chunk by chunk; stop at the first failing copy
  size_t done = 0;
  while (0 == status && done < size) {
    auto chunk = std::min<size_t>(TRANSFER_SIZE, size - done);
    std::memcpy(host_mem, (uint8_t*)content + done, chunk);
    status = vx_copy_to_dev(staging, VX_KERNEL_BASE_ADDR + done, chunk, 0);
    if (0 == status)
      done += chunk;
  }

  vx_buf_release(staging);
  return status;
}
// Reads the whole file `filename` into memory and uploads it with
// vx_upload_kernel_bytes.
// Returns the upload result, or -1 if the file name is null, or the file
// cannot be opened, is empty, or cannot be read completely.
int vx_upload_kernel_file(vx_device_h device, const char* filename) {
  if (nullptr == filename)
    return -1;

  // The kernel image is raw binary, so open it without text translation.
  std::ifstream ifs(filename, std::ios::in | std::ios::binary);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    return -1;
  }

  // get length of file; tellg() yields -1 on failure, which must never reach
  // the allocation below
  ifs.seekg(0, ifs.end);
  const std::streamoff length = ifs.tellg();
  ifs.seekg(0, ifs.beg);
  if (!ifs || length <= 0) {
    std::cout << "error: " << filename << " is empty or unreadable" << std::endl;
    return -1;
  }

  // allocate buffer
  const size_t size = static_cast<size_t>(length);
  auto content = new char [size];

  // read file content and upload it only if the read was complete
  int err = -1;
  if (ifs.read(content, length)) {
    err = vx_upload_kernel_bytes(device, content, size);
  } else {
    std::cout << "error: failed to read " << filename << std::endl;
  }

  // release buffer
  delete[] content;

  return err;
}

View File

@@ -0,0 +1,37 @@
# Builds the `basic` driver test and provides run-* targets that select the
# driver back end through LD_LIBRARY_PATH.

CXXFLAGS += -std=c++17 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../sw/include

LDFLAGS +=

PROJECT = basic

SRCS = basic.cpp

# None of these targets produce a file of the same name.
.PHONY: all run-fpga run-ase run-rtlsim run-simx clean

all: $(PROJECT)

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/simx -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o .depend

ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

BIN
driver/tests/basic/basic Executable file

Binary file not shown.

107
driver/tests/basic/basic.cpp Executable file
View File

@@ -0,0 +1,107 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <vortex.h>
// Parses the command line. -h and -? print the usage text and exit with
// status 0; an option getopt does not recognise is reported by getopt as '?'
// and is therefore handled the same way.
static void parse_args(int argc, char **argv) {
  int c;
  // 'h' is listed explicitly so the advertised -h no longer goes through
  // getopt's "invalid option" path.
  while ((c = getopt(argc, argv, "h?")) != -1) {
    switch (c) {
    case 'h':
    case '?': {
      printf("Test.\n");
      printf("Usage: [-h: help]\n");
      exit(0);
    } break;
    default:
      exit(-1);
    }
  }
}
// Derives the i-th test pattern from `value`: `value` shifted left by i bits,
// OR-ed with the low i bits of the original value.
// `i` must be in [0, 63]. The mask is built in 64 bits: the previous
// `1 << i` was an int shift and undefined for i >= 31.
uint64_t shuffle(int i, uint64_t value) {
  const uint64_t low_mask = (uint64_t(1) << i) - 1;
  return (value << i) | (value & low_mask);
}
// Round-trip test: fills `sbuf` with num_blocks 64-byte blocks of patterns
// derived from `value`, copies them to device address `address`, reads them
// back into `dbuf` and compares word by word.
// Returns the number of mismatching 64-bit words, or -1 if a host pointer is
// unavailable or a transfer fails.
int run_test(vx_buffer_h sbuf, vx_buffer_h dbuf, uint32_t address, uint64_t value, int num_blocks) {
  int err;
  int num_failures = 0;
  const int num_words = 8 * num_blocks; // a 64-byte block holds 8 uint64_t

  auto src = (uint64_t*)vx_host_ptr(sbuf);
  auto dst = (uint64_t*)vx_host_ptr(dbuf);
  if (nullptr == src || nullptr == dst)
    return -1;

  // write sbuf data
  for (int i = 0; i < num_words; ++i) {
    src[i] = shuffle(i, value);
  }

  // write buffer to local memory
  err = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
  if (err != 0)
    return -1;

  // read buffer from local memory
  err = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
  if (err != 0)
    return -1;

  // verify result
  for (int i = 0; i < num_words; ++i) {
    auto curr = dst[i];
    auto ref = shuffle(i, value);
    if (curr != ref) {
      // Word i lives sizeof(uint64_t) * i bytes past `address`; the values
      // are cast so the format matches on every platform.
      printf("error @ %x: actual %llu, expected %llu\n",
             address + (uint32_t)(sizeof(uint64_t) * i),
             (unsigned long long)curr, (unsigned long long)ref);
      ++num_failures;
    }
  }

  return num_failures;
}
// Opens the device, allocates a source and a destination buffer, runs the
// copy round-trip tests and reports PASSED/FAILED.
// Returns the number of failures (0 on success), or -1 if setup fails.
int main(int argc, char *argv[]) {
  int err;
  int num_failures = 0;

  // parse command arguments
  parse_args(argc, argv);

  // open device connection
  vx_device_h device;
  err = vx_dev_open(&device);
  if (err != 0)
    return -1;

  // create source buffer
  vx_buffer_h sbuf;
  err = vx_alloc_shared_mem(device, 4096, &sbuf);
  if (err != 0) {
    vx_dev_close(device);
    return -1;
  }

  // create destination buffer
  vx_buffer_h dbuf;
  err = vx_alloc_shared_mem(device, 4096, &dbuf);
  if (err != 0) {
    vx_buf_release(sbuf);
    vx_dev_close(device);
    return -1;
  }

  // run tests
  struct test_case_t {
    uint32_t address;
    uint64_t value;
    int num_blocks;
  };
  static const test_case_t test_cases[] = {
    {0x10000000, 0x0badf00d00ff00ff, 1},
    {0x10000000, 0x0badf00d00ff00ff, 2},
    {0x20000000, 0xff00ff00ff00ff00, 4},
    {0x20000000, 0x0badf00d40ff40ff, 8},
  };
  for (const auto& tc : test_cases) {
    int res = run_test(sbuf, dbuf, tc.address, tc.value, tc.num_blocks);
    // run_test returns -1 when a transfer fails; count that as one failure
    // so it cannot cancel out mismatches from another test.
    num_failures += (res < 0) ? 1 : res;
  }

  // release buffers
  vx_buf_release(sbuf);
  vx_buf_release(dbuf);

  // close device
  vx_dev_close(device);

  if (0 == num_failures) {
    printf("Test PASSED\n");
  } else {
    printf("Test FAILED\n");
  }

  return num_failures;
}

View File

@@ -0,0 +1,70 @@
# Builds the `demo` host program and, with the RISC-V toolchain, the device
# kernel image (demo.elf/.bin/.hex/.dump) from kernel.c and the runtime.

RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VX_RT_PATH ?= $(wildcard ../../../runtime)

MAX_WARPS ?= 8
MAX_THREADS ?= 4

VX_CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy

VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s

VX_CFLAGS = -v -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -DMAX_WARPS=$(MAX_WARPS) -DMAX_THREADS=$(MAX_THREADS)

VX_SRCS = kernel.c

CXXFLAGS += -std=c++17 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../sw/include

PROJECT = demo

SRCS = demo.cpp

# None of these targets produce a file of the same name.
.PHONY: all run-fpga run-ase run-rtlsim run-simx clean

all: $(PROJECT)

$(PROJECT).dump: $(PROJECT).elf
	$(VX_DMP) -D $(PROJECT).elf > $(PROJECT).dump

$(PROJECT).hex: $(PROJECT).elf
	$(VX_CPY) -O ihex $(PROJECT).elf $(PROJECT).hex

$(PROJECT).bin: $(PROJECT).elf
	$(VX_CPY) -O binary $(PROJECT).elf $(PROJECT).bin

# The device ELF is compiled from the kernel sources, so it has to be rebuilt
# when they change, not when the host program changes.
$(PROJECT).elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o $(PROJECT).elf

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/simx -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f $(PROJECT).bin

run-ase: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f $(PROJECT).bin

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f $(PROJECT).bin

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../sw/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f $(PROJECT).bin

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

BIN
driver/tests/demo/demo Executable file

Binary file not shown.

BIN
driver/tests/demo/demo.bin Executable file

Binary file not shown.

View File

@@ -0,0 +1,75 @@
#include <iostream>
#include <unistd.h>
#include <vortex.h>
const char* program_file = nullptr;
// Prints the program banner and the supported command-line options.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: -f: program [-h: help]" << std::endl;
}
// Parses the command line: -f <file> selects the program to upload, -h / -?
// print the usage and exit with status 0. Any other value returned by getopt,
// or a missing -f, prints the usage and exits with status -1.
static void parse_args(int argc, char **argv) {
  for (;;) {
    const int opt = getopt(argc, argv, "f:h?");
    if (-1 == opt)
      break;

    if ('f' == opt) {
      program_file = optarg;
      continue;
    }

    // every remaining case prints the usage and terminates
    const bool help_requested = ('h' == opt) || ('?' == opt);
    show_usage();
    exit(help_requested ? 0 : -1);
  }

  if (!program_file) {
    show_usage();
    exit(-1);
  }
}
// Opens the device, uploads the program named with -f, starts it and waits
// (without timeout) for completion.
// Returns 0 on success, -1 if any step fails.
int main(int argc, char *argv[]) {
  // parse command arguments
  parse_args(argc, argv);

  // open device connection
  vx_device_h device;
  if (vx_dev_open(&device) != 0)
    return -1;

  // upload program, start the device, wait for completion;
  // each step runs only if the previous one succeeded
  int err = vx_upload_kernel_file(device, program_file);
  if (0 == err)
    err = vx_start(device);
  if (0 == err)
    err = vx_ready_wait(device, -1);

  // close device (on both the success and the failure path)
  vx_dev_close(device);

  if (err != 0)
    return -1;

  // iostream is the only I/O header this file includes, so use it here
  // instead of printf.
  std::cout << "done!" << std::endl;

  return 0;
}

BIN
driver/tests/demo/demo.elf Executable file

Binary file not shown.

269
driver/tests/demo/demo.hex Normal file
View File

@@ -0,0 +1,269 @@
:0200000480007A
:10000000970500009385C50B130540006B10B500E4
:10001000EF00C00A130510006B0005001385C1C373
:10002000138601C43306A64093050000EF00901329
:10003000171500001305C583EF00007FEF00D008FF
:08004000EF0080006F00807FDB
:10004800130101FF2326110023248100EF004073D0
:100058001304050013142400EF0000733304A400F4
:100068001315240037070010B7070020B387A7002F
:100078003307A70083A60700032707008320C100D2
:1000880003248100B70700303385A700B307D700E2
:100098002320F50013010101678000009307000089
:1000A80063880700371500801305C5866F00407701
:1000B80067800000130500026B000500971100001F
:1000C80093814174F32610029396A60173260002C9
:1000D8009315A6001316260037F1FF6F3301B140C0
:1000E8003301D1403301C100F326100263860600B4
:1000F800130500006B0005006780000083270500DA
:100108001358860093560601135786012380C700AB
:10011800A38007012381D700A381E70093884700C4
:100128006352C00A1388870033B80501B3B6B70015
:100138001307F6FF1348180093C6160013379700E5
:10014800B366D800134717003377D700630407084E
:1001580033E7B80013773700631E07061378C6FF26
:1001680093870500138708003308B80083A60700A3
:100178009387470013074700232ED7FEE39807FF0E
:100188009377C6FFB386F800630CF6023387F50051
:100198000348070013871700238006016352C7022C
:1001A8003387E5000347070093872700A380E6000D
:1001B80063D8C700B387F50083C707002381F6001B
:1001C800B388C80093F73800B388170123201501B6
:1001D8006780000013074600B387E700138708000D
:1001E80083C605001307170093851500A30FD7FED4
:1001F800E318F7FE6FF0DFFC83270500130101FF0A
:1002080003C8070003C6170083C6270003C73700C3
:1002180023060101A306C1002307D100A307E100BB
:100228008328C100138847006352100B1386870088
:1002380033B6C500B3B6B7001387F8FF13461600E8
:1002480093C6160013379700B366D60013471700F6
:100258003377D7006306070833E7050113773700BC
:100268006310070813F6C8FF1307080093870500F3
:100278003306B60083260700938747001307470015
:1002880023AED7FEE398C7FE93F7C8FFB306F8007E
:10029800638CF80203C306003386F5001387170042
:1002A800230066006352170303C616003387E50070
:1002B800938727002300C70063D8170103C72600C8
:1002C800B387F5002380E7003308180193773800D7
:1002D800338807012320050113010101678000000D
:1002E80013874800B387E700130708008346070011
:1002F8001307170093851500A38FD5FEE318F7FEA3
:100308006FF09FFC67800000B727000023A2F5006C
:1003180013050000678000001305100067800000C7
:10032800130101FE232E1100B707007113074000C7
:1003380093588600135806019356860193DE85006C
:1003480013DE050113D38501130F300023A0E70046
:100358002388E70023A4E700238CE7002382E70132
:10036800A38207002393070023A6A700A3880700FA
:1003780023890700A3890700A38C0700238D0700A2
:10038800A38D0700238EC700238AB700A38AD7014D
:10039800238BC701A38B6700A38E1701238F070147
:1003A800A38FD70083A781C3E7800700B707007230
:1003B80003C707002306E10003C71700A306E100EF
:1003C80003C727002307E10003C73700A307E1009D
:1003D8000326C1006358C00293874700B705008E03
:1003E8003306F6009385C5FF83C607003387B70039
:1003F800130581003307E5002300D7009387170012
:10040800E394C7FE8320C101032581001301010283
:1004180067800000130101FF2324810013040600F4
:1004280023261100B70700711307400013588600F0
:100438009356840113DE850013D3050193D88501F3
:100448001356060123A6A70023A0E7002382E7008E
:10045800A38207002388E7002393070023A4E7006B
:10046800A388070023890700A3890700238CE700D6
:10047800A38C0700238D0700A38D0700238E870018
:10048800238AB700A38AC701238B6700A38B1701B0
:10049800A38E0701238FC700A38FD70083A781C32B
:1004A800E78007008320C1001305040003248100AE
:1004B8001301010167800000130101FF232481005B
:1004C800B707007123261100130406001307400024
:1004D800130350001388070123A0E7002382670055
:1004E800A3820700238887009358860093568401C7
:1004F80013560601A30018012301C800A301D80060
:100508002393070023A4E70023A6A7006352800AC9
:100518009386470193878701B3B7F5003335B80051
:100528001306F4FF13C717001336960093471500F8
:10053800B367F70013471600B377F7006388070817
:10054800B3E7B60093F73700639207081375C4FF43
:100558003305B500938705003388B64003A6070026
:100568003307F800938747002320C700E318F5FEF8
:100578009377C4FFB386F600630CF4023387F50063
:1005880003460700138717002380C60063528702BB
:100598003387E5000347070093872700A380E60019
:1005A80063D88700B387F50083C707002381F60067
:1005B80083A781C3E78007008320C10013050400D7
:1005C800032481001301010167800000130748001C
:1005D80033078700B3870641B387F50083C7C7FF92
:1005E80093861600A38FF6FEE316D7FE6FF05FFC26
:1005F8009307050003A541C393D6F741B3C7F60097
:10060800B387D740B387A70023AAF1C26780000049
:10061800130500006F00C014130101FE232E110002
:10062800B707007113074000935886001358060156
:100638009356860193528500935F0501135F8501E8
:1006480093DE850013DE050113D385019303700043
:1006580023A0E7002388E70023A4E700238CE70012
:1006680023827700A3820700239307002386A7002D
:10067800A38657002387F701A387E701A38807000C
:1006880023890700A3890700A38C0700238D07008F
:10069800A38D0700238EC700238AB700A38AD7013A
:1006A800238BC701A38B6700A38E1701238F070134
:1006B800A38FD70083A781C3E7800700B70700721D
:1006C80003C707002306E10003C71700A306E100DC
:1006D80003C727002307E10003C73700A307E1008A
:1006E8000326C1006358C00293874700B705008EF0
:1006F8003306F6009385C5FF83C607003387B70026
:10070800130581003307E5002300D70093871700FE
:10071800E394C7FE8320C101032581001301010270
:1007280067800000130500006F0080036F00C0059C
:1007380037150080130585BF6F00800A03A5C1C364
:100748001307150023AEE1C267800000371500804B
:100758001305C5C16F00C0086B10B50067800000A5
:100768006B000500678000006B40B50067800000E3
:100778006B200500678000006B3000006780000078
:10078800732510026780000073250002678000004F
:10079800732560026780000073255002678000009F
:1007A800130540006B000500F32610029396F6002F
:1007B800732600029315A6001316260037F1FF6F63
:1007C8003301B1403301D1403301C100F326100297
:1007D80063860600130500006B00050067800000B3
:1007E800130141FF232011002322B1008345050096
:1007F80063880500EF00C001130515006FF01FFFA7
:1008080083200100832541001301C1006780000097
:10081800970200009382027F23A0B2006780000045
:100828009305050093060000130600001305000059
:100838006F008020130101FF93050000232481002D
:100848002326110013040500EF00802803A501C327
:100858008327C50363840700E780070013050400A6
:10086800EFF01FDB130101FF23248100B71700807D
:10087800371400801304440093874700B3878740E8
:10088800232291002326110093D427406380040279
:100898009387C7FF33848700832704009384F4FF7A
:1008A8001304C4FFE7800700E39804FE8320C10017
:1008B80003248100832441001301010167800000A3
:1008C800130101FF23248100232021013714008014
:1008D8003719008093070400130909003309F94008
:1008E800232611002322910013592940630009028D
:1008F8001304040093040000832704009384140065
:1009080013044400E7800700E31899FE37140080B9
:100918003719008093070400130949003309F94087
:1009280013592940630009021304040093040000CA
:10093800832704009384140013044400E78007000D
:10094800E31899FE8320C100032481008324410019
:100958000329010013010101678000001303F0005F
:1009680013070500637EC3029377F7006390070AB5
:1009780063920508937606FF1376F600B386E600C1
:100988002320B7002322B7002324B7002326B7006B
:1009980013070701E366D7FE6314060067800000AB
:1009A800B306C3409396260097020000B38656000C
:1009B8006780C6002307B700A306B7002306B70061
:1009C800A305B7002305B700A304B7002304B700A5
:1009D800A303B7002303B700A302B7002302B7009D
:1009E800A301B7002301B700A300B7002300B70095
:1009F8006780000093F5F50F93968500B3E5D50061
:100A080093960501B3E5D5006FF0DFF693962700BE
:100A180097020000B386560093820000E78006FA2A
:100A280093800200938707FF3307F7403306F600E9
:100A3800E378C3F66FF0DFF303A701C383278714B6
:100A4800638C070403A747001308F001634EE80608
:100A58001318270063060502338307012324C308FC
:100A680083A88718130610003316E600B3E8C800F9
:100A780023A417192324D310930620006304D50256
:100A88001307170023A2E700B387070123A4B700C1
:100A980013050000678000009307C7142324F71488
:100AA8006FF05FFA83A6C7181307170023A2E700A1
:100AB80033E6C60023A6C718B387070123A4B700E7
:100AC80013050000678000001305F0FF6780000031
:100AD800130101FD232C410103AA01C32320210393
:100AE8002326110203298A14232481022322910236
:100AF800232E3101232A5101232861012326710164
:100B08002324810163000904130B0500938B05005E
:100B1800930A10009309F0FF832449001384F4FF1B
:100B28006342040293942400B304990063840B0481
:100B380083A74410638077051304F4FF9384C4FFEC
:100B4800E31634FF8320C102032481028324410277
:100B5800032901028329C101032A8101832A410152
:100B6800032B0101832BC100032C81001301010316
:100B7800678000008327490083A644009387F7FF16
:100B8800638E870423A20400E38806FA8327891862
:100B980033978A00032C4900B377F7006392070262
:100BA800E78006000327490083278A146314870116
:100BB800E304F9F8E38807F8138907006FF0DFF515
:100BC8008327C91883A544083377F700631C0700F7
:100BD80013050B00E78006006FF0DFFC2322890075
:100BE8006FF09FFA13850500E78006006FF09FFB02
:100BF8004552524F523A205F756E6C696E6B206E8B
:100C08006F742079657420696D706C656D656E749C
:100C180065640A004552524F523A205F6C696E6B08
:100C2800206E6F742079657420696D706C656D65D0
:060C38006E7465640A0001
:04100000A4000080C8
:10100800000001000000000000000000FC12008049
:1010180064130080CC130080000000000000000072
:1010280000000000000000000000000000000000B8
:1010380000000000000000000000000000000000A8
:101048000000000000000000000000000000000098
:101058000000000000000000000000000000000088
:101068000000000000000000000000000000000078
:101078000000000000000000000000000000000068
:101088000000000000000000000000000000000058
:101098000000000000000000000000000000000048
:1010A8000000000000000000000000000000000038
:1010B80001000000000000000E33CDAB34126DE6D5
:1010C800ECDE05000B00000000000000000000003E
:1010D8000000000000000000000000000000000008
:1010E80000000000000000000000000000000000F8
:1010F80000000000000000000000000000000000E8
:1011080000000000000000000000000000000000D7
:1011180000000000000000000000000000000000C7
:1011280000000000000000000000000000000000B7
:1011380000000000000000000000000000000000A7
:101148000000000000000000000000000000000097
:101158000000000000000000000000000000000087
:101168000000000000000000000000000000000077
:101178000000000000000000000000000000000067
:101188000000000000000000000000000000000057
:101198000000000000000000000000000000000047
:1011A8000000000000000000000000000000000037
:1011B8000000000000000000000000000000000027
:1011C8000000000000000000000000000000000017
:1011D8000000000000000000000000000000000007
:1011E80000000000000000000000000000000000F7
:1011F80000000000000000000000000000000000E7
:1012080000000000000000000000000000000000D6
:1012180000000000000000000000000000000000C6
:1012280000000000000000000000000000000000B6
:1012380000000000000000000000000000000000A6
:101248000000000000000000000000000000000096
:101258000000000000000000000000000000000086
:101268000000000000000000000000000000000076
:101278000000000000000000000000000000000066
:101288000000000000000000000000000000000056
:101298000000000000000000000000000000000046
:1012A8000000000000000000000000000000000036
:1012B8000000000000000000000000000000000026
:1012C8000000000000000000000000000000000016
:1012D8000000000000000000000000000000000006
:1012E80000000000000000000000000000000000F6
:1012F80000000000000000000000000000000000E6
:1013080000000000000000000000000000000000D5
:1013180000000000000000000000000000000000C5
:1013280000000000000000000000000000000000B5
:1013380000000000000000000000000000000000A5
:101348000000000000000000000000000000000095
:101358000000000000000000000000000000000085
:101368000000000000000000000000000000000075
:101378000000000000000000000000000000000065
:101388000000000000000000000000000000000055
:101398000000000000000000000000000000000045
:1013A8000000000000000000000000000000000035
:1013B8000000000000000000000000000000000025
:1013C8000000000000000000000000000000000015
:1013D8000000000000000000000000000000000005
:1013E80000000000000000000000000000000000F5
:1013F80000000000000000000000000000000000E5
:1014080000000000000000000000000000000000D4
:1014180000000000000000000000000000000000C4
:1014280000000000000000000000000000000000B4
:0C14380010100080000000100000007088
:040000058000000077
:00000001FF

View File

@@ -0,0 +1,25 @@
#include <stdlib.h>
#include <stdio.h>
#include "intrinsics/vx_intrinsics.h"
/*
 * Kernel entry point: each thread adds one element of the arrays at
 * 0x10000000 and 0x20000000 and stores the sum in the array at 0x30000000.
 * The element index is derived from the warp ID and the thread ID.
 */
void main() {
  unsigned *src_a = (unsigned*)0x10000000;
  unsigned *src_b = (unsigned*)0x20000000;
  unsigned *dst = (unsigned*)0x30000000;

  unsigned warp = vx_warpID();
  unsigned thread = vx_threadID();

  /* global index of this thread across all warps */
  unsigned idx = (warp * MAX_THREADS) + thread;

  //if (idx == 0) {
  //  printf("begin\n");
  //}

  dst[idx] = src_a[idx] + src_b[idx];

  //if (idx == 0) {
  //  printf("end\n");
  //}
}

372144
driver/tests/demo/run.log Normal file

File diff suppressed because it is too large Load Diff

0
driver/tests/results.txt Normal file
View File