mutiple fixes: parallel printf, fixed cycle in cache, opencl refactored vecadd and sgemm, regen opencl kernels with hard-float, fixed vortex io bus interface, fixed dpi floats APi to support multicore mode, make vlsim multicore default, make rtlsim multi-core default, removed POCL binaries from repository, updated Makefiles to use external POCL

This commit is contained in:
Blaise Tine
2020-09-19 14:45:42 -04:00
parent 80f929eb61
commit f6f95e0c46
146 changed files with 116779 additions and 194258 deletions

View File

@@ -57,7 +57,7 @@ module VX_bank #(
// Core Request
input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0] core_req_rw,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,

View File

@@ -18,7 +18,7 @@ module VX_bank_core_req_arb #(
// Enqueue Data
input wire reqq_push,
input wire [NUM_REQUESTS-1:0] bank_valids,
input wire [NUM_REQUESTS-1:0] bank_rw,
input wire [`CORE_REQ_TAG_COUNT-1:0] bank_rw,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] bank_byteen,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] bank_writedata,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] bank_addr,
@@ -40,21 +40,21 @@ module VX_bank_core_req_arb #(
);
wire [NUM_REQUESTS-1:0] out_per_valids;
wire [NUM_REQUESTS-1:0] out_per_rw;
wire [`CORE_REQ_TAG_COUNT-1:0] out_per_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] out_per_byteen;
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] out_per_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] out_per_writedata;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] out_per_tag;
reg [NUM_REQUESTS-1:0] use_per_valids;
reg [NUM_REQUESTS-1:0] use_per_rw;
reg [`CORE_REQ_TAG_COUNT-1:0] use_per_rw;
reg [NUM_REQUESTS-1:0][WORD_SIZE-1:0] use_per_byteen;
reg [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] use_per_addr;
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] use_per_writedata;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] use_per_tag;
wire [NUM_REQUESTS-1:0] qual_valids;
wire [NUM_REQUESTS-1:0] qual_rw;
wire [`CORE_REQ_TAG_COUNT-1:0] qual_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] qual_byteen;
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] qual_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] qual_writedata;
@@ -108,16 +108,17 @@ module VX_bank_core_req_arb #(
assign reqq_empty = !qual_has_request;
assign reqq_req_st0 = qual_has_request;
assign reqq_req_tid_st0 = qual_request_index;
assign reqq_req_rw_st0 = qual_rw[qual_request_index];
assign reqq_req_tid_st0 = qual_request_index;
assign reqq_req_byteen_st0 = qual_byteen[qual_request_index];
assign reqq_req_addr_st0 = qual_addr[qual_request_index];
assign reqq_req_writedata_st0 = qual_writedata[qual_request_index];
if (CORE_TAG_ID_BITS != 0) begin
assign reqq_req_tag_st0 = qual_tag;
assign reqq_req_rw_st0 = qual_rw;
end else begin
assign reqq_req_tag_st0 = qual_tag[qual_request_index];
assign reqq_req_tag_st0 = qual_tag[qual_request_index];
assign reqq_req_rw_st0 = qual_rw[qual_request_index];
end
`DEBUG_BLOCK(

View File

@@ -41,10 +41,6 @@ module VX_cache #(
// Enable snoop forwarding
parameter SNOOP_FORWARDING = 0,
// Prefetcher
parameter PRFQ_SIZE = 1,
parameter PRFQ_STRIDE = 0,
// core request tag size
parameter CORE_TAG_WIDTH = 42,
@@ -70,7 +66,7 @@ module VX_cache #(
// Core request
input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0] core_req_rw,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
@@ -246,7 +242,7 @@ module VX_cache #(
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
wire [`CORE_REQ_TAG_COUNT-1:0] curr_bank_core_req_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
@@ -427,9 +423,7 @@ module VX_cache #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE)
.DFQQ_SIZE (DFQQ_SIZE)
) cache_dram_req_arb (
.clk (clk),
.reset (reset),

View File

@@ -8,10 +8,7 @@ module VX_cache_dram_req_arb #(
// Size of a word in bytes
parameter WORD_SIZE = 0,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 0,
// Prefetcher
parameter PRFQ_SIZE = 1,
parameter PRFQ_STRIDE = 0
parameter DFQQ_SIZE = 0
) (
input wire clk,
input wire reset,
@@ -38,32 +35,9 @@ module VX_cache_dram_req_arb #(
input wire dram_req_ready
);
wire pref_pop;
wire pref_valid;
wire[`DRAM_ADDR_WIDTH-1:0] pref_addr;
wire dwb_valid;
wire dfqq_req;
wire dwb_valid;
wire dfqq_req;
assign pref_pop = !dwb_valid && !dfqq_req && dram_req_ready && pref_valid;
VX_prefetcher #(
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE),
.BANK_LINE_SIZE(BANK_LINE_SIZE),
.WORD_SIZE (WORD_SIZE)
) prfqq (
.clk (clk),
.reset (reset),
.dram_req (dram_req_valid && !dram_req_rw),
.dram_req_addr(dram_req_addr),
.pref_pop (pref_pop),
.pref_valid (pref_valid),
.pref_addr (pref_addr)
);
wire[`DRAM_ADDR_WIDTH-1:0] dfqq_req_addr;
`DEBUG_BEGIN
@@ -110,10 +84,10 @@ module VX_cache_dram_req_arb #(
assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i));
end
assign dram_req_valid = dwb_valid || dfqq_req || pref_pop;
assign dram_req_valid = dwb_valid || dfqq_req;
assign dram_req_rw = dwb_valid;
assign dram_req_byteen = dwb_valid ? per_bank_dram_wb_req_byteen[dwb_bank] : {BANK_LINE_SIZE{1'b1}};
assign dram_req_addr = dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr);
assign dram_req_addr = dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr;
assign {dram_req_data} = dwb_valid ? per_bank_dram_wb_req_data[dwb_bank] : 0;
endmodule

View File

@@ -1,71 +0,0 @@
`include "VX_cache_config.vh"
module VX_prefetcher #(
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 0,
// Size of a word in bytes
parameter WORD_SIZE = 0,
parameter PRFQ_SIZE = 1,
parameter PRFQ_STRIDE = 0
) (
input wire clk,
input wire reset,
input wire dram_req,
input wire[`DRAM_ADDR_WIDTH-1:0] dram_req_addr,
input wire pref_pop,
output wire pref_valid,
output wire[`DRAM_ADDR_WIDTH-1:0] pref_addr
);
reg[`LOG2UP(PRFQ_STRIDE):0] use_valid;
reg[`DRAM_ADDR_WIDTH-1:0] use_addr;
wire current_valid;
wire[`DRAM_ADDR_WIDTH-1:0] current_addr;
wire current_full;
wire current_empty;
assign current_valid = !current_empty;
wire update_use = ((use_valid == 0) || ((use_valid-1) == 0)) && current_valid;
VX_generic_queue #(
.DATAW(`DRAM_ADDR_WIDTH),
.SIZE(PRFQ_SIZE)
) pfq_queue (
.clk (clk),
.reset (reset),
.push (dram_req && !current_full && !pref_pop),
.data_in (dram_req_addr),
.pop (update_use),
.data_out(current_addr),
.empty (current_empty),
.full (current_full),
`UNUSED_PIN (size)
);
assign pref_valid = 0; // TODO use_valid != 0;
assign pref_addr = use_addr;
always @(posedge clk) begin
if (reset) begin
use_valid <= 0;
use_addr <= 0;
end else begin
if (update_use) begin
use_valid <= PRFQ_STRIDE;
use_addr <= current_addr + BANK_LINE_SIZE;
end else if (pref_valid && pref_pop) begin
use_valid <= use_valid - 1;
use_addr <= use_addr + BANK_LINE_SIZE;
end
end
end
endmodule