multiple fixes: parallel printf, fixed cycle in cache, opencl refactored vecadd and sgemm, regen opencl kernels with hard-float, fixed vortex io bus interface, fixed dpi floats API to support multicore mode, make vlsim multicore default, make rtlsim multicore default, removed POCL binaries from repository, updated Makefiles to use external POCL
This commit is contained in:
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
@@ -57,7 +57,7 @@ module VX_bank #(
|
||||
|
||||
// Core Request
|
||||
input wire [NUM_REQUESTS-1:0] core_req_valid,
|
||||
input wire [NUM_REQUESTS-1:0] core_req_rw,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
|
||||
|
||||
15
hw/rtl/cache/VX_bank_core_req_arb.v
vendored
15
hw/rtl/cache/VX_bank_core_req_arb.v
vendored
@@ -18,7 +18,7 @@ module VX_bank_core_req_arb #(
|
||||
// Enqueue Data
|
||||
input wire reqq_push,
|
||||
input wire [NUM_REQUESTS-1:0] bank_valids,
|
||||
input wire [NUM_REQUESTS-1:0] bank_rw,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] bank_rw,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] bank_byteen,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] bank_writedata,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] bank_addr,
|
||||
@@ -40,21 +40,21 @@ module VX_bank_core_req_arb #(
|
||||
);
|
||||
|
||||
wire [NUM_REQUESTS-1:0] out_per_valids;
|
||||
wire [NUM_REQUESTS-1:0] out_per_rw;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0] out_per_rw;
|
||||
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] out_per_byteen;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] out_per_addr;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] out_per_writedata;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] out_per_tag;
|
||||
|
||||
reg [NUM_REQUESTS-1:0] use_per_valids;
|
||||
reg [NUM_REQUESTS-1:0] use_per_rw;
|
||||
reg [`CORE_REQ_TAG_COUNT-1:0] use_per_rw;
|
||||
reg [NUM_REQUESTS-1:0][WORD_SIZE-1:0] use_per_byteen;
|
||||
reg [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] use_per_addr;
|
||||
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] use_per_writedata;
|
||||
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] use_per_tag;
|
||||
|
||||
wire [NUM_REQUESTS-1:0] qual_valids;
|
||||
wire [NUM_REQUESTS-1:0] qual_rw;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0] qual_rw;
|
||||
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] qual_byteen;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] qual_addr;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] qual_writedata;
|
||||
@@ -108,16 +108,17 @@ module VX_bank_core_req_arb #(
|
||||
|
||||
assign reqq_empty = !qual_has_request;
|
||||
assign reqq_req_st0 = qual_has_request;
|
||||
assign reqq_req_tid_st0 = qual_request_index;
|
||||
assign reqq_req_rw_st0 = qual_rw[qual_request_index];
|
||||
assign reqq_req_tid_st0 = qual_request_index;
|
||||
assign reqq_req_byteen_st0 = qual_byteen[qual_request_index];
|
||||
assign reqq_req_addr_st0 = qual_addr[qual_request_index];
|
||||
assign reqq_req_writedata_st0 = qual_writedata[qual_request_index];
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
assign reqq_req_tag_st0 = qual_tag;
|
||||
assign reqq_req_rw_st0 = qual_rw;
|
||||
end else begin
|
||||
assign reqq_req_tag_st0 = qual_tag[qual_request_index];
|
||||
assign reqq_req_tag_st0 = qual_tag[qual_request_index];
|
||||
assign reqq_req_rw_st0 = qual_rw[qual_request_index];
|
||||
end
|
||||
|
||||
`DEBUG_BLOCK(
|
||||
|
||||
12
hw/rtl/cache/VX_cache.v
vendored
12
hw/rtl/cache/VX_cache.v
vendored
@@ -41,10 +41,6 @@ module VX_cache #(
|
||||
// Enable snoop forwarding
|
||||
parameter SNOOP_FORWARDING = 0,
|
||||
|
||||
// Prefetcher
|
||||
parameter PRFQ_SIZE = 1,
|
||||
parameter PRFQ_STRIDE = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 42,
|
||||
|
||||
@@ -70,7 +66,7 @@ module VX_cache #(
|
||||
|
||||
// Core request
|
||||
input wire [NUM_REQUESTS-1:0] core_req_valid,
|
||||
input wire [NUM_REQUESTS-1:0] core_req_rw,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
|
||||
@@ -246,7 +242,7 @@ module VX_cache #(
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0] curr_bank_core_req_rw;
|
||||
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
|
||||
@@ -427,9 +423,7 @@ module VX_cache #(
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.PRFQ_SIZE (PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (PRFQ_STRIDE)
|
||||
.DFQQ_SIZE (DFQQ_SIZE)
|
||||
) cache_dram_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
36
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
36
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
@@ -8,10 +8,7 @@ module VX_cache_dram_req_arb #(
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 0,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 0,
|
||||
// Prefetcher
|
||||
parameter PRFQ_SIZE = 1,
|
||||
parameter PRFQ_STRIDE = 0
|
||||
parameter DFQQ_SIZE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -38,32 +35,9 @@ module VX_cache_dram_req_arb #(
|
||||
input wire dram_req_ready
|
||||
);
|
||||
|
||||
wire pref_pop;
|
||||
wire pref_valid;
|
||||
wire[`DRAM_ADDR_WIDTH-1:0] pref_addr;
|
||||
wire dwb_valid;
|
||||
wire dfqq_req;
|
||||
|
||||
wire dwb_valid;
|
||||
wire dfqq_req;
|
||||
|
||||
assign pref_pop = !dwb_valid && !dfqq_req && dram_req_ready && pref_valid;
|
||||
|
||||
VX_prefetcher #(
|
||||
.PRFQ_SIZE (PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (PRFQ_STRIDE),
|
||||
.BANK_LINE_SIZE(BANK_LINE_SIZE),
|
||||
.WORD_SIZE (WORD_SIZE)
|
||||
) prfqq (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.dram_req (dram_req_valid && !dram_req_rw),
|
||||
.dram_req_addr(dram_req_addr),
|
||||
|
||||
.pref_pop (pref_pop),
|
||||
.pref_valid (pref_valid),
|
||||
.pref_addr (pref_addr)
|
||||
);
|
||||
|
||||
wire[`DRAM_ADDR_WIDTH-1:0] dfqq_req_addr;
|
||||
|
||||
`DEBUG_BEGIN
|
||||
@@ -110,10 +84,10 @@ module VX_cache_dram_req_arb #(
|
||||
assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i));
|
||||
end
|
||||
|
||||
assign dram_req_valid = dwb_valid || dfqq_req || pref_pop;
|
||||
assign dram_req_valid = dwb_valid || dfqq_req;
|
||||
assign dram_req_rw = dwb_valid;
|
||||
assign dram_req_byteen = dwb_valid ? per_bank_dram_wb_req_byteen[dwb_bank] : {BANK_LINE_SIZE{1'b1}};
|
||||
assign dram_req_addr = dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr);
|
||||
assign dram_req_addr = dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr;
|
||||
assign {dram_req_data} = dwb_valid ? per_bank_dram_wb_req_data[dwb_bank] : 0;
|
||||
|
||||
endmodule
|
||||
|
||||
71
hw/rtl/cache/VX_prefetcher.v
vendored
71
hw/rtl/cache/VX_prefetcher.v
vendored
@@ -1,71 +0,0 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
// VX_prefetcher: records observed DRAM request addresses in a queue and
// derives follow-on addresses (previous address + BANK_LINE_SIZE) that are
// offered on pref_addr.
// NOTE: pref_valid is hard-wired to 0 below (see the TODO), so as written
// this module never reports a valid prefetch address.
module VX_prefetcher #(
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 0,
|
||||
// Forwarded as the SIZE parameter of the address queue instance below
parameter PRFQ_SIZE = 1,
|
||||
// Value loaded into the use_valid counter each time a queued address is taken
parameter PRFQ_STRIDE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Request strobe and address to record (pushed into the queue when accepted)
input wire dram_req,
|
||||
input wire[`DRAM_ADDR_WIDTH-1:0] dram_req_addr,
|
||||
|
||||
// Consumer handshake: pref_pop advances to the next address while pref_valid is set
input wire pref_pop,
|
||||
output wire pref_valid,
|
||||
output wire[`DRAM_ADDR_WIDTH-1:0] pref_addr
|
||||
|
||||
);
|
||||
// Counter: loaded with PRFQ_STRIDE on update_use, decremented on an accepted pop
reg[`LOG2UP(PRFQ_STRIDE):0] use_valid;
|
||||
// Address currently driven on pref_addr
reg[`DRAM_ADDR_WIDTH-1:0] use_addr;
|
||||
|
||||
// Queue output side: current_valid is simply "queue not empty"
wire current_valid;
|
||||
wire[`DRAM_ADDR_WIDTH-1:0] current_addr;
|
||||
|
||||
wire current_full;
|
||||
wire current_empty;
|
||||
|
||||
assign current_valid = !current_empty;
|
||||
|
||||
// Take a new address from the queue when the counter is 0 or 1 and the queue holds an entry
wire update_use = ((use_valid == 0) || ((use_valid-1) == 0)) && current_valid;
|
||||
|
||||
// Queue of recorded request addresses.
// NOTE(review): VX_generic_queue is defined elsewhere; data_out is presumably
// the entry removed by pop - confirm against that module.
VX_generic_queue #(
|
||||
.DATAW(`DRAM_ADDR_WIDTH),
|
||||
.SIZE(PRFQ_SIZE)
|
||||
) pfq_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Record a request only when the queue has room and no pop is in progress
.push (dram_req && !current_full && !pref_pop),
|
||||
.data_in (dram_req_addr),
|
||||
|
||||
.pop (update_use),
|
||||
.data_out(current_addr),
|
||||
|
||||
.empty (current_empty),
|
||||
.full (current_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
// Output is forced invalid; the intended condition is kept in the TODO
assign pref_valid = 0; // TODO use_valid != 0;
|
||||
assign pref_addr = use_addr;
|
||||
|
||||
// State update: synchronous reset clears both registers; update_use has
// priority over the decrement/advance branch.
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
use_valid <= 0;
|
||||
use_addr <= 0;
|
||||
end else begin
|
||||
if (update_use) begin
|
||||
// Reload the counter and start from the address following the queued one
use_valid <= PRFQ_STRIDE;
|
||||
use_addr <= current_addr + BANK_LINE_SIZE;
|
||||
// Never taken while pref_valid is tied to 0 above
end else if (pref_valid && pref_pop) begin
|
||||
use_valid <= use_valid - 1;
|
||||
use_addr <= use_addr + BANK_LINE_SIZE;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
Reference in New Issue
Block a user