tex_unit update

This commit is contained in:
Blaise Tine
2021-03-31 05:43:44 -04:00
parent 79fcdf7a28
commit 7b2f96bc6d
12 changed files with 687 additions and 651 deletions

Binary file not shown.

Binary file not shown.

View File

@@ -52,8 +52,8 @@ int main() {
targ.karg = *arg; targ.karg = *arg;
targ.tile_width = arg->dst_width; targ.tile_width = arg->dst_width;
targ.tile_height = (arg->dst_height + arg->num_tasks - 1) / arg->num_tasks; targ.tile_height = (arg->dst_height + arg->num_tasks - 1) / arg->num_tasks;
targ.deltaX = 1.0f / arg->dst_width; targ.deltaX = 1.0f / (((float)arg->src_width) / arg->dst_width);
targ.deltaY = 1.0f / arg->dst_height; targ.deltaY = 1.0f / (((float)arg->src_height) / arg->dst_height);
vx_spawn_tasks(arg->num_tasks, kernel_body, &targ); vx_spawn_tasks(arg->num_tasks, kernel_body, &targ);
} }

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -124,7 +124,7 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps)); RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads)); RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
uint32_t num_tasks = max_cores * max_warps * max_threads / 4; uint32_t num_tasks = max_cores * max_warps * max_threads;
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl; std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl; std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl;
@@ -170,8 +170,8 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
} }
// upload source buffer0 // upload source buffer
std::cout << "upload source buffer0" << std::endl; std::cout << "upload source buffer" << std::endl;
{ {
auto buf_ptr = (int8_t*)vx_host_ptr(buffer); auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < src_bufsize; ++i) { for (uint32_t i = 0; i < src_bufsize; ++i) {

View File

@@ -26,9 +26,9 @@ extern "C" {
// Prototypes of the DPI helper functions (the hunk header shows they sit
// inside an extern "C" block). Each row is a side-by-side diff line: the
// pre-commit declaration on the left, the post-commit declaration on the right.
// This commit adds a second operand `b` to dpi_fsgnj, dpi_fsgnjn and
// dpi_fsgnjx; the other declarations are unchanged context.
void dpi_utof(int a, int frm, int* result, int* fflags); void dpi_utof(int a, int frm, int* result, int* fflags);
void dpi_fclss(int a, int* result); void dpi_fclss(int a, int* result);
void dpi_fsgnj(int a, int* result); void dpi_fsgnj(int a, int b, int* result);
void dpi_fsgnjn(int a, int* result); void dpi_fsgnjn(int a, int b, int* result);
void dpi_fsgnjx(int a, int* result); void dpi_fsgnjx(int a, int b, int* result);
void dpi_flt(int a, int b, int* result, int* fflags); void dpi_flt(int a, int b, int* result, int* fflags);
void dpi_fle(int a, int b, int* result, int* fflags); void dpi_fle(int a, int b, int* result, int* fflags);
@@ -244,21 +244,53 @@ void dpi_fmax(int a, int b, int* result, int* fflags) {
} }
// Classifies the 32-bit pattern `a` as a single-precision float and writes a
// one-hot class mask to `*result`:
//   bit 0: -infinity    bit 1: -normal    bit 2: -subnormal   bit 3: -0
//   bit 4: +0           bit 5: +subnormal bit 6: +normal      bit 7: +infinity
//   bit 8: signaling NaN                  bit 9: quiet NaN
//
// a      - raw bits of the operand (sign: bit 31, exponent: bits 30..23,
//          fraction: bits 22..0).
// result - out-parameter receiving the class mask; must not be null.
//
// A NaN is quiet whenever the top fraction bit (bit 22) is set, regardless of
// its sign or remaining payload bits. The previous check only recognised the
// single pattern 0x7FC00000 and reported every other quiet NaN as signaling.
void dpi_fclss(int a, int* result) {
  // Work on an unsigned copy so the shifts never sign-extend.
  const uint32_t bits     = static_cast<uint32_t>(a);
  const bool     negative = (bits >> 31) != 0;
  const uint32_t expo     = (bits >> 23) & 0xFF;
  const uint32_t fraction = bits & 0x7FFFFF;

  int r = 0;
  if (expo == 0xFF) {
    if (fraction == 0) {
      r = negative ? (1 << 0) : (1 << 7);   // -/+ infinity
    } else if ((fraction & 0x400000) != 0) {
      r = (1 << 9);                         // quiet NaN (fraction MSB set)
    } else {
      r = (1 << 8);                         // signaling NaN
    }
  } else if (expo == 0) {
    if (fraction == 0) {
      r = negative ? (1 << 3) : (1 << 4);   // -/+ zero
    } else {
      r = negative ? (1 << 2) : (1 << 5);   // -/+ subnormal
    }
  } else {
    r = negative ? (1 << 1) : (1 << 6);     // -/+ normal
  }
  *result = r;
}
// dpi_fsgnj: sign injection on raw 32-bit patterns.
// Each row below is a side-by-side diff line: the left half is the pre-commit
// stub (one operand, always wrote 0 to *result), the right half is the
// post-commit two-operand implementation.
// Post-commit behaviour: *result receives bit 31 of `b` combined with
// bits 30..0 of `a`.
void dpi_fsgnj(int a, int* result) { void dpi_fsgnj(int a, int b, int* result) {
// TODO
*result = 0; int sign = b & 0x80000000;
int r = sign | (a & 0x7FFFFFFF);
*result = r;
} }
// dpi_fsgnjn: negated sign injection on raw 32-bit patterns.
// Each row below is a side-by-side diff line: the left half is the pre-commit
// stub (one operand, always wrote 0 to *result), the right half is the
// post-commit two-operand implementation.
// Post-commit behaviour: *result receives the inverse of bit 31 of `b`
// combined with bits 30..0 of `a`.
void dpi_fsgnjn(int a, int* result) { void dpi_fsgnjn(int a, int b, int* result) {
// TODO
*result = 0; int sign = ~b & 0x80000000;
int r = sign | (a & 0x7FFFFFFF);
*result = r;
} }
// dpi_fsgnjx: XOR sign injection on raw 32-bit patterns.
// Each row below is a side-by-side diff line: the left half is the pre-commit
// stub (one operand, always wrote 0 to *result), the right half is the
// post-commit two-operand implementation.
// Post-commit behaviour: *result receives (bit 31 of `a`) XOR (bit 31 of `b`)
// as its sign bit, combined with bits 30..0 of `a`.
void dpi_fsgnjx(int a, int* result) { void dpi_fsgnjx(int a, int b, int* result) {
// TODO
*result = 0; int sign1 = a & 0x80000000;
int sign2 = b & 0x80000000;
int r = (sign1 ^ sign2) | (a & 0x7FFFFFFF);
*result = r;
} }

View File

@@ -18,9 +18,9 @@ import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, o
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags); import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fclss(input int a, output int result); import "DPI-C" context function void dpi_fclss(input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(input int a, output int result); import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjn(input int a, output int result); import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjx(input int a, output int result); import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags); import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags); import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);

View File

@@ -78,7 +78,7 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0] rsp_rem_mask_n; wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
reg [`NUM_THREADS-1:0] req_sent_mask; reg [`NUM_THREADS-1:0] req_sent_mask;
wire req_sent_all; wire sent_all_ready;
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr; wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire mbuf_full; wire mbuf_full;
@@ -116,13 +116,13 @@ module VX_lsu_unit #(
.full (mbuf_full) .full (mbuf_full)
); );
assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask)) assign sent_all_ready = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|| (req_is_dup & dcache_req_if.ready[0]); || (req_is_dup & dcache_req_if.ready[0]);
always @(posedge clk) begin always @(posedge clk) begin
if (reset || req_sent_all) begin if (reset || sent_all_ready) begin
req_sent_mask <= 0; req_sent_mask <= 0;
end else if (!req_sent_all) begin end else begin
req_sent_mask <= req_sent_mask | dcache_req_fire; req_sent_mask <= req_sent_mask | dcache_req_fire;
end end
end end
@@ -193,11 +193,11 @@ module VX_lsu_unit #(
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}}; assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
`endif `endif
assign ready_in = req_ready_dep && req_sent_all; assign ready_in = req_ready_dep && sent_all_ready;
// send store commit // send store commit
wire is_store_rsp = req_valid && ~req_wb && req_sent_all; wire is_store_rsp = req_valid && ~req_wb && sent_all_ready;
assign st_commit_if.valid = is_store_rsp; assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid; assign st_commit_if.wid = req_wid;

View File

@@ -330,9 +330,9 @@ module VX_fpu_dpi #(
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]); dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]); dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]); dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (dataa[i], result_fsgnj[i]); dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
dpi_fsgnjn (dataa[i], result_fsgnjn[i]); dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
dpi_fsgnjx (dataa[i], result_fsgnjx[i]); dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
result_fmv[i] = dataa[i]; result_fmv[i] = dataa[i];
end end
end end

View File

@@ -99,7 +99,7 @@ module VX_tex_memory #(
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
wire req_texel_valid; wire req_texel_valid;
wire req_texel_sent, last_texel_sent; wire sent_all_ready, last_texel_sent;
wire req_texel_dup; wire req_texel_dup;
wire [`NUM_THREADS-1:0][29:0] req_texel_addr; wire [`NUM_THREADS-1:0][29:0] req_texel_addr;
reg [1:0] req_texel_idx; reg [1:0] req_texel_idx;
@@ -108,7 +108,7 @@ module VX_tex_memory #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset || last_texel_sent) begin if (reset || last_texel_sent) begin
req_texel_idx <= 0; req_texel_idx <= 0;
end else if (req_texel_sent) begin end else if (req_texel_valid && sent_all_ready) begin
req_texel_idx <= req_texel_idx + 1; req_texel_idx <= req_texel_idx + 1;
end end
end end
@@ -126,7 +126,7 @@ module VX_tex_memory #(
assign req_texel_dup = q_dup_reqs[req_texel_idx]; assign req_texel_dup = q_dup_reqs[req_texel_idx];
wire is_last_texel = (req_texel_idx == (q_req_filter ? 3 : 0)); wire is_last_texel = (req_texel_idx == (q_req_filter ? 3 : 0));
assign last_texel_sent = req_texel_sent && is_last_texel; assign last_texel_sent = req_texel_valid && sent_all_ready && is_last_texel;
// DCache Request // DCache Request
@@ -136,11 +136,11 @@ module VX_tex_memory #(
assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
assign req_texel_sent = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask)) assign sent_all_ready = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask))
|| (req_texel_dup & dcache_req_if.ready[0]); || (req_texel_dup & dcache_req_if.ready[0]);
always @(posedge clk) begin always @(posedge clk) begin
if (reset || req_texel_sent) begin if (reset || sent_all_ready) begin
texel_sent_mask <= 0; texel_sent_mask <= 0;
end else begin end else begin
texel_sent_mask <= texel_sent_mask | dcache_req_fire; texel_sent_mask <= texel_sent_mask | dcache_req_fire;

View File

@@ -561,20 +561,18 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
// FSGNJ.S, FSGNJN.S, FSGNJX.S // FSGNJ.S, FSGNJN.S, FSGNJX.S
case 0x10: { case 0x10: {
bool fsign1 = rsdata[0] & 0x80000000; bool fsign1 = (rsdata[0] >> 31);
uint32_t fdata1 = rsdata[0] & 0x7FFFFFFF; uint32_t fdata1 = rsdata[0] & 0x7FFFFFFF;
bool fsign2 = rsdata[1] & 0x80000000; bool fsign2 = (rsdata[1] >> 31);
switch (func3) { switch (func3) {
case 0: // FSGNJ.S case 0: // FSGNJ.S
rddata = (fsign2 << 31) | fdata1; rddata = (fsign2 << 31) | fdata1;
break; break;
case 1: // FSGNJN.S case 1: // FSGNJN.S
fsign2 = !fsign2; rddata = (!fsign2 << 31) | fdata1;
rddata = (fsign2 << 31) | fdata1;
break; break;
case 2: { // FSGNJX.S case 2: { // FSGNJX.S
bool sign = fsign1 ^ fsign2; rddata = ((fsign1 ^ fsign2) << 31) | fdata1;
rddata = (sign << 31) | fdata1;
} break; } break;
} }
} break; } break;