minor updates

This commit is contained in:
Blaise Tine
2021-03-28 18:08:04 -04:00
parent 6514a3b782
commit f968dbccd3
9 changed files with 80 additions and 86 deletions

Binary file not shown.

View File

@@ -27,7 +27,7 @@ void kernel_body(int task_id, void* arg) {
for (uint32_t x = 0; x < _arg->tile_width; ++x) { for (uint32_t x = 0; x < _arg->tile_width; ++x) {
int32_t u = (int32_t)(fu * (1<<20)); int32_t u = (int32_t)(fu * (1<<20));
int32_t v = (int32_t)(fv * (1<<20)); int32_t v = (int32_t)(fv * (1<<20));
dst_ptr[x] = vx_tex(0, u, v, 0); dst_ptr[x] = vx_tex(0, u, v, 0x0);
fu += _arg->deltaX; fu += _arg->deltaX;
} }
dst_ptr += _arg->karg.dst_pitch; dst_ptr += _arg->karg.dst_pitch;

View File

@@ -36,23 +36,23 @@ Disassembly of section .text:
80000060: 04912223 sw s1,68(sp) 80000060: 04912223 sw s1,68(sp)
80000064: 01442783 lw a5,20(s0) # 7ffff014 <__stack_size+0x7fffec14> 80000064: 01442783 lw a5,20(s0) # 7ffff014 <__stack_size+0x7fffec14>
80000068: fd079073 csrw 0xfd0,a5 80000068: fd079073 csrw 0xfd0,a5
8000006c: fd105073 csrwi 0xfd1,0 8000006c: fd405073 csrwi 0xfd4,0
80000070: 00442503 lw a0,4(s0) 80000070: 00442503 lw a0,4(s0)
80000074: 01f00493 li s1,31 80000074: 01f00493 li s1,31
80000078: 00151513 slli a0,a0,0x1 80000078: 00151513 slli a0,a0,0x1
8000007c: fff50513 addi a0,a0,-1 8000007c: fff50513 addi a0,a0,-1
80000080: 400000ef jal ra,80000480 <__clzsi2> 80000080: 400000ef jal ra,80000480 <__clzsi2>
80000084: 40a48533 sub a0,s1,a0 80000084: 40a48533 sub a0,s1,a0
80000088: fd251073 csrw 0xfd2,a0 80000088: fd551073 csrw 0xfd5,a0
8000008c: 00842503 lw a0,8(s0) 8000008c: 00842503 lw a0,8(s0)
80000090: 00151513 slli a0,a0,0x1 80000090: 00151513 slli a0,a0,0x1
80000094: fff50513 addi a0,a0,-1 80000094: fff50513 addi a0,a0,-1
80000098: 3e8000ef jal ra,80000480 <__clzsi2> 80000098: 3e8000ef jal ra,80000480 <__clzsi2>
8000009c: 40a484b3 sub s1,s1,a0 8000009c: 40a484b3 sub s1,s1,a0
800000a0: fd349073 csrw 0xfd3,s1 800000a0: fd649073 csrw 0xfd6,s1
800000a4: fd405073 csrwi 0xfd4,0 800000a4: fd105073 csrwi 0xfd1,0
800000a8: fd505073 csrwi 0xfd5,0 800000a8: fd205073 csrwi 0xfd2,0
800000ac: fd605073 csrwi 0xfd6,0 800000ac: fd305073 csrwi 0xfd3,0
800000b0: 01442503 lw a0,20(s0) 800000b0: 01442503 lw a0,20(s0)
800000b4: 01842583 lw a1,24(s0) 800000b4: 01842583 lw a1,24(s0)
800000b8: 01c42603 lw a2,28(s0) 800000b8: 01c42603 lw a2,28(s0)

Binary file not shown.

View File

@@ -204,7 +204,7 @@ module VX_csr_data #(
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID; `CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
default: begin default: begin
assert (~read_enable || read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES)) assert (~read_enable || (read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES)))
else $error("%t: invalid CSR read address: %0h", $time, read_addr); else $error("%t: invalid CSR read address: %0h", $time, read_addr);
end end
endcase endcase

View File

@@ -24,7 +24,7 @@ module VX_decode #(
reg [`MOD_BITS-1:0] op_mod; reg [`MOD_BITS-1:0] op_mod;
reg [31:0] imm; reg [31:0] imm;
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm; reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
reg rd_fp, rs1_fp, rs2_fp; reg rd_fp, rs1_fp, rs2_fp, rs3_fp;
reg is_join, is_wstall; reg is_join, is_wstall;
wire [31:0] instr = ifetch_rsp_if.instr; wire [31:0] instr = ifetch_rsp_if.instr;
@@ -59,6 +59,7 @@ module VX_decode #(
rd_fp = 0; rd_fp = 0;
rs1_fp = 0; rs1_fp = 0;
rs2_fp = 0; rs2_fp = 0;
rs3_fp = 1;
is_join = 0; is_join = 0;
is_wstall = 0; is_wstall = 0;
@@ -367,6 +368,7 @@ module VX_decode #(
use_rs1 = 1; use_rs1 = 1;
use_rs2 = 1; use_rs2 = 1;
use_rs3 = 1; use_rs3 = 1;
rs3_fp = 0;
end end
`endif `endif
default:; default:;
@@ -395,7 +397,7 @@ module VX_decode #(
assign decode_if.rd = {rd_fp, rd}; assign decode_if.rd = {rd_fp, rd};
assign decode_if.rs1 = {rs1_fp, rs1_qual}; assign decode_if.rs1 = {rs1_fp, rs1_qual};
assign decode_if.rs2 = {rs2_fp, rs2}; assign decode_if.rs2 = {rs2_fp, rs2};
assign decode_if.rs3 = {1'b1, rs3}; assign decode_if.rs3 = {rs3_fp, rs3};
`else `else
`UNUSED_VAR (rd_fp) `UNUSED_VAR (rd_fp)
`UNUSED_VAR (rs1_fp) `UNUSED_VAR (rs1_fp)

View File

@@ -50,8 +50,8 @@ module VX_tex_addr #(
`UNUSED_PARAM (CORE_ID) `UNUSED_PARAM (CORE_ID)
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u; wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_u;
wire [1:0][`NUM_THREADS-1:0][`FIXED_FRAC-1:0] v; wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_v;
wire [`TEX_STRIDE_BITS-1:0] log_stride; wire [`TEX_STRIDE_BITS-1:0] log_stride;
// stride // stride
@@ -71,8 +71,9 @@ module VX_tex_addr #(
wire [31:0] fv[1:0]; wire [31:0] fv[1:0];
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0); assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0); assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_heights[i]) : 0); assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_heights[i]) : 0);
VX_tex_wrap #( VX_tex_wrap #(
@@ -80,15 +81,7 @@ module VX_tex_addr #(
) tex_wrap_u0 ( ) tex_wrap_u0 (
.wrap_i (wrap_u), .wrap_i (wrap_u),
.coord_i (fu[0]), .coord_i (fu[0]),
.coord_o (u[0][i]) .coord_o (clamped_u[i][0])
);
VX_tex_wrap #(
.CORE_ID (CORE_ID)
) tex_wrap_v0 (
.wrap_i (wrap_v),
.coord_i (fv[0]),
.coord_o (v[0][i])
); );
VX_tex_wrap #( VX_tex_wrap #(
@@ -96,7 +89,15 @@ module VX_tex_addr #(
) tex_wrap_u1 ( ) tex_wrap_u1 (
.wrap_i (wrap_u), .wrap_i (wrap_u),
.coord_i (fu[1]), .coord_i (fu[1]),
.coord_o (u[1][i]) .coord_o (clamped_u[i][1])
);
VX_tex_wrap #(
.CORE_ID (CORE_ID)
) tex_wrap_v0 (
.wrap_i (wrap_v),
.coord_i (fv[0]),
.coord_o (clamped_v[i][0])
); );
VX_tex_wrap #( VX_tex_wrap #(
@@ -104,7 +105,7 @@ module VX_tex_addr #(
) tex_wrap_v1 ( ) tex_wrap_v1 (
.wrap_i (wrap_v), .wrap_i (wrap_v),
.coord_i (fv[1]), .coord_i (fv[1]),
.coord_o (v[1][i]) .coord_o (clamped_v[i][1])
); );
end end
@@ -117,10 +118,11 @@ module VX_tex_addr #(
wire [`FIXED_FRAC-1:0] x [1:0]; wire [`FIXED_FRAC-1:0] x [1:0];
wire [`FIXED_FRAC-1:0] y [1:0]; wire [`FIXED_FRAC-1:0] y [1:0];
assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log_widths[i]); assign x[0] = clamped_u[i][0] >> ((`FIXED_FRAC) - log_widths[i]);
assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log_widths[i]); assign x[1] = clamped_u[i][1] >> ((`FIXED_FRAC) - log_widths[i]);
assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log_heights[i]);
assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log_heights[i]); assign y[0] = clamped_v[i][0] >> ((`FIXED_FRAC) - log_heights[i]);
assign y[1] = clamped_v[i][1] >> ((`FIXED_FRAC) - log_heights[i]);
assign addr[i][0] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[0]) << log_widths[i])) << log_stride; assign addr[i][0] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[0]) << log_widths[i])) << log_stride;
assign addr[i][1] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[0]) << log_widths[i])) << log_stride; assign addr[i][1] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[0]) << log_widths[i])) << log_stride;
@@ -128,6 +130,12 @@ module VX_tex_addr #(
assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride; assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride;
end end
wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u0, v0;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign u0[i] = clamped_u[i][0];
assign v0[i] = clamped_v[i][0];
end
wire stall_out = mem_req_valid && ~mem_req_ready; wire stall_out = mem_req_valid && ~mem_req_ready;
VX_pipe_register #( VX_pipe_register #(
@@ -137,7 +145,7 @@ module VX_tex_addr #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (~stall_out), .enable (~stall_out),
.data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, u[0], v[0], req_info}), .data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, u0, v0, req_info}),
.data_out ({mem_req_valid, mem_req_wid, mem_req_tmask, mem_req_PC, mem_req_filter, mem_req_stride, mem_req_addr, mem_req_u, mem_req_v, mem_req_info}) .data_out ({mem_req_valid, mem_req_wid, mem_req_tmask, mem_req_PC, mem_req_filter, mem_req_stride, mem_req_addr, mem_req_u, mem_req_v, mem_req_info})
); );

View File

@@ -24,9 +24,9 @@ module VX_tex_unit #(
`UNUSED_PARAM (CORE_ID) `UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [(1 << `TEX_MIP_BITS)-1:0]; reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
reg [`TEX_WIDTH_BITS-1:0] tex_width [(1 << `TEX_MIP_BITS)-1:0]; reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
reg [`TEX_HEIGHT_BITS-1:0] tex_height [(1 << `TEX_MIP_BITS)-1:0]; reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0]; reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0]; reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
@@ -37,49 +37,28 @@ module VX_tex_unit #(
// CSRs programming // CSRs programming
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS];
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (tex_csr_if.write_enable) begin
tex_baddr[i] <= 0; case (tex_csr_if.write_addr)
tex_format[i] <= 0; `CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
tex_wrap_u[i] <= 0; `CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
tex_wrap_v[i] <= 0; `CSR_TEX_WRAP(i) : begin
tex_filter[i] <= 0; tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
end begin tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
if (tex_csr_if.write_enable) begin end
case (tex_csr_if.write_addr) `CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
`CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0]; `CSR_TEX_MIPOFF(i) : tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0]; `CSR_TEX_WIDTH(i) : tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
`CSR_TEX_WRAP(i) : begin `CSR_TEX_HEIGHT(i) : tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS]; default:
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS]; assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
end && tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0]; endcase
`CSR_TEX_MIPOFF(i),
`CSR_TEX_WIDTH(i),
`CSR_TEX_HEIGHT(i):;
default:
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
endcase
end
end end
end end
end end
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
wire [`TEX_MIP_BITS-1:0] mip_waddr = tex_csr_if.write_data[24 +: `TEX_MIP_BITS];
always @(posedge clk) begin
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_MIPOFF(i))
tex_mipoff[mip_waddr] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_WIDTH(i))
tex_width[mip_waddr] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
if (tex_csr_if.write_enable && tex_csr_if.write_addr == `CSR_TEX_HEIGHT(i))
tex_height[mip_waddr] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
end
end
// mipmap attributes // mipmap attributes
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs; wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs;
@@ -87,10 +66,11 @@ module VX_tex_unit #(
wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] tex_heights; wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] tex_heights;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`TEX_MIP_BITS-1:0] mip_raddr = {tex_req_if.unit[`NTEX_BITS-1:0], tex_req_if.lod[i][`TEX_LOD_BITS-1:0]}; wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0];
assign tex_mipoffs[i] = tex_mipoff[mip_raddr]; wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS];
assign tex_widths[i] = tex_width[mip_raddr]; assign tex_mipoffs[i] = tex_mipoff[unit][mip_level];
assign tex_heights[i] = tex_height[mip_raddr]; assign tex_widths[i] = tex_width[unit][mip_level];
assign tex_heights[i] = tex_height[unit][mip_level];
end end
// address generation // address generation
@@ -237,13 +217,14 @@ module VX_tex_unit #(
if (tex_csr_if.write_enable if (tex_csr_if.write_enable
&& (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i) && (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin && tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
$display("%t: core%0d-tex_csr: csr_tex%d_addr, csr_data=%0h", $time, CORE_ID, i, tex_baddr[i]); $display("%t: core%0d-tex_unit: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_width, csr_data=%0h", $time, CORE_ID, i, tex_width[i]); $display("%t: core%0d-tex_unit: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_height, csr_data=%0h", $time, CORE_ID, i, tex_height[i]); $display("%t: core%0d-tex_unit: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_format, csr_data=%0h", $time, CORE_ID, i, tex_format[i]); $display("%t: core%0d-tex_unit: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_u, csr_data=%0h", $time, CORE_ID, i, tex_wrap_u[i]); $display("%t: core%0d-tex_unit: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
$display("%t: core%0d-tex_csr: csr_tex%d_wrap_v, csr_data=%0h", $time, CORE_ID, i, tex_wrap_v[i]); $display("%t: core%0d-tex_unit: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
$display("%t: core%0d-tex_csr: csr_tex%d_filter, csr_data=%0h", $time, CORE_ID, i, tex_filter[i]); $display("%t: core%0d-tex_unit: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]);
$display("%t: core%0d-tex_unit: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]);
end end
end end
end end

View File

@@ -54,8 +54,11 @@ extern "C" {
// Texture load // Texture load
#define vx_tex(unit, u, v, l) ({ \ #define vx_tex(unit, u, v, l) ({ \
register unsigned __r; \ unsigned __r; \
__asm__ __volatile__ (".insn r4 0x6b, 5, " __ASM_STR(unit) ", %0, %1, %2, %3" : "=r"(__r) : "r"(u), "r"(v), "r"(l)); \ unsigned __u = u; \
unsigned __v = v; \
unsigned __l = l; \
__asm__ __volatile__ (".insn r4 0x6b, 5, " __ASM_STR(unit) ", %0, %1, %2, %3" : "=r"(__r) : "r"(__u), "r"(__v), "r"(__l)); \
__r; \ __r; \
}) })