seemingly working fp32 implementation
This commit is contained in:
@@ -385,6 +385,11 @@
|
||||
`define LATENCY_FCVT 5
|
||||
`endif
|
||||
|
||||
// Tensor Core Latency
// Default latency of the tensor-core HMMA operation; VX_tensor_dpu uses this
// value as the DEPTH of its output shift register. The guard lets a definition
// made earlier (e.g. on the tool command line) take precedence over this one.
`ifndef LATENCY_HMMA
`define LATENCY_HMMA 4
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Cache Enable
|
||||
|
||||
0
hw/rtl/fpu/VX_tensor_core.sv
Normal file
0
hw/rtl/fpu/VX_tensor_core.sv
Normal file
35
hw/rtl/fpu/VX_tensor_dpu.sv
Normal file
35
hw/rtl/fpu/VX_tensor_dpu.sv
Normal file
@@ -0,0 +1,35 @@
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
// VX_tensor_dpu
//
// Simulation model of the tensor dot-product unit. The output tile is obtained
// from the `dpi_hmma` DPI routine and is then delayed, together with the valid
// flag, through a VX_shift_register whose DEPTH is `LATENCY_HMMA.
//
// Ports:
//   clk, reset : forwarded to the shift register.
//   valid_in   : handed to dpi_hmma and pipelined to valid_out.
//   A_tile     : [3:0][1:0] array of 32-bit words.
//   B_tile     : [1:0][3:0] array of 32-bit words.
//   C_tile     : [3:0][3:0] array of 32-bit words.
//   valid_out  : valid_in as it emerges from the shift register.
//   D_tile     : dpi_hmma result as it emerges from the shift register.
//
// NOTE(review): presumably dpi_hmma computes D = A x B + C on fp32 elements;
// its definition is not in this file -- confirm against the DPI source.
module VX_tensor_dpu #() (
    input                   clk,
    input                   reset,

    input                   valid_in,
    input  [3:0][1:0][31:0] A_tile,
    input  [1:0][3:0][31:0] B_tile,
    input  [3:0][3:0][31:0] C_tile,

    output                  valid_out,
    output [3:0][3:0][31:0] D_tile
);
    // Pipelined payload: one valid bit (MSB) followed by the whole output tile.
    localparam PIPE_W = 1 + $bits(D_tile);

    // Un-delayed tile written by the DPI call below.
    logic [3:0][3:0][31:0] hmma_comb;

    wire [PIPE_W-1:0] pipe_in = {valid_in, hmma_comb};
    wire [PIPE_W-1:0] pipe_out;

    // The DPI routine is invoked from a combinational block, so it is
    // re-evaluated whenever valid_in or one of the input tiles changes.
    always @(*) begin
        dpi_hmma(valid_in, A_tile, B_tile, C_tile, hmma_comb);
    end

    // Delay line modelling the operation latency; always enabled.
    // NOTE(review): RESETW=1 presumably limits reset to the top (valid) bit --
    // confirm against VX_shift_register.
    VX_shift_register #(
        .DATAW  (PIPE_W),
        .DEPTH  (`LATENCY_HMMA),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (1'b1),
        .data_in  (pipe_in),
        .data_out (pipe_out)
    );

    assign {valid_out, D_tile} = pipe_out;

endmodule
|
||||
28
hw/rtl/fpu/VX_tensor_tb.sv
Normal file
28
hw/rtl/fpu/VX_tensor_tb.sv
Normal file
@@ -0,0 +1,28 @@
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
// VX_tensor_tb
//
// Thin top-level wrapper around a single VX_tensor_dpu instance. Every port of
// this module is wired straight through to the identically named port of the
// instance; no logic is added here.
module VX_tensor_tb(
    input                   clk,
    input                   reset,

    input                   valid_in,
    input  [3:0][1:0][31:0] A_tile,
    input  [1:0][3:0][31:0] B_tile,
    input  [3:0][3:0][31:0] C_tile,

    output                  valid_out,
    output [3:0][3:0][31:0] D_tile
);

    VX_tensor_dpu tensor_core (
        // clocking
        .clk       (clk),
        .reset     (reset),

        // request side
        .valid_in  (valid_in),
        .A_tile    (A_tile),
        .B_tile    (B_tile),
        .C_tile    (C_tile),

        // response side
        .valid_out (valid_out),
        .D_tile    (D_tile)
    );

endmodule
|
||||
Reference in New Issue
Block a user