Merge branch 'new-cisc' into kernels-asplos-ae
This commit is contained in:
BIN
tests/regression/sgemm_gemmini_dma/args/1024
Normal file
BIN
tests/regression/sgemm_gemmini_dma/args/1024
Normal file
Binary file not shown.
BIN
tests/regression/sgemm_gemmini_dma/args/128
Normal file
BIN
tests/regression/sgemm_gemmini_dma/args/128
Normal file
Binary file not shown.
BIN
tests/regression/sgemm_gemmini_dma/args/256
Normal file
BIN
tests/regression/sgemm_gemmini_dma/args/256
Normal file
Binary file not shown.
BIN
tests/regression/sgemm_gemmini_dma/args/512
Normal file
BIN
tests/regression/sgemm_gemmini_dma/args/512
Normal file
Binary file not shown.
11
tests/regression/sgemm_gemmini_dma/compile_ampere.sh
Executable file
11
tests/regression/sgemm_gemmini_dma/compile_ampere.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
rm kernel.radiance.elf
|
||||
rm -rf binaries
|
||||
mkdir binaries
|
||||
for a in args/*; do
|
||||
cp -f $a args.bin
|
||||
aa=$(basename "$a")
|
||||
cp -f input.a/"$aa" input.a.bin
|
||||
cp -f input.b/"$aa" input.b.bin
|
||||
make > /dev/null
|
||||
mv kernel.radiance.elf binaries/gemmini_fp16dma"$aa".elf
|
||||
done
|
||||
11
tests/regression/sgemm_gemmini_dma/compile_hopper.sh
Executable file
11
tests/regression/sgemm_gemmini_dma/compile_hopper.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
rm kernel.radiance.elf
|
||||
rm -rf binaries
|
||||
mkdir binaries
|
||||
for a in args/*; do
|
||||
cp -f $a args.bin
|
||||
aa=$(basename "$a")
|
||||
cp -f input.a/"$aa" input.a.bin
|
||||
cp -f input.b/"$aa" input.b.bin
|
||||
make > /dev/null
|
||||
mv kernel.radiance.elf binaries/gemmini_hopper_dma"$aa".elf
|
||||
done
|
||||
@@ -15,20 +15,20 @@ def truncated_matrix_multiplication(matrix_a, matrix_b, size):
|
||||
result = np.matmul(truncated_a, truncated_b)
|
||||
return result.astype(np.float16)
|
||||
|
||||
# Generate the 512x512 matrices
|
||||
size = 512
|
||||
matrix_a = generate_fp16_matrix(size)
|
||||
matrix_b = generate_fp16_matrix(size)
|
||||
|
||||
# Save the operand matrices to binary files
|
||||
save_matrix_to_bin("input.a.bin", matrix_a)
|
||||
save_matrix_to_bin("input.b.bin", matrix_b)
|
||||
|
||||
# Generate and save the reference matrices for 128x128, 256x256, and 512x512 sizes
|
||||
sizes = [128, 256, 512]
|
||||
sizes = [128, 256, 512, 1024]
|
||||
for s in sizes:
|
||||
np.random.seed(0)
|
||||
matrix_a = generate_fp16_matrix(s)
|
||||
matrix_b = generate_fp16_matrix(s)
|
||||
|
||||
# Save the operand matrices to binary files
|
||||
save_matrix_to_bin("input.a.bin", matrix_a)
|
||||
save_matrix_to_bin(f"input.a/{s}", matrix_a)
|
||||
save_matrix_to_bin("input.b.bin", matrix_b)
|
||||
save_matrix_to_bin(f"input.b/{s}", matrix_b)
|
||||
|
||||
ref_matrix = truncated_matrix_multiplication(matrix_a, matrix_b, s)
|
||||
print(ref_matrix)
|
||||
save_matrix_to_bin(f"ref{s}.bin", ref_matrix)
|
||||
|
||||
print("All files generated successfully.")
|
||||
|
||||
@@ -107,7 +107,7 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
|
||||
for (uint32_t tile_j = 0; tile_j < num_tiles_n; tile_j += 1) {
|
||||
for (uint32_t tile_k = 0; tile_k < num_tiles_k; tile_k += 1) {
|
||||
uint32_t a_hexadecile = (tile_k & 1) << 2;
|
||||
uint32_t b_hexadecile = a_hexadecile + 8;
|
||||
uint32_t b_hexadecile = a_hexadecile + 11;
|
||||
gemmini_tile_load_ab(A, B,
|
||||
a_hexadecile, b_hexadecile, tile_i, tile_j, tile_k,
|
||||
dim_m, dim_n, dim_k, TILE_M, TILE_N, TILE_K);
|
||||
|
||||
Reference in New Issue
Block a user