Merge branch 'new-cisc' into kernels-asplos-ae

This commit is contained in:
Hansung Kim
2025-01-28 21:18:12 -08:00
9 changed files with 75 additions and 49 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,11 @@
# Build one gemmini_fp16dma ELF per argument set found in args/.
#
# For every file args/<name>, the matching operand files input.a/<name> and
# input.b/<name> are staged as args.bin, input.a.bin and input.b.bin, `make`
# is run, and the resulting kernel.radiance.elf is stored as
# binaries/gemmini_fp16dma<name>.elf.
#
# Exits non-zero if any configuration could not be staged, built or stored.

# -f: a missing ELF from a previous run is not an error.
rm -f kernel.radiance.elf
rm -rf binaries
mkdir binaries || exit 1

status=0
for a in args/*; do
    # An unmatched glob leaves the literal pattern behind; skip it.
    [ -e "$a" ] || continue
    aa=$(basename "$a")

    # Stage all three inputs; if one cannot be copied, skip this
    # configuration instead of building against files left over from the
    # previous iteration.
    if ! { cp -f "$a" args.bin &&
           cp -f input.a/"$aa" input.a.bin &&
           cp -f input.b/"$aa" input.b.bin; }; then
        echo "skipping $aa: could not stage inputs" >&2
        status=1
        continue
    fi

    # Only stdout of the build is discarded; errors still reach stderr.
    if ! make > /dev/null; then
        echo "skipping $aa: make failed" >&2
        status=1
        continue
    fi

    mv kernel.radiance.elf binaries/gemmini_fp16dma"$aa".elf || status=1
done
exit "$status"

View File

@@ -0,0 +1,11 @@
# Build one gemmini_hopper_dma ELF per argument set found in args/.
#
# For every file args/<name>, the matching operand files input.a/<name> and
# input.b/<name> are staged as args.bin, input.a.bin and input.b.bin, `make`
# is run, and the resulting kernel.radiance.elf is stored as
# binaries/gemmini_hopper_dma<name>.elf.
#
# Exits non-zero if any configuration could not be staged, built or stored.

# -f: a missing ELF from a previous run is not an error.
rm -f kernel.radiance.elf
rm -rf binaries
mkdir binaries || exit 1

status=0
for a in args/*; do
    # An unmatched glob leaves the literal pattern behind; skip it.
    [ -e "$a" ] || continue
    aa=$(basename "$a")

    # Stage all three inputs; if one cannot be copied, skip this
    # configuration instead of building against files left over from the
    # previous iteration.
    if ! { cp -f "$a" args.bin &&
           cp -f input.a/"$aa" input.a.bin &&
           cp -f input.b/"$aa" input.b.bin; }; then
        echo "skipping $aa: could not stage inputs" >&2
        status=1
        continue
    fi

    # Only stdout of the build is discarded; errors still reach stderr.
    if ! make > /dev/null; then
        echo "skipping $aa: make failed" >&2
        status=1
        continue
    fi

    mv kernel.radiance.elf binaries/gemmini_hopper_dma"$aa".elf || status=1
done
exit "$status"

View File

@@ -15,20 +15,20 @@ def truncated_matrix_multiplication(matrix_a, matrix_b, size):
result = np.matmul(truncated_a, truncated_b)
return result.astype(np.float16)
# Generate the 512x512 matrices
size = 512
matrix_a = generate_fp16_matrix(size)
matrix_b = generate_fp16_matrix(size)
# Save the operand matrices to binary files
save_matrix_to_bin("input.a.bin", matrix_a)
save_matrix_to_bin("input.b.bin", matrix_b)
# Generate and save the operand and reference matrices for every size in `sizes`
sizes = [128, 256, 512]
sizes = [128, 256, 512, 1024]
for s in sizes:
np.random.seed(0)
matrix_a = generate_fp16_matrix(s)
matrix_b = generate_fp16_matrix(s)
# Save the operand matrices to binary files
save_matrix_to_bin("input.a.bin", matrix_a)
save_matrix_to_bin(f"input.a/{s}", matrix_a)
save_matrix_to_bin("input.b.bin", matrix_b)
save_matrix_to_bin(f"input.b/{s}", matrix_b)
ref_matrix = truncated_matrix_multiplication(matrix_a, matrix_b, s)
print(ref_matrix)
save_matrix_to_bin(f"ref{s}.bin", ref_matrix)
print("All files generated successfully.")

View File

@@ -107,7 +107,7 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
for (uint32_t tile_j = 0; tile_j < num_tiles_n; tile_j += 1) {
for (uint32_t tile_k = 0; tile_k < num_tiles_k; tile_k += 1) {
uint32_t a_hexadecile = (tile_k & 1) << 2;
uint32_t b_hexadecile = a_hexadecile + 8;
uint32_t b_hexadecile = a_hexadecile + 11;
gemmini_tile_load_ab(A, B,
a_hexadecile, b_hexadecile, tile_i, tile_j, tile_k,
dim_m, dim_n, dim_k, TILE_M, TILE_N, TILE_K);