diff --git a/kernels/sgemm_tcore/sgemm_impl.hpp b/kernels/sgemm_tcore/sgemm_impl.hpp index 7b6d0e53..e71c9c6f 100644 --- a/kernels/sgemm_tcore/sgemm_impl.hpp +++ b/kernels/sgemm_tcore/sgemm_impl.hpp @@ -267,6 +267,34 @@ inline void vx_wgmma_wait() { asm volatile (".insn r %0, 1, 0, x0, x0, x0" :: "i"(RISCV_CUSTOM3)); } +inline void vx_tcgen05_cp(const uint32_t addr_tmem, const uint32_t addr_smem) { + asm volatile(".insn r %0, 2, 0, x0, %1, %2" ::"i"(RISCV_CUSTOM3), "r"(addr_tmem), + "r"(addr_smem)); +} + +inline void vx_tcgen05_cp_wait() { + asm volatile (".insn r %0, 3, 0, x0, x0, x0" :: "i"(RISCV_CUSTOM3)); +} + +inline void vx_bwgmma(const uint32_t addr_tmem_a, const uint32_t addr_smem_b) { + asm volatile(".insn r %0, 0, 0, x0, %1, %2" ::"i"(RISCV_CUSTOM3), "r"(addr_tmem_a), + "r"(addr_smem_b)); +} + +inline void vx_bwgmma_wait() { + asm volatile (".insn r %0, 1, 0, x0, x0, x0" :: "i"(RISCV_CUSTOM3)); +} + +inline void vx_tcgen05_ld(const uint32_t addr_tmem, const uint32_t rd_hint) { + asm volatile(".insn r %0, 4, 0, %1, %2, x0" ::"i"(RISCV_CUSTOM3), "r"(rd_hint), + "r"(addr_tmem)); +} + +inline void vx_tcgen05_st(const uint32_t addr_tmem, const uint32_t rd_hint) { + asm volatile(".insn r %0, 5, 0, %1, %2, x0" ::"i"(RISCV_CUSTOM3), "r"(rd_hint), + "r"(addr_tmem)); +} + // Remap logical row/col coordinate of a matrix element to a memory index that // follows the 2-level block-row-major layout that Gemmini DMA uses template diff --git a/kernels/sgemm_tcore_blackwell/Makefile b/kernels/sgemm_tcore_blackwell/Makefile new file mode 100644 index 00000000..8f7ad01c --- /dev/null +++ b/kernels/sgemm_tcore_blackwell/Makefile @@ -0,0 +1,8 @@ +PROJECT = sgemm_tcore_blackwell + +VX_SRCS = ../sgemm_tcore/kernel.cpp +VX_INCLUDES = ../sgemm_tcore/sgemm_impl.hpp ../sgemm_tcore/common.h + +OPTS ?= -n16 + +include ../common.mk