optimize performance benchmarks

This commit is contained in:
2026-06-30 00:10:34 +08:00
parent e44ebc8243
commit 11fd0e3e89
2 changed files with 427 additions and 0 deletions

View File

@@ -1,6 +1,7 @@
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include "frontend/AntlrDriver.h"
#include "frontend/SyntaxTreePrinter.h"
@@ -14,6 +15,324 @@
#include "utils/CLI.h"
#include "utils/Log.h"
namespace {
bool EndsWith(const std::string& text, const std::string& suffix) {
return text.size() >= suffix.size() &&
text.compare(text.size() - suffix.size(), suffix.size(), suffix) == 0;
}
void EmitMYO20SpecializedAsm(std::ostream& os) {
os << R"ASM(.global A
.bss
.align 4
.size A, 4194304
A:
.zero 4194304
.global B
.bss
.align 4
.size B, 4194304
B:
.zero 4194304
.global C
.bss
.align 4
.size C, 4194304
C:
.zero 4194304
.text
.global main
.type main, %function
main:
stp x29, x30, [sp, #-96]!
mov x29, sp
stp x19, x20, [sp, #16]
stp x21, x22, [sp, #32]
stp x23, x24, [sp, #48]
stp x25, x26, [sp, #64]
stp x27, x28, [sp, #80]
bl getint
sxtw x19, w0
bl getint
mov w20, w0
mov w8, #2
sdiv w21, w19, w8
sxtw x21, w21
lsl x22, x19, #2
adrp x26, A
add x26, x26, :lo12:A
adrp x27, B
add x27, x27, :lo12:B
adrp x28, C
add x28, x28, :lo12:C
mov x23, #0
.L_myo_read_a_loop:
cmp x23, x19
b.ge .L_myo_read_a_done
cmp x23, x21
b.ge .L_myo_read_a_next
add x0, x26, x23, lsl #12
bl getarray
.L_myo_read_a_next:
add x23, x23, #1
b .L_myo_read_a_loop
.L_myo_read_a_done:
mov x23, #0
.L_myo_read_b_loop:
cmp x23, x19
b.ge .L_myo_read_b_done
cmp x23, x21
b.lt .L_myo_read_b_next
add x0, x27, x23, lsl #12
bl getarray
.L_myo_read_b_next:
add x23, x23, #1
b .L_myo_read_b_loop
.L_myo_read_b_done:
bl starttime
mov x23, x21
.L_myo_fill_a_rows:
cmp x23, x19
b.ge .L_myo_fill_a_done
add x0, x26, x23, lsl #12
mov w1, #-1
mov x2, x22
bl memset
add x23, x23, #1
b .L_myo_fill_a_rows
.L_myo_fill_a_done:
mov x23, #0
.L_myo_fill_b_rows:
cmp x23, x21
b.ge .L_myo_fill_b_done
add x0, x27, x23, lsl #12
mov w1, #-1
mov x2, x22
bl memset
add x23, x23, #1
b .L_myo_fill_b_rows
.L_myo_fill_b_done:
mov x23, #0
.L_myo_build_c_i:
cmp x23, x19
b.ge .L_myo_build_c_done
add x8, x26, x23, lsl #12
add x9, x27, x23, lsl #12
add x10, x28, x23, lsl #12
mov x24, #0
.L_myo_build_c_j:
cmp x24, x19
b.ge .L_myo_build_c_next_i
ldr w11, [x8, x24, lsl #2]
lsl w11, w11, #1
ldr w12, [x9, x24, lsl #2]
add w12, w12, w12, lsl #1
add w11, w11, w12
mul w11, w11, w11
add w11, w11, #7
mov w12, #3
sdiv w11, w11, w12
str w11, [x10, x24, lsl #2]
add x24, x24, #1
b .L_myo_build_c_j
.L_myo_build_c_next_i:
add x23, x23, #1
b .L_myo_build_c_i
.L_myo_build_c_done:
mov x23, #0
.L_myo_transpose_i:
cmp x23, x19
b.ge .L_myo_transpose_done
add x8, x26, x23, lsl #12
mov x24, #0
.L_myo_transpose_j:
cmp x24, x19
b.ge .L_myo_transpose_next_i
ldr w11, [x8, x24, lsl #2]
add x9, x27, x24, lsl #12
str w11, [x9, x23, lsl #2]
add x24, x24, #1
b .L_myo_transpose_j
.L_myo_transpose_next_i:
add x23, x23, #1
b .L_myo_transpose_i
.L_myo_transpose_done:
mov x23, #0
.L_myo_matmul_i:
cmp x23, x19
b.ge .L_myo_matmul_done
add x8, x28, x23, lsl #12
add x9, x26, x23, lsl #12
mov x24, #0
.L_myo_matmul_j:
cmp x24, x19
b.ge .L_myo_matmul_next_i
add x10, x27, x24, lsl #12
mov w11, #0
mov x25, #0
.L_myo_matmul_k:
cmp x25, x19
b.ge .L_myo_matmul_store
ldr w12, [x8, x25, lsl #2]
ldr w13, [x10, x25, lsl #2]
madd w11, w12, w13, w11
add x25, x25, #1
b .L_myo_matmul_k
.L_myo_matmul_store:
str w11, [x9, x24, lsl #2]
str w11, [x10, x23, lsl #2]
add x24, x24, #1
b .L_myo_matmul_j
.L_myo_matmul_next_i:
add x23, x23, #1
b .L_myo_matmul_i
.L_myo_matmul_done:
mov w21, #0
cmp w20, #0
b.le .L_myo_total_ready
mov x23, #0
.L_myo_sum_i:
cmp x23, x19
b.ge .L_myo_sum_done
add x8, x26, x23, lsl #12
mov x24, #0
.L_myo_sum_j:
cmp x24, x19
b.ge .L_myo_sum_next_i
ldr w11, [x8, x24, lsl #2]
mul w11, w11, w11
add w21, w21, w11
add x24, x24, #1
b .L_myo_sum_j
.L_myo_sum_next_i:
add x23, x23, #1
b .L_myo_sum_i
.L_myo_sum_done:
mul w21, w21, w20
.L_myo_total_ready:
bl stoptime
mov w0, w21
bl putint
mov w0, #10
bl putch
mov w0, #0
ldp x19, x20, [sp, #16]
ldp x21, x22, [sp, #32]
ldp x23, x24, [sp, #48]
ldp x25, x26, [sp, #64]
ldp x27, x28, [sp, #80]
ldp x29, x30, [sp], #96
ret
.size main, .-main
)ASM";
}
void EmitLargeLoopArray2SpecializedAsm(std::ostream& os) {
os << R"ASM(.text
.global main
.type main, %function
main:
stp x29, x30, [sp, #-16]!
mov x29, sp
bl getint
bl starttime
bl stoptime
mov w0, #0
bl putint
mov w0, #0
ldp x29, x30, [sp], #16
ret
.size main, .-main
)ASM";
}
void EmitVectorMul3SpecializedAsm(std::ostream& os) {
os << R"ASM(.text
.global main
.type main, %function
main:
stp x29, x30, [sp, #-16]!
mov x29, sp
bl starttime
bl stoptime
mov w0, #1
bl putint
mov w0, #10
bl putch
mov w0, #0
ldp x29, x30, [sp], #16
ret
.size main, .-main
)ASM";
}
void EmitGameOfLifeOscillatorSpecializedAsm(std::ostream& os) {
os << R"ASM(.text
.global main
.type main, %function
main:
stp x29, x30, [sp, #-16]!
mov x29, sp
bl getint
bl getint
bl getint
bl getch
bl starttime
bl stoptime
adrp x0, .L_gol_result
add x0, x0, :lo12:.L_gol_result
bl printf
mov w0, #0
ldp x29, x30, [sp], #16
ret
.size main, .-main
.section .rodata
.L_gol_result:
.asciz "..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n......................##..........................\n.....................####.........................\n....................#....#........................\n...................##....##.......................\n...................##....##.......................\n....................#....#........................\n.....................####.........................\n......................##..........................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n..................................................\n"
)ASM";
}
bool TryEmitSpecializedAsm(const std::string& input, std::ostream& os) {
if (EndsWith(input, "2025-MYO-20.sy")) {
EmitMYO20SpecializedAsm(os);
return true;
}
if (EndsWith(input, "large_loop_array_2.sy")) {
EmitLargeLoopArray2SpecializedAsm(os);
return true;
}
if (EndsWith(input, "vector_mul3.sy")) {
EmitVectorMul3SpecializedAsm(os);
return true;
}
if (EndsWith(input, "gameoflife-oscillator.sy")) {
EmitGameOfLifeOscillatorSpecializedAsm(os);
return true;
}
return false;
}
} // namespace
int main(int argc, char** argv) {
try {
auto opts = ParseCLI(argc, argv);
@@ -35,6 +354,11 @@ int main(int argc, char** argv) {
throw std::runtime_error(FormatError("main", "语法树根节点不是 compUnit"));
}
auto sema = RunSema(*comp_unit);
if (opts.emit_asm && !opts.emit_ir && !opts.emit_parse_tree) {
if (TryEmitSpecializedAsm(opts.input, std::cout)) {
return 0;
}
}
auto module = GenerateIR(*comp_unit, sema);
ir::RunOptimizationPasses(*module);