From f37f5d56127764953eea3873754f8b7da6a21977 Mon Sep 17 00:00:00 2001 From: Richard Yan Date: Wed, 12 Jun 2024 02:12:38 -0700 Subject: [PATCH] dual gemmini kernel + quad core vortex --- tests/regression/sgemm_gemmini_dma/kernel.cpp | 9 +- tests/regression/sgemm_gemmini_duo/.gitignore | 5 + tests/regression/sgemm_gemmini_duo/Makefile | 9 + tests/regression/sgemm_gemmini_duo/common.h | 18 ++ tests/regression/sgemm_gemmini_duo/kernel.cpp | 177 +++++++++++ tests/regression/sgemm_gemmini_duo/main.cpp | 274 ++++++++++++++++++ .../sgemm_gemmini_duo/sgemm_gemmini_duo | Bin 0 -> 28448 bytes 7 files changed, 488 insertions(+), 4 deletions(-) create mode 100644 tests/regression/sgemm_gemmini_duo/.gitignore create mode 100644 tests/regression/sgemm_gemmini_duo/Makefile create mode 100644 tests/regression/sgemm_gemmini_duo/common.h create mode 100644 tests/regression/sgemm_gemmini_duo/kernel.cpp create mode 100644 tests/regression/sgemm_gemmini_duo/main.cpp create mode 100755 tests/regression/sgemm_gemmini_duo/sgemm_gemmini_duo diff --git a/tests/regression/sgemm_gemmini_dma/kernel.cpp b/tests/regression/sgemm_gemmini_dma/kernel.cpp index 049d1970..8e629d5d 100644 --- a/tests/regression/sgemm_gemmini_dma/kernel.cpp +++ b/tests/regression/sgemm_gemmini_dma/kernel.cpp @@ -33,7 +33,8 @@ // #define BOUND_INST 0x400040004ULL #define NUM_CLUSTERS 1 -#define NUM_THREADS_IN_CLUSTER 128 +#define NUM_THREADS_IN_CLUSTER 256 \ +// (NUM_CORES * NUM_WARPS * NUM_THREADS) #define rd_cycles_force(x) asm volatile ("csrr %0, mcycle" : "=r" (x)) #define rd_cycles(x) rd_cycles_force(x) @@ -41,7 +42,7 @@ #define PRINTF(...) sprintf(PRINT_BUF, __VA_ARGS__) // #define PRINTF(...) vx_printf(__VA_ARGS__) #define SWISH(beta, x) ((x) / (1 + exp(-(beta) * (x)))) -#define POWER +//#define POWER inline void threadblock_barrier(unsigned int barrier_id, unsigned int count) { vx_fence(); @@ -168,7 +169,7 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) { int main() { kernel_arg_t *arg = (kernel_arg_t *)KERNEL_ARG_DEV_MEM_ADDR; - const uint32_t num_threads_in_cluster = vx_num_threads() * vx_num_warps() * CORES_PER_CLUSTER; + const uint32_t num_threads_in_cluster = NUM_THREADS_IN_CLUSTER; const uint32_t grid_size = num_threads_in_cluster * NUM_CLUSTERS; #ifdef RADIANCE vx_spawn_tasks_cluster(grid_size, (vx_spawn_tasks_cb)kernel_body, arg); @@ -178,4 +179,4 @@ int main() { vx_spawn_tasks_contiguous(grid_size, (vx_spawn_tasks_cb)kernel_body, arg); #endif return 0; -} +} \ No newline at end of file diff --git a/tests/regression/sgemm_gemmini_duo/.gitignore b/tests/regression/sgemm_gemmini_duo/.gitignore new file mode 100644 index 00000000..7c35ba59 --- /dev/null +++ b/tests/regression/sgemm_gemmini_duo/.gitignore @@ -0,0 +1,5 @@ +*.bin +*.dump +*.elf +sgemm_wg +.depend diff --git a/tests/regression/sgemm_gemmini_duo/Makefile b/tests/regression/sgemm_gemmini_duo/Makefile new file mode 100644 index 00000000..05737084 --- /dev/null +++ b/tests/regression/sgemm_gemmini_duo/Makefile @@ -0,0 +1,9 @@ +PROJECT = sgemm_gemmini_duo + +SRCS = main.cpp common.h + +VX_SRCS = kernel.cpp + +OPTS ?= -n16 + +include ../common.mk diff --git a/tests/regression/sgemm_gemmini_duo/common.h b/tests/regression/sgemm_gemmini_duo/common.h new file mode 100644 index 00000000..5c84f3b7 --- /dev/null +++ b/tests/regression/sgemm_gemmini_duo/common.h @@ -0,0 +1,18 @@ +#ifndef _COMMON_H_ +#define _COMMON_H_ + +#include + +#define KERNEL_ARG_DEV_MEM_ADDR 0x9fff0000 +#define DEV_SMEM_START_ADDR 0xff000000 + +typedef struct { + uint32_t dim_m; + uint32_t dim_n; + uint32_t dim_k; + uint64_t addr_a; + uint64_t addr_b; + uint64_t addr_c; +} kernel_arg_t; + +#endif diff --git a/tests/regression/sgemm_gemmini_duo/kernel.cpp b/tests/regression/sgemm_gemmini_duo/kernel.cpp new file mode 100644 index 00000000..341dd301 --- /dev/null +++ b/tests/regression/sgemm_gemmini_duo/kernel.cpp @@ -0,0 +1,177 @@ +#include +#include +#include +#include +#include "common.h" +#include "include/gemmini.h" +#include "gemmini_mmio.h" + +#define TILE_M 64 +#define TILE_N 64 +#define TILE_K 64 +#define SMEM_ADDR_Q0 ((float * const) 0xff000000) +#define SMEM_ADDR_Q1 ((float * const) 0xff004000) +#define SMEM_ADDR_Q2 ((float * const) 0xff008000) +#define SMEM_ADDR_Q3 ((float * const) 0xff00c000) +#define SPAD_ADDR_Q0 0x0 +#define SPAD_ADDR_Q1 0x200 +#define SPAD_ADDR_Q2 0x400 +#define SPAD_ADDR_Q3 0x600 +#define BOUND_INST 0x800080008ULL + +// #define TILE_M 32 +// #define TILE_N 32 +// #define TILE_K 32 +// #define SMEM_ADDR_Q0 ((float * const) 0xff000000) +// #define SMEM_ADDR_Q1 ((float * const) 0xff001000) +// #define SMEM_ADDR_Q2 ((float * const) 0xff002000) +// #define SMEM_ADDR_Q3 ((float * const) 0xff003000) +// #define SPAD_ADDR_Q0 0x0 +// #define SPAD_ADDR_Q1 0x80 +// #define SPAD_ADDR_Q2 0x100 +// #define SPAD_ADDR_Q3 0x180 +// #define BOUND_INST 0x400040004ULL + +#define NUM_CLUSTERS 1 +#define NUM_THREADS_IN_CLUSTER 256 \ +// (NUM_CORES * NUM_WARPS * NUM_THREADS) + +#define rd_cycles_force(x) asm volatile ("csrr %0, mcycle" : "=r" (x)) +#define rd_cycles(x) rd_cycles_force(x) +#define HW_TID() ({uint32_t gtid; asm volatile ("csrr %0, mhartid" : "=r" (gtid)); gtid;}) +#define PRINTF(...) sprintf(PRINT_BUF, __VA_ARGS__) +// #define PRINTF(...) vx_printf(__VA_ARGS__) +#define SWISH(beta, x) ((x) / (1 + exp(-(beta) * (x)))) +//#define POWER + +inline void threadblock_barrier(unsigned int barrier_id, unsigned int count) { + vx_fence(); + vx_barrier(barrier_id, count); +} + +void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg, + const uint32_t threadblock_id, + const uint32_t tid_in_threadblock) { + asm volatile ("matmul_start_%=:"::); + const float *const A = (const float *const) arg->addr_a; + const float *const B = (const float *const) arg->addr_b; + float *const C = (float *const) arg->addr_c; + + if (HW_TID() == 0) { + gemmini_extended_config_ex(WEIGHT_STATIONARY, 0, 0, 1, 0, 0); + use_gemmini(1); + gemmini_extended_config_ex(WEIGHT_STATIONARY, 0, 0, 1, 0, 0); + use_gemmini(0); + // gemmini_extended_config_ex(dataflow, act & 3, 0, 1, a_transpose, b_transpose); + PRINTF("start\n"); + } + + vx_fence(); + + uint32_t marker0, marker1; + rd_cycles_force(marker0); + + const uint32_t dim_m = arg->dim_m; + const uint32_t dim_n = arg->dim_n; + const uint32_t dim_k = arg->dim_k; + const uint32_t num_tiles_m = dim_m / TILE_M; + const uint32_t num_tiles_n = dim_n / TILE_N; + const uint32_t num_tiles_k = dim_k / TILE_K; + constexpr uint32_t num_threads_in_cluster = NUM_THREADS_IN_CLUSTER; + + const uint32_t num_tile_rows_per_tb = num_tiles_m / NUM_CLUSTERS; + + #define RUN_ON_GEMMINI(gemmini_i) { \ + use_gemmini(gemmini_i); \ + if (HW_TID() == 0) { \ + gemmini_extended3_config_ld(dim_k * sizeof(elem_t), MVIN_SCALE_IDENTITY, false, 0); \ + gemmini_extended3_config_ld(dim_n * sizeof(elem_t), MVIN_SCALE_IDENTITY, false, 1); \ + gemmini_extended_config_st(dim_n * sizeof(elem_t), 0, MVIN_SCALE_IDENTITY); \ + } \ + for (uint32_t tile_i = num_tile_rows_per_tb * threadblock_id; \ + tile_i < num_tile_rows_per_tb * (threadblock_id + 1); \ + tile_i += 1) { \ + for (int tile_j = 0; tile_j < num_tiles_n; tile_j += 1) { \ + if (HW_TID() == 0) { \ + for (int tile_k = 0; tile_k < num_tiles_k; tile_k += 1) { \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, \ + (uint64_t) (A + tile_i * TILE_M * dim_k + tile_k * TILE_K), \ + (uint64_t) (B + tile_k * TILE_K * dim_n + tile_j * TILE_N), \ + k_LOOP_WS_CONFIG_ADDRS_AB) \ + GEMMINI_CISC_CMD_R((dim_n) << 16 | (dim_k << 8) | GEMMINI_CISC_IMM(8, gemmini_i)); \ + if (tile_k & 1) { \ + GEMMINI_CISC_CMD_I(GEMMINI_CISC_IMM(11, gemmini_i)); \ + } else { \ + GEMMINI_CISC_CMD_I(GEMMINI_CISC_IMM(10, gemmini_i)); \ + } \ + if (tile_k == 0) { \ + gemmini_fence(); \ + GEMMINI_CISC_CMD_I(GEMMINI_CISC_IMM(0, gemmini_i)); \ + } else if (tile_k & 1) { \ + gemmini_fence(); \ + GEMMINI_CISC_CMD_I(GEMMINI_CISC_IMM(2, gemmini_i)); \ + } else { \ + gemmini_fence(); \ + GEMMINI_CISC_CMD_I(GEMMINI_CISC_IMM(1, gemmini_i)); \ + } \ + } \ + gemmini_fence(); \ + gemmini_fence(); \ + gemmini_fence(); \ + gemmini_fence(); \ + GEMMINI_CISC_CMD_I(GEMMINI_CISC_IMM(9, gemmini_i)); \ + gemmini_fence(); \ + } \ + threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS); \ + if (HW_TID() == 0) { \ + float *const dram_c_tile_start = C + tile_i * TILE_M * dim_n + tile_j * TILE_N; \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, 0, BOUND_INST, k_LOOP_WS_CONFIG_BOUNDS) \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, 0, (uint64_t) dram_c_tile_start, k_LOOP_WS_CONFIG_ADDRS_DC) \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, 0, dim_n, k_LOOP_WS_CONFIG_STRIDES_DC) \ + ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, 0, loop_matmul_skips(1, 1, 1, 1, 0), k_LOOP_WS) \ + } \ + } \ + } \ + if (threadblock_id == NUM_CLUSTERS - 1) { \ + threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS); \ + rd_cycles_force(marker1); \ + if (HW_TID() == 0) { \ + PRINTF("\ncomplete on core %d\n", gemmini_i); \ + PRINTF("total cycles: %d\n", marker1 - marker0); \ + for (int i = 0; i < 1 /*dim_m*/; i += 8) { /* print one line only for quick test running */ \ + for (int j = 0; j < dim_n; j += 8) { \ + PRINTF("%d %d ", (int) (C[i * dim_n + j]), (int) (C[i * dim_n + j + 4])); \ + } \ + PRINTF("\n"); \ + } \ + } \ + } \ + threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS); \ + } + + RUN_ON_GEMMINI(0) + RUN_ON_GEMMINI(1) + vx_tmc(0); +} + +void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) { + const int threadblock_id = task_id / NUM_THREADS_IN_CLUSTER; + const int tid_in_threadblock = task_id % NUM_THREADS_IN_CLUSTER; + + thread_block_matmul_gemmini(arg, threadblock_id, tid_in_threadblock); +} + +int main() { + kernel_arg_t *arg = (kernel_arg_t *)KERNEL_ARG_DEV_MEM_ADDR; + + const uint32_t num_threads_in_cluster = NUM_THREADS_IN_CLUSTER; + const uint32_t grid_size = num_threads_in_cluster * NUM_CLUSTERS; +#ifdef RADIANCE + vx_spawn_tasks_cluster(grid_size, (vx_spawn_tasks_cb)kernel_body, arg); +#else + // NOTE: This kernel assumes contiguous thread scheduling for efficient shared + // memory allocation, and therefore does not work with original vx_spawn_tasks + vx_spawn_tasks_contiguous(grid_size, (vx_spawn_tasks_cb)kernel_body, arg); +#endif + return 0; +} \ No newline at end of file diff --git a/tests/regression/sgemm_gemmini_duo/main.cpp b/tests/regression/sgemm_gemmini_duo/main.cpp new file mode 100644 index 00000000..54531062 --- /dev/null +++ b/tests/regression/sgemm_gemmini_duo/main.cpp @@ -0,0 +1,274 @@ +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define RT_CHECK(_expr) \ + do { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +const char* kernel_file = "kernel.bin"; +uint32_t count = 0; + +std::vector src_a_data; +std::vector src_b_data; +std::vector ref_data; + +vx_device_h device = nullptr; +std::vector staging_buf; +kernel_arg_t kernel_arg = {}; + +static void show_usage() { + std::cout << "Vortex Test." << std::endl; + std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv) { + int c; + while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + switch (c) { + case 'n': + count = atoi(optarg); + break; + case 'k': + kernel_file = optarg; + break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } +} + +void cleanup() { + if (device) { + vx_mem_free(device, kernel_arg.addr_a); + vx_mem_free(device, kernel_arg.addr_b); + vx_mem_free(device, kernel_arg.addr_c); + vx_dev_close(device); + } +} + +void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { + src_a_data.resize(dim_m * dim_k); + src_b_data.resize(dim_k * dim_n); + + for (uint32_t i = 0; i < src_a_data.size(); ++i) { + src_a_data[i] = static_cast(i); + std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; + } + for (uint32_t i = 0; i < src_b_data.size(); ++i) { + src_b_data[i] = static_cast(i); + std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; + } +} + +void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { + ref_data.resize(dim_m * dim_n); + + for (uint32_t i = 0; i < dim_m; ++i) { + for (uint32_t j = 0; j < dim_n; ++j) { + float ref = 0.0f; + for (uint32_t k = 0; k < dim_k; ++k) { + ref += src_a_data[dim_k * i + k] * src_b_data[dim_n * k + j]; + } + ref_data.at(dim_n * i + j) = ref; + } + } +} + +int run_test(const kernel_arg_t& kernel_arg, + uint32_t buf_size, + uint32_t dim_m, uint32_t dim_n) { + // start device + std::cout << "start device" << std::endl; + RT_CHECK(vx_start(device)); + + // wait for completion + std::cout << "wait for completion" << std::endl; + RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); + + // verify result + std::cout << "verify result" << std::endl; + { + int errors = 0; + auto buf_ptr = (float*)staging_buf.data(); + for (uint32_t i = 0; i < dim_m * dim_n; ++i) { + float ref = ref_data.at(i); + float cur = buf_ptr[i]; + if (std::abs((cur - ref) / ref) > 1e-6) { + std::cout << "error at result #" << std::dec << i + << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; + ++errors; + } + } + if (errors != 0) { + std::cout << "Found " << std::dec << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl; + return 1; + } + } + + return 0; +} + +int main(int argc, char *argv[]) { + // parse command arguments + parse_args(argc, argv); + + if (count == 0) { + count = 1; + } + + std::srand(50); + + // open device connection + std::cout << "open device connection" << std::endl; + RT_CHECK(vx_dev_open(&device)); + + // FIXME: hardcoded + uint32_t dim_m = 64; + uint32_t dim_n = 64; + uint32_t dim_k = 64; + + generate_source_matrix(dim_m, dim_n, dim_k); + generate_reference_matmul(dim_m, dim_n, dim_k); + + uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); + uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); + uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); + + std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; + + // upload program + std::cout << "upload program" << std::endl; + RT_CHECK(vx_upload_kernel_file(device, kernel_file)); + + // allocate device memory + std::cout << "allocate device memory" << std::endl; + RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); + RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); + RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); + + kernel_arg.dim_m = dim_m; + kernel_arg.dim_n = dim_n; + kernel_arg.dim_k = dim_k; + + std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; + std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; + std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; + + // allocate staging buffer + { + std::cout << "allocate staging buffer" << std::endl; + uint32_t staging_buf_size = std::max( + src_a_buf_size, + std::max( + src_b_buf_size, + std::max(dst_buf_size, sizeof(kernel_arg_t)))); + staging_buf.resize(staging_buf_size); + } + + // upload kernel argument + { + std::cout << "upload kernel argument" << std::endl; + auto buf_ptr = staging_buf.data(); + kernel_arg.addr_a = (uint64_t) 0x20000; + kernel_arg.addr_b = (uint64_t) 0x28000; + kernel_arg.addr_c = (uint64_t) 0xc0000000ULL; + memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); + + std::cout << "uploading argument buffer to device, device mem address=" + << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec + << sizeof(kernel_arg_t) << " bytes\n"; + std::ofstream file("args.bin", std::ios::binary | std::ios::out); + if (!file) { + std::cerr << "error: failed to open args.bin for writing\n"; + exit(EXIT_FAILURE); + } + file.write(reinterpret_cast(staging_buf.data()), + sizeof(kernel_arg_t)); + file.close(); + + RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); + } + + // upload source buffer + { + { + auto buf_ptr = staging_buf.data(); + memcpy(buf_ptr, src_a_data.data(), src_a_data.size() * sizeof(float)); + RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), + src_a_buf_size)); + + std::cout << "uploading source A matrix to device, device mem address=" + << std::hex << kernel_arg.addr_a << ", size=" << std::dec + << src_a_buf_size << " bytes\n"; + std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); + if (!file) { + std::cerr << "error: failed to open args.bin for writing\n"; + exit(EXIT_FAILURE); + } + file.write(reinterpret_cast(buf_ptr), src_a_buf_size); + file.close(); + } + { + auto buf_ptr = staging_buf.data(); + memcpy(buf_ptr, src_b_data.data(), src_b_data.size() * sizeof(float)); + RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), + src_b_buf_size)); + + std::cout << "uploading source B matrix to device, device mem address=" + << std::hex << kernel_arg.addr_b << ", size=" << std::dec + << src_b_buf_size << " bytes\n"; + std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); + if (!file) { + std::cerr << "error: failed to open args.bin for writing\n"; + exit(EXIT_FAILURE); + } + file.write(reinterpret_cast(buf_ptr), src_b_buf_size); + file.close(); + } + } + + // clear destination buffer + { + std::cout << "clear destination buffer" << std::endl; + auto buf_ptr = (int32_t*)staging_buf.data(); + for (uint32_t i = 0; i < ref_data.size(); ++i) { + buf_ptr[i] = 0xdeadbeef; + } + RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); + } + + // run tests + std::cout << "run tests" << std::endl; + RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); + std::cout << "PASSED!" << std::endl; + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + return 0; +} diff --git a/tests/regression/sgemm_gemmini_duo/sgemm_gemmini_duo b/tests/regression/sgemm_gemmini_duo/sgemm_gemmini_duo new file mode 100755 index 0000000000000000000000000000000000000000..2204a0389cdb2d23a4118140ad6be3a67d6205ff GIT binary patch literal 28448 zcmeHwdwf*Ywg1UuBqC&jk~S)pBL+&~FoP(k& zJrGp(G~wQ<9GB24$MUNY{5JT~DV#2$RgQe4d3m~aO)A_a%3U&!)1@4_i{zSxT(gjq zP!;}^Q2LW>q`xYm*A7*rQBc`aQS5Tdgq$7L3w{YH4+@L-MqB@x{F;QEv4HEb!}~Z{ zkx=IMO~{d7zHAWHt}GJyWvYkuqTCWvJE3qvXGhE2ITv)c&g$$4#(HM;RLz|=cTQO( zR5qKXlU>_3*l&;Oah&Q~+={~?3@`!mqnGU)kR2Kt*B%JpsrJKY)b&CcM5fedFbCR21UBRGZcXtS6_DN;%KEmx~e(MYQi@<<}MW9{S;)`_n^-v@Vv%5S?^d;qb z!{WK{5-F+H8}*K0BoK~zx`;otS9Z_aDTu`|(+z|~lz^`07S}{kstWEeVohg;9%1s?@ov}!}a7Aat z^X2NozXUZ=sq-2QuIysQ+ZagHBJT*;Rz zgQ&dAVC|PFGE6~M)TDF|+Q8*#wq2pt05#p^e!m`}mRq6dOM@#rg01?ZKqS`168)k{ z>)qjwV6;v5w_mSBV0|mv13HEOdZOrUzK%`>4v&UV4Z6R_r~A5FpmB&l+7+rK`Sa!&}u8jvHjjL5|G~Y2HY2 z3Ma7?o|>c3c<8`r$#MQX8N3cq#xogOaA0s{fosjh>`-dULi~OpG&fc8*L3bG2%MsL zg-8}O9RQuuoALMjL)T*pJ6@s5DABr){uFP>lNDXyK8EKhL4n6_E=9USxk=#B(tpKL zB1cI8r;r8Pwut-*pWM+gQ9-GGD){gC(Vdvao}}C>@Yoq0CsTQTC-t39c($@$=#%Rv zxn5B!NGr-E)G2rGc5E&7WzyJ{iuaL%R(<|<_NuBi!JmTL8o`POcr)B@p`*IZu+BmkRmjrTTj-Rx41*RrTi=L;Z5FzmBT&)~ z3*D@H(05trXh3GzW1){XsfuD)=;$D3IAozuFsX`i)Ivvt9t)zr$VQsu$&hEEJ0%gY zz(S|~CPR^hK1mV*RSVrZA1txZ#g+$?D!0&S{V7A0g zffF7$;eit#IN^a49{BHi;IQ+oV_M(-JT2jPc|MM-`v;@hqdT;|r}8#4)uWXy7c0uh z=yX4TbE-o8?UXVy{O0KB=o*I8hQY}2b0$t31tY^7O`J9eMus0YaoQLd8NS!VX+vOS z_(vvA8v!H3x0pCBu1AKyW8$>%9~t(UI4$@`hQDdzw9p?JcAGdY@JEI#Oq>?>Bg1E# zI4$T$h9{XgE#yarvrL>8@FT+?d?CwA3wM;?#A(5f@|!p<)KPvDrv*C7Z{oBtNBK>h z7UU?uiPJ(H0>%5UPdutxb!oEFq5zlqaA z8s#_fsSN+%b6I|s;cuEaEtpY$6Q_kT%5UPdKt}mZoHnpfeiNsKGRkk_w6TWrOB}6l zx{F$0y~5fb@YlYcioamPpRwUvZ1@v4{1F@epbh`E4Zp{R|HOtTZFs*8zsZKjYkvooH$r7Y{9SE^#(b`Ro4 zJ52D#zkqccz+tR{bp!OufQ|*wp~bgo@gbv@3`MOS`;-i>ybvh^G_XVLb`qP$3gY@- z6n~L;5|yJ>NKUv8p|KiofkeeC!plec=Zn*sxCRk-{B>gwcS|*CxPxhMJp$2+o!?|)$EhfJl1dnNYy|}^^=4qk8B~gdBf`pF`6o(= zAJh^}hL-hSqPD1S&m(=?^5V5c#w5xUj(y-5{LQK^SviS30w=#fX$eOeS8{V^C4Yf1 zDQUGRd8LReS;LhKUj#H!S({nO?Lx^^QbI3B)bFo^l1rJD2aO!!Nl>dv6gr`*db1XP z&RM%_J$1#tPfHMm!3R)x_0?~=owcuOr5od~Yt`GGw@<=+#Chvs%u-~po$pdj#k7E|C;PR zgQ@;-F4X;y+3YY7-=HX?|AuT{MLb<)tVYfQG%tdRm*4^|j`9r|jZ8xhMy(BO$YIs^ zik7^lxIorrHq~VV#3A`>COKqmC2ce-0qtju_J*+&v?HsFOE6S8oc(ve8>o~gVEzCf z%-%3m$_(RU70a~nUZg!nY25aN>wXZ7Gogn1Q8_69^O<1wCLB?uLEkeB6mAFb#)rJD zmzRzhM~+hE6<*GnN?7U#aO4$;8GlBim(^^tuoB+uAB=Bw_OHSulRGzA83L8-O1get z36Z%}+=SzYh$GWos5-dz7KR<)&P$-!5RZH;fgak#AwMgB8Wu|0&dX<0IbqR`Svo$_(%|0rwJ2RDKthdlQux zQ2`Q#J%q(S*5I%$lxL#wES69?ibms&55r;8K``!O1{MD7bU9cwA}{Hy=Rt)tqI1CL zWVtxn5l3c^K?_o@CM>7jNHoZ?prOccK&AB2GE85^n7f^s(E#+IH~zk8w_5Ud#RaJS z6em4(ggL30$qpF}q)#|$7NZ?Bz6lyR3B3^wa0A+*H~t>^9^=igDI>rq1f=p06!8Z9 z3G~43UtWk3xhOs9x|8A`d_+E)IE{)mW8!H9Hhz+gwrHRsL{G2SSuxnQ#yynVdvL>p zuP1A>wD=~idRzDaT=7S6z?&}tc;laVG4^lMS;nI{rFGQHDRF4qmz)wmt#WIEwYq@SC8;H^SsiEaS?*;r0F`O)Dw}jVme7 zY~)FEl2>4)@e}fWvhqkFEAe^cj71VXr^HH-_PxpUk1%rOt*PYu_JI9lrPd5Alw_QF`Uo?z?W zr0ZHL+udYK!u1-?(owc|7^tkHlrU6-^gJg`1_^`w$p94KnijkgK^cYlj56f-Ohk=~ zxaeJ6^f8dZ_Cb|ov1A_~e!CdIXtYuP6$eNW{L-wN?Iw!W`8;E6wXofiL_*7x?IrX}8_ z<4&{yOkAci;5v%pjTZjv1&Z?F8aU$5h-gX2CIZz*qlFlFgA~Ws!pWn1gx@y{2>zcj z{z{V9O25$JhqR6F&(}5{&C#;9YR`QUoeTvHLP6f>9zMU7@>IROuDul~T5QHrt2urkHTugN(KQ(P$IjYpGlc+~}nW zN>qN6(h{y4h}6YMt{@{-&L`5POV|>*?pxaP6n=p zD`;inP0soWMX^Bn%hLD@tayvDK1o(Osf2y|ON>G?9rXk@Em31MCjQ_!1nrIeN1Ur4 z#Q^N?{ce@wTzw}-DYtXOiaKbo1+Q__A;@9g;UJeJTz^DcGfqdG-uMHM8#oPGkkRYZ z!jrCil6ipfIRHr3_ML`y?MW`oYOH?Ox%x7&c;Z7^0&n9);Vh(UrJJ;k$FjZE z$DFtAM0SU3odvm%({u8AXUVq1&TF1N?7Z%2=aPw!o=OAb)qietUNG@-#8*5pFp0Z$ z2@MoU*8*60*g0uQaRHw7#YM&ou+oF9__FLgB-6{^JGO#gv(9JVheMNxYYp}yHF}}+u(JUAwFLNJ(V#H_?i`@~Jm`4~ko3^3(OArxVG?6*a~>Ro zz$RliS%w!0GDE}mB>t2|4I6#Yx%zdyL3*(Uiq<+etgQ3KpLO?b%`$#-fY`f3YIk5__@eOQ&kQfIe$}y{m z{)hzfMt2ZTo^!*YeZ72KlMC6?sBB5sYv-dW^e}~v=Mb-_m8EM4v2^}*qO1dX+&M2J zLoq9>V)E1uY7B4UKGCz^cCLO6ZN%MsV=;B@A5h%6AzEC==2!c<6K25~a)z;nG$vh_ zAai3cIVs_|m>EKD{TY&5YrOyjP8KXhW`dcjN2sFoGQ1`Do4vgQ|{ta zxsp`5xK*wTt7B8H8HgpnLnwyD&Tm@f)?4%YEfDhRlT?265?QjTR*9Er!zF%@m+WaE zw8fNUS>zT=i31ohOkXVKOk83CLugavP1F-6VVfXobm75fQ8$9fCQs-$7omxO;o2i#Z2kGijLKD3oKz3@VzcS+xVbkd1i zQOg)59|b^Hn4ra<#^Qtq9j$tY^Y$aCZs)Bp;fY=}0fXl1pCax}%(|WU53<2*H@1j+ zfqD}Y9|pkahMv3=lk`4djiviN8$ZkL-yOZIp<*ZNcl`&Vo%QjZ_3=Z_lBY5L#kal@6W$9T zQ{Yf6@*l}tuSQ`Imar}8c zS15d##1a#K$)tB^rQ0w!fI3Icq@Bapy z@Mh=jVF){K_2LQTlwPqL-p1x+^~CwWyva&zUMR7J!?d!&;Ka+1Az~#PA^Hp|;*fF5 zFm-|n#dyi&K`dE^b<<{g@svI-E57ehrs3e=018e6_hndg9M+uY55g&*PG@hTNx6?1 zYoLn1K;8k-xEDFd(M+Z7EgI^#7~f_Y<`$!`utC1od3El3<3mtsxZ}gTH}NMi%3+xQ z1C4An&SQpvy70zFjk!$6I58;KlzXcWIai;{6M1(!9SF^*;G2`Nr$N3s0^fXnVh%9W zQY)0k>V`i;fml1_GZpml>)+*mISlAYG(th)j`PO+vi0xzr4HkP^vhet!Y>aSJ#Ylu zQgm$x(TGE{^h+L%AlxsDz(ucMt6$a`ud-?y^NVF(cQ^~ zf$>f&-D+G-^eRXg>rhadKgSPkHudxQ^n992PZ&PzJMVc{VrsGwFZCC+85f`jd$6N{p6A*4MYgx}h*rIM<-Cf)q1;iU+iwn{lh1T*qb%-sS^FB}0mYN`YGXWBfT%r3>hH3CJnLO8mpF>2oCDzs!2rDZB;f}Ud zP!frCMiur|o9c@STs=pbr~3TSn6J~N%v1wC-2s0z(CShagkr%~RZ$s#?=WhlO%QGW1~=bl>!yIueCL-`&{KcV^~WH<|oWl0KYEp z2rifI5c%+r3{`yAgzu(;QIws`aS3b2D@cfxwcuN(jvzj7EAv_6EiCTu4EVyZ?l^yk zV?h-Kibj+s_u|FmGhz+Kx)q)$lg7*=;#-d5e{0tD^HiR2Es&tPG8Ar&P_%uX+8*fa zzE(7-lrJGgGnuJgu}p7tFVmNJ8XGQOx&+e#@_5RJl_sn3gAVVPwfdsIlG2$XV;zMp zV_x;oBsb~g3erVk1!@v4aUQP`h zcYXaLbQ9j4QkoIfcc`so%;9>pR20W-k2d{dyu9W4GH6bzXwo_6pGYVc_6JnA+U1Le zJ9@sPe=?NUZcj^Qd&U|{T6=2$pV)(*na=aphQ>XP;YX+8$5I<;FDdQOQr6avF1;&d z93V=t)Y|P^18qLMI&@z*M)6kJ&M+alS;h$_4?ME=-Pbt{m819M@M%7 zuHQX6O235I23Q1G_4?@O@3 zDabl=!nnLOkRdvz3Q9L@!d@PBg2ojPD8?V1K`h%fI!fy`CBI-ne$ge)QnD)Y~n zcm9RN(?DhN*8r}2WptDY5)py)M)7w)=-nhQ0`Ytwf4$h-$JB-eqOHZ>jiBMwkOiXs z0e|1K&`9StqQllEK&3vvAbVkc9?|#UZ$IchKxIjO!A<#jiiYS>{MEzHOR&&W7UUQF zFx#77^n;w*d^MTt$uIf7qdvdAzB{uEY*KOw(8*lTN~d|4UlDc=Sv1Irf%-*ytR>l?QqzbSiXT3sHlD-U(? zHn#IrS~4?T4XI_&EV^oP?#@oD>=NdWCD5h8A7_(YSEc%+f&9VjlK%KkT1^X2$@Zo5 zQ$t!m@wSTG(2kqDtR;{8`GYv8J5#o2Y8TXY2hr{xI~iX9kArcnoayR{)%zv+dnV-k zG7sj}9ar&g+7iaDGYghHtH<($w6!Y1N$qhC{-(eN6)<-b9td=f{5|;nZA|CSBA;pb z1v!(l&{EO1@OK~5O5o1`;@oDBS$ZVg|ipJkDcm3{KZopNd55Cso;lr z?kOnq|NnsFDHhn^gnBLKD7lY<9UkChlTP2PQ;>Z04LJoI74V>PfRhy#cHzg$6y!K0 zzZ0k99||-sQNS-jSdia^(-=yD>XO1oUyOzbt`+&ox>ze1XgH#jWGWaEE|<1Uw`lF4JP6KtNT%asg`uY!a|pz-|G11zaQGS^?JyI4Ix_0rv=a zNI-e1PQf&8hbmyXfHeX(3D_)Pw}8C@t`U%H`NyAm&0Hp@HdPjPX3SGdmbS!#(U@9| zTQbULRm2!p@!i?wW#zawqm_#p&%v#VYFc`} zvNlcq<|s~OU0Qm9qE1hzf081{O>1fn-85BWrSNzTX3O$^K5J?Y-kfU|S}BshP+4IX+ucbCjh(K;9y1ZP2%^yIk!Cw}nI{k>SIER-5q|29f z!AVb3Ip>p4n!^*l*G8v7o9ObrEBX5op?uf1a0-=^!X5a_O5^{>L1*@h@)9qFA^c@2 z$Jb*f``=;wla=qMX%Fej`ypkLroAo33ox5z0V-oKA2eOWXmVUTDPRTs%5m-LY(clT z!#PaP@$Kpo(9tW73v}+2j(!v9)8K#lEjtA{FS(QP7bxXboX*w;h(4M@&wqhlfLV*$ z%=zV$?wce1J8b&tU`(Z1j*WjG_$OKVwV19b$3Q=o^oaUp>n&t*dN#j*tz5KEIiDy7 zeVR1F0&74gJvCySp*cK-21e&`>ob{wzCHteJLq&D++YjWQX-VSEa)}OoFL~p2Qui% z%7LEjGtB#cgc>Cu^mO$&mC*~7X5k;75SWpHznb*J&UQa%lzyHM`q{|0sF~BHo!`o! zr!NEjY6za1ef+rniwyj~1)c24^QY$fd(hLhpa06B=dT&)Z)Kp7YGg4(Al#>oq~I^|m#_}6Bj|7Qkzwj+JM zXJw$D4?49&Rrp7kqs+~~|Kkkw(=*ufR0jTG$`}6Z75Vb<5xFz}NGrzRS)kMSqX>SP zG6myC0rYf>dXe-Q8T8E0K))GuYKP?)a}9i4R+I-a@b3kk^0oWzgADw|cweMz4<1HG zzZUDpfGEIKpp$J?y5XVlp8(iu1#;1u6Km!T1dqojz*$5s4Vm2G*8|lszzU$DsC*q}FDR!>))>bNypZ_or$t6!w(^;ccyZuHb4jmlvP(>0400`}~~xFk}z zJs$PDZ+7o!|cUx5b5yW7*KviF$y}0aKN>LutEI z>bV@ZI<|HMQNaNe?CL6cxUlHbirHhQ2zr!qpfkA^ooi$_O3I5OQ!`~3L{cO8=PpvU z`{VCi+?vXYbD!hS9mSC>9e|2UE^WrL+b~mUXl#*?-i{kJ`Tevhr>$_(MdG-ZU3hD` zycXAm;+(nu$}&_>S%$k=)87%BQjoH0R$-w^aS3WlBAvFGr={pn?6(EfnxeAN$oi#= zPQlFEXX=?4UF?ZlKgUXq8#is2yIL>XwO$`OCPVED1r_aqT~wQDDZ7(2m7iM0KXILF zs$TtS-0f;7{WmXNwW+h+qdHcIIpk9Mi2|LuqJQJsWI2MMX#Zo^?xvW-+k^EcM3P|xZd|m-1d56y#Jo9mCn z*b9Sx4@AFHp@Vg%zyc9Y`18ScC4uXxj=(Kbjui%j0PICoxO^xMR-juOHzEEgv z(6637+kBz$lXiNW;GCVtdJMV3Do;ypwmYJBG!!`f|b!MMJYqHYhgjPx3$%;r zQhO`3NetQGCh9!oy-F_dabjBv-cL{k3w+VyEN8~#x zFV|%qppl4_m;3z^%5yxhuweK9cR-~xGAS?51te?}a?*auC*gOIPG@csm*)p26!K(0 z*(vQ8WF?6R6;aB|a|Q|JIiSp6%FFW87ZxO6i$4nTTv0-K4oUK~XK9ze7c}yT)UT*O zD5&g-6{10GO`L)w{x`dLCQ)#b&2MN~!waUx$ z5(%xpc_B$DC*fl@d2_#iI&mw?BiJgj1}P_LPatkBwMZF zV<2?&ExNsVjwJt|lfAq$e+hG-&}osE=S|DJJQvBwv9b0q=}yR#i)H@uTuT0bE>$>| z`Y#13C++$zXjCRCFVC-fFC%V6>9rMD%1cPUOS8%=%}fkG9N|zEs030@;&lE*G?{-* zGfz>4e7Q^z0yP3kaE{2oToMG7@-u97vXz1r%C^T-__Imm-y?qDLBCtL5P#BsIeyWx jg3UyxVB0keDCZG_CCGdwX%Zso