From 91a82c9f0fd57fa556cd8d063ca93d5ee38d9f05 Mon Sep 17 00:00:00 2001 From: Richard Yan Date: Wed, 29 Jan 2025 22:11:25 -0800 Subject: [PATCH] merge kernel changes from kernels-asplos-ae --- kernels/sgemm_gemmini/compile_ampere.sh | 19 ++++-- kernels/sgemm_gemmini/compile_hopper.sh | 19 ++++-- kernels/sgemm_gemmini/input.a | 1 - kernels/sgemm_gemmini/input.b | 1 - kernels/sgemm_gemmini_dma/compile_ampere.sh | 12 ---- kernels/sgemm_gemmini_dma/compile_debug.sh | 12 ---- kernels/sgemm_gemmini_dma/compile_hopper.sh | 12 ---- kernels/sgemm_gemmini_dma/compile_virgo.sh | 21 ++++++ .../sgemm_gemmini_dma/compile_virgo_ampere.sh | 25 +++++++ .../sgemm_gemmini_dma/generate_operands.py | 4 +- kernels/sgemm_gemmini_dma/input.a/128 | Bin 32768 -> 0 bytes kernels/sgemm_gemmini_dma/input.b/128 | Bin 32768 -> 0 bytes kernels/sgemm_tcore/args.bin | 1 - kernels/sgemm_tcore/compile_tcore.sh | 64 ++++++++++++++++++ kernels/sgemm_tcore/input.a.bin | 1 - ...fp16.m1024n1024k1024.row.swizzle_fp16.bin} | Bin ...01.fp16.m256n256k256.row.swizzle_fp16.bin} | Bin ...01.fp16.m512n512k512.row.swizzle_fp16.bin} | Bin kernels/sgemm_tcore/input.b.bin | 1 - ...put.b.rand01.fp16.m1024n1024k1024.row.bin} | Bin .../input.b.rand01.fp16.m256n256k256.row.bin} | Bin .../input.b.rand01.fp16.m512n512k512.row.bin} | Bin kernels/sgemm_tcore/kernel.cpp | 4 -- kernels/sgemm_tcore/sgemm_impl.hpp | 11 ++- 24 files changed, 146 insertions(+), 62 deletions(-) delete mode 120000 kernels/sgemm_gemmini/input.a delete mode 120000 kernels/sgemm_gemmini/input.b delete mode 100755 kernels/sgemm_gemmini_dma/compile_ampere.sh delete mode 100755 kernels/sgemm_gemmini_dma/compile_debug.sh delete mode 100755 kernels/sgemm_gemmini_dma/compile_hopper.sh create mode 100755 kernels/sgemm_gemmini_dma/compile_virgo.sh create mode 100755 kernels/sgemm_gemmini_dma/compile_virgo_ampere.sh delete mode 100644 kernels/sgemm_gemmini_dma/input.a/128 delete mode 100644 kernels/sgemm_gemmini_dma/input.b/128 delete mode 120000 kernels/sgemm_tcore/args.bin create mode 100755 kernels/sgemm_tcore/compile_tcore.sh delete mode 120000 kernels/sgemm_tcore/input.a.bin rename kernels/{sgemm_gemmini_dma/input.a/1024 => sgemm_tcore/input.a.rand01.fp16.m1024n1024k1024.row.swizzle_fp16.bin} (100%) rename kernels/{sgemm_gemmini_dma/input.a/256 => sgemm_tcore/input.a.rand01.fp16.m256n256k256.row.swizzle_fp16.bin} (100%) rename kernels/{sgemm_gemmini_dma/input.a/512 => sgemm_tcore/input.a.rand01.fp16.m512n512k512.row.swizzle_fp16.bin} (100%) delete mode 120000 kernels/sgemm_tcore/input.b.bin rename kernels/{sgemm_gemmini_dma/input.b/1024 => sgemm_tcore/input.b.rand01.fp16.m1024n1024k1024.row.bin} (100%) rename kernels/{sgemm_gemmini_dma/input.b/256 => sgemm_tcore/input.b.rand01.fp16.m256n256k256.row.bin} (100%) rename kernels/{sgemm_gemmini_dma/input.b/512 => sgemm_tcore/input.b.rand01.fp16.m512n512k512.row.bin} (100%) diff --git a/kernels/sgemm_gemmini/compile_ampere.sh b/kernels/sgemm_gemmini/compile_ampere.sh index 43dd96ae..a4713106 100755 --- a/kernels/sgemm_gemmini/compile_ampere.sh +++ b/kernels/sgemm_gemmini/compile_ampere.sh @@ -1,11 +1,16 @@ -rm kernel.radiance.elf -rm -rf binaries -mkdir binaries +#!/bin/sh + for a in args/*; do + echo "compiling GEMM kernel for Virgo with dim ${a}" cp -f $a args.bin aa=$(basename "$a") - cp -f input.a/"$aa" input.a.bin - cp -f input.b/"$aa" input.b.bin - make > /dev/null - mv kernel.radiance.elf binaries/gemmini_fp16nodma"$aa".elf + cp ../sgemm_gemmini_dma/input.a.rand01.fp16.m${aa}n${aa}k${aa}.row.bin input.a.bin + cp ../sgemm_gemmini_dma/input.b.rand01.fp16.m${aa}n${aa}k${aa}.row.bin input.b.bin + touch input.c.bin + + # touch source file to force re-building, as the Makefile does not track + # binary changes + touch kernel.cpp + + make CONFIG=gemm.virgo.ampere.nodma.dim${aa} done diff --git a/kernels/sgemm_gemmini/compile_hopper.sh b/kernels/sgemm_gemmini/compile_hopper.sh index 7b816d6f..b6a0a80e 100755 --- a/kernels/sgemm_gemmini/compile_hopper.sh +++ b/kernels/sgemm_gemmini/compile_hopper.sh @@ -1,11 +1,16 @@ -rm kernel.radiance.elf -rm -rf binaries -mkdir binaries +#!/bin/sh + for a in args/*; do + echo "compiling GEMM kernel for Virgo with dim ${a}" cp -f $a args.bin aa=$(basename "$a") - cp -f input.a/"$aa" input.a.bin - cp -f input.b/"$aa" input.b.bin - make > /dev/null - mv kernel.radiance.elf binaries/gemmini_hopper_nodma"$aa".elf + cp ../sgemm_gemmini_dma/input.a.rand01.fp16.m${aa}n${aa}k${aa}.row.bin input.a.bin + cp ../sgemm_gemmini_dma/input.b.rand01.fp16.m${aa}n${aa}k${aa}.row.bin input.b.bin + touch input.c.bin + + # touch source file to force re-building, as the Makefile does not track + # binary changes + touch kernel.cpp + + make CONFIG=gemm.virgo.hopper.nodma.dim${aa} done diff --git a/kernels/sgemm_gemmini/input.a b/kernels/sgemm_gemmini/input.a deleted file mode 120000 index 52e944e9..00000000 --- a/kernels/sgemm_gemmini/input.a +++ /dev/null @@ -1 +0,0 @@ -../sgemm_gemmini_dma/input.a \ No newline at end of file diff --git a/kernels/sgemm_gemmini/input.b b/kernels/sgemm_gemmini/input.b deleted file mode 120000 index d01681eb..00000000 --- a/kernels/sgemm_gemmini/input.b +++ /dev/null @@ -1 +0,0 @@ -../sgemm_gemmini_dma/input.b \ No newline at end of file diff --git a/kernels/sgemm_gemmini_dma/compile_ampere.sh b/kernels/sgemm_gemmini_dma/compile_ampere.sh deleted file mode 100755 index d623d2cf..00000000 --- a/kernels/sgemm_gemmini_dma/compile_ampere.sh +++ /dev/null @@ -1,12 +0,0 @@ -rm kernel.radiance.elf -rm -rf binaries -mkdir binaries -touch input.c.bin -for a in args/*; do - cp -f $a args.bin - aa=$(basename "$a") - cp -f input.a/"$aa" input.a.bin - cp -f input.b/"$aa" input.b.bin - make > /dev/null - mv kernel.radiance.elf binaries/gemmini_fp16dma"$aa".elf -done diff --git a/kernels/sgemm_gemmini_dma/compile_debug.sh b/kernels/sgemm_gemmini_dma/compile_debug.sh deleted file mode 100755 index c245b2bc..00000000 --- a/kernels/sgemm_gemmini_dma/compile_debug.sh +++ /dev/null @@ -1,12 +0,0 @@ -rm kernel.radiance.elf -rm -rf binaries -mkdir binaries -touch input.c.bin -for a in args/*; do - cp -f $a args.bin - aa=$(basename "$a") - cp -f input.a/"$aa" input.a.bin - cp -f input.b/"$aa" input.b.bin - make > /dev/null - mv kernel.radiance.elf binaries/gemmini_debug_dma"$aa".elf -done diff --git a/kernels/sgemm_gemmini_dma/compile_hopper.sh b/kernels/sgemm_gemmini_dma/compile_hopper.sh deleted file mode 100755 index 41b30728..00000000 --- a/kernels/sgemm_gemmini_dma/compile_hopper.sh +++ /dev/null @@ -1,12 +0,0 @@ -rm kernel.radiance.elf -rm -rf binaries -mkdir binaries -touch input.c.bin -for a in args/*; do - cp -f $a args.bin - aa=$(basename "$a") - cp -f input.a/"$aa" input.a.bin - cp -f input.b/"$aa" input.b.bin - make > /dev/null - mv kernel.radiance.elf binaries/gemmini_hopper_dma"$aa".elf -done diff --git a/kernels/sgemm_gemmini_dma/compile_virgo.sh b/kernels/sgemm_gemmini_dma/compile_virgo.sh new file mode 100755 index 00000000..f2deb04c --- /dev/null +++ b/kernels/sgemm_gemmini_dma/compile_virgo.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +if [ ! -f input.a.rand01.fp16.m256n256k256.row.bin ]; then + echo "input binaries not found, generating operands" + python3 generate_operands.py +fi + +for a in args/*; do + echo "compiling GEMM kernel for Virgo with dim ${a}" + cp -f $a args.bin + aa=$(basename "$a") + cp -f input.a.rand01.fp16.m${aa}n${aa}k${aa}.row.bin input.a.bin + cp -f input.b.rand01.fp16.m${aa}n${aa}k${aa}.row.bin input.b.bin + touch input.c.bin + + # touch source file to force re-building, as the Makefile does not track + # binary changes + touch kernel.cpp + + make CONFIG=gemm.virgo.hopper.dim${aa} +done diff --git a/kernels/sgemm_gemmini_dma/compile_virgo_ampere.sh b/kernels/sgemm_gemmini_dma/compile_virgo_ampere.sh new file mode 100755 index 00000000..d71785f3 --- /dev/null +++ b/kernels/sgemm_gemmini_dma/compile_virgo_ampere.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# +# This script generates the 8-core-per-cluster version of Virgo GEMM kernels. +# We use the 4-core version for final evaluation; the 8-core kernels should +# behave identically. + +if [ ! -f input.a.rand01.fp16.m256n256k256.row.bin ]; then + echo "input binaries not found, generating operands" + python3 generate_operands.py +fi + +for a in args/*; do + echo "compiling GEMM kernel for Virgo with dim ${a}" + cp -f $a args.bin + aa=$(basename "$a") + cp -f input.a.rand01.fp16.m${aa}n${aa}k${aa}.row.bin input.a.bin + cp -f input.b.rand01.fp16.m${aa}n${aa}k${aa}.row.bin input.b.bin + touch input.c.bin + + # touch source file to force re-building, as the Makefile does not track + # binary changes + touch kernel.cpp + + make CONFIG=gemm.virgo.ampere.dim${aa} +done diff --git a/kernels/sgemm_gemmini_dma/generate_operands.py b/kernels/sgemm_gemmini_dma/generate_operands.py index 21ca9b72..31767b4b 100644 --- a/kernels/sgemm_gemmini_dma/generate_operands.py +++ b/kernels/sgemm_gemmini_dma/generate_operands.py @@ -24,9 +24,9 @@ for s in sizes: # Save the operand matrices to binary files save_matrix_to_bin("input.a.bin", matrix_a) - save_matrix_to_bin(f"input.a/{s}", matrix_a) + save_matrix_to_bin(f"input.a.rand01.fp16.m{s}n{s}k{s}.row.bin", matrix_a) save_matrix_to_bin("input.b.bin", matrix_b) - save_matrix_to_bin(f"input.b/{s}", matrix_b) + save_matrix_to_bin(f"input.b.rand01.fp16.m{s}n{s}k{s}.row.bin", matrix_b) ref_matrix = truncated_matrix_multiplication(matrix_a, matrix_b, s) save_matrix_to_bin(f"ref{s}.bin", ref_matrix) diff --git a/kernels/sgemm_gemmini_dma/input.a/128 b/kernels/sgemm_gemmini_dma/input.a/128 deleted file mode 100644 index 66035f64c4af4ebfc28e7b554552171221b9aa91..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32768 zcmWh#Wq?-I)29)nK|)}OU3T~B8z&|Zkp=CTbLY(bYPKb8#Z%A_4w?|Autj+@$D7*3Fb>-C`0%tyq}XyIdPQfr6*ae? z)09{toZ(F32rM+kl}DBBcsQQ$4DP}cn7}=8uYPZ)7Hg_CAk} zuJHmcVP@6ap7L|M*tZ|A%vf%{z~m2) z#nJG&av*hd4s_vsx-NpV9T%u8?m9>;wvdh&=!iAcP29k*{5h}_ra`bG-_-w_cMw{srIjA?I)M-AC{!c9?q6L>OT@g{NUDdk`PfF}^?_itN+paF825Y?X17~C zv1+)BeD;?R=7;>utBO`tES8-L>3FjVme5D|A2n6Y>6ts9hu|G=8ehO{>a_CIsZ1&`syB0<{dDT6TDXcXdhBi9o>rW5(l%Y zc)LD2RX@b;_#dpnNnT&w4l{6l7^!w9t-%b1U+K@GDQ}AmHkdd-zkxn#v)48@4<=(% z{GP|;i{LTX3>&#d!W0}vxuBk|MHtO#126$kMk@K=nRk$vpF>&aJpW7Spc&VO7QC2t zb3>k+{DVto9{;Z5-8`INOLGyo5`Jv&gs0#GKSy9y=wzt0+>01DLxyYWEg#jRZD#Wa zfA4;-yJ4pAWms&-sRe!<+|pHTYyB&ib-d_cy$#CY*X9E+&XtZq~t7gyf zA#;g_#pxAfWF3qb2^`QC!rYZqJMQ!KPl2XlG|?+Euo5s{qRL{-2UKD6?@-l;XO74-|Lc; ziaKz+q##!?>3J01<(za@r-F3moQ|j`wkLK~9t@8abl(Sl<@@=6aqoeSWZ`|dowLC) z)e5?E2+FCn{9Ld6CJYoLv8U7|B+YCZO)riNqf%{?WG>h zS+k4A(Zpy<_o_M_K4NY~$HQ&^0E~) zdY$?}UhJrM$?aiYp@n0%U$$c=dw7M{o9}p6sUzie&qJHoX?aV7X_c+P)q`u*7F=h> zxL2ZMX^PFtS8<|wqYFbpzKi7xPQO$yH2!^%@$pQFX1U%B)y_IHKO4?NeWXjtZQf@McZIoh44)*&=$S`6I8aMB zkIu1^=zu!oRMf+1kW=1o8~zDvnt#=e$xH*kqNoHIj;;?=zJ@RPc#w#!ZIMn|xp+@c%yJ*}eiUWmA8P}ldf zz#S|ZEd;~3y59kJK~j;qb`>rNWZ(c?NQ(LkcsbmN&Qf35M#W~Bv-YUJ8Pd~u|24$P zJtjI#7wlvAH77XZY{T#b-okrvmd>F*S2-OGW1Z|ewVa|&ylDVW(`>t!OR5J&ciMdF zd+Oo;qiWGs>;sE&gXvoEM3FpDJ2E!7m9Nnir(5-y;_7#f!yq&^9u0SP z@@<(Sg95X$DtoX}Z-`~2Pq;%g2Q^OE$q%6e>`-0!3-zOP$TE}!9G+xT+i$#sW~=_p z&1RK~z>wsUJeoSfN!q|UW0~+R-{Uio&shPKw~@DmE9(3>NKNN6SWefWhtcvTm){pB zMjMz;Dl^Q~OYzIZ9N5gSqT0|;oL$eQWzig%irzaXO#&~7E=<@4|CpB4f)6We4%tm` zji+I818Nf2arR>yu8s#xWABDuh8}k0Cp3z_78rTlKF6 zj+$!N$qb654);(U+^u}h=}v25BIJYL_>`_n-JrJli4Vh^*w5w*nt-)AQDvuw_Ip^& zfAAgJtv}@gc*9Ih4p17XL~pS^9an9sB}}z#uy3*P(beh-_j4-gF~KUju%62Msj#V= zxP&U=b>|;$FFkCNdTkm}M;yxkK%P_V%^=*NE~@o<0L_P! z;Z%`lx+o396&O!Hs1{Hk9^f+Wq3V+$Ht=ZzuTS3MWTr-uB9OsvspjJLq<2)%riB_1 z#BX)KXkOjS?E!i83f{)A;2GTHe1dw?=nJ}=cf>yA$~-mJBhlf?(AH~-Nq8cbq}GLg z@rpY!Zy?qQejEK#YX6Y>M5Uy+Tn;93eRC!{*6dVwY$G%$;Bl3Erm z&8zSs)TK4F!)E5%Tu)sF165%N-&9u_xUL;bNpga5x}w(vcTpfA0dw0J7t=Fr91Lsjx7RtI@@UmEBs{GIE4x;|>}F3+x+U7;baYMz_EFrzxs;h4)gJ8o(d? zQ@ALVR(Ew8zZ%VRw$PV2G4zd0z|Z&{ego^MhiVEN)NcOFPoH3QS}2{=4?fT$d@G!3 zCI3KIWR9OKD$GevsE5All%p58lT&ba7!ciI)9QS1M%@wgkVzkfo2DFpaHr#bp5ylM zCSY-yGdJLv_oEHRis^}}pzX%L3%-5?Q@ONLkDbUmTh4CsCh0QvAuZ-4ctuxXhPK`u z6N9yZmCozksuqr+%dlMTo{jZ|-BJTP6Zhb3dV~3(mTd)# zusJ`4Pk9bD4X1+XYO?g040-|d#VRVv85NohFWnu5J4zpT5ZOgnIbyp8M#l1++`QBI zfmX&>x5>CY^g!-^5%&+g1`pi^aeE7wf^nRTzu-N)3_f1|O0k1D1AlVI@B#c)t(JQ3uTz=b`Uxl==ceFXNfmIR zFz^Go#wKAJ7)0}I3{t6ca6!$3&hDq~Ds#qEQpQOe=$%+kwY0}^rez+Me8}yf=ff$Q zr7Nk2p(&KpOogjFI`=ZCICYczSQ*Z8U2_t$geRFpxY(b-gK?HhqMDJT{8g|oXHwbd zOq%TKJ(hy6W0g%RGal;UHhO8xnb}TP+Js|GBGkZg?yJOH`d2t?$KoV9X)8O&XqI&6 zMYx-{sl|9v*D}rgp?U`YtgiVhpop4kS5XuHjJXMo;3#*)2e^Y5nzedv;Ewyff6dKE z6`?LprWU@{E3u*(l=RA!5jIs_KaexNq(?=o(h6+KFTAf~M{t<8$c@uus5)E?bhQ2O zD`ESte~1RbCcMNKh3WSyR6cntyj6eu-Bn(F7AHbAT1>m7lkJkb{Us(w>cDz^m){q? z3;)CU>VJHlYVyN`N$`uXzie0yYR76o7F@zV!%kku3Nt}3TZoG%Rg8^}W}z;EEY3pM zd;tL*B^>fEs283mWVIc24el@nRXJ6g-a`i4p~x0q&#$~M10`S%SK!rfNmbA>zG^+^ zx!+mckF~S4xB)H-|5&i2I;Jhj43#aW3!u-qQ=4HKCIlbU1) z(MU>3v(*Fp(oOIEYyLzd408!(N*)i_VI4m&RuU$9SA!cMNS?+;JZWFshPPQ7~xOM37CXq18pgzX@ukLJUoI$yvlfj#`@VQ2QKEIdg{Cve3XT* zU_*T5-Jzv|1UyU$W7IU2M(6h|U!@lIx*pDPerI?SjPuSyWn2O;>2EGT-Q>1yjP=1p z*x(-K_WB{D=Pb}THi4GvE1ZMR&<0^Lx7omGJ1l+{UZn?qMx7GohIdAGny4CV%G%>R zQP5yEE*i-OpSzdz-+09IqCGfHa8L&d>Cw8anQG=llfa9{@q>_OC#siDS=xfDoNQQ^ z9J(JFh}(osXM<*TD>tFOP)YBniuOsQynRDqTR;9byymg=6sq$udSkc4MLa;=Lp}v; zMU0c%oztCQ9)w5W0&YZ~nbmMr)q)f1J|?QoTp8Phzb*2Hv-m0WO0N@J%mBS1xBd%$ zF--+W{HH6>PbNE5p^)9q^=K|FWHgImJhs3ed9GTn>WQ8;%$p<%z!Lr3%z}P49eon_ z!nD!XLDThcR=0*=qfIjF-`hscAUp|Mp)m~SqRyJ|6X%@2OBBy;{u%p=EonQ`c$=MS zxnIB!wASrvufbn#7RnNt!h?iq_m4d^kRO}jFp)cgqd#P~2SME7}N@e8aZtDH&wqy7Xf2J2; zBfTYB!L*0#vGLO9hsaaarxJQDUNDu+Egwj~s`#si^QxvbR&Zr7*P5X%E%A2a@ zpU3(=s=v|g zXlL@)@B;YSWW&;`yiCtJ;j*-k$MQ}5Gd2gttNpwaZ;95F%D&d$a5Y^u(B9M}M-<+p z(J%R)Am&S^XY8^66mJ)+==4myES1#NO_UDWpPTT9LYpDK8X@Tafm6^vv=6vb;FLN^ z<#Dd?hg@{aPV@!{>*^zOZ6%M@>s9ecb9`2nb4s;V z=N-<#NPqf_Z`o1UC!E1Q}exL*w*e zH*fG)m_^N0UCu@qLepU^^!FO*+BPY8#3|`!m+!xnjGKlUV`0%iU&n%x*5tb6W#QqR zSMQ{2bRI^*Y&fMvTlLqtv&~QVFKpp3j&uU<%SfDeSY}NUw~-#(kKU_aVT${s9qYEX z_01D=)NkOlQ!D-JxC2+i`N(L<5m}nBSa0z6L1$6N5~TO%l(RbtkH`qC1-!0O#c%Ou z;UZ|Io|{UT-FaYEn7R;woBC6#5}uH}RXl-iYJ}T~o4X$bRYcU+ej`(bDsjr>itb9@ z1?^xMNE_>hJ6g=+He(T^(yoYglT18Lkv!NoUxM{>=U@7L~oHlLV@5I%x{G;Be zTJby$I8c>|rWSqmIjr$|V59%D-ND7pdihz;*`?AecB&tBUT_Ql;C{nnl6ptCn~Q>; z@2FeqKPv7I5vA%3HrIuv%f8Vo@M1W~f#^TFE`ai|3+%*|x-kr8z1d(sFLqPiq|Wp&J}mZU;#>8-Eze12 zo|+bD^F z=Pl})N`oImS#P0p))prgm#wfVXL{yYEax4d(Y6uyOllTOW4cFvjEsT5ywuo*zo+N^ z&d4lrN8alucB%6-?lGNc9`p}1)O~3vX2oWB4vOGwCCT+3N^ww>KT~uwS8}En0 zd|xf5F$yr|=%tz+7_E z3A6X9cBm(O>o1ReK?l54py9N4-R=+WN%Ju;F{SvKy=!(Tio8@;;Zv#w*(sB|06#S<$*RzO+_8y*Dhkm6$={DR0&*n%vta61@+N1Ovw81o{4t5IrL4Zrd0k2t^qUM zUvWV+EDqr3&i>@Txuu%NX&@b(iOl9Jf}7XJ=DUBHqq+~=3I_vupmNguP+D_86f!&f zRHCr0j{Trtn#I(cpQtrzG_-bp&?kb~O;x$WMa84}+*=@i>;rlX+iA1+o}SV(KRa~g zE?7|*MHO|11NN;tgIA!xzO1fz=^`nGVUF;#gw7@0$8=s9Me(Pc5477~$fexO>KTbT zq{iW-U}4@tgCZ?OS-md4dsi&0d}06J806;EpVv`?z(tsjHKc})n-9t9q61+$boIh^ zqCHk*vhx9+371_Tn?m=^*O8-8*SB1;EQ{XrQVL6cSNJY;h@FM?g6|$%m*%-Q zof13=@8BLm z3p5RWG*Q9D2gP^)-Fn=`FC6WR^Kd-)YKM9Xjn#WwO@9J){kq;Y44ZELRqABg$5!9~ zH4@&z29EQ_t6sbt9eV@kn#m@m8v&(6xi@D-)!N}~7hW6(U&hkIBV+V(@by^#7M}6C z(OJ%FhfoW*nK%UTkVh4BYqP6g*eB+-DMSM}uPDlSz2Ptrw_ql5tFy9odZ{6H2W(ZP z#arzdoj_Z8obC!Mpgmo7w$pFeH+c+1lAhV4kPF-MUbT;k+d9}sI%FepnA^%J6z9+N zL2e#;3=eHnzXD*mGyEZ*ht`=z#a(RLi_i0~p64|3azqB&{d6=^7Pp#z>4csrj$$3r zws&!Lr#KXMjwijvtI}5%h3|zv(s|8;{JUtdea$&J6{NwB^d%K?8UaNTZKAG8pBh1` zUU%Lt^_vDNijHG2P55V%k4Kv4SSt2}(!z96xsvE0|AF)1s_qteXl9uiltt9Yor zp6CH3)jf9&Og4Yx7rb4nYrp9mcnGI-`uK_VY9yU7?vyZ?KjzJXo-(Rv;bpPHy0Y-7 z3hg$xHu5_Bly*pz{Wg*^GQ6`5ZlT+OpNTA$#IkJm=LG^=7P!s9tV+N@F{# z`GQH;2_sz_Yle;dV_H!q?#+Z-%XlCA~{nfTD1XT)zq= z@j~oL`Tgk-(`7LVDe)QH#+LLjG=qnlZf;X9N88Nr@R>iyv^O8gt;t9=C}c9)Ah`KU zcn2vrJq_o=uZop}kb8z6NNS=8#`867600HXB{MdpYih0TDpb~=i;H{50udDVhRYph=o+}Mn_^eThiXuc``9#*Rf6DK(kJRVq>1$PUOKwqgx@T+8b7W-p^oz+in-lS_P1futuG2TQ-OOx;tp-Ly6 zub3W$m7Q+1HrdE*{-60AUnf7Mf5NlXdX=eYn&?T=bPq@1f$%}KJ#KgCVsc|og=!@H z;kP6my3VDAnO8(hCsdrOE?6!Tta6+AtznYcz#ZYfk;xOOug2*!RFtj@s!kccV?VBC*(ibEl+jrlRVx5C@zUy)hb1Y6N?7;C!;@;dB% zNmY0VO{GONU!37mxWnWZ7x_7-p&R-cp2Jk)KMZxw>pZy9cEbPoeK-e=r4izlWwTXG zHaOtjvqLGHcvExKTiR(l_+L5ectPYd=L&y9}rCfyBC`XY3w2Wl)u0^ko}%3iWmE_smBZ9FWrV(aSLqePn7#S0}t6z+%){wAuoqN#=q^1!~eW7 zes!;zS;<#LA88q0tULRkQnA=fnFn*!0jf<4B+>Ax-YGhFF_R1OvyT;FQh1@;Mfm%J zXvd^q^&T8A%GVUU4+rfkZ#?A12eEIVRJ1ANcIU-Di&YYTHl>;xt^^Am5}tDcYnyr4 z9|uVVE{7ilpZAn0RmRkYB~A%-UlkJd;|EpG=_k`;otbMFsJ^r{)?aj^Uf3KO(i<+| z)uw*>A}+^w9Edf@`yWn#QJ9|Y*o`_!pO`n0$T{3eg$t-=;tNgT=ECrbP-$`D>LJ@M zDlYU$Jq+A11KF1J4tlCIxwqLQF2SjsThuoB&&YOm&FJtb zdq3bN41#^$8~9p(=3Ez^@g&q+rB_GPI^B(yhF0mt;xf*5R@k3yN%~sd!(E9TxOQ}% zo#5Wp@i@u(QFW0z-UTs86I<)7;HLTZ*vz!WYhZ8Nkot(*@CWQz_#^D(ms2<0X^?0= zuQxCAnQA0nb0^T35W*|wACubY=QiLPc9N})#p6rEC%jsw+86d5-GKjaHx3V;;3eW2 z9}3jt;kFX5H}m9Nn%dp&?*z!ufaK`D zasCypV4~@4zovci_6COf()WqV9Xz(Ufwkspwi58j; z^Ga{t7%dZ9X&$?c`LLJH#9lK zCtRPj&17a5orhZ_W75lK^{W|ay7;xJ5-r7C1?zGf-7G1eC>)IiQQj7WnB6eMAdgtST6vS5phq-|ob7p&NRpmnbfMvU7*~*%&R- z$L$s#iQgr5^b^%8PNvS-E8Lk*xci;wlw2@#_-jrlS=&+=i#4Q0-ek;c8p?f6R43KX z>Y?}&ZKxQZ4f(hej*Egjl;27UbrCB?#wAw}2jzPm5-xp(I;e8g z*-N&2@EgHw4Y6-_ds4})P&4NL-UbK(ec3)C2(IOI5@&Flz6}N9?wN*(n>_Y@<#UpR z+%D=)6)asSA(lG%f04|7MY@J({m;#HS`hvg8pU34cFB|7aUSybp>d%ZWJJ%r>rb}h zITbD!Hde)10C%`hG@p&52fRGi&!nTh;*yMw))gOVgs|5c_76x~csTwU9}#A@kiktb zm#LuqTUO|enK?};6K``rrcqE!ujK7CMGf?J>1w*5J4DVU1%GL))35p#mE@{0&z6Sy z{*8!h%27}0HfUeOqL3wU$8`7hQeo!@8#MdHy__j|gwB2#X6b*UwdptATdmA?CHW&& zPXB;+bQ9VDb94rG&L>68FmpTMVGVMnon7?rH73YCDrgR{ibukQaF z8S9L4_Rtndv&4xi=!)<2n%;Oj_|r%p>gf%L^@E(cke^<1-Py30y2X9eK1nEhl!Viv zo;~1Sm)tETz;Sm78(PH|++u1=Y#+4edHy6xjIKn2f_MSX;66K4&GhPX zuTU%fE&dLV&8lc0XA17LUGxWi+4RBVFg%>m9TD!7tm!x{H+%R`x=GjdB7Tp{bpbX7 zra}OUNA61YIh~nh$Iw0)4o7u=wZ!I;lwl^jo4WHNQ3ltEXHwseA=&EWP%4)Xwkx+9pUtg27ZFVz2NAOdg>Wr4DIxs1o zsz7TRXKG?2FTv}slVLm75!dvzslxA~@7=zbRh4n`a%P2aT2`nj&WJa>x9fX`-i&_<#rV)?Tk^^1lRU( zcg744pchbGJmNe$PLzq+(AvGl-_S+-3O*6t_AiW4J)6oq1YLxy4haE$1-H$&RK=^W z;;@9dpmM6;tddMbP0G!KgO~ZViFo^E=5*zlAckc5?74g&$3R6>%557vBFakS#cJshOpqf$9| z!@Glz@y}>!RRHsd{!j@nQU2I9>|&E)oT{pFdjIMrehzQFs6(Cbot^~m>^(Sy)i58e zaneRR>diJPZ+Rbn#b@zSvL&X0NLp^5PDWr3J(VlvZR%GcN{$T1R(>w$k$Rqx5o_t^Tsj)yf9&Yk1v$+SUho||1HR@7 zN|_ekV4dD+4*f(&Y9h(+s)3}y3(*6|VGinSKBiNm1&?-Ssoa&IuCFd6hJDlgx-F|>k;hdb+%olC7D^J93`iiQAYa$o)a+{m9Bveb&+l0Ix z;ZvJ4X|GO&Pk6p}%Q>UlLUsI2f8{qQc+G7S-R%c-#>gPk$XyDVOdJ~O7%5~<@%Fs+ z>`?nX{iHMv%X`rl(0!=8=@(v!2Z}b(l5>`4TM?Z_>5IIjlhJfe4R^Qr2;Wdg*N;tu znUXBdlT<|fnVq=68xnrQKS{1A9L)$H!IjvLx~QO&iPH-|ud7Mam|kI8T*NJl z-Eo7;`Jyjj;>5 zp{dG?u_T|i9o&g#57d%9nVsVK&gbvk`Sw%(87sh0Oet<)I_VKZL>pbh9b$iRJ1-jf zLQ(+#6MwscXgk-TFfD4|InAM}jyXc`wjq}pJnYdB7u^%s8t}=Y(Hph~% zmn%4*!#i~khI(x{mHpFHqSAcNhH18wQ94C?9ulkK{Lbehg92a3KFogg4Cc%1t?ECd z6r95TC#m%_k+HC}&H%C5}l$qq{Gg4JMHr!BBg$MCpe#*PtM&faw5RAT#%9tU7M3#sT z+{$jkSb_19bt)n`mi_iy*$XJay>XUwnzEACToI(G($&Qsxk6KZg z`Lvvt^DFKEoBROW5-;s*HH{WZ&h-QofKnt$-n{?gVY-F6srS+y+~$rpGxBw!$$qs+ zMNI9tgn(OvF7b`%cKX9k4~Aj0?6!56+)HuCn(A=et>s$9llh$Ndb#GU4e%Ao?;J5Jgzsm!E#WYH z#O-l2?&I3wlwKY^g{%4B!kAcX)j_Sq*OD5$q^{x>$$6%C{)pCre0V3)+eu3H+(L40 z;qVar#!PiCa|ZE<@`uvvyKu#yDExa}V2;_Oulfwd%}hyNZ`YT-8rYE2$PDj?(|Cl5 zQBJ56{9n=EO$EG9k>oLwOMXWQ*vEFEnciwO%6-d;-fV1TODFr0F8mT3!$xmdT$Dbj z3*HE~FTEBwtPRWg;99Qbr!Cf$Ssmt(-%FAiL#TLkHe_;Vgzv(K@C2#R;gYYoEN+Ry z){@#Sp1hAc(eL3lg2+GTnl?Rbpk3C$DZiqgrOpRd$gWF}Zu{%tf*noE^qS;++z3y( z)4e8UzO5*ZZ31V+zw}M|4);J&m><~%O-vhj7OO;gWzTJ%H&l4k`EXjvF5Q=OPfb3g z-{8;UZCKaF!tl+{TP$Ng}Yu$N)Fk(#MTo4K@BeBd_xt+)*{IbN5SRMAM8?o}uY{t8Q- z0d8u&fp3J@@Iw2>{6iB>bJ8ONM*2^QovjL!p%}=8kongt|#K_mXTc zHgRuZZ+HXGY-9*j?ue{0B4m zKdkI`hZ3d<9KrwD$~*=aNnKX7b!b7MHIOFp4;D70a>x4WoOp;oj&Achi@ua4lFX_6 z-I!ZA>0kMraY->omg!_7K#zZ3La2;}Mb#+!}n(Q|m++$f}q>hnATw zHqJ3gIcZ0@5LSUiII1$qNuG1Up#k~|WR{H5%jg8za z&@Qy~!tDjq-4{n3EG4@h3dhn`o}a-XKsW&So=;7xyOeV{9Dd zupQ~qM~z~4bR)Cs|2OdXsHqsPWv7}}5Qk$!&%{IeQ+8M@I6r#_%=XZ9^)kFr(9s7; z^`x-raBXO|eMLJ3@obK~u?={8_;WaAa?&48e|H;p!vt7a;5^;sy4XijJpFLK&KHSG zHnxF{n&nB&yvEMRqA#F!bdOUn8b{fAR_qbHgdu7ZJTbSUE4Z_4gq?L72w(3iS>w%4 za&lVPacEs+sqF+qsF)Rl9gmr91b*TEUK@lgUu0`#Lw+PJ5GNh zdov%zi~PfaC^;$d6G?>^bbpt6Gv);Kz)F#kFgX&$YG$Z(jSsTxGs}JK9)Sj;Gv$iJ z$@IvF8=(Zq2BGX;wHIXBgAR$iGQb<4J^sx(FZ$X|XEW3>2c^m@ijv+}hrG_BJB_5X z@sr^>+>UfG5zgzfmXX$zUX@p#PpR_bpwx2I|{D{9)0LzE=hkIev{;JvM)O_b6D!wHJLFa1*fZIj2e zl2iG{`^>!(Zq49Lga33lNWd+k^Ay)d+)IGY1uB@dpVIip?0FQ$%)GZ#l7{M-Dq&8j zf|BFEYCaXzHx05mUrCChpP-AQCL->^ON?M8+g0{yN=mByI$et2hvoIxyxVWA|HED3 z71$cDsEvr+nTo+@PJ-0vDAOFydd>7r90vI)pJw*@}ZbMzg_H>4<@DEkNid!UE&9fVX1^~^9ng?p-D(sBQD$xPjZw7gfm!BTp^ zY;Et>rFk%ImW16)8;lMSb+=){ag2AG%f9tGIoIWGIy*sp;w1^|oM(`jFB`3Z=J+0t zz{Kb%zoTuS%TOD9tLDfi{cn}^!#;i33b`*5230vpHb$p(HfwvIaCdi-5(?L{*ds#8lZm7uY$PA>Fdg%VtX zjZ~l5CooDWea~LNoj$8bJh5+*p+tSe5Wwpi?tG8_PLIit)fNycMO)ztuyW(_Lu(xa=)3l`C^k*O}+cRD8YnY@Di7)xRo$C(s zBk^zKPW&cVxgI@)F5wKOirp<8reeYW!Q7-Z^s`@E_mn)>3H223!9A+w#luRv1?7rn zRsF?-2#Z73hOWCW^cLP_>*@786Q|%z^EPsoJ4I^YxM1!0JVnL|`#T9E1+yl>O{bP9 z_p{)+yO@h{2ia2^Bw6>DaJFE#Sn613_zp*~5r23JGzo|33(3gm@~5B^yJvrOcF~`3 zl7~w-ypEoko^T5*MEg+-y60_^eElDALoyf7B6)GSY(iuS$GeJ;*r=CM-wfvvwlY~# z18al_)!?Ur7v3P*WgZ;v>7UmH^yFY4_gi(>nNLC4xH}|?im2I6`NUUR8Xaw>gxi}r zp|DrQ{tf?g%SGN%-N*!}MGf6Bp2uZ4FIW`QyJuBrUK_ka?I2b1M|7EL2tutWv#GXZ zZU@unqDxFSgX{*chw6n|_Fz81M%_quh|cQ}{uO#mDe-tPojN6%xCwAZ=c4A$C=L-7w)hOX)=&Us1GEpRt0 zR;B2glgXwx$@GzZ?FS;i*?;_jqEA#7Pg%=kuP^DQO`){3Pj8K_p~~=sc-(Wi1q_w6 z@9xNX_aA23M&oQ&Q%~IvW(j_-gMJF;DS^RSx&^ShDQ7<)@n%001?DJx3N zbdw+EhdYI{Q!mL>`I0*??34+Q;jo(l$N7aN_q9j1yWaCE@103wY9>$fn?M>)D~SKw z|Lf>1qpP~QE({dc;1o!R=h`{<$o7qEa0~7(!L>LoP^1uCn&R&6?oM%sV5PVQFY?Xz z``0l#S}ymVv-etS&iO38$4)i>(H(rl$UD^$>RB12>i;#X%s#{KPVt?}_ zkY(VaCp$G`CDkCD#lzH2aX}2X!u3llnb&k1$7tg57vbr zVU_&?uGy33SEmqjaFeyu*=Q3sn)C~<*Rkd#K2apYH5VZWqvqCNUnYvQjXm!RMf#;%LA*eUVNTYNQSUV&}J{{tu%@^ln2;S_exkN z=wsW&p`ta^_o(^0s3@-spcedzrP_08HEWF;FW$KmRZBi4;<2@#eG0_NTFwD+A$+05 z>74()#jdS~%2hZqGQlOZU7xVaLrdSy-@~Pt>ml;QkA{-HnM zDvY89xx<^sgwTiGWcUeorvOkpEEKxw;Uu2THkbeR+bDjD~Yo93hRQb6xaftvL^cxyr(C+ zm+2v=F>2%;{Iy(5N0FN5L7kI*!}-!x*5<|OG#;s{=@-0|xFy%X4|7KKXM62M@|s&f zXEo;V|Dfva*9&xJ{|h+LvXIQIs7}ylf-|h;-sNPv2uTks&EdimBu7WO9w=<$uY|=5E!*8Xw!6xoRpsqiV|4AUJFjCGEoc z9LvbN$s+ol4B_p0A2Pr{60Da5G7aDMMTJbT_muT}a1-Vh`!E$<1?PwdpC}7Fv|Lt~ zCopwJQU{7|GdkJR8-34LIm|wU6D$)OVP93hVBVTrc)c-p3F|8_O;5|`Nah?PU!aNI z=R;9He^NzN3|Sr6@4OdnMTl4zn94TCDw*OgM+Wniua8w#r_-a=6mf{I)|GVA=>5Tl zm?`w&7wsKviql=y7k{v^Rv3OMC6~x1-fZk0?Q47JQg1t6RAAC=9X0c+?{YusDaYuo zYA1f@iN4ID4o?R^`=G!P9ZtT|+2UW%W*O(y*ALw}%yj)S3tQ&ySM!lin@YZ@t*jT1 zaaODLq6{bxAEM8asxd3#?&}bf^Q~$h`{2%JGu1xt1#Uar{FPPbsKu*_^f}t1PIGvckLlPz+~<;d&Fui045fvA8VaHrDr4HT8R}g41t+I)ZAt zFbUV6kitJla_JTJO0^gh#Wi5jgo!7Z)r?W8BA2_JbjMj)T*t?z>f6ZU|1RqDTE2Or zkDkQqyUk@hse_qIB~`_DOYXKl!B_np2gNgdPQJw`kF z0VjQge;4m2dh6zTf7nvd9*!(Wwj~9j!W7lnpywUpAJC1_$ZMX({H40*%(D9H2wjcr zP&w5{`1(y<)!$k+mUVSicc@)d%+viyOBF`*lL0cI)<>=qrG#YhY9nndt}{PB!hcv@ z)l!^(&B4mHI|!?J#dfBph8 zYLR#^mgsJJ1QIo1aMA2C3m-|l>xrT#f9d-I=kN~)zH5g8vI?oK-7!cuI`~j&WY+WXja8r z2Zcwe%71Zn1 zWIg`bZ4}lNRF5)hZy-c(wDzembil*pAg7#6HSelUP8MDo`NMmj^V}8d)G;9f+c0N+ zq+{4!-BxtrOW8uGxl!z*e#%R#9HNUmL+3yOHlMEHOu%nAQXF+kkZ$se+%0}3U+faH zflF=2dV*>=QCnmiCJJ$GIlfab)`f(L$p&c{g8i*yRyOZ#6{)Ww zFIa_Tv7V}@>V>Rl3lfL!W)MGT`6K$0Q#z-os!sH`l(T3Rp3hCBFUe~8Nlkzk=4nKA zsMo9DLdt|(*-8BUkEk}{r7ocZAcquF1phoyDyLe|fs3>Ql6kpR!PwtG3OQ^`cFvO} z@{>M?eyNhvS1pk(3;0!3aHg(H1uq;rOT8q0pv$MRU#m2@S*NK2e2uXy?z@`FYl3BU zm-uz|xItv7x~L}V>!c*8E?i_I-#qzLPs?VhWECr=&qF)z32GKHd-R%UE05{UNWwky zHWPi}c8T_W)K&b=$lO3{v4Ey!wUpM??fzsEhbe|#CM|Vi=ZJjbq?OA>1L*hR?jn9m z-?p2H4di+79GqDTkUoE8PY~CQesYE{r?Z9hC&Q2?-W_NhIBwNs)tpi81N%71Dvs#- zYLwkYt;Iz9J$ns$R0~pqZBhNyO*}o5bw*v7C5R(x7w(ct?gl z-I7Nh6`$~D{t!=CEBb<_VI{;2D<`}w-{~#5Yj28F`j}P-V7BO{h(mHMX(6gXJ5Pm+ zF~4j@TD$ehZ8{bw-=8#~Yg&(-BmUL0xR}X1dNyD-GzmHsSN*J`$fiw^tI33XNBKeV zPM5LP85dYO)st42VW{bUHKX`#PdnSErr5uTXW}QjjvnG~qT_>9KcIEc>!rvGcy7A0 zTCAjhMpUX?!mhxpR#?vBTj@yp24_ev_BMKvHICM`r-LQZh#umFe9}p9T};c{z@@51 zP1HP}JP+ML<~-!|8>ybmPaeR-J()-7$;csvky4mww9qzm@D!CS)3dJXb?`GZkLIE^ zG_15ncCuc6FiaxI3OU<1ie6wF0`+-maTqE{FI|uPZ55ThoeiR#;|~ORL1dTOi*GDH z8Ey}E13DilGBNP;m$4Rn*eIUTs;A&(P%wejI`(I^iE;x400+Lba&AU z{D&UwSYT!F4Q|*mbS3->m;vh#b|Y2H-v{Zad$PSaF7|jbd4{Rwq_0{aM~UsG3dCS` zP|`iXBIR4&HFgp*zo*3#oP;aY7yBSf!fAX!MO&FcoVqG!1vbi`5mn32bW*HoVRthxSpS)80xrfM1N2~NX8^#$I9zhw{P-=>&@ zq*t0Gm#hp;=O6oDIFSBB*PN_T`=}%&!=8IH;j`a>?_$18YwuGJ!1zkz&eEgZw#-%UuP_7itl)aUIypcRsNl}hc$xFrV3JYUC~oa z;akj+>YPxXn$X-b(PGg0+v^pvcZuQMho>lkcvNn9MbfG`I7*{MZ=_#S*(kozc_O<; zyhjBcPukg&=`mD6OGP8^0&S?%I$Dl}s`#2@R>y-mL34X&4r7goeA{@W<8kPa~7AK*!t5X~^%EHnjwZ#Kfm9&ga zB%^idnB&2T_A+@%ZxJhv1YTb4vUigPvH&v0P2qXzBtlrK_+XEBCcsla0-n*k=+3LS zU!9e3^!*JAO&cAqV1JUEz|u11IekcGGXlJ5BV{h6?AX8+jS35M^O;p*fF<-8ekJlp*O4Ja8hi6`{gU{(T9O9 zZabFVE@E%CJBt$F5?$9f1$9f2+73sbULp8R)nj*|u&olgMLBeERe3+zMeZdE_w-!P zbo5fIWmmmY55%u}8sv)NGBakFL;SB|GN~AAn5xC*=t6;vqBi-(Ob|W7b~>7mqmR{8 z5=SfRA@DlZWBx!(uz3!$6G*jHR$0Lo$c$OTlfVvfk*Dw;D#d!Nv(VY9o}CvY#HN^2 z{364}Oqp9X=XKrxoMcc}XVE;$4>j^P?|zXSJ5APCf3i)qt9u!>R}OTzE3IC(LsHO( zheFL>C5I5duN)6YXE;w6m8rUu%8#Del~=8y>|g$rzjPnObcNBmKpCaFHi2A*>##AhxH0ZjvJi>&JMKI5Jo)53s8YAp z4P+Qk(H7=ME4R$0()vg6ii(TPY^7Z>m_R3z`}A(0xLZuT(#zOvHlFko_f!+)1}5o! zs-ZWldV$o`1@JVE(=MVFeWn_SR-siu`5hwjfNyhM=i$>aQ(kW_LvQt357Y1MQf68H zDEb|L4EEa=U0>JIO=%FG&H?FGoGs5)JK1lIWd3y*}O*gM3yiw zVOCre|K%jrS2Eh%9<4gV&zX&EF|uG5as$1{&z_|*Ib}iXmN*~&3iHYkPzx6GJmRQf z$#4lypD!JVx}C)$_9%9>T1>8>UT!M3y8l6G?&EY6C-fR#Qs4I2;=#}LNj)TBJMrG^ zscgXtid>j_R1_!mCpDUWpz*Rk?}3W(jB!wOkr~|e$nQ-i=e6I0=MJZ87<|3AeKE9# zksrP5NLBzY?m4WowS#+{tzZ>(Ca+m$Pn<4jh9U+h)PgIiDo%3@k%%;r2+J zF5%=TJxF_TmkbcgR7W#9#=POzZ2G+S8g zkyOZ3eB|??MKsfA#VmQ)KUuakA}LaAsv}Py*zXqQ)ASCPs6Xv!DO>|51C&ZLmlL5{ z(q4i2vZ1vgm{H7PlkH}5DH|li**C+F1CSbdXP)Hloyn&JEx0k6Ys9JsE zf=Mj<3D>n!(L43-b>YyC`XdiJ!)O7z664|7$yoUP@ zKKXaHjbGspy@+**Xz8p|hv*S-K=y%%IX{q2Z)as`R`&qXvWsbJ_l0}WnCiyKEnqfG z&<){?7{pe%+G%EA)EbrhGq7XxsIT5gsAzw~XYxQ$zD5iaDKe5iLp>fH-iRFI$Ls@g zw|QJnQ-fs#y;0_Kr>Ya~GH3vW#X?byC6MMk(RvTAVr%5`+9C172K!((EsCgZC9Du4^9k=*2N8Z02SwDIR7SN_R`gtPgm7{ z=m~rUNj9?JOzn(5V4Nx(>mo$P#tc~!o4j?;>s9MHEbh?{bWQ^{E^Vw?o0v6)XT zF+%l6<+bH2Uf5TQoD|IZ7))fzrj&KnbZ){~w#LoR{7$%>pi-2fT8Lg^ zm~%X80P5uWEC&6~QkolY?+9lv=vX&+5we)KVL#|8GC!uD;@m?rIyvoG`Y1BxCP@V| zPGVC0oi&DYW3u%im`$y6H_JycXLVW-ADgO4Y?$=KHh^Ms33cX9w#KaEnF1c!0_1RV zTBfbw8mcBr(^;5j-fx3eEbcQb^)=f0O!(Kq+J_%m#YaPDR@E8COx4*euFBVQFrD?VvkBe0ngekX)B(ydtOIGs~}QLzd7pyo$P!O+2K?B#9kORF^_IRlA$y=kz4h2 zIb4=kt{l&XxXVDlk1m#;RrKCcA#N?v#VG>Mei&waD?qS`1PAg)aESFx2d(TNmD|Wk zMu7~PP38_Xa>t6{;G3r!TbvRi6jTq_x5qae)BRucZMUbjF>W7}p$E=5wlWC9F8Y8g zbMyvrF}`}g41D*&zaEoo$)r>9gq9*Xqc-?}fnk{%Z1bxAJL z6&>L;&rjZnxbm3Tw-bB$U~wKUi?b@7_y@U|EpB1Em@daE=<)EFUxO~wSPk|&x|x^? zKV=JdDAGc&sm9shLT7HL50hKW#NV-!%wR1LJGCU81CydIi8a0sIxqBVOTLL4CO!np z8WH}ZK`q7#j;YL1@J929wxJoI+AI`_E>*|rK-BpMR2i{fRieps6`V_9!M7yL>hHdz z*Yq12$Nveuq1VVwIwM#{eg>O=GBs1!jX3LN4ZV!le@#msJuKm|*%vhm3>M7A=(PV$|!);miW z2&JImq!I5(8N3J8c`8!CA5cA92el$2KJz!=r)nnI7^rJSlE*v>X(**O z%ZqZ7crWwmPRNOikjh%xP2sUN zja}iZWFzFz#?twqGPL)%P-CHt)f4O0YHNXY--(7V$RdN_ru~ZZq9%U%RI3(Pw>f?m zIKf)TlcKv@p3W*(O=sk->^C{%)o?n2lo9vwywoas1+Je3PH9>_m{xRRzp#C9*lA>{ z|KXGMINsZufpl4Kx-_PjBDA%rgskmaaznM&yQLHPyLTQuA)_sUj;_A4ZP65@5-DN>`!Hdj;vJ+av8#bys%@a19jSiNT|ImKJNS0C1qp+7 z@-O-?>eR|mp?kmqevjvbzht;9%uj*d>5#T?Cnt!ZaJ;qz11dgjx_Y921#!-cWY-^H zV~n?@>=m2nHYoZHx%SI!hDZqz($p=8yD!z=qV9w7as##IIdNJv)gE)3s>X&})5tV? zs}Uq&e6yUVGdQP>dh8$gwfi}>=_cbLb>Z`yBLloc{zsS&Gf?Qpvqy1#^*V5Q2GZS% z%k`v%rEIANn=8SX$!=ZoQD-IT!jfc&N)NYIM4$>kZ7x%5Nnh6Ae*l$yDxaa6vRQgQ zG|VT~M&wD3*~bHOW0To6&!2V+RBdz2Li&yUobOeSbarcod=*#%Pf~k&QyBCUsY5OK z7Vl6J5-Bh7WHUC!jM`f%9c=us(LcWX2TQf^#Yxh%mbeHR9k^Pm)hms)~nWDKq*i5& z{hQHcm6PRTLRDG&C$df65_!meRl>j2*=0}ip2QSAT33-f++5&pw~^UYLu|20;+E*D zFM*}Bm9`Mw0<+zls;=(|K0%Avrq&MdH;=_dh{-X_$#wU9psl>ko?=TtW!*vKB1J?l z=F=72%1~L?JBteahgtnLd$n2s#=pk9F@R>W3yXQ`EuMw7d@9XtjgMSPr>U`Yf*OSi zq@Q<<`~#}!SrE5J!SVV`ytnFsU0M;^XeA_hJI7oXZ{v2V5bGXKF=J#~5sGA08BvPI z>PqUf8tdHTkLb_zGWng{7VX77|2Hy^mJjw7+ht?xw0j2LvM`XSbHh`53he76pps@M zU(Hjv<6_hac?frIgy_LO#T-DdaR_<28rVAVx3a}!(TkMjulZD~Yv8TD20J9QI-nVrNMk_2#0c_GZi{= zVbvXq{0p3e`_&nhtRw?}3tM+?>68B2Y8^`p9_K9Qo!al4V~zl=^#(|Vo8@CTz7niN zJVE|^4r@mQd{(^)4_L(S^wfQ!eUu@)!B{FN2k6gGhdc73?4CJ=jug9iA^VFnRu^!z z*awR04|xS7#9BHFe{JV=yXcqt0T^#feA_|mIf;~&A`2r*$qh(;8>$@(*Etn4dkb!l zhUaes0f)^RjfqfGS;NA_o;4Jk*aI2xmH`dztQZ+%s8P--R6*74rh)zbU*Priz)7=A zj)KE93LQd8m0e{feS8yRn#kw+qx^%`2+ZXXn2-%t1)QICf6S^uYf)8c5ZjPun<>Ft zbZyj7?TY+tG^k16$N|w)jAze7{{h9v3bt|+Xb_Hw&v_^KC!%s(hdg@*r>-cdW<<{r z{bj8{7PMhK5!13R8EQpAa&JIIjo;SKEH_MZfQ`< zf;^PwXQiQ(UDu2C3Gl6?j)8A^B0pr5)yD$4z|L5vyW8tXZ+=4^jPOGpO0TzB7r;j? zso)0TMObdR-|Q^A(!K1XlR>{WOM&+N3T&XcdIisnZ7-vACNYB6|_Ky|B1U3(U8|x<{ z;3;|FyJ4=Q6qEj+;7RBin_`Xw(Jwt1cVBoz(j4^Fp-v0yK_EB$Q9DtIEcDeNQ>>2m zAE>DRrNf*=og(U^6UpJkllLSie`n9JhXh(V-{@=1{Q9yDQn_7F5!{C+QPq8leJ1VM zcU1sxfQ^{XnDRX<%d4T)=*CVAZWF7t)e7x26z zqm*8!S}j;}p2_3FSG8 zXx){T6+4ZxBCAexe7;JotuvE!#QdiYWqJqlKpb?|B%M<))01ds&`~nT{k}rFgy@WU z?<8{*|6Q!d)Ap;*E{j8xih+Y|FZPD?bd#O3;siO3toU%(%01KE4)WUzTF06It!@Q& zd6b6Js28f{D|&;Iz<|M7Zc7qeVrx03Tu>bRy_Eq$syVEz+5+{JM|$PxO>M*k#(m zvrf;(X_5;Q^mI-mW29~z^RwuMb7#5KER!#-KU9s=O_jmB(I0jL_(ldI#ka)taP zxDB28{y~0X9fjxWySn53PP2>FYJl#EsZwJ(m73%xcFvtcrs9)2!CqtMLbU#y_oWWD zrzAk(-D&2MWkC=JZ&_v0YXhIfVC%Kh%vZu2^p$680+UopeV#bzmBaW3U6fR{cE!~4 zZbJ(2!2eEtcG2o5bBRfII@JVpv}49t@Gc~&<~;C)(1N6aU4%~;1O3^-F!$4)?ol(9 zc9HFamwQ3XX18pxnnQ(g(= zw^Bh;jK|ckKFCoKytey+ckmpSYtVC~4b&9Bk(PE|(6aNCiL|7NeYCNS2p&K-j9YG)NZYROt zZ@N9)1ov;NiwJe{@lV!#Q6Mly4WymSF9=N;;;nchy`Bl`9B!3r=33C=tL3kw*XqN% z61ION$U^iN(bXJJs_OE{2W+y>z(clC$BLySlT|Nr65k-g?2fb`TY#+&mPiLhwu`!` zqg65Uf^Lp!@B#f=w2Te9ahPEZ^p)bLjCrgX9NTZvpOl7wc8HknCc;B-#I1sEc7xsz zuf{iU{-+qjg2$1eDdzOavJ#AgqH-;NtTM%p6*I7PC`qis1iKCTAXf~5K3|c)H4D>z z0l^m_+4w>=f>T#mC;3pml0VX4VrJ12QaZb30gLcPvVX9VUP&{7gfP|a1TXJZc#OO0 z>>ysOfmf!pDn;jzpA@QUSzaC&BQdLp6=~wCLD|iyYe!cAskj5ZB2E{)47X9dZb{>i zUyIf6z&$^y|5P!$lkt%Gz>;S)5}*4!IZ6iFkFfnLH+DYFkX7uinEw~R>3btsKTwYD zX9JuRw|apJ_G9locp*B2^j?vaV$b|p#CXA{?9ETgUtT{t70_ew|bnj#GtXcIj% z)vr!^x=q3^t&aE>z!}j{|02FSW85K7+Ah&yVEO*0)2fBh#m%nvc|H~Og#-6v12@#@ z88~kKM=sK*a9;ngc7e@10yWi2!?pX+DNMxd^!zYLpr8K9U56845qigVP_W+WofZQV zJr1|D4&JenNDlcQ$sz~%BHX-uGOr9`%{r!#bUVzZsY-B|j}@zU2a#VSl5XfNkL$yM z9skdws;Yn0yJ&yyu&t;TxXzC>SvOWeu2SXbRB-DXvuW}$vVOn&_5?NGW2YB>dQet} z8u!}0M7DcJscbljJA%9(Z+u1Wrzrix`Z@up5cbG@^<5w@+_Pf7^Gn2YY=P*ej;a9i zFANmN1XT!I1tP%nxTNm83uC^sLZJWlP?zDtNztRRd*zC?Pjlpyyr^eVjG4wNqmpN+ zcwjfe#y^gE>>XN9tx|uu&o#)MzH;_@Br9_8+F~+E7MWwWfp=ft-A2pMm+nu&5wa02 zi#y{hy=0nf6MPDt_yqkLbB`|Sr8Updfkx~RzE7>sI4xNiyn8d;xgZO6z~A*KD!1z5 zggH=0xNZ1Oc}1)sLonCrEf-r3bneF@mvNAdVOQ+4b{QItDtsl#7hTz(*pJy({V6_M z6RbCZw1v~DkJdoneEAzAs*Jn=_hFP-(pQac(dQyJ`6)W2g{Yo8{*OyX+v*lp6_qLS zUwR1>%G{tpe{x$o6)?AauVUo~WQrQeX#JZU+*I|0M@rx`bGh>_BWpY*Yg~Mf{ z^b|U64~)$d98d4KTjl9Ms@iF_8~HidsW(Z`1?HdL;DzqhK{o z;%7}W@-aN&;(*!22^mmUWhrDPC-LBd0}PahAQu+mcybfvLl+KI~!l5x*50ODG{#IdL#`2O>c%OCKi|%0MX$w#gnmv z%*Bji3GVY)9#1alSi2j^NH?(~_A~2%>xeMb6G`dnfnn}`HjH*BZ)mc*Z>QycY^cyk zK}>`zxG!I$S_N)_As%YvLIMussX!0im2Gx5>7j5|MmQO)@yt?5^pjf(-$wdCVSXQz zt$CsW8!RGOGMvQgoJ8=ZulfcV-&vaCH_SZve7Sf(nhD#LYC1izfwB$mv+e8z>hhDg z4?7@P_ct~m#oI#VBA;k`(Zb~L@h9=oe2g*K?u#Db6ZR;aP`RUmn7}5;-26JV|93)| z75&qLnVsL96>wi{lV1ItH@L_ zp)v#baWBEHh*yh6I(xcEz_Yp;-}`@Lrgsg!Vt@Yh3WSoF&e9O*ppFqbqob`;?-CdT5!(Gr7mj z$@-byz+=v->v=ZiNhg}7YolMflnz@mp2^`|vQE~NRrLBjgz4#lh)KcK!=20SU znqG~B)IscE8N#!W8hW5x)>wyXeF!GxuaUzEMJ{=7td|c&ZF3AA;17o>i(I!2BM0^n zY)AE$V)U?Ilis9`Fav+8{(K3tU|-GL?pdcla$PswonSw_H#<2>tO$@mZFZACwA!gK zq#S%u`Ez+z#db1!@I)k=z!~EU#5Qznc@1y_$N{%5O`!#_J@2Lc);TLqOUkE8hAGx9 znGAPOC)QToCx0UGUd+7##y}EWpMC5oT1J*-(ekK!M8%^!n1Kx!M^)XxSDstn(`8i- zJ3_voB?*sC507yF*hrBi+oED0X5MuI`le@~ilFsq6)_9jcK4vp*@VvMHd`Y{;JqyY zdO{n1)mS9I=&LH7qgg-r*Au`nI%hp1In4PqeZ(Dp0ZPEHd@XjSoRvS!MNToh6yK)u z$F@?JKtS!T{?nyoE@vOj#VnB?TH9e}>ld;v%_^4ZNVvEsz!lw^^Z~{Cpgj&7%S*wF zwuChS;5qbJ z%k?PUi&HY&9%?Vgj>1lIrJAB5orBOpx~K-w_0k#fdPu4rkH|oe@ z!8#y*ZkL1{!o?`WqyJ=NyF}zYV2^{;1*iBwW*Jm>97~dj65rV_c7!6!w=aVgvYg z><6q#uVB~ZNYuSqz?&?qM|qFPNau#~$R{L=eH#42dMbtnRXgLe-hys7jc&*@(%(cV zauK6^O=50g=JN);&1#~kp5&{AY3DlcD64q1B}RF3@+_E;o$=Mgz4r_{*Cb;ck|j~j zlE4ku%Wue;;Mk4R%RN`&cmGA*M#|AN4_SM88(kPr*gtwjbbBi^>Ez6lBe6$dH=o39 z-xZnFdS`uSMZn(uf}Z@o-VQ$KTFPlESlW?D)NTMR?7MG~E`siAfS6^^v={md@?IoK z?==q^Kl>}|WZOqtk$3v8cQ7VxDY62-g+lf?KExZQQ@|_RD(ZsMHxV8FE8WMpPtB$U z^&}ZX{sj@LkZk3-CYs3;7T1T}c-1nhGppz1bt74hr~!PwtS0m5=4Lc@{p`V06E6qB NHxSFdMQnk>{eRu~DXstj diff --git a/kernels/sgemm_gemmini_dma/input.b/128 b/kernels/sgemm_gemmini_dma/input.b/128 deleted file mode 100644 index 5b6ff306622fbaf27bd739fdf855245b03cc795f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32768 zcmWigb(ED=7se3~L`0+;24;r2bLYkzCw2hRAt5cHgfvKZmo$R3ba!_nNDD}Jr*sQa z^6l>r7O+^%n)klv?EUQL`R&QVE75^3-~s>Qr7?qSMLUO@Q5`<#Wx^;lg}3Gme1`oE z!6^OGyMtfSLEC}0t9Fnb|1Pl2-9W!!fN$uV6z{i#TR4C-@O>`CXW_PK7a*NWWj0#x z=iAZUxWlZq4F8TH_m^dM=I>P*dPyT2U2;rgnAGd)}mpFmys1RBy4?@;t1=EZ&3&Q^fF;h}zK zoRj|>UQ65YJNQ#gWu?LA2SthsGIyUTnbLnQuRANVMdMcdEU)a>`Y!d=z8sM}8723{{;AkwM-_ zh>3iR{6!ng3;V#&VRo|11!#Y4L+DO-IWwiAWr_W`ikHlspb40l*1~fCXWs1no;bZg zX7?1{3hqkE;El!nyp{jL2HXrfK~1`-u9#`?G-^B?hP9EyXDM{8+Jc#cls z{U$Rsbk~HZ%h#&rRHom;x1IBpiy7-;&^=xtfbVI1_8P5G{r?y!q-k$OFH} ze}_wXDdbd+4d`X+Z~TZO-HG@Obx_xNJg$HpbPRXH54a2drd{Eo;l*m7eL|n$K3m19 z3zM9;da>EK$8vWZ7P%0(ugB0G{7VPtNMAAk&gbBFAy%a6V#84n-X&vgjzaBk{|1@Igd#|L@3 z;%s{aZrKCznYb#qwdq-L7rM^b-Q;$lslrR#AXRk#!xOj&zEp+iUz(cGgyxt`@KxY_ z_?YSk&*&j7;Trj-n)_T4o~q$=!j`nZ;EG7lY=^PfKF~FM&tGYMC~po2!_YRVvQCA` zv<;Vo15ORTr8bz0umRq1oBWMUZ>O)^j_Ue3)O5b$>n0IqIhUvo+`)&Y9Q+#Y1nCU$ zYiOi)JN+;x?NJ@@F88*z{fp{BFblWRWvLC#r!Q1AP>Q$fcJ!L3g_}_ZTPXSxU(nD% zR_<$x>GAP%{Q9svzB-gKBiv7PTMRp+63^SQrh_VkpE{6~iQh%GMIR(C;u;Xqamr$O z=&frbgDl?DyQXj`XF>Lf15dOLx@wm zaS!)Z2-~HO5f)Y*VH5smOK>vkz(Zh9C>uYZ*ypivj+>tc!#Jn?EB40~AOK&bF z4z+`0o_VJ*FVy7+ZW_2j{kc|Xk=(YAs#;(fybTq@^!%C+L|&F5#v2ERKEG0x0^dGM#>(h1taUDaJUVe0Z!Y7Hl0s`tta zu_fFx&H%fP=jmJg9;(6b)E)K~JPIov-w9*9tK9)G%}*98!;Pd{6eu`fZd)q6rE5Y9 zCjwQYw{uQ}IyS8jJl}T5QwaxEeE3f_1HOcg{0*n}R+}WQ!6jfh*SE_#H}_H$`ojCs zgr%DPWYXysP9Z*{*Sa;KxW9zgM^eCeC@H6)1IMdnunTU%IP_yyy9;ewbpd_nt$hX6 zy{BK`qbM+Z6sQ>h5@jm~ys)<>hO?H&?5Yp09 zdy?|#c@VKTRZBfry>q8?XT6V~z;dr=XbK+2n&u!}qnzydWBr4$1HbXkWctjX%o%Na zp3Uj;UQ%7>zRp9%Y__0+p#`dNQ|!;3Ai>n9o2Dl}#dTqi+EST7_rwfNcDl-A=&1CN zZ*YbB44&yLdT`{SUFY?JH|nU{irQi(z6Jx_&+Irl!E5n1XS~#&c8FMy8RdxH<0INE@|; zrg98euOAc$EEN?~7#?9cuLE~h=V2m@P=i8AScTW?*%0vufv?tb_Q+6OK=}q>yD5T| zX|6jbZXo}qw?le2iEfy>I2YD13ognNu`hK3m86^a zch*n6H2EKLPfh^W-wO-;zofIKhcon?n!!#=XFum-P8rjPm6w)&FqP<)I%d;F%0Wv% zgV)ploG#k(&NADRnY%ex>~ouvD!}B(A6$i7Lux%qkH);AKH>LN(Ju7sL8ru1@D@h! zZT&l@&i{>*fu_HXvKEWjZ#5VFD7wm$I>5h5|W$;b>M}HzFD|ART;SIJ#^omWbTVcBRBUqb;nwPkp z)|>LU43NGI{EUUv9{7#QLIvzi8v-Taj0w^n{?n9Hsj&!Dw-1u4o5?WKYl!ddadnh; zn7xoiPEZz%vsXiV)dc(4+`*J)4gI7S(LvZwrFkZQO+Jl?ti@!aI6FZ+9TU~FKI-!Z zzc?&5|Al5#z{v{#xqomX=2fGpNZ_QJ4GaDH^bdUp-%%zW8Y~8_Ie)M#tqksjd%VlE zA<<)S3ckUGfe?J4GT!*`4O0{Pn=Qx%(Y$y1gq8*3{VAp-caJ`$@1Y##<~b&d>Bgn- zIu_Ei@o+SU{|=X6W^J5PsqoS(v+Os# zDrp}y)dSQs=xQFTX1pJ>*bj*hMT=y^DY&%YDtDkf<1X}cmEc>Plr)#O!3wBLRZL%V zKmH;As~6B??+iDP8avppsZYh-h#bxLEP9&1!0Y5ERB->%UGh(c)8T9Qx9tseXs*pj zOA{Yq%Df7)!q|e9+(ED#GV4Y*V`vwir0iZxRVt~R)7P}5#rBSSl1{^l(0q*Y*Xp7g zuhP(WoH6lnj!*o0KfRuIcRkfGdG;m(vjjI~DY| z;p6ZQ|KT)|inIbs1xxv5&9%q`ItK-CE&Z+oIbVo62RaWwtI;Z#oWqRKH&D|C=wFph zI?oJq4xhP?Fpm51P7cy5GhRB*@47gy#?f9~ zI7rnnNY#03OdY?BpN&RS8~Es)Pzz03J_R?RvU`*ttIAYbex=52>PsF6Q<3dIJQ8oH z(eB^^wZdJ)XT9uv8hq!no*vBx<-B8@9JZ@_*e2S6_W47cz3vi7!`19a+eQ7OUT_E1 zj}!GF%&*$1meO;Qb26M_7ErQKG%x~>gwqFBQC9UxPOz^WUT?BdA>QMB&1=mL+OD6_ zNKDD!x;uEcSJ^jyVH?L2^fGs3VqRWF6Wnq5(e37B=Mp%CTR08)6TYKb;Q?ft&8u~1 zS^)d0r#+%JaDw|NkcqoSh+ezpp`Sh;en^K^a%f=6=$iiM=#g+O&ZUk-*5^;_Kf}sq z94CWfRDegucJ#K(nd_8TQKcngYlSPClF%8FnHDAkC5D&rB<|{VhR?uppZXKbENF`J z@mcUQ^+=tQ>B%JaqHFZMo9IN5U;0Cx0BLW}1)oMz3 z1%F9zoyO5tjqm z`rFSAPv~YK!oA!t^HB>d~s@D6n zbvk?|x4E774V{f_fHnMqjngDJ+J0&_$Z4D*>hyqgpT+ufyg^g2a{g52Nv_h)YN!wM z!%G6^Y%VV!{~G<#nHNf7Ci6{uM!iv`sCw4XdcV1aUFk9uhMv5}Ton(acIc8GL7&O{ zsPE6<04n}ne3lqIrHASi!8CqRUa50nYjX?A^GWer3MD;JSNJd3pqTdKFkT?KdsyIp z{BG(>@1(}Qh$bY~g1zAu{H^ylKEo+)OPuH5H%WMfDu%Ylb=7}iH~*n69%@K+F(Q-D zHB9mUr;>^3PB;T>mp^x>Sa&3+&@Ff+oQ20!Pfp`|>;#*#{tFc;Tl5wM@>rb-a?%X- z(D^5!H|z?9;0Lazra_v#<0*kFQ%(M8qI!7TS$8jVqrdQ5^_w5-ow7~kISZ+gaYgh7 zuBx{L@0wGTKa>f|aB{`E3|!UAo%6bjS_Ge|VNgA;DNgrNniRG-<>udMvl>7?UZQF4 z9WLX9uybsAp5o6CU!}BJ0Xua!@l;C73EV1fPILOt%YyY(Qrtak5t{38!y!^Pl^s!QEtbBP9?WQO2qXeae!s?JY` zLfvSyU*2@Lv%ShZ02^fai2~j@GmC0)Q{DyfdOHDhwYSx8D!#}T`x)j=JWF{fYtjx?R6mwFe39-ur{JNQswVM!+aWH#Kw*23u7_`8 zOV!F8mv{0ljiXvvNc2okdhVRXPjG17=R8HH3^jB=Qw#MXEKDzXoI6QnREY(zzZ+ z?L{uiwc%DE9ruOd-W#Vn^n$aFvYpiL@iWEwxF)J8YRceu&RIK%i>e8Eig-yjH|)hP zO`^RHH{l%-=OI-2qsvrVxJ9eHV2qCzmfNG=N;fT(i~G?WR%K*5PZe%RDaE;~0j+Qp z&Vgi_2P*#9oNkQ7-277ND!%2T{2x5SbG(d;!F)f0 z$@v&P$6pgu+D7gwb4E|0(omTDxS9D2la1HgO$4T@myYVte!rsaCg0b$ltHI6RcK>u zK3t9uoUNu5q0Su~7_Fxc(Utg!*T&3KeZ`BnP~UIDsr6a>0e3<-teww?5#B`=bUJ&v z^;!2UG>Ddjqc$r#x;y<9sL1iWU26M&uODuq1aB`tD0oeE#tB?SH|LE~*Cu+OQv1*V z^QY*V;asiY&R}cj1vWBmU<*vf36xEy``I?7ehK^SI%s0k!Y6%UCVRsS z1jk>3TgCCeNbl^296R6^=hgr6SMI1_6`kCyF{R;8J&#`aTcHNN(UZMqv{1*0^LEL^ z(R~~ccMPX_4(8PaKZtjb#-5E4uSL#pMstS#;JMB)E^D*V!UA8?JGB;5x$WpHf3zJJ z=zw`6lX0f0VU_nIUJote#bDixxJPR7UC5b~kyr2rJd?{YYj7Zs#$9@nDjp2uTOE(n zd4L|FXFAD4&GZqf#Q`&vPIF)So9`F=E!+xA+TJ!f+!Td9m8U1=bZYMbq0Zu3ys98RJU7^jYV$*3aN#?Dj^NAN_qo(lW-`3t^- zznL2!B)R*yGB>O6_9yc>XxChq3 z!e*GMCzC`M@rDoLQ)+1%IlFZZ=QDU0jL?3)&wGr&7EBjv4fm6Z!&Y|9EX=9*(`{}* z*WfnilL_q%2170R>tJ2pZa47lBxRHFCOw>sBs|lf2WDe)Q!`xMZi03+OOK-l^oUC1 zH|nZr?YrS16b((lYFIaJ4K?&yCoPlO@=lfGuKI7dAr9{vT1}a}$)TCzTOU`gU=sM| zh$K=Yl~z0bV!^K7e*BAHk(F4y^NcxT@@L;Hi;piMQ-ez^%V=4X;?$f6jvJdSk zycB4lo8mxB=2!A&(jU;ugFba*-EE;EG(sK0Q8ovafxICw|C(JY4>XTP zb$5QoE$vY=+-3!3H{(`(813MegF3iV$8nZOQLOCFr1HRU5$54+evkW9yxpynV`)<= zu@Uz(qxb;U=7|&*9l1f?>=ORPnGsHaKlDnTjj=Yt%WAvW6)}0$1xdsXnzG77+d2Jl z0T0q+okVrjVXp!vn&fJM%&x@>?t=#S*!;j9+$Z@rQw=ju{P}v(#gvtdXw<9l$WIqI zf$g0`>Oyb`T%skCT`U&2@T`|orNf1OZ~Zkrfzy#hQ`7qa{;vjBh|}1>R+dgXj#Kj^ ztjXGYOP|0(=a&69P?Td;X_do%>9ydWv5ovIJ3&oGnfU@+0-jMdr`X`~Q z-K9569&=PC@?X)V;r1}EvV&Do-Xe3$*HF~&uP;gpcSKKDTd<|Ske2cibK91uuWZLq zZ3x49Ge@29%R+LupmW|{(Z#5gxJ1vLN<7VNfdLeP^crom$&EIG@MjUf&Imr*0}G ze#Zv#L5w+OTjDEtNe^TWKFd|`k<6iMagq2`Q=`kEl)ubG?L6FKK7-!T&iF1VnXZZ_ z)U|xst&jUCFQ514@Jl^E@wuAf){+!?4_tA|o7dn%SE$0<@RV9>yF~V={xZ=&<`qVZ zx_JXN>^X7ncJX|M;&uq%)l`x9A+l=#MlLf0no7@?71X4@rK|Ozl$(bOZhw*3)OIHFe9~1X6GY z)!I&@^a&%QkKvvDh30^hbek{dA7E?S8*~tdIm2!C=zP9mM+5oo!yi;OehzK*Do8<7 zq{}T4ueG?i2yY~*{37t1($P!Hsm-tfEKuQ>v09t-k{@2YE7wZz3_UUnX^onU$3r7{E}d`+dUfM2@_+P?{{(i?Pqfl6C3%^0 zD?2ls50ajK#EAQS_KaAnU{)Rh6T%x*j_?6&ExuV^ECc6aQxl1v%H=-L70q|Sjm|Q@ z$TzrOT?1-6dNe!gDVZ4ebOCHlSd@(W+bLhS? zY1OB^&0KR9i<~jvs#!0{0;X%x&p%HPDoC}f|jfG8+(MF*XEVDs~6HhjI z_0mSIaMA%w+tIO9LZ4188&|IG%-j?nKv!!xf264tztoB^&t0^ieHn zr2fVot#&w1`B%Ef8KFJJ>mTOT4xi#BowXT{>L|PK9!doc?qYGd)9z{_r9g@eQ zO~xa=xx67UOX6&NCVg{`c*HGnMf8UAtIU{LMRogbSGpBHhrXp*{ADOzWSQE{tD@7P zi*4%G3YKuQ>7OJAx(kKfpKzM=hIFbE-r@o_EA@yrK+kz9Sww9}$0>2D$zZ>O%J?KS z1It@KRNtK8Ax_AdDY@fAJR9AKb)X2PVFk(jrNQGoRQlR!DiW>4=xo;gP2TuY(TZH2 z@6ZhAsVc0ZxL!Du=K8$6-DNg~cwHyB46MLpq9=dm^zI4n4hLwMI6igwrv6W-;aOBp zcT!iQJ!D3VgZ>nvv2Z*xRnptfp%8DS^^wb*&7|R9%>uO;2iUc!+@Tmtzo}IGlgvda z#L<{5KK@27q~B9^$>yWsK7Lb`R;t8E-T|w{4IYT0XlqQDlmJ!pliM3FOP@Hc2hngo zRIrzQK2-Z05*jJqaF*LYo7H#aAju`y_+g1CmqmS#_D` zn|oZ({fh5P*PKo*-0{wAZ=Lw%V@;9B9a|*odU?!pHI2gj71!sL-e^5T=DhSOUOk5j zb^zVQaq6&cf_-!eoGZQfJ_ex?wwLTD;uMT2i>I)MzlJkV4$zUqur}Hccbk{+l#}a= z_!!R96Uo{7OAhu(_2z4Om~^ho_A*}KAv^^81Z}i6&oo!yswjb|Sq%g5OWPN!LwRaV zop~6K;<&iB-Y0M&^p>AcFqo6WdYeoT=ucOxq`#GbI-zw!w-kr6Qe~g3*}@0hgqhS% ze}v_8IHWrUIsQC@M2>HTP>>#mj@gh{ttaa{f>FC(apHl(XVjk?bI-&jGo0&GJkJoQmMU+iBVeU?UB6pl0x526Qpt4;$_+wIv1a; zP(Cm0W^;N)H_EJbHDX+jmZJ}F5x0x0A2p@<-$-5?k4?2B+Up(U#_s+hbxGKs1iGN# zP%0>mA0#atf$uUaLBGG3?0S$}Jly1i(M}?`a|Q@M0vBy&`FISvZ$8kt>>vkt)1gy44vN z7@tv6uz%E2At{c~ac7=)-NwKLcdXw|HKTD5G41^V;+)o_v4NpHID19W$rC&WKJ})W z-QGNYMD^TWDxOMHW*mkcOi4P(D}>V7fMB(_D;(r{6UY? z|CnAlHhe~}ptqbChdQtPf;1YJ*MAy4=dwH6-hM{wLsG;3oENQbLxi`crf%iNakvlEwtr<&79_^5;7Aq6(j zF833A*G`d{I2MOHeW9GH%iqu*Or2B_ub4{lbMR|i=l0;s-q&~!YPtWyM!G1zeGxum ze{ct?GVT~P13S?x0oO7_+UoI!q^?Sig^E-9Sch~el%}r>XWcKO&^AIbH z#tP@Wmq{-aKF)E{1-_!y9XAAV%1;fIt}KjeAE0WN~Wv04FzzYb5Quk zQBV-_(Fo_D(~~9$@s!q^65kvg8XI~@-SuSHq@U?KQOPXrzdE&UK^RjGrV5vN92$A- zR17@wt_o3*-!y=1dNU%Yh5ulFXai=6H0JhdAioj^Hi73k@Tzcl2t-8faGe|WO^ zs~7m3`-Ri2VEVkfR9R{&^KT=57d?r`z5f0soXqdJSK=P8X@N%k%9N1LXubL^^rgEB ze~X5BGn7}>Or|E{pq$2EFn{!e`CvbK?VvHtfv`CVN1cHAT#XV>sZ!EaXBNz%nwXkp zjxcR-AZ6iv(XYja-k$J}_W*}_4@`{y*WThKlIK?vB5ggz@;aCqcQSMu8bWoQD5UFa znrhzr@j45C4oBq7rMFN0d-{^Cj2B>NzBZB0=9T^v>OpQ%R-=;=F*DAh{h`5lp1Z3w zdLd21Lw*s64cs*u)DK)Lu#;BvY&h?H<^QAy$+V@#li7r|leWWvfOT@}|6(`WgTb|G z0fqglrUSf&&aCY-?(2@X8_ZID6{eehW)P3zsYx09KQKWtG*GeTjs6Ng)$5?KTnmE0x}=PLE9&WGL9Q_-etRCB%3q!ya;nwjRD)m>>glo5CA zMffiqA4uRa;x9YV#!%AhNDtH)mDLpVcSCZ~kELX)IH~4~N~lctz0{(v>q2)` zLiAMT)vg#T?%r4@AZ4Jsk*)S4O~f%tA+HHn#v;@dUPboWLXh8{ z=4mQ}3fKVT)AzYa^ny@&UAce|rVEAWS&%e*x>KRHPqQ_BKgZHNm~A2=Tq}J_&=N7Q%KpCUUu=ywhH4o z(Qn272Dd?LS}dep4{FJ|Y^l&q|1~G~4*8vho2e`PJh^D2uI?J);%7hy(pZg;L0DB6 zmwkeyN(B-})=THUw6BC%Z>G~YWmPu6IPLNNawBlSo>GUMT>OikAeHw=wZnq>Ox%m5 zII7^+;k!D8%FlWD9v{`?iE)65jW49<2{m3Rddq2-b-!*U(`qNxN0jU@SQZzn+tE1p zPH>m%6DSH`ok;CaYng0w^ZEE5dZKB{djdbW_u;mF8)`w-ReOvP=5v%vr95zH5mbeR z{J<3B@BI%>KJwiS7GkUG-{k%^c4rCgREQ49>=>bxGG(^coj8kfVVW*Uxts}93C43p zdskKAEs)OX29?!C>;@~NDeZ8~>l}i_f;r)G-sSp@E+8{mPWS&DVR7yfjCarBY$7}) z>gh+lGAV@3p^u-DS~%sszTP~j7kFm&ibDQQ_{vG9r&_^LyzkuQ2I9;mSMTjG=cE1+ z_L_#FKM|>~S?G7bF;GONz5k%MsSF)e7n!6(=(&sKoQ|Yqw7lR6hzV~BZKSXeE;slt zPZqwn7u|Lra#wF6P757!e)jI0-@SKuPyL~b;roQqoYFM%%hNKwS}#`>BU|x?c;262 z^1wtpHGy?en8kJDw@aSY9KVcg;(?GQ^DN5YeIGSitMbEqSmWH}1?~zwDHBO1>c;tm^f}`N!^IOfnvL*_a0@{gqNXvM{?RJ* zomV!Z@Dth?EQfm}&Amh`0&_Vvp0Gp1PZE=H9q8^i;gJ8Obj*I@sO7S+xRrFxE&iK=-8nVX<7(IrwnWR??2_?r z(_8RS>}yPEW)<13D%?Q;pDiJ+8oG9;lnsbWwIcVzX#V*}54Iwbg-0 za_4)e8I2dMnng{fPc-QFfoW!%xoNA)=TX{b)vxu>{(jldsfnk>1EQN6TgJ=ngwnjzn5LAp3}qVZ;+@536c32 zn$ji7fy>&1Hkob$5lkhur3Ou+4b)a8(R}Q!htn-^u)l8U|4VIUV#pfbR;4VEB59y* zA@fTcnwqqizmPa9GKB-)j}CIHV=nu>Ew6{^pnXo~oWgK9&qAC`t0ZUHth$AF2v6VI zpO5uS#%O|9i`EM3Ul%Ju8~-nHDnR{bvc>PTEx2DKt)H9jIE6%Qe2ax4C62V;<7+w< z`NZ29nl6e(Q*BjEoZ-t+%S&}3@~|xY64zPhhNj_^UbjFY-3{L3c&HQ^p~Xk_7vLiG zTJnf4RMgpt>9lVP2ez1KzRdWC)1K2wa(`a!cJt@BBiZ@LXn$O#CPQbhn);J=!(DaF z-oq2_3D~7S@=|e}AKFyr6S+Gh+aw!tULhdTwVXPV+RAQ9!V>SQ_eQwIdt8PN+q86sVkoaZ zEwueV^2tu%m$t6#17(1L`fc8lI5*VBug(vG#Y{3`OmpI5H;nV7dwzuMFi!oZN0^k* zgA)BXE)vb^_RFKs0q$sshuV)~SRkUP?f3;B!X`5q;9p-pO!)ZW#ab#)~_6!pow1sU`93=aC1KifS zT;M;-PF|#koRRL(Em95cduiyAb1h*Lwa{N<5qmUY8Z7WWb+gGH$O#wCkVsqiyt;0; z>#@);vMba#ydd#YGsYie6ZIFI*Sq3YjogR)(F>tLJS&t%cQd&}H^n53a|Ve^mE5F^ zo0V82ykE%trkSeBO{}kW;1JaT2D^hy8u9zq+6z#e^QrN|f;3T{Fkub3v&~~{WR@Fo z2I~xNCT>Xu{GIfVxbm~r=%g#|FQU>e;Q5&9`nZ$MF9}Vf+3-@%iRjR6A^v9b6aJRJ z=UuX?mqnFO^Ww^f8YGq0dn85e9sJLqX3oLHxWY7>HqjRSQZ`Gn^DBJe9^+p8O~^(L z@dvCzn=nIkFU`Y-eu!gK0m+zp>oz7Uq{n{h3;S1OqJ6=0BZbBB*(z?tCTm#ZUAxwN z%U{?*GK2mc|2F#>x~gY6#jFuN<$G~8|8_FtL1&ATu!?y6D)_M5ljR9pw&YYOx9B+(xkLl=0v*B|N};svitEJCBrOI6d4x_`({>TO7?-dNZF+MWkL*hD>s9_pSQ5PIp> z;PH_~&ep&}Q%rhG1SZ>6yw~rB`}IN9-=?+S1m^ml?j}yoXdxiW;aB)8ALOspP5L7m zwJ|tO-1)Q4dM+=sc4yPT*3195;|HqxAM6%!`g=70?&kE)7`F+b^5 zd`nF0WTs~Rq=LhPJ7q$6$|)uFd%}xCiC$ew;peuG%t(JYUUH+NlYCo?7eUw=O;;tm zEJ-h<_w|LZ%_z*N%0V}~Ga3WObqie}a)(YyPPu>(nyVBPCyLza<|>Qcqg&f<0VgS9 zwyU>N8&tTopIQjdu7UgJQ#!6ciqlicnTG#(Srg0SNvuX~;}awySPrRaxezQjX=TPv ze1ifs(Jx_s#n&b!U6+155_3i`!piW7_`mTWjglF*C$8ZIY8d9UC(WOtVrR)7=6p&M znFMRxyFxZ)4J=Hzieqit;GWLj!wVmjEQWu>%(lK^7_0#9nBJY5@z%TKs-RA9%o5xFWT=anZ$Q$E@psATF ziPg8z)j6*=NT%G1hN&@dSQQIB_LuQz{GEP;OGM&2R-7|k%Ij!`!a=AZQ^!nQ4JVjA zj!#+KI&?{rlMhfKsT+?9)uq;s7iiBf-3m}bYRz-1;2&0RqziSDZK9g_=Lki&OO4N4 z*RF=w`d@q&s_QW!GSFQghjjSSi}3fO;jw%-1wJ@`>J| zs$wA;Zj#Y0%pZIMO=y7daL?_vz!D*Jb2&Rjlb2InA{U(ZvcudELiV;$YiHqzBwAVF zzMjtyyv&qE(u`~Bka+t$B+01|y(b$_b?|T88m)&rR5_U;b_z>dh^wkrUM`iLvzzY& zo4m<5E;u*RSZc|<^oyh#l=8peKlr0;3Wn&0FgmN_bH}w5#XOg1@t@)%j|#jmn9~_7 z$^ARCH+GXe>pWQeRTAzL;IN;~pAgJ7b&iJM3io$A z4HBT4mr?f@&mxO{m-M6kPIl!=c!~ULAho|Qwl^;4n=ISxG)s0$3&JnTk+;!Vy&y6j zo%;NlIFWb6H5o6&_A&P+KY|W0T=@8nTvGJIOF1i@0@;OlzN)K-GGTuwJAmjahD?|v zyh`+;PZ>o454Nr@1+~p$$(PE?DJ##NVV`|N=P1@3;D;&?9-~!~9aoIx)GN$$eOI-P z59ur7ukVd)h-84h@JG^6%I&1)4Kx;e;ZKmowvkQtZQeet;3e@|Xb~Nmy|Yb}&*p?4 z8$1T}+|#Vh_n_3*yfk#s*NKC9zW!7%lkX=z-llPq4{LbjCU7RPmQLp$XD(#J61C8b$X53FQ?-LR5gY5 zCGQW}-)dz)n)7Za*)pHwRW{$j$3Pc1ojM7_sGaQGri|vJ4&n0jH5|7!Wa1$H*)|Cl zb<0RUxk+BAfqzo{PM6&;KPzF7td$6+0{1 zGx0RZ&LCf6ZZMJvuw$5ti zVSC8wrZ)RA4|f+&c(8qLvcPDlsG8vp%%R!|*H;~JbOwJ3!7EiDz^%vALZ%10`tWo!%BdvGJ;xLm_RWu30-c*2XRM}liZ`3y7 z3jTyR6t|l@;54W%EX-ozk|HJ+cJio12S$4toMccQCgLzvDt5mRedEP1B)d0+fvg7ZrfaTG%vV~P4YVnBe8cGIj^mx%XlU0?-HlMSN zs*3#BJ)lq1Kc+fng%hG```PO#x@dRq~(ww1waKaoEV_;5q!uX7Rq^%aMZ489pv->?|5C72~$Kn)pyQKr*`b%}AQ> zJ)$Afc|H8r$g*ispu9?qT@} z676k?NHVh?>j@8#5C7xYez~|ILQ1Ysu{=jq$;xmUonJqsfnIeup)DNuu*= zI^+$Y1GqV6o{&vL_5Uh5%fPCREehiVf)gwdC35xLBjba+6?YBp1&S9BQY^S@NN{(T z;_ejp;_k(H>;0#NYiIT>`F1{jhqhn`3^bO+o7lx9nEiuk!Is7C?z}N=?%hWRq3vCp zFI4+gKm5XO170JNqLStFqgCM z!A4O)yp~qhLF)-RX}#1I`+6X^`3_u|y`+Um zPixVIVzN7xfEfcVxgN=mJmqh~3%zWko-H!qR`mn11di^F;E{TsaOP_&oQ z;4{pQdrocrkwkkeoxsw|s`L_ftF_1t1u(_VRAu1nb(L9IeRQmT^EJ?kP$|mDK611_ zE3zjWWr$gaSb8sVu_r|WQaabfT(h}nl3fUm%xGk%D~NwoIzg?W;v;JY{b*WP7<=fP zCVACgMq}3xahno(kM8nEsY^VI-A-D_w$9LL#Y%8*9_xmftt!MF70qI#eFp7>do+hw zMxTf`tOR|?ZleK`O-bXfK5Q+rE!rF8ha%8(3-D8BTV916^9{6D21d#`Dxk)zFm$H3 z%E-7aDp49SH)#zmaCVo{>10E(i>@9g+=XU0K82L(`F2(3khyypE%%!xt`4)}F zzUFIrLgfJy0*O^wpio^}lbmKuBw~hg$s_D2ro;&{oR>6`oTbzcmd$IY3SB5mA-y?HqJ;79oq)Q&hJ~qN!Dm=G9^1a#$wO0s6&YW0z{AGs3Z~WtHWp`E~zK z^g0`hU>#|u*FTDsK)S7mzZys&t9b?yqOb9y_D(qi+>i1+gKlfa@!Ggu_N7gYyz;zG zQZ>X`y#f57UQe(>K6mth|#nZSU3L9l)vT3j?=2f{2j$|#_mc$4b{V8H_ z-#uo|bf*)kATR}s-*s7<44Oe2{!XqTKg_(csW=c-fL2gP?&lmhQL+CZ zYhOkdL2kT0rWYhooMQTpNPwR)9-7)W$vIVT;m+>V|L+X^t1^m;s-`i-^zy6vz1a(` ztqy?>Y8di~pIIVX4V^1@V4)ptt+e}U%iB!P&4%rHQ|Do(?+$FQ5dd^FzcfLLmVd@CJ6;)_7nlG8`Eoh$|K>GbDt*xf1UNXJB zYYk$BL>M%~gFJ>khXYeP5Ce|D5%$FCu4c1>xRp_w$L?XK8t(;YI`Wb^wxCJRsG;J! z-PBwd@sV%$jIeu%4eln+`{;6T(2~6m;T}9h-|ae{S3_taI~dRI#6t5_L;H~PH@Fl# z=ohrE&A>)>AD{DO`CU5V5B=KP0v*ETzUI)lYVp;4u33P(#KwsH_$^sDW!Ka;f2qR2qJgdVotNOXNB!;Mc+ zeFtNL_+3WD77&Bncjel+RLsrg*myXX)GinJ=D$RXs?q!paa}So&>w;1Ty0rJJQc}o zJB!q1&BZhyo{mh`q1AOxWB_-G>Aap=$u}Vj$!JmW+?uLP{RE1@@<3*EXQW$xNJnQ_Mu|M!~Z}vqBCWJEGfRI%=)m*&xvmhUkY~6WZs$$^4n~+ z&d=VNRrFHdB6$T}kUub|oHt9!V!qa95uV7Z$oD!*Y_v18fo2+dTQ8TF#4_B#7Wh55 zKTH(s$Ts&*Q9^8@*}WynGOHh1Bi6)z69Z`aKxNiVebRmoVi^55BQtI=q}7} zNrAn5s%(JlRa&S}jlGphYSd(>ScZAtSZ;U)3^UBY1khectD?6d1pqHe^TdVL)Z3GSPfd7MU zEZIj7`BIQwU1m3i}@A+bSQBvb>}E30?s4z0hS;ZC5Dnu<`giix!FD z1JJ?uSbgPSw%vEeija$u|Bnht_Z{=TDvspMI}z_MN^&{p%(Sr~VmIcq-s*(B%(fYn zRFo}cZ)AMKMJVe{R*H-=Ol7m?+r89e`B1iE#W`V*j2!lM+$_F{C90vn4f6B1kQ}-y zD*NVWH+olAI{V_=+m^MgY|s6!^r?$yCp5i7PFrAV@=T{h{D;LgT4p3 zJNww}1jx^k(G@F4vfUD(_5EKaU%o(a%QIWjzj zp&4C}*VTXQSfCoO38iB=tp+!7m$jK0aK4tH1z*LN$rmFAf<x&hF%DN(b9( z^m^A2cMe@8Fc+M1$uNwmtx6~TW~O8;hzb+W;a4W^}NjL&p4)HjA5cS+(OlXgzl#L(nb;wV;ne8t*-6t`QKSYxf+_7LQpLZJ1pON5 zBDQ(7EZ{pwdq%?s*CQ)YCsY4sE=xt zc|nGFPE6$ZE5mV3O5|oFuoA_F^V&ggi_MJxWspq5c*2RCFl)W)xRD?fSgA zx-ZiDEpf>l?-Yy)7o(7wuB(z+7_SUJz{UM)2C1p9`LF1PfqrPcj6~=Y;XNiGS}jpk*$3J(@T{y|Gr&?s79uz3h6YoaZ@+Uq?CKn1Q6?ervd$>(Hn3%TOoL za|<+)zHUk$>QG-x8Y?;qrk|sMbKe`o4m#(2zsWrK6^(&UKTwt@Rb)-khNpr~m(CpK zyzp%?wy^3}Z__f%2Z-G+W`w7?h}92$scZnfgvoagi}jy`B9%!<8n8RcnxG7vifk+% zTgw8Yy&aH5ZSDTSKHy1t*GZ(wR!t;g3(`oii-rgbS+#fY-{v79-3=s)NOwBQyoaXO^X5F3kpk(==WCVNAUzW$AueI@B{;x3743aF^@bEzbp^E6qrdyI2S|&ytBf5 znB9@Bvfhdfv<%WWiv^3jO&14B^7BTTVyko$TEp1rTvjKo(_$eU)2ZmKc#(N)sneN5 zc`U0TwWwtMKL_^FSs`AFD*6&~)yL&7V_MuUkjfU~_wWhn)Em;GO|9;H0@x9gtVU*n zSywg3x2G4fcr$Ppooq5QJKJQ`_K)$FX4#N29YsESbE&091nte%@b2y=@{pB8%Cq$D z4t$shL4&B99jB61AHQE_Hm1t6W_A)Pxp|LW3%h{K3$feE18hE8U~PDi-Igr!%oQtzm)wuPEwcwU z5Xw5yKGCbmBV^Yvg8vq7{w|kL(`hBLsb_p+-~dC;D)5*Vq_;+DGj35=sHM@*T zh^!ar%&Lf5BuI8Zny{Oipx4R_GKDNqA@Pj+)F|4T_P6Wlx5)YI0iSUST(h?Le@!%p z6}I(7k_%nK_2O2{WKmLW6z|vqF$#>-$)4rxx?ZN*TG_-^kV%`_*<^~mF8$!a&2rCi zk3$yjLg0f~kJqqK{^ew0IcZwuB3nSKlpawFt*dM+$hWI-@{~X(X@b*Q%!faklO?&+!s%&_3}_Cv zA8GT`=&!`;)`Ub&5>cSIy|F6mWTPU_fW)B77v#8OTp%J;c1nvW@zudSDqG zs~hpbF6i-SiwyENQki`vL<3!iokUVq6PNBqR;fs5iAbyKNltEvB%YfcL^u7OT8=Kl zPUPXLc^1;%=5aln6HLalc$l0oCaK4C8t-KEK-(ZiJ+{}%=-BT#bB>A6n9doJehF}; zI)g@(WRJqFWe2{&GvF>umce{)E>q1_M}JdfjbDY=_56bFv|D@V6&8&Xc0JykAM7BK zZjI#&Ur%ch_{+!S57nKnQDazF(o{qnU+r{uKn+)|yc5+Pv>0NnJs`H$^zH%A`2uOG ze#PeGN7WL3PG9pqB+sB^Wp>W0ia4w8(Qja#?KMv5)kh&Y) z!B?McQ5VsoUt+A$>-1Dwg8ap6hyv`Er~t2f$j?6K z*_{QRvwDy8($YouInDIxsMppzqy~a%Wj0nBsxTQ7bHSqQhMCW^&j>|6X)-HICWw=& zHXM|*;2z9VUrB2hXjlFy-+H;x*Nm@FZ`dMfkzH0SNOo~?gE13S)+fZD@*n%Syl+jQ z^Vuri2dS|JY79uxX*}UPk5~jE>woN$w+C5BuEQ-|Y2~I<%@giJBn}OmAX1s^kqe#S zWR>p+vb@{WcDh%mBGn!a8cK**ha|&EE3GJHgCt_C4BO6wqh?^%-fVXze>-2FLUi(oiJ0cDoDl zL_^DTYPe{mUZ6);&~8ung9Wn(Y4Rtyg%{(>`HoL6l`NAF0{KEm>UP#qz7U;)vn&no z=*xvht5<*G`HOV$2S6CDjkIYQ-!;}KPa&OXX9E#+A+(brv?f@+3y>Q94zk-D@|B19 zONk@?45}!2zUM%x2$K86DQHRIc-|(8k?g!23zcquY=SyL+8_y>?7rqd14S^1Pq*qr zYt3hea;0Azz2tOt#y^~;VH?C4dlTH2N4lPKh~x&Vqb^j{Q>29K%J0ZeY8MjWA*=}P zX}9#H%6*tXf@mJ!41E|4%WJZs>M6SNJ>rYVY)0#7*J@~SUChqXZFf`^^+kB5+0f4U zp!e~X{BLO2A4oRLptb#r%m)7A$Qak@5gXNzxv)ClOxN%Y42YJ|Q<{-U_ zmzGnRUtFZoq8_^@&*O7CCc<3N^fkGNPLBR2{S=M+J-q!W(R{cG@@?}&QT%h%&` z`^NtjU&VBqsB`%Yw#k3gd^A$EZj#$+@) z?2Dc+t2;jMIEK)0?MB)Ca1G0gUo0ch_{k;Oc=x`gz`aL+5Yy-=gVkz$;f zB(}x(RPT6%jL`A+Ug-Ic(fsbm77LHFQLfe<)hF|(Y_BT@ULuK9SxyzJbhsSA9d?;LUl*SqqZ!?6*_F<#p2$WH&U?$YYw=0*i6q-|d?a_6iRJ!pACCL=?iN7h|#;%gn<|d8& zE9Rc|s%|44PgIj;}c4Cy3}E>Dy< zN!wOk^%Sl0A^7#Q-8Sv89T#pqZDsc3pl(7;C;14{$ntC z5#JWg*-&y6ERW~1l^DX#A-U6$`mOOQT3ojJ!zU~%{p5prhgD_I@u~eyedIj6?yc&B z$OekrJ6CHu2DiNLBnYz}vW;Yp^4Vos2b!u!f;{@1IIh8H@L0Z=MpdyI-s^0U%3L}~ zEkm{f9BGQw`$Vt*!8+8*jc{bUxbfPZ~9`JNgO@SBg ze?vCvo$z#@p<`cB)G{9W_lZQ(0Z)^$Y9%@hf8q%d!_vdSYe&P$FvZNAXi~<@v9!FW z11*Ox{CIKQUPa%qGjUPmih8G4A~QZ4o%^h40dQKz`(8zoT)qVT+`Od^p+lKS{xesT zCSr@TR>kS!@JB7-bqZk?p5)GfR!3pJ4>L}2FgagZKSWwugtQ`G)o$5eWJ7W}9z6CY zQkmUBX>CHv`)3NG576Q&I>j}Zc8`Xubm5emsO3W)77A) zB^fvPKD|#KQ@iLVCDCx=*J)3lc7#S%uQS_k2G~TQU`g0T=K0yOQ5H7URls- z3-gj{f$Hgu1l7Y6Tf);>&4F{c%-l^Ms#?~4=ZIRSvf8hT9&%^m%V=|QJ#b8n;K`zj zh{aRmqM4t6rzt8`+n{^pMe<(wKq&kRz;7Bt@-)>n&PI<^KJiiiViU~WViQZXzlr>EI4vCLs1J(Yt%D$}Vy}n@1tQ>Yk&Vc8pI7ggTVIFU;Gwp8QT);cWjmzLL&27AK@%{wS0v3y2=dN(b!P0p zn2Wsi1<v+n_v?eg*K&`=%(r-tbEYAMeLdR?gK0Y$nU-&*uGxod=o@5qHrkZ~*lP2v{D z2k=~$k^{a$c%RPMQ^RXH&8-?D9&X}5o!(jLTSLb4ITmBX=|W`5;#qo;6JEDQo2)c{ z4C2)**^q7o4f{SCtrO%W9vatLM5{VZUYW^G;5}t6ao*Px?%E|$S3UF?9x|_ysK0lf*ohiyYIxK)Bj$WkS~e3GTsn$TUls{e9m_qT1olVa|n)ksy*V*Ei;H zDUB%OnX@lGSlojv_707J5U6Dr^aXEW>lQNDmzCGo8=qE;`I#IL`_SO4jJu`q2DFwH zR6A=IKOS39jZhP@Z=j`|MUTq|u7~_M>8p49_prOgew)H>K zSImcWtLKpzs+xIHz%afit0H?7NBh7V*cw$3pXUaot%rLusg???7}Z^$6)Q;oz%u2* zw|=)S!`|{tsZodu?`d?-RK|Z;A@{tG=v}w;&zGz`zzc zK#dEQ6Xib8xjw3#q`QoC#<{wPo^XoR`M09K{*kv74_HSuY%OP7t=4?7UBN1V z?EC>)5hrOW_%uK1IdHW9R%>-Nwo0WbHdy{6)53jk%uibjkWZ*ce~63`z2zh}P-Tf* z$v%q}#t$`|F0q?aW=#RZvZd}}t%ffWG?f8B?`KFNUAl;p|c3GdrY24TvDS>z6EG>v#0Rx~cH@6ncETOl+akA(u zoQIpuySN`ZV2);xU!hlD1lM4>cqT{FEw~Q|UV-E%V0Q4dm_)b5cQga?G=6a(MQ`YC zCv*d+s%+0Isg2en9>vm$5Y^Z^tP=5CUZv)fa5x6*WeI1e(b0SWSMYDzA|^j*QNhsm z_rkf@K!+&PSI4rwoxm@BhtBSQvKQS18o+;`Xq)C)(iED-arM%kfNo@p{1mv4e)t2B zOVeo|_DxjOEk$Rrxi_O_mlsr}eO6JtiWt?wN(B31rm55qa)7V#?ohR@+WHIK8h;ax zb}o54mZ?)Z6SCgr@NXqDijJ1W!JD0kghO||j<=;Zk>6QiPWNX(d$cBU;^F92whiou zE3vxh6@D);mTr=V{E?)Lm|;}qU3DGR27SF8GTyutw?N&7o3;op<|6SBnsIY*O@i?0 z#mOwlE(~>UU;{x0c7S-iNFLD$HjIBjHec6gviL@CP-LE>L}A&WPVd zzNpq>tWzqiok*lrkeDvy-@-Ptn{a{F$%+0Tq%7`{;balL3R+i9kw-OVp92!FG8fsd zI*_}vlyjN6RB^Q1AJ8<#g2@%*06Qp`==!n_@=F&*ANXE9NH$f47iAp^CS#&2;z=|| z%R)%sv;v>+ckJ`ZEbB(k=LZ;7#hiH1R_e>Fp1<%ZZxB=M!>TJ;tIF%vND4i&r2Pd+ zwan%zBO`bYW7KT12+5!(BBuk^8=hAyahh9ZUL+PTlfUFp(BB?_bG=X3wEC#&o(*;| z4YRI*dwESza?i+U|Kz)TFKAX#8k+75`H#L9FyumH#%A$zVx7JTisyFc9)ArA=LwyY z*l|&ElRO(!88@YetT!}+CbTnl62+@e-e&3=nn;JO;VO@}b4;>Ew^H3P4~xpIt%ZFm z_8$=ywFkQ-?&yZ@FCxX=K}-O_&&}6?YjFTs+I;GYOM|#OKgM!~)4FsCn!4HbJDtxA zDU@0Nj#ru-$i=SsN6IB4Rkf4Jc4d_oS->WN>Y!J+JmIXJc%VyMs%qpz~6XXtoKovlf;=6Rqv&GiaE8mRdYej>FH|O79jZY$@QR}h zFI(k$4=!(iy_aX?*>pd94r$B#+U?yh=>PkB^W8g z3DwknSk{b`cvboPr5@&N58FY`%w0tSoO@zS$>GzPD-o3FOqY8-~k?hbTve#lwy z)f(9SUES=gVuwhIJ%+aGOn-u1TMe@(B5m6lIi@Z$$$OcM)o5RcaQ0KyXf7@WL6RWqlieNv~7sUz-zY`YA9 zLs@@S`IeNxnfJRlE0pRKWL6%aE7?YN)OP~oWJ@x{oX(~N-tz}Y;r9m%FknS_&Oqhg z!!}FsRb>VFf*%Rw@n`ge(iD0I&&$%HwV8}(&GMrE%KzvusKPIhQ$0@4LEBwGqKL2q zx}iA$4BThjr#Fy;>JO;yU1(<~8}Zo5t~X9FJhDjtJ#gB7w|ATM(2ah{6P<0!hg9x3 znwx!znPpyfO~tm9bb%!Hu@fLcL8AXO*m%;+?cC}iL1}l1jKI)qd zZ{{Lfqu;RXs;tW9oI?h}sAaq@`6hq)Q)Csom8S8BF;ihRoF4m&#q!7s-WMqKj0yi#SAen zasYZN$LI=T=#%al)TAd!Z*&){K~3_LZhXJ_iVcxVk+34}H|(CSAaDW*i_h@q1huD@ zJPn8DUvPr%=!{UPUk9$@?lA;xhg_I;*I+Z-ZMs_&0*gAITtl0Ib21GcRMlnTiZ33_?}NM&ouQw~^2iQ+Q~_N`9+s12u$%#I<`|F% z`a+``f*aQaIJ2|C8gK73rDc(isTCOPTooN-*W2$wSZfoRY{jcI*lu}J7ZW}(GVRAo@DZRX|6NIO#0t30bF`6xk-Gq0n3fm}>@QyX4DP(PYN036# z>+WKStnNepT3vF@HrAPgppkw=o-T(~!B~lg?+tq&KjZ1ek!;rYz_8s)3I>{qr(`n) z-HOgM3(F;Fyyr0vsZ&m(9ml_u>6qu=lUu$y=v^%$e!$#Xye}}w>p<-x0nPqfMwIG@#kit z36QKe`1*($o}4NjT_Z-)Qr2DTKkb)$aDTLf8+y+*B;+Q5S+X8HtQF7)K*bQ3_y(DY z?g2G3zkDjntMa@9e<>>HUvyV^Gi0vT+d(mrEFAkGJFt9G+jaOxQ+f#B-w3<4tR6Ak>?y~}tRf)pIQ;_)eD_r-63;1t7uZ%3 z1!8nV-4?lmFFeM%i~S3$z_Kq$PxIs0GP4Wr$yVGE+d@}<;r2KK(A^rYGFTxj0z3M4 zT8H&p^x4ZvV#cs9XAu(wVX2WDev$N|D7@8L^$iySAY(cEK5O!U+gT8yM-QpdTSD`g{>DG)!*!k+)V$_oy~ii zpy@W%scY4=&#MM}9qmfDJ3rMgi~i9Cj~-2K&zU3chqH z`zKf!-;j?e!Aiz8p`}4IJiyW!bLCkyxv%mtkdK;S15Kn^2h{fgdWUxfO|Q53dZIHo z285q@ka0Iz;b4_7*YotbBELi}u(^9VWpp0V%sGklWeRACFU(W^&cekFZwQ^tPa;oq z6r173V0#7;2SJ@spr(ggZrRhWqs}7Vz7b@xPqKPkN78q&XcF=s9E z9D~fR(9<8<&?#7wjKDUlE%ckO5UBG{WhRh!x?7>~XOVdBu0uq39u0a+CU7d^&;d*ya)<7fw0HKQKKb0W_Xgspft#SiO~+@PB| z?>vW8ALpZQh8iBI2Nx&7DsAQPmq&VS9q&m$;oiSa7ZR)0e=5DbO61cI@Twp25v-yg zXC^s~o8JL7R!&7;?KNEN98dsi%7fyp3{g#-Kat-Zu1=W4T)&I8F-zmmqJ6)KR!3KV zCEBQ?8RL(|ZnGQQqEz_x0r*r6)Dw`dO3^AHUiK%=u`6IA=<8jqE7*{hJM=TMz->e$ zYmho@N00;NLhpQYs!mXkp@_#~vt18T(@J0y`Alzl&BzEi49o2?xJORL_JevdnACBO zVc%L6cVFnCx6nSU0D|@!S_J*-L++fof3hOl9qg$feWDh<0Wak``{`W64ALL{&{F&z zo#kV)s_O{IANzPoaamr$2EggMDQKZTT{WD#qP{yDyqg$NH_%3XMKW(H8>OJfL;rM< zsj7>ZiG=w=>@^&sZy=!yDsnNCyr-8{3V!opC>~5AwNti7wG)toLLIJfmZ^9g^$4`YK1_DBd@xWS^E!OGJOVS9;`9pocdZ)=JA%i1AFh{fa? z{{83C8J2!83!|%`Waa{`?1A{M3I&3pUKf$Wolf9${BqoM2ERjAVwbQ@K452UHPKh18-~Wf652t*H#WcEQzZBda7nuw81YilI}Gg6>Pt<99s>;QqB=~aE?K)z%*#KGbS=!bOL{B40Z1s8cFzBYujo~136<2?YH8O)fCZt1n zVID0HdFRr5q5>}h$LlAbid{Nh>maQ}((vc5+<1mPQi}sO=yz|Z>!96VMuQ$Y(@Doq zst$ZI?MwPvi|7;ELZ7^y&X1(i2wp$XfYmTpq4&{-k5mQ3WF9Bx8BK8l?L@xNK<}<} z(fJ}bve=*Xv;Tjmbz>vMYK)n^7+V{&M%5F)WHO#+F_>oyvhliQ(I$Kg*l2s@Q2!xe z%dX(smQg#fy|5>p>A#~oQ#T#p+r+bj9eYi0S9w`4t0K7$TE%uX2U|9}BERWlo%9>0 zkF^dNmUUs}Bxm#4PW^|903o@w&Y~=Q4;Sh$yp#Wk`X*nim+*Ak;?=Ek&zCivp=2rf z4z0Ve3|7PSI<^?!mbYdrxrg4uQ-8eCMcQDrf7Wk(&E*&0VQA0eW0sL5+~~%EP`dtnjCNr^c^ctj8Jf&PFljwykhfDX0;c#637f^*us?YBh!vo`)PTb6!8VY|?%8Cq$^wpToN6KRqUZ60%?r#i z3&?a9_8CNPHD80zP*7$hnXThAswlyPeFb-?R2~F^Tw(QA&4Fj$9;fj;lG)Xh%_keg z6*Ri1o7Yt}-VUU!O{}JxZ2g1m;~3^wL8^%u(N!QTvNbsgd zN=(_AaoetryvJw`0=?KTSMjCTky%w>qX#zQ?4_UVapbL44~p+*RfG&w>&?Tusv_o1 zQqWw+{*~#)L~MLXf>SUEJ1jrPJR+N-6V6;>T8ss|~FortqKM!qC8b`<{t;I>g)4z9%@?B2`%qq{lwn zWz`LI$NRaTB2RP0DncJ%J6bDv9Tnju?PeWSW{?Wk@td(fpryO`N41Dn=2>JGr=CnJcCpT^s`^e2nI1Nk QOpcvOdW$n~qw}i&0cv@jKL7v# diff --git a/kernels/sgemm_tcore/args.bin b/kernels/sgemm_tcore/args.bin deleted file mode 120000 index a4214e3e..00000000 --- a/kernels/sgemm_tcore/args.bin +++ /dev/null @@ -1 +0,0 @@ -args.m256n256k256.bin \ No newline at end of file diff --git a/kernels/sgemm_tcore/compile_tcore.sh b/kernels/sgemm_tcore/compile_tcore.sh new file mode 100755 index 00000000..0d3403d3 --- /dev/null +++ b/kernels/sgemm_tcore/compile_tcore.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +archs=("volta" "ampere" "hopper") +dims=("256" "512" "1024") + +if [ -z "$TOOLDIR" ]; then + echo "error: \$TOOLDIR not set. Did you run source ci/toolchain_env.sh?" + exit 1 +fi + +switch_binaries() { + local dim="$1" + local arch="$2" + dma=1 + [[ "$arch" == "volta" ]] && dma=0 + echo "dma is $dma" + if [ "$dma" == "1" ]; then + layout_a="row.swizzle_fp16" + layout_b="row" + else + layout_a="col.swizzle_fp16" + layout_b="row.swizzle_fp16" + fi + + args="args.m$1n$1k$1.bin" + input_a="input.a.rand01.fp16.m$1n$1k$1.$layout_a.bin" + input_b="input.b.rand01.fp16.m$1n$1k$1.$layout_b.bin" + check_exists "$args" + check_exists "$input_a" + check_exists "$input_b" + + ln -sf -v "$args" "args.bin" + ln -sf -v "$input_a" "input.a.bin" + ln -sf -v "$input_b" "input.b.bin" +} + +check_exists() { + if ! [ -f "$1" ]; then + echo "error: looked for file $1 that does not exist." + exit 1 + fi +} + +for arch in "${archs[@]}"; do + git checkout kernels-asplos-ae-$arch + + # re-compile libvortexrt.a + # FIXME after restructure + pushd ../../../kernel + make + popd + + for dim in "${dims[@]}"; do + echo "compiling GEMM kernel for $arch with dim $dim" + + switch_binaries $dim $arch + + # touch source file to force re-building, as the Makefile does not track + # binary changes + touch kernel.cpp + + make CONFIG=gemm.tcore.$arch.dim$dim + done +done diff --git a/kernels/sgemm_tcore/input.a.bin b/kernels/sgemm_tcore/input.a.bin deleted file mode 120000 index 25594851..00000000 --- a/kernels/sgemm_tcore/input.a.bin +++ /dev/null @@ -1 +0,0 @@ -input.a.rand01.fp16.m256n256k256.col.swizzle_fp16.bin \ No newline at end of file diff --git a/kernels/sgemm_gemmini_dma/input.a/1024 b/kernels/sgemm_tcore/input.a.rand01.fp16.m1024n1024k1024.row.swizzle_fp16.bin similarity index 100% rename from kernels/sgemm_gemmini_dma/input.a/1024 rename to kernels/sgemm_tcore/input.a.rand01.fp16.m1024n1024k1024.row.swizzle_fp16.bin diff --git a/kernels/sgemm_gemmini_dma/input.a/256 b/kernels/sgemm_tcore/input.a.rand01.fp16.m256n256k256.row.swizzle_fp16.bin similarity index 100% rename from kernels/sgemm_gemmini_dma/input.a/256 rename to kernels/sgemm_tcore/input.a.rand01.fp16.m256n256k256.row.swizzle_fp16.bin diff --git a/kernels/sgemm_gemmini_dma/input.a/512 b/kernels/sgemm_tcore/input.a.rand01.fp16.m512n512k512.row.swizzle_fp16.bin similarity index 100% rename from kernels/sgemm_gemmini_dma/input.a/512 rename to kernels/sgemm_tcore/input.a.rand01.fp16.m512n512k512.row.swizzle_fp16.bin diff --git a/kernels/sgemm_tcore/input.b.bin b/kernels/sgemm_tcore/input.b.bin deleted file mode 120000 index 69136c11..00000000 --- a/kernels/sgemm_tcore/input.b.bin +++ /dev/null @@ -1 +0,0 @@ -input.b.rand01.fp16.m256n256k256.row.swizzle_fp16.bin \ No newline at end of file diff --git a/kernels/sgemm_gemmini_dma/input.b/1024 b/kernels/sgemm_tcore/input.b.rand01.fp16.m1024n1024k1024.row.bin similarity index 100% rename from kernels/sgemm_gemmini_dma/input.b/1024 rename to kernels/sgemm_tcore/input.b.rand01.fp16.m1024n1024k1024.row.bin diff --git a/kernels/sgemm_gemmini_dma/input.b/256 b/kernels/sgemm_tcore/input.b.rand01.fp16.m256n256k256.row.bin similarity index 100% rename from kernels/sgemm_gemmini_dma/input.b/256 rename to kernels/sgemm_tcore/input.b.rand01.fp16.m256n256k256.row.bin diff --git a/kernels/sgemm_gemmini_dma/input.b/512 b/kernels/sgemm_tcore/input.b.rand01.fp16.m512n512k512.row.bin similarity index 100% rename from kernels/sgemm_gemmini_dma/input.b/512 rename to kernels/sgemm_tcore/input.b.rand01.fp16.m512n512k512.row.bin diff --git a/kernels/sgemm_tcore/kernel.cpp b/kernels/sgemm_tcore/kernel.cpp index d3bda941..38830b86 100644 --- a/kernels/sgemm_tcore/kernel.cpp +++ b/kernels/sgemm_tcore/kernel.cpp @@ -91,8 +91,6 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) { DEV_SMEM_START_ADDR + sizeof(float_type) * 2 * (2 * BM * BK) * threadblock_id_in_cluster); - MARK_BEG(); - // NOTE: hardcoded constexpr uint32_t quartile = (128 << 10) >> 2; // 128KB / 4 static_assert((quartile * 4) == SMEM_SIZE, "wrong quartile constant"); @@ -121,8 +119,6 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) { threadblocks_per_cluster, threadblock_id_in_cluster, sharedmem_per_threadblock); - MARK_END(); - float *gmem_tmp_d0 = reinterpret_cast(0xd0000000UL); float *gmem_tmp_d1 = reinterpret_cast(0xd1000000UL); float *gmem_tmp_d2 = reinterpret_cast(0xd2000000UL); diff --git a/kernels/sgemm_tcore/sgemm_impl.hpp b/kernels/sgemm_tcore/sgemm_impl.hpp index 76d3755d..10829db2 100644 --- a/kernels/sgemm_tcore/sgemm_impl.hpp +++ b/kernels/sgemm_tcore/sgemm_impl.hpp @@ -136,6 +136,10 @@ static_assert(WMITER * WNITER * TCM * TCN * NUM_WARPS * CORES_PER_CLUSTER == #error Unsupported smem size #endif +// timing markers +#define MARK_BEG() asm volatile ("slti x0, x1, -1047") +#define MARK_END() asm volatile ("slti x0, x1, -499") + enum class MemLayout { MN_major, K_major, @@ -1220,6 +1224,8 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, for (uint32_t block_k = 0; (block_k * BK) < dim_k; block_k++) { asm volatile("loop_k_start_%=:" ::); + MARK_BEG(); + // producer code: GMEM->SMEM memory movement // --------------------------------------------------------------------- // @@ -1395,6 +1401,8 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, threadblock_barrier(threadblock_id_in_cluster, warps_per_threadblock_per_core); + MARK_END(); + asm volatile("loop_k_end_%=:" ::); } @@ -1422,8 +1430,9 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, asm volatile("move_out_end_%=:" ::); } + + asm volatile("loop_mn_end_%=:" ::); } - asm volatile("loop_mn_end_%=:" ::); } }