From 43ae82e7888f209f4a2df39a846f135d592f0a80 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Oct 2020 16:40:50 -0700 Subject: [PATCH] vlsim fix, verilator fst trace, use ram optimization --- benchmarks/opencl/sgemm/sgemm | Bin 45952 -> 46048 bytes driver/opae/vlsim/Makefile | 10 +- driver/opae/vlsim/opae_sim.cpp | 176 +++++++++-------- driver/opae/vlsim/opae_sim.h | 19 +- driver/opae/vx_scope.h | 2 +- driver/tests/dogfood/Makefile | 2 +- hw/opae/README | 2 +- hw/opae/vortex_afu.sv | 298 ++++++++++++++++------------- hw/rtl/VX_define.vh | 5 - hw/rtl/VX_gpr_ram.v | 135 ++----------- hw/rtl/VX_icache_stage.v | 4 +- hw/rtl/VX_ipdom_stack.v | 4 +- hw/rtl/VX_platform.vh | 2 +- hw/rtl/cache/VX_bank.v | 6 +- hw/rtl/cache/VX_cache_miss_resrv.v | 29 ++- hw/rtl/cache/VX_tag_data_access.v | 8 +- hw/rtl/cache/VX_tag_data_store.v | 2 +- hw/rtl/libs/VX_dp_ram.v | 48 ++++- hw/rtl/libs/VX_generic_queue.v | 2 +- hw/rtl/libs/VX_scope.v | 5 +- hw/simulate/Makefile | 7 +- hw/simulate/simulator.cpp | 73 ++++--- hw/simulate/simulator.h | 7 +- 23 files changed, 424 insertions(+), 422 deletions(-) diff --git a/benchmarks/opencl/sgemm/sgemm b/benchmarks/opencl/sgemm/sgemm index d8d18df94950d1f0aaacb588a1fa20d43ee40d1c..644db9d7ddee9a375612d65afc3e165e1110bb99 100755 GIT binary patch literal 46048 zcmeHwd3==Bx&QmlkYo}d35jUjZ~#Hpgs^V`LNYLsu*fF3Fl3pKz+`D=!s0E6ji_mi z1#Q(#RbBF`;Go1wcOPy01i%2>lW< z;Q=ayYZR4|%P;?_9{KibzWtidgcG#>CN$-v82(mjd~#S094`}+me;bEW3`MQ9V5wJ z;a&x{OlbHn10VG~(<@gKF4y^`vdem{zX?q}6h#`Ur%o=4)Q*odG_`FVzjgZ5@lz)k zMw<&KO1UYYv#GINFB zM7%FKffRDs`}Re@ErtGU27dgKuUP>2lYvNIokEU<@nLC8xT&mCjR~R9hQ{WmP&8K6 z8ViN2P{qo{q1te3ctb-p7H(a+xGd7#6kb_X9SJL3pO~7+@^BE_%R!t<( z^s%;ZTX=c6s&;-`U0t{}4qH;bysBwKoV2VpTontKH8(a^HPv25F3>J$XljVoBciZc zZm14mD};YBpmd&P%}ufJ)&xL;C{0!#-rP__(F?<|Ws#~_U2|(=MS0Z8A}&VjetBy{ zEbJ%`E+xUu<6w0j%UYW^v{p67bz2;6Tw1*`Ttn^Vw>3m+;}k2xF;%M4)(wsl8hv>~ ztbTq&Q&sCWR@dg{RumA`#Y8k#TQhbn>u#zQx?;(S*wmW(*5;<>Nukvfizi06q1GBh zHIe3;O_Q3Mw*)qmr%{n3wxOmb6lJQI@EQdVZ-qIdt+2jT*NmoN)y2Y*h}F^-i(0XU z#<11W+RzlMvs%Mj8b}ijhc{Vu$RirI8ml76%Ys(n)>f--3vFiAp+;-1P^e~W6>F?2 z(r^u0Pl!3{hSpfsfNWaB0EcU7xYM!9*#CX^}3C7ToleGLLUMg*%HYk1!X5)7aC)ne7tp_v#@tKI5 z?z!!}226iE)(krRG}if^%MHu6h^TZRZvjoO4@qZFRvp4>s3M z7u>WB;%;)mId?VJZWr7=9^C1IyXS}dTyV}g&Gmo_&bhg{_PgL_o=w~V7u?KQ2|wt9 zb3SXXCtdLK3=rWF7o783a~*ZTIsZ4;i!M0V2IlH^!G{X#_tV6|eiRyXr6s{3JXWC}^h@>B78bYj|gOYxbX}WIDeo23aX}WCBK1qL-X}W69 zZb^TMX}W07PDy``X%?raMbfu2O_%IhFX>M*O;_w$Ch1QwoyGJ#NngV>U9x9_q+6M$ zEB5#$y^(3UU{9{3tC`Mb+LH7YOw$#6j=cxKh~-Sv1$$nU^rcMG^?Hs-dOp*1xt@cP zp2;*_t!KZaCo)YJ>)9viF-+67dUi{C7}H!9_Ux4Oc}&yAdRio%$2485XT7Aen5Ikh zER(c{X}VI+JW0P>h%{ZOXM&{v$~0Z4$1mwun5N700X?Ec~$NtIk zKg+bA=@%vabEfG+Jx3({DAU83J}Bu2nWiiC?3eU+n5GN$?346YnWpRX>_)oy^@@&X z)&)EM5NvIb9R#cjcBO4Y7@Tn; z*zt1gTr}x-$03O(J@D#NFV+?2Ty#CM!2I@0Z}@b0To#O@%} zeA>_n^^xvQ0Cvrwn4xp5I@*g7em`Tq*DY#?A?Uei;1q#aM20K@g>cwCu z^jdsEWe2K_4%JbiQgZKwoCD18XOBt2U4`yg{9JiQ;7EDL;-?P>PLifGaB^1QxtuQc zQHKL36zIgPz>%D;`;n;2dE?+)IfDYn4+oAZ+_71Kr?;=`3>?e(R^YjgN5O9&3Emm( zI%^m4qNRQ0t(;sRN(qvuNpdWSfgZ9wCE{DWcSrlX3BaE{}y+5Y<~$kNFs2Ov2Nl{bgnvyOh!N2apbeb zM@Ao+5qK%*^9L0#>h^PEQMYLCIf&oFIBfmztgX(pPq|R-8nr?yE-m6ht=A}@LRGm? zt2C7F%Au{Qm9|JP=_??iwc$H68N4*J)uzl z>Ee1yqrRq4kGoI@G-|g(ecOe)Povs^($x^W?zIVUjgB9`2A>FaeE&rRkFc3^zxd9n zQ&71FL9lBVy*SwMDEzISLb^(B#URqX0Jht8Fy}J|uRj>mo>|i00(uumrw66Ebv*G1 zeXn~SGVKT)Yu|p%ij6~q6Jb|E3WR+ef%rrB=T4x@D4-G$aH{*)$bEOPQ^b`D1v#shDIxN@f4W0j6k={2kesIN4id zmd+#Xr=wuf!-1n*BOKo4GfH%(eVvG|E<(`39IfbhQZ)Gqh6QO)5v9P9PGM3CkXRJ> z9dd?k{f+{2sKCRhvPVF2s{2})0}31ocBIuJpToQQ0V<{MOq--KCZx}d-y&~E;K@#5 zQW=w2GOkk@-;b7{D)pytBfByu<8+nrlgRk3OPDdI9y;R(0YHffNywO8G(G4VmGQw& zVNw~BSTY{3GA?v7u4KkPN4M|@NKSQk!2l@nL1g?1$){u%IJ#c|`0%ciXmnbMDT4fd zIe3CypOL1|iDOq6X)deicoPG46=^!233d%WHBvVU>i0Qj(f)WYjB}t z{!#MDEMuGp{szrPIy&}1W$1VSI{pNL(VzX0!LEaCIo%eh4zMIW|Cs@Ebf)bEs#Kap zVmplh?l{K=WY7`#Uiu z5cC31kssy#71iwsJkTkms;nfDOunHq`9h{C>%+$(dZv@fKanTO`T)xMs}XDn^N=+s zyC~Yv0D!Om9KnMGIpgz_kkJviw|)D)Qk;7wr>M$lvXj$t=5z$^p^9@Gaza0n7FAL| z?nq@f!g92V)P3#S_leYfBK2w|^|#1hHRO|TLFzS*)DkWALM64(k!o7&1tN8C`}Vye zb+1TWsHEQHNc}RUUhGKyGnlCB;fra3&(J1lXb<+I5xBCTx{raSBXDQ?_B%!Jog$d+ z9u8mR2wqIVPr+l95pGd}1DJ^%?mmPB?EVR6(y2h-RG=*`&|L}?0Z7kA+P9yaoAar5 z=+WH-@o(jfIS5^6fdB(`u;$s|p+9)k1YqD2^nh>WtmCx7>Mo5FpWPSnP!}--zB|0P zz%*NEIH|x~A_}`4-gVq*pi{Ph1)N(MbbW>hnhlDI#Rf@90=qkfMKx5?h~>t?4Pd#y zrJIV*|0^uF2fnNe0B z zn21h~NBxM3PaWPhN=Lh2I0lA}T?I%(<6}dqc-nt4=;|UpYKy}9Hke1(-?u4540e6{ z_UlFW>qYmSO7|I#?w5nI`+2zLBM^G3`%V;v?o`hVVfhDu?g;E`-@a3X>=YsQC?Ut; zWXjX?C}g)IWCewE52OwqyK<>)*Jq?WT`JY_0!9A?$^yS$@G1cGHm^X}D5wO0-T~-e zgYcfe<_y}kmAFn7bnH5cX8G5gTnW(>hCsIiv}yPDH<)qBZ~(f8vMxIZKI=!RI;VVB zg0$-;c)w=wHpS4CyJE<<{m1-oaqhy1nByf>EHZi-Ix2H`!P0#lT<8&yoa!!zA$Efx zmzvyxgkG1Jo}^Y8svfk#&cP#?q0eGw_ci{PM^Doy~Oc&x6^4o!p()8QBNu}VmTpZ?CyZ8wjZ-GPhADRy~%u_ zLsxahlc&iJ=z~CaJb`kLgW?C+GP_@SolBb&c6;Es-4l@VDjiXvY#5D;(O3Z%GQxKO z`M_p%e+PW84uGg30ClgVd51wo5IRdjtU%k2!J_!(B0w@1llI+u!FWPIomk@chLB_h z+-W*i)0kzo9R%%@T$y678tmeF^>Fw1scXk$zyM1O*3$R^a~CnA9}9M1oWQ`)h33@7 zS)w#?Y1^DMwm&}oO1&Cuf6ebK+hsy~xVu55Q*|YCtxBJOv}2HlvWT=50KZRp^~Qk8 zK}&l_`u0P?2Y)^J2mkqr-=1Fwf(hRsIF{@qJJt;9~zw&#%9I!tTn6j^3sk|rK>AC-Y#8L(eZpKc5FsWKUX$n z#xq<|Ir`<|Uk4BU)634RyX#eCw(IrS5W`>D@k(XK+vSjP>a70`wm)VEXS~|>3T)}S z^6Jv7O0Omq@;Tk8;&J|wE&ALp)KELlzhqTqiHt<9}7{g;f0UgB>J$J%fVS?eEB zI~2XXxD%UU@$bY-XxB>gqcsiQf+ zlvBu2)T&QTr7$Fha8fqfIq_3Rh@-T)aenBsIIuXhJWyI5TDqii?Kpo^^|n|zn#BHo z{Fg7USQ$8-09}k0bzNDV2BqpD`%ua@Kd zsA?UwL+|5q$cvxAkMkS5N#kC*d=)AJ!cDWT4^>IpU8iE-_mykxnM!VTFS&uO;hMHs z*dMD8>&70}KjF57q!O6cf!a}OZiqBjS4Bcwnp-#NW{y+vL;#BDE|R&WAwlwJD-fw_ ziLy7uIr~iih{y(iV>HVuZ{E@*Hm?mwWAK<*Lvxchd@sAlZ?Q-aJQJ*av9_uRqFL60(u&GJ z`B49O^ty@xQ;p75Z;M!E5f4)yAk>^VdQ1bRPAe)TlUsm`}_eJzTI|4F5x_k@j=%y zB-B^m^y=)9^z|>HOvuB^&RvjQaA{8eExw)B-1BCQnRM|miXcrneotV>u5w|E2D{;qz3>`^zT1+ypIkRHsQ-kz~IdtM}a zdTI8A((F<5v;Fh43qU_V+b8Xh_aq;L95$Peu8%D6kp(`oz(*GN$O0c(;3ErsWPy(? z@R0>RvcN|c_{ahuS>XSl1! z%4PD${YK3iB&0ZZlk`r_kK-~)8$EFBE9na~9$SKvcJ$W5{$B;=IrilI7VwUCT&s0h zr^C%Uyg`Sz>+s7u{DBUi(BTU@{F4sf)?vDAe&Km%9gfuDR2?qR;c6Y$>2R|SZ_wfG zI{dN@f1txBbohb}|D?mWb(qeLVO(eHaHI~W>TrP$SL?7&hnsbHgAQ-kp^}E@_e=c} zU07B&(?1GNw~zKuE}ULC#XoUE@stTuCrfPdwV~cc0WHj=bFAFPG@&du#2!ehY{RNo5Q-Em~qr>UfFAWb0f+4K{3Z!ux)Kdh9?02(73z1j9E{V>< zwt2tZ61^X}_xm)X>Ano#eaz5KzY#*y821cgYy$1>8-nQ3^tVeqmX-My44?k@bpTtL zGtn2NpS+tSncst@(*MbLZszW6#NT7QAoDsDHvJUi{;Xd@|8&b{VpP@|w3BpO5)-oC zWWpnf=~2LoV67^XX%yghlja#y+so5Y&#Iok2?WE`19|LA!-~0T>CmmYtP!t;#_F&LE@&Y1r#vS{y4~XXDy*&itNuLac9;lCMHN?U)EJj6iecP ztQsaJN@9Q3MkXfNJl8yswGma5K1FmpnDsjH!kWGtFR;NJkH_&`5dl%d* z=Vut>(o1c^bN+*+pReMD7$VclB%a~>I)tPh0ZFb6+owIi>~p^ZW2b#Zq8Gx3Xz{Lg80@U4lLLOYrGOCAj^l z1fOM-9_;&EHtavxch@`#KEGaqeb-5F&utRiceeyz{P?)2@+V`Ys7(4v=8hITFksCPB$~3Fba4!MukhnExW4%^BRk z>{SWM-?1_NgAqPhc0 zc1tkl^XNzi5198bOfYyr>0t@x|6YQ!lM=Jmqe(?)=9D4ry@?0MdI(77@_ z>}K@DcDc6+t%TaG2W*0(da&jy%xz`aqL~_TP{&`^T zAy&#ZR%!lIj342UlSqyoQ!A5r>v!L)iV0przx^1)WY41Yw*%^u#q z*0T?N9!2p^-y2BsQ^AV)ulEdv>PGBNTLjNn`aQl zGPThh0_X7VdL!)FEAMA&* zjS3yc?-wNb2Xin!_z--sAD}Pe_i7xgeeesYd1Ae&!%X}%ulB(P2x9I-fw_cODVy=Z zLSV={7nn;8Hqi(Fj&7L(s)6Ry%UXbVXq){PQ~?EW)!AvVi9UD~{R;)$1N1i?5;39i z!8vF}BzXepXOc*?4_>d;=mDC;tEe$N9hPx?a4(EVDFaaL=iuieF+R8us#0C!gPkB# zY2}08f@G$R58jA)EohqYi~Ar=IJ^%ZJey0*H1hkRXtTC`*D>%-xCKhucRveXAnGR+ z)ys8Nmt43TE={6yLuCuBLNqKymWz`Y-wh|>Y{RyT-$jd(D1;YaD77EG8UOd8ue%tw zc7&wLW!Q6|w2yg?z+O8LT}LLzm;M9Y4t=Tcc^w&k*)SBB|E!CT!_n-g>@Ptxau#lY zy`&5J3(CkY$T3pk-!()^riy3zLg@1_F?t6%>WNVVV)cxRg3ehE^I*n>;qCh%`cM3R zgI~cmlwQraNF_5aMto>E>Pq9-@7n@0$X^RhQpl%KGcSD{790$sY50}kS8#0#`PR=r zH%l@|dk^wQw$XCg8TNzVnb_3UXk};Hzk%%?Jto%T^)lIm6wojiuSSQIpl*=$5X(0Wo@r+u zx(%W|n`qrf*?eSnEPe9GO|YDu8S(u96n>#-^r=RU2{a>?ZyUO8)Y~99P>OonfYCAY z(k-aBUqH+o`27vPg5RQGeYJ~jjOucwG1rm5qvNpr$RNSCrWgt3H_UO{8nU=;4UIMO z5wuv!{`X|rh9p6@BgttKhAb}I&{)0zToiU04YQ`O%K=zF^U`g|`(^wxKsXS;f&uIp zr4ADPWgZvfywa%dkaN(Esr&oe#4w)DCWZ#{@fIkab?Prr<$PIF+V+Sn+rAa)!M-Qe zYVD|6ru|VZ(_T{RvzOKS?9Xb^*3D(wl+nVPvD zb(~YPj)j}r8g8_5Y8mf`9TrgJgsIDv+?S--^#d;yFvMo?db=?oNY@0^OT_UxwCErrROWqDtFPXh^BF=;NfI` z(4)(Ux*i?JpvNv}bSnD&L61{kPZe79pr29|>`QUEB^B<5GAecGqOYn-E{vHM-;0j(5%=!VWo3pzT0RDz^ zzXAF8>xTVj-NF2CBLf%t3ODNr8E&VVT=Fa6jBlZz%lWH^do!FL#?>1>8X)!Wmr&E1 z%{tA=_2whNO#+!QsAB4i6h6Z@8BlM(wBMk2Nn?D*bhe^(B8*!T$s8ql$UNtS zhx--7Sx!|bDV~&U&V5w$Vh?vRRDtFWWW(4j8%`wy7~g7hXJdE;*sWQ=$H6UP&QrQ$S9ggT&yxX|m;& zH(j>8@@B}ESKdt7^2(bndrx_DWXmgWo@{yLmC2S@Ub$>}&m-~dtIJiqf+uG%z|2;c61K;#q$}RG6c~{ zY*3!{XfOHG*dRPNz^wVD($_o$r^=r%ea)s!M9Ww*&o3w}z_#VtMcPV{`ygo-i`m>RXX*Y=6%ShWG5(biXqeO2d z?IwwyK(ox>B5Z$W^csQkpdj-^~Cd+`&4ldj*(2o}4icY20!fK13_<)hfqQMw z?ZBjQ(URfYi5?=u$IVf%C-)J+z5Q}uKz+}zBgc=qD>!R0u(^-1SL6<$?6Rq?wpl!v z&OOYr^lf7J57JnYS=p%V+=E07g3X8XA5cus=ueb7-WX3D{{hFUm@$wzpB!*h5XXPO z-9yW z?k@EcW|e27OrFaW@Xk^{VOIIMbgktIxUbYtm=(aYPL^lIZD?L<)9zve7RUVT)A94z${1|i&z&hT-CkRhvwG_JzSeB9$H z6F`Q<)3|d|=DQHx0h|s_vtS8kywlN~tOehJ!S*0#FZ?86n^3BScfqhb5n1p$>fo!W zK|8nw7R&;O>#2p$BXO975j2GVX5`}#FdXa)XJU#yoso?I4~Je0zXBQ45vjNjP3KXd zy1fhcK<#UZjG>?VG$O(0py00%;cR&!OEnEzF8nvNsY*mHy%$0}_3M!T5cGDu_{;8=fDsOrAaUm6uWD1FUrK zo})V!Z_&kE3N)`5Ah~h`%?dD63>$k&kn_NO!MqeJo~{j21ep_rwXi-4$pOp=QYi99o^cy=K=8i zM)Oo_o}rl4DhqTdo_j$w7k1~>tO9G5Lv*tu>ehm9X4l}|b0>IsPRnbx3ao1!p52P) z(;&*`k`tF9y4xYzqlk8cs5Xh{afj$uMRYTW9@Io@l|ZY-;+@r+2q)U1ayG!q$ose1sGxKUwfmQ1_JYQBk zCwEwjk0udyI7Ig=q7xw61EqOwRe|+QzmfC}MRXiQnE>(%sKENA-w=IM5gh~3?*Qbr zLj~3ezajdbB6?|uwe;KYQ(lj%Kuq!AIwU-MYB9*u$1eei9^{vCANo?Hr(Ue+;repf zdkMW9zssIMx5m>SxU99D8=1-XV%U54g6=fijVt7=;ZYv9h>+VLB>4spd+#49`9puGo9OPS(^rr)zIVXN-+j=HS`0 zX6>7_=$ksZ%v#Qk*W?>y?7aoUA@mG3R+aRpY?_Guj1vE}PVc|gpN&ejkJ#L(wXS#% z2K21?CAw)|@*pEZd+(JM7=b)xx1)=?LnCJk36XxZjujZu%Q%92_FT0BL%5EwSw`)9dr&5g??a39hBCIdaEkL5n$Rq z7%q7oR{@G@k3dn&fa17>%34v1IDr@_R?K(e!4-=USA{NLvBZhbTd_#Ty?Y+McIi*F z{1>$RC5@J6y5fo~0zboN-m}rnaB0ImoH~akTRp$({b2$q++wx}5>_4l21?MyuTmAdb0Uk|WN1G1Z6q@XI)bKy42SO`H?1O{$^IrAblJ9Dkx!0+Lm{^a z$n1R3&W5ArOm>*&o=ZTk1|UNP7=&KT%9~+(eJ0=h89HAq8fRLB@?3JT&xxKLSBx?u z5PMFcJ@;Hy?56CB{MqN$ib4fxP-rsn=N4g)kYzk?fCk^8T@)w)26c=h` z(wG?H*aT!3#ZADBFUVrner*WY1*dg|xP?!OlBV>)9U6{m8bq$%R2Ul{Axgw6ITIXUe`N}mp%~@|3(Y?JzzWuP^&O->uE8{%|$jWxqj02Z5lF6n1-+?eIS!KUD|otzxf5{D{H(aebh zZnRPRls7kSD~iI{JqzW-_6cgv7x$$34w>YLAt6`0T)EpFJm|&(07!}H9|SCzY&B&s z%;&w(3sn>gGYw0}0!~NKIxlq;L@tBuyo`ATx6cr9v%6O(sF7A(BBrk*I zSrX5kn57DX9<$J1m|v@f*5==##9l_R^e@sbCrLuCIDj)nA;C%#_;ssXWmwT0YUQL^ zUtVYz=Qm~d$_o3c#iIM#I5mY{QK(DiYE$G z(IUKMBwXE66l-pdM2j{wwH38g)oiNT01Q4EGHtxv{an=0R1;~d4Hw}9Bt`0e=nuk; z&wo%@5xz(knmSp3o{U)~2)BybqODF&)eW&|0>t_tIrh@Gciz1+D8eUH`pkqcrYdS} zYvP+)9R12F@dh0E{)z=-@~b9=n2+GXO{2Bpy25&x-HL5%2}cpdcnfetGot-0et*S8 zktJUW6~QNW6j=@EfTv6{hhnW&Gz5|`Jzf=6*RTPEs8ka2mTO|8Oa(O49`AeoG_aosv}XF{9cueZEKff>J%<0DSbEz&--b5d%bR^`-+I`$ z)-&>~hu2QPiH}gEKV-r^hr@csTyCAuV0GjeI2gYHaKV%S5zwZ63)zsQC6#l)}=d6wsUE;&mREXhMz z(uY0N<}30PGIO-2^t(MP@;#S{DhqFad4q5BkFOx6#DlK@vAvo#ruXM~#-V`nG8Cd} zI6Vjx+BeedUV;6DCXu1OjD~@m;4g=Kz!9YitA`Ur!c_C9oGyV22QDX@<{N zELjoi{Vk~gpr=>M$s`b%ggZ8oQLHgkk2lHTvr$#GA^j4#I=uSN!sn^@CbI+;&QN$` zWnlTD2);zNG&C_(jMvEVvs3tLnd?JXrECcxjTe@v1wv$2tBveSwJ?hc`SL##n1xc$~kkDiRIP zv65bvCiNMu+KkfU%U@MdHIzjnybHp-#SFt!Ypgkf&x)OKT{}&(thgewIe=PU8Pa0V zgj~ZK-tH(W$k)q4)%cDbK60kNSymslLj0PVRTT?;tgWGDQ%F9Sh6l*}Wn;${7yD)Q z;ve4_YikOZYzQ~u9cnd5R<+jDm#B}g`p0kZk6${$KOV*_!2k}X@pUcX*2actv|%&+ z;e(gRX~PNcQ?%4O^{m>4x;mK+>@YRRkFhm3wp6u->YCV^;Xh(;DXZik_XF(&xP32x zLN_x(oG%=~CI=yo{72eVkz-I!>aj&7YnmVhOqOK-gJmuRirH(a2Upm5)1HtK{R8w>uZ{u)F|S-uF_H;K6F@0)Vy~#J1BLbvqC+? z>GiuQ;mrKfs!#-hcxTCrxy1@LMthfQjt(76y;UpupEORpA?q?oQ%gT=Oj)R9W-v7HDS7_uHbD)PiGC)${lEBbvzc z>oaExPl#)l1cfI~=xeayRO9pxjnO)=Vq(ZO`+?a5fe8DJ%4)=>qF+Nh!$c{WN(jL{ zIkbGyk`=}Hu4!oHgit__>(V-6%kM*~ySeKlg$tC>d38BebY zJbkH6(^B(1dHH74nu3l6xz>hjA~f%)rnX3=C59IXQze;VA(deC(`N$9nuZsj{CzO~ zvdQsbwGkhPm9E~IB)xC;s>9(ttfe`Mjv5Qug#7=-tz1o-mxo)5tq}fCw8HAut18c4 zJ({#1H4_Y}{x51Z@C&7w&2CH3^L-1d=fmiG(Ad=6M@Hw~O;0%!r%H3w$ki4(LkAR| z1phPLZ7RogHS;+MjP9Q149X^RNj)Qk(NsVC!Dlq_)*#ab{Z-r3=kGYml$vI2%%BfV zelIT%Dgn6^E8O7908^x8WN_-)5hURJ7zzJ4eq-sNSVv6RDflMZRQ#t8o?`C=x zGebPnOwq4McCq>fd|MgAqMF0CHMQY2#}lnB*s;R20+Z@cE2h?*8=&7phE=T_YCeEl zIaTl2EQG~DsJRYbC)SJG6n!&B4bwL#tYef!Js0e&I{<;P(rKr(pi)y>3%kdkO9eCb8mT&NlIRbq?7TmnO`2 z^n7FqW+)Xai$g2Pf-#wtoQ_Syyk$k{2kAge?wB^j$7TAlTGn!DZI*+s)lcsQy^@1v zy?7_~a^RhHcpLd?+)6iT1KLhl53?`{yvI?ePZHC<6zxC6Xxt49* zBty1%fNP@$-!N}(G^>R(aU~enh*TZ2EK@6@OSz&s^N|mQb;dk6i=LA9o>2611o&2C zb1m{R3s$qP$3}u{DC2rfmd==Eey}>IU5z?OaO1wVXdO>PHCfVR=zLNawasmq ze28Z_z1a9OaYy%F+Ub0@D$>}D&Ajl(@M3-|v;@y!(&1{W66dVF3te37YRUm!f$?FG z?N)7$$~2)m8s-MqnRO`aO3=3Z*vl4gP!kWaj{I{L2s=E^UM$wBPB+T7YX$_h zhSkj-?u(fYr)i;RTTM+k8kG^|L)b?(RanW?(HTv=QAWKJZ{|2>u*%R`efYJ89AAXW zWH%O#dJ7wMV|A=~OHI;&Ew+V-2eJO4Mk>0~TkniLkHQ(1=ad zI+w+`6y`CHCD!FJL_+_4`ZW-;m(5^0w>ii;L;V%&W=G|*W3j6%oByWKk1er6n`2^J zv+!^ow5RBW>->BXWGo%ha_rO?X(@M({WIN53o2? zIxDw=KdA`kXz<2J!-o18+ko2Ob!{Rf4C?4Kdyiiq=!8-opK!;Md7QqAP*A5s#$2|s zv8t)|GTA$(BgxJiJqc@C_C!%N-s3RDOkNYlP+fJdk3!be)pJo}ouU%QcxN_T zeb^bOJH%292UfeCkZ{H+eUfenr>l5D3d0(xY z$BSE|%zJ7R;pxu%Y!l%bmU-`MBHSl$?s3By_ga~jc@M1{=e4pd^S;_dcz?^hw>J?! zz?y(Z-I6aa-Z#h>*dE*whaMx;I z^jCwP_Az6d@xTqZGu00{$^FA#&{NNSrGC-R=y{*Ol=P+l#uV^PsCU|bUlP;y6!d#iz`vLR{(X_(dtCk==vj}+X3Bo)Pe ze@p@YTMBrF=p(EN|ISVUAD04Nk^+7i;H+O$E>pi@!0D&S{xuH$NiOK0)b(hTYE2=3 zmzKX%(;N9+B0rw5k;9ir_hq-Q34EZn>|#Y3)(j7&pnpCE{1w1i-aC`@c?)p*mzfi9 z)bclqAL)cE_&31)uC$5X0XXIFE>&1l-hCoxpmk8|kM2UQ{VC-01s~MM%;C+rz_+&c zWuN}Ys4w{WDc~~!r=AB({i27_GoaxQB*7~HSCm5T{Nr~Z`hvFt-j_Z%0M2rmwTLN~ zRnr=a#_$c{!W!Jn(i+}?haSRM95wP3qbbaT;o9bqKF6=c?PAeTRohlByjvnTaIY<# zI%BeI7xX5=(zmH0RMpyAwGB6t##*=GbOyIhVQaOq5!>G(R|vNrjUO zi)Y|Tk@;;6ky`!j@D$%##tq_I8k%YovrGKEHg+?s!M!}p`z*qX!;MR;H{vVA04;!V zqV-lNEO#D$hy}KL+snDvm|GXv-@pwVylVkfP_wm)t*a`6yMhxHbKqz(rXVcboQRl$Y13k!9sl6n_&k3lF@jiWO~*3uS>CQHFHcr~G# zXq$AwP*N3LcW0M3#OmjxPrx{1FbUc_H*vY0?f>DOBaTf?_lx^X7Kh?vF6xvc?n<6t z#LtaFA*awBPMxAH3b!_e0#2Xd( z9xdF#z&mRcHO5pH&P*o~idH9^&`DM69vJ_{hYNi^&SMouw>9FZ2qB(hk+9yxcnH_p zVih(u$HIlB^DD;3sy65po+>Eh-8L|z81VDr8v}NNpYAs1XndS z)F4+JhVoybWT1q>v#dgPV3>+Q*>H8+2Jlri;hAESY-p-$Hu0+J>eldPlZxPRWD|#( zOjNTwlhXe|ggb&*;$Y88U1mSpg!&yz8mnlX1R{Ji5L0xA-|SnP&<~vZ!m>NzDn`o9 zAJdAMP%j@Go@D-VV6c>Q@;BdYGvOK*4i{;7VLC3Ck#i|hc)Z`?H~Zx#tk;BCE{UBC zKc0V=YdQX73&Y_z--$D!`7WHI?rpy-fZ#XSjQ!32y9w86Mx(!>GvNl{`As*I<{S2T znQ)$@@V*g*q%rzyu(cTx7SZsV{d_0XgeHH(Z_2+@^Uv3GX5Zh0d=m^W@?)3K$=~pQ z5-?T~_2XsE2TXXL0z3ary&1qRMBM!59KnPwreK=E95a}3cM|_Ty@)g6u%tzu;WPQ) zp2Tm?B}|C7-6UMe`tQ~JMon{mVM3oVjFw{TZ-m~P#Ba_yOi20eD>?t~0O|IBa~@(s z?k~Epa{@EPcvh!AGe%Fgc-Pc~T_qFbt1r}Y`)MeG?X_VSFSU}1p;>g1?f}mWRsb%0kS{tPeKst?K z0$(lyV8wMtidLar!mS92Z>B{6lEV>_PKC-s3T+bCSKx37s0uaB$pGRbTer>OBtj=g zRJe-_!sQ^N_;T`{%p<;Cl5dyfQ=voJuR@hS5<~whB|bT<0*)>f5-nYIuSOdweqbUe zdxiT1SW}_mTLwO|v%i-^5-ykeQu$?@^ot5rITQs0_0vm>f{lg2Kx=GM;ij3>3#XU5 zA|cll&Ns;?+J#G2u_h8DUBUP~qzOj&?=Si4lrL@E`~26BMIEOHb-eofceeb1aO472 zZabcJ>RMWP7QrUtzX<;o?q9wB)L(AB=D^OkvS+^CIpVihtr_u;U%r_QRLZLd0*Ca0 zb;7v5;D=MtZ%P5*4B36j8J9x-jVa{#Q@|fcA^)8e^sAwFR3G_$A%&cgDdhN5&}XK| z*Xa~;N>k9EN+I8sf<7k&d~gcA|4IShm_p7!Q_xRO0smbJ`>6u=RUgVy=&efux21sJ zkwX5i6!xr4L4Q8zo%m0_9tEH;``szzP;2jt-k-vL>L;A|Prk|ka3%whu1+E68o(Xe zIhvz9k?R(Q7ihbZ;LkzU5G_m7auStPfPS!6pgASI>d)JyoMicjSWb?%?}9`*8v4hj zZT{BE8qp_uz3W;+t=>q~7mj+pnzwpot+&x1_OA;>qW#_H)(~{JUKR7l{LB5m#)?=|lRq4XEvZ`WYh4#7tql8pQGaEqrN!6U zxQw_!yCBdSh%_T2uu`tK31H2OTNEg%voh2g^>0c5B#4q^RsIcu1`@r{AFT`eqD`T2 zOLbMmP!Sg+ZNDfSi24oV!6hV^c?_)7Q5O!a3;SB)w$=JumeybCZy@U`Vu4^|oMMGP zDsnYHyv{H}qAvl)Baw^#_AmTPzyUqJb8aDV2P~dT&GX zdT*0208}{88g0_T{*3`*iunEOwI&FU__Y>a5K1(d;}3_mrj6tutqG;q2r&(td{jEV zVBl(!N+!rk)#3|4Z`cp8ztI=<0b3u52qZwzvaqJQqH@g|?^IW*t7Mi)dZ)Nbjo1{| zG^)iG{L%OuAF&A{E{ajy!lSrK5zZVJw2I$?PLTdYC~%bCuTz`VG$;N8XTA!tR_!*a zh^YT6@x+lA2n?bb)i;1Ap4jj}2Fd@9q*wDe)n2#BPH2amuPJ!Bgxizaw@RP9&`Iie z&WGN?xLz^AM;TB}>oUQ|n&78Q@Np*i854ZG39kAZn%fqd;0NY8IhBq(++kctOmOJ} z#vM1oMHvc~6DGKtGZN;c32vTOzhZ)$=apS1IN7AGQzp3bHQ~;f;3%;8CH+7$GUHIq zZh{Xq!5tb@DmKC8(uYwqO>pzLR&IjlDj^6xCOFM; z)m3ML=PMw>H758_6MU@+evS#=Y=Re<;B6-OFcW-}2|nBe-)4fJYl81E!PVM}(7R0V zFPP}>G{Mg^!S|Zr=bPXUncz+ne7^}k(gZ(Xf{!-AkC@=*q4&56KE_0U!UUgSf_vIu zw|hEmuNFHs&-NoxOV0^U`;Y82I}lgTv@aDowVpB8<3J-EhPO=)VH z-6uF5rZhFl?jxMOlG4-^yZ3Xtp3>9=yZ3VX5=v9k>)yrb<&>r-*S(F?izrRaq`QsN z6_lnX*1eX~vnfqYtGkZVQz%VMs=J)i6DdtyS$8p~M^T!ZR=1PW!zoQos@uWoA(YOh zw8rU7N>dZ+KJ_sGV=R=Wrqlfjr{8xWO--iz1gGDoG&Pm(BbVz1*NGebZ_JIPbf`Ipu3IJM=4EJzk4mG4^o<{e0LqEAEz``_3m;` zKTK(=;@!oZzMs-mwY#01zKhaSrMn%RzLnDFQCj2lmnltExck&cl>aYL+DYkGIDIvx zsmgYr;B=VMG(>bC;q;Z1rmEY$pVRe7H|;{2uaY;aJAQtJr{m9__P?I0TUoQymI+^Z zb`JUqT>c_-)mDh6GQ#468D+cJcnS!X;QuSSQV3J;+qs_>x8dtPaB z<(z*#%fI|Nsk5lSu1i&Neda^m7o2{1Ex& z-A$mN7+Pbor(;W(rz7@?r(@MgPy3cGEt-ws6)l>M;G`DIJOJXJ?jJ#jr_EoDgH)ZKp*D)_{|MAweiSXs`Ny>oa_S(p*Jtq0 za>kP!KuPAn4_W3Jm9C+5ryE{B*0qcE9YqqxIy{}Li0TzjC+w;{CDeiHYJ=(|Qwh2I zAm;!neBn9HxGB>ewJ%k5xKC7d)V_GkeTrycOPTwnoSoDy9do}UK<|{fPvq?UCK63K zZyq_FGuVCRnEOM4`>@RY;+88q-5=&W=60N@=4VJ-LyZarzyB0;(4lBA# z5ZQG(=b2=^j$3V4$CkfB11E^b;qD^bhj1v&nsBt^#MeqrOgJ&i{nwm(jtE|q?UyE@ zY*F8H5Z^#?`1-#-pwh8zG@;f@R09{BZKVn2k*ETJsy3nKO4OG3S>SXNYNABd3)E;6 z>Kut$C{QOS#w~o2;;`^Cf%?7)^(~3IPoUm5q5dXOzZR&&Ce-&N>M4Qxwh6UYqV@{Z z*G#CL615#DSqz@*UU#BtbR7B+ZNk&>{Zj}UJ)LP|XnN}DaGxnTa+qpO*U$ge(}NuR z4uNOq_BAL-PsdR-zxgC{=iGLT8C^5r%I!yTZa;GUk*I8_2eKe$=XM&L4w4*C$MdwQ zrghDS4ISHQ5Nba}cP(1f$w`)432~6xh=AKk*R5yJl~9*_7_>cIzl0gPJe@3eK2xC6 zykde=J4nKf*7r0Sz5N1kv>&27E1?K0aS0_o82*HkLQ--!TI^wv^mMhdlAUvxQr@0L z;@I|0fd3i)$Pz`XJDwHVpX9vl%zt^jQ)m}X`w#-*#BH5Sjp~X#5+AEi7wWee>L2}Y zC>>#_f0LLy-P=4JwwoZ*RUl_0p226sU1jX-+_K0%$m*R(|ze*l06sJPDKGoQGiJb~2;Wwv~eIM`Af$e?+4?0Bd@> z%P~g;ssX6^TqP3g=~S~xtD&G{`*^UmAEGjJBz0Xi?;~zm4^7MNckd(@2S(-GatqR39Nwt@C!mdm4V>dG^m1F|`=< z4qD~uoGWNTqILcE+Xnr<6!bqb(L?O^zR7I&F&QQ;Y@FWhgJN^dp7!A=%Af5c3S&$>KU5#t?A00N8W-vUFZ!CLrYJWPRyO|1MuR5EPn1!z|pmc z!GoS-#nbi){o6TMwSzjHD>eG7?s%cP;{-Q!9s$Xm9wL}m1t0{(C!l2)BC0UIzxLoM71BHJ8P#1YxfH6+YIfG zo`TL1hW0lhVHaF>6ZxS6reFj-;Ur%lbDxyqE8G~5xx08yu>}G=9oyGJgQs&^Xe3Ih z)3%0!owcuccCNzezAuNV7Iy`)v%{*`WV*A-6k(I|gvmd%DTjUVCQJ^XDIW$&PuCna zxzqMnCv>6+Y9O&=JFS4)57C{aQG}(rgtTi7Y2P7f=Ni)f3GQ884*`fwHG>Ovj`Oxt zq&j|0I%Zqpk&f+cj28i*W6;t8-6WtK06G!Sw>@ZEZ)1jy28RER?Om8vy`AIW5bl1A zx)gTjd8B#g+$R9&I!eCj9JH7+);Vo52eyySr_?9a!y8EqyR`<^v>&27JAxwYh!`|L z(FRy2x|)6egd>Kaw>S)vo~~CRfoj5iWXViea%_76;HU~vFM1mb+1KGd?&-|Gm#vz% ziv#!Z`A<4QH~#?LIZ95u0oCc)_7PIZb%!V>x}YN<>tsyAlIgfy51~RWgy7EGi6h=A zb!vxc6C-E*ap6xaW7wZvgVBE5PwAKtUjf#EeXxQQec2_)&!QCQBLdy=Jlyaob;(p| zyRLZy(%Bb!JKL=DHV>R~oN{>&+}M7IT@^jIlj&H%?M9zo1_A#V2$^36sOt~p_PgO4 zL5qd#nMTjlfxH%o+PcmI+E}n+4IDcKMfi6hK+=IF#V)y=rL{2rwZ0%kvH~s9gS9@o zyV!p7dZVj^s^tM%MpJ3-6d8GtoZaypF#84O1TZHki#HKw!gHRElb}C|HjaNm zMo3W<&c zUo{_lRb%EA@K#hIg)8QFd{o`>Ud;sB#j$&y{m9yN?`!b#_BW!=l9$!~QBBADH68C& zb-X#hC-3*3_UClZtUttFLs8f-y?p*<^Dm!&#eDB`yW;y*r=LR$CwX#LmYxgI71kEu zF`B>884WpuAz!0&JwFn4X)+$6t981= z;ZS(CbKIE7IA_=&jo~qCqjOB-Nc7Xw|?$y)ePhn7^uK zACFWL2M>!eB`B;_2W7Xq)Z36R-n6HjEFP2o_BEDC2N6bs`?u=1Or z?DgbzymCRW+>RAR>5bq~C-N&(^?_C-aK}?ydbk;lAy3j7P?h-ncf7Q#mu%tQ_AR)OWZZyCG%_DW_6%9KVTj&jGXK&p6n#ELrvowg5DN?%Y@0! z@f$*cM(4x{V|@e&)n7S*Jd=VSRVF2WeLwi8oQ1!kAN*4d{*8ggXtOdO){C^^C-ZrN zc-o4sMAQ!QL+|4NkQYB;Fxjd6#+7^J@>M7Z2v^m*+3Vx9 zxlGx=pD0)PGnL%>UUJ=={0*_F-x+Q8%gP?NKjGL#QVvw@Kv4ofkp9KrF=<5lr(Hcgbk^W-cTdj=gdZS0g>bFQva|fo|cj@U4g7foL*gA^Sdd+?C?{*9 zeVcaS@Un?hFBnA?|UxmdLKfj%y?YO~GIcUHd7z8-sufcx> z@boZGQ}~)Veh@N>-gjdA@W zP$pKC{yW6dn_PC6-pvguan$0wp2E&GJz^^`wkM;t; z0{CkZ_=S-p06PNwW5Ck`NW;ceas96V|1j`<`PfN3DBCNM$LZMJG5D<|1BC=pwikeA z6J+q%T9NIz#Zr-7aI@8&?cAAGkv;w_mdSkOhyc&;`BJ~f8Ya`~_aW-6yV@J!bvkI&??h+~ zPnW{K`*BZ*(s#;5GM?1(wFXb|>B6IHzD`QKdQkABOydXzUj;HWpZzD0+$PfY&T}$_ zT8pnu+CP%&@C!F_?UjNR{J2!C>^>>!@Vt~SrT;vcR&+R2#QB3e8H9&SoZckm;W-ee zm0ftq%IP5zkF73Ft9(fD|F%H=&N^A(^c(?MAj8!%Y?9#y8QvhnJ7oBv41XZQ=Vkb7 z8U9U%@5yig&fC#7REA?^I9-MdWVl*}O)}gd!y9CHhYTN-;SXf^ybOOW!@tS!JsA$* zjUwom;aC|?m*D~#u9jhw429MGzpLLrEv&4Z?Hs?VKGqtIIcK=0xrz&?#yC~7wPdEN zxO9SmYEMJ9s1%|qeOdlG=3(i$(osS=Z#E*jT&9(MjY;X4O!MX)$sR5En?;~qDbq^c zjWW#}aiFo%4k<3xpwE+OrB7hv_k&}_5A;#*^%dAPO{)dx>#BQ*(`l9~k+7aDrMo@7 z0S(Zq+e^{ztd?}!`3Ta|>E{WSQryxmxRl8B0n>rEOhIRnHl_>^%SrS@X=67L(qh{R zEf)I8M%uUvLhgW+wDGqinwDn6&jK~et0?`n3Dtz$gbb%mdYm}zw)X+IQQQ(mN0&C4 zjIz7{&!^30=q2dB(&jO=9W6fXCWc;$oTcsL=y{aWT^#)hnRFXP2iVi?x#$33_gvU* zqqzMg^lT?>@7`8nX~m!e0=8YA&6@_C5kg&qJ++gnVCO^i8c!-$}>l$A(6%jPv)Otrp?BQ zy3CJZjcouY)?{9eF3@Jz{|wmL%pJs&sdpjKocS2pHBcvm+cN2lug$@UXy!1~PTLUu zkHBoo{66sv)hm$LmN^fuvz?<;`F$z#ZBkmG{}a;MGxtz#M(Ct_N9Gquz!?2CknPGW zA-@#rS;)_wnKLQj;>6y}3n@{=iH9;5P@MYX zbo+M!OShAZfmEiHgF#d|2>LJ*49$Z+hURM7WoXMex1(y=F4PI0^C3B+T*O@vVVlqK zbo+2h%ms<#C{(83Na`I0DC_hc9NmaIp5DRH+t3W`!^x(Bm0-9Q|ARHh68PJGZj>Z^ zg$T}LqiYFzK0$}z_pe&E_72)_?on8g^-*5dM~?uJpK+fH(D!|lj_en!XR z9NhFG2Rr}7!Oib+a7z~aoS(6K8V9%4ad1Zy2Vc96gS+-{aQA<3aL+$E*qf72eBT(! z!2^XHJa{1o-}G|u&{htZ=iM#tww=lE*yf=wk(7vGCf&{p0kvoi?*XG?g!B?@P88j z1?K>#4fC#S(}-%g*SmouR8n69-u`_w7zkbfcc&2iGAJ?^-2&na5KY2=3H}SdkV3Go z+1>{2x!(mo)0vq+uy`8fcwk962U8YvFm*Kt)BcDKGJoLo*EpE*E(bI5Q+ohs-^i)5 zTRE6>4+nD};^4wsCMlo6LB*vEtZd?->M9Q0*Kn{fi%C2~IjA1V!J^3=)O?RgYM!NT7l7*UNTtCwY=VSfQXBG>du)>i>J@5kt$bbX5T{3a@qxmGG)U31PwOVVdp z+o-wU3c8W_nYpHyTj|$mlpa-q-dV4*9z{hW>wX5<=&@*M+68wbsV}zv5cP*BM-v*? z7-$S3J=TLNXxEGM^^iR->~GWb68&)y*xmBl zUBMZcs0q}C`Yo7s>(v0SRH)HbGGjs}u`bgop$CAADty81Kx-2}f(CuLUW0xjgJ^aF zf2X1mGA4Zwrs}KppF#&2v>)gn5J?VcnDjN0aH-yee&P*8{{xtp;@F=M)~mmZ9*nTB z1M_wqI}%KKgFXjUnzU!46*=%P@-~UFSL*+XED-N_V5TbUXf}V+*QnfD^(iof1S|%6 znIeH{gpKK|oI*eVn5e=^0aM9>&H5iuAxOYYK+~~1wm@coA4%D!??+EZB#!}2r^3x7 z6@cqE>hDN1egX6!43gnMYLi}shxMEFTcxAl1sbnxl2Vvt@+)wI{)oO6{-OMiY{BaT z^vCpj02XQACQbjIo-4CcIF>{%)IUHsR0~mS@b7Y={!%p();USFYa!?e<~oR$^Hbzh zvyr^9*I@2q*PAUt)N&`+vGo>0*tS3ts$iq0-)*6|W-a2D5VFvsWJ9v#CX$(qW%pli`3pj5c^9Ao|a6JE>5k>lrWG;Bu6c{swSo z6h5&HelP?xC8~~tQHCS%&*Fs))dv3#Z<0Z!K+h(UR??u_pc6j23fSeqtchb~8@v)F zPuMnKHpH>A4IYGVN&C&f+(uZ=n`(nQfF<5<1M@?LO>BdU;bao=&tOnX=kZcCTXoIIP zl^~MMKwqCoqS~Mxrjog;4c356q(vJfA5vPiK@Z|T2hH#CA8&)?!qI)S!FF0o+KAsC zL7mn0yDvxHi%)}Gzjp_E20~2&sCUy9eUj@v^hrcC%v-rYyBie?k>%XTOa28<(`-Z6 zOa6e`!chp1p|0ykU%`D4wC^V2N=Pa#qaMM0Mn7e_0HfPK5dA4J8ExsCsIJtOT;GwA z(LWi5?9xrUz=MXS|5U#omJz3GJp9F7&Vga>xh|6s&Y8%bX3wRrFw@-xN;xrl z5@O|yiv*pM3r}Y*s>j$c3Pe@-UxfdH*J)Jb85dFUjEf?6R2*^HXzaKDoE((tejf1@ z@*OZebI~{9!7qVmAO0W4f5F=+tY&pkgk07;Z~2w(S0h z_8*gFE0Rx;t;nVtb}NWx??nFf%xCGKi=~#ldz7jVmlgNPV<)42>6t#vscCX?C)aH)s~=$^*A|A(7P#a*DR|*;aEVkXwcU_iRMeD z3Hl`zmGny~Zih*>cTr51gCKO&{RM0mCyXDQ{w^pjFT$!J=~$d#(*xv|>FCjO3+f2< zJD_rhwNUiSXa%{$zf94q(ZA-N`%Of#S#|}eoq6aq9b>Ffu&3Lv#(f55>}Etp8#iEX z17PN?$*3TXItx)eLKJiZ^1BFTkGdbN$Fa&n0TIwjUl5cCk&Xl08@ zaWhfUE`S8yOqA=%RkSA{fU~Ty(@|ojeH_{%h}$)K0tm&e88WmvROpUnRw^;tMc^rg zm{-iY7M0eq(L$SBw6h>Z(=Li*g_VkoHX25gX1-~&u0tQ=_^Oo*q|FCrU|rg*Vhp&B z`>mw?X+rUhawYKT_M?DW(`>a)6b!{@-9**rE*3_67s(uR--Q8=GAnIpjHY~wOmX%& zXY(E)LxUFD>JS;a@HwgxKjzJ;xko8}OsB1n(bcd@D=S0a?TC@fFD4)xM=_KZxuEP4 z!tbZ-(hf-+FNBx<5<|4(iBh0x=Oh_D>pk?HtPwh+7GZ>eo0VLG@4_S z5omsaTv127UuRxw%YurWq|M5uOkQNA221VR)M(4A z?C%ij_?ZuA>KJ;V2U1=8BDH(!Fyd&fY!u1edJQmlBx3UO#_@Jm-Xz}6%5(7!RNhqH z@yaWuYBa}JAJH6c&fk9$_wAJT{JiN;B9WgrgEzqPX7UDD-Ynh#%bU#`V0m+RCn|3q zZ-C{M^9ER6C2xS`Rq+N`o|`wo@)q(2Se}P>zVa6F&R5=I-ucR_;hnF%THg7}Tf#eE zd397w`FYD|=gU%xjOQ2B!z{~{8Hf(MmZE=0HmBk3`I%b&`Fn4^hQ93 zPToY(kzgBINIA1~qBw^ZQAM)+jA*BqQS=6)ol!wiC(+KVrs$W6c6KR6m!SR)tzx+c z(3*xWUa=CE8^y_lHEgf+ajlv@1D!E77jv=#Nkbhx(Z9If^zi z^ru4*4ZQ%xHrlSTT!wyZXxmF<$UfBcbN)_SVddSh)DlM77fhh-`E+{+?%7uXVCjbC z1rxu<(0d8m1%PEa7z!pm#?bE*bQwwb0vcn%!kfP z7*^nVi=qEb&~5-MU%|wqpeP-E23jlb*|!3aZl3^-`Oy|_=CJgK0L>T%ClAZu`jv4M ze8Vy+o|f@T+=pdR+@A3mYTdAb6d#z8fq`_`Ac{LOK7u2MWm7ya<9Y}imP7G^jIU7K zLGck8zd=12Hkjhh3?Is1Sl;7^kI!hsATlhUx|CvU!-HWUwe&-X@J6++IcVQ!_`Tl& z#Nh|=@Vz9&@dx;9_@F)}fkq90mf}EYOZ+HRsZdTn=19BTJUefKf z@0xC>O;W4H5d++s=C~gvUGXAucxZR9Ob5;ICDi%RP9X2Li7mIXcA$qZr?GN9Vdw^F zl!vlxP8%4N_72?B?X-boqYafx23Nff3d`b~QMod##pO4yxgQ^5NPSa*VfYZUNzsH2u;0=QNIhrwx<qIZIi%sJ9W=M)oYfO0<|el-=JAm3E(y*nZ^;z>cuQ%harSErPJ-S zPDtl9gpJnXm3G?csboMp$8EHKQfdD(9GGsW+4F)inD;(_%A_rbz+oRFrZ4mXb`mAE za2bl>4MY}v10FsH5#94Q)a8wc(W+`;72H`0;)TCJq?#hd5U`6P_d?MF6qyd>kck?JkTa1kP1)`hR4?Y>UPs1D(<$K!{K=|Ka{nHSDQuJ$3Za1QF!lZY&;r=sR5 z)yMf%%$Q3ei*)IGc8$c2DA_Ftth-;|viKe*>{Q*4-a&sd=B0Uz`Bh*4<~e)>xt6atpO$>+Wq^YwT3)50g&I z?(!OXP-l5Wr0QyBQ8oKERaxuq6I*Ls$fe~mkvzGzhBmV-`>E|(ckjQp=4Gm=KNNCz zY^`~OEIvSmZry$0+S(@Y(zQke+9HUqxsBvUg`vB)YPHWuBEKXG8ALHbbQ_3@;7q!< zh(P;>L9|H_b#1K~N1570jmEnB6nLJHJewuYZw#Jm5_ztnst&(gtJM}FMwjBrLMI_C z-jc|(MeytZ&x1)k7aBZU6M41@o?YOXgwBbsJ4K*H44&%}d9I_TZQXq*c>YWBtd~4@ z8ay3>=T2n08YMtiNCX;fWs3~llqmQn>K?4S_d+l|bD_&C0_`n>XP4mF1HmKFWznUG zhB}qt-GXQ@h_)pWl^I003Zi`=`lBSG84g`*ol4SI1kpnvx*Bzeu5XDzYjNBmoAEL67JX-K;IA zP0i#(Ao`yCQP}-Uz4f?)pF13-2UaXZgOKEtCHkJrNJy{e8iMy;!RxU+i5^A2kN59a zK7n4*5&>5SajK^ztH|EVVejhW=-Kfsg*R(2BIySKeG*~vDHnYYL1~xMkg|s4!q_#} zaJUu&3XRCiX_Gbi(2Txk%xJKt-bKCm4)n6vbR`Xz-JXjN!9x8lnJm|q(9w}wu0kw z)V(T#U*3cWv0gz>I!6Fii2s6-xTV|QLN>)ilPidKHh_y2c61eDUMA5Py9Vi216z++ zrM($<8%fdDl7Nj0jpoz@o85#$NiJ!E(J~R#d`>FJ88nfk>m3mwt5=UfRx5#`(aExV z&5HSm6VS4|ZbgL=_pGQzTx7ax#S$Z4zGAVATX#Qw?b5GH`M;L(m$Yb>nSyKWQrd5! z>!1jhj$AFii$>;Av}hz^Y#MlymBdyY%LaG1z;Yj$<+ zoMDY@P=O5wl>$Gk2>Xzf$Kk~s8G)YiT&c%~1`qx0Gs?vmq%k&1oM-q1War0SK#E6X zl4_@P1hR-HcPC_Op_oIJDLo`7E>|~M>ROhHp=qYxNhPZ2SqHIILE^j znV6Y11umap7By#Br=BHTFwqPr&ls3V$r!;fnX^FNFtSi+rkpdZL3-B(r^4A-MkL~e zoMHo*C0s#yD1rOILhhdu&q3UoQw9D!_!g~!C2+k^&YYI3iwfGBP2#3Y8K_NECuii# ziZ~PPLz`t7fTEc*jOb_Q172pp!6x8y3{3{&BYvM{NS(}AkWJo&YXGvT;VU`LEdQ6wTFtZH>f`p61u-*%LgMJvWIr0XAK55k?lx~3yt6pMt50<8_fSfjrPpCBokp)m*_ z_b6J|&`^YLYk8-a$`5U=YmF7nP}G7>vG$^?58(SF3DWx{DT;1x^CuF10v8)uB*cf> zqDAz!rJ^v-jN|nq%C@4)8obtpzo4Qam-KlO7bXh0aI&+}-{fkBKQva2ClX=c)NZmeHv!v+*aL7E(5*-lRYhM?$zq-MG@#&}S7hA^WJ$-Sp{a!&a z;3*aEM#L&u2DB5SWzAp;FpEYULlS~ptQouJ+4Jwc*S^@A}xV%91Wqnt=2bf`rCiA zR12tD3!5vwTAN$HuImT8EQ@n7>t*3;iOT`O{7_8)Mw^ zTdRXBqSH89T@x&EOuY6Bh6W)%e;d52=vabXa`YF7n#=U%T z5FaC3>Yd^(!JF3TYgG7fnd!q=XO{%;wNyjKim8$-?qBi|x!FAx=aSqSb(Q!kRwO#T zRJd!g@$IdDv%gBFv-=|$G5uaYMy=+g;{7-4n~FoVq`|A|nYRJi^afhmf^*=}*|Wv? zID59j&%tNl);Zx1ynl`GTqWn=ZE#c{=Q-!~1t!d7yaZ7(Vj#i$@|t|Xh<~1z^gb~z z&xmgWa*q#!`66OC3kO4sLLx)K_|?7*rutLN(^$ z1&C|{f7Hxdk5Ad*+h_7qX3Y`JOJ7&hd{OUJu>ii7#-COD3|1KRF5xwSns`;5));7N z;#t5}<#iMgUNzU|3wxVdsRp9Xvya$5)*r9Cvf|>Dog0RR0(8Tt-ejv2dAVc>YH-O6 z6r&&CsS84QE8bLy79ZqV->@M_m7<}UzSsuwDTwQ9>d{0RHqt(2^8kWq1cc_KV{LIQWkdK-ur?c=G#MT%-Vz+53 zYMgu1Cib4@rKw#j`j^jAH$x6g$0S#1_hwHJpMo*_j~j`N73=&!+~h-q3|TZo4Wo?+u#ev{BBJ#6tZ%f zGoz@eW?vm@6@9<)9!8oxKbzrfc=AZ8Q{8iBrcwZbo+&i)dU5hDSwz-Mb}`i&Sqsdp7fUX|N4MF~Dg>Rnmv zb<5G1YbEzDy>k>FX?ReQl}3CVlsrUzIF&T_8u3E!t3Hb^PSt0cvh*s;vpYc5GBHu( z7feR9Y3N&EK%>7QNS+?w8Vd&7qIg*@S;uoEq!Rppb|rAKs)O;_+o$8NERBzgE%?YQ z_xr{y=M&Y7K8Ge!ZJ`KyZLDS!^#7YHyqNVa_qUa3Ufic>e(`!yp|h{1uG+a*bb%2q zKwlokbaZoqou8OdIiW>2gnB0?FFaKDu7#pq(OhSq7{+43ezryCNob0yBTdzYO*MXA z0;9Rp=||ppGAO5XsJWucHT7F(7h$gJ#P5b4Y!kC)xD5vIEK7dSx1XMBT}KnIrl4 zJA1bId(3iG^r;q8(pqCl_1T!IZl|2U(`;Y4SjzuEEu$jlBRAmFvnRj|I>6UN;~Ei@qwFKw^+E+;#n;oO>3pzOLid# z3wMp%sc4^lReM=i;!Vq26$@kd#eIs#T{ca1LpD#iN2X$@@ro9+^y*$*SmKObKR`9M8Df($%)mBHA((Ohm#lJDPMTCwBHoSTENqF!TK#j^`LU)AG$823I0-iAxbWaH>8KcAjDVb*TL0t>8$n|u(@FxMtOPe zjkT`FO9NWHa`y!jUs2`yTj-6eQRs<0XrKS%jY=$@`f9^iaGJ?Sq6|;~`$WiYN)LF*zlo=BxVxg1JarCwAYqWjT3-Q>8nmZSMV=M|tZr-u}+! zLNt71VO26!Nf*oDPb_;BxjW@r8FFVCGvewskucTQsqjt2*x$t}+UUZm?y4b3tgvNu zhdVLN;@Mjy*3jUOM0ia53~mus70wa!5F_7bpWz~pa(7=%u==s^k*YrX@`oRZcq@5_ z7nOP=RqB@dXlP>t)$z@g?A_&>cTY5L9lFyHHjrrpv4ln>9zmkoRk08THyRF&#YDZY zVSTJk42a%HGe$oiq2kMoP%P?ATqwwyYk#{+C5NZZvwniF4z7(bPGJ=LS@o>sqkk+hlStt%Hf@lUL; zV$0gpvirlaK`Uww;Xp!t&=-jKi?A75v;h^tzsVcKNLy6zZ*6F%{fbBt+5wJV6p3#D zsYWqr5_YHg5rhGmt9ds>xggX!$#f_I1w{)pIV0=;^uzFRTZ9&zl*z;%*4SM6H2F6_ zi4mKC{TZi3Kk_M7t-@x}Fwr7Kgl36o&%waD<|tJMu^&zCGu}94GfNon(VzAn+CJ3- zQSs!5wra^&XPIa#r>m8$-1T0@o8HDUo|+ey-PB1%#8d#2Gc}Qp7m^T$yLpf`k3+q4 zH5Jn|+Jx~(@hAXK3*;+C;b=Z+|pHMBIv3WU2S)nz2?bQ&aETO@!lcCjE3F z>9XRj#Lhn9gS0mMj4k=H;ys7k`h@3b>OID09xGmIvcHc4vf{mp2PT>+Mck?l(bi7t z3!bZ~_ad9IR(wTDy$?ANp0D-wK3=Of)Ode!B0Y|!mpjcMj$854KWCqCoTTmJvKlXS zO+;EX9K7%2vWhQj^}<*+`P-=^j8!`?2_lkKP5$`Fgv9|r9M5~J#{}Xi{S4BAa@?O( zjyM#|@KZ_fkrGb7$TwdzkkH|u@zOt}kFEvy*ZU|J`jrLg-IgS0Gt>7j-#Y>C%l^Gm zj_Q#9g*08y;Gg6t{~YBd2~Yk-PPc^j`WX-4ed&DnLf8yeJ+vkn6Uu`QpA7FUA-%FE*L#J_Pn8|9<(m zEGN6ykN@c941+D_X!ayKw-Ow7&Pw*S_fAkn+`lulzg~QnZoI?J6QobjtK75Pi z7xePy_fyC@#_(*d_yVEPFBShL1^rto;PlE0%J14FdvXA$b{88Z1n-pcZ{YSN6Q zN0OaB32#e+`vG@Ko6P~exwbF(&4Bl1=ly_FzSIImu>(|wQ136*;I{bRW zj}=o3J!fgfuTHR25c0~i2#t8$5%KzBoAAU7M`UqQpwTsB`mC9K6JcfC8u0qUVc%x# zgGIxe@%#uU!LS$G(t`bRuPKGChd?xeRQ(*qdr3_R-k+9uZn|!9LtR~S9i5x5nJ&&l z)HT&DZo<=pKmbQ4!866YQ{U9Yz!~z%8y)yvupD2C_PUo;;Zcm&iw8trw?_g!Rm<@N z;bMGBy1Ej$1k-49(*%&a4GU|QR?M&QE?ux-g&U7L=U3nen${Hc2ZQD13u~$?D!o%& zQ(Slq*Z)l?>;W6kyxPK8i8u8UWo!|q+P#n*Yu0$Dx=LLov+!G%idZ1nD8KQIr2O`B z!Z(p|N_}IXwJ}j$;%Bh2@mUY<<$gLK;;;3$EUmv1&%yv&0Ov%SHLsr!P=1D0w|QI4 zIPgq68`%E9$rCzmfg)(wbC!hxv2fdDRzO-{LD2`?*1oiLK+(4U*<*+ zx*>pHF5nP7ZPF#yOgwf8%ZKFa#F>zaQxoyU!TZ^2M$p@KEVO?_4rvvz|_f< zqHV0Kt(5&*L$DG(XVkBJ(uD1})+gVq zs4nP>Hig11MAqU9;!#qfB=L+P5k@=FQ6J?9`?fI0m(=s96B1r;J)Xk}vbI<>k}L&3 zG;HuTL}J`M!yJ)y+0|VXh&ETCzrYxzFbUQhC)3Og@&Dt&7sIEjYsN8|T5o(jMw!xR zypkSm#2-I-@tj0vnub~7e;;SWD2K0T=bjO#zHiI7p0ng7z$mDn_D4IggDmBm<|4L zB!IJB<^;a+;0og)FEPkiTQI7*Xu6@f5OuA?Q&bn8c;U|F?Fxr@n05J^og;81aB@U;XQU=y02=0d~LT-3K=mIj!=KDG{&zSeb^{>fyZwJD_HzWVyGe}hT| z131hhso*6QmF(sz>t~A74x)~KHE&n@*DA!zLlQ2cHD9BVz?9zLUnvEt&}k+{9Lw~C zs{|=Ce^m0TP%bMBo@D+iVDJg|kh1?y8Bw8nQx2A=d|{fNa4kU!t760clagPB+oVD) zyVy^PAHTupYc=lJ@-X<-KD-Lmckl$YkfGufUV(r5#+inYVZb^NGxMwG4l1lu8Iv4%;SygeybUQczj{8Q zLX}_1X4tRb_ayPFeH|5!!rLXxm(r_X-$2ByU+s6P5O35;xRUe#Ey=I4sh*3d&~7+Z zh*5Scq2Ep7SIe&n25ZKfbtrace(trace_, 99); - trace_->open("trace.vcd"); + trace_->open("trace.fst"); #endif this->reset(); @@ -85,6 +85,19 @@ void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) { *ioaddr = host_buffers_[wsid].ioaddr; } +void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { + std::lock_guard guard(mutex_); + + vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; + this->step(); + vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0; + assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid); + *value = vortex_afu_->af2cp_sTxPort_c2_data; +} + void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) { std::lock_guard guard(mutex_); @@ -94,20 +107,7 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8); this->step(); - assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid); -} - -void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { - std::lock_guard guard(mutex_); - - vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; - this->step(); - assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid); - assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid); - *value = vortex_afu_->af2cp_sTxPort_c2_data; + vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0; } void opae_sim::flush() { @@ -117,24 +117,41 @@ void opae_sim::flush() { /////////////////////////////////////////////////////////////////////////////// void opae_sim::reset() { - vortex_afu_->reset = 1; - this->step(); - vortex_afu_->reset = 0; + + host_buffers_.clear(); + dram_reads_.clear(); + cci_reads_.clear(); + cci_writes_.clear(); + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; + vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0; + vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0; + vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0; + vortex_afu_->avs_readdatavalid = 0; + vortex_afu_->avs_waitrequest = 0; + vortex_afu_->reset = 1; + + vortex_afu_->clk = 0; + this->eval(); + vortex_afu_->clk = 1; + this->eval(); + + vortex_afu_->reset = 0; + // Turn on assertion after reset Verilated::assertOn(true); } void opae_sim::step() { - vortex_afu_->clk = 0; - this->eval(); - - vortex_afu_->clk = 1; - this->eval(); this->sRxPort_bus(); this->sTxPort_bus(); this->avs_bus(); + + vortex_afu_->clk = 0; + this->eval(); + vortex_afu_->clk = 1; + this->eval(); #ifndef NDEBUG fflush(stdout); @@ -149,100 +166,105 @@ void opae_sim::eval() { ++timestamp; } -void opae_sim::sRxPort_bus() { +void opae_sim::sRxPort_bus() { + // check mmio request + bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid + || vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid; + // schedule CCI read responses - int cci_rd_index = -1; - for (int i = 0; i < cci_reads_.size(); i++) { - if (cci_reads_[i].cycles_left > 0) { - cci_reads_[i].cycles_left -= 1; - } - if ((cci_rd_index == -1) - && (cci_reads_[i].cycles_left == 0)) { - cci_rd_index = i; + std::list::iterator cci_rd_it(cci_reads_.end()); + for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) { + if (it->cycles_left > 0) + it->cycles_left -= 1; + if ((cci_rd_it == ie) && (it->cycles_left == 0)) { + cci_rd_it = it; } } // schedule CCI write responses - int cci_wr_index = -1; - for (int i = 0; i < cci_writes_.size(); i++) { - if (cci_writes_[i].cycles_left > 0) { - cci_writes_[i].cycles_left -= 1; + std::list::iterator cci_wr_it(cci_writes_.end()); + for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) { + if (it->cycles_left > 0) + it->cycles_left -= 1; + if ((cci_wr_it == ie) && (it->cycles_left == 0)) { + cci_wr_it = it; } - if ((cci_wr_index == -1) - && (cci_writes_[i].cycles_left == 0)) { - cci_wr_index = i; - } - } - - // send CCI read response - vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; - if (cci_rd_index != -1) { - vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; - memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), CACHE_BLOCK_SIZE); - vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata; - cci_reads_.erase(cci_reads_.begin() + cci_rd_index); } // send CCI write response vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0; - if (cci_wr_index != -1) { + if (cci_wr_it != cci_writes_.end()) { vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1; - vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata; - cci_writes_.erase(cci_writes_.begin() + cci_wr_index); + vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata; + cci_writes_.erase(cci_wr_it); } - // mmio - vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0; - vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0; + // send CCI read response (ensure mmio disabled) + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; + if (!mmio_req_enabled + && (cci_rd_it != cci_reads_.end())) { + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; + memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE); + vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata; + printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata); + for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) { + printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]); + } + printf("\n"); + fflush(stdout); + cci_reads_.erase(cci_rd_it); + } } void opae_sim::sTxPort_bus() { - // check read queue size - vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE); - - // check write queue size - vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE); - // process read requests - if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) { + if (vortex_afu_->af2cp_sTxPort_c0_valid) { + assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull); cci_rd_req_t cci_req; cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); + cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address; cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata; auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE); memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE); - cci_reads_.push_back(cci_req); + printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); + fflush(stdout); + cci_reads_.emplace_back(cci_req); } // process write requests - if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) { + if (vortex_afu_->af2cp_sTxPort_c1_valid) { + assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull); cci_wr_req_t cci_req; cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata; auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE); memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE); - cci_writes_.push_back(cci_req); + cci_writes_.emplace_back(cci_req); } + + // check queues overflow + vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1)); + vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1)); } void opae_sim::avs_bus() { // schedule DRAM read responses - int dram_rd_index = -1; - for (int i = 0; i < dram_reads_.size(); i++) { - if (dram_reads_[i].cycles_left > 0) { - dram_reads_[i].cycles_left -= 1; + std::list::iterator dram_rd_it(dram_reads_.end()); + for (auto it = dram_reads_.begin(), ie = dram_reads_.end(); it != ie; ++it) { + if (it->cycles_left > 0) { + it->cycles_left -= 1; } - if ((dram_rd_index == -1) - && (dram_reads_[i].cycles_left == 0)) { - dram_rd_index = i; + if ((it != ie) && (it->cycles_left == 0)) { + dram_rd_it = it; } } // send DRAM response vortex_afu_->avs_readdatavalid = 0; - if (dram_rd_index != -1) { + if (dram_rd_it != dram_reads_.end()) { vortex_afu_->avs_readdatavalid = 1; - memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), CACHE_BLOCK_SIZE); - dram_reads_.erase(dram_reads_.begin() + dram_rd_index); + memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE); + dram_reads_.erase(dram_rd_it); } // handle DRAM stalls @@ -275,7 +297,7 @@ void opae_sim::avs_bus() { dram_req.cycles_left = DRAM_LATENCY; unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE); ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data()); - dram_reads_.push_back(dram_req); + dram_reads_.emplace_back(dram_req); } } diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h index 9a4906eb..58b57757 100644 --- a/driver/opae/vlsim/opae_sim.h +++ b/driver/opae/vlsim/opae_sim.h @@ -5,7 +5,7 @@ #include "verilated.h" #ifdef VCD_OUTPUT -#include +#include #endif #include @@ -13,7 +13,7 @@ #include #include -#include +#include #include #define CACHE_BLOCK_SIZE 64 @@ -41,18 +41,19 @@ private: typedef struct { int cycles_left; std::array block; - unsigned tag; + uint32_t tag; } dram_rd_req_t; typedef struct { int cycles_left; std::array block; - unsigned mdata; + uint64_t addr; + uint32_t mdata; } cci_rd_req_t; typedef struct { int cycles_left; - unsigned mdata; + uint32_t mdata; } cci_wr_req_t; typedef struct { @@ -76,17 +77,17 @@ private: std::unordered_map host_buffers_; - std::vector dram_reads_; + std::list dram_reads_; - std::vector cci_reads_; + std::list cci_reads_; - std::vector cci_writes_; + std::list cci_writes_; std::mutex mutex_; RAM ram_; Vvortex_afu_shim *vortex_afu_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedFstC *trace_; #endif }; \ No newline at end of file diff --git a/driver/opae/vx_scope.h b/driver/opae/vx_scope.h index edd26cb4..2bb09c4a 100644 --- a/driver/opae/vx_scope.h +++ b/driver/opae/vx_scope.h @@ -1,6 +1,6 @@ #pragma once -//#define HANG_TIMEOUT 60 +#define HANG_TIMEOUT 60 int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); diff --git a/driver/tests/dogfood/Makefile b/driver/tests/dogfood/Makefile index 69a5ec80..46cb364a 100644 --- a/driver/tests/dogfood/Makefile +++ b/driver/tests/dogfood/Makefile @@ -1,7 +1,7 @@ RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain VORTEX_RT_PATH ?= $(wildcard ../../../runtime) -OPTS ?= -n32 +OPTS ?= -n64 VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ diff --git a/hw/opae/README b/hw/opae/README index 84e08e88..e05b1df2 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -76,7 +76,7 @@ tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt # compress VCD trace tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd -tar -zcvf trace.vcd.tar.gz trace.vcd +tar -zcvf trace.fst.tar.gz trace.fst run.log tar -zcvf run.log.tar.gz run.log tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index e23c4caf..c019e54c 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -74,103 +74,103 @@ localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR; localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA; localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ; -logic [127:0] afu_id = `AFU_ACCEL_UUID; +localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE); +localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW; -typedef enum logic[3:0] { - STATE_IDLE, - STATE_READ, - STATE_WRITE, - STATE_START, - STATE_RUN, - STATE_CLFLUSH, - STATE_CSR_READ, - STATE_CSR_WRITE -} state_t; - -typedef logic [$clog2(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag; -typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data; - -state_t state; +localparam STATE_IDLE = 0; +localparam STATE_READ = 1; +localparam STATE_WRITE = 2; +localparam STATE_START = 3; +localparam STATE_RUN = 4; +localparam STATE_CLFLUSH = 5; +localparam STATE_CSR_READ = 6; +localparam STATE_CSR_WRITE = 7; +localparam STATE_MAX_VALUE = 8; +localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE); `ifdef SCOPE `SCOPE_DECL_SIGNALS `endif +wire [127:0] afu_id = `AFU_ACCEL_UUID; + +reg [STATE_WIDTH-1:0] state; + // Vortex ports /////////////////////////////////////////////////////////////// -logic vx_dram_req_valid; -logic vx_dram_req_rw; -logic [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen; -logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; -logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data; -logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; -logic vx_dram_req_ready; +wire vx_dram_req_valid; +wire vx_dram_req_rw; +wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen; +wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; +wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data; +wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; +wire vx_dram_req_ready; -logic vx_dram_rsp_valid; -logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; -logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; -logic vx_dram_rsp_ready; +wire vx_dram_rsp_valid; +wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; +wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; +wire vx_dram_rsp_ready; -logic vx_snp_req_valid; -logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; -logic vx_snp_req_invalidate = 0; -logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; -logic vx_snp_req_ready; +reg vx_snp_req_valid; +reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; +wire vx_snp_req_invalidate = 0; +reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; +wire vx_snp_req_ready; -logic vx_snp_rsp_valid; +reg vx_snp_rsp_valid; `DEBUG_BEGIN -logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; +reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; `DEBUG_END -logic vx_snp_rsp_ready; +reg vx_snp_rsp_ready; -logic vx_csr_io_req_valid; -logic [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; -logic [11:0] vx_csr_io_req_addr; -logic vx_csr_io_req_rw; -logic [31:0] vx_csr_io_req_data; -logic vx_csr_io_req_ready; +wire vx_csr_io_req_valid; +wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; +wire [11:0] vx_csr_io_req_addr; +wire vx_csr_io_req_rw; +wire [31:0] vx_csr_io_req_data; +wire vx_csr_io_req_ready; -logic vx_csr_io_rsp_valid; -logic [31:0] vx_csr_io_rsp_data; -logic vx_csr_io_rsp_ready; +wire vx_csr_io_rsp_valid; +wire [31:0] vx_csr_io_rsp_data; +wire vx_csr_io_rsp_ready; -logic vx_reset; -logic vx_busy; +reg vx_reset; +wire vx_busy; // AVS Queues ///////////////////////////////////////////////////////////////// -logic avs_rtq_push; -logic avs_rtq_pop; +wire avs_rtq_push; +wire avs_rtq_pop; `DEBUG_BEGIN -logic avs_rtq_empty; -logic avs_rtq_full; +wire avs_rtq_empty; +wire avs_rtq_full; `DEBUG_BEGIN -logic avs_rdq_push; -logic avs_rdq_pop; +wire avs_rdq_push; +wire avs_rdq_pop; t_local_mem_data avs_rdq_dout; -logic avs_rdq_empty; +wire avs_rdq_empty; `DEBUG_BEGIN -logic avs_rdq_full; +wire avs_rdq_full; `DEBUG_END // CMD variables ////////////////////////////////////////////////////////////// t_ccip_clAddr cmd_io_addr; -logic[DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; -logic[DRAM_ADDR_WIDTH-1:0] cmd_data_size; +reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; +reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size; `ifdef SCOPE -logic [63:0] cmd_scope_rdata; -logic [63:0] cmd_scope_wdata; -logic cmd_scope_read; -logic cmd_scope_write; +wire [63:0] cmd_scope_rdata; +wire [63:0] cmd_scope_wdata; +wire cmd_scope_read; +wire cmd_scope_write; `endif -logic [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; -logic [11:0] cmd_csr_addr; -logic [31:0] cmd_csr_rdata; -logic [31:0] cmd_csr_wdata; +reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; +reg [11:0] cmd_csr_addr; +reg [31:0] cmd_csr_rdata; +reg [31:0] cmd_csr_wdata; // MMIO controller //////////////////////////////////////////////////////////// @@ -193,6 +193,10 @@ assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mm `DEBUG_BEGIN wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid; wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid; +wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid; +wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid; +wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull; +wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull; wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address; wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length; wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid; @@ -212,8 +216,7 @@ initial begin end `endif -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin `ifndef VERILATOR $asserton; // enable assertions @@ -316,7 +319,7 @@ begin MMIO_STATUS: begin mmio_tx.data <= 64'(state); `ifdef DBG_PRINT_OPAE - if (state != state_t'(mmio_tx.data)) begin + if (state != STATE_WIDTH'(mmio_tx.data)) begin $display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state); end `endif @@ -349,14 +352,13 @@ end // COMMAND FSM //////////////////////////////////////////////////////////////// -logic cmd_read_done; -logic cmd_write_done; -logic cmd_clflush_done; -logic cmd_csr_done; -logic cmd_run_done; +wire cmd_read_done; +wire cmd_write_done; +wire cmd_clflush_done; +wire cmd_csr_done; +wire cmd_run_done; -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin state <= STATE_IDLE; vx_reset <= 0; @@ -479,27 +481,28 @@ end // AVS Controller ///////////////////////////////////////////////////////////// -logic vortex_enabled; -logic cci_rdq_empty; -t_cci_rdq_data cci_rdq_dout; +wire vortex_enabled; +wire cci_rdq_empty; +wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout; -logic cci_dram_rd_req_fire; -logic cci_dram_wr_req_fire; -logic vx_dram_rd_req_fire; +wire cci_dram_rd_req_fire; +wire cci_dram_wr_req_fire; +wire vx_dram_rd_req_fire; `DEBUG_BEGIN -logic vx_dram_wr_req_fire; +wire vx_dram_wr_req_fire; `DEBUG_END -logic vx_dram_rd_rsp_fire; +wire vx_dram_rd_rsp_fire; t_local_mem_byte_mask vx_dram_req_byteen_; -logic [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_reads_next; -logic [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset; -logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; +reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads; +wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next; +wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; -logic cci_dram_rd_req_enable, cci_dram_wr_req_enable; -logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable; +wire cci_dram_rd_req_enable, cci_dram_wr_req_enable; +wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable; -logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state); @@ -535,11 +538,10 @@ end else begin assign vx_dram_req_byteen_ = vx_dram_req_byteen; end -always_comb -begin +always @(*) begin case (state) CMD_MEM_READ: avs_address = cci_dram_rd_req_addr; - CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout))); + CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout))); default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; endcase @@ -550,8 +552,8 @@ begin endcase case (state) - CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)]; - default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset; + CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; + default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset; endcase end @@ -560,8 +562,7 @@ assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable; assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size); -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin mem_bank_select <= 0; @@ -594,7 +595,7 @@ begin end if (cci_dram_wr_req_fire) begin - cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); + cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1)); @@ -654,7 +655,7 @@ VX_generic_queue #( // AVS data read response queue /////////////////////////////////////////////// -logic cci_wr_req_fire; +wire cci_wr_req_fire; assign avs_rdq_push = avs_readdatavalid; assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire; @@ -676,31 +677,37 @@ VX_generic_queue #( // CCI-P Read Request /////////////////////////////////////////////////////////// -logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next; -logic [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr, cci_rd_req_ctr_next; +reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads; +wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr; +wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next; +wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag; +reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr; t_ccip_clAddr cci_rd_req_addr; -t_cci_rdq_tag cci_rd_rsp_ctr; -logic cci_rd_req_fire, cci_rd_rsp_fire; -logic cci_rd_req_enable, cci_rd_req_wait; +wire cci_rd_req_fire, cci_rd_rsp_fire; +reg cci_rd_req_enable, cci_rd_req_wait; -logic cci_rdq_push, cci_rdq_pop; -t_cci_rdq_data cci_rdq_din; +wire cci_rdq_push, cci_rdq_pop; +wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din; -always_comb begin +always @(*) begin af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0); af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr; - af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(t_cci_rdq_tag'(cci_rd_req_ctr)); + af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag); end assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull; assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; +assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr); +assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata); + assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0); assign cci_rdq_pop = cci_dram_wr_req_fire; assign cci_rdq_push = cci_rd_rsp_fire; -assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)}; +assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag}; assign cci_pending_reads_next = cci_pending_reads + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : @@ -709,8 +716,7 @@ assign cci_pending_reads_next = cci_pending_reads assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; // Send read requests to CCI -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin cci_rd_req_addr <= 0; cci_rd_req_ctr <= 0; @@ -738,21 +744,23 @@ begin if (cci_rd_req_fire) begin cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr_next; - if (t_cci_rdq_tag'(cci_rd_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin - cci_rd_req_wait <= 1; // end current request batch + if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin + cci_rd_req_wait <= 1; // end current request batch + $display("*** %t: CCI Rd Rsp: STOP", $time); end `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); + $display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); `endif end if (cci_rd_rsp_fire) begin - cci_rd_rsp_ctr <= cci_rd_rsp_ctr + t_cci_rdq_tag'(1); - if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin - cci_rd_req_wait <= 0; // restart new request batch + cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1); + if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin + cci_rd_req_wait <= 0; // restart new request batch + $display("*** %t: CCI Rd Rsp: START", $time); end `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rd_rsp_ctr); + $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr); `endif end @@ -763,12 +771,11 @@ begin end cci_pending_reads <= cci_pending_reads_next; - end end VX_generic_queue #( - .DATAW($bits(t_ccip_clData) + $bits(t_cci_rdq_tag)), + .DATAW(CCI_RD_RQ_DATAW), .SIZE(CCI_RD_QUEUE_SIZE) ) cci_rd_req_queue ( .clk (clk), @@ -782,14 +789,37 @@ VX_generic_queue #( `UNUSED_PIN (size) ); +`DEBUG_BEGIN +reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask; +always @(posedge clk) begin + if (reset) begin + dbg_cci_rd_rsp_mask <= 0; + end else begin + if (cci_rd_rsp_fire) begin + if (cci_rd_rsp_ctr == 0) begin + dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag); + end else begin + if (dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] != 0) begin + $display("*** %t: Assert: CCI Rd Rsp: idx=%0d, ctr=%0d, mask=%0h, meta=%0h, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, dbg_cci_rd_rsp_mask, cp2af_sRxPort.c0.hdr.mdata, cp2af_sRxPort.c0.data); + assert(0); + end + dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1; + end + end + end +end +`DEBUG_END + // CCI-P Write Request ////////////////////////////////////////////////////////// -logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next; -logic [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; +reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; +wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; t_ccip_clAddr cci_wr_req_addr; -logic cci_wr_req_enable, cci_wr_rsp_fire; +reg cci_wr_req_enable; +wire cci_wr_rsp_fire; -always_comb begin +always @(*) begin af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0); af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr; af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode @@ -808,7 +838,7 @@ assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty; // Send write requests to CCI -always_ff @(posedge clk) +always @(posedge clk) begin if (reset) begin cci_wr_req_addr <= 0; @@ -833,7 +863,7 @@ begin cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE - $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next); + $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout); `endif end @@ -849,12 +879,12 @@ end // Vortex cache snooping ////////////////////////////////////////////////////// -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_req_ctr_next; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr; +reg [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr_next, snp_rsp_ctr_next; -logic vx_snp_req_fire, vx_snp_rsp_fire; +wire vx_snp_req_fire, vx_snp_rsp_fire; if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)}; @@ -872,8 +902,7 @@ assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'( assign cmd_clflush_done = (0 == snp_rsp_ctr); -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin vx_snp_req_valid <= 0; vx_snp_req_addr <= 0; @@ -911,7 +940,7 @@ begin vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next); snp_req_ctr <= snp_req_ctr_next; `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next), (snp_req_size - snp_req_ctr_next)); + $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next)); `endif end @@ -928,7 +957,7 @@ end // CSRs/////////////////////////////////////////////////////////////////////// -logic csr_io_req_sent; +reg csr_io_req_sent; assign vx_csr_io_req_valid = !csr_io_req_sent && ((STATE_CSR_READ == state || STATE_CSR_WRITE == state)); @@ -941,8 +970,7 @@ assign vx_csr_io_rsp_ready = 1; assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid; -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin csr_io_req_sent <= 0; cmd_csr_rdata <= 0; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index a7a2e0ef..9c8b19dd 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -6,11 +6,6 @@ /////////////////////////////////////////////////////////////////////////////// -// `define SYNTHESIS 1 -// `define ASIC 1 - -/////////////////////////////////////////////////////////////////////////////// - `define NW_BITS `LOG2UP(`NUM_WARPS) `define NT_BITS `LOG2UP(`NUM_THREADS) diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index f60f1964..05833b9d 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -10,131 +10,24 @@ module VX_gpr_ram ( output wire [`NUM_THREADS-1:0][31:0] rs1_data, output wire [`NUM_THREADS-1:0][31:0] rs2_data ); - `ifndef ASIC - - reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; - reg [`NUM_THREADS-1:0][31:0] q1, q2; - - always @(posedge clk) begin - for (integer i = 0; i < `NUM_THREADS; i++) begin - if (we[i]) begin - mem[waddr][i][0] <= wdata[i][07:00]; - mem[waddr][i][1] <= wdata[i][15:08]; - mem[waddr][i][2] <= wdata[i][23:16]; - mem[waddr][i][3] <= wdata[i][31:24]; - end - end - q1 <= mem[rs1]; - q2 <= mem[rs2]; - end - - assign rs1_data = q1; - assign rs2_data = q2; - - `else - - wire [`NUM_THREADS-1:0][31:0] write_bit_mask; + reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; + reg [`NUM_THREADS-1:0][31:0] q1, q2; + + always @(posedge clk) begin for (integer i = 0; i < `NUM_THREADS; i++) begin - assign write_bit_mask[i] = {32{~we[i]}}; - end - - wire cenb = 0; - wire cena_1 = 0; - wire cena_2 = 0; - - wire [`NUM_THREADS-1:0][31:0] tmp_a; - wire [`NUM_THREADS-1:0][31:0] tmp_b; - - `ifndef SYNTHESIS - for (integer i = 0; i < `NUM_THREADS; i++) begin - for (integer j = 0; j < 32; j++) begin - assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j]; - assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j]; + if (we[i]) begin + mem[waddr][i][0] <= wdata[i][07:00]; + mem[waddr][i][1] <= wdata[i][15:08]; + mem[waddr][i][2] <= wdata[i][23:16]; + mem[waddr][i][3] <= wdata[i][31:24]; end end - `else - assign rs1_data = tmp_a; - assign rs2_data = tmp_b; - `endif - for (integer i = 0; i < 'NT; i=i+4) begin - `IGNORE_WARNINGS_BEGIN - rf2_32x128_wm1 first_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(tmp_a[(i+3):(i)]), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena_1), - .AA(rs1[(i+3):(i)]), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask[(i+3):(i)]), - .AB(waddr[(i+3):(i)]), - .DB(wdata[(i+3):(i)]), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); + q1 <= mem[rs1]; + q2 <= mem[rs2]; + end - rf2_`NUM_GPRSx128_wm1 second_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(tmp_b[(i+3):(i)]), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena_2), - .AA(rs2[(i+3):(i)]), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask[(i+3):(i)]), - .AB(waddr[(i+3):(i)]), - .DB(wdata[(i+3):(i)]), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - `IGNORE_WARNINGS_END - end - - `endif + assign rs1_data = q1; + assign rs2_data = q2; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 1b957271..6bb52123 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -20,8 +20,8 @@ module VX_icache_stage #( ); `UNUSED_VAR (reset) - reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0]; - reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0]; + `NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0]; + `NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0]; wire icache_req_fire = icache_req_if.valid && icache_req_if.ready; diff --git a/hw/rtl/VX_ipdom_stack.v b/hw/rtl/VX_ipdom_stack.v index e00097ae..4e7d42f9 100644 --- a/hw/rtl/VX_ipdom_stack.v +++ b/hw/rtl/VX_ipdom_stack.v @@ -16,8 +16,8 @@ module VX_ipdom_stack #( ); localparam STACK_SIZE = 2 ** DEPTH; - reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; - reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; + `NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; + `NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; reg is_part [0:STACK_SIZE-1]; reg [DEPTH-1:0] rd_ptr, wr_ptr; diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index d30120dd..783743ee 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -52,7 +52,7 @@ /////////////////////////////////////////////////////////////////////////////// `define USE_FAST_BRAM (* ramstyle="mlab" *) -`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *) +`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index b5f6350d..d3e31162 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -306,9 +306,9 @@ module VX_bank #( assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped - //decides which request to deal with. Priority: 1) DRAM fill, 2) Miss reserve 3) Core req 4) Snp req - assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 : - mrvq_pop_unqual ? mrvq_addr_st0 : + //Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req + assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 : + dfpq_pop_unqual ? dfpq_addr_st0 : reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : snrq_pop_unqual ? snrq_addr_st0 : 0; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 9f201223..f56d638e 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -56,7 +56,7 @@ module VX_cache_miss_resrv #( output wire miss_resrv_is_snp_st0, output wire miss_resrv_snp_invalidate_st0 ); - reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0]; + wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table; reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MRVQ_SIZE-1:0] valid_table; @@ -72,8 +72,8 @@ module VX_cache_miss_resrv #( assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE)); assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock - wire enqueue_possible = !miss_resrv_full; - wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; + wire enqueue_possible = !miss_resrv_full; + wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; reg [MRVQ_SIZE-1:0] make_ready; reg [MRVQ_SIZE-1:0] make_ready_push; @@ -86,11 +86,11 @@ module VX_cache_miss_resrv #( assign pending_hazard_st1 = |(valid_address_match); - wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; + wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr; assign miss_resrv_valid_st0 = dequeue_possible; - assign miss_resrv_addr_st0 = addr_table[dequeue_index]; + assign miss_resrv_addr_st0 = addr_table[dequeue_index]; assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, @@ -98,7 +98,7 @@ module VX_cache_miss_resrv #( miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0, - miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index]; + miss_resrv_snp_invalidate_st0} = metadata_table; wire mrvq_push = miss_add && enqueue_possible && !is_mrvq; wire mrvq_pop = miss_resrv_pop && dequeue_possible; @@ -125,7 +125,6 @@ module VX_cache_miss_resrv #( valid_table[enqueue_index] <= 1; ready_table[enqueue_index] <= mrvq_init_ready_state; addr_table[enqueue_index] <= miss_add_addr; - metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}; tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); end else if (increment_head) begin valid_table[head_ptr] <= 0; @@ -155,6 +154,22 @@ module VX_cache_miss_resrv #( end end + VX_dp_ram #( + .DATAW(`MRVQ_METADATA_WIDTH), + .SIZE(MRVQ_SIZE), + .BYTEENW(1), + .BUFFERED(0), + .RWCHECK(1) + ) metadata_ram ( + .clk(clk), + .waddr(enqueue_index), + .raddr(dequeue_index), + .wren(mrvq_push), + .rden(1'b1), + .din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}), + .dout(metadata_table) + ); + `ifdef DBG_PRINT_CACHE_MSRQ always @(posedge clk) begin if (mrvq_push || mrvq_pop || increment_head || recover_state) begin diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index a1b80838..6d6d8572 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -183,15 +183,15 @@ module VX_tag_data_access #( if (valid_req_st1) begin if ((| use_write_enable)) begin if (writefill_st1) begin - $display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data); + $display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data); end else begin - $display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1); + $display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1); end end else if (miss_st1) begin - $display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1); + $display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1); end else begin - $display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); + $display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); end end end diff --git a/hw/rtl/cache/VX_tag_data_store.v b/hw/rtl/cache/VX_tag_data_store.v index c0594471..d3a022b2 100644 --- a/hw/rtl/cache/VX_tag_data_store.v +++ b/hw/rtl/cache/VX_tag_data_store.v @@ -78,7 +78,7 @@ module VX_tag_data_store #( .SIZE(`BANK_LINE_COUNT), .BYTEENW(`BANK_LINE_WORDS * WORD_SIZE), .BUFFERED(0), - .RWCHECK(0) + .RWCHECK(1) ) dp_ram ( .clk(clk), .waddr(write_addr), diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index b7d70789..01a0a167 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -6,6 +6,7 @@ module VX_dp_ram #( parameter BYTEENW = 1, parameter BUFFERED = 1, parameter RWCHECK = 1, + parameter RWBYPASS = 0, parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1) ) ( @@ -29,19 +30,46 @@ module VX_dp_ram #( if (wren[i]) mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; end - if (rden) - dout_r <= mem[raddr]; end end else begin always @(posedge clk) begin if (wren) mem[waddr] <= din; - if (rden) - dout_r <= mem[raddr]; end - end - + end + + always @(posedge clk) begin + if (rden) + dout_r <= mem[raddr]; + end + + if (RWBYPASS) begin + reg [DATAW-1:0] din_r; + wire writing; + + if (BYTEENW > 1) begin + assign writing = (| wren); + always @(posedge clk) begin + for (integer i = 0; i < BYTEENW; i++) begin + din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8]; + end + end + end else begin + assign writing = wren; + always @(posedge clk) begin + din_r <= din; + end + end + + reg bypass_r; + always @(posedge clk) begin + bypass_r <= writing && (raddr == waddr); + end + + assign dout = bypass_r ? din_r : dout_r; + end else begin assign dout = dout_r; + end end else begin @@ -65,7 +93,7 @@ module VX_dp_ram #( end end - `ifdef SYNTHESIS + if (RWBYPASS) begin reg [DATAW-1:0] din_r; wire writing; @@ -89,13 +117,13 @@ module VX_dp_ram #( end assign dout = bypass_r ? din_r : mem[raddr]; - `else + end else begin assign dout = mem[raddr]; - `endif + end end else begin - reg [DATAW-1:0] mem [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0]; if (BYTEENW > 1) begin always @(posedge clk) begin diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 68db0d4d..bb5010b7 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -85,7 +85,7 @@ module VX_generic_queue #( .DATAW(DATAW), .SIZE(SIZE), .BUFFERED(0), - .RWCHECK(0) + .RWCHECK(1) ) dp_ram ( .clk(clk), .waddr(wr_ptr_a), diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 9490d6b3..8b089259 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -36,8 +36,9 @@ module VX_scope #( localparam GET_COUNT = 3'd3; localparam GET_OFFSET = 3'd6; - reg [DATAW-1:0] data_store [SIZE-1:0]; - reg [DELTAW-1:0] delta_store [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0]; + reg [UPDW-1:0] prev_trigger_id; reg [DELTAW-1:0] delta; reg [BUSW-1:0] bus_out_r; diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 20e7e85b..88ac722c 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -44,7 +44,7 @@ gen-s: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' gen-sd: - verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace $(DBG) + verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace-fst --trace-threads 1 $(DBG) gen-st: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS) @@ -53,7 +53,7 @@ gen-m: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' gen-md: - verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace $(DBG) + verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace-fst --trace-threads 1 $(DBG) gen-mt: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) @@ -77,11 +77,12 @@ build-mt: gen-mt (cd obj_dir && make -j -f VVortex.mk) run: run-s + run-s: build-s (cd obj_dir && ./VVortex) run-sd: build-sd - (cd obj_dir && valgrind ./VVortex) + (cd obj_dir && ./VVortex) run-st: build-st (cd obj_dir && ./VVortex) diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 60fde196..2698cc74 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -28,15 +28,11 @@ Simulator::Simulator() { ram_ = nullptr; vortex_ = new VVortex(); - dram_rsp_active_ = false; - snp_req_active_ = false; - csr_req_active_ = false; - #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC(); + trace_ = new VerilatedFstC(); vortex_->trace(trace_, 99); - trace_->open("trace.vcd"); + trace_->open("trace.fst"); #endif // reset the device @@ -66,27 +62,49 @@ void Simulator::reset() { std::cout << timestamp << ": [sim] reset()" << std::endl; #endif - vortex_->reset = 1; - this->step(); - vortex_->reset = 0; - + print_bufs_.clear(); dram_rsp_vec_.clear(); + dram_rsp_active_ = false; + snp_req_active_ = false; + csr_req_active_ = false; + + snp_req_size_ = 0; + pending_snp_reqs_ = 0; + csr_rsp_value_ = nullptr; + + vortex_->dram_rsp_valid = 0; + vortex_->dram_req_ready = 0; + vortex_->io_req_ready = 0; + vortex_->io_rsp_valid = 0; + vortex_->snp_req_valid = 0; + vortex_->snp_rsp_ready = 0; + vortex_->csr_io_req_valid = 0; + vortex_->csr_io_rsp_ready = 0; + + vortex_->reset = 1; + + vortex_->clk = 0; + this->eval(); + vortex_->clk = 1; + this->eval(); + + vortex_->reset = 0; + // Turn on assertion after reset Verilated::assertOn(true); } void Simulator::step() { - vortex_->clk = 0; - this->eval(); - - vortex_->clk = 1; - this->eval(); - this->eval_dram_bus(); this->eval_io_bus(); this->eval_csr_bus(); this->eval_snp_bus(); + + vortex_->clk = 0; + this->eval(); + vortex_->clk = 1; + this->eval(); } void Simulator::eval() { @@ -104,14 +122,13 @@ void Simulator::eval_dram_bus() { } // schedule DRAM responses - int dequeue_index = -1; - for (int i = 0; i < dram_rsp_vec_.size(); i++) { - if (dram_rsp_vec_[i].cycles_left > 0) { - dram_rsp_vec_[i].cycles_left -= 1; + std::list::iterator dram_rsp_it(dram_rsp_vec_.end()); + for (auto it = dram_rsp_vec_.begin(), ie = dram_rsp_vec_.end(); it != ie; ++it) { + if (it->cycles_left > 0) { + it->cycles_left -= 1; } - if ((dequeue_index == -1) - && (dram_rsp_vec_[i].cycles_left == 0)) { - dequeue_index = i; + if ((dram_rsp_it == ie) && (it->cycles_left == 0)) { + dram_rsp_it = it; } } @@ -122,11 +139,11 @@ void Simulator::eval_dram_bus() { dram_rsp_active_ = false; } if (!dram_rsp_active_) { - if (dequeue_index != -1) { + if (dram_rsp_it != dram_rsp_vec_.end()) { vortex_->dram_rsp_valid = 1; - memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE); - vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; - dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); + memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_it->block.data(), GLOBAL_BLOCK_SIZE); + vortex_->dram_rsp_tag = dram_rsp_it->tag; + dram_rsp_vec_.erase(dram_rsp_it); dram_rsp_active_ = true; } else { vortex_->dram_rsp_valid = 0; @@ -161,7 +178,7 @@ void Simulator::eval_dram_bus() { dram_req.cycles_left = DRAM_LATENCY; dram_req.tag = vortex_->dram_req_tag; ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data()); - dram_rsp_vec_.push_back(dram_req); + dram_rsp_vec_.emplace_back(dram_req); } } } diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index cfea9bec..0dcf8a3b 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -5,13 +5,14 @@ #include "verilated.h" #ifdef VCD_OUTPUT -#include +#include #endif #include #include "ram.h" #include +#include #include #include #include @@ -62,7 +63,7 @@ private: void eval_csr_bus(); void eval_snp_bus(); - std::vector dram_rsp_vec_; + std::list dram_rsp_vec_; bool dram_rsp_active_; bool snp_req_active_; @@ -75,6 +76,6 @@ private: RAM *ram_; VVortex *vortex_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedFstC *trace_; #endif }; \ No newline at end of file