First Commit
This commit is contained in:
1
externals/xbyak/.github/FUNDING.yml
vendored
Normal file
1
externals/xbyak/.github/FUNDING.yml
vendored
Normal file
@ -0,0 +1 @@
|
||||
github: herumi
|
||||
21
externals/xbyak/.github/workflows/main.yml
vendored
Normal file
21
externals/xbyak/.github/workflows/main.yml
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
name: test
|
||||
on: [push]
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: sh
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: debian:testing
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- run: apt -y update
|
||||
- run: apt -y install g++-multilib libboost-dev make nasm yasm
|
||||
- run: make test
|
||||
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
|
||||
1
externals/xbyak/.gitignore
vendored
Normal file
1
externals/xbyak/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/build* # cmake
|
||||
8
externals/xbyak/Android.bp
vendored
Normal file
8
externals/xbyak/Android.bp
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
//#################################################
|
||||
cc_library_headers {
|
||||
name: "xbyak_headers",
|
||||
vendor: true,
|
||||
export_include_dirs: [
|
||||
"xbyak"
|
||||
],
|
||||
}
|
||||
55
externals/xbyak/CMakeLists.txt
vendored
Normal file
55
externals/xbyak/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
cmake_minimum_required(VERSION 2.6...3.0.2)
|
||||
|
||||
project(xbyak LANGUAGES CXX VERSION 6.68)
|
||||
|
||||
file(GLOB headers xbyak/*.h)
|
||||
|
||||
if (DEFINED CMAKE_VERSION AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.2)
|
||||
include(GNUInstallDirs)
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME} INTERFACE
|
||||
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
|
||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||
)
|
||||
|
||||
install(
|
||||
TARGETS ${PROJECT_NAME}
|
||||
EXPORT ${PROJECT_NAME}-targets
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
|
||||
)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
configure_package_config_file(
|
||||
cmake/config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
|
||||
install(
|
||||
FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||
DESTINATION
|
||||
${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
|
||||
install(
|
||||
EXPORT ${PROJECT_NAME}-targets
|
||||
NAMESPACE ${PROJECT_NAME}::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
elseif(NOT DEFINED CMAKE_INSTALL_INCLUDEDIR)
|
||||
set(CMAKE_INSTALL_INCLUDEDIR "include")
|
||||
endif()
|
||||
|
||||
install(
|
||||
FILES ${headers}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/xbyak
|
||||
)
|
||||
27
externals/xbyak/COPYRIGHT
vendored
Normal file
27
externals/xbyak/COPYRIGHT
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
|
||||
Copyright (c) 2007 MITSUNARI Shigeo
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
Neither the name of the copyright owner nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
3
externals/xbyak/cmake/config.cmake.in
vendored
Normal file
3
externals/xbyak/cmake/config.cmake.in
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")
|
||||
8
externals/xbyak/cmake/meson-config.cmake.in
vendored
Normal file
8
externals/xbyak/cmake/meson-config.cmake.in
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
if(NOT TARGET @TARGET_NAME@)
|
||||
add_library(@TARGET_NAME@ INTERFACE IMPORTED)
|
||||
set_target_properties(@TARGET_NAME@ PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "@ABSOLUTE_INCLUDE_DIR@"
|
||||
)
|
||||
endif()
|
||||
193
externals/xbyak/doc/changelog.md
vendored
Normal file
193
externals/xbyak/doc/changelog.md
vendored
Normal file
@ -0,0 +1,193 @@
|
||||
# History
|
||||
|
||||
* 2022/Dec/07 ver 6.68 support prefetchit{0,1}
|
||||
* 2022/Nov/30 ver 6.67 support CMPccXADD
|
||||
* 2022/Nov/25 ver 6.66 support RAO-INT
|
||||
* 2022/Nov/22 ver 6.65 consider x32
|
||||
* 2022/Nov/04 ver 6.64 some vmov* support addressing with mask
|
||||
* 2022/Oct/06 ver 6.63 vpmadd52{h,l}uq support AVX-IFMA
|
||||
* 2022/Oct/05 ver 6.63 support amx_fp16/avx_vnni_int8/avx_ne_convert and add setDefaultEncoding()
|
||||
* 2022/Aug/15 ver 6.62 add serialize instruction
|
||||
* 2022/Aug/02 ver 6.61.1 noexcept is supported by Visual Studio 2015 or later
|
||||
* 2022/Jul/29 ver 6.61 fix exception of movzx eax, ah in 64-bit mode
|
||||
* 2022/Jun/16 ver 6.60.2 fix detection of GFNI, VAES, and VPCLMULQDQ
|
||||
* 2022/Jun/15 ver 6.60.1 fix link error of Xbyak::util::Cpu on Visual Studio with /O0 option
|
||||
* 2022/Jun/06 ver 6.60 change the version format to avoid it going backward
|
||||
* 2022/Jun/01 ver 6.06 refactor Cpu::Type class and improve MmapAllocator when XBYAK_USE_MEMFD is defined.
|
||||
* 2022/Mar/20 ver 6.052 add Cpu::operator==()
|
||||
* 2022/Mar/13 ver 6.051 fix compile error when XBYAK_NO_EXCEPTION is defined
|
||||
* 2022/Mar/12 ver 6.05 add movdiri, movdir64b, clwb, cldemote
|
||||
* 2022/Apr/22 ver 6.041 consider Android and mingw
|
||||
* 2022/Apr/05 ver 6.04 add tpause, umonitor, umwait
|
||||
* 2022/Mar/08 ver 6.03 MmapAllocator supports memfd with user-defined strings.
|
||||
* 2022/Jan/28 ver 6.02 strict check the range of 32-bit dispacement
|
||||
* 2021/Dec/14 ver 6.01 support T_FAR jump/call and retf
|
||||
* 2021/Sep/14 ver 6.00 fully support AVX512-FP16
|
||||
* 2021/Sep/09 ver 5.997 fix vrndscale* to support {sae}
|
||||
* 2021/Sep/03 ver 5.996 fix v{add,sub,mul,div,max,min}{sd,ss} to support T_rd_sae.
|
||||
* 2021/Aug/15 ver 5.995 add a label to /proc/self/maps if XBYAK_USE_MEMFD is defined on Linux
|
||||
* 2021/Jun/17 ver 5.994 add alias of vcmpXX{ps,pd,ss,sd} with mask register
|
||||
* 2021/Jun/06 ver 5.993 strict check of gather/scatter register combination
|
||||
* 2021/May/09 ver 5.992 support endbr32 and endbr64
|
||||
* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14
|
||||
* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito)
|
||||
* 2020/Oct/17 ver 5.98 support the form of [scale * reg]
|
||||
* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc.
|
||||
* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later
|
||||
* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`.
|
||||
* 2020/Jul/28 ver 5.94 remove #include <winsock2.h> (only windows)
|
||||
* 2020/Jul/21 ver 5.93 support exception-less mode
|
||||
* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov)
|
||||
* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64
|
||||
* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso)
|
||||
* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined.
|
||||
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
|
||||
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
|
||||
* 2020/Feb/26 ver 5.891 fix typo of type
|
||||
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
|
||||
* 2019/Dec/20 ver 5.88 fix compile error on Windows
|
||||
* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified.
|
||||
* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available)
|
||||
* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later
|
||||
* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined
|
||||
* 2019/Oct/12 ver 5.83 exit(1) was removed
|
||||
* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam)
|
||||
* 2019/Sep/14 ver 5.81 support some generic mnemonics.
|
||||
* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
|
||||
* 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
|
||||
* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
|
||||
* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky)
|
||||
* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage)
|
||||
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
|
||||
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
||||
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
||||
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
|
||||
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
||||
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
|
||||
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
||||
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
||||
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
|
||||
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
|
||||
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
||||
* 2018/Jul/26 ver 5.661 support mingw64
|
||||
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
||||
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
|
||||
* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
|
||||
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
|
||||
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
||||
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
||||
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
||||
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
||||
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
|
||||
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
|
||||
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
|
||||
* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan)
|
||||
* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso)
|
||||
* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio
|
||||
* 2017/Jul/09 ver 5.431 fix hasRex() (no affect) (thanks to drillsar)
|
||||
* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed)
|
||||
* 2017/May/13 ver 5.42 add movs{b,w,d,q}
|
||||
* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso)
|
||||
* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label
|
||||
* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso)
|
||||
* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N
|
||||
* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro)
|
||||
* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW
|
||||
* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso)
|
||||
* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso)
|
||||
* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38)
|
||||
* 2016/Nov/20 ver 5.10 add addressing [rip+&var]
|
||||
* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio)
|
||||
* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h
|
||||
* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4
|
||||
* 2016/Aug/03 ver 5.01 disable omitted operand
|
||||
* 2016/Jun/24 ver 5.00 support avx-512 instruction set
|
||||
* 2016/Jun/13 avx-512 add mask instructions
|
||||
* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu
|
||||
* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp)
|
||||
* 2016/Feb/04 ver 4.90 add jcc(const void *addr);
|
||||
* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell)
|
||||
* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere)
|
||||
* 2015/Oct/05 ver 4.87 support segment selectors
|
||||
* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere)
|
||||
* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen)
|
||||
* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff)
|
||||
* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik)
|
||||
* 2015/May/24 ver 4.82 support detection of F16C
|
||||
* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere)
|
||||
* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere)
|
||||
* 2015/Jar/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac
|
||||
* 2014/Oct/14 ver 4.70 support MmapAllocator
|
||||
* 2014/Jun/13 ver 4.62 disable warning of VC2014
|
||||
* 2014/May/30 ver 4.61 support bt, bts, btr, btc
|
||||
* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph
|
||||
* 2014/Apr/11 ver 4.52 add detection of rdrand
|
||||
* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels
|
||||
* 2014/Mar/16 ver 4.50 support new Label
|
||||
* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox
|
||||
* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64()
|
||||
* 2013/Oct/16 ver 4.21 label support std::string
|
||||
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
||||
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
||||
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
||||
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
||||
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
||||
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
||||
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
||||
* 2013/Jan/15 ver 3.75 add setSize() to modify generated code
|
||||
* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect()
|
||||
* 2013/Jan/06 ver 3.73 use unordered_map if possible
|
||||
* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const.
|
||||
* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined.
|
||||
* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util.
|
||||
* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias)
|
||||
* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit
|
||||
* 2012/Nov/01 ver 3.61 add fldcw/fstcw
|
||||
* 2012/May/03 ver 3.60 change interface of Allocator
|
||||
* 2012/Mar/23 ver 3.51 fix userPtr mode
|
||||
* 2012/Mar/19 ver 3.50 support AutoGrow mode
|
||||
* 2011/Nov/09 ver 3.05 fix bit property of rip addresing / support movsxd
|
||||
* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat)
|
||||
* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya)
|
||||
* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc
|
||||
* 2011/May/24 ver 3.01 fix typo of OSXSAVE
|
||||
* 2011/May/23 ver 3.00 add vcmpeqps and so on
|
||||
* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
|
||||
* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
|
||||
* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm
|
||||
* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
|
||||
* 2011/Feb/04 ver 2.99 beta support AVX
|
||||
* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||
* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||
* 2010/Jun/07 ver 2.29 fix call(<label>)
|
||||
* 2010/Jun/17 ver 2.28 move some member functions to public
|
||||
* 2010/Jun/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
|
||||
* 2010/May/24 ver 2.26 fix sub(rsp, 1000)
|
||||
* 2010/Apr/26 ver 2.25 add jc/jnc(I forgot to implement them...)
|
||||
* 2010/Apr/16 ver 2.24 change the prototype of rewrite() method
|
||||
* 2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
|
||||
* 2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
|
||||
* 2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
|
||||
* 2009/Nov/28 support a part of FPU
|
||||
* 2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
|
||||
* 2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
|
||||
* 2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
|
||||
* 2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
|
||||
* 2008/Dec/30 fix call in short relative address(thanks to kato san)
|
||||
* 2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
|
||||
* 2008/Sep/18 support (ptr[rip + 32bit offset]) (thanks to Dango-Chu san)
|
||||
* 2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
|
||||
* 2008/Jun/02 support memory interface allocated by user
|
||||
* 2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
|
||||
* 2008/Apr/30 add cmpxchg16b, cdqe
|
||||
* 2008/Apr/29 support x64
|
||||
* 2008/Apr/14 code refactoring
|
||||
* 2008/Mar/12 add bsr/bsf
|
||||
* 2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
|
||||
* 2007/Nov/5 support lock, xadd, xchg
|
||||
* 2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
|
||||
* 2007/Feb/4 fix the bug that exception doesn't occur under the condition which the offset of jmp mnemonic without T_NEAR is over 127.
|
||||
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
||||
* 2007/Jan/4 first version
|
||||
26
externals/xbyak/doc/install.md
vendored
Normal file
26
externals/xbyak/doc/install.md
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
# Install
|
||||
|
||||
The following files are necessary. Please add the path to your compile directory.
|
||||
|
||||
* xbyak.h
|
||||
* xbyak_mnemonic.h
|
||||
* xbyak_util.h
|
||||
|
||||
Linux:
|
||||
```
|
||||
make install
|
||||
```
|
||||
|
||||
These files are copied into `/usr/local/include/xbyak`.
|
||||
|
||||
# Building xbyak - Using vcpkg
|
||||
|
||||
You can download and install xbyak using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
|
||||
|
||||
git clone https://github.com/Microsoft/vcpkg.git
|
||||
cd vcpkg
|
||||
./bootstrap-vcpkg.sh
|
||||
./vcpkg integrate install
|
||||
./vcpkg install xbyak
|
||||
|
||||
The xbyak port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
|
||||
417
externals/xbyak/doc/usage.md
vendored
Normal file
417
externals/xbyak/doc/usage.md
vendored
Normal file
@ -0,0 +1,417 @@
|
||||
# Usage
|
||||
|
||||
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
||||
```
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code(int x)
|
||||
{
|
||||
mov(eax, x);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
```
|
||||
Or you can pass the instance of CodeGenerator without inheriting.
|
||||
```
|
||||
void genCode(Xbyak::CodeGenerator& code, int x) {
|
||||
using namespace Xbyak::util;
|
||||
code.mov(eax, x);
|
||||
code.ret();
|
||||
}
|
||||
```
|
||||
|
||||
Make an instance of the class and get the function
|
||||
pointer by calling `getCode()` and call it.
|
||||
```
|
||||
Code c(5);
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("ret=%d\n", f()); // ret = 5
|
||||
```
|
||||
|
||||
## Syntax
|
||||
Similar to MASM/NASM syntax with parentheses.
|
||||
|
||||
```
|
||||
NASM Xbyak
|
||||
mov eax, ebx --> mov(eax, ebx);
|
||||
inc ecx inc(ecx);
|
||||
ret --> ret();
|
||||
```
|
||||
|
||||
## Addressing
|
||||
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
||||
otherwise use `ptr`.
|
||||
|
||||
```
|
||||
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
[rip + 32bit disp] ; x64 only
|
||||
|
||||
NASM Xbyak
|
||||
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
|
||||
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
||||
test byte [esp], 4 --> test(byte [esp], 4);
|
||||
inc qword [rax] --> inc(qword [rax]);
|
||||
```
|
||||
**Note**: `qword`, ... are member variables, then don't use `dword` as unsigned int type.
|
||||
|
||||
### How to use Selector (Segment Register)
|
||||
```
|
||||
mov eax, [fs:eax] --> putSeg(fs);
|
||||
mov(eax, ptr [eax]);
|
||||
mov ax, cs --> mov(ax, cs);
|
||||
```
|
||||
**Note**: Segment class is not derived from `Operand`.
|
||||
|
||||
## AVX
|
||||
|
||||
```
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
||||
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
||||
```
|
||||
|
||||
**Note**:
|
||||
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
||||
But the newer version will not support it.
|
||||
```
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
```
|
||||
|
||||
## AVX-512
|
||||
|
||||
```
|
||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
||||
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
||||
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
|
||||
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
|
||||
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
|
||||
|
||||
vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]);
|
||||
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
|
||||
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
|
||||
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
|
||||
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
|
||||
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
|
||||
|
||||
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
|
||||
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
|
||||
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
|
||||
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
|
||||
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
|
||||
|
||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
|
||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||
vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
||||
```
|
||||
|
||||
- setDefaultEncoding(PreferredEncoding encoding);
|
||||
- Set the default encoding to select EVEX or VEX.
|
||||
- The default value is EvexEncoding.
|
||||
- This function affects only an instruction that has a PreferredEncoding argument such as vpdpbusd.
|
||||
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
- `k0` is dealt as no mask.
|
||||
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`.
|
||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||
* `k4 | k3` is different from `k3 | k4`.
|
||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||
|
||||
## Label
|
||||
Two kinds of Label are supported. (String literal and Label class).
|
||||
|
||||
### String literal
|
||||
```
|
||||
L("L1");
|
||||
jmp("L1");
|
||||
|
||||
jmp("L2");
|
||||
...
|
||||
a few mnemonics (8-bit displacement jmp)
|
||||
...
|
||||
L("L2");
|
||||
|
||||
jmp("L3", T_NEAR);
|
||||
...
|
||||
a lot of mnemonics (32-bit displacement jmp)
|
||||
...
|
||||
L("L3");
|
||||
```
|
||||
|
||||
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
|
||||
* you can use a label for immediate value of mov like as `mov(eax, "L2")`.
|
||||
|
||||
### Support `@@`, `@f`, `@b` like MASM
|
||||
|
||||
```
|
||||
L("@@"); // <A>
|
||||
jmp("@b"); // jmp to <A>
|
||||
jmp("@f"); // jmp to <B>
|
||||
L("@@"); // <B>
|
||||
jmp("@b"); // jmp to <B>
|
||||
mov(eax, "@b");
|
||||
jmp(eax); // jmp to <B>
|
||||
```
|
||||
|
||||
### Local label
|
||||
|
||||
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
|
||||
are treated as a local label.
|
||||
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
||||
|
||||
```
|
||||
void func1()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <A> ; local label
|
||||
...
|
||||
jmp(".lp"); // jmp to <A>
|
||||
L("aaa"); // global label <C>
|
||||
outLocalLabel();
|
||||
|
||||
inLocalLabel();
|
||||
L(".lp"); // <B> ; local label
|
||||
func1();
|
||||
jmp(".lp"); // jmp to <B>
|
||||
inLocalLabel();
|
||||
jmp("aaa"); // jmp to <C>
|
||||
}
|
||||
```
|
||||
|
||||
### short and long jump
|
||||
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
|
||||
So if the size between jmp and label is larger than 127 byte, then xbyak will cause an error.
|
||||
|
||||
```
|
||||
jmp("short-jmp"); // short jmp
|
||||
// small code
|
||||
L("short-jmp");
|
||||
|
||||
jmp("long-jmp");
|
||||
// long code
|
||||
L("long-jmp"); // throw exception
|
||||
```
|
||||
Then specify T_NEAR for jmp.
|
||||
```
|
||||
jmp("long-jmp", T_NEAR); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
|
||||
```
|
||||
jmp("long-jmp"); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
|
||||
### Label class
|
||||
|
||||
`L()` and `jxx()` support Label class.
|
||||
|
||||
```
|
||||
Xbyak::Label label1, label2;
|
||||
L(label1);
|
||||
...
|
||||
jmp(label1);
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
L(label2);
|
||||
```
|
||||
|
||||
Use `putL` for jmp table
|
||||
```
|
||||
Label labelTbl, L0, L1, L2;
|
||||
mov(rax, labelTbl);
|
||||
// rdx is an index of jump table
|
||||
jmp(ptr [rax + rdx * sizeof(void*)]);
|
||||
L(labelTbl);
|
||||
putL(L0);
|
||||
putL(L1);
|
||||
putL(L2);
|
||||
L(L0);
|
||||
....
|
||||
L(L1);
|
||||
....
|
||||
```
|
||||
|
||||
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
||||
|
||||
```
|
||||
Label label2;
|
||||
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
|
||||
...
|
||||
jmp(label2); // label2 is not determined here
|
||||
...
|
||||
assignL(label2, label1); // label2 <- label1
|
||||
```
|
||||
The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
||||
|
||||
**Note**:
|
||||
* srcLabel must be used in `L()`.
|
||||
* dstLabel must not be used in `L()`.
|
||||
|
||||
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
||||
```
|
||||
// not AutoGrow mode
|
||||
Label label;
|
||||
assert(label.getAddress() == 0);
|
||||
L(label);
|
||||
assert(label.getAddress() == getCurr());
|
||||
```
|
||||
|
||||
### Rip ; relative addressing
|
||||
```
|
||||
Label label;
|
||||
mov(eax, ptr [rip + label]); // eax = 4
|
||||
...
|
||||
|
||||
L(label);
|
||||
dd(4);
|
||||
```
|
||||
```
|
||||
int x;
|
||||
...
|
||||
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
||||
```
|
||||
|
||||
## Far jump
|
||||
|
||||
Use `word|dword|qword` instead of `ptr` to specify the address size.
|
||||
|
||||
### 32 bit mode
|
||||
```
|
||||
jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
|
||||
```
|
||||
|
||||
### 64 bit mode
|
||||
```
|
||||
jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
|
||||
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
|
||||
```
|
||||
The same applies to `call`.
|
||||
|
||||
## Code size
|
||||
The default max code size is 4096 bytes.
|
||||
Specify the size in constructor of `CodeGenerator()` if necessary.
|
||||
|
||||
```
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Quantize()
|
||||
: CodeGenerator(8192)
|
||||
{
|
||||
}
|
||||
...
|
||||
};
|
||||
```
|
||||
|
||||
## User allocated memory
|
||||
|
||||
You can make jit code on prepared memory.
|
||||
|
||||
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
|
||||
|
||||
```
|
||||
uint8_t alignas(4096) buf[8192]; // C++11 or later
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||
{
|
||||
mov(rax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
Code c;
|
||||
c.setProtectModeRE(); // set memory to Read/Exec
|
||||
printf("%d\n", c.getCode<int(*)()>()());
|
||||
}
|
||||
```
|
||||
|
||||
**Note**: See [../sample/test0.cpp](../sample/test0.cpp).
|
||||
|
||||
### AutoGrow
|
||||
|
||||
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
||||
|
||||
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||
{
|
||||
...
|
||||
}
|
||||
};
|
||||
Code c;
|
||||
// generate code for jit
|
||||
c.ready(); // mode = Read/Write/Exec
|
||||
```
|
||||
|
||||
**Note**:
|
||||
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be invalid address.
|
||||
|
||||
### Read/Exec mode
|
||||
Xbyak set Read/Write/Exec mode to memory to run jit code.
|
||||
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
||||
call `setProtectModeRE()` after generating jit code.
|
||||
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.setProtectModeRE();
|
||||
...
|
||||
|
||||
```
|
||||
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
||||
See [protect-re.cpp](../sample/protect-re.cpp).
|
||||
|
||||
## Exception-less mode
|
||||
If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`.
|
||||
In stead of throwing an exception, `Xbyak::GetError()` returns non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong.
|
||||
The status will not be changed automatically, then you should reset it by `Xbyak::ClearError()`.
|
||||
`CodeGenerator::reset()` calls `ClearError()`.
|
||||
|
||||
## Macro
|
||||
|
||||
* **XBYAK32** is defined on 32bit.
|
||||
* **XBYAK64** is defined on 64bit.
|
||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC).
|
||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin.
|
||||
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
|
||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future).
|
||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro.
|
||||
* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`.
|
||||
* define **XBYAK_USE_MEMFD** on Linux then /proc/self/maps shows the area used by xbyak.
|
||||
* define **XBYAK_OLD_DISP_CHECK** if the old disp check is necessary (deprecated in the future).
|
||||
|
||||
## Sample
|
||||
|
||||
* [test0.cpp](../sample/test0.cpp) ; tiny sample (x86, x64)
|
||||
* [quantize.cpp](../sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
|
||||
* [calc.cpp](../sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
|
||||
* [bf.cpp](../sample/bf.cpp) ; JIT brainfuck (x86, x64)
|
||||
190
externals/xbyak/gen/avx_type.hpp
vendored
Normal file
190
externals/xbyak/gen/avx_type.hpp
vendored
Normal file
@ -0,0 +1,190 @@
|
||||
#include <assert.h>
|
||||
// copy CodeGenerator::AVXtype
|
||||
enum AVXtype {
|
||||
// low 3 bit
|
||||
T_N1 = 1,
|
||||
T_N2 = 2,
|
||||
T_N4 = 3,
|
||||
T_N8 = 4,
|
||||
T_N16 = 5,
|
||||
T_N32 = 6,
|
||||
T_NX_MASK = 7,
|
||||
//
|
||||
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
|
||||
T_DUP = 1 << 4, // N = (8, 32, 64)
|
||||
T_66 = 1 << 5, // pp = 1
|
||||
T_F3 = 1 << 6, // pp = 2
|
||||
T_F2 = T_66 | T_F3, // pp = 3
|
||||
T_ER_R = 1 << 7, // reg{er}
|
||||
T_0F = 1 << 8,
|
||||
T_0F38 = 1 << 9,
|
||||
T_0F3A = 1 << 10,
|
||||
T_L0 = 1 << 11,
|
||||
T_L1 = 1 << 12,
|
||||
T_W0 = 1 << 13,
|
||||
T_W1 = 1 << 14,
|
||||
T_EW0 = 1 << 15,
|
||||
T_EW1 = 1 << 16,
|
||||
T_YMM = 1 << 17, // support YMM, ZMM
|
||||
T_EVEX = 1 << 18,
|
||||
T_ER_X = 1 << 19, // xmm{er}
|
||||
T_ER_Y = 1 << 20, // ymm{er}
|
||||
T_ER_Z = 1 << 21, // zmm{er}
|
||||
T_SAE_X = 1 << 22, // xmm{sae}
|
||||
T_SAE_Y = 1 << 23, // ymm{sae}
|
||||
T_SAE_Z = 1 << 24, // zmm{sae}
|
||||
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
||||
T_B32 = 1 << 26, // m32bcst
|
||||
T_B64 = 1 << 27, // m64bcst
|
||||
T_B16 = T_B32 | T_B64, // m16bcst
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
T_FP16 = 1 << 31,
|
||||
T_MAP5 = T_FP16 | T_0F,
|
||||
T_MAP6 = T_FP16 | T_0F38,
|
||||
T_XXX
|
||||
};
|
||||
// T_66 = 1, T_F3 = 2, T_F2 = 3
|
||||
uint32_t getPP(int type) { return (type >> 5) & 3; }
|
||||
|
||||
|
||||
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
|
||||
|
||||
std::string type2String(int type)
|
||||
{
|
||||
std::string str;
|
||||
int low = type & T_NX_MASK;
|
||||
if (0 < low) {
|
||||
const char *tbl[8] = {
|
||||
"T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32"
|
||||
};
|
||||
assert(low < int(sizeof(tbl) / sizeof(tbl[0])));
|
||||
str = tbl[low - 1];
|
||||
}
|
||||
if (type & T_N_VL) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_N_VL";
|
||||
}
|
||||
if (type & T_DUP) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_DUP";
|
||||
}
|
||||
if (type & T_F2) {
|
||||
if (!str.empty()) str += " | ";
|
||||
switch (type & T_F2) {
|
||||
case T_66: str += "T_66"; break;
|
||||
case T_F3: str += "T_F3"; break;
|
||||
case T_F2: str += "T_F2"; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
if (type & T_0F) {
|
||||
if (!str.empty()) str += " | ";
|
||||
if (type & T_FP16) {
|
||||
str += "T_MAP5";
|
||||
} else {
|
||||
str += "T_0F";
|
||||
}
|
||||
}
|
||||
if (type & T_0F38) {
|
||||
if (!str.empty()) str += " | ";
|
||||
if (type & T_FP16) {
|
||||
str += "T_MAP6";
|
||||
} else {
|
||||
str += "T_0F38";
|
||||
}
|
||||
}
|
||||
if (type & T_0F3A) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_0F3A";
|
||||
}
|
||||
if (type & T_L0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "VEZ_L0";
|
||||
}
|
||||
if (type & T_L1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "VEZ_L1";
|
||||
}
|
||||
if (type & T_W0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_W0";
|
||||
}
|
||||
if (type & T_W1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_W1";
|
||||
}
|
||||
if (type & T_EW0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EW0";
|
||||
}
|
||||
if (type & T_EW1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EW1";
|
||||
}
|
||||
if (type & T_YMM) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_YMM";
|
||||
}
|
||||
if (type & T_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EVEX";
|
||||
}
|
||||
if (type & T_ER_X) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_X";
|
||||
}
|
||||
if (type & T_ER_Y) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_Y";
|
||||
}
|
||||
if (type & T_ER_Z) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_Z";
|
||||
}
|
||||
if (type & T_ER_R) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_R";
|
||||
}
|
||||
if (type & T_SAE_X) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_X";
|
||||
}
|
||||
if (type & T_SAE_Y) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_Y";
|
||||
}
|
||||
if (type & T_SAE_Z) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_Z";
|
||||
}
|
||||
if (type & T_MUST_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_MUST_EVEX";
|
||||
}
|
||||
if (type & T_B32) {
|
||||
if (!str.empty()) str += " | ";
|
||||
if (type & T_B64) {
|
||||
str += "T_B16"; // T_B16 = T_B32 | T_B64
|
||||
} else {
|
||||
str += "T_B32";
|
||||
}
|
||||
} else if (type & T_B64) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_B64";
|
||||
}
|
||||
if (type & T_M_K) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_M_K";
|
||||
}
|
||||
if (type & T_VSIB) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_VSIB";
|
||||
}
|
||||
if (type & T_MEM_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_MEM_EVEX";
|
||||
}
|
||||
return str;
|
||||
}
|
||||
17
externals/xbyak/gen/b2hex.cpp
vendored
Normal file
17
externals/xbyak/gen/b2hex.cpp
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
#include <stdio.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("enum {");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
printf(" B");
|
||||
for (int j = 0; j < 8; j++) {
|
||||
putchar(i & (1 << (7 - j)) ? '1' : '0');
|
||||
}
|
||||
printf("= %d", i);
|
||||
if (i < 255) putchar(',');
|
||||
putchar('\n');
|
||||
}
|
||||
puts("};");
|
||||
return 0;
|
||||
}
|
||||
987
externals/xbyak/gen/gen_avx512.cpp
vendored
Normal file
987
externals/xbyak/gen/gen_avx512.cpp
vendored
Normal file
@ -0,0 +1,987 @@
|
||||
#define XBYAK_DONT_READ_LIST
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../xbyak/xbyak.h"
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
using namespace Xbyak;
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4996) // scanf
|
||||
#define snprintf _snprintf_s
|
||||
#endif
|
||||
|
||||
#include "avx_type.hpp"
|
||||
|
||||
void putOpmask(bool only64bit)
|
||||
{
|
||||
if (only64bit) {
|
||||
puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
|
||||
puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "kadd", 0x4A },
|
||||
{ "kand", 0x41 },
|
||||
{ "kandn", 0x42 },
|
||||
{ "kor", 0x45 },
|
||||
{ "kxnor", 0x46 },
|
||||
{ "kxor", 0x47 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
printf("void %sw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code);
|
||||
}
|
||||
printf("void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); }\n");
|
||||
printf("void kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4B); }\n");
|
||||
printf("void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); }\n");
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "knot", 0x44 },
|
||||
{ "kortest", 0x98 },
|
||||
{ "ktest", 0x99 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
printf("void %sw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code);
|
||||
}
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "kshiftl", 0x32 },
|
||||
{ "kshiftr", 0x30 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
printf("void %sw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code);
|
||||
printf("void %sq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1);
|
||||
printf("void %sb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code);
|
||||
printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
|
||||
}
|
||||
}
|
||||
puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
|
||||
puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
|
||||
puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
|
||||
puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
|
||||
|
||||
puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
|
||||
puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
|
||||
puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
|
||||
puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
|
||||
|
||||
puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
|
||||
puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
|
||||
puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
|
||||
puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
|
||||
puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
|
||||
puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
|
||||
}
|
||||
|
||||
// vcmppd(k, x, op)
|
||||
void putVcmp()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true },
|
||||
{ 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true },
|
||||
{ 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true },
|
||||
{ 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true },
|
||||
{ 0xC2, "vcmpph", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B16, true },
|
||||
{ 0xC2, "vcmpsh", T_F3 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||
|
||||
{ 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false },
|
||||
{ 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
|
||||
{ 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
|
||||
|
||||
{ 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
|
||||
{ 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
|
||||
{ 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
|
||||
{ 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
|
||||
{ 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
|
||||
{ 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
|
||||
|
||||
{ 0x26, "vptestmb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
|
||||
{ 0x26, "vptestmw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x27, "vptestmd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0x27, "vptestmq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x26, "vptestnmb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
|
||||
{ 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }");
|
||||
puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }");
|
||||
}
|
||||
|
||||
void putVcmpAlias()
|
||||
{
|
||||
const char pred[32][16] = {
|
||||
"eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
|
||||
"eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
|
||||
"true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
|
||||
"eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
|
||||
};
|
||||
const char suf[][4] = { "pd", "ps", "sd", "ss" };
|
||||
for (int i = 0; i < 4; i++) {
|
||||
const char *s = suf[i];
|
||||
for (int j = 0; j < 32; j++) {
|
||||
printf("void vcmp%s%s(const Opmask& k, const Xmm& x, const Operand& op) { vcmp%s(k, x, op, %d); }\n", pred[j], s, s, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// XM_X
|
||||
void putX_XM()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
|
||||
// putCvt
|
||||
{ 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
|
||||
{ 0x79, "vcvtps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_ER_Z },
|
||||
{ 0xE6, "vcvtqq2pd", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
|
||||
{ 0x7A, "vcvttpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
||||
{ 0x78, "vcvttpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
||||
{ 0x78, "vcvttps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
|
||||
{ 0x7A, "vcvtudq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z },
|
||||
{ 0x7A, "vcvtuqq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z },
|
||||
|
||||
{ 0x88, "vexpandpd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
{ 0x88, "vexpandps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
|
||||
{ 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
{ 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
{ 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
||||
{ 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
|
||||
{ 0x42, "vgetexpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||
|
||||
{ 0x7D, "vcvtph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
{ 0x7D, "vcvtph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
{ 0x7C, "vcvttph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||
{ 0x7C, "vcvttph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||
{ 0x7D, "vcvtuw2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
{ 0x7D, "vcvtw2ph", T_F3 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
}
|
||||
puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }");
|
||||
|
||||
puts("void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }");
|
||||
puts("void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }");
|
||||
puts("void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }");
|
||||
puts("void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }");
|
||||
|
||||
puts("void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }");
|
||||
puts("void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }");
|
||||
}
|
||||
|
||||
void putM_X()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K },
|
||||
{ 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
|
||||
void putXM_X()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
{ 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
|
||||
{ 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
{ 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
|
||||
{ 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
|
||||
{ 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
|
||||
void putX_X_XM_IMM()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true },
|
||||
{ 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true },
|
||||
{ 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
|
||||
{ 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
|
||||
{ 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
|
||||
{ 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
|
||||
{ 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false },
|
||||
{ 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
|
||||
{ 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
|
||||
{ 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
|
||||
{ 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
|
||||
{ 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x7D, "vpermt2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
|
||||
{ 0x7D, "vpermt2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
|
||||
{ 0x7E, "vpermt2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x75, "vpermi2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
|
||||
{ 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
|
||||
{ 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
|
||||
{ 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
|
||||
|
||||
{ 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false },
|
||||
{ 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false },
|
||||
{ 0x43, "vgetexpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||
{ 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||
{ 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
{ 0x27, "vgetmantsh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||
|
||||
{ 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true },
|
||||
{ 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true },
|
||||
|
||||
{ 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
|
||||
{ 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
|
||||
|
||||
{ 0x4D, "vrcpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false },
|
||||
|
||||
{ 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
|
||||
{ 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
|
||||
|
||||
{ 0x4F, "vrsqrtsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false },
|
||||
{ 0x51, "vsqrtsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false },
|
||||
|
||||
{ 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, true },
|
||||
{ 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, true },
|
||||
{ 0x0A, "vrndscalesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, true },
|
||||
|
||||
{ 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false },
|
||||
{ 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false },
|
||||
{ 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
|
||||
{ 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
||||
|
||||
{ 0x2C, "vscalefph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Z, false },
|
||||
{ 0x2D, "vscalefsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false },
|
||||
|
||||
{ 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true },
|
||||
{ 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x15, "vprolvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x15, "vprolvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x14, "vprorvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x14, "vprorvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0xCB, "vrcp28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
|
||||
{ 0xCB, "vrcp28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
|
||||
|
||||
{ 0xCD, "vrsqrt28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
|
||||
{ 0xCD, "vrsqrt28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
|
||||
|
||||
{ 0x50, "vrangepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x50, "vrangeps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x51, "vrangesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||
{ 0x51, "vrangess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
|
||||
{ 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
{ 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||
|
||||
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||
|
||||
{ 0x70, "vpshldvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x71, "vpshldvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x71, "vpshldvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x72, "vpshrdw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||
{ 0x73, "vpshrdd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||
{ 0x73, "vpshrdq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||
|
||||
{ 0x72, "vpshrdvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
|
||||
{ 0x5A, "vcvtsd2sh", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
|
||||
{ 0x5A, "vcvtsh2sd", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||
{ 0x13, "vcvtsh2ss", T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||
{ 0x1D, "vcvtss2sh", T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
}
|
||||
|
||||
void putShift()
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
int idx;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 },
|
||||
{ "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
|
||||
{ "vprolq", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
|
||||
{ "vprord", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
|
||||
{ "vprorq", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
|
||||
void putExtractInsert()
|
||||
{
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
int type;
|
||||
bool isZMM;
|
||||
} tbl[] = {
|
||||
{ "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
{ "vextractf64x2", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
|
||||
{ "vextractf32x8", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
|
||||
{ "vextractf64x4", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
|
||||
|
||||
{ "vextracti32x4", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
{ "vextracti64x2", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
|
||||
{ "vextracti32x8", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
|
||||
{ "vextracti64x4", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM";
|
||||
printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
int type;
|
||||
bool isZMM;
|
||||
} tbl[] = {
|
||||
{ "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
{ "vinsertf64x2", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
|
||||
{ "vinsertf32x8", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
|
||||
{ "vinsertf64x4", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
|
||||
|
||||
{ "vinserti32x4", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
{ "vinserti64x2", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
|
||||
{ "vinserti32x8", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
|
||||
{ "vinserti64x4", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
const char *x = p.isZMM ? "Zmm" : "Ymm";
|
||||
const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))";
|
||||
printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {"
|
||||
"if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) "
|
||||
"opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void putBroadcast(bool only64bit)
|
||||
{
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
int reg;
|
||||
} tbl[] = {
|
||||
{ 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
|
||||
{ 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 },
|
||||
{ 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 },
|
||||
{ 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64},
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) {
|
||||
printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (only64bit) return;
|
||||
puts("void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }");
|
||||
puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }");
|
||||
puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); }");
|
||||
puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); }");
|
||||
puts("void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }");
|
||||
|
||||
puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x59); }");
|
||||
puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x5A); }");
|
||||
puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }");
|
||||
puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }");
|
||||
puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }");
|
||||
}
|
||||
|
||||
void putCvt()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
int ptn;
|
||||
} tbl[] = {
|
||||
{ 0x79, "vcvtsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X, 0 },
|
||||
{ 0x79, "vcvtss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X, 0 },
|
||||
{ 0x78, "vcvttsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X, 0 },
|
||||
{ 0x78, "vcvttss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X, 0 },
|
||||
{ 0x2D, "vcvtsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 },
|
||||
{ 0x79, "vcvtsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 },
|
||||
{ 0x2C, "vcvttsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 },
|
||||
{ 0x78, "vcvttsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 },
|
||||
|
||||
{ 0x7B, "vcvtps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 },
|
||||
{ 0x79, "vcvtps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 },
|
||||
{ 0x7A, "vcvttps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 },
|
||||
{ 0x78, "vcvttps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 },
|
||||
{ 0x7A, "vcvtudq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 1 },
|
||||
{ 0x5B, "vcvtph2dq", T_66 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 },
|
||||
{ 0x13, "vcvtph2psx", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||
{ 0x79, "vcvtph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 },
|
||||
{ 0x5B, "vcvttph2dq", T_F3 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||
{ 0x78, "vcvttph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||
|
||||
{ 0x79, "vcvtpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||
{ 0x5B, "vcvtqq2ps", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||
{ 0x78, "vcvttpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 2 },
|
||||
{ 0x7A, "vcvtuqq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||
|
||||
{ 0x5A, "vcvtph2pd", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||
{ 0x7B, "vcvtph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 },
|
||||
{ 0x79, "vcvtph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 },
|
||||
{ 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||
{ 0x7A, "vcvttph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||
|
||||
{ 0x5B, "vcvtdq2ph", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||
{ 0x1D, "vcvtps2phx", T_66 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||
{ 0x7A, "vcvtudq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||
|
||||
{ 0x5A, "vcvtpd2ph", T_66 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||
{ 0x5B, "vcvtqq2ph", T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||
{ 0x7A, "vcvtuqq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||
|
||||
{ 0x2A, "vcvtsi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 },
|
||||
{ 0x7B, "vcvtusi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
switch (p.ptn) {
|
||||
case 0:
|
||||
printf("void %s(const Reg32e& r, const Operand& op) { int type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 1:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 2:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 3:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 4:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 5:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 6:
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
}
|
||||
}
|
||||
puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
|
||||
puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
|
||||
}
|
||||
|
||||
enum { // same as xbyak.h
|
||||
xx_yy_zz = 0,
|
||||
xx_yx_zy = 1,
|
||||
xx_xy_yz = 2,
|
||||
};
|
||||
void putGather()
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
uint8_t code;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
{ "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz },
|
||||
{ "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy },
|
||||
{ "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz },
|
||||
{ "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz },
|
||||
{ "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz },
|
||||
{ "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy },
|
||||
{ "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz },
|
||||
{ "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_VSIB);
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
void putScatter()
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
uint8_t code;
|
||||
int mode; // reverse of gather
|
||||
} tbl[] = {
|
||||
{ "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz },
|
||||
{ "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy },
|
||||
{ "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA1, xx_xy_yz },
|
||||
{ "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz },
|
||||
|
||||
{ "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz },
|
||||
{ "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy },
|
||||
{ "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz },
|
||||
{ "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_VSIB);
|
||||
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
|
||||
void putShuff()
|
||||
{
|
||||
puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }");
|
||||
puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }");
|
||||
puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }");
|
||||
puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }");
|
||||
}
|
||||
|
||||
void putMov()
|
||||
{
|
||||
puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }");
|
||||
puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }");
|
||||
puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }");
|
||||
puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }");
|
||||
|
||||
puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }");
|
||||
puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }");
|
||||
puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }");
|
||||
puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }");
|
||||
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||
|
||||
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
|
||||
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
|
||||
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
|
||||
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
|
||||
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void putX_XM_IMM()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x26, "vgetmantph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||
{ 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
|
||||
{ 0x4C, "vrcpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, false },
|
||||
|
||||
{ 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
|
||||
{ 0x4E, "vrsqrtph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16, false },
|
||||
{ 0x51, "vsqrtph", T_MAP5| T_YMM | T_MUST_EVEX | T_EW0 | T_ER_Z | T_B16, false },
|
||||
|
||||
{ 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x08, "vrndscaleph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||
|
||||
{ 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x44, "vplzcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x44, "vplzcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x56, "vreduceph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||
|
||||
{ 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
|
||||
{ 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false },
|
||||
{ 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
}
|
||||
|
||||
void putMisc()
|
||||
{
|
||||
puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }");
|
||||
puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }");
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int zm;
|
||||
int type;
|
||||
uint8_t code;
|
||||
bool isZmm;
|
||||
} tbl[] = {
|
||||
{ "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true },
|
||||
{ "vgatherpf0qps", 1, T_EW0 | T_N4, 0xC7, true },
|
||||
{ "vgatherpf0dpd", 1, T_EW1 | T_N8, 0xC6, false },
|
||||
{ "vgatherpf0qpd", 1, T_EW1 | T_N8, 0xC7, true },
|
||||
|
||||
{ "vgatherpf1dps", 2, T_EW0 | T_N4, 0xC6, true },
|
||||
{ "vgatherpf1qps", 2, T_EW0 | T_N4, 0xC7, true },
|
||||
{ "vgatherpf1dpd", 2, T_EW1 | T_N8, 0xC6, false },
|
||||
{ "vgatherpf1qpd", 2, T_EW1 | T_N8, 0xC7, true },
|
||||
|
||||
{ "vscatterpf0dps", 5, T_EW0 | T_N4, 0xC6, true },
|
||||
{ "vscatterpf0qps", 5, T_EW0 | T_N4, 0xC7, true },
|
||||
{ "vscatterpf0dpd", 5, T_EW1 | T_N8, 0xC6, false },
|
||||
{ "vscatterpf0qpd", 5, T_EW1 | T_N8, 0xC7, true },
|
||||
|
||||
{ "vscatterpf1dps", 6, T_EW0 | T_N4, 0xC6, true },
|
||||
{ "vscatterpf1qps", 6, T_EW0 | T_N4, 0xC7, true },
|
||||
{ "vscatterpf1dpd", 6, T_EW1 | T_N8, 0xC6, false },
|
||||
{ "vscatterpf1qpd", 6, T_EW1 | T_N8, 0xC7, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
|
||||
printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n"
|
||||
, p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
|
||||
}
|
||||
}
|
||||
|
||||
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
|
||||
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
||||
puts("void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }");
|
||||
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
|
||||
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
|
||||
puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }");
|
||||
|
||||
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
||||
|
||||
puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }");
|
||||
puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }");
|
||||
}
|
||||
|
||||
void putV4FMA()
|
||||
{
|
||||
puts("void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); }");
|
||||
puts("void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }");
|
||||
puts("void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }");
|
||||
puts("void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }");
|
||||
puts("void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }");
|
||||
puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }");
|
||||
}
|
||||
|
||||
void putFP16_1()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
} tbl[] = {
|
||||
{ 0x58, "add" },
|
||||
{ 0x5C, "sub" },
|
||||
{ 0x59, "mul" },
|
||||
{ 0x5E, "div" },
|
||||
{ 0x5F, "max" },
|
||||
{ 0x5D, "min" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
printf("void v%sph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x%02X); }\n", p->name, p->code);
|
||||
printf("void v%ssh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x%02X); }\n", p->name, p->code);
|
||||
}
|
||||
}
|
||||
|
||||
void putFP16_FMA()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
bool isPH;
|
||||
} tbl[] = {
|
||||
{ 0x06, "vfmaddsub", true },
|
||||
{ 0x07, "vfmsubadd", true },
|
||||
{ 0x08, "vfmadd", true },
|
||||
{ 0x0C, "vfnmadd", true },
|
||||
{ 0x0A, "vfmsub", true },
|
||||
{ 0x0E, "vfnmsub", true },
|
||||
{ 0x09, "vfmadd", false },
|
||||
{ 0x0D, "vfnmadd", false },
|
||||
{ 0x0B, "vfmsub", false },
|
||||
{ 0x0F, "vfnmsub", false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
for (int k = 0; k < 3; k++) {
|
||||
const struct Ord {
|
||||
const char *str;
|
||||
uint8_t code;
|
||||
} ord[] = {
|
||||
{ "132", 0x90 },
|
||||
{ "213", 0xA0 },
|
||||
{ "231", 0xB0 },
|
||||
};
|
||||
int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||
const char *suf = 0;
|
||||
if (tbl[i].isPH) {
|
||||
t |= T_ER_Z | T_YMM | T_B16;
|
||||
suf = "ph";
|
||||
} else {
|
||||
t |= T_ER_X | T_N2;
|
||||
suf = "sh";
|
||||
}
|
||||
std::string type = type2String(t);
|
||||
printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||
, tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code);
|
||||
}
|
||||
}
|
||||
}
|
||||
void putFP16_FMA2()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
bool isPH;
|
||||
} tbl[] = {
|
||||
{ 0x56, "maddc", true },
|
||||
{ 0xD6, "mulc", true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
for (int j = 0; j < 2; j++) {
|
||||
int t = T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||
if (j == 0) {
|
||||
t |= T_F2;
|
||||
} else {
|
||||
t |= T_F3;
|
||||
}
|
||||
const char *suf = 0;
|
||||
if (tbl[i].isPH) {
|
||||
t |= T_ER_Z | T_YMM | T_B32;
|
||||
suf = "ph";
|
||||
} else {
|
||||
t |= T_ER_X | T_N2;
|
||||
suf = "sh";
|
||||
}
|
||||
std::string type = type2String(t);
|
||||
printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||
, j == 0 ? "c" : "", tbl[i].name, suf, type.c_str(), tbl[i].code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void putFP16_2()
|
||||
{
|
||||
{
|
||||
int t = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
|
||||
std::string type = type2String(t);
|
||||
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str());
|
||||
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str());
|
||||
}
|
||||
{
|
||||
int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
|
||||
std::string type = type2String(t);
|
||||
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str());
|
||||
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void putFP16()
|
||||
{
|
||||
putFP16_1();
|
||||
putFP16_FMA();
|
||||
putFP16_FMA2();
|
||||
putFP16_2();
|
||||
}
|
||||
|
||||
int main(int argc, char *[])
|
||||
{
|
||||
bool only64bit = argc == 2;
|
||||
putOpmask(only64bit);
|
||||
putBroadcast(only64bit);
|
||||
if (only64bit) {
|
||||
return 0;
|
||||
}
|
||||
putVcmp();
|
||||
putVcmpAlias();
|
||||
putX_XM();
|
||||
putM_X();
|
||||
putXM_X();
|
||||
putX_X_XM_IMM();
|
||||
putShift();
|
||||
putExtractInsert();
|
||||
putCvt();
|
||||
putGather();
|
||||
putShuff();
|
||||
putMov();
|
||||
putX_XM_IMM();
|
||||
putMisc();
|
||||
putScatter();
|
||||
putV4FMA();
|
||||
putFP16();
|
||||
}
|
||||
2030
externals/xbyak/gen/gen_code.cpp
vendored
Normal file
2030
externals/xbyak/gen/gen_code.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
23
externals/xbyak/gen/sortline.cpp
vendored
Normal file
23
externals/xbyak/gen/sortline.cpp
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <set>
|
||||
|
||||
typedef std::set<std::string> StrSet;
|
||||
|
||||
int main()
|
||||
{
|
||||
StrSet ss;
|
||||
std::string line;
|
||||
while (std::getline(std::cin, line)) {
|
||||
if (!line.empty() && line[line.size() - 1] == '\n') {
|
||||
line.resize(line.size() - 1);
|
||||
}
|
||||
if (!line.empty()) {
|
||||
ss.insert(line);
|
||||
}
|
||||
}
|
||||
for (StrSet::const_iterator i = ss.begin(), ie = ss.end(); i != ie; ++i) {
|
||||
std::cout << *i << std::endl;
|
||||
}
|
||||
}
|
||||
17
externals/xbyak/gen/update.bat
vendored
Normal file
17
externals/xbyak/gen/update.bat
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
@echo off
|
||||
set OPT=/EHsc -I../ /W4 -D_CRT_SECURE_NO_WARNINGS
|
||||
set TARGET=..\\xbyak\\xbyak_mnemonic.h
|
||||
set SORT=sortline
|
||||
cl gen_code.cpp %OPT%
|
||||
gen_code | %SORT% > %TARGET%
|
||||
echo #ifdef XBYAK_ENABLE_OMITTED_OPERAND>> %TARGET%
|
||||
gen_code omit | %SORT% >> %TARGET%
|
||||
echo #endif>>%TARGET%
|
||||
gen_code fixed >> %TARGET%
|
||||
cl gen_avx512.cpp %OPT%
|
||||
echo #ifndef XBYAK_DISABLE_AVX512>> %TARGET%
|
||||
gen_avx512 | %SORT% >> %TARGET%
|
||||
echo #ifdef XBYAK64>> %TARGET%
|
||||
gen_avx512 64 | %SORT% >> %TARGET%
|
||||
echo #endif>> %TARGET%
|
||||
echo #endif>> %TARGET%
|
||||
45
externals/xbyak/meson.build
vendored
Normal file
45
externals/xbyak/meson.build
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
# SPDX-FileCopyrightText: 2021 Andrea Pappacoda
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
project(
|
||||
'xbyak',
|
||||
'cpp',
|
||||
version: '6.68',
|
||||
license: 'BSD-3-Clause',
|
||||
default_options: 'b_ndebug=if-release'
|
||||
)
|
||||
|
||||
install_subdir('xbyak', install_dir: get_option('includedir'))
|
||||
|
||||
xbyak_dep = declare_dependency(include_directories: include_directories('.'))
|
||||
|
||||
if meson.version().version_compare('>=0.54.0')
|
||||
meson.override_dependency(meson.project_name(), xbyak_dep)
|
||||
endif
|
||||
|
||||
import('pkgconfig').generate(
|
||||
name: meson.project_name(),
|
||||
description: 'JIT assembler for x86(IA32), x64(AMD64, x86-64)',
|
||||
version: meson.project_version(),
|
||||
url: 'https://github.com/herumi/xbyak'
|
||||
)
|
||||
|
||||
if meson.version().version_compare('>=0.50.0')
|
||||
cmake = import('cmake')
|
||||
|
||||
cmake.write_basic_package_version_file(
|
||||
name: meson.project_name(),
|
||||
version: meson.project_version()
|
||||
)
|
||||
|
||||
cmake_conf = configuration_data()
|
||||
cmake_conf.set('TARGET_NAME', meson.project_name() + '::' + meson.project_name())
|
||||
cmake_conf.set('ABSOLUTE_INCLUDE_DIR', get_option('prefix')/get_option('includedir'))
|
||||
|
||||
cmake.configure_package_config_file(
|
||||
name: meson.project_name(),
|
||||
input: 'cmake'/'meson-config.cmake.in',
|
||||
configuration: cmake_conf
|
||||
)
|
||||
endif
|
||||
76
externals/xbyak/readme.md
vendored
Normal file
76
externals/xbyak/readme.md
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
|
||||
# Xbyak 6.68 [![Badge Build]][Build Status]
|
||||
|
||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||
|
||||
## Menu
|
||||
|
||||
- [Install]
|
||||
- [Usage]
|
||||
- [Changelog]
|
||||
|
||||
## Abstract
|
||||
|
||||
Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic.
|
||||
|
||||
The pronunciation of Xbyak is `kəi-bja-k`.
|
||||
It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world.
|
||||
|
||||
## Feature
|
||||
|
||||
- header file only
|
||||
- Intel/MASM like syntax
|
||||
- fully support AVX-512
|
||||
|
||||
**Note**:
|
||||
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
||||
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||
|
||||
### News
|
||||
|
||||
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
||||
- add movdiri, movdir64b, clwb, cldemote
|
||||
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
||||
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
||||
- strictly check address offset disp32 in a signed 32-bit integer. e.g., `ptr[(void*)0xffffffff]` causes an error.
|
||||
- define `XBYAK_OLD_DISP_CHECK` if you need an old check, but the option will be remoevd.
|
||||
- add `jmp(mem, T_FAR)`, `call(mem, T_FAR)` `retf()` for far absolute indirect jump.
|
||||
- vnni instructions such as vpdpbusd supports vex encoding.
|
||||
- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit.
|
||||
- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
|
||||
- support exception-less mode see. [Exception-less mode](#exception-less-mode)
|
||||
- `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined.
|
||||
|
||||
### Supported OS
|
||||
|
||||
- Windows (Xp, Vista, 7, 10, 11) (32 / 64 bit)
|
||||
- Linux (32 / 64 bit)
|
||||
- macOS (Intel CPU)
|
||||
|
||||
### Supported Compilers
|
||||
|
||||
Almost C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin.
|
||||
|
||||
## License
|
||||
|
||||
[BSD-3-Clause License](http://opensource.org/licenses/BSD-3-Clause)
|
||||
|
||||
## Author
|
||||
|
||||
#### 光成滋生 Mitsunari Shigeo
|
||||
[GitHub](https://github.com/herumi) | [Website (Japanese)](http://herumi.in.coocan.jp/) | [herumi@nifty.com](mailto:herumi@nifty.com)
|
||||
|
||||
## Sponsors welcome
|
||||
[GitHub Sponsor](https://github.com/sponsors/herumi)
|
||||
|
||||
<!----------------------------------------------------------------------------->
|
||||
|
||||
[Badge Build]: https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg
|
||||
[Build Status]: https://github.com/herumi/xbyak/actions/workflows/main.yml
|
||||
|
||||
[License]: COPYRIGHT
|
||||
|
||||
[Changelog]: doc/changelog.md
|
||||
[Install]: doc/install.md
|
||||
[Usage]: doc/usage.md
|
||||
|
||||
607
externals/xbyak/readme.txt
vendored
Normal file
607
externals/xbyak/readme.txt
vendored
Normal file
@ -0,0 +1,607 @@
|
||||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.68
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
||||
これはx86, x64(AMD64, x86-64)のマシン語命令を生成するC++のクラスライブラリです。
|
||||
プログラム実行時に動的にアセンブルすることが可能です。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎特徴
|
||||
|
||||
・ヘッダファイルオンリー
|
||||
xbyak.hをインクルードするだけですぐ利用することができます。
|
||||
C++の枠組み内で閉じているため、外部アセンブラは不要です。
|
||||
32bit/64bit両対応です。
|
||||
対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/VEX-encoded GPR
|
||||
|
||||
・Windows Xp(32bit, 64bit), Windows 7/Linux(32bit, 64bit)/Intel Mac対応
|
||||
Windows Xp, Windows 7上ではVC2008, VC2010, VC2012
|
||||
Linux (kernel 3.8)上ではgcc 4.7.3, clang 3.3
|
||||
Intel Mac
|
||||
などで動作確認をしています。
|
||||
|
||||
※ and, orなどの代わりにand_, or_を使用してください。
|
||||
and, orなどを使いたい場合は-fno-operator-namesをgcc/clangに指定してください。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎準備
|
||||
xbyak.h
|
||||
xbyak_bin2hex.h
|
||||
これらを同一のパスに入れてインクルードパスに追加してください。
|
||||
|
||||
Linuxではmake installで/usr/local/include/xbyakにコピーされます。
|
||||
-----------------------------------------------------------------------------
|
||||
◎下位互換性の破れ
|
||||
* push byte, immまたはpush word, immが下位8bit, 16bitにキャストした値を使うように変更。
|
||||
* (Windows) `<winsock2.h>`をincludeしなくなったので必要なら明示的にincludeしてください。
|
||||
* XBYAK_USE_MMAP_ALLOCATORがデフォルトで有効になりました。従来の方式にする場合はXBYAK_DONT_USE_MMAP_ALLOCATORを定義してください。
|
||||
* Xbyak::Errorの型をenumからclassに変更
|
||||
** 従来のenumの値をとるにはintにキャストしてください。
|
||||
* (古い)Reg32eクラスを(新しい)Reg32eとRegExpに分ける。
|
||||
** (新しい)Reg32eはReg32かReg64
|
||||
** (新しい)RegExpは'Reg32e + (Reg32e|Xmm|Ymm) * scale + disp'の型
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎新機能
|
||||
|
||||
例外なしモード追加
|
||||
XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。
|
||||
エラーは例外の代わりに`Xbyak::GetError()`で通達されます。
|
||||
この値が0でなければ何か問題が発生しています。
|
||||
この値は自動的に変更されないので`Xbyak::ClearError()`でリセットしてください。
|
||||
`CodeGenerator::reset()`は`ClearError()`を呼びます。
|
||||
|
||||
MmapAllocator追加
|
||||
これはUnix系OSでのみの仕様です。XBYAK_USE_MMAP_ALLOCATORを使うと利用できます。
|
||||
デフォルトのAllocatorはメモリ確保時にposix_memalignを使います。
|
||||
この領域に対するmprotectはmap countを減らします。
|
||||
map countの最大値は/proc/sys/vm/max_map_countに書かれています。
|
||||
デフォルトでは3万個ほどのXbyak::CodeGeneratorインスタンスを生成するとエラーになります。
|
||||
test/mprotect_test.cppで確認できます。
|
||||
これを避けるためにはmmapを使うMmapAllocatorを使ってください。
|
||||
|
||||
|
||||
AutoGrowモード追加
|
||||
これはメモリ伸長を動的に行うモードです。
|
||||
今まではXbyak::CodeGenerator()に渡したメモリサイズを超えると例外が発生して
|
||||
いましたが、このモードでは内部でメモリを再確保して伸長します。
|
||||
ただし、getCode()を呼び出す前にジャンプ命令のアドレス解決をするためにready()
|
||||
関数を呼ぶ必要があります。
|
||||
|
||||
次のように使います。
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||
{
|
||||
...
|
||||
}
|
||||
};
|
||||
Code c;
|
||||
c.ready(); // この呼び出しを忘れてはいけない
|
||||
|
||||
注意1. ready()を呼んで確定するまではgetCurr()で得たポインタは無効化されている
|
||||
可能性があります。getSize()でoffsetを保持しておきready()のあとにgetCode()を
|
||||
呼び出してからgetCode() + offsetで新しいポインタを取得してください。
|
||||
|
||||
注意2. AutoGrowモードでは64bitモードの相対アドレッシング[rip]は非サポートです。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎文法
|
||||
|
||||
Xbyak::CodeGeneratorクラスを継承し、そのクラスメソッド内でx86, x64アセンブラを
|
||||
記述します。そのメソッドを呼び出した後、getCode()メソッドを呼び出し、その戻
|
||||
り値を自分が使いたい関数ポインタに変換して利用します。アセンブルエラーは例外
|
||||
により通知されます(cf. main.cpp)。
|
||||
|
||||
・基本的にnasmの命令で括弧をつければよいです。
|
||||
|
||||
mov eax, ebx --> mov(eax, ebx);
|
||||
inc ecx inc(ecx);
|
||||
ret --> ret();
|
||||
|
||||
・アドレッシング
|
||||
|
||||
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
[rip + 32bit disp] ; x64 only
|
||||
という形で指定します。サイズを指定する必要がない限りptrを使えばよいです。
|
||||
|
||||
セレクター(セグメントレジスタ)をサポートしました。
|
||||
(注意)セグメントレジスタはOperandを継承していません。
|
||||
|
||||
mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]);
|
||||
mov ax, cs --> mov(ax, cs);
|
||||
|
||||
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
|
||||
test byte [esp], 4 --> test (byte [esp], 4);
|
||||
|
||||
(注意) dword, word, byteはメンバ変数です。従ってたとえばunsigned intの
|
||||
つもりでdwordをtypedefしないでください。
|
||||
|
||||
・AVX
|
||||
|
||||
FMAについては簡略表記を導入するか検討中です(アイデア募集中)。
|
||||
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3, ptr [rax]); // メモリアクセスはptrで
|
||||
|
||||
vfmadd231pd(xmm1, xmm2, xmm3); // xmm1 <- (xmm2 * xmm3) + xmm1
|
||||
|
||||
*注意*
|
||||
デスティネーションの省略形はサポートされなくなりました。
|
||||
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
|
||||
XBYAK_ENABLE_OMITTED_OPERANDを定義すると使えますが、将来はそれも非サポートになるでしょう。
|
||||
|
||||
・AVX-512
|
||||
|
||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
||||
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
||||
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
|
||||
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
|
||||
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
|
||||
|
||||
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
|
||||
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
|
||||
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
|
||||
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
|
||||
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
|
||||
|
||||
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
|
||||
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
|
||||
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
|
||||
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
|
||||
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
|
||||
|
||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
|
||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||
vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
||||
注意
|
||||
* k1, ..., k7 は新しいopmaskレジスタです。
|
||||
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
|
||||
* `k4 | k3`と`k3 | k4`は意味が異なります。
|
||||
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
|
||||
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
|
||||
* setDefaultEncoding()でencoding省略時のEVEX/VEXを設定できます。
|
||||
|
||||
・ラベル
|
||||
|
||||
L(文字列);
|
||||
で定義します。ジャンプするときはその文字列を指定します。後方参照も可能ですが、
|
||||
相対アドレスが8ビットに収まらない場合はT_NEARをつけないと実行時に例外が発生
|
||||
します。
|
||||
mov(eax, "L2");の様にラベルが表すアドレスをmovの即値として使えます。
|
||||
|
||||
・hasUndefinedLabel()を呼び出して真ならジャンプ先が存在しないことを示します。
|
||||
コードを見直してください。
|
||||
|
||||
L("L1");
|
||||
jmp ("L1");
|
||||
|
||||
jmp ("L2");
|
||||
...
|
||||
少しの命令の場合。
|
||||
...
|
||||
L("L2");
|
||||
|
||||
jmp ("L3", T_NEAR);
|
||||
...
|
||||
沢山の命令がある場合
|
||||
...
|
||||
L("L3");
|
||||
|
||||
<応用編>
|
||||
|
||||
1. MASMライクな@@, @f, @bをサポート
|
||||
|
||||
L("@@"); // <A>
|
||||
jmp("@b"); // jmp to <A>
|
||||
jmp("@f"); // jmp to <B>
|
||||
L("@@"); // <B>
|
||||
jmp("@b"); // jmp to <B>
|
||||
mov(eax, "@b");
|
||||
jmp(eax); // jmp to <B>
|
||||
|
||||
2. ラベルの局所化
|
||||
|
||||
ピリオドで始まるラベルをinLocalLabel(), outLocalLabel()で挟むことで局所化できます。
|
||||
inLocalLabel(), outLocalLabel()は入れ子にすることができます。
|
||||
|
||||
void func1()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <A> ; ローカルラベル
|
||||
...
|
||||
jmp(".lp"); // jmpt to <A>
|
||||
L("aaa"); // グローバルラベル
|
||||
outLocalLabel();
|
||||
}
|
||||
|
||||
void func2()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <B> ; ローカルラベル
|
||||
func1();
|
||||
jmp(".lp"); // jmp to <B>
|
||||
outLocalLabel();
|
||||
}
|
||||
|
||||
上記サンプルではinLocalLabel(), outLocalLabel()が無いと、
|
||||
".lp"ラベルの二重定義エラーになります。
|
||||
|
||||
3. 新しいLabelクラスによるジャンプ命令
|
||||
|
||||
ジャンプ先を文字列による指定だけでなくラベルクラスを使えるようになりました。
|
||||
|
||||
Label label1, label2;
|
||||
L(label1);
|
||||
...
|
||||
jmp(label1);
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
L(label2);
|
||||
|
||||
更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。
|
||||
|
||||
Label label2;
|
||||
Label label1 = L(); // Label label1; L(label1);と同じ意味
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
assignL(label2, label1);
|
||||
|
||||
上記jmp命令はlabel1にジャンプします。
|
||||
|
||||
制限
|
||||
* srcLabelはL()により飛び先が確定していないといけません。
|
||||
* dstLabelはL()により飛び先が確定していてはいけません。
|
||||
|
||||
ラベルは`getAddress()`によりそのアドレスを取得できます。
|
||||
未定義のときは0が返ります。
|
||||
```
|
||||
// not AutoGrow mode
|
||||
Label label;
|
||||
assert(label.getAddress(), 0);
|
||||
L(label);
|
||||
assert(label.getAddress(), getCurr());
|
||||
```
|
||||
|
||||
4. farジャンプ
|
||||
|
||||
`jmp(mem, T_FAR)`, `call(mem, T_FAR)`, `retf()`をサポートします。
|
||||
サイズを明示するために`ptr`の代わりに`word|dword|qword`を利用してください。
|
||||
|
||||
32bit
|
||||
```
|
||||
jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
|
||||
```
|
||||
|
||||
64bit
|
||||
```
|
||||
jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
|
||||
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
|
||||
```
|
||||
|
||||
・Xbyak::CodeGenerator()コンストラクタインタフェース
|
||||
|
||||
@param maxSize [in] コード生成最大サイズ(デフォルト4096byte)
|
||||
@param userPtr [in] ユーザ指定メモリ
|
||||
|
||||
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0);
|
||||
|
||||
デフォルトコードサイズは4096(=DEFAULT_MAX_CODE_SIZE)バイトです。
|
||||
それより大きなコードを生成する場合はCodeGenerator()のコンストラクタに指定してください。
|
||||
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Quantize()
|
||||
: CodeGenerator(8192)
|
||||
{
|
||||
}
|
||||
...
|
||||
};
|
||||
|
||||
またユーザ指定メモリをコード生成最大サイズと共に指定すると、CodeGeneratorは
|
||||
指定されたメモリ上にバイト列を生成します。
|
||||
|
||||
補助関数として指定されたアドレスの実行属性を変更するCodeArray::protect()と
|
||||
与えられたポインタからアライメントされたポインタを取得するCodeArray::getAlignedAddress()
|
||||
も用意しました。詳細はsample/test0.cppのuse memory allocated by userを参考に
|
||||
してください。
|
||||
|
||||
/**
|
||||
change exec permission of memory
|
||||
@param addr [in] buffer address
|
||||
@param size [in] buffer size
|
||||
@param canExec [in] true(enable to exec), false(disable to exec)
|
||||
@return true(success), false(failure)
|
||||
*/
|
||||
bool CodeArray::protect(const void *addr, size_t size, bool canExec);
|
||||
|
||||
/**
|
||||
get aligned memory pointer
|
||||
*/
|
||||
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
|
||||
|
||||
・read/execモード
|
||||
デフォルトのCodeGeneratorはコンストラクト時にJIT用の領域をread/write/execモードに設定して利用します。
|
||||
コード生成時はread/writeでコード実行時にはread/execにしたい場合、次のようにしてください。
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontUseProtect) // JIT領域をread/writeのままコード生成
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.setProtectModeRE(); // read/execモードに変更
|
||||
// JIT領域を実行
|
||||
|
||||
AutoGrowの場合はreadyの代わりにreadyRE()を読んでください。
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeのままコード生成
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.readyRE(); // read/exeモードに変更
|
||||
// JIT領域を実行
|
||||
|
||||
setProtectModeRW()を呼ぶと領域が元のread/execモードに戻ります。
|
||||
|
||||
|
||||
その他詳細は各種サンプルを参照してください。
|
||||
-----------------------------------------------------------------------------
|
||||
◎マクロ
|
||||
|
||||
32bit環境上でコンパイルするとXBYAK32が、64bit環境上でコンパイルするとXBYAK64が
|
||||
定義されます。さらに64bit環境上ではWindows(VC)ならXBYAK64_WIN、cygwin, gcc上では
|
||||
XBYAK64_GCCが定義されます。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎使用例
|
||||
|
||||
test0.cpp ; 簡単な例(x86, x64)
|
||||
quantize.cpp ; 割り算のJITアセンブルによる量子化の高速化(x86)
|
||||
calc.cpp ; 与えられた多項式をアセンブルして実行(x86, x64)
|
||||
boost(http://www.boost.org/)が必要
|
||||
bf.cpp ; JIT Brainfuck(x86, x64)
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎ライセンス
|
||||
|
||||
修正された新しいBSDライセンスに従います。
|
||||
http://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||
いただきました。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2022/12/07 ver 6.68 prefetchit{0,1}サポート
|
||||
2022/11/30 ver 6.67 CMPccXADDサポート
|
||||
2022/11/25 ver 6.66 RAO-INTサポート
|
||||
2022/11/22 ver 6.65 x32動作確認
|
||||
2022/11/04 ver 6.64 vmov*命令をmaskつきアドレッシング対応修正
|
||||
2022/10/06 ver 6.63 AVX-IFMA用のvpmadd52{h,l}uq対応
|
||||
2022/10/05 amx_fp16/avx_vnni_int8/avx_ne_convertt対応とsetDefaultEncoding()追加
|
||||
2022/09/15 ver 6.62 serialize追加
|
||||
2022/08/02 ver 6.61.1 noexceptはVisual Studio 2015以降対応
|
||||
2022/07/29 ver 6.61 movzx eax, ahがエラーになるのを修正
|
||||
2022/06/16 ver 6.60.2 GFNI, VAES, VPCLMULQDQの判定修正
|
||||
2022/06/15 ver 6.60.1 Visual Studio /O0でXbyak::util::Cpuがリンクエラーになるのに対応
|
||||
2022/06/06 ver 6.60 バージョンのつけ方を数値が戻らないように変更
|
||||
2022/06/01 ver 6.06 Cpu::TypeクラスのリファクタリングとXBYAK_USE_MEMFDが定義されたときのMmapAllocatorの改善
|
||||
2022/05/20 ver 6.052 Cpu::operator==()を正しく定義
|
||||
2022/05/13 ver 6.051 XYBAK_NO_EXCEPTIONを定義したときのCpuクラスのコンパイルエラー修正
|
||||
2022/05/12 ver 6.05 movdiri, movdir64b, clwb, cldemoteを追加
|
||||
2022/04/05 ver 6.04 tpause, umonitor, umwaitを追加
|
||||
2022/03/08 ver 6.03 MmapAllocatorがmemfd用のユーザ定義文字列をサポート
|
||||
2022/01/28 ver 6.02 dispacementの32bit範囲チェックの厳密化
|
||||
2021/12/14 ver 6.01 T_FAR jump/callとretfをサポート
|
||||
2021/09/14 ver 6.00 AVX512-FP16を完全サポート
|
||||
2021/09/09 ver 5.997 vrndscale*を{sae}をサポートするよう修正
|
||||
2021/09/03 ver 5.996 v{add,sub,mul,div,max,min}{sd,ss}をT_rd_saeなどをサポートするよう修正
|
||||
2021/08/15 ver 5.995 Linux上でXBYAK_USE_MEMFDが定義されたなら/proc/self/mapsにラベル追加
|
||||
2021/06/17 ver 5.994 マスクレジスタ用のvcmpXX{ps,pd,ss,sd}のalias追加
|
||||
2021/06/06 ver 5.993 gather/scatterのレジスタの組み合わせの厳密なチェック
|
||||
2021/05/09 ver 5.992 endbr32とendbr64のサポート
|
||||
2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制
|
||||
2020/10/19 ver 5.99 VNNI命令サポート(Thanks to akharito)
|
||||
2020/10/17 ver 5.98 [scale * reg]のサポート
|
||||
2020/09/08 ver 5.97 uint32などをuint32_tに置換
|
||||
2020/08/28 ver 5.95 レジスタクラスのコンストラクタがconstexprに対応(C++14以降)
|
||||
2020/08/04 ver 5.941 `CodeGenerator::reset()`が`ClearError()`を呼ぶように変更
|
||||
2020/07/28 ver 5.94 #include <winsock2.h>の削除 (only windows)
|
||||
2020/07/21 ver 5.93 例外なしモード追加
|
||||
2020/06/30 ver 5.92 Intel AMX命令サポート (Thanks to nshustrov)
|
||||
2020/06/19 ver 5.913 32ビット環境でXBYAK64を定義したときのmov(r64, imm64)を修正
|
||||
2020/06/19 ver 5.912 macOSの古いXcodeでもMAP_JITを有効にする(Thanks to rsdubtso)
|
||||
2020/05/10 ver 5.911 Linux/macOSでXBYAK_USE_MMAP_ALLOCATORがデフォルト有効になる
|
||||
2020/04/20 ver 5.91 マスクレジスタk0を受け入れる(マスクをしない)
|
||||
2020/04/09 ver 5.90 kmov{b,w,d,q}がサポートされないレジスタを受けると例外を投げる
|
||||
2020/02/26 ver 5.891 zm0のtype修正
|
||||
2020/01/03 ver 5.89 vfpclasspdの処理エラー修正
|
||||
2019/12/20 ver 5.88 Windowsでのコンパイルエラー修正
|
||||
2019/12/19 ver 5.87 未定義ラベルへのjmp命令のデフォルト挙動をT_NEARにするsetDefaultJmpNEAR()を追加
|
||||
2019/12/13 ver 5.86 [変更] -fno-operator-namesが指定されたときは5.84以前の挙動に戻す
|
||||
2019/12/07 ver 5.85 mmapにMAP_JITフラグを追加(macOS mojave以上)
|
||||
2019/11/29 ver 5.84 [変更] XBYAK_USE_OP_NAMESが定義されていない限りXBYAK_NO_OP_NAMESが定義されるように変更
|
||||
2019/10/12 ver 5.83 exit(1)の除去
|
||||
2019/09/23 ver 5.82 monitorx, mwaitx, clzero対応 (thanks to MagurosanTeam)
|
||||
2019/09/14 ver 5.81 いくつかの一般命令をサポート
|
||||
2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov)
|
||||
2019/05/27 support vp2intersectd, vp2intersectq (not tested)
|
||||
2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
|
||||
2019/04/27 ver 5.79 vcmppd/vcmppsのptr_b対応忘れ(thanks to jkopinsky)
|
||||
2019/04/15 ver 5.78 Reg::changeBit()のリファクタリング(thanks to MerryMage)
|
||||
2019/03/06 ver 5.77 LLCキャッシュを共有数CPU数の修整(by densamoilov)
|
||||
2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)
|
||||
2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元
|
||||
2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す
|
||||
2018/10/21 ver 5.74 RegRip +/intの形をサポート Xbyak::CastToを削除
|
||||
2018/10/15 util::StackFrameでmovの代わりにpush/popを使う
|
||||
2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整
|
||||
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加
|
||||
2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加
|
||||
2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday)
|
||||
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
|
||||
2018/07/26 ver 5.661 mingw64対応
|
||||
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加
|
||||
2018/06/26 ver 5.65 fix push(qword [mem])
|
||||
2018/03/07 ver 5.64 Cpu()の中でzero divisionが出ることがあるのを修正
|
||||
2018/02/14 ver 5.63 Cpu::setCacheHierarchy()の修正とclang<3.9のためのEvexModifierZero修正(thanks to mgouicem)
|
||||
2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
|
||||
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
|
||||
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
|
||||
2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加
|
||||
2017/08/18 ver 5.52 align修正(thanks to MerryMage)
|
||||
2017/08/17 ver 5.51 multi-byte nop追加 align()はそれを使用する(thanks to inolen)
|
||||
2017/08/08 ver 5.50 mpx追加(thanks to magurosan)
|
||||
2017/08/08 ver 5.45 sha追加(thanks to magurosan)
|
||||
2017/08/08 ver 5.44 prefetchw追加(thanks to rsdubtso)
|
||||
2017/07/12 ver 5.432 PVS-studioの警告を減らす
|
||||
2017/07/09 ver 5.431 hasRex()修正 (影響なし) (thanks to drillsar)
|
||||
2017/05/14 ver 5.43 CodeGenerator::resetSize()修正(thanks to gibbed)
|
||||
2017/05/13 ver 5.42 movs{b,w,d,q}追加
|
||||
2017/01/26 ver 5.41 prefetcwt1追加とscale == 0対応(thanks to rsdubtso)
|
||||
2016/12/14 ver 5.40 Labelが示すアドレスを取得するLabel::getAddress()追加
|
||||
2016/12/07 ver 5.34 disp8N時の負のオフセット処理の修正(thanks to rsdubtso)
|
||||
2016/12/06 ver 5.33 disp8N時のvpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w}のバグ修正
|
||||
2016/12/01 ver 5.32 clang for Visual Studioサポートのために__xgetbv()を_xgetbv()に変更(thanks to freiro)
|
||||
2016/11/27 ver 5.31 AVX512_4VNNIをAVX512_4VNNIWに変更
|
||||
2016/11/27 ver 5.30 AVX512_4VNNI, AVX512_4FMAPS命令の追加(thanks to rsdubtso)
|
||||
2016/11/26 ver 5.20 AVX512_4VNNIとAVX512_4FMAPSの判定追加(thanks to rsdubtso)
|
||||
2016/11/20 ver 5.11 何故か消えていたvptest for ymm追加(thanks to gregory38)
|
||||
2016/11/20 ver 5.10 [rip+&var]の形のアドレッシング追加
|
||||
2016/09/29 ver 5.03 ERR_INVALID_OPMASK_WITH_MEMORYの判定ミス修正(thanks to PVS-Studio)
|
||||
2016/08/15 ver 5.02 xbyak_bin2hex.hをincludeしない
|
||||
2016/08/15 ver 5.011 gcc 5.4のバージョン取得ミスの修正
|
||||
2016/08/03 ver 5.01 AVXの省略表記非サポート
|
||||
2016/07/24 ver 5.00 avx-512フルサポート
|
||||
2016/06/13 avx-512 opmask命令サポート
|
||||
2016/05/05 ver 4.91 AVX-512命令の検出サポート
|
||||
2016/03/14 ver 4.901 ready()関数にコメント加筆(thanks to skmp)
|
||||
2016/02/04 ver 4.90 条件分岐命令にjcc(const void *addr);のタイプを追加
|
||||
2016/01/30 ver 4.89 vpblendvbがymmレジスタをサポートしていなかった(thanks to John Funnell)
|
||||
2016/01/24 ver 4.88 lea, cmovの16bitレジスタ対応(thanks to whyisthisfieldhere)
|
||||
2015/08/16 ver 4.87 セグメントセレクタに対応
|
||||
2015/08/16 ver 4.86 [rip + label]アドレッシングで即値を使うと壊れる(thanks to whyisthisfieldhere)
|
||||
2015/08/10 ver 4.85 Address::operator==()が間違っている(thanks to inolen)
|
||||
2015/07/22 ver 4.84 call()がvariadic template対応
|
||||
2015/05/24 ver 4.83 mobveサポート(thanks to benvanik)
|
||||
2015/05/24 ver 4.82 F16Cが使えるかどうかの判定追加
|
||||
2015/04/25 ver 4.81 setSizeが例外を投げる条件を修正(thanks to whyisthisfieldhere)
|
||||
2015/04/22 ver 4.80 rip相対でLabelのサポート(thanks to whyisthisfieldhere)
|
||||
2015/01/28 ver 4.71 adcx, adox, cmpxchg, rdseed, stacのサポート
|
||||
2014/10/14 ver 4.70 MmapAllocatorのサポート
|
||||
2014/06/13 ver 4.62 VC2014で警告抑制
|
||||
2014/05/30 ver 4.61 bt, bts, btr, btcのサポート
|
||||
2014/05/28 ver 4.60 vcvtph2ps, vcvtps2phのサポート
|
||||
2014/04/11 ver 4.52 rdrandの判定追加
|
||||
2014/03/25 ver 4.51 参照されなくなったラベルの状態を削除する
|
||||
2014/03/16 ver 4.50 新しいラベルクラスのサポート
|
||||
2014/03/05 ver 4.40 VirtualBox上でBMI/enhanced repのサポート判定を間違うことがあるのを修正
|
||||
2013/12/03 ver 4.30 Reg::cvt8(), cvt16(), cvt32()のサポート
|
||||
2013/10/16 ver 4.21 ラベルでstd::stringを受け付ける。
|
||||
2013/07/30 ver 4.20 [break backward compatibility] 従来のReg32eクラスをアドレッシング用のRegExpとReg32, Reg64を表すReg32eに分離
|
||||
2013/07/04 ver 4.10 [break backward compatibility] Xbyak::Errorの型をenumからclassに変更
|
||||
2013/06/21 ver 4.02 LABELの指すアドレスを書き込むputL(LABEL)関数の追加。
|
||||
2013/06/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm)
|
||||
support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest)
|
||||
2013/05/30 ver 4.00 AVX2, VEX-encoded GPR-instructionをサポート
|
||||
2013/03/27 ver 3.80 mov(reg, "label");をサポート
|
||||
2013/03/13 ver 3.76 cqo, jcxz, jecxz, jrcxz追加
|
||||
2013/01/15 ver 3.75 生成されたコードを修正するためにsetSize()を追加
|
||||
2013/01/12 ver 3.74 CodeGenerator::reset()とAllocator::useProtect()を追加
|
||||
2013/01/06 ver 3.73 可能ならunordered_mapを使う
|
||||
2012/12/04 ver 3.72 eaxなどをCodeGeneratorのメンバ変数に戻す. Xbyak::util::eaxはstatic const変数
|
||||
2012/11/17 ver 3.71 and_(), or_(), xor_(), not_()をXBYAK_NO_OP_NAMESが定義されていないときでも使えるようにした
|
||||
2012/11/17 CodeGeneratorのeax, ecx, ptrなどのメンバ変数をstaticにし、const参照をXbyak::utilにも定義
|
||||
2012/11/09 ver 3.70 and()をand_()にするためのマクロXBYAK_NO_OP_NAMESを追加(thanks to Mattias)
|
||||
2012/11/01 ver 3.62 add fwait/fnwait/finit/fninit
|
||||
2012/11/01 ver 3.61 add fldcw/fstcw
|
||||
2012/05/03 ver 3.60 Allocatorクラスのインタフェースを変更
|
||||
2012/03/23 ver 3.51 userPtrモードがバグったのを修正
|
||||
2012/03/19 ver 3.50 AutoGrowモードサポート
|
||||
2011/11/09 ver 3.05 rip相対の64bitサイズ以外の扱いのバグ修正 / movsxdサポート
|
||||
2011/08/15 ver 3.04 add(dword [ebp-8], 0xda);などにおけるimm8の扱いのバグ修正(thanks to lolcat)
|
||||
2011/06/16 ver 3.03 Macのgcc上での__GNUC_PREREQがミスってたのを修正(thanks to t_teruya)
|
||||
2011/04/28 ver 3.02 Macのgcc上ではxgetbvをdisable
|
||||
2011/03/24 ver 3.01 fix typo of OSXSAVE
|
||||
2011/03/23 ver 3.00 vcmpeqpsなどを追加
|
||||
2011/02/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
|
||||
2011/02/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
|
||||
2011/02/10 ver 2.992 beta support one argument syntax for fadd like nasm
|
||||
2011/02/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
|
||||
2011/02/04 ver 2.99 beta support AVX
|
||||
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rtdscp
|
||||
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||
2010/07/07 ver 2.29 fix call(<label>)
|
||||
2010/06/17 ver 2.28 move some member functions to public
|
||||
2010/06/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
|
||||
2010/05/24 ver 2.26 fix sub(rsp, 1000)
|
||||
2010/04/26 ver 2.25 add jc/jnc(I forgot to implement them...)
|
||||
2010/04/16 ver 2.24 change the prototype of rewrite() method
|
||||
2010/04/15 ver 2.23 fix align() and xbyak_util.h for Mac
|
||||
2010/02/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
|
||||
2009/12/09 ver 2.21 support cygwin(gcc 4.3.2)
|
||||
2009/11/28 ver 2.20 FPUの一部命令サポート
|
||||
2009/06/25 ver 2.11 64bitモードでの mov(qword[rax], imm); 修正(thanks to Martinさん)
|
||||
2009/03/10 ver 2.10 jmp/call reg64の冗長なREG.W削除
|
||||
2009/02/24 ver 2.09 movq reg64, mmx/xmm; movq mmx/xmm, reg64追加
|
||||
2009/02/13 ver 2.08 movd(xmm7, dword[eax])が0x66を落とすバグ修正(thanks to Gabestさん)
|
||||
2008/12/30 ver 2.07 call()の相対アドレスが8bit以下のときのバグ修正(thanks to katoさん)
|
||||
2008/09/18 ver 2.06 @@, @f, @bとラベルの局所化機能追加(thanks to nobu-qさん)
|
||||
2008/09/18 ver 2.05 ptr [rip + 32bit offset]サポート(thanks to 団子厨(Dango-Chu)さん)
|
||||
2008/06/03 ver 2.04 align()のポカミス修正。mov(ptr[eax],1);などをエラーに
|
||||
2008/06/02 ver 2.03 ユーザ定義メモリインタフェースサポート
|
||||
2008/05/26 ver 2.02 protect()(on Linux)で不正な設定になることがあるのを修正(thanks to sinichiro_hさん)
|
||||
2008/04/30 ver 2.01 cmpxchg16b, cdqe追加
|
||||
2008/04/29 ver 2.00 x86/x64-64版公開
|
||||
2008/04/25 ver 1.90 x64版β公開
|
||||
2008/04/18 ver 1.12 コード整理
|
||||
2008/04/14 ver 1.11 コード整理
|
||||
2008/03/12 ver 1.10 bsf/bsr追加(忘れていた)
|
||||
2008/02/14 ver 1.09 sub eax, 1234が16bitモードで出力されていたのを修正(thanks to Robertさん)
|
||||
2007/11/05 ver 1.08 lock, xadd, xchg追加
|
||||
2007/11/02 ver 1.07 SSSE3/SSE4対応(thanks to 団子厨(Dango-Chu)さん)
|
||||
2007/09/25 ver 1.06 call((int)関数ポインタ); jmp((int)関数ポインタ);のサポート
|
||||
2007/08/04 ver 1.05 細かい修正
|
||||
2007/02/04 後方へのジャンプでT_NEARをつけないときに8bit相対アドレスに入らない
|
||||
場合に例外が発生しないバグの修正
|
||||
2007/01/21 [disp]の形のアドレス生成のバグ修正
|
||||
mov (eax|ax|al, [disp]); mov([disp], eax|ax|al);の短い表現選択
|
||||
2007/01/17 webページ作成
|
||||
2007/01/04 公開開始
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎著作権者
|
||||
|
||||
光成滋生(MITSUNARI Shigeo, herumi@nifty.com)
|
||||
211
externals/xbyak/sample/bf.cpp
vendored
Normal file
211
externals/xbyak/sample/bf.cpp
vendored
Normal file
@ -0,0 +1,211 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stack>
|
||||
#include <fstream>
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4996) // scanf
|
||||
#define snprintf _snprintf_s
|
||||
#endif
|
||||
|
||||
class Brainfuck : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
int getContinuousChar(std::istream& is, char c)
|
||||
{
|
||||
int count = 1;
|
||||
char p;
|
||||
while (is >> p) {
|
||||
if (p != c) break;
|
||||
count++;
|
||||
}
|
||||
is.unget();
|
||||
return count;
|
||||
}
|
||||
Brainfuck(std::istream& is) : CodeGenerator(100000)
|
||||
{
|
||||
// void (*)(void* putchar, void* getchar, int *stack)
|
||||
using namespace Xbyak;
|
||||
#ifdef XBYAK32
|
||||
const Reg32& pPutchar(esi);
|
||||
const Reg32& pGetchar(edi);
|
||||
const Reg32& stack(ebp);
|
||||
const Address cur = dword [stack];
|
||||
push(ebp); // stack
|
||||
push(esi);
|
||||
push(edi);
|
||||
const int P_ = 4 * 3;
|
||||
mov(pPutchar, ptr[esp + P_ + 4]); // putchar
|
||||
mov(pGetchar, ptr[esp + P_ + 8]); // getchar
|
||||
mov(stack, ptr[esp + P_ + 12]); // stack
|
||||
#elif defined(XBYAK64_WIN)
|
||||
const Reg64& pPutchar(rsi);
|
||||
const Reg64& pGetchar(rdi);
|
||||
const Reg64& stack(rbp); // stack
|
||||
const Address cur = dword [stack];
|
||||
push(rsi);
|
||||
push(rdi);
|
||||
push(rbp);
|
||||
mov(pPutchar, rcx); // putchar
|
||||
mov(pGetchar, rdx); // getchar
|
||||
mov(stack, r8); // stack
|
||||
#else
|
||||
const Reg64& pPutchar(rbx);
|
||||
const Reg64& pGetchar(rbp);
|
||||
const Reg64& stack(r12); // stack
|
||||
const Address cur = dword [stack];
|
||||
push(rbx);
|
||||
push(rbp);
|
||||
push(r12);
|
||||
mov(pPutchar, rdi); // putchar
|
||||
mov(pGetchar, rsi); // getchar
|
||||
mov(stack, rdx); // stack
|
||||
#endif
|
||||
std::stack<Label> labelF, labelB;
|
||||
char c;
|
||||
while (is >> c) {
|
||||
switch (c) {
|
||||
case '+':
|
||||
case '-':
|
||||
{
|
||||
int count = getContinuousChar(is, c);
|
||||
if (count == 1) {
|
||||
c == '+' ? inc(cur) : dec(cur);
|
||||
} else {
|
||||
add(cur, (c == '+' ? count : -count));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
case '<':
|
||||
{
|
||||
int count = getContinuousChar(is, c);
|
||||
add(stack, 4 * (c == '>' ? count : -count));
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
#ifdef XBYAK32
|
||||
push(cur);
|
||||
call(pPutchar);
|
||||
pop(eax);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
mov(ecx, cur);
|
||||
sub(rsp, 32);
|
||||
call(pPutchar);
|
||||
add(rsp, 32);
|
||||
#else
|
||||
mov(edi, cur);
|
||||
call(pPutchar);
|
||||
#endif
|
||||
break;
|
||||
case ',':
|
||||
#if defined(XBYAK32) || defined(XBYAK64_GCC)
|
||||
call(pGetchar);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
sub(rsp, 32);
|
||||
call(pGetchar);
|
||||
add(rsp, 32);
|
||||
#endif
|
||||
mov(cur, eax);
|
||||
break;
|
||||
case '[':
|
||||
{
|
||||
Label B = L();
|
||||
labelB.push(B);
|
||||
mov(eax, cur);
|
||||
test(eax, eax);
|
||||
Label F;
|
||||
jz(F, T_NEAR);
|
||||
labelF.push(F);
|
||||
}
|
||||
break;
|
||||
case ']':
|
||||
{
|
||||
Label B = labelB.top(); labelB.pop();
|
||||
jmp(B);
|
||||
Label F = labelF.top(); labelF.pop();
|
||||
L(F);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
#ifdef XBYAK32
|
||||
pop(edi);
|
||||
pop(esi);
|
||||
pop(ebp);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
pop(rbp);
|
||||
pop(rdi);
|
||||
pop(rsi);
|
||||
#else
|
||||
pop(r12);
|
||||
pop(rbp);
|
||||
pop(rbx);
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
void dump(const uint8_t *code, size_t size)
|
||||
{
|
||||
puts("#include <stdio.h>\nstatic int stack[128 * 1024];");
|
||||
#ifdef _MSC_VER
|
||||
printf("static __declspec(align(4096)) ");
|
||||
#else
|
||||
printf("static __attribute__((aligned(4096)))");
|
||||
#endif
|
||||
puts("const unsigned char code[] = {");
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
printf("0x%02x,", code[i]); if ((i % 16) == 15) putchar('\n');
|
||||
}
|
||||
puts("\n};");
|
||||
#ifdef _MSC_VER
|
||||
puts("#include <windows.h>");
|
||||
#else
|
||||
puts("#include <unistd.h>");
|
||||
puts("#include <sys/mman.h>");
|
||||
#endif
|
||||
puts("int main()\n{");
|
||||
#ifdef _MSC_VER
|
||||
puts("\tDWORD oldProtect;");
|
||||
puts("\tVirtualProtect((void*)code, sizeof(code), PAGE_EXECUTE_READWRITE, &oldProtect);");
|
||||
#else
|
||||
puts("\tlong pageSize = sysconf(_SC_PAGESIZE) - 1;");
|
||||
puts("\tmprotect((void*)code, (sizeof(code) + pageSize) & ~pageSize, PROT_READ | PROT_EXEC);");
|
||||
#endif
|
||||
puts(
|
||||
"\t((void (*)(void*, void*, int *))code)((void*)putchar, (void*)getchar, stack);\n"
|
||||
"}"
|
||||
);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
fprintf(stderr, "32bit mode\n");
|
||||
#else
|
||||
fprintf(stderr, "64bit mode\n");
|
||||
#endif
|
||||
if (argc == 1) {
|
||||
fprintf(stderr, "bf filename.bf [0|1]\n");
|
||||
return 1;
|
||||
}
|
||||
std::ifstream ifs(argv[1]);
|
||||
int mode = argc == 3 ? atoi(argv[2]) : 0;
|
||||
try {
|
||||
Brainfuck bf(ifs);
|
||||
if (mode == 0) {
|
||||
static int stack[128 * 1024];
|
||||
bf.getCode<void (*)(const void*, const void*, int *)>()(reinterpret_cast<const void*>(putchar), reinterpret_cast<const void*>(getchar), stack);
|
||||
} else {
|
||||
dump(bf.getCode(), bf.getSize());
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
}
|
||||
|
||||
228
externals/xbyak/sample/bf.vcxproj
vendored
Normal file
228
externals/xbyak/sample/bf.vcxproj
vendored
Normal file
@ -0,0 +1,228 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{654BD79B-59D3-4B10-BBAA-158BAB272828}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/bf.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/bf.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/bf.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/bf.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/bf.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/bf.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/bf.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/bf.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/bf.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/bf.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="bf.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
229
externals/xbyak/sample/calc.cpp
vendored
Normal file
229
externals/xbyak/sample/calc.cpp
vendored
Normal file
@ -0,0 +1,229 @@
|
||||
/*
|
||||
@author herumi
|
||||
|
||||
tiny calculator
|
||||
This program generates a function to calc the value of
|
||||
polynomial given by user in run-time.
|
||||
use boost::spirit::classic
|
||||
see calc2.cpp for new version of boost::spirit
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4127) // for boost(constant condition)
|
||||
#pragma warning(disable : 4512) // for boost
|
||||
#endif
|
||||
#include <boost/spirit/include/classic_file_iterator.hpp>
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <boost/bind.hpp>
|
||||
|
||||
enum Error {
|
||||
UNDEFINED_VARIABLE = 1
|
||||
};
|
||||
|
||||
/*
|
||||
JIT assemble of given polynomial for VC or gcc
|
||||
*/
|
||||
class FuncGen : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
typedef std::map<std::string, int> Map;
|
||||
private:
|
||||
enum {
|
||||
MAX_CONST_NUM = 32
|
||||
};
|
||||
double constTbl_[MAX_CONST_NUM];
|
||||
size_t constTblPos_;
|
||||
int regIdx_;
|
||||
Map varMap_; // map var name to index
|
||||
#ifdef XBYAK32
|
||||
const Xbyak::Reg32& valTbl_;
|
||||
const Xbyak::Reg32& tbl_;
|
||||
#else
|
||||
const Xbyak::Reg64& valTbl_;
|
||||
const Xbyak::Reg64& tbl_;
|
||||
#endif
|
||||
public:
|
||||
/*
|
||||
@param y [out] the value of f(var)
|
||||
@param var [in] table of input variables
|
||||
func(double *y, const double var[]);
|
||||
@note func does not return double to avoid difference of compiler
|
||||
*/
|
||||
FuncGen(const std::vector<std::string>& varTbl)
|
||||
: constTblPos_(0)
|
||||
, regIdx_(-1)
|
||||
#ifdef XBYAK32
|
||||
, valTbl_(eax)
|
||||
, tbl_(edx)
|
||||
#elif defined(XBYAK64_WIN)
|
||||
, valTbl_(rcx)
|
||||
, tbl_(rdx)
|
||||
#else
|
||||
, valTbl_(rdi)
|
||||
, tbl_(rsi)
|
||||
#endif
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
mov(valTbl_, ptr[esp+8]); // eax == varTbl
|
||||
mov(tbl_, (size_t)constTbl_);
|
||||
#else
|
||||
#ifdef XBYAK64_WIN
|
||||
movaps(ptr [rsp + 8], xm6); // save xm6, xm7
|
||||
movaps(ptr [rsp + 8 + 16], xm7);
|
||||
#endif
|
||||
mov(tbl_, (size_t)constTbl_);
|
||||
#endif
|
||||
for (int i = 0, n = static_cast<int>(varTbl.size()); i < n; i++) {
|
||||
varMap_[varTbl[i]] = i;
|
||||
}
|
||||
}
|
||||
// use edx
|
||||
void genPush(double n)
|
||||
{
|
||||
if (constTblPos_ >= MAX_CONST_NUM) throw;
|
||||
constTbl_[constTblPos_] = n;
|
||||
if (regIdx_ == 7) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + (int)(constTblPos_ * sizeof(double))]);
|
||||
constTblPos_++;
|
||||
}
|
||||
// use eax
|
||||
void genVal(const char *begin, const char *end)
|
||||
{
|
||||
std::string var(begin, end);
|
||||
if (varMap_.find(var) == varMap_.end()) throw UNDEFINED_VARIABLE;
|
||||
if (regIdx_ == 7) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), ptr[valTbl_ + varMap_[var] * sizeof(double)]);
|
||||
}
|
||||
void genAdd(const char*, const char*)
|
||||
{
|
||||
addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genSub(const char*, const char*)
|
||||
{
|
||||
subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genMul(const char*, const char*)
|
||||
{
|
||||
mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genDiv(const char*, const char*)
|
||||
{
|
||||
divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void complete()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
mov(eax, ptr [esp + 4]); // eax = valTbl
|
||||
movsd(ptr [eax], xm0);
|
||||
#else
|
||||
#ifdef XBYAK64_WIN
|
||||
movaps(xm6, ptr [rsp + 8]);
|
||||
movaps(xm7, ptr [rsp + 8 + 16]);
|
||||
#endif
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
struct Grammar : public boost::spirit::classic::grammar<Grammar> {
|
||||
FuncGen& f_;
|
||||
Grammar(FuncGen& f) : f_(f) { }
|
||||
template<typename ScannerT>
|
||||
struct definition {
|
||||
boost::spirit::classic::rule<ScannerT> poly0, poly1, poly2, var;
|
||||
|
||||
definition(const Grammar& self)
|
||||
{
|
||||
using namespace boost;
|
||||
using namespace boost::spirit::classic;
|
||||
|
||||
poly0 = poly1 >> *(('+' >> poly1)[bind(&FuncGen::genAdd, ref(self.f_), _1, _2)]
|
||||
| ('-' >> poly1)[bind(&FuncGen::genSub, ref(self.f_), _1, _2)]);
|
||||
poly1 = poly2 >> *(('*' >> poly2)[bind(&FuncGen::genMul, ref(self.f_), _1, _2)]
|
||||
| ('/' >> poly2)[bind(&FuncGen::genDiv, ref(self.f_), _1, _2)]);
|
||||
var = (+alpha_p)[bind(&FuncGen::genVal, ref(self.f_), _1, _2)];
|
||||
poly2 = real_p[bind(&FuncGen::genPush, ref(self.f_), _1)]
|
||||
| var
|
||||
| '(' >> poly0 >> ')';
|
||||
}
|
||||
const boost::spirit::classic::rule<ScannerT>& start() const { return poly0; }
|
||||
};
|
||||
};
|
||||
|
||||
void put(const std::vector<double>& x)
|
||||
{
|
||||
for (size_t i = 0, n = x.size(); i < n; i++) {
|
||||
if (i > 0) printf(", ");
|
||||
printf("%f", x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc <= 2) {
|
||||
fprintf(stderr, "calc \"var1 var2 ...\" \"function of var\"\n");
|
||||
fprintf(stderr, "eg. calc x \"x*x\"\n");
|
||||
fprintf(stderr, "eg. calc \"x y z\" \"x*x + y - z\"\n");
|
||||
return 1;
|
||||
}
|
||||
const char *poly = argv[2];
|
||||
try {
|
||||
std::vector<std::string> varTbl;
|
||||
|
||||
// get varTbl from argv[1]
|
||||
{
|
||||
std::istringstream is(argv[1]);
|
||||
int i = 0;
|
||||
printf("varTbl = { ");
|
||||
while (is) {
|
||||
std::string var;
|
||||
is >> var;
|
||||
if (var.empty()) break;
|
||||
printf("%s:%d, ", var.c_str(), i);
|
||||
varTbl.push_back(var);
|
||||
i++;
|
||||
}
|
||||
printf("}\n");
|
||||
}
|
||||
FuncGen funcGen(varTbl);
|
||||
Grammar calc(funcGen);
|
||||
boost::spirit::classic::parse_info<> r = parse(poly, calc, boost::spirit::classic::space_p);
|
||||
if (!r.full) {
|
||||
printf("err poly=%s\n", poly);
|
||||
return 1;
|
||||
}
|
||||
funcGen.complete();
|
||||
std::vector<double> valTbl;
|
||||
valTbl.resize(varTbl.size());
|
||||
#ifdef XBYAK32
|
||||
puts("32bit mode");
|
||||
void (*func)(double *ret, const double *valTbl) = funcGen.getCode<void (*)(double *, const double*)>();
|
||||
#else
|
||||
puts("64bit mode");
|
||||
double (*func)(const double *valTbl) = funcGen.getCode<double (*)(const double*)>();
|
||||
#endif
|
||||
for (int i = 0; i < 10; i++) {
|
||||
for (size_t j = 0, n = valTbl.size(); j < n; j++) {
|
||||
valTbl[j] = rand() % 7;
|
||||
}
|
||||
double y;
|
||||
#ifdef XBYAK32
|
||||
func(&y, &valTbl[0]);
|
||||
#else
|
||||
y = func(&valTbl[0]);
|
||||
#endif
|
||||
printf("f("); put(valTbl); printf(")=%f\n", y);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (Error err) {
|
||||
printf("ERR:%d\n", err);
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
228
externals/xbyak/sample/calc.vcxproj
vendored
Normal file
228
externals/xbyak/sample/calc.vcxproj
vendored
Normal file
@ -0,0 +1,228 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{5FDDFAA6-B947-491D-A17E-BBD863846579}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/calc.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/calc.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/calc.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/calc.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/calc.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/calc.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/calc.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/calc.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/calc.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/calc.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="calc.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
302
externals/xbyak/sample/calc2.cpp
vendored
Normal file
302
externals/xbyak/sample/calc2.cpp
vendored
Normal file
@ -0,0 +1,302 @@
|
||||
/*
|
||||
@author herumi
|
||||
|
||||
tiny calculator 2
|
||||
This program generates a function to calc the value of
|
||||
polynomial given by user in run-time.
|
||||
use boost::spirit::qi
|
||||
*/
|
||||
#ifdef _WIN32
|
||||
#pragma warning(disable : 4127) // for boost(constant condition)
|
||||
#pragma warning(disable : 4512) // for boost
|
||||
#pragma warning(disable : 4819)
|
||||
#endif
|
||||
#include <boost/config/warning_disable.hpp>
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
#include <boost/spirit/include/phoenix_core.hpp>
|
||||
#include <boost/spirit/include/phoenix_container.hpp>
|
||||
#include <boost/spirit/include/phoenix_bind.hpp>
|
||||
#include <boost/timer.hpp>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
enum Operand {
|
||||
OpAdd,
|
||||
OpSub,
|
||||
OpMul,
|
||||
OpDiv,
|
||||
OpNeg,
|
||||
OpImm,
|
||||
OpVarX
|
||||
};
|
||||
|
||||
struct Code {
|
||||
Operand op_;
|
||||
double val_;
|
||||
Code(Operand op)
|
||||
: op_(op)
|
||||
, val_(0)
|
||||
{
|
||||
}
|
||||
Code(double val)
|
||||
: op_(OpImm)
|
||||
, val_(val)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::vector<Code> CodeSet;
|
||||
|
||||
struct Vm {
|
||||
CodeSet code_;
|
||||
double operator()(double x) const
|
||||
{
|
||||
const size_t maxStack = 16;
|
||||
double stack[maxStack];
|
||||
double *p = stack;
|
||||
CodeSet::const_iterator pc = code_.begin();
|
||||
|
||||
while (pc != code_.end()) {
|
||||
switch (pc->op_) {
|
||||
case OpVarX:
|
||||
*p++ = x;
|
||||
break;
|
||||
case OpImm:
|
||||
*p++ = pc->val_;
|
||||
break;
|
||||
case OpNeg:
|
||||
p[-1] = -p[-1];
|
||||
break;
|
||||
case OpAdd:
|
||||
--p;
|
||||
p[-1] += p[0];
|
||||
break;
|
||||
case OpSub:
|
||||
--p;
|
||||
p[-1] -= p[0];
|
||||
break;
|
||||
case OpMul:
|
||||
--p;
|
||||
p[-1] *= p[0];
|
||||
break;
|
||||
case OpDiv:
|
||||
--p;
|
||||
p[-1] /= p[0];
|
||||
break;
|
||||
}
|
||||
++pc;
|
||||
assert(p < stack + maxStack);
|
||||
}
|
||||
return p[-1];
|
||||
}
|
||||
};
|
||||
|
||||
class Jit : public Xbyak::CodeGenerator {
|
||||
private:
|
||||
enum {
|
||||
MAX_CONST_NUM = 32
|
||||
};
|
||||
MIE_ALIGN(16) double constTbl_[MAX_CONST_NUM];
|
||||
Xbyak::uint64_t negConst_;
|
||||
size_t constTblPos_;
|
||||
#ifdef XBYAK32
|
||||
const Xbyak::Reg32& varTbl_;
|
||||
const Xbyak::Reg32& tbl_;
|
||||
#else
|
||||
const Xbyak::Reg64& tbl_;
|
||||
#endif
|
||||
int regIdx_;
|
||||
public:
|
||||
/*
|
||||
double jit(double x);
|
||||
@note 32bit: x : [esp+4], return fp0
|
||||
64bit: x [rcx](win), xmm0(gcc), return xmm0
|
||||
*/
|
||||
Jit()
|
||||
: negConst_(Xbyak::uint64_t(1) << 63)
|
||||
, constTblPos_(0)
|
||||
#ifdef XBYAK32
|
||||
, varTbl_(eax)
|
||||
, tbl_(edx)
|
||||
#elif defined(XBYAK64_WIN)
|
||||
, tbl_(rcx)
|
||||
#else
|
||||
, tbl_(rdi)
|
||||
#endif
|
||||
, regIdx_(-1)
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
lea(varTbl_, ptr [esp+4]);
|
||||
#else
|
||||
#ifdef XBYAK64_WIN
|
||||
movaps(ptr [rsp + 8], xm6); // save xm6, xm7
|
||||
movaps(ptr [rsp + 8 + 16], xm7);
|
||||
#endif
|
||||
movaps(xm7, xm0); // save xm0
|
||||
#endif
|
||||
mov(tbl_, (size_t)constTbl_);
|
||||
}
|
||||
void genPush(double n)
|
||||
{
|
||||
if (constTblPos_ >= MAX_CONST_NUM) throw;
|
||||
constTbl_[constTblPos_] = n;
|
||||
if (regIdx_ == 7) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + constTblPos_ * sizeof(double)]);
|
||||
constTblPos_++;
|
||||
}
|
||||
void genVarX()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
if (regIdx_ == 7) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), ptr[varTbl_]);
|
||||
#else
|
||||
if (regIdx_ == 6) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), xm7);
|
||||
#endif
|
||||
}
|
||||
void genAdd()
|
||||
{
|
||||
addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genSub()
|
||||
{
|
||||
subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genMul()
|
||||
{
|
||||
mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genDiv()
|
||||
{
|
||||
divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genNeg()
|
||||
{
|
||||
xorpd(Xbyak::Xmm(regIdx_), ptr [tbl_ + MAX_CONST_NUM * sizeof(double)]);
|
||||
}
|
||||
void complete()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
sub(esp, 8);
|
||||
movsd(ptr [esp], xm0);
|
||||
fld(qword [esp]);
|
||||
add(esp, 8);
|
||||
#else
|
||||
#ifdef XBYAK64_WIN
|
||||
movaps(xm6, ptr [rsp + 8]);
|
||||
movaps(xm7, ptr [rsp + 8 + 16]);
|
||||
#endif
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Iterator>
|
||||
struct Parser : boost::spirit::qi::grammar<Iterator, boost::spirit::ascii::space_type> {
|
||||
boost::spirit::qi::rule<Iterator, boost::spirit::ascii::space_type> expression, term, factor;
|
||||
CodeSet& code_;
|
||||
Parser(CodeSet& code)
|
||||
: Parser::base_type(expression)
|
||||
, code_(code)
|
||||
{
|
||||
namespace qi = boost::spirit::qi;
|
||||
using namespace qi::labels;
|
||||
|
||||
using boost::phoenix::ref;
|
||||
using boost::phoenix::push_back;
|
||||
|
||||
expression = term >> *(('+' > term[push_back(ref(code_), OpAdd)])
|
||||
| ('-' > term[push_back(ref(code_), OpSub)]));
|
||||
|
||||
term = factor >> *(('*' > factor[push_back(ref(code_), OpMul)])
|
||||
| ('/' > factor[push_back(ref(code_), OpDiv)]));
|
||||
|
||||
factor = qi::double_[push_back(ref(code_), _1)]
|
||||
| qi::lit('x')[push_back(ref(code_), OpVarX)]
|
||||
| ('(' > expression > ')')
|
||||
| ('-' > factor[push_back(ref(code_), OpNeg)])
|
||||
| ('+' > factor);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Iterator>
|
||||
struct ParserJit : boost::spirit::qi::grammar<Iterator, boost::spirit::ascii::space_type> {
|
||||
boost::spirit::qi::rule<Iterator, boost::spirit::ascii::space_type> expression, term, factor;
|
||||
Jit code_;
|
||||
ParserJit()
|
||||
: ParserJit::base_type(expression)
|
||||
{
|
||||
namespace qi = boost::spirit::qi;
|
||||
using namespace qi::labels;
|
||||
|
||||
using boost::phoenix::ref;
|
||||
using boost::phoenix::push_back;
|
||||
using boost::phoenix::bind;
|
||||
|
||||
expression = term >> *(('+' > term[bind(&Jit::genAdd, ref(code_))])
|
||||
| ('-' > term[bind(&Jit::genSub, ref(code_))]));
|
||||
|
||||
term = factor >> *(('*' > factor[bind(&Jit::genMul, ref(code_))])
|
||||
| ('/' > factor[bind(&Jit::genDiv, ref(code_))]));
|
||||
|
||||
factor = qi::double_[bind(&Jit::genPush, ref(code_), _1)]
|
||||
| qi::lit('x')[bind(&Jit::genVarX, ref(code_))]
|
||||
| ('(' > expression > ')')
|
||||
| ('-' > factor[bind(&Jit::genNeg, ref(code_))])
|
||||
| ('+' > factor);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Func>
|
||||
void Test(const char *msg, const Func& f)
|
||||
{
|
||||
printf("%s:", msg);
|
||||
boost::timer t;
|
||||
double sum = 0;
|
||||
for (double x = 0; x < 1000; x += 0.0001) {
|
||||
sum += f(x);
|
||||
}
|
||||
printf("sum=%f, %fsec\n", sum, t.elapsed());
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "input formula\n");
|
||||
return 1;
|
||||
}
|
||||
const std::string str(argv[1]);
|
||||
|
||||
try {
|
||||
Vm vm;
|
||||
Parser<std::string::const_iterator> parser(vm.code_);
|
||||
ParserJit<std::string::const_iterator> parserJit;
|
||||
|
||||
const std::string::const_iterator end = str.end();
|
||||
|
||||
std::string::const_iterator i = str.begin();
|
||||
if (!phrase_parse(i, end, parser, boost::spirit::ascii::space) || i != end) {
|
||||
puts("err 1");
|
||||
return 1;
|
||||
}
|
||||
printf("ret=%f\n", vm(2.3));
|
||||
|
||||
i = str.begin();
|
||||
if (!phrase_parse(i, end, parserJit, boost::spirit::ascii::space) || i != end) {
|
||||
puts("err 2");
|
||||
return 1;
|
||||
}
|
||||
parserJit.code_.complete();
|
||||
double (*jit)(double) = parserJit.code_.getCode<double (*)(double)>();
|
||||
|
||||
Test("VM ", vm);
|
||||
Test("JIT", jit);
|
||||
} catch (...) {
|
||||
fprintf(stderr, "err\n");
|
||||
}
|
||||
}
|
||||
2
externals/xbyak/sample/cpuid/adl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/adl.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx_vnni waitpkg clflushopt cldemote movdiri movdir64b
|
||||
2
externals/xbyak/sample/cpuid/bdw.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/bdw.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe
|
||||
2
externals/xbyak/sample/cpuid/clx.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/clx.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni clflushopt
|
||||
2
externals/xbyak/sample/cpuid/cnl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/cnl.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi clflushopt
|
||||
11
externals/xbyak/sample/cpuid/cpuid.sh
vendored
Normal file
11
externals/xbyak/sample/cpuid/cpuid.sh
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
#!/bin/bash
|
||||
|
||||
make -C ../ test_util64
|
||||
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
|
||||
for cpu in ${cpus[@]} ; do
|
||||
echo $cpu
|
||||
~/bin/sde -$cpu -- ../test_util64 -cpuid > tmp.txt
|
||||
diff tmp.txt $cpu.txt
|
||||
done
|
||||
|
||||
2
externals/xbyak/sample/cpuid/cpx.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/cpx.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt
|
||||
2
externals/xbyak/sample/cpuid/glm.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/glm.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
|
||||
2
externals/xbyak/sample/cpuid/glp.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/glp.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
|
||||
2
externals/xbyak/sample/cpuid/hsw.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/hsw.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand f16c movbe
|
||||
2
externals/xbyak/sample/cpuid/icl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/icl.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
|
||||
2
externals/xbyak/sample/cpuid/icx.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/icx.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
|
||||
2
externals/xbyak/sample/cpuid/ivb.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/ivb.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx enh_rep rdrand f16c
|
||||
2
externals/xbyak/sample/cpuid/knl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/knl.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed prefetchwt1 f16c movbe avx512f avx512pf avx512er avx512cd
|
||||
2
externals/xbyak/sample/cpuid/knm.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/knm.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed prefetchwt1 f16c movbe avx512f avx512pf avx512er avx512cd avx512_4vnniw avx512_4fmaps avx512_vpopcntdq
|
||||
2
externals/xbyak/sample/cpuid/mrm.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/mrm.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3
|
||||
2
externals/xbyak/sample/cpuid/nhm.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/nhm.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt rdtscp
|
||||
2
externals/xbyak/sample/cpuid/p4p.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/p4p.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3
|
||||
2
externals/xbyak/sample/cpuid/pnr.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/pnr.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41
|
||||
2
externals/xbyak/sample/cpuid/skl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/skl.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe clflushopt
|
||||
2
externals/xbyak/sample/cpuid/skx.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/skx.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl clflushopt
|
||||
2
externals/xbyak/sample/cpuid/slm.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/slm.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp pclmulqdq prefetchw enh_rep rdrand movbe
|
||||
2
externals/xbyak/sample/cpuid/slt.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/slt.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 movbe
|
||||
2
externals/xbyak/sample/cpuid/snb.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/snb.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx
|
||||
2
externals/xbyak/sample/cpuid/spr.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/spr.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 avx512_vp2intersect amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b
|
||||
2
externals/xbyak/sample/cpuid/tgl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/tgl.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_vp2intersect clflushopt movdiri movdir64b
|
||||
2
externals/xbyak/sample/cpuid/tmp.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/tmp.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 avx512_vp2intersect amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b
|
||||
2
externals/xbyak/sample/cpuid/tnt.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/tnt.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
|
||||
10
externals/xbyak/sample/cpuid/update-txt.sh
vendored
Normal file
10
externals/xbyak/sample/cpuid/update-txt.sh
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
make -C ../ test_util64
|
||||
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
|
||||
for cpu in ${cpus[@]} ; do
|
||||
echo $cpu
|
||||
~/bin/sde -$cpu -- ../test_util64 -cpuid > $cpu.txt
|
||||
done
|
||||
|
||||
2
externals/xbyak/sample/cpuid/wsm.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/wsm.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp pclmulqdq
|
||||
5
externals/xbyak/sample/echo.bf
vendored
Normal file
5
externals/xbyak/sample/echo.bf
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
>>++++++++[->++++++++<]>>>>+++++++++[->++++++++++<]>[<<,[->+<<+<<+>>>]<<<[
|
||||
->>>+<<<]>>>>>[->+>>+<<<]>[<<[->+>>+<<<]>>>[-<<<+>>>]<<[[-]<->]>-]>>[-<<<+
|
||||
>>>]<<<<<<<[-<+<<+>>>]<[>>[-<+<<+>>>]<<<[->>>+<<<]>>[[-]>-<]<-]<<[->>>+<<<
|
||||
]>>>>><[[-]>++++++++++++++++++++++++++++++++>[[-]<------------------------
|
||||
-------->]<<]>>[-]<.>>]
|
||||
19
externals/xbyak/sample/fizzbuzz.bf
vendored
Normal file
19
externals/xbyak/sample/fizzbuzz.bf
vendored
Normal file
@ -0,0 +1,19 @@
|
||||
++++++[->++++>>+>+>-<<<<<]>
|
||||
[<++++>>+++>++++>>+++>+++++>+++++>>>>>>++>>++<<<<<<<<<<<<<<-]
|
||||
<++++>+++>-->+++>->>--->++>>>+++++[->++>++<<]<<<<<<<<<<
|
||||
|
||||
[->
|
||||
-[>>>>>>>]>[<+++>.>.>>>>..>>>+<]<<<<<
|
||||
-[>>>>]>[<+++++>.>.>..>>>+<]>>>>
|
||||
|
||||
+<-[<<<]<[
|
||||
[-<<+>>]>>>+>+<<<<<<[->>+>+>-<<<<]<
|
||||
]>>
|
||||
|
||||
[[-]<]>[
|
||||
>>>[>.<<.<<<]<[.<<<<]>
|
||||
]
|
||||
|
||||
>.<<<<<<<<<<<
|
||||
]
|
||||
|
||||
3
externals/xbyak/sample/hello.bf
vendored
Normal file
3
externals/xbyak/sample/hello.bf
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
>+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++
|
||||
++>-]<.>+++++++++++[<+++++>-]<.>++++++++[<+++>-]<.+++.------.--------.[-]>
|
||||
++++++++[<++++>-]<+.[-]++++++++++.
|
||||
128
externals/xbyak/sample/jmp_table.cpp
vendored
Normal file
128
externals/xbyak/sample/jmp_table.cpp
vendored
Normal file
@ -0,0 +1,128 @@
|
||||
/*
|
||||
sample of move(reg, LABEL);, L(LABEL), putL(LABEL);
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
const int expectTbl[] = {
|
||||
5, 9, 12
|
||||
};
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
explicit Code(int mode, size_t size, void *p)
|
||||
: Xbyak::CodeGenerator(size, p)
|
||||
{
|
||||
inLocalLabel();
|
||||
#ifdef XBYAK64
|
||||
const Xbyak::Reg64& a = rax;
|
||||
const Xbyak::Reg64& c = rcx;
|
||||
#ifdef XBYAK64_WIN
|
||||
mov(rax, rcx);
|
||||
#else
|
||||
mov(rax, rdi);
|
||||
#endif
|
||||
#else
|
||||
const Xbyak::Reg32& a = eax;
|
||||
const Xbyak::Reg32& c = ecx;
|
||||
mov(a, ptr [esp + 4]);
|
||||
#endif
|
||||
|
||||
switch (mode) {
|
||||
case 0:
|
||||
mov(c, ".jmp_table");
|
||||
lea(c, ptr [c + a * 8]);
|
||||
jmp(c);
|
||||
align(8);
|
||||
L(".jmp_table");
|
||||
mov(a, expectTbl[0]);
|
||||
ret();
|
||||
align(8);
|
||||
mov(a, expectTbl[1]);
|
||||
ret();
|
||||
align(8);
|
||||
mov(a, expectTbl[2]);
|
||||
ret();
|
||||
break;
|
||||
|
||||
case 1:
|
||||
/*
|
||||
the label for putL is defined when called
|
||||
*/
|
||||
mov(c, ".jmp_table");
|
||||
jmp(ptr [c + a * (int)sizeof(size_t)]);
|
||||
L(".label1");
|
||||
mov(a, expectTbl[0]);
|
||||
jmp(".end");
|
||||
L(".label2");
|
||||
mov(a, expectTbl[1]);
|
||||
jmp(".end");
|
||||
L(".label3");
|
||||
mov(a, expectTbl[2]);
|
||||
jmp(".end");
|
||||
L(".end");
|
||||
ret();
|
||||
ud2();
|
||||
|
||||
align(8);
|
||||
L(".jmp_table");
|
||||
putL(".label1");
|
||||
putL(".label2");
|
||||
putL(".label3");
|
||||
break;
|
||||
|
||||
case 2:
|
||||
/*
|
||||
the label for putL is not defined when called
|
||||
*/
|
||||
jmp(".in");
|
||||
ud2();
|
||||
align(8);
|
||||
L(".jmp_table");
|
||||
putL(".label1");
|
||||
putL(".label2");
|
||||
putL(".label3");
|
||||
L(".in");
|
||||
mov(c, ".jmp_table");
|
||||
jmp(ptr [c + a * (int)sizeof(size_t)]);
|
||||
L(".label1");
|
||||
mov(a, expectTbl[0]);
|
||||
jmp(".end");
|
||||
L(".label2");
|
||||
mov(a, expectTbl[1]);
|
||||
jmp(".end");
|
||||
L(".label3");
|
||||
mov(a, expectTbl[2]);
|
||||
jmp(".end");
|
||||
L(".end");
|
||||
ret();
|
||||
break;
|
||||
}
|
||||
outLocalLabel();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
for (int mode = 0; mode < 3; mode++) {
|
||||
printf("mode=%d\n", mode);
|
||||
for (int grow = 0; grow < 2; grow++) {
|
||||
printf("auto grow=%s\n", grow ? "on" : "off");
|
||||
Code c(mode, grow ? 30 : 4096, grow ? Xbyak::AutoGrow : 0);
|
||||
int (*f)(int) = c.getCode<int (*)(int)>();
|
||||
c.ready();
|
||||
for (int i = 0; i < 3; i++) {
|
||||
const int a = expectTbl[i];
|
||||
const int b = f(i);
|
||||
if (a != b) {
|
||||
printf("ERR i=%d, a=%d, b=%d\n", i, a, b);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
puts("ok");
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR %s\n", e.what());
|
||||
}
|
||||
39
externals/xbyak/sample/memfd.cpp
vendored
Normal file
39
externals/xbyak/sample/memfd.cpp
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
a sample to use MmapAllocator with an user-defined name
|
||||
cat /proc/`psidof ./memfd`/maps
|
||||
|
||||
7fca70b44000-7fca70b4a000 rw-p 00000000 00:00 0
|
||||
7fca70b67000-7fca70b68000 rwxs 00000000 00:05 19960170 /memfd:xyz (deleted)
|
||||
7fca70b68000-7fca70b69000 rwxs 00000000 00:05 19960169 /memfd:abc (deleted)
|
||||
7fca70b69000-7fca70b6a000 r--p 00029000 103:03 19136541 /lib/x86_64-linux-gnu/ld-2.27.so
|
||||
7fca70b6a000-7fca70b6b000 rw-p 0002a000 103:03 19136541 /lib/x86_64-linux-gnu/ld-2.27.so
|
||||
*/
|
||||
#define XBYAK_USE_MEMFD
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <fstream>
|
||||
|
||||
class Code : Xbyak::MmapAllocator, public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Code(const char *name, int v)
|
||||
: Xbyak::MmapAllocator(name)
|
||||
, Xbyak::CodeGenerator(4096, nullptr, this /* specify external MmapAllocator */)
|
||||
{
|
||||
mov(eax, v);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
Code c1("Xbyak::abc", 123);
|
||||
Code c2("Xbyak::xyz", 456);
|
||||
printf("c1 %d\n", c1.getCode<int (*)()>()());
|
||||
printf("c2 %d\n", c2.getCode<int (*)()>()());
|
||||
std::ifstream ifs("/proc/self/maps", std::ios::binary);
|
||||
if (ifs) {
|
||||
std::string line;
|
||||
while (std::getline(ifs, line)) {
|
||||
printf("%s\n", line.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
111
externals/xbyak/sample/memfunc.cpp
vendored
Normal file
111
externals/xbyak/sample/memfunc.cpp
vendored
Normal file
@ -0,0 +1,111 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
struct A {
|
||||
int x_;
|
||||
int y_;
|
||||
A() : x_(3), y_(5) {}
|
||||
int func(int a, int b, int c, int d, int e) const { return x_ + y_ + a + b + c + d + e; }
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4510 4512 4610)
|
||||
#endif
|
||||
|
||||
struct Code : public Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
using namespace Xbyak;
|
||||
|
||||
int RET_ADJ = 0;
|
||||
#ifdef XBYAK32
|
||||
#ifdef _WIN32
|
||||
const int PARA_ADJ = 0;
|
||||
RET_ADJ = 5 * 4;
|
||||
#else
|
||||
const int PARA_ADJ = 4;
|
||||
mov(ecx, ptr [esp + 4]);
|
||||
#endif
|
||||
#endif
|
||||
const struct {
|
||||
#ifdef XBYAK32
|
||||
const Reg32& self;
|
||||
#else
|
||||
const Reg64& self;
|
||||
#endif
|
||||
const Operand& a;
|
||||
const Operand& b;
|
||||
const Operand& c;
|
||||
const Operand& d;
|
||||
const Operand& e;
|
||||
} para = {
|
||||
#if defined(XBYAK64_WIN)
|
||||
rcx,
|
||||
edx,
|
||||
r8d,
|
||||
r9d,
|
||||
ptr [rsp + 8 * 5],
|
||||
ptr [rsp + 8 * 6],
|
||||
#elif defined(XBYAK64_GCC)
|
||||
rdi,
|
||||
esi,
|
||||
edx,
|
||||
ecx,
|
||||
r8d,
|
||||
r9d,
|
||||
#else
|
||||
ecx,
|
||||
ptr [esp + 4 + PARA_ADJ],
|
||||
ptr [esp + 8 + PARA_ADJ],
|
||||
ptr [esp + 12 + PARA_ADJ],
|
||||
ptr [esp + 16 + PARA_ADJ],
|
||||
ptr [esp + 20 + PARA_ADJ],
|
||||
#endif
|
||||
};
|
||||
mov(eax, ptr [para.self]);
|
||||
add(eax, ptr [para.self + 4]);
|
||||
add(eax, para.a);
|
||||
add(eax, para.b);
|
||||
add(eax, para.c);
|
||||
add(eax, para.d);
|
||||
add(eax, para.e);
|
||||
ret(RET_ADJ);
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
printf("64bit");
|
||||
#else
|
||||
printf("32bit");
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
puts(" win");
|
||||
#else
|
||||
puts(" linux");
|
||||
#endif
|
||||
try {
|
||||
Code code;
|
||||
int (A::*p)(int, int, int, int, int) const = 0;
|
||||
const void *addr = code.getCode<void*>();
|
||||
memcpy(&p, &addr, sizeof(void*));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
A a;
|
||||
int t1, t2, t3, t4, t5, x, y;
|
||||
a.x_ = rand(); a.y_ = rand();
|
||||
t1 = rand(); t2 = rand(); t3 = rand();
|
||||
t4 = rand(); t5 = rand();
|
||||
x = a.func(t1, t2, t3, t4, t5);
|
||||
y = (a.*p)(t1, t2, t3, t4, t5);
|
||||
printf("%c %d, %d\n", x == y ? 'o' : 'x', x, y);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("err=%s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
90
externals/xbyak/sample/profiler.cpp
vendored
Normal file
90
externals/xbyak/sample/profiler.cpp
vendored
Normal file
@ -0,0 +1,90 @@
|
||||
/*
|
||||
How to profile JIT-code with perf or VTune
|
||||
sudo perf record ./profiler 1
|
||||
amplxe-cl -collect hotspots -result-dir r001hs -quiet ./profiler-vtune 2
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
const int N = 3000000;
|
||||
struct Code : public Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
mov(eax, N);
|
||||
Xbyak::Label lp = L();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
sub(eax, 1);
|
||||
}
|
||||
jg(lp);
|
||||
mov(eax, 1);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
struct Code2 : public Xbyak::CodeGenerator {
|
||||
Code2()
|
||||
{
|
||||
mov(eax, N);
|
||||
Xbyak::Label lp = L();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
xorps(xm0, xm0);
|
||||
}
|
||||
sub(eax, 1);
|
||||
jg(lp);
|
||||
mov(eax, 1);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
double s1(int n)
|
||||
{
|
||||
double r = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
r += 1.0 / (i + 1);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
double s2(int n)
|
||||
{
|
||||
double r = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
r += 1.0 / (i * i + 1) + 2.0 / (i + 3);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int mode = argc == 1 ? 0 : atoi(argv[1]);
|
||||
Code c;
|
||||
Code2 c2;
|
||||
int (*f)() = (int (*)())c.getCode();
|
||||
int (*g)() = (int (*)())c2.getCode();
|
||||
|
||||
printf("f:%p, %d\n", (const void*)f, (int)c.getSize());
|
||||
printf("g:%p, %d\n", (const void*)g, (int)c2.getSize());
|
||||
Xbyak::util::Profiler prof;
|
||||
printf("mode=%d\n", mode);
|
||||
prof.init(mode);
|
||||
prof.set("f", (const void*)f, c.getSize());
|
||||
prof.set("g", (const void*)g, c2.getSize());
|
||||
|
||||
double sum = 0;
|
||||
for (int i = 0; i < 20000; i++) {
|
||||
sum += s1(i);
|
||||
sum += s2(i);
|
||||
}
|
||||
printf("sum=%f\n", sum);
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
sum += f();
|
||||
}
|
||||
printf("f=%f\n", sum);
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
sum += g();
|
||||
}
|
||||
printf("g=%f\n", sum);
|
||||
puts("end");
|
||||
}
|
||||
70
externals/xbyak/sample/protect-re.cpp
vendored
Normal file
70
externals/xbyak/sample/protect-re.cpp
vendored
Normal file
@ -0,0 +1,70 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
struct Code1 : Xbyak::CodeGenerator {
|
||||
Code1()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
void update()
|
||||
{
|
||||
db(0);
|
||||
}
|
||||
};
|
||||
|
||||
void test1(bool updateCode)
|
||||
{
|
||||
Code1 c;
|
||||
c.setProtectModeRE();
|
||||
if (updateCode) c.update(); // segmentation fault
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("f=%d\n", f());
|
||||
|
||||
c.setProtectModeRW();
|
||||
c.update();
|
||||
puts("ok");
|
||||
}
|
||||
|
||||
struct Code2 : Xbyak::CodeGenerator {
|
||||
Code2()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
void update()
|
||||
{
|
||||
db(0);
|
||||
}
|
||||
};
|
||||
|
||||
void test2(bool updateCode)
|
||||
{
|
||||
Code2 c;
|
||||
c.readyRE();
|
||||
if (updateCode) c.update(); // segmentation fault
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("f=%d\n", f());
|
||||
|
||||
c.setProtectModeRW();
|
||||
c.update();
|
||||
puts("ok");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "%s <testNum> [update]\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
bool update = argc == 3;
|
||||
int n = atoi(argv[1]);
|
||||
printf("n=%d update=%d\n", n, update);
|
||||
switch (n) {
|
||||
case 1: test1(update); break;
|
||||
case 2: test2(update); break;
|
||||
default: fprintf(stderr, "no test %d\n", n); break;
|
||||
}
|
||||
}
|
||||
226
externals/xbyak/sample/quantize.cpp
vendored
Normal file
226
externals/xbyak/sample/quantize.cpp
vendored
Normal file
@ -0,0 +1,226 @@
|
||||
/*
|
||||
@author herumi
|
||||
|
||||
JPEG quantize sample
|
||||
This program generates a quantization routine by using fast division algorithm in run-time.
|
||||
|
||||
time(sec)
|
||||
quality 1(high) 10 50 100(low)
|
||||
VC2005 8.0 8.0 8.0 8.0
|
||||
Xbyak 1.6 0.8 0.5 0.5
|
||||
|
||||
|
||||
; generated code at q = 1
|
||||
push esi
|
||||
push edi
|
||||
mov edi,dword ptr [esp+0Ch]
|
||||
mov esi,dword ptr [esp+10h]
|
||||
mov eax,dword ptr [esi]
|
||||
shr eax,4
|
||||
mov dword ptr [edi],eax
|
||||
mov eax,dword ptr [esi+4]
|
||||
mov edx,0BA2E8BA3h
|
||||
mul eax,edx
|
||||
shr edx,3
|
||||
...
|
||||
|
||||
; generated code at q = 100
|
||||
push esi
|
||||
push edi
|
||||
mov edi,dword ptr [esp+0Ch]
|
||||
mov esi,dword ptr [esp+10h]
|
||||
mov eax,dword ptr [esi]
|
||||
mov dword ptr [edi],eax
|
||||
mov eax,dword ptr [esi+4]
|
||||
mov dword ptr [edi+4],eax
|
||||
mov eax,dword ptr [esi+8]
|
||||
mov dword ptr [edi+8],eax
|
||||
mov eax,dword ptr [esi+0Ch]
|
||||
...
|
||||
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4996) // scanf
|
||||
#endif
|
||||
|
||||
const int N = 64;
|
||||
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
static int ilog2(int x)
|
||||
{
|
||||
int shift = 0;
|
||||
while ((1 << shift) <= x) shift++;
|
||||
return shift - 1;
|
||||
}
|
||||
public:
|
||||
/*
|
||||
input : esi
|
||||
output : eax = [esi+offset] / dividend
|
||||
destroy : edx
|
||||
*/
|
||||
void udiv(uint32_t dividend, int offset)
|
||||
{
|
||||
mov(eax, ptr[esi + offset]);
|
||||
|
||||
/* dividend = odd x 2^exponent */
|
||||
int exponent = 0, odd = dividend;
|
||||
while ((odd & 1) == 0) {
|
||||
odd >>= 1; exponent++;
|
||||
}
|
||||
|
||||
if (odd == 1) { // trivial case
|
||||
if (exponent) {
|
||||
shr(eax, exponent);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t mLow, mHigh;
|
||||
int len = ilog2(odd) + 1;
|
||||
{
|
||||
uint64_t roundUp = uint64_t(1) << (32 + len);
|
||||
uint64_t k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
|
||||
mLow = roundUp / odd;
|
||||
mHigh = (roundUp + k) / odd;
|
||||
}
|
||||
|
||||
while (((mLow >> 1) < (mHigh >> 1)) && (len > 0)) {
|
||||
mLow >>= 1; mHigh >>= 1; len--;
|
||||
}
|
||||
|
||||
uint64_t m; int a;
|
||||
if ((mHigh >> 32) == 0) {
|
||||
m = mHigh; a = 0;
|
||||
} else {
|
||||
len = ilog2(odd);
|
||||
uint64_t roundDown = uint64_t(1) << (32 + len);
|
||||
mLow = roundDown / odd;
|
||||
int r = (int)(roundDown % odd);
|
||||
m = (r <= (odd >> 1)) ? mLow : mLow + 1;
|
||||
a = 1;
|
||||
}
|
||||
while ((m & 1) == 0) {
|
||||
m >>= 1; len--;
|
||||
}
|
||||
len += exponent;
|
||||
|
||||
mov(edx, int(m));
|
||||
mul(edx);
|
||||
if (a) {
|
||||
add(eax, int(m));
|
||||
adc(edx, 0);
|
||||
}
|
||||
if (len) {
|
||||
shr(edx, len);
|
||||
}
|
||||
mov(eax, edx);
|
||||
}
|
||||
/*
|
||||
quantize(uint32_t dest[64], const uint32_t src[64]);
|
||||
*/
|
||||
Quantize(const uint32_t qTbl[64])
|
||||
{
|
||||
push(esi);
|
||||
push(edi);
|
||||
const int P_ = 4 * 2;
|
||||
mov(edi, ptr [esp+P_+4]); // dest
|
||||
mov(esi, ptr [esp+P_+8]); // src
|
||||
for (int i = 0; i < N; i++) {
|
||||
udiv(qTbl[i], i * 4);
|
||||
mov(ptr[edi+i*4], eax);
|
||||
}
|
||||
pop(edi);
|
||||
pop(esi);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
void quantize(uint32_t dest[64], const uint32_t src[64], const uint32_t qTbl[64])
|
||||
{
|
||||
for (int i = 0; i < N; i++) {
|
||||
dest[i] = src[i] / qTbl[i];
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
int main()
|
||||
{
|
||||
puts("not implemented for 64bit");
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int q;
|
||||
if (argc > 1) {
|
||||
q = atoi(argv[1]);
|
||||
} else {
|
||||
printf("input quantize=");
|
||||
if (scanf("%d", &q) != 1) {
|
||||
fprintf(stderr, "bad number\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
printf("q=%d\n", q);
|
||||
uint32_t qTbl[] = {
|
||||
16, 11, 10, 16, 24, 40, 51, 61,
|
||||
12, 12, 14, 19, 26, 58, 60, 55,
|
||||
14, 13, 16, 24, 40, 57, 69, 56,
|
||||
14, 17, 22, 29, 51, 87, 80, 62,
|
||||
18, 22, 37, 56, 68, 109, 103, 77,
|
||||
24, 35, 55, 64, 81, 104, 113, 92,
|
||||
49, 64, 78, 87, 103, 121, 120, 101,
|
||||
72, 92, 95, 98, 112, 100, 103, 99
|
||||
};
|
||||
|
||||
for (int i = 0; i < N; i++) {
|
||||
qTbl[i] /= q;
|
||||
if (qTbl[i] == 0) qTbl[i] = 1;
|
||||
}
|
||||
|
||||
try {
|
||||
uint32_t src[N];
|
||||
uint32_t dest[N];
|
||||
uint32_t dest2[N];
|
||||
for (int i = 0; i < N; i++) {
|
||||
src[i] = rand() % 2048;
|
||||
}
|
||||
|
||||
Quantize jit(qTbl);
|
||||
//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
|
||||
void (*quantize2)(uint32_t*, const uint32_t*, const uint32_t *) = jit.getCode<void (*)(uint32_t*, const uint32_t*, const uint32_t *)>();
|
||||
|
||||
quantize(dest, src, qTbl);
|
||||
quantize2(dest2, src, qTbl);
|
||||
for (int i = 0; i < N; i++) {
|
||||
if (dest[i] != dest2[i]) {
|
||||
printf("err[%d] %u %u\n", i, dest[i], dest2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
const int count = 10000000;
|
||||
int begin;
|
||||
|
||||
begin = clock();
|
||||
for (int i = 0; i < count; i++) {
|
||||
quantize(dest, src, qTbl);
|
||||
}
|
||||
printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
|
||||
|
||||
begin = clock();
|
||||
for (int i = 0; i < count; i++) {
|
||||
quantize2(dest, src, qTbl);
|
||||
}
|
||||
printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
228
externals/xbyak/sample/quantize.vcxproj
vendored
Normal file
228
externals/xbyak/sample/quantize.vcxproj
vendored
Normal file
@ -0,0 +1,228 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{D06753BF-E1F3-4578-9B18-08673327F77C}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/quantize.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/quantize.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/quantize.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/quantize.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/quantize.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/quantize.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/quantize.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/quantize.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/quantize.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/quantize.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="quantize.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
29
externals/xbyak/sample/stackframe.cpp
vendored
Normal file
29
externals/xbyak/sample/stackframe.cpp
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
#ifdef XBYAK32
|
||||
#error "this sample is for only 64-bit mode"
|
||||
#endif
|
||||
|
||||
struct Code : public Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
// see xbyak/sample/sf_test.cpp for how to use other parameter
|
||||
Xbyak::util::StackFrame sf(this, 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
Code c;
|
||||
int (*f)(int, int, int) = c.getCode<int(*) (int, int, int)>();
|
||||
int ret = f(3, 5, 2);
|
||||
if (ret == 3 + 5 + 2) {
|
||||
puts("ok");
|
||||
} else {
|
||||
puts("ng");
|
||||
}
|
||||
}
|
||||
45
externals/xbyak/sample/static_buf.cpp
vendored
Normal file
45
externals/xbyak/sample/static_buf.cpp
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
sample to use static memory
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
MIE_ALIGN(4096) char buf[4096];
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||
{
|
||||
puts("generate");
|
||||
printf("ptr=%p, %p\n", getCode(), buf);
|
||||
#ifdef XBYAK32
|
||||
mov(eax, ptr [esp + 4]);
|
||||
add(eax, ptr [esp + 8]);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
lea(rax, ptr [rcx + rdx]);
|
||||
#else
|
||||
lea(rax, ptr [rdi + rsi]);
|
||||
#endif
|
||||
ret();
|
||||
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RE);
|
||||
}
|
||||
~Code()
|
||||
{
|
||||
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RW);
|
||||
}
|
||||
} s_code;
|
||||
|
||||
inline int add(int a, int b)
|
||||
{
|
||||
return reinterpret_cast<int (*)(int, int)>(buf)(a, b);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int sum = 0;
|
||||
for (int i = 0; i < 10; i++) {
|
||||
sum += add(i, 5);
|
||||
}
|
||||
printf("sum=%d\n", sum);
|
||||
}
|
||||
190
externals/xbyak/sample/test0.cpp
vendored
Normal file
190
externals/xbyak/sample/test0.cpp
vendored
Normal file
@ -0,0 +1,190 @@
|
||||
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
|
||||
#pragma warning(disable:4514)
|
||||
#pragma warning(disable:4786)
|
||||
#endif
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1900)
|
||||
#pragma warning(disable:4456)
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
class Sample : public Xbyak::CodeGenerator {
|
||||
void operator=(const Sample&);
|
||||
public:
|
||||
Sample(void *userPtr = 0, size_t size = Xbyak::DEFAULT_MAX_CODE_SIZE) : Xbyak::CodeGenerator(size, userPtr)
|
||||
{
|
||||
inLocalLabel(); // use local label for multiple instance
|
||||
#ifdef XBYAK32
|
||||
mov(ecx, ptr [esp + 4]); // n
|
||||
#elif defined(XBYAK64_GCC)
|
||||
mov(ecx, edi); // n
|
||||
#else
|
||||
// n = ecx
|
||||
#endif
|
||||
xor_(eax, eax); // sum
|
||||
test(ecx, ecx);
|
||||
jz(".exit");
|
||||
xor_(edx, edx); // i
|
||||
L(".lp");
|
||||
add(eax, edx);
|
||||
inc(edx);
|
||||
|
||||
cmp(edx, ecx);
|
||||
jbe(".lp"); // jmp to previous @@
|
||||
L(".exit"); // <B>
|
||||
ret();
|
||||
outLocalLabel(); // end of local label
|
||||
}
|
||||
};
|
||||
|
||||
class AddFunc : public Xbyak::CodeGenerator {
|
||||
void operator=(const AddFunc&);
|
||||
public:
|
||||
AddFunc(int y)
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
mov(eax, ptr [esp + 4]);
|
||||
add(eax, y);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
lea(rax, ptr [rcx + y]);
|
||||
#else
|
||||
lea(eax, ptr [edi + y]);
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
int (*get() const)(int) { return getCode<int(*)(int)>(); }
|
||||
};
|
||||
|
||||
class CallAtoi : public Xbyak::CodeGenerator {
|
||||
void operator=(const CallAtoi&);
|
||||
public:
|
||||
CallAtoi()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
#ifdef XBYAK64_WIN
|
||||
sub(rsp, 32); // return-address is destroied if 64bit debug mode
|
||||
#endif
|
||||
mov(rax, (size_t)atoi);
|
||||
call(rax);
|
||||
#ifdef XBYAK64_WIN
|
||||
add(rsp, 32);
|
||||
#endif
|
||||
#else
|
||||
mov(eax, ptr [esp + 4]);
|
||||
push(eax);
|
||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||
call(atoi);
|
||||
#else
|
||||
call(reinterpret_cast<const void*>(atoi));
|
||||
#endif
|
||||
add(esp, 4);
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
|
||||
};
|
||||
|
||||
class JmpAtoi : public Xbyak::CodeGenerator {
|
||||
void operator=(const JmpAtoi&);
|
||||
public:
|
||||
JmpAtoi()
|
||||
{
|
||||
/* already pushed "456" */
|
||||
#ifdef XBYAK64
|
||||
mov(rax, (size_t)atoi);
|
||||
jmp(rax);
|
||||
#else
|
||||
jmp(reinterpret_cast<const void*>(atoi));
|
||||
#endif
|
||||
}
|
||||
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
|
||||
};
|
||||
|
||||
struct Reset : public Xbyak::CodeGenerator {
|
||||
void init(int n)
|
||||
{
|
||||
xor_(eax, eax);
|
||||
mov(ecx, n);
|
||||
test(ecx, ecx);
|
||||
jnz("@f");
|
||||
ret();
|
||||
L("@@");
|
||||
for (int i = 0; i < 10 - n; i++) {
|
||||
add(eax, ecx);
|
||||
}
|
||||
sub(ecx, 1);
|
||||
jnz("@b");
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
void testReset()
|
||||
{
|
||||
puts("testReset");
|
||||
Reset code;
|
||||
int (*f)(int) = code.getCode<int(*)(int)>();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
code.init(i);
|
||||
int v = f(i);
|
||||
printf("%d %d\n", i, v);
|
||||
code.reset();
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try {
|
||||
Sample s;
|
||||
printf("Xbyak version=%s\n", s.getVersionString());
|
||||
#ifdef XBYAK64_GCC
|
||||
puts("64bit mode(gcc)");
|
||||
#elif defined(XBYAK64_WIN)
|
||||
puts("64bit mode(win)");
|
||||
#else
|
||||
puts("32bit");
|
||||
#endif
|
||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||
for (int i = 0; i <= 10; i++) {
|
||||
printf("0 + ... + %d = %d\n", i, func(i));
|
||||
}
|
||||
for (int i = 0; i < 10; i++) {
|
||||
AddFunc a(i);
|
||||
int (*add)(int) = a.get();
|
||||
int y = add(i);
|
||||
printf("%d + %d = %d\n", i, i, y);
|
||||
}
|
||||
CallAtoi c;
|
||||
printf("call atoi(\"123\") = %d\n", c.get()("123"));
|
||||
JmpAtoi j;
|
||||
printf("jmp atoi(\"456\") = %d\n", j.get()("456"));
|
||||
{
|
||||
// use memory allocated by user
|
||||
using namespace Xbyak;
|
||||
const size_t codeSize = 4096;
|
||||
uint8_t buf[codeSize + 16];
|
||||
uint8_t *p = CodeArray::getAlignedAddress(buf);
|
||||
Sample s(p, codeSize);
|
||||
if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) {
|
||||
fprintf(stderr, "can't protect\n");
|
||||
return 1;
|
||||
}
|
||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||
const uint8_t *funcp = reinterpret_cast<const uint8_t*>(func);
|
||||
if (funcp != p) {
|
||||
fprintf(stderr, "internal error %p %p\n", p, funcp);
|
||||
return 1;
|
||||
}
|
||||
printf("0 + ... + %d = %d\n", 100, func(100));
|
||||
CodeArray::protect(p, codeSize, CodeArray::PROTECT_RW);
|
||||
}
|
||||
puts("OK");
|
||||
testReset();
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
}
|
||||
|
||||
228
externals/xbyak/sample/test0.vcxproj
vendored
Normal file
228
externals/xbyak/sample/test0.vcxproj
vendored
Normal file
@ -0,0 +1,228 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/test0.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/test0.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/test0.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/test0.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/test0.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/test0.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/test0.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/test0.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/test0.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/test0.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="test0.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
149
externals/xbyak/sample/test_util.cpp
vendored
Normal file
149
externals/xbyak/sample/test_util.cpp
vendored
Normal file
@ -0,0 +1,149 @@
|
||||
#include <stdio.h>
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
struct PopCountTest : public Xbyak::CodeGenerator {
|
||||
PopCountTest(int n)
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, n);
|
||||
popcnt(eax, eax);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
void putCPUinfo(bool onlyCpuidFeature)
|
||||
{
|
||||
using namespace Xbyak::util;
|
||||
Cpu cpu;
|
||||
printf("vendor %s\n", cpu.has(Cpu::tINTEL) ? "intel" : "amd");
|
||||
static const struct {
|
||||
Cpu::Type type;
|
||||
const char *str;
|
||||
} tbl[] = {
|
||||
{ Cpu::tMMX, "mmx" },
|
||||
{ Cpu::tMMX2, "mmx2" },
|
||||
{ Cpu::tCMOV, "cmov" },
|
||||
{ Cpu::tSSE, "sse" },
|
||||
{ Cpu::tSSE2, "sse2" },
|
||||
{ Cpu::tSSE3, "sse3" },
|
||||
{ Cpu::tSSSE3, "ssse3" },
|
||||
{ Cpu::tSSE41, "sse41" },
|
||||
{ Cpu::tSSE42, "sse42" },
|
||||
{ Cpu::tPOPCNT, "popcnt" },
|
||||
{ Cpu::t3DN, "3dn" },
|
||||
{ Cpu::tE3DN, "e3dn" },
|
||||
{ Cpu::tAESNI, "aesni" },
|
||||
{ Cpu::tRDTSCP, "rdtscp" },
|
||||
{ Cpu::tOSXSAVE, "osxsave(xgetvb)" },
|
||||
{ Cpu::tPCLMULQDQ, "pclmulqdq" },
|
||||
{ Cpu::tAVX, "avx" },
|
||||
{ Cpu::tFMA, "fma" },
|
||||
{ Cpu::tAVX2, "avx2" },
|
||||
{ Cpu::tBMI1, "bmi1" },
|
||||
{ Cpu::tBMI2, "bmi2" },
|
||||
{ Cpu::tLZCNT, "lzcnt" },
|
||||
{ Cpu::tPREFETCHW, "prefetchw" },
|
||||
{ Cpu::tENHANCED_REP, "enh_rep" },
|
||||
{ Cpu::tRDRAND, "rdrand" },
|
||||
{ Cpu::tADX, "adx" },
|
||||
{ Cpu::tRDSEED, "rdseed" },
|
||||
{ Cpu::tSMAP, "smap" },
|
||||
{ Cpu::tHLE, "hle" },
|
||||
{ Cpu::tRTM, "rtm" },
|
||||
{ Cpu::tMPX, "mpx" },
|
||||
{ Cpu::tSHA, "sha" },
|
||||
{ Cpu::tPREFETCHWT1, "prefetchwt1" },
|
||||
{ Cpu::tF16C, "f16c" },
|
||||
{ Cpu::tMOVBE, "movbe" },
|
||||
{ Cpu::tAVX512F, "avx512f" },
|
||||
{ Cpu::tAVX512DQ, "avx512dq" },
|
||||
{ Cpu::tAVX512IFMA, "avx512_ifma" },
|
||||
{ Cpu::tAVX512PF, "avx512pf" },
|
||||
{ Cpu::tAVX512ER, "avx512er" },
|
||||
{ Cpu::tAVX512CD, "avx512cd" },
|
||||
{ Cpu::tAVX512BW, "avx512bw" },
|
||||
{ Cpu::tAVX512VL, "avx512vl" },
|
||||
{ Cpu::tAVX512VBMI, "avx512_vbmi" },
|
||||
{ Cpu::tAVX512_4VNNIW, "avx512_4vnniw" },
|
||||
{ Cpu::tAVX512_4FMAPS, "avx512_4fmaps" },
|
||||
|
||||
{ Cpu::tAVX512_VBMI2, "avx512_vbmi2" },
|
||||
{ Cpu::tGFNI, "gfni" },
|
||||
{ Cpu::tVAES, "vaes" },
|
||||
{ Cpu::tVPCLMULQDQ, "vpclmulqdq" },
|
||||
{ Cpu::tAVX512_VNNI, "avx512_vnni" },
|
||||
{ Cpu::tAVX512_BITALG, "avx512_bitalg" },
|
||||
{ Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" },
|
||||
{ Cpu::tAVX512_BF16, "avx512_bf16" },
|
||||
{ Cpu::tAVX512_VP2INTERSECT, "avx512_vp2intersect" },
|
||||
{ Cpu::tAMX_TILE, "amx(tile)" },
|
||||
{ Cpu::tAMX_INT8, "amx(int8)" },
|
||||
{ Cpu::tAMX_BF16, "amx(bf16)" },
|
||||
{ Cpu::tAVX_VNNI, "avx_vnni" },
|
||||
{ Cpu::tAVX512_FP16, "avx512_fp16" },
|
||||
{ Cpu::tWAITPKG, "waitpkg" },
|
||||
{ Cpu::tCLFLUSHOPT, "clflushopt" },
|
||||
{ Cpu::tCLDEMOTE, "cldemote" },
|
||||
{ Cpu::tMOVDIRI, "movdiri" },
|
||||
{ Cpu::tMOVDIR64B, "movdir64b" },
|
||||
{ Cpu::tCLZERO, "clzero" },
|
||||
{ Cpu::tAMX_FP16, "amx_fp16" },
|
||||
{ Cpu::tAVX_VNNI_INT8, "avx_vnni_int8" },
|
||||
{ Cpu::tAVX_NE_CONVERT, "avx_ne_convert" },
|
||||
{ Cpu::tAVX_IFMA, "avx_ifma" },
|
||||
{ Cpu::tRAO_INT, "rao-int" },
|
||||
{ Cpu::tCMPCCXADD, "cmpccxadd" },
|
||||
{ Cpu::tPREFETCHITI, "prefetchiti" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||
}
|
||||
printf("\n");
|
||||
if (onlyCpuidFeature) return;
|
||||
if (cpu.has(Cpu::tPOPCNT)) {
|
||||
const int n = 0x12345678; // bitcount = 13
|
||||
const int ok = 13;
|
||||
PopCountTest code(n);
|
||||
code.setProtectModeRE();
|
||||
int (*f)() = code.getCode<int (*)()>();
|
||||
int r = f();
|
||||
if (r == ok) {
|
||||
puts("popcnt ok");
|
||||
} else {
|
||||
printf("popcnt ng %d %d\n", r, ok);
|
||||
}
|
||||
code.setProtectModeRW();
|
||||
}
|
||||
/*
|
||||
displayFamily displayModel
|
||||
Opteron 2376 10 4
|
||||
Core2 Duo T7100 6 F
|
||||
Core i3-2120T 6 2A
|
||||
Core i7-2600 6 2A
|
||||
Xeon X5650 6 2C
|
||||
Core i7-3517 6 3A
|
||||
Core i7-3930K 6 2D
|
||||
*/
|
||||
cpu.putFamily();
|
||||
if (!cpu.has(Cpu::tINTEL)) return;
|
||||
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
||||
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
||||
}
|
||||
printf("SmtLevel =%u\n", cpu.getNumCores(Xbyak::util::SmtLevel));
|
||||
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
bool onlyCpuidFeature = argc == 2 && strcmp(argv[1], "-cpuid") == 0;
|
||||
if (!onlyCpuidFeature) {
|
||||
#ifdef XBYAK32
|
||||
puts("32bit");
|
||||
#else
|
||||
puts("64bit");
|
||||
#endif
|
||||
}
|
||||
putCPUinfo(onlyCpuidFeature);
|
||||
}
|
||||
228
externals/xbyak/sample/test_util.vcxproj
vendored
Normal file
228
externals/xbyak/sample/test_util.vcxproj
vendored
Normal file
@ -0,0 +1,228 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/test_util.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/test_util.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/test_util.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/test_util.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/test_util.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/test_util.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/test_util.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/test_util.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/test_util.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/test_util.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="test_util.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
380
externals/xbyak/sample/toyvm.cpp
vendored
Normal file
380
externals/xbyak/sample/toyvm.cpp
vendored
Normal file
@ -0,0 +1,380 @@
|
||||
/*
|
||||
toy vm
|
||||
register A, B : 32bit
|
||||
PC : program counter
|
||||
|
||||
mem_ 4byte x 65536
|
||||
|
||||
all instructions are fixed at 4 bytes.
|
||||
all immediate values are 16-bit.
|
||||
|
||||
R = A or B
|
||||
vldiR, imm ; R = imm
|
||||
vldR, idx ; R = mem_[idx]
|
||||
vstR, idx ; mem_[idx] = R
|
||||
vaddiR, imm ; R += imm
|
||||
vsubiR, imm ; R -= imm
|
||||
vaddR, idx ; R += mem_[idx]
|
||||
vsubR, idx ; R -= mem_[idx]
|
||||
vputR ; print R
|
||||
vjnzR, offset; if (R != 0) then jmp(PC += offset(signed))
|
||||
*/
|
||||
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
|
||||
#pragma warning(disable:4514)
|
||||
#pragma warning(disable:4786)
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#include <vector>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
#ifdef XBYAK64
|
||||
#error "only 32bit"
|
||||
#endif
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
class ToyVm : public Xbyak::CodeGenerator {
|
||||
typedef std::vector<uint32_t> Buffer;
|
||||
public:
|
||||
enum Reg {
|
||||
A, B
|
||||
};
|
||||
enum Code {
|
||||
LD, LDI, ST, ADD, ADDI, SUB, SUBI, PUT, JNZ,
|
||||
END_OF_CODE
|
||||
};
|
||||
ToyVm()
|
||||
: mark_(0)
|
||||
{
|
||||
::memset(mem_, 0, sizeof(mem_));
|
||||
}
|
||||
void vldi(Reg r, uint16_t imm) { encode(LDI, r, imm); }
|
||||
void vld(Reg r, uint16_t idx) { encode(LD, r, idx); }
|
||||
void vst(Reg r, uint16_t idx) { encode(ST, r, idx); }
|
||||
void vadd(Reg r, uint16_t idx) { encode(ADD, r, idx); }
|
||||
void vaddi(Reg r, uint16_t imm) { encode(ADDI, r, imm); }
|
||||
void vsub(Reg r, uint16_t idx) { encode(SUB, r, idx); }
|
||||
void vsubi(Reg r, uint16_t imm) { encode(SUBI, r, imm); }
|
||||
void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16_t>(offset)); }
|
||||
void vput(Reg r) { encode(PUT, r); }
|
||||
void setMark()
|
||||
{
|
||||
mark_ = (int)code_.size();
|
||||
}
|
||||
int getMarkOffset()
|
||||
{
|
||||
return mark_ - (int)code_.size() - 1;
|
||||
}
|
||||
void run()
|
||||
{
|
||||
bool debug = false;//true;
|
||||
uint32_t reg[2] = { 0, 0 };
|
||||
const size_t end = code_.size();
|
||||
uint32_t pc = 0;
|
||||
for (;;) {
|
||||
uint32_t x = code_[pc];
|
||||
uint32_t code, r, imm;
|
||||
decode(code, r, imm, x);
|
||||
if (debug) {
|
||||
printf("---\n");
|
||||
printf("A %08x B %08x\n", reg[0], reg[1]);
|
||||
printf("mem_[] = %08x %08x %08x\n", mem_[0], mem_[1], mem_[2]);
|
||||
printf("pc=%4d, code=%02x, r=%d, imm=%04x\n", pc, code, r, imm);
|
||||
}
|
||||
switch (code) {
|
||||
case LDI:
|
||||
reg[r] = imm;
|
||||
break;
|
||||
case LD:
|
||||
reg[r] = mem_[imm];
|
||||
break;
|
||||
case ST:
|
||||
mem_[imm] = reg[r];
|
||||
break;
|
||||
case ADD:
|
||||
reg[r] += mem_[imm];
|
||||
break;
|
||||
case ADDI:
|
||||
reg[r] += imm;
|
||||
break;
|
||||
case SUB:
|
||||
reg[r] -= mem_[imm];
|
||||
break;
|
||||
case SUBI:
|
||||
reg[r] -= imm;
|
||||
break;
|
||||
case PUT:
|
||||
printf("%c %8u(0x%08x)\n", 'A' + r, reg[r], reg[r]);
|
||||
break;
|
||||
case JNZ:
|
||||
if (reg[r] != 0) pc += static_cast<signed short>(imm);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
pc++;
|
||||
if (pc >= end) break;
|
||||
} // for (;;)
|
||||
}
|
||||
void recompile()
|
||||
{
|
||||
using namespace Xbyak;
|
||||
/*
|
||||
esi : A
|
||||
edi : B
|
||||
ebx : mem_
|
||||
for speed up
|
||||
mem_[0] : eax
|
||||
mem_[1] : ecx
|
||||
mem_[2] : edx
|
||||
*/
|
||||
push(ebx);
|
||||
push(esi);
|
||||
push(edi);
|
||||
|
||||
const Reg32 reg[2] = { esi, edi };
|
||||
const Reg32 mem(ebx);
|
||||
|
||||
const Reg32 memTbl[] = { eax, ecx, edx };
|
||||
const size_t memTblNum = NUM_OF_ARRAY(memTbl);
|
||||
for (size_t i = 0; i < memTblNum; i++) xor_(memTbl[i], memTbl[i]);
|
||||
|
||||
xor_(esi, esi);
|
||||
xor_(edi, edi);
|
||||
mov(mem, (size_t)mem_);
|
||||
const size_t end = code_.size();
|
||||
uint32_t pc = 0;
|
||||
uint32_t labelNum = 0;
|
||||
for (;;) {
|
||||
uint32_t x = code_[pc];
|
||||
uint32_t code, r, imm;
|
||||
decode(code, r, imm, x);
|
||||
L(Label::toStr(labelNum++));
|
||||
switch (code) {
|
||||
case LDI:
|
||||
mov(reg[r], imm);
|
||||
break;
|
||||
case LD:
|
||||
if (imm < memTblNum) {
|
||||
mov(reg[r], memTbl[imm]);
|
||||
} else {
|
||||
mov(reg[r], ptr[mem + imm * 4]);
|
||||
}
|
||||
break;
|
||||
case ST:
|
||||
if (imm < memTblNum) {
|
||||
mov(memTbl[imm], reg[r]);
|
||||
} else {
|
||||
mov(ptr [mem + imm * 4], reg[r]);
|
||||
}
|
||||
break;
|
||||
case ADD:
|
||||
if (imm < memTblNum) {
|
||||
add(reg[r], memTbl[imm]);
|
||||
} else {
|
||||
add(reg[r], ptr [mem + imm * 4]);
|
||||
}
|
||||
break;
|
||||
case ADDI:
|
||||
add(reg[r], imm);
|
||||
break;
|
||||
case SUB:
|
||||
if (imm < memTblNum) {
|
||||
sub(reg[r], memTbl[imm]);
|
||||
} else {
|
||||
sub(reg[r], ptr [mem + imm * 4]);
|
||||
}
|
||||
break;
|
||||
case SUBI:
|
||||
sub(reg[r], imm);
|
||||
break;
|
||||
case PUT:
|
||||
{
|
||||
static const char *str = "%c %8d(0x%08x)\n";
|
||||
push(eax);
|
||||
push(edx);
|
||||
push(ecx);
|
||||
push(reg[r]);
|
||||
push(reg[r]);
|
||||
push('A' + r);
|
||||
push((int)str);
|
||||
call(reinterpret_cast<const void*>(printf));
|
||||
add(esp, 4 * 4);
|
||||
pop(ecx);
|
||||
pop(edx);
|
||||
pop(eax);
|
||||
}
|
||||
break;
|
||||
case JNZ:
|
||||
test(reg[r], reg[r]);
|
||||
jnz(Label::toStr(labelNum + static_cast<signed short>(imm)));
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
pc++;
|
||||
if (pc >= end) break;
|
||||
} // for (;;)
|
||||
|
||||
pop(edi);
|
||||
pop(esi);
|
||||
pop(ebx);
|
||||
ret();
|
||||
}
|
||||
private:
|
||||
uint32_t mem_[65536];
|
||||
Buffer code_;
|
||||
int mark_;
|
||||
void decode(uint32_t& code, uint32_t& r, uint32_t& imm, uint32_t x)
|
||||
{
|
||||
code = x >> 24;
|
||||
r = (x >> 16) & 0xff;
|
||||
imm = x & 0xffff;
|
||||
}
|
||||
void encode(Code code, Reg r, uint16_t imm = 0)
|
||||
{
|
||||
uint32_t x = (code << 24) | (r << 16) | imm;
|
||||
code_.push_back(x);
|
||||
}
|
||||
};
|
||||
|
||||
class Fib : public ToyVm {
|
||||
public:
|
||||
Fib(int n)
|
||||
{
|
||||
if (n >= 65536) {
|
||||
fprintf(stderr, "current version support only imm16\n");
|
||||
return;
|
||||
}
|
||||
/*
|
||||
A : c
|
||||
B : temporary
|
||||
mem_[0] : p
|
||||
mem_[1] : t
|
||||
mem_[2] : n
|
||||
*/
|
||||
vldi(A, 1); // c
|
||||
vst(A, 0); // p(1)
|
||||
vldi(B, static_cast<uint16_t>(n));
|
||||
vst(B, 2); // n
|
||||
// lp
|
||||
setMark();
|
||||
vst(A, 1); // t = c
|
||||
vadd(A, 0); // c += p
|
||||
vld(B, 1);
|
||||
vst(B, 0); // p = t
|
||||
// vput(A);
|
||||
vld(B, 2);
|
||||
vsubi(B, 1);
|
||||
vst(B, 2); // n--
|
||||
vjnz(B, getMarkOffset());
|
||||
vput(A);
|
||||
}
|
||||
void runByJIT()
|
||||
{
|
||||
getCode<void (*)()>();
|
||||
}
|
||||
};
|
||||
|
||||
void fibC(uint32_t n)
|
||||
{
|
||||
uint32_t p, c, t;
|
||||
p = 1;
|
||||
c = 1;
|
||||
lp:
|
||||
t = c;
|
||||
c += p;
|
||||
p = t;
|
||||
n--;
|
||||
if (n != 0) goto lp;
|
||||
printf("c=%u(0x%08x)\n", c, c);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try {
|
||||
const int n = 10000;
|
||||
Fib fib(n);
|
||||
|
||||
fib.recompile();
|
||||
|
||||
{
|
||||
Xbyak::util::Clock clk;
|
||||
clk.begin();
|
||||
fib.run();
|
||||
clk.end();
|
||||
printf("vm %.2fKclk\n", clk.getClock() * 1e-3);
|
||||
}
|
||||
|
||||
{
|
||||
Xbyak::util::Clock clk;
|
||||
clk.begin();
|
||||
fib.runByJIT();
|
||||
clk.end();
|
||||
printf("jit %.2fKclk\n", clk.getClock() * 1e-3);
|
||||
}
|
||||
|
||||
{
|
||||
Xbyak::util::Clock clk;
|
||||
clk.begin();
|
||||
fibC(n);
|
||||
clk.end();
|
||||
printf("native C %.2fKclk\n", clk.getClock() * 1e-3);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
the code generated by Xbyak
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
xor eax,eax
|
||||
xor ecx,ecx
|
||||
xor edx,edx
|
||||
xor esi,esi
|
||||
xor edi,edi
|
||||
mov ebx,0EFF58h
|
||||
mov esi,1
|
||||
mov eax,esi
|
||||
mov edi,2710h
|
||||
mov edx,edi
|
||||
.lp:
|
||||
mov ecx,esi
|
||||
add esi,eax
|
||||
mov edi,ecx
|
||||
mov eax,edi
|
||||
mov edi,edx
|
||||
sub edi,1
|
||||
mov edx,edi
|
||||
test edi,edi
|
||||
jne .lp
|
||||
push eax
|
||||
push edx
|
||||
push ecx
|
||||
push esi
|
||||
push esi
|
||||
push 41h
|
||||
push 42C434h
|
||||
call printf (409342h)
|
||||
add esp,10h
|
||||
pop ecx
|
||||
pop edx
|
||||
pop eax
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
*/
|
||||
228
externals/xbyak/sample/toyvm.vcxproj
vendored
Normal file
228
externals/xbyak/sample/toyvm.vcxproj
vendored
Normal file
@ -0,0 +1,228 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{2E41C7AF-39FF-454C-B081-37445378DCB3}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/toyvm.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/toyvm.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/toyvm.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/toyvm.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/toyvm.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/toyvm.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/toyvm.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/toyvm.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/toyvm.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/toyvm.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="toyvm.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
14
externals/xbyak/test/Makefile.win
vendored
Normal file
14
externals/xbyak/test/Makefile.win
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS -I ../
|
||||
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
|
||||
../gen/gen_code.exe > $@
|
||||
../gen/gen_avx512.exe >> $@
|
||||
|
||||
../gen/gen_code.exe: ../gen/gen_code.cpp #../xbyak/xbyak.h
|
||||
cl ../gen/gen_code.cpp $(OPT) /Fe:../gen/gen_code.exe
|
||||
|
||||
../gen/gen_avx512.exe: ../gen/gen_avx512.cpp #../xbyak/xbyak.h
|
||||
cl ../gen/gen_avx512.cpp $(OPT) /Fe:../gen/gen_avx512.exe
|
||||
|
||||
SUB_HEADER=../xbyak/xbyak_mnemonic.h
|
||||
|
||||
all: $(SUB_HEADER)
|
||||
9
externals/xbyak/test/a.bat
vendored
Normal file
9
externals/xbyak/test/a.bat
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
@echo off
|
||||
echo 32bit
|
||||
rm -rf a.lst b.lst
|
||||
echo nasm
|
||||
nasm -l a.lst -f win32 -DWIN32 test.asm
|
||||
cat a.lst
|
||||
echo yasm
|
||||
yasm -l b.lst -f win32 -DWIN32 test.asm
|
||||
cat b.lst
|
||||
155
externals/xbyak/test/address.cpp
vendored
Normal file
155
externals/xbyak/test/address.cpp
vendored
Normal file
@ -0,0 +1,155 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
void genVsibSub(bool isJIT, const char *name, const char *tbl[], size_t tblSize)
|
||||
{
|
||||
for (size_t i = 0; i < tblSize; i++) {
|
||||
if (isJIT) {
|
||||
printf("%s (ymm7, ptr[", name);
|
||||
} else {
|
||||
printf("%s ymm7, [", name);
|
||||
}
|
||||
printf("%s", tbl[i]);
|
||||
if (isJIT) {
|
||||
printf("], ymm4); dump();\n");
|
||||
} else {
|
||||
printf("], ymm4\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
void genVsib(bool isJIT)
|
||||
{
|
||||
if (isJIT) puts("void genVsib() {");
|
||||
const char *vm32xTbl[] = {
|
||||
"xmm0",
|
||||
"xmm0 * 1",
|
||||
"xmm0 + 4",
|
||||
"xmm0 + eax",
|
||||
"xmm0 * 4 + ecx",
|
||||
"xmm3 * 8 + edi + 123",
|
||||
"xmm2 * 2 + 5",
|
||||
"eax + xmm0",
|
||||
"esp + xmm2",
|
||||
};
|
||||
const char *vm32yTbl[] = {
|
||||
"ymm0",
|
||||
"ymm0 * 1",
|
||||
"ymm0 + 4",
|
||||
"ymm0 + eax",
|
||||
"ymm0 * 4 + ecx",
|
||||
"ymm3 * 8 + edi + 123",
|
||||
"ymm2 * 2 + 5",
|
||||
"eax + ymm0",
|
||||
"esp + ymm2",
|
||||
};
|
||||
genVsibSub(isJIT, "vgatherdpd", vm32xTbl, NUM_OF_ARRAY(vm32xTbl));
|
||||
genVsibSub(isJIT, "vgatherqpd", vm32yTbl, NUM_OF_ARRAY(vm32yTbl));
|
||||
#ifdef XBYAK64
|
||||
const char *vm32x64Tbl[] = {
|
||||
"xmm0 + r11",
|
||||
"r13 + xmm15",
|
||||
"123 + rsi + xmm2 * 4",
|
||||
};
|
||||
genVsibSub(isJIT, "vgatherdpd", vm32x64Tbl, NUM_OF_ARRAY(vm32x64Tbl));
|
||||
#endif
|
||||
if (isJIT) puts("}");
|
||||
}
|
||||
|
||||
void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
|
||||
{
|
||||
int count = 0;
|
||||
int funcNum = 1;
|
||||
if (isJIT) {
|
||||
puts("void gen0(){");
|
||||
}
|
||||
for (size_t i = 0; i < regTblNum + 1; i++) {
|
||||
const char *base = regTbl[i];
|
||||
for (size_t j = 0; j < regTblNum + 1; j++) {
|
||||
if (j == 4) continue; /* esp is not index register */
|
||||
const char *index = regTbl[j];
|
||||
static const int scaleTbl[] = { 0, 1, 2, 4, 8 };
|
||||
for (size_t k = 0; k < NUM_OF_ARRAY(scaleTbl); k++) {
|
||||
int scale = scaleTbl[k];
|
||||
static const int dispTbl[] = { 0, 1, 1000, -1, -1000 };
|
||||
for (size_t m = 0; m < NUM_OF_ARRAY(dispTbl); m++) {
|
||||
int disp = dispTbl[m];
|
||||
bool isFirst = true;
|
||||
if (isJIT) {
|
||||
printf("mov (ecx, ptr[");
|
||||
} else {
|
||||
printf("mov ecx, [");
|
||||
}
|
||||
if (i < regTblNum) {
|
||||
printf("%s", base);
|
||||
isFirst = false;
|
||||
}
|
||||
if (j < regTblNum) {
|
||||
if (!isFirst) putchar('+');
|
||||
printf("%s", index);
|
||||
if (scale) printf("*%d", scale);
|
||||
isFirst = false;
|
||||
}
|
||||
if (isFirst) {
|
||||
if (isJIT) printf("(void*)");
|
||||
printf("%d", disp);
|
||||
} else {
|
||||
if (disp >= 0) {
|
||||
putchar('+');
|
||||
}
|
||||
printf("%d", disp);
|
||||
isFirst = false;
|
||||
}
|
||||
if (isJIT) {
|
||||
printf("]); dump();\n");
|
||||
} else {
|
||||
printf("]\n");
|
||||
}
|
||||
if (isJIT) {
|
||||
count++;
|
||||
if ((count % 100) == 0) {
|
||||
printf("}\n void gen%d(){\n", funcNum++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isJIT) puts("}");
|
||||
genVsib(isJIT);
|
||||
if (isJIT) {
|
||||
printf("void gen(){\n");
|
||||
for (int i = 0; i < funcNum; i++) {
|
||||
printf(" gen%d();\n", i);
|
||||
}
|
||||
puts("genVsib();");
|
||||
printf("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
argc--, argv++;
|
||||
bool phase = argc > 0 && strcmp(*argv, "1") == 0;
|
||||
bool isJIT = (argc > 1);
|
||||
fprintf(stderr, "phase:%c %s\n", phase ? '1' : '2', isJIT ? "jit" : "asm");
|
||||
if (phase) {
|
||||
fprintf(stderr, "32bit reg\n");
|
||||
static const char reg32Tbl[][5] = {
|
||||
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
|
||||
#ifdef XBYAK64
|
||||
"r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
|
||||
#endif
|
||||
};
|
||||
genAddress(isJIT, reg32Tbl, NUM_OF_ARRAY(reg32Tbl));
|
||||
} else {
|
||||
#ifdef XBYAK64
|
||||
fprintf(stderr, "64bit reg\n");
|
||||
static const char reg64Tbl[][5] = {
|
||||
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
|
||||
};
|
||||
genAddress(isJIT, reg64Tbl, NUM_OF_ARRAY(reg64Tbl));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
28
externals/xbyak/test/bad_address.cpp
vendored
Normal file
28
externals/xbyak/test/bad_address.cpp
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp + esp]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [ax]), std::exception); // not support
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 16]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(fld(dword [xmm0]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3), std::exception);
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [rax + eax]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0]), std::exception);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
CYBOZU_TEST_AUTO(exception)
|
||||
{
|
||||
Code c;
|
||||
}
|
||||
151
externals/xbyak/test/cvt_test.cpp
vendored
Normal file
151
externals/xbyak/test/cvt_test.cpp
vendored
Normal file
@ -0,0 +1,151 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/inttype.hpp>
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
#ifdef XBYAK64
|
||||
const struct Ptn {
|
||||
const Reg8 *reg8;
|
||||
Reg16 reg16;
|
||||
Reg32 reg32;
|
||||
Reg64 reg64;
|
||||
} tbl[] = {
|
||||
{ &al, ax, eax, rax },
|
||||
{ &bl, bx, ebx, rbx },
|
||||
{ &cl, cx, ecx, rcx },
|
||||
{ &dl, dx, edx, rdx },
|
||||
{ &sil, si, esi, rsi },
|
||||
{ &dil, di, edi, rdi },
|
||||
{ &bpl, bp, ebp, rbp },
|
||||
{ &spl, sp, esp, rsp },
|
||||
{ &r8b, r8w, r8d, r8 },
|
||||
{ &r9b, r9w, r9d, r9 },
|
||||
{ &r10b, r10w, r10d, r10 },
|
||||
{ &r11b, r11w, r11d, r11 },
|
||||
{ &r12b, r12w, r12d, r12 },
|
||||
{ &r13b, r13w, r13d, r13 },
|
||||
{ &r14b, r14w, r14d, r14 },
|
||||
{ &r15b, r15w, r15d, r15 },
|
||||
};
|
||||
#else
|
||||
const struct Ptn {
|
||||
const Reg8 *reg8;
|
||||
Reg16 reg16;
|
||||
Reg32 reg32;
|
||||
} tbl[] = {
|
||||
{ &al, ax, eax },
|
||||
{ &bl, bx, ebx },
|
||||
{ &cl, cx, ecx },
|
||||
{ &dl, dx, edx },
|
||||
{ 0, si, esi },
|
||||
{ 0, di, edi },
|
||||
{ 0, bp, ebp },
|
||||
{ 0, sp, esp },
|
||||
};
|
||||
#endif
|
||||
|
||||
CYBOZU_TEST_AUTO(cvt)
|
||||
{
|
||||
for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
|
||||
if (tbl[i].reg8) {
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt8() == *tbl[i].reg8);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt16() == tbl[i].reg16);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt32() == tbl[i].reg32);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt8() == *tbl[i].reg8);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt8() == *tbl[i].reg8);
|
||||
}
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt16() == tbl[i].reg16);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt32() == tbl[i].reg32);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt16() == tbl[i].reg16);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt32() == tbl[i].reg32);
|
||||
#ifdef XBYAK64
|
||||
if (tbl[i].reg8) {
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt8() == *tbl[i].reg8);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt64() == tbl[i].reg64);
|
||||
}
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt16() == tbl[i].reg16);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt32() == tbl[i].reg32);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt64() == tbl[i].reg64);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt64() == tbl[i].reg64);
|
||||
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt64() == tbl[i].reg64);
|
||||
#endif
|
||||
}
|
||||
{
|
||||
const Reg8 errTbl[] = {
|
||||
ah, bh, ch, dh
|
||||
};
|
||||
for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
|
||||
CYBOZU_TEST_EXCEPTION(errTbl[i].cvt16(), std::exception);
|
||||
}
|
||||
}
|
||||
#ifdef XBYAK32
|
||||
{
|
||||
const Reg16 errTbl[] = {
|
||||
si, di, bp, sp
|
||||
};
|
||||
for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
|
||||
CYBOZU_TEST_EXCEPTION(errTbl[i].cvt8(), std::exception);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(changeBit)
|
||||
{
|
||||
using namespace Xbyak::util;
|
||||
#ifdef XBYAK64
|
||||
const size_t N = 7;
|
||||
const Reg* tbl[][N] = {
|
||||
{ &al, &ax, &eax, &rax, &xmm0, &ymm0, &zmm0 },
|
||||
{ &cl, &cx, &ecx, &rcx, &xmm1, &ymm1, &zmm1 },
|
||||
{ &dl, &dx, &edx, &rdx, &xmm2, &ymm2, &zmm2 },
|
||||
{ &bl, &bx, &ebx, &rbx, &xmm3, &ymm3, &zmm3 },
|
||||
{ &spl, &sp, &esp, &rsp, &xmm4, &ymm4, &zmm4 },
|
||||
{ &bpl, &bp, &ebp, &rbp, &xmm5, &ymm5, &zmm5 },
|
||||
{ &sil, &si, &esi, &rsi, &xmm6, &ymm6, &zmm6 },
|
||||
{ &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
|
||||
{ &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
|
||||
{ &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
|
||||
{ 0, 0, 0, 0, &xmm16, &ymm16, &zmm16 },
|
||||
{ 0, 0, 0, 0, &xmm31, &ymm31, &zmm31 },
|
||||
};
|
||||
const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
|
||||
#else
|
||||
const size_t N = 6;
|
||||
const Reg* tbl[][N] = {
|
||||
{ &al, &ax, &eax, &xmm0, &ymm0, &zmm0 },
|
||||
{ &cl, &cx, &ecx, &xmm1, &ymm1, &zmm1 },
|
||||
{ &dl, &dx, &edx, &xmm2, &ymm2, &zmm2 },
|
||||
{ &bl, &bx, &ebx, &xmm3, &ymm3, &zmm3 },
|
||||
{ 0, &sp, &esp, &xmm4, &ymm4, &zmm4 },
|
||||
{ 0, &bp, &ebp, &xmm5, &ymm5, &zmm5 },
|
||||
{ 0, &si, &esi, &xmm6, &ymm6, &zmm6 },
|
||||
{ 0, &di, &edi, &xmm7, &ymm7, &zmm7 },
|
||||
};
|
||||
const int bitTbl[N] = { 8, 16, 32, 128, 256, 512 };
|
||||
#endif
|
||||
|
||||
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
|
||||
for (size_t j = 0; j < N; j++) {
|
||||
const Reg *r1 = tbl[i][j];
|
||||
if (r1 == 0) continue;
|
||||
for (size_t k = 0; k < N; k++) {
|
||||
if (tbl[i][k]) {
|
||||
CYBOZU_TEST_ASSERT(*tbl[i][k] == r1->changeBit(bitTbl[k]));
|
||||
// printf("%s->changeBit(%d)=%s %s\n", r1->toString(), bitTbl[k], r1->changeBit(bitTbl[k]).toString(), tbl[i][k]->toString());
|
||||
} else {
|
||||
CYBOZU_TEST_EXCEPTION(r1->changeBit(bitTbl[k]), std::exception);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
const Reg8 *special8bitTbl[] = { &ah, &bh, &ch, &dh };
|
||||
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(special8bitTbl); i++) {
|
||||
CYBOZU_TEST_EXCEPTION(special8bitTbl[i]->changeBit(16), std::exception);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
163
externals/xbyak/test/cybozu/inttype.hpp
vendored
Normal file
163
externals/xbyak/test/cybozu/inttype.hpp
vendored
Normal file
@ -0,0 +1,163 @@
|
||||
#pragma once
|
||||
/**
|
||||
@file
|
||||
@brief int type definition and macros
|
||||
@author MITSUNARI Shigeo(@herumi)
|
||||
*/
|
||||
|
||||
#if defined(_MSC_VER) && (MSC_VER <= 1500) && !defined(CYBOZU_DEFINED_INTXX)
|
||||
#define CYBOZU_DEFINED_INTXX
|
||||
typedef __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef int int32_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef short int16_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef signed char int8_t;
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifndef CYBOZU_DEFINED_SSIZE_T
|
||||
#define CYBOZU_DEFINED_SSIZE_T
|
||||
#ifdef _WIN64
|
||||
typedef int64_t ssize_t;
|
||||
#else
|
||||
typedef int32_t ssize_t;
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
#include <unistd.h> // for ssize_t
|
||||
#endif
|
||||
|
||||
#ifndef CYBOZU_ALIGN
|
||||
#ifdef _MSC_VER
|
||||
#define CYBOZU_ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CYBOZU_FORCE_INLINE
|
||||
#ifdef _MSC_VER
|
||||
#define CYBOZU_FORCE_INLINE __forceinline
|
||||
#else
|
||||
#define CYBOZU_FORCE_INLINE __attribute__((always_inline))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CYBOZU_UNUSED
|
||||
#ifdef __GNUC__
|
||||
#define CYBOZU_UNUSED __attribute__((unused))
|
||||
#else
|
||||
#define CYBOZU_UNUSED
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CYBOZU_ALLOCA
|
||||
#ifdef _MSC_VER
|
||||
#include <malloc.h>
|
||||
#define CYBOZU_ALLOCA(x) _malloca(x)
|
||||
#else
|
||||
#define CYBOZU_ALLOCA(x) __builtin_alloca(x)
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CYBOZU_NUM_OF_ARRAY
|
||||
#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
|
||||
#endif
|
||||
#ifndef CYBOZU_SNPRINTF
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
|
||||
#else
|
||||
#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CYBOZU_CPP_VERSION_CPP03 0
|
||||
#define CYBOZU_CPP_VERSION_TR1 1
|
||||
#define CYBOZU_CPP_VERSION_CPP11 2
|
||||
#define CYBOZU_CPP_VERSION_CPP14 3
|
||||
#define CYBOZU_CPP_VERSION_CPP17 4
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define CYBOZU_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
|
||||
#else
|
||||
#define CYBOZU_GNUC_PREREQ(major, minor) 0
|
||||
#endif
|
||||
|
||||
#if (__cplusplus >= 201703)
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP17
|
||||
#elif (__cplusplus >= 201402)
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP14
|
||||
#elif (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
|
||||
#if defined(_MSC_VER) && (_MSC_VER <= 1600)
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
|
||||
#else
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
|
||||
#endif
|
||||
#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
|
||||
#else
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
|
||||
#endif
|
||||
|
||||
#ifdef CYBOZU_USE_BOOST
|
||||
#define CYBOZU_NAMESPACE_STD boost
|
||||
#define CYBOZU_NAMESPACE_TR1_BEGIN
|
||||
#define CYBOZU_NAMESPACE_TR1_END
|
||||
#elif (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1) && !defined(__APPLE__)
|
||||
#define CYBOZU_NAMESPACE_STD std::tr1
|
||||
#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
|
||||
#define CYBOZU_NAMESPACE_TR1_END }
|
||||
#else
|
||||
#define CYBOZU_NAMESPACE_STD std
|
||||
#define CYBOZU_NAMESPACE_TR1_BEGIN
|
||||
#define CYBOZU_NAMESPACE_TR1_END
|
||||
#endif
|
||||
|
||||
#ifndef CYBOZU_OS_BIT
|
||||
#if defined(_WIN64) || defined(__x86_64__) || defined(__AARCH64EL__) || defined(__EMSCRIPTEN__)
|
||||
#define CYBOZU_OS_BIT 64
|
||||
#else
|
||||
#define CYBOZU_OS_BIT 32
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef CYBOZU_HOST
|
||||
#define CYBOZU_HOST_UNKNOWN 0
|
||||
#define CYBOZU_HOST_INTEL 1
|
||||
#define CYBOZU_HOST_ARM 2
|
||||
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
|
||||
#define CYBOZU_HOST CYBOZU_HOST_INTEL
|
||||
#elif defined(__arm__) || defined(__AARCH64EL__)
|
||||
#define CYBOZU_HOST CYBOZU_HOST_ARM
|
||||
#else
|
||||
#define CYBOZU_HOST CYBOZU_HOST_UNKNOWN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef CYBOZU_ENDIAN
|
||||
#define CYBOZU_ENDIAN_UNKNOWN 0
|
||||
#define CYBOZU_ENDIAN_LITTLE 1
|
||||
#define CYBOZU_ENDIAN_BIG 2
|
||||
#if (CYBOZU_HOST == CYBOZU_HOST_INTEL)
|
||||
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
|
||||
#elif (CYBOZU_HOST == CYBOZU_HOST_ARM) && (defined(__ARM_EABI__) || defined(__AARCH64EL__))
|
||||
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
|
||||
#else
|
||||
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
|
||||
#define CYBOZU_NOEXCEPT noexcept
|
||||
#else
|
||||
#define CYBOZU_NOEXCEPT throw()
|
||||
#endif
|
||||
namespace cybozu {
|
||||
template<class T>
|
||||
void disable_warning_unused_variable(const T&) { }
|
||||
template<class T, class S>
|
||||
T cast(const S* ptr) { return static_cast<T>(static_cast<const void*>(ptr)); }
|
||||
template<class T, class S>
|
||||
T cast(S* ptr) { return static_cast<T>(static_cast<void*>(ptr)); }
|
||||
} // cybozu
|
||||
373
externals/xbyak/test/cybozu/test.hpp
vendored
Normal file
373
externals/xbyak/test/cybozu/test.hpp
vendored
Normal file
@ -0,0 +1,373 @@
|
||||
#pragma once
|
||||
/**
|
||||
@file
|
||||
@brief unit test class
|
||||
|
||||
@author MITSUNARI Shigeo(@herumi)
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#if defined(_MSC_VER) && (MSC_VER <= 1500)
|
||||
#include <cybozu/inttype.hpp>
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
namespace cybozu { namespace test {
|
||||
|
||||
class AutoRun {
|
||||
typedef void (*Func)();
|
||||
typedef std::list<std::pair<const char*, Func> > UnitTestList;
|
||||
public:
|
||||
AutoRun()
|
||||
: init_(0)
|
||||
, term_(0)
|
||||
, okCount_(0)
|
||||
, ngCount_(0)
|
||||
, exceptionCount_(0)
|
||||
{
|
||||
}
|
||||
void setup(Func init, Func term)
|
||||
{
|
||||
init_ = init;
|
||||
term_ = term;
|
||||
}
|
||||
void append(const char *name, Func func)
|
||||
{
|
||||
list_.push_back(std::make_pair(name, func));
|
||||
}
|
||||
void set(bool isOK)
|
||||
{
|
||||
if (isOK) {
|
||||
okCount_++;
|
||||
} else {
|
||||
ngCount_++;
|
||||
}
|
||||
}
|
||||
std::string getBaseName(const std::string& name) const
|
||||
{
|
||||
#ifdef _WIN32
|
||||
const char sep = '\\';
|
||||
#else
|
||||
const char sep = '/';
|
||||
#endif
|
||||
size_t pos = name.find_last_of(sep);
|
||||
std::string ret = name.substr(pos + 1);
|
||||
pos = ret.find('.');
|
||||
return ret.substr(0, pos);
|
||||
}
|
||||
int run(int, char *argv[])
|
||||
{
|
||||
std::string msg;
|
||||
try {
|
||||
if (init_) init_();
|
||||
for (UnitTestList::const_iterator i = list_.begin(), ie = list_.end(); i != ie; ++i) {
|
||||
std::cout << "ctest:module=" << i->first << std::endl;
|
||||
try {
|
||||
(i->second)();
|
||||
} catch (std::exception& e) {
|
||||
exceptionCount_++;
|
||||
std::cout << "ctest: " << i->first << " is stopped by exception " << e.what() << std::endl;
|
||||
} catch (...) {
|
||||
exceptionCount_++;
|
||||
std::cout << "ctest: " << i->first << " is stopped by unknown exception" << std::endl;
|
||||
}
|
||||
}
|
||||
if (term_) term_();
|
||||
} catch (std::exception& e) {
|
||||
msg = std::string("ctest:err:") + e.what();
|
||||
} catch (...) {
|
||||
msg = "ctest:err: catch unknown exception";
|
||||
}
|
||||
fflush(stdout);
|
||||
if (msg.empty()) {
|
||||
int err = ngCount_ + exceptionCount_;
|
||||
int total = okCount_ + err;
|
||||
std::cout << "ctest:name=" << getBaseName(*argv)
|
||||
<< ", module=" << list_.size()
|
||||
<< ", total=" << total
|
||||
<< ", ok=" << okCount_
|
||||
<< ", ng=" << ngCount_
|
||||
<< ", exception=" << exceptionCount_ << std::endl;
|
||||
return err > 0 ? 1 : 0;
|
||||
} else {
|
||||
std::cout << msg << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
static inline AutoRun& getInstance()
|
||||
{
|
||||
static AutoRun instance;
|
||||
return instance;
|
||||
}
|
||||
private:
|
||||
Func init_;
|
||||
Func term_;
|
||||
int okCount_;
|
||||
int ngCount_;
|
||||
int exceptionCount_;
|
||||
UnitTestList list_;
|
||||
};
|
||||
|
||||
static AutoRun& autoRun = AutoRun::getInstance();
|
||||
|
||||
inline void test(bool ret, const std::string& msg, const std::string& param, const char *file, int line)
|
||||
{
|
||||
autoRun.set(ret);
|
||||
if (!ret) {
|
||||
printf("%s(%d):ctest:%s(%s);\n", file, line, msg.c_str(), param.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, typename U>
|
||||
bool isEqual(const T& lhs, const U& rhs)
|
||||
{
|
||||
return lhs == rhs;
|
||||
}
|
||||
|
||||
// avoid warning of comparision of integers of different signs
|
||||
inline bool isEqual(size_t lhs, int rhs)
|
||||
{
|
||||
return lhs == size_t(rhs);
|
||||
}
|
||||
inline bool isEqual(int lhs, size_t rhs)
|
||||
{
|
||||
return size_t(lhs) == rhs;
|
||||
}
|
||||
inline bool isEqual(const char *lhs, const char *rhs)
|
||||
{
|
||||
return strcmp(lhs, rhs) == 0;
|
||||
}
|
||||
inline bool isEqual(char *lhs, const char *rhs)
|
||||
{
|
||||
return strcmp(lhs, rhs) == 0;
|
||||
}
|
||||
inline bool isEqual(const char *lhs, char *rhs)
|
||||
{
|
||||
return strcmp(lhs, rhs) == 0;
|
||||
}
|
||||
inline bool isEqual(char *lhs, char *rhs)
|
||||
{
|
||||
return strcmp(lhs, rhs) == 0;
|
||||
}
|
||||
// avoid to compare float directly
|
||||
inline bool isEqual(float lhs, float rhs)
|
||||
{
|
||||
union fi {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} lfi, rfi;
|
||||
lfi.f = lhs;
|
||||
rfi.f = rhs;
|
||||
return lfi.i == rfi.i;
|
||||
}
|
||||
// avoid to compare double directly
|
||||
inline bool isEqual(double lhs, double rhs)
|
||||
{
|
||||
union di {
|
||||
double d;
|
||||
uint64_t i;
|
||||
} ldi, rdi;
|
||||
ldi.d = lhs;
|
||||
rdi.d = rhs;
|
||||
return ldi.i == rdi.i;
|
||||
}
|
||||
|
||||
} } // cybozu::test
|
||||
|
||||
#ifndef CYBOZU_TEST_DISABLE_AUTO_RUN
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
return cybozu::test::autoRun.run(argc, argv);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
alert if !x
|
||||
@param x [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_ASSERT(x) cybozu::test::test(!!(x), "CYBOZU_TEST_ASSERT", #x, __FILE__, __LINE__)
|
||||
|
||||
/**
|
||||
alert if x != y
|
||||
@param x [in]
|
||||
@param y [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_EQUAL(x, y) { \
|
||||
bool _cybozu_eq = cybozu::test::isEqual(x, y); \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: lhs=" << (x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y) << std::endl; \
|
||||
} \
|
||||
}
|
||||
/**
|
||||
alert if fabs(x, y) >= eps
|
||||
@param x [in]
|
||||
@param y [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_NEAR(x, y, eps) { \
|
||||
bool _cybozu_isNear = fabs((x) - (y)) < eps; \
|
||||
cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_isNear) { \
|
||||
std::cout << "ctest: lhs=" << (x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y) << std::endl; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
|
||||
bool _cybozu_eq = x == y; \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: lhs=" << static_cast<const void*>(x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << static_cast<const void*>(y) << std::endl; \
|
||||
} \
|
||||
}
|
||||
/**
|
||||
alert if x[] != y[]
|
||||
@param x [in]
|
||||
@param y [in]
|
||||
@param n [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
|
||||
for (size_t _cybozu_test_i = 0, _cybozu_ie = (size_t)(n); _cybozu_test_i < _cybozu_ie; _cybozu_test_i++) { \
|
||||
bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: i=" << _cybozu_test_i << std::endl; \
|
||||
std::cout << "ctest: lhs=" << (x)[_cybozu_test_i] << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y)[_cybozu_test_i] << std::endl; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
always alert
|
||||
@param msg [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_FAIL(msg) cybozu::test::test(false, "CYBOZU_TEST_FAIL", msg, __FILE__, __LINE__)
|
||||
|
||||
/**
|
||||
verify message in exception
|
||||
*/
|
||||
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
|
||||
{ \
|
||||
int _cybozu_ret = 0; \
|
||||
std::string _cybozu_errMsg; \
|
||||
try { \
|
||||
statement; \
|
||||
_cybozu_ret = 1; \
|
||||
} catch (const Exception& _cybozu_e) { \
|
||||
_cybozu_errMsg = _cybozu_e.what(); \
|
||||
if (_cybozu_errMsg.find(msg) == std::string::npos) { \
|
||||
_cybozu_ret = 2; \
|
||||
} \
|
||||
} catch (...) { \
|
||||
_cybozu_ret = 3; \
|
||||
} \
|
||||
if (_cybozu_ret) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
|
||||
if (_cybozu_ret == 1) { \
|
||||
std::cout << "ctest: no exception" << std::endl; \
|
||||
} else if (_cybozu_ret == 2) { \
|
||||
std::cout << "ctest: bad exception msg:" << _cybozu_errMsg << std::endl; \
|
||||
} else { \
|
||||
std::cout << "ctest: unexpected exception" << std::endl; \
|
||||
} \
|
||||
} else { \
|
||||
cybozu::test::autoRun.set(true); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
|
||||
{ \
|
||||
int _cybozu_ret = 0; \
|
||||
try { \
|
||||
statement; \
|
||||
_cybozu_ret = 1; \
|
||||
} catch (const Exception&) { \
|
||||
} catch (...) { \
|
||||
_cybozu_ret = 2; \
|
||||
} \
|
||||
if (_cybozu_ret) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
|
||||
if (_cybozu_ret == 1) { \
|
||||
std::cout << "ctest: no exception" << std::endl; \
|
||||
} else { \
|
||||
std::cout << "ctest: unexpected exception" << std::endl; \
|
||||
} \
|
||||
} else { \
|
||||
cybozu::test::autoRun.set(true); \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
verify statement does not throw
|
||||
*/
|
||||
#define CYBOZU_TEST_NO_EXCEPTION(statement) \
|
||||
try { \
|
||||
statement; \
|
||||
cybozu::test::autoRun.set(true); \
|
||||
} catch (...) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_NO_EXCEPTION", #statement, __FILE__, __LINE__); \
|
||||
}
|
||||
|
||||
/**
|
||||
append auto unit test
|
||||
@param name [in] module name
|
||||
*/
|
||||
#define CYBOZU_TEST_AUTO(name) \
|
||||
void cybozu_test_ ## name(); \
|
||||
struct cybozu_test_local_ ## name { \
|
||||
cybozu_test_local_ ## name() \
|
||||
{ \
|
||||
cybozu::test::autoRun.append(#name, cybozu_test_ ## name); \
|
||||
} \
|
||||
} cybozu_test_local_instance_ ## name; \
|
||||
void cybozu_test_ ## name()
|
||||
|
||||
/**
|
||||
append auto unit test with fixture
|
||||
@param name [in] module name
|
||||
*/
|
||||
#define CYBOZU_TEST_AUTO_WITH_FIXTURE(name, Fixture) \
|
||||
void cybozu_test_ ## name(); \
|
||||
void cybozu_test_real_ ## name() \
|
||||
{ \
|
||||
Fixture f; \
|
||||
cybozu_test_ ## name(); \
|
||||
} \
|
||||
struct cybozu_test_local_ ## name { \
|
||||
cybozu_test_local_ ## name() \
|
||||
{ \
|
||||
cybozu::test::autoRun.append(#name, cybozu_test_real_ ## name); \
|
||||
} \
|
||||
} cybozu_test_local_instance_ ## name; \
|
||||
void cybozu_test_ ## name()
|
||||
|
||||
/**
|
||||
setup fixture
|
||||
@param Fixture [in] class name of fixture
|
||||
@note cstr of Fixture is called before test and dstr of Fixture is called after test
|
||||
*/
|
||||
#define CYBOZU_TEST_SETUP_FIXTURE(Fixture) \
|
||||
Fixture *cybozu_test_local_fixture; \
|
||||
void cybozu_test_local_init() \
|
||||
{ \
|
||||
cybozu_test_local_fixture = new Fixture(); \
|
||||
} \
|
||||
void cybozu_test_local_term() \
|
||||
{ \
|
||||
delete cybozu_test_local_fixture; \
|
||||
} \
|
||||
struct cybozu_test_local_fixture_setup_ { \
|
||||
cybozu_test_local_fixture_setup_() \
|
||||
{ \
|
||||
cybozu::test::autoRun.setup(cybozu_test_local_init, cybozu_test_local_term); \
|
||||
} \
|
||||
} cybozu_test_local_fixture_setup_instance_;
|
||||
8
externals/xbyak/test/detect_x32.c
vendored
Normal file
8
externals/xbyak/test/detect_x32.c
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
#include <stdio.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
#if defined(__x86_64__) && defined(__ILP32__)
|
||||
puts("x32");
|
||||
#endif
|
||||
}
|
||||
1400
externals/xbyak/test/jmp.cpp
vendored
Normal file
1400
externals/xbyak/test/jmp.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
20
externals/xbyak/test/jmp.sln
vendored
Normal file
20
externals/xbyak/test/jmp.sln
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
Microsoft Visual Studio Solution File, Format Version 10.00
|
||||
# Visual C++ Express 2008
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jmp", "jmp.vcproj", "{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Release|Win32 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Release|Win32.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
195
externals/xbyak/test/jmp.vcproj
vendored
Normal file
195
externals/xbyak/test/jmp.vcproj
vendored
Normal file
@ -0,0 +1,195 @@
|
||||
<?xml version="1.0" encoding="shift_jis"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="9.00"
|
||||
Name="jmp"
|
||||
ProjectGUID="{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}"
|
||||
RootNamespace="jmp"
|
||||
Keyword="Win32Proj"
|
||||
TargetFrameworkVersion="196613"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="$(SolutionDir)/../"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
DebugInformationFormat="4"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
LinkIncremental="2"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="2"
|
||||
EnableIntrinsicFunctions="true"
|
||||
AdditionalIncludeDirectories="$(SolutionDir)/../"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
|
||||
RuntimeLibrary="2"
|
||||
EnableFunctionLevelLinking="true"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
LinkIncremental="1"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="2"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20>ե<EFBFBD><D5A5><EFBFBD><EFBFBD><EFBFBD>"
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\jmp.cpp"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="<22>إå<D8A5><C3A5><EFBFBD> <20>ե<EFBFBD><D5A5><EFBFBD><EFBFBD><EFBFBD>"
|
||||
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
||||
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
||||
>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="<22><EFBFBD><EAA5BD><EFBFBD><EFBFBD> <20>ե<EFBFBD><D5A5><EFBFBD><EFBFBD><EFBFBD>"
|
||||
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
|
||||
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
|
||||
>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
||||
63
externals/xbyak/test/lib.h
vendored
Normal file
63
externals/xbyak/test/lib.h
vendored
Normal file
@ -0,0 +1,63 @@
|
||||
#pragma once
|
||||
#include <stdio.h>
|
||||
|
||||
struct Reg {
|
||||
int r_;
|
||||
Reg(int r) : r_(r) {}
|
||||
};
|
||||
|
||||
inline const Reg& getReg0() { static const Reg r(0); return r; }
|
||||
inline const Reg& getReg1() { static const Reg r(1); return r; }
|
||||
inline const Reg& getReg2() { static const Reg r(2); return r; }
|
||||
|
||||
static const Reg& r0 = getReg0();
|
||||
static const Reg& r1 = getReg1();
|
||||
static const Reg& r2 = getReg2();
|
||||
|
||||
inline void putReg()
|
||||
{
|
||||
puts("putReg");
|
||||
printf("r0=%p, %d\n", &r0, r0.r_);
|
||||
printf("r0=%p, %d\n", &r0, r1.r_);
|
||||
printf("r0=%p, %d\n", &r0, r2.r_);
|
||||
}
|
||||
|
||||
struct A {
|
||||
int a;
|
||||
A()
|
||||
: a(5)
|
||||
{
|
||||
puts("A cstr");
|
||||
}
|
||||
~A()
|
||||
{
|
||||
puts("A dstr");
|
||||
}
|
||||
void put() const
|
||||
{
|
||||
printf("a=%d\n", a);
|
||||
}
|
||||
};
|
||||
|
||||
template<int dummy = 0>
|
||||
struct XT {
|
||||
static A a;
|
||||
};
|
||||
|
||||
template<int dummy>
|
||||
A XT<dummy>::a;
|
||||
|
||||
typedef XT<0> X;
|
||||
|
||||
void init();
|
||||
|
||||
struct Init {
|
||||
Init()
|
||||
{
|
||||
puts("Init");
|
||||
init();
|
||||
putReg();
|
||||
}
|
||||
};
|
||||
static Init s_init;
|
||||
|
||||
51
externals/xbyak/test/lib_min.cpp
vendored
Normal file
51
externals/xbyak/test/lib_min.cpp
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
#include <stdio.h>
|
||||
|
||||
static const struct XXX {
|
||||
XXX() { puts("XXX"); }
|
||||
} s_sss;
|
||||
|
||||
struct A {
|
||||
int aaa;
|
||||
A()
|
||||
: aaa(123)
|
||||
{
|
||||
puts("A cstr");
|
||||
}
|
||||
~A()
|
||||
{
|
||||
puts("A dstr");
|
||||
}
|
||||
void put() const
|
||||
{
|
||||
printf("aaa=%d\n", aaa);
|
||||
}
|
||||
};
|
||||
|
||||
template<int dummy = 0>
|
||||
struct XT {
|
||||
static A sss;
|
||||
};
|
||||
|
||||
template<int dummy>
|
||||
A XT<dummy>::sss;
|
||||
|
||||
typedef XT<0> X;
|
||||
|
||||
static struct Init {
|
||||
Init()
|
||||
{
|
||||
puts("Init");
|
||||
X::sss.put();
|
||||
}
|
||||
} s_init;
|
||||
|
||||
int f() { puts("f"); return 4; }
|
||||
|
||||
static const int r = f();
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("main");
|
||||
printf("r=%d\n", r);
|
||||
X::sss.put();
|
||||
}
|
||||
9
externals/xbyak/test/lib_run.cpp
vendored
Normal file
9
externals/xbyak/test/lib_run.cpp
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
#include "lib.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("main");
|
||||
X::a.put();
|
||||
putReg();
|
||||
}
|
||||
|
||||
13
externals/xbyak/test/lib_test.cpp
vendored
Normal file
13
externals/xbyak/test/lib_test.cpp
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
#include "lib.h"
|
||||
|
||||
void init()
|
||||
{
|
||||
static bool init = true;
|
||||
printf("in lib_test %d\n", init);
|
||||
if (!init) return;
|
||||
init = false;
|
||||
X::a.put();
|
||||
putReg();
|
||||
}
|
||||
|
||||
|
||||
2204
externals/xbyak/test/make_512.cpp
vendored
Normal file
2204
externals/xbyak/test/make_512.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3510
externals/xbyak/test/make_nm.cpp
vendored
Normal file
3510
externals/xbyak/test/make_nm.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2160
externals/xbyak/test/misc.cpp
vendored
Normal file
2160
externals/xbyak/test/misc.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
37
externals/xbyak/test/mprotect_test.cpp
vendored
Normal file
37
externals/xbyak/test/mprotect_test.cpp
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code(int x)
|
||||
{
|
||||
mov(eax, x);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
#ifdef XBYAK_USE_MMAP_ALLOCATOR
|
||||
puts("use Allocator with mmap");
|
||||
#else
|
||||
puts("use Allocator with posix_memalign");
|
||||
#endif
|
||||
const int N = 70000;
|
||||
std::vector<Code*> v(N);
|
||||
for (int i = 0; i < N; i++) {
|
||||
v[i] = new Code(i);
|
||||
}
|
||||
long long sum = 0;
|
||||
for (int i = 0; i < N; i++) {
|
||||
sum += v[i]->getCode<int (*)()>()();
|
||||
}
|
||||
for (int i = 0; i < N; i++) {
|
||||
delete v[i];
|
||||
}
|
||||
printf("sum=%lld\n", sum);
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR %s\n", e.what());
|
||||
}
|
||||
42
externals/xbyak/test/nm_frame.cpp
vendored
Normal file
42
externals/xbyak/test/nm_frame.cpp
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
#include <stdio.h>
|
||||
#define XBYAK_ENABLE_OMITTED_OPERAND
|
||||
#include "xbyak/xbyak.h"
|
||||
#define CYBOZU_TEST_DISABLE_AUTO_RUN
|
||||
#include "cybozu/test.hpp"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4245)
|
||||
#pragma warning(disable : 4312)
|
||||
#endif
|
||||
class Sample : public CodeGenerator {
|
||||
void operator=(const Sample&);
|
||||
public:
|
||||
#include "nm.cpp"
|
||||
};
|
||||
|
||||
|
||||
class ErrorSample : public CodeGenerator {
|
||||
void operator=(const ErrorSample&);
|
||||
public:
|
||||
void gen()
|
||||
{
|
||||
#ifndef XBYAK_NO_EXCEPTION
|
||||
CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(setz(eax), std::exception);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
// the size of Operand exceeds 32 bit.
|
||||
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 8u);
|
||||
Sample s;
|
||||
s.gen();
|
||||
ErrorSample es;
|
||||
es.gen();
|
||||
}
|
||||
111
externals/xbyak/test/noexception.cpp
vendored
Normal file
111
externals/xbyak/test/noexception.cpp
vendored
Normal file
@ -0,0 +1,111 @@
|
||||
#define XBYAK_NO_EXCEPTION
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
int g_err = 0;
|
||||
int g_test = 0;
|
||||
|
||||
void assertEq(int x, int y)
|
||||
{
|
||||
if (x != y) {
|
||||
printf("ERR x=%d y=%d\n", x, y);
|
||||
g_err++;
|
||||
}
|
||||
g_test++;
|
||||
}
|
||||
|
||||
void assertBool(bool b)
|
||||
{
|
||||
if (!b) {
|
||||
printf("ERR assertBool\n");
|
||||
g_err++;
|
||||
}
|
||||
g_test++;
|
||||
}
|
||||
|
||||
void test1()
|
||||
{
|
||||
const int v = 123;
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
mov(eax, v);
|
||||
ret();
|
||||
}
|
||||
} c;
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
assertEq(f(), v);
|
||||
assertEq(Xbyak::GetError(), ERR_NONE);
|
||||
}
|
||||
|
||||
void test2()
|
||||
{
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
Label lp;
|
||||
L(lp);
|
||||
L(lp);
|
||||
}
|
||||
} c;
|
||||
assertEq(Xbyak::GetError(), ERR_LABEL_IS_REDEFINED);
|
||||
Xbyak::ClearError();
|
||||
}
|
||||
|
||||
void test3()
|
||||
{
|
||||
static struct EmptyAllocator : Xbyak::Allocator {
|
||||
uint8_t *alloc(size_t) { return 0; }
|
||||
} emptyAllocator;
|
||||
struct Code : CodeGenerator {
|
||||
Code() : CodeGenerator(8, 0, &emptyAllocator)
|
||||
{
|
||||
mov(eax, 3);
|
||||
assertBool(Xbyak::GetError() == 0);
|
||||
mov(eax, 3);
|
||||
mov(eax, 3);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
assertBool(Xbyak::GetError() == 0);
|
||||
}
|
||||
} c;
|
||||
}
|
||||
|
||||
void test4()
|
||||
{
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
mov(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
test(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
adc(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
setz(eax);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test1();
|
||||
test2();
|
||||
test3();
|
||||
test4();
|
||||
if (g_err) {
|
||||
printf("err %d/%d\n", g_err, g_test);
|
||||
} else {
|
||||
printf("all ok %d\n", g_test);
|
||||
}
|
||||
return g_err != 0;
|
||||
}
|
||||
45
externals/xbyak/test/normalize_prefix.cpp
vendored
Normal file
45
externals/xbyak/test/normalize_prefix.cpp
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
normalize prefix
|
||||
*/
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include <memory.h>
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
|
||||
std::string normalize(const std::string& line)
|
||||
{
|
||||
static const char tbl[][3] = { "66", "67", "F2", "F3" };
|
||||
size_t tblNum = sizeof(tbl) / sizeof(tbl[0]);
|
||||
typedef std::set<std::string> StringSet;
|
||||
StringSet suf;
|
||||
|
||||
size_t pos = 0;
|
||||
for (; pos < line.size(); pos += 2) {
|
||||
bool found = false;
|
||||
for (size_t i = 0; i < tblNum; i++) {
|
||||
if (::memcmp(&line[pos], tbl[i], 2) == 0) {
|
||||
found = true;
|
||||
suf.insert(tbl[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) break;
|
||||
}
|
||||
std::string ret;
|
||||
for (StringSet::const_iterator i = suf.begin(), e = suf.end(); i != e; ++i) {
|
||||
ret += *i;
|
||||
}
|
||||
ret += &line[pos];
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
std::string line;
|
||||
while (std::getline(std::cin, line)) {
|
||||
std::string normalizedLine = normalize(line);
|
||||
std::cout << normalizedLine << '\n';//std::endl;
|
||||
}
|
||||
}
|
||||
6
externals/xbyak/test/readme.txt
vendored
Normal file
6
externals/xbyak/test/readme.txt
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
|
||||
test script on Windows
|
||||
|
||||
this test requires nasm.exe, yasm.exe, cl.exe, awk, diff
|
||||
|
||||
test_all ; for all tests
|
||||
88
externals/xbyak/test/rip-label-imm.cpp
vendored
Normal file
88
externals/xbyak/test/rip-label-imm.cpp
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
/*
|
||||
dump of vc
|
||||
|
||||
00000000003A0000 F3 0F C2 05 F1 00 00 00 00 cmpeqss xmm0,dword ptr [3A00FAh]
|
||||
00000000003A0009 F7 05 E7 00 00 00 21 00 00 00 test dword ptr [3A00FAh],21h
|
||||
00000000003A0013 0F BA 25 DF 00 00 00 03 bt dword ptr [3A00FAh],3
|
||||
00000000003A001B C4 E3 79 0D 05 D5 00 00 00 03 vblendpd xmm0,xmm0,xmmword ptr [3A00FAh],3
|
||||
00000000003A0025 C4 E3 79 0F 05 CB 00 00 00 04 vpalignr xmm0,xmm0,xmmword ptr [3A00FAh],4
|
||||
00000000003A002F C4 E3 7D 19 1D C1 00 00 00 0C vextractf128 xmmword ptr [3A00FAh],ymm3,0Ch
|
||||
00000000003A0039 C4 E3 75 46 05 B7 00 00 00 0D vperm2i128 ymm0,ymm1,ymmword ptr [3A00FAh],0Dh
|
||||
00000000003A0043 C4 E3 79 1D 15 AD 00 00 00 2C vcvtps2ph mmword ptr [3A00FAh],xmm2,2Ch
|
||||
00000000003A004D C7 05 A3 00 00 00 34 12 00 00 mov dword ptr [3A00FAh],1234h
|
||||
00000000003A0057 C1 25 9C 00 00 00 03 shl dword ptr [3A00FAh],3
|
||||
00000000003A005E D1 2D 96 00 00 00 shr dword ptr [3A00FAh],1
|
||||
00000000003A0064 48 0F A4 05 8D 00 00 00 03 shld qword ptr [3A00FAh],rax,3
|
||||
00000000003A006D 48 6B 05 85 00 00 00 15 imul rax,qword ptr [3A00FAh],15h
|
||||
00000000003A0075 C4 E3 FB F0 05 7B 00 00 00 15 rorx rax,qword ptr [3A00FAh],15h
|
||||
00000000003A007F F7 05 71 00 00 00 05 00 00 00 test dword ptr [3A00FAh],5
|
||||
00000000003A0089 66 48 0F 3A 16 05 66 00 00 00 03 pextrq qword ptr [3A00FAh],xmm0,3
|
||||
00000000003A0094 66 48 0F 3A 22 15 5B 00 00 00 05 pinsrq xmm2,qword ptr [3A00FAh],5
|
||||
00000000003A009F 66 0F 3A 15 0D 51 00 00 00 04 pextrw word ptr [3A00FAh],xmm1,4
|
||||
00000000003A00A9 81 15 47 00 00 00 45 23 01 00 adc dword ptr [3A00FAh],12345h
|
||||
00000000003A00B3 0F BA 25 3F 00 00 00 34 bt dword ptr [3A00FAh],34h
|
||||
00000000003A00BB 66 0F BA 3D 36 00 00 00 34 btc word ptr [3A00FAh],34h
|
||||
00000000003A00C4 0F BA 35 2E 00 00 00 34 btr dword ptr [3A00FAh],34h
|
||||
00000000003A00CC C1 15 27 00 00 00 04 rcl dword ptr [3A00FAh],4
|
||||
00000000003A00D3 48 0F A4 05 1E 00 00 00 04 shld qword ptr [3A00FAh],rax,4
|
||||
00000000003A00DC 0F 3A 0F 05 15 00 00 00 04 palignr mm0,mmword ptr [3A00FAh],4
|
||||
00000000003A00E5 66 0F 3A DF 1D 0B 00 00 00 04 aeskeygenassist xmm3,xmmword ptr [3A00FAh],4
|
||||
00000000003A00EF C4 E3 79 60 15 01 00 00 00 07 vpcmpestrm xmm2,xmmword ptr [3A00FAh],7
|
||||
00000000003A00F9 C3 ret
|
||||
00000000003A00FA F0 DE BC 9A 78 56 34 12
|
||||
*/
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
Xbyak::Label label;
|
||||
cmpss(xmm0, ptr[rip + label], 0);
|
||||
test(dword[rip + label], 33);
|
||||
bt(dword[rip + label ], 3);
|
||||
vblendpd(xmm0, xmm0, dword[rip + label], 3);
|
||||
vpalignr(xmm0, xmm0, qword[rip + label], 4);
|
||||
vextractf128(dword[rip + label], ymm3, 12);
|
||||
vperm2i128(ymm0, ymm1, qword[rip + label], 13);
|
||||
vcvtps2ph(ptr[rip + label], xmm2, 44);
|
||||
mov(dword[rip + label], 0x1234);
|
||||
shl(dword[rip + label], 3);
|
||||
shr(dword[rip + label], 1);
|
||||
shld(qword[rip + label], rax, 3);
|
||||
imul(rax, qword[rip + label], 21);
|
||||
rorx(rax, qword[rip + label], 21);
|
||||
test(dword[rip + label], 5);
|
||||
pextrq(ptr[rip + label], xmm0, 3);
|
||||
pinsrq(xmm2, ptr[rip + label], 5);
|
||||
pextrw(ptr[rip + label], xmm1, 4);
|
||||
adc(dword[rip + label], 0x12345);
|
||||
bt(byte[rip + label], 0x34);
|
||||
btc(word[rip + label], 0x34);
|
||||
btr(dword[rip + label], 0x34);
|
||||
rcl(dword[rip + label], 4);
|
||||
shld(qword[rip + label], rax, 4);
|
||||
palignr(mm0, ptr[rip + label], 4);
|
||||
aeskeygenassist(xmm3, ptr[rip + label], 4);
|
||||
vpcmpestrm(xmm2, ptr[rip + label], 7);
|
||||
ret();
|
||||
L(label);
|
||||
dq(0x123456789abcdef0ull);
|
||||
};
|
||||
};
|
||||
|
||||
void dump(const unsigned char *p, size_t n)
|
||||
{
|
||||
for (int i = 0; i < n; i++) {
|
||||
printf("%02x ", p[i]);
|
||||
if ((i % 16) == 15) putchar('\n');
|
||||
}
|
||||
putchar('\n');
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
Code code;
|
||||
void (*f)() = code.getCode<void (*)()>();
|
||||
dump(code.getCode(), code.getSize());
|
||||
f();
|
||||
}
|
||||
2
externals/xbyak/test/set_opt.bat
vendored
Normal file
2
externals/xbyak/test/set_opt.bat
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
@echo off
|
||||
set OPT=/EHsc -I../xbyak -I./ /W4 -D_CRT_SECURE_NO_WARNINGS /nologo
|
||||
416
externals/xbyak/test/sf_test.cpp
vendored
Normal file
416
externals/xbyak/test/sf_test.cpp
vendored
Normal file
@ -0,0 +1,416 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
#ifdef XBYAK32
|
||||
#error "this sample is for only 64-bit mode"
|
||||
#endif
|
||||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
struct Code : public Xbyak::CodeGenerator {
|
||||
void gen1()
|
||||
{
|
||||
StackFrame sf(this, 1);
|
||||
mov(rax, sf.p[0]);
|
||||
}
|
||||
void gen2()
|
||||
{
|
||||
StackFrame sf(this, 2);
|
||||
lea(rax, ptr [sf.p[0] + sf.p[1]]);
|
||||
}
|
||||
void gen3()
|
||||
{
|
||||
StackFrame sf(this, 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
void gen4()
|
||||
{
|
||||
StackFrame sf(this, 4);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen5()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRCX);
|
||||
xor_(rcx, rcx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen6()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen7()
|
||||
{
|
||||
StackFrame sf(this, 3, UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
|
||||
void gen8()
|
||||
{
|
||||
StackFrame sf(this, 3, 3 | UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(sf.t[0], 1);
|
||||
mov(sf.t[1], 2);
|
||||
mov(sf.t[2], 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
|
||||
void gen9()
|
||||
{
|
||||
StackFrame sf(this, 3, 3 | UseRCX | UseRDX, 32);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(sf.t[0], 1);
|
||||
mov(sf.t[1], 2);
|
||||
mov(sf.t[2], 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
mov(ptr [rsp + 8 * 0], rax);
|
||||
mov(ptr [rsp + 8 * 1], rax);
|
||||
mov(ptr [rsp + 8 * 2], rax);
|
||||
mov(ptr [rsp + 8 * 3], rax);
|
||||
}
|
||||
|
||||
void gen10()
|
||||
{
|
||||
StackFrame sf(this, 4, 8 | UseRCX | UseRDX, 32);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
for (int i = 0; i < 8; i++) {
|
||||
mov(sf.t[i], i);
|
||||
}
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
mov(ptr [rsp + 8 * 0], rax);
|
||||
mov(ptr [rsp + 8 * 1], rax);
|
||||
mov(ptr [rsp + 8 * 2], rax);
|
||||
mov(ptr [rsp + 8 * 3], rax);
|
||||
}
|
||||
|
||||
void gen11()
|
||||
{
|
||||
StackFrame sf(this, 0, UseRCX);
|
||||
xor_(rcx, rcx);
|
||||
mov(rax, 3);
|
||||
}
|
||||
|
||||
void gen12()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRDX);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
/*
|
||||
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
|
||||
*/
|
||||
void gen13()
|
||||
{
|
||||
StackFrame sf(this, 1, 13);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, sf.t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, sf.t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
same as gen13
|
||||
*/
|
||||
void gen14()
|
||||
{
|
||||
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
|
||||
Pack t = sf.t;
|
||||
t.append(rcx);
|
||||
t.append(rdx);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
return (1 << 15) - 1;
|
||||
*/
|
||||
void gen15()
|
||||
{
|
||||
StackFrame sf(this, 0, 14, 8);
|
||||
Pack t = sf.t;
|
||||
t.append(rax);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
mov(t[i], 1 << i);
|
||||
}
|
||||
mov(qword[rsp], 0);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
add(ptr[rsp], t[i]);
|
||||
}
|
||||
mov(rax, ptr[rsp]);
|
||||
}
|
||||
};
|
||||
|
||||
struct Code2 : Xbyak::CodeGenerator {
|
||||
Code2()
|
||||
: Xbyak::CodeGenerator(4096 * 32)
|
||||
{
|
||||
}
|
||||
void gen(int pNum, int tNum, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||
if (tNum & UseRCX) xor_(rcx, rcx);
|
||||
if (tNum & UseRDX) xor_(rdx, rdx);
|
||||
for (int i = 0, n = tNum & ~(UseRCX | UseRDX); i < n; i++) {
|
||||
mov(sf.t[i], 5);
|
||||
}
|
||||
for (int i = 0; i < stackSizeByte; i++) {
|
||||
mov(byte [rsp + i], 0);
|
||||
}
|
||||
mov(rax, 1);
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
add(rax, sf.p[i]);
|
||||
}
|
||||
}
|
||||
void gen2(int pNum, int tNum, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||
mov(rax, rsp);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static int errNum = 0;
|
||||
void check(int x, int y)
|
||||
{
|
||||
if (x != y) {
|
||||
printf("err x=%d, y=%d\n", x, y);
|
||||
errNum++;
|
||||
}
|
||||
}
|
||||
|
||||
void verify(const uint8_t *f, int pNum)
|
||||
{
|
||||
switch (pNum) {
|
||||
case 0:
|
||||
check(1, reinterpret_cast<int (*)()>(f)());
|
||||
return;
|
||||
case 1:
|
||||
check(11, reinterpret_cast<int (*)(int)>(f)(10));
|
||||
return;
|
||||
case 2:
|
||||
check(111, reinterpret_cast<int (*)(int, int)>(f)(10, 100));
|
||||
return;
|
||||
case 3:
|
||||
check(1111, reinterpret_cast<int (*)(int, int, int)>(f)(10, 100, 1000));
|
||||
return;
|
||||
case 4:
|
||||
check(11111, reinterpret_cast<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
|
||||
return;
|
||||
default:
|
||||
printf("ERR pNum=%d\n", pNum);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void testAll()
|
||||
{
|
||||
Code2 code;
|
||||
for (int stackSize = 0; stackSize < 32; stackSize += 7) {
|
||||
for (int pNum = 0; pNum < 4; pNum++) {
|
||||
for (int mode = 0; mode < 4; mode++) {
|
||||
int maxNum = 0;
|
||||
int opt = 0;
|
||||
if (mode == 0) {
|
||||
maxNum = 10;
|
||||
} else if (mode == 1) {
|
||||
maxNum = 9;
|
||||
opt = UseRCX;
|
||||
} else if (mode == 2) {
|
||||
maxNum = 9;
|
||||
opt = UseRDX;
|
||||
} else {
|
||||
maxNum = 8;
|
||||
opt = UseRCX | UseRDX;
|
||||
}
|
||||
for (int tNum = 0; tNum < maxNum; tNum++) {
|
||||
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
|
||||
const uint8_t *f = code.getCurr();
|
||||
code.gen(pNum, tNum | opt, stackSize);
|
||||
verify(f, pNum);
|
||||
/*
|
||||
check rsp is 16-byte aligned if stackSize > 0
|
||||
*/
|
||||
if (stackSize > 0) {
|
||||
Code2 c2;
|
||||
c2.gen2(pNum, tNum | opt, stackSize);
|
||||
uint64_t addr = c2.getCode<uint64_t (*)()>()();
|
||||
check(addr % 16, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void testPartial()
|
||||
{
|
||||
Code code;
|
||||
int (*f1)(int) = code.getCurr<int (*)(int)>();
|
||||
code.gen1();
|
||||
check(5, f1(5));
|
||||
|
||||
int (*f2)(int, int) = code.getCurr<int (*)(int, int)>();
|
||||
code.gen2();
|
||||
check(9, f2(3, 6));
|
||||
|
||||
int (*f3)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
||||
code.gen3();
|
||||
check(14, f3(1, 4, 9));
|
||||
|
||||
int (*f4)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen4();
|
||||
check(30, f4(1, 4, 9, 16));
|
||||
|
||||
int (*f5)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen5();
|
||||
check(23, f5(2, 5, 7, 9));
|
||||
|
||||
int (*f6)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen6();
|
||||
check(18, f6(3, 4, 5, 6));
|
||||
|
||||
int (*f7)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
||||
code.gen7();
|
||||
check(12, f7(3, 4, 5));
|
||||
|
||||
int (*f8)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
||||
code.gen8();
|
||||
check(23, f8(5, 8, 10));
|
||||
|
||||
int (*f9)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
||||
code.gen9();
|
||||
check(60, f9(10, 20, 30));
|
||||
|
||||
int (*f10)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen10();
|
||||
check(100, f10(10, 20, 30, 40));
|
||||
|
||||
int (*f11)() = code.getCurr<int (*)()>();
|
||||
code.gen11();
|
||||
check(3, f11());
|
||||
|
||||
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen12();
|
||||
check(24, f12(3, 5, 7, 9));
|
||||
|
||||
{
|
||||
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
|
||||
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||
code.gen13();
|
||||
check(91, f13(tbl));
|
||||
|
||||
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||
code.gen14();
|
||||
check(91, f14(tbl));
|
||||
}
|
||||
int (*f15)() = code.getCurr<int (*)()>();
|
||||
code.gen15();
|
||||
check((1 << 15) - 1, f15());
|
||||
}
|
||||
|
||||
void put(const Xbyak::util::Pack& p)
|
||||
{
|
||||
for (size_t i = 0, n = p.size(); i < n; i++) {
|
||||
printf("%s ", p[i].toString());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void verifyPack(const Xbyak::util::Pack& p, const int *tbl, size_t tblNum)
|
||||
{
|
||||
for (size_t i = 0; i < tblNum; i++) {
|
||||
check(p[i].getIdx(), tbl[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void testPack()
|
||||
{
|
||||
const int N = 10;
|
||||
Xbyak::Reg64 regTbl[N];
|
||||
for (int i = 0; i < N; i++) {
|
||||
regTbl[i] = Xbyak::Reg64(i);
|
||||
}
|
||||
Xbyak::util::Pack p(regTbl, N);
|
||||
const struct {
|
||||
int pos;
|
||||
int num;
|
||||
int tbl[10];
|
||||
} tbl[] = {
|
||||
{ 0, 10, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
|
||||
{ 1, 9, { 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
|
||||
{ 2, 8, { 2, 3, 4, 5, 6, 7, 8, 9 } },
|
||||
{ 3, 7, { 3, 4, 5, 6, 7, 8, 9 } },
|
||||
{ 4, 6, { 4, 5, 6, 7, 8, 9 } },
|
||||
{ 5, 5, { 5, 6, 7, 8, 9 } },
|
||||
{ 6, 4, { 6, 7, 8, 9 } },
|
||||
{ 7, 3, { 7, 8, 9 } },
|
||||
{ 8, 2, { 8, 9 } },
|
||||
{ 9, 1, { 9 } },
|
||||
{ 3, 5, { 3, 4, 5, 6, 7 } },
|
||||
};
|
||||
for (size_t i = 0; i < sizeof(tbl) / sizeof(*tbl); i++) {
|
||||
const int pos = tbl[i].pos;
|
||||
const int num = tbl[i].num;
|
||||
verifyPack(p.sub(pos, num), tbl[i].tbl, num);
|
||||
if (pos + num == N) {
|
||||
verifyPack(p.sub(pos), tbl[i].tbl, num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
testAll();
|
||||
|
||||
testPartial();
|
||||
testPack();
|
||||
printf("errNum=%d\n", errNum);
|
||||
} catch (std::exception& e) {
|
||||
printf("err %s\n", e.what());
|
||||
return 1;
|
||||
} catch (...) {
|
||||
puts("ERR");
|
||||
return 1;
|
||||
}
|
||||
|
||||
BIN
externals/xbyak/test/state.pptx
vendored
Normal file
BIN
externals/xbyak/test/state.pptx
vendored
Normal file
Binary file not shown.
37
externals/xbyak/test/test_address.bat
vendored
Normal file
37
externals/xbyak/test/test_address.bat
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
@echo off
|
||||
set FILTER=grep -v warning
|
||||
if /i "%1"=="64" (
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
) else (
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
)
|
||||
|
||||
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
|
||||
if /i "%1"=="64" (
|
||||
call :sub 1
|
||||
call :sub 2
|
||||
) else (
|
||||
call :sub 1
|
||||
)
|
||||
goto end
|
||||
|
||||
:sub
|
||||
echo cl address.cpp %OPT% %OPT2%
|
||||
cl address.cpp %OPT% %OPT2%
|
||||
address %1% > a.asm
|
||||
echo nasm -f %OPT3% -l a.lst a.asm
|
||||
nasm -f %OPT3% -l a.lst a.asm
|
||||
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
|
||||
echo address %1% jit > nm.cpp
|
||||
address %1% jit > nm.cpp
|
||||
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||
nm_frame > x.lst
|
||||
diff -w x.lst ok.lst
|
||||
wc x.lst
|
||||
|
||||
:end
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user