From b91882be42da1a6078d69ddab87e59a65c5e4e24 Mon Sep 17 00:00:00 2001 From: tevador <37503146+tevador@users.noreply.github.com> Date: Fri, 28 Jun 2019 19:50:41 +0200 Subject: [PATCH] Use cmake for building (#90) * Use cmake for building * CMakeLists.txt modified to support full standalone build * added support for ARCH=native builds * added PowerPC flags * added ARMv8 flags * check for x86 AES-NI at compile time --- .gitignore | 3 +- CMakeLists.txt | 113 ++++++++++++++++++++++-- README.md | 19 ++-- makefile | 200 ------------------------------------------ src/intrin_portable.h | 53 ++++++----- 5 files changed, 148 insertions(+), 240 deletions(-) delete mode 100644 makefile diff --git a/.gitignore b/.gitignore index dd437d1..ec94c2c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ obj/ .vs x64/ Release/ -Debug/ \ No newline at end of file +Debug/ +build/ diff --git a/CMakeLists.txt b/CMakeLists.txt index d926174..9af9741 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,31 +51,126 @@ src/virtual_machine.cpp src/vm_compiled_light.cpp src/blake2/blake2b.c) -if (NOT ARCH_ID) - set(ARCH_ID ${CMAKE_HOST_SYSTEM_PROCESSOR}) +if(NOT ARCH_ID) + # allow cross compiling + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "") + set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}) + endif() + string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCH_ID) +endif() + +if(NOT ARM_ID) + set(ARM_ID "${ARCH_ID}") +endif() + +if(NOT ARCH) + set(ARCH "default") endif() if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) + message(STATUS "Setting default build type: ${CMAKE_BUILD_TYPE}") endif() +include(CheckCXXCompilerFlag) +include(CheckCCompilerFlag) + +function(add_flag flag) + string(REPLACE "-" "_" supported_cxx ${flag}_cxx) + check_cxx_compiler_flag(${flag} ${supported_cxx}) + if(${${supported_cxx}}) + message(STATUS "Setting CXX flag ${flag}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) + endif() + string(REPLACE "-" "_" supported_c ${flag}_c) + 
check_c_compiler_flag(${flag} ${supported_c}) + if(${${supported_c}}) + message(STATUS "Setting C flag ${flag}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE) + endif() +endfunction() + +# x86-64 if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64") list(APPEND randomx_sources src/jit_compiler_x86_static.S src/jit_compiler_x86.cpp) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") + # cheat because cmake and ccache hate each other + set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) + + if(ARCH STREQUAL "native") + add_flag("-march=native") + else() + # default build has hardware AES enabled (software AES can be selected at runtime) + add_flag("-maes") + endif() +endif() + +# PowerPC +if (ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le") + if(ARCH STREQUAL "native") + add_flag("-mcpu=native") + endif() + # PowerPC AES requires ALTIVEC (POWER7+), so it cannot be enabled in the default build +endif() + +# ARMv8 +if (ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv8-a") + if(ARCH STREQUAL "native") + add_flag("-march=native") + else() + # default build has hardware AES enabled (software AES can be selected at runtime) + add_flag("-march=armv8-a+crypto") + endif() endif() set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path") add_library(randomx ${randomx_sources}) -target_link_libraries(randomx - PRIVATE - ${CMAKE_THREAD_LIBS_INIT}) set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET randomx PROPERTY CXX_STANDARD 11) +set_property(TARGET randomx PROPERTY CXX_STANDARD_REQUIRED ON) -# cheat because cmake and ccache hate each other -set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) +add_executable(randomx-tests + src/tests/tests.cpp) +target_link_libraries(randomx-tests + PRIVATE randomx) +set_property(TARGET randomx-tests PROPERTY 
POSITION_INDEPENDENT_CODE ON) +set_property(TARGET randomx-tests PROPERTY CXX_STANDARD 11) + +add_executable(randomx-codegen + src/tests/code-generator.cpp) +target_link_libraries(randomx-codegen + PRIVATE randomx) + +set_property(TARGET randomx-codegen PROPERTY POSITION_INDEPENDENT_CODE ON) +set_property(TARGET randomx-codegen PROPERTY CXX_STANDARD 11) + +if (NOT Threads_FOUND AND UNIX AND NOT APPLE) + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads) +endif() + +add_executable(randomx-benchmark + src/tests/benchmark.cpp + src/tests/affinity.cpp) +target_link_libraries(randomx-benchmark + PRIVATE randomx + PRIVATE ${CMAKE_THREAD_LIBS_INIT}) + +include(CheckCXXSourceCompiles) +check_cxx_source_compiles(" +#include <cstdint> +#include <atomic> +int main() { + std::atomic<uint64_t> a; + a.is_lock_free(); +}" HAVE_CXX_ATOMICS) + +if(NOT HAVE_CXX_ATOMICS) + target_link_libraries(randomx-benchmark + PRIVATE "atomic") +endif() +set_property(TARGET randomx-benchmark PROPERTY POSITION_INDEPENDENT_CODE ON) +set_property(TARGET randomx-benchmark PROPERTY CXX_STANDARD 11) diff --git a/README.md b/README.md index 89ae3e4..70f8d97 100644 --- a/README.md +++ b/README.md @@ -20,23 +20,28 @@ Design description and analysis is available in [design.md](doc/design.md). ## Build -RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). Minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes a `benchmark` executable for testing. +RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). Minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes a `randomx-benchmark` and `randomx-tests` executables for testing. ### Linux -Build dependencies: `make` and `gcc` (minimum version 4.8, but version 7+ is recommended). 
+Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended). -Build using the provided makefile. +To build optimized binaries for your machine, run: +``` +git clone https://github.com/tevador/RandomX.git +cd RandomX +mkdir build && cd build +cmake -DARCH=native .. +make +``` ### Windows -Build dependencies: Visual Studio 2017. - -A solution file is provided. +On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio 2017 (solution file is provided). ### Precompiled binaries -Precompiled `benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases). +Precompiled `randomx-benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases). ## Proof of work diff --git a/makefile b/makefile deleted file mode 100644 index 49913bc..0000000 --- a/makefile +++ /dev/null @@ -1,200 +0,0 @@ -#CXX=g++-8 -#CC=gcc-8 -AR=gcc-ar -PLATFORM=$(shell uname -m) -OS=$(shell uname -s) -CXXFLAGS=-std=c++11 -CCFLAGS=-std=c99 -ARFLAGS=rcs -BINDIR=bin -SRCDIR=src -TESTDIR=src/tests -OBJDIR=obj -LDFLAGS=-lpthread -RXA=$(BINDIR)/librandomx.a -BINARIES=$(RXA) $(BINDIR)/randomx-benchmark $(BINDIR)/randomx-generator $(BINDIR)/randomx-tests -RXOBJS=$(addprefix $(OBJDIR)/,aes_hash.o argon2_ref.o bytecode_machine.o dataset.o soft_aes.o virtual_memory.o vm_interpreted.o allocator.o assembly_generator_x86.o instruction.o randomx.o superscalar.o vm_compiled.o vm_interpreted_light.o argon2_core.o blake2_generator.o instructions_portable.o reciprocal.o virtual_machine.o vm_compiled_light.o blake2b.o) -ifeq ($(PLATFORM),amd64) - RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o) - CXXFLAGS += -maes -endif -ifeq ($(PLATFORM),x86_64) - RXOBJS += $(addprefix $(OBJDIR)/,jit_compiler_x86_static.o jit_compiler_x86.o) - CXXFLAGS += -maes -endif -ifeq ($(OS),Darwin) - AR=ar -endif - -ifeq ($(PLATFORM),ppc64) - CXXFLAGS += 
-mcpu=native -endif - -ifeq ($(PLATFORM),ppc64le) - CXXFLAGS += -mcpu=native -endif - -release: CXXFLAGS += -O3 -flto -release: CCFLAGS += -O3 -flto -release: LDFLAGS += -flto -release: $(BINARIES) - -native: CXXFLAGS += -march=native -O3 -flto -native: CCFLAGS += -march=native -O3 -flto -native: $(BINARIES) - -nolto: CXXFLAGS += -O3 -nolto: CCFLAGS += -O3 -nolto: $(BINARIES) - -debug: CXXFLAGS += -g -debug: CCFLAGS += -g -debug: LDFLAGS += -g -debug: $(BINARIES) - -profile: CXXFLAGS += -pg -profile: CCFLAGS += -pg -profile: LDFLAGS += -pg -profile: $(BINDIR)/randomx-benchmark - -test: CXXFLAGS += -O0 - -$(RXA): $(RXOBJS) | $(BINDIR) - $(AR) $(ARFLAGS) $@ $(RXOBJS) -$(OBJDIR): - mkdir $(OBJDIR) -$(BINDIR): - mkdir $(BINDIR) -$(OBJDIR)/affinity.o: $(TESTDIR)/affinity.cpp $(TESTDIR)/affinity.hpp - $(CXX) $(CXXFLAGS) -c $< -o $@ -$(OBJDIR)/benchmark.o: $(TESTDIR)/benchmark.cpp $(TESTDIR)/stopwatch.hpp \ - $(TESTDIR)/utility.hpp $(SRCDIR)/randomx.h $(SRCDIR)/blake2/endian.h $(TESTDIR)/affinity.hpp - $(CXX) $(CXXFLAGS) -pthread -c $< -o $@ -$(BINDIR)/randomx-benchmark: $(OBJDIR)/benchmark.o $(OBJDIR)/affinity.o $(RXA) - $(CXX) $(LDFLAGS) -pthread $< $(OBJDIR)/affinity.o $(RXA) -o $@ -$(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/assembly_generator_x86.hpp $(SRCDIR)/superscalar.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/aes_hash.hpp \ - $(SRCDIR)/blake2/blake2.h $(SRCDIR)/program.hpp - $(CXX) $(CXXFLAGS) -c $< -o $@ -$(BINDIR)/randomx-generator: $(OBJDIR)/code-generator.o $(RXA) - $(CXX) $(LDFLAGS) $< $(RXA) -o $@ -$(OBJDIR)/tests.o: $(TESTDIR)/tests.cpp $(TESTDIR)/utility.hpp \ - $(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h \ - $(SRCDIR)/randomx.h $(SRCDIR)/intrin_portable.h \ - 
$(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \ - $(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp \ - $(SRCDIR)/allocator.hpp $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/superscalar.hpp \ - $(SRCDIR)/reciprocal.h $(SRCDIR)/jit_compiler.hpp \ - $(SRCDIR)/jit_compiler_x86.hpp - $(CXX) $(CXXFLAGS) -c $< -o $@ -$(BINDIR)/randomx-tests: $(OBJDIR)/tests.o $(RXA) - $(CXX) $(LDFLAGS) $< $(RXA) -o $@ -$(OBJDIR)/aes_hash.o: $(SRCDIR)/aes_hash.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h | $(OBJDIR) -$(OBJDIR)/argon2_ref.o: $(SRCDIR)/argon2_ref.c $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \ - $(SRCDIR)/blake2/blamka-round-ref.h $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2/blake2-impl.h \ - $(SRCDIR)/blake2/blake2.h -$(OBJDIR)/bytecode_machine.o: $(SRCDIR)/bytecode_machine.cpp $(SRCDIR)/bytecode_machine.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/instruction.hpp $(SRCDIR)/program.hpp \ - $(SRCDIR)/reciprocal.h -$(OBJDIR)/blake2b.o: $(SRCDIR)/blake2/blake2b.c $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h - $(CC) $(CCFLAGS) -c $< -o $@ -$(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \ - $(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \ - $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \ - $(SRCDIR)/intrin_portable.h -$(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/jit_compiler_x86_static.hpp 
$(SRCDIR)/superscalar.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/blake2_generator.hpp \ - $(SRCDIR)/program.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/virtual_memory.hpp \ - $(SRCDIR)/instruction_weights.hpp -$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S $(SRCDIR)/configuration.h \ - $(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \ - $(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \ - $(SRCDIR)/asm/program_read_dataset_sshash_init.inc \ - $(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \ - $(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \ - $(SRCDIR)/asm/program_epilogue_store.inc $(SRCDIR)/asm/program_sshash_load.inc \ - $(SRCDIR)/asm/program_sshash_prefetch.inc $(SRCDIR)/asm/program_sshash_constants.inc \ - $(SRCDIR)/asm/randomx_reciprocal.inc -$(OBJDIR)/soft_aes.o: $(SRCDIR)/soft_aes.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h -$(OBJDIR)/virtual_memory.o: $(SRCDIR)/virtual_memory.cpp $(SRCDIR)/virtual_memory.hpp -$(OBJDIR)/vm_interpreted.o: $(SRCDIR)/vm_interpreted.cpp $(SRCDIR)/vm_interpreted.hpp \ - $(SRCDIR)/bytecode_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/virtual_machine.hpp \ - $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/instruction_weights.hpp \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/reciprocal.h -$(OBJDIR)/allocator.o: $(SRCDIR)/allocator.cpp $(SRCDIR)/allocator.hpp $(SRCDIR)/intrin_portable.h \ - $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h -$(OBJDIR)/assembly_generator_x86.o: $(SRCDIR)/assembly_generator_x86.cpp \ - $(SRCDIR)/assembly_generator_x86.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - 
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/reciprocal.h $(SRCDIR)/program.hpp \ - $(SRCDIR)/instruction.hpp $(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp \ - $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/instruction_weights.hpp -$(OBJDIR)/instruction.o: $(SRCDIR)/instruction.cpp $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/common.hpp $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/instruction_weights.hpp -$(OBJDIR)/randomx.o: $(SRCDIR)/randomx.cpp $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/superscalar_program.hpp \ - $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \ - $(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_compiled.hpp \ - $(SRCDIR)/vm_compiled_light.hpp $(SRCDIR)/blake2/blake2.h $(SRCDIR)/bytecode_machine.hpp -$(OBJDIR)/superscalar.o: $(SRCDIR)/superscalar.cpp $(SRCDIR)/configuration.h $(SRCDIR)/program.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/randomx.h $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/superscalar.hpp $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/blake2_generator.hpp \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/reciprocal.h -$(OBJDIR)/vm_compiled.o: $(SRCDIR)/vm_compiled.cpp $(SRCDIR)/vm_compiled.hpp \ - $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h \ - $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp -$(OBJDIR)/vm_interpreted_light.o: $(SRCDIR)/vm_interpreted_light.cpp \ - $(SRCDIR)/vm_interpreted_light.hpp $(SRCDIR)/vm_interpreted.hpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - 
$(SRCDIR)/virtual_machine.hpp $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp $(SRCDIR)/dataset.hpp \ - $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/jit_compiler_x86.hpp \ - $(SRCDIR)/bytecode_machine.hpp -$(OBJDIR)/argon2_core.o: $(SRCDIR)/argon2_core.c $(SRCDIR)/argon2_core.h $(SRCDIR)/argon2.h \ - $(SRCDIR)/blake2/blake2.h $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h -$(OBJDIR)/blake2_generator.o: $(SRCDIR)/blake2_generator.cpp $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2_generator.hpp -$(OBJDIR)/instructions_portable.o: $(SRCDIR)/instructions_portable.cpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/intrin_portable.h -$(OBJDIR)/reciprocal.o: $(SRCDIR)/reciprocal.c $(SRCDIR)/reciprocal.h -$(OBJDIR)/virtual_machine.o: $(SRCDIR)/virtual_machine.cpp $(SRCDIR)/virtual_machine.hpp \ - $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \ - $(SRCDIR)/program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/aes_hash.hpp $(SRCDIR)/blake2/blake2.h \ - $(SRCDIR)/intrin_portable.h $(SRCDIR)/allocator.hpp -$(OBJDIR)/vm_compiled_light.o: $(SRCDIR)/vm_compiled_light.cpp $(SRCDIR)/vm_compiled_light.hpp \ - $(SRCDIR)/vm_compiled.hpp $(SRCDIR)/virtual_machine.hpp $(SRCDIR)/common.hpp \ - $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/program.hpp \ - $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp $(SRCDIR)/allocator.hpp \ - $(SRCDIR)/dataset.hpp $(SRCDIR)/superscalar_program.hpp - -$(OBJDIR)/%.o: $(SRCDIR)/%.c - $(CC) $(CCFLAGS) -c $< -o $@ - -$(OBJDIR)/%.o: $(SRCDIR)/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -$(OBJDIR)/%.o: $(SRCDIR)/%.S - $(CXX) -x assembler-with-cpp -c $< -o $@ - -clean: - rm -f $(BINARIES) $(OBJDIR)/*.o diff --git a/src/intrin_portable.h b/src/intrin_portable.h index 14176d9..b4f1b50 100644 --- a/src/intrin_portable.h +++ 
b/src/intrin_portable.h @@ -53,6 +53,11 @@ constexpr int RoundToZero = 3; #define __SSE2__ 1 #endif +//MSVC doesn't define __AES__ +#if defined(_MSC_VER) && defined(__SSE2__) +#define __AES__ +#endif + //the library "sqrt" function provided by MSVC for x86 targets doesn't give //the correct results, so we have to use inline assembly to call x87 fsqrt directly #if !defined(__SSE2__) @@ -121,9 +126,16 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { #define rx_xor_vec_f128 _mm_xor_pd #define rx_and_vec_f128 _mm_and_pd #define rx_or_vec_f128 _mm_or_pd + +#ifdef __AES__ + #define rx_aesenc_vec_i128 _mm_aesenc_si128 #define rx_aesdec_vec_i128 _mm_aesdec_si128 +#define HAVE_AES + +#endif //__AES__ + FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { return _mm_cvtsi128_si32(a); } @@ -164,7 +176,7 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) { #include <cstdint> #include <stdexcept> #include <cstdlib> -#include <altivec.h> +#include <altivec.h> #undef vector #undef pixel #undef bool @@ -190,7 +202,6 @@ typedef union{ #define rx_aligned_free(a) free(a) #define rx_prefetch_nta(x) - /* Splat 64-bit long long to 2 64-bit long longs */ FORCE_INLINE __m128i vec_splat2sd (int64_t scalar) { return (__m128i) vec_splats (scalar); } @@ -268,6 +279,7 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return (rx_vec_f128)vec_or(a,b); } + #if defined(__CRYPTO__) FORCE_INLINE __m128ll vrev(__m128i v){ @@ -291,18 +303,9 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { __m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero)); return (rx_vec_i128)vec_xor((__m128i)out,rkey); } -#else -static const char* platformError = "Platform doesn't support hardware AES"; - -FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - throw std::runtime_error(platformError); -} - -FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - throw 
std::runtime_error(platformError); -} -#endif +#define HAVE_AES +#endif //__CRYPTO__ FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { vec_u _a; @@ -506,16 +509,6 @@ FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return x; } -static const char* platformError = "Platform doesn't support hardware AES"; - -FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - throw std::runtime_error(platformError); -} - -FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - throw std::runtime_error(platformError); -} - FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { return a.u32[0]; } @@ -591,6 +584,20 @@ void rx_set_rounding_mode(uint32_t mode); #endif +#ifndef HAVE_AES +static const char* platformError = "Platform doesn't support hardware AES"; + +#include <stdexcept> + +FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { + throw std::runtime_error(platformError); +} + +FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { + throw std::runtime_error(platformError); +} +#endif + double loadDoublePortable(const void* addr); uint64_t mulh(uint64_t, uint64_t); int64_t smulh(int64_t, int64_t);