diff --git a/makefile b/makefile
index 8dcefed..c3694f3 100644
--- a/makefile
+++ b/makefile
@@ -88,7 +88,7 @@ $(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compi
 	$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/blake2_generator.hpp \
 	$(SRCDIR)/program.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/virtual_memory.hpp \
 	$(SRCDIR)/instruction_weights.hpp
-$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S \
+$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S $(SRCDIR)/configuration.h \
 	$(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \
 	$(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \
 	$(SRCDIR)/asm/program_read_dataset_sshash_init.inc \
diff --git a/src/asm/configuration.asm b/src/asm/configuration.asm
new file mode 100644
index 0000000..47feeed
--- /dev/null
+++ b/src/asm/configuration.asm
@@ -0,0 +1,49 @@
+; File start: ..\src\configuration.h
+RANDOMX_ARGON_MEMORY EQU 262144t
+RANDOMX_ARGON_ITERATIONS EQU 3t
+RANDOMX_ARGON_LANES EQU 1t
+RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03">
+RANDOMX_CACHE_ACCESSES EQU 8t
+RANDOMX_SUPERSCALAR_LATENCY EQU 170t
+RANDOMX_SUPERSCALAR_MAX_SIZE EQU 512t
+RANDOMX_DATASET_BASE_SIZE EQU 2147483648t
+RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t
+RANDOMX_PROGRAM_SIZE EQU 256t
+RANDOMX_PROGRAM_ITERATIONS EQU 2048t
+RANDOMX_PROGRAM_COUNT EQU 8t
+RANDOMX_SCRATCHPAD_L3 EQU 2097152t
+RANDOMX_SCRATCHPAD_L2 EQU 262144t
+RANDOMX_SCRATCHPAD_L1 EQU 16384t
+RANDOMX_JUMP_BITS EQU 8t
+RANDOMX_JUMP_OFFSET EQU 8t
+RANDOMX_FREQ_IADD_RS EQU 25t
+RANDOMX_FREQ_IADD_M EQU 7t
+RANDOMX_FREQ_ISUB_R EQU 16t
+RANDOMX_FREQ_ISUB_M EQU 7t
+RANDOMX_FREQ_IMUL_R EQU 16t
+RANDOMX_FREQ_IMUL_M EQU 4t
+RANDOMX_FREQ_IMULH_R EQU 4t
+RANDOMX_FREQ_IMULH_M EQU 1t
+RANDOMX_FREQ_ISMULH_R EQU 4t
+RANDOMX_FREQ_ISMULH_M EQU 1t
+RANDOMX_FREQ_IMUL_RCP EQU 8t
+RANDOMX_FREQ_INEG_R EQU 2t
+RANDOMX_FREQ_IXOR_R EQU 15t
+RANDOMX_FREQ_IXOR_M EQU 5t
+RANDOMX_FREQ_IROR_R EQU 10t
+RANDOMX_FREQ_IROL_R EQU 0t
+RANDOMX_FREQ_ISWAP_R EQU 4t
+RANDOMX_FREQ_FSWAP_R EQU 8t
+RANDOMX_FREQ_FADD_R EQU 20t
+RANDOMX_FREQ_FADD_M EQU 5t
+RANDOMX_FREQ_FSUB_R EQU 20t
+RANDOMX_FREQ_FSUB_M EQU 5t
+RANDOMX_FREQ_FSCAL_R EQU 6t
+RANDOMX_FREQ_FMUL_R EQU 20t
+RANDOMX_FREQ_FDIV_M EQU 4t
+RANDOMX_FREQ_FSQRT_R EQU 6t
+RANDOMX_FREQ_CBRANCH EQU 16t
+RANDOMX_FREQ_CFROUND EQU 1t
+RANDOMX_FREQ_ISTORE EQU 16t
+RANDOMX_FREQ_NOP EQU 0t
+; File end: ..\src\configuration.h
diff --git a/src/asm/program_loop_load.inc b/src/asm/program_loop_load.inc
index 6ef67ec..374af66 100644
--- a/src/asm/program_loop_load.inc
+++ b/src/asm/program_loop_load.inc
@@ -1,5 +1,5 @@
 	mov rdx, rax
-	and eax, 2097088
+	and eax, RANDOMX_SCRATCHPAD_MASK
 	lea rcx, [rsi+rax]
 	push rcx
 	xor r8, qword ptr [rcx+0]
@@ -11,7 +11,7 @@
 	xor r14, qword ptr [rcx+48]
 	xor r15, qword ptr [rcx+56]
 	ror rdx, 32
-	and edx, 2097088
+	and edx, RANDOMX_SCRATCHPAD_MASK
 	lea rcx, [rsi+rdx]
 	push rcx
 	cvtdq2pd xmm0, qword ptr [rcx+0]
diff --git a/src/asm/program_read_dataset.inc b/src/asm/program_read_dataset.inc
index 4cdabb9..b81d0c3 100644
--- a/src/asm/program_read_dataset.inc
+++ b/src/asm/program_read_dataset.inc
@@ -1,10 +1,10 @@
 	xor rbp, rax ;# modify "mx"
 	mov edx, ebp ;# edx = mx
-	and edx, 2147483584 ;# align "mx" to the start of a cache line
+	and edx, RANDOMX_DATASET_BASE_MASK
 	prefetchnta byte ptr [rdi+rdx]
 	ror rbp, 32 ;# swap "ma" and "mx"
 	mov edx, ebp ;# edx = ma
-	and edx, 2147483584 ;# align "ma" to the start of a cache line
+	and edx, RANDOMX_DATASET_BASE_MASK
 	lea rcx, [rdi+rdx] ;# dataset cache line
 	xor r8, qword ptr [rcx+0]
 	xor r9, qword ptr [rcx+8]
diff --git a/src/asm/program_read_dataset_sshash_init.inc b/src/asm/program_read_dataset_sshash_init.inc
index 0145f08..6fe9525 100644
--- a/src/asm/program_read_dataset_sshash_init.inc
+++ b/src/asm/program_read_dataset_sshash_init.inc
@@ -11,7 +11,7 @@
 	xor rbp, rax ;# modify "mx"
 	ror rbp, 32 ;# swap "ma" and "mx"
 	mov ebx, ebp ;# ecx = ma
-	and ebx, 2147483584 ;# align "ma" to the start of a cache line
+	and ebx, RANDOMX_DATASET_BASE_MASK
 	shr ebx, 6 ;# ebx = Dataset block number
 	;# add ebx, datasetOffset / 64
 	;# call 32768
\ No newline at end of file
diff --git a/src/asm/program_sshash_prefetch.inc b/src/asm/program_sshash_prefetch.inc
index 96ec35a..26efb51 100644
--- a/src/asm/program_sshash_prefetch.inc
+++ b/src/asm/program_sshash_prefetch.inc
@@ -1,4 +1,4 @@
-	and rbx, 4194303
+	and rbx, RANDOMX_CACHE_MASK
 	shl rbx, 6
 	add rbx, rdi
 	prefetchnta byte ptr [rbx]
\ No newline at end of file
diff --git a/src/configuration.h b/src/configuration.h
index c77ebb2..ba1f78d 100644
--- a/src/configuration.h
+++ b/src/configuration.h
@@ -20,7 +20,7 @@ along with RandomX. If not, see.
 #pragma once
 
 //Cache size in KiB. Must be a power of 2.
-#define RANDOMX_ARGON_MEMORY (256 * 1024)
+#define RANDOMX_ARGON_MEMORY 262144
 
 //Number of Argon2d iterations for Cache initialization
 #define RANDOMX_ARGON_ITERATIONS 3
@@ -41,7 +41,7 @@ along with RandomX. If not, see.
 #define RANDOMX_SUPERSCALAR_MAX_SIZE 512
 
 //Dataset base size in bytes. Must be a power of 2.
-#define RANDOMX_DATASET_BASE_SIZE (2ULL * 1024 * 1024 * 1024)
+#define RANDOMX_DATASET_BASE_SIZE 2147483648
 
 //Dataset extra size. Must be divisible by 64.
 #define RANDOMX_DATASET_EXTRA_SIZE 33554368
@@ -56,13 +56,13 @@ along with RandomX. If not, see.
 #define RANDOMX_PROGRAM_COUNT 8
 
 //Scratchpad L3 size in bytes. Must be a power of 2.
-#define RANDOMX_SCRATCHPAD_L3 (2 * 1024 * 1024)
+#define RANDOMX_SCRATCHPAD_L3 2097152
 
 //Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
-#define RANDOMX_SCRATCHPAD_L2 (256 * 1024)
+#define RANDOMX_SCRATCHPAD_L2 262144
 
 //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
-#define RANDOMX_SCRATCHPAD_L1 (16 * 1024)
+#define RANDOMX_SCRATCHPAD_L1 16384
 
 //Jump condition mask size in bits.
 #define RANDOMX_JUMP_BITS 8
diff --git a/src/jit_compiler_x86_static.S b/src/jit_compiler_x86_static.S
index 3254c4c..6022201 100644
--- a/src/jit_compiler_x86_static.S
+++ b/src/jit_compiler_x86_static.S
@@ -44,6 +44,12 @@
 .global DECL(randomx_program_end)
 .global DECL(randomx_reciprocal_fast)
 
+#include "configuration.h"
+
+#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
+#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
+#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1)
+
 #define db .byte
 
 .balign 64
diff --git a/src/jit_compiler_x86_static.asm b/src/jit_compiler_x86_static.asm
index d515828..51dd940 100644
--- a/src/jit_compiler_x86_static.asm
+++ b/src/jit_compiler_x86_static.asm
@@ -37,6 +37,12 @@ PUBLIC randomx_sshash_init
 PUBLIC randomx_program_end
 PUBLIC randomx_reciprocal_fast
 
+include asm/configuration.asm
+
+RANDOMX_SCRATCHPAD_MASK EQU (RANDOMX_SCRATCHPAD_L3-64)
+RANDOMX_DATASET_BASE_MASK EQU (RANDOMX_DATASET_BASE_SIZE-64)
+RANDOMX_CACHE_MASK EQU (RANDOMX_ARGON_MEMORY*16-1)
+
 ALIGN 64
 randomx_program_prologue PROC
 	include asm/program_prologue_win64.inc
diff --git a/vcxproj/h2inc.ps1 b/vcxproj/h2inc.ps1
new file mode 100644
index 0000000..ded47b8
--- /dev/null
+++ b/vcxproj/h2inc.ps1
@@ -0,0 +1,90 @@
+# The MIT License (MIT)
+#
+# Copyright (c) .NET Foundation and Contributors
+#
+# All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# C to MASM include file translator
+# This is replacement for the deprecated h2inc tool that used to be part of VS.
+
+#
+# The use of [console]::WriteLine (instead of Write-Output) is intentional.
+# PowerShell 2.0 (installed by default on Windows 7) wraps lines written with
+# Write-Output at whatever column width is being used by the current terminal,
+# even when output is being redirected to a file. We can't have this behavior
+# because it will cause the generated file to be malformed.
+#
+
+Function ProcessFile($filePath) {
+    # Writes a MASM translation of the C header $filePath to standard output.
+    [console]::WriteLine("; File start: $filePath")
+
+    Get-Content $filePath | ForEach-Object {
+        # 'return' inside this script block only skips to the next input line.
+        if ($_ -match "^\s*#\s*pragma") {
+            # Ignore pragmas (both "#pragma" and "# pragma" spellings)
+            return
+        }
+
+        if ($_ -match "^\s*#\s*include\s*`"(.*)`"")
+        {
+            # Expand quoted includes in place, resolved relative to the including file.
+            ProcessFile(Join-Path (Split-Path -Parent $filePath) $Matches[1])
+            return
+        }
+
+        if ($_ -match "^\s*#\s*define\s+(\S+)\s*(.*)")
+        {
+            # Augment #defines with their MASM equivalent
+            $name = $Matches[1]
+            $value = $Matches[2]
+
+            # Note that we do not handle multiline constants
+
+            # Strip comments from value
+            $value = $value -replace "//.*", ""
+            $value = $value -replace "/\*.*\*/", ""
+
+            # Strip trailing whitespace from value
+            $value = $value -replace "\s+$", ""
+
+            # ignore #defines with arguments
+            if ($name -notmatch "\(") {
+                $HEX_NUMBER_PATTERN = "\b0x(\w+)\b"
+                $DECIMAL_NUMBER_PATTERN = "(-?\b\d+\b)"
+
+                if ($value -match $HEX_NUMBER_PATTERN -or $value -match $DECIMAL_NUMBER_PATTERN) {
+                    $value = $value -replace $HEX_NUMBER_PATTERN, "0`$1h" # Convert hex constants
+                    $value = $value -replace $DECIMAL_NUMBER_PATTERN, "`$1t" # Convert dec constants
+                    [console]::WriteLine("$name EQU $value")
+                } else {
+                    [console]::WriteLine("$name TEXTEQU <$value>")
+                }
+            }
+        }
+
+        # Any other input line produces no output.
+    }
+
+    [console]::WriteLine("; File end: $filePath")
+}
+
+ProcessFile $args[0]
diff --git a/vcxproj/randomx.vcxproj b/vcxproj/randomx.vcxproj
index 218975a..a5de676 100644
--- a/vcxproj/randomx.vcxproj
+++ b/vcxproj/randomx.vcxproj
@@ -122,6 +122,10 @@ 4194304 + + powershell -ExecutionPolicy Bypass -File .\h2inc.ps1 ..\src\configuration.h > ..\src\asm\configuration.asm +SET ERRORLEVEL = 0 +