From a560cec3e713bab08c81edf13a10291a685f4b40 Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 10 May 2019 16:43:24 +0200 Subject: [PATCH] Clarifications/corrections in the specification/comments Removed some unused code --- doc/specs.md | 10 +++++----- src/assembly_generator_x86.hpp | 1 - src/jit_compiler_x86.cpp | 14 ++++++-------- src/jit_compiler_x86.hpp | 2 +- src/reciprocal.c | 1 + 5 files changed, 13 insertions(+), 15 deletions(-) diff --git a/doc/specs.md b/doc/specs.md index ffdd9f8..1eaecd3 100644 --- a/doc/specs.md +++ b/doc/specs.md @@ -86,7 +86,7 @@ and outputs a 256-bit result `R`. The algorithm consists of the following steps: -1. The Dataset is initialized using the key value `K` (see chapter 6 for details). +1. The Dataset is initialized using the key value `K` (see chapter 7 for details). 1. 64-byte seed `S` is calculated as `S = Hash512(H)`. 1. AesGenerator is initialized with state `S`. 1. The Scratchpad is filled with `RANDOMX_SCRATCHPAD_L3` random bytes obtained from the AesGenerator. @@ -383,8 +383,8 @@ The loop described below is repeated until the value of the `ic` register reache 3. `spAddr1` is used to perform a 64-byte aligned read from Scratchpad level 3 (using mask from Table 4.2.1). Each floating point register `f0`-`f3` and `e0`-`e3` is initialized using an 8-byte value according to the conversion rules from chapters 4.3.1 and 4.3.2. 4. The 256 instructions stored in the Program Buffer are executed. 5. The `mx` register is XORed with the low 32 bits of registers `readReg2` and `readReg3` (see Table 4.5.3). -6. A 64-byte memory block at address `datasetOffset + mx` is prefetched from the Dataset (this has no effect on the VM state). -7. A 64-byte memory block at address `datasetOffset + ma` is loaded from the Dataset. The 64 bytes are XORed with all integer registers in order `r0`-`r7`. +6. 
A 64-byte Dataset item at address `datasetOffset + mx % RANDOMX_DATASET_BASE_SIZE` is prefetched from the Dataset (it will be used during the next iteration). +7. A 64-byte Dataset item at address `datasetOffset + ma % RANDOMX_DATASET_BASE_SIZE` is loaded from the Dataset. The 64 bytes are XORed with all integer registers in order `r0`-`r7`. 8. The values of registers `mx` and `ma` are swapped. 9. The values of all integer registers `r0`-`r7` are written to the Scratchpad (L3) at address `spAddr1` (64-byte aligned). 10. Register `f0` is XORed with register `e0` and the result is stored in register `f0`. Register `f1` is XORed with register `e1` and the result is stored in register `f1`. Register `f2` is XORed with register `e2` and the result is stored in register `f2`. Register `f3` is XORed with register `e3` and the result is stored in register `f3`. @@ -621,8 +621,8 @@ A register is considered as modified by an instruction in the following cases: There are 3 rules for the selection of the `creg` register, evaluated in this order: 1. The register with the lowest value of `lastUsed` tag is selected. -1. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected. -1. In case multiple registers have the same values of both `lastUsed` and `count` tags, a register with the lowest index is selected (`r0` before `r1` etc.). +1. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected from them. +1. In case multiple registers have the same values of both `lastUsed` and `count` tags, the register with the lowest index is selected (`r0` before `r1` etc.) from them. Whenever a register is selected as the operand of a CBRANCH instruction, its `count` tag is increased by 1. 
diff --git a/src/assembly_generator_x86.hpp b/src/assembly_generator_x86.hpp index a7c8556..72e9c8e 100644 --- a/src/assembly_generator_x86.hpp +++ b/src/assembly_generator_x86.hpp @@ -58,7 +58,6 @@ namespace randomx { void h_ISMULH_R(Instruction&, int); void h_ISMULH_M(Instruction&, int); void h_IMUL_RCP(Instruction&, int); - void h_ISDIV_C(Instruction&, int); void h_INEG_R(Instruction&, int); void h_IXOR_R(Instruction&, int); void h_IXOR_M(Instruction&, int); diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index 4918fc5..9a1c412 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -72,7 +72,7 @@ namespace randomx { REGISTER ALLOCATION: ; rax -> temporary - ; rbx -> loop counter "lc" + ; rbx -> iteration counter "ic" ; rcx -> temporary ; rdx -> temporary ; rsi -> scratchpad pointer @@ -100,9 +100,9 @@ namespace randomx { ; xmm10 -> "a2" ; xmm11 -> "a3" ; xmm12 -> temporary - ; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff - ; xmm14 -> exponent 2**-240 = 0x30f00000000xxxxx30f00000000xxxxx - ; xmm15 -> scale mask = 0x81f000000000000081f0000000000000 + ; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff + ; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000****** + ; xmm15 -> scale mask = 0x81f000000000000081f0000000000000 */ @@ -444,7 +444,7 @@ namespace randomx { emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); } - void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) { + void JitCompilerX86::genAddressRegDst(Instruction& instr) { emit(LEA_32); emitByte(0x80 + instr.dst); if (instr.dst == RegisterNeedsSib) { @@ -453,9 +453,7 @@ namespace randomx { emit32(instr.getImm32()); emitByte(AND_EAX_I); if (instr.getModCond() < StoreL3Condition) { - int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; - int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; - emit32(instr.getModMem() ? maskL1 : maskL2); + emit32(instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); } else { emit32(ScratchpadL3Mask); diff --git a/src/jit_compiler_x86.hpp b/src/jit_compiler_x86.hpp index d814281..5287587 100644 --- a/src/jit_compiler_x86.hpp +++ b/src/jit_compiler_x86.hpp @@ -65,7 +65,7 @@ namespace randomx { void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&); void genAddressReg(Instruction&, bool); - void genAddressRegDst(Instruction&, bool); + void genAddressRegDst(Instruction&); void genAddressImm(Instruction&); void genSIB(int scale, int index, int base); diff --git a/src/reciprocal.c b/src/reciprocal.c index ebe02ee..12da813 100644 --- a/src/reciprocal.c +++ b/src/reciprocal.c @@ -21,6 +21,7 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>. /* Calculates rcp = 2**x / divisor for highest integer x such that rcp < 2**64. + divisor must not be 0 or a power of 2 Equivalent x86 assembly (divisor in rcx):