mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Optimized Argon2 (SSSE3/AVX2)
This commit is contained in:
parent
298cc77095
commit
900a936816
17 changed files with 886 additions and 255 deletions
|
@ -31,6 +31,8 @@ cmake_minimum_required(VERSION 2.8.7)
|
|||
set (randomx_sources
|
||||
src/aes_hash.cpp
|
||||
src/argon2_ref.c
|
||||
src/argon2_sse3.c
|
||||
src/argon2_avx2.c
|
||||
src/bytecode_machine.cpp
|
||||
src/dataset.cpp
|
||||
src/soft_aes.cpp
|
||||
|
@ -103,6 +105,14 @@ if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "
|
|||
else()
|
||||
# default build has hardware AES enabled (software AES can be selected at runtime)
|
||||
add_flag("-maes")
|
||||
check_c_compiler_flag(-mssse3 HAVE_SSSE3)
|
||||
if(HAVE_SSSE3)
|
||||
set_source_files_properties(src/argon2_sse3.c COMPILE_FLAGS -mssse3)
|
||||
endif()
|
||||
check_c_compiler_flag(-mavx2 HAVE_AVX2)
|
||||
if(HAVE_AVX2)
|
||||
set_source_files_properties(src/argon2_avx2.c COMPILE_FLAGS -mavx2)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
32
src/argon2.h
32
src/argon2.h
|
@ -227,3 +227,35 @@ typedef enum Argon2_version {
|
|||
ARGON2_VERSION_13 = 0x13,
|
||||
ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
|
||||
} argon2_version;
|
||||
|
||||
//Argon2 instance - forward declaration
|
||||
typedef struct Argon2_instance_t argon2_instance_t;
|
||||
|
||||
//Argon2 position = forward declaration
|
||||
typedef struct Argon2_position_t argon2_position_t;
|
||||
|
||||
//Argon2 implementation function
|
||||
typedef void randomx_argon2_impl(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads
|
||||
* @param context current context
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void randomx_argon2_fill_segment_ref(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
|
||||
randomx_argon2_impl *randomx_argon2_impl_sse3();
|
||||
randomx_argon2_impl *randomx_argon2_impl_avx2();
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
|
174
src/argon2_avx2.c
Normal file
174
src/argon2_avx2.c
Normal file
|
@ -0,0 +1,174 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "argon2.h"
|
||||
|
||||
void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
|
||||
randomx_argon2_impl* randomx_argon2_impl_avx2() {
|
||||
#if defined(__AVX2__)
|
||||
return &randomx_argon2_fill_segment_avx2;
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include "argon2_core.h"
|
||||
|
||||
#include "blake2/blamka-round-avx2.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
static void fill_block(__m256i* state, const block* ref_block,
|
||||
block* next_block, int with_xor) {
|
||||
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i));
|
||||
block_XY[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i*)next_block->v + i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
|
||||
state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
BLAKE2_ROUND_2(state[0 + i], state[4 + i], state[8 + i], state[12 + i],
|
||||
state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
|
||||
_mm256_storeu_si256((__m256i*)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
|
||||
argon2_position_t position) {
|
||||
block* ref_block = NULL, * curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index, i;
|
||||
__m256i state[ARGON2_HWORDS_IN_BLOCK];
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * instance->lane_length +
|
||||
position.slice * instance->segment_length + starting_index;
|
||||
|
||||
if (0 == curr_offset % instance->lane_length) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + instance->lane_length - 1;
|
||||
}
|
||||
else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
if (ARGON2_VERSION_10 == instance->version) {
|
||||
/* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
if (0 == position.pass) {
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
fill_block(state, ref_block, curr_block, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -70,18 +70,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
/***************Instance and Position constructors**********/
|
||||
void rxa2_init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
|
||||
void rxa2_copy_block(block *dst, const block *src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
void rxa2_xor_block(block *dst, const block *src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] ^= src->v[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void load_block(block *dst, const void *input) {
|
||||
unsigned i;
|
||||
|
@ -97,69 +85,7 @@ static void store_block(void *output, const block *src) {
|
|||
}
|
||||
}
|
||||
|
||||
/***************Memory functions*****************/
|
||||
|
||||
int rxa2_allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num * size;
|
||||
if (memory == NULL) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
/* 1. Check for multiplication overflow */
|
||||
if (size != 0 && memory_size / size != num) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
/* 2. Try to allocate with appropriate allocator */
|
||||
if (context->allocate_cbk) {
|
||||
(context->allocate_cbk)(memory, memory_size);
|
||||
}
|
||||
else {
|
||||
*memory = (uint8_t*)malloc(memory_size);
|
||||
}
|
||||
|
||||
if (*memory == NULL) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
void rxa2_free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num * size;
|
||||
rxa2_clear_internal_memory(memory, memory_size);
|
||||
if (context->free_cbk) {
|
||||
(context->free_cbk)(memory, memory_size);
|
||||
}
|
||||
else {
|
||||
free(memory);
|
||||
}
|
||||
}
|
||||
|
||||
void NOT_OPTIMIZED rxa2_secure_wipe_memory(void *v, size_t n) {
|
||||
#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER)
|
||||
SecureZeroMemory(v, n);
|
||||
#elif defined memset_s
|
||||
memset_s(v, n, 0, n);
|
||||
#elif defined(__OpenBSD__)
|
||||
explicit_bzero(v, n);
|
||||
#else
|
||||
static void *(*const volatile memset_sec)(void *, int, size_t) = &memset;
|
||||
memset_sec(v, 0, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Memory clear flag defaults to true. */
|
||||
#define FLAG_clear_internal_memory 0
|
||||
void rxa2_clear_internal_memory(void *v, size_t n) {
|
||||
if (FLAG_clear_internal_memory && v) {
|
||||
rxa2_secure_wipe_memory(v, n);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t rxa2_index_alpha(const argon2_instance_t *instance,
|
||||
uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane) {
|
||||
/*
|
||||
|
@ -241,24 +167,22 @@ static int fill_memory_blocks_st(argon2_instance_t *instance) {
|
|||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
argon2_position_t position = { r, l, (uint8_t)s, 0 };
|
||||
rxa2_fill_segment(instance, position);
|
||||
//fill the segment using the selected implementation
|
||||
instance->impl(instance, position);
|
||||
}
|
||||
}
|
||||
#ifdef GENKAT
|
||||
internal_kat(instance, r); /* Print all memory blocks */
|
||||
#endif
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int rxa2_fill_memory_blocks(argon2_instance_t *instance) {
|
||||
int randomx_argon2_fill_memory_blocks(argon2_instance_t *instance) {
|
||||
if (instance == NULL || instance->lanes == 0) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
return fill_memory_blocks_st(instance);
|
||||
}
|
||||
|
||||
int rxa2_validate_inputs(const argon2_context *context) {
|
||||
int randomx_argon2_validate_inputs(const argon2_context *context) {
|
||||
if (NULL == context) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
|
@ -394,7 +318,6 @@ void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instanc
|
|||
load_block(&instance->memory[l * instance->lane_length + 1],
|
||||
blockhash_bytes);
|
||||
}
|
||||
rxa2_clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) {
|
||||
|
@ -431,11 +354,6 @@ void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type
|
|||
if (context->pwd != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
|
||||
context->pwdlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
|
||||
rxa2_secure_wipe_memory(context->pwd, context->pwdlen);
|
||||
context->pwdlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
store32(&value, context->saltlen);
|
||||
|
@ -451,11 +369,6 @@ void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type
|
|||
if (context->secret != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
|
||||
context->secretlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
|
||||
rxa2_secure_wipe_memory(context->secret, context->secretlen);
|
||||
context->secretlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
store32(&value, context->adlen);
|
||||
|
@ -469,7 +382,7 @@ void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type
|
|||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
int rxa2_argon_initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||
int result = ARGON2_OK;
|
||||
|
||||
|
@ -478,10 +391,7 @@ int rxa2_argon_initialize(argon2_instance_t *instance, argon2_context *context)
|
|||
instance->context_ptr = context;
|
||||
|
||||
/* 1. Memory allocation */
|
||||
/*result = allocate_memory(context, (uint8_t **)&(instance->memory), instance->memory_blocks, sizeof(block));
|
||||
if (result != ARGON2_OK) {
|
||||
return result;
|
||||
}*/
|
||||
//RandomX takes care of memory allocation
|
||||
|
||||
/* 2. Initial hashing */
|
||||
/* H_0 + 8 extra bytes to produce the first blocks */
|
||||
|
@ -489,15 +399,13 @@ int rxa2_argon_initialize(argon2_instance_t *instance, argon2_context *context)
|
|||
/* Hashing all inputs */
|
||||
rxa2_initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
rxa2_clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
/*rxa2_clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);*/
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
rxa2_fill_first_blocks(blockhash, instance);
|
||||
/* Clearing the hash */
|
||||
rxa2_clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
|
|
@ -73,17 +73,6 @@ enum argon2_core_constants {
|
|||
*/
|
||||
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
|
||||
|
||||
/*****************Functions that work with the block******************/
|
||||
|
||||
/* Initialize each byte of the block with @in */
|
||||
void rxa2_init_block_value(block *b, uint8_t in);
|
||||
|
||||
/* Copy block @src to block @dst */
|
||||
void rxa2_copy_block(block *dst, const block *src);
|
||||
|
||||
/* XOR @src onto @dst bytewise */
|
||||
void rxa2_xor_block(block *dst, const block *src);
|
||||
|
||||
/*
|
||||
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
|
||||
* and derived values.
|
||||
|
@ -102,6 +91,7 @@ typedef struct Argon2_instance_t {
|
|||
argon2_type type;
|
||||
int print_internals; /* whether to print the memory blocks */
|
||||
argon2_context *context_ptr; /* points back to original context */
|
||||
randomx_argon2_impl *impl;
|
||||
} argon2_instance_t;
|
||||
|
||||
/*
|
||||
|
@ -123,42 +113,6 @@ typedef struct Argon2_thread_data {
|
|||
|
||||
/*************************Argon2 core functions********************************/
|
||||
|
||||
/* Allocates memory to the given pointer, uses the appropriate allocator as
|
||||
* specified in the context. Total allocated memory is num*size.
|
||||
* @param context argon2_context which specifies the allocator
|
||||
* @param memory pointer to the pointer to the memory
|
||||
* @param size the size in bytes for each element to be allocated
|
||||
* @param num the number of elements to be allocated
|
||||
* @return ARGON2_OK if @memory is a valid pointer and memory is allocated
|
||||
*/
|
||||
int rxa2_allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
size_t num, size_t size);
|
||||
|
||||
/*
|
||||
* Frees memory at the given pointer, uses the appropriate deallocator as
|
||||
* specified in the context. Also cleans the memory using clear_internal_memory.
|
||||
* @param context argon2_context which specifies the deallocator
|
||||
* @param memory pointer to buffer to be freed
|
||||
* @param size the size in bytes for each element to be deallocated
|
||||
* @param num the number of elements to be deallocated
|
||||
*/
|
||||
void rxa2_free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size);
|
||||
|
||||
/* Function that securely cleans the memory. This ignores any flags set
|
||||
* regarding clearing memory. Usually one just calls clear_internal_memory.
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void rxa2_secure_wipe_memory(void *v, size_t n);
|
||||
|
||||
/* Function that securely clears the memory if FLAG_clear_internal_memory is
|
||||
* set. If the flag isn't set, this function does nothing.
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void rxa2_clear_internal_memory(void *v, size_t n);
|
||||
|
||||
/*
|
||||
* Computes absolute position of reference block in the lane following a skewed
|
||||
* distribution and using a pseudo-random value as input
|
||||
|
@ -169,7 +123,7 @@ void rxa2_clear_internal_memory(void *v, size_t n);
|
|||
* If so we can reference the current segment
|
||||
* @pre All pointers must be valid
|
||||
*/
|
||||
uint32_t rxa2_index_alpha(const argon2_instance_t *instance,
|
||||
uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane);
|
||||
|
||||
|
@ -180,28 +134,7 @@ uint32_t rxa2_index_alpha(const argon2_instance_t *instance,
|
|||
* @return ARGON2_OK if everything is all right, otherwise one of error codes
|
||||
* (all defined in <argon2.h>
|
||||
*/
|
||||
int rxa2_validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
|
||||
* password and secret if needed
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param blockhash Buffer for pre-hashing digest
|
||||
* @param type Argon2 type
|
||||
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
|
||||
* allocated
|
||||
*/
|
||||
void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
* Function creates first 2 blocks per lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @param blockhash Pointer to the pre-hashing digest
|
||||
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
|
||||
*/
|
||||
void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
int randomx_argon2_validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Function allocates memory, hashes the inputs with Blake, and creates first
|
||||
|
@ -213,31 +146,7 @@ void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instanc
|
|||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int rxa2_argon_initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||
* the memory.
|
||||
* @param context Pointer to current Argon2 context (use only the out parameters
|
||||
* from it)
|
||||
* @param instance Pointer to current instance of Argon2
|
||||
* @pre instance->state must point to necessary amount of memory
|
||||
* @pre context->out must point to outlen bytes of memory
|
||||
* @pre if context->free_cbk is not NULL, it should point to a function that
|
||||
* deallocates memory
|
||||
*/
|
||||
void rxa2_finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads
|
||||
* @param context current context
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void rxa2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* Function that fills the entire memory t_cost times based on the first two
|
||||
|
@ -245,7 +154,7 @@ void rxa2_fill_segment(const argon2_instance_t *instance,
|
|||
* @param instance Pointer to the current instance
|
||||
* @return ARGON2_OK if successful, @context->state
|
||||
*/
|
||||
int rxa2_fill_memory_blocks(argon2_instance_t *instance);
|
||||
int randomx_argon2_fill_memory_blocks(argon2_instance_t* instance);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
|
|
|
@ -43,6 +43,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "blake2/blake2-impl.h"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
static void copy_block(block* dst, const block* src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
static void xor_block(block* dst, const block* src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] ^= src->v[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Function fills a new memory block and optionally XORs the old block over the new one.
|
||||
* @next_block must be initialized.
|
||||
|
@ -57,13 +68,13 @@ static void fill_block(const block *prev_block, const block *ref_block,
|
|||
block blockR, block_tmp;
|
||||
unsigned i;
|
||||
|
||||
rxa2_copy_block(&blockR, ref_block);
|
||||
rxa2_xor_block(&blockR, prev_block);
|
||||
rxa2_copy_block(&block_tmp, &blockR);
|
||||
copy_block(&blockR, ref_block);
|
||||
xor_block(&blockR, prev_block);
|
||||
copy_block(&block_tmp, &blockR);
|
||||
/* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */
|
||||
if (with_xor) {
|
||||
/* Saving the next block contents for XOR over: */
|
||||
rxa2_xor_block(&block_tmp, next_block);
|
||||
xor_block(&block_tmp, next_block);
|
||||
/* Now blockR = ref_block + prev_block and
|
||||
block_tmp = ref_block + prev_block + next_block */
|
||||
}
|
||||
|
@ -92,18 +103,11 @@ static void fill_block(const block *prev_block, const block *ref_block,
|
|||
blockR.v[2 * i + 113]);
|
||||
}
|
||||
|
||||
rxa2_copy_block(next_block, &block_tmp);
|
||||
rxa2_xor_block(next_block, &blockR);
|
||||
copy_block(next_block, &block_tmp);
|
||||
xor_block(next_block, &blockR);
|
||||
}
|
||||
|
||||
static void next_addresses(block *address_block, block *input_block,
|
||||
const block *zero_block) {
|
||||
input_block->v[6]++;
|
||||
fill_block(zero_block, input_block, address_block, 0);
|
||||
fill_block(zero_block, address_block, address_block, 0);
|
||||
}
|
||||
|
||||
void rxa2_fill_segment(const argon2_instance_t *instance,
|
||||
void randomx_argon2_fill_segment_ref(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block, zero_block;
|
||||
|
@ -111,38 +115,15 @@ void rxa2_fill_segment(const argon2_instance_t *instance,
|
|||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index;
|
||||
uint32_t i;
|
||||
int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
data_independent_addressing =
|
||||
(instance->type == Argon2_i) ||
|
||||
(instance->type == Argon2_id && (position.pass == 0) &&
|
||||
(position.slice < ARGON2_SYNC_POINTS / 2));
|
||||
|
||||
if (data_independent_addressing) {
|
||||
rxa2_init_block_value(&zero_block, 0);
|
||||
rxa2_init_block_value(&input_block, 0);
|
||||
|
||||
input_block.v[0] = position.pass;
|
||||
input_block.v[1] = position.lane;
|
||||
input_block.v[2] = position.slice;
|
||||
input_block.v[3] = instance->memory_blocks;
|
||||
input_block.v[4] = instance->passes;
|
||||
input_block.v[5] = instance->type;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
|
||||
/* Don't forget to generate the first block of addresses: */
|
||||
if (data_independent_addressing) {
|
||||
next_addresses(&address_block, &input_block, &zero_block);
|
||||
}
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
|
@ -167,15 +148,7 @@ void rxa2_fill_segment(const argon2_instance_t *instance,
|
|||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
if (data_independent_addressing) {
|
||||
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
next_addresses(&address_block, &input_block, &zero_block);
|
||||
}
|
||||
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
}
|
||||
else {
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
}
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
|
@ -189,7 +162,7 @@ void rxa2_fill_segment(const argon2_instance_t *instance,
|
|||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = rxa2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
|
|
182
src/argon2_sse3.c
Normal file
182
src/argon2_sse3.c
Normal file
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "argon2.h"
|
||||
|
||||
#if defined(_MSC_VER) //MSVC doesn't define SSSE3
|
||||
#define __SSSE3__
|
||||
#endif
|
||||
|
||||
void randomx_argon2_fill_segment_sse3(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
|
||||
randomx_argon2_impl* randomx_argon2_impl_sse3() {
|
||||
#if defined(__SSSE3__)
|
||||
return &randomx_argon2_fill_segment_sse3;
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
|
||||
|
||||
#include "argon2_core.h"
|
||||
|
||||
#include "blake2/blamka-round-sse3.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
static void fill_block(__m128i* state, const block* ref_block,
|
||||
block* next_block, int with_xor) {
|
||||
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i));
|
||||
block_XY[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i*)next_block->v + i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
|
||||
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
|
||||
state[8 * i + 6], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
|
||||
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
|
||||
state[8 * 6 + i], state[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(state[i], block_XY[i]);
|
||||
_mm_storeu_si128((__m128i*)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void randomx_argon2_fill_segment_sse3(const argon2_instance_t* instance,
|
||||
argon2_position_t position) {
|
||||
block* ref_block = NULL, * curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index, i;
|
||||
__m128i state[ARGON2_OWORDS_IN_BLOCK];
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * instance->lane_length +
|
||||
position.slice * instance->segment_length + starting_index;
|
||||
|
||||
if (0 == curr_offset % instance->lane_length) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + instance->lane_length - 1;
|
||||
}
|
||||
else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
if (ARGON2_VERSION_10 == instance->version) {
|
||||
/* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
if (0 == position.pass) {
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
fill_block(state, ref_block, curr_block, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
189
src/blake2/blamka-round-avx2.h
Normal file
189
src/blake2/blamka-round-avx2.h
Normal file
|
@ -0,0 +1,189 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_OPT_H
|
||||
#define BLAKE_ROUND_MKA_OPT_H
|
||||
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
|
||||
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
|
||||
|
||||
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr32(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
\
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr24(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D1 = rotr32(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
\
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B1 = rotr24(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr16(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr63(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D1 = rotr16(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B1 = rotr63(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
\
|
||||
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while(0);
|
||||
|
||||
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
\
|
||||
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do{ \
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
\
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do{ \
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
} while((void)0, 0);
|
||||
|
||||
#endif /* BLAKE_ROUND_MKA_OPT_H */
|
158
src/blake2/blamka-round-sse3.h
Normal file
158
src/blake2/blamka-round-sse3.h
Normal file
|
@ -0,0 +1,158 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_OPT_H
|
||||
#define BLAKE_ROUND_MKA_OPT_H
|
||||
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#define r16 \
|
||||
(_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define r24 \
|
||||
(_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define _mm_roti_epi64(x, c) \
|
||||
(-(c) == 32) \
|
||||
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
|
||||
: (-(c) == 24) \
|
||||
? _mm_shuffle_epi8((x), r24) \
|
||||
: (-(c) == 16) \
|
||||
? _mm_shuffle_epi8((x), r16) \
|
||||
: (-(c) == 63) \
|
||||
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
|
||||
_mm_add_epi64((x), (x))) \
|
||||
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
|
||||
_mm_slli_epi64((x), 64 - (-(c))))
|
||||
|
||||
static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
const __m128i z = _mm_mul_epu32(x, y);
|
||||
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
|
||||
}
|
||||
|
||||
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = fBlaMka(A0, B0); \
|
||||
A1 = fBlaMka(A1, B1); \
|
||||
\
|
||||
D0 = _mm_xor_si128(D0, A0); \
|
||||
D1 = _mm_xor_si128(D1, A1); \
|
||||
\
|
||||
D0 = _mm_roti_epi64(D0, -32); \
|
||||
D1 = _mm_roti_epi64(D1, -32); \
|
||||
\
|
||||
C0 = fBlaMka(C0, D0); \
|
||||
C1 = fBlaMka(C1, D1); \
|
||||
\
|
||||
B0 = _mm_xor_si128(B0, C0); \
|
||||
B1 = _mm_xor_si128(B1, C1); \
|
||||
\
|
||||
B0 = _mm_roti_epi64(B0, -24); \
|
||||
B1 = _mm_roti_epi64(B1, -24); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = fBlaMka(A0, B0); \
|
||||
A1 = fBlaMka(A1, B1); \
|
||||
\
|
||||
D0 = _mm_xor_si128(D0, A0); \
|
||||
D1 = _mm_xor_si128(D1, A1); \
|
||||
\
|
||||
D0 = _mm_roti_epi64(D0, -16); \
|
||||
D1 = _mm_roti_epi64(D1, -16); \
|
||||
\
|
||||
C0 = fBlaMka(C0, D0); \
|
||||
C1 = fBlaMka(C1, D1); \
|
||||
\
|
||||
B0 = _mm_xor_si128(B0, C0); \
|
||||
B1 = _mm_xor_si128(B1, C1); \
|
||||
\
|
||||
B0 = _mm_roti_epi64(B0, -63); \
|
||||
B1 = _mm_roti_epi64(B1, -63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
|
||||
__m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
|
||||
B0 = t0; \
|
||||
B1 = t1; \
|
||||
\
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(D1, D0, 8); \
|
||||
t1 = _mm_alignr_epi8(D0, D1, 8); \
|
||||
D0 = t1; \
|
||||
D1 = t0; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
|
||||
__m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
|
||||
B0 = t0; \
|
||||
B1 = t1; \
|
||||
\
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(D0, D1, 8); \
|
||||
t1 = _mm_alignr_epi8(D1, D0, 8); \
|
||||
D0 = t1; \
|
||||
D1 = t0; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
|
||||
#endif /* BLAKE_ROUND_MKA_OPT_H */
|
|
@ -92,7 +92,7 @@ namespace randomx {
|
|||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = ARGON2_VERSION_NUMBER;
|
||||
|
||||
int inputsValid = rxa2_validate_inputs(&context);
|
||||
int inputsValid = randomx_argon2_validate_inputs(&context);
|
||||
assert(inputsValid == ARGON2_OK);
|
||||
|
||||
/* 2. Align memory size */
|
||||
|
@ -111,6 +111,7 @@ namespace randomx {
|
|||
instance.threads = context.threads;
|
||||
instance.type = Argon2_d;
|
||||
instance.memory = (block*)cache->memory;
|
||||
instance.impl = cache->argonImpl;
|
||||
|
||||
if (instance.threads > instance.lanes) {
|
||||
instance.threads = instance.lanes;
|
||||
|
@ -119,9 +120,9 @@ namespace randomx {
|
|||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
rxa2_argon_initialize(&instance, &context);
|
||||
randomx_argon2_initialize(&instance, &context);
|
||||
|
||||
rxa2_fill_memory_blocks(&instance);
|
||||
randomx_argon2_fill_memory_blocks(&instance);
|
||||
|
||||
cache->reciprocalCache.clear();
|
||||
randomx::Blake2Generator gen(key, keySize);
|
||||
|
|
|
@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.hpp"
|
||||
#include "superscalar_program.hpp"
|
||||
#include "allocator.hpp"
|
||||
#include "argon2.h"
|
||||
|
||||
/* Global scope for C binding */
|
||||
struct randomx_dataset {
|
||||
|
@ -51,6 +52,7 @@ struct randomx_cache {
|
|||
randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
|
||||
std::vector<uint64_t> reciprocalCache;
|
||||
std::string cacheKey;
|
||||
randomx_argon2_impl* argonImpl;
|
||||
|
||||
bool isInitialized() {
|
||||
return programs[0].getSize() != 0;
|
||||
|
@ -79,4 +81,21 @@ namespace randomx {
|
|||
void initCacheCompile(randomx_cache*, const void*, size_t);
|
||||
void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t blockNumber);
|
||||
void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
|
||||
inline randomx_argon2_impl* selectArgonImpl(randomx_flags flags) {
|
||||
if ((flags & RANDOMX_FLAG_ARGON2) == 0) {
|
||||
return &randomx_argon2_fill_segment_ref;
|
||||
}
|
||||
randomx_argon2_impl* impl = nullptr;
|
||||
if ((flags & RANDOMX_FLAG_ARGON2) == RANDOMX_FLAG_ARGON2_SSE3) {
|
||||
impl = randomx_argon2_impl_sse3();
|
||||
}
|
||||
if ((flags & RANDOMX_FLAG_ARGON2) == RANDOMX_FLAG_ARGON2_AVX2) {
|
||||
impl = randomx_argon2_impl_avx2();
|
||||
}
|
||||
if (impl != nullptr) {
|
||||
return impl;
|
||||
}
|
||||
throw std::runtime_error("Unsupported Argon2 implementation");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,10 +39,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
extern "C" {
|
||||
|
||||
randomx_cache *randomx_alloc_cache(randomx_flags flags) {
|
||||
randomx_cache *cache;
|
||||
randomx_cache *cache = nullptr;
|
||||
|
||||
try {
|
||||
cache = new randomx_cache();
|
||||
cache->argonImpl = randomx::selectArgonImpl(flags);
|
||||
switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) {
|
||||
case RANDOMX_FLAG_DEFAULT:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
|
||||
|
@ -103,7 +104,9 @@ extern "C" {
|
|||
|
||||
void randomx_release_cache(randomx_cache* cache) {
|
||||
assert(cache != nullptr);
|
||||
cache->dealloc(cache);
|
||||
if (cache->memory != nullptr) {
|
||||
cache->dealloc(cache);
|
||||
}
|
||||
delete cache;
|
||||
}
|
||||
|
||||
|
@ -114,7 +117,7 @@ extern "C" {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
randomx_dataset *dataset;
|
||||
randomx_dataset *dataset = nullptr;
|
||||
|
||||
try {
|
||||
dataset = new randomx_dataset();
|
||||
|
|
|
@ -44,7 +44,10 @@ typedef enum {
|
|||
RANDOMX_FLAG_HARD_AES = 2,
|
||||
RANDOMX_FLAG_FULL_MEM = 4,
|
||||
RANDOMX_FLAG_JIT = 8,
|
||||
RANDOMX_FLAG_SECURE = 16
|
||||
RANDOMX_FLAG_SECURE = 16,
|
||||
RANDOMX_FLAG_ARGON2_SSE3 = 32,
|
||||
RANDOMX_FLAG_ARGON2_AVX2 = 64,
|
||||
RANDOMX_FLAG_ARGON2 = 96
|
||||
} randomx_flags;
|
||||
|
||||
typedef struct randomx_dataset randomx_dataset;
|
||||
|
@ -62,10 +65,17 @@ extern "C" {
|
|||
* RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
|
||||
* RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes
|
||||
* subsequent Dataset initialization faster
|
||||
* Optionally, one of these two flags may be selected:
|
||||
* RANDOMX_FLAG_ARGON2_SSE3 - optimized Argon2 for CPUs with the SSSE3 instruction set
|
||||
* makes subsequent cache initialization faster
|
||||
* RANDOMX_FLAG_ARGON2_AVX2 - optimized Argon2 for CPUs with the AVX2 instruction set
|
||||
* makes subsequent cache initialization faster
|
||||
*
|
||||
* @return Pointer to an allocated randomx_cache structure.
|
||||
* NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT
|
||||
* is set and JIT compilation is not supported on the current platform.
|
||||
* Returns NULL if:
|
||||
* (1) memory allocation fails
|
||||
* (2) the RANDOMX_FLAG_JIT is set and JIT compilation is not supported on the current platform
|
||||
* (3) an invalid or unsupported RANDOMX_FLAG_ARGON2 value is set
|
||||
*/
|
||||
RANDOMX_EXPORT randomx_cache *randomx_alloc_cache(randomx_flags flags);
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "stopwatch.hpp"
|
||||
#include "utility.hpp"
|
||||
#include "../randomx.h"
|
||||
#include "../dataset.hpp"
|
||||
#include "../blake2/endian.h"
|
||||
#include "../common.hpp"
|
||||
#ifdef _WIN32
|
||||
|
@ -90,6 +91,8 @@ void printUsage(const char* executable) {
|
|||
std::cout << " --init Q initialize dataset with Q threads (default: 1)" << std::endl;
|
||||
std::cout << " --nonces N run N nonces (default: 1000)" << std::endl;
|
||||
std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl;
|
||||
std::cout << " --sse3 use optimized Argon2 for SSSE3 CPUs" << std::endl;
|
||||
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
|
||||
}
|
||||
|
||||
struct MemoryException : public std::exception {
|
||||
|
@ -127,7 +130,7 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
|
|||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
|
||||
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, sse3, avx2;
|
||||
int noncesCount, threadCount, initThreadCount;
|
||||
uint64_t threadAffinity;
|
||||
int32_t seedValue;
|
||||
|
@ -148,6 +151,8 @@ int main(int argc, char** argv) {
|
|||
readOption("--jit", argc, argv, jit);
|
||||
readOption("--help", argc, argv, help);
|
||||
readOption("--secure", argc, argv, secure);
|
||||
readOption("--sse3", argc, argv, sse3);
|
||||
readOption("--avx2", argc, argv, avx2);
|
||||
|
||||
store32(&seed, seedValue);
|
||||
|
||||
|
@ -166,6 +171,16 @@ int main(int argc, char** argv) {
|
|||
randomx_cache* cache;
|
||||
randomx_flags flags = RANDOMX_FLAG_DEFAULT;
|
||||
|
||||
if (sse3) {
|
||||
flags = (randomx_flags)(flags | RANDOMX_FLAG_ARGON2_SSE3);
|
||||
std::cout << " - Argon2 implementation: SSE3" << std::endl;
|
||||
}
|
||||
|
||||
if (avx2) {
|
||||
flags = (randomx_flags)(flags | RANDOMX_FLAG_ARGON2_AVX2);
|
||||
std::cout << " - Argon2 implementation: AVX2" << std::endl;
|
||||
}
|
||||
|
||||
if (miningMode) {
|
||||
flags = (randomx_flags)(flags | RANDOMX_FLAG_FULL_MEM);
|
||||
std::cout << " - full memory mode (2080 MiB)" << std::endl;
|
||||
|
@ -213,6 +228,7 @@ int main(int argc, char** argv) {
|
|||
std::cout << " ..." << std::endl;
|
||||
|
||||
try {
|
||||
randomx::selectArgonImpl(flags); //just to check if flags are valid
|
||||
if (jit && !RANDOMX_HAVE_COMPILER) {
|
||||
throw std::runtime_error("JIT compilation is not supported on this platform. Try without --jit");
|
||||
}
|
||||
|
|
|
@ -997,8 +997,9 @@ int main() {
|
|||
|
||||
if (RANDOMX_HAVE_COMPILER) {
|
||||
randomx_release_cache(cache);
|
||||
cache = randomx_alloc_cache(RANDOMX_FLAG_JIT);
|
||||
randomx_destroy_vm(vm);
|
||||
vm = nullptr;
|
||||
cache = randomx_alloc_cache(RANDOMX_FLAG_JIT);
|
||||
initCache("test key 000");
|
||||
vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr);
|
||||
}
|
||||
|
@ -1013,6 +1014,35 @@ int main() {
|
|||
|
||||
runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e);
|
||||
|
||||
randomx_destroy_vm(vm);
|
||||
vm = nullptr;
|
||||
|
||||
randomx_release_cache(cache);
|
||||
cache = randomx_alloc_cache(RANDOMX_FLAG_ARGON2_SSE3);
|
||||
|
||||
runTest("Cache initialization: SSSE3", cache != nullptr && RANDOMX_ARGON_ITERATIONS == 3 && RANDOMX_ARGON_LANES == 1 && RANDOMX_ARGON_MEMORY == 262144 && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
|
||||
initCache("test key 000");
|
||||
uint64_t* cacheMemory = (uint64_t*)cache->memory;
|
||||
assert(cacheMemory[0] == 0x191e0e1d23c02186);
|
||||
assert(cacheMemory[1568413] == 0xf1b62fe6210bf8b1);
|
||||
assert(cacheMemory[33554431] == 0x1f47f056d05cd99b);
|
||||
});
|
||||
|
||||
if (cache != nullptr)
|
||||
randomx_release_cache(cache);
|
||||
cache = randomx_alloc_cache(RANDOMX_FLAG_ARGON2_AVX2);
|
||||
|
||||
runTest("Cache initialization: AVX2", cache != nullptr && RANDOMX_ARGON_ITERATIONS == 3 && RANDOMX_ARGON_LANES == 1 && RANDOMX_ARGON_MEMORY == 262144 && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
|
||||
initCache("test key 000");
|
||||
uint64_t* cacheMemory = (uint64_t*)cache->memory;
|
||||
assert(cacheMemory[0] == 0x191e0e1d23c02186);
|
||||
assert(cacheMemory[1568413] == 0xf1b62fe6210bf8b1);
|
||||
assert(cacheMemory[33554431] == 0x1f47f056d05cd99b);
|
||||
});
|
||||
|
||||
if (cache != nullptr)
|
||||
randomx_release_cache(cache);
|
||||
|
||||
std::cout << std::endl << "All tests PASSED" << std::endl;
|
||||
|
||||
if (skipped) {
|
||||
|
|
|
@ -114,6 +114,7 @@
|
|||
<ConformanceMode>true</ConformanceMode>
|
||||
<AssemblerOutput>AssemblyCode</AssemblerOutput>
|
||||
<PreprocessorDefinitions>_MBCS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
|
@ -131,8 +132,10 @@ SET ERRORLEVEL = 0</Command>
|
|||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\src\allocator.cpp" />
|
||||
<ClCompile Include="..\src\argon2_avx2.c" />
|
||||
<ClCompile Include="..\src\argon2_core.c" />
|
||||
<ClCompile Include="..\src\argon2_ref.c" />
|
||||
<ClCompile Include="..\src\argon2_sse3.c" />
|
||||
<ClCompile Include="..\src\assembly_generator_x86.cpp" />
|
||||
<ClCompile Include="..\src\blake2_generator.cpp" />
|
||||
<ClCompile Include="..\src\blake2\blake2b.c" />
|
||||
|
@ -163,7 +166,9 @@ SET ERRORLEVEL = 0</Command>
|
|||
<ClInclude Include="..\src\assembly_generator_x86.hpp" />
|
||||
<ClInclude Include="..\src\blake2\blake2-impl.h" />
|
||||
<ClInclude Include="..\src\blake2\blake2.h" />
|
||||
<ClInclude Include="..\src\blake2\blamka-round-avx2.h" />
|
||||
<ClInclude Include="..\src\blake2\blamka-round-ref.h" />
|
||||
<ClInclude Include="..\src\blake2\blamka-round-sse3.h" />
|
||||
<ClInclude Include="..\src\blake2\endian.h" />
|
||||
<ClInclude Include="..\src\blake2_generator.hpp" />
|
||||
<ClInclude Include="..\src\bytecode_machine.hpp" />
|
||||
|
|
|
@ -81,6 +81,12 @@
|
|||
<ClCompile Include="..\src\bytecode_machine.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\argon2_sse3.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\argon2_avx2.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\src\argon2.h">
|
||||
|
@ -185,6 +191,12 @@
|
|||
<ClInclude Include="..\src\bytecode_machine.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\blake2\blamka-round-sse3.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\blake2\blamka-round-avx2.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<MASM Include="..\src\jit_compiler_x86_static.asm">
|
||||
|
|
Loading…
Reference in a new issue