Merge pull request #8031

187633c slow_hash: don't blow out Mac stack on ARM64 (Howard Chu)
This commit is contained in:
luigi1111 2021-11-09 22:50:36 -06:00
commit 2844b5b8db
No known key found for this signature in database
GPG key ID: F4ACA0183641E010

View file

@ -51,6 +51,12 @@
#define INIT_SIZE_BLK 8 #define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) #define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
#if defined(_MSC_VER)
#define THREADV __declspec(thread)
#else
#define THREADV __thread
#endif
extern void aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); extern void aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
@ -459,12 +465,6 @@ static inline int force_software_aes(void)
_b1 = _b; \ _b1 = _b; \
_b = _c; \ _b = _c; \
#if defined(_MSC_VER)
#define THREADV __declspec(thread)
#else
#define THREADV __thread
#endif
#pragma pack(push, 1) #pragma pack(push, 1)
union cn_slow_hash_state union cn_slow_hash_state
{ {
@ -1012,6 +1012,44 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
} }
#elif !defined NO_AES && (defined(__arm__) || defined(__aarch64__)) #elif !defined NO_AES && (defined(__arm__) || defined(__aarch64__))
#ifdef __aarch64__
#include <sys/mman.h>
THREADV uint8_t *hp_state = NULL;
THREADV int hp_malloced = 0;
void cn_slow_hash_allocate_state(void)
{
if(hp_state != NULL)
return;
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0
#endif
hp_state = mmap(0, MEMORY, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON | MAP_HUGETLB, -1, 0);
if(hp_state == MAP_FAILED)
hp_state = NULL;
if(hp_state == NULL)
{
hp_malloced = 1;
hp_state = (uint8_t *) malloc(MEMORY);
}
}
void cn_slow_hash_free_state(void)
{
if(hp_state == NULL)
return;
if (hp_malloced)
free(hp_state);
else
munmap(hp_state, MEMORY);
hp_state = NULL;
hp_malloced = 0;
}
#else
void cn_slow_hash_allocate_state(void) void cn_slow_hash_allocate_state(void)
{ {
// Do nothing, this is just to maintain compatibility with the upgraded slow-hash.c // Do nothing, this is just to maintain compatibility with the upgraded slow-hash.c
@ -1023,6 +1061,7 @@ void cn_slow_hash_free_state(void)
// As above // As above
return; return;
} }
#endif
#if defined(__GNUC__) #if defined(__GNUC__)
#define RDATA_ALIGN16 __attribute__ ((aligned(16))) #define RDATA_ALIGN16 __attribute__ ((aligned(16)))
@ -1272,12 +1311,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
{ {
RDATA_ALIGN16 uint8_t expandedKey[240]; RDATA_ALIGN16 uint8_t expandedKey[240];
#ifndef FORCE_USE_HEAP uint8_t *local_hp_state;
RDATA_ALIGN16 uint8_t local_hp_state[MEMORY];
#else
uint8_t *local_hp_state = (uint8_t *)aligned_malloc(MEMORY,16);
#endif
uint8_t text[INIT_SIZE_BYTE]; uint8_t text[INIT_SIZE_BYTE];
RDATA_ALIGN16 uint64_t a[2]; RDATA_ALIGN16 uint64_t a[2];
RDATA_ALIGN16 uint64_t b[4]; RDATA_ALIGN16 uint64_t b[4];
@ -1296,6 +1330,14 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
hash_extra_blake, hash_extra_groestl, hash_extra_jh, hash_extra_skein hash_extra_blake, hash_extra_groestl, hash_extra_jh, hash_extra_skein
}; };
// this isn't supposed to happen, but guard against it for now.
if(hp_state == NULL)
cn_slow_hash_allocate_state();
// locals to avoid constant TLS dereferencing
local_hp_state = hp_state;
// locals to avoid constant TLS dereferencing
/* CryptoNight Step 1: Use Keccak1600 to initialize the 'state' (and 'text') buffers from the data. */ /* CryptoNight Step 1: Use Keccak1600 to initialize the 'state' (and 'text') buffers from the data. */
if (prehashed) { if (prehashed) {
@ -1409,10 +1451,6 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
memcpy(state.init, text, INIT_SIZE_BYTE); memcpy(state.init, text, INIT_SIZE_BYTE);
hash_permutation(&state.hs); hash_permutation(&state.hs);
extra_hashes[state.hs.b[0] & 3](&state, 200, hash); extra_hashes[state.hs.b[0] & 3](&state, 200, hash);
#ifdef FORCE_USE_HEAP
aligned_free(local_hp_state);
#endif
} }
#else /* aarch64 && crypto */ #else /* aarch64 && crypto */