diff options
Diffstat (limited to 'src/ext/equix/hashx/src')
29 files changed, 3106 insertions, 0 deletions
diff --git a/src/ext/equix/hashx/src/bench.c b/src/ext/equix/hashx/src/bench.c new file mode 100644 index 0000000000..fbcd41a064 --- /dev/null +++ b/src/ext/equix/hashx/src/bench.c @@ -0,0 +1,135 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#include "test_utils.h" +#include "hashx_thread.h" +#include "hashx_endian.h" +#include "hashx_time.h" +#include <limits.h> +#include <inttypes.h> + +typedef struct worker_job { + int id; + hashx_thread thread; + hashx_ctx* ctx; + int64_t total_hashes; + uint64_t best_hash; + uint64_t threshold; + int start; + int step; + int end; + int nonces; +} worker_job; + +static hashx_thread_retval worker(void* args) { + worker_job* job = (worker_job*)args; + job->total_hashes = 0; + job->best_hash = UINT64_MAX; + for (int seed = job->start; seed < job->end; seed += job->step) { + if (!hashx_make(job->ctx, &seed, sizeof(seed))) { + continue; + } + for (int nonce = 0; nonce < job->nonces; ++nonce) { + uint8_t hash[HASHX_SIZE] = { 0 }; +#ifndef HASHX_BLOCK_MODE + hashx_exec(job->ctx, nonce, hash); +#else + hashx_exec(job->ctx, &nonce, sizeof(nonce), hash); +#endif + uint64_t hashval = load64(hash); + if (hashval < job->best_hash) { + job->best_hash = hashval; + } + if (hashval < job->threshold) { + printf("[thread %2i] Hash (%5i, %5i) below threshold:" + " ...%02x%02x%02x%02x%02x%02x%02x%02x\n", + job->id, + seed, + nonce, + hash[0], + hash[1], + hash[2], + hash[3], + hash[4], + hash[5], + hash[6], + hash[7]); + } + } + job->total_hashes += job->nonces; + } + return HASHX_THREAD_SUCCESS; +} + +int main(int argc, char** argv) { + int nonces, seeds, start, diff, threads; + bool interpret; + read_int_option("--diff", argc, argv, &diff, INT_MAX); + read_int_option("--start", argc, argv, &start, 0); + read_int_option("--seeds", argc, argv, &seeds, 500); + read_int_option("--nonces", argc, argv, &nonces, 65536); + read_int_option("--threads", argc, argv, &threads, 1); + 
read_option("--interpret", argc, argv, &interpret); + hashx_type flags = HASHX_INTERPRETED; + if (!interpret) { + flags = HASHX_COMPILED; + } + uint64_t best_hash = UINT64_MAX; + uint64_t diff_ex = (uint64_t)diff * 1000ULL; + uint64_t threshold = UINT64_MAX / diff_ex; + int seeds_end = seeds + start; + int64_t total_hashes = 0; + printf("Interpret: %i, Target diff.: %" PRIu64 ", Threads: %i\n", interpret, diff_ex, threads); + printf("Testing seeds %i-%i with %i nonces each ...\n", start, seeds_end - 1, nonces); + double time_start, time_end; + worker_job* jobs = malloc(sizeof(worker_job) * threads); + if (jobs == NULL) { + printf("Error: memory allocation failure\n"); + return 1; + } + for (int thd = 0; thd < threads; ++thd) { + jobs[thd].ctx = hashx_alloc(flags); + if (jobs[thd].ctx == NULL) { + printf("Error: memory allocation failure\n"); + return 1; + } + if (jobs[thd].ctx == HASHX_NOTSUPP) { + printf("Error: not supported. Try with --interpret\n"); + return 1; + } + jobs[thd].id = thd; + jobs[thd].start = start + thd; + jobs[thd].step = threads; + jobs[thd].end = seeds_end; + jobs[thd].nonces = nonces; + jobs[thd].threshold = threshold; + } + time_start = hashx_time(); + if (threads > 1) { + for (int thd = 0; thd < threads; ++thd) { + jobs[thd].thread = hashx_thread_create(&worker, &jobs[thd]); + } + for (int thd = 0; thd < threads; ++thd) { + hashx_thread_join(jobs[thd].thread); + } + } + else { + worker(jobs); + } + time_end = hashx_time(); + for (int thd = 0; thd < threads; ++thd) { + total_hashes += jobs[thd].total_hashes; + if (jobs[thd].best_hash < best_hash) { + best_hash = jobs[thd].best_hash; + } + } + double elapsed = time_end - time_start; + printf("Total hashes: %" PRIi64 "\n", total_hashes); + printf("%f hashes/sec.\n", total_hashes / elapsed); + printf("%f seeds/sec.\n", seeds / elapsed); + printf("Best hash: ..."); + output_hex((char*)&best_hash, sizeof(best_hash)); + printf(" (diff: %" PRIu64 ")\n", UINT64_MAX / best_hash); + free(jobs); + 
return 0; +} diff --git a/src/ext/equix/hashx/src/blake2.c b/src/ext/equix/hashx/src/blake2.c new file mode 100644 index 0000000000..f353cb3064 --- /dev/null +++ b/src/ext/equix/hashx/src/blake2.c @@ -0,0 +1,467 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +/* Original code from Argon2 reference source code package used under CC0 + * https://github.com/P-H-C/phc-winner-argon2 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves +*/ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include "blake2.h" +#include "hashx_endian.h" + +static const uint64_t blake2b_IV[8] = { + UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), + UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), + UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), + UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) }; + +#define BLAKE2_SIGMA_0_0 0 +#define BLAKE2_SIGMA_0_1 1 +#define BLAKE2_SIGMA_0_2 2 +#define BLAKE2_SIGMA_0_3 3 +#define BLAKE2_SIGMA_0_4 4 +#define BLAKE2_SIGMA_0_5 5 +#define BLAKE2_SIGMA_0_6 6 +#define BLAKE2_SIGMA_0_7 7 +#define BLAKE2_SIGMA_0_8 8 +#define BLAKE2_SIGMA_0_9 9 +#define BLAKE2_SIGMA_0_10 10 +#define BLAKE2_SIGMA_0_11 11 +#define BLAKE2_SIGMA_0_12 12 +#define BLAKE2_SIGMA_0_13 13 +#define BLAKE2_SIGMA_0_14 14 +#define BLAKE2_SIGMA_0_15 15 + +#define BLAKE2_SIGMA_1_0 14 +#define BLAKE2_SIGMA_1_1 10 +#define BLAKE2_SIGMA_1_2 4 +#define BLAKE2_SIGMA_1_3 8 +#define BLAKE2_SIGMA_1_4 9 +#define BLAKE2_SIGMA_1_5 15 +#define BLAKE2_SIGMA_1_6 13 +#define BLAKE2_SIGMA_1_7 6 +#define BLAKE2_SIGMA_1_8 1 +#define BLAKE2_SIGMA_1_9 12 +#define BLAKE2_SIGMA_1_10 0 +#define BLAKE2_SIGMA_1_11 2 +#define BLAKE2_SIGMA_1_12 11 +#define BLAKE2_SIGMA_1_13 7 +#define BLAKE2_SIGMA_1_14 5 +#define BLAKE2_SIGMA_1_15 3 + +#define BLAKE2_SIGMA_2_0 11 +#define BLAKE2_SIGMA_2_1 8 +#define BLAKE2_SIGMA_2_2 12 +#define 
BLAKE2_SIGMA_2_3 0 +#define BLAKE2_SIGMA_2_4 5 +#define BLAKE2_SIGMA_2_5 2 +#define BLAKE2_SIGMA_2_6 15 +#define BLAKE2_SIGMA_2_7 13 +#define BLAKE2_SIGMA_2_8 10 +#define BLAKE2_SIGMA_2_9 14 +#define BLAKE2_SIGMA_2_10 3 +#define BLAKE2_SIGMA_2_11 6 +#define BLAKE2_SIGMA_2_12 7 +#define BLAKE2_SIGMA_2_13 1 +#define BLAKE2_SIGMA_2_14 9 +#define BLAKE2_SIGMA_2_15 4 + +#define BLAKE2_SIGMA_3_0 7 +#define BLAKE2_SIGMA_3_1 9 +#define BLAKE2_SIGMA_3_2 3 +#define BLAKE2_SIGMA_3_3 1 +#define BLAKE2_SIGMA_3_4 13 +#define BLAKE2_SIGMA_3_5 12 +#define BLAKE2_SIGMA_3_6 11 +#define BLAKE2_SIGMA_3_7 14 +#define BLAKE2_SIGMA_3_8 2 +#define BLAKE2_SIGMA_3_9 6 +#define BLAKE2_SIGMA_3_10 5 +#define BLAKE2_SIGMA_3_11 10 +#define BLAKE2_SIGMA_3_12 4 +#define BLAKE2_SIGMA_3_13 0 +#define BLAKE2_SIGMA_3_14 15 +#define BLAKE2_SIGMA_3_15 8 + +#define BLAKE2_SIGMA_4_0 9 +#define BLAKE2_SIGMA_4_1 0 +#define BLAKE2_SIGMA_4_2 5 +#define BLAKE2_SIGMA_4_3 7 +#define BLAKE2_SIGMA_4_4 2 +#define BLAKE2_SIGMA_4_5 4 +#define BLAKE2_SIGMA_4_6 10 +#define BLAKE2_SIGMA_4_7 15 +#define BLAKE2_SIGMA_4_8 14 +#define BLAKE2_SIGMA_4_9 1 +#define BLAKE2_SIGMA_4_10 11 +#define BLAKE2_SIGMA_4_11 12 +#define BLAKE2_SIGMA_4_12 6 +#define BLAKE2_SIGMA_4_13 8 +#define BLAKE2_SIGMA_4_14 3 +#define BLAKE2_SIGMA_4_15 13 + +#define BLAKE2_SIGMA_5_0 2 +#define BLAKE2_SIGMA_5_1 12 +#define BLAKE2_SIGMA_5_2 6 +#define BLAKE2_SIGMA_5_3 10 +#define BLAKE2_SIGMA_5_4 0 +#define BLAKE2_SIGMA_5_5 11 +#define BLAKE2_SIGMA_5_6 8 +#define BLAKE2_SIGMA_5_7 3 +#define BLAKE2_SIGMA_5_8 4 +#define BLAKE2_SIGMA_5_9 13 +#define BLAKE2_SIGMA_5_10 7 +#define BLAKE2_SIGMA_5_11 5 +#define BLAKE2_SIGMA_5_12 15 +#define BLAKE2_SIGMA_5_13 14 +#define BLAKE2_SIGMA_5_14 1 +#define BLAKE2_SIGMA_5_15 9 + +#define BLAKE2_SIGMA_6_0 12 +#define BLAKE2_SIGMA_6_1 5 +#define BLAKE2_SIGMA_6_2 1 +#define BLAKE2_SIGMA_6_3 15 +#define BLAKE2_SIGMA_6_4 14 +#define BLAKE2_SIGMA_6_5 13 +#define BLAKE2_SIGMA_6_6 4 +#define BLAKE2_SIGMA_6_7 10 +#define 
BLAKE2_SIGMA_6_8 0 +#define BLAKE2_SIGMA_6_9 7 +#define BLAKE2_SIGMA_6_10 6 +#define BLAKE2_SIGMA_6_11 3 +#define BLAKE2_SIGMA_6_12 9 +#define BLAKE2_SIGMA_6_13 2 +#define BLAKE2_SIGMA_6_14 8 +#define BLAKE2_SIGMA_6_15 11 + +#define BLAKE2_SIGMA_7_0 13 +#define BLAKE2_SIGMA_7_1 11 +#define BLAKE2_SIGMA_7_2 7 +#define BLAKE2_SIGMA_7_3 14 +#define BLAKE2_SIGMA_7_4 12 +#define BLAKE2_SIGMA_7_5 1 +#define BLAKE2_SIGMA_7_6 3 +#define BLAKE2_SIGMA_7_7 9 +#define BLAKE2_SIGMA_7_8 5 +#define BLAKE2_SIGMA_7_9 0 +#define BLAKE2_SIGMA_7_10 15 +#define BLAKE2_SIGMA_7_11 4 +#define BLAKE2_SIGMA_7_12 8 +#define BLAKE2_SIGMA_7_13 6 +#define BLAKE2_SIGMA_7_14 2 +#define BLAKE2_SIGMA_7_15 10 + +#define BLAKE2_SIGMA_8_0 6 +#define BLAKE2_SIGMA_8_1 15 +#define BLAKE2_SIGMA_8_2 14 +#define BLAKE2_SIGMA_8_3 9 +#define BLAKE2_SIGMA_8_4 11 +#define BLAKE2_SIGMA_8_5 3 +#define BLAKE2_SIGMA_8_6 0 +#define BLAKE2_SIGMA_8_7 8 +#define BLAKE2_SIGMA_8_8 12 +#define BLAKE2_SIGMA_8_9 2 +#define BLAKE2_SIGMA_8_10 13 +#define BLAKE2_SIGMA_8_11 7 +#define BLAKE2_SIGMA_8_12 1 +#define BLAKE2_SIGMA_8_13 4 +#define BLAKE2_SIGMA_8_14 10 +#define BLAKE2_SIGMA_8_15 5 + +#define BLAKE2_SIGMA_9_0 10 +#define BLAKE2_SIGMA_9_1 2 +#define BLAKE2_SIGMA_9_2 8 +#define BLAKE2_SIGMA_9_3 4 +#define BLAKE2_SIGMA_9_4 7 +#define BLAKE2_SIGMA_9_5 6 +#define BLAKE2_SIGMA_9_6 1 +#define BLAKE2_SIGMA_9_7 5 +#define BLAKE2_SIGMA_9_8 15 +#define BLAKE2_SIGMA_9_9 11 +#define BLAKE2_SIGMA_9_10 9 +#define BLAKE2_SIGMA_9_11 14 +#define BLAKE2_SIGMA_9_12 3 +#define BLAKE2_SIGMA_9_13 12 +#define BLAKE2_SIGMA_9_14 13 +#define BLAKE2_SIGMA_9_15 0 + +#define BLAKE2_SIGMA_10_0 0 +#define BLAKE2_SIGMA_10_1 1 +#define BLAKE2_SIGMA_10_2 2 +#define BLAKE2_SIGMA_10_3 3 +#define BLAKE2_SIGMA_10_4 4 +#define BLAKE2_SIGMA_10_5 5 +#define BLAKE2_SIGMA_10_6 6 +#define BLAKE2_SIGMA_10_7 7 +#define BLAKE2_SIGMA_10_8 8 +#define BLAKE2_SIGMA_10_9 9 +#define BLAKE2_SIGMA_10_10 10 +#define BLAKE2_SIGMA_10_11 11 +#define BLAKE2_SIGMA_10_12 12 
+#define BLAKE2_SIGMA_10_13 13 +#define BLAKE2_SIGMA_10_14 14 +#define BLAKE2_SIGMA_10_15 15 + +#define BLAKE2_SIGMA_11_0 14 +#define BLAKE2_SIGMA_11_1 10 +#define BLAKE2_SIGMA_11_2 4 +#define BLAKE2_SIGMA_11_3 8 +#define BLAKE2_SIGMA_11_4 9 +#define BLAKE2_SIGMA_11_5 15 +#define BLAKE2_SIGMA_11_6 13 +#define BLAKE2_SIGMA_11_7 6 +#define BLAKE2_SIGMA_11_8 1 +#define BLAKE2_SIGMA_11_9 12 +#define BLAKE2_SIGMA_11_10 0 +#define BLAKE2_SIGMA_11_11 2 +#define BLAKE2_SIGMA_11_12 11 +#define BLAKE2_SIGMA_11_13 7 +#define BLAKE2_SIGMA_11_14 5 +#define BLAKE2_SIGMA_11_15 3 + +static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { + return (w >> c) | (w << (64 - c)); +} + +static FORCE_INLINE void blake2b_set_lastblock(blake2b_state* S) { + S->f[0] = (uint64_t)-1; +} + +static FORCE_INLINE void blake2b_increment_counter(blake2b_state* S, + uint64_t inc) { + S->t[0] += inc; + S->t[1] += (S->t[0] < inc); +} + +static FORCE_INLINE void blake2b_invalidate_state(blake2b_state* S) { + //clear_internal_memory(S, sizeof(*S)); /* wipe */ + blake2b_set_lastblock(S); /* invalidate for further use */ +} + +static FORCE_INLINE void blake2b_init0(blake2b_state* S) { + memset(S, 0, sizeof(*S)); + memcpy(S->h, blake2b_IV, sizeof(S->h)); +} + +int hashx_blake2b_init_param(blake2b_state* S, const blake2b_param* P) { + const unsigned char* p = (const unsigned char*)P; + unsigned int i; + + if (NULL == P || NULL == S) { + return -1; + } + + blake2b_init0(S); + /* IV XOR Parameter Block */ + for (i = 0; i < 8; ++i) { + S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); + } + S->outlen = P->digest_length; + return 0; +} + +#define SIGMA(r, k) BLAKE2_SIGMA_ ## r ## _ ## k + +#define G(r, i, j, a, b, c, d) \ + do { \ + a = a + b + m[SIGMA(r, i)]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[SIGMA(r, j)]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define ROUND_INNER(r) \ + do { \ + G(r, 0, 1, 
v[0], v[4], v[8], v[12]); \ + G(r, 2, 3, v[1], v[5], v[9], v[13]); \ + G(r, 4, 5, v[2], v[6], v[10], v[14]); \ + G(r, 6, 7, v[3], v[7], v[11], v[15]); \ + G(r, 8, 9, v[0], v[5], v[10], v[15]); \ + G(r, 10, 11, v[1], v[6], v[11], v[12]); \ + G(r, 12, 13, v[2], v[7], v[8], v[13]); \ + G(r, 14, 15, v[3], v[4], v[9], v[14]); \ + } while ((void)0, 0) + +#define ROUND(r) ROUND_INNER(r) + +static void blake2b_compress(blake2b_state* S, const uint8_t* block) { + uint64_t m[16]; + uint64_t v[16]; + unsigned int i; + + for (i = 0; i < 16; ++i) { + m[i] = load64(block + i * sizeof(m[i])); + } + + for (i = 0; i < 8; ++i) { + v[i] = S->h[i]; + } + + v[8] = blake2b_IV[0]; + v[9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + ROUND(10); + ROUND(11); + + for (i = 0; i < 8; ++i) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} + +static void blake2b_compress_4r(blake2b_state* S, const uint8_t* block) { + uint64_t m[16]; + uint64_t v[16]; + unsigned int i; + + for (i = 0; i < 16; ++i) { + m[i] = load64(block + i * sizeof(m[i])); + } + + for (i = 0; i < 8; ++i) { + v[i] = S->h[i]; + } + + v[8] = blake2b_IV[0]; + v[9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + + for (i = 0; i < 8; ++i) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} + +int hashx_blake2b_update(blake2b_state* S, const void* in, size_t inlen) { + const uint8_t* pin = (const uint8_t*)in; + + if (inlen == 0) { + return 0; + } + + /* Sanity check */ + if (S == NULL || in == NULL) { + return -1; + } + + /* Is this a reused state? 
*/ + if (S->f[0] != 0) { + return -1; + } + + if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { + /* Complete current block */ + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + memcpy(&S->buf[left], pin, fill); + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, S->buf); + S->buflen = 0; + inlen -= fill; + pin += fill; + /* Avoid buffer copies when possible */ + while (inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, pin); + inlen -= BLAKE2B_BLOCKBYTES; + pin += BLAKE2B_BLOCKBYTES; + } + } + memcpy(&S->buf[S->buflen], pin, inlen); + S->buflen += (unsigned int)inlen; + return 0; +} + +int hashx_blake2b_final(blake2b_state* S, void* out, size_t outlen) { + uint8_t buffer[BLAKE2B_OUTBYTES] = { 0 }; + unsigned int i; + + /* Sanity checks */ + if (S == NULL || out == NULL || outlen < S->outlen) { + return -1; + } + + /* Is this a reused state? */ + if (S->f[0] != 0) { + return -1; + } + + blake2b_increment_counter(S, S->buflen); + blake2b_set_lastblock(S); + memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ + blake2b_compress(S, S->buf); + + for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ + store64(buffer + sizeof(S->h[i]) * i, S->h[i]); + } + + memcpy(out, buffer, S->outlen); + + return 0; +} + +/* 4-round version of Blake2b */ +void hashx_blake2b_4r(const blake2b_param* params, const void* in, + size_t inlen, void* out) { + + blake2b_state state; + const uint8_t* p = (const uint8_t*)params; + + blake2b_init0(&state); + /* IV XOR Parameter Block */ + for (unsigned i = 0; i < 8; ++i) { + state.h[i] ^= load64(&p[i * sizeof(state.h[i])]); + } + //state.outlen = blake_params.digest_length; + + const uint8_t* pin = (const uint8_t*)in; + + while (inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(&state, BLAKE2B_BLOCKBYTES); + blake2b_compress_4r(&state, pin); + inlen -= BLAKE2B_BLOCKBYTES; + pin += BLAKE2B_BLOCKBYTES; + } + + 
memcpy(state.buf, pin, inlen); + blake2b_increment_counter(&state, inlen); + blake2b_set_lastblock(&state); + blake2b_compress_4r(&state, state.buf); + + /* Output hash */ + memcpy(out, state.h, sizeof(state.h)); +} diff --git a/src/ext/equix/hashx/src/blake2.h b/src/ext/equix/hashx/src/blake2.h new file mode 100644 index 0000000000..ba535b21fb --- /dev/null +++ b/src/ext/equix/hashx/src/blake2.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +/* Original code from Argon2 reference source code package used under CC0 Licence + * https://github.com/P-H-C/phc-winner-argon2 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves +*/ + +#ifndef PORTABLE_BLAKE2_H +#define PORTABLE_BLAKE2_H + +#include <stdint.h> +#include <limits.h> +#include <stddef.h> +#include <hashx.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +enum blake2b_constant { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 +}; + +#pragma pack(push, 1) +typedef struct blake2b_param { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint64_t node_offset; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ +} blake2b_param; +#pragma pack(pop) + +typedef struct blake2b_state { + uint64_t h[8]; + uint64_t t[2]; + uint64_t f[2]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + unsigned buflen; + unsigned outlen; + uint8_t last_node; +} blake2b_state; + +/* Ensure param structs have not been wrongly padded */ +/* Poor man's static_assert */ +enum { + blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), + blake2_size_check_2 = + 1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * 
CHAR_BIT) +}; + +HASHX_PRIVATE int hashx_blake2b_init_param(blake2b_state* S, const blake2b_param* P); +HASHX_PRIVATE int hashx_blake2b_update(blake2b_state* S, const void* in, size_t inlen); +HASHX_PRIVATE int hashx_blake2b_final(blake2b_state* S, void* out, size_t outlen); +HASHX_PRIVATE void hashx_blake2b_4r(const blake2b_param* P, const void* in, size_t inlen, void* out); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/ext/equix/hashx/src/compiler.c b/src/ext/equix/hashx/src/compiler.c new file mode 100644 index 0000000000..f180bf2d25 --- /dev/null +++ b/src/ext/equix/hashx/src/compiler.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#include <stdbool.h> + +#include "compiler.h" +#include "virtual_memory.h" +#include "program.h" +#include "context.h" + +bool hashx_compiler_init(hashx_ctx* ctx) { + ctx->code = hashx_vm_alloc(COMP_CODE_SIZE); + return ctx->code != NULL; +} + +void hashx_compiler_destroy(hashx_ctx* ctx) { + hashx_vm_free(ctx->code, COMP_CODE_SIZE); +} diff --git a/src/ext/equix/hashx/src/compiler.h b/src/ext/equix/hashx/src/compiler.h new file mode 100644 index 0000000000..ef0201be02 --- /dev/null +++ b/src/ext/equix/hashx/src/compiler.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#ifndef COMPILER_H +#define COMPILER_H + +#include <stdint.h> +#include <stdbool.h> +#include <hashx.h> +#include "virtual_memory.h" +#include "program.h" + +HASHX_PRIVATE void hashx_compile_x86(const hashx_program* program, uint8_t* code); + +HASHX_PRIVATE void hashx_compile_a64(const hashx_program* program, uint8_t* code); + +#if defined(_M_X64) || defined(__x86_64__) +#define HASHX_COMPILER 1 +#define HASHX_COMPILER_X86 +#define hashx_compile hashx_compile_x86 +#elif defined(__aarch64__) +#define HASHX_COMPILER 1 +#define HASHX_COMPILER_A64 +#define hashx_compile hashx_compile_a64 +#else +#define 
HASHX_COMPILER 0 +#define hashx_compile +#endif + +HASHX_PRIVATE bool hashx_compiler_init(hashx_ctx* compiler); +HASHX_PRIVATE void hashx_compiler_destroy(hashx_ctx* compiler); + +#define COMP_PAGE_SIZE 4096 +#define COMP_RESERVE_SIZE 1024 +#define COMP_AVG_INSTR_SIZE 5 +#define COMP_CODE_SIZE \ + ALIGN_SIZE( \ + HASHX_PROGRAM_MAX_SIZE * COMP_AVG_INSTR_SIZE + COMP_RESERVE_SIZE, \ + COMP_PAGE_SIZE) + +#endif diff --git a/src/ext/equix/hashx/src/compiler_a64.c b/src/ext/equix/hashx/src/compiler_a64.c new file mode 100644 index 0000000000..48f743b988 --- /dev/null +++ b/src/ext/equix/hashx/src/compiler_a64.c @@ -0,0 +1,154 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#include <string.h> +#include <assert.h> + +#include "compiler.h" +#include "program.h" +#include "virtual_memory.h" +#include "unreachable.h" + +#define EMIT(p,x) do { \ + memcpy(p, x, sizeof(x)); \ + p += sizeof(x); \ + } while (0) +#define EMIT_U32(p,x) *((uint32_t*)(p)) = x; p += sizeof(uint32_t) +#define EMIT_IMM32(p,x) \ + EMIT_U32(p, 0x9280000c | \ + ((x <= INT32_MAX) << 30) | \ + (((x <= INT32_MAX) ? 
(x & 0xFFFF) : (~x & 0xFFFF)) << 5)); \ + EMIT_U32(p, 0xf2a0000c | \ + (((x >> 16) & 0xFFFF) << 5)); + +#ifdef HASHX_COMPILER_A64 + +static const uint8_t a64_prologue[] = { + 0x07, 0x1c, 0x40, 0xf9, /* ldr x7, [x0, #56] */ + 0x06, 0x18, 0x40, 0xf9, /* ldr x6, [x0, #48] */ + 0x05, 0x14, 0x40, 0xf9, /* ldr x5, [x0, #40] */ + 0x04, 0x10, 0x40, 0xf9, /* ldr x4, [x0, #32] */ + 0x03, 0x0c, 0x40, 0xf9, /* ldr x3, [x0, #24] */ + 0x02, 0x08, 0x40, 0xf9, /* ldr x2, [x0, #16] */ + 0x01, 0x04, 0x40, 0xf9, /* ldr x1, [x0, #8] */ + 0xe8, 0x03, 0x00, 0xaa, /* mov x8, x0 */ + 0x00, 0x00, 0x40, 0xf9, /* ldr x0, [x0] */ + 0xe9, 0x03, 0x1f, 0x2a, /* mov w9, wzr */ +}; + +static const uint8_t a64_epilogue[] = { + 0x00, 0x01, 0x00, 0xf9, /* str x0, [x8] */ + 0x01, 0x05, 0x00, 0xf9, /* str x1, [x8, #8] */ + 0x02, 0x09, 0x00, 0xf9, /* str x2, [x8, #16] */ + 0x03, 0x0d, 0x00, 0xf9, /* str x3, [x8, #24] */ + 0x04, 0x11, 0x00, 0xf9, /* str x4, [x8, #32] */ + 0x05, 0x15, 0x00, 0xf9, /* str x5, [x8, #40] */ + 0x06, 0x19, 0x00, 0xf9, /* str x6, [x8, #48] */ + 0x07, 0x1d, 0x00, 0xf9, /* str x7, [x8, #56] */ + 0xc0, 0x03, 0x5f, 0xd6, /* ret */ +}; + +void hashx_compile_a64(const hashx_program* program, uint8_t* code) { + hashx_vm_rw(code, COMP_CODE_SIZE); + uint8_t* pos = code; + uint8_t* target = NULL; + int creg = -1; + EMIT(pos, a64_prologue); + for (int i = 0; i < program->code_size; ++i) { + const instruction* instr = &program->code[i]; + switch (instr->opcode) + { + case INSTR_UMULH_R: + EMIT_U32(pos, 0x9bc07c00 | + (instr->src << 16) | + (instr->dst << 5) | + (instr->dst)); + if (target != NULL) { + creg = instr->dst; + } + break; + case INSTR_SMULH_R: + EMIT_U32(pos, 0x9b407c00 | + (instr->src << 16) | + (instr->dst << 5) | + (instr->dst)); + if (target != NULL) { + creg = instr->dst; + } + break; + case INSTR_MUL_R: + assert(creg != instr->dst); + EMIT_U32(pos, 0x9b007c00 | + (instr->src << 16) | + (instr->dst << 5) | + (instr->dst)); + break; + case INSTR_SUB_R: + assert(creg != 
instr->dst); + EMIT_U32(pos, 0xcb000000 | + (instr->src << 16) | + (instr->dst << 5) | + (instr->dst)); + break; + case INSTR_XOR_R: + assert(creg != instr->dst); + EMIT_U32(pos, 0xca000000 | + (instr->src << 16) | + (instr->dst << 5) | + (instr->dst)); + break; + case INSTR_ADD_RS: + assert(creg != instr->dst); + EMIT_U32(pos, 0x8b000000 | + (instr->src << 16) | + (instr->imm32 << 10) | + (instr->dst << 5) | + (instr->dst)); + break; + case INSTR_ROR_C: + assert(creg != instr->dst); + EMIT_U32(pos, 0x93c00000 | + (instr->dst << 16) | + (instr->imm32 << 10) | + (instr->dst << 5) | + (instr->dst)); + break; + case INSTR_ADD_C: + assert(creg != instr->dst); + EMIT_IMM32(pos, instr->imm32); + EMIT_U32(pos, 0x8b0c0000 | + (instr->dst << 5) | + (instr->dst)); + break; + case INSTR_XOR_C: + assert(creg != instr->dst); + EMIT_IMM32(pos, instr->imm32); + EMIT_U32(pos, 0xca0c0000 | + (instr->dst << 5) | + (instr->dst)); + break; + case INSTR_TARGET: + target = pos; + break; + case INSTR_BRANCH: + EMIT_IMM32(pos, instr->imm32); + EMIT_U32(pos, 0x2a00012b | (creg << 16)); + EMIT_U32(pos, 0x6a0c017f); + EMIT_U32(pos, 0x5a891129); + EMIT_U32(pos, 0x54000000 | + ((((uint32_t)(target - pos)) >> 2) & 0x7FFFF) << 5); + target = NULL; + creg = -1; + break; + default: + UNREACHABLE; + } + } + EMIT(pos, a64_epilogue); + hashx_vm_rx(code, COMP_CODE_SIZE); +#ifdef __GNUC__ + __builtin___clear_cache(code, pos); +#endif +} + +#endif diff --git a/src/ext/equix/hashx/src/compiler_x86.c b/src/ext/equix/hashx/src/compiler_x86.c new file mode 100644 index 0000000000..0a1d9efd8f --- /dev/null +++ b/src/ext/equix/hashx/src/compiler_x86.c @@ -0,0 +1,151 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#include <string.h> + +#include "compiler.h" +#include "program.h" +#include "virtual_memory.h" +#include "unreachable.h" + +#if defined(_WIN32) || defined(__CYGWIN__) +#define WINABI +#endif + +#define EMIT(p,x) do { \ + memcpy(p, x, 
sizeof(x)); \ + p += sizeof(x); \ + } while (0) +#define EMIT_BYTE(p,x) *((p)++) = x +#define EMIT_U16(p,x) *((uint16_t*)(p)) = x; p += sizeof(uint16_t) +#define EMIT_U32(p,x) *((uint32_t*)(p)) = x; p += sizeof(uint32_t) +#define EMIT_U64(p,x) *((uint64_t*)(p)) = x; p += sizeof(uint64_t) + +#define GEN_SIB(scale, index, base) ((scale << 6) | (index << 3) | base) + +#ifdef HASHX_COMPILER_X86 + +static const uint8_t x86_prologue[] = { +#ifndef WINABI + 0x48, 0x89, 0xF9, /* mov rcx, rdi */ + 0x48, 0x83, 0xEC, 0x20, /* sub rsp, 32 */ + 0x4C, 0x89, 0x24, 0x24, /* mov qword ptr [rsp+0], r12 */ + 0x4C, 0x89, 0x6C, 0x24, 0x08, /* mov qword ptr [rsp+8], r13 */ + 0x4C, 0x89, 0x74, 0x24, 0x10, /* mov qword ptr [rsp+16], r14 */ + 0x4C, 0x89, 0x7C, 0x24, 0x18, /* mov qword ptr [rsp+24], r15 */ +#else + 0x4C, 0x89, 0x64, 0x24, 0x08, /* mov qword ptr [rsp+8], r12 */ + 0x4C, 0x89, 0x6C, 0x24, 0x10, /* mov qword ptr [rsp+16], r13 */ + 0x4C, 0x89, 0x74, 0x24, 0x18, /* mov qword ptr [rsp+24], r14 */ + 0x4C, 0x89, 0x7C, 0x24, 0x20, /* mov qword ptr [rsp+32], r15 */ + 0x48, 0x83, 0xEC, 0x10, /* sub rsp, 16 */ + 0x48, 0x89, 0x34, 0x24, /* mov qword ptr [rsp+0], rsi */ + 0x48, 0x89, 0x7C, 0x24, 0x08, /* mov qword ptr [rsp+8], rdi */ +#endif + 0x31, 0xF6, /* xor esi, esi */ + 0x8D, 0x7E, 0xFF, /* lea edi, [rsi-1] */ + 0x4C, 0x8B, 0x01, /* mov r8, qword ptr [rcx+0] */ + 0x4C, 0x8B, 0x49, 0x08, /* mov r9, qword ptr [rcx+8] */ + 0x4C, 0x8B, 0x51, 0x10, /* mov r10, qword ptr [rcx+16] */ + 0x4C, 0x8B, 0x59, 0x18, /* mov r11, qword ptr [rcx+24] */ + 0x4C, 0x8B, 0x61, 0x20, /* mov r12, qword ptr [rcx+32] */ + 0x4C, 0x8B, 0x69, 0x28, /* mov r13, qword ptr [rcx+40] */ + 0x4C, 0x8B, 0x71, 0x30, /* mov r14, qword ptr [rcx+48] */ + 0x4C, 0x8B, 0x79, 0x38 /* mov r15, qword ptr [rcx+56] */ +}; + +static const uint8_t x86_epilogue[] = { + 0x4C, 0x89, 0x01, /* mov qword ptr [rcx+0], r8 */ + 0x4C, 0x89, 0x49, 0x08, /* mov qword ptr [rcx+8], r9 */ + 0x4C, 0x89, 0x51, 0x10, /* mov qword ptr [rcx+16], r10 */ 
+ 0x4C, 0x89, 0x59, 0x18, /* mov qword ptr [rcx+24], r11 */ + 0x4C, 0x89, 0x61, 0x20, /* mov qword ptr [rcx+32], r12 */ + 0x4C, 0x89, 0x69, 0x28, /* mov qword ptr [rcx+40], r13 */ + 0x4C, 0x89, 0x71, 0x30, /* mov qword ptr [rcx+48], r14 */ + 0x4C, 0x89, 0x79, 0x38, /* mov qword ptr [rcx+56], r15 */ +#ifndef WINABI + 0x4C, 0x8B, 0x24, 0x24, /* mov r12, qword ptr [rsp+0] */ + 0x4C, 0x8B, 0x6C, 0x24, 0x08, /* mov r13, qword ptr [rsp+8] */ + 0x4C, 0x8B, 0x74, 0x24, 0x10, /* mov r14, qword ptr [rsp+16] */ + 0x4C, 0x8B, 0x7C, 0x24, 0x18, /* mov r15, qword ptr [rsp+24] */ + 0x48, 0x83, 0xC4, 0x20, /* add rsp, 32 */ +#else + 0x48, 0x8B, 0x34, 0x24, /* mov rsi, qword ptr [rsp+0] */ + 0x48, 0x8B, 0x7C, 0x24, 0x08, /* mov rdi, qword ptr [rsp+8] */ + 0x48, 0x83, 0xC4, 0x10, /* add rsp, 16 */ + 0x4C, 0x8B, 0x64, 0x24, 0x08, /* mov r12, qword ptr [rsp+8] */ + 0x4C, 0x8B, 0x6C, 0x24, 0x10, /* mov r13, qword ptr [rsp+16] */ + 0x4C, 0x8B, 0x74, 0x24, 0x18, /* mov r14, qword ptr [rsp+24] */ + 0x4C, 0x8B, 0x7C, 0x24, 0x20, /* mov r15, qword ptr [rsp+32] */ +#endif + 0xC3 /* ret */ +}; + +void hashx_compile_x86(const hashx_program* program, uint8_t* code) { + hashx_vm_rw(code, COMP_CODE_SIZE); + uint8_t* pos = code; + uint8_t* target = NULL; + EMIT(pos, x86_prologue); + for (int i = 0; i < program->code_size; ++i) { + const instruction* instr = &program->code[i]; + switch (instr->opcode) + { + case INSTR_UMULH_R: + EMIT_U64(pos, 0x8b4ce0f749c08b49 | + (((uint64_t)instr->src) << 40) | + (((uint64_t)instr->dst) << 16)); + EMIT_BYTE(pos, 0xc2 + 8 * instr->dst); + break; + case INSTR_SMULH_R: + EMIT_U64(pos, 0x8b4ce8f749c08b49 | + (((uint64_t)instr->src) << 40) | + (((uint64_t)instr->dst) << 16)); + EMIT_BYTE(pos, 0xc2 + 8 * instr->dst); + break; + case INSTR_MUL_R: + EMIT_U32(pos, 0xc0af0f4d | (instr->dst << 27) | (instr->src << 24)); + break; + case INSTR_SUB_R: + EMIT_U16(pos, 0x2b4d); + EMIT_BYTE(pos, 0xc0 | (instr->dst << 3) | instr->src); + break; + case INSTR_XOR_R: + EMIT_U16(pos, 
0x334d); + EMIT_BYTE(pos, 0xc0 | (instr->dst << 3) | instr->src); + break; + case INSTR_ADD_RS: + EMIT_U32(pos, 0x00048d4f | + (instr->dst << 19) | + GEN_SIB(instr->imm32, instr->src, instr->dst) << 24); + break; + case INSTR_ROR_C: + EMIT_U32(pos, 0x00c8c149 | (instr->dst << 16) | (instr->imm32 << 24)); + break; + case INSTR_ADD_C: + EMIT_U16(pos, 0x8149); + EMIT_BYTE(pos, 0xc0 | instr->dst); + EMIT_U32(pos, instr->imm32); + break; + case INSTR_XOR_C: + EMIT_U16(pos, 0x8149); + EMIT_BYTE(pos, 0xf0 | instr->dst); + EMIT_U32(pos, instr->imm32); + break; + case INSTR_TARGET: + target = pos; /* +2 */ + EMIT_U32(pos, 0x440fff85); + EMIT_BYTE(pos, 0xf7); + break; + case INSTR_BRANCH: + EMIT_U64(pos, ((uint64_t)instr->imm32) << 32 | 0xc2f7f209); + EMIT_U16(pos, ((target - pos) << 8) | 0x74); + break; + default: + UNREACHABLE; + } + } + EMIT(pos, x86_epilogue); + hashx_vm_rx(code, COMP_CODE_SIZE); +} + +#endif diff --git a/src/ext/equix/hashx/src/context.c b/src/ext/equix/hashx/src/context.c new file mode 100644 index 0000000000..de2144d46c --- /dev/null +++ b/src/ext/equix/hashx/src/context.c @@ -0,0 +1,81 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#include <stdlib.h> +#include <string.h> + +#include <hashx.h> +#include "context.h" +#include "compiler.h" +#include "program.h" + +#define STRINGIZE_INNER(x) #x +#define STRINGIZE(x) STRINGIZE_INNER(x) + +/* Salt used when generating hash functions. Useful for domain separation. 
*/ +#ifndef HASHX_SALT +#define HASHX_SALT HashX v1 +#endif + +/* Blake2b params used to generate program keys */ +const blake2b_param hashx_blake2_params = { + .digest_length = 64, + .key_length = 0, + .fanout = 1, + .depth = 1, + .leaf_length = 0, + .node_offset = 0, + .node_depth = 0, + .inner_length = 0, + .reserved = { 0 }, + .salt = STRINGIZE(HASHX_SALT), + .personal = { 0 } +}; + +hashx_ctx* hashx_alloc(hashx_type type) { + if (!HASHX_COMPILER && (type & HASHX_COMPILED)) { + return HASHX_NOTSUPP; + } + hashx_ctx* ctx = malloc(sizeof(hashx_ctx)); + if (ctx == NULL) { + goto failure; + } + ctx->code = NULL; + if (type & HASHX_COMPILED) { + if (!hashx_compiler_init(ctx)) { + goto failure; + } + ctx->type = HASHX_COMPILED; + } + else { + ctx->program = malloc(sizeof(hashx_program)); + if (ctx->program == NULL) { + goto failure; + } + ctx->type = HASHX_INTERPRETED; + } +#ifdef HASHX_BLOCK_MODE + memcpy(&ctx->params, &hashx_blake2_params, 32); +#endif +#ifndef NDEBUG + ctx->has_program = false; +#endif + return ctx; +failure: + hashx_free(ctx); + return NULL; +} + +void hashx_free(hashx_ctx* ctx) { + if (ctx != NULL && ctx != HASHX_NOTSUPP) { + if (ctx->code != NULL) { + if (ctx->type & HASHX_COMPILED) { + hashx_compiler_destroy(ctx); + } + else { + free(ctx->program); + } + } + free(ctx); + } +} diff --git a/src/ext/equix/hashx/src/context.h b/src/ext/equix/hashx/src/context.h new file mode 100644 index 0000000000..40736397f8 --- /dev/null +++ b/src/ext/equix/hashx/src/context.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#ifndef CONTEXT_H +#define CONTEXT_H + +#include <stdbool.h> + +#include "hashx.h" +#include "blake2.h" +#include "siphash.h" + +typedef void program_func(uint64_t r[8]); + +#ifdef __cplusplus +extern "C" { +#endif + +HASHX_PRIVATE extern const blake2b_param hashx_blake2_params; + +#ifdef __cplusplus +} +#endif + +typedef struct hashx_program hashx_program; + +/* HashX 
context. */ +typedef struct hashx_ctx { + union { + uint8_t* code; + program_func* func; + hashx_program* program; + }; + hashx_type type; +#ifndef HASHX_BLOCK_MODE + siphash_state keys; +#else + blake2b_param params; +#endif +#ifndef NDEBUG + bool has_program; +#endif +} hashx_ctx; + +#endif diff --git a/src/ext/equix/hashx/src/force_inline.h b/src/ext/equix/hashx/src/force_inline.h new file mode 100644 index 0000000000..d9af3f32e8 --- /dev/null +++ b/src/ext/equix/hashx/src/force_inline.h @@ -0,0 +1,9 @@ +#ifndef FORCE_INLINE +#if defined(_MSC_VER) +#define FORCE_INLINE __inline +#elif defined(__GNUC__) || defined(__clang__) +#define FORCE_INLINE __inline__ +#else +#define FORCE_INLINE +#endif +#endif
/* Copyright (c) 2020 tevador <tevador@gmail.com> */
/* See LICENSE for licensing information */

#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <hashx.h>
#include "blake2.h"
#include "hashx_endian.h"
#include "program.h"
#include "context.h"
#include "compiler.h"

#if HASHX_SIZE > 32
#error HASHX_SIZE cannot be more than 32
#endif

/* hashx_exec takes (input) in counter mode and (input, size) in block mode. */
#ifndef HASHX_BLOCK_MODE
#define HASHX_INPUT_ARGS input
#else
#define HASHX_INPUT_ARGS input, size
#endif

/* Generate the program from keys[0] and store the finalization key material
   (keys[1], 32 bytes) in the context. Returns 0 when the generated program
   fails the uniform-complexity requirements, in which case the caller must
   reject the seed. */
static int initialize_program(hashx_ctx* ctx, hashx_program* program,
	siphash_state keys[2]) {

	if (!hashx_program_generate(&keys[0], program)) {
		return 0;
	}
#ifndef HASHX_BLOCK_MODE
	memcpy(&ctx->keys, &keys[1], 32);
#else
	/* block mode: the second key becomes the Blake2b salt+personal fields */
	memcpy(&ctx->params.salt, &keys[1], 32);
#endif
#ifndef NDEBUG
	ctx->has_program = true;
#endif
	return 1;
}

/* Derive a hash function from 'seed': Blake2b(seed) -> two siphash keys;
   the first keys the program generator, the second keys finalization.
   Returns 0 if the seed produces an unacceptable program (rare). */
int hashx_make(hashx_ctx* ctx, const void* seed, size_t size) {
	assert(ctx != NULL && ctx != HASHX_NOTSUPP);
	assert(seed != NULL || size == 0);
	siphash_state keys[2];
	blake2b_state hash_state;
	hashx_blake2b_init_param(&hash_state, &hashx_blake2_params);
	hashx_blake2b_update(&hash_state, seed, size);
	hashx_blake2b_final(&hash_state, &keys, sizeof(keys));
	if (ctx->type & HASHX_COMPILED) {
		/* compiled: the program is only needed transiently for codegen */
		hashx_program program;
		if (!initialize_program(ctx, &program, keys)) {
			return 0;
		}
		hashx_compile(&program, ctx->code);
		return 1;
	}
	return initialize_program(ctx, ctx->program, keys);
}

/* Evaluate the seeded hash function on one input, writing HASHX_SIZE bytes
   to 'output'. Requires a successful prior hashx_make on 'ctx'. */
void hashx_exec(const hashx_ctx* ctx, HASHX_INPUT, void* output) {
	assert(ctx != NULL && ctx != HASHX_NOTSUPP);
	assert(output != NULL);
	assert(ctx->has_program);
	uint64_t r[8];
	/* expand the input into the 8-register initial state */
#ifndef HASHX_BLOCK_MODE
	hashx_siphash24_ctr_state512(&ctx->keys, input, r);
#else
	hashx_blake2b_4r(&ctx->params, input, size, r);
#endif

	if (ctx->type & HASHX_COMPILED) {
		ctx->func(r);
	}
	else {
		hashx_program_execute(ctx->program, r);
	}

	/* Hash finalization to remove bias toward 0 caused by multiplications */
#ifndef HASHX_BLOCK_MODE
	r[0] += ctx->keys.v0;
	r[1] += ctx->keys.v1;
	r[6] += ctx->keys.v2;
	r[7] += ctx->keys.v3;
#else
	const uint8_t* p = (const uint8_t*)&ctx->params;
	r[0] ^= load64(&p[8 * 0]);
	r[1] ^= load64(&p[8 * 1]);
	r[2] ^= load64(&p[8 * 2]);
	r[3] ^= load64(&p[8 * 3]);
	r[4] ^= load64(&p[8 * 4]);
	r[5] ^= load64(&p[8 * 5]);
	r[6] ^= load64(&p[8 * 6]);
	r[7] ^= load64(&p[8 * 7]);
#endif
	/* 1 SipRound per 4 registers is enough to pass SMHasher. */
	SIPROUND(r[0], r[1], r[2], r[3]);
	SIPROUND(r[4], r[5], r[6], r[7]);

	/* output */
#if HASHX_SIZE > 0
	/* optimized output for hash sizes that are multiples of 8 */
#if HASHX_SIZE % 8 == 0
	uint8_t* temp_out = (uint8_t*)output;
#if HASHX_SIZE >= 8
	store64(temp_out + 0, r[0] ^ r[4]);
#endif
#if HASHX_SIZE >= 16
	store64(temp_out + 8, r[1] ^ r[5]);
#endif
#if HASHX_SIZE >= 24
	store64(temp_out + 16, r[2] ^ r[6]);
#endif
#if HASHX_SIZE >= 32
	store64(temp_out + 24, r[3] ^ r[7]);
#endif
#else /* any output size */
	/* stage into a full 32-byte buffer, then copy the requested prefix */
	uint8_t temp_out[32];
#if HASHX_SIZE > 0
	store64(temp_out + 0, r[0] ^ r[4]);
#endif
#if HASHX_SIZE > 8
	store64(temp_out + 8, r[1] ^ r[5]);
#endif
#if HASHX_SIZE > 16
	store64(temp_out + 16, r[2] ^ r[6]);
#endif
#if HASHX_SIZE > 24
	store64(temp_out + 24, r[3] ^ r[7]);
#endif
	memcpy(output, temp_out, HASHX_SIZE);
#endif
#endif
}

#ifndef ENDIAN_H
#define ENDIAN_H

#include <stdint.h>
#include <string.h>
#include "force_inline.h"

/* Argon2 Team - Begin Code */
/*
   Not an exhaustive list, but should cover the
majority of modern platforms
   Additionally, the code will always be correct---this is only a performance
   tweak.
*/
#if (defined(__BYTE_ORDER__) && \
	(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
	defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
	defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
	defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
	defined(_M_ARM)
#define NATIVE_LITTLE_ENDIAN
#endif
/* Argon2 Team - End Code */

/* Read a 32-bit little-endian value from possibly unaligned memory.
   memcpy on the fast path avoids strict-aliasing and alignment UB. */
static FORCE_INLINE uint32_t load32(const void* src) {
#if defined(NATIVE_LITTLE_ENDIAN)
	uint32_t w;
	memcpy(&w, src, sizeof w);
	return w;
#else
	const uint8_t* p = (const uint8_t*)src;
	uint32_t w = *p++;
	w |= (uint32_t)(*p++) << 8;
	w |= (uint32_t)(*p++) << 16;
	w |= (uint32_t)(*p++) << 24;
	return w;
#endif
}

/* Read 64 bits in host byte order (no endian conversion). */
static FORCE_INLINE uint64_t load64_native(const void* src) {
	uint64_t w;
	memcpy(&w, src, sizeof w);
	return w;
}

/* Read a 64-bit little-endian value from possibly unaligned memory. */
static FORCE_INLINE uint64_t load64(const void* src) {
#if defined(NATIVE_LITTLE_ENDIAN)
	return load64_native(src);
#else
	const uint8_t* p = (const uint8_t*)src;
	uint64_t w = *p++;
	w |= (uint64_t)(*p++) << 8;
	w |= (uint64_t)(*p++) << 16;
	w |= (uint64_t)(*p++) << 24;
	w |= (uint64_t)(*p++) << 32;
	w |= (uint64_t)(*p++) << 40;
	w |= (uint64_t)(*p++) << 48;
	w |= (uint64_t)(*p++) << 56;
	return w;
#endif
}

/* Write a 32-bit value as little-endian to possibly unaligned memory. */
static FORCE_INLINE void store32(void* dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
	memcpy(dst, &w, sizeof w);
#else
	uint8_t* p = (uint8_t*)dst;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
#endif
}

/* Write 64 bits in host byte order (no endian conversion). */
static FORCE_INLINE void store64_native(void* dst, uint64_t w) {
	memcpy(dst, &w, sizeof w);
}

/* Write a 64-bit value as little-endian to possibly unaligned memory. */
static FORCE_INLINE void store64(void* dst, uint64_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
	store64_native(dst, w);
#else
	uint8_t* p = (uint8_t*)dst;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
#endif
}
#endif

/* Copyright (c) 2020 tevador <tevador@gmail.com> */
/* See LICENSE for licensing information */

#include "hashx_thread.h"

/* Start a thread running func(args).
   Returns the platform handle; on the pthread path a failed create yields a
   zero handle. NOTE(review): callers appear to treat the handle as opaque -
   a zero pthread_t is not guaranteed distinguishable by POSIX; confirm the
   bench driver only joins threads it successfully created. */
hashx_thread hashx_thread_create(hashx_thread_func* func, void* args) {
#ifdef HASHX_WIN
	return CreateThread(NULL, 0, func, args, 0, NULL);
#else
	hashx_thread thread;
	if (pthread_create(&thread, NULL, func, args) != 0)
	{
		thread = 0;
	}
	return thread;
#endif
}

/* Block until the thread exits; the thread's return value is discarded. */
void hashx_thread_join(hashx_thread thread) {
#ifdef HASHX_WIN
	WaitForSingleObject(thread, INFINITE);
	CloseHandle(thread);
#else
	void* retval;
	pthread_join(thread, &retval);
#endif
}

/* Copyright (c) 2020 tevador <tevador@gmail.com> */
/* See LICENSE for licensing information */

#ifndef HASHX_THREAD_H
#define HASHX_THREAD_H

#include <hashx.h>

/* Thin portability layer over Win32 threads and pthreads, used only by the
   benchmark driver. */
#ifdef HASHX_WIN
#include <Windows.h>
typedef HANDLE hashx_thread;
typedef DWORD hashx_thread_retval;
#define HASHX_THREAD_SUCCESS 0
#else
#include <pthread.h>
typedef pthread_t hashx_thread;
typedef void* hashx_thread_retval;
#define HASHX_THREAD_SUCCESS NULL
#endif

typedef hashx_thread_retval hashx_thread_func(void* args);

hashx_thread hashx_thread_create(hashx_thread_func* func, void* args);

void hashx_thread_join(hashx_thread thread);

#endif
mode 100644 index 0000000000..d3eaaed10e --- /dev/null +++ b/src/ext/equix/hashx/src/hashx_time.c @@ -0,0 +1,35 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#include "hashx_time.h" +#include <hashx.h> + +#if defined(HASHX_WIN) +#include <windows.h> +#else +#include <sys/time.h> +#endif + +double hashx_time() { +#ifdef HASHX_WIN + static double freq = 0; + if (freq == 0) { + LARGE_INTEGER freq_long; + if (!QueryPerformanceFrequency(&freq_long)) { + return 0; + } + freq = freq_long.QuadPart; + } + LARGE_INTEGER time; + if (!QueryPerformanceCounter(&time)) { + return 0; + } + return time.QuadPart / freq; +#else + struct timeval time; + if (gettimeofday(&time, NULL) != 0) { + return 0; + } + return (double)time.tv_sec + (double)time.tv_usec * 1.0e-6; +#endif +} diff --git a/src/ext/equix/hashx/src/hashx_time.h b/src/ext/equix/hashx/src/hashx_time.h new file mode 100644 index 0000000000..da1ca746fc --- /dev/null +++ b/src/ext/equix/hashx/src/hashx_time.h @@ -0,0 +1,9 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#ifndef HASHX_TIME_H +#define HASHX_TIME_H + +double hashx_time(void); + +#endif diff --git a/src/ext/equix/hashx/src/instruction.h b/src/ext/equix/hashx/src/instruction.h new file mode 100644 index 0000000000..f17582ffea --- /dev/null +++ b/src/ext/equix/hashx/src/instruction.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#ifndef INSTRUCTION_H +#define INSTRUCTION_H + +#include <stdint.h> + +typedef enum instr_type { + INSTR_UMULH_R, /* unsigned high multiplication by a register */ + INSTR_SMULH_R, /* signed high multiplication by a register */ + INSTR_MUL_R, /* multiplication by a register */ + INSTR_SUB_R, /* subtraction of a register */ + INSTR_XOR_R, /* xor with a register */ + INSTR_ADD_RS, /* addition of a shifted register */ + INSTR_ROR_C, /* rotation by a constant 
*/ + INSTR_ADD_C, /* addition of a constant */ + INSTR_XOR_C, /* xor with a constant */ + INSTR_TARGET, /* branch instruction target */ + INSTR_BRANCH, /* conditional branch */ +} instr_type; + +typedef struct instruction { + instr_type opcode; + int src; + int dst; + uint32_t imm32; + uint32_t op_par; +} instruction; + +#endif diff --git a/src/ext/equix/hashx/src/program.c b/src/ext/equix/hashx/src/program.c new file mode 100644 index 0000000000..7f4b0cef0a --- /dev/null +++ b/src/ext/equix/hashx/src/program.c @@ -0,0 +1,771 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "program.h" +#include "unreachable.h" +#include "siphash_rng.h" + +/* instructions are generated until this CPU cycle */ +#define TARGET_CYCLE 192 + +/* requirements for the program to be acceptable */ +#define REQUIREMENT_SIZE 512 +#define REQUIREMENT_MUL_COUNT 192 +#define REQUIREMENT_LATENCY 195 + +/* R5 (x86 = r13) register cannot be used as the destination of INSTR_ADD_RS */ +#define REGISTER_NEEDS_DISPLACEMENT 5 + +#define PORT_MAP_SIZE (TARGET_CYCLE + 4) +#define NUM_PORTS 3 +#define MAX_RETRIES 1 +#define LOG2_BRANCH_PROB 4 +#define BRANCH_MASK 0x80000000 + +#define TRACE false +#define TRACE_PRINT(...) do { if (TRACE) printf(__VA_ARGS__); } while (false) + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +/* If the instruction is a multiplication. */ +static inline bool is_mul(instr_type type) { + return type <= INSTR_MUL_R; +} + +/* If the instruction is a 64x64->128 bit multiplication. 
*/ +static inline bool is_wide_mul(instr_type type) { + return type < INSTR_MUL_R; +} + +/* Ivy Bridge integer execution ports: P0, P1, P5 */ +typedef enum execution_port { + PORT_NONE = 0, + PORT_P0 = 1, + PORT_P1 = 2, + PORT_P5 = 4, + PORT_P01 = PORT_P0 | PORT_P1, + PORT_P05 = PORT_P0 | PORT_P5, + PORT_P015 = PORT_P0 | PORT_P1 | PORT_P5 +} execution_port; + +static const char* execution_port_names[] = { + "PORT_NONE", "PORT_P0", "PORT_P1", "PORT_P01", "PORT_P5", "PORT_P05", "PORT_P15", "PORT_P015" +}; + +typedef struct instr_template { + instr_type type; /* instruction type */ + const char* x86_asm; /* x86 assembly */ + int x86_size; /* x86 code size */ + int latency; /* latency in cycles */ + execution_port uop1; /* ex. ports for the 1st uop */ + execution_port uop2; /* ex. ports for the 2nd uop */ + uint32_t immediate_mask; /* mask for imm32 */ + instr_type group; /* instruction group */ + bool imm_can_be_0; /* if imm32 can be zero */ + bool distinct_dst; /* if dst and src must be distinct */ + bool op_par_src; /* operation parameter is equal to src */ + bool has_src; /* if the instruction has a source operand */ + bool has_dst; /* if the instr. 
has a destination operand */ +} instr_template; + +typedef struct register_info { + int latency; /* cycle when the register value will be ready */ + instr_type last_op; /* last op applied to the register */ + int last_op_par; /* parameter of the last op (-1 = constant) */ +} register_info; + +typedef struct program_item { + const instr_template** templates; + uint32_t mask0; + uint32_t mask1; + bool duplicates; +} program_item; + +typedef struct generator_ctx { + int cycle; + int sub_cycle; + int mul_count; + bool chain_mul; + int latency; + siphash_rng gen; + register_info registers[8]; + execution_port ports[PORT_MAP_SIZE][NUM_PORTS]; +} generator_ctx; + +const static instr_template tpl_umulh_r = { + .type = INSTR_UMULH_R, + .x86_asm = "mul r", + .x86_size = 9, /* mov, mul, mov */ + .latency = 4, + .uop1 = PORT_P1, + .uop2 = PORT_P5, + .immediate_mask = 0, + .group = INSTR_UMULH_R, + .imm_can_be_0 = false, + .distinct_dst = false, + .op_par_src = false, + .has_src = true, + .has_dst = true, +}; + +const static instr_template tpl_smulh_r = { + .type = INSTR_SMULH_R, + .x86_asm = "imul r", + .x86_size = 9, /* mov, mul, mov */ + .latency = 4, + .uop1 = PORT_P1, + .uop2 = PORT_P5, + .immediate_mask = 0, + .group = INSTR_SMULH_R, + .imm_can_be_0 = false, + .distinct_dst = false, + .op_par_src = false, + .has_src = true, + .has_dst = true, +}; + +const static instr_template tpl_mul_r = { + .type = INSTR_MUL_R, + .x86_asm = "imul r,r", + .x86_size = 4, + .latency = 3, + .uop1 = PORT_P1, + .uop2 = PORT_NONE, + .immediate_mask = 0, + .group = INSTR_MUL_R, + .imm_can_be_0 = false, + .distinct_dst = true, + .op_par_src = true, + .has_src = true, + .has_dst = true, +}; + +const static instr_template tpl_sub_r = { + .type = INSTR_SUB_R, + .x86_asm = "sub r,r", + .x86_size = 3, + .latency = 1, + .uop1 = PORT_P015, + .uop2 = PORT_NONE, + .immediate_mask = 0, + .group = INSTR_ADD_RS, + .imm_can_be_0 = false, + .distinct_dst = true, + .op_par_src = true, + .has_src = true, + 
.has_dst = true, +}; + +const static instr_template tpl_xor_r = { + .type = INSTR_XOR_R, + .x86_asm = "xor r,r", + .x86_size = 3, + .latency = 1, + .uop1 = PORT_P015, + .uop2 = PORT_NONE, + .immediate_mask = 0, + .group = INSTR_XOR_R, + .imm_can_be_0 = false, + .distinct_dst = true, + .op_par_src = true, + .has_src = true, + .has_dst = true, +}; + +const static instr_template tpl_add_rs = { + .type = INSTR_ADD_RS, + .x86_asm = "lea r,r+r*s", + .x86_size = 4, + .latency = 1, + .uop1 = PORT_P01, + .uop2 = PORT_NONE, + .immediate_mask = 3, + .group = INSTR_ADD_RS, + .imm_can_be_0 = true, + .distinct_dst = true, + .op_par_src = true, + .has_src = true, + .has_dst = true, +}; + +const static instr_template tpl_ror_c = { + .type = INSTR_ROR_C, + .x86_asm = "ror r,i", + .x86_size = 4, + .latency = 1, + .uop1 = PORT_P05, + .uop2 = PORT_NONE, + .immediate_mask = 63, + .group = INSTR_ROR_C, + .imm_can_be_0 = false, + .distinct_dst = true, + .op_par_src = false, + .has_src = false, + .has_dst = true, +}; + +const static instr_template tpl_add_c = { + .type = INSTR_ADD_C, + .x86_asm = "add r,i", + .x86_size = 7, + .latency = 1, + .uop1 = PORT_P015, + .uop2 = PORT_NONE, + .immediate_mask = UINT32_MAX, + .group = INSTR_ADD_C, + .imm_can_be_0 = false, + .distinct_dst = true, + .op_par_src = false, + .has_src = false, + .has_dst = true, +}; + +const static instr_template tpl_xor_c = { + .type = INSTR_XOR_C, + .x86_asm = "xor r,i", + .x86_size = 7, + .latency = 1, + .uop1 = PORT_P015, + .uop2 = PORT_NONE, + .immediate_mask = UINT32_MAX, + .group = INSTR_XOR_C, + .imm_can_be_0 = false, + .distinct_dst = true, + .op_par_src = false, + .has_src = false, + .has_dst = true, +}; + + +const static instr_template tpl_target = { + .type = INSTR_TARGET, + .x86_asm = "cmovz esi, edi", + .x86_size = 5, /* test, cmovz */ + .latency = 1, + .uop1 = PORT_P015, + .uop2 = PORT_P015, + .immediate_mask = 0, + .group = INSTR_TARGET, + .imm_can_be_0 = false, + .distinct_dst = true, + .op_par_src = 
false, + .has_src = false, + .has_dst = false, +}; + +const static instr_template tpl_branch = { + .type = INSTR_BRANCH, + .x86_asm = "jz target", + .x86_size = 10, /* or, test, jz */ + .latency = 1, + .uop1 = PORT_P015, + .uop2 = PORT_P015, + .immediate_mask = BRANCH_MASK, + .group = INSTR_BRANCH, + .imm_can_be_0 = false, + .distinct_dst = true, + .op_par_src = false, + .has_src = false, + .has_dst = false, +}; + +const static instr_template* instr_lookup[] = { + &tpl_ror_c, + &tpl_xor_c, + &tpl_add_c, + &tpl_add_c, + &tpl_sub_r, + &tpl_xor_r, + &tpl_xor_c, + &tpl_add_rs, +}; + +const static instr_template* wide_mul_lookup[] = { + &tpl_smulh_r, + &tpl_umulh_r +}; + +const static instr_template* mul_lookup = &tpl_mul_r; +const static instr_template* target_lookup = &tpl_target; +const static instr_template* branch_lookup = &tpl_branch; + +const static program_item item_mul = { + .templates = &mul_lookup, + .mask0 = 0, + .mask1 = 0, + .duplicates = true +}; + +const static program_item item_target = { + .templates = &target_lookup, + .mask0 = 0, + .mask1 = 0, + .duplicates = true +}; + +const static program_item item_branch = { + .templates = &branch_lookup, + .mask0 = 0, + .mask1 = 0, + .duplicates = true +}; + +const static program_item item_wide_mul = { + .templates = wide_mul_lookup, + .mask0 = 1, + .mask1 = 1, + .duplicates = true +}; + +const static program_item item_any = { + .templates = instr_lookup, + .mask0 = 7, + .mask1 = 3, /* instructions that don't need a src register */ + .duplicates = false +}; + +const static program_item* program_layout[] = { + &item_mul, + &item_target, + &item_any, + &item_mul, + &item_any, + &item_any, + &item_mul, + &item_any, + &item_any, + &item_mul, + &item_any, + &item_any, + &item_wide_mul, + &item_any, + &item_any, + &item_mul, + &item_any, + &item_any, + &item_mul, + &item_branch, + &item_any, + &item_mul, + &item_any, + &item_any, + &item_wide_mul, + &item_any, + &item_any, + &item_mul, + &item_any, + &item_any, + 
&item_mul, + &item_any, + &item_any, + &item_mul, + &item_any, + &item_any, +}; + +static const instr_template* select_template(generator_ctx* ctx, instr_type last_instr, int attempt) { + const program_item* item = program_layout[ctx->sub_cycle % 36]; + const instr_template* tpl; + do { + int index = item->mask0 ? hashx_siphash_rng_u8(&ctx->gen) & (attempt > 0 ? item->mask1 : item->mask0) : 0; + tpl = item->templates[index]; + } while (!item->duplicates && tpl->group == last_instr); + return tpl; +} + +static uint32_t branch_mask(siphash_rng* gen) { + uint32_t mask = 0; + int popcnt = 0; + while (popcnt < LOG2_BRANCH_PROB) { + int bit = hashx_siphash_rng_u8(gen) % 32; + uint32_t bitmask = 1U << bit; + if (!(mask & bitmask)) { + mask |= bitmask; + popcnt++; + } + } + return mask; +} + +static void instr_from_template(const instr_template* tpl, siphash_rng* gen, instruction* instr) { + instr->opcode = tpl->type; + if (tpl->immediate_mask) { + if (tpl->immediate_mask == BRANCH_MASK) { + instr->imm32 = branch_mask(gen); + } + else do { + instr->imm32 = hashx_siphash_rng_u32(gen) & tpl->immediate_mask; + } while (instr->imm32 == 0 && !tpl->imm_can_be_0); + } + if (!tpl->op_par_src) { + if (tpl->distinct_dst) { + instr->op_par = UINT32_MAX; + } + else { + instr->op_par = hashx_siphash_rng_u32(gen); + } + } + if (!tpl->has_src) { + instr->src = -1; + } + if (!tpl->has_dst) { + instr->dst = -1; + } +} + +static bool select_register(int available_regs[8], int regs_count, siphash_rng* gen, int* reg_out) { + if (regs_count == 0) + return false; + + int index; + + if (regs_count > 1) { + index = hashx_siphash_rng_u32(gen) % regs_count; + } + else { + index = 0; + } + *reg_out = available_regs[index]; + return true; +} + +static bool select_destination(const instr_template* tpl, instruction* instr, generator_ctx* ctx, int cycle) { + int available_regs[8]; + int regs_count = 0; + /* Conditions for the destination register: + // * value must be ready at the required cycle + // * 
cannot be the same as the source register unless the instruction allows it + // - this avoids optimizable instructions such as "xor r, r" or "sub r, r" + // * register cannot be multiplied twice in a row unless chain_mul is true + // - this avoids accumulation of trailing zeroes in registers due to excessive multiplication + // - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator) + // * either the last instruction applied to the register or its source must be different than this instruction + // - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2" + // * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction) */ + for (int i = 0; i < 8; ++i) { + bool available = ctx->registers[i].latency <= cycle; + available &= ((!tpl->distinct_dst) | (i != instr->src)); + available &= (ctx->chain_mul | (tpl->group != INSTR_MUL_R) | (ctx->registers[i].last_op != INSTR_MUL_R)); + available &= ((ctx->registers[i].last_op != tpl->group) | (ctx->registers[i].last_op_par != instr->op_par)); + available &= ((instr->opcode != INSTR_ADD_RS) | (i != REGISTER_NEEDS_DISPLACEMENT)); + available_regs[regs_count] = available ? 
i : 0; + regs_count += available; + } + return select_register(available_regs, regs_count, &ctx->gen, &instr->dst); +} + +static bool select_source(const instr_template* tpl, instruction* instr, generator_ctx* ctx, int cycle) { + int available_regs[8]; + int regs_count = 0; + /* all registers that are ready at the cycle */ + for (int i = 0; i < 8; ++i) { + if (ctx->registers[i].latency <= cycle) + available_regs[regs_count++] = i; + } + /* if there are only 2 available registers for ADD_RS and one of them is r5, select it as the source because it cannot be the destination */ + if (regs_count == 2 && instr->opcode == INSTR_ADD_RS) { + if (available_regs[0] == REGISTER_NEEDS_DISPLACEMENT || available_regs[1] == REGISTER_NEEDS_DISPLACEMENT) { + instr->op_par = instr->src = REGISTER_NEEDS_DISPLACEMENT; + return true; + } + } + if (select_register(available_regs, regs_count, &ctx->gen, &instr->src)) { + if (tpl->op_par_src) + instr->op_par = instr->src; + return true; + } + return false; +} + +static int schedule_uop(execution_port uop, generator_ctx* ctx, int cycle, bool commit) { + /* The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload + port P1 (multiplication) by instructions that can go to any port. 
*/ + for (; cycle < PORT_MAP_SIZE; ++cycle) { + if ((uop & PORT_P5) && !ctx->ports[cycle][2]) { + if (commit) { + ctx->ports[cycle][2] = uop; + } + TRACE_PRINT("%s scheduled to port P5 at cycle %i (commit = %i)\n", execution_port_names[uop], cycle, commit); + return cycle; + } + if ((uop & PORT_P0) && !ctx->ports[cycle][0]) { + if (commit) { + ctx->ports[cycle][0] = uop; + } + TRACE_PRINT("%s scheduled to port P0 at cycle %i (commit = %i)\n", execution_port_names[uop], cycle, commit); + return cycle; + } + if ((uop & PORT_P1) != 0 && !ctx->ports[cycle][1]) { + if (commit) { + ctx->ports[cycle][1] = uop; + } + TRACE_PRINT("%s scheduled to port P1 at cycle %i (commit = %i)\n", execution_port_names[uop], cycle, commit); + return cycle; + } + } + return -1; +} + +static int schedule_instr(const instr_template* tpl, generator_ctx* ctx, bool commit) { + if (tpl->uop2 == PORT_NONE) { + /* this instruction has only one uOP */ + return schedule_uop(tpl->uop1, ctx, ctx->cycle, commit); + } + else { + /* instructions with 2 uOPs are scheduled conservatively by requiring both uOPs to execute in the same cycle */ + for (int cycle = ctx->cycle; cycle < PORT_MAP_SIZE; ++cycle) { + + int cycle1 = schedule_uop(tpl->uop1, ctx, cycle, false); + int cycle2 = schedule_uop(tpl->uop2, ctx, cycle, false); + + if (cycle1 >= 0 && cycle1 == cycle2) { + if (commit) { + schedule_uop(tpl->uop1, ctx, cycle, true); + schedule_uop(tpl->uop2, ctx, cycle, true); + } + return cycle1; + } + } + } + + return -1; +} + +static void print_registers(const generator_ctx* ctx) { + for (int i = 0; i < 8; ++i) { + printf(" R%i = %i\n", i, ctx->registers[i].latency); + } +} + +bool hashx_program_generate(const siphash_state* key, hashx_program* program) { + generator_ctx ctx = { + .cycle = 0, + .sub_cycle = 0, /* 3 sub-cycles = 1 cycle */ + .mul_count = 0, + .chain_mul = false, + .latency = 0, + .ports = { 0 } + }; + hashx_siphash_rng_init(&ctx.gen, key); + for (int i = 0; i < 8; ++i) { + 
ctx.registers[i].last_op = -1; + ctx.registers[i].latency = 0; + ctx.registers[i].last_op_par = -1; + } + program->code_size = 0; + + int attempt = 0; + instr_type last_instr = -1; +#ifdef HASHX_PROGRAM_STATS + program->x86_size = 0; +#endif + + while (program->code_size < HASHX_PROGRAM_MAX_SIZE) { + instruction* instr = &program->code[program->code_size]; + TRACE_PRINT("CYCLE: %i/%i\n", ctx.sub_cycle, ctx.cycle); + + /* select an instruction template */ + const instr_template* tpl = select_template(&ctx, last_instr, attempt); + last_instr = tpl->group; + + TRACE_PRINT("Template: %s\n", tpl->x86_asm); + + instr_from_template(tpl, &ctx.gen, instr); + + /* calculate the earliest cycle when this instruction (all of its uOPs) can be scheduled for execution */ + int scheduleCycle = schedule_instr(tpl, &ctx, false); + if (scheduleCycle < 0) { + TRACE_PRINT("Unable to map operation '%s' to execution port (cycle %i)\n", tpl->x86_asm, ctx.cycle); + /* __debugbreak(); */ + break; + } + + ctx.chain_mul = attempt > 0; + + /* find a source register (if applicable) that will be ready when this instruction executes */ + if (tpl->has_src) { + if (!select_source(tpl, instr, &ctx, scheduleCycle)) { + TRACE_PRINT("; src STALL (attempt %i)\n", attempt); + if (attempt++ < MAX_RETRIES) { + continue; + } + if (TRACE) { + printf("; select_source FAILED at cycle %i\n", ctx.cycle); + print_registers(&ctx); + /* __debugbreak(); */ + } + ctx.sub_cycle += 3; + ctx.cycle = ctx.sub_cycle / 3; + attempt = 0; + continue; + } + TRACE_PRINT("; src = r%i\n", instr->src); + } + + /* find a destination register that will be ready when this instruction executes */ + if (tpl->has_dst) { + if (!select_destination(tpl, instr, &ctx, scheduleCycle)) { + TRACE_PRINT("; dst STALL (attempt %i)\n", attempt); + if (attempt++ < MAX_RETRIES) { + continue; + } + if (TRACE) { + printf("; select_destination FAILED at cycle %i\n", ctx.cycle); + print_registers(&ctx); + /* __debugbreak(); */ + } + ctx.sub_cycle += 3; + 
ctx.cycle = ctx.sub_cycle / 3; + attempt = 0; + continue; + } + TRACE_PRINT("; dst = r%i\n", instr->dst); + } + attempt = 0; + + /* recalculate when the instruction can be scheduled for execution based on operand availability */ + scheduleCycle = schedule_instr(tpl, &ctx, true); + + if (scheduleCycle < 0) { + TRACE_PRINT("Unable to map operation '%s' to execution port (cycle %i)\n", tpl->x86_asm, ctx.cycle); + break; + } + + /* terminating condition */ + if (scheduleCycle >= TARGET_CYCLE) { + break; + } + + if (tpl->has_dst) { + register_info* ri = &ctx.registers[instr->dst]; + int retireCycle = scheduleCycle + tpl->latency; + ri->latency = retireCycle; + ri->last_op = tpl->group; + ri->last_op_par = instr->op_par; + ctx.latency = MAX(retireCycle, ctx.latency); + TRACE_PRINT("; RETIRED at cycle %i\n", retireCycle); + } + + program->code_size++; +#ifdef HASHX_PROGRAM_STATS + program->x86_size += tpl->x86_size; +#endif + + ctx.mul_count += is_mul(instr->opcode); + + ++ctx.sub_cycle; + ctx.sub_cycle += (tpl->uop2 != PORT_NONE); + ctx.cycle = ctx.sub_cycle / 3; + } + +#ifdef HASHX_PROGRAM_STATS + memset(program->asic_latencies, 0, sizeof(program->asic_latencies)); + + program->counter = ctx.gen.counter; + program->wide_mul_count = 0; + program->mul_count = ctx.mul_count; + + /* Calculate ASIC latency: + Assumes 1 cycle latency for all operations and unlimited parallelization. */ + for (int i = 0; i < program->code_size; ++i) { + instruction* instr = &program->code[i]; + if (instr->dst < 0) + continue; + int last_dst = program->asic_latencies[instr->dst] + 1; + int lat_src = instr->dst != instr->src ? 
program->asic_latencies[instr->src] + 1 : 0; + program->asic_latencies[instr->dst] = MAX(last_dst, lat_src); + program->wide_mul_count += is_wide_mul(instr->opcode); + } + + program->asic_latency = 0; + program->cpu_latency = 0; + for (int i = 0; i < 8; ++i) { + program->asic_latency = MAX(program->asic_latency, program->asic_latencies[i]); + program->cpu_latencies[i] = ctx.registers[i].latency; + program->cpu_latency = MAX(program->cpu_latency, program->cpu_latencies[i]); + } + + program->ipc = program->code_size / (double)program->cpu_latency; + program->branch_count = 0; + memset(program->branches, 0, sizeof(program->branches)); + + if (TRACE) { + printf("; ALU port utilization:\n"); + printf("; (* = in use, _ = idle)\n"); + for (int i = 0; i < PORT_MAP_SIZE; ++i) { + printf("; %3i ", i); + for (int j = 0; j < NUM_PORTS; ++j) { + printf("%c", (ctx.ports[i][j] ? '*' : '_')); + } + printf("\n"); + } + } +#endif + + /* reject programs that don't meet the uniform complexity requirements */ + /* this happens in less than 1 seed out of 10000 */ + return + (program->code_size == REQUIREMENT_SIZE) & + (ctx.mul_count == REQUIREMENT_MUL_COUNT) & + (ctx.latency == REQUIREMENT_LATENCY - 1); /* cycles are numbered from 0 */ +} + +static const char* x86_reg_map[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; + +void hashx_program_asm_x86(const hashx_program* program) { + int target = 0; + for (unsigned i = 0; i < program->code_size; ++i) { + const instruction* instr = &program->code[i]; + switch (instr->opcode) + { + case INSTR_SUB_R: + printf("sub %s, %s\n", x86_reg_map[instr->dst], x86_reg_map[instr->src]); + break; + case INSTR_XOR_R: + printf("xor %s, %s\n", x86_reg_map[instr->dst], x86_reg_map[instr->src]); + break; + case INSTR_ADD_RS: + printf("lea %s, [%s+%s*%u]\n", x86_reg_map[instr->dst], x86_reg_map[instr->dst], x86_reg_map[instr->src], 1 << instr->imm32); + break; + case INSTR_MUL_R: + printf("imul %s, %s\n", x86_reg_map[instr->dst], 
x86_reg_map[instr->src]); + break; + case INSTR_ROR_C: + printf("ror %s, %u\n", x86_reg_map[instr->dst], instr->imm32); + break; + case INSTR_ADD_C: + printf("add %s, %i\n", x86_reg_map[instr->dst], instr->imm32); + break; + case INSTR_XOR_C: + printf("xor %s, %i\n", x86_reg_map[instr->dst], instr->imm32); + break; + case INSTR_UMULH_R: + printf("mov rax, %s\n", x86_reg_map[instr->dst]); + printf("mul %s\n", x86_reg_map[instr->src]); + printf("mov %s, rdx\n", x86_reg_map[instr->dst]); + break; + case INSTR_SMULH_R: + printf("mov rax, %s\n", x86_reg_map[instr->dst]); + printf("imul %s\n", x86_reg_map[instr->src]); + printf("mov %s, rdx\n", x86_reg_map[instr->dst]); + break; + case INSTR_TARGET: + printf("test edi, edi\n"); + printf("target_%i: cmovz esi, edi\n", i); + target = i; + break; + case INSTR_BRANCH: + printf("or edx, esi\n"); + printf("test edx, %i\n", instr->imm32); + printf("jz target_%i\n", target); + break; + default: + UNREACHABLE; + } + } +} diff --git a/src/ext/equix/hashx/src/program.h b/src/ext/equix/hashx/src/program.h new file mode 100644 index 0000000000..096cc4ee0a --- /dev/null +++ b/src/ext/equix/hashx/src/program.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#ifndef PROGRAM_H +#define PROGRAM_H + +#include <stdint.h> +#include <stdbool.h> +#include <hashx.h> +#include "instruction.h" +#include "siphash.h" +#include "blake2.h" + +#define HASHX_PROGRAM_MAX_SIZE 512 + +typedef struct hashx_program { + instruction code[HASHX_PROGRAM_MAX_SIZE]; + size_t code_size; +#ifdef HASHX_PROGRAM_STATS + unsigned counter; + double ipc; + int x86_size; + int cpu_latency; + int asic_latency; + int mul_count; + int wide_mul_count; + int cpu_latencies[8]; + int asic_latencies[8]; + int branch_count; + int branches[16]; +#endif +} hashx_program; + +#ifdef __cplusplus +extern "C" { +#endif + +HASHX_PRIVATE bool hashx_program_generate(const siphash_state* key, hashx_program* program); + 
#ifndef HAVE_ROTR
/* Portable rotate-right fallback.
 * Fix: the naive form `(a >> b) | (a << (64 - b))` shifts by 64 when b == 0,
 * which is undefined behavior (CERT INT34-C). Masking both shift counts
 * makes the rotate well-defined for every b while producing identical
 * results for b in [1,63]. */
static inline uint64_t rotr64(uint64_t a, unsigned int b) {
	return (a >> (b & 63)) | (a << ((64 - b) & 63));
}
#define HAVE_ROTR
#endif

#ifndef HAVE_UMULH
/* Portable 64x64 -> high-64 multiply (schoolbook on 32-bit halves), used
 * when neither __int128 nor a compiler intrinsic is available. */
#define LO(x) ((x)&0xffffffff)
#define HI(x) ((x)>>32)
uint64_t umulh(uint64_t a, uint64_t b) {
	uint64_t ah = HI(a), al = LO(a);
	uint64_t bh = HI(b), bl = LO(b);
	uint64_t x00 = al * bl;
	uint64_t x01 = al * bh;
	uint64_t x10 = ah * bl;
	uint64_t x11 = ah * bh;
	/* middle-column sums; each fits in 64 bits because the three addends
	 * are at most 2^32 - 1 each */
	uint64_t m1 = LO(x10) + LO(x01) + HI(x00);
	uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);
	uint64_t m3 = HI(x11) + HI(m2);

	return (m3 << 32) + LO(m2);
}
#undef LO
#undef HI
#define HAVE_UMULH
#endif

#ifndef HAVE_SMULH
/* Signed high multiply derived from the unsigned one: interpreting the
 * operands as unsigned adds 2^64*b (when a < 0) and 2^64*a (when b < 0)
 * to the true product, so those terms are subtracted from the high word. */
int64_t smulh(int64_t a, int64_t b) {
	int64_t hi = umulh(a, b);
	if (a < 0LL) hi -= b;
	if (b < 0LL) hi -= a;
	return hi;
}
#define HAVE_SMULH
#endif

/* Sign-extend a 32-bit immediate to 64 bits (two's complement). The first
 * branch of the conditional is taken on every two's-complement machine;
 * the fallback computes the same value portably. */
static inline uint64_t sign_extend_2s_compl(uint32_t x) {
	return (-1 == ~0) ?
		(int64_t)(int32_t)(x) :
		(x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
}
/*
 * SipHash in counter mode with 1 compression round and 3 finalization
 * rounds (SipHash-1-3). `input` is the counter; `keys` holds the four
 * pre-initialized state words. Returns a single 64-bit output word.
 * The exact round sequence and xor constants are the algorithm
 * specification - do not reorder.
 */
uint64_t hashx_siphash13_ctr(uint64_t input, const siphash_state* keys) {
	uint64_t v0 = keys->v0;
	uint64_t v1 = keys->v1;
	uint64_t v2 = keys->v2;
	uint64_t v3 = keys->v3;

	v3 ^= input;

	/* 1 compression round */
	SIPROUND(v0, v1, v2, v3);

	v0 ^= input;
	v2 ^= 0xff; /* finalization marker */

	/* 3 finalization rounds */
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);

	return (v0 ^ v1) ^ (v2 ^ v3);
}

/*
 * SipHash-2-4-style counter-mode expansion producing 512 bits of output:
 * 2 compression rounds, then two groups of 4 rounds, snapshotting the
 * full 4-word state after each group into state_out[0..3] and
 * state_out[4..7]. The 0xee/0xdd xor constants follow the SipHash
 * multi-output convention. Used to seed the HashX register file.
 */
void hashx_siphash24_ctr_state512(const siphash_state* keys, uint64_t input,
	uint64_t state_out[8]) {

	uint64_t v0 = keys->v0;
	uint64_t v1 = keys->v1;
	uint64_t v2 = keys->v2;
	uint64_t v3 = keys->v3;

	v1 ^= 0xee;
	v3 ^= input;

	/* 2 compression rounds */
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);

	v0 ^= input;
	v2 ^= 0xee;

	/* first finalization group: 4 rounds, then emit the whole state */
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);

	state_out[0] = v0;
	state_out[1] = v1;
	state_out[2] = v2;
	state_out[3] = v3;

	v1 ^= 0xdd;

	/* second finalization group: 4 more rounds, emit again */
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);

	state_out[4] = v0;
	state_out[5] = v1;
	state_out[6] = v2;
	state_out[7] = v3;
}
(((x) << (b)) | ((x) >> (64 - (b)))) +#define SIPROUND(v0, v1, v2, v3) \ + do { \ + v0 += v1; v2 += v3; v1 = ROTL(v1, 13); \ + v3 = ROTL(v3, 16); v1 ^= v0; v3 ^= v2; \ + v0 = ROTL(v0, 32); v2 += v1; v0 += v3; \ + v1 = ROTL(v1, 17); v3 = ROTL(v3, 21); \ + v1 ^= v2; v3 ^= v0; v2 = ROTL(v2, 32); \ + } while (0) + +typedef struct siphash_state { + uint64_t v0, v1, v2, v3; +} siphash_state; + +#ifdef __cplusplus +extern "C" { +#endif + +HASHX_PRIVATE uint64_t hashx_siphash13_ctr(uint64_t input, const siphash_state* keys); +HASHX_PRIVATE void hashx_siphash24_ctr_state512(const siphash_state* keys, uint64_t input, uint64_t state_out[8]); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/ext/equix/hashx/src/siphash_rng.c b/src/ext/equix/hashx/src/siphash_rng.c new file mode 100644 index 0000000000..f1ec23bf47 --- /dev/null +++ b/src/ext/equix/hashx/src/siphash_rng.c @@ -0,0 +1,31 @@ +/* Copyright (c) 2020 tevador <tevador@gmail.com> */ +/* See LICENSE for licensing information */ + +#include "siphash_rng.h" + +void hashx_siphash_rng_init(siphash_rng* gen, const siphash_state* state) { + gen->keys = *state; + gen->counter = 0; + gen->count8 = 0; + gen->count32 = 0; +} + +uint8_t hashx_siphash_rng_u8(siphash_rng* gen) { + if (gen->count8 == 0) { + gen->buffer8 = hashx_siphash13_ctr(gen->counter, &gen->keys); + gen->counter++; + gen->count8 = sizeof(gen->buffer8); + } + gen->count8--; + return gen->buffer8 >> (gen->count8 * 8); +} + +uint32_t hashx_siphash_rng_u32(siphash_rng* gen) { + if (gen->count32 == 0) { + gen->buffer32 = hashx_siphash13_ctr(gen->counter, &gen->keys); + gen->counter++; + gen->count32 = sizeof(gen->buffer32) / sizeof(uint32_t); + } + gen->count32--; + return gen->buffer32 >> (gen->count32 * 32); +} diff --git a/src/ext/equix/hashx/src/siphash_rng.h b/src/ext/equix/hashx/src/siphash_rng.h new file mode 100644 index 0000000000..638b177e06 --- /dev/null +++ b/src/ext/equix/hashx/src/siphash_rng.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2020 tevador 
/* Scan argv for a standalone flag `option`; set *out to true if present,
 * false otherwise. */
static inline void read_option(const char* option, int argc, char** argv, bool* out) {
	for (int i = 0; i < argc; ++i) {
		if (strcmp(argv[i], option) == 0) {
			*out = true;
			return;
		}
	}
	*out = false;
}

/* Scan argv for `option <value>` and store the parsed value in *out.
 * Falls back to default_val when the option is absent or its value does
 * not parse to a positive int - zero and negative values are deliberately
 * rejected (atoi also returns 0 for non-numeric input). */
static inline void read_int_option(const char* option, int argc, char** argv, int* out, int default_val) {
	for (int i = 0; i < argc - 1; ++i) {
		if (strcmp(argv[i], option) == 0 && (*out = atoi(argv[i + 1])) > 0) {
			return;
		}
	}
	*out = default_val;
}

/* Convert one ASCII hex digit to its value 0-15. Assumes the input is a
 * valid hex digit ([0-9a-fA-F]); other bytes produce garbage - callers
 * are test harnesses with literal inputs. */
static inline char parse_nibble(char hex) {
	hex &= ~0x20; /* fold lowercase letters to uppercase */
	return (hex & 0x40) ? hex - ('A' - 10) : hex & 0xf;
}

/* Decode `length` hex characters from `in` into length/2 bytes at `out`.
 * `length` must be even; no bounds or validity checking. */
static inline void hex2bin(const char* in, int length, char* out) {
	for (int i = 0; i < length; i += 2) {
		char nibble1 = parse_nibble(*in++);
		char nibble2 = parse_nibble(*in++);
		*out++ = nibble1 << 4 | nibble2;
	}
}

/* Print `length` bytes as lowercase hex to stdout.
 * Fix: the index was `unsigned` compared against the signed `length`,
 * promoting `length` to unsigned - a negative length would loop ~2^32
 * times. A signed index makes the comparison well-behaved. */
static inline void output_hex(const char* data, int length) {
	for (int i = 0; i < length; ++i)
		printf("%02x", data[i] & 0xff);
}
/* Expand to run_test with the function's own name as the label. */
#define RUN_TEST(x) run_test(#x, &x)

/* Run one test, printing its sequence number and name. A test returns
 * true for PASSED and false for SKIPPED (e.g. feature compiled out);
 * hard failures abort via assert() inside the test body. */
static void run_test(const char* name, test_func* func) {
	printf("[%2i] %-40s ... ", ++test_no, name);
	printf(func() ? "PASSED\n" : "SKIPPED\n");
}

/* Allocate the interpreted-mode context used by most tests. */
static bool test_alloc() {
	ctx_int = hashx_alloc(HASHX_INTERPRETED);
	assert(ctx_int != NULL && ctx_int != HASHX_NOTSUPP);
	return true;
}

/* Release both contexts; runs last. hashx_free(NULL) must be a no-op
 * since ctx_cmp may never have been allocated. */
static bool test_free() {
	hashx_free(ctx_int);
	hashx_free(ctx_cmp);
	return true;
}

/* Program generation from seed1 must succeed (result == 1). */
static bool test_make1() {
	int result = hashx_make(ctx_int, seed1, sizeof(seed1));
	assert(result == 1);
	return true;
}

/* Golden-vector test: seed1 + counter2. Skipped when HASHX_SALT or
 * HASHX_BLOCK_MODE changes the output domain. */
static bool test_hash_ctr1() {
#ifdef HASHX_SALT
	return false;
#endif
#ifndef HASHX_BLOCK_MODE
	char hash[HASHX_SIZE];
	hashx_exec(ctx_int, counter2, hash);
	/* printf("\n");
	output_hex(hash, HASHX_SIZE);
	printf("\n"); */
	assert(equals_hex(hash, "aebdd50aa67c93afb82a4c534603b65e46decd584c55161c526ebc099415ccf1"));
	return true;
#else
	return false;
#endif
}

/* Golden-vector test: seed1 + counter1 (zero counter). */
static bool test_hash_ctr2() {
#ifdef HASHX_SALT
	return false;
#endif
#ifndef HASHX_BLOCK_MODE
	char hash[HASHX_SIZE];
	hashx_exec(ctx_int, counter1, hash);
	assert(equals_hex(hash, "2b2f54567dcbea98fdb5d5e5ce9a65983c4a4e35ab1464b1efb61e83b7074bb2"));
	return true;
#else
	return false;
#endif
}

/* Re-key the interpreted context with seed2; later tests use this key. */
static bool test_make2() {
	int result = hashx_make(ctx_int, seed2, sizeof(seed2));
	assert(result == 1);
	return true;
}

/* Golden-vector test: seed2 + counter2 (same counter as ctr1 but a
 * different seed must give a different hash). */
static bool test_hash_ctr3() {
#ifdef HASHX_SALT
	return false;
#endif
#ifndef HASHX_BLOCK_MODE
	char hash[HASHX_SIZE];
	hashx_exec(ctx_int, counter2, hash);
	assert(equals_hex(hash, "ab3d155bf4bbb0aa3a71b7801089826186e44300e6932e6ffd287cf302bbb0ba"));
	return true;
#else
	return false;
#endif
}

/* Golden-vector test: seed2 + counter3 (large 64-bit counter). */
static bool test_hash_ctr4() {
#ifdef HASHX_SALT
	return false;
#endif
#ifndef HASHX_BLOCK_MODE
	char hash[HASHX_SIZE];
	hashx_exec(ctx_int, counter3, hash);
	assert(equals_hex(hash, "8dfef0497c323274a60d1d93292b68d9a0496379ba407b4341cf868a14d30113"));
	return true;
#else
	return false;
#endif
}

/* Golden-vector test for block mode (arbitrary-length input); only
 * meaningful when HASHX_BLOCK_MODE is compiled in. */
static bool test_hash_block1() {
#ifdef HASHX_SALT
	return false;
#endif
#ifndef HASHX_BLOCK_MODE
	return false;
#else
	char hash[HASHX_SIZE];
	hashx_exec(ctx_int, long_input, sizeof(long_input), hash);
	assert(equals_hex(hash, "d0b232b832459501ca1ac9dc0429fd931414ead7624a457e375a43ea3e5e737a"));
	return true;
#endif
}

/* Allocate a compiled-mode context. HASHX_NOTSUPP (not NULL) signals an
 * unsupported platform, which is a skip rather than a failure. */
static bool test_alloc_compiler() {
	ctx_cmp = hashx_alloc(HASHX_COMPILED);
	assert(ctx_cmp != NULL);
	return ctx_cmp != HASHX_NOTSUPP;
}

/* Key the compiled context with seed2, matching the interpreted one. */
static bool test_make3() {
	if (ctx_cmp == HASHX_NOTSUPP)
		return false;

	int result = hashx_make(ctx_cmp, seed2, sizeof(seed2));
	assert(result == 1);
	return true;
}

/* Compiled output must match interpreted output for counter2. */
static bool test_compiler_ctr1() {
	if (ctx_cmp == HASHX_NOTSUPP)
		return false;

#ifndef HASHX_BLOCK_MODE
	char hash1[HASHX_SIZE];
	char hash2[HASHX_SIZE];
	hashx_exec(ctx_int, counter2, hash1);
	hashx_exec(ctx_cmp, counter2, hash2);
	assert(hashes_equal(hash1, hash2));
	return true;
#else
	return false;
#endif
}

/* Compiled output must match interpreted output for counter1. */
static bool test_compiler_ctr2() {
	if (ctx_cmp == HASHX_NOTSUPP)
		return false;

#ifndef HASHX_BLOCK_MODE
	char hash1[HASHX_SIZE];
	char hash2[HASHX_SIZE];
	hashx_exec(ctx_int, counter1, hash1);
	hashx_exec(ctx_cmp, counter1, hash2);
	assert(hashes_equal(hash1, hash2));
	return true;
#else
	return false;
#endif
}

/* Compiled vs. interpreted equivalence in block mode. */
static bool test_compiler_block1() {
	if (ctx_cmp == HASHX_NOTSUPP)
		return false;
#ifndef HASHX_BLOCK_MODE
	return false;
#else
	char hash1[HASHX_SIZE];
	char hash2[HASHX_SIZE];
	hashx_exec(ctx_int, long_input, sizeof(long_input), hash1);
	hashx_exec(ctx_cmp, long_input, sizeof(long_input), hash2);
	assert(hashes_equal(hash1, hash2));
	return true;
#endif
}
#ifdef HASHX_WIN
/* Enable (or disable) a named token privilege for the current process;
 * needed before requesting large pages. Returns 1 on success, 0 on
 * failure. GetLastError() is read immediately after
 * AdjustTokenPrivileges because that call can return TRUE while setting
 * ERROR_NOT_ALL_ASSIGNED - and a later call could clobber the code. */
static int set_privilege(const char* pszPrivilege, BOOL bEnable) {
	HANDLE hToken;
	TOKEN_PRIVILEGES tp;
	BOOL status;
	DWORD error;

	if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES
		| TOKEN_QUERY, &hToken))
		return 0;

	if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid))
		return 0;

	tp.PrivilegeCount = 1;

	if (bEnable)
		tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
	else
		tp.Privileges[0].Attributes = 0;

	status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0,
		(PTOKEN_PRIVILEGES)NULL, 0);
	error = GetLastError();

	CloseHandle(hToken);

	return status && (error == ERROR_SUCCESS);
}
#endif

/* Allocate `bytes` of read-write page-backed memory.
 * Returns NULL on failure on both platforms (VirtualAlloc returns NULL;
 * mmap's MAP_FAILED is translated to NULL). */
void* hashx_vm_alloc(size_t bytes) {
	void* mem;
#ifdef HASHX_WIN
	mem = VirtualAlloc(NULL, bytes, MEM_COMMIT, PAGE_READWRITE);
#else
	mem = mmap(NULL, bytes, PAGE_READWRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (mem == MAP_FAILED)
		return NULL;
#endif
	return mem;
}

/* Change page protection on [ptr, ptr+bytes). Returns 1 on success,
 * 0 on failure. `rules` uses the PAGE_* constants mapped to PROT_*
 * flags on POSIX at the top of this file. */
static inline int page_protect(void* ptr, size_t bytes, int rules) {
#ifdef HASHX_WIN
	DWORD oldp;
	if (!VirtualProtect(ptr, bytes, (DWORD)rules, &oldp)) {
		return 0;
	}
#else
	if (-1 == mprotect(ptr, bytes, rules))
		return 0;
#endif
	return 1;
}

/* Make pages writable (e.g. before emitting compiled code).
 * NOTE(review): the page_protect result is discarded, so a failed
 * mprotect/VirtualProtect is silent - confirm callers can tolerate that. */
void hashx_vm_rw(void* ptr, size_t bytes) {
	page_protect(ptr, bytes, PAGE_READWRITE);
}

/* Make pages executable (and no longer writable) - W^X transition
 * after code emission. Failure is silent, as above. */
void hashx_vm_rx(void* ptr, size_t bytes) {
	page_protect(ptr, bytes, PAGE_EXECUTE_READ);
}

/* Try to allocate `bytes` backed by huge/large pages; returns NULL on
 * failure (callers are expected to fall back to normal pages).
 * Each platform needs its own incantation:
 * - Windows: requires SeLockMemoryPrivilege and a large-page-capable
 *   system (GetLargePageMinimum() == 0 means unsupported).
 * - macOS: the fd argument carries VM_FLAGS_SUPERPAGE_SIZE_2MB - on
 *   Darwin the fd slot encodes VM flags for anonymous mappings.
 * - FreeBSD: MAP_ALIGNED_SUPER; Linux: MAP_HUGETLB | MAP_POPULATE. */
void* hashx_vm_alloc_huge(size_t bytes) {
	void* mem;
#ifdef HASHX_WIN
	set_privilege("SeLockMemoryPrivilege", 1);
	SIZE_T page_min = GetLargePageMinimum();
	if (page_min > 0) {
		mem = VirtualAlloc(NULL, ALIGN_SIZE(bytes, page_min), MEM_COMMIT
			| MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
	}
	else {
		mem = NULL;
	}
#else
#ifdef __APPLE__
	mem = mmap(NULL, bytes, PAGE_READWRITE, MAP_PRIVATE | MAP_ANONYMOUS,
		VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#elif defined(__FreeBSD__)
	mem = mmap(NULL, bytes, PAGE_READWRITE, MAP_PRIVATE | MAP_ANONYMOUS
		| MAP_ALIGNED_SUPER, -1, 0);
#elif defined(__OpenBSD__)
	mem = MAP_FAILED; /* OpenBSD does not support huge pages */
#else
	mem = mmap(NULL, bytes, PAGE_READWRITE, MAP_PRIVATE | MAP_ANONYMOUS
		| MAP_HUGETLB | MAP_POPULATE, -1, 0);
#endif
	if (mem == MAP_FAILED) {
		mem = NULL;
	}
#endif
	return mem;
}

/* Release memory from hashx_vm_alloc / hashx_vm_alloc_huge.
 * munmap needs the original size; VirtualFree with MEM_RELEASE does not. */
void hashx_vm_free(void* ptr, size_t bytes) {
#ifdef HASHX_WIN
	VirtualFree(ptr, 0, MEM_RELEASE);
#else
	munmap(ptr, bytes);
#endif
}
/* Copyright (c) 2020 tevador <tevador@gmail.com> */
/* See LICENSE for licensing information */

#ifndef VIRTUAL_MEMORY_H
#define VIRTUAL_MEMORY_H

#include <stdint.h>
#include <stddef.h>
#include <hashx.h>

/* Round `pos` up to the next multiple of `align`.
 * NOTE(review): for pos == 0 this relies on unsigned wraparound of
 * (pos - 1); it happens to yield 0 for power-of-two aligns on size_t
 * inputs - confirm callers never pass 0 or a signed type. */
#define ALIGN_SIZE(pos, align) ((((pos) - 1) / (align) + 1) * (align))

/* Allocate read-write pages; returns NULL on failure. */
HASHX_PRIVATE void* hashx_vm_alloc(size_t size);
/* Set pages read-write (failure is silent; see virtual_memory.c). */
HASHX_PRIVATE void hashx_vm_rw(void* ptr, size_t size);
/* Set pages read-execute (W^X transition; failure is silent). */
HASHX_PRIVATE void hashx_vm_rx(void* ptr, size_t size);
/* Try to allocate huge/large pages; returns NULL on failure so callers
 * can fall back to hashx_vm_alloc. */
HASHX_PRIVATE void* hashx_vm_alloc_huge(size_t size);
/* Free memory from either allocator; `size` is required for munmap. */
HASHX_PRIVATE void hashx_vm_free(void* ptr, size_t size);

#endif