aboutsummaryrefslogtreecommitdiff
path: root/src/ext/equix/hashx/src
diff options
context:
space:
mode:
Diffstat (limited to 'src/ext/equix/hashx/src')
-rw-r--r--src/ext/equix/hashx/src/bench.c147
-rw-r--r--src/ext/equix/hashx/src/blake2.c462
-rw-r--r--src/ext/equix/hashx/src/blake2.h73
-rw-r--r--src/ext/equix/hashx/src/compiler.c19
-rw-r--r--src/ext/equix/hashx/src/compiler.h77
-rw-r--r--src/ext/equix/hashx/src/compiler_a64.c164
-rw-r--r--src/ext/equix/hashx/src/compiler_x86.c159
-rw-r--r--src/ext/equix/hashx/src/context.c67
-rw-r--r--src/ext/equix/hashx/src/context.h39
-rw-r--r--src/ext/equix/hashx/src/force_inline.h9
-rw-r--r--src/ext/equix/hashx/src/hashx.c172
-rw-r--r--src/ext/equix/hashx/src/hashx_endian.h103
-rw-r--r--src/ext/equix/hashx/src/hashx_thread.c27
-rw-r--r--src/ext/equix/hashx/src/hashx_thread.h27
-rw-r--r--src/ext/equix/hashx/src/hashx_time.c35
-rw-r--r--src/ext/equix/hashx/src/hashx_time.h9
-rw-r--r--src/ext/equix/hashx/src/instruction.h31
-rw-r--r--src/ext/equix/hashx/src/program.c777
-rw-r--r--src/ext/equix/hashx/src/program.h52
-rw-r--r--src/ext/equix/hashx/src/program_exec.c158
-rw-r--r--src/ext/equix/hashx/src/siphash.c66
-rw-r--r--src/ext/equix/hashx/src/siphash.h35
-rw-r--r--src/ext/equix/hashx/src/siphash_rng.c41
-rw-r--r--src/ext/equix/hashx/src/siphash_rng.h34
-rw-r--r--src/ext/equix/hashx/src/test_utils.h60
-rw-r--r--src/ext/equix/hashx/src/tests.c275
-rw-r--r--src/ext/equix/hashx/src/unreachable.h9
-rw-r--r--src/ext/equix/hashx/src/virtual_memory.c140
-rw-r--r--src/ext/equix/hashx/src/virtual_memory.h23
29 files changed, 3290 insertions, 0 deletions
diff --git a/src/ext/equix/hashx/src/bench.c b/src/ext/equix/hashx/src/bench.c
new file mode 100644
index 0000000000..a440825436
--- /dev/null
+++ b/src/ext/equix/hashx/src/bench.c
@@ -0,0 +1,147 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include "test_utils.h"
+#include "hashx_thread.h"
+#include "hashx_endian.h"
+#include "hashx_time.h"
+#include <assert.h>
+#include <limits.h>
+#include <inttypes.h>
+
+typedef struct worker_job {
+ int id;
+ hashx_thread thread;
+ hashx_ctx* ctx;
+ int64_t total_hashes;
+ uint64_t best_hash;
+ uint64_t threshold;
+ int start;
+ int step;
+ int end;
+ int nonces;
+} worker_job;
+
+static hashx_thread_retval worker(void* args) {
+ worker_job* job = (worker_job*)args;
+ job->total_hashes = 0;
+ job->best_hash = UINT64_MAX;
+ for (int seed = job->start; seed < job->end; seed += job->step) {
+ {
+ hashx_result result = hashx_make(job->ctx, &seed, sizeof(seed));
+ if (result == HASHX_FAIL_SEED) {
+ continue;
+ }
+ if (result == HASHX_FAIL_COMPILE) {
+ printf("Error: not supported. Try with --interpret\n");
+ }
+ assert(result == HASHX_OK);
+ if (result != HASHX_OK)
+ break;
+ }
+ for (int nonce = 0; nonce < job->nonces; ++nonce) {
+ uint8_t hash[HASHX_SIZE] = { 0 };
+ {
+#ifndef HASHX_BLOCK_MODE
+ hashx_result result = hashx_exec(job->ctx, nonce, hash);
+#else
+ hashx_result result = hashx_exec(job->ctx,
+ &nonce, sizeof(nonce), hash);
+#endif
+ assert(result == HASHX_OK);
+ if (result != HASHX_OK)
+ break;
+ }
+ uint64_t hashval = load64(hash);
+ if (hashval < job->best_hash) {
+ job->best_hash = hashval;
+ }
+ if (hashval < job->threshold) {
+ printf("[thread %2i] Hash (%5i, %5i) below threshold:"
+ " ...%02x%02x%02x%02x%02x%02x%02x%02x\n",
+ job->id,
+ seed,
+ nonce,
+ hash[0],
+ hash[1],
+ hash[2],
+ hash[3],
+ hash[4],
+ hash[5],
+ hash[6],
+ hash[7]);
+ }
+ }
+ job->total_hashes += job->nonces;
+ }
+ return HASHX_THREAD_SUCCESS;
+}
+
+int main(int argc, char** argv) {
+ int nonces, seeds, start, diff, threads;
+ bool interpret;
+ read_int_option("--diff", argc, argv, &diff, INT_MAX);
+ read_int_option("--start", argc, argv, &start, 0);
+ read_int_option("--seeds", argc, argv, &seeds, 500);
+ read_int_option("--nonces", argc, argv, &nonces, 65536);
+ read_int_option("--threads", argc, argv, &threads, 1);
+ read_option("--interpret", argc, argv, &interpret);
+ hashx_type ctx_type = HASHX_TYPE_INTERPRETED;
+ if (!interpret) {
+ ctx_type = HASHX_TYPE_COMPILED;
+ }
+ uint64_t best_hash = UINT64_MAX;
+ uint64_t diff_ex = (uint64_t)diff * 1000ULL;
+ uint64_t threshold = UINT64_MAX / diff_ex;
+ int seeds_end = seeds + start;
+ int64_t total_hashes = 0;
+ printf("Interpret: %i, Target diff.: %" PRIu64 ", Threads: %i\n", interpret, diff_ex, threads);
+ printf("Testing seeds %i-%i with %i nonces each ...\n", start, seeds_end - 1, nonces);
+ double time_start, time_end;
+ worker_job* jobs = malloc(sizeof(worker_job) * threads);
+ if (jobs == NULL) {
+ printf("Error: memory allocation failure\n");
+ return 1;
+ }
+ for (int thd = 0; thd < threads; ++thd) {
+ jobs[thd].ctx = hashx_alloc(ctx_type);
+ if (jobs[thd].ctx == NULL) {
+ printf("Error: memory allocation failure\n");
+ return 1;
+ }
+ jobs[thd].id = thd;
+ jobs[thd].start = start + thd;
+ jobs[thd].step = threads;
+ jobs[thd].end = seeds_end;
+ jobs[thd].nonces = nonces;
+ jobs[thd].threshold = threshold;
+ }
+ time_start = hashx_time();
+ if (threads > 1) {
+ for (int thd = 0; thd < threads; ++thd) {
+ jobs[thd].thread = hashx_thread_create(&worker, &jobs[thd]);
+ }
+ for (int thd = 0; thd < threads; ++thd) {
+ hashx_thread_join(jobs[thd].thread);
+ }
+ }
+ else {
+ worker(jobs);
+ }
+ time_end = hashx_time();
+ for (int thd = 0; thd < threads; ++thd) {
+ total_hashes += jobs[thd].total_hashes;
+ if (jobs[thd].best_hash < best_hash) {
+ best_hash = jobs[thd].best_hash;
+ }
+ }
+ double elapsed = time_end - time_start;
+ printf("Total hashes: %" PRIi64 "\n", total_hashes);
+ printf("%f hashes/sec.\n", total_hashes / elapsed);
+ printf("%f seeds/sec.\n", seeds / elapsed);
+ printf("Best hash: ...");
+ output_hex((char*)&best_hash, sizeof(best_hash));
+ printf(" (diff: %" PRIu64 ")\n", UINT64_MAX / best_hash);
+ free(jobs);
+ return 0;
+}
diff --git a/src/ext/equix/hashx/src/blake2.c b/src/ext/equix/hashx/src/blake2.c
new file mode 100644
index 0000000000..916dd9b6ba
--- /dev/null
+++ b/src/ext/equix/hashx/src/blake2.c
@@ -0,0 +1,462 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+/* Original code from Argon2 reference source code package used under CC0
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "blake2.h"
+#include "hashx_endian.h"
+
+static const uint64_t blake2b_IV[8] = {
+ UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
+ UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
+ UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
+ UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
+
+#define BLAKE2_SIGMA_0_0 0
+#define BLAKE2_SIGMA_0_1 1
+#define BLAKE2_SIGMA_0_2 2
+#define BLAKE2_SIGMA_0_3 3
+#define BLAKE2_SIGMA_0_4 4
+#define BLAKE2_SIGMA_0_5 5
+#define BLAKE2_SIGMA_0_6 6
+#define BLAKE2_SIGMA_0_7 7
+#define BLAKE2_SIGMA_0_8 8
+#define BLAKE2_SIGMA_0_9 9
+#define BLAKE2_SIGMA_0_10 10
+#define BLAKE2_SIGMA_0_11 11
+#define BLAKE2_SIGMA_0_12 12
+#define BLAKE2_SIGMA_0_13 13
+#define BLAKE2_SIGMA_0_14 14
+#define BLAKE2_SIGMA_0_15 15
+
+#define BLAKE2_SIGMA_1_0 14
+#define BLAKE2_SIGMA_1_1 10
+#define BLAKE2_SIGMA_1_2 4
+#define BLAKE2_SIGMA_1_3 8
+#define BLAKE2_SIGMA_1_4 9
+#define BLAKE2_SIGMA_1_5 15
+#define BLAKE2_SIGMA_1_6 13
+#define BLAKE2_SIGMA_1_7 6
+#define BLAKE2_SIGMA_1_8 1
+#define BLAKE2_SIGMA_1_9 12
+#define BLAKE2_SIGMA_1_10 0
+#define BLAKE2_SIGMA_1_11 2
+#define BLAKE2_SIGMA_1_12 11
+#define BLAKE2_SIGMA_1_13 7
+#define BLAKE2_SIGMA_1_14 5
+#define BLAKE2_SIGMA_1_15 3
+
+#define BLAKE2_SIGMA_2_0 11
+#define BLAKE2_SIGMA_2_1 8
+#define BLAKE2_SIGMA_2_2 12
+#define BLAKE2_SIGMA_2_3 0
+#define BLAKE2_SIGMA_2_4 5
+#define BLAKE2_SIGMA_2_5 2
+#define BLAKE2_SIGMA_2_6 15
+#define BLAKE2_SIGMA_2_7 13
+#define BLAKE2_SIGMA_2_8 10
+#define BLAKE2_SIGMA_2_9 14
+#define BLAKE2_SIGMA_2_10 3
+#define BLAKE2_SIGMA_2_11 6
+#define BLAKE2_SIGMA_2_12 7
+#define BLAKE2_SIGMA_2_13 1
+#define BLAKE2_SIGMA_2_14 9
+#define BLAKE2_SIGMA_2_15 4
+
+#define BLAKE2_SIGMA_3_0 7
+#define BLAKE2_SIGMA_3_1 9
+#define BLAKE2_SIGMA_3_2 3
+#define BLAKE2_SIGMA_3_3 1
+#define BLAKE2_SIGMA_3_4 13
+#define BLAKE2_SIGMA_3_5 12
+#define BLAKE2_SIGMA_3_6 11
+#define BLAKE2_SIGMA_3_7 14
+#define BLAKE2_SIGMA_3_8 2
+#define BLAKE2_SIGMA_3_9 6
+#define BLAKE2_SIGMA_3_10 5
+#define BLAKE2_SIGMA_3_11 10
+#define BLAKE2_SIGMA_3_12 4
+#define BLAKE2_SIGMA_3_13 0
+#define BLAKE2_SIGMA_3_14 15
+#define BLAKE2_SIGMA_3_15 8
+
+#define BLAKE2_SIGMA_4_0 9
+#define BLAKE2_SIGMA_4_1 0
+#define BLAKE2_SIGMA_4_2 5
+#define BLAKE2_SIGMA_4_3 7
+#define BLAKE2_SIGMA_4_4 2
+#define BLAKE2_SIGMA_4_5 4
+#define BLAKE2_SIGMA_4_6 10
+#define BLAKE2_SIGMA_4_7 15
+#define BLAKE2_SIGMA_4_8 14
+#define BLAKE2_SIGMA_4_9 1
+#define BLAKE2_SIGMA_4_10 11
+#define BLAKE2_SIGMA_4_11 12
+#define BLAKE2_SIGMA_4_12 6
+#define BLAKE2_SIGMA_4_13 8
+#define BLAKE2_SIGMA_4_14 3
+#define BLAKE2_SIGMA_4_15 13
+
+#define BLAKE2_SIGMA_5_0 2
+#define BLAKE2_SIGMA_5_1 12
+#define BLAKE2_SIGMA_5_2 6
+#define BLAKE2_SIGMA_5_3 10
+#define BLAKE2_SIGMA_5_4 0
+#define BLAKE2_SIGMA_5_5 11
+#define BLAKE2_SIGMA_5_6 8
+#define BLAKE2_SIGMA_5_7 3
+#define BLAKE2_SIGMA_5_8 4
+#define BLAKE2_SIGMA_5_9 13
+#define BLAKE2_SIGMA_5_10 7
+#define BLAKE2_SIGMA_5_11 5
+#define BLAKE2_SIGMA_5_12 15
+#define BLAKE2_SIGMA_5_13 14
+#define BLAKE2_SIGMA_5_14 1
+#define BLAKE2_SIGMA_5_15 9
+
+#define BLAKE2_SIGMA_6_0 12
+#define BLAKE2_SIGMA_6_1 5
+#define BLAKE2_SIGMA_6_2 1
+#define BLAKE2_SIGMA_6_3 15
+#define BLAKE2_SIGMA_6_4 14
+#define BLAKE2_SIGMA_6_5 13
+#define BLAKE2_SIGMA_6_6 4
+#define BLAKE2_SIGMA_6_7 10
+#define BLAKE2_SIGMA_6_8 0
+#define BLAKE2_SIGMA_6_9 7
+#define BLAKE2_SIGMA_6_10 6
+#define BLAKE2_SIGMA_6_11 3
+#define BLAKE2_SIGMA_6_12 9
+#define BLAKE2_SIGMA_6_13 2
+#define BLAKE2_SIGMA_6_14 8
+#define BLAKE2_SIGMA_6_15 11
+
+#define BLAKE2_SIGMA_7_0 13
+#define BLAKE2_SIGMA_7_1 11
+#define BLAKE2_SIGMA_7_2 7
+#define BLAKE2_SIGMA_7_3 14
+#define BLAKE2_SIGMA_7_4 12
+#define BLAKE2_SIGMA_7_5 1
+#define BLAKE2_SIGMA_7_6 3
+#define BLAKE2_SIGMA_7_7 9
+#define BLAKE2_SIGMA_7_8 5
+#define BLAKE2_SIGMA_7_9 0
+#define BLAKE2_SIGMA_7_10 15
+#define BLAKE2_SIGMA_7_11 4
+#define BLAKE2_SIGMA_7_12 8
+#define BLAKE2_SIGMA_7_13 6
+#define BLAKE2_SIGMA_7_14 2
+#define BLAKE2_SIGMA_7_15 10
+
+#define BLAKE2_SIGMA_8_0 6
+#define BLAKE2_SIGMA_8_1 15
+#define BLAKE2_SIGMA_8_2 14
+#define BLAKE2_SIGMA_8_3 9
+#define BLAKE2_SIGMA_8_4 11
+#define BLAKE2_SIGMA_8_5 3
+#define BLAKE2_SIGMA_8_6 0
+#define BLAKE2_SIGMA_8_7 8
+#define BLAKE2_SIGMA_8_8 12
+#define BLAKE2_SIGMA_8_9 2
+#define BLAKE2_SIGMA_8_10 13
+#define BLAKE2_SIGMA_8_11 7
+#define BLAKE2_SIGMA_8_12 1
+#define BLAKE2_SIGMA_8_13 4
+#define BLAKE2_SIGMA_8_14 10
+#define BLAKE2_SIGMA_8_15 5
+
+#define BLAKE2_SIGMA_9_0 10
+#define BLAKE2_SIGMA_9_1 2
+#define BLAKE2_SIGMA_9_2 8
+#define BLAKE2_SIGMA_9_3 4
+#define BLAKE2_SIGMA_9_4 7
+#define BLAKE2_SIGMA_9_5 6
+#define BLAKE2_SIGMA_9_6 1
+#define BLAKE2_SIGMA_9_7 5
+#define BLAKE2_SIGMA_9_8 15
+#define BLAKE2_SIGMA_9_9 11
+#define BLAKE2_SIGMA_9_10 9
+#define BLAKE2_SIGMA_9_11 14
+#define BLAKE2_SIGMA_9_12 3
+#define BLAKE2_SIGMA_9_13 12
+#define BLAKE2_SIGMA_9_14 13
+#define BLAKE2_SIGMA_9_15 0
+
+#define BLAKE2_SIGMA_10_0 0
+#define BLAKE2_SIGMA_10_1 1
+#define BLAKE2_SIGMA_10_2 2
+#define BLAKE2_SIGMA_10_3 3
+#define BLAKE2_SIGMA_10_4 4
+#define BLAKE2_SIGMA_10_5 5
+#define BLAKE2_SIGMA_10_6 6
+#define BLAKE2_SIGMA_10_7 7
+#define BLAKE2_SIGMA_10_8 8
+#define BLAKE2_SIGMA_10_9 9
+#define BLAKE2_SIGMA_10_10 10
+#define BLAKE2_SIGMA_10_11 11
+#define BLAKE2_SIGMA_10_12 12
+#define BLAKE2_SIGMA_10_13 13
+#define BLAKE2_SIGMA_10_14 14
+#define BLAKE2_SIGMA_10_15 15
+
+#define BLAKE2_SIGMA_11_0 14
+#define BLAKE2_SIGMA_11_1 10
+#define BLAKE2_SIGMA_11_2 4
+#define BLAKE2_SIGMA_11_3 8
+#define BLAKE2_SIGMA_11_4 9
+#define BLAKE2_SIGMA_11_5 15
+#define BLAKE2_SIGMA_11_6 13
+#define BLAKE2_SIGMA_11_7 6
+#define BLAKE2_SIGMA_11_8 1
+#define BLAKE2_SIGMA_11_9 12
+#define BLAKE2_SIGMA_11_10 0
+#define BLAKE2_SIGMA_11_11 2
+#define BLAKE2_SIGMA_11_12 11
+#define BLAKE2_SIGMA_11_13 7
+#define BLAKE2_SIGMA_11_14 5
+#define BLAKE2_SIGMA_11_15 3
+
+static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
+ return (w >> c) | (w << (64 - c));
+}
+
+static FORCE_INLINE void blake2b_set_lastblock(blake2b_state* S) {
+ S->f[0] = (uint64_t)-1;
+}
+
+static FORCE_INLINE void blake2b_increment_counter(blake2b_state* S,
+ uint64_t inc) {
+ S->t[0] += inc;
+ S->t[1] += (S->t[0] < inc);
+}
+
+static FORCE_INLINE void blake2b_init0(blake2b_state* S) {
+ memset(S, 0, sizeof(*S));
+ memcpy(S->h, blake2b_IV, sizeof(S->h));
+}
+
+int hashx_blake2b_init_param(blake2b_state* S, const blake2b_param* P) {
+ const unsigned char* p = (const unsigned char*)P;
+ unsigned int i;
+
+ if (NULL == P || NULL == S) {
+ return -1;
+ }
+
+ blake2b_init0(S);
+ /* IV XOR Parameter Block */
+ for (i = 0; i < 8; ++i) {
+ S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
+ }
+ S->outlen = P->digest_length;
+ return 0;
+}
+
+#define SIGMA(r, k) BLAKE2_SIGMA_ ## r ## _ ## k
+
+#define G(r, i, j, a, b, c, d) \
+ do { \
+ a = a + b + m[SIGMA(r, i)]; \
+ d = rotr64(d ^ a, 32); \
+ c = c + d; \
+ b = rotr64(b ^ c, 24); \
+ a = a + b + m[SIGMA(r, j)]; \
+ d = rotr64(d ^ a, 16); \
+ c = c + d; \
+ b = rotr64(b ^ c, 63); \
+ } while ((void)0, 0)
+
+#define ROUND_INNER(r) \
+ do { \
+ G(r, 0, 1, v[0], v[4], v[8], v[12]); \
+ G(r, 2, 3, v[1], v[5], v[9], v[13]); \
+ G(r, 4, 5, v[2], v[6], v[10], v[14]); \
+ G(r, 6, 7, v[3], v[7], v[11], v[15]); \
+ G(r, 8, 9, v[0], v[5], v[10], v[15]); \
+ G(r, 10, 11, v[1], v[6], v[11], v[12]); \
+ G(r, 12, 13, v[2], v[7], v[8], v[13]); \
+ G(r, 14, 15, v[3], v[4], v[9], v[14]); \
+ } while ((void)0, 0)
+
+#define ROUND(r) ROUND_INNER(r)
+
+static void blake2b_compress(blake2b_state* S, const uint8_t* block) {
+ uint64_t m[16];
+ uint64_t v[16];
+ unsigned int i;
+
+ for (i = 0; i < 16; ++i) {
+ m[i] = load64(block + i * sizeof(m[i]));
+ }
+
+ for (i = 0; i < 8; ++i) {
+ v[i] = S->h[i];
+ }
+
+ v[8] = blake2b_IV[0];
+ v[9] = blake2b_IV[1];
+ v[10] = blake2b_IV[2];
+ v[11] = blake2b_IV[3];
+ v[12] = blake2b_IV[4] ^ S->t[0];
+ v[13] = blake2b_IV[5] ^ S->t[1];
+ v[14] = blake2b_IV[6] ^ S->f[0];
+ v[15] = blake2b_IV[7] ^ S->f[1];
+
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+ ROUND(10);
+ ROUND(11);
+
+ for (i = 0; i < 8; ++i) {
+ S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+ }
+}
+
+static void blake2b_compress_4r(blake2b_state* S, const uint8_t* block) {
+ uint64_t m[16];
+ uint64_t v[16];
+ unsigned int i;
+
+ for (i = 0; i < 16; ++i) {
+ m[i] = load64(block + i * sizeof(m[i]));
+ }
+
+ for (i = 0; i < 8; ++i) {
+ v[i] = S->h[i];
+ }
+
+ v[8] = blake2b_IV[0];
+ v[9] = blake2b_IV[1];
+ v[10] = blake2b_IV[2];
+ v[11] = blake2b_IV[3];
+ v[12] = blake2b_IV[4] ^ S->t[0];
+ v[13] = blake2b_IV[5] ^ S->t[1];
+ v[14] = blake2b_IV[6] ^ S->f[0];
+ v[15] = blake2b_IV[7] ^ S->f[1];
+
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+
+ for (i = 0; i < 8; ++i) {
+ S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+ }
+}
+
+int hashx_blake2b_update(blake2b_state* S, const void* in, size_t inlen) {
+ const uint8_t* pin = (const uint8_t*)in;
+
+ if (inlen == 0) {
+ return 0;
+ }
+
+ /* Sanity check */
+ if (S == NULL || in == NULL) {
+ return -1;
+ }
+
+ /* Is this a reused state? */
+ if (S->f[0] != 0) {
+ return -1;
+ }
+
+ if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
+ /* Complete current block */
+ size_t left = S->buflen;
+ size_t fill = BLAKE2B_BLOCKBYTES - left;
+ memcpy(&S->buf[left], pin, fill);
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+ blake2b_compress(S, S->buf);
+ S->buflen = 0;
+ inlen -= fill;
+ pin += fill;
+ /* Avoid buffer copies when possible */
+ while (inlen > BLAKE2B_BLOCKBYTES) {
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+ blake2b_compress(S, pin);
+ inlen -= BLAKE2B_BLOCKBYTES;
+ pin += BLAKE2B_BLOCKBYTES;
+ }
+ }
+ memcpy(&S->buf[S->buflen], pin, inlen);
+ S->buflen += (unsigned int)inlen;
+ return 0;
+}
+
+int hashx_blake2b_final(blake2b_state* S, void* out, size_t outlen) {
+ uint8_t buffer[BLAKE2B_OUTBYTES] = { 0 };
+ unsigned int i;
+
+ /* Sanity checks */
+ if (S == NULL || out == NULL || outlen < S->outlen) {
+ return -1;
+ }
+
+ /* Is this a reused state? */
+ if (S->f[0] != 0) {
+ return -1;
+ }
+
+ blake2b_increment_counter(S, S->buflen);
+ blake2b_set_lastblock(S);
+ memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
+ blake2b_compress(S, S->buf);
+
+ for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
+ store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
+ }
+
+ memcpy(out, buffer, S->outlen);
+
+ return 0;
+}
+
+/* 4-round version of Blake2b */
+void hashx_blake2b_4r(const blake2b_param* params, const void* in,
+ size_t inlen, void* out) {
+
+ blake2b_state state;
+ const uint8_t* p = (const uint8_t*)params;
+
+ blake2b_init0(&state);
+ /* IV XOR Parameter Block */
+ for (unsigned i = 0; i < 8; ++i) {
+ state.h[i] ^= load64(&p[i * sizeof(state.h[i])]);
+ }
+ //state.outlen = blake_params.digest_length;
+
+ const uint8_t* pin = (const uint8_t*)in;
+
+ while (inlen > BLAKE2B_BLOCKBYTES) {
+ blake2b_increment_counter(&state, BLAKE2B_BLOCKBYTES);
+ blake2b_compress_4r(&state, pin);
+ inlen -= BLAKE2B_BLOCKBYTES;
+ pin += BLAKE2B_BLOCKBYTES;
+ }
+
+ memcpy(state.buf, pin, inlen);
+ blake2b_increment_counter(&state, inlen);
+ blake2b_set_lastblock(&state);
+ blake2b_compress_4r(&state, state.buf);
+
+ /* Output hash */
+ memcpy(out, state.h, sizeof(state.h));
+}
diff --git a/src/ext/equix/hashx/src/blake2.h b/src/ext/equix/hashx/src/blake2.h
new file mode 100644
index 0000000000..ba535b21fb
--- /dev/null
+++ b/src/ext/equix/hashx/src/blake2.h
@@ -0,0 +1,73 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef PORTABLE_BLAKE2_H
+#define PORTABLE_BLAKE2_H
+
+#include <stdint.h>
+#include <limits.h>
+#include <stddef.h>
+#include <hashx.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+enum blake2b_constant {
+ BLAKE2B_BLOCKBYTES = 128,
+ BLAKE2B_OUTBYTES = 64,
+ BLAKE2B_KEYBYTES = 64,
+ BLAKE2B_SALTBYTES = 16,
+ BLAKE2B_PERSONALBYTES = 16
+};
+
+#pragma pack(push, 1)
+typedef struct blake2b_param {
+ uint8_t digest_length; /* 1 */
+ uint8_t key_length; /* 2 */
+ uint8_t fanout; /* 3 */
+ uint8_t depth; /* 4 */
+ uint32_t leaf_length; /* 8 */
+ uint64_t node_offset; /* 16 */
+ uint8_t node_depth; /* 17 */
+ uint8_t inner_length; /* 18 */
+ uint8_t reserved[14]; /* 32 */
+ uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
+ uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
+} blake2b_param;
+#pragma pack(pop)
+
+typedef struct blake2b_state {
+ uint64_t h[8];
+ uint64_t t[2];
+ uint64_t f[2];
+ uint8_t buf[BLAKE2B_BLOCKBYTES];
+ unsigned buflen;
+ unsigned outlen;
+ uint8_t last_node;
+} blake2b_state;
+
+/* Ensure param structs have not been wrongly padded */
+/* Poor man's static_assert */
+enum {
+ blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
+ blake2_size_check_2 =
+ 1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
+};
+
+HASHX_PRIVATE int hashx_blake2b_init_param(blake2b_state* S, const blake2b_param* P);
+HASHX_PRIVATE int hashx_blake2b_update(blake2b_state* S, const void* in, size_t inlen);
+HASHX_PRIVATE int hashx_blake2b_final(blake2b_state* S, void* out, size_t outlen);
+HASHX_PRIVATE void hashx_blake2b_4r(const blake2b_param* P, const void* in, size_t inlen, void* out);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/src/ext/equix/hashx/src/compiler.c b/src/ext/equix/hashx/src/compiler.c
new file mode 100644
index 0000000000..870f3654b6
--- /dev/null
+++ b/src/ext/equix/hashx/src/compiler.c
@@ -0,0 +1,19 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include <stdbool.h>
+
+#include "compiler.h"
+#include "virtual_memory.h"
+#include "program.h"
+#include "context.h"
+
+void hashx_compiler_init(hashx_ctx* ctx) {
+ /* This can fail, but it's uncommon. We report this up the call chain
+ * later, at the same time as an mprotect or similar failure. */
+ ctx->compiler_mem = hashx_vm_alloc(COMP_CODE_SIZE);
+}
+
+void hashx_compiler_destroy(hashx_ctx* ctx) {
+ hashx_vm_free(ctx->compiler_mem, COMP_CODE_SIZE);
+}
diff --git a/src/ext/equix/hashx/src/compiler.h b/src/ext/equix/hashx/src/compiler.h
new file mode 100644
index 0000000000..b7b47dfcb5
--- /dev/null
+++ b/src/ext/equix/hashx/src/compiler.h
@@ -0,0 +1,77 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <hashx.h>
+#include "virtual_memory.h"
+#include "program.h"
+
+HASHX_PRIVATE bool hashx_compile_x86(const hashx_program* program, uint8_t* code);
+
+HASHX_PRIVATE bool hashx_compile_a64(const hashx_program* program, uint8_t* code);
+
+#if defined(_M_X64) || defined(__x86_64__)
+#define HASHX_COMPILER_X86
+#define hashx_compile(p,c) hashx_compile_x86(p,c)
+#elif defined(__aarch64__)
+#define HASHX_COMPILER_A64
+#define hashx_compile(p,c) hashx_compile_a64(p,c)
+#else
+#define hashx_compile(p,c) (false)
+#endif
+
+HASHX_PRIVATE void hashx_compiler_init(hashx_ctx* compiler);
+HASHX_PRIVATE void hashx_compiler_destroy(hashx_ctx* compiler);
+
+/* Compiled code sizes in bytes:
+ *
+ * Prologue Epilogue MulH Reg-Reg Reg-Imm32 Branch+Tgt MaxInst
+ * X86 69 64 9 3..4 7 15 10 (br)
+ * A64 40 36 4 4 12 24 24 (br)
+ *
+ * Maximum code sizes, assuming an arbitrary instruction mix including unlimited
+ * branch instructions. (Branch size * 512 + prologue + epilogue)
+ *
+ * Max possible code size (any instructions)
+ * X86 5253
+ * A64 12364
+ *
+ * Actual code sizes tend to be much smaller due to the instruction mix chosen
+ * by the program generator. To get a quick overview of the statistics, we
+ * measure the sample mean and sample standard deviation for 1 million random
+ * hash programs:
+ *
+ * Mean Std Deviation 4096 bytes at
+ * X86 2786.4 26.259 49.9 standard deviations
+ * A64 3507.7 58.526 10.1 standard deviations
+ *
+ * If we search for PRNG sequences that maximize generated code size, it's easy
+ * to find aarch64 code that needs in the range of 4100-4300 bytes. On x86, this
+ * search still doesn't turn up programs anywhere close to a full page.
+ *
+ * Anyway, this is all to say that a one-page buffer is fine except for in
+ * extremely rare cases on aarch64, and a two-page buffer is enough for any
+ * behavior we can expect from the program generator under arbitrary input,
+ * but only a 4-page buffer is enough for fully arbitrary instruction streams
+ * on any architecture.
+ *
+ * Let's use a 2-page buffer on aarch64, or 1-page elsewhere.
+ *
+ * Note that the buffer allocation is done by platform-independent code,
+ * so COMP_CODE_SIZE must always have a valid size even on platforms where
+ * it is not actually supported or used.
+ *
+ * If this buffer fills up, compilation will fail with a runtime error.
+ */
+
+#ifdef HASHX_COMPILER_A64
+#define COMP_CODE_SIZE (4096 * 2)
+#else
+#define COMP_CODE_SIZE (4096 * 1)
+#endif
+
+#endif
diff --git a/src/ext/equix/hashx/src/compiler_a64.c b/src/ext/equix/hashx/src/compiler_a64.c
new file mode 100644
index 0000000000..f4efa561b9
--- /dev/null
+++ b/src/ext/equix/hashx/src/compiler_a64.c
@@ -0,0 +1,164 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include <string.h>
+#include <assert.h>
+
+#include "compiler.h"
+#include "program.h"
+#include "virtual_memory.h"
+#include "unreachable.h"
+
+#define EMIT(p,x) do { \
+ memcpy(p, x, sizeof(x)); \
+ p += sizeof(x); \
+ } while (0)
+#define EMIT_U32(p,x) *((uint32_t*)(p)) = x; p += sizeof(uint32_t)
+#define EMIT_IMM32(p,x) \
+ EMIT_U32(p, 0x9280000c | \
+ ((x <= INT32_MAX) << 30) | \
+ (((x <= INT32_MAX) ? (x & 0xFFFF) : (~x & 0xFFFF)) << 5)); \
+ EMIT_U32(p, 0xf2a0000c | \
+ (((x >> 16) & 0xFFFF) << 5));
+
+#ifdef HASHX_COMPILER_A64
+
+/* Largest compiled instruction (BRANCH) */
+#define COMP_MAX_INSTR_SIZE 24
+
+static const uint8_t a64_prologue[] = {
+ 0x07, 0x1c, 0x40, 0xf9, /* ldr x7, [x0, #56] */
+ 0x06, 0x18, 0x40, 0xf9, /* ldr x6, [x0, #48] */
+ 0x05, 0x14, 0x40, 0xf9, /* ldr x5, [x0, #40] */
+ 0x04, 0x10, 0x40, 0xf9, /* ldr x4, [x0, #32] */
+ 0x03, 0x0c, 0x40, 0xf9, /* ldr x3, [x0, #24] */
+ 0x02, 0x08, 0x40, 0xf9, /* ldr x2, [x0, #16] */
+ 0x01, 0x04, 0x40, 0xf9, /* ldr x1, [x0, #8] */
+ 0xe8, 0x03, 0x00, 0xaa, /* mov x8, x0 */
+ 0x00, 0x00, 0x40, 0xf9, /* ldr x0, [x0] */
+ 0xe9, 0x03, 0x1f, 0x2a, /* mov w9, wzr */
+};
+
+static const uint8_t a64_epilogue[] = {
+ 0x00, 0x01, 0x00, 0xf9, /* str x0, [x8] */
+ 0x01, 0x05, 0x00, 0xf9, /* str x1, [x8, #8] */
+ 0x02, 0x09, 0x00, 0xf9, /* str x2, [x8, #16] */
+ 0x03, 0x0d, 0x00, 0xf9, /* str x3, [x8, #24] */
+ 0x04, 0x11, 0x00, 0xf9, /* str x4, [x8, #32] */
+ 0x05, 0x15, 0x00, 0xf9, /* str x5, [x8, #40] */
+ 0x06, 0x19, 0x00, 0xf9, /* str x6, [x8, #48] */
+ 0x07, 0x1d, 0x00, 0xf9, /* str x7, [x8, #56] */
+ 0xc0, 0x03, 0x5f, 0xd6, /* ret */
+};
+
+bool hashx_compile_a64(const hashx_program* program, uint8_t* code) {
+ if (!hashx_vm_rw(code, COMP_CODE_SIZE))
+ return false;
+ uint8_t* pos = code;
+ uint8_t* target = NULL;
+ int creg = -1;
+ EMIT(pos, a64_prologue);
+ for (size_t i = 0; i < program->code_size; ++i) {
+ if (pos + COMP_MAX_INSTR_SIZE > code + COMP_CODE_SIZE)
+ return false;
+ const instruction* instr = &program->code[i];
+ switch (instr->opcode)
+ {
+ case INSTR_UMULH_R:
+ EMIT_U32(pos, 0x9bc07c00 |
+ (instr->src << 16) |
+ (instr->dst << 5) |
+ (instr->dst));
+ if (target != NULL) {
+ creg = instr->dst;
+ }
+ break;
+ case INSTR_SMULH_R:
+ EMIT_U32(pos, 0x9b407c00 |
+ (instr->src << 16) |
+ (instr->dst << 5) |
+ (instr->dst));
+ if (target != NULL) {
+ creg = instr->dst;
+ }
+ break;
+ case INSTR_MUL_R:
+ assert(creg != instr->dst);
+ EMIT_U32(pos, 0x9b007c00 |
+ (instr->src << 16) |
+ (instr->dst << 5) |
+ (instr->dst));
+ break;
+ case INSTR_SUB_R:
+ assert(creg != instr->dst);
+ EMIT_U32(pos, 0xcb000000 |
+ (instr->src << 16) |
+ (instr->dst << 5) |
+ (instr->dst));
+ break;
+ case INSTR_XOR_R:
+ assert(creg != instr->dst);
+ EMIT_U32(pos, 0xca000000 |
+ (instr->src << 16) |
+ (instr->dst << 5) |
+ (instr->dst));
+ break;
+ case INSTR_ADD_RS:
+ assert(creg != instr->dst);
+ EMIT_U32(pos, 0x8b000000 |
+ (instr->src << 16) |
+ (instr->imm32 << 10) |
+ (instr->dst << 5) |
+ (instr->dst));
+ break;
+ case INSTR_ROR_C:
+ assert(creg != instr->dst);
+ EMIT_U32(pos, 0x93c00000 |
+ (instr->dst << 16) |
+ (instr->imm32 << 10) |
+ (instr->dst << 5) |
+ (instr->dst));
+ break;
+ case INSTR_ADD_C:
+ assert(creg != instr->dst);
+ EMIT_IMM32(pos, instr->imm32);
+ EMIT_U32(pos, 0x8b0c0000 |
+ (instr->dst << 5) |
+ (instr->dst));
+ break;
+ case INSTR_XOR_C:
+ assert(creg != instr->dst);
+ EMIT_IMM32(pos, instr->imm32);
+ EMIT_U32(pos, 0xca0c0000 |
+ (instr->dst << 5) |
+ (instr->dst));
+ break;
+ case INSTR_TARGET:
+ target = pos;
+ break;
+ case INSTR_BRANCH:
+ EMIT_IMM32(pos, instr->imm32);
+ EMIT_U32(pos, 0x2a00012b | (creg << 16));
+ EMIT_U32(pos, 0x6a0c017f);
+ EMIT_U32(pos, 0x5a891129);
+ EMIT_U32(pos, 0x54000000 |
+ ((((uint32_t)(target - pos)) >> 2) & 0x7FFFF) << 5);
+ target = NULL;
+ creg = -1;
+ break;
+ default:
+ UNREACHABLE;
+ }
+ }
+ if (pos + sizeof a64_epilogue > code + COMP_CODE_SIZE)
+ return false;
+ EMIT(pos, a64_epilogue);
+ if (!hashx_vm_rx(code, COMP_CODE_SIZE))
+ return false;
+#ifdef __GNUC__
+ __builtin___clear_cache((void*)code, (void*)pos);
+#endif
+ return true;
+}
+
+#endif
diff --git a/src/ext/equix/hashx/src/compiler_x86.c b/src/ext/equix/hashx/src/compiler_x86.c
new file mode 100644
index 0000000000..df4fdbebb8
--- /dev/null
+++ b/src/ext/equix/hashx/src/compiler_x86.c
@@ -0,0 +1,159 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include <string.h>
+
+#include "compiler.h"
+#include "program.h"
+#include "virtual_memory.h"
+#include "unreachable.h"
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define WINABI
+#endif
+
+#define EMIT(p,x) do { \
+ memcpy(p, x, sizeof(x)); \
+ p += sizeof(x); \
+ } while (0)
+#define EMIT_BYTE(p,x) *((p)++) = x
+#define EMIT_U16(p,x) *((uint16_t*)(p)) = x; p += sizeof(uint16_t)
+#define EMIT_U32(p,x) *((uint32_t*)(p)) = x; p += sizeof(uint32_t)
+#define EMIT_U64(p,x) *((uint64_t*)(p)) = x; p += sizeof(uint64_t)
+
+#define GEN_SIB(scale, index, base) ((scale << 6) | (index << 3) | base)
+
+#ifdef HASHX_COMPILER_X86
+
+/* Largest compiled instruction (BRANCH) */
+#define COMP_MAX_INSTR_SIZE 10
+
+static const uint8_t x86_prologue[] = {
+#ifndef WINABI
+ 0x48, 0x89, 0xF9, /* mov rcx, rdi */
+ 0x48, 0x83, 0xEC, 0x20, /* sub rsp, 32 */
+ 0x4C, 0x89, 0x24, 0x24, /* mov qword ptr [rsp+0], r12 */
+ 0x4C, 0x89, 0x6C, 0x24, 0x08, /* mov qword ptr [rsp+8], r13 */
+ 0x4C, 0x89, 0x74, 0x24, 0x10, /* mov qword ptr [rsp+16], r14 */
+ 0x4C, 0x89, 0x7C, 0x24, 0x18, /* mov qword ptr [rsp+24], r15 */
+#else
+ 0x4C, 0x89, 0x64, 0x24, 0x08, /* mov qword ptr [rsp+8], r12 */
+ 0x4C, 0x89, 0x6C, 0x24, 0x10, /* mov qword ptr [rsp+16], r13 */
+ 0x4C, 0x89, 0x74, 0x24, 0x18, /* mov qword ptr [rsp+24], r14 */
+ 0x4C, 0x89, 0x7C, 0x24, 0x20, /* mov qword ptr [rsp+32], r15 */
+ 0x48, 0x83, 0xEC, 0x10, /* sub rsp, 16 */
+ 0x48, 0x89, 0x34, 0x24, /* mov qword ptr [rsp+0], rsi */
+ 0x48, 0x89, 0x7C, 0x24, 0x08, /* mov qword ptr [rsp+8], rdi */
+#endif
+ 0x31, 0xF6, /* xor esi, esi */
+ 0x8D, 0x7E, 0xFF, /* lea edi, [rsi-1] */
+ 0x4C, 0x8B, 0x01, /* mov r8, qword ptr [rcx+0] */
+ 0x4C, 0x8B, 0x49, 0x08, /* mov r9, qword ptr [rcx+8] */
+ 0x4C, 0x8B, 0x51, 0x10, /* mov r10, qword ptr [rcx+16] */
+ 0x4C, 0x8B, 0x59, 0x18, /* mov r11, qword ptr [rcx+24] */
+ 0x4C, 0x8B, 0x61, 0x20, /* mov r12, qword ptr [rcx+32] */
+ 0x4C, 0x8B, 0x69, 0x28, /* mov r13, qword ptr [rcx+40] */
+ 0x4C, 0x8B, 0x71, 0x30, /* mov r14, qword ptr [rcx+48] */
+ 0x4C, 0x8B, 0x79, 0x38 /* mov r15, qword ptr [rcx+56] */
+};
+
+static const uint8_t x86_epilogue[] = {
+ 0x4C, 0x89, 0x01, /* mov qword ptr [rcx+0], r8 */
+ 0x4C, 0x89, 0x49, 0x08, /* mov qword ptr [rcx+8], r9 */
+ 0x4C, 0x89, 0x51, 0x10, /* mov qword ptr [rcx+16], r10 */
+ 0x4C, 0x89, 0x59, 0x18, /* mov qword ptr [rcx+24], r11 */
+ 0x4C, 0x89, 0x61, 0x20, /* mov qword ptr [rcx+32], r12 */
+ 0x4C, 0x89, 0x69, 0x28, /* mov qword ptr [rcx+40], r13 */
+ 0x4C, 0x89, 0x71, 0x30, /* mov qword ptr [rcx+48], r14 */
+ 0x4C, 0x89, 0x79, 0x38, /* mov qword ptr [rcx+56], r15 */
+#ifndef WINABI
+ 0x4C, 0x8B, 0x24, 0x24, /* mov r12, qword ptr [rsp+0] */
+ 0x4C, 0x8B, 0x6C, 0x24, 0x08, /* mov r13, qword ptr [rsp+8] */
+ 0x4C, 0x8B, 0x74, 0x24, 0x10, /* mov r14, qword ptr [rsp+16] */
+ 0x4C, 0x8B, 0x7C, 0x24, 0x18, /* mov r15, qword ptr [rsp+24] */
+ 0x48, 0x83, 0xC4, 0x20, /* add rsp, 32 */
+#else
+ 0x48, 0x8B, 0x34, 0x24, /* mov rsi, qword ptr [rsp+0] */
+ 0x48, 0x8B, 0x7C, 0x24, 0x08, /* mov rdi, qword ptr [rsp+8] */
+ 0x48, 0x83, 0xC4, 0x10, /* add rsp, 16 */
+ 0x4C, 0x8B, 0x64, 0x24, 0x08, /* mov r12, qword ptr [rsp+8] */
+ 0x4C, 0x8B, 0x6C, 0x24, 0x10, /* mov r13, qword ptr [rsp+16] */
+ 0x4C, 0x8B, 0x74, 0x24, 0x18, /* mov r14, qword ptr [rsp+24] */
+ 0x4C, 0x8B, 0x7C, 0x24, 0x20, /* mov r15, qword ptr [rsp+32] */
+#endif
+ 0xC3 /* ret */
+};
+
+bool hashx_compile_x86(const hashx_program* program, uint8_t* code) {
+ if (!hashx_vm_rw(code, COMP_CODE_SIZE))
+ return false;
+ uint8_t* pos = code;
+ uint8_t* target = NULL;
+ EMIT(pos, x86_prologue);
+ for (size_t i = 0; i < program->code_size; ++i) {
+ if (pos + COMP_MAX_INSTR_SIZE > code + COMP_CODE_SIZE)
+ return false;
+ const instruction* instr = &program->code[i];
+ switch (instr->opcode)
+ {
+ case INSTR_UMULH_R:
+ EMIT_U64(pos, 0x8b4ce0f749c08b49 |
+ (((uint64_t)instr->src) << 40) |
+ (((uint64_t)instr->dst) << 16));
+ EMIT_BYTE(pos, 0xc2 + 8 * instr->dst);
+ break;
+ case INSTR_SMULH_R:
+ EMIT_U64(pos, 0x8b4ce8f749c08b49 |
+ (((uint64_t)instr->src) << 40) |
+ (((uint64_t)instr->dst) << 16));
+ EMIT_BYTE(pos, 0xc2 + 8 * instr->dst);
+ break;
+ case INSTR_MUL_R:
+ EMIT_U32(pos, 0xc0af0f4d | (instr->dst << 27) | (instr->src << 24));
+ break;
+ case INSTR_SUB_R:
+ EMIT_U16(pos, 0x2b4d);
+ EMIT_BYTE(pos, 0xc0 | (instr->dst << 3) | instr->src);
+ break;
+ case INSTR_XOR_R:
+ EMIT_U16(pos, 0x334d);
+ EMIT_BYTE(pos, 0xc0 | (instr->dst << 3) | instr->src);
+ break;
+ case INSTR_ADD_RS:
+ EMIT_U32(pos, 0x00048d4f |
+ (instr->dst << 19) |
+ GEN_SIB(instr->imm32, instr->src, instr->dst) << 24);
+ break;
+ case INSTR_ROR_C:
+ EMIT_U32(pos, 0x00c8c149 | (instr->dst << 16) | (instr->imm32 << 24));
+ break;
+ case INSTR_ADD_C:
+ EMIT_U16(pos, 0x8149);
+ EMIT_BYTE(pos, 0xc0 | instr->dst);
+ EMIT_U32(pos, instr->imm32);
+ break;
+ case INSTR_XOR_C:
+ EMIT_U16(pos, 0x8149);
+ EMIT_BYTE(pos, 0xf0 | instr->dst);
+ EMIT_U32(pos, instr->imm32);
+ break;
+ case INSTR_TARGET:
+ target = pos; /* +2 */
+ EMIT_U32(pos, 0x440fff85);
+ EMIT_BYTE(pos, 0xf7);
+ break;
+ case INSTR_BRANCH:
+ EMIT_U64(pos, ((uint64_t)instr->imm32) << 32 | 0xc2f7f209);
+ EMIT_U16(pos, ((target - pos) << 8) | 0x74);
+ break;
+ default:
+ UNREACHABLE;
+ }
+ }
+ if (pos + sizeof x86_epilogue > code + COMP_CODE_SIZE)
+ return false;
+ EMIT(pos, x86_epilogue);
+ return hashx_vm_rx(code, COMP_CODE_SIZE);
+}
+
+#endif
diff --git a/src/ext/equix/hashx/src/context.c b/src/ext/equix/hashx/src/context.c
new file mode 100644
index 0000000000..da1b997e16
--- /dev/null
+++ b/src/ext/equix/hashx/src/context.c
@@ -0,0 +1,67 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <hashx.h>
+#include "context.h"
+#include "compiler.h"
+#include "program.h"
+
+#define STRINGIZE_INNER(x) #x
+#define STRINGIZE(x) STRINGIZE_INNER(x)
+
+/* Salt used when generating hash functions. Useful for domain separation. */
+#ifndef HASHX_SALT
+#define HASHX_SALT HashX v1
+#endif
+
+/* Blake2b params used to generate program keys */
+const blake2b_param hashx_blake2_params = {
+ .digest_length = 64,
+ .key_length = 0,
+ .fanout = 1,
+ .depth = 1,
+ .leaf_length = 0,
+ .node_offset = 0,
+ .node_depth = 0,
+ .inner_length = 0,
+ .reserved = { 0 },
+ .salt = STRINGIZE(HASHX_SALT),
+ .personal = { 0 }
+};
+
+hashx_ctx* hashx_alloc(hashx_type type) {
+ hashx_ctx* ctx = malloc(sizeof(hashx_ctx));
+ if (ctx == NULL)
+ return NULL;
+
+ memset(ctx, 0, sizeof *ctx);
+ ctx->ctx_type = type;
+ if (type == HASHX_TYPE_COMPILED || type == HASHX_TRY_COMPILE) {
+ hashx_compiler_init(ctx);
+ }
+
+#ifdef HASHX_BLOCK_MODE
+ memcpy(&ctx->params, &hashx_blake2_params, 32);
+#endif
+ return ctx;
+}
+
+void hashx_free(hashx_ctx* ctx) {
+ if (ctx != NULL) {
+ hashx_compiler_destroy(ctx);
+ free(ctx);
+ }
+}
+
+#ifdef HASHX_RNG_CALLBACK
+void hashx_rng_callback(hashx_ctx* ctx,
+ void (*callback)(uint64_t*, void*),
+ void* callback_user_data)
+{
+ ctx->program.rng_callback = callback;
+ ctx->program.rng_callback_user_data = callback_user_data;
+}
+#endif
diff --git a/src/ext/equix/hashx/src/context.h b/src/ext/equix/hashx/src/context.h
new file mode 100644
index 0000000000..ad434eb66c
--- /dev/null
+++ b/src/ext/equix/hashx/src/context.h
@@ -0,0 +1,39 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef CONTEXT_H
+#define CONTEXT_H
+
+#include <stdbool.h>
+
+#include "hashx.h"
+#include "blake2.h"
+#include "siphash.h"
+#include "program.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+HASHX_PRIVATE extern const blake2b_param hashx_blake2_params;
+
+#ifdef __cplusplus
+}
+#endif
+
+typedef struct hashx_program hashx_program;
+
+/* HashX context. */
+typedef struct hashx_ctx {
+ uint8_t* compiler_mem;
+ hashx_type ctx_type;
+ hashx_type func_type;
+ hashx_program program;
+#ifndef HASHX_BLOCK_MODE
+ siphash_state keys;
+#else
+ blake2b_param params;
+#endif
+} hashx_ctx;
+
+#endif
diff --git a/src/ext/equix/hashx/src/force_inline.h b/src/ext/equix/hashx/src/force_inline.h
new file mode 100644
index 0000000000..ba3002f0f3
--- /dev/null
+++ b/src/ext/equix/hashx/src/force_inline.h
@@ -0,0 +1,9 @@
+#ifndef FORCE_INLINE
+#if defined(_MSC_VER)
+#define FORCE_INLINE __inline
+#elif defined(__GNUC__) || defined(__clang__)
+#define FORCE_INLINE __inline__
+#else
+#define FORCE_INLINE
+#endif
+#endif
diff --git a/src/ext/equix/hashx/src/hashx.c b/src/ext/equix/hashx/src/hashx.c
new file mode 100644
index 0000000000..36a32fc298
--- /dev/null
+++ b/src/ext/equix/hashx/src/hashx.c
@@ -0,0 +1,172 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <hashx.h>
+#include "blake2.h"
+#include "hashx_endian.h"
+#include "program.h"
+#include "context.h"
+#include "compiler.h"
+
+#if HASHX_SIZE > 32
+#error HASHX_SIZE cannot be more than 32
+#endif
+
+#ifndef HASHX_BLOCK_MODE
+#define HASHX_INPUT_ARGS input
+#else
+#define HASHX_INPUT_ARGS input, size
+#endif
+
+static bool initialize_program(hashx_ctx* ctx, siphash_state keys[2]) {
+ if (!hashx_program_generate(&keys[0], &ctx->program)) {
+ return false;
+ }
+#ifndef HASHX_BLOCK_MODE
+ memcpy(&ctx->keys, &keys[1], 32);
+#else
+ memcpy(&ctx->params.salt, &keys[1], 32);
+#endif
+ return true;
+}
+
+hashx_result hashx_make(hashx_ctx* ctx, const void* seed, size_t size) {
+ assert(ctx != NULL);
+ assert(seed != NULL || size == 0);
+
+ uint8_t keys_bytes[2 * sizeof(siphash_state)];
+ blake2b_state hash_state;
+ hashx_blake2b_init_param(&hash_state, &hashx_blake2_params);
+ hashx_blake2b_update(&hash_state, seed, size);
+ hashx_blake2b_final(&hash_state, keys_bytes, sizeof(keys_bytes));
+
+ siphash_state keys[2];
+ keys[0].v0 = load64(keys_bytes + 0 * sizeof(uint64_t));
+ keys[0].v1 = load64(keys_bytes + 1 * sizeof(uint64_t));
+ keys[0].v2 = load64(keys_bytes + 2 * sizeof(uint64_t));
+ keys[0].v3 = load64(keys_bytes + 3 * sizeof(uint64_t));
+ keys[1].v0 = load64(keys_bytes + 4 * sizeof(uint64_t));
+ keys[1].v1 = load64(keys_bytes + 5 * sizeof(uint64_t));
+ keys[1].v2 = load64(keys_bytes + 6 * sizeof(uint64_t));
+ keys[1].v3 = load64(keys_bytes + 7 * sizeof(uint64_t));
+
+ ctx->func_type = (hashx_type)0;
+ if (!initialize_program(ctx, keys)) {
+ return HASHX_FAIL_SEED;
+ }
+
+ switch (ctx->ctx_type) {
+ case HASHX_TYPE_INTERPRETED:
+ ctx->func_type = HASHX_TYPE_INTERPRETED;
+ return HASHX_OK;
+ case HASHX_TYPE_COMPILED:
+ case HASHX_TRY_COMPILE:
+ if (ctx->compiler_mem != NULL &&
+ hashx_compile(&ctx->program, ctx->compiler_mem)) {
+ ctx->func_type = HASHX_TYPE_COMPILED;
+ return HASHX_OK;
+ }
+ if (ctx->ctx_type == HASHX_TRY_COMPILE) {
+ ctx->func_type = HASHX_TYPE_INTERPRETED;
+ return HASHX_OK;
+ } else {
+ return HASHX_FAIL_COMPILE;
+ }
+ default:
+ return HASHX_FAIL_UNDEFINED;
+ }
+}
+
+hashx_result hashx_query_type(hashx_ctx* ctx, hashx_type *type_out) {
+ assert(ctx != NULL);
+ assert(type_out != NULL);
+
+ if (ctx->func_type == (hashx_type)0) {
+ return HASHX_FAIL_UNPREPARED;
+ }
+ *type_out = ctx->func_type;
+ return HASHX_OK;
+}
+
+hashx_result hashx_exec(const hashx_ctx* ctx, HASHX_INPUT, void* output) {
+ assert(ctx != NULL);
+ assert(output != NULL);
+
+ uint64_t r[8];
+#ifndef HASHX_BLOCK_MODE
+ hashx_siphash24_ctr_state512(&ctx->keys, input, r);
+#else
+ hashx_blake2b_4r(&ctx->params, input, size, r);
+#endif
+
+ if (ctx->func_type == HASHX_TYPE_COMPILED) {
+ typedef void program_func(uint64_t r[8]);
+ assert(ctx->compiler_mem != NULL);
+ ((program_func*)ctx->compiler_mem)(r);
+ } else if (ctx->func_type == HASHX_TYPE_INTERPRETED) {
+ hashx_program_execute(&ctx->program, r);
+ } else {
+ return HASHX_FAIL_UNPREPARED;
+ }
+
+ /* Hash finalization to remove bias toward 0 caused by multiplications */
+#ifndef HASHX_BLOCK_MODE
+ r[0] += ctx->keys.v0;
+ r[1] += ctx->keys.v1;
+ r[6] += ctx->keys.v2;
+ r[7] += ctx->keys.v3;
+#else
+ const uint8_t* p = (const uint8_t*)&ctx->params;
+ r[0] ^= load64(&p[8 * 0]);
+ r[1] ^= load64(&p[8 * 1]);
+ r[2] ^= load64(&p[8 * 2]);
+ r[3] ^= load64(&p[8 * 3]);
+ r[4] ^= load64(&p[8 * 4]);
+ r[5] ^= load64(&p[8 * 5]);
+ r[6] ^= load64(&p[8 * 6]);
+ r[7] ^= load64(&p[8 * 7]);
+#endif
+ /* 1 SipRound per 4 registers is enough to pass SMHasher. */
+ SIPROUND(r[0], r[1], r[2], r[3]);
+ SIPROUND(r[4], r[5], r[6], r[7]);
+
+ /* output */
+#if HASHX_SIZE > 0
+ /* optimized output for hash sizes that are multiples of 8 */
+#if HASHX_SIZE % 8 == 0
+ uint8_t* temp_out = (uint8_t*)output;
+#if HASHX_SIZE >= 8
+ store64(temp_out + 0, r[0] ^ r[4]);
+#endif
+#if HASHX_SIZE >= 16
+ store64(temp_out + 8, r[1] ^ r[5]);
+#endif
+#if HASHX_SIZE >= 24
+ store64(temp_out + 16, r[2] ^ r[6]);
+#endif
+#if HASHX_SIZE >= 32
+ store64(temp_out + 24, r[3] ^ r[7]);
+#endif
+#else /* any output size */
+ uint8_t temp_out[32];
+#if HASHX_SIZE > 0
+ store64(temp_out + 0, r[0] ^ r[4]);
+#endif
+#if HASHX_SIZE > 8
+ store64(temp_out + 8, r[1] ^ r[5]);
+#endif
+#if HASHX_SIZE > 16
+ store64(temp_out + 16, r[2] ^ r[6]);
+#endif
+#if HASHX_SIZE > 24
+ store64(temp_out + 24, r[3] ^ r[7]);
+#endif
+ memcpy(output, temp_out, HASHX_SIZE);
+#endif
+#endif
+ return HASHX_OK;
+}
diff --git a/src/ext/equix/hashx/src/hashx_endian.h b/src/ext/equix/hashx/src/hashx_endian.h
new file mode 100644
index 0000000000..23537cfba5
--- /dev/null
+++ b/src/ext/equix/hashx/src/hashx_endian.h
@@ -0,0 +1,103 @@
+#ifndef ENDIAN_H
+#define ENDIAN_H
+
+#include <stdint.h>
+#include <string.h>
+#include "force_inline.h"
+
+/* Argon2 Team - Begin Code */
+/*
+ Not an exhaustive list, but should cover the majority of modern platforms
+ Additionally, the code will always be correct---this is only a performance
+ tweak.
+*/
+#if (defined(__BYTE_ORDER__) && \
+ (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
+ defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
+ defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
+ defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
+ defined(_M_ARM)
+#define NATIVE_LITTLE_ENDIAN
+#endif
+/* Argon2 Team - End Code */
+
+static FORCE_INLINE uint32_t load32(const void* src) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+ uint32_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ const uint8_t* p = (const uint8_t*)src;
+ uint32_t w = *p++;
+ w |= (uint32_t)(*p++) << 8;
+ w |= (uint32_t)(*p++) << 16;
+ w |= (uint32_t)(*p++) << 24;
+ return w;
+#endif
+}
+
+static FORCE_INLINE uint64_t load64_native(const void* src) {
+ uint64_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+}
+
+static FORCE_INLINE uint64_t load64(const void* src) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+ return load64_native(src);
+#else
+ const uint8_t* p = (const uint8_t*)src;
+ uint64_t w = *p++;
+ w |= (uint64_t)(*p++) << 8;
+ w |= (uint64_t)(*p++) << 16;
+ w |= (uint64_t)(*p++) << 24;
+ w |= (uint64_t)(*p++) << 32;
+ w |= (uint64_t)(*p++) << 40;
+ w |= (uint64_t)(*p++) << 48;
+ w |= (uint64_t)(*p++) << 56;
+ return w;
+#endif
+}
+
+static FORCE_INLINE void store32(void* dst, uint32_t w) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+ memcpy(dst, &w, sizeof w);
+#else
+ uint8_t* p = (uint8_t*)dst;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+#endif
+}
+
+static FORCE_INLINE void store64_native(void* dst, uint64_t w) {
+ memcpy(dst, &w, sizeof w);
+}
+
+static FORCE_INLINE void store64(void* dst, uint64_t w) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+ store64_native(dst, w);
+#else
+ uint8_t* p = (uint8_t*)dst;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+#endif
+}
+#endif
diff --git a/src/ext/equix/hashx/src/hashx_thread.c b/src/ext/equix/hashx/src/hashx_thread.c
new file mode 100644
index 0000000000..6618378e66
--- /dev/null
+++ b/src/ext/equix/hashx/src/hashx_thread.c
@@ -0,0 +1,27 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include "hashx_thread.h"
+
+hashx_thread hashx_thread_create(hashx_thread_func* func, void* args) {
+#ifdef HASHX_WIN
+ return CreateThread(NULL, 0, func, args, 0, NULL);
+#else
+ hashx_thread thread;
+ if (pthread_create(&thread, NULL, func, args) != 0)
+ {
+ thread = 0;
+ }
+ return thread;
+#endif
+}
+
+void hashx_thread_join(hashx_thread thread) {
+#ifdef HASHX_WIN
+ WaitForSingleObject(thread, INFINITE);
+ CloseHandle(thread);
+#else
+ void* retval;
+ pthread_join(thread, &retval);
+#endif
+}
diff --git a/src/ext/equix/hashx/src/hashx_thread.h b/src/ext/equix/hashx/src/hashx_thread.h
new file mode 100644
index 0000000000..01556626d8
--- /dev/null
+++ b/src/ext/equix/hashx/src/hashx_thread.h
@@ -0,0 +1,27 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef HASHX_THREAD_H
+#define HASHX_THREAD_H
+
+#include <hashx.h>
+
+#ifdef HASHX_WIN
+#include <Windows.h>
+typedef HANDLE hashx_thread;
+typedef DWORD hashx_thread_retval;
+#define HASHX_THREAD_SUCCESS 0
+#else
+#include <pthread.h>
+typedef pthread_t hashx_thread;
+typedef void* hashx_thread_retval;
+#define HASHX_THREAD_SUCCESS NULL
+#endif
+
+typedef hashx_thread_retval hashx_thread_func(void* args);
+
+hashx_thread hashx_thread_create(hashx_thread_func* func, void* args);
+
+void hashx_thread_join(hashx_thread thread);
+
+#endif
diff --git a/src/ext/equix/hashx/src/hashx_time.c b/src/ext/equix/hashx/src/hashx_time.c
new file mode 100644
index 0000000000..d3eaaed10e
--- /dev/null
+++ b/src/ext/equix/hashx/src/hashx_time.c
@@ -0,0 +1,35 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include "hashx_time.h"
+#include <hashx.h>
+
+#if defined(HASHX_WIN)
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+
+double hashx_time() {
+#ifdef HASHX_WIN
+ static double freq = 0;
+ if (freq == 0) {
+ LARGE_INTEGER freq_long;
+ if (!QueryPerformanceFrequency(&freq_long)) {
+ return 0;
+ }
+ freq = freq_long.QuadPart;
+ }
+ LARGE_INTEGER time;
+ if (!QueryPerformanceCounter(&time)) {
+ return 0;
+ }
+ return time.QuadPart / freq;
+#else
+ struct timeval time;
+ if (gettimeofday(&time, NULL) != 0) {
+ return 0;
+ }
+ return (double)time.tv_sec + (double)time.tv_usec * 1.0e-6;
+#endif
+}
diff --git a/src/ext/equix/hashx/src/hashx_time.h b/src/ext/equix/hashx/src/hashx_time.h
new file mode 100644
index 0000000000..da1ca746fc
--- /dev/null
+++ b/src/ext/equix/hashx/src/hashx_time.h
@@ -0,0 +1,9 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef HASHX_TIME_H
+#define HASHX_TIME_H
+
+double hashx_time(void);
+
+#endif
diff --git a/src/ext/equix/hashx/src/instruction.h b/src/ext/equix/hashx/src/instruction.h
new file mode 100644
index 0000000000..f17582ffea
--- /dev/null
+++ b/src/ext/equix/hashx/src/instruction.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef INSTRUCTION_H
+#define INSTRUCTION_H
+
+#include <stdint.h>
+
+typedef enum instr_type {
+ INSTR_UMULH_R, /* unsigned high multiplication by a register */
+ INSTR_SMULH_R, /* signed high multiplication by a register */
+ INSTR_MUL_R, /* multiplication by a register */
+ INSTR_SUB_R, /* subtraction of a register */
+ INSTR_XOR_R, /* xor with a register */
+ INSTR_ADD_RS, /* addition of a shifted register */
+ INSTR_ROR_C, /* rotation by a constant */
+ INSTR_ADD_C, /* addition of a constant */
+ INSTR_XOR_C, /* xor with a constant */
+ INSTR_TARGET, /* branch instruction target */
+ INSTR_BRANCH, /* conditional branch */
+} instr_type;
+
+typedef struct instruction {
+ instr_type opcode;
+ int src;
+ int dst;
+ uint32_t imm32;
+ uint32_t op_par;
+} instruction;
+
+#endif
diff --git a/src/ext/equix/hashx/src/program.c b/src/ext/equix/hashx/src/program.c
new file mode 100644
index 0000000000..267ab5956a
--- /dev/null
+++ b/src/ext/equix/hashx/src/program.c
@@ -0,0 +1,777 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "program.h"
+#include "unreachable.h"
+#include "siphash_rng.h"
+
+/* instructions are generated until this CPU cycle */
+#define TARGET_CYCLE 192
+
+/* requirements for the program to be acceptable */
+#define REQUIREMENT_SIZE 512
+#define REQUIREMENT_MUL_COUNT 192
+#define REQUIREMENT_LATENCY 195
+
+/* R5 (x86 = r13) register cannot be used as the destination of INSTR_ADD_RS */
+#define REGISTER_NEEDS_DISPLACEMENT 5
+
+#define PORT_MAP_SIZE (TARGET_CYCLE + 4)
+#define NUM_PORTS 3
+#define MAX_RETRIES 1
+#define LOG2_BRANCH_PROB 4
+#define BRANCH_MASK 0x80000000
+
+#define TRACE false
+#define TRACE_PRINT(...) do { if (TRACE) printf(__VA_ARGS__); } while (false)
+
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* If the instruction is a multiplication. */
+static inline bool is_mul(instr_type type) {
+ return type <= INSTR_MUL_R;
+}
+
+#ifdef HASHX_PROGRAM_STATS
+/* If the instruction is a 64x64->128 bit multiplication. */
+static inline bool is_wide_mul(instr_type type) {
+ return type < INSTR_MUL_R;
+}
+#endif
+
+/* Ivy Bridge integer execution ports: P0, P1, P5 */
+typedef enum execution_port {
+ PORT_NONE = 0,
+ PORT_P0 = 1,
+ PORT_P1 = 2,
+ PORT_P5 = 4,
+ PORT_P01 = PORT_P0 | PORT_P1,
+ PORT_P05 = PORT_P0 | PORT_P5,
+ PORT_P015 = PORT_P0 | PORT_P1 | PORT_P5
+} execution_port;
+
+static const char* execution_port_names[] = {
+ "PORT_NONE", "PORT_P0", "PORT_P1", "PORT_P01", "PORT_P5", "PORT_P05", "PORT_P15", "PORT_P015"
+};
+
+typedef struct instr_template {
+ instr_type type; /* instruction type */
+ const char* x86_asm; /* x86 assembly */
+ int x86_size; /* x86 code size */
+ int latency; /* latency in cycles */
+ execution_port uop1; /* ex. ports for the 1st uop */
+ execution_port uop2; /* ex. ports for the 2nd uop */
+ uint32_t immediate_mask; /* mask for imm32 */
+ instr_type group; /* instruction group */
+ bool imm_can_be_0; /* if imm32 can be zero */
+ bool distinct_dst; /* if dst and src must be distinct */
+ bool op_par_src; /* operation parameter is equal to src */
+ bool has_src; /* if the instruction has a source operand */
+ bool has_dst; /* if the instr. has a destination operand */
+} instr_template;
+
+typedef struct register_info {
+ int latency; /* cycle when the register value will be ready */
+ instr_type last_op; /* last op applied to the register */
+ uint32_t last_op_par; /* parameter of the last op (~0 = constant) */
+} register_info;
+
+typedef struct program_item {
+ const instr_template** templates;
+ uint32_t mask0;
+ uint32_t mask1;
+ bool duplicates;
+} program_item;
+
+typedef struct generator_ctx {
+ int cycle;
+ int sub_cycle;
+ int mul_count;
+ bool chain_mul;
+ int latency;
+ siphash_rng gen;
+ register_info registers[8];
+ execution_port ports[PORT_MAP_SIZE][NUM_PORTS];
+} generator_ctx;
+
+static const instr_template tpl_umulh_r = {
+ .type = INSTR_UMULH_R,
+ .x86_asm = "mul r",
+ .x86_size = 9, /* mov, mul, mov */
+ .latency = 4,
+ .uop1 = PORT_P1,
+ .uop2 = PORT_P5,
+ .immediate_mask = 0,
+ .group = INSTR_UMULH_R,
+ .imm_can_be_0 = false,
+ .distinct_dst = false,
+ .op_par_src = false,
+ .has_src = true,
+ .has_dst = true,
+};
+
+static const instr_template tpl_smulh_r = {
+ .type = INSTR_SMULH_R,
+ .x86_asm = "imul r",
+ .x86_size = 9, /* mov, mul, mov */
+ .latency = 4,
+ .uop1 = PORT_P1,
+ .uop2 = PORT_P5,
+ .immediate_mask = 0,
+ .group = INSTR_SMULH_R,
+ .imm_can_be_0 = false,
+ .distinct_dst = false,
+ .op_par_src = false,
+ .has_src = true,
+ .has_dst = true,
+};
+
+static const instr_template tpl_mul_r = {
+ .type = INSTR_MUL_R,
+ .x86_asm = "imul r,r",
+ .x86_size = 4,
+ .latency = 3,
+ .uop1 = PORT_P1,
+ .uop2 = PORT_NONE,
+ .immediate_mask = 0,
+ .group = INSTR_MUL_R,
+ .imm_can_be_0 = false,
+ .distinct_dst = true,
+ .op_par_src = true,
+ .has_src = true,
+ .has_dst = true,
+};
+
+static const instr_template tpl_sub_r = {
+ .type = INSTR_SUB_R,
+ .x86_asm = "sub r,r",
+ .x86_size = 3,
+ .latency = 1,
+ .uop1 = PORT_P015,
+ .uop2 = PORT_NONE,
+ .immediate_mask = 0,
+ .group = INSTR_ADD_RS,
+ .imm_can_be_0 = false,
+ .distinct_dst = true,
+ .op_par_src = true,
+ .has_src = true,
+ .has_dst = true,
+};
+
+static const instr_template tpl_xor_r = {
+ .type = INSTR_XOR_R,
+ .x86_asm = "xor r,r",
+ .x86_size = 3,
+ .latency = 1,
+ .uop1 = PORT_P015,
+ .uop2 = PORT_NONE,
+ .immediate_mask = 0,
+ .group = INSTR_XOR_R,
+ .imm_can_be_0 = false,
+ .distinct_dst = true,
+ .op_par_src = true,
+ .has_src = true,
+ .has_dst = true,
+};
+
+static const instr_template tpl_add_rs = {
+ .type = INSTR_ADD_RS,
+ .x86_asm = "lea r,r+r*s",
+ .x86_size = 4,
+ .latency = 1,
+ .uop1 = PORT_P01,
+ .uop2 = PORT_NONE,
+ .immediate_mask = 3,
+ .group = INSTR_ADD_RS,
+ .imm_can_be_0 = true,
+ .distinct_dst = true,
+ .op_par_src = true,
+ .has_src = true,
+ .has_dst = true,
+};
+
+static const instr_template tpl_ror_c = {
+ .type = INSTR_ROR_C,
+ .x86_asm = "ror r,i",
+ .x86_size = 4,
+ .latency = 1,
+ .uop1 = PORT_P05,
+ .uop2 = PORT_NONE,
+ .immediate_mask = 63,
+ .group = INSTR_ROR_C,
+ .imm_can_be_0 = false,
+ .distinct_dst = true,
+ .op_par_src = false,
+ .has_src = false,
+ .has_dst = true,
+};
+
+static const instr_template tpl_add_c = {
+ .type = INSTR_ADD_C,
+ .x86_asm = "add r,i",
+ .x86_size = 7,
+ .latency = 1,
+ .uop1 = PORT_P015,
+ .uop2 = PORT_NONE,
+ .immediate_mask = UINT32_MAX,
+ .group = INSTR_ADD_C,
+ .imm_can_be_0 = false,
+ .distinct_dst = true,
+ .op_par_src = false,
+ .has_src = false,
+ .has_dst = true,
+};
+
+static const instr_template tpl_xor_c = {
+ .type = INSTR_XOR_C,
+ .x86_asm = "xor r,i",
+ .x86_size = 7,
+ .latency = 1,
+ .uop1 = PORT_P015,
+ .uop2 = PORT_NONE,
+ .immediate_mask = UINT32_MAX,
+ .group = INSTR_XOR_C,
+ .imm_can_be_0 = false,
+ .distinct_dst = true,
+ .op_par_src = false,
+ .has_src = false,
+ .has_dst = true,
+};
+
+
+static const instr_template tpl_target = {
+ .type = INSTR_TARGET,
+ .x86_asm = "cmovz esi, edi",
+ .x86_size = 5, /* test, cmovz */
+ .latency = 1,
+ .uop1 = PORT_P015,
+ .uop2 = PORT_P015,
+ .immediate_mask = 0,
+ .group = INSTR_TARGET,
+ .imm_can_be_0 = false,
+ .distinct_dst = true,
+ .op_par_src = false,
+ .has_src = false,
+ .has_dst = false,
+};
+
+static const instr_template tpl_branch = {
+ .type = INSTR_BRANCH,
+ .x86_asm = "jz target",
+ .x86_size = 10, /* or, test, jz */
+ .latency = 1,
+ .uop1 = PORT_P015,
+ .uop2 = PORT_P015,
+ .immediate_mask = BRANCH_MASK,
+ .group = INSTR_BRANCH,
+ .imm_can_be_0 = false,
+ .distinct_dst = true,
+ .op_par_src = false,
+ .has_src = false,
+ .has_dst = false,
+};
+
+static const instr_template* instr_lookup[] = {
+ &tpl_ror_c,
+ &tpl_xor_c,
+ &tpl_add_c,
+ &tpl_add_c,
+ &tpl_sub_r,
+ &tpl_xor_r,
+ &tpl_xor_c,
+ &tpl_add_rs,
+};
+
+static const instr_template* wide_mul_lookup[] = {
+ &tpl_smulh_r,
+ &tpl_umulh_r
+};
+
+static const instr_template* mul_lookup = &tpl_mul_r;
+static const instr_template* target_lookup = &tpl_target;
+static const instr_template* branch_lookup = &tpl_branch;
+
+static const program_item item_mul = {
+ .templates = &mul_lookup,
+ .mask0 = 0,
+ .mask1 = 0,
+ .duplicates = true
+};
+
+static const program_item item_target = {
+ .templates = &target_lookup,
+ .mask0 = 0,
+ .mask1 = 0,
+ .duplicates = true
+};
+
+static const program_item item_branch = {
+ .templates = &branch_lookup,
+ .mask0 = 0,
+ .mask1 = 0,
+ .duplicates = true
+};
+
+static const program_item item_wide_mul = {
+ .templates = wide_mul_lookup,
+ .mask0 = 1,
+ .mask1 = 1,
+ .duplicates = true
+};
+
+static const program_item item_any = {
+ .templates = instr_lookup,
+ .mask0 = 7,
+ .mask1 = 3, /* instructions that don't need a src register */
+ .duplicates = false
+};
+
+static const program_item* program_layout[] = {
+ &item_mul,
+ &item_target,
+ &item_any,
+ &item_mul,
+ &item_any,
+ &item_any,
+ &item_mul,
+ &item_any,
+ &item_any,
+ &item_mul,
+ &item_any,
+ &item_any,
+ &item_wide_mul,
+ &item_any,
+ &item_any,
+ &item_mul,
+ &item_any,
+ &item_any,
+ &item_mul,
+ &item_branch,
+ &item_any,
+ &item_mul,
+ &item_any,
+ &item_any,
+ &item_wide_mul,
+ &item_any,
+ &item_any,
+ &item_mul,
+ &item_any,
+ &item_any,
+ &item_mul,
+ &item_any,
+ &item_any,
+ &item_mul,
+ &item_any,
+ &item_any,
+};
+
+static const instr_template* select_template(generator_ctx* ctx, instr_type last_instr, int attempt) {
+ const program_item* item = program_layout[ctx->sub_cycle % 36];
+ const instr_template* tpl;
+ do {
+ int index = item->mask0 ? hashx_siphash_rng_u8(&ctx->gen) & (attempt > 0 ? item->mask1 : item->mask0) : 0;
+ tpl = item->templates[index];
+ } while (!item->duplicates && tpl->group == last_instr);
+ return tpl;
+}
+
+static uint32_t branch_mask(siphash_rng* gen) {
+ uint32_t mask = 0;
+ int popcnt = 0;
+ while (popcnt < LOG2_BRANCH_PROB) {
+ int bit = hashx_siphash_rng_u8(gen) % 32;
+ uint32_t bitmask = 1U << bit;
+ if (!(mask & bitmask)) {
+ mask |= bitmask;
+ popcnt++;
+ }
+ }
+ return mask;
+}
+
+static void instr_from_template(const instr_template* tpl, siphash_rng* gen, instruction* instr) {
+ instr->opcode = tpl->type;
+ if (tpl->immediate_mask) {
+ if (tpl->immediate_mask == BRANCH_MASK) {
+ instr->imm32 = branch_mask(gen);
+ }
+ else do {
+ instr->imm32 = hashx_siphash_rng_u32(gen) & tpl->immediate_mask;
+ } while (instr->imm32 == 0 && !tpl->imm_can_be_0);
+ }
+ if (!tpl->op_par_src) {
+ if (tpl->distinct_dst) {
+ instr->op_par = UINT32_MAX;
+ }
+ else {
+ instr->op_par = hashx_siphash_rng_u32(gen);
+ }
+ }
+ if (!tpl->has_src) {
+ instr->src = -1;
+ }
+ if (!tpl->has_dst) {
+ instr->dst = -1;
+ }
+}
+
+static bool select_register(int available_regs[8], int regs_count, siphash_rng* gen, int* reg_out) {
+ if (regs_count == 0)
+ return false;
+
+ int index;
+
+ if (regs_count > 1) {
+ index = hashx_siphash_rng_u32(gen) % regs_count;
+ }
+ else {
+ index = 0;
+ }
+ *reg_out = available_regs[index];
+ return true;
+}
+
+static bool select_destination(const instr_template* tpl, instruction* instr, generator_ctx* ctx, int cycle) {
+ int available_regs[8];
+ int regs_count = 0;
+ /* Conditions for the destination register:
+ // * value must be ready at the required cycle
+ // * cannot be the same as the source register unless the instruction allows it
+ // - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
+ // * register cannot be multiplied twice in a row unless chain_mul is true
+ // - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
+ // - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
+ // * either the last instruction applied to the register or its source must be different than this instruction
+ // - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
+ // * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction) */
+ for (int i = 0; i < 8; ++i) {
+ bool available = ctx->registers[i].latency <= cycle;
+ available &= ((!tpl->distinct_dst) | (i != instr->src));
+ available &= (ctx->chain_mul | (tpl->group != INSTR_MUL_R) | (ctx->registers[i].last_op != INSTR_MUL_R));
+ available &= ((ctx->registers[i].last_op != tpl->group) | (ctx->registers[i].last_op_par != instr->op_par));
+ available &= ((instr->opcode != INSTR_ADD_RS) | (i != REGISTER_NEEDS_DISPLACEMENT));
+ available_regs[regs_count] = available ? i : 0;
+ regs_count += available;
+ }
+ return select_register(available_regs, regs_count, &ctx->gen, &instr->dst);
+}
+
+static bool select_source(const instr_template* tpl, instruction* instr, generator_ctx* ctx, int cycle) {
+ int available_regs[8];
+ int regs_count = 0;
+ /* all registers that are ready at the cycle */
+ for (int i = 0; i < 8; ++i) {
+ if (ctx->registers[i].latency <= cycle)
+ available_regs[regs_count++] = i;
+ }
+ /* if there are only 2 available registers for ADD_RS and one of them is r5, select it as the source because it cannot be the destination */
+ if (regs_count == 2 && instr->opcode == INSTR_ADD_RS) {
+ if (available_regs[0] == REGISTER_NEEDS_DISPLACEMENT || available_regs[1] == REGISTER_NEEDS_DISPLACEMENT) {
+ instr->op_par = instr->src = REGISTER_NEEDS_DISPLACEMENT;
+ return true;
+ }
+ }
+ if (select_register(available_regs, regs_count, &ctx->gen, &instr->src)) {
+ if (tpl->op_par_src)
+ instr->op_par = instr->src;
+ return true;
+ }
+ return false;
+}
+
+static int schedule_uop(execution_port uop, generator_ctx* ctx, int cycle, bool commit) {
+ /* The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
+ port P1 (multiplication) by instructions that can go to any port. */
+ for (; cycle < PORT_MAP_SIZE; ++cycle) {
+ if ((uop & PORT_P5) && !ctx->ports[cycle][2]) {
+ if (commit) {
+ ctx->ports[cycle][2] = uop;
+ }
+ TRACE_PRINT("%s scheduled to port P5 at cycle %i (commit = %i)\n", execution_port_names[uop], cycle, commit);
+ return cycle;
+ }
+ if ((uop & PORT_P0) && !ctx->ports[cycle][0]) {
+ if (commit) {
+ ctx->ports[cycle][0] = uop;
+ }
+ TRACE_PRINT("%s scheduled to port P0 at cycle %i (commit = %i)\n", execution_port_names[uop], cycle, commit);
+ return cycle;
+ }
+ if ((uop & PORT_P1) != 0 && !ctx->ports[cycle][1]) {
+ if (commit) {
+ ctx->ports[cycle][1] = uop;
+ }
+ TRACE_PRINT("%s scheduled to port P1 at cycle %i (commit = %i)\n", execution_port_names[uop], cycle, commit);
+ return cycle;
+ }
+ }
+ return -1;
+}
+
+static int schedule_instr(const instr_template* tpl, generator_ctx* ctx, bool commit) {
+ if (tpl->uop2 == PORT_NONE) {
+ /* this instruction has only one uOP */
+ return schedule_uop(tpl->uop1, ctx, ctx->cycle, commit);
+ }
+ else {
+ /* instructions with 2 uOPs are scheduled conservatively by requiring both uOPs to execute in the same cycle */
+ for (int cycle = ctx->cycle; cycle < PORT_MAP_SIZE; ++cycle) {
+
+ int cycle1 = schedule_uop(tpl->uop1, ctx, cycle, false);
+ int cycle2 = schedule_uop(tpl->uop2, ctx, cycle, false);
+
+ if (cycle1 >= 0 && cycle1 == cycle2) {
+ if (commit) {
+ schedule_uop(tpl->uop1, ctx, cycle, true);
+ schedule_uop(tpl->uop2, ctx, cycle, true);
+ }
+ return cycle1;
+ }
+ }
+ }
+
+ return -1;
+}
+
+static void print_registers(const generator_ctx* ctx) {
+ for (int i = 0; i < 8; ++i) {
+ printf(" R%i = %i\n", i, ctx->registers[i].latency);
+ }
+}
+
+bool hashx_program_generate(const siphash_state* key, hashx_program* program) {
+ generator_ctx ctx = {
+ .cycle = 0,
+ .sub_cycle = 0, /* 3 sub-cycles = 1 cycle */
+ .mul_count = 0,
+ .chain_mul = false,
+ .latency = 0,
+ .ports = {{ 0 }}
+ };
+ hashx_siphash_rng_init(&ctx.gen, key);
+#ifdef HASHX_RNG_CALLBACK
+ ctx.gen.callback = program->rng_callback;
+ ctx.gen.callback_user_data = program->rng_callback_user_data;
+#endif
+ for (int i = 0; i < 8; ++i) {
+ ctx.registers[i].last_op = -1;
+ ctx.registers[i].latency = 0;
+ ctx.registers[i].last_op_par = (uint32_t)-1;
+ }
+ program->code_size = 0;
+
+ int attempt = 0;
+ instr_type last_instr = -1;
+#ifdef HASHX_PROGRAM_STATS
+ program->x86_size = 0;
+#endif
+
+ while (program->code_size < HASHX_PROGRAM_MAX_SIZE) {
+ instruction* instr = &program->code[program->code_size];
+ TRACE_PRINT("CYCLE: %i/%i\n", ctx.sub_cycle, ctx.cycle);
+
+ /* select an instruction template */
+ const instr_template* tpl = select_template(&ctx, last_instr, attempt);
+ last_instr = tpl->group;
+
+ TRACE_PRINT("Template: %s\n", tpl->x86_asm);
+
+ instr_from_template(tpl, &ctx.gen, instr);
+
+ /* calculate the earliest cycle when this instruction (all of its uOPs) can be scheduled for execution */
+ int scheduleCycle = schedule_instr(tpl, &ctx, false);
+ if (scheduleCycle < 0) {
+ TRACE_PRINT("Unable to map operation '%s' to execution port (cycle %i)\n", tpl->x86_asm, ctx.cycle);
+ /* __debugbreak(); */
+ break;
+ }
+
+ ctx.chain_mul = attempt > 0;
+
+ /* find a source register (if applicable) that will be ready when this instruction executes */
+ if (tpl->has_src) {
+ if (!select_source(tpl, instr, &ctx, scheduleCycle)) {
+ TRACE_PRINT("; src STALL (attempt %i)\n", attempt);
+ if (attempt++ < MAX_RETRIES) {
+ continue;
+ }
+ if (TRACE) {
+ printf("; select_source FAILED at cycle %i\n", ctx.cycle);
+ print_registers(&ctx);
+ /* __debugbreak(); */
+ }
+ ctx.sub_cycle += 3;
+ ctx.cycle = ctx.sub_cycle / 3;
+ attempt = 0;
+ continue;
+ }
+ TRACE_PRINT("; src = r%i\n", instr->src);
+ }
+
+ /* find a destination register that will be ready when this instruction executes */
+ if (tpl->has_dst) {
+ if (!select_destination(tpl, instr, &ctx, scheduleCycle)) {
+ TRACE_PRINT("; dst STALL (attempt %i)\n", attempt);
+ if (attempt++ < MAX_RETRIES) {
+ continue;
+ }
+ if (TRACE) {
+ printf("; select_destination FAILED at cycle %i\n", ctx.cycle);
+ print_registers(&ctx);
+ /* __debugbreak(); */
+ }
+ ctx.sub_cycle += 3;
+ ctx.cycle = ctx.sub_cycle / 3;
+ attempt = 0;
+ continue;
+ }
+ TRACE_PRINT("; dst = r%i\n", instr->dst);
+ }
+ attempt = 0;
+
+ /* recalculate when the instruction can be scheduled for execution based on operand availability */
+ scheduleCycle = schedule_instr(tpl, &ctx, true);
+
+ if (scheduleCycle < 0) {
+ TRACE_PRINT("Unable to map operation '%s' to execution port (cycle %i)\n", tpl->x86_asm, ctx.cycle);
+ break;
+ }
+
+ /* terminating condition */
+ if (scheduleCycle >= TARGET_CYCLE) {
+ break;
+ }
+
+ if (tpl->has_dst) {
+ register_info* ri = &ctx.registers[instr->dst];
+ int retireCycle = scheduleCycle + tpl->latency;
+ ri->latency = retireCycle;
+ ri->last_op = tpl->group;
+ ri->last_op_par = instr->op_par;
+ ctx.latency = MAX(retireCycle, ctx.latency);
+ TRACE_PRINT("; RETIRED at cycle %i\n", retireCycle);
+ }
+
+ program->code_size++;
+#ifdef HASHX_PROGRAM_STATS
+ program->x86_size += tpl->x86_size;
+#endif
+
+ ctx.mul_count += is_mul(instr->opcode);
+
+ ++ctx.sub_cycle;
+ ctx.sub_cycle += (tpl->uop2 != PORT_NONE);
+ ctx.cycle = ctx.sub_cycle / 3;
+ }
+
+#ifdef HASHX_PROGRAM_STATS
+ memset(program->asic_latencies, 0, sizeof(program->asic_latencies));
+
+ program->counter = ctx.gen.counter;
+ program->wide_mul_count = 0;
+ program->mul_count = ctx.mul_count;
+
+ /* Calculate ASIC latency:
+ Assumes 1 cycle latency for all operations and unlimited parallelization. */
+ for (size_t i = 0; i < program->code_size; ++i) {
+ instruction* instr = &program->code[i];
+ if (instr->dst < 0)
+ continue;
+ int last_dst = program->asic_latencies[instr->dst] + 1;
+ int lat_src = instr->dst != instr->src ? program->asic_latencies[instr->src] + 1 : 0;
+ program->asic_latencies[instr->dst] = MAX(last_dst, lat_src);
+ program->wide_mul_count += is_wide_mul(instr->opcode);
+ }
+
+ program->asic_latency = 0;
+ program->cpu_latency = 0;
+ for (int i = 0; i < 8; ++i) {
+ program->asic_latency = MAX(program->asic_latency, program->asic_latencies[i]);
+ program->cpu_latencies[i] = ctx.registers[i].latency;
+ program->cpu_latency = MAX(program->cpu_latency, program->cpu_latencies[i]);
+ }
+
+ program->ipc = program->code_size / (double)program->cpu_latency;
+ program->branch_count = 0;
+ memset(program->branches, 0, sizeof(program->branches));
+
+ if (TRACE) {
+ printf("; ALU port utilization:\n");
+ printf("; (* = in use, _ = idle)\n");
+ for (int i = 0; i < PORT_MAP_SIZE; ++i) {
+ printf("; %3i ", i);
+ for (int j = 0; j < NUM_PORTS; ++j) {
+ printf("%c", (ctx.ports[i][j] ? '*' : '_'));
+ }
+ printf("\n");
+ }
+ }
+#endif
+
+ /* reject programs that don't meet the uniform complexity requirements */
+ /* this happens in less than 1 seed out of 10000 */
+ return
+ (program->code_size == REQUIREMENT_SIZE) &&
+ (ctx.mul_count == REQUIREMENT_MUL_COUNT) &&
+ (ctx.latency == REQUIREMENT_LATENCY - 1); /* cycles are numbered from 0 */
+}
+
+static const char* x86_reg_map[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
+
+void hashx_program_asm_x86(const hashx_program* program) {
+ size_t target = 0;
+ for (size_t i = 0; i < program->code_size; ++i) {
+ const instruction* instr = &program->code[i];
+ switch (instr->opcode)
+ {
+ case INSTR_SUB_R:
+ printf("sub %s, %s\n", x86_reg_map[instr->dst], x86_reg_map[instr->src]);
+ break;
+ case INSTR_XOR_R:
+ printf("xor %s, %s\n", x86_reg_map[instr->dst], x86_reg_map[instr->src]);
+ break;
+ case INSTR_ADD_RS:
+ printf("lea %s, [%s+%s*%u]\n", x86_reg_map[instr->dst], x86_reg_map[instr->dst], x86_reg_map[instr->src], 1 << instr->imm32);
+ break;
+ case INSTR_MUL_R:
+ printf("imul %s, %s\n", x86_reg_map[instr->dst], x86_reg_map[instr->src]);
+ break;
+ case INSTR_ROR_C:
+ printf("ror %s, %u\n", x86_reg_map[instr->dst], instr->imm32);
+ break;
+ case INSTR_ADD_C:
+ printf("add %s, %i\n", x86_reg_map[instr->dst], instr->imm32);
+ break;
+ case INSTR_XOR_C:
+ printf("xor %s, %i\n", x86_reg_map[instr->dst], instr->imm32);
+ break;
+ case INSTR_UMULH_R:
+ printf("mov rax, %s\n", x86_reg_map[instr->dst]);
+ printf("mul %s\n", x86_reg_map[instr->src]);
+ printf("mov %s, rdx\n", x86_reg_map[instr->dst]);
+ break;
+ case INSTR_SMULH_R:
+ printf("mov rax, %s\n", x86_reg_map[instr->dst]);
+ printf("imul %s\n", x86_reg_map[instr->src]);
+ printf("mov %s, rdx\n", x86_reg_map[instr->dst]);
+ break;
+ case INSTR_TARGET:
+ printf("test edi, edi\n");
+ printf("target_%i: cmovz esi, edi\n", (int)i);
+ target = i;
+ break;
+ case INSTR_BRANCH:
+ printf("or edx, esi\n");
+ printf("test edx, %i\n", instr->imm32);
+ printf("jz target_%i\n", (int)target);
+ break;
+ default:
+ UNREACHABLE;
+ }
+ }
+}
diff --git a/src/ext/equix/hashx/src/program.h b/src/ext/equix/hashx/src/program.h
new file mode 100644
index 0000000000..78dbb8b6e3
--- /dev/null
+++ b/src/ext/equix/hashx/src/program.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef PROGRAM_H
+#define PROGRAM_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <hashx.h>
+#include "instruction.h"
+#include "siphash.h"
+#include "blake2.h"
+
+#define HASHX_PROGRAM_MAX_SIZE 512
+
+typedef struct hashx_program {
+ instruction code[HASHX_PROGRAM_MAX_SIZE];
+ size_t code_size;
+#ifdef HASHX_PROGRAM_STATS
+ unsigned counter;
+ double ipc;
+ int x86_size;
+ int cpu_latency;
+ int asic_latency;
+ int mul_count;
+ int wide_mul_count;
+ int cpu_latencies[8];
+ int asic_latencies[8];
+ int branch_count;
+ int branches[16];
+#endif
+#ifdef HASHX_RNG_CALLBACK
+ void (*rng_callback)(uint64_t *buffer, void *user_data);
+ void *rng_callback_user_data;
+#endif
+} hashx_program;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+HASHX_PRIVATE bool hashx_program_generate(const siphash_state* key, hashx_program* program);
+
+HASHX_PRIVATE void hashx_program_execute(const hashx_program* program, uint64_t r[8]);
+
+HASHX_PRIVATE void hashx_program_asm_x86(const hashx_program* program);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/ext/equix/hashx/src/program_exec.c b/src/ext/equix/hashx/src/program_exec.c
new file mode 100644
index 0000000000..f00eb5ef69
--- /dev/null
+++ b/src/ext/equix/hashx/src/program_exec.c
@@ -0,0 +1,158 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include "program.h"
+#include "force_inline.h"
+#include "unreachable.h"
+#include "siphash.h"
+#include "hashx_endian.h"
+
+#if defined(__SIZEOF_INT128__)
+typedef unsigned __int128 uint128_t;
+typedef __int128 int128_t;
+static FORCE_INLINE uint64_t umulh(uint64_t a, uint64_t b) {
+ return ((uint128_t)a * b) >> 64;
+ }
+static FORCE_INLINE int64_t smulh(int64_t a, int64_t b) {
+ return ((int128_t)a * b) >> 64;
+}
+#define HAVE_UMULH
+#define HAVE_SMULH
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning (disable : 4146) /* unary minus applied to unsigned type */
+#define HAS_VALUE(X) X ## 0
+#define EVAL_DEFINE(X) HAS_VALUE(X)
+#include <intrin.h>
+#include <stdlib.h>
+
+static FORCE_INLINE uint64_t rotr64(uint64_t x, unsigned int c) {
+ return _rotr64(x, c);
+}
+
+#define HAVE_ROTR
+
+#if EVAL_DEFINE(__MACHINEARM64_X64(1))
+static FORCE_INLINE uint64_t umulh(uint64_t a, uint64_t b) {
+ return __umulh(a, b);
+}
+#define HAVE_UMULH
+#endif
+
+#if EVAL_DEFINE(__MACHINEX64(1))
+static FORCE_INLINE int64_t smulh(int64_t a, int64_t b) {
+ int64_t hi;
+ _mul128(a, b, &hi);
+ return hi;
+}
+#define HAVE_SMULH
+#endif
+
+#endif
+
+#ifndef HAVE_ROTR
+static FORCE_INLINE uint64_t rotr64(uint64_t a, unsigned int b) {
+ return (a >> b) | (a << (64 - b));
+}
+#define HAVE_ROTR
+#endif
+
+#ifndef HAVE_UMULH
+#define LO(x) ((x)&0xffffffff)
+#define HI(x) ((x)>>32)
+static uint64_t umulh(uint64_t a, uint64_t b) {
+ uint64_t ah = HI(a), al = LO(a);
+ uint64_t bh = HI(b), bl = LO(b);
+ uint64_t x00 = al * bl;
+ uint64_t x01 = al * bh;
+ uint64_t x10 = ah * bl;
+ uint64_t x11 = ah * bh;
+ uint64_t m1 = LO(x10) + LO(x01) + HI(x00);
+ uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);
+ uint64_t m3 = HI(x11) + HI(m2);
+
+ return (m3 << 32) + LO(m2);
+}
+#undef LO
+#undef HI
+#define HAVE_UMULH
+#endif
+
+#ifndef HAVE_SMULH
+static int64_t smulh(int64_t a, int64_t b) {
+ int64_t hi = umulh(a, b);
+ if (a < 0LL) hi -= b;
+ if (b < 0LL) hi -= a;
+ return hi;
+}
+#define HAVE_SMULH
+#endif
+
+static FORCE_INLINE uint64_t sign_extend_2s_compl(uint32_t x) {
+ return (-1 == ~0) ?
+ (uint64_t)(int64_t)(int32_t)(x) :
+ (x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
+}
+
+void hashx_program_execute(const hashx_program* program, uint64_t r[8]) {
+ size_t target = 0;
+ bool branch_enable = true;
+ uint32_t result = 0;
+#ifdef HASHX_PROGRAM_STATS
+ int branch_idx = 0;
+#endif
+ for (size_t i = 0; i < program->code_size; ++i) {
+ const instruction* instr = &program->code[i];
+ switch (instr->opcode)
+ {
+ case INSTR_UMULH_R:
+ result = (uint32_t) (r[instr->dst] = umulh(r[instr->dst],
+ r[instr->src]));
+ break;
+ case INSTR_SMULH_R:
+ result = (uint32_t) (r[instr->dst] = smulh(r[instr->dst],
+ r[instr->src]));
+ break;
+ case INSTR_MUL_R:
+ r[instr->dst] *= r[instr->src];
+ break;
+ case INSTR_SUB_R:
+ r[instr->dst] -= r[instr->src];
+ break;
+ case INSTR_XOR_R:
+ r[instr->dst] ^= r[instr->src];
+ break;
+ case INSTR_ADD_RS:
+ r[instr->dst] += r[instr->src] << instr->imm32;
+ break;
+ case INSTR_ROR_C:
+ r[instr->dst] = rotr64(r[instr->dst], instr->imm32);
+ break;
+ case INSTR_ADD_C:
+ r[instr->dst] += sign_extend_2s_compl(instr->imm32);
+ break;
+ case INSTR_XOR_C:
+ r[instr->dst] ^= sign_extend_2s_compl(instr->imm32);
+ break;
+ case INSTR_TARGET:
+ target = i;
+ break;
+ case INSTR_BRANCH:
+ if (branch_enable && (result & instr->imm32) == 0) {
+ i = target;
+ branch_enable = false;
+#ifdef HASHX_PROGRAM_STATS
+ ((hashx_program*)program)->branch_count++;
+ ((hashx_program*)program)->branches[branch_idx]++;
+#endif
+ }
+#ifdef HASHX_PROGRAM_STATS
+ branch_idx++;
+#endif
+ break;
+ default:
+ UNREACHABLE;
+ }
+ }
+}
diff --git a/src/ext/equix/hashx/src/siphash.c b/src/ext/equix/hashx/src/siphash.c
new file mode 100644
index 0000000000..0acfca8814
--- /dev/null
+++ b/src/ext/equix/hashx/src/siphash.c
@@ -0,0 +1,66 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include "siphash.h"
+#include "hashx_endian.h"
+#include "unreachable.h"
+
+uint64_t hashx_siphash13_ctr(uint64_t input, const siphash_state* keys) {
+ uint64_t v0 = keys->v0;
+ uint64_t v1 = keys->v1;
+ uint64_t v2 = keys->v2;
+ uint64_t v3 = keys->v3;
+
+ v3 ^= input;
+
+ SIPROUND(v0, v1, v2, v3);
+
+ v0 ^= input;
+ v2 ^= 0xff;
+
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+
+ return (v0 ^ v1) ^ (v2 ^ v3);
+}
+
+void hashx_siphash24_ctr_state512(const siphash_state* keys, uint64_t input,
+ uint64_t state_out[8]) {
+
+ uint64_t v0 = keys->v0;
+ uint64_t v1 = keys->v1;
+ uint64_t v2 = keys->v2;
+ uint64_t v3 = keys->v3;
+
+ v1 ^= 0xee;
+ v3 ^= input;
+
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+
+ v0 ^= input;
+ v2 ^= 0xee;
+
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+
+ state_out[0] = v0;
+ state_out[1] = v1;
+ state_out[2] = v2;
+ state_out[3] = v3;
+
+ v1 ^= 0xdd;
+
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+ SIPROUND(v0, v1, v2, v3);
+
+ state_out[4] = v0;
+ state_out[5] = v1;
+ state_out[6] = v2;
+ state_out[7] = v3;
+}
diff --git a/src/ext/equix/hashx/src/siphash.h b/src/ext/equix/hashx/src/siphash.h
new file mode 100644
index 0000000000..bb468402c3
--- /dev/null
+++ b/src/ext/equix/hashx/src/siphash.h
@@ -0,0 +1,35 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef SIPHASH_H
+#define SIPHASH_H
+
+#include <stdint.h>
+#include <hashx.h>
+
+#define ROTL(x, b) (((x) << (b)) | ((x) >> (64 - (b))))
+#define SIPROUND(v0, v1, v2, v3) \
+ do { \
+ v0 += v1; v2 += v3; v1 = ROTL(v1, 13); \
+ v3 = ROTL(v3, 16); v1 ^= v0; v3 ^= v2; \
+ v0 = ROTL(v0, 32); v2 += v1; v0 += v3; \
+ v1 = ROTL(v1, 17); v3 = ROTL(v3, 21); \
+ v1 ^= v2; v3 ^= v0; v2 = ROTL(v2, 32); \
+ } while (0)
+
+typedef struct siphash_state {
+ uint64_t v0, v1, v2, v3;
+} siphash_state;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+HASHX_PRIVATE uint64_t hashx_siphash13_ctr(uint64_t input, const siphash_state* keys);
+HASHX_PRIVATE void hashx_siphash24_ctr_state512(const siphash_state* keys, uint64_t input, uint64_t state_out[8]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/ext/equix/hashx/src/siphash_rng.c b/src/ext/equix/hashx/src/siphash_rng.c
new file mode 100644
index 0000000000..c0f457be76
--- /dev/null
+++ b/src/ext/equix/hashx/src/siphash_rng.c
@@ -0,0 +1,41 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include "siphash_rng.h"
+
+void hashx_siphash_rng_init(siphash_rng* gen, const siphash_state* state) {
+ gen->keys = *state;
+ gen->counter = 0;
+ gen->count8 = 0;
+ gen->count32 = 0;
+}
+
+uint8_t hashx_siphash_rng_u8(siphash_rng* gen) {
+ if (gen->count8 == 0) {
+ gen->buffer8 = hashx_siphash13_ctr(gen->counter, &gen->keys);
+ gen->counter++;
+ gen->count8 = sizeof(gen->buffer8);
+#ifdef HASHX_RNG_CALLBACK
+ if (gen->callback) {
+ gen->callback(&gen->buffer8, gen->callback_user_data);
+ }
+#endif
+ }
+ gen->count8--;
+ return gen->buffer8 >> (gen->count8 * 8);
+}
+
+uint32_t hashx_siphash_rng_u32(siphash_rng* gen) {
+ if (gen->count32 == 0) {
+ gen->buffer32 = hashx_siphash13_ctr(gen->counter, &gen->keys);
+ gen->counter++;
+ gen->count32 = sizeof(gen->buffer32) / sizeof(uint32_t);
+#ifdef HASHX_RNG_CALLBACK
+ if (gen->callback) {
+ gen->callback(&gen->buffer32, gen->callback_user_data);
+ }
+#endif
+ }
+ gen->count32--;
+ return (uint32_t)(gen->buffer32 >> (gen->count32 * 32));
+}
diff --git a/src/ext/equix/hashx/src/siphash_rng.h b/src/ext/equix/hashx/src/siphash_rng.h
new file mode 100644
index 0000000000..7b402fdc6e
--- /dev/null
+++ b/src/ext/equix/hashx/src/siphash_rng.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef SIPHASH_GENERATOR_H
+#define SIPHASH_GENERATOR_H
+
+#include <stdint.h>
+#include <hashx.h>
+#include "siphash.h"
+
+typedef struct siphash_rng {
+ siphash_state keys;
+ uint64_t counter;
+ uint64_t buffer8, buffer32;
+ unsigned count8, count32;
+#ifdef HASHX_RNG_CALLBACK
+ void (*callback)(uint64_t *buffer, void *user_data);
+ void *callback_user_data;
+#endif
+} siphash_rng;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+HASHX_PRIVATE void hashx_siphash_rng_init(siphash_rng* gen, const siphash_state* state);
+HASHX_PRIVATE uint32_t hashx_siphash_rng_u32(siphash_rng* gen);
+HASHX_PRIVATE uint8_t hashx_siphash_rng_u8(siphash_rng* gen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/ext/equix/hashx/src/test_utils.h b/src/ext/equix/hashx/src/test_utils.h
new file mode 100644
index 0000000000..54c2f7ec80
--- /dev/null
+++ b/src/ext/equix/hashx/src/test_utils.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef TEST_UTILS_H
+#define TEST_UTILS_H
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdlib.h>
+#include <hashx.h>
+
+static inline void read_option(const char* option, int argc, char** argv, bool* out) {
+ for (int i = 0; i < argc; ++i) {
+ if (strcmp(argv[i], option) == 0) {
+ *out = true;
+ return;
+ }
+ }
+ *out = false;
+}
+
+static inline void read_int_option(const char* option, int argc, char** argv, int* out, int default_val) {
+ for (int i = 0; i < argc - 1; ++i) {
+ if (strcmp(argv[i], option) == 0 && (*out = atoi(argv[i + 1])) > 0) {
+ return;
+ }
+ }
+ *out = default_val;
+}
+
+static inline char parse_nibble(char hex) {
+ hex &= ~0x20;
+ return (hex & 0x40) ? hex - ('A' - 10) : hex & 0xf;
+}
+
+static inline void hex2bin(const char* in, int length, char* out) {
+ for (int i = 0; i < length; i += 2) {
+ char nibble1 = parse_nibble(*in++);
+ char nibble2 = parse_nibble(*in++);
+ *out++ = nibble1 << 4 | nibble2;
+ }
+}
+
+static inline void output_hex(const char* data, int length) {
+ for (unsigned i = 0; i < length; ++i)
+ printf("%02x", data[i] & 0xff);
+}
+
+static inline bool hashes_equal(char* a, char* b) {
+ return memcmp(a, b, HASHX_SIZE) == 0;
+}
+
+static inline bool equals_hex(const void* hash, const char* hex) {
+ char reference[HASHX_SIZE];
+ hex2bin(hex, 2 * HASHX_SIZE, reference);
+ return memcmp(hash, reference, sizeof(reference)) == 0;
+}
+
+#endif
diff --git a/src/ext/equix/hashx/src/tests.c b/src/ext/equix/hashx/src/tests.c
new file mode 100644
index 0000000000..f0a4ebe713
--- /dev/null
+++ b/src/ext/equix/hashx/src/tests.c
@@ -0,0 +1,275 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <assert.h>
+#include "test_utils.h"
+
+typedef bool test_func();
+
+static int test_no = 0;
+
+static hashx_ctx* ctx_int = NULL;
+static hashx_ctx* ctx_cmp = NULL;
+static hashx_ctx* ctx_auto = NULL;
+
+static const char seed1[] = "This is a test";
+static const char seed2[] = "Lorem ipsum dolor sit amet";
+
+static const uint64_t counter1 = 0;
+static const uint64_t counter2 = 123456;
+static const uint64_t counter3 = 987654321123456789;
+
+#ifdef HASHX_BLOCK_MODE
+static const unsigned char long_input[] = {
+ 0x0b, 0x0b, 0x98, 0xbe, 0xa7, 0xe8, 0x05, 0xe0, 0x01, 0x0a, 0x21, 0x26,
+ 0xd2, 0x87, 0xa2, 0xa0, 0xcc, 0x83, 0x3d, 0x31, 0x2c, 0xb7, 0x86, 0x38,
+ 0x5a, 0x7c, 0x2f, 0x9d, 0xe6, 0x9d, 0x25, 0x53, 0x7f, 0x58, 0x4a, 0x9b,
+ 0xc9, 0x97, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x66, 0x6f, 0xd8, 0x75, 0x3b,
+ 0xf6, 0x1a, 0x86, 0x31, 0xf1, 0x29, 0x84, 0xe3, 0xfd, 0x44, 0xf4, 0x01,
+ 0x4e, 0xca, 0x62, 0x92, 0x76, 0x81, 0x7b, 0x56, 0xf3, 0x2e, 0x9b, 0x68,
+ 0xbd, 0x82, 0xf4, 0x16
+};
+#endif
+
+#define RUN_TEST(x) run_test(#x, &x)
+
+static void run_test(const char* name, test_func* func) {
+ printf("[%2i] %-40s ... ", ++test_no, name);
+ printf(func() ? "PASSED\n" : "SKIPPED\n");
+}
+
+static bool test_alloc() {
+ ctx_int = hashx_alloc(HASHX_TYPE_INTERPRETED);
+ assert(ctx_int != NULL);
+ return true;
+}
+
+static bool test_free() {
+ hashx_free(ctx_int);
+ hashx_free(ctx_cmp);
+ hashx_free(ctx_auto);
+ return true;
+}
+
+static bool test_make1() {
+ hashx_result result = hashx_make(ctx_int, seed1, sizeof(seed1));
+ assert(result == HASHX_OK);
+ return true;
+}
+
+static bool test_hash_ctr1() {
+#ifdef HASHX_SALT
+ return false;
+#endif
+#ifndef HASHX_BLOCK_MODE
+ char hash[HASHX_SIZE];
+ hashx_result result = hashx_exec(ctx_int, counter2, hash);
+ assert(result == HASHX_OK);
+ /* printf("\n");
+ output_hex(hash, HASHX_SIZE);
+ printf("\n"); */
+ assert(equals_hex(hash, "aebdd50aa67c93afb82a4c534603b65e46decd584c55161c526ebc099415ccf1"));
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool test_hash_ctr2() {
+#ifdef HASHX_SALT
+ return false;
+#endif
+#ifndef HASHX_BLOCK_MODE
+ char hash[HASHX_SIZE];
+ hashx_result result = hashx_exec(ctx_int, counter1, hash);
+ assert(result == HASHX_OK);
+ assert(equals_hex(hash, "2b2f54567dcbea98fdb5d5e5ce9a65983c4a4e35ab1464b1efb61e83b7074bb2"));
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool test_make2() {
+ hashx_result result = hashx_make(ctx_int, seed2, sizeof(seed2));
+ assert(result == HASHX_OK);
+ return true;
+}
+
+static bool test_hash_ctr3() {
+#ifdef HASHX_SALT
+ return false;
+#endif
+#ifndef HASHX_BLOCK_MODE
+ char hash[HASHX_SIZE];
+ hashx_result result = hashx_exec(ctx_int, counter2, hash);
+ assert(result == HASHX_OK);
+ assert(equals_hex(hash, "ab3d155bf4bbb0aa3a71b7801089826186e44300e6932e6ffd287cf302bbb0ba"));
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool test_hash_ctr4() {
+#ifdef HASHX_SALT
+ return false;
+#endif
+#ifndef HASHX_BLOCK_MODE
+ char hash[HASHX_SIZE];
+ hashx_result result = hashx_exec(ctx_int, counter3, hash);
+ assert(result == HASHX_OK);
+ assert(equals_hex(hash, "8dfef0497c323274a60d1d93292b68d9a0496379ba407b4341cf868a14d30113"));
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool test_hash_block1() {
+#ifdef HASHX_SALT
+ return false;
+#endif
+#ifndef HASHX_BLOCK_MODE
+ return false;
+#else
+ char hash[HASHX_SIZE];
+ hashx_result result = hashx_exec(ctx_int, long_input, sizeof(long_input), hash);
+ assert(result == HASHX_OK);
+ assert(equals_hex(hash, "d0b232b832459501ca1ac9dc0429fd931414ead7624a457e375a43ea3e5e737a"));
+ return true;
+#endif
+}
+
+static bool test_alloc_compiler() {
+ ctx_cmp = hashx_alloc(HASHX_TYPE_COMPILED);
+ assert(ctx_cmp != NULL);
+ return true;
+}
+
+static bool test_make3() {
+ hashx_result result = hashx_make(ctx_cmp, seed2, sizeof(seed2));
+ if (result == HASHX_FAIL_COMPILE) {
+ return false;
+ }
+ assert(result == HASHX_OK);
+ return true;
+}
+
+static bool test_compiler_ctr1() {
+#ifndef HASHX_BLOCK_MODE
+ hashx_result result;
+ char hash1[HASHX_SIZE];
+ char hash2[HASHX_SIZE];
+ result = hashx_exec(ctx_int, counter2, hash1);
+ assert(result == HASHX_OK);
+ result = hashx_exec(ctx_cmp, counter2, hash2);
+ if (result == HASHX_FAIL_UNPREPARED) {
+ return false;
+ }
+ assert(result == HASHX_OK);
+ assert(hashes_equal(hash1, hash2));
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool test_compiler_ctr2() {
+#ifndef HASHX_BLOCK_MODE
+ hashx_result result;
+ char hash1[HASHX_SIZE];
+ char hash2[HASHX_SIZE];
+ result = hashx_exec(ctx_int, counter1, hash1);
+ assert(result == HASHX_OK);
+ result = hashx_exec(ctx_cmp, counter1, hash2);
+ if (result == HASHX_FAIL_UNPREPARED) {
+ return false;
+ }
+ assert(result == HASHX_OK);
+ assert(hashes_equal(hash1, hash2));
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool test_compiler_block1() {
+#ifndef HASHX_BLOCK_MODE
+ return false;
+#else
+ hashx_result result;
+ char hash1[HASHX_SIZE];
+ char hash2[HASHX_SIZE];
+ result = hashx_exec(ctx_int, long_input, sizeof(long_input), hash1);
+ assert(result == HASHX_OK);
+ result = hashx_exec(ctx_cmp, long_input, sizeof(long_input), hash2);
+ if (result == HASHX_FAIL_UNPREPARED) {
+ return false;
+ }
+ assert(result == HASHX_OK);
+ assert(hashes_equal(hash1, hash2));
+ return true;
+#endif
+}
+
+static bool test_alloc_automatic() {
+ ctx_auto = hashx_alloc(HASHX_TRY_COMPILE);
+ assert(ctx_auto != NULL);
+ return true;
+}
+
+static bool test_auto_fallback() {
+ hashx_result result = hashx_make(ctx_auto, seed2, sizeof(seed2));
+ assert(result == HASHX_OK);
+ hashx_type actual_type = (hashx_type)-1;
+ result = hashx_query_type(ctx_auto, &actual_type);
+ assert(result == HASHX_OK);
+ assert(actual_type == HASHX_TYPE_INTERPRETED ||
+ actual_type == HASHX_TYPE_COMPILED);
+ return actual_type == HASHX_TYPE_INTERPRETED;
+}
+
+static bool test_bad_seeds() {
+#ifdef HASHX_SALT
+ return false;
+#else
+ hashx_result result;
+ result = hashx_make(ctx_auto, "\xf8\x05\x00\x00", 4);
+ assert(result == HASHX_OK);
+ result = hashx_make(ctx_auto, "\xf9\x05\x00\x00", 4);
+ assert(result == HASHX_FAIL_SEED);
+ result = hashx_make(ctx_auto, "\x5d\x93\x02\x00", 4);
+ assert(result == HASHX_FAIL_SEED);
+ result = hashx_make(ctx_auto, "\x5e\x93\x02\x00", 4);
+ assert(result == HASHX_OK);
+ return true;
+#endif
+}
+
+int main() {
+ RUN_TEST(test_alloc);
+ RUN_TEST(test_make1);
+ RUN_TEST(test_hash_ctr1);
+ RUN_TEST(test_hash_ctr2);
+ RUN_TEST(test_make2);
+ RUN_TEST(test_hash_ctr3);
+ RUN_TEST(test_hash_ctr4);
+ RUN_TEST(test_alloc_compiler);
+ RUN_TEST(test_make3);
+ RUN_TEST(test_compiler_ctr1);
+ RUN_TEST(test_compiler_ctr2);
+ RUN_TEST(test_hash_block1);
+ RUN_TEST(test_compiler_block1);
+ RUN_TEST(test_alloc_automatic);
+ RUN_TEST(test_auto_fallback);
+ RUN_TEST(test_bad_seeds);
+ RUN_TEST(test_free);
+
+ printf("\nAll tests were successful\n");
+ return 0;
+}
diff --git a/src/ext/equix/hashx/src/unreachable.h b/src/ext/equix/hashx/src/unreachable.h
new file mode 100644
index 0000000000..69807fb395
--- /dev/null
+++ b/src/ext/equix/hashx/src/unreachable.h
@@ -0,0 +1,9 @@
+#ifndef UNREACHABLE
+#ifdef __GNUC__
+#define UNREACHABLE __builtin_unreachable()
+#elif _MSC_VER
+#define UNREACHABLE __assume(0)
+#else
+#define UNREACHABLE
+#endif
+#endif
diff --git a/src/ext/equix/hashx/src/virtual_memory.c b/src/ext/equix/hashx/src/virtual_memory.c
new file mode 100644
index 0000000000..2dff6f7552
--- /dev/null
+++ b/src/ext/equix/hashx/src/virtual_memory.c
@@ -0,0 +1,140 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#include "virtual_memory.h"
+
+#ifdef HASHX_WIN
+#include <windows.h>
+#else
+#ifdef __APPLE__
+#include <mach/vm_statistics.h>
+#endif
+#include <sys/types.h>
+#include <sys/mman.h>
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#define PAGE_READONLY PROT_READ
+#define PAGE_READWRITE (PROT_READ | PROT_WRITE)
+#define PAGE_EXECUTE_READ (PROT_READ | PROT_EXEC)
+#define PAGE_EXECUTE_READWRITE (PROT_READ | PROT_WRITE | PROT_EXEC)
+#if defined(__NetBSD__) && defined(PROT_MPROTECT)
+#define PAGE_MMAP_PROT (PAGE_READWRITE | PROT_MPROTECT(PROT_EXEC))
+#else
+#define PAGE_MMAP_PROT PAGE_READWRITE
+#endif
+#endif
+
+#ifdef HASHX_WIN
+
+static bool set_privilege(const char* pszPrivilege, BOOL bEnable) {
+ HANDLE hToken;
+ TOKEN_PRIVILEGES tp;
+ BOOL status;
+ DWORD error;
+
+ if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES
+ | TOKEN_QUERY, &hToken))
+ return false;
+
+ if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid))
+ return false;
+
+ tp.PrivilegeCount = 1;
+
+ if (bEnable)
+ tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+ else
+ tp.Privileges[0].Attributes = 0;
+
+ status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0,
+ (PTOKEN_PRIVILEGES)NULL, 0);
+ error = GetLastError();
+
+ CloseHandle(hToken);
+
+ return status && (error == ERROR_SUCCESS);
+}
+#endif
+
+void* hashx_vm_alloc(size_t bytes) {
+ void* mem;
+#ifdef HASHX_WIN
+ mem = VirtualAlloc(NULL, bytes, MEM_COMMIT, PAGE_READWRITE);
+#else
+ mem = mmap(NULL, bytes, PAGE_MMAP_PROT, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (mem == MAP_FAILED)
+ return NULL;
+#endif
+ return mem;
+}
+
+static inline bool page_protect(void* ptr, size_t bytes, int rules) {
+#ifdef HASHX_WIN
+ DWORD oldp;
+ if (!VirtualProtect(ptr, bytes, (DWORD)rules, &oldp)) {
+ return false;
+ }
+#else
+ if (mprotect(ptr, bytes, rules) != 0)
+ return false;
+#endif
+ return true;
+}
+
+bool hashx_vm_rw(void* ptr, size_t bytes) {
+ return page_protect(ptr, bytes, PAGE_READWRITE);
+}
+
+bool hashx_vm_rx(void* ptr, size_t bytes) {
+ return page_protect(ptr, bytes, PAGE_EXECUTE_READ);
+}
+
+#ifdef EQUIX_SUPPORT_HUGEPAGES
+void* hashx_vm_alloc_huge(size_t bytes) {
+ void* mem;
+#ifdef HASHX_WIN
+ if (!set_privilege("SeLockMemoryPrivilege", 1)) {
+ /* Failed, but try the VirtualAlloc anyway */
+ }
+ SIZE_T page_min = GetLargePageMinimum();
+ if (page_min > 0) {
+ mem = VirtualAlloc(NULL, ALIGN_SIZE(bytes, page_min), MEM_COMMIT
+ | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
+ }
+ else {
+ mem = NULL;
+ }
+#else
+#ifdef __APPLE__
+ mem = mmap(NULL, bytes, PAGE_READWRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+ VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
+#elif defined(__FreeBSD__)
+ mem = mmap(NULL, bytes, PAGE_READWRITE, MAP_PRIVATE | MAP_ANONYMOUS
+ | MAP_ALIGNED_SUPER, -1, 0);
+#elif defined(__OpenBSD__) || defined(__NetBSD__)
+ (void)bytes;
+ mem = MAP_FAILED; // OpenBSD and NetBSD do not support huge pages
+#else
+ mem = mmap(NULL, bytes, PAGE_READWRITE, MAP_PRIVATE | MAP_ANONYMOUS
+ | MAP_HUGETLB | MAP_POPULATE, -1, 0);
+#endif
+ if (mem == MAP_FAILED) {
+ mem = NULL;
+ }
+#endif
+ return mem;
+}
+#endif /* EQUIX_SUPPORT_HUGEPAGES */
+
+void hashx_vm_free(void* ptr, size_t bytes) {
+ if (!ptr) {
+ return;
+ }
+#ifdef HASHX_WIN
+ (void)bytes;
+ VirtualFree(ptr, 0, MEM_RELEASE);
+#else
+ munmap(ptr, bytes);
+#endif
+}
diff --git a/src/ext/equix/hashx/src/virtual_memory.h b/src/ext/equix/hashx/src/virtual_memory.h
new file mode 100644
index 0000000000..63633bed3f
--- /dev/null
+++ b/src/ext/equix/hashx/src/virtual_memory.h
@@ -0,0 +1,23 @@
+/* Copyright (c) 2020 tevador <tevador@gmail.com> */
+/* See LICENSE for licensing information */
+
+#ifndef VIRTUAL_MEMORY_H
+#define VIRTUAL_MEMORY_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <hashx.h>
+
+#define ALIGN_SIZE(pos, align) ((((pos) - 1) / (align) + 1) * (align))
+
+HASHX_PRIVATE void* hashx_vm_alloc(size_t size);
+HASHX_PRIVATE bool hashx_vm_rw(void* ptr, size_t size);
+HASHX_PRIVATE bool hashx_vm_rx(void* ptr, size_t size);
+HASHX_PRIVATE void hashx_vm_free(void* ptr, size_t size);
+
+#ifdef EQUIX_SUPPORT_HUGEPAGES
+HASHX_PRIVATE void* hashx_vm_alloc_huge(size_t size);
+#endif
+
+#endif