diff options
author | Nick Mathewson <nickm@torproject.org> | 2011-11-25 10:36:13 -0500 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2011-11-25 10:36:13 -0500 |
commit | 25c9e3aab9237b9d01229eb9bdb73450cc4ccfcb (patch) | |
tree | 6bf1b6be7a02453099cc8d019ba22ba6b8c453ae | |
parent | fcf5c92e21dc6b39aa75e6d709f76c82276d3455 (diff) | |
parent | fbec45c9b43b22fd4838f395ef602e498f33340f (diff) | |
download | tor-25c9e3aab9237b9d01229eb9bdb73450cc4ccfcb.tar.gz tor-25c9e3aab9237b9d01229eb9bdb73450cc4ccfcb.zip |
Merge branch 'more_aes_hackery_rebased'
Conflicts:
changes/aes_hackery
-rw-r--r-- | changes/aes_hackery | 10 | ||||
-rw-r--r-- | src/common/aes.c | 208 | ||||
-rw-r--r-- | src/common/aes.h | 2 | ||||
-rw-r--r-- | src/common/crypto.c | 3 | ||||
-rw-r--r-- | src/test/test_crypto.c | 33 |
5 files changed, 186 insertions, 70 deletions
diff --git a/changes/aes_hackery b/changes/aes_hackery new file mode 100644 index 0000000000..8b2ef1a2e3 --- /dev/null +++ b/changes/aes_hackery @@ -0,0 +1,10 @@ + o Major features (AES performance): + - When using OpenSSL 1.0.0 or later, use OpenSSL's counter mode + implementation; it makes AES_CTR about 7% faster than our old one + (which was about 10% faster than the one OpenSSL used to provide). + Resolves issue #4526. + + o Major bugfixes (AES performance): + - Only use the EVP interface when AES acceleration is enabled, + to avoid a 5-7% performance regression. Resolves issue #4525, + bugfix on 0.2.3.8-alpha. diff --git a/src/common/aes.c b/src/common/aes.c index 9c03b8085c..cec6899817 100644 --- a/src/common/aes.c +++ b/src/common/aes.c @@ -14,34 +14,39 @@ #include <assert.h> #include <stdlib.h> #include <string.h> +#include <openssl/aes.h> +#include <openssl/evp.h> +#include <openssl/engine.h> +#if OPENSSL_VERSION_NUMBER >= 0x10000000L +/* See comments about which counter mode implementation to use below. */ +#include <openssl/modes.h> +#define USE_OPENSSL_CTR +#endif #include "compat.h" #include "aes.h" #include "util.h" #include "torlog.h" -/* We have 2 strategies for getting AES: Via OpenSSL's AES_encrypt function, - * via OpenSSL's EVP_EncryptUpdate function. */ - -/** Defined iff we're using OpenSSL's AES functions for AES. */ -#undef USE_OPENSSL_AES -/** Defined iff we're using OpenSSL's EVP code for AES. */ -#undef USE_OPENSSL_EVP - -/* Here we pick which to use, if none is force-defined above */ -#if (!defined(USE_OPENSSL_AES) && \ - !defined(USE_OPENSSL_EVP)) - -#define USE_OPENSSL_EVP - +#ifdef ANDROID +/* Android's OpenSSL seems to have removed all of its Engine support. */ +#define DISABLE_ENGINES #endif -/* Include OpenSSL headers as needed. */ -#ifdef USE_OPENSSL_AES -# include <openssl/aes.h> -#endif -#ifdef USE_OPENSSL_EVP -# include <openssl/evp.h> -#endif +/* We have 2 strategies for getting AES: Via OpenSSL's AES_encrypt function, + * via OpenSSL's EVP_EncryptUpdate function. + * + * If there's any hardware acceleration in play, we want to be using EVP_* so + * we can get it. Otherwise, we'll want AES_*, which seems to be about 5% + * faster than indirecting through the EVP layer. + */ + +/* We have 2 strategies for counter mode: use our own, or use OpenSSL's. + * + * Here we have a counter mode that's faster than the one shipping with + * OpenSSL pre-1.0 (by about 10%!). But OpenSSL 1.0.0 added a counter mode + * implementation faster than the one here (by about 7%). So we pick which + * one to used based on the Openssl version above. + */ /*======================================================================*/ /* Interface to AES code, and counter implementation */ @@ -49,13 +54,12 @@ /** Implements an AES counter-mode cipher. */ struct aes_cnt_cipher { /** This next element (however it's defined) is the AES key. */ -#if defined(USE_OPENSSL_EVP) - EVP_CIPHER_CTX key; -#elif defined(USE_OPENSSL_AES) - AES_KEY key; -#endif + union { + EVP_CIPHER_CTX evp; + AES_KEY aes; + } key; -#if !defined(WORDS_BIGENDIAN) +#if !defined(WORDS_BIGENDIAN) && !defined(USE_OPENSSL_CTR) #define USING_COUNTER_VARS /** These four values, together, implement a 128-bit counter, with * counter0 as the low-order word and counter3 as the high-order word. */ @@ -77,9 +81,51 @@ struct aes_cnt_cipher { /** The encrypted value of ctr_buf. */ uint8_t buf[16]; /** Our current stream position within buf. */ +#ifdef USE_OPENSSL_CTR + unsigned int pos; +#else uint8_t pos; +#endif + + /** True iff we're using the evp implementation of this cipher. */ + uint8_t using_evp; }; +/** True if we should prefer the EVP implementation for AES, either because + * we're testing it or because we have hardware acceleration configured */ +static int should_use_EVP = 0; + +/** Check whether we should use the EVP interface for AES. If <b>force_val</b> + * is nonnegative, we use use EVP iff it is true. Otherwise, we use EVP + * if there is an engine enabled for aes-ecb. */ +int +evaluate_evp_for_aes(int force_val) +{ + ENGINE *e; + + if (force_val >= 0) { + should_use_EVP = force_val; + return 0; + } +#ifdef DISABLE_ENGINES + should_use_EVP = 0; +#else + e = ENGINE_get_cipher_engine(NID_aes_128_ecb); + + if (e) { + log_notice(LD_CRYPTO, "AES engine \"%s\" found; using EVP_* functions.", + ENGINE_get_name(e)); + should_use_EVP = 1; + } else { + log_notice(LD_CRYPTO, "No AES engine found; using AES_* functions."); + should_use_EVP = 0; + } +#endif + + return 0; +} + +#ifndef USE_OPENSSL_CTR #if !defined(USING_COUNTER_VARS) #define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)]) #else @@ -100,16 +146,15 @@ _aes_fill_buf(aes_cnt_cipher_t *cipher) * None of these issues are insurmountable in principle. */ -#if defined(USE_OPENSSL_EVP) - { + if (cipher->using_evp) { int outl=16, inl=16; - EVP_EncryptUpdate(&cipher->key, cipher->buf, &outl, + EVP_EncryptUpdate(&cipher->key.evp, cipher->buf, &outl, cipher->ctr_buf.buf, inl); + } else { + AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key.aes); } -#elif defined(USE_OPENSSL_AES) - AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key); -#endif } +#endif /** * Return a newly allocated counter-mode AES128 cipher implementation. @@ -129,18 +174,21 @@ aes_new_cipher(void) void aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits) { -#if defined(USE_OPENSSL_EVP) - const EVP_CIPHER *c; - switch (key_bits) { - case 128: c = EVP_aes_128_ecb(); break; - case 192: c = EVP_aes_192_ecb(); break; - case 256: c = EVP_aes_256_ecb(); break; - default: tor_assert(0); + if (should_use_EVP) { + const EVP_CIPHER *c; + switch (key_bits) { + case 128: c = EVP_aes_128_ecb(); break; + case 192: c = EVP_aes_192_ecb(); break; + case 256: c = EVP_aes_256_ecb(); break; + default: tor_assert(0); + } + EVP_EncryptInit(&cipher->key.evp, c, (const unsigned char*)key, NULL); + cipher->using_evp = 1; + } else { + AES_set_encrypt_key((const unsigned char *)key, key_bits, &cipher->key.aes); + cipher->using_evp = 0; } - EVP_EncryptInit(&cipher->key, c, (const unsigned char*)key, NULL); -#elif defined(USE_OPENSSL_AES) - AES_set_encrypt_key((const unsigned char *)key, key_bits, &(cipher->key)); -#endif + #ifdef USING_COUNTER_VARS cipher->counter0 = 0; cipher->counter1 = 0; @@ -151,7 +199,12 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits) memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf)); cipher->pos = 0; + +#ifdef USE_OPENSSL_CTR + memset(cipher->buf, 0, sizeof(cipher->buf)); +#else _aes_fill_buf(cipher); +#endif } /** Release storage held by <b>cipher</b> @@ -161,9 +214,9 @@ aes_free_cipher(aes_cnt_cipher_t *cipher) { if (!cipher) return; -#ifdef USE_OPENSSL_EVP - EVP_CIPHER_CTX_cleanup(&cipher->key); -#endif + if (cipher->using_evp) { + EVP_CIPHER_CTX_cleanup(&cipher->key.evp); + } memset(cipher, 0, sizeof(aes_cnt_cipher_t)); tor_free(cipher); } @@ -176,6 +229,18 @@ aes_free_cipher(aes_cnt_cipher_t *cipher) #define UPDATE_CTR_BUF(c, n) #endif +#ifdef USE_OPENSSL_CTR +/* Helper function to use EVP with openssl's counter-mode wrapper. */ +static void evp_block128_fn(const uint8_t in[16], + uint8_t out[16], + const void *key) +{ + EVP_CIPHER_CTX *ctx = (void*)key; + int inl=16, outl=16; + EVP_EncryptUpdate(ctx, out, &outl, in, inl); +} +#endif + /** Encrypt <b>len</b> bytes from <b>input</b>, storing the result in * <b>output</b>. Uses the key in <b>cipher</b>, and advances the counter * by <b>len</b> bytes as it encrypts. @@ -184,20 +249,29 @@ void aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len, char *output) { - /* This function alone is up to 5% of our runtime in some profiles; anything - * we could do to make it faster would be great. - * - * Experimenting suggests that unrolling the inner loop into a switch - * statement doesn't help. What does seem to help is making the input and - * output buffers word aligned, and never crypting anything besides an - * integer number of words at a time -- it shaves maybe 4-5% of the per-byte - * encryption time measured by bench_aes. We can't do that with the current - * Tor protocol, though: Tor really likes to crypt things in 509-byte - * chunks. - * - * If we were really ambitous, we'd force len to be a multiple of the block - * size, and shave maybe another 4-5% off. - */ +#ifdef USE_OPENSSL_CTR + if (cipher->using_evp) { + /* In openssl 1.0.0, there's an if'd out EVP_aes_128_ctr in evp.h. If + * it weren't disabled, it might be better just to use that. + */ + CRYPTO_ctr128_encrypt((const unsigned char *)input, + (unsigned char *)output, + len, + &cipher->key.evp, + cipher->ctr_buf.buf, + cipher->buf, + &cipher->pos, + evp_block128_fn); + } else { + AES_ctr128_encrypt((const unsigned char *)input, + (unsigned char *)output, + len, + &cipher->key.aes, + cipher->ctr_buf.buf, + cipher->buf, + &cipher->pos); + } +#else int c = cipher->pos; if (PREDICT_UNLIKELY(!len)) return; @@ -220,6 +294,7 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len, UPDATE_CTR_BUF(cipher, 0); _aes_fill_buf(cipher); } +#endif } /** Encrypt <b>len</b> bytes from <b>input</b>, storing the results in place. @@ -229,11 +304,9 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len, void aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len) { - - /* XXXX This function is up to 5% of our runtime in some profiles; - * we should look into unrolling some of the loops; taking advantage - * of alignment, using a bigger buffer, and so on. Not till after 0.1.2.x, - * though. */ +#ifdef USE_OPENSSL_CTR + aes_crypt(cipher, data, len, data); +#else int c = cipher->pos; if (PREDICT_UNLIKELY(!len)) return; @@ -256,6 +329,7 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len) UPDATE_CTR_BUF(cipher, 0); _aes_fill_buf(cipher); } +#endif } /** Reset the 128-bit counter of <b>cipher</b> to the 16-bit big-endian value @@ -272,6 +346,8 @@ aes_set_iv(aes_cnt_cipher_t *cipher, const char *iv) cipher->pos = 0; memcpy(cipher->ctr_buf.buf, iv, 16); +#ifndef USE_OPENSSL_CTR _aes_fill_buf(cipher); +#endif } diff --git a/src/common/aes.h b/src/common/aes.h index b2591942bc..221e846155 100644 --- a/src/common/aes.h +++ b/src/common/aes.h @@ -24,5 +24,7 @@ void aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len, void aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len); void aes_set_iv(aes_cnt_cipher_t *cipher, const char *iv); +int evaluate_evp_for_aes(int force_value); + #endif diff --git a/src/common/crypto.c b/src/common/crypto.c index f2ef833522..55ad5fd23f 100644 --- a/src/common/crypto.c +++ b/src/common/crypto.c @@ -276,6 +276,9 @@ crypto_global_init(int useAccel, const char *accelName, const char *accelDir) } else { log_info(LD_CRYPTO, "NOT using OpenSSL engine support."); } + + evaluate_evp_for_aes(-1); + return crypto_seed_rng(1); } return 0; diff --git a/src/test/test_crypto.c b/src/test/test_crypto.c index 1b338a29ab..cad8c2f556 100644 --- a/src/test/test_crypto.c +++ b/src/test/test_crypto.c @@ -7,6 +7,7 @@ #define CRYPTO_PRIVATE #include "or.h" #include "test.h" +#include "aes.h" /** Run unit tests for Diffie-Hellman functionality. */ static void @@ -95,13 +96,16 @@ test_crypto_rng(void) /** Run unit tests for our AES functionality */ static void -test_crypto_aes(void) +test_crypto_aes(void *arg) { char *data1 = NULL, *data2 = NULL, *data3 = NULL; crypto_cipher_env_t *env1 = NULL, *env2 = NULL; int i, j; char *mem_op_hex_tmp=NULL; + int use_evp = !strcmp(arg,"evp"); + evaluate_evp_for_aes(use_evp); + data1 = tor_malloc(1024); data2 = tor_malloc(1024); data3 = tor_malloc(1024); @@ -670,7 +674,7 @@ test_crypto_s2k(void) /** Test AES-CTR encryption and decryption with IV. */ static void -test_crypto_aes_iv(void) +test_crypto_aes_iv(void *arg) { crypto_cipher_env_t *cipher; char *plain, *encrypted1, *encrypted2, *decrypted1, *decrypted2; @@ -678,6 +682,9 @@ test_crypto_aes_iv(void) char key1[16], key2[16]; ssize_t encrypted_size, decrypted_size; + int use_evp = !strcmp(arg,"evp"); + evaluate_evp_for_aes(use_evp); + plain = tor_malloc(4095); encrypted1 = tor_malloc(4095 + 1 + 16); encrypted2 = tor_malloc(4095 + 1 + 16); @@ -851,18 +858,36 @@ test_crypto_base32_decode(void) ; } +static void * +pass_data_setup_fn(const struct testcase_t *testcase) +{ + return testcase->setup_data; +} +static int +pass_data_cleanup_fn(const struct testcase_t *testcase, void *ptr) +{ + (void)ptr; + (void)testcase; + return 1; +} +static const struct testcase_setup_t pass_data = { + pass_data_setup_fn, pass_data_cleanup_fn +}; + #define CRYPTO_LEGACY(name) \ { #name, legacy_test_helper, 0, &legacy_setup, test_crypto_ ## name } struct testcase_t crypto_tests[] = { CRYPTO_LEGACY(formats), CRYPTO_LEGACY(rng), - CRYPTO_LEGACY(aes), + { "aes_AES", test_crypto_aes, TT_FORK, &pass_data, (void*)"aes" }, + { "aes_EVP", test_crypto_aes, TT_FORK, &pass_data, (void*)"evp" }, CRYPTO_LEGACY(sha), CRYPTO_LEGACY(pk), CRYPTO_LEGACY(dh), CRYPTO_LEGACY(s2k), - CRYPTO_LEGACY(aes_iv), + { "aes_iv_AES", test_crypto_aes_iv, TT_FORK, &pass_data, (void*)"aes" }, + { "aes_iv_EVP", test_crypto_aes_iv, TT_FORK, &pass_data, (void*)"evp" }, CRYPTO_LEGACY(base32_decode), END_OF_TESTCASES }; |