summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorYawning Angel <yawning@schwanenlied.me>2015-08-17 18:41:41 +0000
committerYawning Angel <yawning@schwanenlied.me>2015-08-17 18:41:41 +0000
commita77616f6056a9dbb5722600ff2465d8ec5b3d063 (patch)
tree91e92bb47374ed3a07848cb7a32b8e2332693e31 /src
parent5fe18bcf5459d8801fdeaf01e74571cd36ba47da (diff)
downloadtor-a77616f6056a9dbb5722600ff2465d8ec5b3d063.tar.gz
tor-a77616f6056a9dbb5722600ff2465d8ec5b3d063.zip
Enable ed25519-donna's SSE2 code when possible for 32 bit x86.
This probably requires the user to manually set CFLAGS so that the compiler targets SSE2 (i.e. defines __SSE2__), but should result in a net gain on 32-bit x86. Enabling SSE2 support would also be possible on x86_64, but the SSE2 code path is slower there. Implements feature #16535.
Diffstat (limited to 'src')
-rw-r--r--src/ext/ed25519/donna/README.tor4
-rw-r--r--src/ext/ed25519/donna/ed25519-donna-portable.h10
2 files changed, 14 insertions, 0 deletions
diff --git a/src/ext/ed25519/donna/README.tor b/src/ext/ed25519/donna/README.tor
index 2bb0efc012..026d180c24 100644
--- a/src/ext/ed25519/donna/README.tor
+++ b/src/ext/ed25519/donna/README.tor
@@ -40,3 +40,7 @@ as of 8757bd4cd209cb032853ece0ce413f122eef212c.
* On non-x86 targets, GCC's Stack Protector dislikes variables that have
alignment constraints greater than that of other primitive types.
The `ALIGN` macro is thus no-oped for all non-SSE2 builds.
+
+ * On 32-bit x86 targets that the compiler thinks support SSE2, always
+ enable SSE2 support by force-defining ED25519_SSE2 (x86_64 always
+ supports SSE2 as well, but that code path is slower there).
diff --git a/src/ext/ed25519/donna/ed25519-donna-portable.h b/src/ext/ed25519/donna/ed25519-donna-portable.h
index 9ec83b87e3..1cd644c003 100644
--- a/src/ext/ed25519/donna/ed25519-donna-portable.h
+++ b/src/ext/ed25519/donna/ed25519-donna-portable.h
@@ -158,6 +158,16 @@ static inline void U64TO8_LE(unsigned char *p, const uint64_t v) {
#define ALIGN(x)
#endif
+/* Tor: Force enable SSE2 on 32 bit x86 systems if the compile target
+ * architecture supports it. This is not done on x86-64 as the non-SSE2
+ * code benchmarks better, at least on Haswell.
+ */
+#if defined(__SSE2__) && !defined(CPU_X86_64)
+ /* undef in case it's manually specified... */
+ #undef ED25519_SSE2
+ #define ED25519_SSE2
+#endif
+
#include <stdlib.h>
#include <string.h>