diff options
author | David Fifield <david@bamsoftware.com> | 2022-09-28 15:21:23 -0600 |
---|---|---|
committer | David Fifield <david@bamsoftware.com> | 2022-12-08 08:03:54 -0700 |
commit | 839d2218837dfbd1682ff39b375f45660b3974b5 (patch) | |
tree | f2631313cb4055637b4d726ad2bc4b0dfe128838 | |
parent | d4749d2c1dd5d9e59d8994d5b84102f7b0afd0ef (diff) | |
download | snowflake-839d2218837dfbd1682ff39b375f45660b3974b5.tar.gz snowflake-839d2218837dfbd1682ff39b375f45660b3974b5.zip |
Take ownership of buffer in QueuePacketConn QueueIncoming/WriteTo.
This design is easier to misuse, because it allows the caller to modify
the contents of the slice after queueing it, but it avoids an extra
allocation + memmove per incoming packet.
Before:
$ go test -bench='Benchmark(QueueIncoming|WriteTo)' -benchtime=2s -benchmem
BenchmarkQueueIncoming-4 7001494 342.4 ns/op 1024 B/op 2 allocs/op
BenchmarkWriteTo-4 3777459 627 ns/op 1024 B/op 2 allocs/op
After:
$ go test -bench=BenchmarkWriteTo -benchtime 2s -benchmem
BenchmarkQueueIncoming-4 13361600 170.1 ns/op 512 B/op 1 allocs/op
BenchmarkWriteTo-4 6702324 373 ns/op 512 B/op 1 allocs/op
Despite the benchmark results, the change in QueueIncoming turns out not
to have an effect in practice. It appears that the compiler had already
been optimizing out the allocation and copy in QueueIncoming.
https://gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/-/issues/40187
The WriteTo change, on the other hand, in practice reduces the frequency
of garbage collection.
https://gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/-/issues/40199
-rw-r--r-- | common/turbotunnel/queuepacketconn.go | 22 | ||||
-rw-r--r-- | common/turbotunnel/queuepacketconn_test.go | 43 |
2 files changed, 52 insertions, 13 deletions
diff --git a/common/turbotunnel/queuepacketconn.go b/common/turbotunnel/queuepacketconn.go index 14a9833..e11f8d8 100644 --- a/common/turbotunnel/queuepacketconn.go +++ b/common/turbotunnel/queuepacketconn.go @@ -42,8 +42,9 @@ func NewQueuePacketConn(localAddr net.Addr, timeout time.Duration) *QueuePacketC } } -// QueueIncoming queues and incoming packet and its source address, to be -// returned in a future call to ReadFrom. +// QueueIncoming queues an incoming packet and its source address, to be +// returned in a future call to ReadFrom. This function takes ownership of p and +// the caller must not reuse it. func (c *QueuePacketConn) QueueIncoming(p []byte, addr net.Addr) { select { case <-c.closed: @@ -51,11 +52,8 @@ func (c *QueuePacketConn) QueueIncoming(p []byte, addr net.Addr) { return default: } - // Copy the slice so that the caller may reuse it. - buf := make([]byte, len(p)) - copy(buf, p) select { - case c.recvQueue <- taggedPacket{buf, addr}: + case c.recvQueue <- taggedPacket{p, addr}: default: // Drop the incoming packet if the receive queue is full. } @@ -84,22 +82,20 @@ func (c *QueuePacketConn) ReadFrom(p []byte) (int, net.Addr, error) { } // WriteTo queues an outgoing packet for the given address. The queue can later -// be retrieved using the OutgoingQueue method. +// be retrieved using the OutgoingQueue method. This function takes ownership of +// p and the caller must not reuse it. func (c *QueuePacketConn) WriteTo(p []byte, addr net.Addr) (int, error) { select { case <-c.closed: return 0, &net.OpError{Op: "write", Net: c.LocalAddr().Network(), Addr: c.LocalAddr(), Err: c.err.Load().(error)} default: } - // Copy the slice so that the caller may reuse it. - buf := make([]byte, len(p)) - copy(buf, p) select { - case c.clients.SendQueue(addr) <- buf: - return len(buf), nil + case c.clients.SendQueue(addr) <- p: + return len(p), nil default: // Drop the outgoing packet if the send queue is full. - return len(buf), nil + return len(p), nil } } diff --git a/common/turbotunnel/queuepacketconn_test.go b/common/turbotunnel/queuepacketconn_test.go new file mode 100644 index 0000000..49ab6aa --- /dev/null +++ b/common/turbotunnel/queuepacketconn_test.go @@ -0,0 +1,43 @@ +package turbotunnel + +import ( + "testing" + "time" +) + +type emptyAddr struct{} + +func (_ emptyAddr) Network() string { return "empty" } +func (_ emptyAddr) String() string { return "empty" } + +// Run with -benchmem to see memory allocations. +func BenchmarkQueueIncoming(b *testing.B) { + conn := NewQueuePacketConn(emptyAddr{}, 1*time.Hour) + defer conn.Close() + + b.ResetTimer() + s := 500 + for i := 0; i < b.N; i++ { + // Use a variable for the length to stop the compiler from + // optimizing out the allocation. + p := make([]byte, s) + conn.QueueIncoming(p, emptyAddr{}) + } + b.StopTimer() +} + +// BenchmarkWriteTo benchmarks the QueuePacketConn.WriteTo function. +func BenchmarkWriteTo(b *testing.B) { + conn := NewQueuePacketConn(emptyAddr{}, 1*time.Hour) + defer conn.Close() + + b.ResetTimer() + s := 500 + for i := 0; i < b.N; i++ { + // Use a variable for the length to stop the compiler from + // optimizing out the allocation. + p := make([]byte, s) + conn.WriteTo(p, emptyAddr{}) + } + b.StopTimer() +} |