summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2017-11-02 10:30:42 -0400
committerNick Mathewson <nickm@torproject.org>2017-11-02 10:30:42 -0400
commitcb42c62c9e87d4699add69087ba25d1454758bb0 (patch)
tree3cf3da6aa93d5988916414d3ad972cbd43ac95ee
parent27ea6fcd31ceb7dd9e48e5586aa05f05bfe86e0a (diff)
parent6dc591a024e4dde8f4beafc7d42bdaeac996cca4 (diff)
downloadtor-cb42c62c9e87d4699add69087ba25d1454758bb0.tar.gz
tor-cb42c62c9e87d4699add69087ba25d1454758bb0.zip
Merge branch 'dgoulet_ticket23753_032_02_squashed' into maint-0.3.2
-rw-r--r--changes/bug237534
-rw-r--r--src/or/scheduler.c42
-rw-r--r--src/or/scheduler.h8
-rw-r--r--src/or/scheduler_kist.c10
4 files changed, 58 insertions, 6 deletions
diff --git a/changes/bug23753 b/changes/bug23753
new file mode 100644
index 0000000000..8782a8e2d0
--- /dev/null
+++ b/changes/bug23753
@@ -0,0 +1,4 @@
+ o Minor features (logging, scheduler):
+ - Introduce a SCHED_BUG() function to log extra information about the
+ scheduler state if we ever catch a bug in the scheduler. Closes ticket
+ 23753.
diff --git a/src/or/scheduler.c b/src/or/scheduler.c
index 22a9b574ac..97b6d40b1e 100644
--- a/src/or/scheduler.c
+++ b/src/or/scheduler.c
@@ -9,6 +9,9 @@
#define SCHEDULER_KIST_PRIVATE
#include "scheduler.h"
#include "main.h"
+#include "buffers.h"
+#define TOR_CHANNEL_INTERNAL_
+#include "channeltls.h"
#include <event2/event.h>
@@ -604,7 +607,7 @@ scheduler_release_channel,(channel_t *chan))
}
if (chan->scheduler_state == SCHED_CHAN_PENDING) {
- if (smartlist_pos(channels_pending, chan) == -1) {
+ if (SCHED_BUG(smartlist_pos(channels_pending, chan) == -1, chan)) {
log_warn(LD_SCHED, "Scheduler asked to release channel %" PRIu64 " "
"but it wasn't in channels_pending",
chan->global_identifier);
@@ -667,6 +670,43 @@ scheduler_channel_wants_writes(channel_t *chan)
}
}
+/* Log warn the given channel and extra scheduler context as well. This is
+ * used by SCHED_BUG() in order to be able to extract as much information as
+ * we can when we hit a bug. Channel chan can be NULL. */
+void
+scheduler_bug_occurred(const channel_t *chan)
+{
+ char buf[128];
+
+ if (chan != NULL) {
+ const size_t outbuf_len =
+ buf_datalen(TO_CONN(BASE_CHAN_TO_TLS((channel_t *) chan)->conn)->outbuf);
+ tor_snprintf(buf, sizeof(buf),
+ "Channel %" PRIu64 " in state %s and scheduler state %d."
+ " Num cells on cmux: %d. Connection outbuf len: %lu.",
+ chan->global_identifier,
+ channel_state_to_string(chan->state),
+ chan->scheduler_state, circuitmux_num_cells(chan->cmux),
+ outbuf_len);
+ }
+
+ {
+ char *msg;
+ /* Rate limit every 60 seconds. If we start seeing this every 60 sec, we
+ * know something is stuck/wrong. It *should* be loud but not too much. */
+ static ratelim_t rlimit = RATELIM_INIT(60);
+ if ((msg = rate_limit_log(&rlimit, approx_time()))) {
+ log_warn(LD_BUG, "%s Num pending channels: %d. "
+ "Channel in pending list: %s.%s",
+ (chan != NULL) ? buf : "No channel in bug context.",
+ smartlist_len(channels_pending),
+ (smartlist_pos(channels_pending, chan) == -1) ? "no" : "yes",
+ msg);
+ tor_free(msg);
+ }
+ }
+}
+
#ifdef TOR_UNIT_TESTS
/*
diff --git a/src/or/scheduler.h b/src/or/scheduler.h
index 0ec24db931..47c98f096a 100644
--- a/src/or/scheduler.h
+++ b/src/or/scheduler.h
@@ -143,6 +143,14 @@ MOCK_DECL(void, scheduler_channel_has_waiting_cells, (channel_t *chan));
/*********************************
* Defined in scheduler.c
*********************************/
+
+/* Triggers a BUG() and extra information with chan if available. */
+#define SCHED_BUG(cond, chan) \
+ (PREDICT_UNLIKELY(cond) ? \
+ ((BUG(cond)) ? (scheduler_bug_occurred(chan), 1) : 0) : 0)
+
+void scheduler_bug_occurred(const channel_t *chan);
+
smartlist_t *get_channels_pending(void);
MOCK_DECL(int, scheduler_compare_channels,
(const void *c1_v, const void *c2_v));
diff --git a/src/or/scheduler_kist.c b/src/or/scheduler_kist.c
index 03aa9c45bd..a3b74e8cc9 100644
--- a/src/or/scheduler_kist.c
+++ b/src/or/scheduler_kist.c
@@ -116,7 +116,7 @@ channel_outbuf_length(channel_t *chan)
/* In theory, this can not happen because we can not scheduler a channel
* without a connection that has its outbuf initialized. Just in case, bug
* on this so we can understand a bit more why it happened. */
- if (BUG(BASE_CHAN_TO_TLS(chan)->conn == NULL)) {
+ if (SCHED_BUG(BASE_CHAN_TO_TLS(chan)->conn == NULL, chan)) {
return 0;
}
return buf_datalen(TO_CONN(BASE_CHAN_TO_TLS(chan)->conn)->outbuf);
@@ -371,7 +371,7 @@ socket_can_write(socket_table_t *table, const channel_t *chan)
{
socket_table_ent_t *ent = NULL;
ent = socket_table_search(table, chan);
- IF_BUG_ONCE(!ent) {
+ if (SCHED_BUG(!ent, chan)) {
return 1; // Just return true, saying that kist wouldn't limit the socket
}
@@ -391,7 +391,7 @@ update_socket_info(socket_table_t *table, const channel_t *chan)
{
socket_table_ent_t *ent = NULL;
ent = socket_table_search(table, chan);
- IF_BUG_ONCE(!ent) {
+ if (SCHED_BUG(!ent, chan)) {
return; // Whelp. Entry didn't exist for some reason so nothing to do.
}
update_socket_info_impl(ent);
@@ -403,7 +403,7 @@ update_socket_written(socket_table_t *table, channel_t *chan, size_t bytes)
{
socket_table_ent_t *ent = NULL;
ent = socket_table_search(table, chan);
- IF_BUG_ONCE(!ent) {
+ if (SCHED_BUG(!ent, chan)) {
return; // Whelp. Entry didn't exist so nothing to do.
}
@@ -573,7 +573,7 @@ kist_scheduler_run(void)
/* get best channel */
chan = smartlist_pqueue_pop(cp, scheduler_compare_channels,
offsetof(channel_t, sched_heap_idx));
- IF_BUG_ONCE(!chan) {
+ if (SCHED_BUG(!chan, NULL)) {
/* Some-freaking-how a NULL got into the channels_pending. That should
* never happen, but it should be harmless to ignore it and keep looping.
*/