diff options
Diffstat (limited to 'src/or/scheduler.c')
-rw-r--r-- | src/or/scheduler.c | 707 |
1 files changed, 0 insertions, 707 deletions
diff --git a/src/or/scheduler.c b/src/or/scheduler.c deleted file mode 100644 index 49ac1b939a..0000000000 --- a/src/or/scheduler.c +++ /dev/null @@ -1,707 +0,0 @@ -/* * Copyright (c) 2013-2016, The Tor Project, Inc. */ -/* See LICENSE for licensing information */ - -/** - * \file scheduler.c - * \brief Relay scheduling system - **/ - -#include "or.h" - -#define TOR_CHANNEL_INTERNAL_ /* For channel_flush_some_cells() */ -#include "channel.h" - -#include "compat_libevent.h" -#define SCHEDULER_PRIVATE_ -#include "scheduler.h" - -#include <event2/event.h> - -/* - * Scheduler high/low watermarks - */ - -static uint32_t sched_q_low_water = 16384; -static uint32_t sched_q_high_water = 32768; - -/* - * Maximum cells to flush in a single call to channel_flush_some_cells(); - * setting this low means more calls, but too high and we could overshoot - * sched_q_high_water. - */ - -static uint32_t sched_max_flush_cells = 16; - -/* - * Write scheduling works by keeping track of which channels can - * accept cells, and have cells to write. From the scheduler's perspective, - * a channel can be in four possible states: - * - * 1.) Not open for writes, no cells to send - * - Not much to do here, and the channel will have scheduler_state == - * SCHED_CHAN_IDLE - * - Transitions from: - * - Open for writes/has cells by simultaneously draining all circuit - * queues and filling the output buffer. - * - Transitions to: - * - Not open for writes/has cells by arrival of cells on an attached - * circuit (this would be driven from append_cell_to_circuit_queue()) - * - Open for writes/no cells by a channel type specific path; - * driven from connection_or_flushed_some() for channel_tls_t. - * - * 2.) Open for writes, no cells to send - * - Not much here either; this will be the state an idle but open channel - * can be expected to settle in. It will have scheduler_state == - * SCHED_CHAN_WAITING_FOR_CELLS - * - Transitions from: - * - Not open for writes/no cells by flushing some of the output - * buffer. - * - Open for writes/has cells by the scheduler moving cells from - * circuit queues to channel output queue, but not having enough - * to fill the output queue. - * - Transitions to: - * - Open for writes/has cells by arrival of new cells on an attached - * circuit, in append_cell_to_circuit_queue() - * - * 3.) Not open for writes, cells to send - * - This is the state of a busy circuit limited by output bandwidth; - * cells have piled up in the circuit queues waiting to be relayed. - * The channel will have scheduler_state == SCHED_CHAN_WAITING_TO_WRITE. - * - Transitions from: - * - Not open for writes/no cells by arrival of cells on an attached - * circuit - * - Open for writes/has cells by filling an output buffer without - * draining all cells from attached circuits - * - Transitions to: - * - Opens for writes/has cells by draining some of the output buffer - * via the connection_or_flushed_some() path (for channel_tls_t). - * - * 4.) Open for writes, cells to send - * - This connection is ready to relay some cells and waiting for - * the scheduler to choose it. The channel will have scheduler_state == - * SCHED_CHAN_PENDING. - * - Transitions from: - * - Not open for writes/has cells by the connection_or_flushed_some() - * path - * - Open for writes/no cells by the append_cell_to_circuit_queue() - * path - * - Transitions to: - * - Not open for writes/no cells by draining all circuit queues and - * simultaneously filling the output buffer. - * - Not open for writes/has cells by writing enough cells to fill the - * output buffer - * - Open for writes/no cells by draining all attached circuit queues - * without also filling the output buffer - * - * Other event-driven parts of the code move channels between these scheduling - * states by calling scheduler functions; the scheduler only runs on open-for- - * writes/has-cells channels and is the only path for those to transition to - * other states. The scheduler_run() function gives us the opportunity to do - * scheduling work, and is called from other scheduler functions whenever a - * state transition occurs, and periodically from the main event loop. - */ - -/* Scheduler global data structures */ - -/* - * We keep a list of channels that are pending - i.e, have cells to write - * and can accept them to send. The enum scheduler_state in channel_t - * is reserved for our use. - */ - -/* Pqueue of channels that can write and have cells (pending work) */ -STATIC smartlist_t *channels_pending = NULL; - -/* - * This event runs the scheduler from its callback, and is manually - * activated whenever a channel enters open for writes/cells to send. - */ - -STATIC struct event *run_sched_ev = NULL; - -/* - * Queue heuristic; this is not the queue size, but an 'effective queuesize' - * that ages out contributions from stalled channels. - */ - -STATIC uint64_t queue_heuristic = 0; - -/* - * Timestamp for last queue heuristic update - */ - -STATIC time_t queue_heuristic_timestamp = 0; - -/* Scheduler static function declarations */ - -static void scheduler_evt_callback(evutil_socket_t fd, - short events, void *arg); -static int scheduler_more_work(void); -static void scheduler_retrigger(void); -#if 0 -static void scheduler_trigger(void); -#endif - -/* Scheduler function implementations */ - -/** Free everything and shut down the scheduling system */ - -void -scheduler_free_all(void) -{ - log_debug(LD_SCHED, "Shutting down scheduler"); - - if (run_sched_ev) { - if (event_del(run_sched_ev) < 0) { - log_warn(LD_BUG, "Problem deleting run_sched_ev"); - } - tor_event_free(run_sched_ev); - run_sched_ev = NULL; - } - - if (channels_pending) { - smartlist_free(channels_pending); - channels_pending = NULL; - } -} - -/** - * Comparison function to use when sorting pending channels - */ - -MOCK_IMPL(STATIC int, -scheduler_compare_channels, (const void *c1_v, const void *c2_v)) -{ - channel_t *c1 = NULL, *c2 = NULL; - /* These are a workaround for -Wbad-function-cast throwing a fit */ - const circuitmux_policy_t *p1, *p2; - uintptr_t p1_i, p2_i; - - tor_assert(c1_v); - tor_assert(c2_v); - - c1 = (channel_t *)(c1_v); - c2 = (channel_t *)(c2_v); - - tor_assert(c1); - tor_assert(c2); - - if (c1 != c2) { - if (circuitmux_get_policy(c1->cmux) == - circuitmux_get_policy(c2->cmux)) { - /* Same cmux policy, so use the mux comparison */ - return circuitmux_compare_muxes(c1->cmux, c2->cmux); - } else { - /* - * Different policies; not important to get this edge case perfect - * because the current code never actually gives different channels - * different cmux policies anyway. Just use this arbitrary but - * definite choice. - */ - p1 = circuitmux_get_policy(c1->cmux); - p2 = circuitmux_get_policy(c2->cmux); - p1_i = (uintptr_t)p1; - p2_i = (uintptr_t)p2; - - return (p1_i < p2_i) ? -1 : 1; - } - } else { - /* c1 == c2, so always equal */ - return 0; - } -} - -/* - * Scheduler event callback; this should get triggered once per event loop - * if any scheduling work was created during the event loop. - */ - -static void -scheduler_evt_callback(evutil_socket_t fd, short events, void *arg) -{ - (void)fd; - (void)events; - (void)arg; - log_debug(LD_SCHED, "Scheduler event callback called"); - - tor_assert(run_sched_ev); - - /* Run the scheduler */ - scheduler_run(); - - /* Do we have more work to do? */ - if (scheduler_more_work()) scheduler_retrigger(); -} - -/** Mark a channel as no longer ready to accept writes */ - -MOCK_IMPL(void, -scheduler_channel_doesnt_want_writes,(channel_t *chan)) -{ - tor_assert(chan); - - tor_assert(channels_pending); - - /* If it's already in pending, we can put it in waiting_to_write */ - if (chan->scheduler_state == SCHED_CHAN_PENDING) { - /* - * It's in channels_pending, so it shouldn't be in any of - * the other lists. It can't write any more, so it goes to - * channels_waiting_to_write. - */ - smartlist_pqueue_remove(channels_pending, - scheduler_compare_channels, - STRUCT_OFFSET(channel_t, sched_heap_idx), - chan); - chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p went from pending " - "to waiting_to_write", - U64_PRINTF_ARG(chan->global_identifier), chan); - } else { - /* - * It's not in pending, so it can't become waiting_to_write; it's - * either not in any of the lists (nothing to do) or it's already in - * waiting_for_cells (remove it, can't write any more). - */ - if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) { - chan->scheduler_state = SCHED_CHAN_IDLE; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p left waiting_for_cells", - U64_PRINTF_ARG(chan->global_identifier), chan); - } - } -} - -/** Mark a channel as having waiting cells */ - -MOCK_IMPL(void, -scheduler_channel_has_waiting_cells,(channel_t *chan)) -{ - int became_pending = 0; - - tor_assert(chan); - tor_assert(channels_pending); - - /* First, check if this one also writeable */ - if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) { - /* - * It's in channels_waiting_for_cells, so it shouldn't be in any of - * the other lists. It has waiting cells now, so it goes to - * channels_pending. - */ - chan->scheduler_state = SCHED_CHAN_PENDING; - smartlist_pqueue_add(channels_pending, - scheduler_compare_channels, - STRUCT_OFFSET(channel_t, sched_heap_idx), - chan); - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p went from waiting_for_cells " - "to pending", - U64_PRINTF_ARG(chan->global_identifier), chan); - became_pending = 1; - } else { - /* - * It's not in waiting_for_cells, so it can't become pending; it's - * either not in any of the lists (we add it to waiting_to_write) - * or it's already in waiting_to_write or pending (we do nothing) - */ - if (!(chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE || - chan->scheduler_state == SCHED_CHAN_PENDING)) { - chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p entered waiting_to_write", - U64_PRINTF_ARG(chan->global_identifier), chan); - } - } - - /* - * If we made a channel pending, we potentially have scheduling work - * to do. - */ - if (became_pending) scheduler_retrigger(); -} - -/** Set up the scheduling system */ - -void -scheduler_init(void) -{ - log_debug(LD_SCHED, "Initting scheduler"); - - tor_assert(!run_sched_ev); - run_sched_ev = tor_event_new(tor_libevent_get_base(), -1, - 0, scheduler_evt_callback, NULL); - - channels_pending = smartlist_new(); - queue_heuristic = 0; - queue_heuristic_timestamp = approx_time(); -} - -/** Check if there's more scheduling work */ - -static int -scheduler_more_work(void) -{ - tor_assert(channels_pending); - - return ((scheduler_get_queue_heuristic() < sched_q_low_water) && - ((smartlist_len(channels_pending) > 0))) ? 1 : 0; -} - -/** Retrigger the scheduler in a way safe to use from the callback */ - -static void -scheduler_retrigger(void) -{ - tor_assert(run_sched_ev); - event_active(run_sched_ev, EV_TIMEOUT, 1); -} - -/** Notify the scheduler of a channel being closed */ - -MOCK_IMPL(void, -scheduler_release_channel,(channel_t *chan)) -{ - tor_assert(chan); - tor_assert(channels_pending); - - if (chan->scheduler_state == SCHED_CHAN_PENDING) { - smartlist_pqueue_remove(channels_pending, - scheduler_compare_channels, - STRUCT_OFFSET(channel_t, sched_heap_idx), - chan); - } - - chan->scheduler_state = SCHED_CHAN_IDLE; -} - -/** Run the scheduling algorithm if necessary */ - -MOCK_IMPL(void, -scheduler_run, (void)) -{ - int n_cells, n_chans_before, n_chans_after; - uint64_t q_len_before, q_heur_before, q_len_after, q_heur_after; - ssize_t flushed, flushed_this_time; - smartlist_t *to_readd = NULL; - channel_t *chan = NULL; - - log_debug(LD_SCHED, "We have a chance to run the scheduler"); - - if (scheduler_get_queue_heuristic() < sched_q_low_water) { - n_chans_before = smartlist_len(channels_pending); - q_len_before = channel_get_global_queue_estimate(); - q_heur_before = scheduler_get_queue_heuristic(); - - while (scheduler_get_queue_heuristic() <= sched_q_high_water && - smartlist_len(channels_pending) > 0) { - /* Pop off a channel */ - chan = smartlist_pqueue_pop(channels_pending, - scheduler_compare_channels, - STRUCT_OFFSET(channel_t, sched_heap_idx)); - tor_assert(chan); - - /* Figure out how many cells we can write */ - n_cells = channel_num_cells_writeable(chan); - if (n_cells > 0) { - log_debug(LD_SCHED, - "Scheduler saw pending channel " U64_FORMAT " at %p with " - "%d cells writeable", - U64_PRINTF_ARG(chan->global_identifier), chan, n_cells); - - flushed = 0; - while (flushed < n_cells && - scheduler_get_queue_heuristic() <= sched_q_high_water) { - flushed_this_time = - channel_flush_some_cells(chan, - MIN(sched_max_flush_cells, - (size_t) n_cells - flushed)); - if (flushed_this_time <= 0) break; - flushed += flushed_this_time; - } - - if (flushed < n_cells) { - /* We ran out of cells to flush */ - chan->scheduler_state = SCHED_CHAN_WAITING_FOR_CELLS; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p " - "entered waiting_for_cells from pending", - U64_PRINTF_ARG(chan->global_identifier), - chan); - } else { - /* The channel may still have some cells */ - if (channel_more_to_flush(chan)) { - /* The channel goes to either pending or waiting_to_write */ - if (channel_num_cells_writeable(chan) > 0) { - /* Add it back to pending later */ - if (!to_readd) to_readd = smartlist_new(); - smartlist_add(to_readd, chan); - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p " - "is still pending", - U64_PRINTF_ARG(chan->global_identifier), - chan); - } else { - /* It's waiting to be able to write more */ - chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p " - "entered waiting_to_write from pending", - U64_PRINTF_ARG(chan->global_identifier), - chan); - } - } else { - /* No cells left; it can go to idle or waiting_for_cells */ - if (channel_num_cells_writeable(chan) > 0) { - /* - * It can still accept writes, so it goes to - * waiting_for_cells - */ - chan->scheduler_state = SCHED_CHAN_WAITING_FOR_CELLS; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p " - "entered waiting_for_cells from pending", - U64_PRINTF_ARG(chan->global_identifier), - chan); - } else { - /* - * We exactly filled up the output queue with all available - * cells; go to idle. - */ - chan->scheduler_state = SCHED_CHAN_IDLE; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p " - "become idle from pending", - U64_PRINTF_ARG(chan->global_identifier), - chan); - } - } - } - - log_debug(LD_SCHED, - "Scheduler flushed %d cells onto pending channel " - U64_FORMAT " at %p", - (int)flushed, U64_PRINTF_ARG(chan->global_identifier), - chan); - } else { - log_info(LD_SCHED, - "Scheduler saw pending channel " U64_FORMAT " at %p with " - "no cells writeable", - U64_PRINTF_ARG(chan->global_identifier), chan); - /* Put it back to WAITING_TO_WRITE */ - chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE; - } - } - - /* Readd any channels we need to */ - if (to_readd) { - SMARTLIST_FOREACH_BEGIN(to_readd, channel_t *, readd_chan) { - readd_chan->scheduler_state = SCHED_CHAN_PENDING; - smartlist_pqueue_add(channels_pending, - scheduler_compare_channels, - STRUCT_OFFSET(channel_t, sched_heap_idx), - readd_chan); - } SMARTLIST_FOREACH_END(readd_chan); - smartlist_free(to_readd); - } - - n_chans_after = smartlist_len(channels_pending); - q_len_after = channel_get_global_queue_estimate(); - q_heur_after = scheduler_get_queue_heuristic(); - log_debug(LD_SCHED, - "Scheduler handled %d of %d pending channels, queue size from " - U64_FORMAT " to " U64_FORMAT ", queue heuristic from " - U64_FORMAT " to " U64_FORMAT, - n_chans_before - n_chans_after, n_chans_before, - U64_PRINTF_ARG(q_len_before), U64_PRINTF_ARG(q_len_after), - U64_PRINTF_ARG(q_heur_before), U64_PRINTF_ARG(q_heur_after)); - } -} - -/** Trigger the scheduling event so we run the scheduler later */ - -#if 0 -static void -scheduler_trigger(void) -{ - log_debug(LD_SCHED, "Triggering scheduler event"); - - tor_assert(run_sched_ev); - - event_add(run_sched_ev, EV_TIMEOUT, 1); -} -#endif - -/** Mark a channel as ready to accept writes */ - -void -scheduler_channel_wants_writes(channel_t *chan) -{ - int became_pending = 0; - - tor_assert(chan); - tor_assert(channels_pending); - - /* If it's already in waiting_to_write, we can put it in pending */ - if (chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE) { - /* - * It can write now, so it goes to channels_pending. - */ - smartlist_pqueue_add(channels_pending, - scheduler_compare_channels, - STRUCT_OFFSET(channel_t, sched_heap_idx), - chan); - chan->scheduler_state = SCHED_CHAN_PENDING; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p went from waiting_to_write " - "to pending", - U64_PRINTF_ARG(chan->global_identifier), chan); - became_pending = 1; - } else { - /* - * It's not in SCHED_CHAN_WAITING_TO_WRITE, so it can't become pending; - * it's either idle and goes to WAITING_FOR_CELLS, or it's a no-op. - */ - if (!(chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS || - chan->scheduler_state == SCHED_CHAN_PENDING)) { - chan->scheduler_state = SCHED_CHAN_WAITING_FOR_CELLS; - log_debug(LD_SCHED, - "Channel " U64_FORMAT " at %p entered waiting_for_cells", - U64_PRINTF_ARG(chan->global_identifier), chan); - } - } - - /* - * If we made a channel pending, we potentially have scheduling work - * to do. - */ - if (became_pending) scheduler_retrigger(); -} - -/** - * Notify the scheduler that a channel's position in the pqueue may have - * changed - */ - -void -scheduler_touch_channel(channel_t *chan) -{ - tor_assert(chan); - - if (chan->scheduler_state == SCHED_CHAN_PENDING) { - /* Remove and re-add it */ - smartlist_pqueue_remove(channels_pending, - scheduler_compare_channels, - STRUCT_OFFSET(channel_t, sched_heap_idx), - chan); - smartlist_pqueue_add(channels_pending, - scheduler_compare_channels, - STRUCT_OFFSET(channel_t, sched_heap_idx), - chan); - } - /* else no-op, since it isn't in the queue */ -} - -/** - * Notify the scheduler of a queue size adjustment, to recalculate the - * queue heuristic. - */ - -void -scheduler_adjust_queue_size(channel_t *chan, int dir, uint64_t adj) -{ - time_t now = approx_time(); - - log_debug(LD_SCHED, - "Queue size adjustment by %s" U64_FORMAT " for channel " - U64_FORMAT, - (dir >= 0) ? "+" : "-", - U64_PRINTF_ARG(adj), - U64_PRINTF_ARG(chan->global_identifier)); - - /* Get the queue heuristic up to date */ - scheduler_update_queue_heuristic(now); - - /* Adjust as appropriate */ - if (dir >= 0) { - /* Increasing it */ - queue_heuristic += adj; - } else { - /* Decreasing it */ - if (queue_heuristic > adj) queue_heuristic -= adj; - else queue_heuristic = 0; - } - - log_debug(LD_SCHED, - "Queue heuristic is now " U64_FORMAT, - U64_PRINTF_ARG(queue_heuristic)); -} - -/** - * Query the current value of the queue heuristic - */ - -STATIC uint64_t -scheduler_get_queue_heuristic(void) -{ - time_t now = approx_time(); - - scheduler_update_queue_heuristic(now); - - return queue_heuristic; -} - -/** - * Adjust the queue heuristic value to the present time - */ - -STATIC void -scheduler_update_queue_heuristic(time_t now) -{ - time_t diff; - - if (queue_heuristic_timestamp == 0) { - /* - * Nothing we can sensibly do; must not have been initted properly. - * Oh well. - */ - queue_heuristic_timestamp = now; - } else if (queue_heuristic_timestamp < now) { - diff = now - queue_heuristic_timestamp; - /* - * This is a simple exponential age-out; the other proposed alternative - * was a linear age-out using the bandwidth history in rephist.c; I'm - * going with this out of concern that if an adversary can jam the - * scheduler long enough, it would cause the bandwidth to drop to - * zero and render the aging mechanism ineffective thereafter. - */ - if (0 <= diff && diff < 64) queue_heuristic >>= diff; - else queue_heuristic = 0; - - queue_heuristic_timestamp = now; - - log_debug(LD_SCHED, - "Queue heuristic is now " U64_FORMAT, - U64_PRINTF_ARG(queue_heuristic)); - } - /* else no update needed, or time went backward */ -} - -/** - * Set scheduler watermarks and flush size - */ - -void -scheduler_set_watermarks(uint32_t lo, uint32_t hi, uint32_t max_flush) -{ - /* Sanity assertions - caller should ensure these are true */ - tor_assert(lo > 0); - tor_assert(hi > lo); - tor_assert(max_flush > 0); - - sched_q_low_water = lo; - sched_q_high_water = hi; - sched_max_flush_cells = max_flush; -} - |