/* Copyright (c) 2017-2019, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
* @file conscache.c
* @brief Consensus and diff on-disk cache.
**/
#include "core/or/or.h"
#include "app/config/config.h"
#include "feature/dircache/conscache.h"
#include "lib/crypt_ops/crypto_util.h"
#include "lib/fs/storagedir.h"
#include "lib/encoding/confline.h"
/** Sentinel written into consensus_cache_entry_t.magic when an entry is
* created. The reference-counting and body accessors in this file treat any
* other value as a bug. */
#define CCE_MAGIC 0x17162253
#ifdef _WIN32
/* On Windows, unlink won't work on a file if the file is actively mmap()ed.
* That forces us to be less aggressive about unlinking files, and causes other
* changes throughout our logic.
*/
#define MUST_UNMAP_TO_UNLINK
#endif /* defined(_WIN32) */
/**
* A consensus_cache_entry_t is a reference-counted handle to an
* item in a consensus_cache_t. It can be mmapped into RAM, or not,
* depending on whether it's currently in use.
*/
struct consensus_cache_entry_t {
uint32_t magic; /**< Must be set to CCE_MAGIC */
/** Per-object state for the handle machinery instantiated by HANDLE_IMPL()
* in this file (the macro itself is defined elsewhere). */
HANDLE_ENTRY(consensus_cache_entry, consensus_cache_entry_t);
/** Reference count. The owning cache holds one reference for as long as
* the entry is in its list; the entry is freed when this drops to zero. */
int32_t refcnt; /**< Reference count. */
/** Set by consensus_cache_entry_mark_for_removal(); acted upon by
* consensus_cache_delete_pending(). */
unsigned can_remove : 1; /**< If true, we want to delete this file. */
/** If true, we intend to unmap this file as soon as we're done with it. */
unsigned release_aggressively : 1;
/** Filename for this object within the storage_dir_t. Owned by the entry
* and released when the entry is freed. */
char *fname;
/** Labels associated with this object. Immutable once the object
* is created. */
config_line_t *labels;
/** Pointer to the cache that includes this entry (if any). */
consensus_cache_t *in_cache;
/** Since what time has this object been mapped into RAM, but with the cache
* being the only one holding a reference to it? Holds TIME_MAX whenever
* that condition does not apply (entry in use elsewhere, or not mapped). */
time_t unused_since;
/** mmapped contents of the underlying file. May be NULL. */
tor_mmap_t *map;
/** Length of the body within map. */
size_t bodylen;
/** Pointer to the body within map. */
const uint8_t *body;
};
/**
* A consensus_cache_t holds a directory full of labeled items.
*/
struct consensus_cache_t {
/** Underlying storage_dir_t to handle persistence. */
storage_dir_t *dir;
/** List of all the entries in the directory. The cache holds one reference
* to each consensus_cache_entry_t stored here. */
smartlist_t *entries;
/** The maximum number of entries that we'd like to allow in this cache.
* This is the same as the storagedir limit when MUST_UNMAP_TO_UNLINK is
* not defined. */
unsigned max_entries;
};
/* Forward declarations for file-local helpers that are used before they
* are defined. */
static void consensus_cache_clear(consensus_cache_t *cache);
static void consensus_cache_rescan(consensus_cache_t *);
static void consensus_cache_entry_map(consensus_cache_t *,
consensus_cache_entry_t *);
static void consensus_cache_entry_unmap(consensus_cache_entry_t *ent);
/**
 * Open (and scan) a consensus cache stored in <b>subdir</b>, a path that is
 * resolved through get_cachedir_fname(). The cache is meant to hold at most
 * <b>max_entries</b> items.
 *
 * Return the new cache, or NULL if the backing storage directory could not
 * be opened.
 */
consensus_cache_t *
consensus_cache_open(const char *subdir, int max_entries)
{
  consensus_cache_t *new_cache = tor_malloc_zero(sizeof(*new_cache));
  char *path = get_cachedir_fname(subdir);

#ifdef MUST_UNMAP_TO_UNLINK
  /* Files that are still mapped cannot be unlinked here, so the storagedir
   * has to tolerate far more files than this cache officially wants: some
   * of them will be leftovers that we simply cannot delete yet.
   */
#define VERY_LARGE_STORAGEDIR_LIMIT (1000*1000)
  const int dir_limit = VERY_LARGE_STORAGEDIR_LIMIT;
#else /* !defined(MUST_UNMAP_TO_UNLINK) */
  /* The storagedir can enforce exactly the limit that the cache uses. */
  const int dir_limit = max_entries;
#endif /* defined(MUST_UNMAP_TO_UNLINK) */

  new_cache->max_entries = max_entries;
  new_cache->dir = storage_dir_new(path, dir_limit);
  tor_free(path);

  if (new_cache->dir == NULL) {
    tor_free(new_cache);
    return NULL;
  }

  /* Populate the entry list from whatever is already on disk. */
  consensus_cache_rescan(new_cache);
  return new_cache;
}
/** Report whether this cache is allowed to hold more entries than its
 * official file limit: 1 if so, 0 otherwise.
 *
 * (This is needed where MUST_UNMAP_TO_UNLINK is defined: files that are
 * still in use cannot be unlinked there, so the limit may have to be
 * exceeded temporarily until the unwanted files become deletable.)
 *
 * The <b>cache</b> argument is currently ignored.
 */
int
consensus_cache_may_overallocate(consensus_cache_t *cache)
{
  (void) cache;
#ifdef MUST_UNMAP_TO_UNLINK
  const int may_exceed_limit = 1;
#else
  const int may_exceed_limit = 0;
#endif
  return may_exceed_limit;
}
/**
 * Register the storage directory behind <b>cache</b> with the sandbox
 * configuration <b>cfg</b>, so that the operations the cache performs are
 * permitted. Returns whatever storage_dir_register_with_sandbox() returns.
 */
int
consensus_cache_register_with_sandbox(consensus_cache_t *cache,
                                      struct sandbox_cfg_elem_t **cfg)
{
#ifdef MUST_UNMAP_TO_UNLINK
  /* The Linux sandbox -- the only one we have right now -- cannot cope with
   * the huge file list that VERY_LARGE_STORAGEDIR_LIMIT in
   * consensus_cache_open() would produce. So when that limit is in use we
   * should never get here at all, and we flag it if we do.
   *
   * Should a future sandbox mechanism handle huge file lists, this check
   * can be dropped or made conditional.
   */
  tor_assert_nonfatal_unreached();
#endif /* defined(MUST_UNMAP_TO_UNLINK) */
  storage_dir_t *backing_dir = cache->dir;
  return storage_dir_register_with_sandbox(backing_dir, cfg);
}
/**
 * Helper: drop every entry from <b>cache</b>'s in-memory list. Files are
 * only deleted from disk for entries that were marked for removal (and are
 * not in use elsewhere); all other files are left in place.
 *
 * On return, cache->entries is NULL.
 */
static void
consensus_cache_clear(consensus_cache_t *cache)
{
  /* Non-forced pass: anything still referenced by somebody else is kept. */
  consensus_cache_delete_pending(cache, 0);

  smartlist_t *remaining = cache->entries;
  SMARTLIST_FOREACH_BEGIN(remaining, consensus_cache_entry_t *, entry) {
    /* Detach first, so that dropping the cache's reference can free it. */
    entry->in_cache = NULL;
    consensus_cache_entry_decref(entry);
  } SMARTLIST_FOREACH_END(entry);

  smartlist_free(remaining);
  cache->entries = NULL;
}
/**
 * Release all storage held by <b>cache</b>: its entry list, its storage
 * directory handle, and the cache object itself. A NULL <b>cache</b> is
 * accepted and ignored.
 */
void
consensus_cache_free_(consensus_cache_t *cache)
{
  if (cache == NULL)
    return;

  if (cache->entries != NULL)
    consensus_cache_clear(cache);

  storage_dir_free(cache->dir);
  tor_free(cache);
}
/**
 * Store the <b>datalen</b> bytes at <b>data</b> in <b>cache</b>, tagged
 * with <b>labels</b>. Return a newly created consensus_cache_entry_t on
 * success, or NULL on failure.
 *
 * The cache owns the returned entry, and the caller receives a reference
 * of its own: release it with consensus_cache_entry_decref() when done.
 *
 * The keys in <b>labels</b> MUST be distinct. If they are not, this API
 * makes no promise about which value (if any) is used for a duplicated key.
 */
consensus_cache_entry_t *
consensus_cache_add(consensus_cache_t *cache,
                    const config_line_t *labels,
                    const uint8_t *data,
                    size_t datalen)
{
  char *stored_name = NULL;
  const int rv = storage_dir_save_labeled_to_file(cache->dir, labels,
                                                  data, datalen,
                                                  &stored_name);
  if (rv < 0 || !stored_name)
    return NULL;

  consensus_cache_entry_t *entry = tor_malloc_zero(sizeof(*entry));
  entry->magic = CCE_MAGIC;
  /* Two references from the start: one for the caller, one for the cache. */
  entry->refcnt = 2;
  entry->fname = stored_name; /* The entry now owns this string. */
  entry->labels = config_lines_dup(labels);
  entry->unused_since = TIME_MAX;
  entry->in_cache = cache;

  smartlist_add(cache->entries, entry);
  return entry;
}
/**
 * Look through <b>cache</b> for an entry labeled <b>key</b>=<b>value</b>
 * and return the first one that consensus_cache_find_all() reports, or NULL
 * if there is none.
 *
 * Does not adjust reference counts.
 */
consensus_cache_entry_t *
consensus_cache_find_first(consensus_cache_t *cache,
                           const char *key,
                           const char *value)
{
  smartlist_t *matches = smartlist_new();
  consensus_cache_find_all(matches, cache, key, value);

  consensus_cache_entry_t *first =
    smartlist_len(matches) > 0 ? smartlist_get(matches, 0) : NULL;

  /* Only the temporary list goes away; the entries belong to the cache. */
  smartlist_free(matches);
  return first;
}
/**
 * Append to <b>out</b> every entry of <b>cache</b> whose labels contain
 * <b>key</b>=<b>value</b>. With a NULL <b>key</b>, every entry qualifies.
 *
 * Entries that have been marked for removal are never added.
 *
 * Does not adjust reference counts.
 */
void
consensus_cache_find_all(smartlist_t *out,
                         consensus_cache_t *cache,
                         const char *key,
                         const char *value)
{
  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, entry) {
    /* Scheduled for deletion: behave as though it were already gone. */
    if (entry->can_remove)
      continue;

    int wanted = 1;
    if (key != NULL) {
      const char *have = consensus_cache_entry_get_value(entry, key);
      wanted = (have != NULL && strcmp(value, have) == 0);
    }
    if (wanted)
      smartlist_add(out, entry);
  } SMARTLIST_FOREACH_END(entry);
}
/**
 * Prune <b>lst</b>, a list of consensus_cache_entry_t, so that only the
 * entries labeled <b>key</b>=<b>value</b> remain. A NULL <b>key</b> leaves
 * the list untouched.
 *
 * Does not adjust reference counts.
 */
void
consensus_cache_filter_list(smartlist_t *lst,
                            const char *key,
                            const char *value)
{
  if (BUG(lst == NULL))
    return; // LCOV_EXCL_LINE
  if (!key)
    return;

  SMARTLIST_FOREACH_BEGIN(lst, consensus_cache_entry_t *, entry) {
    const char *have = consensus_cache_entry_get_value(entry, key);
    const int matches = (have != NULL && strcmp(value, have) == 0);
    if (!matches) {
      SMARTLIST_DEL_CURRENT(lst, entry);
    }
  } SMARTLIST_FOREACH_END(entry);
}
/**
 * Return the value of the label named <b>key</b> on <b>ent</b>, or NULL if
 * <b>ent</b> has no such label.
 *
 * The returned string is only guaranteed to stay valid while you hold a
 * reference to <b>ent</b>.
 */
const char *
consensus_cache_entry_get_value(const consensus_cache_entry_t *ent,
                                const char *key)
{
  const config_line_t *line = config_line_find(ent->labels, key);
  return line ? line->value : NULL;
}
/**
 * Give read-only access to the label list stored on <b>ent</b>.
 *
 * The returned pointer is only guaranteed to stay valid while you hold a
 * reference to <b>ent</b>.
 */
const config_line_t *
consensus_cache_entry_get_labels(const consensus_cache_entry_t *ent)
{
  const config_line_t *entry_labels = ent->labels;
  return entry_labels;
}
/**
 * Take an additional reference to <b>ent</b>. Since the entry is now in
 * use, its "unused since" timestamp is reset to TIME_MAX.
 */
void
consensus_cache_entry_incref(consensus_cache_entry_t *ent)
{
  if (BUG(ent->magic != CCE_MAGIC))
    return; // LCOV_EXCL_LINE

  ent->unused_since = TIME_MAX;
  ent->refcnt += 1;
}
/**
 * Drop one reference to <b>ent</b>. A NULL <b>ent</b> is ignored.
 *
 * When the last reference goes away, <b>ent</b> is freed; callers must
 * therefore not touch <b>ent</b> after calling this function.
 *
 * When the only remaining reference is the cache's own, a mapped entry is
 * either unmapped immediately (if it was marked for aggressive release) or
 * timestamped so that consensus_cache_unmap_lazy() can unmap it later.
 */
void
consensus_cache_entry_decref(consensus_cache_entry_t *ent)
{
  if (ent == NULL)
    return;
  if (BUG(ent->refcnt <= 0))
    return; // LCOV_EXCL_LINE
  if (BUG(ent->magic != CCE_MAGIC))
    return; // LCOV_EXCL_LINE

  ent->refcnt -= 1;

  const int only_cache_holds_it = (ent->refcnt == 1 && ent->in_cache != NULL);
  if (only_cache_holds_it) {
    /* Nobody outside the cache needs the mapping any more. */
    if (ent->map == NULL)
      return;
    if (ent->release_aggressively)
      consensus_cache_entry_unmap(ent);
    else
      ent->unused_since = approx_time();
    return;
  }

  if (ent->refcnt > 0)
    return;

  /* That was the final reference: tear the entry down. */
  if (ent->map != NULL)
    consensus_cache_entry_unmap(ent);
  tor_free(ent->fname);
  config_free_lines(ent->labels);
  consensus_cache_entry_handles_clear(ent);
  /* Wipe before freeing so a stale pointer no longer sees a valid magic. */
  memwipe(ent, 0, sizeof(*ent));
  tor_free(ent);
}
/**
 * Flag <b>ent</b> as something the cache should delete. Nothing is deleted
 * here; deletion happens in consensus_cache_delete_pending(), which (unless
 * forced) waits until the cache holds the only reference to <b>ent</b>.
 */
void
consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent)
{
  ent->can_remove = 1u;
}
/**
 * Flag <b>ent</b> as an entry that need not stay mmap'd any longer than it
 * is actually in use: it will be unmapped as soon as the cache is the only
 * holder of a reference to it.
 */
void
consensus_cache_entry_mark_for_aggressive_release(consensus_cache_entry_t *ent)
{
  ent->release_aggressively = 1u;
}
/**
* Try to read the body of ent into memory if it isn't already
* loaded. On success, set *body_out to the body, *sz_out
* to its size, and return 0. On failure return -1.
*
* The resulting body pointer will only be valid for as long as you
* hold a reference to ent.
*/
int
consensus_cache_entry_get_body(const consensus_cache_entry_t *ent,
const uint8_t **body_out,
size_t *sz_out)
{
if (BUG(ent->magic != CCE_MAGIC))
return -1; // LCOV_EXCL_LINE
if (! ent->map) {
if (! ent->in_cache)
return -1;
consensus_cache_entry_map((consensus_cache_t *)ent->in_cache,
(consensus_cache_entry_t *)ent);
if (! ent->map) {
return -1;
}
}
*body_out = ent->body;
*sz_out = ent->bodylen;
return 0;
}
/**
 * Unmap each mmap'd entry of <b>cache</b> that nobody but the cache
 * references and whose "unused since" time is at or before <b>cutoff</b>.
 */
void
consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff)
{
  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, entry) {
    tor_assert_nonfatal(entry->in_cache == cache);

    /* Still referenced by somebody besides the cache: leave it mapped. */
    if (entry->refcnt > 1 || BUG(entry->in_cache == NULL))
      continue;

    /* Unmap only what has been idle long enough and is really mapped. */
    const int idle_long_enough = (entry->unused_since <= cutoff);
    if (idle_long_enough && entry->map != NULL)
      consensus_cache_entry_unmap(entry);
  } SMARTLIST_FOREACH_END(entry);
}
/**
 * Return the number of currently unused filenames available in this cache:
 * the cache's entry limit minus the number of files listed by its storage
 * directory.
 *
 * The result is never negative. If the directory already holds more files
 * than the limit allows, 0 is returned. That can legitimately happen when
 * MUST_UNMAP_TO_UNLINK is defined; otherwise it indicates a bug and is also
 * reported through a nonfatal assertion.
 */
int
consensus_cache_get_n_filenames_available(consensus_cache_t *cache)
{
  tor_assert(cache);
  const int max = (int) cache->max_entries;
  const int used = smartlist_len(storage_dir_list(cache->dir));
#ifndef MUST_UNMAP_TO_UNLINK
  /* Here the storagedir was opened with the same limit as the cache, so it
   * should never be holding more files than that. */
  tor_assert_nonfatal(max >= used);
#endif /* !defined(MUST_UNMAP_TO_UNLINK) */
  /* The assertion above is nonfatal, so clamp instead of handing callers a
   * negative count. */
  if (used > max)
    return 0;
  return max - used;
}
/**
 * Delete every element of <b>cache</b> that has been marked with
 * consensus_cache_entry_mark_for_removal(): remove it from the entry list,
 * drop the cache's reference to it, and remove its file from the storage
 * directory.
 *
 * If <b>force</b> is false, entries that are in use by something other than
 * the cache are retained. When MUST_UNMAP_TO_UNLINK is defined, an entry
 * that is currently mapped is never force-deleted.
 */
void
consensus_cache_delete_pending(consensus_cache_t *cache, int force)
{
  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, entry) {
    tor_assert_nonfatal(entry->in_cache == cache);

#ifdef MUST_UNMAP_TO_UNLINK
    /* A file with an active mmap cannot be deleted on win32, so forcing is
     * pointless for mapped entries. */
    const int forced = force && !entry->map;
#else
    const int forced = force;
#endif /* defined(MUST_UNMAP_TO_UNLINK) */

    /* Unless forced, skip whatever somebody else is using right now. */
    if (!forced && (entry->refcnt > 1 || BUG(entry->in_cache == NULL)))
      continue;

    /* Not marked: we want to keep this one. */
    if (!entry->can_remove)
      continue;

    if (BUG(entry->refcnt <= 0))
      continue; // LCOV_EXCL_LINE

    SMARTLIST_DEL_CURRENT(cache->entries, entry);
    entry->in_cache = NULL;

    /* Dropping our reference may free the entry together with its fname,
     * so take a private copy of the name first. */
    char *doomed_name = tor_strdup(entry->fname);
    consensus_cache_entry_decref(entry);
    storage_dir_remove_file(cache->dir, doomed_name);
    tor_free(doomed_name);
  } SMARTLIST_FOREACH_END(entry);
}
/**
 * Internal helper: rescan <b>cache</b> and rebuild its list of entries from
 * the files listed by its storage directory.
 *
 * Any existing entry list is cleared first. Each listed file is mapped just
 * long enough to read its labels; files that turn out to be empty or
 * misformatted are removed from the storage directory, and files that
 * cannot be mapped for any other reason are skipped with a warning.
 */
static void
consensus_cache_rescan(consensus_cache_t *cache)
{
  if (cache->entries) {
    consensus_cache_clear(cache);
  }
  cache->entries = smartlist_new();

  const smartlist_t *fnames = storage_dir_list(cache->dir);
  SMARTLIST_FOREACH_BEGIN(fnames, const char *, fname) {
    config_line_t *labels = NULL;
    const uint8_t *body;
    size_t bodylen;
    tor_mmap_t *map = storage_dir_map_labeled(cache->dir, fname,
                                              &labels, &body, &bodylen);
    if (! map) {
      /* Capture errno right away: the logging below calls other functions
       * (such as escaped()) in an unspecified order relative to its other
       * arguments, and any of them may overwrite errno. */
      const int map_errno = errno;

      /* The ERANGE error might come from tor_mmap_file() -- it means the
       * file was empty. EINVAL might come from ..map_labeled() -- it means
       * the file was misformatted. In both cases, we should just delete it.
       */
      if (map_errno == ERANGE || map_errno == EINVAL) {
        log_warn(LD_FS, "Found %s file %s in consensus cache; removing it.",
                 map_errno == ERANGE ? "empty" : "misformatted",
                 escaped(fname));
        /* NOTE(review): fnames comes from storage_dir_list(). If
         * storage_dir_remove_file() edits that same list, this loop is
         * iterating over a list that changes underneath it -- confirm
         * against the storagedir implementation. */
        storage_dir_remove_file(cache->dir, fname);
      } else {
        /* Can't load this; continue */
        log_warn(LD_FS, "Unable to map file %s from consensus cache: %s",
                 escaped(fname), strerror(map_errno));
      }
      continue;
    }

    consensus_cache_entry_t *ent =
      tor_malloc_zero(sizeof(consensus_cache_entry_t));
    ent->magic = CCE_MAGIC;
    ent->fname = tor_strdup(fname);
    ent->labels = labels;
    ent->refcnt = 1; /* The cache's own reference. */
    ent->in_cache = cache;
    ent->unused_since = TIME_MAX;
    smartlist_add(cache->entries, ent);
    tor_munmap_file(map); /* don't actually need to keep this around */
  } SMARTLIST_FOREACH_END(fname);
}
/**
 * Ensure that <b>ent</b> is mapped into RAM, using the storage directory of
 * <b>cache</b>. Does nothing if <b>ent</b> is already mapped. If the
 * mapping attempt fails, ent->map stays NULL.
 */
static void
consensus_cache_entry_map(consensus_cache_t *cache,
                          consensus_cache_entry_t *ent)
{
  if (ent->map == NULL) {
    /* Labels are already stored on the entry, so only the body is wanted. */
    ent->map = storage_dir_map_labeled(cache->dir, ent->fname, NULL,
                                       &ent->body, &ent->bodylen);
    ent->unused_since = TIME_MAX;
  }
}
/**
 * Unmap <b>ent</b> from RAM and clear its body pointer and length. The
 * "unused since" timestamp is reset to TIME_MAX whether or not <b>ent</b>
 * was mapped.
 *
 * Do not call this while anything other than the cache holds a reference to
 * <b>ent</b>.
 */
static void
consensus_cache_entry_unmap(consensus_cache_entry_t *ent)
{
  ent->unused_since = TIME_MAX;
  if (ent->map != NULL) {
    tor_munmap_file(ent->map);
    ent->map = NULL;
    ent->body = NULL;
    ent->bodylen = 0;
  }
}
/* Instantiate the handle helpers for consensus_cache_entry_t. This is
* presumably where consensus_cache_entry_handles_clear(), which is called
* when an entry is freed, comes from -- confirm against the HANDLE_IMPL
* macro definition. */
HANDLE_IMPL(consensus_cache_entry, consensus_cache_entry_t, )
#ifdef TOR_UNIT_TESTS
/**
 * Testing only: return 1 if <b>ent</b> is currently mapped into memory and
 * 0 if it is not, asserting that the body pointer agrees with that state.
 *
 * (In normal operation, this information is not exposed.)
 */
int
consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent)
{
  const int mapped = (ent->map != NULL);
  if (mapped) {
    tor_assert(ent->body);
  } else {
    tor_assert(!ent->body);
  }
  return mapped;
}
#endif /* defined(TOR_UNIT_TESTS) */