aboutsummaryrefslogtreecommitdiff
path: root/src/feature/dircache/conscache.c
blob: 2a831aa447e4467170e764bdfcafea6eb20c9431 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
/* Copyright (c) 2017-2020, The Tor Project, Inc. */
/* See LICENSE for licensing information */

/**
 * @file conscache.c
 * @brief Consensus and diff on-disk cache.
 **/

#include "core/or/or.h"

#include "app/config/config.h"
#include "feature/dircache/conscache.h"
#include "lib/crypt_ops/crypto_util.h"
#include "lib/fs/storagedir.h"
#include "lib/encoding/confline.h"

#define CCE_MAGIC 0x17162253

#ifdef _WIN32
/* On Windows, unlink won't work on a file if the file is actively mmap()ed.
 * That forces us to be less aggressive about unlinking files, and causes other
 * changes throughout our logic.
 */
#define MUST_UNMAP_TO_UNLINK
#endif /* defined(_WIN32) */

/**
 * A consensus_cache_entry_t is a reference-counted handle to an
 * item in a consensus_cache_t.  It can be mmapped into RAM, or not,
 * depending whether it's currently in use.
 */
struct consensus_cache_entry_t {
  uint32_t magic; /**< Must be set to CCE_MAGIC */
  HANDLE_ENTRY(consensus_cache_entry, consensus_cache_entry_t);
  int32_t refcnt; /**< Reference count. */
  unsigned can_remove : 1; /**< If true, we want to delete this file. */
  /** If true, we intend to unmap this file as soon as we're done with it. */
  unsigned release_aggressively : 1;

  /** Filename for this object within the storage_dir_t */
  char *fname;
  /** Labels associated with this object. Immutable once the object
   * is created. */
  config_line_t *labels;
  /** Pointer to the cache that includes this entry (if any). */
  consensus_cache_t *in_cache;

  /** Since what time has this object been mapped into RAM, but with the cache
   * being the only having a reference to it? */
  time_t unused_since;
  /** mmaped contents of the underlying file.  May be NULL */
  tor_mmap_t *map;
  /** Length of the body within <b>map</b>. */
  size_t bodylen;
  /** Pointer to the body within <b>map</b>. */
  const uint8_t *body;
};

/**
 * A consensus_cache_t holds a directory full of labeled items.
 */
struct consensus_cache_t {
  /** Underling storage_dir_t to handle persistence */
  storage_dir_t *dir;
  /** List of all the entries in the directory. */
  smartlist_t *entries;

  /** The maximum number of entries that we'd like to allow in this cache.
   * This is the same as the storagedir limit when MUST_UNMAP_TO_UNLINK is
   * not defined. */
  unsigned max_entries;
};

static void consensus_cache_clear(consensus_cache_t *cache);
static void consensus_cache_rescan(consensus_cache_t *);
static void consensus_cache_entry_map(consensus_cache_t *,
                                      consensus_cache_entry_t *);
static void consensus_cache_entry_unmap(consensus_cache_entry_t *ent);

/**
 * Helper: Open a consensus cache in subdirectory <b>subdir</b> of the
 * data directory, to hold up to <b>max_entries</b> of data.
 */
consensus_cache_t *
consensus_cache_open(const char *subdir, int max_entries)
{
  int storagedir_max_entries;
  consensus_cache_t *cache = tor_malloc_zero(sizeof(consensus_cache_t));
  char *directory = get_cachedir_fname(subdir);
  cache->max_entries = max_entries;

#ifdef MUST_UNMAP_TO_UNLINK
  /* If we can't unlink the files that we're still using, then we need to
   * tell the storagedir backend to allow far more files than this consensus
   * cache actually wants, so that it can hold files which, from this cache's
   * perspective, have become useless.
   */
#define VERY_LARGE_STORAGEDIR_LIMIT (1000*1000)
  storagedir_max_entries = VERY_LARGE_STORAGEDIR_LIMIT;
#else /* !defined(MUST_UNMAP_TO_UNLINK) */
  /* Otherwise, we can just tell the storagedir to use the same limits
   * as this cache. */
  storagedir_max_entries = max_entries;
#endif /* defined(MUST_UNMAP_TO_UNLINK) */

  cache->dir = storage_dir_new(directory, storagedir_max_entries);
  tor_free(directory);
  if (!cache->dir) {
    tor_free(cache);
    return NULL;
  }

  consensus_cache_rescan(cache);
  return cache;
}

/** Return true if it's okay to put more entries in this cache than
 * its official file limit.
 *
 * (We need this method on Windows, where we can't unlink files that are still
 * in use, and therefore might need to temporarily exceed the file limit until
 * the no-longer-wanted files are deletable.)
 */
int
consensus_cache_may_overallocate(consensus_cache_t *cache)
{
  (void) cache;
#ifdef MUST_UNMAP_TO_UNLINK
  return 1;
#else
  return 0;
#endif
}

// HACK: GCC on Appveyor hates that we may assert before returning. Work around
// the error.
#ifdef _WIN32
#ifndef COCCI
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-attribute=noreturn"
#endif
#endif /* defined(_WIN32) */

/**
 * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
 * operations that <b>cache</b> will need.
 */
int
consensus_cache_register_with_sandbox(consensus_cache_t *cache,
                                      struct sandbox_cfg_elem_t **cfg)
{
#ifdef MUST_UNMAP_TO_UNLINK
  /* Our Linux sandbox doesn't support huge file lists like the one that would
   * be generated by using VERY_LARGE_STORAGEDIR_LIMIT above in
   * consensus_cache_open().  Since the Linux sandbox is the only one we have
   * right now, we just assert that we never reach this point when we've had
   * to use VERY_LARGE_STORAGEDIR_LIMIT.
   *
   * If at some point in the future we have a different sandbox mechanism that
   * can handle huge file lists, we can remove this assertion or make it
   * conditional.
   */
  tor_assert_nonfatal_unreached();
#endif /* defined(MUST_UNMAP_TO_UNLINK) */
  return storage_dir_register_with_sandbox(cache->dir, cfg);
}

#ifdef _WIN32
#ifndef COCCI
#pragma GCC diagnostic pop
#endif
#endif

/**
 * Helper: clear all entries from <b>cache</b> (but do not delete
 * any that aren't marked for removal
 */
static void
consensus_cache_clear(consensus_cache_t *cache)
{
  consensus_cache_delete_pending(cache, 0);

  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
    ent->in_cache = NULL;
    consensus_cache_entry_decref(ent);
  } SMARTLIST_FOREACH_END(ent);
  smartlist_free(cache->entries);
  cache->entries = NULL;
}

/**
 * Drop all storage held by <b>cache</b>.
 */
void
consensus_cache_free_(consensus_cache_t *cache)
{
  if (! cache)
    return;

  if (cache->entries) {
    consensus_cache_clear(cache);
  }
  storage_dir_free(cache->dir);
  tor_free(cache);
}

/**
 * Write <b>datalen</b> bytes of data at <b>data</b> into the <b>cache</b>,
 * labeling that data with <b>labels</b>.  On failure, return NULL. On
 * success, return a newly created consensus_cache_entry_t.
 *
 * The returned value will be owned by the cache, and you will have a
 * reference to it.  Call consensus_cache_entry_decref() when you are
 * done with it.
 *
 * The provided <b>labels</b> MUST have distinct keys: if they don't,
 * this API does not specify which values (if any) for the duplicate keys
 * will be considered.
 */
consensus_cache_entry_t *
consensus_cache_add(consensus_cache_t *cache,
                    const config_line_t *labels,
                    const uint8_t *data,
                    size_t datalen)
{
  char *fname = NULL;
  int r = storage_dir_save_labeled_to_file(cache->dir,
                                            labels, data, datalen, &fname);
  if (r < 0 || fname == NULL) {
    return NULL;
  }
  consensus_cache_entry_t *ent =
    tor_malloc_zero(sizeof(consensus_cache_entry_t));
  ent->magic = CCE_MAGIC;
  ent->fname = fname;
  ent->labels = config_lines_dup(labels);
  ent->in_cache = cache;
  ent->unused_since = TIME_MAX;
  smartlist_add(cache->entries, ent);
  /* Start the reference count at 2: the caller owns one copy, and the
   * cache owns another.
   */
  ent->refcnt = 2;

  return ent;
}

/**
 * Given a <b>cache</b>, return some entry for which <b>key</b>=<b>value</b>.
 * Return NULL if no such entry exists.
 *
 * Does not adjust reference counts.
 */
consensus_cache_entry_t *
consensus_cache_find_first(consensus_cache_t *cache,
                           const char *key,
                           const char *value)
{
  smartlist_t *tmp = smartlist_new();
  consensus_cache_find_all(tmp, cache, key, value);
  consensus_cache_entry_t *ent = NULL;
  if (smartlist_len(tmp))
    ent = smartlist_get(tmp, 0);
  smartlist_free(tmp);
  return ent;
}

/**
 * Given a <b>cache</b>, add every entry to <b>out</b> for which
 * <b>key</b>=<b>value</b>.  If <b>key</b> is NULL, add every entry.
 *
 * Do not add any entry that has been marked for removal.
 *
 * Does not adjust reference counts.
 */
void
consensus_cache_find_all(smartlist_t *out,
                         consensus_cache_t *cache,
                         const char *key,
                         const char *value)
{
  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
    if (ent->can_remove == 1) {
      /* We want to delete this; pretend it isn't there. */
      continue;
    }
    if (! key) {
      smartlist_add(out, ent);
      continue;
    }
    const char *found_val = consensus_cache_entry_get_value(ent, key);
    if (found_val && !strcmp(value, found_val)) {
      smartlist_add(out, ent);
    }
  } SMARTLIST_FOREACH_END(ent);
}

/**
 * Given a list of consensus_cache_entry_t, remove all those entries
 * that do not have <b>key</b>=<b>value</b> in their labels.
 *
 * Does not adjust reference counts.
 */
void
consensus_cache_filter_list(smartlist_t *lst,
                            const char *key,
                            const char *value)
{
  if (BUG(lst == NULL))
    return; // LCOV_EXCL_LINE
  if (key == NULL)
    return;
  SMARTLIST_FOREACH_BEGIN(lst, consensus_cache_entry_t *, ent) {
    const char *found_val = consensus_cache_entry_get_value(ent, key);
    if (! found_val || strcmp(value, found_val)) {
      SMARTLIST_DEL_CURRENT(lst, ent);
    }
  } SMARTLIST_FOREACH_END(ent);
}

/**
 * If <b>ent</b> has a label with the given <b>key</b>, return its
 * value.  Otherwise return NULL.
 *
 * The return value is only guaranteed to be valid for as long as you
 * hold a reference to <b>ent</b>.
 */
const char *
consensus_cache_entry_get_value(const consensus_cache_entry_t *ent,
                                const char *key)
{
  const config_line_t *match = config_line_find(ent->labels, key);
  if (match)
    return match->value;
  else
    return NULL;
}

/**
 * Return a pointer to the labels in <b>ent</b>.
 *
 * This pointer is only guaranteed to be valid for as long as you
 * hold a reference to <b>ent</b>.
 */
const config_line_t *
consensus_cache_entry_get_labels(const consensus_cache_entry_t *ent)
{
  return ent->labels;
}

/**
 * Increase the reference count of <b>ent</b>.
 */
void
consensus_cache_entry_incref(consensus_cache_entry_t *ent)
{
  if (BUG(ent->magic != CCE_MAGIC))
    return; // LCOV_EXCL_LINE
  ++ent->refcnt;
  ent->unused_since = TIME_MAX;
}

/**
 * Release a reference held to <b>ent</b>.
 *
 * If it was the last reference, ent will be freed. Therefore, you must not
 * use <b>ent</b> after calling this function.
 */
void
consensus_cache_entry_decref(consensus_cache_entry_t *ent)
{
  if (! ent)
    return;
  if (BUG(ent->refcnt <= 0))
    return; // LCOV_EXCL_LINE
  if (BUG(ent->magic != CCE_MAGIC))
    return; // LCOV_EXCL_LINE

  --ent->refcnt;

  if (ent->refcnt == 1 && ent->in_cache) {
    /* Only the cache has a reference: we don't need to keep the file
     * mapped */
    if (ent->map) {
      if (ent->release_aggressively) {
        consensus_cache_entry_unmap(ent);
      } else {
        ent->unused_since = approx_time();
      }
    }
    return;
  }

  if (ent->refcnt > 0)
    return;

  /* Refcount is zero; we can free it. */
  if (ent->map) {
    consensus_cache_entry_unmap(ent);
  }
  tor_free(ent->fname);
  config_free_lines(ent->labels);
  consensus_cache_entry_handles_clear(ent);
  memwipe(ent, 0, sizeof(consensus_cache_entry_t));
  tor_free(ent);
}

/**
 * Mark <b>ent</b> for deletion from the cache.  Deletion will not occur
 * until the cache is the only place that holds a reference to <b>ent</b>.
 */
void
consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent)
{
  ent->can_remove = 1;
}

/**
 * Mark <b>ent</b> as the kind of entry that we don't need to keep mmap'd for
 * any longer than we're actually using it.
 */
void
consensus_cache_entry_mark_for_aggressive_release(consensus_cache_entry_t *ent)
{
  ent->release_aggressively = 1;
}

/**
 * Try to read the body of <b>ent</b> into memory if it isn't already
 * loaded.  On success, set *<b>body_out</b> to the body, *<b>sz_out</b>
 * to its size, and return 0.  On failure return -1.
 *
 * The resulting body pointer will only be valid for as long as you
 * hold a reference to <b>ent</b>.
 */
int
consensus_cache_entry_get_body(const consensus_cache_entry_t *ent,
                               const uint8_t **body_out,
                               size_t *sz_out)
{
  if (BUG(ent->magic != CCE_MAGIC))
    return -1; // LCOV_EXCL_LINE

  if (! ent->map) {
    if (! ent->in_cache)
      return -1;

    consensus_cache_entry_map((consensus_cache_t *)ent->in_cache,
                              (consensus_cache_entry_t *)ent);
    if (! ent->map) {
      return -1;
    }
  }

  *body_out = ent->body;
  *sz_out = ent->bodylen;
  return 0;
}

/**
 * Unmap every mmap'd element of <b>cache</b> that has been unused
 * since <b>cutoff</b>.
 */
void
consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff)
{
  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
    tor_assert_nonfatal(ent->in_cache == cache);
    if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) {
      /* Somebody is using this entry right now */
      continue;
    }
    if (ent->unused_since > cutoff) {
      /* Has been unused only for a little while */
      continue;
    }
    if (ent->map == NULL) {
      /* Not actually mapped. */
      continue;
    }
    consensus_cache_entry_unmap(ent);
  } SMARTLIST_FOREACH_END(ent);
}

/**
 * Return the number of currently unused filenames available in this cache.
 */
int
consensus_cache_get_n_filenames_available(consensus_cache_t *cache)
{
  tor_assert(cache);
  int max = cache->max_entries;
  int used = smartlist_len(storage_dir_list(cache->dir));
#ifdef MUST_UNMAP_TO_UNLINK
  if (used > max)
    return 0;
#else
  tor_assert_nonfatal(max >= used);
#endif /* defined(MUST_UNMAP_TO_UNLINK) */
  return max - used;
}

/**
 * Delete every element of <b>cache</b> has been marked with
 * consensus_cache_entry_mark_for_removal. If <b>force</b> is false,
 * retain those entries which are in use by something other than the cache.
 */
void
consensus_cache_delete_pending(consensus_cache_t *cache, int force)
{
  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
    tor_assert_nonfatal(ent->in_cache == cache);
    int force_ent = force;
#ifdef MUST_UNMAP_TO_UNLINK
    /* We cannot delete anything with an active mmap on win32, so no
     * force-deletion. */
    if (ent->map) {
      force_ent = 0;
    }
#endif /* defined(MUST_UNMAP_TO_UNLINK) */
    if (! force_ent) {
      if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) {
        /* Somebody is using this entry right now */
        continue;
      }
    }
    if (ent->can_remove == 0) {
      /* Don't want to delete this. */
      continue;
    }
    if (BUG(ent->refcnt <= 0)) {
      continue; // LCOV_EXCL_LINE
    }

    SMARTLIST_DEL_CURRENT(cache->entries, ent);
    ent->in_cache = NULL;
    char *fname = tor_strdup(ent->fname); /* save a copy */
    consensus_cache_entry_decref(ent);
    storage_dir_remove_file(cache->dir, fname);
    tor_free(fname);
  } SMARTLIST_FOREACH_END(ent);
}

/**
 * Internal helper: rescan <b>cache</b> and rebuild its list of entries.
 */
static void
consensus_cache_rescan(consensus_cache_t *cache)
{
  if (cache->entries) {
    consensus_cache_clear(cache);
  }

  cache->entries = smartlist_new();
  const smartlist_t *fnames = storage_dir_list(cache->dir);
  SMARTLIST_FOREACH_BEGIN(fnames, const char *, fname) {
    tor_mmap_t *map = NULL;
    config_line_t *labels = NULL;
    const uint8_t *body;
    size_t bodylen;
    map = storage_dir_map_labeled(cache->dir, fname,
                                  &labels, &body, &bodylen);
    if (! map) {
      /* The ERANGE error might come from tor_mmap_file() -- it means the file
       * was empty. EINVAL might come from ..map_labeled() -- it means the
       * file was misformatted. In both cases, we should just delete it.
       */
      if (errno == ERANGE || errno == EINVAL) {
        log_warn(LD_FS, "Found %s file %s in consensus cache; removing it.",
                 errno == ERANGE ? "empty" : "misformatted",
                 escaped(fname));
        storage_dir_remove_file(cache->dir, fname);
      } else {
        /* Can't load this; continue */
        log_warn(LD_FS, "Unable to map file %s from consensus cache: %s",
                 escaped(fname), strerror(errno));
      }
      continue;
    }
    consensus_cache_entry_t *ent =
      tor_malloc_zero(sizeof(consensus_cache_entry_t));
    ent->magic = CCE_MAGIC;
    ent->fname = tor_strdup(fname);
    ent->labels = labels;
    ent->refcnt = 1;
    ent->in_cache = cache;
    ent->unused_since = TIME_MAX;
    smartlist_add(cache->entries, ent);
    tor_munmap_file(map); /* don't actually need to keep this around */
  } SMARTLIST_FOREACH_END(fname);
}

/**
 * Make sure that <b>ent</b> is mapped into RAM.
 */
static void
consensus_cache_entry_map(consensus_cache_t *cache,
                          consensus_cache_entry_t *ent)
{
  if (ent->map)
    return;

  ent->map = storage_dir_map_labeled(cache->dir, ent->fname,
                                     NULL, &ent->body, &ent->bodylen);
  ent->unused_since = TIME_MAX;
}

/**
 * Unmap <b>ent</b> from RAM.
 *
 * Do not call this if something other than the cache is holding a reference
 * to <b>ent</b>
 */
static void
consensus_cache_entry_unmap(consensus_cache_entry_t *ent)
{
  ent->unused_since = TIME_MAX;
  if (!ent->map)
    return;

  tor_munmap_file(ent->map);
  ent->map = NULL;
  ent->body = NULL;
  ent->bodylen = 0;
  ent->unused_since = TIME_MAX;
}

HANDLE_IMPL(consensus_cache_entry, consensus_cache_entry_t, )

#ifdef TOR_UNIT_TESTS
/**
 * Testing only: Return true iff <b>ent</b> is mapped into memory.
 *
 * (In normal operation, this information is not exposed.)
 */
int
consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent)
{
  if (ent->map) {
    tor_assert(ent->body);
    return 1;
  } else {
    tor_assert(!ent->body);
    return 0;
  }
}
#endif /* defined(TOR_UNIT_TESTS) */