summaryrefslogtreecommitdiff
path: root/src/lib/fs/path.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/fs/path.c')
-rw-r--r--src/lib/fs/path.c415
1 files changed, 414 insertions, 1 deletions
diff --git a/src/lib/fs/path.c b/src/lib/fs/path.c
index 0d57be4b06..81960bd69a 100644
--- a/src/lib/fs/path.c
+++ b/src/lib/fs/path.c
@@ -13,15 +13,34 @@
#include "lib/malloc/malloc.h"
#include "lib/log/log.h"
#include "lib/log/util_bug.h"
+#include "lib/container/smartlist.h"
+#include "lib/sandbox/sandbox.h"
#include "lib/string/printf.h"
#include "lib/string/util_string.h"
#include "lib/string/compat_ctype.h"
+#include "lib/string/compat_string.h"
+#include "lib/fs/files.h"
+#include "lib/fs/dir.h"
#include "lib/fs/userdb.h"
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#ifdef _WIN32
+#include <windows.h>
+#include <shlwapi.h>
+#else /* !(defined(_WIN32)) */
+#include <dirent.h>
+#include <glob.h>
+#endif /* defined(_WIN32) */
+
#include <errno.h>
#include <string.h>
@@ -161,7 +180,7 @@ clean_fname_for_stat(char *name)
/** Modify <b>fname</b> to contain the name of its parent directory. Doesn't
* actually examine the filesystem; does a purely syntactic modification.
*
- * The parent of the root director is considered to be iteself.
+ * The parent of the root director is considered to be itself.
*
* Path separators are the forward slash (/) everywhere and additionally
* the backslash (\) on Win32.
@@ -294,3 +313,397 @@ make_path_absolute(const char *fname)
return absfname;
#endif /* defined(_WIN32) */
}
+
+/* The code below implements tor_glob and get_glob_opened_files. Because it is
+ * not easy to understand it by looking at individual functions, the big
+ * picture explanation here should be read first.
+ *
+ * Purpose of the functions:
+ * - tor_glob - receives a pattern and returns all the paths that result from
+ * its glob expansion, globs can be present on all path components.
+ * - get_glob_opened_files - receives a pattern and returns all the paths that
+ * are opened during its expansion (the paths before any path fragment that
+ * contains a glob as they have to be opened to check for glob matches). This
+ * is used to get the paths that have to be added to the seccomp sandbox
+ * allowed list.
+ *
+ * Due to OS API differences explained below, the implementation of tor_glob is
+ * completely different for Windows and POSIX systems, so we ended up with
+ * three different implementations:
+ * - tor_glob for POSIX - as POSIX glob does everything we need, we simply call
+ * it and process the results. This is completely implemented in tor_glob.
+ * - tor_glob for WIN32 - because the WIN32 API only supports expanding globs
+ * in the last path fragment, we need to expand the globs in each path
+ * fragment manually and call recursively to get the same behaviour as POSIX
+ * glob. When there are no globs in pattern, we know we are on the last path
+ * fragment and collect the full path.
+ * - get_glob_opened_files - because the paths before any path fragment with a
+ * glob will be opened to check for matches, we need to collect them and we
+ * need to expand the globs in each path fragments and call recursively until
+ * we find no more globs.
+ *
+ * As seen from the description above, both tor_glob for WIN32 and
+ * get_glob_opened_files receive a pattern and return a list of paths and have
+ * to expand all path fragments that contain globs and call themselves
+ * recursively. The differences are:
+ * - get_glob_opened_files collects paths before path fragments with globs
+ * while tor_glob for WIN32 collects full paths resulting from the expansion
+ * of all globs.
+ * - get_glob_opened_files can call tor_glob to expand path fragments with
+ * globs while tor_glob for WIN32 cannot because it IS tor_glob. For tor_glob
+ * for WIN32, an auxiliary function has to be used for this purpose.
+ *
+ * To avoid code duplication, the logic of tor_glob for WIN32 and
+ * get_glob_opened_files is implemented in get_glob_paths. The differences are
+ * configured by the extra function parameters:
+ * - final - if true, returns a list of paths obtained from expanding pattern
+ * (implements tor_glob). Otherwise, returns the paths before path fragments
+ * with globs (implements get_glob_opened_files).
+ * - unglob - function used to expand a path fragment. The function signature
+ * is defined by the unglob_fn typedef. Two implementations are available:
+ * - unglob_win32 - uses tor_listdir and PathMatchSpec (for tor_glob WIN32)
+ * - unglob_opened_files - uses tor_glob (for get_glob_opened_files)
+ */
+
+/** Returns true if the character at position <b>pos</b> in <b>pattern</b> is
+ * considered a glob. Returns false otherwise. Takes escaping into account on
+ * systems where escaping globs is supported. */
+static inline bool
+is_glob_char(const char *pattern, int pos)
+{
+ bool is_glob = pattern[pos] == '*' || pattern[pos] == '?';
+#ifdef _WIN32
+ return is_glob;
+#else /* !defined(_WIN32) */
+ bool is_escaped = pos > 0 && pattern[pos-1] == '\\';
+ return is_glob && !is_escaped;
+#endif /* defined(_WIN32) */
+}
+
+/** Expands the first path fragment of <b>pattern</b> that contains globs. The
+ * path fragment is between <b>prev_sep</b> and <b>next_sep</b>. If the path
+ * fragment is the last fragment of <b>pattern</b>, <b>next_sep</b> will be the
+ * index of the last char. Returns a list of paths resulting from the glob
+ * expansion of the path fragment. Anything after <b>next_sep</b> is not
+ * included in the returned list. Returns NULL on failure. */
+typedef struct smartlist_t * unglob_fn(const char *pattern, int prev_sep,
+ int next_sep);
+
+/** Adds <b>path</b> to <b>result</b> if it exists and is a file type we can
+ * handle. Returns false if <b>path</b> is a file type we cannot handle,
+ * returns true otherwise. Used on tor_glob for WIN32. */
+static bool
+add_non_glob_path(const char *path, struct smartlist_t *result)
+{
+ file_status_t file_type = file_status(path);
+ if (file_type == FN_ERROR) {
+ return false;
+ } else if (file_type != FN_NOENT) {
+ char *to_add = tor_strdup(path);
+ clean_fname_for_stat(to_add);
+ smartlist_add(result, to_add);
+ }
+ /* If WIN32 tor_glob is called with a non-existing path, we want it to
+ * return an empty list instead of error to match the regular version */
+ return true;
+}
+
+/** Auxiliary function used by get_glob_opened_files and WIN32 tor_glob.
+ * Returns a list of paths obtained from <b>pattern</b> using <b>unglob</b> to
+ * expand each path fragment. If <b>final</b> is true, the paths are the result
+ * of the glob expansion of <b>pattern</b> (implements tor_glob). Otherwise,
+ * the paths are the paths opened by glob while expanding <b>pattern</b>
+ * (implements get_glob_opened_files). Returns NULL on failure. */
+static struct smartlist_t *
+get_glob_paths(const char *pattern, unglob_fn unglob, bool final)
+{
+ smartlist_t *result = smartlist_new();
+ int i, prev_sep = -1, next_sep = -1;
+ bool is_glob = false, error_found = false, is_sep = false, is_last = false;
+
+ // find first path fragment with globs
+ for (i = 0; pattern[i]; i++) {
+ is_glob = is_glob || is_glob_char(pattern, i);
+ is_last = !pattern[i+1];
+ is_sep = pattern[i] == *PATH_SEPARATOR || pattern[i] == '/';
+ if (is_sep || is_last) {
+ prev_sep = next_sep;
+ next_sep = i; // next_sep+1 is start of next fragment or end of string
+ if (is_glob) {
+ break;
+ }
+ }
+ }
+
+ if (!is_glob) { // pattern fully expanded or no glob in pattern
+ if (final && !add_non_glob_path(pattern, result)) {
+ error_found = true;
+ goto end;
+ }
+ return result;
+ }
+
+ if (!final) {
+ // add path before the glob to result
+ int len = prev_sep < 1 ? prev_sep + 1 : prev_sep; // handle /*
+ char *path_until_glob = tor_strndup(pattern, len);
+ smartlist_add(result, path_until_glob);
+ }
+
+ smartlist_t *unglobbed_paths = unglob(pattern, prev_sep, next_sep);
+ if (!unglobbed_paths) {
+ error_found = true;
+ } else {
+ // for each path for current fragment, add the rest of the pattern
+ // and call recursively to get all expanded paths
+ SMARTLIST_FOREACH_BEGIN(unglobbed_paths, char *, current_path) {
+ char *next_path;
+ tor_asprintf(&next_path, "%s"PATH_SEPARATOR"%s", current_path,
+ &pattern[next_sep+1]);
+ smartlist_t *opened_next = get_glob_paths(next_path, unglob, final);
+ tor_free(next_path);
+ if (!opened_next) {
+ error_found = true;
+ break;
+ }
+ smartlist_add_all(result, opened_next);
+ smartlist_free(opened_next);
+ } SMARTLIST_FOREACH_END(current_path);
+ SMARTLIST_FOREACH(unglobbed_paths, char *, p, tor_free(p));
+ smartlist_free(unglobbed_paths);
+ }
+
+end:
+ if (error_found) {
+ SMARTLIST_FOREACH(result, char *, p, tor_free(p));
+ smartlist_free(result);
+ result = NULL;
+ }
+ return result;
+}
+
+#ifdef _WIN32
+/** Expands globs in <b>pattern</b> for the path fragment between
+ * <b>prev_sep</b> and <b>next_sep</b> using the WIN32 API. Returns NULL on
+ * failure. Used by the WIN32 implementation of tor_glob. Implements unglob_fn,
+ * see its description for more details. */
+static struct smartlist_t *
+unglob_win32(const char *pattern, int prev_sep, int next_sep)
+{
+ smartlist_t *result = smartlist_new();
+ int len = prev_sep < 1 ? prev_sep + 1 : prev_sep; // handle /*
+ char *path_until_glob = tor_strndup(pattern, len);
+
+ if (!is_file(file_status(path_until_glob))) {
+ smartlist_t *filenames = tor_listdir(path_until_glob);
+ if (!filenames) {
+ smartlist_free(result);
+ result = NULL;
+ } else {
+ SMARTLIST_FOREACH_BEGIN(filenames, char *, filename) {
+ TCHAR tpattern[MAX_PATH] = {0};
+ TCHAR tfile[MAX_PATH] = {0};
+ char *full_path;
+ tor_asprintf(&full_path, "%s"PATH_SEPARATOR"%s",
+ path_until_glob, filename);
+ char *path_curr_glob = tor_strndup(pattern, next_sep + 1);
+ // *\ must return only dirs, remove \ from the pattern so it matches
+ if (is_dir(file_status(full_path))) {
+ clean_fname_for_stat(path_curr_glob);
+ }
+#ifdef UNICODE
+ mbstowcs(tpattern, path_curr_glob, MAX_PATH);
+ mbstowcs(tfile, full_path, MAX_PATH);
+#else /* !defined(UNICODE) */
+ strlcpy(tpattern, path_curr_glob, MAX_PATH);
+ strlcpy(tfile, full_path, MAX_PATH);
+#endif /* defined(UNICODE) */
+ if (PathMatchSpec(tfile, tpattern)) {
+ smartlist_add(result, full_path);
+ } else {
+ tor_free(full_path);
+ }
+ tor_free(path_curr_glob);
+ } SMARTLIST_FOREACH_END(filename);
+ SMARTLIST_FOREACH(filenames, char *, p, tor_free(p));
+ smartlist_free(filenames);
+ }
+ }
+ tor_free(path_until_glob);
+ return result;
+}
+#elif HAVE_GLOB
+/** Same as opendir but calls sandbox_intern_string before */
+static DIR *
+prot_opendir(const char *name)
+{
+ if (sandbox_interned_string_is_missing(name)) {
+ errno = EPERM;
+ return NULL;
+ }
+ return opendir(sandbox_intern_string(name));
+}
+
+/** Same as stat but calls sandbox_intern_string before */
+static int
+prot_stat(const char *pathname, struct stat *buf)
+{
+ if (sandbox_interned_string_is_missing(pathname)) {
+ errno = EPERM;
+ return -1;
+ }
+ return stat(sandbox_intern_string(pathname), buf);
+}
+
+/** Same as lstat but calls sandbox_intern_string before */
+static int
+prot_lstat(const char *pathname, struct stat *buf)
+{
+ if (sandbox_interned_string_is_missing(pathname)) {
+ errno = EPERM;
+ return -1;
+ }
+ return lstat(sandbox_intern_string(pathname), buf);
+}
+/** As closedir, but has the right type for gl_closedir */
+static void
+wrap_closedir(void *arg)
+{
+ closedir(arg);
+}
+
+/** Function passed to glob to handle processing errors. <b>epath</b> is the
+ * path that caused the error and <b>eerrno</b> is the errno set by the
+ * function that failed. We want to ignore ENOENT and ENOTDIR because, in BSD
+ * systems, these are not ignored automatically, which makes glob fail when
+ * globs expand to non-existing paths and GLOB_ERR is set.
+ */
+static int
+glob_errfunc(const char *epath, int eerrno)
+{
+ (void)epath;
+ return eerrno == ENOENT || eerrno == ENOTDIR ? 0 : -1;
+}
+#endif /* defined(HAVE_GLOB) */
+
+/** Return a new list containing the paths that match the pattern
+ * <b>pattern</b>. Return NULL on error. On POSIX systems, errno is set by the
+ * glob function or is set to EPERM if glob tried to access a file not allowed
+ * by the seccomp sandbox.
+ */
+struct smartlist_t *
+tor_glob(const char *pattern)
+{
+ smartlist_t *result = NULL;
+
+#ifdef _WIN32
+ // PathMatchSpec does not support forward slashes, change them to backslashes
+ char *pattern_normalized = tor_strdup(pattern);
+ tor_strreplacechar(pattern_normalized, '/', *PATH_SEPARATOR);
+ result = get_glob_paths(pattern_normalized, unglob_win32, true);
+ tor_free(pattern_normalized);
+#elif HAVE_GLOB /* !(defined(_WIN32)) */
+ glob_t matches;
+ int flags = GLOB_NOSORT;
+#ifdef GLOB_ALTDIRFUNC
+ /* use functions that call sandbox_intern_string */
+ flags |= GLOB_ALTDIRFUNC;
+ typedef void *(*gl_opendir)(const char * name);
+ typedef struct dirent *(*gl_readdir)(void *);
+ typedef void (*gl_closedir)(void *);
+ matches.gl_opendir = (gl_opendir) &prot_opendir;
+ matches.gl_readdir = (gl_readdir) &readdir;
+ matches.gl_closedir = (gl_closedir) &wrap_closedir;
+ matches.gl_stat = &prot_stat;
+ matches.gl_lstat = &prot_lstat;
+#endif /* defined(GLOB_ALTDIRFUNC) */
+ // use custom error handler to workaround BSD quirks and do not set GLOB_ERR
+ // because it would make glob fail on error even if the error handler ignores
+ // the error
+ int ret = glob(pattern, flags, glob_errfunc, &matches);
+ if (ret == GLOB_NOMATCH) {
+ return smartlist_new();
+ } else if (ret != 0) {
+ return NULL;
+ }
+
+ // #40141: workaround for bug in glibc < 2.19 where patterns ending in path
+ // separator match files and folders instead of folders only
+ size_t pattern_len = strlen(pattern);
+ bool dir_only = has_glob(pattern) &&
+ pattern_len > 0 && pattern[pattern_len-1] == *PATH_SEPARATOR;
+
+ result = smartlist_new();
+ size_t i;
+ for (i = 0; i < matches.gl_pathc; i++) {
+ char *match = tor_strdup(matches.gl_pathv[i]);
+ size_t len = strlen(match);
+ if (len > 0 && match[len-1] == *PATH_SEPARATOR) {
+ match[len-1] = '\0';
+ }
+
+ if (!dir_only || (dir_only && is_dir(file_status(match)))) {
+ smartlist_add(result, match);
+ } else {
+ tor_free(match);
+ }
+ }
+ globfree(&matches);
+#else
+ (void)pattern;
+ return result;
+#endif /* !defined(HAVE_GLOB) */
+
+ return result;
+}
+
+/** Returns true if <b>s</b> contains characters that can be globbed.
+ * Returns false otherwise. */
+bool
+has_glob(const char *s)
+{
+ int i;
+ for (i = 0; s[i]; i++) {
+ if (is_glob_char(s, i)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/** Expands globs in <b>pattern</b> for the path fragment between
+ * <b>prev_sep</b> and <b>next_sep</b> using tor_glob. Returns NULL on
+ * failure. Used by get_glob_opened_files. Implements unglob_fn, see its
+ * description for more details. */
+static struct smartlist_t *
+unglob_opened_files(const char *pattern, int prev_sep, int next_sep)
+{
+ (void)prev_sep;
+ smartlist_t *result = smartlist_new();
+ // if the following fragments have no globs, we're done
+ if (has_glob(&pattern[next_sep+1])) {
+ // if there is a glob after next_sep, we know next_sep is a separator and
+ // not the last char and glob_path will have the path without the separator
+ char *glob_path = tor_strndup(pattern, next_sep);
+ smartlist_t *child_paths = tor_glob(glob_path);
+ tor_free(glob_path);
+ if (!child_paths) {
+ smartlist_free(result);
+ result = NULL;
+ } else {
+ smartlist_add_all(result, child_paths);
+ smartlist_free(child_paths);
+ }
+ }
+ return result;
+}
+
+/** Returns a list of files that are opened by the tor_glob function when
+ * called with <b>pattern</b>. Returns NULL on error. The purpose of this
+ * function is to create a list of files to be added to the sandbox white list
+ * before the sandbox is enabled. */
+struct smartlist_t *
+get_glob_opened_files(const char *pattern)
+{
+ return get_glob_paths(pattern, unglob_opened_files, false);
+}