aboutsummaryrefslogtreecommitdiff
path: root/httpd/patterns.c
diff options
context:
space:
mode:
Diffstat (limited to 'httpd/patterns.c')
-rw-r--r--httpd/patterns.c695
1 files changed, 695 insertions, 0 deletions
diff --git a/httpd/patterns.c b/httpd/patterns.c
new file mode 100644
index 0000000..b7cb381
--- /dev/null
+++ b/httpd/patterns.c
@@ -0,0 +1,695 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
+ * Copyright (C) 1994-2015 Lua.org, PUC-Rio.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Derived from Lua 5.3.1:
+ * $Id: lstrlib.c,v 1.229 2015/05/20 17:39:23 roberto Exp $
+ * Standard library for string operations and pattern-matching
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "patterns.h"
+
+#define uchar(c) ((unsigned char)(c)) /* macro to 'unsign' a char */
+#define CAP_UNFINISHED (-1)
+#define CAP_POSITION (-2)
+#define L_ESC '%'
+#define SPECIALS "^$*+?.([%-"
+
+struct match_state {
+ int matchdepth; /* control for recursive depth (to avoid C
+ * stack overflow) */
+ int maxcaptures; /* configured capture limit */
+ const char *src_init; /* init of source string */
+ const char *src_end; /* end ('\0') of source string */
+ const char *p_end; /* end ('\0') of pattern */
+ const char *error; /* should be NULL */
+ int level; /* total number of captures (finished or
+ * unfinished) */
+ struct {
+ const char *init;
+ ptrdiff_t len;
+ } capture[MAXCAPTURES];
+};
+
+/* recursive function */
+static const char *match(struct match_state *, const char *, const char *);
+
+static int
+match_error(struct match_state *ms, const char *error)
+{
+ ms->error = ms->error == NULL ? error : ms->error;
+ return (-1);
+}
+
+static int
+check_capture(struct match_state *ms, int l)
+{
+ l -= '1';
+ if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
+ return match_error(ms, "invalid capture index");
+ return (l);
+}
+
+static int
+capture_to_close(struct match_state *ms)
+{
+ int level = ms->level;
+ for (level--; level >= 0; level--)
+ if (ms->capture[level].len == CAP_UNFINISHED)
+ return (level);
+ return match_error(ms, "invalid pattern capture");
+}
+
+static const char *
+classend(struct match_state *ms, const char *p)
+{
+ switch (*p++) {
+ case L_ESC:
+ if (p == ms->p_end)
+ match_error(ms,
+ "malformed pattern (ends with '%%')");
+ return p + 1;
+ case '[':
+ if (*p == '^')
+ p++;
+ do {
+ /* look for a ']' */
+ if (p == ms->p_end) {
+ match_error(ms,
+ "malformed pattern (missing ']')");
+ break;
+ }
+ if (*(p++) == L_ESC && p < ms->p_end) {
+ /* skip escapes (e.g. '%]') */
+ p++;
+ }
+ } while (*p != ']');
+ return p + 1;
+ default:
+ return p;
+ }
+}
+
+static int
+match_class(int c, int cl)
+{
+ int res;
+ switch (tolower(cl)) {
+ case 'a':
+ res = isalpha(c);
+ break;
+ case 'c':
+ res = iscntrl(c);
+ break;
+ case 'd':
+ res = isdigit(c);
+ break;
+ case 'g':
+ res = isgraph(c);
+ break;
+ case 'l':
+ res = islower(c);
+ break;
+ case 'p':
+ res = ispunct(c);
+ break;
+ case 's':
+ res = isspace(c);
+ break;
+ case 'u':
+ res = isupper(c);
+ break;
+ case 'w':
+ res = isalnum(c);
+ break;
+ case 'x':
+ res = isxdigit(c);
+ break;
+ case 'z':
+ res = (c == 0);
+ break; /* deprecated option */
+ default:
+ return (cl == c);
+ }
+ return (islower(cl) ? res : !res);
+}
+
+static int
+matchbracketclass(int c, const char *p, const char *ec)
+{
+ int sig = 1;
+ if (*(p + 1) == '^') {
+ sig = 0;
+ /* skip the '^' */
+ p++;
+ }
+ while (++p < ec) {
+ if (*p == L_ESC) {
+ p++;
+ if (match_class(c, uchar(*p)))
+ return sig;
+ } else if ((*(p + 1) == '-') && (p + 2 < ec)) {
+ p += 2;
+ if (uchar(*(p - 2)) <= c && c <= uchar(*p))
+ return sig;
+ } else if (uchar(*p) == c)
+ return sig;
+ }
+ return !sig;
+}
+
+static int
+singlematch(struct match_state *ms, const char *s, const char *p,
+ const char *ep)
+{
+ if (s >= ms->src_end)
+ return 0;
+ else {
+ int c = uchar(*s);
+ switch (*p) {
+ case '.':
+ /* matches any char */
+ return (1);
+ case L_ESC:
+ return match_class(c, uchar(*(p + 1)));
+ case '[':
+ return matchbracketclass(c, p, ep - 1);
+ default:
+ return (uchar(*p) == c);
+ }
+ }
+}
+
+static const char *
+matchbalance(struct match_state *ms, const char *s, const char *p)
+{
+ if (p >= ms->p_end - 1) {
+ match_error(ms,
+ "malformed pattern (missing arguments to '%%b')");
+ return (NULL);
+ }
+ if (*s != *p)
+ return (NULL);
+ else {
+ int b = *p;
+ int e = *(p + 1);
+ int cont = 1;
+ while (++s < ms->src_end) {
+ if (*s == e) {
+ if (--cont == 0)
+ return s + 1;
+ } else if (*s == b)
+ cont++;
+ }
+ }
+
+ /* string ends out of balance */
+ return (NULL);
+}
+
+static const char *
+max_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
+{
+ ptrdiff_t i = 0;
+ /* counts maximum expand for item */
+ while (singlematch(ms, s + i, p, ep))
+ i++;
+ /* keeps trying to match with the maximum repetitions */
+ while (i >= 0) {
+ const char *res = match(ms, (s + i), ep + 1);
+ if (res)
+ return res;
+ /* else didn't match; reduce 1 repetition to try again */
+ i--;
+ }
+ return NULL;
+}
+
+static const char *
+min_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
+{
+ for (;;) {
+ const char *res = match(ms, s, ep + 1);
+ if (res != NULL)
+ return res;
+ else if (singlematch(ms, s, p, ep))
+ s++; /* try with one more repetition */
+ else
+ return NULL;
+ }
+}
+
+static const char *
+start_capture(struct match_state *ms, const char *s, const char *p, int what)
+{
+ const char *res;
+
+ int level = ms->level;
+ if (level >= ms->maxcaptures)
+ match_error(ms, "too many captures");
+ ms->capture[level].init = s;
+ ms->capture[level].len = what;
+ ms->level = level + 1;
+ /* undo capture if match failed */
+ if ((res = match(ms, s, p)) == NULL)
+ ms->level--;
+ return res;
+}
+
+static const char *
+end_capture(struct match_state *ms, const char *s, const char *p)
+{
+ int l = capture_to_close(ms);
+ const char *res;
+ /* close capture */
+ ms->capture[l].len = s - ms->capture[l].init;
+ /* undo capture if match failed */
+ if ((res = match(ms, s, p)) == NULL)
+ ms->capture[l].len = CAP_UNFINISHED;
+ return res;
+}
+
+static const char *
+match_capture(struct match_state *ms, const char *s, int l)
+{
+ size_t len;
+ l = check_capture(ms, l);
+ len = ms->capture[l].len;
+ if ((size_t) (ms->src_end - s) >= len &&
+ memcmp(ms->capture[l].init, s, len) == 0)
+ return s + len;
+ else
+ return NULL;
+}
+
+static const char *
+match(struct match_state *ms, const char *s, const char *p)
+{
+ const char *ep, *res;
+ char previous;
+
+ if (ms->matchdepth-- == 0)
+ match_error(ms, "pattern too complex");
+
+ /* using goto's to optimize tail recursion */
+ init:
+ /* end of pattern? */
+ if (p != ms->p_end) {
+ switch (*p) {
+ case '(':
+ /* start capture */
+ if (*(p + 1) == ')')
+ /* position capture? */
+ s = start_capture(ms, s, p + 2, CAP_POSITION);
+ else
+ s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
+ break;
+ case ')':
+ /* end capture */
+ s = end_capture(ms, s, p + 1);
+ break;
+ case '$':
+ /* is the '$' the last char in pattern? */
+ if ((p + 1) != ms->p_end) {
+ /* no; go to default */
+ goto dflt;
+ }
+ /* check end of string */
+ s = (s == ms->src_end) ? s : NULL;
+ break;
+ case L_ESC:
+ /* escaped sequences not in the format class[*+?-]? */
+ switch (*(p + 1)) {
+ case 'b':
+ /* balanced string? */
+ s = matchbalance(ms, s, p + 2);
+ if (s != NULL) {
+ p += 4;
+ /* return match(ms, s, p + 4); */
+ goto init;
+ } /* else fail (s == NULL) */
+ break;
+ case 'f':
+ /* frontier? */
+ p += 2;
+ if (*p != '[')
+ match_error(ms, "missing '['"
+ " after '%%f' in pattern");
+ /* points to what is next */
+ ep = classend(ms, p);
+ previous =
+ (s == ms->src_init) ? '\0' : *(s - 1);
+ if (!matchbracketclass(uchar(previous),
+ p, ep - 1) &&
+ matchbracketclass(uchar(*s),
+ p, ep - 1)) {
+ p = ep;
+ /* return match(ms, s, ep); */
+ goto init;
+ }
+ /* match failed */
+ s = NULL;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ /* capture results (%0-%9)? */
+ s = match_capture(ms, s, uchar(*(p + 1)));
+ if (s != NULL) {
+ p += 2;
+ /* return match(ms, s, p + 2) */
+ goto init;
+ }
+ break;
+ default:
+ goto dflt;
+ }
+ break;
+ default:
+
+ /* pattern class plus optional suffix */
+ dflt:
+ /* points to optional suffix */
+ ep = classend(ms, p);
+
+ /* does not match at least once? */
+ if (!singlematch(ms, s, p, ep)) {
+ /* accept empty? */
+ if (*ep == '*' || *ep == '?' || *ep == '-') {
+ p = ep + 1;
+ /* return match(ms, s, ep + 1); */
+ goto init;
+ } else {
+ /* '+' or no suffix */
+ s = NULL; /* fail */
+ }
+ } else {
+ /* matched once */
+ /* handle optional suffix */
+ switch (*ep) {
+ case '?':
+ /* optional */
+ if ((res =
+ match(ms, s + 1, ep + 1)) != NULL)
+ s = res;
+ else {
+ /*
+ * else return
+ * match(ms, s, ep + 1);
+ */
+ p = ep + 1;
+ goto init;
+ }
+ break;
+ case '+':
+ /* 1 or more repetitions */
+ s++; /* 1 match already done */
+ /* FALLTHROUGH */
+ case '*':
+ /* 0 or more repetitions */
+ s = max_expand(ms, s, p, ep);
+ break;
+ case '-':
+ /* 0 or more repetitions (minimum) */
+ s = min_expand(ms, s, p, ep);
+ break;
+ default:
+ /* no suffix */
+ s++;
+ p = ep;
+ /* return match(ms, s + 1, ep); */
+ goto init;
+ }
+ }
+ break;
+ }
+ }
+ ms->matchdepth++;
+ return s;
+}
+
+static const char *
+lmemfind(const char *s1, size_t l1,
+ const char *s2, size_t l2)
+{
+ const char *init;
+
+ if (l2 == 0) {
+ /* empty strings are everywhere */
+ return (s1);
+ } else if (l2 > l1) {
+ /* avoids a negative 'l1' */
+ return (NULL);
+ } else {
+ /*
+ * to search for a '*s2' inside 's1'
+ * - 1st char will be checked by 'memchr'
+ * - 's2' cannot be found after that
+ */
+ l2--;
+ l1 = l1 - l2;
+ while (l1 > 0 &&
+ (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
+ /* 1st char is already checked */
+ init++;
+ if (memcmp(init, s2 + 1, l2) == 0)
+ return init - 1;
+ else {
+ /* correct 'l1' and 's1' to try again */
+ l1 -= init - s1;
+ s1 = init;
+ }
+ }
+ /* not found */
+ return (NULL);
+ }
+}
+
+static int
+push_onecapture(struct match_state *ms, int i, const char *s,
+ const char *e, struct str_find *sm)
+{
+ if (i >= ms->level) {
+ if (i == 0 || ms->level == 0) {
+ /* add whole match */
+ sm->sm_so = (off_t)(s - ms->src_init);
+ sm->sm_eo = (off_t)(e - s) + sm->sm_so;
+ } else
+ return match_error(ms, "invalid capture index");
+ } else {
+ ptrdiff_t l = ms->capture[i].len;
+ if (l == CAP_UNFINISHED)
+ match_error(ms, "unfinished capture");
+ sm->sm_so = ms->capture[i].init - ms->src_init;
+ sm->sm_eo = sm->sm_so + l;
+ }
+ sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo;
+ return (0);
+}
+
+static int
+push_captures(struct match_state *ms, const char *s, const char *e,
+ struct str_find *sm, size_t nsm)
+{
+ unsigned int i;
+ unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level;
+
+ if (nlevels > nsm)
+ nlevels = nsm;
+ for (i = 0; i < nlevels; i++)
+ if (push_onecapture(ms, i, s, e, sm + i) == -1)
+ break;
+
+ /* number of strings pushed */
+ return (nlevels);
+}
+
+/* check whether pattern has no special characters */
+static int
+nospecials(const char *p, size_t l)
+{
+ size_t upto = 0;
+
+ do {
+ if (strpbrk(p + upto, SPECIALS)) {
+ /* pattern has a special character */
+ return 0;
+ }
+ /* may have more after \0 */
+ upto += strlen(p + upto) + 1;
+ } while (upto <= l);
+
+ /* no special chars found */
+ return (1);
+}
+
+static int
+str_find_aux(struct match_state *ms, const char *pattern, const char *string,
+ struct str_find *sm, size_t nsm, off_t init)
+{
+ size_t ls = strlen(string);
+ size_t lp = strlen(pattern);
+ const char *s = string;
+ const char *p = pattern;
+ const char *s1, *s2;
+ int anchor, i;
+
+ if (init < 0)
+ init = 0;
+ else if (init > (off_t)ls)
+ return match_error(ms, "starting after string's end");
+ s1 = s + init;
+
+ if (nospecials(p, lp)) {
+ /* do a plain search */
+ s2 = lmemfind(s1, ls - (size_t)init, p, lp);
+ if (s2 != NULL) {
+ i = 0;
+ sm[i].sm_so = 0;
+ sm[i].sm_eo = ls;
+ if (nsm > 1) {
+ i++;
+ sm[i].sm_so = s2 - s;
+ sm[i].sm_eo = (s2 - s) + lp;
+ }
+ return (i + 1);
+ }
+ return (0);
+ }
+
+ anchor = (*p == '^');
+ if (anchor) {
+ p++;
+ lp--; /* skip anchor character */
+ }
+ ms->maxcaptures = (nsm > MAXCAPTURES ? MAXCAPTURES : nsm) - 1;
+ ms->matchdepth = MAXCCALLS;
+ ms->src_init = s;
+ ms->src_end = s + ls;
+ ms->p_end = p + lp;
+ do {
+ const char *res;
+ ms->level = 0;
+ assert(ms->matchdepth == MAXCCALLS);
+ if ((res = match(ms, s1, p)) != NULL) {
+ sm->sm_so = 0;
+ sm->sm_eo = ls;
+ return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1;
+ }
+ } while (s1++ < ms->src_end && !anchor);
+
+ return 0;
+}
+
+int
+str_find(const char *string, const char *pattern, struct str_find *sm,
+ size_t nsm, const char **errstr)
+{
+ struct match_state ms;
+ int ret;
+
+ memset(&ms, 0, sizeof(ms));
+ memset(sm, 0, nsm * sizeof(*sm));
+
+ ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
+ if (ms.error != NULL) {
+ /* Return 0 on error and store the error string */
+ *errstr = ms.error;
+ ret = 0;
+ } else
+ *errstr = NULL;
+
+ return (ret);
+}
+
+int
+str_match(const char *string, const char *pattern, struct str_match *m,
+ const char **errstr)
+{
+ struct str_find sm[MAXCAPTURES];
+ struct match_state ms;
+ int ret, i;
+ size_t len, nsm;
+
+ nsm = MAXCAPTURES;
+ memset(&ms, 0, sizeof(ms));
+ memset(sm, 0, sizeof(sm));
+ memset(m, 0, sizeof(*m));
+
+ ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
+ if (ret == 0 || ms.error != NULL) {
+ /* Return 0 on error and store the error string */
+ *errstr = ms.error;
+ return (-1);
+ }
+
+ if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) {
+ *errstr = strerror(errno);
+ return (-1);
+ }
+ m->sm_nmatch = ret;
+
+ for (i = 0; i < ret; i++) {
+ if (sm[i].sm_so > sm[i].sm_eo)
+ continue;
+ len = sm[i].sm_eo - sm[i].sm_so;
+ if ((m->sm_match[i] = calloc(1,
+ len + 1)) == NULL) {
+ *errstr = strerror(errno);
+ str_match_free(m);
+ return (-1);
+ }
+ (void)memcpy(m->sm_match[i],
+ string + sm[i].sm_so, len);
+ }
+
+ *errstr = NULL;
+ return (0);
+}
+
+void
+str_match_free(struct str_match *m)
+{
+ unsigned int i = 0;
+ for (i = 0; i < m->sm_nmatch; i++)
+ free(m->sm_match[i]);
+ free(m->sm_match);
+ m->sm_nmatch = 0;
+}