From f3f57c7673a69d373ac1b2d64d919ea504d9f7f0 Mon Sep 17 00:00:00 2001 From: Reyk Floeter Date: Sat, 20 Jun 2015 14:10:47 +0200 Subject: Experimental support for rewrites using Lua's lean pattern matching implementation. --- httpd/Makefile | 3 + httpd/httpd.conf.5 | 53 +++- httpd/httpd.h | 8 +- httpd/parse.y | 71 ++++-- httpd/patterns.7 | 305 +++++++++++++++++++++++ httpd/patterns.c | 695 ++++++++++++++++++++++++++++++++++++++++++++++++++++ httpd/patterns.h | 46 ++++ httpd/server_http.c | 71 +++++- 8 files changed, 1211 insertions(+), 41 deletions(-) create mode 100644 httpd/patterns.7 create mode 100644 httpd/patterns.c create mode 100644 httpd/patterns.h diff --git a/httpd/Makefile b/httpd/Makefile index 885ad42..69fdb5e 100644 --- a/httpd/Makefile +++ b/httpd/Makefile @@ -6,6 +6,9 @@ SRCS+= config.c control.c httpd.c log.c logger.c proc.c SRCS+= server.c server_http.c server_file.c server_fcgi.c MAN= httpd.8 httpd.conf.5 +SRCS+= patterns.c +MAN+= patterns.7 + LDADD= -levent -ltls -lssl -lcrypto -lutil DPADD= ${LIBEVENT} ${LIBTLS} ${LIBSSL} ${LIBCRYPTO} ${LIBUTIL} #DEBUG= -g -DDEBUG=3 -O0 diff --git a/httpd/httpd.conf.5 b/httpd/httpd.conf.5 index 87866d2..24d92ac 100644 --- a/httpd/httpd.conf.5 +++ b/httpd/httpd.conf.5 @@ -131,14 +131,38 @@ The configured web servers. .Pp Each .Ic server -must have a -.Ar name -and include one or more lines of the following syntax: +section starts with a declaration of the server +.Ar name : +.Bl -tag -width Ds +.It Ic server Ar name Brq ... +Match the server name using shell globbing rules. +This can be an explicit name, +.Ar www.example.com , +or a name including wildcards, +.Ar *.example.com . +.It Ic server match Ar name Brq ... +Match the server name using pattern matching, +see +.Xr patterns 7 . +.El +.Pp +Followed by a block of options that is enclosed in curly brackets: .Bl -tag -width Ds .It Ic alias Ar name Specify an additional alias .Ar name for this server. 
+.It Ic alias match Ar name +Like the +.Ic alias +option, +but +.Ic match +the +.Ar name +using pattern matching instead of shell globbing rules, +see +.Xr patterns 7 . .It Oo Ic no Oc Ic authenticate Oo Ar realm Oc Ic with Pa htpasswd Authenticate a remote user for .Ar realm @@ -188,6 +212,12 @@ The configured IP address of the server. The configured TCP server port of the server. .It Ic $SERVER_NAME The name of the server. +.It Ic Pf % Ar n +The capture index +.Ar n +of a string that was captured by the enclosing +.Ic location match +option. .El .It Ic connection Ar option Set the specified options and limits for HTTP connections. @@ -247,6 +277,22 @@ except .Ic location and .Ic tcp . +.It Ic location match Ar path Brq ... +Like the +.Ic location +option, +but +.Ic match +the +.Ar path +using pattern matching instead of shell globbing rules, +see +.Xr patterns 7 . +The pattern may contain captures that can be used in the +.Ar uri +of an enclosed +.Ic block return +option. .It Oo Ic no Oc Ic log Op Ar option Set the specified logging options. 
Logging is enabled by default using the standard @@ -516,6 +562,7 @@ server "www.example.com" { .Ed .Sh SEE ALSO .Xr htpasswd 1 , +.Xr patterns 7 , .Xr httpd 8 , .Xr slowcgi 8 .Sh AUTHORS diff --git a/httpd/httpd.h b/httpd/httpd.h index 1431eaa..ff76281 100644 --- a/httpd/httpd.h +++ b/httpd/httpd.h @@ -35,6 +35,8 @@ #include #include +#include "patterns.h" + #define CONF_FILE "/etc/httpd.conf" #define HTTPD_SOCKET "/var/run/httpd.sock" #define HTTPD_USER "www" @@ -278,6 +280,7 @@ struct client { void *clt_srv_conf; u_int32_t clt_srv_id; struct sockaddr_storage clt_srv_ss; + struct str_match clt_srv_match; int clt_s; in_port_t clt_port; @@ -341,12 +344,15 @@ SPLAY_HEAD(client_tree, client); #define SRVFLAG_NO_AUTH 0x00020000 #define SRVFLAG_BLOCK 0x00040000 #define SRVFLAG_NO_BLOCK 0x00080000 +#define SRVFLAG_LOCATION_MATCH 0x00100000 +#define SRVFLAG_SERVER_MATCH 0x00200000 #define SRVFLAG_BITS \ "\10\01INDEX\02NO_INDEX\03AUTO_INDEX\04NO_AUTO_INDEX" \ "\05ROOT\06LOCATION\07FCGI\10NO_FCGI\11LOG\12NO_LOG\13SOCKET" \ "\14SYSLOG\15NO_SYSLOG\16TLS\17ACCESS_LOG\20ERROR_LOG" \ - "\21AUTH\22NO_AUTH\23BLOCK\24NO_BLOCK" + "\21AUTH\22NO_AUTH\23BLOCK\24NO_BLOCK\25LOCATION_MATCH" \ + "\26SERVER_MATCH" #define TCPFLAG_NODELAY 0x01 #define TCPFLAG_NNODELAY 0x02 diff --git a/httpd/parse.y b/httpd/parse.y index 0aae421..f556bf0 100644 --- a/httpd/parse.y +++ b/httpd/parse.y @@ -107,7 +107,7 @@ int host_if(const char *, struct addresslist *, int host(const char *, struct addresslist *, int, struct portrange *, const char *, int); void host_free(struct addresslist *); -struct server *server_inherit(struct server *, const char *, +struct server *server_inherit(struct server *, struct server_config *, struct server_config *); int getservice(char *); int is_if_in_group(const char *, const char *); @@ -131,14 +131,14 @@ typedef struct { %token ACCESS ALIAS AUTO BACKLOG BODY BUFFER CERTIFICATE CHROOT CIPHERS COMMON %token COMBINED CONNECTION DHE DIRECTORY ECDHE ERR FCGI INDEX IP KEY 
LISTEN -%token LOCATION LOG LOGDIR MAXIMUM NO NODELAY ON PORT PREFORK PROTOCOLS +%token LOCATION LOG LOGDIR MATCH MAXIMUM NO NODELAY ON PORT PREFORK PROTOCOLS %token REQUEST REQUESTS ROOT SACK SERVER SOCKET STRIP STYLE SYSLOG TCP TIMEOUT %token TLS TYPES %token ERROR INCLUDE AUTHENTICATE WITH BLOCK DROP RETURN PASS %token STRING %token NUMBER %type port -%type opttls +%type opttls optmatch %type timeout %type numberstring optstring %type authopts @@ -200,26 +200,26 @@ main : PREFORK NUMBER { } ; -server : SERVER STRING { +server : SERVER optmatch STRING { struct server *s; if (!loadcfg) { - free($2); + free($3); YYACCEPT; } if ((s = calloc(1, sizeof (*s))) == NULL) fatal("out of memory"); - if (strlcpy(s->srv_conf.name, $2, + if (strlcpy(s->srv_conf.name, $3, sizeof(s->srv_conf.name)) >= sizeof(s->srv_conf.name)) { yyerror("server name truncated"); - free($2); + free($3); free(s); YYERROR; } - free($2); + free($3); strlcpy(s->srv_conf.root, HTTPD_DOCROOT, sizeof(s->srv_conf.root)); @@ -235,7 +235,9 @@ server : SERVER STRING { s->srv_conf.timeout.tv_sec = SERVER_TIMEOUT; s->srv_conf.maxrequests = SERVER_MAXREQUESTS; s->srv_conf.maxrequestbody = SERVER_MAXREQUESTBODY; - s->srv_conf.flags |= SRVFLAG_LOG; + s->srv_conf.flags = SRVFLAG_LOG; + if ($2) + s->srv_conf.flags |= SRVFLAG_SERVER_MATCH; s->srv_conf.logformat = LOG_FORMAT_COMMON; s->srv_conf.tls_protocols = TLS_PROTOCOLS_DEFAULT; if ((s->srv_conf.tls_cert_file = @@ -334,7 +336,7 @@ server : SERVER STRING { continue; if ((sn = server_inherit(srv, - b->name, a)) == NULL) { + b, a)) == NULL) { serverconfig_free(srv_conf); free(srv); YYABORT; @@ -405,30 +407,35 @@ serveroptsl : LISTEN ON STRING opttls port { } if (alias != NULL) { + /* IP-based; use name match flags from parent */ + alias->flags = srv->srv_conf.flags; TAILQ_INSERT_TAIL(&srv->srv_hosts, alias, entry); } } - | ALIAS STRING { + | ALIAS optmatch STRING { struct server_config *alias; if (parentsrv != NULL) { yyerror("alias inside location"); - free($2); + 
free($3); YYERROR; } if ((alias = calloc(1, sizeof(*alias))) == NULL) fatal("out of memory"); - if (strlcpy(alias->name, $2, sizeof(alias->name)) >= + if (strlcpy(alias->name, $3, sizeof(alias->name)) >= sizeof(alias->name)) { yyerror("server alias truncated"); - free($2); + free($3); free(alias); YYERROR; } - free($2); + free($3); + + if ($2) + alias->flags |= SRVFLAG_SERVER_MATCH; TAILQ_INSERT_TAIL(&srv->srv_hosts, alias, entry); } @@ -456,38 +463,38 @@ serveroptsl : LISTEN ON STRING opttls port { | fastcgi | authenticate | filter - | LOCATION STRING { + | LOCATION optmatch STRING { struct server *s; if (srv->srv_conf.ss.ss_family == AF_UNSPEC) { yyerror("listen address not specified"); - free($2); + free($3); YYERROR; } if (parentsrv != NULL) { - yyerror("location %s inside location", $2); - free($2); + yyerror("location %s inside location", $3); + free($3); YYERROR; } if (!loadcfg) { - free($2); + free($3); YYACCEPT; } if ((s = calloc(1, sizeof (*s))) == NULL) fatal("out of memory"); - if (strlcpy(s->srv_conf.location, $2, + if (strlcpy(s->srv_conf.location, $3, sizeof(s->srv_conf.location)) >= sizeof(s->srv_conf.location)) { yyerror("server location truncated"); - free($2); + free($3); free(s); YYERROR; } - free($2); + free($3); if (strlcpy(s->srv_conf.name, srv->srv_conf.name, sizeof(s->srv_conf.name)) >= @@ -501,6 +508,8 @@ serveroptsl : LISTEN ON STRING opttls port { /* A location entry uses the parent id */ s->srv_conf.parent_id = srv->srv_conf.id; s->srv_conf.flags = SRVFLAG_LOCATION; + if ($2) + s->srv_conf.flags |= SRVFLAG_LOCATION_MATCH; s->srv_s = -1; memcpy(&s->srv_conf.ss, &srv->srv_conf.ss, sizeof(s->srv_conf.ss)); @@ -884,6 +893,10 @@ block : BLOCK { } ; +optmatch : /* empty */ { $$ = 0; } + | MATCH { $$ = 1; } + ; + optstring : /* empty */ { $$ = NULL; } | STRING { $$ = $1; } ; @@ -1108,6 +1121,7 @@ lookup(char *s) { "location", LOCATION }, { "log", LOG }, { "logdir", LOGDIR }, + { "match", MATCH }, { "max", MAXIMUM }, { "no", NO }, { "nodelay", 
NODELAY }, @@ -1889,7 +1903,7 @@ host_free(struct addresslist *al) } struct server * -server_inherit(struct server *src, const char *name, +server_inherit(struct server *src, struct server_config *alias, struct server_config *addr) { struct server *dst, *s, *dstl; @@ -1927,7 +1941,7 @@ server_inherit(struct server *src, const char *name, } /* Now set alias and listen address */ - strlcpy(dst->srv_conf.name, name, sizeof(dst->srv_conf.name)); + strlcpy(dst->srv_conf.name, alias->name, sizeof(dst->srv_conf.name)); memcpy(&dst->srv_conf.ss, &addr->ss, sizeof(dst->srv_conf.ss)); dst->srv_conf.port = addr->port; dst->srv_conf.prefixlen = addr->prefixlen; @@ -1936,6 +1950,10 @@ server_inherit(struct server *src, const char *name, else dst->srv_conf.flags &= ~SRVFLAG_TLS; + /* Don't inherit the "match" option, use it from the alias */ + dst->srv_conf.flags &= ~SRVFLAG_SERVER_MATCH; + dst->srv_conf.flags |= (alias->flags & SRVFLAG_SERVER_MATCH); + if (server_tls_load_keypair(dst) == -1) { yyerror("failed to load public/private keys " "for server %s", dst->srv_conf.name); @@ -1975,7 +1993,8 @@ server_inherit(struct server *src, const char *name, fatal("out of memory"); memcpy(&dstl->srv_conf, &s->srv_conf, sizeof(dstl->srv_conf)); - strlcpy(dstl->srv_conf.name, name, sizeof(dstl->srv_conf.name)); + strlcpy(dstl->srv_conf.name, alias->name, + sizeof(dstl->srv_conf.name)); /* Copy the new Id and listen address */ dstl->srv_conf.id = ++last_server_id; diff --git a/httpd/patterns.7 b/httpd/patterns.7 new file mode 100644 index 0000000..1eeef4c --- /dev/null +++ b/httpd/patterns.7 @@ -0,0 +1,305 @@ +.\" $OpenBSD$ +.\" +.\" Copyright (c) 2015 Reyk Floeter +.\" Copyright (C) 1994-2015 Lua.org, PUC-Rio. 
+.\" +.\" Permission is hereby granted, free of charge, to any person obtaining +.\" a copy of this software and associated documentation files (the +.\" "Software"), to deal in the Software without restriction, including +.\" without limitation the rights to use, copy, modify, merge, publish, +.\" distribute, sublicense, and/or sell copies of the Software, and to +.\" permit persons to whom the Software is furnished to do so, subject to +.\" the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be +.\" included in all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +.\" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +.\" MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +.\" IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +.\" CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +.\" TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +.\" SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +.\" +.\" Derived from section 6.4.1 in manual.html of Lua 5.3.1: +.\" $Id: manual.of,v 1.151 2015/06/10 21:08:57 roberto Exp $ +.\" +.Dd $Mdocdate: Jun 19 2015 $ +.Dt PATTERNS 7 +.Os +.Sh NAME +.Nm patterns +.Nd Lua's pattern matching rules +.Sh DESCRIPTION +Pattern matching in +.Xr httpd 8 +is based on the implementation of the Lua scripting language and +provides a simple and fast alternative to regular expressions (REs) that +are described in +.Xr re_format 7 . +Patterns are described by regular strings, which are interpreted as +patterns by the pattern-matching +.Dq find +and +.Dq match +functions. +This document describes the syntax and the meaning (that is, what they +match) of these strings. +.Sh CHARACTER CLASS +.Pp +A character class is used to represent a set of characters. 
+The following combinations are allowed in describing a character +class: +.Bl -tag -width Ds +.It Ar x +(where +.Ar x +is not one of the magic characters +.Sq ^$()%.[]*+-? ) +represents the character +.Ar x +itself. +.It . +(a dot) represents all characters. +.It %a +represents all letters. +.It %c +represents all control characters. +.It %d +represents all digits. +.It %g +represents all printable characters except space. +.It %l +represents all lowercase letters. +.It %p +represents all punctuation characters. +.It %s +represents all space characters. +.It %u +represents all uppercase letters. +.It %w +represents all alphanumeric characters. +.It %x +represents all hexadecimal digits. +.It Pf % Ar x +(where +.Ar x +is any non-alphanumeric character) represents the character +.Ar x . +This is the standard way to escape the magic characters. +Any non-alphanumeric character (including all punctuation characters, +even the non-magical) can be preceded by a +.Sq % +when used to represent itself in a pattern. +.It Bq Ar set +represents the class which is the union of all +characters in +.Ar set . +A range of characters can be specified by separating the end +characters of the range, in ascending order, with a +.Sq - . +All classes +.Sq Ar %x +described above can also be used as components in +.Ar set . +All other characters in +.Ar set +represent themselves. +For example, +.Sq [%w_] +(or +.Sq [_%w] ) +represents all alphanumeric characters plus the underscore, +.Sq [0-7] +represents the octal digits, +and +.Sq [0-7%l%-] +represents the octal digits plus the lowercase letters plus the +.Sq - +character. +.Pp +The interaction between ranges and classes is not defined. +Therefore, patterns like +.Sq [%a-z] +or +.Sq [a-%%] +have no meaning. +.It Bq Ar ^set +represents the complement of +.Ar set , +where +.Ar set +is interpreted as above. 
+.El +.Pp +For all classes represented by single letters ( +.Sq %a , +.Sq %c , +etc.), +the corresponding uppercase letter represents the complement of the class. +For instance, +.Sq %S +represents all non-space characters. +.Pp +The definitions of letter, space, and other character groups depend on +the current locale. +In particular, the class +.Sq [a-z] +may not be equivalent to +.Sq %l . +.Sh PATTERN ITEM +A pattern item can be +.Bl -bullet +.It +a single character class, which matches any single character in the class; +.It +a single character class followed by +.Sq * , +which matches zero or more repetitions of characters in the class. +These repetition items will always match the longest possible sequence; +.It +a single character class followed by +.Sq + , +which matches one or more repetitions of characters in the class. +These repetition items will always match the longest possible sequence; +.It +a single character class followed by +.Sq - , +which also matches zero or more repetitions of characters in the class. +Unlike +.Sq * , +these repetition items will always match the shortest possible sequence; +.It +a single character class followed by +.Sq \? , +which matches zero or one occurrence of a character in the class. +It always matches one occurrence if possible; +.It +.Sq Pf % Ar n , +for +.Ar n +between 1 and 9; +such item matches a substring equal to the n-th captured string (see below); +.It +.Sq Pf %b Ar xy , +where +.Ar x +and +.Ar y +are two distinct characters; +such item matches strings that start with +.Ar x , +end with +.Ar y , +and where the +.Ar x +and +.Ar y +are +.Em balanced . +This means that, if one reads the string from left to right, counting +.Em +1 +for an +.Ar x +and +.Em -1 +for a +.Ar y , +the ending +.Ar y +is the first +.Ar y +where the count reaches 0. +For instance, the item +.Sq %b() +matches expressions with balanced parentheses. 
+.It +.Sq Pf %f Bq Ar set , +a +.Em frontier pattern ; +such item matches an empty string at any position such that the next +character belongs to +.Ar set +and the previous character does not belong to +.Ar set . +The set +.Ar set +is interpreted as previously described. +The beginning and the end of the subject are handled as if +they were the character +.Sq \e0 . +.El +.Sh PATTERN +A pattern is a sequence of pattern items. +A caret +.Sq ^ +at the beginning of a pattern anchors the match at the beginning of +the subject string. +A +.Sq \$ +at the end of a pattern anchors the match at the end of the subject string. +At other positions, +.Sq ^ +and +.Sq \$ +have no special meaning and represent themselves. +.Sh CAPTURES +A pattern can contain sub-patterns enclosed in parentheses; they +describe captures. +When a match succeeds, the substrings of the subject string that match +captures are stored (captured) for future use. +Captures are numbered according to their left parentheses. +For instance, in the pattern +.Qq (a*(.)%w(%s*)) , +the part of the string matching +.Qq a*(.)%w(%s*) +is stored as the first capture (and therefore has number 1); +the character matching +.So \. Sc +is captured with number 2, +and the part matching +.Qq %s* +has number 3. +.Pp +As a special case, the empty capture +.Sq () +captures the current string position (a number). +For instance, if we apply the pattern +.Qq ()aa() +on the string +.Qq flaaap , +there will be two captures: 3 and 5. +.Sh SEE ALSO +.Xr fnmatch 3 , +.Xr re_format 7 , +.Xr httpd 8 . +.Rs +.%A Roberto Ierusalimschy +.%A Luiz Henrique de Figueiredo +.%A Waldemar Celes +.%Q Lua.org +.%Q PUC-Rio +.%D June 2015 +.%R Lua 5.3 Reference Manual +.%T Patterns +.%U http://www.lua.org/manual/5.3/manual.html#6.4.1 +.Re +.Sh HISTORY +The first implementation of the pattern rules was introduced with Lua 2.5. +Almost twenty years later, +an implementation based on Lua 5.3.1 appeared in +.Ox 5.8 . 
+.Sh AUTHORS +The pattern matching is derived from the original implementation of +the Lua scripting language, that is written by +.An -nosplit +.An Roberto Ierusalimschy , +.An Waldemar Celes , +and +.An Luiz Henrique de Figueiredo +at PUC-Rio. +It was turned into a native C API for +.Xr httpd 8 +by +.An Reyk Floeter Aq Mt reyk@openbsd.org . diff --git a/httpd/patterns.c b/httpd/patterns.c new file mode 100644 index 0000000..b7cb381 --- /dev/null +++ b/httpd/patterns.c @@ -0,0 +1,695 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2015 Reyk Floeter + * Copyright (C) 1994-2015 Lua.org, PUC-Rio. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Derived from Lua 5.3.1: + * $Id: lstrlib.c,v 1.229 2015/05/20 17:39:23 roberto Exp $ + * Standard library for string operations and pattern-matching + */ + +#include +#include +#include +#include +#include +#include + +#include "patterns.h" + +#define uchar(c) ((unsigned char)(c)) /* macro to 'unsign' a char */ +#define CAP_UNFINISHED (-1) +#define CAP_POSITION (-2) +#define L_ESC '%' +#define SPECIALS "^$*+?.([%-" + +struct match_state { + int matchdepth; /* control for recursive depth (to avoid C + * stack overflow) */ + int maxcaptures; /* configured capture limit */ + const char *src_init; /* init of source string */ + const char *src_end; /* end ('\0') of source string */ + const char *p_end; /* end ('\0') of pattern */ + const char *error; /* should be NULL */ + int level; /* total number of captures (finished or + * unfinished) */ + struct { + const char *init; + ptrdiff_t len; + } capture[MAXCAPTURES]; +}; + +/* recursive function */ +static const char *match(struct match_state *, const char *, const char *); + +static int +match_error(struct match_state *ms, const char *error) +{ + ms->error = ms->error == NULL ? 
error : ms->error; + return (-1); +} + +static int +check_capture(struct match_state *ms, int l) +{ + l -= '1'; + if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) + return match_error(ms, "invalid capture index"); + return (l); +} + +static int +capture_to_close(struct match_state *ms) +{ + int level = ms->level; + for (level--; level >= 0; level--) + if (ms->capture[level].len == CAP_UNFINISHED) + return (level); + return match_error(ms, "invalid pattern capture"); +} + +static const char * +classend(struct match_state *ms, const char *p) +{ + switch (*p++) { + case L_ESC: + if (p == ms->p_end) + match_error(ms, + "malformed pattern (ends with '%%')"); + return p + 1; + case '[': + if (*p == '^') + p++; + do { + /* look for a ']' */ + if (p == ms->p_end) { + match_error(ms, + "malformed pattern (missing ']')"); + break; + } + if (*(p++) == L_ESC && p < ms->p_end) { + /* skip escapes (e.g. '%]') */ + p++; + } + } while (*p != ']'); + return p + 1; + default: + return p; + } +} + +static int +match_class(int c, int cl) +{ + int res; + switch (tolower(cl)) { + case 'a': + res = isalpha(c); + break; + case 'c': + res = iscntrl(c); + break; + case 'd': + res = isdigit(c); + break; + case 'g': + res = isgraph(c); + break; + case 'l': + res = islower(c); + break; + case 'p': + res = ispunct(c); + break; + case 's': + res = isspace(c); + break; + case 'u': + res = isupper(c); + break; + case 'w': + res = isalnum(c); + break; + case 'x': + res = isxdigit(c); + break; + case 'z': + res = (c == 0); + break; /* deprecated option */ + default: + return (cl == c); + } + return (islower(cl) ? 
res : !res); +} + +static int +matchbracketclass(int c, const char *p, const char *ec) +{ + int sig = 1; + if (*(p + 1) == '^') { + sig = 0; + /* skip the '^' */ + p++; + } + while (++p < ec) { + if (*p == L_ESC) { + p++; + if (match_class(c, uchar(*p))) + return sig; + } else if ((*(p + 1) == '-') && (p + 2 < ec)) { + p += 2; + if (uchar(*(p - 2)) <= c && c <= uchar(*p)) + return sig; + } else if (uchar(*p) == c) + return sig; + } + return !sig; +} + +static int +singlematch(struct match_state *ms, const char *s, const char *p, + const char *ep) +{ + if (s >= ms->src_end) + return 0; + else { + int c = uchar(*s); + switch (*p) { + case '.': + /* matches any char */ + return (1); + case L_ESC: + return match_class(c, uchar(*(p + 1))); + case '[': + return matchbracketclass(c, p, ep - 1); + default: + return (uchar(*p) == c); + } + } +} + +static const char * +matchbalance(struct match_state *ms, const char *s, const char *p) +{ + if (p >= ms->p_end - 1) { + match_error(ms, + "malformed pattern (missing arguments to '%%b')"); + return (NULL); + } + if (*s != *p) + return (NULL); + else { + int b = *p; + int e = *(p + 1); + int cont = 1; + while (++s < ms->src_end) { + if (*s == e) { + if (--cont == 0) + return s + 1; + } else if (*s == b) + cont++; + } + } + + /* string ends out of balance */ + return (NULL); +} + +static const char * +max_expand(struct match_state *ms, const char *s, const char *p, const char *ep) +{ + ptrdiff_t i = 0; + /* counts maximum expand for item */ + while (singlematch(ms, s + i, p, ep)) + i++; + /* keeps trying to match with the maximum repetitions */ + while (i >= 0) { + const char *res = match(ms, (s + i), ep + 1); + if (res) + return res; + /* else didn't match; reduce 1 repetition to try again */ + i--; + } + return NULL; +} + +static const char * +min_expand(struct match_state *ms, const char *s, const char *p, const char *ep) +{ + for (;;) { + const char *res = match(ms, s, ep + 1); + if (res != NULL) + return res; + else if 
(singlematch(ms, s, p, ep)) + s++; /* try with one more repetition */ + else + return NULL; + } +} + +static const char * +start_capture(struct match_state *ms, const char *s, const char *p, int what) +{ + const char *res; + + int level = ms->level; + if (level >= ms->maxcaptures) + match_error(ms, "too many captures"); + ms->capture[level].init = s; + ms->capture[level].len = what; + ms->level = level + 1; + /* undo capture if match failed */ + if ((res = match(ms, s, p)) == NULL) + ms->level--; + return res; +} + +static const char * +end_capture(struct match_state *ms, const char *s, const char *p) +{ + int l = capture_to_close(ms); + const char *res; + /* close capture */ + ms->capture[l].len = s - ms->capture[l].init; + /* undo capture if match failed */ + if ((res = match(ms, s, p)) == NULL) + ms->capture[l].len = CAP_UNFINISHED; + return res; +} + +static const char * +match_capture(struct match_state *ms, const char *s, int l) +{ + size_t len; + l = check_capture(ms, l); + len = ms->capture[l].len; + if ((size_t) (ms->src_end - s) >= len && + memcmp(ms->capture[l].init, s, len) == 0) + return s + len; + else + return NULL; +} + +static const char * +match(struct match_state *ms, const char *s, const char *p) +{ + const char *ep, *res; + char previous; + + if (ms->matchdepth-- == 0) + match_error(ms, "pattern too complex"); + + /* using goto's to optimize tail recursion */ + init: + /* end of pattern? */ + if (p != ms->p_end) { + switch (*p) { + case '(': + /* start capture */ + if (*(p + 1) == ')') + /* position capture? */ + s = start_capture(ms, s, p + 2, CAP_POSITION); + else + s = start_capture(ms, s, p + 1, CAP_UNFINISHED); + break; + case ')': + /* end capture */ + s = end_capture(ms, s, p + 1); + break; + case '$': + /* is the '$' the last char in pattern? */ + if ((p + 1) != ms->p_end) { + /* no; go to default */ + goto dflt; + } + /* check end of string */ + s = (s == ms->src_end) ? 
s : NULL; + break; + case L_ESC: + /* escaped sequences not in the format class[*+?-]? */ + switch (*(p + 1)) { + case 'b': + /* balanced string? */ + s = matchbalance(ms, s, p + 2); + if (s != NULL) { + p += 4; + /* return match(ms, s, p + 4); */ + goto init; + } /* else fail (s == NULL) */ + break; + case 'f': + /* frontier? */ + p += 2; + if (*p != '[') + match_error(ms, "missing '['" + " after '%%f' in pattern"); + /* points to what is next */ + ep = classend(ms, p); + previous = + (s == ms->src_init) ? '\0' : *(s - 1); + if (!matchbracketclass(uchar(previous), + p, ep - 1) && + matchbracketclass(uchar(*s), + p, ep - 1)) { + p = ep; + /* return match(ms, s, ep); */ + goto init; + } + /* match failed */ + s = NULL; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* capture results (%0-%9)? */ + s = match_capture(ms, s, uchar(*(p + 1))); + if (s != NULL) { + p += 2; + /* return match(ms, s, p + 2) */ + goto init; + } + break; + default: + goto dflt; + } + break; + default: + + /* pattern class plus optional suffix */ + dflt: + /* points to optional suffix */ + ep = classend(ms, p); + + /* does not match at least once? */ + if (!singlematch(ms, s, p, ep)) { + /* accept empty? */ + if (*ep == '*' || *ep == '?' 
|| *ep == '-') { + p = ep + 1; + /* return match(ms, s, ep + 1); */ + goto init; + } else { + /* '+' or no suffix */ + s = NULL; /* fail */ + } + } else { + /* matched once */ + /* handle optional suffix */ + switch (*ep) { + case '?': + /* optional */ + if ((res = + match(ms, s + 1, ep + 1)) != NULL) + s = res; + else { + /* + * else return + * match(ms, s, ep + 1); + */ + p = ep + 1; + goto init; + } + break; + case '+': + /* 1 or more repetitions */ + s++; /* 1 match already done */ + /* FALLTHROUGH */ + case '*': + /* 0 or more repetitions */ + s = max_expand(ms, s, p, ep); + break; + case '-': + /* 0 or more repetitions (minimum) */ + s = min_expand(ms, s, p, ep); + break; + default: + /* no suffix */ + s++; + p = ep; + /* return match(ms, s + 1, ep); */ + goto init; + } + } + break; + } + } + ms->matchdepth++; + return s; +} + +static const char * +lmemfind(const char *s1, size_t l1, + const char *s2, size_t l2) +{ + const char *init; + + if (l2 == 0) { + /* empty strings are everywhere */ + return (s1); + } else if (l2 > l1) { + /* avoids a negative 'l1' */ + return (NULL); + } else { + /* + * to search for a '*s2' inside 's1' + * - 1st char will be checked by 'memchr' + * - 's2' cannot be found after that + */ + l2--; + l1 = l1 - l2; + while (l1 > 0 && + (init = (const char *)memchr(s1, *s2, l1)) != NULL) { + /* 1st char is already checked */ + init++; + if (memcmp(init, s2 + 1, l2) == 0) + return init - 1; + else { + /* correct 'l1' and 's1' to try again */ + l1 -= init - s1; + s1 = init; + } + } + /* not found */ + return (NULL); + } +} + +static int +push_onecapture(struct match_state *ms, int i, const char *s, + const char *e, struct str_find *sm) +{ + if (i >= ms->level) { + if (i == 0 || ms->level == 0) { + /* add whole match */ + sm->sm_so = (off_t)(s - ms->src_init); + sm->sm_eo = (off_t)(e - s) + sm->sm_so; + } else + return match_error(ms, "invalid capture index"); + } else { + ptrdiff_t l = ms->capture[i].len; + if (l == CAP_UNFINISHED) + 
match_error(ms, "unfinished capture"); + sm->sm_so = ms->capture[i].init - ms->src_init; + sm->sm_eo = sm->sm_so + l; + } + sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo; + return (0); +} + +static int +push_captures(struct match_state *ms, const char *s, const char *e, + struct str_find *sm, size_t nsm) +{ + unsigned int i; + unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level; + + if (nlevels > nsm) + nlevels = nsm; + for (i = 0; i < nlevels; i++) + if (push_onecapture(ms, i, s, e, sm + i) == -1) + break; + + /* number of strings pushed */ + return (nlevels); +} + +/* check whether pattern has no special characters */ +static int +nospecials(const char *p, size_t l) +{ + size_t upto = 0; + + do { + if (strpbrk(p + upto, SPECIALS)) { + /* pattern has a special character */ + return 0; + } + /* may have more after \0 */ + upto += strlen(p + upto) + 1; + } while (upto <= l); + + /* no special chars found */ + return (1); +} + +static int +str_find_aux(struct match_state *ms, const char *pattern, const char *string, + struct str_find *sm, size_t nsm, off_t init) +{ + size_t ls = strlen(string); + size_t lp = strlen(pattern); + const char *s = string; + const char *p = pattern; + const char *s1, *s2; + int anchor, i; + + if (init < 0) + init = 0; + else if (init > (off_t)ls) + return match_error(ms, "starting after string's end"); + s1 = s + init; + + if (nospecials(p, lp)) { + /* do a plain search */ + s2 = lmemfind(s1, ls - (size_t)init, p, lp); + if (s2 != NULL) { + i = 0; + sm[i].sm_so = 0; + sm[i].sm_eo = ls; + if (nsm > 1) { + i++; + sm[i].sm_so = s2 - s; + sm[i].sm_eo = (s2 - s) + lp; + } + return (i + 1); + } + return (0); + } + + anchor = (*p == '^'); + if (anchor) { + p++; + lp--; /* skip anchor character */ + } + ms->maxcaptures = (nsm > MAXCAPTURES ? 
MAXCAPTURES : nsm) - 1; + ms->matchdepth = MAXCCALLS; + ms->src_init = s; + ms->src_end = s + ls; + ms->p_end = p + lp; + do { + const char *res; + ms->level = 0; + assert(ms->matchdepth == MAXCCALLS); + if ((res = match(ms, s1, p)) != NULL) { + sm->sm_so = 0; + sm->sm_eo = ls; + return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1; + } + } while (s1++ < ms->src_end && !anchor); + + return 0; +} + +int +str_find(const char *string, const char *pattern, struct str_find *sm, + size_t nsm, const char **errstr) +{ + struct match_state ms; + int ret; + + memset(&ms, 0, sizeof(ms)); + memset(sm, 0, nsm * sizeof(*sm)); + + ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); + if (ms.error != NULL) { + /* Return 0 on error and store the error string */ + *errstr = ms.error; + ret = 0; + } else + *errstr = NULL; + + return (ret); +} + +int +str_match(const char *string, const char *pattern, struct str_match *m, + const char **errstr) +{ + struct str_find sm[MAXCAPTURES]; + struct match_state ms; + int ret, i; + size_t len, nsm; + + nsm = MAXCAPTURES; + memset(&ms, 0, sizeof(ms)); + memset(sm, 0, sizeof(sm)); + memset(m, 0, sizeof(*m)); + + ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); + if (ret == 0 || ms.error != NULL) { + /* Return 0 on error and store the error string */ + *errstr = ms.error; + return (-1); + } + + if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) { + *errstr = strerror(errno); + return (-1); + } + m->sm_nmatch = ret; + + for (i = 0; i < ret; i++) { + if (sm[i].sm_so > sm[i].sm_eo) + continue; + len = sm[i].sm_eo - sm[i].sm_so; + if ((m->sm_match[i] = calloc(1, + len + 1)) == NULL) { + *errstr = strerror(errno); + str_match_free(m); + return (-1); + } + (void)memcpy(m->sm_match[i], + string + sm[i].sm_so, len); + } + + *errstr = NULL; + return (0); +} + +void +str_match_free(struct str_match *m) +{ + unsigned int i = 0; + for (i = 0; i < m->sm_nmatch; i++) + free(m->sm_match[i]); + free(m->sm_match); + m->sm_nmatch = 0; +} diff --git 
a/httpd/patterns.h b/httpd/patterns.h new file mode 100644 index 0000000..ddda0dd --- /dev/null +++ b/httpd/patterns.h @@ -0,0 +1,46 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2015 Reyk Floeter + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +#ifndef PATTERNS_H +#define PATTERNS_H + +#define MAXCAPTURES 32 /* Max no. 
of allowed captures in pattern */ +#define MAXCCALLS 200 /* Max recusion depth in pattern matching */ + +struct str_find { + off_t sm_so; /* start offset of match */ + off_t sm_eo; /* end offset of match */ +}; + +struct str_match { + char **sm_match; /* allocated array of matched strings */ + unsigned int sm_nmatch; /* number of elements in array */ +}; + +__BEGIN_DECLS +int str_find(const char *, const char *, struct str_find *, size_t, + const char **); +int str_match(const char *, const char *, struct str_match *, + const char **); +void str_match_free(struct str_match *); +__END_DECLS + +#endif /* PATTERNS_H */ diff --git a/httpd/server_http.c b/httpd/server_http.c index 32b9e7d..26d5392 100644 --- a/httpd/server_http.c +++ b/httpd/server_http.c @@ -29,14 +29,16 @@ #include #include #include +#include #include #include #include #include -#include +#include #include "httpd.h" #include "http.h" +#include "patterns.h" static int server_httpmethod_cmp(const void *, const void *); static int server_httperror_cmp(const void *, const void *); @@ -633,6 +635,7 @@ server_reset_http(struct client *clt) clt->clt_remote_user = NULL; clt->clt_bev->readcb = server_read_http; clt->clt_srv_conf = &srv->srv_conf; + str_match_free(&clt->clt_srv_match); } ssize_t @@ -873,6 +876,8 @@ server_close_http(struct client *clt) clt->clt_descresp = NULL; free(clt->clt_remote_user); clt->clt_remote_user = NULL; + + str_match_free(&clt->clt_srv_match); } char * @@ -882,11 +887,34 @@ server_expand_http(struct client *clt, const char *val, char *buf, struct http_descriptor *desc = clt->clt_descreq; struct server_config *srv_conf = clt->clt_srv_conf; char ibuf[128], *str, *path; - int ret; + const char *errstr = NULL, *p; + size_t size; + int n, ret; if (strlcpy(buf, val, len) >= len) return (NULL); + /* Find previously matched substrings by index */ + for (p = val; clt->clt_srv_match.sm_nmatch && + (p = strstr(p, "%")) != NULL; p++) { + if (!isdigit(*(p + 1))) + continue; + + /* Copy number, 
leading '%' char and add trailing \0 */ + size = strspn(p + 1, "0123456789") + 2; + if (size >= sizeof(ibuf)) + return (NULL); + (void)strlcpy(ibuf, p, size); + n = strtonum(ibuf + 1, 0, + clt->clt_srv_match.sm_nmatch - 1, &errstr); + if (errstr != NULL) + return (NULL); + + /* Expand variable with matched value */ + if (expand_string(buf, len, ibuf, + clt->clt_srv_match.sm_match[n]) != 0) + return (NULL); + } if (strstr(val, "$DOCUMENT_URI") != NULL) { if ((path = url_encode(desc->http_path)) == NULL) return (NULL); @@ -982,8 +1010,10 @@ server_response(struct httpd *httpd, struct client *clt) struct server *srv = clt->clt_srv; struct server_config *srv_conf = &srv->srv_conf; struct kv *kv, key, *host; - int portval = -1; + struct str_find sm; + int portval = -1, ret; char *hostval; + const char *errstr = NULL; /* Canonicalize the request path */ if (desc->http_path == NULL || @@ -1043,9 +1073,17 @@ server_response(struct httpd *httpd, struct client *clt) hostname); } #endif - if ((srv_conf->flags & SRVFLAG_LOCATION) == 0 && - fnmatch(srv_conf->name, hostname, - FNM_CASEFOLD) == 0 && + if (srv_conf->flags & SRVFLAG_LOCATION) + continue; + else if (srv_conf->flags & SRVFLAG_SERVER_MATCH) { + str_find(hostname, srv_conf->name, + &sm, 1, &errstr); + ret = errstr == NULL ? 
0 : -1; + } else { + ret = fnmatch(srv_conf->name, + hostname, FNM_CASEFOLD); + } + if (ret == 0 && (portval == -1 || (portval != -1 && portval == srv_conf->port))) { /* Replace host configuration */ @@ -1115,6 +1153,8 @@ server_getlocation(struct client *clt, const char *path) { struct server *srv = clt->clt_srv; struct server_config *srv_conf = clt->clt_srv_conf, *location; + const char *errstr = NULL; + int ret; /* Now search for the location */ TAILQ_FOREACH(location, &srv->srv_hosts, entry) { @@ -1125,11 +1165,20 @@ server_getlocation(struct client *clt, const char *path) } #endif if ((location->flags & SRVFLAG_LOCATION) && - location->parent_id == srv_conf->parent_id && - fnmatch(location->location, path, FNM_CASEFOLD) == 0) { - /* Replace host configuration */ - clt->clt_srv_conf = srv_conf = location; - break; + location->parent_id == srv_conf->parent_id) { + errstr = NULL; + if (location->flags & SRVFLAG_LOCATION_MATCH) { + ret = str_match(path, location->location, + &clt->clt_srv_match, &errstr); + } else { + ret = fnmatch(location->location, + path, FNM_CASEFOLD); + } + if (ret == 0 && errstr == NULL) { + /* Replace host configuration */ + clt->clt_srv_conf = srv_conf = location; + break; + } } } -- cgit v1.2.3-54-g00ecf From 5a2b3f8c4b6ca5a92e5f975c4c6ddf8564b6409c Mon Sep 17 00:00:00 2001 From: Reyk Floeter Date: Sat, 20 Jun 2015 18:07:50 +0200 Subject: capture_to_close() wasn't checked (it returns -1 on error) and caused a panic when using a ")(" pattern. 
Found by Sebastien Marie --- httpd/patterns.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/httpd/patterns.c b/httpd/patterns.c index b7cb381..23c7083 100644 --- a/httpd/patterns.c +++ b/httpd/patterns.c @@ -291,6 +291,8 @@ end_capture(struct match_state *ms, const char *s, const char *p) { int l = capture_to_close(ms); const char *res; + if (l == -1) + return NULL; /* close capture */ ms->capture[l].len = s - ms->capture[l].init; /* undo capture if match failed */ -- cgit v1.2.3-54-g00ecf From 1de57f991a98d99872c5edb5200ca8309126187d Mon Sep 17 00:00:00 2001 From: Reyk Floeter Date: Sat, 20 Jun 2015 22:18:03 +0200 Subject: Changing luaL_error() calls to match_error() required slightly different semantics as we cannot just abort as Lua does. So we have to check return values carefully and I missed a few in the transition. Patch by Sebastien Marie --- httpd/patterns.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/httpd/patterns.c b/httpd/patterns.c index 23c7083..f4b999d 100644 --- a/httpd/patterns.c +++ b/httpd/patterns.c @@ -275,8 +275,10 @@ start_capture(struct match_state *ms, const char *s, const char *p, int what) const char *res; int level = ms->level; - if (level >= ms->maxcaptures) + if (level >= ms->maxcaptures) { match_error(ms, "too many captures"); + return (NULL); + } ms->capture[level].init = s; ms->capture[level].len = what; ms->level = level + 1; @@ -320,8 +322,10 @@ match(struct match_state *ms, const char *s, const char *p) const char *ep, *res; char previous; - if (ms->matchdepth-- == 0) + if (ms->matchdepth-- == 0) { match_error(ms, "pattern too complex"); + return (NULL); + } /* using goto's to optimize tail recursion */ init: @@ -364,9 +368,11 @@ match(struct match_state *ms, const char *s, const char *p) case 'f': /* frontier? 
*/ p += 2; - if (*p != '[') + if (*p != '[') { match_error(ms, "missing '['" " after '%%f' in pattern"); + break; + } /* points to what is next */ ep = classend(ms, p); previous = @@ -518,7 +524,7 @@ push_onecapture(struct match_state *ms, int i, const char *s, } else { ptrdiff_t l = ms->capture[i].len; if (l == CAP_UNFINISHED) - match_error(ms, "unfinished capture"); + return match_error(ms, "unfinished capture"); sm->sm_so = ms->capture[i].init - ms->src_init; sm->sm_eo = sm->sm_so + l; } -- cgit v1.2.3-54-g00ecf From df6d58c8a50cc5d11d3b16986c5ff8ec8cea09b6 Mon Sep 17 00:00:00 2001 From: Sébastien Marie Date: Sun, 21 Jun 2015 06:57:58 +0200 Subject: propagate error from classend() an error could be setted in classend(). check and propagate it if any. --- httpd/patterns.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/httpd/patterns.c b/httpd/patterns.c index f4b999d..8fbaa28 100644 --- a/httpd/patterns.c +++ b/httpd/patterns.c @@ -375,6 +375,8 @@ match(struct match_state *ms, const char *s, const char *p) } /* points to what is next */ ep = classend(ms, p); + if (ms->error != NULL) + break; previous = (s == ms->src_init) ? '\0' : *(s - 1); if (!matchbracketclass(uchar(previous), @@ -416,6 +418,8 @@ match(struct match_state *ms, const char *s, const char *p) dflt: /* points to optional suffix */ ep = classend(ms, p); + if (ms->error != NULL) + break; /* does not match at least once? 
*/ if (!singlematch(ms, s, p, ep)) { -- cgit v1.2.3-54-g00ecf From f87380994e9d2df719d483664e776e475e27d221 Mon Sep 17 00:00:00 2001 From: Sébastien Marie Date: Mon, 22 Jun 2015 13:42:08 +0200 Subject: several cosmetics fixes + add a new error checking - there is no need to escape "%" with "%%" in error message - corrects a comment - add a check for error --- httpd/patterns.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/httpd/patterns.c b/httpd/patterns.c index 8fbaa28..1dbea15 100644 --- a/httpd/patterns.c +++ b/httpd/patterns.c @@ -215,7 +215,7 @@ matchbalance(struct match_state *ms, const char *s, const char *p) { if (p >= ms->p_end - 1) { match_error(ms, - "malformed pattern (missing arguments to '%%b')"); + "malformed pattern (missing arguments to '%b')"); return (NULL); } if (*s != *p) @@ -308,6 +308,8 @@ match_capture(struct match_state *ms, const char *s, int l) { size_t len; l = check_capture(ms, l); + if (l == -1) + return NULL; len = ms->capture[l].len; if ((size_t) (ms->src_end - s) >= len && memcmp(ms->capture[l].init, s, len) == 0) @@ -370,7 +372,7 @@ match(struct match_state *ms, const char *s, const char *p) p += 2; if (*p != '[') { match_error(ms, "missing '['" - " after '%%f' in pattern"); + " after '%f' in pattern"); break; } /* points to what is next */ @@ -667,7 +669,7 @@ str_match(const char *string, const char *pattern, struct str_match *m, ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); if (ret == 0 || ms.error != NULL) { - /* Return 0 on error and store the error string */ + /* Return -1 on error and store the error string */ *errstr = ms.error; return (-1); } -- cgit v1.2.3-54-g00ecf From 95318c663159a0143edbe41e2e8c94bc057276a8 Mon Sep 17 00:00:00 2001 From: Sébastien Marie Date: Mon, 22 Jun 2015 13:44:20 +0200 Subject: add regress testsuite for patterns --- regress/patterns/Makefile | 20 ++++++++ regress/patterns/patterns-tester.c | 98 ++++++++++++++++++++++++++++++++++++++ 
regress/patterns/test-patterns.in | 23 +++++++++ regress/patterns/test-patterns.out | 87 +++++++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+) create mode 100644 regress/patterns/Makefile create mode 100644 regress/patterns/patterns-tester.c create mode 100644 regress/patterns/test-patterns.in create mode 100644 regress/patterns/test-patterns.out diff --git a/regress/patterns/Makefile b/regress/patterns/Makefile new file mode 100644 index 0000000..024f96e --- /dev/null +++ b/regress/patterns/Makefile @@ -0,0 +1,20 @@ +# $OpenBSD$ + +HTTPDSRC = ${.CURDIR}/../../httpd + +.PATH: ${HTTPDSRC} + +REGRESS_TARGETS= test-patterns + +CLEANFILES += patterns-tester + +patterns-tester: patterns-tester.c patterns.c patterns.h + ${CC} -o $@ ${CFLAGS} ${.CURDIR}/patterns-tester.c ${HTTPDSRC}/patterns.c -I${HTTPDSRC} + +test-patterns: patterns-tester test-patterns.out test-patterns.in + cat ${.CURDIR}/test-patterns.in | grep -v '^#' | \ + while IFS=' ' read string pattern comments ; do \ + ./patterns-tester "$${string}" "$${pattern}" 2>&1 || true; \ + done | diff -I 'OpenBSD' -u ${.CURDIR}/test-patterns.out - + +.include diff --git a/regress/patterns/patterns-tester.c b/regress/patterns/patterns-tester.c new file mode 100644 index 0000000..9134c0c --- /dev/null +++ b/regress/patterns/patterns-tester.c @@ -0,0 +1,98 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2015 Sebastien Marie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include + +#include "patterns.h" + +extern char * malloc_options; + +static void read_string(char *, size_t); +static void read_string_stop(void); + +static void +read_string(char *buf, size_t len) +{ + size_t i; + + /* init */ + bzero(buf, len); + + /* read */ + if (fgets(buf, len, stdin) == NULL) + err(1, "fgets"); + + /* strip '\n' */ + i = strnlen(buf, len); + if (i != 0) + buf[i-1] = '\0'; +} + +static void +read_string_stop() +{ + if (getchar() != EOF) + errx(1, "read_string_stop: too many input"); +} + +int +main(int argc, char *argv[]) +{ + char string[1024]; + char pattern[1024]; + struct str_match m; + const char *errstr = NULL; + int ret; + size_t i; + + /* configure malloc */ + malloc_options = "S"; + + /* read testcase */ + if (argc != 3) { + /* from stdin (useful for afl) */ + read_string(string, sizeof(string)); + read_string(pattern, sizeof(pattern)); + read_string_stop(); + } else { + /* from arguments */ + strlcpy(string, argv[1], sizeof(string)); + strlcpy(pattern, argv[2], sizeof(pattern)); + } + + /* print testcase */ + printf("string='%s'\n", string); + printf("pattern='%s'\n", pattern); + + /* test it ! */ + ret = str_match(string, pattern, &m, &errstr); + if (errstr != NULL) + errx(1, "str_match: %s", errstr); + + /* print result */ + printf("ret=%d num=%d\n", ret, m.sm_nmatch); + for (i=0; i Date: Mon, 22 Jun 2015 16:52:49 +0200 Subject: don't do tail-call-recursive every time - switch '*', '?' 
and '-' to normal call, in order to pass into "too complex" check - remove assert() - on error, quit early --- httpd/patterns.c | 11 ++++++++--- httpd/patterns.h | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/httpd/patterns.c b/httpd/patterns.c index 1dbea15..177a5af 100644 --- a/httpd/patterns.c +++ b/httpd/patterns.c @@ -427,9 +427,12 @@ match(struct match_state *ms, const char *s, const char *p) if (!singlematch(ms, s, p, ep)) { /* accept empty? */ if (*ep == '*' || *ep == '?' || *ep == '-') { - p = ep + 1; /* return match(ms, s, ep + 1); */ - goto init; + return match(ms, s, ep + 1); + /* + * p = ep + 1; + * goto init; + */ } else { /* '+' or no suffix */ s = NULL; /* fail */ @@ -621,11 +624,13 @@ str_find_aux(struct match_state *ms, const char *pattern, const char *string, do { const char *res; ms->level = 0; - assert(ms->matchdepth == MAXCCALLS); if ((res = match(ms, s1, p)) != NULL) { sm->sm_so = 0; sm->sm_eo = ls; return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1; + + } else if (ms->error != NULL) { + return 0; } } while (s1++ < ms->src_end && !anchor); diff --git a/httpd/patterns.h b/httpd/patterns.h index ddda0dd..28b0f95 100644 --- a/httpd/patterns.h +++ b/httpd/patterns.h @@ -23,7 +23,7 @@ #define PATTERNS_H #define MAXCAPTURES 32 /* Max no. 
of allowed captures in pattern */ -#define MAXCCALLS 200 /* Max recusion depth in pattern matching */ +#define MAXCCALLS 5000 /* Max recusion depth in pattern matching */ struct str_find { off_t sm_so; /* start offset of match */ -- cgit v1.2.3-54-g00ecf From ca5a2bf34a994373a6976865d4f112b8085b8ae8 Mon Sep 17 00:00:00 2001 From: Reyk Floeter Date: Tue, 23 Jun 2015 10:28:07 +0200 Subject: Add tests with lua53 for comparison --- regress/patterns/Makefile | 11 ++++ regress/patterns/patterns-tester.lua | 2 + regress/patterns/test-patterns-lua.out | 100 +++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 regress/patterns/patterns-tester.lua create mode 100644 regress/patterns/test-patterns-lua.out diff --git a/regress/patterns/Makefile b/regress/patterns/Makefile index 024f96e..995b416 100644 --- a/regress/patterns/Makefile +++ b/regress/patterns/Makefile @@ -8,6 +8,11 @@ REGRESS_TARGETS= test-patterns CLEANFILES += patterns-tester +#LUA?= lua53 +.ifdef LUA +REGRESS_TARGETS+= test-patterns-lua +.endif + patterns-tester: patterns-tester.c patterns.c patterns.h ${CC} -o $@ ${CFLAGS} ${.CURDIR}/patterns-tester.c ${HTTPDSRC}/patterns.c -I${HTTPDSRC} @@ -17,4 +22,10 @@ test-patterns: patterns-tester test-patterns.out test-patterns.in ./patterns-tester "$${string}" "$${pattern}" 2>&1 || true; \ done | diff -I 'OpenBSD' -u ${.CURDIR}/test-patterns.out - +test-patterns-lua: patterns-tester.lua test-patterns-lua.out test-patterns.in + cat ${.CURDIR}/test-patterns.in | grep -v '^#' | \ + while IFS=' ' read string pattern comments ; do \ + ${LUA} ./patterns-tester.lua "$${string}" "$${pattern}" 2>&1 || true; \ + done | diff -I 'OpenBSD' -u ${.CURDIR}/test-patterns-lua.out - + .include diff --git a/regress/patterns/patterns-tester.lua b/regress/patterns/patterns-tester.lua new file mode 100644 index 0000000..15df0a7 --- /dev/null +++ b/regress/patterns/patterns-tester.lua @@ -0,0 +1,2 @@ +print(string.format("string='%s'\npattern='%s'", arg[1], 
arg[2])); +print(string.match(arg[1], arg[2])); diff --git a/regress/patterns/test-patterns-lua.out b/regress/patterns/test-patterns-lua.out new file mode 100644 index 0000000..cd9fe6a --- /dev/null +++ b/regress/patterns/test-patterns-lua.out @@ -0,0 +1,100 @@ +# $OpenBSD$ +string='/page/51' +pattern='^/(%a+)/(%d+)$' +page 51 +string='/Apage/51' +pattern='/[^%d][%w%u][^%c]+()[%d]+' +9 +string='/^page/51' +pattern='/^(.a.e)/(.)' +page 5 +string='/page/page-51' +pattern='/(.*)/%1-(%d+)' +page 51 +string='/page/[51]' +pattern='/page/(%b[])' +[51] +string=':-]' +pattern=']+' +] +string=':-)' +pattern='[)]+' +) +string='/page/51' +pattern='$^' +nil +string='1234567890' +pattern='([2-5]-)' + +string='****' +pattern='^**$' +**** +string='xxxx' +pattern='^x*$' +xxxx +string='/page/51' +pattern='no-%d-match' +nil +string='/page/page-51' +pattern='/(.*)/%9-(%d+)' +lua53: ./patterns-tester.lua:2: invalid capture index %9 +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? +string=':-)' +pattern=')+' +lua53: ./patterns-tester.lua:2: invalid pattern capture +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? +string='/page/51' +pattern='/page/51(' +lua53: ./patterns-tester.lua:2: unfinished capture +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? +string='/page/51' +pattern='/page/51%' +lua53: ./patterns-tester.lua:2: malformed pattern (ends with '%') +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? +string='/page/51' +pattern='/page/[51' +lua53: ./patterns-tester.lua:2: malformed pattern (missing ']') +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? 
+string='/page/(51)' +pattern='/page/%b(' +lua53: ./patterns-tester.lua:2: malformed pattern (missing arguments to '%b') +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? +string='/page/51' +pattern='()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()' +lua53: ./patterns-tester.lua:2: too many captures +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? +string='/page/51' +pattern='/page/%f' +lua53: ./patterns-tester.lua:2: missing '[' after '%f' in pattern +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? +string='/page/51' +pattern='/page%f/51' +lua53: ./patterns-tester.lua:2: missing '[' after '%f' in pattern +stack traceback: + [C]: in function 'string.match' + ./patterns-tester.lua:2: in main chunk + [C]: in ? -- cgit v1.2.3-54-g00ecf From cb9aff69125a32fd4327cce7079eb18273356d92 Mon Sep 17 00:00:00 2001 From: Sébastien Marie Date: Tue, 23 Jun 2015 10:50:15 +0200 Subject: add '+' to limited matchdepth - revert "normal" call to optimized-tail-call-recursion for '*', '?' and '-' - add a "pattern too complex" test to !singlematch() branch, so that it is limited on call numbers. --- httpd/patterns.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/httpd/patterns.c b/httpd/patterns.c index 177a5af..e6e1b1f 100644 --- a/httpd/patterns.c +++ b/httpd/patterns.c @@ -425,14 +425,16 @@ match(struct match_state *ms, const char *s, const char *p) /* does not match at least once? */ if (!singlematch(ms, s, p, ep)) { + if (ms->matchdepth-- == 0) { + match_error(ms, "pattern too complex"); + s = NULL; /* failed */ + } + /* accept empty? */ if (*ep == '*' || *ep == '?' 
|| *ep == '-') { + p = ep + 1; /* return match(ms, s, ep + 1); */ - return match(ms, s, ep + 1); - /* - * p = ep + 1; - * goto init; - */ + goto init; } else { /* '+' or no suffix */ s = NULL; /* fail */ -- cgit v1.2.3-54-g00ecf From 3873b23c53de31074d294d127077e691662150c2 Mon Sep 17 00:00:00 2001 From: Sébastien Marie Date: Tue, 23 Jun 2015 12:03:53 +0200 Subject: add a new control for repetitor items - add a counter for limiting the search for repetitor items ('*', '+', '-' and '?') - add test case for this new kind of error --- httpd/patterns.c | 7 +++++++ httpd/patterns.h | 1 + regress/patterns/test-patterns.in | 4 ++++ regress/patterns/test-patterns.out | 12 ++++++++++++ 4 files changed, 24 insertions(+) diff --git a/httpd/patterns.c b/httpd/patterns.c index 1dbea15..62c8078 100644 --- a/httpd/patterns.c +++ b/httpd/patterns.c @@ -48,6 +48,7 @@ struct match_state { int matchdepth; /* control for recursive depth (to avoid C * stack overflow) */ + int repetitioncounter; /* control the repetition items */ int maxcaptures; /* configured capture limit */ const char *src_init; /* init of source string */ const char *src_end; /* end ('\0') of source string */ @@ -425,6 +426,11 @@ match(struct match_state *ms, const char *s, const char *p) /* does not match at least once? */ if (!singlematch(ms, s, p, ep)) { + if (ms->repetitioncounter-- == 0) { + match_error(ms, "max repetition items"); + s = NULL; /* fail */ + } else + /* accept empty? */ if (*ep == '*' || *ep == '?' || *ep == '-') { p = ep + 1; @@ -615,6 +621,7 @@ str_find_aux(struct match_state *ms, const char *pattern, const char *string, } ms->maxcaptures = (nsm > MAXCAPTURES ? MAXCAPTURES : nsm) - 1; ms->matchdepth = MAXCCALLS; + ms->repetitioncounter = MAXREPETITION; ms->src_init = s; ms->src_end = s + ls; ms->p_end = p + lp; diff --git a/httpd/patterns.h b/httpd/patterns.h index ddda0dd..6db5991 100644 --- a/httpd/patterns.h +++ b/httpd/patterns.h @@ -24,6 +24,7 @@ #define MAXCAPTURES 32 /* Max no. 
of allowed captures in pattern */ #define MAXCCALLS 200 /* Max recusion depth in pattern matching */ +#define MAXREPETITION 0xfffff /* Max for repetition items */ struct str_find { off_t sm_so; /* start offset of match */ diff --git a/regress/patterns/test-patterns.in b/regress/patterns/test-patterns.in index 46aa506..5abef1f 100644 --- a/regress/patterns/test-patterns.in +++ b/regress/patterns/test-patterns.in @@ -21,3 +21,7 @@ xxxx ^x*$ same as before /page/51 ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()() too many captures /page/51 /page/%f missing '[' after '%f' in pattern /page/51 /page%f/51 missing '[' after '%f' in pattern +q********************************* *************************************q max repetition items +q+++++++++++++++++++++++++++++++++ +++++++++++++++++++++++++++++++++++++q max repetition items +q--------------------------------- -------------------------------------q max repetition items +q????????????????????????????????? ?????????????????????????????????????q max repetition items diff --git a/regress/patterns/test-patterns.out b/regress/patterns/test-patterns.out index 1999cc1..aecb9f0 100644 --- a/regress/patterns/test-patterns.out +++ b/regress/patterns/test-patterns.out @@ -85,3 +85,15 @@ pattern='/page/%f' patterns-tester: str_match: missing '[' after '%f' in pattern string='/page/51' pattern='/page%f/51' +patterns-tester: str_match: max repetition items +string='q*********************************' +pattern='*************************************q' +patterns-tester: str_match: max repetition items +string='q+++++++++++++++++++++++++++++++++' +pattern='+++++++++++++++++++++++++++++++++++++q' +patterns-tester: str_match: max repetition items +string='q---------------------------------' +pattern='-------------------------------------q' +patterns-tester: str_match: max repetition items +string='q?????????????????????????????????' 
+pattern='?????????????????????????????????????q' -- cgit v1.2.3-54-g00ecf From f9ef46fb267f6cf5ec187b068c993615040b7a7b Mon Sep 17 00:00:00 2001 From: Reyk Floeter Date: Tue, 23 Jun 2015 15:29:12 +0200 Subject: Use strndup instead of calloc + memcpy. From Leandro Pereira (@lafp on twitter) --- httpd/patterns.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/httpd/patterns.c b/httpd/patterns.c index 08c1e27..61f6ab8 100644 --- a/httpd/patterns.c +++ b/httpd/patterns.c @@ -692,14 +692,12 @@ str_match(const char *string, const char *pattern, struct str_match *m, if (sm[i].sm_so > sm[i].sm_eo) continue; len = sm[i].sm_eo - sm[i].sm_so; - if ((m->sm_match[i] = calloc(1, - len + 1)) == NULL) { + if ((m->sm_match[i] = strndup(string + + sm[i].sm_so, len)) == NULL) { *errstr = strerror(errno); str_match_free(m); return (-1); } - (void)memcpy(m->sm_match[i], - string + sm[i].sm_so, len); } *errstr = NULL; -- cgit v1.2.3-54-g00ecf