diff options
author | Yann Ylavic <ylavic@apache.org> | 2017-10-02 23:57:26 +0200 |
---|---|---|
committer | Yann Ylavic <ylavic@apache.org> | 2017-10-02 23:57:26 +0200 |
commit | 08ddf7ad0a257c1b27b5fdb8e32fcd40b4dbdad1 (patch) | |
tree | e851b15724aa86321e5aaae317ec8fa6bacd127a /server/util_expr_parse.y | |
parent | xforms (diff) | |
download | apache2-08ddf7ad0a257c1b27b5fdb8e32fcd40b4dbdad1.tar.xz apache2-08ddf7ad0a257c1b27b5fdb8e32fcd40b4dbdad1.zip |
ap_expr: open string expressions to the <word>.
Introduces the syntax "%{:<word>:}", borrowed from the <var> syntax; like it,
the new form can be embedded anywhere in a string expression (the same reserved
character ':' gets reused in an unambiguous manner).
This allows the two types of expressions (boolean and string) to now share
fully the same language set, namely: strings, lists, vars, regexes, backrefs,
functions with multiple or complex arguments, and especially combinations
thereof.
Most of them were reserved for boolean expressions only, while complex string
constructions can also benefit, well, strings. The <word> construct allows
that (say, the syntax "%{:<word>:}" looks like a temporary variable constructed
in a string).
Since string expressions may now have to deal with lists (arrays), they also
need a way to produce/extract strings from lists and vice versa. This can be
done with the new "join" and "split" operators, while the new substitution
regexes (like "s/<pattern>/<substitute>/<flags>") may be used to manipulate
strings in place. All this is of course available for both string and boolean
expressions.
Tests and doc updates upcoming.
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1810605 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'server/util_expr_parse.y')
-rw-r--r-- | server/util_expr_parse.y | 155 |
1 files changed, 103 insertions, 52 deletions
diff --git a/server/util_expr_parse.y b/server/util_expr_parse.y index 9e02602139..ce1d6188df 100644 --- a/server/util_expr_parse.y +++ b/server/util_expr_parse.y @@ -48,10 +48,15 @@ %token <cpVal> T_DIGIT "number" %token <cpVal> T_ID "identifier" -%token <cpVal> T_STRING "cstring" -%token <cpVal> T_REGEX "regex" -%token <cpVal> T_REGEX_I "case-indendent regex" -%token <num> T_REGEX_BACKREF "regex back reference" +%token <cpVal> T_STRING "string" + +%token T_REGEX "match regex" +%token T_REGSUB "substitution regex" +%token <cpVal> T_REG_MATCH "match pattern of the regex" +%token <cpVal> T_REG_SUBST "substitution pattern of the regex" +%token <cpVal> T_REG_FLAGS "flags of the regex" +%token <num> T_REG_REF "regex back reference" + %token <cpVal> T_OP_UNARY "unary operator" %token <cpVal> T_OP_BINARY "binary operator" @@ -59,6 +64,8 @@ %token T_STR_END "end of string" %token T_VAR_BEGIN "start of variable name" %token T_VAR_END "end of variable name" +%token T_VAREXP_BEGIN "start of variable expression" +%token T_VAREXP_END "end of variable expression" %token T_OP_EQ "integer equal" %token T_OP_NE "integer not equal" @@ -75,8 +82,12 @@ %token T_OP_STR_LE "string less or equal" %token T_OP_STR_GT "string greater than" %token T_OP_STR_GE "string greater or equal" + %token T_OP_CONCAT "string concatenation" +%token T_OP_SPLIT "split operator" +%token T_OP_JOIN "join operator" + %token T_OP_OR "logical or" %token T_OP_AND "logical and" %token T_OP_NOT "logical not" @@ -86,18 +97,21 @@ %right T_OP_NOT %right T_OP_CONCAT -%type <exVal> expr -%type <exVal> comparison -%type <exVal> strfunccall "function" -%type <exVal> lstfunccall "listfunction" -%type <exVal> regex -%type <exVal> words -%type <exVal> wordlist -%type <exVal> word -%type <exVal> string -%type <exVal> strpart "stringpart" -%type <exVal> var "variable" -%type <exVal> backref "rebackref" +%type <exVal> cond "condition" +%type <exVal> comp "comparison" +%type <exVal> strfunc "string function" +%type <exVal> 
lstfunc "list function" +%type <exVal> wordlist "list of words" +%type <exVal> words "tuple of words" +%type <exVal> word "word expression" +%type <exVal> string "string expression" +%type <exVal> strany "any string expression" +%type <exVal> var "variable expression" +%type <exVal> regex "regular expression match" +%type <exVal> regsub "regular expression substitution" +%type <exVal> regsplit "regular expression split" +%type <exVal> regany "any regular expression" +%type <exVal> regref "regular expression back reference" %{ #include "util_expr_private.h" @@ -109,24 +123,24 @@ int ap_expr_yylex(YYSTYPE *lvalp, void *scanner); %% -root : T_EXPR_BOOL expr { ctx->expr = $2; } +root : T_EXPR_BOOL cond { ctx->expr = $2; } | T_EXPR_STRING string { ctx->expr = $2; } | T_ERROR { YYABORT; } ; -expr : T_TRUE { $$ = ap_expr_make(op_True, NULL, NULL, ctx); } +cond : T_TRUE { $$ = ap_expr_make(op_True, NULL, NULL, ctx); } | T_FALSE { $$ = ap_expr_make(op_False, NULL, NULL, ctx); } - | T_OP_NOT expr { $$ = ap_expr_make(op_Not, $2, NULL, ctx); } - | expr T_OP_OR expr { $$ = ap_expr_make(op_Or, $1, $3, ctx); } - | expr T_OP_AND expr { $$ = ap_expr_make(op_And, $1, $3, ctx); } - | comparison { $$ = ap_expr_make(op_Comp, $1, NULL, ctx); } + | T_OP_NOT cond { $$ = ap_expr_make(op_Not, $2, NULL, ctx); } + | cond T_OP_OR cond { $$ = ap_expr_make(op_Or, $1, $3, ctx); } + | cond T_OP_AND cond { $$ = ap_expr_make(op_And, $1, $3, ctx); } + | comp { $$ = ap_expr_make(op_Comp, $1, NULL, ctx); } | T_OP_UNARY word { $$ = ap_expr_unary_op_make( $1, $2, ctx); } | word T_OP_BINARY word { $$ = ap_expr_binary_op_make($2, $1, $3, ctx); } - | '(' expr ')' { $$ = $2; } + | '(' cond ')' { $$ = $2; } | T_ERROR { YYABORT; } ; -comparison: word T_OP_EQ word { $$ = ap_expr_make(op_EQ, $1, $3, ctx); } +comp : word T_OP_EQ word { $$ = ap_expr_make(op_EQ, $1, $3, ctx); } | word T_OP_NE word { $$ = ap_expr_make(op_NE, $1, $3, ctx); } | word T_OP_LT word { $$ = ap_expr_make(op_LT, $1, $3, ctx); } | word 
T_OP_LE word { $$ = ap_expr_make(op_LE, $1, $3, ctx); } @@ -143,70 +157,107 @@ comparison: word T_OP_EQ word { $$ = ap_expr_make(op_EQ, $1, $3, | word T_OP_NRE regex { $$ = ap_expr_make(op_NRE, $1, $3, ctx); } ; -wordlist : lstfunccall { $$ = $1; } +wordlist : lstfunc { $$ = $1; } + | word T_OP_REG regsplit { $$ = ap_expr_list_regex_make($1, $3, ctx); } + | wordlist T_OP_REG regany { $$ = ap_expr_list_regex_make($1, $3, ctx); } | '{' words '}' { $$ = $2; } + | '(' wordlist ')' { $$ = $2; } ; words : word { $$ = ap_expr_make(op_ListElement, $1, NULL, ctx); } - | words ',' word { $$ = ap_expr_make(op_ListElement, $3, $1, ctx); } + | word ',' words { $$ = ap_expr_make(op_ListElement, $1, $3, ctx); } ; -string : string strpart { $$ = ap_expr_make(op_Concat, $1, $2, ctx); } - | strpart { $$ = $1; } +string : strany { $$ = $1; } + | string strany { $$ = ap_expr_concat_make($1, $2, ctx); } | T_ERROR { YYABORT; } ; -strpart : T_STRING { $$ = ap_expr_make(op_String, $1, NULL, ctx); } +strany : T_STRING { $$ = ap_expr_make(op_String, $1, NULL, ctx); } | var { $$ = $1; } - | backref { $$ = $1; } + | regref { $$ = $1; } ; var : T_VAR_BEGIN T_ID T_VAR_END { $$ = ap_expr_var_make($2, ctx); } | T_VAR_BEGIN T_ID ':' string T_VAR_END { $$ = ap_expr_str_func_make($2, $4, ctx); } + | T_VAREXP_BEGIN word T_VAREXP_END { $$ = ap_expr_str_word_make($2, ctx); } + | T_VAREXP_BEGIN cond T_VAREXP_END { $$ = ap_expr_str_bool_make($2, ctx); } ; word : T_DIGIT { $$ = ap_expr_make(op_Digit, $1, NULL, ctx); } + | T_STR_BEGIN T_STR_END { $$ = ap_expr_make(op_String, "", NULL, ctx); } + | T_STR_BEGIN string T_STR_END { $$ = $2; } | word T_OP_CONCAT word { $$ = ap_expr_make(op_Concat, $1, $3, ctx); } + | word T_OP_REG regsub { $$ = ap_expr_make(op_Regsub, $1, $3, ctx); } | var { $$ = $1; } - | backref { $$ = $1; } - | strfunccall { $$ = $1; } - | T_STR_BEGIN string T_STR_END { $$ = $2; } - | T_STR_BEGIN T_STR_END { $$ = ap_expr_make(op_String, "", NULL, ctx); } + | regref { $$ = $1; } + | strfunc { 
$$ = $1; } + | T_OP_JOIN wordlist { + $$ = ap_expr_make(op_Join, $2, NULL, ctx); + } + | T_OP_JOIN wordlist ',' word { + $$ = ap_expr_make(op_Join, $2, $4, ctx); + } + | T_OP_JOIN '(' wordlist ',' word ')' { + $$ = ap_expr_make(op_Join, $3, $5, ctx); + } + | '(' word ')' { $$ = $2; } ; -regex : T_REGEX { - ap_regex_t *regex; - if ((regex = ap_pregcomp(ctx->pool, $1, - AP_REG_EXTENDED|AP_REG_NOSUB)) == NULL) { +regex : T_REGEX T_REG_MATCH T_REG_FLAGS { + ap_expr_t *e = ap_expr_regex_make($2, $3, NULL, 0, ctx); + if (!e) { ctx->error = "Failed to compile regular expression"; YYERROR; } - $$ = ap_expr_make(op_Regex, regex, NULL, ctx); + $$ = e; } - | T_REGEX_I { - ap_regex_t *regex; - if ((regex = ap_pregcomp(ctx->pool, $1, - AP_REG_EXTENDED|AP_REG_NOSUB|AP_REG_ICASE)) == NULL) { + ; +regsub : T_REGSUB T_REG_MATCH string T_REG_FLAGS { + ap_expr_t *e = ap_expr_regex_make($2, $4, $3, 0, ctx); + if (!e) { + ctx->error = "Failed to compile regular expression"; + YYERROR; + } + $$ = e; + } + ; +regsplit : T_OP_SPLIT T_REG_MATCH string T_REG_FLAGS { + /* Returns a list: + * <word> ~= split/:// + * => split around ':', replace it with empty + * <word> ~= split/:/\n/ + * => split around ':', replace it with '\n' + * <list> ~= split/.*?Ip Address:([^,]+)/$1/ + * => split around the whole match, replace it with $1 + */ + ap_expr_t *e = ap_expr_regex_make($2, $4, $3, 1, ctx); + if (!e) { ctx->error = "Failed to compile regular expression"; YYERROR; } - $$ = ap_expr_make(op_Regex, regex, NULL, ctx); + $$ = e; } ; +regany : regex { $$ = $1; } + | regsub { $$ = $1; } + | regsplit { $$ = $1; } + ; -backref : T_REGEX_BACKREF { +regref : T_REG_REF { int *n = apr_palloc(ctx->pool, sizeof(int)); *n = $1; - $$ = ap_expr_make(op_RegexBackref, n, NULL, ctx); + $$ = ap_expr_make(op_Regref, n, NULL, ctx); } - ; + ; -lstfunccall : T_ID '(' word ')' { $$ = ap_expr_list_func_make($1, $3, ctx); } - ; +lstfunc : T_ID '(' word ')' { $$ = ap_expr_list_func_make($1, $3, ctx); } + /* | T_ID '(' words 
')' { $$ = ap_expr_list_func_make($1, $3, ctx); } */ + ; -strfunccall : T_ID '(' word ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } - | T_ID '(' words ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } - ; +strfunc : T_ID '(' word ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } + | T_ID '(' words ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } + ; %% |