diff options
author | Yann Ylavic <ylavic@apache.org> | 2017-10-04 18:18:46 +0200 |
---|---|---|
committer | Yann Ylavic <ylavic@apache.org> | 2017-10-04 18:18:46 +0200 |
commit | e172ca33e2ab5f97b08ec96bc6d138ba021a3f02 (patch) | |
tree | 3348b47782f97bd4189a66410aed3681d52177b7 | |
parent | On the trunk: (diff) | |
download | apache2-e172ca33e2ab5f97b08ec96bc6d138ba021a3f02.tar.xz apache2-e172ca33e2ab5f97b08ec96bc6d138ba021a3f02.zip |
ap_expr: follow up to r1810605.
The "split" and "join" operators are now prefix operators, as in Perl.
Add the "sub" operator for string substitutions, also as a prefix operator.
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1811104 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | server/util_expr_eval.c | 113 | ||||
-rw-r--r-- | server/util_expr_parse.y | 159 | ||||
-rw-r--r-- | server/util_expr_private.h | 21 | ||||
-rw-r--r-- | server/util_expr_scan.l | 56 |
4 files changed, 127 insertions, 222 deletions
diff --git a/server/util_expr_eval.c b/server/util_expr_eval.c index 3898566e53..ab36dbd0a9 100644 --- a/server/util_expr_eval.c +++ b/server/util_expr_eval.c @@ -67,7 +67,7 @@ static const char *ap_expr_eval_var(ap_expr_eval_ctx_t *ctx, const void *data); typedef struct { - int type, flags; + int flags; const ap_expr_t *subst; } ap_expr_regctx_t; @@ -228,13 +228,13 @@ static const char *ap_expr_eval_word(ap_expr_eval_ctx_t *ctx, result = ap_expr_list_pstrcat(ctx->p, list, sep); break; } - case op_Regsub: { + case op_Sub: { const ap_expr_t *reg = node->node_arg2; const char *subject = ap_expr_eval_word(ctx, node->node_arg1); result = ap_expr_regexec(subject, reg, NULL, ctx); break; } - case op_Regref: { + case op_Backref: { const unsigned int *np = node->node_arg1; result = ap_expr_eval_re_backref(ctx, *np); break; @@ -327,7 +327,7 @@ static const char *ap_expr_regexec(const char *subject, nmatch = ctx->re_nmatch; pmatch = ctx->re_pmatch; } - else if (regctx->type != 'm') { + else if (regctx->subst) { nmatch = 1; pmatch = &match0; } @@ -339,17 +339,7 @@ static const char *ap_expr_regexec(const char *subject, */ rv = ap_regexec(regex, val, nmatch, pmatch, empty ? AP_REG_ANCHORED | AP_REG_NOTEMPTY : 0); - if (regctx->type == 'm') { - /* Simple match "m//", just return whether it matched (subject) - * or not (NULL) - */ - return (rv == 0) ? subject : NULL; - } if (rv == 0) { - /* Substitution "s//" or split "S//" matched. - * s// => replace $0 with evaluated regctx->subst - * S// => split at $0 (keeping evaluated regctx->subst if any) - */ int pos = pmatch[0].rm_so, end = pmatch[0].rm_eo; AP_DEBUG_ASSERT(pos >= 0 && pos <= end); @@ -359,16 +349,13 @@ static const char *ap_expr_regexec(const char *subject, str = ap_expr_eval_word(ctx, regctx->subst); len = strlen(str); } - /* Splitting makes sense into a given list only, if NULL we fall - * back into returning a s// string... 
- */ if (list) { char *tmp = apr_palloc(ctx->p, pos + len + 1); memcpy(tmp, val, pos); memcpy(tmp + pos, str, len + 1); APR_ARRAY_PUSH(list, const char*) = tmp; } - else { /* regctx->type == 's' */ + else { ap_varbuf_grow(&vb, pos + len + 1); ap_varbuf_strmemcat(&vb, val, pos); ap_varbuf_strmemcat(&vb, str, len); @@ -383,7 +370,7 @@ static const char *ap_expr_regexec(const char *subject, val += end; } else if (empty) { - /* Skip this non-matching character (or CRLF) and restart + /* Skip this non-matching character (or full CRLF) and restart * another "normal" match (possibly empty) from there. */ if (val[0] == '\r' && val[1] == '\n') { @@ -416,25 +403,16 @@ static apr_array_header_t *ap_expr_list_make(ap_expr_eval_ctx_t *ctx, { apr_array_header_t *list = NULL; - if (node->node_op == op_ListRegex) { + if (node->node_op == op_Split) { const ap_expr_t *arg = node->node_arg1; const ap_expr_t *reg = node->node_arg2; - const ap_expr_regctx_t *regctx = reg->node_arg2; const apr_array_header_t *source = ap_expr_list_make(ctx, arg); int i; list = apr_array_make(ctx->p, source->nelts, sizeof(const char*)); for (i = 0; i < source->nelts; ++i) { const char *val = APR_ARRAY_IDX(source, i, const char*); - if (regctx->type == 'S') { - (void)ap_expr_regexec(val, reg, list, ctx); - } - else { - val = ap_expr_regexec(val, reg, NULL, ctx); - if (val) { - APR_ARRAY_PUSH(list, const char*) = val; - } - } + (void)ap_expr_regexec(val, reg, list, ctx); } } else if (node->node_op == op_ListElement) { @@ -461,10 +439,8 @@ static apr_array_header_t *ap_expr_list_make(ap_expr_eval_ctx_t *ctx, ap_expr_eval_word(ctx, node->node_arg2)); } else { - const char *subject = ap_expr_eval_word(ctx, node); - - list = apr_array_make(ctx->p, 8, sizeof(const char*)); - (void)ap_expr_regexec(subject, node->node_arg2, list, ctx); + list = apr_array_make(ctx->p, 1, sizeof(const char*)); + APR_ARRAY_PUSH(list, const char*) = ap_expr_eval_word(ctx, node); } return list; @@ -698,37 +674,15 @@ ap_expr_t 
*ap_expr_concat_make(const void *a1, const void *a2, return ap_expr_make(op_Concat, a1, a2, ctx); } -ap_expr_t *ap_expr_str_word_make(const ap_expr_t *arg, - ap_expr_parse_ctx_t *ctx) -{ - ap_expr_t *node = apr_palloc(ctx->pool, sizeof(ap_expr_t)); - node->node_op = op_Word; - node->node_arg1 = arg; - node->node_arg2 = NULL; - return node; -} - -ap_expr_t *ap_expr_str_bool_make(const ap_expr_t *arg, - ap_expr_parse_ctx_t *ctx) -{ - ap_expr_t *node = apr_palloc(ctx->pool, sizeof(ap_expr_t)); - node->node_op = op_Bool; - node->node_arg1 = arg; - node->node_arg2 = NULL; - return node; -} - -ap_expr_t *ap_expr_regex_make(const char *pattern, const char *flags, - const ap_expr_t *subst, int split, - ap_expr_parse_ctx_t *ctx) +ap_expr_t *ap_expr_regex_make(const char *pattern, const ap_expr_t *subst, + const char *flags, ap_expr_parse_ctx_t *ctx) { ap_expr_t *node = NULL; ap_expr_regctx_t *regctx; ap_regex_t *regex; - regctx = apr_palloc(ctx->pool, sizeof *regctx); + regctx = apr_pcalloc(ctx->pool, sizeof *regctx); regctx->subst = subst; - regctx->flags = 0; if (flags) { for (; *flags; ++flags) { switch (*flags) { @@ -747,19 +701,6 @@ ap_expr_t *ap_expr_regex_make(const char *pattern, const char *flags, } } } - if (subst) { - if (split) { - regctx->type = 'S'; - regctx->flags |= AP_REG_MULTI; - } - else { - regctx->type = 's'; - } - } - else { - regctx->type = 'm'; - } - regex = ap_pregcomp(ctx->pool, pattern, regctx->flags); if (!regex) { return NULL; @@ -832,16 +773,6 @@ ap_expr_t *ap_expr_list_func_make(const char *name, const ap_expr_t *arg, return ap_expr_make(op_ListFuncCall, info, arg, ctx); } -ap_expr_t *ap_expr_list_regex_make(const ap_expr_t *arg, const ap_expr_t *reg, - ap_expr_parse_ctx_t *ctx) -{ - ap_expr_t *node = apr_palloc(ctx->pool, sizeof(ap_expr_t)); - node->node_op = op_ListRegex; - node->node_arg1 = arg; - node->node_arg2 = reg; - return node; -} - ap_expr_t *ap_expr_unary_op_make(const char *name, const ap_expr_t *arg, ap_expr_parse_ctx_t *ctx) { 
@@ -878,6 +809,12 @@ ap_expr_t *ap_expr_var_make(const char *name, ap_expr_parse_ctx_t *ctx) return node; } +ap_expr_t *ap_expr_backref_make(int num, ap_expr_parse_ctx_t *ctx) +{ + int *n = apr_pmemdup(ctx->pool, &num, sizeof(num)); + return ap_expr_make(op_Backref, n, NULL, ctx); +} + #ifdef AP_EXPR_DEBUG #define MARK APLOG_MARK,loglevel,0,s @@ -965,13 +902,13 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s, case op_NRE: case op_Word: case op_Bool: + case op_Sub: case op_Join: - case op_Regsub: + case op_Split: case op_Concat: case op_StringFuncCall: case op_ListFuncCall: case op_ListElement: - case op_ListRegex: { char *name; switch (e->node_op) { @@ -996,13 +933,13 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s, CASE_OP(op_NRE); CASE_OP(op_Word); CASE_OP(op_Bool); + CASE_OP(op_Sub); CASE_OP(op_Join); - CASE_OP(op_Regsub); + CASE_OP(op_Split); CASE_OP(op_Concat); CASE_OP(op_StringFuncCall); CASE_OP(op_ListFuncCall); CASE_OP(op_ListElement); - CASE_OP(op_ListRegex); default: ap_assert(0); } @@ -1048,8 +985,8 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s, DUMP_P("op_Regex", e->node_arg1); break; /* arg1: pointer to int */ - case op_Regref: - DUMP_IP("op_Regref", e->node_arg1); + case op_Backref: + DUMP_IP("op_Backref", e->node_arg1); break; default: ap_log_error(MARK, "%*sERROR: INVALID OP %d", indent, " ", e->node_op); diff --git a/server/util_expr_parse.y b/server/util_expr_parse.y index 84eb054e6f..b6d586c614 100644 --- a/server/util_expr_parse.y +++ b/server/util_expr_parse.y @@ -50,12 +50,12 @@ %token <cpVal> T_ID "identifier" %token <cpVal> T_STRING "string literal" -%token T_REGEX "matching regex" -%token T_REGSUB "substitution regex" +%token T_REGEX "start of matching regex" +%token T_REGSUB "start of substitution regex" %token <cpVal> T_REG_MATCH "pattern of the regex" -%token <cpVal> T_REG_SUBST "replacement of the regex" +%token <cpVal> T_REG_SUBST "substitution of the regex" %token 
<cpVal> T_REG_FLAGS "pattern flags of the regex" -%token <num> T_REG_REF "capture reference in the regex" +%token <num> T_BACKREF "regex back reference" %token <cpVal> T_OP_UNARY "unary operator" %token <cpVal> T_OP_BINARY "binary operator" @@ -85,33 +85,35 @@ %token T_OP_CONCAT "string concatenation" -%token T_OP_SPLIT "split operator" %token T_OP_JOIN "join operator" +%token T_OP_SPLIT "split operator" +%token T_OP_SUB "substitute operator" %token T_OP_OR "logical or" %token T_OP_AND "logical and" %token T_OP_NOT "logical not" -%right T_OP_OR -%right T_OP_AND +%left T_OP_OR +%left T_OP_AND %right T_OP_NOT %right T_OP_CONCAT %type <exVal> cond "condition" %type <exVal> comp "comparison" %type <exVal> strfunc "string function" -%type <exVal> lstfunc "list function" -%type <exVal> wordlist "list of words" -%type <exVal> words "tuple of words" +%type <exVal> listfunc "list function" +%type <exVal> list "list" +%type <exVal> words "words" %type <exVal> word "word" %type <exVal> string "string" %type <exVal> substr "substring" %type <exVal> var "variable" -%type <exVal> regex "regex match" -%type <exVal> regsub "regex substitution" -%type <exVal> regsplit "regex split" -%type <exVal> regany "regex any" -%type <exVal> regref "regex capture reference" +%type <exVal> regex "match regex" +%type <exVal> regsub "substitution regex" +%type <exVal> regany "any regex" +%type <exVal> split "split" +%type <exVal> join "join" +%type <exVal> sub "sub" %{ #include "util_expr_private.h" @@ -123,8 +125,13 @@ int ap_expr_yylex(YYSTYPE *lvalp, void *scanner); %% -root : T_EXPR_BOOL cond { ctx->expr = $2; } - | T_EXPR_STRING string { ctx->expr = $2; } +expr : T_EXPR_STRING string { ctx->expr = $2; } + | T_EXPR_BOOL cond { ctx->expr = $2; } + | T_ERROR { YYABORT; } + ; + +string : substr { $$ = $1; } + | string substr { $$ = ap_expr_concat_make($1, $2, ctx); } | T_ERROR { YYABORT; } ; @@ -152,60 +159,69 @@ comp : word T_OP_EQ word { $$ = ap_expr_make(op_EQ, $1, $3, | word T_OP_STR_LE word 
{ $$ = ap_expr_make(op_STR_LE, $1, $3, ctx); } | word T_OP_STR_GT word { $$ = ap_expr_make(op_STR_GT, $1, $3, ctx); } | word T_OP_STR_GE word { $$ = ap_expr_make(op_STR_GE, $1, $3, ctx); } - | word T_OP_IN wordlist { $$ = ap_expr_make(op_IN, $1, $3, ctx); } | word T_OP_REG regex { $$ = ap_expr_make(op_REG, $1, $3, ctx); } | word T_OP_NRE regex { $$ = ap_expr_make(op_NRE, $1, $3, ctx); } + | word T_OP_IN list { $$ = ap_expr_make(op_IN, $1, $3, ctx); } ; -wordlist : lstfunc { $$ = $1; } - | word T_OP_REG regsplit { $$ = ap_expr_list_regex_make($1, $3, ctx); } - | wordlist T_OP_REG regany { $$ = ap_expr_list_regex_make($1, $3, ctx); } - | '{' words '}' { $$ = $2; } - | '(' wordlist ')' { $$ = $2; } - ; - -words : word { $$ = ap_expr_make(op_ListElement, $1, NULL, ctx); } - | word ',' words { $$ = ap_expr_make(op_ListElement, $1, $3, ctx); } +word : T_DIGIT { $$ = ap_expr_make(op_Digit, $1, NULL, ctx); } + | T_STR_BEGIN T_STR_END { $$ = ap_expr_make(op_String, "", NULL, ctx); } + | T_STR_BEGIN string T_STR_END { $$ = $2; } + | word T_OP_CONCAT word { $$ = ap_expr_make(op_Concat, $1, $3, ctx); } + | var { $$ = $1; } + | sub { $$ = $1; } + | join { $$ = $1; } + | strfunc { $$ = $1; } + | '(' word ')' { $$ = $2; } ; -string : substr { $$ = $1; } - | string substr { $$ = ap_expr_concat_make($1, $2, ctx); } - | T_ERROR { YYABORT; } +list : split { $$ = $1; } + | listfunc { $$ = $1; } + | '{' words '}' { $$ = $2; } + | '(' list ')' { $$ = $2; } ; substr : T_STRING { $$ = ap_expr_make(op_String, $1, NULL, ctx); } | var { $$ = $1; } - | regref { $$ = $1; } ; var : T_VAR_BEGIN T_ID T_VAR_END { $$ = ap_expr_var_make($2, ctx); } | T_VAR_BEGIN T_ID ':' string T_VAR_END { $$ = ap_expr_str_func_make($2, $4, ctx); } - | T_VAREXP_BEGIN word T_VAREXP_END { $$ = ap_expr_str_word_make($2, ctx); } - | T_VAREXP_BEGIN cond T_VAREXP_END { $$ = ap_expr_str_bool_make($2, ctx); } + | T_VAREXP_BEGIN cond T_VAREXP_END { $$ = ap_expr_make(op_Bool, $2, NULL, ctx); } + | T_VAREXP_BEGIN word 
T_VAREXP_END { $$ = ap_expr_make(op_Word, $2, NULL, ctx); } + | T_BACKREF { $$ = ap_expr_backref_make($1, ctx); } ; -word : T_DIGIT { $$ = ap_expr_make(op_Digit, $1, NULL, ctx); } - | T_STR_BEGIN T_STR_END { $$ = ap_expr_make(op_String, "", NULL, ctx); } - | T_STR_BEGIN string T_STR_END { $$ = $2; } - | word T_OP_CONCAT word { $$ = ap_expr_make(op_Concat, $1, $3, ctx); } - | word T_OP_REG regsub { $$ = ap_expr_make(op_Regsub, $1, $3, ctx); } - | var { $$ = $1; } - | regref { $$ = $1; } - | strfunc { $$ = $1; } - | T_OP_JOIN wordlist { - $$ = ap_expr_make(op_Join, $2, NULL, ctx); - } - | T_OP_JOIN wordlist ',' word { - $$ = ap_expr_make(op_Join, $2, $4, ctx); - } - | T_OP_JOIN '(' wordlist ',' word ')' { - $$ = ap_expr_make(op_Join, $3, $5, ctx); - } - | '(' word ')' { $$ = $2; } +strfunc : T_ID '(' word ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } + | T_ID '(' words ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } + ; + +listfunc : T_ID '(' word ')' { $$ = ap_expr_list_func_make($1, $3, ctx); } + /* | T_ID '(' words ')' { $$ = ap_expr_list_func_make($1, $3, ctx); } */ + ; + +sub : T_OP_SUB regsub ',' word { $$ = ap_expr_make(op_Sub, $4, $2, ctx); } + | T_OP_SUB '(' regsub ',' word ')' { $$ = ap_expr_make(op_Sub, $5, $3, ctx); } + ; + +join : T_OP_JOIN list { $$ = ap_expr_make(op_Join, $2, NULL, ctx); } + | T_OP_JOIN '(' list ')' { $$ = ap_expr_make(op_Join, $3, NULL, ctx); } + | T_OP_JOIN list ',' word { $$ = ap_expr_make(op_Join, $2, $4, ctx); } + | T_OP_JOIN '(' list ',' word ')' { $$ = ap_expr_make(op_Join, $3, $5, ctx); } + ; + +split : T_OP_SPLIT regany ',' list { $$ = ap_expr_make(op_Split, $4, $2, ctx); } + | T_OP_SPLIT '(' regany ',' list ')' { $$ = ap_expr_make(op_Split, $5, $3, ctx); } + | T_OP_SPLIT regany ',' word { $$ = ap_expr_make(op_Split, $4, $2, ctx); } + | T_OP_SPLIT '(' regany ',' word ')' { $$ = ap_expr_make(op_Split, $5, $3, ctx); } + ; + +words : word { $$ = ap_expr_make(op_ListElement, $1, NULL, ctx); } + | word ',' words { $$ = 
ap_expr_make(op_ListElement, $1, $3, ctx); } ; regex : T_REGEX T_REG_MATCH T_REG_FLAGS { - ap_expr_t *e = ap_expr_regex_make($2, $3, NULL, 0, ctx); + ap_expr_t *e = ap_expr_regex_make($2, NULL, $3, ctx); if (!e) { ctx->error = "Failed to compile regular expression"; YYERROR; @@ -214,7 +230,7 @@ regex : T_REGEX T_REG_MATCH T_REG_FLAGS { } ; regsub : T_REGSUB T_REG_MATCH string T_REG_FLAGS { - ap_expr_t *e = ap_expr_regex_make($2, $4, $3, 0, ctx); + ap_expr_t *e = ap_expr_regex_make($2, $3, $4, ctx); if (!e) { ctx->error = "Failed to compile regular expression"; YYERROR; @@ -222,41 +238,8 @@ regsub : T_REGSUB T_REG_MATCH string T_REG_FLAGS { $$ = e; } ; -regsplit : T_OP_SPLIT T_REG_MATCH string T_REG_FLAGS { - /* Returns a list: - * <word> ~= split/:// - * => split around ':', replace it with empty - * <word> ~= split/:/\n/ - * => split around ':', replace it with '\n' - * <list> ~= split/.*?Ip Address:([^,]+)/$1/ - * => split around the whole match, replace it with $1 - */ - ap_expr_t *e = ap_expr_regex_make($2, $4, $3, 1, ctx); - if (!e) { - ctx->error = "Failed to compile regular expression"; - YYERROR; - } - $$ = e; - } - ; -regany : regex { $$ = $1; } - | regsub { $$ = $1; } - | regsplit { $$ = $1; } - ; - -regref : T_REG_REF { - int *n = apr_palloc(ctx->pool, sizeof(int)); - *n = $1; - $$ = ap_expr_make(op_Regref, n, NULL, ctx); - } - ; - -lstfunc : T_ID '(' word ')' { $$ = ap_expr_list_func_make($1, $3, ctx); } - /* | T_ID '(' words ')' { $$ = ap_expr_list_func_make($1, $3, ctx); } */ - ; - -strfunc : T_ID '(' word ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } - | T_ID '(' words ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } +regany : regex { $$ = $1; } + | regsub { $$ = $1; } ; %% diff --git a/server/util_expr_private.h b/server/util_expr_private.h index b0fcacefef..b81cdb9450 100644 --- a/server/util_expr_private.h +++ b/server/util_expr_private.h @@ -54,10 +54,10 @@ typedef enum { op_REG, op_NRE, op_STR_EQ, op_STR_NE, op_STR_LT, op_STR_LE, op_STR_GT, 
op_STR_GE, op_Concat, - op_Digit, op_String, - op_Var, op_Word, op_Bool, op_Join, - op_Regex, op_Regsub, op_Regref, - op_ListElement, op_ListRegex, + op_String, op_Word, + op_Digit, op_Var, op_Bool, op_ListElement, + op_Sub, op_Split, op_Join, + op_Regex, op_Backref, /* * call external functions/operators. * The info node contains the function pointer and some function specific @@ -130,23 +130,18 @@ ap_expr_t *ap_expr_make(ap_expr_node_op_e op, const void *arg1, const void *arg2, ap_expr_parse_ctx_t *ctx); ap_expr_t *ap_expr_concat_make(const void *a1, const void *a2, ap_expr_parse_ctx_t *ctx); -ap_expr_t *ap_expr_str_word_make(const ap_expr_t *arg, - ap_expr_parse_ctx_t *ctx); -ap_expr_t *ap_expr_str_bool_make(const ap_expr_t *arg, - ap_expr_parse_ctx_t *ctx); -ap_expr_t *ap_expr_regex_make(const char *pattern, const char *flags, - const ap_expr_t *subst, int split, - ap_expr_parse_ctx_t *ctx); +ap_expr_t *ap_expr_regex_make(const char *pattern, const ap_expr_t *subst, + const char *flags, ap_expr_parse_ctx_t *ctx); /* create parse tree node for the string-returning function 'name' */ ap_expr_t *ap_expr_str_func_make(const char *name, const ap_expr_t *arg, ap_expr_parse_ctx_t *ctx); /* create parse tree node for the list-returning function 'name' */ ap_expr_t *ap_expr_list_func_make(const char *name, const ap_expr_t *arg, ap_expr_parse_ctx_t *ctx); -ap_expr_t *ap_expr_list_regex_make(const ap_expr_t *lst, const ap_expr_t *re, - ap_expr_parse_ctx_t *ctx); /* create parse tree node for the variable 'name' */ ap_expr_t *ap_expr_var_make(const char *name, ap_expr_parse_ctx_t *ctx); +/* create parse tree node for the back reference 'num' */ +ap_expr_t *ap_expr_backref_make(int num, ap_expr_parse_ctx_t *ctx); /* create parse tree node for the unary operator 'name' */ ap_expr_t *ap_expr_unary_op_make(const char *name, const ap_expr_t *arg, ap_expr_parse_ctx_t *ctx); diff --git a/server/util_expr_scan.l b/server/util_expr_scan.l index a7d5e99267..5bc5a019fb 100644 --- 
a/server/util_expr_scan.l +++ b/server/util_expr_scan.l @@ -37,7 +37,7 @@ %x str expr %x var vararg -%x split regex regsub regflags +%x regex regsub regflags %{ #include "util_expr_private.h" @@ -174,7 +174,7 @@ VAR_END (\}) VAREXP_BEGIN (\%\{\:) VAREXP_END (\:\}) REG_SEP [/#$%^|?!'",;:._-] -REG_REF (\$[0-9]) +BACKREF (\$[0-9]) %% @@ -228,14 +228,14 @@ REG_REF (\$[0-9]) } /* regexp backref inside string/arg */ -<str,vararg,regsub>{REG_REF} { +<str,vararg,regsub>{BACKREF} { if (!STR_EMPTY()) { yyless(0); /* come back below */ yylval->cpVal = STR_RETURN(); return T_STRING; } yylval->num = yytext[1] - '0'; - return T_REG_REF; + return T_BACKREF; } /* variable inside string/arg */ @@ -358,6 +358,7 @@ REG_REF (\$[0-9]) } <regex>{ANY} { if (yytext[0] == str_stop) { + yylval->cpVal = STR_RETURN(); STATE_POP(0); /* <regex> */ if (str_flag == 'm') { STATE_PUSH(regflags, 0); @@ -365,33 +366,30 @@ REG_REF (\$[0-9]) else { STATE_PUSH(regsub, 0); } - yylval->cpVal = STR_RETURN(); return T_REG_MATCH; } STR_APPEND_CHECK(yytext[0], 1); } <regsub>{ANY} { if (yytext[0] == str_stop) { + yylval->cpVal = STR_RETURN(); STATE_POP(0); /* <regsub> */ STATE_PUSH(regflags, 0); + return T_STRING; } - else { - STR_APPEND_CHECK(yytext[0], 1); - } + STR_APPEND_CHECK(yytext[0], 1); } <regflags>{ANY} { - if (ap_strchr_c("ismg", yytext[0])) { - STR_APPEND_NOCHECK(yytext[0]); - } - else if (apr_isalnum(yytext[0])) { - PERROR("Invalid regexp flag(s)"); - } - else { + if (!ap_strchr_c("ismg", yytext[0])) { + if (apr_isalnum(yytext[0])) { + PERROR("Invalid regexp flag(s)"); + } yyless(0); /* not a flags, rewind */ yylval->cpVal = STR_RETURN(); STATE_POP(1); /* <regflags> */ return T_REG_FLAGS; } + STR_APPEND_NOCHECK(yytext[0]); } <regflags><<EOF>> { yylval->cpVal = STR_RETURN(); @@ -402,9 +400,9 @@ REG_REF (\$[0-9]) PERROR("Unterminated regexp"); } -<expr>{REG_REF} { +<expr>{BACKREF} { yylval->num = yytext[1] - '0'; - return T_REG_REF; + return T_BACKREF; } /* @@ -452,22 +450,9 @@ REG_REF (\$[0-9]) 
return T_OP_UNARY; } - /* Split a string (or list) into a(nother) list */ -<expr>"split" { - STATE_PUSH(split, 0); - return T_OP_SPLIT; -} -<split>{REG_SEP} { - STATE_POP(0); /* <split> */ - STATE_PUSH(regex, 1); - str_stop = yytext[0]; - str_flag = 'S'; -} -<split>{ANY} { - PERROR("Expecting split regular expression"); -} -<split><<EOF>> { - PERROR("Unterminated split"); + /* Apply subtitution to a string */ +<expr>"sub" { + return T_OP_SUB; } /* Join a list into a string */ @@ -475,6 +460,11 @@ REG_REF (\$[0-9]) return T_OP_JOIN; } + /* Split a string (or list) into a(nother) list */ +<expr>"split" { + return T_OP_SPLIT; +} + /* * Specials */ |