diff options
author | William A. Rowe Jr <wrowe@apache.org> | 2016-12-09 20:06:06 +0100 |
---|---|---|
committer | William A. Rowe Jr <wrowe@apache.org> | 2016-12-09 20:06:06 +0100 |
commit | 12cfcf08fffc6e4ec597e0396016d09afdb89fa8 (patch) | |
tree | 66e9f56e55c5e6747d416aae98c153ba7f759cfa /server/util_pcre.c | |
parent | ProxyPass ! doesn't block per-directory ProxyPass (diff) | |
download | apache2-12cfcf08fffc6e4ec597e0396016d09afdb89fa8.tar.xz apache2-12cfcf08fffc6e4ec597e0396016d09afdb89fa8.zip |
Replace PCRE with PCRE2 where it is available.
This patch removes the needless assignment of re_erroffset in the conf pool
by the worker threads; such mistakes break the shared copy-on-write pages of
memory that should have remained common between all httpd worker processes.
Two de-optimizations are inherent in this patch; the former ovector-on-stack
opportunity is lost unless implemented as a new general context. It would be
safer to either create a new general context using pool allocation, or recycle
a per-pool or per-thread match_data buffer of some arbitrary 10 elements or so.
Submitted by: wrowe, Petr Pisar <ppisar@redhat.com>
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1773454 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'server/util_pcre.c')
-rw-r--r-- | server/util_pcre.c | 164 |
1 files changed, 119 insertions, 45 deletions
diff --git a/server/util_pcre.c b/server/util_pcre.c index 3e4fbb9444..9c83e203a7 100644 --- a/server/util_pcre.c +++ b/server/util_pcre.c @@ -46,10 +46,18 @@ POSSIBILITY OF SUCH DAMAGE. #include "httpd.h" #include "apr_strings.h" #include "apr_tables.h" + +#ifdef HAVE_PCRE2 +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" +#define PCREn(x) PCRE2_ ## x +#else #include "pcre.h" +#define PCREn(x) PCRE_ ## x +#endif /* PCRE_DUPNAMES is only present since version 6.7 of PCRE */ -#ifndef PCRE_DUPNAMES +#if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2) #error PCRE Version 6.7 or later required! #else @@ -74,11 +82,19 @@ static const char *const pstring[] = { AP_DECLARE(const char *) ap_pcre_version_string(int which) { +#ifdef HAVE_PCRE2 + static char buf[80]; +#endif switch (which) { case AP_REG_PCRE_COMPILED: - return APR_STRINGIFY(PCRE_MAJOR) "." APR_STRINGIFY(PCRE_MINOR) " " APR_STRINGIFY(PCRE_DATE); + return APR_STRINGIFY(PCREn(MAJOR)) "." APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE)); case AP_REG_PCRE_LOADED: +#ifdef HAVE_PCRE2 + pcre2_config(PCRE2_CONFIG_VERSION, buf); + return buf; +#else return pcre_version(); +#endif default: return "Unknown"; } @@ -118,7 +134,11 @@ AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg, AP_DECLARE(void) ap_regfree(ap_regex_t *preg) { +#ifdef HAVE_PCRE2 + pcre2_code_free(preg->re_pcre); +#else (pcre_free)(preg->re_pcre); +#endif } @@ -139,34 +159,48 @@ AP_DECLARE(void) ap_regfree(ap_regex_t *preg) */ AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags) { +#ifdef HAVE_PCRE2 + uint32_t capcount; + size_t erroffset; +#else const char *errorptr; int erroffset; +#endif int errcode = 0; - int options = PCRE_DUPNAMES; + int options = PCREn(DUPNAMES); if ((cflags & AP_REG_ICASE) != 0) - options |= PCRE_CASELESS; + options |= PCREn(CASELESS); if ((cflags & AP_REG_NEWLINE) != 0) - options |= PCRE_MULTILINE; + options |= PCREn(MULTILINE); if ((cflags & AP_REG_DOTALL) != 0) - 
options |= PCRE_DOTALL; + options |= PCREn(DOTALL); - preg->re_pcre = - pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL); - preg->re_erroffset = erroffset; +#ifdef HAVE_PCRE2 + preg->re_pcre = pcre2_compile((const unsigned char *)pattern, + PCRE2_ZERO_TERMINATED, options, &errcode, + &erroffset, NULL); +#else + preg->re_pcre = pcre_compile2(pattern, options, &errcode, + &errorptr, &erroffset, NULL); +#endif + preg->re_erroffset = erroffset; if (preg->re_pcre == NULL) { - /* - * There doesn't seem to be constants defined for compile time error - * codes. 21 is "failed to get memory" according to pcreapi(3). - */ + /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */ if (errcode == 21) return AP_REG_ESPACE; return AP_REG_INVARG; } +#ifdef HAVE_PCRE2 + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_CAPTURECOUNT, &capcount); + preg->re_nsub = capcount; +#else pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub)); + PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub)); +#endif return 0; } @@ -198,17 +232,33 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff, { int rc; int options = 0; - int *ovector = NULL; +#ifdef HAVE_PCRE2 + pcre2_match_data *matchdata; + size_t *ovector; +#else int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; int allocated_ovector = 0; + int *ovector = NULL; +#endif if ((eflags & AP_REG_NOTBOL) != 0) - options |= PCRE_NOTBOL; + options |= PCREn(NOTBOL); if ((eflags & AP_REG_NOTEOL) != 0) - options |= PCRE_NOTEOL; - - ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */ - + options |= PCREn(NOTEOL); + +#ifdef HAVE_PCRE2 + /* TODO: create a generic TLS matchdata buffer of some nmatch limit, + * e.g. 10 matches, to avoid a malloc-per-call. If it must be alloced, + * implement a general context using palloc and no free implementation. 
+ */ + matchdata = pcre2_match_data_create(nmatch, NULL); + if (matchdata == NULL) + return AP_REG_ESPACE; + ovector = pcre2_get_ovector_pointer(matchdata); + rc = pcre2_match((const pcre2_code *)preg->re_pcre, + (const unsigned char *)buff, len, + 0, options, matchdata, NULL); +#else if (nmatch > 0) { if (nmatch <= POSIX_MALLOC_THRESHOLD) { ovector = &(small_ovector[0]); @@ -220,52 +270,64 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff, allocated_ovector = 1; } } - rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len, 0, options, ovector, nmatch * 3); +#endif if (rc == 0) rc = nmatch; /* All captured slots were filled in */ if (rc >= 0) { apr_size_t i; - for (i = 0; i < (apr_size_t)rc; i++) { + apr_size_t nlim = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch; + for (i = 0; i < nlim; i++) { pmatch[i].rm_so = ovector[i * 2]; pmatch[i].rm_eo = ovector[i * 2 + 1]; } - if (allocated_ovector) - free(ovector); for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; - return 0; } +#ifdef HAVE_PCRE2 + pcre2_match_data_free(matchdata); +#else + if (allocated_ovector) + free(ovector); +#endif + + if (rc >= 0) { + return 0; + } else { - if (allocated_ovector) - free(ovector); +#ifdef HAVE_PCRE2 + if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21) + return AP_REG_INVARG; +#endif switch (rc) { - case PCRE_ERROR_NOMATCH: + case PCREn(ERROR_NOMATCH): return AP_REG_NOMATCH; - case PCRE_ERROR_NULL: + case PCREn(ERROR_NULL): return AP_REG_INVARG; - case PCRE_ERROR_BADOPTION: + case PCREn(ERROR_BADOPTION): return AP_REG_INVARG; - case PCRE_ERROR_BADMAGIC: + case PCREn(ERROR_BADMAGIC): return AP_REG_INVARG; - case PCRE_ERROR_UNKNOWN_NODE: - return AP_REG_ASSERT; - case PCRE_ERROR_NOMEMORY: + case PCREn(ERROR_NOMEMORY): return AP_REG_ESPACE; -#ifdef PCRE_ERROR_MATCHLIMIT - case PCRE_ERROR_MATCHLIMIT: +#if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT) + case PCREn(ERROR_MATCHLIMIT): return AP_REG_ESPACE; #endif 
-#ifdef PCRE_ERROR_BADUTF8 - case PCRE_ERROR_BADUTF8: +#if defined(PCRE_ERROR_UNKNOWN_NODE) + case PCRE_ERROR_UNKNOWN_NODE: + return AP_REG_ASSERT; +#endif +#if defined(PCRE_ERROR_BADUTF8) + case PCREn(ERROR_BADUTF8): return AP_REG_INVARG; #endif -#ifdef PCRE_ERROR_BADUTF8_OFFSET - case PCRE_ERROR_BADUTF8_OFFSET: +#if defined(PCRE_ERROR_BADUTF8_OFFSET) + case PCREn(ERROR_BADUTF8_OFFSET): return AP_REG_INVARG; #endif default: @@ -278,17 +340,29 @@ AP_DECLARE(int) ap_regname(const ap_regex_t *preg, apr_array_header_t *names, const char *prefix, int upper) { + char *nametable; + +#ifdef HAVE_PCRE2 + uint32_t namecount; + uint32_t nameentrysize; + uint32_t i; + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMECOUNT, &namecount); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMETABLE, &nametable); +#else int namecount; int nameentrysize; int i; - char *nametable; - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_NAMECOUNT, &namecount); + PCRE_INFO_NAMECOUNT, &namecount); pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); + PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_NAMETABLE, &nametable); + PCRE_INFO_NAMETABLE, &nametable); +#endif for (i = 0; i < namecount; i++) { const char *offset = nametable + i * nameentrysize; |