summaryrefslogtreecommitdiffstats
path: root/modules/metadata/mod_cern_meta.c
blob: f107936f2a2d0455bcfbfe0d99215f5218c4a79a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/* Copyright 2000-2004 Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * mod_cern_meta.c
 * version 0.1.0
 * status beta
 * 
 * Andrew Wilson <Andrew.Wilson@cm.cf.ac.uk> 25.Jan.96
 *
 * *** IMPORTANT ***
 * This version of mod_cern_meta.c controls Meta File behaviour on a
 * per-directory basis.  Previous versions of the module defined behaviour
 * on a per-server basis.  The upshot is that you'll need to revisit your 
 * configuration files in order to make use of the new module.
 * ***
 *
 * Emulate the CERN HTTPD Meta file semantics.  Meta files are HTTP
 * headers that can be output in addition to the normal range of
 * headers for each file accessed.  They appear rather like the Apache
 * .asis files, and are able to provide a crude way of influencing
 * the Expires: header, as well as providing other curiosities.
 * There are many ways to manage meta information, this one was
 * chosen because there is already a large number of CERN users
 * who can exploit this module.  It should be noted that there are probably
 * more sensitive ways of managing the Expires: header specifically.
 *
 * The module obeys the following directives, which can appear 
 * in the server's .conf files and in .htaccess files.
 *
 *  MetaFiles <on|off> 
 *
 *    turns on|off meta file processing for any directory.  
 *    Default value is off
 *
 *        # turn on MetaFiles in this directory
 *        MetaFiles on
 *
 *  MetaDir <directory name>
 *      
 *    specifies the name of the directory in which Apache can find
 *    meta information files.  The directory is usually a 'hidden'
 *    subdirectory of the directory that contains the file being
 *    accessed.  eg:
 *
 *        # .meta files are in the *same* directory as the 
 *        # file being accessed
 *        MetaDir .
 *
 *    the default is to look in a '.web' subdirectory. This is the
 *    same as for CERN 3.+ webservers and behaviour is the same as 
 *    for the directive:
 *
 *        MetaDir .web
 *
 *  MetaSuffix <meta file suffix>
 *
 *    specifies the file name suffix for the file containing the
 *    meta information.  eg:
 *
 *       # our meta files are suffixed with '.cern_meta'
 *       MetaSuffix .cern_meta
 *
 *    the default is to look for files with the suffix '.meta'.  This
 *    behaviour is the same as for the directive:
 *
 *       MetaSuffix .meta
 *
 * When accessing the file
 *
 *   DOCUMENT_ROOT/somedir/index.html
 *
 * this module will look for the file
 *
 *   DOCUMENT_ROOT/somedir/.web/index.html.meta
 *
 * and will use its contents to generate additional MIME header 
 * information.
 *
 * For more information on the CERN Meta file semantics see:
 *
 *   http://www.w3.org/hypertext/WWW/Daemon/User/Config/General.html#MetaDir
 *
 * Change-log:
 * 29.Jan.96 pfopen/pfclose instead of fopen/fclose
 *           DECLINE when real file not found, we may be checking each
 *           of the index.html/index.shtml/index.htm variants and don't
 *           need to report missing ones as spurious errors. 
 * 31.Jan.96 log_error reports about a malformed .meta file, rather
 *           than a script error.
 * 20.Jun.96 MetaFiles <on|off> default off, added, so that module
 *           can be configured per-directory.  Prior to this the module
 *           was running for each request anywhere on the server, naughty..
 * 29.Jun.96 All directives made per-directory.
 */

#include "apr.h"
#include "apr_strings.h"

#define APR_WANT_STRFUNC
#include "apr_want.h"

#if APR_HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#include "ap_config.h"
#include "httpd.h"
#include "http_config.h"
#include "util_script.h"
#include "http_log.h"
#include "http_request.h"
#include "http_protocol.h"
#include "apr_lib.h"

#define DIR_CMD_PERMS OR_INDEXES

#define DEFAULT_METADIR		".web"
#define DEFAULT_METASUFFIX	".meta"
#define DEFAULT_METAFILES	0

module AP_MODULE_DECLARE_DATA cern_meta_module;

/* Per-directory configuration of mod_cern_meta, filled in by the MetaDir,
 * MetaSuffix and MetaFiles directives.
 */
typedef struct {
    const char *metadir;    /* MetaDir value; NULL means DEFAULT_METADIR is used */
    const char *metasuffix; /* MetaSuffix value; NULL means DEFAULT_METASUFFIX is used */
    int metafiles;          /* MetaFiles flag; zero disables meta file processing */
} cern_meta_dir_config;

/*
 * Allocate a per-directory configuration record from pool p and give it
 * its defaults: no explicit meta directory, no explicit suffix, and the
 * compiled-in DEFAULT_METAFILES switch.  The dummy argument is not used.
 */
static void *create_cern_meta_dir_config(apr_pool_t *p, char *dummy)
{
    cern_meta_dir_config *conf = apr_palloc(p, sizeof(*conf));

    conf->metafiles = DEFAULT_METAFILES;
    conf->metasuffix = NULL;
    conf->metadir = NULL;

    return conf;
}

/*
 * Combine two per-directory configurations into a new record allocated
 * from pool p.
 *
 * basev is the inherited configuration and addv the one that overrides it.
 * metadir and metasuffix come from addv when it has them set (non-NULL) and
 * fall back to basev otherwise.
 *
 * NOTE: metafiles is always copied from addv.  The flag has no "unset"
 * state, so an enclosing "MetaFiles on" is not carried over into a merged
 * section that does not repeat the directive.
 */
static void *merge_cern_meta_dir_configs(apr_pool_t *p, void *basev, void *addv)
{
    const cern_meta_dir_config *parent = basev;
    const cern_meta_dir_config *child = addv;
    cern_meta_dir_config *merged = apr_palloc(p, sizeof(*merged));

    merged->metafiles = child->metafiles;

    if (child->metasuffix) {
        merged->metasuffix = child->metasuffix;
    }
    else {
        merged->metasuffix = parent->metasuffix;
    }

    if (child->metadir) {
        merged->metadir = child->metadir;
    }
    else {
        merged->metadir = parent->metadir;
    }

    return merged;
}

/*
 * Handler for the MetaDir directive: remember arg as the meta directory
 * name in the per-directory configuration passed as in_dconf.  The pointer
 * is stored as given, not copied.  Always returns NULL (no error message).
 */
static const char *set_metadir(cmd_parms *parms, void *in_dconf, const char *arg)
{
    ((cern_meta_dir_config *) in_dconf)->metadir = arg;

    return NULL;
}

/*
 * Handler for the MetaSuffix directive: remember arg as the meta file
 * suffix in the per-directory configuration passed as in_dconf.  The
 * pointer is stored as given, not copied.  Always returns NULL (no error
 * message).
 */
static const char *set_metasuffix(cmd_parms *parms, void *in_dconf, const char *arg)
{
    ((cern_meta_dir_config *) in_dconf)->metasuffix = arg;

    return NULL;
}

/*
 * Handler for the MetaFiles flag directive: record arg as the on/off
 * switch in the per-directory configuration passed as in_dconf.  Always
 * returns NULL (no error message).
 */
static const char *set_metafiles(cmd_parms *parms, void *in_dconf, int arg)
{
    ((cern_meta_dir_config *) in_dconf)->metafiles = arg;

    return NULL;
}


/* Configuration directives implemented by this module.  Every entry is
 * registered with DIR_CMD_PERMS (defined above as OR_INDEXES) and without
 * extra command data; the table ends with an entry whose name is NULL.
 */
static const command_rec cern_meta_cmds[] =
{
    /* MetaFiles on|off -- handled by set_metafiles */
    AP_INIT_FLAG("MetaFiles", set_metafiles, NULL, DIR_CMD_PERMS,
                 "Limited to 'on' or 'off'"),
    /* MetaDir <directory name> -- handled by set_metadir */
    AP_INIT_TAKE1("MetaDir", set_metadir, NULL, DIR_CMD_PERMS,
                  "the name of the directory containing meta files"),
    /* MetaSuffix <suffix> -- handled by set_metasuffix */
    AP_INIT_TAKE1("MetaSuffix", set_metasuffix, NULL, DIR_CMD_PERMS,
                  "the filename suffix for meta files"),
    {NULL}
};

/*
 * scan_meta_file: read header lines from the open meta file f and apply
 * them to request r.
 *
 * Every non-blank line must have the form "Name: value".  A blank line or
 * the end of the file terminates the header section.
 *
 *   Content-type  trailing whitespace is stripped, the value is passed
 *                 through ap_content_type_tolower() and installed with
 *                 ap_set_content_type().
 *   Status        the leading integer becomes r->status and the complete
 *                 value becomes r->status_line.
 *   anything else is collected in a temporary table which is merged into
 *                 r->headers_out (APR_OVERLAP_TABLES_SET) once the header
 *                 section has been read completely.
 *
 * Returns OK on success.  Returns HTTP_INTERNAL_SERVER_ERROR, after logging,
 * when a line has no ':' separator or a Status value does not start with a
 * number; the collected headers are not merged in that case.  The log
 * messages name r->filename because the path of the meta file itself is not
 * available in this function.
 *
 * XXX: this is very similar to ap_scan_script_header_err_core...
 * are the differences deliberate, or just a result of bit rot?
 */
static int scan_meta_file(request_rec *r, apr_file_t *f)
{
    char w[MAX_STRING_LEN];
    char *l;
    int p;
    apr_table_t *tmp_headers;

    tmp_headers = apr_table_make(r->pool, 5);
    while (apr_file_gets(w, MAX_STRING_LEN - 1, f) == APR_SUCCESS) {

        /* Delete terminal (CR?)LF */

        p = strlen(w);
        if (p > 0 && w[p - 1] == '\n') {
            if (p > 1 && w[p - 2] == '\015')
                w[p - 2] = '\0';
            else
                w[p - 1] = '\0';
        }

        /* A blank line ends the header section.  Leave the loop instead of
         * returning so that the headers collected so far still reach
         * r->headers_out below.
         */
        if (w[0] == '\0') {
            break;
        }

        /* if we see a bogus header don't ignore it. Shout and scream */

        if (!(l = strchr(w, ':'))) {
            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
                          "malformed header in meta file: %s", r->filename);
            return HTTP_INTERNAL_SERVER_ERROR;
        }

        /* split "Name: value" in place and skip blanks before the value */
        *l++ = '\0';
        while (*l && apr_isspace(*l))
            ++l;

        if (!strcasecmp(w, "Content-type")) {
            char *tmp;
            /* Nuke trailing whitespace */

            char *endp = l + strlen(l) - 1;
            while (endp > l && apr_isspace(*endp))
                *endp-- = '\0';

            tmp = apr_pstrdup(r->pool, l);
            ap_content_type_tolower(tmp);
            ap_set_content_type(r, tmp);
        }
        else if (!strcasecmp(w, "Status")) {
            /* Only accept the status line when it really starts with a
             * status code; otherwise r->status would stay as it was while
             * r->status_line carried arbitrary text.
             */
            if (sscanf(l, "%d", &r->status) != 1) {
                ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
                              "malformed Status header in meta file: %s",
                              r->filename);
                return HTTP_INTERNAL_SERVER_ERROR;
            }
            r->status_line = apr_pstrdup(r->pool, l);
        }
        else {
            apr_table_set(tmp_headers, w, l);
        }
    }
    apr_table_overlap(r->headers_out, tmp_headers, APR_OVERLAP_TABLES_SET);
    return OK;
}

/*
 * add_cern_meta_data: fixups hook.  For a request that maps to an existing
 * non-directory file <dir>/<name>, look for the meta file
 * <dir>/<MetaDir>/<name><MetaSuffix> and, if it is there, feed it to
 * scan_meta_file() so that its headers are added to the response.
 *
 * Returns
 *   DECLINED        when MetaFiles is off, the requested file does not
 *                   exist, the request is for a directory, r->filename has
 *                   no usable directory part, the sub-request for the meta
 *                   file does not yield HTTP_OK, or the meta file does not
 *                   exist;
 *   HTTP_FORBIDDEN  when the meta file exists but cannot be opened;
 *   otherwise whatever scan_meta_file() returns.
 */
static int add_cern_meta_data(request_rec *r)
{
    char *metafilename;
    char *leading_slash;
    char *last_slash;
    char *real_file;
    char *scrap_book;
    apr_file_t *f = NULL;
    apr_status_t retcode;
    cern_meta_dir_config *dconf;
    int rv;
    request_rec *rr;

    dconf = ap_get_module_config(r->per_dir_config, &cern_meta_module);

    if (!dconf->metafiles) {
        return DECLINED;
    }

    /* if ./.web/$1.meta exists then output 'asis' */

    /* filetype 0 is APR_NOFILE: the requested file itself does not exist */
    if (r->finfo.filetype == 0) {
        return DECLINED;
    }

    /* is this a directory?  r->uri may be an empty string, so make sure
     * there is a last character before looking at it.
     */
    if (r->finfo.filetype == APR_DIR
        || (r->uri[0] != '\0' && r->uri[strlen(r->uri) - 1] == '/')) {
        return DECLINED;
    }

    /* what directory is this file in? */
    scrap_book = apr_pstrdup(r->pool, r->filename);

    leading_slash = strchr(scrap_book, '/');
    last_slash = strrchr(scrap_book, '/');
    if ((last_slash != NULL) && (last_slash != leading_slash)) {
        /* cut the copy at the last slash: scrap_book becomes the directory,
         * real_file the bare file name that follows the slash
         */
        real_file = last_slash;
        real_file++;
        *last_slash = '\0';
    }
    else {
        /* no last slash, buh?! */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
                      "internal error in mod_cern_meta: %s", r->filename);
        /* should really barf, but hey, let's be friends... */
        return DECLINED;
    }

    metafilename = apr_pstrcat(r->pool, scrap_book, "/",
                               dconf->metadir ? dconf->metadir
                                              : DEFAULT_METADIR,
                               "/", real_file,
                               dconf->metasuffix ? dconf->metasuffix
                                                 : DEFAULT_METASUFFIX,
                               NULL);

    /* It sucks to require this subrequest to complete, because this
     * means people must leave their meta files accessible to the world.
     * A better solution might be a "safe open" feature of pfopen to avoid
     * pipes, symlinks, and crap like that.
     *
     * In fact, this doesn't suck.  Because <Location > blocks are never run
     * against sub_req_lookup_file, the meta can be somewhat protected by
     * either masking it with a <Location > directive or alias, or stowing
     * the file outside of the web document tree, while providing the
     * appropriate directory blocks to allow access to it as a file.
     */
    rr = ap_sub_req_lookup_file(metafilename, r, NULL);
    if (rr->status != HTTP_OK) {
        ap_destroy_sub_req(rr);
        return DECLINED;
    }
    ap_destroy_sub_req(rr);

    retcode = apr_file_open(&f, metafilename, APR_READ, APR_OS_DEFAULT, r->pool);
    if (retcode != APR_SUCCESS) {
        if (APR_STATUS_IS_ENOENT(retcode)) {
            return DECLINED;
        }
        /* hand the APR status to the logger so the real cause of the
         * failure shows up in the error log
         */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, retcode, r,
                      "meta file permissions deny server access: %s",
                      metafilename);
        return HTTP_FORBIDDEN;
    }

    /* read the headers in */
    rv = scan_meta_file(r, f);
    apr_file_close(f);

    return rv;
}

/*
 * Hook registration: run add_cern_meta_data in the fixups phase at
 * APR_HOOK_MIDDLE, with no predecessor or successor lists given.  The pool
 * argument is not used.
 */
static void register_hooks(apr_pool_t *p)
{
    ap_hook_fixups(add_cern_meta_data,
                   NULL,
                   NULL,
                   APR_HOOK_MIDDLE);
}

/* Module record: per-directory configuration callbacks, the directive
 * table and the hook registration function.  No per-server configuration
 * callbacks are supplied.
 */
module AP_MODULE_DECLARE_DATA cern_meta_module =
{
    STANDARD20_MODULE_STUFF,
    create_cern_meta_dir_config,/* dir config creator */
    merge_cern_meta_dir_configs,/* dir merger --- default is to override */
    NULL,			/* server config */
    NULL,			/* merge server configs */
    cern_meta_cmds,		/* command table */
    register_hooks		/* register hooks */
};