/* -*- mode: c -*- * * $Id: mod_encoding.c,v 1.6 2001/12/11 12:55:38 tai Exp $ * */ /* * mod_encoding core module test implementation for Apache2. * by Kunio Miyamoto (wakatono@todo.gr.jp) * Original security fix port 2002/06/06 * by Kunio Miyamoto (wakatono@todo.gr.jp) * Port new function of 20020611a * by Kunio Miyamoto (wakatono@todo.gr.jp) * Port new function of 20020611a * by Kunio Miyamoto (wakatono@todo.gr.jp) */ #include #include #include #include #include #include /* #include */ #include /** * Core part of the module. * Here, the module hooks into filename translation stage, * and converts all non-ascii-or-utf8 expressions into * appropriate encoding speficied by the configuration. * * Note UTF-8 is the implicit default and will be tried first, * regardless of user configuration. */ #ifndef MOD_ENCODING_DEBUG #ifdef DEBUG #define MOD_ENCODING_DEBUG 1 #else #define MOD_ENCODING_DEBUG 0 #endif #endif #define DBG(expr) if (MOD_ENCODING_DEBUG) { expr; } /* FIXME: remove gcc-dependency */ #define ENABLE_FLAG_UNSET 0 #define ENABLE_FLAG_OFF 1 #define ENABLE_FLAG_ON 2 #define STRIP_FLAG_UNSET 0 #define STRIP_FLAG_OFF 1 #define STRIP_FLAG_ON 2 /** * module-local information storage structure */ typedef struct { int enable_function; /* flag to enable this module */ char *server_encoding; /* server-side filesystem encoding */ apr_array_header_t *client_encoding; /* useragent-to-encoding-list sets */ apr_array_header_t *default_encoding; /* useragent-to-encoding-list sets */ int strip_msaccount; /* normalize wierd WinXP username */ } encoding_config; module AP_MODULE_DECLARE_DATA encoding_module; /*************************************************************************** * utility methods ***************************************************************************/ /** * Converts encoding of the input string. * Returns NULL on error, else, appropriate string on success. * * @param p Memory pool of apache * @param cd Conversion descriptor, made by iconv_open(3). * @param srcbuf Input string * @param srclen Length of the input string. Usually strlen(srcbuf). */ static char * iconv_string(request_rec *r, iconv_t cd, char *srcbuf, size_t srclen) { char *outbuf, *marker; size_t outlen; if (srclen == 0) { return srcbuf; } /* Allocate space for conversion. Note max bloat factor is 4 of UCS-4 */ marker = outbuf = (char *)apr_palloc(r->pool, outlen = srclen * 4 + 1); if (outbuf == NULL) { return NULL; } /* Convert every character within input string. */ while (srclen > 0) { if (iconv(cd, &srcbuf, &srclen, &outbuf, &outlen) == (size_t)(-1)) { return NULL; } } #if 0 /* Commented out for now as some iconv seems to break with NULL */ /* Everything done. Flush buffer/state and return result */ iconv(cd, NULL, NULL, &outbuf, &outlen); iconv(cd, NULL, NULL, NULL, NULL); #endif *outbuf = '\0'; return marker; } /** * Nomalize charset in HTTP request line and HTTP header(s). * Returns 0 on success, -1 (non-zero) on error. * * FIXME: Should handle/consider partial success? * * @param r Apache request object structure * @param cd Conversion descriptor, made by iconv_open(3). */ static int iconv_header(request_rec *r, iconv_t cd) { char *buff; char *keys[] = { "Destination", NULL }; int i; /* Normalize encoding in HTTP request line */ ap_unescape_url(r->unparsed_uri); if ((buff = iconv_string(r, cd, r->unparsed_uri, strlen(r->unparsed_uri))) == NULL) return -1; ap_parse_uri(r, buff); ap_getparents(r->uri); /* normalize given path for security */ /* Normalize encoding in HTTP request header(s) */ for (i = 0 ; keys[i] ; i++) { if ((buff = (char *)apr_table_get(r->headers_in, keys[i])) != NULL) { ap_unescape_url(buff); if ((buff = iconv_string(r, cd, buff, strlen(buff))) == NULL) return -1; apr_table_set(r->headers_in, keys[i], buff); } } return 0; } /** * Return the list of encoding(s) (defaults to (list "UTF-8")) * which named client is expected to send. * * @param r Apache request object structure * @param encmap Table of UA-to-encoding(s) * @param lookup Name of the useragent to look for */ static apr_array_header_t * get_client_encoding(request_rec *r, apr_array_header_t *encmap, const char *lookup) { void **list = (void **)encmap->elts; apr_array_header_t *encs = apr_array_make(r->pool, 1, sizeof(char *)); int i; /* push UTF-8 as the first candidate of expected encoding */ *((char **)apr_array_push(encs)) = apr_pstrdup(r->pool, "UTF-8"); if (! lookup) return encs; for (i = 0 ; i < encmap->nelts ; i += 2) { if (ap_regexec((regex_t *)list[i], lookup, 0, NULL, 0) == 0) { apr_array_cat(encs, (apr_array_header_t *)list[i + 1]); return encs; } } return encs; } /** * Handler for "EncodingEngine" directive. */ static const char * set_encoding_engine(cmd_parms *cmd, encoding_config *conf, int flag) { if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } conf->enable_function = flag ? ENABLE_FLAG_ON : ENABLE_FLAG_OFF; return NULL; } /** * Handler for "SetServerEncoding" directive. */ static const char * set_server_encoding(cmd_parms *cmd, encoding_config *conf, char *arg) { if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } conf->server_encoding = apr_pstrdup(cmd->pool, arg); return NULL; } /** * Handler for "AddClientEncoding" directive. * * This registers regex pattern of UserAgent: header and expected * encoding(s) from that useragent. */ static const char * add_client_encoding(cmd_parms *cmd, encoding_config *conf, char *args) { apr_array_header_t *encs; char *arg; if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } encs = apr_array_make(cmd->pool, 1, sizeof(void *)); /* register useragent with UserAgent: pattern */ if (*args && (arg = ap_getword_conf_nc(cmd->pool, &args))) { *(void **)apr_array_push(conf->client_encoding) = ap_pregcomp(cmd->pool, arg, REG_EXTENDED|REG_ICASE|REG_NOSUB); } /* register list of possible encodings from above useragent */ while (*args && (arg = ap_getword_conf_nc(cmd->pool, &args))) { *(void **)apr_array_push(encs) = apr_pstrdup(cmd->pool, arg); } *(void **)apr_array_push(conf->client_encoding) = encs; return NULL; } /** * Handler for "DefaultClientEncoding" directive. * * This registers encodings that work as "fallback" for most clients. */ static const char * default_client_encoding(cmd_parms *cmd, encoding_config *conf, char *args) { char *arg; if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } conf->default_encoding = apr_array_make(cmd->pool, 1, sizeof(char *)); /* register list of possible encodings as a default */ while (*args && (arg = ap_getword_conf_nc(cmd->pool, &args))) { *(void **)apr_array_push(conf->default_encoding) = apr_pstrdup(cmd->pool, arg); } return NULL; } /** * Handler for "NormalizeUsername" directive. * * NOTE: This has nothing to do with encoding conversion. * So where should this go? */ static const char * set_normalize_username(cmd_parms *cmd, encoding_config *conf, int flag) { if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } conf->strip_msaccount = flag ? STRIP_FLAG_ON : STRIP_FLAG_OFF; return NULL; } /*************************************************************************** * module-unique command table * * FIXME: decide which scope to apply for following directives ***************************************************************************/ static const command_rec mod_enc_commands[] = { {"EncodingEngine", set_encoding_engine, NULL, OR_ALL, FLAG, "Usage: EncodingEngine (on|off)"}, {"SetServerEncoding", set_server_encoding, NULL, OR_ALL, TAKE1, "Usage: SetServerEncoding "}, {"AddClientEncoding", add_client_encoding, NULL, OR_ALL, RAW_ARGS, "Usage: AddClientEncoding [ ...]"}, {"DefaultClientEncoding", default_client_encoding, NULL, OR_ALL, RAW_ARGS, "Usage: DefaultClientEncoding "}, {"NormalizeUsername", set_normalize_username, NULL, OR_ALL, FLAG, "Usage: NormalizeUsername (on|off)"}, {NULL} }; /*************************************************************************** * module methods ***************************************************************************/ /** * Setup server-level module internal data strcuture. */ static void * server_setup(apr_pool_t *p, server_rec *s) { encoding_config *conf; DBG(fprintf(stderr, "server_setup: entered\n")); conf = (encoding_config *)apr_pcalloc(p, sizeof(encoding_config)); conf->enable_function = ENABLE_FLAG_UNSET; conf->server_encoding = NULL; conf->client_encoding = apr_array_make(p, 2, sizeof(void *)); conf->default_encoding = NULL; conf->strip_msaccount = STRIP_FLAG_UNSET; return conf; } /** * Setup folder-level module internal data strcuture. */ static void * folder_setup(apr_pool_t *p, char *dir) { DBG(fprintf(stderr, "folder_setup: entered\n")); return server_setup(p, NULL); } /** * Merge configuration. */ static void * config_merge(apr_pool_t *p, void *base, void *override) { encoding_config *parent = base; encoding_config *child = override; encoding_config *merge; DBG(fprintf(stderr, "config_merge: entered\n")); merge = (encoding_config *)apr_pcalloc(p, sizeof(encoding_config)); if (child->enable_function != ENABLE_FLAG_UNSET) merge->enable_function = child->enable_function; else merge->enable_function = parent->enable_function; DBG(fprintf(stderr, "merged: enable_function == %d\n", merge->enable_function)); if (child->strip_msaccount != STRIP_FLAG_UNSET) merge->strip_msaccount = child->strip_msaccount; else merge->strip_msaccount = parent->strip_msaccount; DBG(fprintf(stderr, "merged: strip_msaccount == %d\n", merge->strip_msaccount)); if (child->server_encoding) merge->server_encoding = child->server_encoding; else merge->server_encoding = parent->server_encoding; DBG(fprintf(stderr, "merged: server_encoding == %s\n", merge->server_encoding)); if (child->default_encoding) merge->default_encoding = child->default_encoding; else merge->default_encoding = parent->default_encoding; merge->client_encoding = apr_array_append(p, child->client_encoding, parent->client_encoding); return merge; } /** * Handler for encoding conversion. * * Here, expected encoding by client/server is determined, and * whenever needed, client input will be converted to that of * server-side expected encoding. */ static int mod_enc_convert(request_rec *r) { encoding_config *conf, *dconf, *sconf; const char *oenc; /* server-side encoding */ apr_array_header_t *ienc; /* list of possible encodings */ void **list; /* same as above (for iteration) */ iconv_t cd; /* conversion descriptor */ int i; sconf = ap_get_module_config(r->server->module_config, &encoding_module); dconf = ap_get_module_config(r->per_dir_config, &encoding_module); conf = config_merge(r->pool, sconf, dconf); if (conf->enable_function != ENABLE_FLAG_ON) { return DECLINED; } oenc = conf->server_encoding ? conf->server_encoding : "UTF-8"; ienc = get_client_encoding(r, conf->client_encoding, apr_table_get(r->headers_in, "User-Agent")); if (conf->default_encoding) apr_array_cat(ienc, conf->default_encoding); list = (void **)ienc->elts; /* try specified encodings in order */ for (i = 0 ; i < ienc->nelts ; i++) { /* pick appropriate converter module */ if ((cd = iconv_open(oenc, list[i])) == (iconv_t)(-1)) continue; /* conversion tryout */ if (iconv_header(r, cd) == 0) { iconv_close(cd); return DECLINED; } /* don't fail, but just continue on until list ends */ iconv_close(cd); } return DECLINED; } /** * Handler for HTTP header parsing. * * Here, username is normalized and wierd "hostname\" prepended * to username is removed (if configured to do so). */ static int mod_enc_parse(request_rec *r) { encoding_config *conf, *dconf, *sconf; const char *pass; char *user; char *buff; sconf = ap_get_module_config(r->server->module_config, &encoding_module); dconf = ap_get_module_config(r->per_dir_config, &encoding_module); conf = config_merge(r->pool, sconf, dconf); if (conf->enable_function != ENABLE_FLAG_ON) { return DECLINED; } /* Parse header and strip "hostname\" from username - for WinXP */ if (conf->strip_msaccount == STRIP_FLAG_ON) { /* Only basic auth is supported for now */ if (ap_get_basic_auth_pw(r, &pass) != OK) return DECLINED; /* Is this username broken? */ if ((user = index(r->user, '\\')) == NULL) return DECLINED; /* Re-generate authorization header */ if (*(user + 1)) { buff = ap_pbase64encode(r->pool, apr_psprintf(r->pool, "%s:%s", user + 1, pass)); apr_table_set(r->headers_in, "Authorization", apr_pstrcat(r->pool, "Basic ", buff, NULL)); ap_get_basic_auth_pw(r, &pass); /* update */ } } return DECLINED; } static void register_hooks(apr_pool_t *p) { /* filename-to-URI translation */ /* ap_hook_translate_name(mod_enc_convert,NULL,NULL,APR_HOOK_FIRST); */ ap_hook_post_read_request(mod_enc_convert,NULL,NULL,APR_HOOK_FIRST); ap_hook_header_parser(mod_enc_parse,NULL,NULL,APR_HOOK_FIRST); } /*************************************************************************** * exported module structure ***************************************************************************/ module AP_MODULE_DECLARE_DATA encoding_module = { STANDARD20_MODULE_STUFF, folder_setup, /* create per-directory config structure */ config_merge, /* merge per-directory(?) config str */ server_setup, /* create per-server config structure */ config_merge, /* merge per-server config ...*/ mod_enc_commands, /* command handlers */ register_hooks };