/* -*- mode: c -*- * ======================================================================= * Copyright (c) 2000-2001 * Internet Initiative Japan Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgment: * * This product includes software developed by Internet * Initiative Japan Inc. for use in the mod_encoding module * for Apache. * * 4. Products derived from this software may not be called "mod_encoding" * nor may "mod_encoding" appear in their names without prior written * permission of Internet Initiative Japan Inc. For written permission, * please contact tai@iij.ad.jp (Taisuke Yamada). * * 5. Redistributions of any form whatsoever must retain the following * acknowledgment: * * This product includes software developed by Internet * Initiative Japan Inc. for use in the mod_encoding module * for Apache (http://www.apache.org/). * * THIS SOFTWARE IS PROVIDED BY INTERNET INITIATIVE JAPAN INC. ``AS IS'' * AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTERNET * INITIATIVE JAPAN INC. 
OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * mod_encoding: This is a module to improve I18N filename * interoperability of mod_dav (and other HTTP-based protocols, maybe). * * It seems many WebDAV clients send filename in its platform-local * encoding. But since mod_dav expects everything, even HTTP request * line, to be in UTF-8, this causes an interoperability problem. * * I believe this is a future issue for specification (RFC?) to * standardize encoding used in HTTP request-line and HTTP header, * but life would be much easier if mod_dav can handle various * encodings sent by clients, TODAY. This module does just that. * * [Configuration] * * Here follows configuration example of this module. * * * EncodingEngine on * SetServerEncoding UTF-8 * * AddClientEncoding SJIS "Microsoft .* DAV" * AddClientEncoding SJIS "xdwin9x/" * AddClientEncoding EUC-JP "cadaver/" * * * The point is to register non-standard compliant clients so * this module can detect which client input (=HTTP header) to * check and convert encoding. You can use extended regexp to * name the client. * * [TODO] * * There're times when you want charset other than UTF-8 for * local filesystem. Since mod_dav hardcodes UTF-8 as expected * filesystem encoding, this is not an easy fix (though in * principle, all you need to do is to convert output of readdir(3)). * * Also, when adding this feature, this module can no longer go * outside mod_dav, raising cost to keep up with mod_dav... 
* * @author Taisuke Yamada * @version $Revision: 1.1 $ */ #include #include #include #include #include #include #include #ifndef MOD_ENCODING_DEBUG #ifdef DEBUG #define MOD_ENCODING_DEBUG 1 #else #define MOD_ENCODING_DEBUG 0 #endif #endif #define DBG(expr) if (MOD_ENCODING_DEBUG) { expr; } #define LOG(level, server, args...) \ ap_log_error(APLOG_MARK, APLOG_NOERRNO|level, server, ##args) /** * module-local information storage structure */ typedef struct { int enable_function; /* flag to enable this module */ char *server_encoding; array_header *client_encoding; } encoding_config; module MODULE_VAR_EXPORT encoding_module; /*************************************************************************** * utility methods ***************************************************************************/ /** * Converts encoding of the input string. * * @param p Memory pool of apache * @param cd Conversion descriptor, made by iconv_open(3). * @param srcbuf Input string * @param srclen Length of the input string. Usually strlen(srcbuf). */ static char * iconv_string(request_rec *r, iconv_t cd, char *srcbuf, size_t srclen) { char *outbuf, *marker; size_t outlen; if (srclen == 0) { LOG(APLOG_DEBUG, r->server, "iconv_string: skipping zero-length input"); return srcbuf; } /* Allocate space for conversion. Note max bloat factor is 4 of UCS-4 */ marker = outbuf = (char *)ap_palloc(r->pool, outlen = srclen * 4 + 1); if (outbuf == NULL) { LOG(APLOG_WARNING, r->server, "iconv_string: no more memory"); return srcbuf; } /* Convert every character within input string. */ while (srclen > 0) { if (iconv(cd, &srcbuf, &srclen, &outbuf, &outlen) == (size_t)(-1)) { LOG(APLOG_WARNING, r->server, "iconv_string: conversion error"); return srcbuf; } } /* Everything done. Flush buffer/state and return result */ iconv(cd, NULL, NULL, &outbuf, &outlen); iconv(cd, NULL, NULL, NULL, NULL); *outbuf = '\0'; return marker; } /** * Nomalize charset in HTTP request line and HTTP header(s). 
* * @param r Apache request object structure * @param cd Conversion descriptor, made by iconv_open(3). */ static void iconv_header(request_rec *r, iconv_t cd) { char *buff; char *keys[] = { "Destination", NULL }; int i; /* Normalize encoding in HTTP request line */ ap_unescape_url(r->unparsed_uri); buff = iconv_string(r, cd, r->unparsed_uri, strlen(r->unparsed_uri)); ap_parse_uri(r, buff); /* Normalize encoding in HTTP request header(s) */ for (i = 0 ; keys[i] ; i++) { if ((buff = (char *)ap_table_get(r->headers_in, keys[i])) != NULL) { ap_unescape_url(buff); buff = iconv_string(r, cd, buff, strlen(buff)); ap_table_set(r->headers_in, keys[i], buff); } } } /** * Return the encoding (defaults to "UTF-8") named client * is expected to send. */ static const char * get_client_encoding(request_rec *r, array_header *encmap, const char *lookup) { void **list = (void **)encmap->elts; int i; LOG(APLOG_DEBUG, r->server, "get_client_encoding: entered"); if (! lookup) return "UTF-8"; LOG(APLOG_DEBUG, r->server, "get_client_encoding: lookup == %s", lookup); for (i = 0 ; i < encmap->nelts ; i += 2) { LOG(APLOG_DEBUG, r->server, "get_client_encoding: list[%d] == %s", i, (char *)list[i]); if (ap_regexec((regex_t *)list[i + 1], lookup, 0, NULL, 0) == 0) { return (char *)list[i]; } } return "UTF-8"; } /** * Handler for "EncodingEngine" directive. */ static const char * set_encoding_engine(cmd_parms *parm, void *data, int flag) { encoding_config *conf; conf = ap_get_module_config(parm->server->module_config, &encoding_module); conf->enable_function = flag; return NULL; } /** * Handler for "SetServerEncoding" directive. */ static const char * set_server_encoding(cmd_parms *parm, void *data, char *arg) { encoding_config *conf; conf = ap_get_module_config(parm->server->module_config, &encoding_module); conf->server_encoding = ap_pstrdup(parm->pool, arg); return NULL; } /** * Handler for "AddClientEncoding" directive. 
* * This registers regex pattern of UserAgent: header and expected * encoding from that useragent. */ static const char * add_client_encoding(cmd_parms *parm, void *data, char *key, char *val) { encoding_config *conf; LOG(APLOG_DEBUG, parm->server, "add_client_encoding: entered"); LOG(APLOG_DEBUG, parm->server, "add_client_encoding: key == %s", key); LOG(APLOG_DEBUG, parm->server, "add_client_encoding: val == %s", val); conf = ap_get_module_config(parm->server->module_config, &encoding_module); *(void **)ap_push_array(conf->client_encoding) = ap_pstrdup(parm->pool, key); *(void **)ap_push_array(conf->client_encoding) = ap_pregcomp(parm->pool, val, REG_EXTENDED|REG_ICASE|REG_NOSUB); return NULL; } /*************************************************************************** * module-unique command table ***************************************************************************/ static const command_rec mod_enc_commands[] = { {"EncodingEngine", set_encoding_engine, NULL, OR_FILEINFO, FLAG, "Usage: EncodingEngine (on|off)"}, {"SetServerEncoding", set_server_encoding, NULL, OR_FILEINFO, TAKE1, "Usage: SetServerEncoding "}, {"AddClientEncoding", add_client_encoding, NULL, OR_FILEINFO, TAKE2, "Usage: AddClientEncoding "}, {NULL} }; /*************************************************************************** * module methods ***************************************************************************/ /** * Setup module internal data strcuture. */ static void * config_setup(pool *p, server_rec *s) { encoding_config *conf; conf = (encoding_config *)ap_pcalloc(p, sizeof(encoding_config)); conf->enable_function = 1; conf->server_encoding = "UTF-8"; conf->client_encoding = ap_make_array(p, 2, sizeof(void *)); return conf; } /** * Merge dirconfig. Currently does nothing. */ static void * config_merge(pool *p, encoding_config *base, encoding_config *override) { return override; } /** * Hooked handler for post-read request. 
* * Here, expected encoding by client/server is determined, and * whenever needed, client input will be converted to that of * server-side expected encoding. */ static int mod_enc_postread(request_rec *r) { encoding_config *conf; const char *oenc, *ienc; iconv_t cd; LOG(APLOG_DEBUG, r->server, "mod_enc_postread: entered"); conf = (encoding_config *) ap_get_module_config(r->server->module_config, &encoding_module); if (! conf->enable_function) { return DECLINED; } oenc = conf->server_encoding; ienc = get_client_encoding(r, conf->client_encoding, ap_table_get(r->headers_in, "User-Agent")); LOG(APLOG_DEBUG, r->server, "mod_enc_postread: ienc == %s", ienc); LOG(APLOG_DEBUG, r->server, "mod_enc_postread: oenc == %s", oenc); if (strcmp(ienc, oenc) == 0) { return DECLINED; } if ((cd = iconv_open(oenc, ienc)) != (iconv_t)(-1)) { iconv_header(r, cd); iconv_close(cd); } return DECLINED; } /*************************************************************************** * exported module structure ***************************************************************************/ module MODULE_VAR_EXPORT encoding_module = { STANDARD_MODULE_STUFF, NULL, /* initializer */ NULL, /* dir config */ NULL, /* dir config merger */ config_setup, /* server config */ config_merge, /* server config merger */ mod_enc_commands, /* command table */ NULL, /* handlers */ NULL, /* filename translation */ NULL, /* check_user_id */ NULL, /* check auth */ NULL, /* check access */ NULL, /* type_checker */ NULL, /* fixups */ NULL, /* logger */ NULL, /* header parser */ NULL, /* child_init */ NULL, /* child_exit */ mod_enc_postread, /* post read-request */ #ifdef EAPI NULL, /* EAPI: add_module */ NULL, /* EAPI: remove_module */ NULL, /* EAPI: rewrite_command */ NULL, /* EAPI: new_connection */ #endif };