mod_trimxml

ビルド方法

$ /usr/sbin/apxs -c mod_trimxml.c

$ sudo /usr/sbin/apxs -i -A mod_trimxml.la

設定

Filter として TRIMXML を設定します。

.htaccess の設定例

# LoadModule trimxml_module modules/mod_trimxml.so

<IfModule mod_trimxml.c>
    AddOutputFilterByType TRIMXML text/html

#   TrimXmlUseBuiltinRule On    # default: On
</IfModule>
TrimXmlUseBuiltinRule

モジュール組み込みのブラウザ判定ルーチンを使うかどうかの設定です。MSIE 2〜6 の場合にXML宣言を削除します。たいした判別ルーチンではないので期待しないでください。

デフォルトで On です。

TrimXmlConditionalEnv

ここで指定された環境変数が設定されていて 0 ではない場合に,XML宣言を削除します。複数指定できます(or になります)。BrowserMatch とかで設定した環境変数と絡めて使うのがいいかもしんない。

TrimXmlUseBuiltinRule Off の場合にしか働きません。

BrowserMatch MSIE  msie
BrowserMatch Opera !msie
BrowserMatch Macintosh mac

TrimXmlUseBuiltinRule Off
TrimXmlConditionalEnv msie mac

とすると,Opera じゃない MSIE か,Mac からのアクセスの場合に,<?xml?>宣言を削除します。

ソース

/* mod_trimxml.c */
#include "apr_lib.h"
#include "apr_strings.h"
#include "ap_config.h"
#include "httpd.h"
#include "http_config.h"
#include "http_protocol.h"
#include "http_log.h"
#include "http_main.h"
#include "http_request.h"

#include "mod_core.h"

/*
 * Byte sequence the filter looks for at the start of the body (after any
 * leading whitespace), and the number of bytes compared. Only these first
 * MATCHING_TOKEN_LEN bytes are checked, so anything beginning with "<?xml"
 * is treated as a match.
 */
#define MATCHING_TOKEN      "<?xml"
#define MATCHING_TOKEN_LEN  5

/*
 * Forward declaration so that code above the module definition can pass
 * &trimxml_module to ap_get_module_config().
 */
module AP_MODULE_DECLARE_DATA trimxml_module;

/* Per-directory configuration of the module. */
typedef struct {
    /* Value of the TrimXmlUseBuiltinRule flag; non-zero selects
     * builtin_rule(), zero selects conditional_env_rule(). */
    int                 use_builtin_rule;
    /* Array of (char *) environment variable names collected from
     * TrimXmlConditionalEnv; NULL until the first name is added. */
    apr_array_header_t  *conditional_env;
} trimxml_config_rec;

/* States of the filter's state machine (stored in trimxml_ctx_t.stage). */
enum trimxml_stage_t {
    STAGE_START         = 0,            /* initial state */
    STAGE_SKIP_SPACE    = STAGE_START,  /* collecting leading whitespace */
    STAGE_WANT_TOKEN,                   /* collecting bytes to compare with MATCHING_TOKEN */
    STAGE_SKIP_TOKEN,                   /* discarding up to and including the next '>' */
    STAGE_SKIP_CRLF,                    /* discarding whitespace that follows the '>' */
    STAGE_DONE          = -1            /* finished; brigades are passed through as-is */
};

/* Per-request filter state, kept in f->ctx between filter invocations. */
typedef struct {
    int stage;                  /* current state, one of enum trimxml_stage_t */
    int wanted_len;             /* bytes still to collect while in STAGE_WANT_TOKEN */
    apr_bucket_brigade *bb1;    /* heading whitespaces */
    apr_bucket_brigade *bb2;    /* token for judgement */
} trimxml_ctx_t;

/*
 * Unlink bucket (e) from whatever brigade it is currently in and append it
 * to the tail of brigade (b). Note that (e) is evaluated twice.
 */
#define APR_BUCKET_MOVE_TO_BRIGADE(e, b)    \
    do {                                    \
        APR_BUCKET_REMOVE((e));             \
        APR_BRIGADE_INSERT_TAIL((b), (e));  \
    } while (0)

/*
 * Built-in browser detection based on the User-Agent request header.
 *
 * Returns non-zero when the header contains "MSIE " immediately followed by
 * a character in the range '2'..'6' and does not contain "Opera" anywhere.
 * Returns 0 otherwise, including when there is no User-Agent header.
 * Only the first character of the version is examined.
 */
static int builtin_rule(request_rec *r)
{
    const char *str;
    const char *p;

    str = apr_table_get(r->headers_in, "User-Agent");
    if (! str)
        return 0;

    p = strstr(str, "MSIE ");
    if (! p)
        return 0;

    p += 5; /* skip "MSIE " so that p points at the version */

    /* The negation must cover the whole range test; negating only the
     * lower bound would let versions above '6' through. */
    if (! (*p >= '2' && *p <= '6'))
        return 0;

    return ! strstr(str, "Opera");
}

/*
 * Environment-variable based rule.
 *
 * Looks up every name stored in conf->conditional_env in
 * r->subprocess_env. Returns 1 as soon as one of them is set to a value
 * other than the string "0"; returns 0 if none is, or if no names have
 * been configured.
 */
static int conditional_env_rule(request_rec *r, trimxml_config_rec *conf)
{
    char **names;
    int i;

    if (conf->conditional_env == NULL)
        return 0;

    names = (char **) conf->conditional_env->elts;
    for (i = 0; i < conf->conditional_env->nelts; i++) {
        const char *value = apr_table_get(r->subprocess_env, names[i]);

        /* unset variables and the literal "0" do not count */
        if (value != NULL && strcmp(value, "0") != 0)
            return 1;
    }

    return 0;
}

/*
 * Decide whether the filter should act on this request.
 *
 * Fetches the module's per-directory configuration and delegates to
 * builtin_rule() when use_builtin_rule is set, or to
 * conditional_env_rule() otherwise. Returns that rule's result.
 */
static int xml_must_be_trimmed(request_rec *r)
{
    trimxml_config_rec *cfg;

    cfg = ap_get_module_config(r->per_dir_config, &trimxml_module);

    return cfg->use_builtin_rule ? builtin_rule(r)
                                 : conditional_env_rule(r, cfg);
}

/*
 * TRIMXML output filter.
 *
 * On the first non-empty brigade, xml_must_be_trimmed() decides whether
 * this request is a candidate. If it is not, the filter removes itself and
 * passes the data on untouched. Otherwise a small state machine
 * (ctx->stage) walks the leading buckets of the response:
 *
 *   STAGE_SKIP_SPACE  leading whitespace is set aside in ctx->bb1
 *   STAGE_WANT_TOKEN  MATCHING_TOKEN_LEN bytes are set aside in ctx->bb2
 *                     and compared with MATCHING_TOKEN
 *   STAGE_SKIP_TOKEN  everything up to and including the next '>' is
 *                     discarded
 *   STAGE_SKIP_CRLF   whitespace following the '>' is discarded
 *   STAGE_DONE        all further brigades are passed straight through
 *
 * When the token does not match, the set-aside buckets are put back in
 * front of the brigade so the output is unchanged. When it matches, the
 * leading whitespace (bb1) is re-emitted and the declaration plus its
 * trailing whitespace is dropped.
 *
 * Returns the status of ap_pass_brigade() when data is passed on, the
 * error from apr_bucket_read()/apr_brigade_flatten() on failure, or
 * APR_SUCCESS when the whole brigade was consumed without reaching a
 * decision yet (nothing is passed downstream in that case).
 */
static apr_status_t trimxml_filter(ap_filter_t *f, apr_bucket_brigade *bb)
{
    request_rec     *r   = f->r;
    trimxml_ctx_t   *ctx = f->ctx;

    if ((APR_BRIGADE_EMPTY(bb))                 /* fast exit */
     || (ctx && ctx->stage == STAGE_DONE))      /* already done */
        return ap_pass_brigade(f->next, bb);

    if (! ctx) {
        /* initialization on the first time */
        if (! xml_must_be_trimmed(r)) {
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }
        else {
            f->ctx = ctx = apr_palloc(r->pool, sizeof(*ctx));

            ctx->bb1 = apr_brigade_create(r->pool, f->c->bucket_alloc);
            ctx->bb2 = apr_brigade_create(r->pool, f->c->bucket_alloc);
            if (! ctx->bb1 || ! ctx->bb2) /** @todo: */;

            /* wanted_len is set when leaving STAGE_SKIP_SPACE */
            ctx->stage = STAGE_START;
        }
    }

    while (! APR_BRIGADE_EMPTY(bb)) {
        apr_status_t    rv;
        apr_bucket      *e;
        apr_size_t      len;
        const char      *str, *pos;

        e = APR_BRIGADE_FIRST(bb);

        if (APR_BUCKET_IS_EOS(e)) {
            /* End of stream before a decision was completed: give back
             * whatever is still set aside and finish. */
            switch (ctx->stage) {
            case STAGE_WANT_TOKEN:
                APR_BRIGADE_PREPEND(bb, ctx->bb2);
                APR_BRIGADE_PREPEND(bb, ctx->bb1);

                apr_brigade_destroy(ctx->bb1);
                apr_brigade_destroy(ctx->bb2);
                break;

            case STAGE_SKIP_SPACE:
                /* might be If-Modified-Since */
                apr_brigade_destroy(ctx->bb2);
                /* processing advance (prepend bb1) */
                /* fallthrough */

            case STAGE_SKIP_TOKEN:
            case STAGE_SKIP_CRLF:
                /* Final */
                APR_BRIGADE_PREPEND(bb, ctx->bb1);

                apr_brigade_destroy(ctx->bb1);
            }

            ctx->stage = STAGE_DONE;

            return ap_pass_brigade(f->next, bb);
        }

        rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
        if (rv != APR_SUCCESS)
            return rv;  /** @todo: */

        switch (ctx->stage) {

        case STAGE_SKIP_SPACE:
            for (pos = str; len > 0; pos ++, len --)
                if (! apr_isspace(*pos))
                    break;

            if (len <= 0) {
                /* whole bucket is filled with spaces, wait next */
                APR_BUCKET_MOVE_TO_BRIGADE(e, ctx->bb1);
            }
            else {
                /* keep only the whitespace prefix in bb1; the remainder
                 * stays at the head of bb for the next stage */
                apr_bucket_split(e, pos - str);
                APR_BUCKET_MOVE_TO_BRIGADE(e, ctx->bb1);

                ctx->wanted_len = MATCHING_TOKEN_LEN;

                ctx->stage = STAGE_WANT_TOKEN;
            }
            break;

        case STAGE_WANT_TOKEN:
            /* consume up to wanted_len bytes from this bucket */
            pos = str;
            while (len > 0 && ctx->wanted_len > 0) {
                pos ++, ctx->wanted_len --, len --;
            }

            if (ctx->wanted_len > 0) {  /* not enough */
                APR_BUCKET_MOVE_TO_BRIGADE(e, ctx->bb2);
            }
            else {
                char *buf;

                apr_bucket_split(e, pos - str);
                APR_BUCKET_MOVE_TO_BRIGADE(e, ctx->bb2);

                /* Now, make judgement */
                len = MATCHING_TOKEN_LEN + 1;
                buf = apr_palloc(r->pool, len);
                if (! buf) /** @todo: */;

                rv = apr_brigade_flatten(ctx->bb2, buf, &len);
                if (rv != APR_SUCCESS)
                    return rv;      /** @todo: */

                if (len < MATCHING_TOKEN_LEN
                 || memcmp(buf, MATCHING_TOKEN, MATCHING_TOKEN_LEN)) {
                    /* <?xml?> token doesn't exist */

                    /* Final */
                    APR_BRIGADE_PREPEND(bb, ctx->bb2);
                    APR_BRIGADE_PREPEND(bb, ctx->bb1);

                    apr_brigade_destroy(ctx->bb1);
                    apr_brigade_destroy(ctx->bb2);

                    ctx->stage = STAGE_DONE;

                    return ap_pass_brigade(f->next, bb);
                }
                else {
                    /* <?xml?> token exist */

                    apr_brigade_destroy(ctx->bb2);  /* now useless */

                    ctx->stage = STAGE_SKIP_TOKEN;
                }
            }
            break;

        case STAGE_SKIP_TOKEN:
            for (pos = str; len > 0; pos ++, len --)
                if (*pos == '>')
                    break;

            if (len > 0) {
                apr_bucket_split(e, pos - str + 1); /* includes trail */
                apr_bucket_delete(e);   /* unlink and free */

                ctx->stage = STAGE_SKIP_CRLF;
            }
            else {
                /* No '>' in this bucket: all of it belongs to the
                 * declaration. It must be taken off the brigade, or the
                 * loop would examine the same bucket forever. */
                apr_bucket_delete(e);
            }
            break;

        case STAGE_SKIP_CRLF:
            for (pos = str; len > 0; pos ++, len --)
                if (! apr_isspace(*pos))
                    break;

            if (len > 0) {
                apr_bucket_split(e, pos - str);
                apr_bucket_delete(e);   /* unlink and free */

                /* Final */
                APR_BRIGADE_PREPEND(bb, ctx->bb1);

                apr_brigade_destroy(ctx->bb1);

                ctx->stage = STAGE_DONE;

                return ap_pass_brigade(f->next, bb);
            }
            else {
                /* Bucket holds nothing but whitespace: drop it and keep
                 * looking in the next one (also guarantees progress). */
                apr_bucket_delete(e);
            }
            break;

        }
    }

    /* brigade fully consumed; wait for more data */
    return APR_SUCCESS;
}

/*
 * Per-directory config constructor.
 *
 * Allocates a zeroed trimxml_config_rec from pool p with the built-in rule
 * enabled and no conditional environment names. The second argument is
 * not used.
 */
static void *create_config_rec(apr_pool_t *p, char *dummy)
{
    trimxml_config_rec *conf;

    conf = apr_pcalloc(p, sizeof(*conf));

    conf->conditional_env  = NULL;
    conf->use_builtin_rule = 1;     /* default: On */

    return conf;
}

/*
 * Handler for one argument of the TrimXmlConditionalEnv directive.
 *
 * Appends arg (the pointer itself, not a copy) to the config's
 * conditional_env array, creating the array from cmd->pool on first use.
 * Always returns NULL, i.e. no error message.
 */
static const char *add_conditonal_env(cmd_parms *cmd, void *mconfig, const char *arg)
{
    trimxml_config_rec *conf = mconfig;
    const char **slot;

    if (conf->conditional_env == NULL) {
        conf->conditional_env = apr_array_make(cmd->pool, 1, sizeof(char *));
    }

    slot  = (const char **) apr_array_push(conf->conditional_env);
    *slot = arg;

    return NULL;
}

/* Configuration directives provided by this module. */
static const command_rec trimxml_cmds[] =
{
    /* On/Off flag stored directly into trimxml_config_rec.use_builtin_rule */
    AP_INIT_FLAG(
        "TrimXmlUseBuiltinRule",
        ap_set_flag_slot,
        (void *) APR_OFFSETOF(trimxml_config_rec, use_builtin_rule),
        OR_OPTIONS,
        "Set to 'Off' to disable builtin browser detection mechanism. "
        "Use this option in conjunction with TrimXmlConditionalEnv. "
        "(default: On)."
    ),

    /* One or more environment variable names; the handler is called once
     * per argument */
    AP_INIT_ITERATE(
        "TrimXmlConditionalEnv",
        add_conditonal_env,
        NULL,
        OR_OPTIONS,
        "A list of environments for trimming &lt;?xml?&gt; tag. "
        "Only available if TrimXmlUseBuiltinRule option is set to 'Off'."
    ),

    { NULL }    /* terminator */
};

/*
 * Registers trimxml_filter as the output filter named "TRIMXML".
 * The pool argument is not used.
 */
static void register_hooks(apr_pool_t *p)
{
    /* AP_FTYPE_CONTENT_SET: after all AP_FTYPE_RESOURCE filters, and
     * doesn't modify content-type */
    ap_register_output_filter("TRIMXML",
                              trimxml_filter,
                              NULL,
                              AP_FTYPE_CONTENT_SET);
}

/*
 * Module definition. Only a per-directory config constructor is supplied;
 * no merge functions and no per-server configuration are provided.
 */
module AP_MODULE_DECLARE_DATA trimxml_module =
{
    STANDARD20_MODULE_STUFF,
    create_config_rec,  /* create per-directory config structure */
    NULL,               /* merge per-directory config structure */
    NULL,               /* create per-server config structure */
    NULL,               /* merge per-server config structure */
    trimxml_cmds,       /* command table (directives) */
    register_hooks      /* register hooks */
};

ライセンス

NYSL