Browse Source

add ngx_http_trim_filter_module

pull/274/head
taoyuanyuan 12 years ago
parent
commit
ea0ecfac8f
  1. 11
      auto/modules
  2. 14
      auto/options
  3. 3
      auto/sources
  4. 114
      docs/modules/ngx_http_trim_filter_module_cn.md
  5. 1560
      src/http/modules/ngx_http_trim_filter_module.c
  6. 393
      tests/test-nginx/cases/trim.t

11
auto/modules

@ -135,6 +135,7 @@ fi
# ngx_http_addition_filter
# ngx_http_userid_filter
# ngx_http_footer_filter
# ngx_http_trim_filter
# ngx_http_headers_filter
# ngx_http_copy_filter
# ngx_http_range_body_filter
@ -240,6 +241,16 @@ if [ $HTTP_FOOTER_SHARED = YES ]; then
NGX_SHARED_SRCS="$NGX_SHARED_SRCS|$HTTP_FOOTER_FILTER_SRCS"
fi
if [ $HTTP_TRIM = YES ]; then
HTTP_FILTER_MODULES="$HTTP_FILTER_MODULES $HTTP_TRIM_FILTER_MODULE"
HTTP_SRCS="$HTTP_SRCS $HTTP_TRIM_FILTER_SRCS"
fi
if [ $HTTP_TRIM_SHARED = YES ]; then
NGX_SHARED_MODULES="$NGX_SHARED_MODULES $HTTP_TRIM_FILTER_MODULE"
NGX_SHARED_SRCS="$NGX_SHARED_SRCS|$HTTP_TRIM_FILTER_SRCS"
fi
HTTP_MODULES="$HTTP_MODULES $HTTP_STATIC_MODULE"
if [ $HTTP_GZIP_STATIC = YES ]; then

14
auto/options

@ -128,6 +128,7 @@ NGX_ALL_MODULES="
ngx_http_addition_filter_module
ngx_http_userid_filter_module
ngx_http_footer_filter_module
ngx_http_trim_filter_module
ngx_http_headers_filter_module
ngx_http_copy_filter_module
ngx_http_range_body_filter_module
@ -245,6 +246,7 @@ HTTP_MP4=NO
HTTP_GZIP_STATIC=NO
HTTP_UPSTREAM_IP_HASH=YES
HTTP_FOOTER=YES
HTTP_TRIM=YES
HTTP_USER_AGENT=YES
HTTP_UPSTREAM_CHECK=YES
HTTP_UPSTREAM_LEAST_CONN=YES
@ -274,6 +276,7 @@ HTTP_MP4_SHARED=NO
HTTP_CHARSET_SHARED=NO
HTTP_USERID_SHARED=NO
HTTP_FOOTER_SHARED=NO
HTTP_TRIM_SHARED=NO
HTTP_ACCESS_SHARED=NO
HTTP_AUTOINDEX_SHARED=NO
HTTP_STATUS_SHARED=NO
@ -488,6 +491,8 @@ do
HTTP_USERID=NO ;;
--with-http_footer_filter_module=shared) HTTP_FOOTER_SHARED=YES
HTTP_FOOTER=NO ;;
--with-http_trim_filter_module=shared) HTTP_TRIM_SHARED=YES
HTTP_TRIM=NO ;;
--with-http_access_module=shared) HTTP_ACCESS_SHARED=YES
HTTP_ACCESS=NO ;;
--with-http_autoindex_module=shared) HTTP_AUTOINDEX_SHARED=YES
@ -540,6 +545,8 @@ do
HTTP_USERID_SHARED=NO ;;
--without-http_footer_filter_module) HTTP_FOOTER=NO
HTTP_FOOTER_SHARED=NO ;;
--without-http_trim_filter_module) HTTP_TRIM=NO
HTTP_TRIM_SHARED=NO ;;
--without-http_access_module) HTTP_ACCESS=NO
HTTP_ACCESS_SHARED=NO ;;
--without-http_auth_basic_module) HTTP_AUTH_BASIC=NO ;;
@ -761,6 +768,8 @@ cat << END
enable ngx_http_userid_filter_module (shared)
--with-http_footer_filter_module=shared
enable ngx_http_footer_filter_module (shared)
--with-http_trim_filter_module=shared
enable ngx_http_trim_filter_module (shared)
--with-http_access_module=shared enable ngx_http_access_module (shared)
--with-http_autoindex_module=shared
enable ngx_http_autoindex_module (shared)
@ -797,6 +806,7 @@ cat << END
--without-http_userid_module disable ngx_http_userid_filter_module
--without-http_footer_filter_module
disable ngx_http_footer_filter_module
--without-http_trim_filter_module disable ngx_http_trim_filter_module
--without-http_access_module disable ngx_http_access_module
--without-http_auth_basic_module disable ngx_http_auth_basic_module
--without-http_autoindex_module disable ngx_http_autoindex_module
@ -936,6 +946,7 @@ if [ $NGX_SHARED_ALL_MODULES = YES ]; then
HTTP_CHARSET_SHARED=YES
HTTP_USERID_SHARED=YES
HTTP_FOOTER_SHARED=YES
HTTP_TRIM_SHARED=YES
HTTP_ACCESS_SHARED=YES
HTTP_AUTOINDEX_SHARED=YES
HTTP_MAP_SHARED=YES
@ -971,6 +982,7 @@ if [ $NGX_SHARED_ALL_MODULES = YES ]; then
HTTP_CHARSET=NO
HTTP_USERID=NO
HTTP_FOOTER=NO
HTTP_TRIM=NO
HTTP_ACCESS=NO
HTTP_AUTOINDEX=NO
HTTP_MAP=NO
@ -1036,6 +1048,7 @@ elif [ $NGX_STATIC_ALL_MODULES = YES ]; then
HTTP_GZIP_STATIC=YES
HTTP_UPSTREAM_IP_HASH=YES
HTTP_FOOTER=YES
HTTP_TRIM=YES
HTTP_USER_AGENT=YES
HTTP_UPSTREAM_CHECK=YES
HTTP_UPSTREAM_LEAST_CONN=YES
@ -1058,6 +1071,7 @@ elif [ $NGX_STATIC_ALL_MODULES = YES ]; then
HTTP_CHARSET_SHARED=NO
HTTP_USERID_SHARED=NO
HTTP_FOOTER_SHARED=NO
HTTP_TRIM_SHARED=NO
HTTP_ACCESS_SHARED=NO
HTTP_AUTOINDEX_SHARED=NO
HTTP_STATUS_SHARED=NO

3
auto/sources

@ -388,6 +388,9 @@ HTTP_ADDITION_SRCS=src/http/modules/ngx_http_addition_filter_module.c
HTTP_FOOTER_FILTER_MODULE=ngx_http_footer_filter_module
HTTP_FOOTER_FILTER_SRCS=src/http/modules/ngx_http_footer_filter_module.c
HTTP_TRIM_FILTER_MODULE=ngx_http_trim_filter_module
HTTP_TRIM_FILTER_SRCS=src/http/modules/ngx_http_trim_filter_module.c
HTTP_DAV_MODULE=ngx_http_dav_module
HTTP_DAV_SRCS=src/http/modules/ngx_http_dav_module.c

114
docs/modules/ngx_http_trim_filter_module_cn.md

@ -0,0 +1,114 @@
# trim 模块
## 介绍
该模块用于删除 html , 内嵌 javascript 和 css 中的注释以及重复的空白符。
## 配置
location / {
trim on;
trim_jscss on;
}
## 指令
**trim** `on` | `off`
**默认:** `trim off`
**上下文:** `http, server, location`
在配置的地方使模块有效(失效),删除 html 的注释以及重复的空白符(\n,\r,\t,' ')。
例外:对于 `pre`,`textarea`,`ie注释`,`script`,`style` 等标签内的内容不作删除操作。
<br/>
**trim_jscss** `on` | `off`
**默认:** `trim_jscss off`
**上下文:** `http, server, location`
在配置的地方使模块有效(失效),删除内嵌 javascript 和 css 的注释以及重复的空白符(\n,\r,\t,' ')。
例外:对于非javascript代码的`script`,非css代码的`style` 等标签内的内容不作删除操作。
<br/>
**trim_types** `MIME types`
**默认:** `trim_types text/html`
**上下文:** `http, server, location`
定义哪些[MIME types](http://en.wikipedia.org/wiki/MIME_type)类型的响应可以被处理。
<br/>
## 调试
添加请求参数http_trim=off,将关闭trim功能,返回原始代码,方便对照调试。
格式如下:
`http://www.xxx.com/index.html?http_trim=off`
## trim规则
### html
##### 空白符
+ 正文中的 '\r' 直接删除。
+ 正文中的 '\n' 替换为 '空格', 然后重复的 '\t' 和 '空格' 保留第一个。
+ 标签中的 '\r','\n','\t','空格' 保留第一个。
+ '>' 后的 '\n' 直接删除。
+ 标签的双引号和单引号内的空白符不做删除。
\<div class="no &nbsp; &nbsp; &nbsp; trim"\>
+ 保留第一行DTD声明的 '\n'。
+ `pre`,`textarea` 标签的内容不做删除。
+ `script`,`style` 标签的内容不做删除。
+ ie条件注释的内容不做删除。
##### 注释
+ 如果是ie条件注释不做操作。
判断规则:`<!--[if <![endif]-->` 之间的内容判断为ie条件注释。
+ 正常html注释直接删除. `<!-- -->`
### javascript
借鉴 jsmin 的处理规则 (http://www.crockford.com/javascript/jsmin.html)
`<script type="text/javascript">` 或者 `<script>` 标签认为是javascript。
##### 空白符
+ '(','[','{',';',',','>','=' 后的 '\n','\t','空格' 直接删除。
+ '\r' 直接删除。
+ 其他情况 重复的 '\n','\t','空格' 保留第一个。
+ 单引号和双引号内不删除。
如下不做操作:
"hello &nbsp; \\\\" &nbsp; world"
'hello &nbsp; \' &nbsp; world'
+ 正则表达式的内容不删除。
判断规则:'/' 前的非空字符是 ',','(','=' 三种的即认为是正则表达式。( 同jsmin的判断)
如下不做操作:
var re=/1 &nbsp; &nbsp; &nbsp;2/;
data.match(/1 &nbsp; &nbsp; 2/);
##### 注释
+ 删除单行注释。 `//`
+ 删除多行注释。 `/* */`
注意:javascript也有一种条件注释,不过貌似用得很少,jsmin直接删除的,trim也是直接删除。
http://en.wikipedia.org/wiki/Conditional_comment
### css
借鉴 YUI Compressor 的处理规则 (http://yui.github.io/yuicompressor/css.html)
`<style type="text/css">` 或者 `<style>` 标签认为是css.
##### 空白符
+ ';','>','{','}',':',',' 后的 '\n','\t','空格' 直接删除。
+ '\r' 直接删除。
+ 其他情况 连续的 '\n', '\t' 和 '空格' 保留第一个。
+ 单引号和双引号内不删除。
如下不做操作:
"hello &nbsp; \\\\\" &nbsp; world"
'hello &nbsp; \' &nbsp; &nbsp; world'
##### 注释
+ child selector hack的注释不删除。
`html>/**/body p{color:blue}`
+ IE5 /Mac hack 的注释不删除。
`/*\*/.selector{color:khaki}/**/`
+ 其他情况删除注释。 `/* */`

1560
src/http/modules/ngx_http_trim_filter_module.c
File diff suppressed because it is too large
View File

393
tests/test-nginx/cases/trim.t

@ -0,0 +1,393 @@
use lib 'lib';
use Test::Nginx::Socket;
log_level('debug');
plan tests => 2 * blocks();
$ENV{TEST_NGINX_TRIM_PORT} ||= "1984";
run_tests();
# etcproxy 1986 1984
# TEST_NGINX_TRIM_PORT=1986 prove ../cases/trim.t
__DATA__
=== TEST 1: do not trim within 'textarea' 'pre' 'ie-comment'
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<textarea>
hello
world!
</textarea>
<pre>
hello world!
</pre>
<!--[if IE]> hello world ! <![endif]-->
<!-- hello world ! -->
<!--[if !IE ]>--> hello world ! <!--<![endif]-->
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<textarea>
hello
world!
</textarea><pre>
hello world!
</pre><!--[if IE]> hello world ! <![endif]--><!--[if !IE ]>--> hello world ! <!--<![endif]-->'
=== TEST 2: trim within other tags
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<body>hello world, it
is good to see you </body>
<body>hello world, it
is good to see you </body>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<body>hello world, it is good to see you </body><body>hello world, it is good to see you </body>'
=== TEST 3: trim within non-ie comment
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<body>hello <!--world--></body>
<!--[if IE]> hello world ! <![endif]-->
<!-- hello world! -->
<!--[if !IE ]>--> hello world ! <!--<![endif]-->
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<body>hello </body><!--[if IE]> hello world ! <![endif]--><!--[if !IE ]>--> hello world ! <!--<![endif]-->'
=== TEST 4: do not trim within tag quote
--- config
trim on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<body
style="text-align: center;">hello world, it
is good to see you </body>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<body
style="text-align: center;">hello world, it is good to see you </body>'
=== TEST 5: trim newline
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<html>
<body>hello world!<body>
<!-- --->
<html>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<html><body>hello world!<body> <html>'
=== TEST 6: return zero size
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
--- request
GET /t/trim.html
--- response_body eval
''
=== TEST 7: trim more tags
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
< <PRE>hello world ! </pre>
<2 <pre>hello world ! </pre>
<<< <pre>hello world ! </pre>
< < <pre>hello world ! </pre>
< <<pre>hello world ! </pre>
<x <<pre>hello world ! </pre>
< <<<!doctype html>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
< <PRE>hello world ! </pre><2 <pre>hello world ! </pre><<< <pre>hello world ! </pre>< < <pre>hello world ! </pre>< <<pre>hello world ! </pre><x <<pre>hello world ! </pre>< <<<!doctype html>'
=== TEST 8: trim Chinese characters
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<title>世界 你好 !</title>
--- request
GET /t/trim.html
--- response_body eval
'<title>世界 你好 !</title>
'
=== TEST 9: sendfile on
--- config
sendfile on;
trim on;
trim_jscss on;
--- user_files
>>> trim.html
<!DOCTYPE html>
<body>hello world, it
is good to see you </body>
<!-- trimoff -->
--- request
GET /trim.html
--- response_body eval
'<!DOCTYPE html>
<body>hello world, it is good to see you </body>'
=== TEST 10: if $arg_http_trim is off, trim off.
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<body>hello world, it
is good to see you </body>
<!-- trimoff -->
--- request
GET /t/trim.html?http_trim=off&hello=world
--- response_body
<!DOCTYPE html>
<body>hello world, it
is good to see you </body>
<!-- trimoff -->
=== TEST 11: trim javascript comment
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<script>
//// single comment
document.write("hello world");
</script>
<script type="text/javascript">
/*** muitl comment
! ***/
</script>
<script type="text/vbscript">
/* no javscript code !*/
</script>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<script>document.write("hello world");</script><script type="text/javascript"></script><script type="text/vbscript">
/* no javscript code !*/
</script>'
=== TEST 12: do not trim javascript quote and RE
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<script>
document.write("hello world");
document.write("hello \" world");
var reg=/hello \/ world /g;
var reg= /hello \/ world /g;
str.replace(/ /,"hello");
str.replace( / /,"hello");
</script>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<script>document.write("hello world");document.write("hello \" world");var reg=/hello \/ world /g;var reg=/hello \/ world /g;str.replace(/ /,"hello");str.replace(/ /,"hello");</script>'
=== TEST 13: trim css comment
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<style type="text/css">
/*** css comment
! ***/
body {
background-color: black;
}
</style>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<style type="text/css">body {background-color:black;}</style>'
=== TEST 14: do not trim css quote
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<style type="text/css">
"hello world");
"hello \" world");
"hello \\\" world");
</style>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<style type="text/css">"hello world");"hello \" world");"hello \\\\\" world");</style>'
=== TEST 15: trim aplus.js
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<script type="text/javascript">
(function (d) {
var t=d.createElement("script");t.type="text/javascript";t.async=true;t.id="tb-beacon-aplus";
t.setAttribute("exparams","category=&userid=&aplus");
t.src=("https:"==d.location.protocol?"https://s":"http://a")+".tbcdn.cn/s/aplus_v2.js";
d.getElementsByTagName("head")[0].appendChild(t);
})(document);
</script>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<script type="text/javascript">(function (d) {var t=d.createElement("script");t.type="text/javascript";t.async=true;t.id="tb-beacon-aplus";t.setAttribute("exparams","category=&userid=&aplus");t.src=("https:"==d.location.protocol?"https://s":"http://a")+".tbcdn.cn/s/aplus_v2.js";d.getElementsByTagName("head")[0].appendChild(t);})(document);</script>'
=== TEST 15: do not trim css comment of child selector hack
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<style type="text/css">
html >/**/ body p {
color: blue;
}
</style>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<style type="text/css">html >/**/ body p {color:blue;}</style>'
=== TEST 16: do not trim css comment of IE5/Mac hack
--- config
trim on;
trim_jscss on;
location /t/ { proxy_buffering off; proxy_pass http://127.0.0.1:$TEST_NGINX_TRIM_PORT/;}
location /trim.html { trim off;}
--- user_files
>>> trim.html
<!DOCTYPE html>
<style type="text/css">
/* Ignore the next rule in IE mac \*/
.selector {
color: khaki;
}
/* Stop ignoring in IE mac */
</style>
--- request
GET /t/trim.html
--- response_body eval
'<!DOCTYPE html>
<style type="text/css">/*\\*/
.selector {
color: khaki;
}
/**/
</style>'
Loading…
Cancel
Save