符合 JavaScript unescape 函数(支持 Unicode)的 Java escape 编码实现
最近在弄一个跨域的交互程序,交互过程非常复杂.而且,其中一端只能使用 javascript 来处理返回.
于是,发现 Java 的 URLEncoder.encode 方法与 JavaScript 的 unescape 方法已经不能兼容了.
现在浏览器的主流版本中 JavaScript 已经开始使用 Unicode 作为字符串的内码进行编码.即, Unicode 字符 escape 之后的样子类似于 %uabcd 而 Java 的 URLEncoder.encode 方法得到的结果是 %ab%cd ,转换出来的结果必定是乱码了.
方法有两个,其一,用 java 实现一个生成新编码的方法.其二,用 javascript 实现一个针对旧编码的解码方法.想了想,还是用 java 要方便一点点吧.
上次贴出的 escape 的 java 实现,似乎对大家有用.最近曾军同学提出 unescape 对于 javascript 与后端的交互也有意义.于是就一并实现了.再次贴上来.
代码非常简单,注释就免了吧.一贯的,仍然遵循"以空间换时间"原则.
需要的桶子,一贯的,help yourself.
/**
 * Java implementation of the JavaScript escape/unescape encoding.
 * author jackyz
 * keep this copyright info while using this method by free
 *
 * <p>Encoding rules used by {@link #escape(String)}:
 * <ul>
 *   <li>a space becomes {@code '+'};</li>
 *   <li>ASCII letters, digits and {@code - _ . ! ~ * ' ( )} are copied unchanged;</li>
 *   <li>every other character up to 0x7f becomes {@code %xx} (two lowercase hex digits);</li>
 *   <li>every character above 0x7f becomes {@code %uxxxx} (four lowercase hex digits).</li>
 * </ul>
 *
 * <p>NOTE(review): the space/'+' mapping is this class's own convention; a script-side
 * {@code unescape()} is not expected to turn '+' back into a space - confirm against
 * the consumer before relying on it.
 */
public class Escape {

    /** Lowercase hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** Punctuation that is emitted without encoding. */
    private static final String UNRESERVED_MARKS = "-_.!~*'()";

    /**
     * Encodes a string using the rules described on this class.
     *
     * @param s the text to encode; must not be {@code null}
     * @return the encoded text
     */
    public static String escape(String s) {
        int len = s.length();
        StringBuilder out = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            if (c == ' ') {                      // space : map to '+'
                out.append('+');
            } else if (isUnreserved(c)) {        // letters, digits, marks : as is
                out.append(c);
            } else if (c <= 0x007f) {            // other ASCII : map to %xx
                out.append('%');
                appendHexByte(out, c);
            } else {                             // everything else : map to %uxxxx
                out.append('%').append('u');
                appendHexByte(out, c >>> 8);
                appendHexByte(out, c & 0x00ff);
            }
        }
        return out.toString();
    }

    /**
     * Decodes a string produced by {@link #escape(String)}.
     *
     * <p>{@code '+'} becomes a space, {@code %xx} becomes the character with that
     * code and {@code %uxxxx} becomes the character with that 16-bit code. Hex
     * digits are accepted in either case. A {@code '%'} that is not followed by a
     * complete, valid hex sequence is copied through literally, and every other
     * character is copied unchanged, so no input character is silently lost.
     *
     * @param s the text to decode; must not be {@code null}
     * @return the decoded text
     */
    public static String unescape(String s) {
        int len = s.length();
        StringBuilder out = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            char c = s.charAt(i);
            if (c == '+') {                      // '+' : map to ' '
                out.append(' ');
                i++;
            } else if (c == '%') {
                boolean wide = i + 1 < len && s.charAt(i + 1) == 'u';
                int digitsStart = wide ? i + 2 : i + 1;
                int digitCount = wide ? 4 : 2;
                int code = parseHex(s, digitsStart, digitCount);
                if (code >= 0) {
                    out.append((char) code);
                    i = digitsStart + digitCount;
                } else {
                    // Malformed or truncated sequence: keep the '%' and let the
                    // following characters be handled on their own.
                    out.append('%');
                    i++;
                }
            } else {                             // anything else : as is
                out.append(c);
                i++;
            }
        }
        return out.toString();
    }

    /** Returns whether {@code c} is copied to the output without encoding. */
    private static boolean isUnreserved(char c) {
        return ('A' <= c && c <= 'Z')
            || ('a' <= c && c <= 'z')
            || ('0' <= c && c <= '9')
            || UNRESERVED_MARKS.indexOf(c) >= 0;
    }

    /** Appends the low eight bits of {@code b} as two lowercase hex digits. */
    private static void appendHexByte(StringBuilder out, int b) {
        out.append(HEX_DIGITS[(b >>> 4) & 0x0f]);
        out.append(HEX_DIGITS[b & 0x0f]);
    }

    /**
     * Parses {@code count} ASCII hex digits of {@code s} starting at {@code start}.
     *
     * @return the parsed value, or -1 if the digits run past the end of the
     *         string or any of them is not a hex digit
     */
    private static int parseHex(String s, int start, int count) {
        if (start + count > s.length()) {
            return -1;
        }
        int value = 0;
        for (int k = start; k < start + count; k++) {
            int digit = hexValue(s.charAt(k));
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /** Returns the value of an ASCII hex digit, or -1 if {@code c} is not one. */
    private static int hexValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('a' <= c && c <= 'f') {
            return c - 'a' + 10;
        }
        if ('A' <= c && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Prints a sample string, its encoded form, and the decoded round trip. */
    public static void main(String[] args) {
        String stest = "中文1234 abcd[]()<+>,.~//";
        System.out.println(stest);
        System.out.println(escape(stest));
        System.out.println(unescape(escape(stest)));
    }
}
于是,发现 Java 的 URLEncoder.encode 方法与 JavaScript 的 unescape 方法已经不能兼容了.
现在浏览器的主流版本中 JavaScript 已经开始使用 Unicode 作为字符串的内码进行编码.即, Unicode 字符 escape 之后的样子类似于 %uabcd 而 Java 的 URLEncoder.encode 方法得到的结果是 %ab%cd ,转换出来的结果必定是乱码了.
方法有两个,其一,用 java 实现一个生成新编码的方法.其二,用 javascript 实现一个针对旧编码的解码方法.想了想,还是用 java 要方便一点点吧.
上次贴出的 escape 的 java 实现,似乎对大家有用.最近曾军同学提出 unescape 对于 javascript 与后端的交互也有意义.于是就一并实现了.再次贴上来.
代码非常简单,注释就免了吧.一贯的,仍然遵循"以空间换时间"原则.
需要的桶子,一贯的,help yourself.
/**
 * Java implementation of the JavaScript escape/unescape encoding.
 * author jackyz
 * keep this copyright info while using this method by free
 *
 * <p>Encoding rules used by {@link #escape(String)}:
 * <ul>
 *   <li>a space becomes {@code '+'};</li>
 *   <li>ASCII letters, digits and {@code - _ . ! ~ * ' ( )} are copied unchanged;</li>
 *   <li>every other character up to 0x7f becomes {@code %xx} (two lowercase hex digits);</li>
 *   <li>every character above 0x7f becomes {@code %uxxxx} (four lowercase hex digits).</li>
 * </ul>
 *
 * <p>NOTE(review): the space/'+' mapping is this class's own convention; a script-side
 * {@code unescape()} is not expected to turn '+' back into a space - confirm against
 * the consumer before relying on it.
 */
public class Escape {

    /** Lowercase hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** Punctuation that is emitted without encoding. */
    private static final String UNRESERVED_MARKS = "-_.!~*'()";

    /**
     * Encodes a string using the rules described on this class.
     *
     * @param s the text to encode; must not be {@code null}
     * @return the encoded text
     */
    public static String escape(String s) {
        int len = s.length();
        StringBuilder out = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            if (c == ' ') {                      // space : map to '+'
                out.append('+');
            } else if (isUnreserved(c)) {        // letters, digits, marks : as is
                out.append(c);
            } else if (c <= 0x007f) {            // other ASCII : map to %xx
                out.append('%');
                appendHexByte(out, c);
            } else {                             // everything else : map to %uxxxx
                out.append('%').append('u');
                appendHexByte(out, c >>> 8);
                appendHexByte(out, c & 0x00ff);
            }
        }
        return out.toString();
    }

    /**
     * Decodes a string produced by {@link #escape(String)}.
     *
     * <p>{@code '+'} becomes a space, {@code %xx} becomes the character with that
     * code and {@code %uxxxx} becomes the character with that 16-bit code. Hex
     * digits are accepted in either case. A {@code '%'} that is not followed by a
     * complete, valid hex sequence is copied through literally, and every other
     * character is copied unchanged, so no input character is silently lost.
     *
     * @param s the text to decode; must not be {@code null}
     * @return the decoded text
     */
    public static String unescape(String s) {
        int len = s.length();
        StringBuilder out = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            char c = s.charAt(i);
            if (c == '+') {                      // '+' : map to ' '
                out.append(' ');
                i++;
            } else if (c == '%') {
                boolean wide = i + 1 < len && s.charAt(i + 1) == 'u';
                int digitsStart = wide ? i + 2 : i + 1;
                int digitCount = wide ? 4 : 2;
                int code = parseHex(s, digitsStart, digitCount);
                if (code >= 0) {
                    out.append((char) code);
                    i = digitsStart + digitCount;
                } else {
                    // Malformed or truncated sequence: keep the '%' and let the
                    // following characters be handled on their own.
                    out.append('%');
                    i++;
                }
            } else {                             // anything else : as is
                out.append(c);
                i++;
            }
        }
        return out.toString();
    }

    /** Returns whether {@code c} is copied to the output without encoding. */
    private static boolean isUnreserved(char c) {
        return ('A' <= c && c <= 'Z')
            || ('a' <= c && c <= 'z')
            || ('0' <= c && c <= '9')
            || UNRESERVED_MARKS.indexOf(c) >= 0;
    }

    /** Appends the low eight bits of {@code b} as two lowercase hex digits. */
    private static void appendHexByte(StringBuilder out, int b) {
        out.append(HEX_DIGITS[(b >>> 4) & 0x0f]);
        out.append(HEX_DIGITS[b & 0x0f]);
    }

    /**
     * Parses {@code count} ASCII hex digits of {@code s} starting at {@code start}.
     *
     * @return the parsed value, or -1 if the digits run past the end of the
     *         string or any of them is not a hex digit
     */
    private static int parseHex(String s, int start, int count) {
        if (start + count > s.length()) {
            return -1;
        }
        int value = 0;
        for (int k = start; k < start + count; k++) {
            int digit = hexValue(s.charAt(k));
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /** Returns the value of an ASCII hex digit, or -1 if {@code c} is not one. */
    private static int hexValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('a' <= c && c <= 'f') {
            return c - 'a' + 10;
        }
        if ('A' <= c && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Prints a sample string, its encoded form, and the decoded round trip. */
    public static void main(String[] args) {
        String stest = "中文1234 abcd[]()<+>,.~//";
        System.out.println(stest);
        System.out.println(escape(stest));
        System.out.println(unescape(escape(stest)));
    }
}
闽公网安备 35060202000074号