Feb 25
以前用C写过这个函数,想不到今天在Java上也需要这样的函数。其实内容很简单,就是将字符串中的HTML数字字符引用(形如 &amp;#12525; 的十进制Unicode码点)转换为普通的字符(注意这不同于URL中通常采用的UTF-8百分号编码)。因为Java的字符串本身就使用Unicode,熟悉计算机字符编码的朋友们应该一眼就明白函数的意思。
不废话了,代码如下:
- /**
- * Convert HTML numeric character entities (Unicode code points) to part of a Java String
- */
- import java.util.regex.*;
/**
 * Decodes HTML decimal numeric character references (for example
 * {@code &#12525;}, which is U+30ED) embedded in a string into the
 * characters they denote.
 *
 * <p>Only the decimal form {@code &#NNNN;} is recognised; hexadecimal
 * references and named entities are left untouched.
 */
public class UnicodeCeToJavaString {
    /** Regex for a decimal numeric character reference such as "&#12525;"; group 1 is the digits. */
    static final String mbs = "&#(\\d+);";

    /** Compiled once; a Pattern is immutable, so there is no reason to rebuild it per call. */
    private static final Pattern ENTITY_PATTERN = Pattern.compile(mbs);

    /**
     * Replaces every decimal numeric character reference in {@code paramStr}
     * with the character it denotes.
     *
     * <p>Code points above U+FFFF are emitted as a surrogate pair. A reference
     * whose number is not a valid Unicode code point (out of range, or too
     * large to parse as an int) is copied to the output unchanged rather than
     * being replaced by a placeholder.
     *
     * @param paramStr text possibly containing {@code &#NNNN;} references
     * @return the text with all decodable references replaced
     * @throws NullPointerException if {@code paramStr} is null
     */
    public static String EncodeCesToChars(String paramStr) {
        if (paramStr == null) {
            throw new NullPointerException("paramStr");
        }
        Matcher mat = ENTITY_PATTERN.matcher(paramStr);
        StringBuilder sb = new StringBuilder(paramStr.length());
        int copiedUpTo = 0;
        while (mat.find()) {
            // Copy the literal text between the previous match and this one.
            sb.append(paramStr, copiedUpTo, mat.start());
            String decoded = decodeCodePoint(mat.group(1));
            // Append directly instead of using appendReplacement: that method
            // interprets '$' and '\' in the replacement, so decoding "&#36;"
            // or "&#92;" through it throws IllegalArgumentException.
            sb.append(decoded != null ? decoded : mat.group());
            copiedUpTo = mat.end();
        }
        sb.append(paramStr, copiedUpTo, paramStr.length());
        return sb.toString();
    }

    /**
     * Converts the digit part of a numeric character reference (for example
     * {@code "12525"}) into the corresponding string.
     *
     * <p>Never throws. If {@code digits} is not a valid Unicode code point, a
     * one-character string holding U+0000 is returned and a message is written
     * to standard error.
     *
     * @param digits decimal Unicode code point as text
     * @return the character(s) for that code point, or a single NUL character on failure
     */
    static String getMbCharStr(String digits) {
        String decoded = decodeCodePoint(digits);
        if (decoded == null) {
            System.err.println("Error from getMbCharStr: not a valid Unicode code point: " + digits);
            return "\0";
        }
        return decoded;
    }

    /** Returns the string for the decimal code point in {@code digits}, or null if it is not a valid code point. */
    private static String decodeCodePoint(String digits) {
        int codePoint;
        try {
            codePoint = Integer.parseInt(digits);
        } catch (NumberFormatException ignored) {
            // Null, non-numeric, or too many digits for an int: cannot be a code point.
            return null;
        }
        if (!Character.isValidCodePoint(codePoint)) {
            return null;
        }
        // toChars yields a surrogate pair for supplementary code points,
        // where a plain (char) cast would silently truncate to 16 bits.
        return new String(Character.toChars(codePoint));
    }
}
