// Run the in-memory HTML string through Tidy and collect the result as a String.
//
// The String <-> byte[] conversions on both sides of Tidy must use the same
// charsets Tidy is configured with. The no-arg getBytes()/toString() use the
// platform default charset, which cannot match both a "GBK" input setting and
// a "UTF-8" output setting at once, so non-ASCII text was corrupted.
// Since html is already a decoded String, UTF-8 is used for the whole round
// trip: it can represent every character, unlike GBK.
// NOTE(review): if the GBK input setting was deliberate for some other reason,
// encode with Charset.forName("GBK") instead and restore setInputEncoding("GBK").
Tidy tidy = new Tidy();
tidy.setInputEncoding("UTF-8");
tidy.setOutputEncoding("UTF-8");

// Encode explicitly so the bytes match the input encoding configured above.
byte[] htmlBytes = html.getBytes(java.nio.charset.StandardCharsets.UTF_8);
ByteArrayOutputStream output = new ByteArrayOutputStream();
tidy.parse(new ByteArrayInputStream(htmlBytes), output);

// Decode explicitly with the output encoding configured above.
String convertedHtml = new String(output.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);