// Parse input.html with JTidy and pretty-print the resulting DOM to standard output.
Tidy tidy = new Tidy();

// Read and write the markup as UTF-8, and request XHTML output.
tidy.setInputEncoding("UTF-8");
tidy.setOutputEncoding("UTF-8");
tidy.setXHTML(true);

// NOTE(review): this records "tidy_err.txt" as the error-file name; confirm that
// diagnostics are actually routed there when parsing through parseDOM rather than
// the command-line entry point (an explicit error writer may be required).
tidy.setErrfile("tidy_err.txt");

// Declared outside the try so the parsed document stays visible to later code.
Document doc;

// try-with-resources guarantees the input file handle is released even if parsing
// throws. Closing may raise IOException, so the enclosing method must allow it.
try (FileInputStream in = new FileInputStream("input.html")) {
    // A null output stream means "build the DOM only"; printing happens below.
    doc = tidy.parseDOM(in, null);
}

tidy.pprint(doc, System.out);