import org.jsoup.Jsoup; import org.jsoup.helper.W3CDom; import org.jsoup.nodes.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import com.github.andrewoma.dexx.collection.Pair; import net.rootdev.javardfa.ParserFactory; import net.rootdev.javardfa.RDFa; import net.rootdev.javardfa.Setting; import net.rootdev.javardfa.StatementSink; import net.rootdev.javardfa.uri.IRIResolver; public class HtmlParserExample { public static void main(String[] args) throws Exception { Document document = Jsoup.connect("http://example.com").get(); org.w3c.dom.Document w3cDocument = new W3CDom().fromJsoup(document); RDFa rdfa = new RDFa(ParserFactory.create(w3cDocument), Setting.DEFAULTS, IRIResolver.getDefaultIriResolver()); StatementSink sink = new StatementSink(); rdfa.addListener(sink); rdfa profil(); Iterable<Pair<Node, NodeList>> statements = sink.getStatementGroups(); for (Pair<Node, NodeList> statement : statements) { Node subject = statement.getFirst(); NodeList predicatesAndObjects = statement.getSecond(); } } }


// Previous:
// Next:
// Switch to Chinese