Maven dependency (pom.xml):

<dependencies>
    <dependency>
        <groupId>org.apache.parquet</groupId>
        <artifactId>parquet-hadoop</artifactId>
        <version>1.12.0</version>
    </dependency>
</dependencies>

ParquetExample.java writes two records to example.parquet and reads them back:

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

import java.io.IOException;

public class ParquetExample {
    public static void main(String[] args) throws IOException {
        // Schema: a required 32-bit int and an optional UTF-8 string.
        MessageType schema = MessageTypeParser.parseMessageType(
                "message Pair { "
                        + " required int32 key; "
                        + " optional binary value (UTF8); "
                        + "}");

        Path file = new Path("example.parquet");

        // Write two records. ExampleParquetWriter wires up GroupWriteSupport
        // with the schema, so each appended field is validated against it.
        try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
                .withType(schema)
                .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) // allow re-running the example
                .build()) {
            SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
            writer.write(groupFactory.newGroup().append("key", 1).append("value", "value1"));
            writer.write(groupFactory.newGroup().append("key", 2).append("value", "value2"));
        }

        // Read the records back and print each field.
        try (ParquetReader<Group> reader =
                     ParquetReader.builder(new GroupReadSupport(), file).build()) {
            Group record;
            while ((record = reader.read()) != null) {
                System.out.println("key: " + record.getInteger("key", 0)
                        + ", value: " + record.getString("value", 0));
            }
        }
    }
}

Running the example should print key: 1, value: value1 followed by key: 2, value: value2.
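The schema can also be built programmatically with parquet-hadoop's Types builder instead of parsing a string. A minimal sketch, equivalent to the "message Pair" schema above (the class and method names here are hypothetical, for illustration only):

import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Types;

import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;

public class PairSchema {
    // Builds the same schema as MessageTypeParser.parseMessageType above.
    static MessageType pairSchema() {
        return Types.buildMessage()
                .required(INT32).named("key")
                .optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("value")
                .named("Pair");
    }
}

Because Parquet is columnar, a reader can also be restricted to a subset of columns: GroupReadSupport reads a projection schema from the ReadSupport.PARQUET_READ_SCHEMA key in the Hadoop Configuration. A sketch under that assumption, reusing example.parquet from above (class name hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public class ProjectedRead {
    public static void main(String[] args) throws Exception {
        // Ask GroupReadSupport to materialize only the "key" column;
        // the "value" column's pages are not decoded at all.
        Configuration conf = new Configuration();
        conf.set(ReadSupport.PARQUET_READ_SCHEMA, "message Pair { required int32 key; }");

        try (ParquetReader<Group> reader =
                     ParquetReader.builder(new GroupReadSupport(), new Path("example.parquet"))
                             .withConf(conf)
                             .build()) {
            Group record;
            while ((record = reader.read()) != null) {
                System.out.println("key: " + record.getInteger("key", 0));
            }
        }
    }
}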

