import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

import java.io.IOException;

public class ParquetExample {

    public static void main(String[] args) throws IOException {
        // Define the file schema: one required int32 column and one required binary column.
        String schemaString = "message Pair { "
                + "  required int32 key; "
                + "  required binary value; "
                + "}";
        MessageType schema = MessageTypeParser.parseMessageType(schemaString);

        Path path = new Path("data.parquet");

        // GroupWriteSupport reads the schema from the Configuration, so it must be
        // registered there before the writer is created.
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);

        ParquetWriter<Group> writer = new ParquetWriter<>(
                path,
                new GroupWriteSupport(),
                ParquetWriter.DEFAULT_COMPRESSION_CODEC_NAME,
                ParquetWriter.DEFAULT_BLOCK_SIZE,
                ParquetWriter.DEFAULT_PAGE_SIZE,
                ParquetWriter.DEFAULT_PAGE_SIZE, // dictionary page size
                ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
                ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
                ParquetWriter.DEFAULT_WRITER_VERSION,
                conf);

        // Build one record and write it out.
        SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
        Group group = groupFactory.newGroup()
                .append("key", 1)
                .append("value", "Hello, Parquet!");
        writer.write(group);
        writer.close();

        // Read the file back; GroupReadSupport materializes each record as a Group.
        ParquetReader<Group> parquetReader =
                ParquetReader.builder(new GroupReadSupport(), path).build();
        Group result;
        while ((result = parquetReader.read()) != null) {
            System.out.println(result);
        }
        parquetReader.close();
    }
}
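
The multi-argument ParquetWriter constructor used above is deprecated in recent parquet-mr releases in favor of the builder API. As a minimal sketch of the same write path, assuming a parquet-mr version that ships org.apache.parquet.hadoop.example.ExampleParquetWriter (the class name ParquetBuilderExample and the file name data2.parquet are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ParquetBuilderExample {

    public static void main(String[] args) throws Exception {
        MessageType schema = MessageTypeParser.parseMessageType(
                "message Pair { required int32 key; required binary value; }");

        // The builder passes the schema to the write support itself, so no manual
        // GroupWriteSupport.setSchema(...) call is needed here.
        try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path("data2.parquet"))
                .withType(schema)
                .withConf(new Configuration())
                .withCompressionCodec(CompressionCodecName.SNAPPY) // optional; default is UNCOMPRESSED
                .build()) {
            writer.write(new SimpleGroupFactory(schema).newGroup()
                    .append("key", 2)
                    .append("value", "Hello again"));
        } // try-with-resources closes the writer and flushes the file footer
    }
}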

