import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;

import java.io.IOException;

public class ParquetExample {
    public static void main(String[] args) throws IOException {
        String fileName = "example.parquet";

        // Define a two-column schema: a required INT32 "id" and a required BINARY "name".
        MessageType schema = Types.buildMessage()
                .addField(Types.required(PrimitiveType.PrimitiveTypeName.INT32).named("id"))
                .addField(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).named("name"))
                .named("User");

        // ExampleParquetWriter supplies the Group write support, and withType()
        // registers the schema with it, so there is no need to call
        // GroupWriteSupport.setSchema() or withWriteSupport() by hand.
        try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path(fileName))
                .withType(schema)
                .withWriteMode(ParquetFileWriter.Mode.CREATE)
                .withCompressionCodec(CompressionCodecName.SNAPPY)
                .withRowGroupSize(ParquetWriter.DEFAULT_BLOCK_SIZE)
                .withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE)
                .withDictionaryEncoding(true)
                .withValidation(true)
                .build()) {

            // Build one record against the schema and write it; the
            // try-with-resources block closes (and flushes) the writer.
            SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
            Group group = groupFactory.newGroup()
                    .append("id", 1)
                    .append("name", "John");
            writer.write(group);
        }
    }
}
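To verify the output, here is a minimal read-back sketch using the matching example read support from parquet-mr. GroupReadSupport is the reader-side counterpart of ExampleParquetWriter above; the getInteger/getString accessors assume the two-column User schema, and ParquetReadExample is just an illustrative class name. Depending on the parquet-mr version, this ParquetReader.builder overload may be marked deprecated but still works.

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

import java.io.IOException;

public class ParquetReadExample {
    public static void main(String[] args) throws IOException {
        // Open the file written by ParquetExample; GroupReadSupport materializes
        // each record as a Group, the same generic row representation used above.
        try (ParquetReader<Group> reader =
                ParquetReader.builder(new GroupReadSupport(), new Path("example.parquet")).build()) {
            Group group;
            // read() returns null once every record has been consumed.
            while ((group = reader.read()) != null) {
                // Arguments are the field name and the repetition index
                // (0 for non-repeated fields, as in the User schema).
                int id = group.getInteger("id", 0);
                String name = group.getString("name", 0);
                System.out.println(id + "\t" + name);
            }
        }
    }
}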

