<dependency> <groupId>org.apache.parquet</groupId> <artifactId>parquet-column</artifactId> <version>1.12.0</version> </dependency> import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.hadoop.example.GroupWriteSupport; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageTypeParser; import org.apache.parquet.example.data.Group; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; public class ParquetDataWriter { public static void main(String[] args) throws Exception { String schemaString = "message example { " + "required int32 id; " + "required binary name; " + "}"; Path filePath = new Path("data.parquet"); MessageType schema = MessageTypeParser.parseMessageType(schemaString); GroupWriteSupport writeSupport = new GroupWriteSupport(); writeSupport.setSchema(schema); Configuration conf = new Configuration(); ParquetWriter<Group> writer = new ParquetWriter<>(filePath, writeSupport, ParquetWriter.DEFAULT_COMPRESSION_CODEC_NAME, ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, true, false, ParquetWriter.DEFAULT_WRITER_VERSION, conf); Group group = new SimpleGroup(schema); group.append("id", 1); group.append("name", "John Doe"); writer.write(group); writer.close(); } }


上一篇:
下一篇:
切换中文