/*
 * Build dependency (Maven). Note: ParquetWriter and GroupWriteSupport live in
 * the parquet-hadoop artifact; parquet-column alone is not sufficient.
 *
 * <dependency>
 *     <groupId>org.apache.parquet</groupId>
 *     <artifactId>parquet-hadoop</artifactId>
 *     <version>1.12.0</version>
 * </dependency>
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroup;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
public class ParquetDataWriter {
public static void main(String[] args) throws Exception {
String schemaString = "message example {
"
+ "required int32 id;
"
+ "required binary name;
"
+ "}";
Path filePath = new Path("data.parquet");
MessageType schema = MessageTypeParser.parseMessageType(schemaString);
GroupWriteSupport writeSupport = new GroupWriteSupport();
writeSupport.setSchema(schema);
Configuration conf = new Configuration();
ParquetWriter<Group> writer = new ParquetWriter<>(filePath, writeSupport, ParquetWriter.DEFAULT_COMPRESSION_CODEC_NAME, ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, true, false, ParquetWriter.DEFAULT_WRITER_VERSION, conf);
Group group = new SimpleGroup(schema);
group.append("id", 1);
group.append("name", "John Doe");
writer.write(group);
writer.close();
}
}