import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;
import java.io.IOException;
public class ParquetExample {
public static void main(String[] args) throws IOException {
String fileName = "example.parquet";
MessageType schema = Types.buildMessage()
.addField(Types.required(PrimitiveType.PrimitiveTypeName.INT32).named("id"))
.addField(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).named("name"))
.named("User");
GroupWriteSupport.setSchema(schema, ParquetExample.class);
ParquetWriter<Group> writer = ParquetWriter.builder(new Path(fileName))
.withWriteMode(ParquetFileWriter.Mode.CREATE)
.withCompressionCodec(CompressionCodecName.SNAPPY)
.withRowGroupSize(ParquetWriter.DEFAULT_BLOCK_SIZE)
.withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE)
.withDictionaryEncoding(true)
.withValidation(true)
.withWriteSupport(new GroupWriteSupport())
.build();
SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
Group group = groupFactory.newGroup()
.append("id", 1)
.append("name", "John");
writer.write(group);
writer.close();
}
}