<dependencies>
    <dependency>
        <groupId>org.apache.parquet</groupId>
        <artifactId>parquet-hadoop</artifactId>
        <version>1.12.0</version>
    </dependency>
    <!-- Provides org.apache.hadoop.fs.Path used below; any recent 2.x/3.x Hadoop release works -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.3.1</version>
    </dependency>
</dependencies>
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

import java.io.IOException;
public class ParquetExample {
    public static void main(String[] args) throws IOException {
        // Schema: a required int32 key and an optional binary value annotated as a UTF-8 string.
        MessageType schema = MessageTypeParser.parseMessageType(
                "message Pair {\n" +
                "  required int32 key;\n" +
                "  optional binary value (UTF8);\n" +
                "}");

        Path file = new Path("example.parquet");

        // ExampleParquetWriter wires up GroupWriteSupport with the given schema;
        // OVERWRITE lets the example be re-run without failing on an existing file.
        ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
                .withType(schema)
                .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
                .build();
        // Build two records against the schema and write them out.
        SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
        Group group1 = groupFactory.newGroup()
                .append("key", 1)
                .append("value", "value1");
        Group group2 = groupFactory.newGroup()
                .append("key", 2)
                .append("value", "value2");
        writer.write(group1);
        writer.write(group2);
        writer.close();
        // Read the records back; the reader builder takes a Hadoop Path, not a String.
        GroupReadSupport readSupport = new GroupReadSupport();
        ParquetReader<Group> reader = ParquetReader.builder(readSupport, file).build();
        Group record;
        while ((record = reader.read()) != null) {
            System.out.println("key: " + record.getInteger("key", 0) +
                    ", value: " + record.getString("value", 0));
        }
        reader.close();
    }
}
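Expected console output after a successful run (example.parquet is written to the working directory):

key: 1, value: value1
key: 2, value: value2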