import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

import java.io.IOException;
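// Note: this sketch assumes parquet-hadoop and the Hadoop client libraries are on the
// classpath (e.g. the org.apache.parquet:parquet-hadoop and org.apache.hadoop:hadoop-client
// artifacts); exact versions are left to the build.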
public class ParquetExample {
    public static void main(String[] args) throws IOException {
        // Define the file schema with Parquet's message type syntax: one required
        // int32 field and one required binary (byte array) field.
        String schemaString = "message Pair {\n" +
                "  required int32 key;\n" +
                "  required binary value;\n" +
                "}";
        MessageType schema = MessageTypeParser.parseMessageType(schemaString);

        // GroupWriteSupport reads the schema from the Configuration, so register it there first.
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);

        Path path = new Path("data.parquet");
        ParquetWriter<Group> writer = new ParquetWriter<>(
                path,
                new GroupWriteSupport(),
                ParquetWriter.DEFAULT_COMPRESSION_CODEC_NAME,
                ParquetWriter.DEFAULT_BLOCK_SIZE,
                ParquetWriter.DEFAULT_PAGE_SIZE,
                ParquetWriter.DEFAULT_PAGE_SIZE,   // dictionary page size
                ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
                ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
                ParquetWriter.DEFAULT_WRITER_VERSION,
                conf);

        // Build a single record against the schema and write it to the file.
        SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
        Group group = groupFactory.newGroup()
                .append("key", 1)
                .append("value", "Hello, Parquet!");
        writer.write(group);
        writer.close();
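
        // Read the file back: the schema is recovered from the file footer, so only the
        // path and a GroupReadSupport are needed.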
        GroupReadSupport readSupport = new GroupReadSupport();
        ParquetReader<Group> parquetReader = ParquetReader.builder(readSupport, path).build();
        Group result;
        while ((result = parquetReader.read()) != null) {
            System.out.println(result);
        }
        parquetReader.close();
    }
}
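// Running the example writes a one-record data.parquet file (to the local filesystem by
// default) and prints the record read back via the Group's toString().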