import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public class ParquetReaderExample {
    public static void main(String[] args) throws IOException {
        Path filePath = new Path("path/to/parquet/file.parquet");
        // GroupReadSupport materializes each record as a generic Group, so no generated classes are needed.
        try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), filePath).build()) {
            Group row;
            // read() returns null once every record in the file has been consumed.
            while ((row = reader.read()) != null) {
                System.out.println(row);
            }
        }
    }
}
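The default toString() dump is handy for debugging, but fields can also be read individually. A minimal sketch, assuming the Pair schema (required int32 key, optional binary value) used in the writer example below:

// Inside the read loop: typed access by field name and repetition index.
int key = row.getInteger("key", 0);
// "value" is optional, so check that it is present before reading it.
String value = row.getFieldRepetitionCount("value") > 0 ? row.getString("value", 0) : null;
System.out.println(key + " -> " + value);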
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ParquetWriterExample {
    public static void main(String[] args) throws IOException {
        MessageType schema = MessageTypeParser.parseMessageType(
                "message Pair {\n" +
                "  required int32 key;\n" +
                "  optional binary value;\n" +
                "}");
        Path filePath = new Path("path/to/parquet/file.parquet");

        // GroupWriteSupport picks the schema up from the Configuration, so register it there first.
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);
        GroupWriteSupport writeSupport = new GroupWriteSupport();

        try (ParquetWriter<Group> writer = new ParquetWriter<Group>(filePath, conf, writeSupport)) {
            SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
            for (int i = 0; i < 10; i++) {
                Group group = groupFactory.newGroup()
                        .append("key", i)
                        .append("value", "Value " + i);
                writer.write(group);
            }
        }
    }
}
// "parquet.compression" on the Configuration is only read by ParquetOutputFormat in MapReduce jobs;
// a standalone writer takes the codec directly, with the remaining arguments left at the library defaults.
Configuration conf = new Configuration();
GroupWriteSupport.setSchema(schema, conf);
ParquetWriter<Group> writer = new ParquetWriter<Group>(filePath, writeSupport,
        CompressionCodecName.SNAPPY, ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE,
        ParquetWriter.DEFAULT_PAGE_SIZE, ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
        ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, ParquetWriter.DEFAULT_WRITER_VERSION, conf);
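Recent parquet-mr releases deprecate these Path-based constructors in favour of a builder. A sketch of the same Snappy-compressed writer, assuming ExampleParquetWriter from the parquet-hadoop example package is on the classpath:

// Builder-based alternative: schema and codec are supplied directly, no Configuration key required.
ParquetWriter<Group> writer = ExampleParquetWriter.builder(filePath)
        .withType(schema)
        .withCompressionCodec(CompressionCodecName.SNAPPY)
        .build();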
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.BinaryColumn;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;
import org.apache.parquet.io.api.Binary;
...
// Filter columns are referenced by their dotted path, so the Pair schema from the writer example
// (required int32 key, optional binary value) does not need to be parsed again here.
BinaryColumn value = FilterApi.binaryColumn("value");
IntColumn key = FilterApi.intColumn("key");
// Keep rows where value == "Value 1", guarded by an explicit not-null check on the optional column.
FilterPredicate valueIsOne = FilterApi.and(
        FilterApi.notEq(value, null),
        FilterApi.eq(value, Binary.fromString("Value 1")));
// A similar predicate on the int32 key column.
FilterPredicate keyIsTen = FilterApi.eq(key, 10);
ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), filePath)
        .withFilter(FilterCompat.get(valueIsOne))
        .build();
...
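FilterApi also provides gt, gtEq, lt, ltEq, or and not, so predicates compose. A short sketch of a range scan over the same key column, under the same assumptions:

// Keep only rows with 3 <= key < 7.
FilterPredicate range = FilterApi.and(
        FilterApi.gtEq(key, 3),
        FilterApi.lt(key, 7));
ParquetReader<Group> rangeReader = ParquetReader.builder(new GroupReadSupport(), filePath)
        .withFilter(FilterCompat.get(range))
        .build();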