Reading a Parquet file with the example Group API (GroupReadSupport returns each row as a Group):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public class ParquetReaderExample {
    public static void main(String[] args) throws IOException {
        Path filePath = new Path("path/to/parquet/file.parquet");
        // try-with-resources closes the reader even if reading fails
        try (ParquetReader<Group> reader =
                ParquetReader.builder(new GroupReadSupport(), filePath).build()) {
            Group row;
            while ((row = reader.read()) != null) {   // read() returns null at end of file
                System.out.println(row);
            }
        }
    }
}

Writing a Parquet file with GroupWriteSupport. The schema is declared in the Parquet message-type syntax and handed to the write support through the Hadoop Configuration:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ParquetWriterExample {
    public static void main(String[] args) throws IOException {
        MessageType schema = MessageTypeParser.parseMessageType(
            "message Pair { "
            + "  required int32 key; "
            + "  optional binary value; "
            + "}");
        Path filePath = new Path("path/to/parquet/file.parquet");

        // GroupWriteSupport looks the schema up in the Configuration,
        // so register it there and pass the Configuration to the writer.
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);

        try (ParquetWriter<Group> writer =
                new ParquetWriter<Group>(filePath, conf, new GroupWriteSupport())) {
            SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
            for (int i = 0; i < 10; i++) {
                Group group = groupFactory.newGroup()
                    .append("key", i)
                    .append("value", "Value " + i);
                writer.write(group);
            }
        }
    }
}

To write compressed files, pass the codec to the writer. Setting "parquet.compression" on the Configuration is how the codec is chosen in MapReduce jobs through ParquetOutputFormat; a directly constructed ParquetWriter takes the codec as a constructor argument:

import org.apache.parquet.hadoop.metadata.CompressionCodecName;

Configuration conf = new Configuration();
conf.set("parquet.compression", "snappy");       // read by ParquetOutputFormat in MapReduce jobs
GroupWriteSupport.setSchema(schema, conf);

ParquetWriter<Group> writer = new ParquetWriter<Group>(
    filePath, new GroupWriteSupport(), CompressionCodecName.SNAPPY,
    ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE,
    ParquetWriter.DEFAULT_PAGE_SIZE,             // dictionary page size
    ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
    ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
    ParquetWriter.DEFAULT_WRITER_VERSION,
    conf);                                       // carries the schema to GroupWriteSupport

Filtering rows at read time with the filter2 predicate API. Columns are referenced by their path in the schema, so no schema object is needed to build the predicate:

import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators;
import org.apache.parquet.io.api.Binary;
...
Operators.BinaryColumn valueColumn = FilterApi.binaryColumn("value");
Operators.IntColumn keyColumn = FilterApi.intColumn("key");

// value IS NOT NULL AND value = "Value 1"
FilterPredicate valuePredicate = FilterApi.and(
    FilterApi.notEq(valueColumn, (Binary) null),
    FilterApi.eq(valueColumn, Binary.fromString("Value 1")));

// key = 10; predicates can be combined further with FilterApi.and / FilterApi.or
FilterPredicate keyPredicate = FilterApi.eq(keyColumn, 10);

ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), filePath)
    .withFilter(FilterCompat.get(valuePredicate))
    .build();
...
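The reader examples above simply print each Group with its toString(). If typed values are needed, the Group API also exposes per-field accessors; a minimal sketch, assuming the Pair schema from the writer example and the same reader as above:

Group row;
while ((row = reader.read()) != null) {
    // arguments are the field name and the value index within that field
    int key = row.getInteger("key", 0);
    // "value" is optional, so check that a value is present before reading it
    String value = row.getFieldRepetitionCount("value") > 0
        ? row.getString("value", 0)
        : null;
    System.out.println(key + " -> " + value);
}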


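The constructors used above are deprecated in current parquet-mr releases, which also ship ExampleParquetWriter with a builder that takes the schema and codec directly. A minimal sketch, assuming the same schema and filePath as in ParquetWriterExample and that this class is available in the Parquet version in use:

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

// schema and filePath as in ParquetWriterExample above
try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(filePath)
        .withType(schema)                                // no Configuration round-trip needed
        .withCompressionCodec(CompressionCodecName.SNAPPY)
        .build()) {
    writer.write(new SimpleGroupFactory(schema).newGroup()
        .append("key", 42)
        .append("value", "Value 42"));
}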