# Walkthrough of common DPark RDD operations, one statement per step.
#
# NOTE: `DparkContext`, `data_list`, `numSlices`, `condition` and
# `process_data` are not defined or imported in this snippet; they must be
# provided by the surrounding program before it can run.
import zlib

# Create the context that every RDD below is built from.
# NOTE(review): the master address is passed as two positional arguments;
# confirm this matches the DparkContext constructor signature (it may expect
# a single master string) -- TODO confirm.
dpark = DparkContext('master_ip', 'master_port')

# Distribute a local collection across `numSlices` partitions.
data = dpark.parallelize(data_list, numSlices)

# Ask for the RDD to be cached so later operations can reuse it.
cached_data = data.cache()

# Compress every element; zlib.compress requires bytes-like input, so the
# elements of `data` are assumed to be bytes -- TODO confirm.
compressed_data = data.map(lambda x: zlib.compress(x))

# Regroup the RDD with glom().
# NOTE(review): presumably yields one list per partition -- confirm.
sliced_data = data.glom()

# Keep only the elements for which the predicate is truthy. `condition` is a
# placeholder: replace it with an expression that depends on `x`.
filtered_data = data.filter(lambda x: condition)

# Combine values with `+` per key; assumes elements are (key, value) pairs
# -- TODO confirm against the actual data.
aggregated_data = data.reduceByKey(lambda x, y: x + y)

# Hand each partition to `process_data`, used for its side effects.
data.foreachPartition(lambda partition: process_data(partition))


上一篇:
下一篇:
切换中文