# Walk-through of common DPark RDD operations.
#
# The names DparkContext, data_list, numSlices, condition and process_data
# are not defined in this snippet; they must be imported or defined by the
# surrounding code before it is run.
import zlib

# Create the context used to build every dataset below.
# NOTE(review): the constructor signature is not visible here -- confirm that
# the installed dpark version accepts a separate host and port rather than a
# single master string.
dpark = DparkContext('master_ip', 'master_port')

# Build a distributed dataset from a local collection, split into numSlices
# parts.
data = dpark.parallelize(data_list, numSlices)

# Ask for the dataset to be cached so later jobs can reuse it.
cached_data = data.cache()

# Compress every element with zlib.
# zlib.compress() only accepts bytes-like input, so this presumably expects
# the elements of `data` to be bytes -- verify against the caller.
compressed_data = data.map(lambda x: zlib.compress(x))

# Regroup the dataset with glom().
# NOTE(review): presumably one list per partition -- confirm in the dpark docs.
sliced_data = data.glom()

# Filter with a predicate. As written the lambda ignores `x` and returns the
# outer name `condition`; replace it with an expression that depends on `x`.
filtered_data = data.filter(lambda x: condition)

# Combine values per key by addition.
# Assumes `data` holds (key, value) pairs -- TODO confirm.
aggregated_data = data.reduceByKey(lambda x, y: x + y)

# Hand each partition to process_data; the result of the call is discarded.
data.foreachPartition(lambda partition: process_data(partition))