>>> with open('workfile') as f:
...     read_data = f.read()
>>> # We can check that the file has been automatically closed.
>>> f.closed
True
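>>> # Once closed, the handle can no longer be read from; as a quick
>>> # check in the same session, a read attempt raises a ValueError
>>> # (this is standard CPython behavior):
>>> f.read()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: I/O operation on closed file.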
from pyspark import SparkContext

sc = SparkContext.getOrCreate()

# Each element of the RDD is a single-item list holding one line of text.
data1 = sc.parallelize([['red blue hi you red'],
                        ['ball green ball go four ball'],
                        ['nice red start nice'],
                        ['ball no kill tree go go']])

# Split each line into words and emit a (word, 1) pair per occurrence.
data2 = data1.flatMap(lambda x: [(x_i, 1) for xx in x for x_i in xx.split(' ')])

# Sum the counts per word across all lines.
data3 = data2.reduceByKey(lambda x, y: x + y)

print(data3.collect())
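
# Optional follow-up (a sketch): reduceByKey makes no ordering guarantee
# across partitions, so the collected pairs come back in arbitrary order.
# The standard RDD.sortBy method can rank words by descending count first.
top_words = data3.sortBy(lambda pair: pair[1], ascending=False)
print(top_words.collect())
# e.g. [('ball', 4), ('red', 3), ('go', 3), ...] (ties may appear in any order)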