Launching MRJob from an IPython Notebook

I am trying to run the mrjob example from an IPython notebook:

from mrjob.job import MRJob


class MRWordFrequencyCount(MRJob):
    """Job that totals the characters, words and lines of its input."""

    def mapper(self, _, line):
        """Emit the character, word and line counts for one input line.

        The incoming key is ignored; only ``line`` is used.
        """
        yield "chars", len(line)
        yield "words", len(line.split())
        yield "lines", 1

    def reducer(self, key, values):
        """Emit ``key`` together with the sum of all values emitted for it."""
        yield key, sum(values)

and then run it with this code:

# Build the job with the input file as its only argument.
mr_job = MRWordFrequencyCount(args=["testfile.txt"])
# The runner is used as a context manager for the duration of the run.
with mr_job.make_runner() as runner:
    runner.run()
    # Turn each raw output line back into its key/value pair and print it.
    for line in runner.stream_output():
        key, value = mr_job.parse_output_line(line)
        print key, value

and get the error:

TypeError: <module '__main__' (built-in)> is a built-in class

Is there a way to run mrjob from an IPython notebook?

+4
source share
2 answers

I suspect this is due to this limitation stated on the MRJob website:

The file with the job class is sent to Hadoop to be run. Therefore, the job file cannot attempt to start the Hadoop job, or you would be recursively creating Hadoop jobs! The code that runs the job should only run outside of the Hadoop context.

The same page shows how the job file itself should launch the job (when it is executed as a script):

# Only launch the job when this file is executed as a script, not on import.
if __name__ == '__main__':  
  MRWordCounter.run()  # where MRWordCounter is your job class
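+1

I have not found the "perfect way" yet, but at the very least you can use the %%file magic in one notebook cell to write the job class to its own file: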

%%file wordcount.py
from mrjob.job import MRJob

class MRWordFrequencyCount(MRJob):
    """Job that tallies characters, words and lines across the input."""

    def mapper(self, _, line):
        """Yield this line's counts under the keys "chars", "words", "lines"."""
        char_count = len(line)
        yield "chars", char_count
        word_count = len(line.split())
        yield "words", word_count
        yield "lines", 1

    def reducer(self, key, values):
        """Yield the key paired with the total of its values."""
        total = sum(values)
        yield key, total

Then run mrjob from the next cell like this:

import wordcount
# Re-import the module so this cell uses the wordcount.py currently on disk
# instead of a previously imported copy.
reload(wordcount)

# Build the job from the imported module with the input file as its argument.
mr_job = wordcount.MRWordFrequencyCount(args=['example.txt'])
# The runner is used as a context manager for the duration of the run.
with mr_job.make_runner() as runner:
    runner.run()
    # Turn each raw output line back into its key/value pair and print it.
    for line in runner.stream_output():
        key, value = mr_job.parse_output_line(line)
        print key, value

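Note that the file is named wordcount.py and the class MRWordFrequencyCount is imported from the wordcount module - the file name and the module name must match. Also, Python caches imported modules, so when you change wordcount.py, IPython will not load the new version but keep using the cached one. The reload() call is there to prevent that.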

Reference: https://groups.google.com/d/msg/mrjob/CfdAgcEaC-I/8XfJPXCjTvQJ

Alternatively, you can run an mrjob script from a notebook cell as a shell command, using the ! prefix:

! python mrjob.py shakespeare.txt

Reference: http://jupyter.cs.brynmawr.edu/hub/dblank/public/Jupyter%20Magics.ipynb

+1

Source: https://habr.com/ru/post/1548082/


All Articles