Stopping a Tweepy stream after a duration parameter (# lines, seconds, #Tweets, etc.)

I use Tweepy to capture hashtag #WorldCup-based streaming tweets, as seen in the code below. It works as expected.

class StdOutListener(StreamListener):
    """Handle data received from the stream by printing it to stdout."""

    def on_status(self, status):
        """Print the text of a tweet followed by each of its hashtags.

        Returns True so the stream keeps listening.
        """
        # Prints the text of the tweet
        print('Tweet text: ' + status.text)

        # There are many options in the status object;
        # hashtags are found under the 'hashtags' key of its entities.
        for hashtag in status.entities['hashtags']:
            print(hashtag['text'])

        return True

    def on_error(self, status_code):
        """Report the error status code received from the stream."""
        print('Got an error with status code: ' + str(status_code))
        return True  # To continue listening

    def on_timeout(self):
        """Report that the stream timed out."""
        print('Timeout...')
        return True  # To continue listening

if __name__ == '__main__':
    # Build the OAuth handler from the application and user credentials.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # Every status received on the stream is handed to this listener.
    listener = StdOutListener()

    # Open the stream, filtered by the given user id and hashtag.
    stream = Stream(auth, listener)
    stream.filter(follow=[38744894], track=['#WorldCup'])

Since this is a hot hashtag right now, it does not take long for the stream to collect the maximum number of tweets that Tweepy lets you receive in one transaction. However, if I were searching for #StackOverflow, it could be much slower, so I would like a way to kill the stream. I could do this in several ways: stopping after 100 tweets, stopping after 3 minutes, stopping once the text output file reaches 150 lines, and so on. I know that the socket timeout is not meant for this.

:

Tweepy Streaming - x

, , API. , , , .

- Tweepy ( ), , ?

+2
1

, -, .

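This is done with static Python variables for the tweet counter and for the stop value (for example, stop after 20 tweets). The example below searches by location, but you can switch to a hashtag search by calling getTweetsByHashtag() instead.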

#!/usr/bin/env python
from tweepy import (Stream, OAuthHandler)
from tweepy.streaming import StreamListener

class Listener(StreamListener):
    """Stream listener that stops the stream after a fixed number of tweets.

    The running count (``Listener.tweet_counter``) and the limit
    (``Listener.stop_at``) are stored on the class itself, so they are
    shared by every instance, including the one handed to ``Stream``.
    ``stop_at`` is set by ``getTweetsByGPS`` / ``getTweetsByHashtag``.
    """

    tweet_counter = 0  # Static variable: tweets seen in the current run

    def login(self):
        """Build and return an ``OAuthHandler`` from the credentials below."""
        # Fill in your own Twitter application credentials here.
        CONSUMER_KEY = ''
        CONSUMER_SECRET = ''
        ACCESS_TOKEN = ''
        ACCESS_TOKEN_SECRET = ''

        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return auth

    def on_status(self, status):
        """Print the tweet and report whether the stream should continue.

        Returns True while fewer than ``Listener.stop_at`` tweets have been
        seen, and False once that limit has been reached.
        """
        Listener.tweet_counter += 1
        print('%d. Screen name = "%s" Tweet = "%s"'
              % (Listener.tweet_counter, status.author.screen_name,
                 status.text.replace('\n', ' ')))

        if Listener.tweet_counter < Listener.stop_at:
            return True

        print('Max num reached = ' + str(Listener.tweet_counter))
        return False  # Returning False is what ends the stream

    def getTweetsByGPS(self, stop_at_number, latitude_start, longitude_start, latitude_finish, longitude_finish):
        """Stream tweets from a bounding box until ``stop_at_number`` are seen.

        The four coordinates are forwarded to the ``locations`` filter in
        exactly the order they are given.
        NOTE(review): the sample call in this file passes the values as
        longitude, latitude, longitude, latitude, which does not match the
        parameter names -- confirm the intended order against the Twitter
        streaming API before relying on the names.

        A KeyboardInterrupt (Ctrl-C) while streaming is caught and reported.
        """
        try:
            Listener.stop_at = stop_at_number  # Create static variable
            # Start counting from zero so an earlier run cannot end this one
            # prematurely.
            Listener.tweet_counter = 0
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)  # Socket timeout value
            streaming_api.filter(follow=None, locations=[latitude_start, longitude_start, latitude_finish, longitude_finish])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')

    def getTweetsByHashtag(self, stop_at_number, hashtag):
        """Stream tweets matching ``hashtag`` until ``stop_at_number`` are seen.

        A KeyboardInterrupt (Ctrl-C) while streaming is caught and reported.
        """
        try:
            # Must be the same attribute on_status reads (stop_at).
            Listener.stop_at = stop_at_number
            # Start counting from zero so an earlier run cannot end this one
            # prematurely.
            Listener.tweet_counter = 0
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)  # Socket timeout value
            streaming_api.filter(track=[hashtag])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')

# Bounding box around the Atlanta area, in the argument order that
# getTweetsByGPS takes after the tweet limit.
atlanta_area = (-84.395198, 33.746876, -84.385585, 33.841601)

listener = Listener()
listener.getTweetsByGPS(20, *atlanta_area)  # Stop after 20 tweets
+2

Source: https://habr.com/ru/post/1614290/


All Articles