python - Tweepy not working -
so trying run tweepy script collect tweets. i've setup database i'm running error.
starting... started user: user1 exception in thread thread-1: traceback (most recent call last): file "/library/frameworks/python.framework/versions/2.7/lib/python2.7/threading.py", line 810, in __bootstrap_inner self.run() file "/library/frameworks/python.framework/versions/2.7/lib/python2.7/threading.py", line 763, in run self.__target(*self.__args, **self.__kwargs) file "build/bdist.macosx-10.6-intel/egg/tweepy/streaming.py", line 414, in filter self.body['follow'] = u','.join(follow).encode(encoding) typeerror: sequence item 0: expected string or unicode, int found
edit: script using: urllib import urlencode_noplus _noplus not in urllib why deleted code. although suspect causing error..
import tweepy import threading import logging tweepy.models import status tweepy.utils import import_simplejson urllib import urlencode import json import re json = import_simplejson() class stream: def __init__(self, consumer_key, consumer_secret, key, secret, name): self.auth = tweepy.oauthhandler(consumer_key, consumer_secret) self.auth.set_access_token(key, secret) self.tweetsbuffer = tweetsbuffer() self.name = name self.logger = logging.getlogger('twittercollector') #check credentials if not tweepy.api(self.auth).verify_credentials(): print "invalid credentials user: ",self.name,".\nexiting..." logging.error("invalid credentials user: "+self.name+".\nexiting...") exit(0) def run(self, users_list = none): sl = streamlistener() sl.init(self.tweetsbuffer) try: streamer = tweepy.stream(auth=self.auth, listener=sl, timeout=3000000000, include_entities=1, include_rts=1) #load friends filter = [] if users_list none: filter = tweepy.api(self.auth).friends_ids() else: sublist in users_list: user in sublist['users']: filter.append(user.id) #remove duplicates filter = list(set(filter)) sthread = threading.thread(target=streamer.filter, args=(filter,)) sthread.start() return sthread except exception, e: print e def gettweetsbuffer(self): return self.tweetsbuffer def getuserlist(self, lists): if lists none: return none api = tweepy.api(self.auth) users_list = [] list in lists: users = [] members = tweepy.cursor( api.list_members, list['owner'], list['slug'] ).items() member in members: users.append(member) users_list.append( { 'owner' : list['owner'], 'slug' : list['slug'], 'users' : users }) return users_list class streamlistener(tweepy.streamlistener): def init(self, tweetsbuffer): #set buffer self.tweetsbuffer = tweetsbuffer def parse_status(self, status, retweet = false): tweet = { 'tweet_id':status.id, 'tweet_text':status.text, 'created_at':status.created_at, 'geo_lat':status.coordinates['coordinates'][0] if not status.coordinates none else 0, 'geo_long': status.coordinates['coordinates'][1] if not status.coordinates none else 0, 'user_id':status.user.id, 'tweet_url':"http://twitter.com/"+status.user.id_str+"/status/"+status.id_str, 'retweet_count':status.retweet_count, 'original_tweet_id':status.retweeted_status.id if not retweet , (status.retweet_count > 0) else 0, 'urls': status.entities['urls'], 'hashtags':status.entities['hashtags'], 'mentions': status.entities['user_mentions'] } #parse user object user = { 'user_id':status.user.id, 'screen_name': status.user.screen_name, 'name': status.user.name, 'followers_count': status.user.followers_count, 'friends_count': status.user.friends_count, 'description': status.user.description if not status.user.description none else "n/a", 'image_url': status.user.profile_image_url, 'location': status.user.location if not status.user.location none else "n/a", 'created_at': status.user.created_at } return {'tweet':tweet, 'user':user} def on_data(self, data): if 'in_reply_to_status_id' in data: status = status.parse(self.api, json.loads(data)) if self.on_status(status, data) false: return false elif 'delete' in data: delete = json.loads(data)['delete']['status'] if self.on_delete(delete['id'], delete['user_id']) false: return false elif 'limit' in data: if self.on_limit(json.loads(data)['limit']['track']) false: return false def on_status(self, status, rawjsondata): try: #parse tweet tweet = self.parse_status(status) tweet['raw_json'] = rawjsondata self.tweetsbuffer.insert(tweet) #parse retweet if tweet['tweet']['retweet_count'] > 0: retweet = self.parse_status(status.retweeted_status, true) retweet['raw_json'] = none self.tweetsbuffer.insert(retweet) except exception: # catch unicode errors while printing console # , ignore them avoid breaking application. pass class tweetsbuffer(): tweetsbuffer = [] def __init__(self): self.lock = threading.lock() def insert(self, tweet): self.lock.acquire() self.tweetsbuffer.append(tweet) self.lock.release() def pop(self): self.lock.acquire() tweet = self.tweetsbuffer.pop() if len(self.tweetsbuffer) > 0 else none self.lock.release() return tweet
Comments
Post a Comment