First, create a sub-directory (anywhere on your system, even your tmp
directory).
Then create a file named twit_utils.py
and copy the code below into it. Make sure that DEFAULT_TWITTER_CREDS_PATH
points to wherever you keep your creds file (which you should have gotten from this lesson: Twitter App Authentication Process).
from datetime import datetime
import time
import tweepy
import os
import json
# Default path of the JSON credentials file; used as the default `credsfile`
# argument of get_api(), which expands a leading "~" before opening it.
DEFAULT_TWITTER_CREDS_PATH = '~/.creds/me.json'
def get_api(credsfile = DEFAULT_TWITTER_CREDS_PATH):
    """
    Takes care of the Twitter OAuth authentication process and
    creates an API-handler to execute commands on Twitter

    Arguments:
      - credsfile (str): the full path of the filename that contains a JSON
        file with credentials for Twitter. The JSON object must provide the
        keys 'consumer_key', 'consumer_secret', 'access_token' and
        'access_token_secret'.

    Returns:
      A tweepy.api.API object

    Raises:
      OSError if the credentials file cannot be opened, and KeyError if one
      of the expected keys is missing from it.
    """
    fn = os.path.expanduser(credsfile) # get the full path in case the ~ is used
    # Use a context manager so the credentials file is closed right away
    # instead of being left open until garbage collection.
    with open(fn) as f:
        c = json.load(f)
    # Get authentication token
    auth = tweepy.OAuthHandler(consumer_key = c['consumer_key'],
                               consumer_secret = c['consumer_secret'])
    auth.set_access_token(c['access_token'], c['access_token_secret'])
    # create an API handler
    return tweepy.API(auth)
def convert_twitter_timestamp(t):
    """
    t is something like 'Sat Jan 30 03:36:19 +0000 2010'

    return: a naive datetime object holding the UTC wall-clock time of t
    """
    # Parse the UTC offset with %z instead of hard-coding '+0000', and skip
    # the time.mktime()/fromtimestamp() round trip through local time, which
    # can shift values that land on a local DST transition.
    parsed = datetime.strptime(t, '%a %b %d %H:%M:%S %z %Y')
    # Normalise to UTC, then drop tzinfo so callers that subtract this value
    # from other naive datetimes keep working.
    return (parsed - parsed.utcoffset()).replace(tzinfo=None)
def get_user_recent_tweets(screen_name):
    """
    Requests the most recent tweets (count = 200, retweets and replies
    included) from the timeline of the given user.

    Arguments:
      - screen_name (str): the Twitter screen name whose timeline is fetched

    Returns:
      A list of dictionaries, one per tweet (the `_json` payload of each
      object returned by tweepy)
    """
    options = {
        # Without this key the request carries no user at all, so the
        # timeline fetched would not be the one that was asked for.
        'screen_name': screen_name,
        'count': 200,
        'since_id': 1,
        'trim_user': True,
        'exclude_replies': False,
        'include_rts': True,
    }
    api = get_api()
    tweets = api.user_timeline(**options)
    return [t._json for t in tweets]
def get_user_profile(screen_name):
    """
    Looks up a single user by screen name.

    Arguments:
      - screen_name (str): the Twitter screen name to look up

    Returns:
      The `_json` payload of the first user returned by the lookup
    """
    matches = get_api().lookup_users(screen_names = [screen_name])
    # the lookup hands back a list even for one name; take its only entry
    return matches[0]._json
def get_user_followers_sample(screen_name):
    """
    Fetches follower ids for the given user (count = 5000) and looks up the
    profiles of the last 100 ids in that list.

    Arguments:
      - screen_name (str): the Twitter screen name whose followers are sampled

    Returns:
      A list of dictionaries (the `_json` payload of each follower); empty
      when no follower ids are returned
    """
    api = get_api()
    ids = api.followers_ids(screen_name, count = 5000)
    # The previous slice [-101:-1] always dropped the final id (and yielded
    # nothing for an account with a single follower); take the last 100.
    sample_ids = ids[-100:]
    if not sample_ids:
        # Nothing to look up; avoid sending an empty lookup request.
        return []
    users = api.lookup_users(user_ids = sample_ids)
    return [user._json for user in users]
def get_user(screen_name):
    """
    A convenience method

    Arguments:
      - screen_name (str): the Twitter screen name to collect data for

    Returns a dictionary:
    {
      'profile': the result of get_user_profile(screen_name),
      'tweets': the result of get_user_recent_tweets(screen_name),
      'followers': the result of get_user_followers_sample(screen_name)
    }
    """
    # Each helper below builds its own API handler via get_api(), so no
    # handler is created here (the previous one was never used).
    user = {}
    user['profile'] = get_user_profile(screen_name)
    user['tweets'] = get_user_recent_tweets(screen_name)
    user['followers'] = get_user_followers_sample(screen_name)
    return user
File found at: /files/code/botdetect/twit_utils.py
In your Terminal, go to the directory where you saved twit_utils.py,
and start iPython.
Once in iPython, you should be able to do this:
# Import the module by name -- the ".py" extension is not part of it
import twit_utils
userdict = twit_utils.get_user("stanford")
# calculate followers-to-friends(i.e. others-followed) ratio:
print(userdict['profile']['followers_count'] / userdict['profile']['friends_count'] )
If you're having trouble getting that data through Twitter, you can download it from my cached version:
import requests
# Maps each screen name to the cached JSON document downloaded for it
users = {}
baseurl = "http://stash.compjour.org/data/twitter/botdetect/"
snames = ['shaq', 'stanford', 'taylorswift13']
for name in snames:
    # A timeout keeps a stalled connection from hanging the session forever
    resp = requests.get(baseurl + name + '.json', timeout=30)
    # Fail with a clear HTTP error instead of trying to parse an error page as JSON
    resp.raise_for_status()
    users[name] = resp.json()
Some sample methods for testing the authenticity of the profile (see example JSON here):
import re
import time
from datetime import datetime
from twit_utils import convert_twitter_timestamp
def score(profile):
    """
    Adds up the results of the individual profile tests.

    Arguments:
      - profile (dict): a Twitter user profile

    Returns:
      The sum of the values returned by each test function listed below
    """
    # NOTE(review): test_listed_ratio is defined in this file but is not part
    # of this list -- confirm whether that is intentional.
    checks = (
        test_days_old,
        test_followers_per_day,
        test_follower_friend_ratio,
        test_verification,
        test_numbers_in_name,
        test_uncommon_letters_in_screenname,
    )
    return sum(check(profile) for check in checks)
def account_age_in_days(profile):
    """
    Computes how many whole days ago the account was created.

    Arguments:
      - profile (dict): a Twitter user profile with a 'created_at' timestamp
        string

    Returns:
      The age of the account in days (int)
    """
    from datetime import timezone
    created = convert_twitter_timestamp(profile['created_at'])
    # 'created_at' is expressed in UTC ('+0000'), so compare it against the
    # current UTC time rather than local time; tzinfo is dropped because the
    # converted timestamp is a naive datetime.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    return (now_utc - created).days
def test_days_old(profile):
    """
    Scores the account by its age.

    Returns:
      -2 when the account is less than 21 days old, otherwise 0
    """
    return -2 if account_age_in_days(profile) < 21 else 0
def test_followers_per_day(profile):
    """
    Scores the account by the average number of followers gained per day.

    Arguments:
      - profile (dict): a Twitter user profile with 'followers_count' and
        'created_at'

    Returns:
      2 when the average is above 10, 1 when it is above 2, otherwise 0
    """
    # An account created today is 0 days old; count it as one day so the
    # division below cannot raise ZeroDivisionError.
    age_days = max(account_age_in_days(profile), 1)
    followers_per_day = profile['followers_count'] / age_days
    if followers_per_day > 10:
        return 2
    elif followers_per_day > 2:
        return 1
    else:
        return 0
def test_follower_friend_ratio(profile):
ratio = profile['followers_count'] / profile['friends_count']
if profile['followers_count'] < 10:
return 0
elif ratio > 10:
return 3
elif ratio > 3:
return 2
elif ratio > 1.01:
return 1
elif ratio < 0.6:
return -1
elif ratio < 0.1:
return -2
else:
return 0
def test_verification(profile):
if(profile['verified'] == True):
return 10
else:
return 0
def test_listed_ratio(profile):
if(profile['followers_count'] > 500):
if(profile['listed_count'] > 1):
return 1
else:
return -1
return 0
def test_numbers_in_name(profile):
matches = re.findall('\d', profile['screen_name'])
if len(matches) > 4:
return -2
elif len(matches) >= 3:
return -1
else:
return 0
def test_uncommon_letters_in_screenname(profile):
matches = re.findall('xzqyv', profile['screen_name'])
if len(matches) > 5:
return -1
else:
return 0
File found at: /files/code/botdetect/profile_tests.py
Hillary Clinton's social media account has come under some scrutiny (as did Sen. John McCain's before hers, and many others').
Take a look at her followers page (currently listing 3.4M+ users). Glance across some of the profiles. Then check a more-controlled/curated list of Twitter users: the several hundred users that Twitter-celeb Ashton Kutcher has chosen to follow.