Simple Metrics for Detecting Twitter Bots

First, create a sub-directory (anywhere on your system, even your tmp directory).

Then create a file named twit_utils.py and copy the code below into it. Make sure that DEFAULT_TWITTER_CREDS_PATH points to wherever you keep your creds file (which you should have gotten from this lesson: Twitter App Authentication Process).

from datetime import datetime
import time
import tweepy
import os
import json

# Default location of the JSON credentials file that get_api() reads when no
# path is given; get_api() expands a leading ~ to the home directory.
DEFAULT_TWITTER_CREDS_PATH = '~/.creds/me.json'

def get_api(credsfile = DEFAULT_TWITTER_CREDS_PATH):
    """
    Takes care of the Twitter OAuth authentication process and
    creates an API-handler to execute commands on Twitter

    Arguments:
      - credsfile (str): the full path of the filename that contains a JSON
        file with credentials for Twitter. A leading ~ is expanded to the
        home directory. The JSON object must contain the keys
        'consumer_key', 'consumer_secret', 'access_token' and
        'access_token_secret'.

    Returns:
      A tweepy.api.API object

    Raises:
      OSError: if the credentials file cannot be opened
      ValueError: if the file does not contain valid JSON
      KeyError: if one of the four required keys is missing
    """
    fn = os.path.expanduser(credsfile)  # get the full path in case the ~ is used
    # Use a context manager so the file handle is closed even if parsing fails
    with open(fn) as f:
        c = json.load(f)
    # Get authentication token
    auth = tweepy.OAuthHandler(consumer_key = c['consumer_key'],
                               consumer_secret = c['consumer_secret'])
    auth.set_access_token(c['access_token'], c['access_token_secret'])
    # create an API handler
    return tweepy.API(auth)




def convert_twitter_timestamp(t):
    """
    Parse a Twitter timestamp string into a datetime.

    t is something like 'Sat Jan 30 03:36:19 +0000 2010'
    return: a naive datetime object (no tzinfo) holding exactly the date and
            time fields written in t; because the format requires the
            literal '+0000' offset, those fields are UTC

    Raises ValueError if t does not match the expected format.
    """
    # Parse straight into a datetime. Going through time.mktime() and
    # datetime.fromtimestamp() would reinterpret the UTC wall-clock value as
    # local time and back, which can shift the result by an hour for times
    # that fall in the local zone's daylight-saving gap.
    return datetime.strptime(t, '%a %b %d %H:%M:%S +0000 %Y')


def get_user_recent_tweets(screen_name):
    """
    Fetch the most recent tweets from a user's timeline.

    Arguments:
      - screen_name (str): the Twitter screen name whose timeline to fetch

    Returns:
      A list of dictionaries, one per tweet (the raw JSON of each status).
      Up to 200 tweets are requested, including replies and retweets.
    """
    options = {
        # Scope the request to the requested account; without this the
        # screen_name argument was ignored and the call was not tied to
        # the user being analyzed.
        'screen_name': screen_name,
        'count': 200,
        'since_id': 1,
        'trim_user': True,
        'exclude_replies': False,
        'include_rts': True,
    }
    api = get_api()
    tweets = api.user_timeline(**options)
    return [t._json for t in tweets]



def get_user_profile(screen_name):
    """
    Look up one Twitter account by its screen name.

    Arguments:
      - screen_name (str): the Twitter screen name to look up

    Returns:
      A dictionary: the raw JSON of the first user in the lookup result
    """
    # lookup_users works on a list of names and hands back a sequence,
    # so ask for a one-element list and take the first result
    matches = get_api().lookup_users(screen_names = [screen_name])
    first_match = matches[0]
    return first_match._json


def get_user_followers_sample(screen_name):
    """
    Fetch the profiles of a sample of a user's followers.

    Requests up to 5000 follower ids for screen_name, then looks up the
    full profiles for the last 100 ids of that list.

    Arguments:
      - screen_name (str): the Twitter screen name whose followers to sample

    Returns:
      A list of dictionaries (the raw JSON of each follower's profile);
      an empty list when no follower ids come back.
    """
    api = get_api()
    ids = api.followers_ids(screen_name, count = 5000)
    # Take the last 100 ids. A slice of [-101:-1] would silently drop the
    # final id and produce nothing at all for an account with one follower.
    # (100 matches the per-request cap documented for users/lookup.)
    sample_ids = ids[-100:]
    if not sample_ids:
        # nothing to look up; skip the API call
        return []
    users = api.lookup_users(user_ids = sample_ids)

    return [user._json for user in users]



def get_user(screen_name):
    """
    A convenience method that gathers everything about one account.

    Arguments:
      - screen_name (str): the Twitter screen name to fetch

    Returns a dictionary:
    {
        'profile': the result of get_user_profile(screen_name),
        'tweets': the result of get_user_recent_tweets(screen_name),
        'followers': the result of get_user_followers_sample(screen_name)
    }
    """
    # Each helper creates its own API handler via get_api(), so no handler
    # is needed here.
    user = {}
    user['profile'] = get_user_profile(screen_name)
    user['tweets'] = get_user_recent_tweets(screen_name)
    user['followers'] = get_user_followers_sample(screen_name)

    return user
        
File found at: /files/code/botdetect/twit_utils.py

In your Terminal, go to the directory where you saved twit_utils.py and start iPython.

Once in iPython, you should be able to do this:

# import the module by name (no .py extension)
import twit_utils

userdata = twit_utils.get_user("stanford")
# calculate followers-to-friends(i.e. others-followed) ratio:
print(userdata['profile']['followers_count'] / userdata['profile']['friends_count'] )

If you're having trouble getting that data through Twitter, you can download it from my cached version:

import requests

# Where the cached per-user JSON files live, and which users to fetch
baseurl = "http://stash.compjour.org/data/twitter/botdetect/"
snames = ['shaq', 'stanford', 'taylorswift13']

# users maps each screen name to the parsed JSON downloaded for it
users = {}
for name in snames:
    response = requests.get(baseurl + name + '.json')
    users[name] = response.json()


Some sample methods for testing the authenticity of the profile (see example JSON here):

import re
import time
from datetime import datetime
from twit_utils import convert_twitter_timestamp

def score(profile):
    """
    Add up the points awarded by each profile heuristic.

    Arguments:
      - profile (dict): a Twitter user profile

    Returns:
      The sum of the values returned by every heuristic listed below
    """
    # NOTE(review): test_listed_ratio is defined in this file but is not
    # part of this list -- confirm whether that is intentional.
    checks = (
        test_days_old,
        test_followers_per_day,
        test_follower_friend_ratio,
        test_verification,
        test_numbers_in_name,
        test_uncommon_letters_in_screenname,
    )
    return sum(check(profile) for check in checks)



def account_age_in_days(profile):
    """
    Return how many whole days have passed since the account was created.

    Arguments:
      - profile (dict): a Twitter user profile; its 'created_at' value is
        parsed with convert_twitter_timestamp

    Returns:
      An int: the .days component of (current UTC time - creation time)
    """
    from datetime import timezone

    created = convert_twitter_timestamp(profile['created_at'])
    # 'created_at' is a +0000 (UTC) timestamp that is parsed into a naive
    # datetime, so compare it with the current time in UTC (made naive to
    # allow the subtraction) rather than with local wall-clock time.
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    return (utc_now - created).days


def test_days_old(profile):
    """
    Penalize very young accounts.

    Returns -2 when the account is fewer than 21 days old, otherwise 0.
    """
    is_new_account = account_age_in_days(profile) < 21
    return -2 if is_new_account else 0


def test_followers_per_day(profile):
    """
    Reward accounts by their average number of followers gained per day.

    Arguments:
      - profile (dict): a Twitter user profile with 'followers_count' and
        'created_at'

    Returns:
      2 when the average is above 10 followers/day, 1 when above 2,
      otherwise 0
    """
    # An account created less than a day ago has an age of 0 days; treat it
    # as one day old so the division cannot raise ZeroDivisionError.
    age_in_days = max(account_age_in_days(profile), 1)
    followers_per_day = profile['followers_count'] / age_in_days

    if followers_per_day > 10:
        return 2
    elif followers_per_day > 2:
        return 1
    else:
        return 0


def test_follower_friend_ratio(profile):
    ratio = profile['followers_count'] / profile['friends_count']
    if profile['followers_count'] < 10:
        return 0
    elif ratio > 10:
        return 3
    elif ratio > 3:
        return 2
    elif ratio > 1.01:
        return 1
    elif ratio < 0.6:
        return -1
    elif ratio < 0.1:
        return -2
    else:
        return 0


def test_verification(profile):
    if(profile['verified'] == True):
        return 10
    else:
        return 0

def test_listed_ratio(profile):
    if(profile['followers_count'] > 500):
        if(profile['listed_count'] > 1):
            return 1
        else:
            return -1

    return 0


def test_numbers_in_name(profile):
    matches = re.findall('\d', profile['screen_name'])
    if len(matches) > 4:
        return -2
    elif len(matches) >= 3:
        return -1
    else:
        return 0


def test_uncommon_letters_in_screenname(profile):
    matches = re.findall('xzqyv', profile['screen_name'])
    if len(matches) > 5:
        return -1
    else:
        return 0
        
File found at: /files/code/botdetect/profile_tests.py

Hillary Clinton's social media account bore some scrutiny (as did Sen. John McCain before her, and many others).

Take a look at her followers page (currently listing 3.4M+ users). Glance across some of the profiles. Then check a more-controlled/curated list of Twitter users: the several hundred users that Twitter-celeb Ashton Kutcher has chosen to follow.