In [ ]:
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
from scipy import stats
import numpy as np

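The cell above imports the libraries used in this notebook: pandas for data tables, matplotlib for plotting (including `rgb2hex` and `Polygon`, which are used later to color the county map), `scipy.stats` for statistics, and numpy.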

In [14]:
# Apply matplotlib's 'ggplot' style sheet to the figures drawn in this notebook.
plt.style.use('ggplot')

This does not import anything: it tells matplotlib to use its 'ggplot' style sheet, so the plots that follow are drawn in that style.

In [16]:
# Load the precinct-level Ohio vote table from the remote CSV file.
OH_VOTES_URL = 'http://jsharpna.github.io/141B/data/OHvotes.csv'
OH = pd.read_csv(OH_VOTES_URL)

# Preview the first rows to check how the columns were parsed.
OH.head()
Out[16]:
County Name Precinct Name Precinct Code Region Name Media Market Registered Voters Total Voters Turnout Percentage James Jerome Bell (WI)* Michael Bickelmeyer (WI)* ... Joseph Maldonado (WI)* Michael Andrew Maturen (WI)* Evan McMullin (WI)* Monica Moorehead (WI)* Joe Schriner (WI)* Mike Smith (WI)* Jill Stein (G) Josiah R. Stroh (WI)* Douglas W. Thomson (WI)* Donald J. Trump (R)
0 Adams BRATTON TOWNSHIP AAA Southwest Cincinnati 923 661 71.61% 0 0 ... 0 0 0 0 0 0 4 0 0 532
1 Adams BRUSH CREEK TOWNSHIP AAB Southwest Cincinnati 768 514 66.93% 0 0 ... 0 0 0 0 0 0 0 0 0 390
2 Adams LOCUST GROVE AAD Southwest Cincinnati 684 522 76.32% 0 0 ... 0 0 0 0 0 0 4 0 0 408
3 Adams GREEN TOWNSHIP AAE Southwest Cincinnati 409 259 63.33% 0 0 ... 0 0 0 0 0 0 1 0 0 176
4 Adams JEFFERSON TOWNSHIP AAG Southwest Cincinnati 537 351 65.36% 0 0 ... 0 0 0 0 0 0 2 0 0 258

5 rows × 31 columns

This reads the data and displays the first few rows of the data.

In [4]:
from locale import atof
import locale
# Locale setup is left in place for any locale-aware code elsewhere in the
# notebook; the conversion below does not depend on it.
locale.setlocale(locale.LC_NUMERIC, '')

# Count columns that need converting to integers. Presumably they were read as
# text because some values contain thousands separators such as "1,234" --
# TODO confirm against the CSV.
badobj = [u'Registered Voters',u'Total Voters',u'Hillary Clinton (D)',u'Donald J. Trump (R)']

# Strip the separators explicitly instead of going through locale.atof:
#   * under a C/POSIX locale atof() has no thousands separator and rejects "1,234";
#   * re-running the cell used to fail because atof() cannot handle the
#     already-converted integers -- astype(str) makes the cell idempotent.
# The float -> int step mirrors the original int(atof(x)) truncation.
OH[badobj] = OH[badobj].apply(
    lambda col: col.astype(str).str.replace(',', '').astype(float).astype(int))

# The four candidates compared in the rest of the notebook.
candidates = [u'Hillary Clinton (D)',u'Gary Johnson',u'Jill Stein (G)',u'Donald J. Trump (R)']

# Keep the county key, the two voter-count columns and the candidate columns,
# then add the precinct rows up within each county.
count_columns = ['Registered Voters','Total Voters'] + candidates
OHred = OH[['County Name'] + count_columns]
OHcounties = OHred.groupby('County Name').sum()

OHcounties.head()
Out[4]:
Registered Voters Total Voters Hillary Clinton (D) Gary Johnson Jill Stein (G) Donald J. Trump (R)
County Name
Adams 16945 11583 2326 226 47 8659
Allen 68054 46735 13294 1486 323 30487
Ashland 35452 25104 5740 906 185 17493
Ashtabula 60624 41797 15577 1213 427 23318
Athens 45418 30042 16370 1012 539 11354
In [5]:
# Boolean mask over counties: True where Trump received more votes than Clinton.
DbeatsH = OHcounties['Donald J. Trump (R)'] > OHcounties['Hillary Clinton (D)']

# Mean number of registered voters in the counties selected by the mask (d)
# and in all remaining counties (h).
d,h = OHcounties[DbeatsH]['Registered Voters'].mean(), OHcounties[~DbeatsH]['Registered Voters'].mean()

# Trump-minus-Clinton vote difference as a fraction of each county's total
# voters; stored as a new 'TMP' column that the map and histogram cells read.
OHcounties['TMP'] = (OHcounties['Donald J. Trump (R)'] - OHcounties['Hillary Clinton (D)']) / OHcounties['Total Voters']

# print(...) with a single argument produces the same output on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print('Mean registered voters in Trumpland ' + str(d))
print('Mean registered voters in Clintonland ' + str(h))
print('Ratio (Clinton/Trump): ' + str(h/d))
Mean registered voters in Trumpland 55837.0625
Mean registered voters in Clintonland 424257.5
Ratio (Clinton/Trump): 7.59813430372
  • DbeatsH is a True/False value for each county: True where Trump received more votes than Clinton.
  • d,h are the mean numbers of registered voters in the counties where Trump beat Clinton (d) and in the remaining counties (h).
  • The next line computes, for each county, the number of votes for Trump minus the number of votes for Clinton, divided by the total number of voters, and stores it in a new column called TMP.
  • Finally it prints the two means and their ratio (Clinton/Trump).
In [6]:
from mpl_toolkits.basemap import Basemap
# Colormap used later to turn each county's 'TMP' margin into a fill color.
cmap = plt.cm.seismic

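This imports Basemap, a map-drawing toolkit that works with matplotlib, and picks the `seismic` colormap that is used later to color the map.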

In [7]:
# Map object using the 'lcc' projection, framed by a lower-left corner of
# (-85, 38) and an upper-right corner of (-80, 42) in longitude/latitude.
m = Basemap(
    projection='lcc',
    lat_1=39, lat_2=41, lon_0=-82.5,
    llcrnrlon=-85, llcrnrlat=38,
    urcrnrlon=-80, urcrnrlat=42,
)

This sets up a map projection covering a longitude/latitude box around Ohio.

In [8]:
# Draw the outer frame of the map.
m.drawmapboundary()
# Draw the county outlines. As the last expression, its return value is what
# the cell displays.
# NOTE(review): presumably this call is also what populates m.counties and
# m.counties_info, which the following cells read -- confirm in the Basemap docs.
m.drawcounties()
Out[8]:
<matplotlib.collections.PolyCollection at 0x1d13b518>

This draws the frame around the map and the boundary lines of the counties.

In [9]:
# Collect (position, county name) pairs for the county records whose STATE
# field is 'OH'; the position is used later to look up the shape in m.counties.
OHcnames = []
for idx, info in enumerate(m.counties_info):
    if info['STATE'] == 'OH':
        OHcnames.append((idx, info['NAME']))
In [10]:
# Grab the current axes so the county polygons can be added to it below.
ax = plt.gca()
In [11]:
# Fill every Ohio county with a color derived from its 'TMP' margin.
for seg_idx, county in OHcnames:
    margin = OHcounties['TMP'].loc[county]
    # Rescale the margin with (x + 1) / 2 before the colormap lookup, and keep
    # only the first three color components for the hex conversion.
    rgb = cmap((margin + 1)/2)[:3]
    fill = rgb2hex(rgb)
    outline = m.counties[seg_idx]
    ax.add_patch(Polygon(outline, facecolor=fill, edgecolor=fill))
In [12]:
# Render the county map assembled in the cells above.
plt.show()

OH: Percent swing from Clinton (Blue) to Trump (Red)

This code fills each Ohio county with a color based on its Trump-minus-Clinton margin and then displays the map, so bluer counties leaned toward Clinton and redder counties leaned toward Trump.

In [26]:
# Distribution of the 'TMP' margin among counties with fewer than 50000
# registered voters.
smallcounties = OHcounties['Registered Voters'] < 50000
small_margins = OHcounties.loc[smallcounties, 'TMP']
plt.hist(small_margins, bins=20)
plt.show()
In [27]:
# Each candidate's votes as a fraction of the precinct's total voters.
total_voters = OH[u'Total Voters']
shares = {}
for name in candidates:
    shares[name] = OH[name] / total_voters
candperc = pd.DataFrame(shares)

# Carry over the identifying columns and the precinct size, then index the
# table by (county, precinct).
id_columns = ['County Name','Precinct Name','Total Voters']
candperc[id_columns] = OH[id_columns]
candperc = candperc.set_index(['County Name','Precinct Name'])
In [28]:
# One semi-transparent histogram per candidate of their precinct vote shares,
# with each precinct weighted by the candidate's vote count there.
# The former `normed=0` argument is dropped: it was the default (raw weighted
# counts) and the keyword no longer exists in matplotlib >= 3.1, so omitting
# it gives the same figure on old and new versions.
for c in candidates:
    plt.hist(candperc[c],alpha=.7,label=c,bins=50,range=(0,1),weights=OH[c])
plt.legend(loc=2)
plt.show()
  1. First, this builds a data frame giving each candidate's share of the total voters in every precinct.
  2. Then a histogram of those shares is drawn for each candidate in its own semi-transparent color, weighted by the candidate's vote count, and the legend shows which color belongs to which candidate.
In [29]:
# Precinct vote shares of candidates[0] (x) against candidates[1] (y);
# marker size is the precinct's total voters divided by 50.
c0 = candidates[0]
c1 = candidates[1]
marker_sizes = OH['Total Voters']/50
plt.scatter(candperc[c0], candperc[c1], s=marker_sizes)
plt.xlabel(c0)
plt.ylabel(c1)
plt.show()
In [30]:
# Precinct vote shares of candidates[0] (x) against candidates[2] (y);
# marker size is the precinct's total voters divided by 50.
c0 = candidates[0]
c1 = candidates[2]
marker_sizes = OH['Total Voters']/50
plt.scatter(candperc[c0], candperc[c1], s=marker_sizes)
plt.xlabel(c0)
plt.ylabel(c1)
plt.show()

This scatter plot shows, precinct by precinct, the share of voters who voted for Clinton against the share who voted for Stein, so we can see how the two are related.

In [31]:
# Precinct vote shares of candidates[3] (x) against candidates[1] (y);
# marker size is the precinct's total voters divided by 50.
c0 = candidates[3]
c1 = candidates[1]
marker_sizes = OH['Total Voters']/50
plt.scatter(candperc[c0], candperc[c1], s=marker_sizes)
plt.xlabel(c0)
plt.ylabel(c1)
plt.show()
In [32]:
# Precinct vote shares of candidates[3] (x) against candidates[2] (y);
# marker size is the precinct's total voters divided by 50.
c0 = candidates[3]
c1 = candidates[2]
marker_sizes = OH['Total Voters']/50
plt.scatter(candperc[c0], candperc[c1], s=marker_sizes)
plt.xlabel(c0)
plt.ylabel(c1)
plt.show()

This scatter plot shows, precinct by precinct, the share of voters who voted for Trump against the share who voted for Stein, so we can see how the two are related.

In [33]:
# Kendall tau between the precinct vote shares of every pair of candidates,
# using only precincts with more than 100 total voters.
# The filtered table is built once here instead of being recomputed for every
# pair inside the comprehension.
big_precincts = candperc[candperc['Total Voters'] > 100]

# Rows and columns are labelled straight from `candidates`, so the table no
# longer relies on a hard-coded count of four candidates.
taus = pd.DataFrame(
    [[stats.kendalltau(big_precincts[c0], big_precincts[c1])[0] for c0 in candidates]
     for c1 in candidates],
    index=candidates, columns=candidates)
taus
Out[33]:
Hillary Clinton (D) Gary Johnson Jill Stein (G) Donald J. Trump (R)
Hillary Clinton (D) 1.000000 -0.117957 0.206878 -0.942552
Gary Johnson -0.117957 1.000000 0.099360 0.075659
Jill Stein (G) 0.206878 0.099360 1.000000 -0.227611
Donald J. Trump (R) -0.942552 0.075659 -0.227611 1.000000

This computes the Kendall tau correlation between the vote shares of every pair of candidates (using only precincts with more than 100 total voters), puts the results into a matrix, and displays it.