import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
from scipy import stats
import numpy as np
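The code above imports the packages used in this analysis: pandas for data handling, matplotlib for plotting (including rgb2hex and Polygon, which are used later to color the county shapes on the map), scipy.stats for statistics, and numpy for numerical work.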
# Apply matplotlib's 'ggplot' style sheet to the plots that follow.
plt.style.use('ggplot')
This does not import a separate ggplot library; it tells matplotlib to use its 'ggplot' style sheet, so all the plots that follow are drawn in that style.
# Location of the Ohio vote data (CSV).
OH_VOTES_URL = 'http://jsharpna.github.io/141B/data/OHvotes.csv'

# Load the CSV into a DataFrame and preview its first rows.
OH = pd.read_csv(OH_VOTES_URL)
OH.head()
This reads the Ohio vote data from the CSV file into a DataFrame and displays its first few rows.
from locale import atof
import locale

# Adopt the environment's numeric locale so that atof() can parse the
# count strings in the columns below.
# NOTE(review): parsing therefore depends on the locale of the machine
# running this script -- confirm it matches the number format in the CSV.
locale.setlocale(locale.LC_NUMERIC, '')

# Columns whose values are converted to integers, one column at a time.
badobj = [
    u'Registered Voters',
    u'Total Voters',
    u'Hillary Clinton (D)',
    u'Donald J. Trump (R)',
]
for col in badobj:
    OH[col] = OH[col].map(lambda x: int(atof(x)))
# The four candidate columns analysed in the rest of the script.
candidates = [
    u'Hillary Clinton (D)',
    u'Gary Johnson',
    u'Jill Stein (G)',
    u'Donald J. Trump (R)',
]

# Keep only the county name, the voter totals and the candidate columns,
# then sum every kept column within each county.
keep_cols = ['County Name', 'Registered Voters', 'Total Voters'] + candidates
OHred = OH[keep_cols]
OHcounties = OHred.groupby('County Name').sum()
OHcounties.head()
# Per-county vote totals for the two major candidates.
trump = OHcounties['Donald J. Trump (R)']
clinton = OHcounties['Hillary Clinton (D)']

# True for counties where Trump received more votes than Clinton.
DbeatsH = trump > clinton

# Mean number of registered voters in Trump-won (d) and the remaining (h) counties.
registered = OHcounties['Registered Voters']
d, h = registered[DbeatsH].mean(), registered[~DbeatsH].mean()

# Trump-minus-Clinton margin as a fraction of total voters in each county.
OHcounties['TMP'] = (trump - clinton) / OHcounties['Total Voters']

# Single-argument print(...) calls produce the same output on Python 2 and 3.
print('Mean registered voters in Trumpland ' + str(d))
print('Mean registered voters in Clintonland ' + str(h))
print('Ratio (Clinton/Trump): ' + str(h/d))
# Basemap is used below to build the map projection and draw the counties.
from mpl_toolkits.basemap import Basemap
# Colormap used below to turn each county's 'TMP' margin into a fill color.
cmap = plt.cm.seismic
This imports Basemap, the map-drawing toolkit from mpl_toolkits, and selects matplotlib's "seismic" colormap for coloring the map.
# Lambert conformal conic ('lcc') map whose corner coordinates frame Ohio.
m = Basemap(
    projection='lcc',
    llcrnrlon=-85, llcrnrlat=38,  # lower-left corner (longitude, latitude)
    urcrnrlon=-80, urcrnrlat=42,  # upper-right corner (longitude, latitude)
    lat_1=39, lat_2=41,           # standard parallels of the projection
    lon_0=-82.5,                  # central longitude
)
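This creates a Basemap object with a Lambert conformal conic ('lcc') projection whose corners (longitude -85 to -80, latitude 38 to 42) frame the state of Ohio.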
# Draw the outer boundary of the map region.
m.drawmapboundary()
# Draw the county outlines.
# NOTE(review): later code reads m.counties and m.counties_info; presumably
# this call is what populates them -- confirm against the Basemap docs.
m.drawcounties()
This draws the outer boundary of the map and then the outlines of the counties.
# Collect (index, name) for every county record whose STATE field is 'OH';
# the index is used below to look up that county's shape in m.counties.
OHcnames = []
for i, c in enumerate(m.counties_info):
    if c['STATE'] == 'OH':
        OHcnames.append((i, c['NAME']))

ax = plt.gca()
margin = OHcounties['TMP']
for i, c in OHcnames:
    # Shift the margin by +1 and halve it before passing it to the colormap
    # (a margin in [-1, 1] lands in [0, 1]); keep only the RGB components.
    frac = (margin.loc[c] + 1) / 2
    ccol = rgb2hex(cmap(frac)[:3])
    # Fill the county's outline with that color.
    seg = m.counties[i]
    poly = Polygon(seg, facecolor=ccol, edgecolor=ccol)
    ax.add_patch(poly)
plt.show()
OH: Percent swing from Clinton (Blue) to Trump (Red)
This code picks out the Ohio counties, fills each one with a color from the seismic colormap according to its Trump-minus-Clinton vote margin (as a fraction of total voters), and then displays the map.
# Counties with fewer than 50,000 registered voters.
smallcounties = OHcounties['Registered Voters'] < 50000

# Histogram of the Trump-minus-Clinton margin ('TMP') over those counties.
small_margins = OHcounties.loc[smallcounties, 'TMP']
plt.hist(small_margins, bins=20)
plt.show()
# Each candidate's votes divided by the row's total voters.
total_voters = OH[u'Total Voters']
candperc = pd.DataFrame({name: OH[name] / total_voters for name in candidates})

# Carry over the identifying columns and the totals, then index the rows by
# (county, precinct).
for col in ['County Name', 'Precinct Name', 'Total Voters']:
    candperc[col] = OH[col]
candperc = candperc.set_index(['County Name', 'Precinct Name'])
# Overlay one histogram per candidate of that candidate's vote share.
# Each row is weighted by the candidate's vote count in OH, so bar heights
# are numbers of votes rather than numbers of rows.
# The old `normed=0` argument is omitted: un-normalized counts are already
# the default, and newer matplotlib releases no longer accept `normed`.
for c in candidates:
    plt.hist(candperc[c], alpha=.7, label=c, bins=50, range=(0, 1),
             weights=OH[c])
plt.legend(loc=2)
plt.show()
# Scatter Clinton's vote share (x) against Johnson's, then against Stein's (y).
# Marker area is each row's total voters divided by 50.
for c0, c1 in ((candidates[0], candidates[1]),
               (candidates[0], candidates[2])):
    plt.scatter(candperc[c0], candperc[c1], s=OH['Total Voters']/50)
    plt.xlabel(c0)
    plt.ylabel(c1)
    plt.show()
These two scatter plots show how Clinton's vote share relates to Johnson's vote share (first plot) and to Stein's vote share (second plot); each point is sized by the total number of voters.
# Scatter Trump's vote share (x) against Johnson's, then against Stein's (y).
# Marker area is each row's total voters divided by 50.
for c0, c1 in ((candidates[3], candidates[1]),
               (candidates[3], candidates[2])):
    plt.scatter(candperc[c0], candperc[c1], s=OH['Total Voters']/50)
    plt.xlabel(c0)
    plt.ylabel(c1)
    plt.show()
These two scatter plots show how Trump's vote share relates to Johnson's vote share (first plot) and to Stein's vote share (second plot); each point is sized by the total number of voters.
# Restrict to rows with more than 100 total voters. The filter is computed
# once here instead of being rebuilt for every cell of the matrix.
large = candperc[candperc['Total Voters'] > 100]

# Kendall's tau between the vote shares of every pair of candidates.
# Rows correspond to c1 and columns to c0; both are labelled by candidate.
taus = pd.DataFrame(
    [[stats.kendalltau(large[c0], large[c1])[0] for c0 in candidates]
     for c1 in candidates],
    index=candidates,
    columns=candidates,
)
taus
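This computes Kendall's tau rank correlation between the vote shares of every pair of candidates, using only rows with more than 100 total voters, and displays the results as a matrix whose rows and columns are labeled with the candidates' names.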