Adam T. Bradley

Posts in category “new england”

PaperJS Sparklines

Yet another PaperJS experiment. This one generates Sparklines. It simply looks for canvases matching a specific CSS selector (canvas.sparkler by default), and draws a chart based on JSON-formatted data in the data-data attribute. Here, I'm displaying some unemployment data from Quandl.

The code is on Gist.

library(rjson)
library(Quandl)

Quandl.auth(getOption('quandl_auth_key'))

ct.unemp <- Quandl("FRBC/UNEMP_ST_CT", trim_start="1978-01-01", trim_end="2013-12-01", collapse="monthly")
ct.2yunemp <- rev(ct.unemp$Value[1:24])

me.unemp <- Quandl("FRBC/UNEMP_ST_ME", trim_start="1978-01-01", trim_end="2013-12-01", collapse="monthly")
me.2yunemp <- rev(me.unemp$Value[1:24])

ma.unemp <- Quandl("FRBC/UNEMP_ST_MA", trim_start="1978-01-01", trim_end="2013-12-01", collapse="monthly")
ma.2yunemp <- rev(ma.unemp$Value[1:24])

nh.unemp <- Quandl("FRBC/UNEMP_ST_NH", trim_start="1978-01-01", trim_end="2013-12-01", collapse="monthly")
nh.2yunemp <- rev(nh.unemp$Value[1:24])

ri.unemp <- Quandl("FRBC/UNEMP_ST_RI", trim_start="1978-01-01", trim_end="2013-12-01", collapse="monthly")
ri.2yunemp <- rev(ri.unemp$Value[1:24])

vt.unemp <- Quandl("FRBC/UNEMP_ST_VT", trim_start="1978-01-01", trim_end="2013-12-01", collapse="monthly")
vt.2yunemp <- rev(vt.unemp$Value[1:24])
New England Unemployment by State, 2011-12-31 to 2013-11-30
State Min/max 2011-12-31 2013-11-30
Connecticut 7.5%/8.5% 8.3% 7.5%
Maine 6.4%/7.4% 7.4% 6.4%
Massachusetts 6.7%/7.2% 6.9% 7.1%
New Hampshire 5.2%/5.6% 5.4% 5.2%
Rhode Island 9.4%/10.9% 10.9% 9.4%
Vermont 4.2%/5.1% 5.1% 4.3%

More mapping with R

One more map; I'm doing more experimenting with plotting data from multiple dataframes using ggplot2.

library(rjson)
library(RCurl)
library(ggplot2)
#The `whereIs` function polls [Google's geocoding API]
#(https://developers.google.com/maps/documentation
#/geocoding/) and returns the latitude and longitude of a
#place.
whereIs <- function(place) {
url = paste('http://maps.googleapis.com/maps/',
'api/geocode/json?sensor=false&address=',
curlEscape(place), sep='')
rs <- fromJSON(file=url)
c(rs$results[[1]]$geometry$location$lat,
rs$results[[1]]$geometry$location$lng)
}
new_england <- c('connecticut', 'rhode island',
'massachusetts', 'vermont',
'new hampshire', 'maine')
states <- map_data("state")
states.ne <- subset(states, region %in% new_england)
counties <- map_data('county')
counties.ne <- subset(counties, region %in% new_england)
capital <- c('Hartford', 'Providence',
'Boston', 'Montpelier',
'Concord', 'Augusta')
#Here we create a dataframe with info on state capitals,
#including location (from `whereIs()`) and population
#(copied manually from Wikipedia).
ne <- data.frame(capital, new_england,
stringsAsFactors=F)
geo <- sapply(do.call(paste, c(ne, sep=", ")), whereIs)
pop <- c(124775, 178042, 625087,
7855, 42695, 19136)
ne <- cbind(ne, pop, geo[1,], geo[2,])
rownames(ne) <- NULL
colnames(ne) <- c('capital','state','pop','lat','long')
map_theme <- theme(
line = element_blank(),
rect = element_blank(),
strip.text = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
legend.position="none")
#Initialize a new `ggplot` plot.
plt <- ggplot() + map_theme +
ggtitle("New England counties and capitals") +
scale_fill_brewer(palette="Dark2")
#Draw the counties, outlining each with white. Make each
#state a different color (`fill=region`).
plt <- plt +
geom_polygon(data=counties.ne,
aes(long,lat,
group=group,
fill=region)) +
geom_path(data=counties.ne,
aes(long,lat,group=group),
colour="white")
#Add heavier white lines around the states. I tried some
#other methods, like black dotted/dashed lines, but
#didn't like them as much.
plt <- plt +
geom_path(data=states.ne,
aes(long,lat,group=group),
colour="white",linetype='solid', lwd=1.8)
#Add black dots for capitals, sized based on population.
plt <- plt +
geom_point(data=ne, colour='black',
aes(long, lat,
size=pop))
#Output the plot.
plt

plot of chunk unnamed-chunk-1

Mapping Colleges with R

Two things going on here. First I scrape some websites (an article on About.com listing top colleges in New England and the Wikipedia articles for those schools) using YQL. I’m just grabbing geographical information here, though I could grab more details (enrollment, year of founding, etc.) if I wanted them.

library(rjson)
library(RCurl)
library(xtable)
library(reshape)
queryYQL <- function(query) {
url <- paste("http://query.yahooapis.com/v1/public/yql?q=",
curlEscape(query), "&format=json&callback=", sep='')
resp <- fromJSON(file=url)
resp$query$results
}
getLoc <- function(name) {
#remove the "(RISD)" after RISD's full name.
url <- sub('\s\(.+\)', '', name)
url <- gsub(' ', '_', url)
url <- paste('http://en.wikipedia.org/wiki/', url, sep='')
qry <- paste("SELECT * FROM html WHERE url='",
url, "' AND xpath='//*[@class="geo"]'",
sep='')
loc <- queryYQL(qry)
loc <- ifelse(
is.null(loc), "",
ifelse(
is.null(names(loc$span)),
loc$span[[1]]$content,
loc$span$content))
loc
}
colLists = c('http://collegeapps.about.com/od/collegerankings/tp/Top-New-England-Colleges-And-Universities.htm',
'http://collegeapps.about.com/od/collegerankings/tp/Top-New-England-Colleges-And-Universities.01.htm',
'http://collegeapps.about.com/od/collegerankings/tp/Top-New-England-Colleges-And-Universities.02.htm')
names = c()
#I always feel like I've failed when I use a for loop in R.
for ( x in colLists ) {
qry <- paste("SELECT * FROM html WHERE url='", x,
"' AND xpath='//h3[@class="dsc"]/a'", sep='')
newcols <- queryYQL(qry)
newcols <- unlist(newcols$a)
newcols <- newcols[names(newcols)=='content']
names(newcols) <- NULL
#Never do this. Except this once when it only happens three times.
names <- c(names, newcols)
}
locs <- sapply(names, getLoc)
names = names[locs!='']
locs = locs[locs!='']
colleges <- data.frame(names, colsplit(locs, '; ', c('lat', 'long')))

Here’s my data, now that I have a data frame of top New England colleges with geographic coordinates. We lose Trinity College, since (unsurprisingly) http://en.wikipedia.org/wiki/Trinity_college is a disambiguation page, and not a description of the one in Connecticut.

print(xtable(colleges), type="html", include.rownames=F)
names lat long
Amherst College 42.37  -72.52 
Babson College 42.30  -71.26 
Bates College 44.11  -70.20 
Bentley University 42.39  -71.22 
Boston College 42.34  -71.17 
Bowdoin College 43.91  -69.96 
Brandeis University 42.37  -71.26 
Brown University 41.83  -71.40 
Coast Guard Academy 41.37  -72.10 
Colby College 44.56  -69.66 
Connecticut College 41.38  -72.10 
Dartmouth College 43.70  -72.29 
Harvard University 42.37  -71.12 
Holy Cross, College of the 42.24  -71.81 
Massachusetts Institute of Technology 42.36  -71.09 
Middlebury College 44.01  -73.18 
Olin College of Engineering 42.29  -71.26 
Rhode Island School of Design (RISD) 41.83  -71.41 
Smith College 42.32  -72.64 
Tufts University 42.41  -71.12 
Wellesley College 42.30  -71.31 
Wesleyan University 41.56  -72.66 
Williams College 42.71  -73.20 
Yale University 41.31  -72.93 

Finally, I plot the data, using code heavily borrowed from here. Positioning of the names could use some work, but this is good enough for now.

library(maps)
library(ggplot2)
all_states <- map_data("state")
new_england <- subset(all_states, region %in%
c('connecticut', 'rhode island',
'massachusetts', 'vermont',
'new hampshire', 'maine'))
map_theme <- theme(
line = element_blank(),
rect = element_blank(),
strip.text = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
legend.position="none")
plt <- ggplot() +
map_theme +
ggtitle("Some colleges in New England") +
geom_polygon(data=new_england,
aes(long,lat,group=group)) +
geom_path(data=new_england,
aes(long,lat,group=group),
color="white")+
geom_point(data=colleges,
aes(long, lat, color=names)) +
geom_text(data=colleges,
hjust=-0.07, vjust=0.4,
aes(x=long, y=lat, label=names,
colour=names),
size=3)
plt

plot of chunk college-display

RI Unemployment by County, 2012

I’m taking a break from maps to figure out some of the options for basic plots.

Below, I’m taking Rhode Island employment figures I lifted from the state Department of Labor and Training’s site and plotting unemployment by county for 2012.

library(RColorBrewer)
riemp <- read.csv('../ri-unemployment2012.csv')
riemp$pct.unemp <-
round((1 - (riemp$employed
/riemp$labor.force)) * 100, 2)
month <- as.Date(
paste(riemp$month, '01', sep="/"))
riemp$month <- NULL
riemp$monthnum <- format.Date(month, '%m')
riemp$monthname <- format.Date(month, '%B')
ri = riemp[riemp$region=='RHODE ISLAND',]
ri
##          region labor.force employed pct.unemp monthnum monthname
## 1  RHODE ISLAND      556461   491097     11.75       01   January
## 2  RHODE ISLAND      556081   488879     12.08       02  February
## 3  RHODE ISLAND      553837   488551     11.79       03     March
## 4  RHODE ISLAND      549176   488371     11.07       04     April
## 5  RHODE ISLAND      552331   494690     10.44       05       May
## 6  RHODE ISLAND      555397   497872     10.36       06      June
## 7  RHODE ISLAND      562854   499845     11.19       07      July
## 8  RHODE ISLAND      561453   502163     10.56       08    August
## 9  RHODE ISLAND      562846   507689      9.80       09 September
## 10 RHODE ISLAND      569150   512693      9.92       10   October
## 11 RHODE ISLAND      566423   510040      9.95       11  November

riemp.by.county <- riemp[grep('COUNTY', riemp$region),]
riemp.by.county$region <- sub(' COUNTY', '', riemp.by.county$region)
riemp.by.county
##         region labor.force employed pct.unemp monthnum monthname
## 474    BRISTOL       26600    23949      9.97       01   January
## 475    BRISTOL       26549    23846     10.18       02  February
## 476    BRISTOL       26509    23833     10.09       03     March
## 477    BRISTOL       26245    23825      9.22       04     April
## 478    BRISTOL       26415    24117      8.70       05       May
## 479    BRISTOL       26438    24242      8.31       06      June
## 480    BRISTOL       26817    24308      9.36       07      July
## 481    BRISTOL       26830    24425      8.96       08    August
## 482    BRISTOL       26972    24725      8.33       09 September
## 483    BRISTOL       27390    24997      8.74       10   October
## 484    BRISTOL       27337    24880      8.99       11  November
## 485       KENT       93686    83388     10.99       01   January
## 486       KENT       93712    83031     11.40       02  February
## 487       KENT       93293    82981     11.05       03     March
## 488       KENT       92792    82957     10.60       04     April
## 489       KENT       93310    83972     10.01       05       May
## 490       KENT       93531    84406      9.76       06      June
## 491       KENT       94677    84640     10.60       07      July
## 492       KENT       94612    85044     10.11       08    August
## 493       KENT       95099    86091      9.47       09 September
## 494       KENT       96069    87037      9.40       10   October
## 495       KENT       95544    86631      9.33       11  November
## 496    NEWPORT       45260    39530     12.66       01   January
## 497    NEWPORT       44985    39361     12.50       02  February
## 498    NEWPORT       44470    39337     11.54       03     March
## 499    NEWPORT       43636    39324      9.88       04     April
## 500    NEWPORT       43444    39807      8.37       05       May
## 501    NEWPORT       43511    40013      8.04       06      June
## 502    NEWPORT       44056    40123      8.93       07      July
## 503    NEWPORT       44157    40314      8.70       08    August
## 504    NEWPORT       44228    40811      7.73       09 September
## 505    NEWPORT       44970    41260      8.25       10   October
## 506    NEWPORT       45123    41067      8.99       11  November
## 507 PROVIDENCE      319215   280259     12.20       01   January
## 508 PROVIDENCE      319290   279056     12.60       02  February
## 509 PROVIDENCE      318438   278889     12.42       03     March
## 510 PROVIDENCE      316306   278808     11.85       04     April
## 511 PROVIDENCE      318573   282224     11.41       05       May
## 512 PROVIDENCE      320507   283684     11.49       06      June
## 513 PROVIDENCE      324426   284463     12.32       07      July
## 514 PROVIDENCE      323062   285822     11.53       08    August
## 515 PROVIDENCE      324362   289340     10.80       09 September
## 516 PROVIDENCE      327944   292521     10.80       10   October
## 517 PROVIDENCE      325984   291155     10.68       11  November
## 518 WASHINGTON       71702    63972     10.78       01   January
## 519 WASHINGTON       71549    63588     11.13       02  February
## 520 WASHINGTON       71127    63511     10.71       03     March
## 521 WASHINGTON       70198    63457      9.60       04     April
## 522 WASHINGTON       70590    64570      8.53       05       May
## 523 WASHINGTON       71408    65526      8.24       06      June
## 524 WASHINGTON       72880    66311      9.01       07      July
## 525 WASHINGTON       72792    66558      8.56       08    August
## 526 WASHINGTON       72187    66725      7.57       09 September
## 527 WASHINGTON       72781    66876      8.11       10   October
## 528 WASHINGTON       72435    66309      8.46       11  November

counties = as.character(unique(riemp.by.county$region))
counties
## [1] "BRISTOL"    "KENT"       "NEWPORT"    "PROVIDENCE" "WASHINGTON"
cols = brewer.pal(length(counties), 'Set1')
cols
## [1] "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3" "#FF7F00"

cnt = riemp.by.county[riemp.by.county$region==counties[1],]
plot(ri$monthnum, ri$pct.unemp, type='l',
main="Rhode Island Unemployment, 2012, by County",
xlab="Month", xaxt="n", ylab="% Unemployed",
ylim=c(6,15), col='black', lwd=2)
axis(1, at=1:11, labels=cnt$monthname, las=2)
for ( i in 1:length(counties) ){
cnt = riemp.by.county[riemp.by.county$region==counties[i],]
lines(cnt$monthnum, cnt$pct.unemp, col=cols[i])
}
legend('topright', legend=append(counties, 'RHODE ISLAND', 0),
fill=append(cols, 'black', 0))