Workshop designed by Emily Molfe
Good Cheat Sheet: http://www.maths.lancs.ac.uk/~rowlings/Teaching/UseR2012/cheatsheet.html
Source for some exercises: https://rpubs.com/cengel248/59418
Load Libraries
library(maps) # For creating geographical maps
library(mapdata) # Contains basic data for ’maps’
library(maptools) # Tools for handling spatial objects
library(mapproj) # For creating projected maps
library(raster) # Tools to deal with raster maps
library(ggplot2) # To create maps
library(gpclib) # general polygon clipper
library(RColorBrewer) # Color packes
library(classInt) # Choose univariate class intervals
library(acs) # Package to handle ACS data
library(choroplethr) # Simplify the creation of Choropleth Maps in R
library(rgdal) # Bindings for the Geospatial Data Abstraction
library(sp) # Classes and mehods for spatial data
library(hexbin) # Hexagonal Binning Routines
library(ggmap) # Google maps and OpenStreetMap
library(XML) # Tools for parsing and generating XL
library(dplyr) # Data Manipulation
library(rgeos)
Example 1: Arlington
Load data
Single Point in Arlington
# Example 1: Arlington
# -----------------------------------------------------------------------
# Load Data ---------------------------------------------------------------
# County shapfiles come from: http://gisdata.arlgis.opendata.arcgis.com/
# Arlington County Border loads shape file
Ar <- readShapePoly("Data/County Line/Arlington_Boundaries_and_Facilities.shp")
# projects the data make sure that all your objects are on the same
# projection
proj4string(Ar) <- CRS("+proj=longlat +datum=NAD83")
# Roads
Roads <- readShapeLines("Data/Roads/Roads.shp")
# VT Building (plotting 1 point)
VT <- as.data.frame(cbind(-77.116258, 38.88143))
# Change col names to pinpoint x and y
colnames(VT) <- c("x", "y")
# Make into coordinates
coordinates(VT) <- ~x + y
# Project the VT data
proj4string(VT) <- CRS("+proj=longlat +datum=NAD83")
Get census block group
# Census Blocks read the projection into the polygon in 1 step
geodat <- readShapePoly("Data/Block Group/tl_2014_51_bg.shp", proj4string = CRS("+proj=longlat +datum=NAD83"))
Plot Virginia
# Plot to check
plot(geodat)
# Get Block Groups for Arlington County Only
geodat <- geodat[which(geodat$COUNTYFP == "013"), ]
# Order block groups to help data merging
geodat <- geodat[order(geodat$GEOID), ]
Plot Arlington
# Plot to check
plot(geodat)
ACS Data
# Load ACS Data
myacs <- read.acs("Data/ACS SNAP/ACS_13_5YR_B22010_with_ann.csv", geocols = 3:1,
endyear = 2013)
# See column names
myacs@acs.colnames
[1] "HD01_VD01.Estimate; Total:"
[2] "HD01_VD02.Estimate; Household received Food Stamps/SNAP in the past 12 months:"
[3] "HD01_VD03.Estimate; Household received Food Stamps/SNAP in the past 12 months: - Households with 1 or more persons with a disability"
[4] "HD01_VD04.Estimate; Household received Food Stamps/SNAP in the past 12 months: - Households with no persons with a disability"
[5] "HD01_VD05.Estimate; Household did not receive Food Stamps/SNAP in the past 12 months:"
[6] "HD01_VD06.Estimate; Household did not receive Food Stamps/SNAP in the past 12 months: - Households with 1 or more persons with a disability"
[7] "HD01_VD07.Estimate; Household did not receive Food Stamps/SNAP in the past 12 months: - Households with no persons with a disability"
# Pull out columns of interest
snapt <- myacs@estimate[, 1] # Total
snap <- myacs@estimate[, 2] # Household received Food Stamps/SNAP in the past 12 months
# Extracts attribute data from shape file
mdat <- geodat@data
# Binds ACS data to attributes (previous ordering makes this easy)
mdat <- cbind(mdat, snap, snapt)
# Connect attributes back to shapefile
geodat@data <- mdat
plot(geodat)
Need to tell it to plot with specific variables and colors
# General Plot to map SNAP
# --------------------------------------------------------------------
# Decide on colors from brewer.pal
display.brewer.all(type = "seq")
# Set Colors
nclr <- 4 # number of breaks for colors and legend
plotclr <- brewer.pal(nclr, "Reds") # palette from brewer
# Set breaks for different colors (... can use quantiles too with quantile()
# command
class <- classIntervals(geodat$snap, nclr, style = "fixed", fixedBreaks = c(0,
20, 50, 75, 100, 300))
# Set colors
colcode <- findColours(class, plotclr)
# Check names of breaks to set names
names(attr(colcode, "table"))
[1] "[0,20)" "[20,50)" "[50,75)" "[75,100)" "[100,300]"
# Set names of breaks
names <- c("0 - 20", "20 - 50", "50-75", "75 - 100", "100 - 300")
# Plot
par(mai = c(1, 0, 1, 2.8))
plot(geodat, col = colcode)
# Add title
mtext("Households within Arlington Recieving SNAP", 3, line = 1, cex = 1.5,
adj = -1.3)
# add = TRUE to add another layer to the same plot
plot(Roads, add = TRUE, col = "grey", lwd = 0.3)
plot(geodat, add = TRUE, lwd = 1)
plot(Ar, add = TRUE, lwd = 2.5)
plot(VT, add = TRUE, pch = 19, cex = 2, col = "blue")
legend(-77.03, 38.91635, xpd = TRUE, legend = names, fill = attr(colcode, "palette"),
cex = 1.5, bty = "n", title = "Count of Households\nReceiving SNAP")
legend(-77.03, 38.86, c("Census Block Group", "Virginia Tech"), lty = c(1, -1),
pch = c(-1, 19), col = c("black", "blue"), cex = 1.3, , seg.len = 1, y.intersp = 0.8,
bty = "n", xpd = TRUE)
# spplot ------------------------------------------------------------------
spplot(geodat,"snap")
##this is ugly, let's chose our own colors
# Choose own colors
pal <- brewer.pal(7, "OrRd") # select 7 colors from the palette OrRd
# Number of colors should be one higher than number of cuts
spplot(geodat, "snap", col.regions = pal, cuts = 6)
# ggplot and hexbins ------------------------------------------------------
# create a unique ID for the later join
geodat@data$id = rownames(geodat@data)
# turn SpatialPolygonsDataframe into a data.frame
geodat.pts <- fortify(geodat, region="id") # Only has the coordinates
geodat.df <- left_join(geodat.pts, geodat@data, by="id") # Add the attributes back in
# calculate quantile breaks
geodat.df$qt <- cut(geodat.df$snap,
breaks = c(0,20,50,75,100,300),include.lowest = TRUE)
# Make VT into a data.frame
VT<-as.data.frame(VT)
# plot
ggplot(geodat.df, aes(long,lat,group=group, fill=qt)) + # the data
ggtitle("Households Recieving SNAP in Arlington") +
geom_polygon() + # make polygons
scale_fill_brewer("Homicide Rate", palette = "OrRd") + # fill with brewer colors
theme(line = element_blank(), # remove the background, tickmarks, etc
axis.text=element_blank(),
axis.title=element_blank(),
panel.background = element_blank()) +
geom_point(aes(x, y,fill = NULL,group = NULL), size = 3,data=VT,col="blue")+
scale_alpha(guide = 'none')+
coord_equal()
# spplot ------------------------------------------------------------------
spplot(geodat, "snap")
## this is ugly, let's chose our own colors
# Choose own colors
pal <- brewer.pal(7, "OrRd") # select 7 colors from the palette OrRd
# Number of colors should be one higher than number of cuts
spplot(geodat, "snap", col.regions = pal, cuts = 6)
# ggplot and hexbins ------------------------------------------------------
# create a unique ID for the later join
geodat@data$id = rownames(geodat@data)
# turn SpatialPolygonsDataframe into a data.frame
geodat.pts <- fortify(geodat, region = "id") # Only has the coordinates
geodat.df <- left_join(geodat.pts, geodat@data, by = "id") # Add the attributes back in
# calculate quantile breaks
geodat.df$qt <- cut(geodat.df$snap, breaks = c(0, 20, 50, 75, 100, 300), include.lowest = TRUE)
# Make VT into a data.frame
VT <- as.data.frame(VT)
# plot
ggplot(geodat.df, aes(long, lat, group = group, fill = qt)) +
ggtitle("Households Recieving SNAP in Arlington") +
geom_polygon() + # make polygons
scale_fill_brewer("Homicide Rate", palette = "OrRd") + # fill with brewer colors
theme(line = element_blank(), # remove the background, tickmarks, etc
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_blank()) +
geom_point(aes(x, y, fill = NULL, group = NULL),
size = 3, data = VT, col = "blue") +
scale_alpha(guide = 'none') +
coord_equal()
Example 2: Philly
# Example 2: Philly ------------------------
# Load Data ---------------------------------------------------------------
# Philly Shapefile
philly <- readShapePoly("Data/Philly2/Philly2.shp")
# N_HOMIC: Number of homicides (since 2006)
# HOMIC_R: homicide rate per 100,000 (Philadelphia Open Data)
# PCT_COL: % 25 years and older with college or higher degree1 (ACS 2006-2010)
# mdHHnc: estimated median household income (ACS 2006-2010)
# spplot ------------------------------------------------------------------
spplot(philly) # sp plot (general)
spplot(philly, c("HOMIC_R", "PCT_COL")) # Just those
# Using color.brewer
pal <- brewer.pal(7, "OrRd") # we select 7 colors from the palette
spplot(philly, "HOMIC_R", col.regions = pal, cuts = 6)
# ggplot hexbins ------------------------------------------------------
homicides<-read.csv("PhillyHomicides.csv")
head(homicides) # X and Y at end are coordinates
DC_DIST SECTOR DISPATCH_DATE_TIME DISPATCH_DATE DISPATCH_TIME HOUR
1 22 1 2014-09-14 16:00:00 2014-09-14 16:00:00 NA
2 1 B 2006-01-14 00:00:00 2006-01-14 00:00:00 NA
3 1 B 2006-04-01 16:05:00 2006-04-01 16:05:00 NA
4 1 B 2006-05-10 11:13:00 2006-05-10 11:13:00 NA
5 1 E 2006-07-01 12:42:00 2006-07-01 12:42:00 NA
6 1 F 2006-07-09 19:13:00 2006-07-09 19:13:00 NA
DC_KEY LOCATION_BLOCK UCR_GENERAL OBJECTID
1 199822061421 1800 BLOCK W MONTGOMERY 100 44774978
2 200601001669 2000 BLOCK MIFFLIN ST 100 44746630
3 200601011408 S 22ND ST /SNYDER AVE 100 44746625
4 200601016399 2100 BLOCK MC KEAN ST 100 44771721
5 200601023411 2100 BLOCK S HICKS ST 100 44746632
6 200601024451 1800 BLOCK SNYDER AVE 100 44843467
TEXT_GENERAL_CODE POINT_X POINT_Y SHAPE
1 Homicide - Criminal -75.15680 39.98804 44714107
2 Homicide - Criminal -75.17873 39.92801 44685759
3 Homicide - Criminal -75.18275 39.92607 44685754
4 Homicide - Criminal -75.18092 39.92704 44710850
5 Homicide - Criminal -75.17204 39.92463 44685761
6 Homicide - Criminal -75.17612 39.92517 44782596
ggplot(homicides, aes(POINT_X, POINT_Y)) +
stat_binhex() +
scale_fill_gradientn(colours = c("white", "red"), name = "Frequency")
# ggplot ------------------------------------------------------
# create a unique ID for the later join
philly@data$id = rownames(philly@data)
# turn SpatialPolygonsDataframe into a data frame
philly.pts <- fortify(philly, region="id") #this only has the coordinates
philly.df <- left_join(philly.pts, philly@data, by="id") # add the attributes back in
# calculate quantile breaks
philly.df$qt <- cut(philly.df$HOMIC_R,
breaks = quantile(philly.df$HOMIC_R, probs = 0:7/7, na.rm = TRUE),
include.lowest = TRUE)
# plot
ggplot(philly.df, aes(long,lat,group=group, fill=qt)) + # the data
ggtitle("Homicide Rate in Philidelphia by Census Tract") +
geom_polygon() + # make polygons
scale_fill_brewer("Homicide Rate", palette = "OrRd") + # fill with brewer colors
theme(line = element_blank(), # remove the background, tickmarks, etc
axis.text=element_blank(),
axis.title=element_blank(),
panel.background = element_blank()) +
coord_equal()
# Event Data with Coordinates ---------------------------------------------
# Basemap
phBasemap <- get_map(location="Philadelphia, PA", zoom=12, maptype = 'satellite')
ggmap(phBasemap)
# Try out these different backgrounds (see library information)
#phBasemap <- get_map(location="Philadelphia, PA", zoom=12, maptype = 'terrain')
#ggmap(phBasemap)
#phBasemap <- get_map(location="Philadelphia, PA", zoom=12, maptype = 'toner')
#ggmap(phBasemap)
#phBasemap <- get_map(location="Philadelphia, PA", zoom=12, maptype = 'watercolor')
#ggmap(phBasemap)
# plot with heatmap
ggmap(phBasemap) +
# make the heatmap
stat_density2d(aes(x = POINT_X,
y = POINT_Y,
fill = ..level.., # value corresponding to discretized density estimates
alpha = ..level..),
bins = 25, # number of bands
data = homicides,
geom = "polygon") + # creates the bands of differenc dolors
## Configure the colors, transparency and panel
scale_fill_gradient(low = "yellow", high = "red") +
scale_alpha(range = c(.25, .55)) +
theme(legend.position="none")
Example 3: Web Scrapping
# Example 3: Web Scrapping ------------------------------------------------
# read in the data
url <- "http://en.wikipedia.org/wiki/List_of_United_States_cities_by_crime_rate"
# we want the first table: which=1
citiesCR <- readHTMLTable(url, which = 1, stringsAsFactors = FALSE)
# clean up (with mutate_each function from dplyr): remove the comma in 1,000
# and above and convert numbers from strings to numeric
citiesCRclean <- mutate_each(citiesCR, funs(as.numeric(gsub(",", "", .))), -(State:City))
# geocode loations
latlon <- geocode(paste(citiesCRclean$City, citiesCRclean$State, sep = ", "))
# combine into a new dataframe
citiesCRll <- data.frame(citiesCRclean, latlon)
# get basmap
map_us <- get_map(location = "United States", zoom = 4, color = "bw")
# plot
ggmap(map_us, legend = "bottomright", extent = "device") + geom_point(data = citiesCRll,
aes(x = lon, y = lat, color = Violent.Crime, size = Population)) + scale_colour_gradient(low = "white",
high = "red") + scale_size_continuous(range = c(4, 12))