Working with spatial data

sf & sp packages

The tools for handling spatial data in R were built around the SP package. However, recently, the sf (simple features) package has been developed to conform with formal geospatial standards, which has mostly replaced the sp tools. The sf and sp data structures follow the tidyverse framework.

Before we get into the sf package, let’s display spatial information with the plotting functions. This dataset is for the counties in the US state of Georgia.

# load spatial data
load("data/georgia.Rdata")

#display one of the polygons
#select the 4th polygon in the list (index 4)
elmt4 <- georgia.polys[[4]]

#set the plot extend
plot(elmt4, asp=1, type='n',xlab="Easting", ylab="Northing")

#use polygon function
polygon(elmt4, density=14, angle=45)

You may need to plot a specific element whose index you don’t know.

#find the index of a specific element
dekalbIndex <- which(georgia2@data$Name=='DeKalb')

elmtDeKalb <- georgia.polys[[dekalbIndex]]

plot(elmtDeKalb, asp=1, type='n',xlab="Easting", ylab="Northing")
#use polygon
polygon(elmtDeKalb, density=5, angle=135)

You may want to plot all the counties. Note the use of the lapply function

#plot all the counties
plot(800000:1500000,800000:1500000, asp=1, type='n',xlab='Easting', ylab='Northing')
#add polygons
lapply(georgia.polys,function(x){polygon(x,col='gray',border='red')})

working with sf data

#convert the Georgia dataset to SF
georgia_sf <- st_as_sf(georgia)

#note the difference in the plotting behavior
plot(georgia_sf)


plot(georgia_sf[,7])

Reading a shapefile

# read with sf
bc_shp_sf <- st_read("shps/BC_FED_3.shp")

#plot this dataset using the plot function shown above
#plot...
#plot...

#plot using ggplot
ggplot() +
  geom_sf(data = bc_shp_sf, color = 'red', fill = bc_shp_sf$m9GI3FpI_2, aes(geometry = geometry))

Using tmap

The tmap library provides great flexibility for the display of spatial data. The syntax is similar to that of ggplot.

#first let use quick tmap
qtm(bc_shp_sf, fill = "red", style = "natural")

qtm(bc_shp_sf, fill="m9GI3FpI_3", text = "FEDNAME", text.size = 0.5, format = "World_wide", style="classic", text.root=5, fill.title="Median Income")
#> tmap style set to "classic"
#> other available styles are: "white", "gray", "natural", "cobalt", "col_blind", "albatross", "beaver", "bw", "watercolor", "red", "black"


#Using the full tmap

#get outline
g <- st_union(bc_shp_sf)

#the union function is applied, see below
#tm_shape(g) +
#  tm_fill("red") +
#  tm_borders(lwd = 3)

tm_shape(bc_shp_sf) +
  tm_fill("green")

tm_shape(bc_shp_sf) +
  tm_fill("green") +
  tm_borders(lty = "dashed", col="black") #try lty as "solid"

tm_shape(bc_shp_sf) +
  tm_fill("green") +
  tm_borders(lty = "dashed", col="black") +
  tm_style("natural", bg.color = "grey90")
  
tm_shape(bc_shp_sf) +
  tm_fill("green") +
  tm_borders(lty = "dashed", col="black") +
  tm_style("natural", bg.color = "grey90") +
  #add an outline
  tm_shape(g) +
  tm_borders(lwd = 2)

tm_shape(bc_shp_sf) +
  tm_fill("green") +
  tm_borders(lty = "dashed", col="black") +
  tm_style("natural", bg.color = "grey90") +
  #add an outline
  tm_shape(g) +
  tm_borders(lwd = 2) + 
  tm_layout(title = "BC Region", 
  title.size = 1, 
  title.position = c(0.55, "top"))

#if you want to select some of the electoral districts
index <- which(bc_shp_sf$FEDNAME=="Abbotsford" | bc_shp_sf$FEDNAME=="Delta" | bc_shp_sf$FEDNAME=="Victoria" | bc_shp_sf$FEDNAME=="North Vancouver" )

#display them
tm_shape(bc_shp_sf[index,]) +
  tm_fill("red") +
  tm_borders(lwd = 3)

#hightlight them in the whole region
tm_shape(bc_shp_sf) +
  tm_fill("white") +
  tm_borders("grey", lwd=0.5) +
  #add a second layer
  tm_shape(g) +
  tm_borders(lwd = 1) + 
  #add a third layer
  tm_shape(bc_shp_sf[index,]) +
  tm_fill("lightblue") +
  tm_borders() +
  #specify some layout parameters
  tm_layout(frame = TRUE, title = "Selected regions in BC", 
            title.size = 1, 
            title.position = c(0.55, "top"))
  
#repeat the steps above and highlight the following regions instead
#Cariboo--Prince George, Prince George--Peace River--Northern Rockies, Skeena--Bulkley Valley

Assignment 2

#Please provide your code for this assignment

#clear the workspace
rm(list = ls())

#Qn1: load the New Haven data

#Qn2: examine and convert the blocks, breach and tracts data to sf. Remember the st_as_sf() function

#Qn3: list the column names in the converted blocks sf data

#Qn4: the variable P_VACANT represents the percentage of households that are unoccupied in each block. Draw a histogram of this variable

#create a chloropleth map using the tmap_mode() function
tmap_mode('plot')
tm_shape(blocks_sf) +
  tm_polygons("P_VACANT", title="Vacant Homes", palette = "Reds") +
  tm_shape(breach_sf) +
  tm_dots(size = 0.1, shape = 16, col = 'blue', alpha=1) 
 
#Qn5: load the BC Shapefile BC_FED_SHP_3 (st_read())

#Qn6: add new variables to the dataset as follows
#Pop2021 : for Population in 2021 and is the same data in column m9GI3FpI_d
#AvgIncome : for Average income in 2021 and is the same data in column m9GI3FpI_1
#ColEdu is for Population with post secondary education and is the same data in column m9GI3FpI_2
#Pop2016 is for Population in 2016 and is the same data in column m9GI3FpI_3

#Qn7: Use head(data.frame()) to print off the 1st 6 rows of the modified dataset in the step above

#Qn8: Create a chloropleth map for each of the 4 variables above