New york census data

Plotting New York city neighbourhoods

This blog plots the percentage of low-income housholds in each census are of NYC. Low-income households are defined as households with an income of less than $25,000

Inspiration form: http://zevross.com/blog/2015/10/14/

For census tables: https://censusreporter.org/topics/table-codes/

Loading libraries

library(tigris)
library(acs)
library(stringr) 
library(dplyr)
library(ggmap)
library(maptools)
library(ggplot2)
library(maps)
library(RColorBrewer)
library(htmlwidgets)
library(rvg)
library(ggiraph)
library(gganimate)
library(gridExtra)
library(grid)
library(sp)
library(rgdal)
library(rgeos)
library(proj4)
library(data.table)
library(tidyverse)
library(broom)

Retrive census data and NYC map

Here we download all the census information for NYC regarding income

counties <- c(5, 47, 61, 81, 85)
tracts <- tracts(state = 'NY', county = c(5, 47, 61, 81, 85), cb=T)

#tract <- readOGR(dsn="2010 Census Blocks", layer = "geo_export_0d5391be-e106-4007-ba4b-8b9ca965d298")

api.key.install(key="e39e18eef6390892f0e0f1a1273b9ddd09cfbc7f")
# create a geographic set to grab tabular data (acs)
geo<-geo.make(state=c("NY"),
              county=c(5, 47, 61, 81, 85), tract="*")
              

income<-acs.fetch(endyear = 2012, span = 5, geography = geo,
                              table.number = "B19001", col.names = "pretty")


edu<-acs.fetch(endyear = 2012, span = 5, geography = geo,
table.number = "B15001", col.names = "pretty")

income_df <- data.frame(paste0(str_pad(income@geography$state, 2, "left", pad="0"), 
                             str_pad(income@geography$county, 3, "left", pad="0"), 
                             str_pad(income@geography$tract, 6, "left", pad="0")), 
                        income@estimate[,c("Household Income: Total:",  
                        "Household Income: $200,000 or more", 
                        "Household Income: $20,000 to $24,999", 
                        "Household Income: $15,000 to $19,999",
                        "Household Income: $10,000 to $14,999",
                        "Household Income: Less than $10,000")], 
                        stringsAsFactors = FALSE)

rownames(income_df)<-1:nrow(income_df)
names(income_df)<-c("GEOID", "total", 
                "over_200", 
                'between20_24', 
                'between15_20', 
                'betwee10_14', 
                'under10')
income_df$percent <- 100*((income_df$between20_24 + 
                        income_df$between15_20 + 
                        income_df$betwee10_14 + 
                        income_df$under10)/income_df$total)


ggtract<-fortify(tracts, region = "GEOID") 
ggtract$region <- ggtract$id
ggtract <- subset(ggtract, select=c(long, lat,region))

Get the water coordinates

For some reason the census tracks downloaded extends into the water. To remove this, I downloaded the water coordinates for all boroughs in NYC and plotted them.

NYC_water <- area_water("NY", "NEW YORK")
NYC_water <-tidy(NYC_water)
colnames(NYC_water)[1:3] <- c('long', 'lat', 'region')

bronkxWater <- area_water("NY", c("BRONX"))
bronkxWater <-tidy(bronkxWater)
colnames(bronkxWater)[1:3] <- c('long', 'lat', 'region')

quansWater <- area_water("NY", "QUEENS")
quansWater <-tidy(quansWater)
colnames(quansWater)[1:3] <- c('long', 'lat', 'region')

brookWater <- area_water("NY", "KINGS")
brookWater <-tidy(brookWater)
colnames(brookWater)[1:3] <- c('long', 'lat', 'region')

statWater <- area_water("NY", "RICHMOND")
statWater <-tidy(statWater)
colnames(statWater)[1:3] <- c('long', 'lat', 'region')

Get NYC neighborhoods (From https://rpubs.com/jhofman/nycmaps)

I also wanted to split the map into different NYC neighbourhoods to get a sense of how the split of low-income households are per neighbourhood

r <- GET(paste0('http://data.beta.nyc//dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/',
'resource/35dd04fb-81b3-479b-a074-a27a37888ce7/',
'download/d085e2f8d0b54d4590b1e7d1f35594c1pediacitiesnycneighborhoods.geojson'))
nyc_neighborhoods <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)

nyc_df <- tidy(nyc_neighborhoods)
nyc_df$region <- nyc_df$id
nyc_df <- subset(nyc_df, select=c(long, lat, region))

Annotate neighbourhoods

Make sure to separate the parks for the data

neighbourhood_names <- data.frame(nyc_neighborhoods)
neighbourhood_names$id <- rownames(neighbourhood_names)


nycParks <- neighbourhood_names[grep('Park', neighbourhood_names$neighborhood),]

nycParks <- nycParks[grep('Park Slope|Sunset Park|Borough Park|Park Hill|
            Parkchester|Morris Park|Rego Park|Floral Park', 
            nycParks$neighborhood, invert=T),]

Add info for interactive plot

Interactive map is not used here.

income_df$tip <- paste0(
"<br>", paste0(round(income_df$percent,2),'%'))

Plot NYC

p <- ggplot(income_df, aes(map_id=GEOID)) + 
 geom_map_interactive(data=income_df, map=ggtract, 
         aes(map_id=GEOID, data_id=GEOID, tooltip=tip, fill=percent),colour=NA, size=0.05)+
 geom_map_interactive(data=neighbourhood_names, map=nyc_df, 
         aes(map_id=id, data_id=id), colour='black', alpha=0, size=0.25)+
 geom_map_interactive(data=income_df[income_df$total<100,], 
         map=ggtract, aes(map_id=GEOID, data_id=GEOID),fill='#969696', size=0.05)+
 geom_map_interactive(data=nycParks, map=nyc_df, 
         aes(map_id=id, data_id=id),fill='#c7e9c0', size=0.05)+
 geom_map_interactive(data=NYC_water, map=NYC_water, 
         aes(map_id=region, data_id=region), fill='white', colour='white', size=0.05)+
 geom_map_interactive(data=bronkxWater, map=bronkxWater, 
         aes(map_id=region, data_id=region), fill='white', colour='white', size=0.05)+
 geom_map_interactive(data=quansWater, map=quansWater, 
         aes(map_id=region, data_id=region), fill='white', colour='white', size=0.05)+
 geom_map_interactive(data=brookWater, map=brookWater, 
         aes(map_id=region, data_id=region), fill='white', colour='white', size=0.05)+
 geom_map_interactive(data=statWater, map=statWater, 
         aes(map_id=region, data_id=region), fill='white', colour='white', size=0.05)+
 expand_limits(x = nyc_df$long, y = nyc_df$lat)+
 theme_void()+
 theme(plot.background = element_rect(fill = 'white'))+
 scale_fill_gradientn(colours = c("#99000d", "#fcbba1", "white"),
                        values = c(1,0.5, .3, .2, .1, 0))+
# scale_fill_brewer(palette='Blues')+
 ggtitle('Percentage of households earning less than < $25,000')
 
 ggiraph(code = {print(p)},  hover_css = "fill:red;r:3pt;" ,height_svg=6)

plot of chunk NewYorkMap