The relationship between crime and unemployment has long been of interest to scholars as well as those outside of academia.
This page takes a look at trends in unemployment using data from the Bureau of Labor Statistics as well as the open data portal.
To get going, we will load all the libraries we need. The code to generate everything you see here is hidden (to reduce clutter). But, if you want to see “how we get there”, just click the “Show” button on the right.
Next, we will load the data. These files are available in the data folder for the repository.
# clear workspace
rm( list = ls() )
# load libraries
library( dplyr ) # used for wrangling the data
library( tidyr ) # used for wrangling the data
library( ggplot2 ) # for plotting
library( cowplot ) # for putting the plots together
library( scales ) # for formatting the text
library( forecast ) # for working with time series data
library( here ) # for referencing the local directory
# define the objects
crimeData <- readRDS( here( "data/crimeData.rds" ) )
crimesByDay <- readRDS( here( "data/crimesByDay.rds" ) )
crimesByMonth <- readRDS( here( "data/crimesByMonth.rds" ) )
crimesByYear <- readRDS( here( "data/crimesByYear.rds" ) )
crimeRatesMonth <- readRDS( here( "data/crimeRatesMonth.rds" ) )
crimeRatesYear <- readRDS( here( "data/crimeRatesYear.rds" ) )
crimeRatesMonthType <- readRDS( here( "data/crimeRatesMonthType.rds" ) )
First, load the unemployment data. This makes a call to the Bureau of Labor Statistics API.
library( blscrapeR )
series_id <- "LAUMT043806000000003"
unemployment_data <- bls_api(
series_id,
startyear = head( names( crimeRatesMonth ), n=1 ),
endyear = tail( names( crimeRatesMonth ), n=1 )
)
UnemployByMonth <-
unemployment_data %>%
select( year, periodName, value) %>%
mutate( month = factor( unemployment_data$periodName,levels = month.name ) ) %>%
select( !periodName ) %>%
group_by( year, month ) %>%
spread( year, value ) %>%
ungroup() %>%
select( !month )
# create a time series object for plotting
monthlyUnemploymentRateByYear <- ts(
matrix( as.matrix( UnemployByMonth ), ncol = 1 ),
start=c( 2016, 1 ),
end=c( as.numeric( tail( names( UnemployByMonth ), n=1 ) ), 12 ), frequency=12
)
# plot the rates
monthlyUnemploymentRateByYear %>%
ggseasonplot(
year.labels = TRUE,
continuous = FALSE,
main = "Plot of Monthly Unemployment Rate by Years for Phoenix, AZ",
col = colorRampPalette(c("#f7968f", "#c41104"))( dim( UnemployByMonth )[2] ) ) +
scale_y_continuous( label = comma ) +
geom_line( size = 1.2 ) +
theme_minimal()
Now, we can plot unemployment with crime.
# create a time series object for plotting
monthlyCrimeRateByYear <- ts(
matrix( as.matrix( crimeRatesMonth ), ncol = 1 ),
start=c( 2016, 1 ),
end=c( as.numeric( tail( names( crimeRatesMonth ), n=1 ) ), 12 ), frequency=12
)
# render the plot
crime_seasonplot <- monthlyCrimeRateByYear %>%
ggseasonplot(
year.labels = TRUE,
continuous = FALSE,
main = "Plot of Monthly Crime Rate by Years for Phoenix, AZ",
col = colorRampPalette(c("#f7968f", "#c41104"))( dim( crimeRatesMonth )[2] ) ) +
scale_y_continuous( label = comma ) +
geom_line( size = 1.2 ) +
theme_minimal()
unemployment_seasonplot <- monthlyUnemploymentRateByYear %>%
ggseasonplot(
year.labels = TRUE,
continuous = FALSE,
main = "Plot of Monthly Unemployment Rate by Years for Phoenix, AZ",
col = colorRampPalette(c("#f7968f", "#c41104"))( dim( UnemployByMonth )[2] ) ) +
scale_y_continuous( label = comma ) +
geom_line( size = 1.2 ) +
theme_minimal()
combined_plot <- plot_grid( unemployment_seasonplot, crime_seasonplot, ncol = 1 )
print( combined_plot )
We can rework these to show the trends over time (rather than having time stacked).
actual_data <- data.frame(
dsCrime = time( monthlyCrimeRateByYear ),
yCrime = as.numeric( monthlyCrimeRateByYear ),
dsUnemp = time( monthlyUnemploymentRateByYear ),
yUnemp = as.numeric( monthlyUnemploymentRateByYear )
)
crime_plot <- ggplot() +
geom_line( data = actual_data, aes( x = dsCrime, y = yCrime ), color = "#c41104" ) +
labs( x = "Year", y = "Crime Rate", color = "Crime Rate" ) +
theme_minimal()
unemp_plot <- ggplot() +
geom_line( data = actual_data, aes( x = dsUnemp, y = yUnemp ), color = "#c41104" ) +
labs( x = "Year", y = "Unemployment Rate", color = "Unemployment Rate" ) +
theme_minimal()
combined_plot <- plot_grid( unemp_plot, crime_plot, ncol = 1 )
print( combined_plot )
Now we can examine the correlation between the two, which is 0.148.
# calculate the ccf
unemp_crime_ccf <- ccf( actual_data$yUnemp, actual_data$yCrime )
Back to Open Criminology Phoenix page
Please report any needed corrections to the Issues page. Thanks!