-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnibrs_eda_multiple.R
70 lines (61 loc) · 2.28 KB
/
nibrs_eda_multiple.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
suppressMessages(library(tidyverse))
suppressMessages(library(here))
suppressMessages(library(survey))
suppressMessages(library(tidymodels))
suppressMessages(library(glue))
suppressMessages(library(cli))
suppressMessages(library(xtable))
suppressMessages(library(data.table))
suppressMessages(library(dtplyr))
source(here("analysis", "utils_regression.R"))
# ----
# Load the NIBRS extracts.
# Only files in data/nibrs whose names contain "imputed", "weights" and
# "multiple" are kept.
files_nibrs <- list.files(here("data", "nibrs")) %>%
  str_subset("imputed") %>%
  str_subset("weights") %>%
  str_subset("multiple")

# Short label per file: drop the first 13 characters of the file name, then
# strip the boilerplate fragments. These labels become the names of `df_list`.
files_names <- files_nibrs %>%
  str_sub(start = 14) %>%
  str_remove_all("data_imputed|_multiple_weights.csv|_1")

# Read and prepare each extract in a single pass:
#   1. drop every column whose name matches lasso / rf / nb / sl / nnet,
#   2. keep years 2006-2015,
#   3. derive the row weight `wgt` = 1 / mod_log and drop `mod_log`
#      (so downstream code has to rely on `wgt`),
#   4. keep incidents where both offender and victim are black or white.
df_list <- files_nibrs %>%
  purrr::map(function(file_name) {
    raw <- read_csv(here("data", "nibrs", file_name), col_types = cols())
    raw %>%
      select(-matches("lasso|rf|nb|sl|nnet")) %>%
      filter(year >= 2006, year <= 2015) %>%
      mutate(wgt = 1 / mod_log) %>%
      select(-mod_log) %>%
      filter(race_of_offender %in% c("black", "white")) %>%
      filter(race_of_victim %in% c("black", "white"))
  }) %>%
  setNames(files_names)
# representation in police data
# Weighted totals use `wgt`, the per-row weight that the loading step derives
# as 1 / mod_log before dropping `mod_log`. Summing `wgt` keeps these tables
# consistent with that prepared weight instead of dividing by a model column
# (`mod_logistic`) that the preparation code never references.
# NOTE(review): confirm the CSVs do not carry a separate `mod_logistic` column
# that was intended here.

# Weighted counts by offender race and multiple-offender status. summarise()
# drops the last grouping level, so `prop` is the share of each
# `multiple_offender` value within a given `race_of_offender`.
df_list %>% map(~ .x %>%
  group_by(race_of_offender, multiple_offender) %>%
  summarise(n = sum(wgt)) %>%
  mutate(prop = n / sum(n)))

# Unweighted row counts by offender race; `prop` is the share of all rows.
df_list %>% map(~ .x %>%
  group_by(race_of_offender) %>%
  summarise(n = n()) %>%
  mutate(prop = n / sum(n)))

# Weighted counts by offender race; `prop` is the share of the weighted total.
df_list %>% map(~ .x %>%
  group_by(race_of_offender) %>%
  summarise(n = sum(wgt)) %>%
  mutate(prop = n / sum(n)))
# compute arrest rates by racial group of the offender
# For each offender race x multiple-offender cell:
#   arrest_rate        = sum(y) divided by the weighted row total, sum(wgt)
#   arrest_rate_police = plain mean of y over the recorded rows
# `wgt` is the weight derived in the loading step (1 / mod_log); it is used
# here because `mod_log` is dropped there and `mod_logistic` is never set up
# by the preparation code.
# NOTE(review): `y` is presumably the arrest indicator - confirm against the
# data dictionary.
df_list %>% map(~ .x %>%
  group_by(race_of_offender, multiple_offender) %>%
  summarise(
    arrest_rate = sum(y) / sum(wgt),
    arrest_rate_police = mean(y)
  ))
# Tabulate the rows of `df` by the given grouping columns and attach each
# cell's share. summarise() drops the last grouping level, so `prop` is
# computed within the remaining groups, or over the whole table when only one
# grouping column is supplied.
count_with_share <- function(df, ...) {
  cell_counts <- summarise(group_by(df, ...), n = n())
  mutate(cell_counts, prop = n / sum(n))
}

# Region mix within each multiple-offender category.
map(df_list, function(d) count_with_share(d, multiple_offender, region))

# Private-location mix within each multiple-offender category.
map(
  df_list,
  function(d) count_with_share(d, multiple_offender, is_private_location)
)

# Share of distinct incidents by multiple-offender status (one row per
# incident_number / multiple_offender pair).
map(df_list, function(d) {
  incidents <- distinct(d, incident_number, multiple_offender)
  count_with_share(incidents, multiple_offender)
})

# Share of all rows by multiple-offender status.
map(df_list, function(d) count_with_share(d, multiple_offender))