Question 1
set_up = covid %>%
filter(state == "California") %>%
group_by(county) %>%
mutate(NewCases = cases - lag(cases)) %>%
mutate(Cumul_Cases = sum(cases)) %>%
ungroup() %>%
filter(date == max(date))
Table representing the five counties in CA with the most new cases at the most current day
(most_new_cases = set_up %>%
slice_max(NewCases, n = 5) %>%
select(county, NewCases))
## # A tibble: 5 x 2
## county NewCases
## <chr> <dbl>
## 1 Los Angeles 809
## 2 San Diego 265
## 3 Orange 185
## 4 Fresno 159
## 5 San Bernardino 156
knitr::kable(most_new_cases,
caption = "Most New Cases: California Counties",
col.names = c("County", "New Cases"),
format.args = list(big.mark = ",")) %>%
kableExtra:: kable_styling("basic", full_width = TRUE, font_size = NULL)
Most New Cases: California Counties
County
|
New Cases
|
Los Angeles
|
809
|
San Diego
|
265
|
Orange
|
185
|
Fresno
|
159
|
San Bernardino
|
156
|
Table representing the five counties in CA with the most cumulative cases at the most current day
(most_cumul_cases = set_up %>%
slice_max(Cumul_Cases, n = 5) %>%
select(county, Cumul_Cases))
## # A tibble: 5 x 2
## county Cumul_Cases
## <chr> <dbl>
## 1 Los Angeles 18408002
## 2 Riverside 3611739
## 3 Orange 3293539
## 4 San Bernardino 2972488
## 5 San Diego 2832227
knitr::kable(most_cumul_cases,
caption = "Most Cumulative Cases: California Counties",
col.names = c("County", "Cumulative Cases"),
format.args = list(big.mark = ",")) %>%
kableExtra:: kable_styling("basic", full_width = TRUE, font_size = NULL)
Most Cumulative Cases: California Counties
County
|
Cumulative Cases
|
Los Angeles
|
18,408,002
|
Riverside
|
3,611,739
|
Orange
|
3,293,539
|
San Bernardino
|
2,972,488
|
San Diego
|
2,832,227
|
Table representing the five counties in CA with the most new cases per capita at the most current day
(most_new_cases_per_cap = pop %>%
slice_max(new_cases_per_cap, n = 5) %>%
select(county.y, new_cases_per_cap))
## # A tibble: 5 x 2
## county.y new_cases_per_cap
## <chr> <dbl>
## 1 Kings 0.000262
## 2 San Benito 0.000239
## 3 Monterey 0.000203
## 4 Lake 0.000171
## 5 Fresno 0.000159
knitr::kable(most_new_cases_per_cap,
caption = "Most New Cases Per Capita: California Counties",
col.names = c("County", "New Cases Per Capita"),
format.args = list(big.mark = ",")) %>%
kableExtra:: kable_styling("basic", full_width = TRUE, font_size = NULL)
Most New Cases Per Capita: California Counties
County
|
New Cases Per Capita
|
Kings
|
0.0002615
|
San Benito
|
0.0002388
|
Monterey
|
0.0002027
|
Lake
|
0.0001708
|
Fresno
|
0.0001591
|
Table representing the five counties in CA with the most cumulative cases per capita at the most current day
(most_cumul_per_cap = pop %>%
slice_max(cumul_cases_per_cap, n = 5) %>%
select(county.y, cumul_cases_per_cap))
## # A tibble: 5 x 2
## county.y cumul_cases_per_cap
## <chr> <dbl>
## 1 Imperial 4.80
## 2 Kings 2.77
## 3 Tulare 1.95
## 4 Kern 1.84
## 5 Los Angeles 1.83
knitr::kable(most_cumul_per_cap,
caption = "Most Cumulative Cases Per Capita: California Counties",
col.names = c("County", "Cumulative Cases Per Capita"),
format.args = list(big.mark = ",")) %>%
kableExtra:: kable_styling("basic", full_width = TRUE, font_size = NULL)
Most Cumulative Cases Per Capita: California Counties
County
|
Cumulative Cases Per Capita
|
Imperial
|
4.795111
|
Kings
|
2.772264
|
Tulare
|
1.949337
|
Kern
|
1.844563
|
Los Angeles
|
1.833629
|
Total number of new cases in the last 14 days per 100,000 people
last_14_days = covid %>%
right_join(PopulationEstimates, by = "fips") %>%
filter(date >= max(date, na.rm = TRUE) - 13, state.x == "California") %>%
group_by(county.x) %>%
summarize(new_cases_per_cap = sum(cases - lag(cases), na.rm = TRUE) / (max(pop_2019) / 100000)) %>%
filter(new_cases_per_cap <= 100) %>%
ungroup()
As of 8/17/2020, there are a total of 14 safe counties in California. This means they comply with the California Department of Public Health’s criteria of having less than 100 new cases per 100,000 residents over the past 14 days.
Question 2
The number of daily new cases and the 7-day rolling mean of the California, New York, Florida, and Lousiana
four_states = covid %>%
filter (state %in% c("California", "Louisiana", "New York", "Florida")) %>%
group_by(state, date) %>%
summarize(cases = sum(cases)) %>%
ungroup() %>%
group_by(state) %>%
mutate(daily_new_cases= cases-lag(cases),
seven_day_roll= zoo::rollmean(daily_new_cases, 7, fill= NA, align= 'right')) %>%
ungroup() %>%
filter(daily_new_cases > 0)
ggplot(four_states, aes( x = date)) +
geom_col(aes(y = daily_new_cases, col= state)) +
geom_line(aes(y = seven_day_roll, col = "darkred", size = 1)) +
facet_wrap(~state, scale = "free_y") +
labs(title = "Daily New Covid Cases at the State Level", x= "", y= "", color = "", caption = "Lab 02", subtitle = "COVID-19 Data: NY Times" ) +
theme_light() +
theme(legend.position = 'NA')
To get the number of daily new cases per capita and the 7-day rolling mean of the California, New York, Florida, and Lousiana.
cap_state= PopulationEstimates %>%
select(state, pop_2019) %>%
right_join(four_states, by = "state") %>%
mutate(state_per_cap = ((cases - lag(cases))/ pop_2019),
seven_day_roll= zoo::rollmean(daily_new_cases, 7, fill= NA, align= 'right'))
ggplot(cap_state, aes(x=date)) +
geom_col(aes(y= state_per_cap), col= NA, fill= "#F5B8B5") +
geom_line(aes(y= seven_day_roll), col= "darkred", size= 1) +
theme_light() +
labs(title = "States' New Covid Cases Per Capita", x= "", y= "", caption = "Lab 02", subtitle = "COVID-19 Data: NY Times" ) +
theme(legend.position = 'NA')
The number of daily, new cases at the state level can be misleading since it doesn’t take into consideration the state’s population. Once population is integrated into the number of cases, it is expect for data to reflect a more accurate representation of which out of the four states (CA, LA, NY, FL) has been affected more by COVID.