THIS WORK IS YET TO BE REVIEWED
This paper contains estimates of the reproduction number $R_{t}$ over time, computed as described in [1]. The method has been implemented in R using the EpiEstim
package [2], which is what is used here. This report should be updated roughly daily and is available online.
As this paper is updated over time this section will summarise significant changes. The code producing this paper is tracked using Git. The Git commit hash for this project at the time of generating this paper was 43c938c362fee38df7d97ae1a08fbc1d9189601c.
2020-06-12
The project uses the following libraries.
# Load required packages. library() is used rather than require() so that a
# missing dependency fails immediately with an error instead of silently
# returning FALSE and causing confusing downstream failures.
library(EpiEstim)
library(EnvStats)
library(ggplot2)
library(ggpubr)
library(lubridate)
library(utils)
library(httr)
library(dplyr)
library(tidyr)
library(scales)
Data is downloaded from the Git repository associated with [3].
# Provincial deaths: fetch the cumulative deaths time line from the dsfsi
# repository and save it next to this script.
deaths_url <- "https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_deaths.csv"
GET(
  url = deaths_url,
  write_disk("covid19za_provincial_cumulative_timeline_deaths.csv", overwrite = TRUE)
)
Response [https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_deaths.csv]
Date: 2020-06-12 19:58
Status: 200
Content-Type: text/plain; charset=utf-8
Size: 7.19 kB
<ON DISK> C:\Users\lrossou\Desktop\COVID-19\rt_estimates\covid19za_provincial_cumulative_timeline_deaths.csv
# Provincial cases: fetch the cumulative confirmed-case time line from the
# dsfsi repository and save it next to this script.
cases_url <- "https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_confirmed.csv"
GET(
  url = cases_url,
  write_disk("covid19za_provincial_cumulative_timeline_confirmed.csv", overwrite = TRUE)
)
Response [https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_provincial_cumulative_timeline_confirmed.csv]
Date: 2020-06-12 19:58
Status: 200
Content-Type: text/plain; charset=utf-8
Size: 9.08 kB
<ON DISK> C:\Users\lrossou\Desktop\COVID-19\rt_estimates\covid19za_provincial_cumulative_timeline_confirmed.csv
First, read in the data from the downloaded comma-separated values (CSV) text files.
# Read the two CSV files downloaded above into data frames. Strings are kept
# as character vectors (not factors) so the date column can be parsed later.
data_cases <- read.csv(
  file = "covid19za_provincial_cumulative_timeline_confirmed.csv",
  stringsAsFactors = FALSE
)
data_deaths <- read.csv(
  file = "covid19za_provincial_cumulative_timeline_deaths.csv",
  stringsAsFactors = FALSE
)
In the case data file, rows 21 and 32 contain no provincial details. We estimated these by spreading the national total across the provinces in proportion to a mixture of the prior day and the next day.
# Rows 21 and 32 of the case data lack a provincial breakdown. The original
# code repeated the same six-line expression for each row; a small helper
# removes that duplication while computing the identical values.
prov_cols <- c("EC", "FS", "GP", "KZN", "LP", "MP", "NC", "NW", "WC", "UNKNOWN")

# Impute a missing provincial breakdown for one row by spreading that day's
# national total across the provinces in proportion to the combined breakdown
# of the previous and next day.
#
# Args:
#   df: cumulative case data frame with the provincial columns and a 'total'.
#   row: integer row index whose provincial columns are to be filled in.
#   cols: provincial column names to impute.
# Returns: df with the given row's provincial columns replaced.
impute_provincial_split <- function(df, row, cols = prov_cols) {
  nb <- c(row - 1, row + 1)  # neighbouring days used as the mixture
  df[row, cols] <- colSums(df[nb, cols]) / sum(df[nb, ]$total) * df[row, ]$total
  df
}

data_cases <- impute_provincial_split(data_cases, 21)
data_cases <- impute_provincial_split(data_cases, 32)
The following function will be applied to both the case and the death data. In this function, a new SA
column is added as the sum of the new per province data.

fix_data <- function(data, start_date = as.Date("2020-03-01"), end_date = as.Date("2020-03-31")) {
  # Turn a cumulative provincial time series into a tidy daily dataset.
  #
  # Args:
  #   data: data frame with a 'date' column (format dd-mm-YYYY), one cumulative
  #     count column per province (EC, FS, GP, KZN, LP, MP, NC, NW, WC) and an
  #     UNKNOWN column holding counts not yet assigned to a province.
  #   start_date, end_date: Date range the output must cover; days with no data
  #     are filled with a count of 0.
  #
  # Returns: a tibble with columns date, province (factor, including an 'SA'
  #   national-total level), count (non-negative daily increments) and
  #   cumulative_count.
  #
  # Scale provinces by scale factor (assume unknown are in proportion)
  data[, c("EC", "FS", "GP", "KZN", "LP", "MP", "NC", "NW", "WC")] <- data[, c("EC",
    "FS", "GP", "KZN", "LP", "MP", "NC", "NW", "WC")] * (1 + data$UNKNOWN/rowSums(data[,
    c("EC", "FS", "GP", "KZN", "LP", "MP", "NC", "NW", "WC")]))
  # Only select columns we need
  data <- data %>% select("date", "EC", "FS", "GP", "KZN", "LP", "MP", "NC", "NW",
    "WC")
  data$date <- as.Date(data$date, "%d-%m-%Y")
  # Round data so we have integer cases
  data[, c("EC", "FS", "GP", "KZN", "LP", "MP", "NC", "NW", "WC")] <- round(data[,
    c("EC", "FS", "GP", "KZN", "LP", "MP", "NC", "NW", "WC")], 0)
  # Calculate a new SA column as the national total over the provinces
  data$SA <- rowSums(data[, c("EC", "FS", "GP", "KZN", "LP", "MP", "NC", "NW",
    "WC")])
  # 'Melt' the data into long format: one row per date/province
  data <- pivot_longer(data, cols = c("EC", "FS", "GP", "KZN", "LP", "MP", "NC",
    "NW", "WC", "SA"), names_to = "province", values_to = "count")
  # Getting daily data from the cumulative data set (lag default 0 makes the
  # first day's increment equal to its cumulative value)
  data <- data %>% group_by(province) %>% arrange(date) %>% mutate(count = count -
    lag(count, default = 0)) %>% ungroup()
  # add missing dates over the full requested range
  all_dates <- expand_grid(date = seq(start_date, end_date, 1), province = levels(as.factor(data$province)))
  # join so every date/province pair exists (missing ones get NA counts)
  data <- left_join(all_dates, data, by = c("date", "province"))
  # province factor
  data$province <- as.factor(data$province)
  # 0 for NAs; this also cleans any NaN produced by the 0/0 scaling above,
  # since is.na(NaN) is TRUE in R
  data$count <- ifelse(is.na(data$count), 0, data$count)
  # remove negatives (downward corrections in the source data)
  data$count <- ifelse(data$count < 0, 0, data$count)
  data <- data %>% group_by(province) %>% mutate(cumulative_count = cumsum(count)) %>%
    ungroup()
  return(data)
}
Below we use the function above to process deaths and cases and then combine them into a single dataset.
# Parse the case dates once (the original parsed the same column twice); the
# resulting range bounds both the case and the death series so that the two
# processed datasets are aligned on the same dates.
case_dates <- as.Date(data_cases$date, "%d-%m-%Y")
start_date <- min(case_dates)
end_date <- max(case_dates)
data_cases <- fix_data(data_cases, start_date = start_date, end_date = end_date)
data_deaths <- fix_data(data_deaths, start_date = start_date, end_date = end_date)
# Tag each dataset with its type as a leading column before combining
data_cases <- cbind("cases", data_cases)
data_deaths <- cbind("deaths", data_deaths)
colnames(data_cases)[1] <- "type"
colnames(data_deaths)[1] <- "type"
# combined
data <- rbind(data_cases, data_deaths)
# remove data sets no longer needed
rm("data_cases", "data_deaths", "start_date", "end_date", "case_dates")
Below we plot cumulative case count on a log scale by province:
# Cumulative cases by province on a log10 y-axis.
ggplot(data %>% filter(type == "cases"), aes(x = date, y = cumulative_count)) +
  geom_line(aes(color = province), size = 1) +
  scale_y_log10(labels = comma) +
  ggtitle("Cumulative Cases by Province") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "right") +
  # The legend is produced by the colour aesthetic, so the single-column
  # layout must be requested for 'color'; the original guides(fill = ...)
  # targeted an aesthetic not used by this plot and had no effect.
  guides(color = guide_legend(ncol = 1)) +
  scale_color_hue(l = 50)