This document calculates results for the manuscript: “Timeliness in the German surveillance system for infectious diseases: Amendment of the infection protection act in 2013 decreased local reporting time to 1 day.” Its aim is to analyze the reporting delay in the German Surveillance System. This document was generated with R, a statistics program that can be downloaded from http://www.r-project.org.

The document is structured in three parts.

1 Part - Getting and preparing data

1.1 Querying database

The data is retrieved from the database for notifiable infectious diseases of the Robert Koch Institute. The full query can be seen at the end of the script.

1.2 Loading data from intermediate result and start protocol

# Restore the intermediate query result. The rest of the script reads an
# object named `data`, so rawData.RData is presumably where it comes from
# (TODO confirm against the query step).
load("rawData.RData")

# Open the processing protocol with the number of rows that were retrieved.
protocol <- setNames(list(nrow(data)), "TotalNumberRetrieved")

1.3 Loading a table with disease names and other information

# Tab-separated lookup table of diseases. Every column is read as character
# and quote handling is switched off, so quote characters stay literal.
DiseaseDF <- read.table(
  file = "Diseases.csv",
  header = TRUE,
  sep = "\t",
  quote = "",
  row.names = NULL,
  colClasses = "character",
  encoding = "UTF-8"
)

# Show the lookup table in the rendered document.
datatable(DiseaseDF, rownames = FALSE, extensions = "Buttons")

1.4 Loading a table with names of the German federal states

# Semicolon-separated lookup table of the federal states, all columns read as
# character. The byte \xfc in ReportingStateName is replaced by "ü" right
# after reading (presumably a Latin-1 leftover -- confirm the file encoding).
FederalstatesDF <- read.table(
  "FederalStates.csv",
  header = TRUE,
  sep = ";",
  row.names = NULL,
  colClasses = "character",
  encoding = "UTF-8"
) %>%
  mutate(ReportingStateName = gsub("\xfc", "ü", ReportingStateName))

# Show the lookup table in the rendered document.
datatable(FederalstatesDF, rownames = FALSE, extensions = "Buttons")

1.5 Loading a table with nationwide public holidays and calculate working days

# Holidays.csv has no header row; its first column (auto-named V1) holds the
# holiday dates as character strings.
Holidays <- read.table(
  "Holidays.csv",
  header = FALSE,
  sep = ";",
  row.names = NULL,
  colClasses = "character"
)[["V1"]]

# Count the business days (Mon-Fri, not a listed holiday) in the daily
# sequence from 2012-03-29 to 2014-03-28, both ends included.
WorkingDays <- seq(as.Date("2012/03/29"), as.Date("2014/03/28"), by = "days") %>%
  timeDate() %>%
  isBizday(holidays = Holidays, wday = 1:5) %>%
  sum()

# Show the holiday list in the rendered document.
datatable(data.frame(Holidays), rownames = FALSE, extensions = "Buttons")

1.6 Changing class and other minor changes

# Normalise column types and placeholder values. Columns are addressed by
# position, so this step depends on the exact column order of the loaded data.
data <- data %>% 
  # Columns 1-23: parse as Date using the ISO format YYYY-MM-DD.
  mutate_each(funs(as.Date(., format = "%Y-%m-%d")), 1:23) %>%
  # Columns 24-32 and 35-36: convert to character.
  mutate_each(funs(as.character(.)), 24:32,35:36) %>%
  # Columns 33-34 and 37-40: convert to numeric.
  # NOTE(review): if any of these arrive as factors, as.numeric() yields the
  # internal level codes rather than the printed values -- confirm input types.
  mutate_each(funs(as.numeric(.)), 33:34, 37:40) %>%
  # In the character columns, turn the placeholder strings "-nicht erhoben-",
  # "ohne", "zOther" and "-kein-" into NA.
  mutate_each(funs(replace(., .=="-nicht erhoben-", NA)), 24:32,35:36) %>% 
  mutate_each(funs(replace(., .=="ohne", NA)), 24:32,35:36) %>%
  mutate_each(funs(replace(., .=="zOther", NA)), 24:32,35:36) %>% 
  mutate_each(funs(replace(., .=="-kein-", NA)), 24:32,35:36) %>% 
  # Ages below 0 or above 120 are set to NA.
  mutate(Alter = ifelse(Alter<0|Alter>120, NA, Alter)) %>% 
  # factor() assigns the labels to the sorted unique values of the column, so
  # this needs exactly six distinct values.
  # NOTE(review): verify that the sorted raw values line up with this label order.
  mutate(SoftwareGruppeGA = factor(SoftwareGruppeGA, labels=c("Software A", "Software B", "Software C", "Software D", "SurvNet", "Software E"))) 

# Replace the single bytes \xe4, \xf6 and \xfc in the factor levels with the
# German umlauts ä, ö and ü.
# NOTE(review): levels() is only meaningful if Disease and ReportingStateName
# are still factors here; their column positions are not visible in this
# chunk, so check they are not among the columns converted to character above.
levels(data$Disease) <- gsub("\xe4", "ä", levels(data$Disease)) 
levels(data$Disease) <- gsub("\xf6", "ö", levels(data$Disease)) 
levels(data$ReportingStateName) <- gsub("\xfc", "ü", levels(data$ReportingStateName)) 

1.7 Attach federal states to the main data

# Look up each record's federal state in FederalstatesDF, then overwrite
# ReportingStateName with the English name (as a factor) and drop the
# helper column that the join brought in.
data <- left_join(data, FederalstatesDF, by = "ReportingStateName")
data$ReportingStateName <- factor(data$FederalStateEnglish)
data$FederalStateEnglish <- NULL

1.8 Attach diseases to the main data

# Join the disease lookup table. The original disease name is kept as the
# factor `Erkrankung`; `Disease` is then overwritten with the English name
# and the helper column from the lookup table is dropped.
data <- left_join(data, DiseaseDF, by = "Disease") %>%
  mutate(
    Erkrankung = factor(Disease),
    Disease = factor(DiseaseEnglish)
  ) %>%
  select(-DiseaseEnglish)

1.9 Filter diseases introduced in IfSG amendment from 29.03.2013

# Protocol
# Protocol: disease names present before filtering
protocol$DownloadedDiseases <- levels(data$Erkrankung)

# Helper: negation of %in% -- TRUE where `x` has no match in `table`
"%nin%" <- function(x, table) !(x %in% table)

# Keep only records that received an English disease name in the join and
# whose original name is not one of the four excluded diseases; then drop
# factor levels that no longer occur.
data <- data %>%
  filter(
    !is.na(Disease),
    Erkrankung %nin% c("Mumps", "Röteln", "Windpocken", "Keuchhusten")
  ) %>%
  droplevels()

# Protocol: disease names and row count after filtering
protocol$AnalysedDiseases <- levels(data$Erkrankung)
protocol$NCorrectDiseaseCategory <- nrow(data)

1.10 Compute which diseases are frequent and filter out those with 10 or fewer cases

# Flag diseases with more than 10 records and keep only those. Diseases at or
# below the threshold get FrequentDisease = NA from the join, and filter()
# drops NA rows.
# NOTE(review): a disease with exactly 10 records is removed (n > 10) --
# confirm this is the intended cut-off.
data <- data %>%
  left_join(
    data %>%
      count(Disease) %>%
      filter(n > 10) %>%
      transmute(Disease, FrequentDisease = TRUE),
    by = "Disease"
  ) %>%
  filter(FrequentDisease)

# Protocol: remaining rows and number of rows removed by this filter
protocol$TotalNumber <- nrow(data)
protocol$FilterLess10Cases <- protocol$NCorrectDiseaseCategory - protocol$TotalNumber

1.11 Find the first date of notification

# First notification date per record: the row-wise minimum over the eight
# notification-date columns, ignoring missing values. The result is NA only
# when all eight are missing.
data$dateOfNotification <- as.Date(
  with(
    data,
    pmin(
      ArztMeldungImGA,
      LaborMeldungImGA,
      EigeneErmittlungMeldungImGA,
      GemeinschaftP8MeldungImGA,
      WeiterleitungAnderesGAMeldungImGA,
      GemeinschaftP34MeldungImGA,
      AndereMeldungImGA,
      UnbekannteMeldeArtImGA,
      na.rm = TRUE
    )
  ),
  origin = "1970-01-01"
)

1.12 Compute which days are weekdays

# Map dates to an English weekday factor with levels Monday..Sunday.
# Uses the ISO weekday number ("%u": 1 = Monday ... 7 = Sunday) instead of
# weekdays(), whose output depends on the session locale. The previous
# version matched German day names and therefore produced only NA under any
# non-German locale.
english_weekday <- function(dates) {
  factor(
    format(dates, "%u"),
    levels = as.character(1:7),
    labels = c(
      "Monday", "Tuesday", "Wednesday", "Thursday",
      "Friday", "Saturday", "Sunday"
    )
  )
}

# Weekday of the first notification (local level) and of the import at state
# level; missing dates stay NA.
data <- data %>%
  mutate(
    Weekday_local = english_weekday(dateOfNotification),
    Weekday_state = english_weekday(ImportLS)
  )

1.13 Categorise data in cases before and after the IfSG amendment

# Label each record as before ("VorGesetz") or after ("NachGesetz") the
# cut-off date 2013-03-31: once by Meldedatum for the local level and once by
# ImportLS for the state level. Dates up to and including the cut-off count
# as "before"; missing dates give NA.
data <- data %>%
  mutate(
    gesetz_local = factor(
      ifelse(Meldedatum > as.Date("2013-03-31"), "NachGesetz", "VorGesetz")
    ),
    gesetz_state = factor(
      ifelse(ImportLS > as.Date("2013-03-31"), "NachGesetz", "VorGesetz")
    )
  )

1.14 Find local public health agencies that have reported more than 10 cases according to specifications version 3 before the IfSG amendment

# Flag agencies (MeldendesGA) with more than 10 "SN3" records from before the
# cut-off. Matching agencies get longtermSN3User = TRUE; all others are left
# as NA by the join.
data <- data %>%
  left_join(
    data %>%
      filter(SoftwareSN2SN3GA == "SN3", gesetz_local == "VorGesetz") %>%
      count(MeldendesGA) %>%
      filter(n > 10) %>%
      transmute(MeldendesGA, longtermSN3User = TRUE),
    by = "MeldendesGA"
  )

1.15 Compute delays

# Delays as plain numbers, each the difference between two date columns:
#   delay_to_report_local    = ExportGA   - dateOfNotification
#   delay_to_report_state    = ExportLS   - ImportLS
#   notificationProcessDelay = Meldedatum - dateOfNotification
#   total_delay              = ExportLS   - dateOfNotification
data <- data %>%
  mutate(
    delay_to_report_local = as.numeric(ExportGA - dateOfNotification),
    delay_to_report_state = as.numeric(ExportLS - ImportLS),
    notificationProcessDelay = as.numeric(Meldedatum - dateOfNotification),
    total_delay = as.numeric(ExportLS - dateOfNotification)
  )

1.16 Protocol

# Number of records with a non-missing value per date column
protocol$dateOfNotification <- sum(!is.na(data$dateOfNotification))
protocol$ExportGA <- sum(!is.na(data$ExportGA))
protocol$ImportLS <- sum(!is.na(data$ImportLS))
protocol$ExportLS <- sum(!is.na(data$ExportLS))

# Number of distinct reporting agencies
protocol$NLocalPublicHealthAgencies <- nlevels(as.factor(data$MeldendesGA))

# Delays available before cleaning, and how many fall outside 0..183
protocol$DelayLocalInitial <- sum(!is.na(data$delay_to_report_local))
protocol$DelayStateInitial <- sum(!is.na(data$delay_to_report_state))
protocol$DelayLocalUnder0 <- sum(data$delay_to_report_local < 0, na.rm = TRUE)
protocol$DelayStateUnder0 <- sum(data$delay_to_report_state < 0, na.rm = TRUE)
protocol$DelayLocalAbove183 <- sum(data$delay_to_report_local > 183, na.rm = TRUE)
protocol$DelayStateAbove183 <- sum(data$delay_to_report_state > 183, na.rm = TRUE)

1.17 Deleting delay values that are potentially wrong

# Helper: set delays below 0 or above 183 to NA, leave the rest untouched
blank_implausible <- function(days) {
  ifelse(days < 0 | days > 183, NA, days)
}

data <- data %>%
  # Remove implausible values from all four delay variables
  mutate(
    delay_to_report_local = blank_implausible(delay_to_report_local),
    delay_to_report_state = blank_implausible(delay_to_report_state),
    notificationProcessDelay = blank_implausible(notificationProcessDelay),
    total_delay = blank_implausible(total_delay)
  ) %>%
  # The processing delay is kept only for notifications received Mon-Thu ...
  mutate(notificationProcessDelay = ifelse(Weekday_local %in% c("Friday", "Saturday", "Sunday"), NA, notificationProcessDelay)) %>%
  # ... not reported via "SN2" (a missing software version also yields NA) ...
  mutate(notificationProcessDelay = ifelse(SoftwareSN2SN3GA == "SN2", NA, notificationProcessDelay)) %>%
  # ... and from agencies flagged as long-term SN3 users. The flag is TRUE or
  # NA, and an NA test makes ifelse() return NA, which blanks the others.
  mutate(notificationProcessDelay = ifelse(!longtermSN3User, NA, notificationProcessDelay))

1.18 Categorising into quartiles

# Per agency: number of records (n), records per working day, and the
# quartile (1-4) of that rate across agencies. All three columns are joined
# back onto the record-level data.
data <- data %>%
  left_join(
    data %>%
      count(MeldendesGA) %>%
      mutate(
        NotificationsPerWorkingDay = n / WorkingDays,
        rangNotifications = ntile(NotificationsPerWorkingDay, 4)
      ),
    by = "MeldendesGA"
  )

# Label each quartile "lower - upper", rounded to one decimal. The lower bound
# is the smallest rate in that quartile; the upper bound is the smallest rate
# of the next quartile (for the last quartile: its largest rate).
quartile_floor <- vapply(
  1:4,
  function(k) {
    min(data$NotificationsPerWorkingDay[data$rangNotifications == k], na.rm = TRUE)
  },
  numeric(1)
)
quartile_breaks <- round(
  c(
    quartile_floor,
    max(data$NotificationsPerWorkingDay[data$rangNotifications == 4], na.rm = TRUE)
  ),
  1
)
AllLabels <- paste(quartile_breaks[-5], quartile_breaks[-1], sep = " - ")

data$rangNotifications <- factor(data$rangNotifications, labels = AllLabels)
rm(quartile_floor, quartile_breaks, AllLabels)

# rangTransmission: quartile (1-4) of each agency's mean local reporting
# delay, computed only from records dated before the cut-off ("VorGesetz").
# Agencies without such records get NA from the join.
data <- data %>%
  left_join(
    data %>%
      filter(gesetz_local == "VorGesetz") %>%
      group_by(MeldendesGA) %>%
      summarise(mean_delay = mean(delay_to_report_local, na.rm = TRUE)) %>%
      mutate(rangTransmission = ntile(mean_delay, 4)) %>%
      select(MeldendesGA, rangTransmission),
    by = "MeldendesGA"
  )

1.19 Compute agegroup

# Ten-year age bands from Alter. findInterval() returns 1 for ages 0-9,
# 2 for 10-19, ... and 11 for ages of 100 and above; pmin() folds that last
# bin into the open-ended "90+" band. Missing ages stay NA.
# `levels` is given explicitly so the labels always map to the same bands.
# Without it factor() derives the levels from the values present and fails
# as soon as one band has no cases.
data <- data %>%
  mutate(
    Agegroup = factor(
      pmin(findInterval(Alter, seq(0, 100, 10)), 10L),
      levels = 1:10,
      labels = c(
        "0-9", "10-19", "20-29", "30-39", "40-49",
        "50-59", "60-69", "70-79", "80-89", "90+"
      )
    )
  )

1.20 Compute a variable for the labeling of the figures

# Yearmonth: first day of the month of Meldedatum, used for figure labels.
# The day is written into the format string and parsed with an explicit
# format, so a missing Meldedatum stays NA. The previous paste() call produced
# the string "NA-01", which makes as.Date() stop with "not in a standard
# unambiguous format" when it is the first non-missing value.
data <- data %>%
  mutate(
    Yearmonth = as.Date(format(Meldedatum, "%Y-%m-01"), format = "%Y-%m-%d")
  )

1.21 Find the local public health agencies that report within one working day

# wd0 ... wd6: is the day of first notification, and each of the six days
# after it, a business day (Mon-Fri and not listed in Holidays)?
for (day_offset in 0:6) {
  data[[paste0("wd", day_offset)]] <- isBizday(
    timeDate(data$dateOfNotification + day_offset),
    holidays = Holidays,
    wday = 1:5
  )
}
rm(day_offset)

data<-data %>% 
    mutate(target=0) %>% 
    mutate(target=ifelse(wd0 & wd6, 6,target) ) %>% 
    mutate(target=ifelse(wd0 & wd5, 5,target) ) %>% 
    mutate(target=ifelse(wd0 & wd4, 4,target) ) %>% 
    mutate(target=ifelse(wd0 & wd3, 3,target) ) %>% 
    mutate(target=ifelse(wd0 & wd2, 2,target) ) %>% 
    mutate(target=ifelse(wd0 & wd1, 1,target) ) %>% 
    mutate(target=ifelse(!wd0 & wd1 & wd6, 6,target) ) %>% 
    mutate(target=ifelse(!wd0 & wd1 & wd5, 5,target) ) %>% 
    mutate(target=ifelse(!wd0 & wd1 & wd4, 4,target) ) %>% 
    mutate(target=ifelse(!wd0 & wd1 & wd3, 3,target) ) %>%
    mutate(target=ifelse(!wd0 & wd1 & wd2, 2,target) ) %>%
    mutate(target=ifelse(!wd0 & !wd1 & wd2 & wd6, 6,target) ) %>% 
    mutate(target=ifelse(!wd0 & !wd1 & wd2 & wd5, 5,target) ) %>% 
    mutate(target=ifelse(!wd0 & !wd1 & wd2 & wd4, 4,target) ) %>% 
    mutate(target=ifelse(!wd0 & !wd1 & wd2 & wd3, 3,target) ) %>%
    mutate(target=ifelse(!wd0 & !wd1 & !wd2 & wd3 & wd6, 6,target) ) %>% 
    muta