# EMIT Main Quarantine Paper Analysis: Applying POC Infection Criteria
# Program Objective: Apply the POC Infection Criteria to the Main Quarantine (Main Q) paper data.
# Author: Jacob Bueno de Mesquita
# Date: December 21, 2018; February, 2019
# Summary: Based on Massimo and Walt's review comments on manuscript, we delved into making better comparisons between the Main Q paper and the POC paper wrt qRT-PCR status.
# The POC paper treated a single qRT-PCR positive hit as sufficient to call a positive case.
# The Main Q paper required 2 qRT-PCR positive hits to call a positive case.
# Here we apply the POC criteria to the Main Q paper -- this will be added to the supplement.
# Because this affects Table 1 and Table 3, we will copy the Table 1 and Table 3 script from the EMIT_Q_Main_Analysis.R script
# Then we will modify it to match the POC criteria. 

# Note that the POC paper was Killingley et al 2012 in JID -- the proof-of-concept for using quarantine transmission model for human transmission. 
#### Load required packages, set working directory, and read in data file ####
library(tidyverse)
library(RcppRoll)
library(readxl)
library(knitr)
library(data.table)
library(lubridate)
library(devtools)
library(xtable)
library(DT)
library(kableExtra)

# setwd("/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine")
# setwd() caused issues when knitting to markdown, so it is disabled; instead, all file paths point to a specific directory.

# Read the merged quarantine analytical dataset. The path is absolute because
# the setwd() call above is disabled.
Qdata <- read.csv(
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Curated Data/Analytical Datasets/QuarantineMergedData.csv"
)

# Record the R version and package versions used for this run.
devtools::session_info()
## ─ Session info ──────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.5.1 (2018-07-02)
##  os       macOS  10.14.4              
##  system   x86_64, darwin15.6.0        
##  ui       RStudio                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2019-04-06                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────
##  package     * version date       lib source        
##  arsenal     * 1.5.0   2018-11-09 [1] CRAN (R 3.5.0)
##  assertthat    0.2.0   2017-04-11 [1] CRAN (R 3.5.0)
##  backports     1.1.2   2017-12-13 [1] CRAN (R 3.5.0)
##  base64enc     0.1-3   2015-07-28 [1] CRAN (R 3.5.0)
##  bindr         0.1.1   2018-03-13 [1] CRAN (R 3.5.0)
##  bindrcpp    * 0.2.2   2018-03-29 [1] CRAN (R 3.5.0)
##  broom         0.5.0   2018-07-17 [1] CRAN (R 3.5.0)
##  callr         3.0.0   2018-08-24 [1] CRAN (R 3.5.0)
##  cellranger    1.1.0   2016-07-27 [1] CRAN (R 3.5.0)
##  checkmate     1.8.5   2017-10-24 [1] CRAN (R 3.5.0)
##  cli           1.0.1   2018-09-25 [1] CRAN (R 3.5.0)
##  colorspace    1.3-2   2016-12-14 [1] CRAN (R 3.5.0)
##  crayon        1.3.4   2017-09-16 [1] CRAN (R 3.5.0)
##  crosstalk     1.0.0   2016-12-21 [1] CRAN (R 3.5.0)
##  data.table  * 1.11.8  2018-09-30 [1] CRAN (R 3.5.0)
##  desc          1.2.0   2018-05-01 [1] CRAN (R 3.5.0)
##  devtools    * 2.0.1   2018-10-26 [1] CRAN (R 3.5.1)
##  digest        0.6.18  2018-10-10 [1] CRAN (R 3.5.0)
##  dplyr       * 0.7.7   2018-10-16 [1] CRAN (R 3.5.0)
##  DT          * 0.4     2018-01-30 [1] CRAN (R 3.5.0)
##  evaluate      0.12    2018-10-09 [1] CRAN (R 3.5.0)
##  fansi         0.4.0   2018-10-05 [1] CRAN (R 3.5.0)
##  forcats     * 0.3.0   2018-02-19 [1] CRAN (R 3.5.0)
##  fs            1.2.6   2018-08-23 [1] CRAN (R 3.5.0)
##  ggplot2     * 3.1.0   2018-10-25 [1] CRAN (R 3.5.0)
##  glue          1.3.0   2018-07-17 [1] CRAN (R 3.5.0)
##  gtable        0.2.0   2016-02-26 [1] CRAN (R 3.5.0)
##  haven         1.1.2   2018-06-27 [1] CRAN (R 3.5.0)
##  highr         0.7     2018-06-09 [1] CRAN (R 3.5.0)
##  hms           0.4.2   2018-03-10 [1] CRAN (R 3.5.0)
##  htmlTable   * 1.12    2018-05-26 [1] CRAN (R 3.5.0)
##  htmltools     0.3.6   2017-04-28 [1] CRAN (R 3.5.0)
##  htmlwidgets   1.3     2018-09-30 [1] CRAN (R 3.5.0)
##  httpuv        1.4.5   2018-07-19 [1] CRAN (R 3.5.0)
##  httr          1.3.1   2017-08-20 [1] CRAN (R 3.5.0)
##  jsonlite      1.5     2017-06-01 [1] CRAN (R 3.5.0)
##  kableExtra  * 0.9.0   2018-05-21 [1] CRAN (R 3.5.0)
##  knitr       * 1.20    2018-02-20 [1] CRAN (R 3.5.0)
##  labeling      0.3     2014-08-23 [1] CRAN (R 3.5.0)
##  later         0.7.5   2018-09-18 [1] CRAN (R 3.5.0)
##  lattice       0.20-38 2018-11-04 [1] CRAN (R 3.5.0)
##  lazyeval      0.2.1   2017-10-29 [1] CRAN (R 3.5.0)
##  lubridate   * 1.7.4   2018-04-11 [1] CRAN (R 3.5.0)
##  magrittr    * 1.5     2014-11-22 [1] CRAN (R 3.5.0)
##  markdown    * 0.8     2017-04-20 [1] CRAN (R 3.5.0)
##  Matrix        1.2-14  2018-04-13 [1] CRAN (R 3.5.1)
##  memoise       1.1.0   2017-04-21 [1] CRAN (R 3.5.0)
##  mime          0.6     2018-10-05 [1] CRAN (R 3.5.0)
##  modelr        0.1.2   2018-05-11 [1] CRAN (R 3.5.0)
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 3.5.0)
##  nlme          3.1-137 2018-04-07 [1] CRAN (R 3.5.1)
##  pillar        1.3.0   2018-07-14 [1] CRAN (R 3.5.0)
##  pkgbuild      1.0.2   2018-10-16 [1] CRAN (R 3.5.0)
##  pkgconfig     2.0.2   2018-08-16 [1] CRAN (R 3.5.0)
##  pkgload       1.0.2   2018-10-29 [1] CRAN (R 3.5.1)
##  plyr          1.8.4   2016-06-08 [1] CRAN (R 3.5.0)
##  prettyunits   1.0.2   2015-07-13 [1] CRAN (R 3.5.0)
##  processx      3.2.0   2018-08-16 [1] CRAN (R 3.5.0)
##  promises      1.0.1   2018-04-13 [1] CRAN (R 3.5.0)
##  ps            1.2.0   2018-10-16 [1] CRAN (R 3.5.0)
##  purrr       * 0.2.5   2018-05-29 [1] CRAN (R 3.5.0)
##  qwraps2     * 0.4.0   2019-01-14 [1] CRAN (R 3.5.2)
##  R6            2.3.0   2018-10-04 [1] CRAN (R 3.5.0)
##  Rcpp          0.12.19 2018-10-01 [1] CRAN (R 3.5.0)
##  RcppRoll    * 0.3.0   2018-06-05 [1] CRAN (R 3.5.0)
##  readr       * 1.1.1   2017-05-16 [1] CRAN (R 3.5.0)
##  readxl      * 1.1.0   2018-04-20 [1] CRAN (R 3.5.0)
##  remotes       2.0.1   2018-10-19 [1] CRAN (R 3.5.0)
##  rlang         0.3.0.1 2018-10-25 [1] CRAN (R 3.5.0)
##  rmarkdown   * 1.10    2018-06-11 [1] CRAN (R 3.5.0)
##  rprojroot     1.3-2   2018-01-03 [1] CRAN (R 3.5.0)
##  rstudioapi    0.8     2018-10-02 [1] CRAN (R 3.5.0)
##  rvest         0.3.2   2016-06-17 [1] CRAN (R 3.5.0)
##  scales        1.0.0   2018-08-09 [1] CRAN (R 3.5.0)
##  selectr       0.4-1   2018-04-06 [1] CRAN (R 3.5.0)
##  sessioninfo   1.1.0   2018-09-25 [1] CRAN (R 3.5.0)
##  shiny         1.1.0   2018-05-17 [1] CRAN (R 3.5.0)
##  stringi       1.2.4   2018-07-20 [1] CRAN (R 3.5.0)
##  stringr     * 1.3.1   2018-05-10 [1] CRAN (R 3.5.0)
##  survival      2.43-3  2018-11-26 [1] CRAN (R 3.5.0)
##  testthat      2.0.1   2018-10-13 [1] CRAN (R 3.5.0)
##  tibble      * 1.4.2   2018-01-22 [1] CRAN (R 3.5.0)
##  tidyr       * 0.8.2   2018-10-28 [1] CRAN (R 3.5.0)
##  tidyselect    0.2.5   2018-10-11 [1] CRAN (R 3.5.0)
##  tidyverse   * 1.2.1   2017-11-14 [1] CRAN (R 3.5.0)
##  usethis     * 1.4.0   2018-08-14 [1] CRAN (R 3.5.0)
##  utf8          1.1.4   2018-05-24 [1] CRAN (R 3.5.0)
##  viridisLite   0.3.0   2018-02-01 [1] CRAN (R 3.5.0)
##  withr         2.1.2   2018-03-15 [1] CRAN (R 3.5.0)
##  xml2          1.2.0   2018-01-24 [1] CRAN (R 3.5.0)
##  xtable      * 1.8-3   2018-08-29 [1] CRAN (R 3.5.0)
##  yaml          2.2.0   2018-07-25 [1] CRAN (R 3.5.0)
## 
## [1] /Library/Frameworks/R.framework/Versions/3.5/Resources/library
#### * TABLE 1 ---------------------------####
#### Overview of columns to produce for Table 1 (Donor Status) in the paper ####

# Table 1 is "Donor status". It gives for each of the 3 quarantines:
# a) Number of infected/ number of inoculated (and %)
# b) Number of symptomatic (and % of infected)
# c) Number of symptomatic, non-ILI (and % of infected)
# d) Number of ILI (and % of infected)
# e) Number of febrile (and % of infected)
# f) Number of PCR confirmed infection (and % of infected)
# g) Number of PCR confirmed infection and seroconversion (and % of infected)
# h) Number of seroconversion by HAI: MN: Either (and % of infected)
# i) Number of seroconversion prior to quarantine by HAI: MN: Both (and % of infected)

#### Table 1 (donors): a) Number of infected, number of inoculated (and % infected of inoculated) ####

# Inoculated donors: every row of Qdata whose randomization arm is "D".
Qdata_inoculated_donors <- filter(Qdata, Randomization_DorIRorCR == "D")

# Distinct inoculated donors per quarantine; this is the denominator used for
# the fraction infected in Table 1.
Qdata_inoculated_donors_table1 <- Qdata_inoculated_donors %>%
  group_by(QuarantineNumber) %>%
  summarise(Number_Inoculated_Donors = n_distinct(SubjectID))
print(Qdata_inoculated_donors_table1)
## # A tibble: 3 x 2
##   QuarantineNumber Number_Inoculated_Donors
##              <int>                    <int>
## 1                1                       20
## 2                2                       12
## 3                3                       20
# number of infected donors

# PCR arm of the POC criterion: list every donor with one or more PCR-positive
# study days. A row counts as positive when InfA_Ct is below 38 and not equal
# to 0; rows with a missing InfA_Ct are dropped by filter() automatically.
# The per-donor count of positive days is merged with the seroconversion lists
# further down.
Qdata_pcr_pos1_or_more_days <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "D",
    InfA_Ct < 38,
    InfA_Ct != 0
  ) %>%
  group_by(SubjectID) %>%
  summarise(NumberDaysPosPCR = n_distinct(StudyDay))
print(Qdata_pcr_pos1_or_more_days)
## # A tibble: 41 x 2
##    SubjectID NumberDaysPosPCR
##        <int>            <int>
##  1       100                6
##  2       103                5
##  3       104                4
##  4       106                3
##  5       107                6
##  6       108                6
##  7       110                4
##  8       112                5
##  9       113                5
## 10       114                5
## # ... with 31 more rows
# let's get the list with seroconversion by Microneuts (CDC serology)

# First only select the subjectIDs that were serosusceptible by MN at baseline (<80 at baseline)
# Revision, even if they were less serosusceptible, there was still a chance for seroconversion so do not filter out the <80 at baseline
#Qdata_Microneut_susceptible <- Qdata %>%
#filter(Randomization_DorIRorCR == "D" & Microneut_VisitType == "Q baseline" & Microneutralization.Titer.to.A.Wisconsin.67.2005 < 80) %>%
#distinct(SubjectID, .keep_all = FALSE)

# Donors whose follow-up ("F/up") microneutralization record is flagged as a
# seroconversion. One row per SubjectID, keeping its QuarantineNumber.
Qdata_Microneut_pos <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "D",
    Microneut_VisitType == "F/up",
    Microneut_Seroconvert == 1
  ) %>%
  select(SubjectID, QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE)
print(Qdata_Microneut_pos)
##    SubjectID QuarantineNumber
## 1        100                1
## 2        101                1
## 3        103                1
## 4        106                1
## 5        107                1
## 6        108                1
## 7        110                1
## 8        112                1
## 9        113                1
## 10       114                1
## 11       115                1
## 12       116                1
## 13       117                1
## 14       118                1
## 15       122                2
## 16       125                2
## 17       126                2
## 18       128                2
## 19       129                2
## 20       130                2
## 21       131                2
## 22       133                3
## 23       136                3
## 24       138                3
## 25       141                3
## 26       142                3
## 27       143                3
## 28       145                3
## 29       146                3
## 30       147                3
## 31       148                3
## 32       150                3
# let's get the list with seroconversion by HAI (Glasgow serology)

# First only select the subjectIDs that were serosusceptible by HAI at baseline (<=10 at baseline)
# Revision, even if they were less serosusceptible, there was still a chance for seroconversion so do not filter out the <=10 at baseline
#Qdata_HAI_susceptible <- Qdata %>%
#filter(Randomization_DorIRorCR == "D" & HAI_dayminus2_recodeNDA <= 10) %>%
#distinct(SubjectID, .keep_all = FALSE)

# Donors flagged as HAI seroconverters. One row per SubjectID, keeping its
# QuarantineNumber.
Qdata_HAI_pos <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "D",
    HAI_Seroconversion == 1
  ) %>%
  select(SubjectID, QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE)
print(Qdata_HAI_pos)
##    SubjectID QuarantineNumber
## 1        100                1
## 2        101                1
## 3        103                1
## 4        107                1
## 5        108                1
## 6        110                1
## 7        112                1
## 8        113                1
## 9        115                1
## 10       116                1
## 11       117                1
## 12       118                1
## 13       122                2
## 14       125                2
## 15       126                2
## 16       127                2
## 17       128                2
## 18       129                2
## 19       130                2
## 20       131                2
## 21       132                2
## 22       133                3
## 23       134                3
## 24       136                3
## 25       138                3
## 26       139                3
## 27       141                3
## 28       142                3
## 29       143                3
## 30       145                3
## 31       146                3
## 32       147                3
## 33       148                3
## 34       150                3
## 35       151                3
# Combine the three evidence lists (HAI seroconversion, microneutralization
# seroconversion, PCR-positive days) into one row per donor who meets any of
# the positivity criteria.
# After the full joins:
#   QuarantineNumber.x comes from the HAI list (NA if the donor is not in it),
#   QuarantineNumber.y comes from the microneutralization list (NA likewise),
#   NumberDaysPosPCR is NA for donors with no PCR-positive day.
Qdata_infected <- Qdata_HAI_pos %>%
  full_join(Qdata_Microneut_pos, by = "SubjectID") %>%
  full_join(Qdata_pcr_pos1_or_more_days, by = "SubjectID") %>%
  arrange(SubjectID)
print(Qdata_infected)
##    SubjectID QuarantineNumber.x QuarantineNumber.y NumberDaysPosPCR
## 1        100                  1                  1                6
## 2        101                  1                  1               NA
## 3        103                  1                  1                5
## 4        104                 NA                 NA                4
## 5        106                 NA                  1                3
## 6        107                  1                  1                6
## 7        108                  1                  1                6
## 8        110                  1                  1                4
## 9        112                  1                  1                5
## 10       113                  1                  1                5
## 11       114                 NA                  1                5
## 12       115                  1                  1               NA
## 13       116                  1                  1                1
## 14       117                  1                  1                4
## 15       118                  1                  1                4
## 16       122                  2                  2                1
## 17       123                 NA                 NA                5
## 18       124                 NA                 NA                2
## 19       125                  2                  2                6
## 20       126                  2                  2                4
## 21       127                  2                 NA                5
## 22       128                  2                  2                6
## 23       129                  2                  2                5
## 24       130                  2                  2                5
## 25       131                  2                  2                5
## 26       132                  2                 NA                4
## 27       133                  3                  3                6
## 28       134                  3                 NA                4
## 29       136                  3                  3                5
## 30       137                 NA                 NA                4
## 31       138                  3                  3                6
## 32       139                  3                 NA               NA
## 33       140                 NA                 NA                1
## 34       141                  3                  3                1
## 35       142                  3                  3                6
## 36       143                  3                  3                6
## 37       144                 NA                 NA                2
## 38       145                  3                  3                6
## 39       146                  3                  3                6
## 40       147                  3                  3                5
## 41       148                  3                  3                6
## 42       150                  3                  3                4
## 43       151                  3                 NA                6
## 44       152                 NA                 NA                1
# Identify the donors whose only evidence of infection is a single PCR-positive
# day: exactly one positive day and absent from both seroconversion lists
# (both joined QuarantineNumber columns are NA).
Qdata_1pcrpos_nosero <- Qdata_infected %>%
  filter(
    NumberDaysPosPCR == 1,
    is.na(QuarantineNumber.x),
    is.na(QuarantineNumber.y)
  )
print(Qdata_1pcrpos_nosero)
##   SubjectID QuarantineNumber.x QuarantineNumber.y NumberDaysPosPCR
## 1       140                 NA                 NA                1
## 2       152                 NA                 NA                1
# For donors who were PCR positive on only one day, find which study day it was.

# Step 1: one row per donor and PCR-positive study day (count = number of
# positive rows on that day). Same positivity rule as above: InfA_Ct below 38
# and not equal to 0; missing InfA_Ct values are dropped by filter().
Qdata_pcr_pos1_or_more_days_studydays <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "D",
    InfA_Ct < 38,
    InfA_Ct != 0
  ) %>%
  group_by(SubjectID, StudyDay) %>%
  summarise(count = n())

# Step 2: keep donors with exactly one positive day and attach that StudyDay.
Qdata_pcr_pos1_day <- Qdata_infected %>%
  filter(NumberDaysPosPCR == 1) %>%
  left_join(Qdata_pcr_pos1_or_more_days_studydays, by = "SubjectID") %>%
  select(-count)
print(Qdata_pcr_pos1_day)
##   SubjectID QuarantineNumber.x QuarantineNumber.y NumberDaysPosPCR
## 1       116                  1                  1                1
## 2       122                  2                  2                1
## 3       140                 NA                 NA                1
## 4       141                  3                  3                1
## 5       152                 NA                 NA                1
##   StudyDay
## 1        2
## 2        2
## 3        4
## 4        4
## 5        4
# Note on this outcome: all subjects that were only PCR on one day, were positive on day 2 or day 4 (not day 1)
# This may be useful if the group is interested in changing the criteria for PCR positivity such that a single PCR test would count as criteria for infection as long as it wasn't on day 0 or day 1.
# Conversation with Alex Mann leads me to believe we should keep the 2 separate day PCR positivity criteria for now.

# Under the POC criteria applied in this script, donors with only 1 PCR-positive
# day and no seroconversion are NOT removed, so Qdata_infected is left unchanged
# and printed again here.
print(Qdata_infected)
##    SubjectID QuarantineNumber.x QuarantineNumber.y NumberDaysPosPCR
## 1        100                  1                  1                6
## 2        101                  1                  1               NA
## 3        103                  1                  1                5
## 4        104                 NA                 NA                4
## 5        106                 NA                  1                3
## 6        107                  1                  1                6
## 7        108                  1                  1                6
## 8        110                  1                  1                4
## 9        112                  1                  1                5
## 10       113                  1                  1                5
## 11       114                 NA                  1                5
## 12       115                  1                  1               NA
## 13       116                  1                  1                1
## 14       117                  1                  1                4
## 15       118                  1                  1                4
## 16       122                  2                  2                1
## 17       123                 NA                 NA                5
## 18       124                 NA                 NA                2
## 19       125                  2                  2                6
## 20       126                  2                  2                4
## 21       127                  2                 NA                5
## 22       128                  2                  2                6
## 23       129                  2                  2                5
## 24       130                  2                  2                5
## 25       131                  2                  2                5
## 26       132                  2                 NA                4
## 27       133                  3                  3                6
## 28       134                  3                 NA                4
## 29       136                  3                  3                5
## 30       137                 NA                 NA                4
## 31       138                  3                  3                6
## 32       139                  3                 NA               NA
## 33       140                 NA                 NA                1
## 34       141                  3                  3                1
## 35       142                  3                  3                6
## 36       143                  3                  3                6
## 37       144                 NA                 NA                2
## 38       145                  3                  3                6
## 39       146                  3                  3                6
## 40       147                  3                  3                5
## 41       148                  3                  3                6
## 42       150                  3                  3                4
## 43       151                  3                 NA                6
## 44       152                 NA                 NA                1
# Summarize the number of infected donors (by any criterion) for each quarantine.

# Qdata_infected only has QuarantineNumber for donors present in a serology
# list, so build a SubjectID -> QuarantineNumber lookup from the full dataset
# (first row seen for each non-missing SubjectID).
Qdata_QuarantineNumbers <- Qdata %>%
  filter(!is.na(SubjectID)) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(SubjectID, QuarantineNumber)

# Attach the complete QuarantineNumber column to the infected list.
Qdata_infected_donors <- left_join(
  Qdata_infected,
  Qdata_QuarantineNumbers,
  by = "SubjectID"
)

# Distinct infected donors per quarantine (Table 1 numerator).
Qdata_infected_donors_table1 <- Qdata_infected_donors %>%
  group_by(QuarantineNumber) %>%
  summarise(Number_Infected_Donors = n_distinct(SubjectID))
print(Qdata_infected_donors_table1)
## # A tibble: 3 x 2
##   QuarantineNumber Number_Infected_Donors
##              <int>                  <int>
## 1                1                     15
## 2                2                     11
## 3                3                     18
#### Generation of Table 1 for paper ####

# Summary table per quarantine: number infected, number inoculated, and the
# fraction of inoculated donors who were infected.
Qdata_table1 <- left_join(
  Qdata_infected_donors_table1,
  Qdata_inoculated_donors_table1,
  by = "QuarantineNumber"
) %>%
  mutate(
    Fraction_Infected_of_Inoculated =
      Number_Infected_Donors / Number_Inoculated_Donors
  )
print(Qdata_table1)
## # A tibble: 3 x 4
##   QuarantineNumber Number_Infected_D… Number_Inoculate… Fraction_Infected…
##              <int>              <int>             <int>              <dbl>
## 1                1                 15                20              0.75 
## 2                2                 11                12              0.917
## 3                3                 18                20              0.9
#### Table 1 (donors): b) Number of symptomatic (and % of infected) ####

# How to define "symptomatic"? (Includes both "symptomatic non-ILI" and "symptomatic ILI")
# Ben Killingley: "Symptomatic are the ones with symptoms (had to be more than 1 symptom on 1 day) but who did not reach ILI def"
# Really, this means having evidence of 2 or more symptoms that occurred together over at least 2 consecutive days, or fever at least once
# Figured out this classification criteria using information from Alex Mann and from reverse engineering the criteria from the CDC final report, which classified each volunteer (compared CDC final report with symptom profile of each volunteer)

## Note: The symptomatic, symptomatic non-ILI, ILI, and febrile counts are meant to be displayed as a fraction of the infected. However, there are symptomatic, symptomatic non-ILI, ILI, and perhaps even febrile donors who were not infected. Perhaps we should report those with symptoms but who were not infected in their own supplementary table?

# Based on email correspondence with Don Milton and the EMIT team on October 3, 2018 we will forgo the "symptomatic" classification in the table
# Thus, the next two sections of code (implementing versions 1 and 2) will not be further pursued and published at this time.
# Revision: Later email correspondence from October 4, 2018 resolves that we will use a milder criteria for symptomatic (and afebrile)...
# Thus, Version 2 of Symptomatic will be used and is coded in this script somewhere below.
# This was later revised and a new version called version 3 was used
# I have commented out the versions that have become obsolete over time. 

#### ## Implementing Version 1 of "Symptomatic" ####

# “Evidence of at least 2 symptoms of any grade that persist for at least 2 consecutive study days, where at least two of those consecutive study days are the same days; or have fever at least once.”

# First, manipulate the data to prepare for a loop that can classify symptomatic by using self report and DPE symptoms
# Combine symptom severity measures (grades 1, 2, and 3) because grade >1 doesn't matter for this definition of symptomatic afebrile
# Symptomatic_donors_infected_grade123 <- Qdata_infected_donors %>%
#   left_join(Qdata) %>%
#   filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 |
#            StudyDay == 4 | StudyDay == 5 | StudyDay == 6) %>%
#   mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis,
#          LRI = cough+SOB,
#          SystemicI = headache+muscleAches+malaise) %>%
#   mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>%
#   mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, 
#          stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, 
#          sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, 
#          soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3,
#          DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, 
#          DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, 
#          DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, 
#          DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3,
#          cough123 = cough==1 | cough==2 | cough==3, 
#          SOB123 = SOB==1 | SOB==2 | SOB==3,
#          headache123 = headache==1 | headache==2 | headache==3, 
#          muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, 
#          malaise123 = malaise==1 | malaise==2 | malaise==3) %>%
#   mutate(Febrile = as.numeric(Febrile),
#          runnyNose123 = as.numeric(runnyNose123), 
#          stuffyNose123 = as.numeric(stuffyNose123), 
#          sneezing123 = as.numeric(sneezing123), 
#          soreThroat123 = as.numeric(soreThroat123),
#          DPENasalDischarge123 = as.numeric(DPENasalDischarge123), 
#          DPEOtits123 = as.numeric(DPEOtits123), 
#          DPESinusTenderness123 = as.numeric(DPESinusTenderness123), 
#          DPEPharyngitis123 = as.numeric(DPEPharyngitis123),
#          cough123 = as.numeric(cough123), 
#          SOB123 = as.numeric(SOB123),
#          headache123 = as.numeric(headache123), 
#          muscleAches123 = as.numeric(muscleAches123), 
#          malaise123 = as.numeric(malaise123)) %>%
#   group_by(SubjectID, StudyDay, QuarantineNumber) %>%
#   summarize(Febrile = max(Febrile),
#             runnyNose123 = max(runnyNose123), 
#             stuffyNose123 = max(stuffyNose123), 
#             sneezing123 = max(sneezing123), 
#             soreThroat123 = max(soreThroat123),
#             DPENasalDischarge123 = max(DPENasalDischarge123), 
#             DPEOtits123 = max(DPEOtits123), 
#             DPESinusTenderness123 = max(DPESinusTenderness123), 
#             DPEPharyngitis123 = max(DPEPharyngitis123),
#             cough123 = max(cough123), 
#             SOB123 = max(SOB123),
#             headache123 = max(headache123), 
#             muscleAches123 = max(muscleAches123), 
#             malaise123 = max(malaise123)) %>%
#   select(SubjectID, QuarantineNumber, StudyDay, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123,
#          cough123, SOB123, headache123, muscleAches123, malaise123) %>%
#   ungroup()
# # The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was...
# # ... at least one symptoms (of any grade) detection per study day
# 
# # Now to select which of the subjects were symptomatic (version1) (excluding the febrile criteria for now)
# # Note: Using the breaks in the loops for efficiency. If a subject is detected as symptomatic, ...
# # ... the loop restarts on the next subjectID
# # But for this we only want to include symptoms for study days 1 to 6 so we need to cut a new df
# Symptomatic_donors_infected_grade123_day1to6 <- Symptomatic_donors_infected_grade123 %>%
#   filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6)
# 
# sub <- unique(Symptomatic_donors_infected_grade123_day1to6$SubjectID)
# c_sub <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   subid <- sub[i]
#   temp <- Symptomatic_donors_infected_grade123_day1to6[Symptomatic_donors_infected_grade123_day1to6$SubjectID == subid, ]
#   temp1<-temp[,4:13]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:3],temp1)
#   for (j in 1:(nrow(temp)-1)) {
#     for (k in 4:12) {
#       for (l in (k+1):13){
#         if (!is.na(temp[j, k]) & !is.na(temp[j, l]) & !is.na(temp[j+1, k]) & !is.na(temp[j+1, l])) {
#           if (temp[j, k] + temp[j, l] + temp[j+1, k] + temp[j+1, l] == 4) {
#             if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) {
#               c_sub <- rbind(c_sub, subid)
#               token<-1
#               break  
#               }
#             }
#           }
#         if (token==1){
#           break
#         } 
#       }
#       if (token==1){
#         break
#       } 
#     }
#     if (token==1){
#       break
#     }
#   }
# }
# # Note that the above loop did not take into account symptoms that may have occurred before study day 1
# # One way of looking at this is that if there was a symptom that occurred before day 1, ...
# # ... then that symptom should not contribute to classification criteria for symptomatic.
# # Will do another loop that implements this to see if this makes a difference at all
# sub <- unique(Symptomatic_donors_infected_grade123$SubjectID)
# c_sub2 <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   subid <- sub[i]
#   temp <- Symptomatic_donors_infected_grade123[Symptomatic_donors_infected_grade123$SubjectID == subid, ]
#   temp1<-temp[,4:13]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:3],temp1)
#   for (j in 1:(nrow(temp)-1)) {
#     for (k in 4:12) {
#       for (l in (k+1):13){
#         if (!is.na(temp[j, k]) & !is.na(temp[j, l]) & !is.na(temp[j+1, k]) & !is.na(temp[j+1, l])) {
#           if (temp[j, k] + temp[j, l] + temp[j+1, k] + temp[j+1, l] == 4) {
#             if (temp$StudyDay[j+1] == temp$StudyDay[j]+1) {
#               sum1<-0
#               sum2<-0
#               for (m in 1:(tail(which(temp$StudyDay==0), n=1))){
#                 sum1<-sum1+temp[m,k]
#                 sum2<-sum1+temp[m,l]
#               }
#               if (sum1==0 & sum2==0) {
#                 c_sub2 <- rbind(c_sub2, subid)
#                 token<-1
#                 break
#               }
#             }
#           }
#         }
#       }
#       if (token==1){
#         break
#       } 
#     }
#     if (token==1){
#       break
#     }
#   }
# }
# # This loop and the less stringent one both yielded n=26 subject IDs, so either version can be used for now
# # We will have to discuss how we should work the definition in the paper. 
# 
# # The above loop prints a vector with 26 subject IDs who meet the criteria for symptomatic as...
# # ... having at least 2 symptoms on at least the same two consecutive study days
# # Note that febrile could count here as one of the two symptoms, but the only donor subjectID for where that occurred, 
# # ... also had other symptoms that would have classified them as symptomatic without looking at the febrile symptom
# 
# # But having fever at least once would also be considered as part of the symptomatic definition, so...
# # ... we will run some more lines that search for febrile cases and add them to this set of symptomatic subjectIDs...
# # ... in order to get a full list of the symptomatics
# 
# # First checking to see if there was fever in any of the donors prior to study day 1
# Febrile_before_day1 <- Symptomatic_donors_infected_grade123 %>%
#   filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 |StudyDay == 0) %>%
#   filter(Febrile == 1)
# # None of the infected donors had fever before day1
# # Now can look at who might have had fever during study days 1 to 6
# 
# Symptomatic_by_fever <- Symptomatic_donors_infected_grade123_day1to6 %>%
#   filter(Febrile == 1) %>%
#   select(SubjectID)
# print(Symptomatic_by_fever)
# 
# Symptomatic_V1 <- as.data.frame(c_sub) %>%
#   rename(SubjectID = "V1") %>%
#   full_join(Symptomatic_by_fever, by = "SubjectID") %>%
#   distinct(SubjectID, .keep_all = TRUE)
# 
# # Now adding the QuarantineNumber on to the Symptomatic_V1 df 
# # Then we can sort by Q for the table1
# Symptomatic_V1_QuarantineNumber_table1 <- Symptomatic_V1 %>%
#   left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>%
#   group_by(QuarantineNumber) %>%
#   summarize(Number_Symptomatic_V1 = n_distinct(SubjectID))
# 
# # Add onto Table1 the number of symptomatic by version 1 criteria and % of infected
# # Note: we are commenting this out, because the team decided on a version 3 for symptomatic to use instead.
# #Qdata_table1 <- Qdata_table1 %>%
#   #left_join(Symptomatic_V1_QuarantineNumber_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_Symptomatic_V1_of_Infected = Number_Symptomatic_V1/Number_Infected_Donors)
# #print(Qdata_table1)

#### ## Implementing Version 2 of "Symptomatic" ####

# # Symptomatic_V2: “Evidence of at least 2 symptoms of any grade that do not necessarily...
# # ...need to persist for consecutive study days, nor persist for the same consecutive study days, but ... 
# # ...where each of the symptoms appeared on at least two different study days.”
# # Note: the "Symptomatic_donors_infected_grade123" df needed for this code was created in the Version 1 of Symptomatic code
# 
# # Going to implement Symptomatic_V2 for afebrile to make a well-defined milder criteria for "symptomatic afebrile"
# sub <- unique(Symptomatic_donors_infected_grade123_day1to6$SubjectID)
# c_sub <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   subid <- sub[i]
#   temp <- Symptomatic_donors_infected_grade123[Symptomatic_donors_infected_grade123$SubjectID == subid, ]
#   temp1<-temp[,4:13]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:3],temp1)
#   for (j in 1:(nrow(temp))) {
#     for (k in 5:12) {
#       for (l in (k+1):13){
#         if (sum(temp[,k], na.rm = TRUE)>=2 & sum(temp[,l], na.rm = TRUE)>=2) {
#           c_sub <- rbind(c_sub, subid)
#           token<-1
#           break
#         }
#       }
#       if (token==1){
#         break
#       } 
#     }
#     if (token==1){
#       break
#     }
#   }
# }
# # produces a c_sub with 29 subjectIDs
# # Note that the above loop did not take into account symptoms that may have occurred before study day 1
# # One way of looking at this is that if a symptom occurred before day 1, ...
# # ... then that symptom should not contribute to classification criteria for symptomatic.
# # Will do another loop that implements this to see if this makes a difference at all
# sub <- unique(Symptomatic_donors_infected_grade123$SubjectID)
# c_sub2 <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   subid <- sub[i]
#   temp <- Symptomatic_donors_infected_grade123[Symptomatic_donors_infected_grade123$SubjectID == subid, ]
#   temp1<-temp[,4:13]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:3],temp1)
#   for (j in 1:(nrow(temp))) {
#     for (k in 5:12) {
#       for (l in (k+1):13){
#         if (sum(temp[,k], na.rm = TRUE)>=2 & sum(temp[,l], na.rm = TRUE)>=2) {
#           sum1<-0
#           sum2<-0
#           for (m in 1:(tail(which(temp$StudyDay==0), n=1))){
#             sum1<-sum1+temp[m,k]
#             sum2<-sum1+temp[m,l]
#           }
#           if (sum1==0 & sum2==0){
#             c_sub2 <- rbind(c_sub2, subid)
#             token<-1
#             break
#           }
#         }
#       }
#       if (token==1){
#         break
#       } 
#     }
#     if (token==1){
#       break
#     }
#   }
# }
# # produces c_sub2 with 27 subjectIDs (vs. 29 in c_sub), so applying the more stringent criteria does change the result
# # For now we will use the less stringent criteria, unless we are given the go-ahead to use the more stringent criteria
# 
# # Now adding the c_sub vector of studyIDs to the table1 (donors)
# # Remember this is symptomatic version 2: a milder criteria for symptomatic, however it is symptomatic afebrile (unlike in Version 1)
# Symptomatic_V2 <- as.data.frame(c_sub) %>%
#   rename(SubjectID = "V1") 
# 
# # Eliminate the ones that had fever
# # First find the SubjectIDs from among the infected, that had fever
# Qdata_infected_febrile <- Qdata_infected_donors %>%
#   left_join(Qdata) %>%
#   filter(Randomization_DorIRorCR == "D" & Tympanic.temp..degrees.C. >37.9) %>%
#   distinct(SubjectID, .keep_all = FALSE)
# 
# Symptomatic_V2 <- Symptomatic_V2 %>%
#   anti_join(Qdata_infected_febrile)
# # This reduced the number of Symptomatic_V2 from 29 to 22
# 
# # Now adding the QuarantineNumber on to the Symptomatic df 
# # Then we can sort by Q for the table1
# Symptomatic_V2_QuarantineNumber_table1 <- Symptomatic_V2 %>%
#   left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>%
#   group_by(QuarantineNumber) %>%
#   summarize(Number_Symptomatic_V2 = n_distinct(SubjectID))
# 
# # Add onto Table1 the number of symptomatic by version 2 criteria and % of infected
# # Note: we are commenting this out, because the team decided on a version 3 for symptomatic to use instead.
# #Qdata_table1 <- Qdata_table1 %>%
#   #left_join(Symptomatic_V2_QuarantineNumber_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_Symptomatic_V2_of_Infected = Number_Symptomatic_V2/Number_Infected_Donors)
# #print(Qdata_table1)

#### ## Implementing Version 3 of "Symptomatic"  ####
## The purpose of this version of symptomatic is so that we are consistent with the definitions from...
## ... the proof-of-concept study (Killingley, 2012 JID)

# Thus, this version 3 of symptomatic is:
# "Any respiratory symptom that occurs at all over 2 consecutive days, or occurs for 3/3 (am, early pm, late pm) symptom measurements on a single day, where respiratory symptoms include: runny nose, stuffy nose, sneezing, sore throat, cough, and shortness of breath"

# First we are going to cut the new df that has only the 6 respiratory symptoms of interest 
# (and also to include fever, just in case of future analyses)
# Per-measurement indicators (1 = symptom reported at grade 1, 2 or 3; 0 = not;
# NA = missing) for the six respiratory symptoms of the Version 3 definition,
# plus a fever indicator (tympanic temperature > 37.9 C), for infected donors.
#
# Changes relative to the earlier form of this pipeline:
#   * the join keys are written out, matching the sibling
#     "Symptomatic_donors_infected_before_day1" pipeline, instead of relying on
#     dplyr's natural-join inference (which picked SubjectID + QuarantineNumber);
#   * indicator columns that were computed but never selected (URI/LRI/SystemicI
#     totals, the DPE* signs, headache, muscle aches, malaise) are not computed.
#
# NOTE(review): the StudyDay filter admits days -3..6, yet the object name says
# "days1to6"; the original note following this pipeline reports that the
# pre-day-1 rows end up excluded anyway -- presumably via the
# Microneut_VisitType filter. Confirm against the data.
Symptomatic_donors_infected_V3_days1to6 <- Qdata_infected_donors %>%
  left_join(Qdata, by = c("SubjectID" = "SubjectID", "QuarantineNumber" = "QuarantineNumber")) %>%
  filter(StudyDay %in% -3:6,
         Microneut_VisitType == "Q baseline") %>%
  # `x == 1 | x == 2 | x == 3` (rather than %in%) keeps a missing grade as NA
  mutate(Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
         runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
         stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
         sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
         soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
         cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
         SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3)) %>%
  select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
         runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>%
  # Keep one row per distinct SDC_time within each subject / study day / quarantine
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  distinct(SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, StudyDay) %>%
  ungroup()
## Join keys used above: SubjectID, QuarantineNumber
# This works, but because of the way the data are put together, it leaves out the day -3 through day 0 data.
# Therefore, as a quick fix, we cut a second dataset that keeps only the data from day -3 through day 0,
# and then bind it back onto the "Symptomatic_donors_infected_V3_days1to6" df that was just created.
# Same per-measurement indicator layout as the V3 day-1-to-6 data, but restricted
# to the pre-inoculation window (study days -3 through 0).
# Each *123 column is 1 when the symptom was reported at grade 1, 2 or 3,
# 0 when it was not, and NA when the grade is missing; Febrile flags a
# tympanic temperature above 37.9 C.
Symptomatic_donors_infected_before_day1 <- Qdata_infected_donors %>%
  left_join(Qdata, by = c("SubjectID", "QuarantineNumber")) %>%
  filter(StudyDay %in% -3:0) %>%
  mutate(Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
         runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
         stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
         sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
         soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
         cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
         SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3)) %>%
  # Only the six respiratory symptoms (plus fever) are carried forward
  select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
         runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123)
# Now binding together and sorting
# Stack the day-1-to-6 rows on top of the pre-day-1 rows, then order the
# combined data by subject and study day.
Symptomatic_donors_infected_V3_dayneg3to6 <- Symptomatic_donors_infected_V3_days1to6 %>%
  bind_rows(Symptomatic_donors_infected_before_day1) %>%
  arrange(SubjectID, StudyDay)

# Filter those with three positive measurements in a single study day for any one of the respiratory symptoms
# Single-day criterion of "Symptomatic" Version 3 (less stringent form):
# a subject qualifies when, on any one study day, one of the six respiratory
# symptoms sums to exactly 3 across that day's measurement rows
# (missing indicators count as 0).
#
# Result: c_sub, built with rbind() so that it stays a one-column matrix whose
# data-frame form has the column "V1" expected by the code that follows.
# Each qualifying SubjectID is added once.
#
# Changes relative to the earlier form of this loop: seq_along() instead of
# 1:length() (safe when there are no subjects), the per-row loop that repeated
# the identical per-day check was removed, an unreachable break was removed,
# and symptom columns are addressed by name instead of by position 7:12.
sub <- unique(Symptomatic_donors_infected_V3_days1to6$SubjectID)
respiratory_sx <- c("runnyNose123", "stuffyNose123", "sneezing123",
                    "soreThroat123", "cough123", "SOB123")
c_sub <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- Symptomatic_donors_infected_V3_days1to6[Symptomatic_donors_infected_V3_days1to6$SubjectID == subid, ]
  # Treat a missing indicator as "symptom not reported"
  sx <- temp[, respiratory_sx]
  sx[is.na(sx)] <- 0
  for (studyday in unique(temp$StudyDay)) {
    on_day <- temp$StudyDay == studyday
    # Number of positive measurements per symptom on this study day
    day_totals <- vapply(respiratory_sx,
                         function(col) sum(sx[[col]][on_day]),
                         numeric(1))
    if (any(day_totals == 3)) {
      token <- 1
      break
    }
  }
  if (token == 1) {
    c_sub <- rbind(c_sub, subid)
  }
}
# But what if we want to disqualify a symptom from the classification scheme if it already appeared before study day 1
# We will compare results using this more stringent criteria, with the results using the less stringent criteria and select one (or both) for publication
# Single-day criterion of "Symptomatic" Version 3 (more stringent form):
# as the less stringent form, except that a symptom is disqualified for a
# subject if it was reported at all on study days <= 0.
# A subject qualifies when a non-disqualified symptom sums to exactly 3 across
# the measurement rows of any one study day (missing indicators count as 0).
#
# Result: c_sub2, a one-column matrix of qualifying SubjectIDs (NULL if none).
#
# Changes relative to the earlier form of this loop: the pre-day-1 total is
# taken over the rows with StudyDay <= 0 rather than over rows
# 1..(last row with StudyDay == 0), which raised an error for a subject without
# a day-0 row and silently relied on the rows being sorted; seq_along()
# replaces 1:length(); the per-row loop that repeated the identical per-day
# check was removed; symptom columns are addressed by name instead of position.
sub <- unique(Symptomatic_donors_infected_V3_dayneg3to6$SubjectID)
respiratory_sx <- c("runnyNose123", "stuffyNose123", "sneezing123",
                    "soreThroat123", "cough123", "SOB123")
c_sub2 <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- Symptomatic_donors_infected_V3_dayneg3to6[Symptomatic_donors_infected_V3_dayneg3to6$SubjectID == subid, ]
  # Treat a missing indicator as "symptom not reported"
  sx <- temp[, respiratory_sx]
  sx[is.na(sx)] <- 0
  # Symptoms never reported on or before study day 0 remain eligible
  before_day1 <- temp$StudyDay <= 0
  pre_totals <- vapply(respiratory_sx,
                       function(col) sum(sx[[col]][before_day1]),
                       numeric(1))
  eligible_sx <- respiratory_sx[pre_totals == 0]
  for (studyday in unique(temp$StudyDay)) {
    on_day <- temp$StudyDay == studyday
    day_totals <- vapply(eligible_sx,
                         function(col) sum(sx[[col]][on_day]),
                         numeric(1))
    if (any(day_totals == 3)) {
      token <- 1
      break
    }
  }
  if (token == 1) {
    c_sub2 <- rbind(c_sub2, subid)
  }
}
# Note that this loop, with the stringent criteria of not using symptoms that appear before day 1 in the classification of symptomatic_V3, yields c_sub2 with n=19 subjectIDs (5 fewer than in the less stringent loop above)
# For now we will use the version with the less stringent criteria, but we will have the other version on deck in case we decide to change or publish both
# Get the list of subject IDs from c_sub into a df
# c_sub is a one-column matrix; as a data frame its column is named "V1",
# which is relabelled SubjectID here.
Symptomatic_V3_donors_infected_singleday <- rename(as.data.frame(c_sub), SubjectID = "V1")

# Now use a loop to classify those with any sort of respiratory symptom on two consecutive days
# For this we should use a "Symptomatic_donors_infected_grade123" df that marks with indicator of 1 when any of the 3 symptom measurements in a day showed evidence of symptoms of any grade.
# Daily maximum of a 0/1 indicator that ignores missing measurements.
# Returns NA only when every measurement in the group is missing.
# Plain max() returns NA as soon as one measurement is missing; the loops that
# consume this data then turn NA into 0, which would erase a day on which
# another measurement was positive.
day_max_ignoring_na <- function(x) {
  if (all(is.na(x))) {
    return(NA_real_)
  }
  max(x, na.rm = TRUE)
}

# One row per infected donor, study day (-3..6) and quarantine, with a 1 in a
# symptom column when ANY measurement that day reported the symptom at grade
# 1, 2 or 3, and Febrile = 1 when any tympanic temperature that day was
# above 37.9 C.
#
# Changes relative to the earlier form of this pipeline:
#   * daily summaries ignore missing measurements (see helper above).
#     NOTE(review): if some days have partially missing readings, this can
#     raise the symptomatic/febrile counts quoted in nearby comments -- re-check.
#   * join keys are written out (SubjectID + QuarantineNumber, the pair dplyr
#     inferred before) instead of relying on natural-join inference;
#   * columns that were computed but dropped by the final select() (URI/LRI/
#     SystemicI totals and the DPE* signs) are not computed.
Symptomatic_donors_infected_grade123 <- Qdata_infected_donors %>%
  left_join(Qdata, by = c("SubjectID" = "SubjectID", "QuarantineNumber" = "QuarantineNumber")) %>%
  filter(StudyDay %in% -3:6) %>%
  # `x == 1 | x == 2 | x == 3` (rather than %in%) keeps a missing grade as NA
  mutate(Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
         runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
         stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
         sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
         soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
         cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
         SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3),
         headache123 = as.numeric(headache == 1 | headache == 2 | headache == 3),
         muscleAches123 = as.numeric(muscleAches == 1 | muscleAches == 2 | muscleAches == 3),
         malaise123 = as.numeric(malaise == 1 | malaise == 2 | malaise == 3)) %>%
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  summarize(Febrile = day_max_ignoring_na(Febrile),
            runnyNose123 = day_max_ignoring_na(runnyNose123),
            stuffyNose123 = day_max_ignoring_na(stuffyNose123),
            sneezing123 = day_max_ignoring_na(sneezing123),
            soreThroat123 = day_max_ignoring_na(soreThroat123),
            cough123 = day_max_ignoring_na(cough123),
            SOB123 = day_max_ignoring_na(SOB123),
            headache123 = day_max_ignoring_na(headache123),
            muscleAches123 = day_max_ignoring_na(muscleAches123),
            malaise123 = day_max_ignoring_na(malaise123)) %>%
  # Column order matters: later code addresses some of these columns by position
  select(SubjectID, QuarantineNumber, StudyDay, Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123,
         cough123, SOB123, headache123, muscleAches123, malaise123) %>%
  ungroup()
## Join keys used above: SubjectID, QuarantineNumber
# The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was at least one symptoms (of any grade) detection per study day

# Now to select which of the subjects meet the two-consecutive-days criterion of symptomatic Version 3 (excluding the febrile criteria for now)
# Note: Using the breaks in the loops for efficiency. If a subject is detected as symptomatic, the loop moves on to the next subjectID
# For this we only want to include symptoms from study days 1 to 6, so we need to cut a new df
# Daily symptom indicators restricted to the post-inoculation window (study days 1 to 6)
Symptomatic_donors_infected_grade123_day1to6 <- filter(Symptomatic_donors_infected_grade123,
                                                       StudyDay %in% 1:6)

# Two-consecutive-days criterion of "Symptomatic" Version 3 (less stringent
# form): a subject qualifies when one of the six respiratory symptoms is
# positive on two adjacent rows whose StudyDay values differ by exactly 1
# (missing indicators count as 0). Only study days 1 to 6 are considered.
#
# Result: c_sub, built with rbind() so that it stays a one-column matrix whose
# data-frame form has the column "V1" expected by the code that follows.
# Each qualifying SubjectID is added once.
#
# Changes relative to the earlier form of this loop: a subject with a single
# row no longer raises an error (1:(nrow(temp)-1) iterated over c(1, 0) and
# tested a missing value), seq_along() replaces 1:length(), and symptom
# columns are addressed by name instead of by position 5:10.
# The adjacent-row comparison relies on the rows being ordered by StudyDay
# within subject, as the earlier loop did.
sub <- unique(Symptomatic_donors_infected_grade123_day1to6$SubjectID)
respiratory_sx <- c("runnyNose123", "stuffyNose123", "sneezing123",
                    "soreThroat123", "cough123", "SOB123")
c_sub <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- Symptomatic_donors_infected_grade123_day1to6[Symptomatic_donors_infected_grade123_day1to6$SubjectID == subid, ]
  # Treat a missing indicator as "symptom not reported"
  sx <- temp[, respiratory_sx]
  sx[is.na(sx)] <- 0
  n_days <- nrow(temp)
  # TRUE where a row and the row after it are consecutive study days
  next_is_consecutive <- diff(temp$StudyDay) == 1
  for (col in respiratory_sx) {
    # Indicator on each row plus the indicator on the following row
    positive_both_days <- sx[[col]][-n_days] + sx[[col]][-1] == 2
    if (any(positive_both_days & next_is_consecutive)) {
      token <- 1
      break
    }
  }
  if (token == 1) {
    c_sub <- rbind(c_sub, subid)
  }
}
# This yields a c_sub of 32 subjectIDs but let's now apply the more stringent version (eliminating Sx if appear before day 1)
# Two-consecutive-days criterion of "Symptomatic" Version 3 (more stringent
# form): as the less stringent form, except that a symptom is disqualified for
# a subject if it was positive on any study day <= 0.
# A subject qualifies when a non-disqualified symptom is positive on two
# adjacent rows whose StudyDay values differ by exactly 1
# (missing indicators count as 0).
#
# Result: c_sub2, a one-column matrix of qualifying SubjectIDs (NULL if none).
#
# Changes relative to the earlier form of this loop: the pre-day-1 total is
# taken over the rows with StudyDay <= 0 rather than over rows
# 1..(last row with StudyDay == 0), which raised an error for a subject without
# a day-0 row; a subject with a single row no longer raises an error;
# seq_along() replaces 1:length(); symptom columns are addressed by name.
# The adjacent-row comparison relies on the rows being ordered by StudyDay
# within subject, as the earlier loop did.
sub <- unique(Symptomatic_donors_infected_grade123$SubjectID)
respiratory_sx <- c("runnyNose123", "stuffyNose123", "sneezing123",
                    "soreThroat123", "cough123", "SOB123")
c_sub2 <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- Symptomatic_donors_infected_grade123[Symptomatic_donors_infected_grade123$SubjectID == subid, ]
  # Treat a missing indicator as "symptom not reported"
  sx <- temp[, respiratory_sx]
  sx[is.na(sx)] <- 0
  n_days <- nrow(temp)
  before_day1 <- temp$StudyDay <= 0
  # TRUE where a row and the row after it are consecutive study days
  next_is_consecutive <- diff(temp$StudyDay) == 1
  for (col in respiratory_sx) {
    # Skip symptoms already present on or before study day 0
    if (sum(sx[[col]][before_day1]) != 0) {
      next
    }
    positive_both_days <- sx[[col]][-n_days] + sx[[col]][-1] == 2
    if (any(positive_both_days & next_is_consecutive)) {
      token <- 1
      break
    }
  }
  if (token == 1) {
    c_sub2 <- rbind(c_sub2, subid)
  }
}
# This yields a c_sub2 of 30 subjectIDs, so it cut out 2 
# For now, however we will stick to using the less stringent criteria with regards to Sx before day1

# Rename "V1" as SubjectID
# c_sub (two-consecutive-days loop) is a one-column matrix; as a data frame its
# column is named "V1", which is relabelled SubjectID here.
Symptomatic_V3_donors_infected_twodays <- rename(as.data.frame(c_sub), SubjectID = "V1")

# Symptomatic V3 = met the two-consecutive-days criterion OR the single-day
# criterion: full join of the two ID lists on SubjectID, sorted by SubjectID.
Symptomatic_V3_donors_infected_combined <- arrange(
  full_join(Symptomatic_V3_donors_infected_twodays,
            Symptomatic_V3_donors_infected_singleday,
            by = "SubjectID"),
  SubjectID
)
print(Symptomatic_V3_donors_infected_combined)
##    SubjectID
## 1        100
## 2        103
## 3        107
## 4        108
## 5        110
## 6        112
## 7        113
## 8        114
## 9        116
## 10       117
## 11       118
## 12       122
## 13       125
## 14       126
## 15       127
## 16       128
## 17       129
## 18       131
## 19       134
## 20       136
## 21       137
## 22       138
## 23       139
## 24       140
## 25       141
## 26       142
## 27       143
## 28       144
## 29       145
## 30       146
## 31       147
## 32       150
## 33       151
## 34       152
# But the above definition of symptomatic (V3) doesn't make any mention of febrile illness
# Let's check to see if the febrile are already accounted for among the group of symptomatic version 3
# Subject-day rows flagged febrile whose SubjectID is NOT in the symptomatic V3
# set; an empty result means every febrile donor is already counted as
# symptomatic. All study days present in the grade123 data are considered, and
# a subject febrile on several days appears once per day.
Symptomatic_by_fever <- Symptomatic_donors_infected_grade123 %>%
  anti_join(Symptomatic_V3_donors_infected_combined, by = "SubjectID") %>%
  filter(Febrile == 1) %>%
  select(SubjectID)
print(Symptomatic_by_fever)
## # A tibble: 0 x 1
## # ... with 1 variable: SubjectID <int>
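# Important to note: this comment originally reported that 1 subjectID was febrile but not symptomatic (#148). However, the result printed above has 0 rows, i.e. no febrile donor in this dataset falls outside the symptomatic V3 set. TODO: confirm which statement holds under the POC infection criteria.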

# For now we will add to the table without this one febrile case, but we will inquire about whether or not to include febrile (without respiratory Sx) along with the other respiratory symptoms as part of the symptomatic V3 set
# Attach each symptomatic (V3) donor's quarantine number and count the distinct
# donors per quarantine for Table 1.
Symptomatic_V3_donors_infected_combined_table1 <- Symptomatic_V3_donors_infected_combined %>%
  left_join(Qdata_QuarantineNumbers, by = "SubjectID") %>%
  group_by(QuarantineNumber) %>%
  summarise(Number_Symptomatic_V3 = n_distinct(SubjectID))
print(Symptomatic_V3_donors_infected_combined_table1)
## # A tibble: 3 x 2
##   QuarantineNumber Number_Symptomatic_V3
##              <int>                 <int>
## 1                1                    11
## 2                2                     7
## 3                3                    16
# Add onto Table1 the number of symptomatic by version 3 criteria and % of infected
# Extend Table 1 in place with the per-quarantine count of symptomatic (V3)
# donors and that count as a fraction of the infected donors.
# Qdata_table1 is overwritten, so this statement is meant to run once per session.
Qdata_table1 <- Qdata_table1 %>%
  left_join(Symptomatic_V3_donors_infected_combined_table1, by = "QuarantineNumber") %>%
  mutate(Fraction_Symptomatic_V3_of_Infected = Number_Symptomatic_V3 / Number_Infected_Donors)
print(Qdata_table1)
## # A tibble: 3 x 6
##   QuarantineNumber Number_Infected… Number_Inoculat… Fraction_Infect…
##              <int>            <int>            <int>            <dbl>
## 1                1               15               20            0.75 
## 2                2               11               12            0.917
## 3                3               18               20            0.9  
## # ... with 2 more variables: Number_Symptomatic_V3 <int>,
## #   Fraction_Symptomatic_V3_of_Infected <dbl>
#### Table 1 (donors): c) Number of symptomatic, non-ILI (and % of infected) ####

# We will ignore this category based on discussion above about removing "symptomatic" classification from the final report
# Find note under "Table 1 (donors): b)..." section of code

#### Table 1 (donors): d) Number of ILI (and % of infected) ####

## Definition of ILI by CDC: "Case definitions for influenza-like illness are nonspecific for influenza and vary depending on the purpose for which they are used.  A case definition of fever 100°F or greater, oral or equivalent, and cough and/or sore throat is used by CDC in its U.S."

# We will create 2 versions of this definition: one including fever (as written above), and one not including fever
# We will eliminate a symptom from contributing to classification criteria if it appeared during D-1 or D-2
# Later a third version was added based on teleconference on October 12, 2018 and the desire to be consistent with terminology following the previously published Killingley et al., 2012 paper

# Later discussion has revealed an ILI Version 3 that we should use. 
# I have commented out the other versions of classifying ILI that have become obsolete.

#### ## First version of classification of ILI (febrile ILI, exact CDC definition) (and % of infected) ####

# # Operationally, this means evidence of fever >100F (>37.9C) & any evidence of cough or sore throat or DPE Pharyngitis
# # Technically we should be using >=37.8 since 37.8 == 100.04 but since Killingley et al., 2012 and...
# # ... other sources from the EMIT consortium believe in using the >37.9 criteria, we will implement that here.
# 
# # First, cut the dataset to only the infected donors who meet the definition for fever
# # Note, none of the volunteers registered a fever on any of the study days prior to inoculation day
# ILI_V1_data <- Qdata_infected_donors %>%
#   left_join(Qdata) %>%
#   select(SubjectID, QuarantineNumber, StudyDay, Sx_Date, Tympanic.temp..degrees.C., cough, soreThroat, DPEPharyngitis) %>%
#   filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | 
#            StudyDay == 4 | StudyDay == 5 | StudyDay == 6)
# 
# # Let's consolidate the sore throat and pharyngitis variables to make one cumulative variable (soreThroat or DPEPharyngitis) called st
# ILI_V1_data <- ILI_V1_data %>%
#   mutate(st = cough>=1 | soreThroat>=1 | DPEPharyngitis>=1, st = as.numeric(st))
# 
# # Before using the loop we need to cut a df that only looks at sx from day1-6
# ILI_V1_data_day1to6 <- ILI_V1_data %>%
#   filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6)
# 
# # Applying loop to select subjectIDs that have fever (>37.9C) and either cough or sore throat
# sub <- unique(ILI_V1_data_day1to6$SubjectID)
# c_sub <- c()
# token_t<-0
# token_s<-0
# for (i in 1:length(sub)) {
#   token_t<-0
#   token_s<-0
#   subid <- sub[i]
#   temp <- ILI_V1_data_day1to6[ILI_V1_data_day1to6$SubjectID == subid, ]
#   for (j in 1:(nrow(temp))) {
#     if (!is.na(temp$Tympanic.temp..degrees.C.[j])) {
#       if (temp$Tympanic.temp..degrees.C.[j] > 37.9) {
#         token_t<-1
#       }
#     }
#     if (sum(temp$cough[j], temp$st[j], na.rm = TRUE) >=1) {
#         token_s<-1
#     }
#     if (token_t==1 & token_s==1){
#       c_sub <- rbind(c_sub, subid)
#       break
#     }
#   }
# }
# # Note: the above code does not check for the case that someone had fever, cough, or sore throat...
# # ... prior to inoculation day. Would take some effort to figure out and since there were only 4 instances. 
# # I checked the raw data and see that none of the 4 had temp>37.9, cough or st symptoms prior to inoculation
# # Thus these 4 can be printed into the table
# # Future iterations of this code would do well to implement logic that would not count symptoms ...
# # ... that appeared before inoculation day in the classification criteria
# 
# # Now adding this vector of studyIDs to the table1 (donors)
# ILI_V1_febrile <- as.data.frame(c_sub) %>%
#   rename(SubjectID = "V1") 
# 
# # Now adding the QuarantineNumber on to the Febrile ILI df 
# # Then we can sort by Q for the table1
# ILI_V1_febrile_table1 <- ILI_V1_febrile %>%
#   left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>%
#   group_by(QuarantineNumber) %>%
#   summarize(Number_ILI_V1_Febrile = n_distinct(SubjectID))
# 
# # Add onto Table1 the number of symptomatic by version 1 criteria and % of infected
# # Note: we are commenting this out, because the team decided on a version 3 for ILI to use instead.
# #Qdata_table1 <- Qdata_table1 %>%
#   #left_join(ILI_V1_febrile_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_ILI_V1_Febrile_of_Infected = Number_ILI_V1_Febrile/Number_Infected_Donors)
# #print(Qdata_table1)

#### ## Second version of classification of ILI (afebrile ILI) (and % of infected) ####
# 
# # This definition of afebrile ILI is: symptom of grade >=1 for cough or soreThroat (or DPEPharyngitis)
# # First we will do a loop that doesn't restrict symptoms that appear before study day1 from contributing to criteria
# sub <- unique(ILI_V1_data_day1to6$SubjectID)
# c_sub <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token_c<-0
#   token_st<-0
#   subid <- sub[i]
#   temp <- ILI_V1_data_day1to6[ILI_V1_data_day1to6$SubjectID == subid, ]
#   temp1<-temp[,6:9]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:5],temp1)
#   for (j in 1:(nrow(temp))) {
#     if (temp$cough[j] >=1) {
#           token_c<-1
#     }
#     if (temp$st[j] >= 1) {
#           token_st<-1
#     }
#   }
#   if (token_c + token_st >=1) {
#     c_sub <- rbind(c_sub, subid)
#   }
# }
# # This yields a c_sub of 24 subjectIDs
# # But this version allows symptoms that appear before day1 to count towards criteria.
# # In contrast, the version below doesn't
# # The study team can make the decision about which version to use
# sub <- unique(ILI_V1_data$SubjectID)
# c_sub2 <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token_c<-0
#   token_st<-0
#   subid <- sub[i]
#   temp <- ILI_V1_data[ILI_V1_data$SubjectID == subid, ]
#   temp1<-temp[,6:9]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:5],temp1)
#   for (j in 1:(nrow(temp))) {
#     if (temp$cough[j] >=1) {
#       sum1<-0
#       for (k in 1:(tail(which(temp$StudyDay==0), n=1))) {
#         sum1 <- sum1+temp$cough[k]
#         if (sum1 == 0) {
#           token_c<-1
#         }
#       }
#     }
#     if (temp$st[j] >= 1) {
#       sum2<-0
#       for (l in 1:(tail(which(temp$StudyDay==0), n=1))) {
#         sum2 <- sum2+temp$st[l]
#         if (sum2 == 0) {
#           token_st<-1
#         }
#       }
#     }
#   }
#   if (token_c + token_st >=1) {
#     c_sub2 <- rbind(c_sub2, subid)
#   }
# }
# # This version yields a c_sub2 with n=24 subjectIDs (same as c_sub)
# 
# # For now we will include the less stringent criteria version but we have both versions available for use.
# # Now adding the c_sub (less stringent criteria) vector of studyIDs to the table1 (donors)
# ILI_V2_afebrile <- as.data.frame(c_sub) %>%
#   rename(SubjectID = "V1") 
# 
# # Getting rid of subjectIDs that actually were febrile
# # First find the SubjectIDs from among the infected, that had fever
# Qdata_infected_febrile <- Qdata_infected_donors %>%
#   left_join(Qdata) %>%
#   filter(Randomization_DorIRorCR == "D" & Tympanic.temp..degrees.C. >37.9) %>%
#   distinct(SubjectID, .keep_all = FALSE)
# 
# ILI_V2_afebrile <- ILI_V2_afebrile %>%
#   anti_join(Qdata_infected_febrile)
# 
# # Now adding the QuarantineNumber on to the ILI afebrile df 
# # Then we can sort by Q for the table1
# ILI_V2_afebrile_table1 <- ILI_V2_afebrile %>%
#   left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>%
#   group_by(QuarantineNumber) %>%
#   summarize(Number_ILI_V2_Afebrile = n_distinct(SubjectID))
# 
# # Add onto Table1 the number of ILI afebrile and % of infected
# # Note: we are commenting this out, because the team decided on a version 3 for ILI to use instead.
# #Qdata_table1 <- Qdata_table1 %>%
#   #left_join(ILI_V2_afebrile_table1, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_ILI_V2_Afebrile_of_Infected = Number_ILI_V2_Afebrile/Number_Infected_Donors)
# #print(Qdata_table1)

#### ## Third version of classification of ILI (to match Killingley et al., 2012) (and % of infected) ####
# This version came about during the October 12, 2018 conference call with the EMIT team (UK and UMD groups present)
# For now this version sounds like it will be the one that we use for the paper. 

# This definition of ILI is: "an illness lasting >=24 hours with either (1) fever >37.9°C plus at least 1 respiratory symptom or (2) >=2 symptoms, at least 1 of which must be respiratory."
# Where "respiratory symptom" means evidence of any grade of runny nose, stuffy nose, sneeze, sore throat, cough, shortness of breath
# Where "lasting >=24 hours" means evidence of the symptom over all three instances of symptom measurements for a single day, or evidence of the symptom over two days at any frequency (1-3/3 instances of symptom recordings)

# First, let's program the first criteria (fever > 37.9C plus at least 1 respiratory symptom)
# To do this, we can:
# a) create the set of subject IDs that meet the fever criteria, and then check them for
# b) evidence of three instances during a single day, or
# c) evidence of any frequency of instances >=1 for 2 consecutive days
# Then, we can deal with the second criteria for ILI (>=2 symptoms one of which being a respiratory)

# Find the SubjectIDs from among the infected, that had fever
# First check to see if anyone had fever before day 1. 
# Subjects from Qdata_infected_donors (joined to their Qdata records) who are
# randomized as donors ("D") and have at least one tympanic temperature above
# 37.9 on study days -3 through 0. One row per SubjectID.
Qdata_infected_febrile_pre_day1 <- distinct(
  filter(
    left_join(Qdata_infected_donors, Qdata),
    Randomization_DorIRorCR == "D",
    StudyDay %in% c(-3, -2, -1, 0),
    Tympanic.temp..degrees.C. > 37.9
  ),
  SubjectID,
  .keep_all = FALSE
)
## Joining, by = c("SubjectID", "QuarantineNumber")
print(Qdata_infected_febrile_pre_day1)
## [1] SubjectID
## <0 rows> (or 0-length row.names)
# As it turns out, none of the infected donors had fever before day 1 
# Now we can see who among the infected subject IDs had fever at least once over study days 1-6
# Subjects from Qdata_infected_donors (joined to their Qdata records) who are
# randomized as donors ("D") and have at least one tympanic temperature above
# 37.9 on study days 1 through 6. One row per SubjectID.
Qdata_infected_febrile_day1to6 <- distinct(
  filter(
    left_join(Qdata_infected_donors, Qdata),
    Randomization_DorIRorCR == "D",
    StudyDay %in% 1:6,
    Tympanic.temp..degrees.C. > 37.9
  ),
  SubjectID,
  .keep_all = FALSE
)
## Joining, by = c("SubjectID", "QuarantineNumber")
print(Qdata_infected_febrile_day1to6)
##   SubjectID
## 1       100
## 2       103
## 3       107
## 4       108
## 5       146
## 6       151
# Now see among the "Qdata_infected_febrile_day1to6" df, who had symptoms all day during a single study day

# First we are going to cut the a new df that has the 9 symptoms of interest (this includes the 6 respiratory symptoms) and fever for only those in the "Qdata_infected_febrile_day1to6" df
# Observation-level symptom flags for the subjects in
# Qdata_infected_febrile_day1to6. Rows are limited to study days -3..6 and to
# Microneut_VisitType == "Q baseline". Each *123 column is 1 when the grade is
# 1, 2 or 3, 0 for any other recorded grade, and NA when the grade is missing.
# Febrile is 1 when the tympanic temperature is above 37.9.
# Only the six self-reported respiratory flags and the three systemic flags are
# kept; one row is retained per SubjectID / StudyDay / QuarantineNumber / SDC_time.
ILI_V3_donors_infected_febrile_studyday1to6 <- Qdata_infected_febrile_day1to6 %>%
  left_join(Qdata) %>%
  filter(StudyDay %in% seq(-3, 6)) %>%
  mutate(
    URI = runnyNose + stuffyNose + sneezing + soreThroat + DPENasalDischarge +
      DPEOtits + DPESinusTenderness + DPEPharyngitis,
    LRI = cough + SOB,
    SystemicI = headache + muscleAches + malaise
  ) %>%
  filter(Microneut_VisitType == "Q baseline") %>%
  mutate(
    Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
    runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
    stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
    sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
    soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
    DPENasalDischarge123 = as.numeric(
      DPENasalDischarge == 1 | DPENasalDischarge == 2 | DPENasalDischarge == 3
    ),
    DPEOtits123 = as.numeric(DPEOtits == 1 | DPEOtits == 2 | DPEOtits == 3),
    DPESinusTenderness123 = as.numeric(
      DPESinusTenderness == 1 | DPESinusTenderness == 2 | DPESinusTenderness == 3
    ),
    DPEPharyngitis123 = as.numeric(
      DPEPharyngitis == 1 | DPEPharyngitis == 2 | DPEPharyngitis == 3
    ),
    cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
    SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3),
    headache123 = as.numeric(headache == 1 | headache == 2 | headache == 3),
    muscleAches123 = as.numeric(muscleAches == 1 | muscleAches == 2 | muscleAches == 3),
    malaise123 = as.numeric(malaise == 1 | malaise == 2 | malaise == 3)
  ) %>%
  select(
    SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
    runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
    headache123, muscleAches123, malaise123
  ) %>%
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  distinct(SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, StudyDay) %>%
  ungroup()
## Joining, by = "SubjectID"
# This is great but the way the data is put together, this leaves out the day -3 through day0 data
# Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0
# Then we will bind it back to the "ILI_V3_donors_infected_febrile_studyday1to6" df that was just created.
# Same symptom flags as ILI_V3_donors_infected_febrile_studyday1to6, but for
# study days -3 through 0 only, with no visit-type filter and no
# de-duplication on SDC_time. Each *123 column is 1 for grade 1, 2 or 3, 0 for
# any other recorded grade, and NA when the grade is missing.
ILI_V3_donors_infected_before_day1_febrile <- Qdata_infected_febrile_day1to6 %>%
  left_join(Qdata) %>%
  filter(StudyDay %in% c(-3, -2, -1, 0)) %>%
  mutate(
    URI = runnyNose + stuffyNose + sneezing + soreThroat + DPENasalDischarge +
      DPEOtits + DPESinusTenderness + DPEPharyngitis,
    LRI = cough + SOB,
    SystemicI = headache + muscleAches + malaise
  ) %>%
  mutate(
    Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
    runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
    stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
    sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
    soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
    DPENasalDischarge123 = as.numeric(
      DPENasalDischarge == 1 | DPENasalDischarge == 2 | DPENasalDischarge == 3
    ),
    DPEOtits123 = as.numeric(DPEOtits == 1 | DPEOtits == 2 | DPEOtits == 3),
    DPESinusTenderness123 = as.numeric(
      DPESinusTenderness == 1 | DPESinusTenderness == 2 | DPESinusTenderness == 3
    ),
    DPEPharyngitis123 = as.numeric(
      DPEPharyngitis == 1 | DPEPharyngitis == 2 | DPEPharyngitis == 3
    ),
    cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
    SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3),
    headache123 = as.numeric(headache == 1 | headache == 2 | headache == 3),
    muscleAches123 = as.numeric(muscleAches == 1 | muscleAches == 2 | muscleAches == 3),
    malaise123 = as.numeric(malaise == 1 | malaise == 2 | malaise == 3)
  ) %>%
  select(
    SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
    runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
    headache123, muscleAches123, malaise123
  )
## Joining, by = "SubjectID"
# Now binding together and sorting
# Stack the day 1-6 observation table on the day -3..0 table and order the
# result by subject and study day.
ILI_V3_donors_infected_febrile <- bind_rows(
  ILI_V3_donors_infected_febrile_studyday1to6,
  ILI_V3_donors_infected_before_day1_febrile
) %>%
  arrange(SubjectID, StudyDay)

# Criterion 1, single-day part (less stringent version, no pre-day-1 check).
# A febrile subject qualifies when, on some study day, one of the six
# respiratory flags (columns 7:12) sums to exactly 3 across that day's rows.
# Uses the "ILI_V3_donors_infected_febrile_studyday1to6" df only.
#
# c_sub is deliberately grown with rbind(c_sub, subid): the code further down
# converts it with as.data.frame() and renames column "V1", which relies on
# c_sub being a one-column matrix.
sub <- unique(ILI_V3_donors_infected_febrile_studyday1to6$SubjectID)
c_sub <- c()
# seq_along() (not 1:length()) so that an empty subject list runs zero times
for (i in seq_along(sub)) {
  subid <- sub[i]
  temp <- ILI_V3_donors_infected_febrile_studyday1to6[
    ILI_V3_donors_infected_febrile_studyday1to6$SubjectID == subid,
  ]
  # Missing flags count as "symptom absent"
  temp1 <- temp[, 6:15]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:5], temp1)
  stud <- unique(temp$StudyDay)
  for (j in seq_along(stud)) {
    studyday <- stud[j]
    temp2 <- temp[temp$StudyDay == studyday, ]
    # One pass over the six respiratory columns per day is enough; the former
    # per-row loop repeated the identical column sums.
    if (any(colSums(temp2[, 7:12]) == 3)) {
      c_sub <- rbind(c_sub, subid)
      break
    }
  }
}
# This yields a c_sub vector with 6 subjectIDs (see the printed "ILI_V3_infected_donors_criteria1_singleday" df below)
# If we want to do the same loop, but exclude symptoms that were positive before study day 1 we use ILI_V3_donors_infected_febrile
# Criterion 1, single-day part, stringent version: as above, but a respiratory
# symptom only counts if it was never flagged on study days <= 0.
# Uses "ILI_V3_donors_infected_febrile", which also carries the day -3..0 rows.
sub <- unique(ILI_V3_donors_infected_febrile$SubjectID)
c_sub2 <- c()
for (i in seq_along(sub)) {
  subid <- sub[i]
  temp <- ILI_V3_donors_infected_febrile[
    ILI_V3_donors_infected_febrile$SubjectID == subid,
  ]
  # Keep Febrile plus the six respiratory flags; missing flags count as 0
  temp1 <- temp[, 6:12]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:5], temp1)
  # Rows recorded before day 1. Selecting them by value avoids the error that
  # 1:(tail(which(StudyDay == 0), n = 1)) raised for a subject with no day-0
  # row; with no such rows the baseline sum is simply 0.
  pre_day1 <- which(temp$StudyDay <= 0)
  found <- FALSE
  stud <- unique(temp$StudyDay)
  for (j in seq_along(stud)) {
    studyday <- stud[j]
    temp2 <- temp[temp$StudyDay == studyday, ]
    for (l in 7:12) {
      if (sum(temp2[[l]]) == 3 && sum(temp[[l]][pre_day1]) == 0) {
        found <- TRUE
        break
      }
    }
    if (found) {
      break
    }
  }
  if (found) {
    c_sub2 <- rbind(c_sub2, subid)
  }
}
# c_sub2 is kept for comparison only; the analysis continues with the less
# stringent c_sub from the previous loop.
# Now get the df of subject IDs from the c_sub vector
ILI_V3_infected_donors_criteria1_singleday <- as.data.frame(c_sub) %>%
  rename(SubjectID = "V1")
print(ILI_V3_infected_donors_criteria1_singleday)
##         SubjectID
## subid         100
## subid.1       103
## subid.2       107
## subid.3       108
## subid.4       146
## subid.5       151
# This is the output for the first criteria for ILI (fever and 1 respiratory symptom for >=24 hours), where the symptoms occured on three times in the same day (counts as >= 24 hours)

# Moving to the second part of criteria 1 (febrile plus 2 consecutive study days of a resp. Sx at any freq >=1)
# Now to implement the criteria 1 (febrile plus 1 resp Sx) for those who had symptoms over 2 consecutive study days
# Use the "Symptomatic_donors_infected_grade123" df, cut it to the 6 resp. symptoms, and febrile only
# Remember: the "Symptomatic_donors_infected_grade123" df created under Version 1 of Symptomatic for the infected donors
# Rows of "Symptomatic_donors_infected_grade123" for the febrile subjects in
# Qdata_infected_febrile_day1to6, reduced to the identifiers, Febrile, and the
# six self-reported respiratory flags.
Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days <- select(
  right_join(Symptomatic_donors_infected_grade123, Qdata_infected_febrile_day1to6),
  SubjectID, StudyDay, QuarantineNumber, Febrile,
  runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123
)
## Joining, by = "SubjectID"
# The two-consecutive-day classification is run first without looking at
# symptoms before study day 1, so cut a copy restricted to study days 1-6.
Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days_1to6 <- filter(
  Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days,
  StudyDay %in% 1:6
)

# Criterion 1, two-day part (less stringent version, no pre-day-1 check).
# A febrile subject qualifies when one respiratory flag (columns 5:10) equals 1
# in two adjacent rows whose StudyDay values differ by exactly 1.
# c_sub is grown with rbind(c_sub, subid) on purpose: the later
# as.data.frame() / rename("V1") step needs a one-column matrix.
sub <- unique(Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days_1to6$SubjectID)
c_sub <- c()
for (i in seq_along(sub)) {
  subid <- sub[i]
  temp <- Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days_1to6[
    Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days_1to6$SubjectID == subid,
  ]
  # Missing flags count as "symptom absent"
  temp1 <- temp[, 4:10]
  temp1[is.na(temp1)] <- 0
  temp <- cbind(temp[, 1:3], temp1)
  found <- FALSE
  # seq_len() gives no iterations for a subject with a single row, whereas
  # 1:(nrow(temp) - 1) produced c(1, 0) and then failed on an NA comparison.
  for (j in seq_len(nrow(temp) - 1)) {
    for (k in 5:10) {
      if (temp[j, k] + temp[j + 1, k] == 2 &&
            temp$StudyDay[j + 1] == temp$StudyDay[j] + 1) {
        found <- TRUE
        break
      }
    }
    if (found) {
      break
    }
  }
  if (found) {
    c_sub <- rbind(c_sub, subid)
  }
}
# This yielded a c_sub vector of 6 subjectIDs (see the printed "ILI_V3_infected_donors_criteria1_2days" df below).
# Check to see if any of the symptoms appeared before day 1 and thus should be considered as disqualifying for that symptom to contribute to classification criteria.
# Criterion 1, two-day part, stringent version: a respiratory symptom present
# on two consecutive study days only counts if it was never flagged on study
# days <= 0. Uses the df that still contains the pre-day-1 rows.
sub <- unique(Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days$SubjectID)
c_sub2 <- c()
for (i in seq_along(sub)) {
  subid <- sub[i]
  temp <- Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days[
    Symptomatic_febrile_donors_grade123_ILI_V3_criteria1_2days$SubjectID == subid,
  ]
  # Missing flags count as "symptom absent"
  temp1 <- temp[, 4:10]
  temp1[is.na(temp1)] <- 0
  temp <- cbind(temp[, 1:3], temp1)
  # Baseline rows selected by value; the former
  # 1:(tail(which(StudyDay == 0), n = 1)) errored when no day-0 row existed.
  pre_day1 <- which(temp$StudyDay <= 0)
  found <- FALSE
  for (j in seq_len(nrow(temp) - 1)) {
    for (k in 5:10) {
      if (temp[j, k] + temp[j + 1, k] == 2 &&
            temp$StudyDay[j + 1] == temp$StudyDay[j] + 1 &&
            sum(temp[[k]][pre_day1]) == 0) {
        found <- TRUE
        break
      }
    }
    if (found) {
      break
    }
  }
  if (found) {
    c_sub2 <- rbind(c_sub2, subid)
  }
}
# c_sub2 is kept for comparison only; for now we use the less stringent c_sub.
# Create df from the c_sub vector of subjectIDs
# Remember this is the classification of febrile with symptoms of >=1 frequency over 2 days
ILI_V3_infected_donors_criteria1_2days <- as.data.frame(c_sub) %>%
  rename(SubjectID = "V1")
print(ILI_V3_infected_donors_criteria1_2days)
##         SubjectID
## subid         100
## subid.1       103
## subid.2       107
## subid.3       108
## subid.4       146
## subid.5       151
# Now will merge the 2 parts of criteria 1 for ILI together to get a single set of subjectIDs that meet the 1st ILI criteria
# Reminder that 1st ILI criteria is:  febrile (>37.9C) plus >=24 hours of a respiratory symptom (one of the 6 self-reported resp Sxs)
# The "2 parts of criteria 1" refer to:
# 1) febrile plus 3 observations of self-reported resp. Sx in a single day, and 
# 2) febrile plus 2 consecutive days of at least 1 self-reported resp. Sx at any daily frequency >=1

# Union of the single-day and two-consecutive-day subject lists for criterion 1
ILI_V3_infected_donors_criteria1 <- ILI_V3_infected_donors_criteria1_singleday %>%
  full_join(ILI_V3_infected_donors_criteria1_2days)
## Joining, by = "SubjectID"
print(ILI_V3_infected_donors_criteria1)
##   SubjectID
## 1       100
## 2       103
## 3       107
## 4       108
## 5       146
## 6       151
## Plan for implementing the second criteria for ILI_V3:
# That is: >= 2 symptoms for >=24 hours, 1 of which is respiratory, and merging with the first criteria for ILI

# To do this, first we will filter those subject IDs without fever (find the always afebrile group)
# Then we will see who among the always afebrile had:
# >=1 resp. symptom on 1 day plus at least one other symptom on the same single day (grade >=1 for 3/3 observations in a day)
# Then we will see who among the always afebrile had:
# >=1 resp. sympom at frequency >=1 over 2 days plus at least one other resp. symptom at freq >=1 for same 2 days

# Then we will add those subject IDs (from the above 2 criteria) together to form ILI_V3_infected_donors_criteria2
# Then we will merge ILI_V3_infected_donors_criteria1 and ILI_V3_infected_donors_criteria2 together to make ILI_V3_infected_donors

# First we will get the list of subjectIDs who never had fever (the always afebrile group):
# these are the infected donors that are NOT in the set with fever at least once over study days 1-6
# SubjectIDs in Qdata_infected_donors that do not appear in
# Qdata_infected_febrile_day1to6
Qdata_infected_afebrile_day1to6 <- anti_join(
  select(Qdata_infected_donors, SubjectID),
  Qdata_infected_febrile_day1to6
)
## Joining, by = "SubjectID"
print(Qdata_infected_afebrile_day1to6)
##    SubjectID
## 1        101
## 2        104
## 3        106
## 4        110
## 5        112
## 6        113
## 7        114
## 8        115
## 9        116
## 10       117
## 11       118
## 12       122
## 13       123
## 14       124
## 15       125
## 16       126
## 17       127
## 18       128
## 19       129
## 20       130
## 21       131
## 22       132
## 23       133
## 24       134
## 25       136
## 26       137
## 27       138
## 28       139
## 29       140
## 30       141
## 31       142
## 32       143
## 33       144
## 34       145
## 35       147
## 36       148
## 37       150
## 38       152
# Now we will see who among these always afebrile had a respiratory symptom on a single day plus at least one other symptom (respiratory or not) on the same single day (grade >=1 for 3/3 observations in a day)

# First we are going to cut the a new df that has the 6 self-reported respiratory symptoms of interest plus the 3 self-reported non-resp symptoms, and fever (just for kicks, it should always be <37.9 because we already filtered but interesting to have the values)
# for only those in the "Qdata_infected_afebrile_day1to6" df
# Observation-level symptom flags for the subjects in
# Qdata_infected_afebrile_day1to6. Rows are limited to study days -3..6 and to
# Microneut_VisitType == "Q baseline". Each *123 column is 1 when the grade is
# 1, 2 or 3, 0 for any other recorded grade, and NA when the grade is missing.
# Febrile is 1 when the tympanic temperature is above 37.9.
# One row is retained per SubjectID / StudyDay / QuarantineNumber / SDC_time.
ILI_V3_donors_infected_afebrile <- Qdata_infected_afebrile_day1to6 %>%
  left_join(Qdata) %>%
  filter(StudyDay %in% seq(-3, 6)) %>%
  mutate(
    URI = runnyNose + stuffyNose + sneezing + soreThroat + DPENasalDischarge +
      DPEOtits + DPESinusTenderness + DPEPharyngitis,
    LRI = cough + SOB,
    SystemicI = headache + muscleAches + malaise
  ) %>%
  filter(Microneut_VisitType == "Q baseline") %>%
  mutate(
    Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
    runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
    stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
    sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
    soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
    DPENasalDischarge123 = as.numeric(
      DPENasalDischarge == 1 | DPENasalDischarge == 2 | DPENasalDischarge == 3
    ),
    DPEOtits123 = as.numeric(DPEOtits == 1 | DPEOtits == 2 | DPEOtits == 3),
    DPESinusTenderness123 = as.numeric(
      DPESinusTenderness == 1 | DPESinusTenderness == 2 | DPESinusTenderness == 3
    ),
    DPEPharyngitis123 = as.numeric(
      DPEPharyngitis == 1 | DPEPharyngitis == 2 | DPEPharyngitis == 3
    ),
    cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
    SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3),
    headache123 = as.numeric(headache == 1 | headache == 2 | headache == 3),
    muscleAches123 = as.numeric(muscleAches == 1 | muscleAches == 2 | muscleAches == 3),
    malaise123 = as.numeric(malaise == 1 | malaise == 2 | malaise == 3)
  ) %>%
  select(
    SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
    runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
    headache123, muscleAches123, malaise123
  ) %>%
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  distinct(SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, StudyDay) %>%
  ungroup()
## Joining, by = "SubjectID"
# This is great but the way the data is put together, this leaves out the day -3 through day0 data
# Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0
# Then we will bind it back to the "ILI_V3_donors_infected_afebrile" df that was just created.
# Symptom flags for the always-afebrile subjects on study days -3 through 0
# (no visit-type filter, no de-duplication on SDC_time). Each *123 column is 1
# for grade 1, 2 or 3, 0 for any other recorded grade, and NA when missing.
#
# The three systemic flags (headache123, muscleAches123, malaise123) are kept
# here, as in the febrile counterpart. Previously they were dropped by select(),
# so after bind_rows() with the day 1-6 table every pre-day-1 row carried NA
# for them, and pre-existing systemic symptoms could never be detected by a
# baseline check.
ILI_V3_donors_infected_before_day1_afebrile <- Qdata_infected_afebrile_day1to6 %>%
  left_join(Qdata) %>%
  filter(StudyDay %in% c(-3, -2, -1, 0)) %>%
  mutate(
    URI = runnyNose + stuffyNose + sneezing + soreThroat + DPENasalDischarge +
      DPEOtits + DPESinusTenderness + DPEPharyngitis,
    LRI = cough + SOB,
    SystemicI = headache + muscleAches + malaise
  ) %>%
  mutate(
    Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
    runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
    stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
    sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
    soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
    DPENasalDischarge123 = as.numeric(
      DPENasalDischarge == 1 | DPENasalDischarge == 2 | DPENasalDischarge == 3
    ),
    DPEOtits123 = as.numeric(DPEOtits == 1 | DPEOtits == 2 | DPEOtits == 3),
    DPESinusTenderness123 = as.numeric(
      DPESinusTenderness == 1 | DPESinusTenderness == 2 | DPESinusTenderness == 3
    ),
    DPEPharyngitis123 = as.numeric(
      DPEPharyngitis == 1 | DPEPharyngitis == 2 | DPEPharyngitis == 3
    ),
    cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
    SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3),
    headache123 = as.numeric(headache == 1 | headache == 2 | headache == 3),
    muscleAches123 = as.numeric(muscleAches == 1 | muscleAches == 2 | muscleAches == 3),
    malaise123 = as.numeric(malaise == 1 | malaise == 2 | malaise == 3)
  ) %>%
  select(
    SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
    runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
    headache123, muscleAches123, malaise123
  )
## Joining, by = "SubjectID"
# Now binding together and sorting
# Append the day -3..0 rows to the afebrile observation table and sort by
# subject and study day. Note that this overwrites
# ILI_V3_donors_infected_afebrile in place, so re-running only this statement
# appends the pre-day-1 rows again.
ILI_V3_donors_infected_afebrile <- arrange(
  bind_rows(ILI_V3_donors_infected_afebrile, ILI_V3_donors_infected_before_day1_afebrile),
  SubjectID, StudyDay
)

# The current definition only looks at post-day-0 data, so keep study days 1-6
ILI_V3_donors_infected_afebrile_1to6 <- filter(
  ILI_V3_donors_infected_afebrile,
  StudyDay %in% 1:6
)

# Loop to get subjectIDs where there were 2 symptoms (one of which respiratory), each observed 3 times on the same day
# Criterion 2, single-day part (less stringent version, no pre-day-1 check).
# An afebrile subject qualifies when, on some study day, a respiratory flag
# (column l in 7:12) and a different symptom flag (column m in (l+1):15) have
# day totals that add up to 6.
# NOTE(review): this equals "both observed 3 times" only if a day has at most
# three observation rows - confirm against the data.
# c_sub is grown with rbind(c_sub, subid) on purpose: the later
# as.data.frame() / rename("V1") step needs a one-column matrix.
sub <- unique(ILI_V3_donors_infected_afebrile_1to6$SubjectID)
c_sub <- c()
# seq_along() (not 1:length()) so that an empty subject list runs zero times
for (i in seq_along(sub)) {
  subid <- sub[i]
  temp <- ILI_V3_donors_infected_afebrile_1to6[
    ILI_V3_donors_infected_afebrile_1to6$SubjectID == subid,
  ]
  # Missing flags count as "symptom absent"
  temp1 <- temp[, 6:15]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:5], temp1)
  found <- FALSE
  stud <- unique(temp$StudyDay)
  for (j in seq_along(stud)) {
    studyday <- stud[j]
    temp2 <- temp[temp$StudyDay == studyday, ]
    # The former per-row loop around this pair search repeated identical sums
    for (l in 7:12) {
      for (m in (l + 1):15) {
        if (sum(temp2[[l]]) + sum(temp2[[m]]) == 6) {
          found <- TRUE
          break
        }
      }
      if (found) {
        break
      }
    }
    if (found) {
      break
    }
  }
  if (found) {
    c_sub <- rbind(c_sub, subid)
  }
}
# This yields a vector c_sub of 13 subjectIDs (see the printed "ILI_V3_infected_donors_criteria2_2resp_singleday" df below)
# If we want to do the same loop, but exclude symptoms that were positive before study day 1 we would do...
# Use the "ILI_V3_donors_infected_afebrile" df because it has all three study days for day-3 to day6 for each always afebrile subjectID
# Criterion 2, single-day part, stringent version: same pair search as the
# less stringent loop, but both symptoms of the pair must never have been
# flagged on study days <= 0.
#
# Column layout of temp is the same as in the less stringent loop:
# 6 = Febrile, 7:12 = respiratory flags, 13:15 = headache / muscleAches /
# malaise. The earlier version sliced temp[, 6:13] and looped l over 5:10 and m
# up to 13, which treated QuarantineNumber and Febrile as symptoms, never used
# cough or SOB as the respiratory symptom, and ignored muscleAches and malaise.
sub <- unique(ILI_V3_donors_infected_afebrile$SubjectID)
c_sub2 <- c()
for (i in seq_along(sub)) {
  subid <- sub[i]
  temp <- ILI_V3_donors_infected_afebrile[
    ILI_V3_donors_infected_afebrile$SubjectID == subid,
  ]
  # Missing flags count as "symptom absent"
  temp1 <- temp[, 6:15]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:5], temp1)
  # Baseline rows selected by value; the former
  # 1:(tail(which(StudyDay == 0), n = 1)) errored when no day-0 row existed.
  pre_day1 <- which(temp$StudyDay <= 0)
  found <- FALSE
  stud <- unique(temp$StudyDay)
  for (j in seq_along(stud)) {
    studyday <- stud[j]
    temp2 <- temp[temp$StudyDay == studyday, ]
    for (l in 7:12) {
      for (m in (l + 1):15) {
        if (sum(temp2[[l]]) + sum(temp2[[m]]) == 6 &&
              sum(temp[[l]][pre_day1]) == 0 &&
              sum(temp[[m]][pre_day1]) == 0) {
          found <- TRUE
          break
        }
      }
      if (found) {
        break
      }
    }
    if (found) {
      break
    }
  }
  if (found) {
    c_sub2 <- rbind(c_sub2, subid)
  }
}
# c_sub2 is kept for comparison only.
# NOTE(review): the c_sub2 count recorded in earlier runs came from the
# mis-indexed loop and should be re-checked.
# For now we will take the less stringent criteria that doesn't exclude Sx occurring before day1
# So this means we will stick to using c_sub and not c_sub2
# Now get the c_sub vector of subjectIDs into a df of subjectIDs
ILI_V3_infected_donors_criteria2_2resp_singleday <- as.data.frame(c_sub) %>%
  rename(SubjectID = "V1")
print(ILI_V3_infected_donors_criteria2_2resp_singleday)
##          SubjectID
## subid          112
## subid.1        114
## subid.2        127
## subid.3        128
## subid.4        134
## subid.5        136
## subid.6        137
## subid.7        139
## subid.8        142
## subid.9        143
## subid.10       145
## subid.11       150
## subid.12       152
# This "ILI_V3_infected_donors_criteria2_2resp_singleday" df is the output for the 1st part (single day) of the ILI_V3 2nd criteria. 

# Now for the 2nd part (2 consec days) of the ILI_V3 2nd criteria
# Which is >=2 Sx with >=1 of those Sx being respiratory, and the >=2 Sx occuring over 2 consecutive days

# Need to make a new df that has, for all of the always afebrile individuals, 1 row of data for each study day-3to6
# Recall: "Symptomatic_donors_infected_grade123" df was created in V1 of Sympomtatic and has all the Sx we need and spans study days-3 to 6, but we want to keep only the always afebrile ones
# Rows of "Symptomatic_donors_infected_grade123" for the always-afebrile
# subjects only
ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6 <- right_join(
  Symptomatic_donors_infected_grade123,
  Qdata_infected_afebrile_day1to6
)
## Joining, by = "SubjectID"
# In this df, columns 5-10 are the respiratory symptoms and columns 11-13 are
# the other symptoms that matter for this definition.
# The first classification pass (>= 2 symptoms, one of them respiratory, over
# at least 2 consecutive study days) does not exclude symptoms that were
# positive before day 1, so it works on a copy cut to study days 1-6.
ILI_V3_donors_infected_afebrile_1row_per_studyday_day1to6 <- filter(
  ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6,
  StudyDay %in% 1:6
)

# Criterion 2, two-day part (less stringent version, no pre-day-1 check).
# An afebrile subject qualifies when a respiratory flag (column k in 5:10) and
# a different symptom flag (column l in (k+1):13) are both 1 in two adjacent
# rows whose StudyDay values differ by exactly 1.
# c_sub is grown with rbind(c_sub, subid) on purpose: the later
# as.data.frame() / rename("V1") step needs a one-column matrix.
sub <- unique(ILI_V3_donors_infected_afebrile_1row_per_studyday_day1to6$SubjectID)
c_sub <- c()
for (i in seq_along(sub)) {
  subid <- sub[i]
  temp <- ILI_V3_donors_infected_afebrile_1row_per_studyday_day1to6[
    ILI_V3_donors_infected_afebrile_1row_per_studyday_day1to6$SubjectID == subid,
  ]
  # Missing flags count as "symptom absent"
  temp1 <- temp[, 4:13]
  temp1[is.na(temp1)] <- 0
  temp <- cbind(temp[, 1:3], temp1)
  found <- FALSE
  # seq_len() gives no iterations for a subject with a single row, whereas
  # 1:(nrow(temp) - 1) produced c(1, 0) and then failed on an NA comparison.
  for (j in seq_len(nrow(temp) - 1)) {
    for (k in 5:10) {
      for (l in (k + 1):13) {
        if (temp[j, k] + temp[j, l] + temp[j + 1, k] + temp[j + 1, l] == 4 &&
              temp$StudyDay[j + 1] == temp$StudyDay[j] + 1) {
          found <- TRUE
          break
        }
      }
      if (found) {
        break
      }
    }
    if (found) {
      break
    }
  }
  if (found) {
    c_sub <- rbind(c_sub, subid)
  }
}
# This gives a vector c_sub of 21 subjectIDs (see the printed "ILI_V3_infected_donors_criteria2_part2" df below)
# If we want to exclude symptoms that were positive before day 1 as part of the classification criteria, we use the following loop
# Same pairwise search as for c_sub, run on the day -3 to 6 data, with one
# extra condition: neither column of the pair may have a non-zero value in the
# rows up to and including the last StudyDay == -1 row.
# Columns 1:3 are carried over unchanged (must include StudyDay); columns 4:13
# are the compared values with NA treated as 0 (presumably 0/1 symptom
# indicators -- TODO confirm against the upstream df).
sub <- unique(ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6$SubjectID)
c_sub2 <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0 # set to 1 once this subject has been added to c_sub2
  subid <- sub[i]
  temp <- ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6[ILI_V3_donors_infected_afebrile_1row_per_studyday_neg3to6$SubjectID == subid, ]
  temp1 <- temp[, 4:13]
  temp1[is.na(temp1)] <- 0
  temp <- cbind(temp[, 1:3], temp1)
  # Rows counted as "before day 1": row 1 through the last StudyDay == -1 row.
  # If the subject has no such row the set is empty (sums below are then 0)
  # instead of raising an error from 1:integer(0).
  last_pre_row <- tail(which(temp$StudyDay == -1), n = 1)
  pre_rows <- if (length(last_pre_row) == 1) seq_len(last_pre_row) else integer(0)
  # seq_len() avoids the 1, 0 countdown when a subject has only one row
  for (j in seq_len(nrow(temp) - 1)) {
    for (k in 5:10) {
      for (l in (k + 1):13) {
        if (temp[j, k] + temp[j, l] + temp[j + 1, k] + temp[j + 1, l] == 4) {
          if (temp$StudyDay[j + 1] == temp$StudyDay[j] + 1) {
            # Totals of each column of the pair over the pre-day-1 rows.
            # The earlier version accumulated sum2 from sum1
            # (sum2 <- sum1 + temp[m, l]), so sum2 only reflected column l in
            # the final pre-day-1 row; each column is now summed on its own.
            sum1 <- sum(temp[pre_rows, k])
            sum2 <- sum(temp[pre_rows, l])
            if (sum1 == 0 && sum2 == 0) {
              c_sub2 <- rbind(c_sub2, subid)
              token <- 1
              break
            }
          }
        }
      }
      # propagate the break out of the k and j loops once the subject is found
      if (token == 1) {
        break
      }
    }
    if (token == 1) {
      break
    }
  }
}
# NOTE(review): counts previously recorded for c_sub2 were produced with the
# sum2 accumulation bug; re-run and re-compare against c_sub.
# This was recorded as yielding 18 subjectIDs, with no change in the result compared with the less stringent criteria.
# NOTE: that count has not been re-verified here; the c_sub printed below contains 21 subjectIDs.
# Thus, we will keep the original c_sub that is from the less stringent criteria
# Get the original c_sub vector of subjectIDs into a dataframe
# as.data.frame() turns the one-column c_sub matrix into a data frame whose
# column is named "V1"; rename it to SubjectID so it can be joined on below.
ILI_V3_infected_donors_criteria2_part2 <- rename(
  as.data.frame(c_sub),
  SubjectID = V1
)
print(ILI_V3_infected_donors_criteria2_part2)
##          SubjectID
## subid          110
## subid.1        112
## subid.2        113
## subid.3        114
## subid.4        125
## subid.5        127
## subid.6        128
## subid.7        129
## subid.8        131
## subid.9        134
## subid.10       136
## subid.11       137
## subid.12       139
## subid.13       140
## subid.14       142
## subid.15       143
## subid.16       144
## subid.17       145
## subid.18       147
## subid.19       150
## subid.20       152
# Union of ILI criteria 2 part 1 (2 respiratory symptoms on a single day df)
# and part 2 (c_sub): full join on the shared column, sorted by SubjectID.
ILI_V3_infected_donors_criteria2 <- arrange(
  full_join(
    ILI_V3_infected_donors_criteria2_2resp_singleday,
    ILI_V3_infected_donors_criteria2_part2
  ),
  SubjectID
)
## Joining, by = "SubjectID"
print(ILI_V3_infected_donors_criteria2)
##    SubjectID
## 1        110
## 2        112
## 3        113
## 4        114
## 5        125
## 6        127
## 7        128
## 8        129
## 9        131
## 10       134
## 11       136
## 12       137
## 13       139
## 14       140
## 15       142
## 16       143
## 17       144
## 18       145
## 19       147
## 20       150
## 21       152
# Union of the ILI criteria 1 and criteria 2 subject lists: full join on the
# shared column, sorted by SubjectID.
ILI_V3_infected_donors <- arrange(
  full_join(
    ILI_V3_infected_donors_criteria1,
    ILI_V3_infected_donors_criteria2
  ),
  SubjectID
)
## Joining, by = "SubjectID"
print(ILI_V3_infected_donors)
##    SubjectID
## 1        100
## 2        103
## 3        107
## 4        108
## 5        110
## 6        112
## 7        113
## 8        114
## 9        125
## 10       127
## 11       128
## 12       129
## 13       131
## 14       134
## 15       136
## 16       137
## 17       139
## 18       140
## 19       142
## 20       143
## 21       144
## 22       145
## 23       146
## 24       147
## 25       150
## 26       151
## 27       152
# Look up each ILI donor's QuarantineNumber, then count the distinct ILI
# donors within each quarantine for Table 1.
ILI_V3_infected_donors_table1 <- ILI_V3_infected_donors %>%
  left_join(Qdata_QuarantineNumbers, by = "SubjectID") %>%
  group_by(QuarantineNumber) %>%
  summarise(Number_ILI_V3 = n_distinct(SubjectID))
print(ILI_V3_infected_donors_table1)
## # A tibble: 3 x 2
##   QuarantineNumber Number_ILI_V3
##              <int>         <int>
## 1                1             8
## 2                2             5
## 3                3            14
# Append the ILI (version 3 criteria) count to Table 1 and express it as a
# fraction of the infected donors in the same quarantine.
Qdata_table1 <- Qdata_table1 %>%
  left_join(ILI_V3_infected_donors_table1, by = "QuarantineNumber") %>%
  mutate(
    Fraction_ILI_V3_of_Infected = Number_ILI_V3 / Number_Infected_Donors
  )
print(Qdata_table1)
## # A tibble: 3 x 8
##   QuarantineNumber Number_Infected… Number_Inoculat… Fraction_Infect…
##              <int>            <int>            <int>            <dbl>
## 1                1               15               20            0.75 
## 2                2               11               12            0.917
## 3                3               18               20            0.9  
## # ... with 4 more variables: Number_Symptomatic_V3 <int>,
## #   Fraction_Symptomatic_V3_of_Infected <dbl>, Number_ILI_V3 <int>,
## #   Fraction_ILI_V3_of_Infected <dbl>
#### Table 1 (donors): e) Number of febrile (and % of infected)  ####

# Start from the infected-donor list, pull in every Qdata record for those
# donors, and keep donor ("D") rows with a tympanic temperature above 37.9 C.
Qdata_infected_febrile <- Qdata_infected_donors %>%
  left_join(Qdata) %>%
  filter(
    Randomization_DorIRorCR == "D",
    Tympanic.temp..degrees.C. > 37.9
  )
## Joining, by = c("SubjectID", "QuarantineNumber")
# Count the distinct febrile infected donors in each quarantine.
Qdata_infected_febrile_table1 <- summarise(
  group_by(Qdata_infected_febrile, QuarantineNumber),
  Number_Febrile_Infected = n_distinct(SubjectID)
)
print(Qdata_infected_febrile_table1)
## # A tibble: 2 x 2
##   QuarantineNumber Number_Febrile_Infected
##              <int>                   <int>
## 1                1                       4
## 2                3                       2
# Append the febrile count to Table 1 and express it as a fraction of the
# infected donors; a quarantine absent from the febrile summary gets NA.
Qdata_table1 <- Qdata_table1 %>%
  left_join(Qdata_infected_febrile_table1, by = "QuarantineNumber") %>%
  mutate(
    Fraction_Febrile_Infected_of_Total_Infected =
      Number_Febrile_Infected / Number_Infected_Donors
  )

#### Asymptomatic -- for the text of the paper ####
# Asymptomatic is defined as not symptomatic
# Since ILI must also be symptomatic they are not included in this definition
# Asymptomatic should probably not be febrile, but let's check to make sure all the febrile fell within the Symptomatic pool
# Then we can take the inverse of the Symptomatic group as the Asymptomatic group.
# Critical question: Are we talking about the 42 infected donors or the 52 total donors?
# Answer: We are talking about out of the 42 infected donors. 

# Are all the febrile cases also symptomatic?
# First reduce the febrile records to one row per SubjectID (only that column
# is kept).
Qdata_infected_febrile_subjectID <- distinct(Qdata_infected_febrile, SubjectID)
print(Qdata_infected_febrile_subjectID)
##   SubjectID
## 1       100
## 2       103
## 3       107
## 4       108
## 5       146
## 6       151
# There are 6 subjectIDs that were febrile among the infected donors (see the list printed above)
# Now let's see the list of symptomatic subjectIDs and compare
# Apparently this analysis was already done as part of figuring out the group that was symptomatic
# Found one instance where there was someone who was febrile, who didn't make the list of symptomatic (#148)
# We can see if we get the same result here.
# Febrile infected donors that do NOT appear in the symptomatic list:
# anti_join() keeps only the febrile SubjectIDs with no match.
Febrile_not_symptomatic <- anti_join(
  distinct(Qdata_infected_febrile_subjectID, SubjectID),
  Symptomatic_V3_donors_infected_combined
)
## Joining, by = "SubjectID"
print(Febrile_not_symptomatic)
## [1] SubjectID
## <0 rows> (or 0-length row.names)
# In this run the anti-join returns 0 rows, i.e. every febrile infected donor is also on the symptomatic list. The earlier note singled out subject #148 as febrile and not symptomatic, so that subject is still examined below.
# For subject 148: which quarantine, how high was the fever, and on which
# study days / times did readings above 37.9 C occur?
# One row is kept per SubjectID x StudyDay x SDC_time combination.
Qdata_148 <- Qdata %>%
  filter(SubjectID == 148, Tympanic.temp..degrees.C. > 37.9) %>%
  select(
    QuarantineNumber, SubjectID, Tympanic.temp..degrees.C.,
    StudyDay, Sx_Date, SDC_time
  ) %>%
  distinct(SubjectID, StudyDay, SDC_time, .keep_all = TRUE)

# Infected donors who never met the symptomatic criteria: keep the quarantine
# number and SubjectID of every infected donor that has no match in the
# symptomatic list.
never_symptomatic <- anti_join(
  select(Qdata_infected_donors, QuarantineNumber, SubjectID),
  Symptomatic_V3_donors_infected_combined
)
## Joining, by = "SubjectID"
print(never_symptomatic)
##    QuarantineNumber SubjectID
## 1                 1       101
## 2                 1       104
## 3                 1       106
## 4                 1       115
## 5                 2       123
## 6                 2       124
## 7                 2       130
## 8                 2       132
## 9                 3       133
## 10                3       148
# This generates a list of 10
# Remove from the never-symptomatic list anyone who appears in
# Febrile_not_symptomatic (the febrile-but-not-symptomatic donors).
# NOTE(review): the result printed below still has 10 rows and still includes
# subject 148, so nothing is removed in this run -- confirm that this matches
# the intent of the object name.
asymptomatic_without148 <- anti_join(never_symptomatic, Febrile_not_symptomatic)
## Joining, by = "SubjectID"
print(asymptomatic_without148)
##    QuarantineNumber SubjectID
## 1                 1       101
## 2                 1       104
## 3                 1       106
## 4                 1       115
## 5                 2       123
## 6                 2       124
## 7                 2       130
## 8                 2       132
## 9                 3       133
## 10                3       148
# The list that keeps subject 148 is simply the full never-symptomatic list.
asymptomatic_with148 <- never_symptomatic
print(asymptomatic_with148)
##    QuarantineNumber SubjectID
## 1                 1       101
## 2                 1       104
## 3                 1       106
## 4                 1       115
## 5                 2       123
## 6                 2       124
## 7                 2       130
## 8                 2       132
## 9                 3       133
## 10                3       148
# Since there were no other symptoms with 148 and the fever was low-grade and only appeared during a single instance we will not exclude 148 from the list of asymptomatic

# Aggregate the asymptomatic list: number of distinct asymptomatic subjects
# within each quarantine.
asymptomatic_by_Q <- summarise(
  group_by(asymptomatic_with148, QuarantineNumber),
  Asymptomatic = n_distinct(SubjectID)
)
print(asymptomatic_by_Q)
## # A tibble: 3 x 2
##   QuarantineNumber Asymptomatic
##              <int>        <int>
## 1                1            4
## 2                2            4
## 3                3            2
# Cleaning up "asymptomatic_by_Q" for inclusion in the SI
# The quarantine label becomes a character column so that a "Total" row can be
# appended directly. This replaces the earlier approach, which summed the
# quarantine numbers themselves with the deprecated funs() helper and then
# overwrote hard-coded row 4 with "Total" (valid only for exactly 3 rows).
asymptomatic_by_Q <- asymptomatic_by_Q %>%
  rename(`Quarantine #` = QuarantineNumber) %>%
  mutate(`Quarantine #` = as.character(`Quarantine #`)) %>%
  # `.` is the table built so far; the appended row holds the column total
  bind_rows(tibble(`Quarantine #` = "Total", Asymptomatic = sum(.$Asymptomatic)))

# write these files out to the results folder
# write_csv(asymptomatic_by_Q, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/Asymptomatic_Q.csv")

#### Table 1 (donors): f) Number of PCR confirmed infection (and % of infected) ####

# This was already done to get the number of infected donors for the first few columns in this Table 1
# Redo what was done earlier, but tweaking for the purpose of this column in the table 1

# Per donor, count the study days with a PCR-positive result.
# POC criterion: a single PCR-positive day is enough to call a case.
# A row counts as positive when InfA_Ct is present, below 38 and not 0.
Qdata_pcr_pos1_or_more_days <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "D",
    !is.na(InfA_Ct),
    InfA_Ct < 38,
    InfA_Ct != 0
  ) %>%
  group_by(SubjectID, StudyDay) %>%
  summarise(count = n()) %>%
  # the first summarise leaves the data grouped by SubjectID, so this second
  # one yields one row per subject
  summarise(NumberDaysPosPCR = n_distinct(StudyDay)) %>%
  filter(NumberDaysPosPCR >= 1)
print(Qdata_pcr_pos1_or_more_days)
## # A tibble: 41 x 2
##    SubjectID NumberDaysPosPCR
##        <int>            <int>
##  1       100                6
##  2       103                5
##  3       104                4
##  4       106                3
##  5       107                6
##  6       108                6
##  7       110                4
##  8       112                5
##  9       113                5
## 10       114                5
## # ... with 31 more rows
# Attach the quarantine number to each PCR-positive donor and count the
# distinct donors per quarantine.
Qdata_pcr_pos1_or_more_days_table1 <- summarise(
  group_by(
    left_join(Qdata_pcr_pos1_or_more_days, Qdata_QuarantineNumbers),
    QuarantineNumber
  ),
  Number_PCR_Infected_Donors = n_distinct(SubjectID)
)
## Joining, by = "SubjectID"
print(Qdata_pcr_pos1_or_more_days_table1)
## # A tibble: 3 x 2
##   QuarantineNumber Number_PCR_Infected_Donors
##              <int>                      <int>
## 1                1                         13
## 2                2                         11
## 3                3                         17
# Append the PCR-positive donor count to Table 1, plus its fraction of the
# infected donors in the same quarantine.
Qdata_table1 <- Qdata_table1 %>%
  left_join(Qdata_pcr_pos1_or_more_days_table1, by = "QuarantineNumber") %>%
  mutate(
    Fraction_PCR_Infected_Donors_of_Infected =
      Number_PCR_Infected_Donors / Number_Infected_Donors
  )
print(Qdata_table1)
## # A tibble: 3 x 12
##   QuarantineNumber Number_Infected… Number_Inoculat… Fraction_Infect…
##              <int>            <int>            <int>            <dbl>
## 1                1               15               20            0.75 
## 2                2               11               12            0.917
## 3                3               18               20            0.9  
## # ... with 8 more variables: Number_Symptomatic_V3 <int>,
## #   Fraction_Symptomatic_V3_of_Infected <dbl>, Number_ILI_V3 <int>,
## #   Fraction_ILI_V3_of_Infected <dbl>, Number_Febrile_Infected <int>,
## #   Fraction_Febrile_Infected_of_Total_Infected <dbl>,
## #   Number_PCR_Infected_Donors <int>,
## #   Fraction_PCR_Infected_Donors_of_Infected <dbl>
#### But the text for the paper also asks for the number of volunteers that were positive on each study day 1-6 ####
# Denominator for the per-day percentages: for the PCR-positive donors, keep
# one record per subject per study day (days 1-6) and count records per day.
Qdata_pcr_pos1_or_more_days_days1to6_total_samples <- Qdata_pcr_pos1_or_more_days %>%
  left_join(Qdata) %>%
  select(QuarantineNumber, SubjectID, StudyDay, InfA_Ct) %>%
  group_by(SubjectID) %>%
  distinct(StudyDay, .keep_all = TRUE) %>%
  filter(StudyDay %in% 1:6) %>%
  ungroup() %>%
  group_by(StudyDay) %>%
  summarise(Number_samples_each_studyday_positive_subjects = n())
## Joining, by = "SubjectID"
# This gives the number of samples (1 for each subjectID) on each of study days 1-6 for the PCR infected subjects.
# NOTE(review): an earlier comment reported 36 per day; with the 41 PCR-positive donors printed above the count is presumably 41 per day -- confirm from this df.
# Number and percent of PCR-positive donors on each of study days 1-6.
# One record per subject per day is kept; a record counts as positive here
# when InfA_Ct > 0.
# NOTE(review): the case definition above uses InfA_Ct < 38 & InfA_Ct != 0,
# whereas this step only requires InfA_Ct > 0 -- confirm this is intended.
Qdata_pcr_pos1_or_more_days_days1to6 <- Qdata_pcr_pos1_or_more_days %>%
  left_join(Qdata) %>%
  select(QuarantineNumber, SubjectID, StudyDay, InfA_Ct) %>%
  group_by(SubjectID) %>%
  distinct(StudyDay, .keep_all = TRUE) %>%
  filter(StudyDay %in% 1:6) %>%
  ungroup() %>%
  filter(InfA_Ct > 0) %>%
  group_by(StudyDay) %>%
  summarize(number_PCR_positive = n_distinct(SubjectID)) %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_total_samples) %>%
  mutate(percent_pos = (number_PCR_positive / Number_samples_each_studyday_positive_subjects) * 100) %>%
  rename(`Study Day` = StudyDay) %>%
  select(-Number_samples_each_studyday_positive_subjects) # per-day denominator is no longer needed
## Joining, by = "SubjectID"
## Joining, by = "StudyDay"
# Totals row. The denominator is the actual number of samples over days 1-6,
# taken from the total-samples table. It was previously hard-coded as 6 * 36,
# which does not match the per-day denominators used above (the per-day
# percentages printed below correspond to 41 samples per day), so the
# previously recorded total percentage (84) was computed on a stale denominator.
total_pcr_positive_days1to6 <- sum(Qdata_pcr_pos1_or_more_days_days1to6$number_PCR_positive)
total_samples_days1to6 <- sum(
  Qdata_pcr_pos1_or_more_days_days1to6_total_samples$Number_samples_each_studyday_positive_subjects
)
Qdata_pcr_pos1_or_more_days_days1to6[7, ] <- c(
  7,
  total_pcr_positive_days1to6,
  (total_pcr_positive_days1to6 / total_samples_days1to6) * 100
)
# Note that the 7 in the first column is a place holder for what should be later labeled "Total"
# Had to keep it in numeric format for now

# Version for the line chart: percent as a number rounded to 2 decimals, with
# presentation-ready column names.
Qdata_pcr_pos1_or_more_days_days1to6_line <- Qdata_pcr_pos1_or_more_days_days1to6 %>%
  mutate(percent_pos = round(as.numeric(percent_pos), 2)) %>%
  rename(
    `PCR Positive Subjects` = number_PCR_positive,
    `Percent Positive` = percent_pos
  )

# write this out because it is used in the results in text for line chart.
# write_csv(Qdata_pcr_pos1_or_more_days_days1to6_line, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_PCR_Positive_1plus_days_Study_Day_line.csv")

# Tabular version, in case the data are presented as a table (or both ways).
# Percents are rounded to whole numbers, which seems to be how the Nottingham
# team has drafted the paper, and then wrapped in parentheses.
Qdata_pcr_pos1_or_more_days_days1to6_line_round_parenth <-
  Qdata_pcr_pos1_or_more_days_days1to6_line %>%
  mutate(`Percent Positive` = round(`Percent Positive`, 0))
Qdata_pcr_pos1_or_more_days_days1to6_line_round_parenth <-
  Qdata_pcr_pos1_or_more_days_days1to6_line_round_parenth %>%
  mutate(`Percent Positive` = paste0("(", `Percent Positive`, ")"))

# Merge count and "(percent)" into a single "N (P)" column.
Qdata_pcr_pos1_or_more_days_days1to6_table <- unite(
  Qdata_pcr_pos1_or_more_days_days1to6_line_round_parenth,
  `PCR Positive Subjects (%)`,
  `PCR Positive Subjects`, `Percent Positive`,
  sep = " ", remove = TRUE
)

# Replace the numeric placeholder 7 in row 7 with the label "Total"; this
# turns the `Study Day` column into character.
Qdata_pcr_pos1_or_more_days_days1to6_table$`Study Day`[7] <- "Total"

print(Qdata_pcr_pos1_or_more_days_days1to6_table)
## # A tibble: 7 x 2
##   `Study Day` `PCR Positive Subjects (%)`
##   <chr>       <chr>                      
## 1 1           19 (46)                    
## 2 2           36 (88)                    
## 3 3           35 (85)                    
## 4 4           34 (83)                    
## 5 5           33 (80)                    
## 6 6           24 (59)                    
## 7 Total       181 (84)
# write this out because it is used in the results in text.
# write_csv(Qdata_pcr_pos1_or_more_days_days1to6_table, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_PCR_Positive_1plus_days_Study_Day_Table.csv")
# Note that the percent in parentheses here is the percent positive of all PCR positive samples 

#### What if we wanted to compare how the PCR positives compared over the 6 study days between Qs? ####
# Stratify the day 1-6 PCR-positive records by quarantine: one column per
# quarantine number, holding the Ct value of that record (NA elsewhere).
# These per-Q columns can then feed categorical tests of between-group
# differences.

Qdata_pcr_pos1_or_more_days_days1to6_byQ <- Qdata_pcr_pos1_or_more_days %>%
  left_join(Qdata) %>%
  select(QuarantineNumber, SubjectID, StudyDay, InfA_Ct) %>%
  group_by(SubjectID) %>%
  distinct(StudyDay, .keep_all = TRUE) %>%
  filter(StudyDay %in% 1:6) %>%
  ungroup() %>%
  filter(InfA_Ct > 0) %>%
  spread(key = QuarantineNumber, value = InfA_Ct)
## Joining, by = "SubjectID"
## Let's get each Q individually and then put them together ##
# Q1, Q2 and Q3 go through exactly the same steps, so the steps live in two
# helpers instead of three copy-pasted pipelines.

# Samples per study day (days 1-6) among the PCR-positive donors of quarantine
# `q`: one record is kept per subject per study day, then records are counted
# per day.
# Returns a tibble with columns StudyDay and
# Q<q>_Number_samples_each_studyday_positive_subjects.
# NOTE(review): earlier comments reported 12, 10 and 14 samples per day for
# Q1, Q2 and Q3; the PCR-positive donor counts printed for Table 1 are 13, 11
# and 17, so re-check the per-day counts from these data frames.
pcr_total_samples_by_studyday <- function(q) {
  totals <- Qdata_pcr_pos1_or_more_days %>%
    left_join(Qdata, by = "SubjectID") %>%
    select(QuarantineNumber, SubjectID, StudyDay, InfA_Ct) %>%
    group_by(SubjectID) %>%
    distinct(StudyDay, .keep_all = TRUE) %>%
    filter(StudyDay %in% 1:6) %>%
    ungroup() %>%
    filter(QuarantineNumber == !!q) %>%
    group_by(StudyDay) %>%
    summarize(n_samples = n())
  names(totals)[names(totals) == "n_samples"] <-
    paste0("Q", q, "_Number_samples_each_studyday_positive_subjects")
  totals
}

# PCR-positive subjects per study day for quarantine `q`, joined to `totals`
# (the output of pcr_total_samples_by_studyday(q)), with a totals row and the
# percent positive rounded to 2 decimals.
# Returns a tibble with columns `Study Day`, `Q<q> PCR Positive Samples`,
# `Q<q> Total Samples from Positive Subjects`, `Q<q> Percent Positive Samples`.
# The totals row uses 7 in `Study Day` as a numeric placeholder; it is
# relabelled "Total" once the table is ready for output.
pcr_positive_by_studyday <- function(q, totals) {
  q_col <- as.character(q) # spread() named the per-Q columns "1", "2", "3"
  names(totals) <- c("StudyDay", "n_total")
  counts <- Qdata_pcr_pos1_or_more_days_days1to6_byQ %>%
    filter(!is.na(.data[[q_col]])) %>%
    group_by(StudyDay) %>%
    summarize(n_positive = n_distinct(SubjectID, na.rm = TRUE)) %>%
    left_join(totals, by = "StudyDay")
  # Append the totals row rather than assigning to a hard-coded row 7, so the
  # row lands directly after the last study day even if a day is missing.
  # as.numeric() keeps the count columns double, as they were before.
  counts <- bind_rows(
    counts,
    tibble(
      StudyDay = 7,
      n_positive = as.numeric(sum(counts$n_positive)),
      n_total = as.numeric(sum(counts$n_total))
    )
  )
  counts$pct_positive <- round((counts$n_positive / counts$n_total) * 100, 2)
  names(counts) <- c(
    "Study Day",
    paste0("Q", q, " PCR Positive Samples"),
    paste0("Q", q, " Total Samples from Positive Subjects"),
    paste0("Q", q, " Percent Positive Samples")
  )
  counts
}

## First deal with Q1
Qdata_pcr_pos1_or_more_days_days1to6_byQ_Q1totalsamples <- pcr_total_samples_by_studyday(1)
Qdata_pcr_pos1_or_more_days_days1to6_byQ1 <-
  pcr_positive_by_studyday(1, Qdata_pcr_pos1_or_more_days_days1to6_byQ_Q1totalsamples)

## Now deal with Q2
Qdata_pcr_pos1_or_more_days_days1to6_byQ_Q2totalsamples <- pcr_total_samples_by_studyday(2)
Qdata_pcr_pos1_or_more_days_days1to6_byQ2 <-
  pcr_positive_by_studyday(2, Qdata_pcr_pos1_or_more_days_days1to6_byQ_Q2totalsamples)

## Now deal with Q3
Qdata_pcr_pos1_or_more_days_days1to6_byQ_Q3totalsamples <- pcr_total_samples_by_studyday(3)
Qdata_pcr_pos1_or_more_days_days1to6_byQ3 <-
  pcr_positive_by_studyday(3, Qdata_pcr_pos1_or_more_days_days1to6_byQ_Q3totalsamples)

## Combine the Q1, Q2 and Q3 tables side by side, matching on `Study Day`,
## and drop the placeholder totals row (`Study Day` == 7).

Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line <-
  Qdata_pcr_pos1_or_more_days_days1to6_byQ1 %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_byQ2) %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_byQ3) %>%
  filter(`Study Day` != 7)
## Joining, by = "Study Day"
## Joining, by = "Study Day"
# Add all-Q columns per study day: positives, total samples, and the percent
# positive rounded to 2 decimals. These all-Q values were already generated
# earlier; recomputing them here is a deliberate redundant check.
# The result stays grouped by `Study Day`.

Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line <-
  Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line %>%
  group_by(`Study Day`) %>%
  mutate(
    `All Q Total PCR Positive Samples` = sum(
      `Q1 PCR Positive Samples`,
      `Q2 PCR Positive Samples`,
      `Q3 PCR Positive Samples`
    ),
    `All Q Total Samples from Positive Subjects` = sum(
      `Q1 Total Samples from Positive Subjects`,
      `Q2 Total Samples from Positive Subjects`,
      `Q3 Total Samples from Positive Subjects`
    ),
    `All Q Percent Positive Samples` = round(
      (`All Q Total PCR Positive Samples` / `All Q Total Samples from Positive Subjects`) * 100,
      2
    )
  )

# Reshape to long format for ggplot2: study day on the x-axis, percent
# positive on the y-axis, one group per quarantine.
# Columns 4, 7, 10 and 13 are the Q1, Q2, Q3 and All Q percent columns.
Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line_for_plot <-
  Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line %>%
  gather(key = "Quarantine", value = "Percent Positive", 4, 7, 10, 13) %>%
  select(`Study Day`, Quarantine, `Percent Positive`)
# Shorten the labels: rows 1-18 (Q1, Q2, Q3 x 6 days) keep their first 2
# characters, rows 19-24 keep their first 5 ("All Q").
Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line_for_plot$Quarantine[1:18] <- str_sub(
  Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line_for_plot$Quarantine[1:18],
  start = 1, end = 2
)
Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line_for_plot$Quarantine[19:24] <- str_sub(
  Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line_for_plot$Quarantine[19:24],
  start = 1, end = 5
)

# Quick look at the line plot: percent positive by study day, one coloured
# line (with points) per quarantine group.
p <- ggplot(
  data = Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line_for_plot,
  mapping = aes(
    x = `Study Day`,
    y = `Percent Positive`,
    group = Quarantine,
    colour = Quarantine
  )
) +
  geom_line() +
  geom_point() +
  scale_color_manual(values = c("black", "chartreuse4", "cyan2", "darkorchid1")) +
  theme_bw() +
  labs(x = "Study Day", y = "Percent Positive")
p

# Now write out this df for line plotting
# write_csv(Qdata_pcr_pos1_or_more_days_days1to6_byQ123_line_for_plot, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_PCR_Pos_1plus_days_StudyDay_Q123_line.csv")

## Table version: Q1, Q2, Q3 and the all-Q line table joined on `Study Day`
## (the totals row with `Study Day` == 7 is kept here).
Qdata_pcr_pos1_or_more_days_days1to6_byQ123_table <-
  Qdata_pcr_pos1_or_more_days_days1to6_byQ1 %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_byQ2) %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_byQ3) %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_line) %>%
  rename(
    `All Q PCR Positive Samples` = `PCR Positive Subjects`,
    `All Q Percent Positive` = `Percent Positive`
  )
## Joining, by = "Study Day"
## Joining, by = "Study Day"
## Joining, by = "Study Day"
# Tidy up for presentation:
#  1. drop the total-sample-count columns and round everything to whole numbers
#  2. wrap each percent in parentheses
#  3. merge every count with its "(percent)" into one "N (P)" column
Qdata_pcr_pos1_or_more_days_days1to6_byQ123_table <-
  Qdata_pcr_pos1_or_more_days_days1to6_byQ123_table %>%
  select(
    `Study Day`,
    `Q1 PCR Positive Samples`, `Q1 Percent Positive Samples`,
    `Q2 PCR Positive Samples`, `Q2 Percent Positive Samples`,
    `Q3 PCR Positive Samples`, `Q3 Percent Positive Samples`,
    `All Q PCR Positive Samples`, `All Q Percent Positive`
  ) %>%
  mutate_all(round, 0) %>%
  mutate_at(
    vars(
      `Q1 Percent Positive Samples`,
      `Q2 Percent Positive Samples`,
      `Q3 Percent Positive Samples`,
      `All Q Percent Positive`
    ),
    function(pct) paste0("(", pct, ")")
  ) %>%
  unite(`Q1 PCR Positive Samples (%)`,
    `Q1 PCR Positive Samples`, `Q1 Percent Positive Samples`,
    sep = " ", remove = TRUE
  ) %>%
  unite(`Q2 PCR Positive Samples (%)`,
    `Q2 PCR Positive Samples`, `Q2 Percent Positive Samples`,
    sep = " ", remove = TRUE
  ) %>%
  unite(`Q3 PCR Positive Samples (%)`,
    `Q3 PCR Positive Samples`, `Q3 Percent Positive Samples`,
    sep = " ", remove = TRUE
  ) %>%
  unite(`All Q PCR Positive Samples (%)`,
    `All Q PCR Positive Samples`, `All Q Percent Positive`,
    sep = " ", remove = TRUE
  )

# Replace the placeholder 7 in row 7 of `Study Day` with "Total"; the column
# becomes character as a result.
Qdata_pcr_pos1_or_more_days_days1to6_byQ123_table$`Study Day`[7] <- "Total"

# Now write out this table file
# write_csv(Qdata_pcr_pos1_or_more_days_days1to6_byQ123_table, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_PCR_Pos_1plus_days_StudyDay_Q123_table.csv")

#### What if we want to  plot the NPSwab Ct values over the 6 study days (positive swabs only)? ####
# Ct values for plotting: for the PCR-positive donors keep one record per
# subject per study day (days 1-6), then keep only records with InfA_Ct > 0.
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT <-
  Qdata_pcr_pos1_or_more_days %>%
  left_join(Qdata) %>%
  select(QuarantineNumber, SubjectID, StudyDay, InfA_Ct) %>%
  group_by(SubjectID) %>%
  distinct(StudyDay, .keep_all = TRUE) %>%
  filter(StudyDay %in% 1:6) %>%
  ungroup() %>%
  filter(InfA_Ct > 0)
## Joining, by = "SubjectID"
## Mean and standard deviation of the Ct values per study day, first for each
## quarantine separately and then for all quarantines together ##

## Q1
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Q1 <- summarise(
  group_by(
    filter(Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT, QuarantineNumber == 1),
    StudyDay
  ),
  Q1meanCT = mean(InfA_Ct),
  Q1sdCT = sd(InfA_Ct)
)

## Q2
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Q2 <- summarise(
  group_by(
    filter(Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT, QuarantineNumber == 2),
    StudyDay
  ),
  Q2meanCT = mean(InfA_Ct),
  Q2sdCT = sd(InfA_Ct)
)

## Q3
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Q3 <- summarise(
  group_by(
    filter(Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT, QuarantineNumber == 3),
    StudyDay
  ),
  Q3meanCT = mean(InfA_Ct),
  Q3sdCT = sd(InfA_Ct)
)

## All quarantines pooled: same summary on the unfiltered
## "Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT" df

Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_Mean_SD <- summarise(
  group_by(Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT, StudyDay),
  Qall_meanCT = mean(InfA_Ct),
  Qall_sdCT = sd(InfA_Ct)
)

## Put the Q1, Q2, Q3 and all-Q summaries side by side, matching on StudyDay
## (Q1 is the left table, so only study days present for Q1 are kept)
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall <-
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Q1 %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Q2) %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Q3) %>%
  left_join(Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_Mean_SD)
## Joining, by = "StudyDay"
## Joining, by = "StudyDay"
## Joining, by = "StudyDay"
# Prepare the Ct summaries for plotting by converting them to long format:
# the mean columns (2, 4, 6, 8) and the sd columns (3, 5, 7, 9) are gathered
# separately and then joined back together.

# Means -> long
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_Mean <-
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall %>%
  gather(key = "Quarantine", value = "CT", 2, 4, 6, 8) %>%
  select(StudyDay, Quarantine, CT)
# Reduce the labels to Q1, Q2, Q3 (first 2 characters, rows 1-18) and Qall
# (first 4 characters, rows 19-24)
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_Mean$Quarantine[1:18] <- str_sub(
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_Mean$Quarantine[1:18],
  start = 1, end = 2
)
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_Mean$Quarantine[19:24] <- str_sub(
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_Mean$Quarantine[19:24],
  start = 1, end = 4
)

# Standard deviations -> long
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_SD <-
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall %>%
  gather(key = "Quarantine", value = "SD", 3, 5, 7, 9) %>%
  select(StudyDay, Quarantine, SD)
# Same label shortening as for the means: Q1, Q2, Q3, Qall
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_SD$Quarantine[1:18] <- str_sub(
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_SD$Quarantine[1:18],
  start = 1, end = 2
)
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_SD$Quarantine[19:24] <- str_sub(
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_SD$Quarantine[19:24],
  start = 1, end = 4
)

# Join mean and sd on the shared StudyDay and Quarantine columns
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_long <- left_join(
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_Mean,
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_SD
)
## Joining, by = c("StudyDay", "Quarantine")
# Final clean-up for the plot: presentation name for the day column and
# "All Q" as the label of the pooled rows (19-24)
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_long <- rename(
  Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_long,
  `Study Day` = StudyDay
)
Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_long$Quarantine[19:24] <- "All Q"

# Quick check of the Ct plot before using it in RMarkdown: mean Ct per study
# day for each quarantine group, with +/- 1 SD error bars. Points and error
# bars are dodged by 0.25 so that the groups do not overlap.
pd <- position_dodge(0.25)
p <- ggplot(
  data = Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_long,
  mapping = aes(x = `Study Day`, y = `CT`, group = Quarantine, colour = Quarantine)
) +
  geom_line() +
  geom_errorbar(
    aes(ymin = `CT` - `SD`, ymax = `CT` + `SD`),
    width = .1,
    position = pd
  ) +
  geom_point(position = pd) +
  scale_color_manual(values = c("black", "darkorange3", "cyan2", "darkorchid1")) +
  theme_bw() +
  labs(x = "Study Day", y = "NPS CT Value")
p

# Now write out this table file
# write_csv(Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall_long, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_PCR_1plus_days_CT_StudyDay_Q123_line.csv")

#### If we wanted this Ct line chart in a table format we could print out the following table ####
# Can work with the "Qdata_pcr_pos1_or_more_days_days1to6_byQ_NPS_CT_Qall" df but need to add the totals for each Q and then the overall total
# Would need to work on this separately. For now the figure serves to get the point across. 

#### Table 1 (donors): g) Number of PCR confirmed infection and seroconversion (and % of infected) ####

# Builds on the "Qdata_infected_donors" df created in section a) above, which
# already identifies the infected donors.
# Keep donors with at least one PCR-positive day AND a non-missing value in
# QuarantineNumber.x or QuarantineNumber.y (these two columns are relabelled
# "Seroconversion by HAI" and "Seroconversion by MN" further down).

Inf_PCR_and_Sero <- Qdata_infected_donors %>%
  filter(
    NumberDaysPosPCR >= 1,
    !(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y))
  )

# Distinct SubjectIDs meeting this criterion, per quarantine, for the table
Inf_PCR_and_Sero_table1 <- summarise(
  group_by(Inf_PCR_and_Sero, QuarantineNumber),
  Number_Positive_PCR_and_Seroconversion = n_distinct(SubjectID)
)

# Append the count to the cumulative Table 1 together with its fraction of the
# infected donors
Qdata_table1 <- Qdata_table1 %>%
  left_join(Inf_PCR_and_Sero_table1, by = "QuarantineNumber") %>%
  mutate(
    Fraction_Infected_by_PCR_and_Serology =
      Number_Positive_PCR_and_Seroconversion / Number_Infected_Donors
  )

#### Additional table for SI about positivity by HAI, MN, and/or at least 1 PCR positive test ####
# What if we are interested in the full summary, by Q, of how many had HAI, MN, and at least 1 PCR positive test, ...
# ... who had HAI or MN, and at least 1 PCR positive test, ...
# ... who had neither HAI nor MN, and at least 1 PCR positive test, ...
# ... who had HAI and MN, but did not meet PCR positivity, ...
# ... who had HAI or MN, but did not meet PCR positivity?
# Here we can provide the data in two different ways, one by subjectIDs, and one by aggregating by Q
# When aggregating by Q, it may be good to include totals and percents (out of the number infected per Q)

# First create the full set of SubjectIDs with the basic info of positive by HAI, MN, PCR, PCR study days positive
# Work with the "Qdata_infected_donors" df that was created earlier.

# Add a column for `PCR Positive`
# Per-subject SI table of infection evidence: seroconversion by HAI, seroconversion
# by MN, PCR positivity (>= 1 positive day), and the number of PCR-positive days.

# Placeholder column on the shared data frame; it is filled in the pipeline below.
Qdata_infected_donors$`PCR Positive` <- NA

SI_Inf_PCR_and_Sero <- Qdata_infected_donors %>%
  # Collapse the joined quarantine numbers into 0/1-style indicators (NA stays NA here)
  mutate(QuarantineNumber.x = ifelse(QuarantineNumber.x >= 1, 1, QuarantineNumber.x)) %>%
  mutate(QuarantineNumber.y = ifelse(QuarantineNumber.y >= 1, 1, QuarantineNumber.y)) %>%
  # A single PCR-positive day is enough to be flagged PCR positive
  mutate(`PCR Positive` = ifelse(NumberDaysPosPCR >= 1, 1, `PCR Positive`)) %>%
  rename(`Seroconversion by HAI` = QuarantineNumber.x, `Seroconversion by MN` = QuarantineNumber.y,
         `Number of Days with PCR Positive NPS` = NumberDaysPosPCR, `Quarantine #` = QuarantineNumber,
         `Volunteer ID` = SubjectID) %>%
  # Convert NAs to 0 across the indicator columns. A plain function is used instead of
  # the deprecated funs() helper; plain functions are accepted by every dplyr version.
  mutate_at(vars(`Seroconversion by HAI`:`PCR Positive`),
            function(x) replace(x, is.na(x), 0))
# Move the Quarantine # to the first column so the RMarkdown can create blocks for Qs 1, 2, and 3
# Also put the number of PCR days positive column to the right of the "PCR positive" column
SI_Inf_PCR_and_Sero <- SI_Inf_PCR_and_Sero[, c(5, 1:3, 6, 4)]

# Write this df out for use in the RMarkdown file for SI material
# write_csv(SI_Inf_PCR_and_Sero, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_Positive_HAI_MN_PCR_1plus_days_SubjectID.csv")

#### Discussion text: How many donors were PCR positive on at least one day (of days1-4)? ####
# Distinct SubjectIDs with a Ct value above 0 on any of study days 1 through 4.
# NOTE(review): unlike the footnote PCR tally later in this script, this filter does not
# restrict to donors (Randomization_DorIRorCR == "D") or apply the Ct < 38 cutoff --
# confirm that this is intended for the discussion text.
pcr_positive <- Qdata %>%
  filter(InfA_Ct > 0, StudyDay %in% 1:4) %>%
  distinct(SubjectID)
# Shows that there were 43 donors with at least one day of PCR positivity on study days 1-4
# 43 out of the 52 donors = 82.7%

# Now summarize the number of unique SubjectIDs that meet this criteria by Q for the table
# Create the following categories:
# how many had HAI, MN, and at least 1 PCR positive test, ...
# who had HAI or MN, and at least 1 PCR positive test, ...
# who had neither HAI nor MN, and at least 1 PCR positive test, ...
# who had HAI and MN, but did not meet PCR positivity, ...
# who had HAI or MN, but did not meet PCR positivity?

# First add the columns of interest to the df
# Note that here "PCR positive" means meeting the POC case definition applied in this script, i.e., having at least one positive test (NumberDaysPosPCR >= 1).
# Per-quarantine counts of subjects in each serology x PCR category, followed by the
# number of inoculated and infected donors per quarantine (used later as denominators).
SI_Inf_PCR_and_Sero_by_Q_table1 <- SI_Inf_PCR_and_Sero %>%
  # Row-level building blocks for the four categories
  mutate(
    sero_either = `Seroconversion by HAI` == 1 | `Seroconversion by MN` == 1,
    sero_both = `Seroconversion by HAI` == 1 & `Seroconversion by MN` == 1,
    pcr_pos = `PCR Positive` == 1,
    pcr_neg = `PCR Positive` == 0
  ) %>%
  group_by(`Quarantine #`) %>%
  # Each count is the number of rows for which the category condition is TRUE
  summarize(
    `Seroconversion by HAI or MN, plus PCR Positive` =
      sum(as.numeric(pcr_pos & sero_either), na.rm = TRUE),
    `Seroconversion by HAI and MN, plus PCR Positive` =
      sum(as.numeric(pcr_pos & sero_both), na.rm = TRUE),
    `Seroconversion by HAI or MN, but not PCR Positive` =
      sum(as.numeric(pcr_neg & sero_either), na.rm = TRUE),
    `Seroconversion by HAI and MN, but not PCR Positive` =
      sum(as.numeric(pcr_neg & sero_both), na.rm = TRUE)
  ) %>%
  left_join(Qdata_inoculated_donors_table1, by = c("Quarantine #" = "QuarantineNumber")) %>%
  left_join(Qdata_infected_donors_table1, by = c("Quarantine #" = "QuarantineNumber"))

# before moving further, deal with sums and means and rounding (don't take mean the percents because the denominators are different for each Q!)
# Append a totals row, then derive the percent columns and wrap them in parentheses.
#
# Totals are column sums of the counts; percents are computed AFTER the totals row is
# added so that the overall percents use the pooled numerator/denominator (taking the
# mean of per-Q percents would be biased because the denominators differ for each Q).
#
# The totals row is built as a one-row data frame and appended with bind_rows() rather
# than assigning a bare vector into row 4 of the tibble: that form depends on the table
# having exactly 3 rows and is not reliably supported by tibble's row assignment.
# The first column is set to 4 as a placeholder; it is changed to "Total" later, but it
# needs to stay numeric for now.
SI_Inf_PCR_and_Sero_by_Q_table1 <- bind_rows(
  SI_Inf_PCR_and_Sero_by_Q_table1,
  SI_Inf_PCR_and_Sero_by_Q_table1 %>%
    summarise_at(vars(-`Quarantine #`), sum) %>%
    mutate(`Quarantine #` = 4)
)

# Now add the percents; the percents for the totals row are computed from the summed counts
SI_Inf_PCR_and_Sero_by_Q_table1 <- SI_Inf_PCR_and_Sero_by_Q_table1 %>%
  mutate(`Percent Infected of Inoculated` = (Number_Infected_Donors/Number_Inoculated_Donors)*100,
         `Percent HAI or MN, and PCR of Infected` = (`Seroconversion by HAI or MN, plus PCR Positive`/Number_Infected_Donors)*100,
         `Percent HAI or MN, and PCR of Inoculated` = (`Seroconversion by HAI or MN, plus PCR Positive`/Number_Inoculated_Donors)*100,
         `Percent HAI and MN, and PCR of Infected` = (`Seroconversion by HAI and MN, plus PCR Positive`/Number_Infected_Donors)*100,
         `Percent HAI and MN, and PCR of Inoculated` = (`Seroconversion by HAI and MN, plus PCR Positive`/Number_Inoculated_Donors)*100,
         `Percent HAI or MN, no PCR of Infected` = (`Seroconversion by HAI or MN, but not PCR Positive`/Number_Infected_Donors)*100,
         `Percent HAI or MN, no PCR of Inoculated` = (`Seroconversion by HAI or MN, but not PCR Positive`/Number_Inoculated_Donors)*100,
         `Percent HAI and MN, no PCR of Infected` = (`Seroconversion by HAI and MN, but not PCR Positive`/Number_Infected_Donors)*100,
         `Percent HAI and MN, no PCR of Inoculated` = (`Seroconversion by HAI and MN, but not PCR Positive`/Number_Inoculated_Donors)*100) %>%
  # Round every percent to a whole number and wrap it in parentheses, e.g. 75 -> "(75)".
  # The columns are selected by their "Percent" name prefix rather than by position.
  mutate_at(vars(starts_with("Percent")),
            function(pct) paste0("(", round(pct, 0), ")"))

# deal with uniting the right percents with the right columns
# will do first set of parentheses for "of infected" and the second set for "of inoculated"
# Display version of the by-Q table with both percents attached to every count:
# "<count> (<% of infected>) (<% of inoculated>)".
SI_Inf_PCR_and_Sero_by_Q_table1_Inf_Inoc <- SI_Inf_PCR_and_Sero_by_Q_table1 %>%
  # "infected/inoculated", then append its percent
  unite(`Infected/Inoculated`,
        Number_Infected_Donors, Number_Inoculated_Donors,
        sep = "/") %>%
  unite(`Infected/Inoculated (%)`,
        `Infected/Inoculated`, `Percent Infected of Inoculated`,
        sep = " ") %>%
  # each category count followed by its two parenthesized percents
  unite(`Seroconversion by HAI or MN, plus PCR Positive (% of Infected) (% of Inoculated)`,
        `Seroconversion by HAI or MN, plus PCR Positive`,
        `Percent HAI or MN, and PCR of Infected`,
        `Percent HAI or MN, and PCR of Inoculated`,
        sep = " ") %>%
  unite(`Seroconversion by HAI and MN, plus PCR Positive (% of Infected) (% of Inoculated)`,
        `Seroconversion by HAI and MN, plus PCR Positive`,
        `Percent HAI and MN, and PCR of Infected`,
        `Percent HAI and MN, and PCR of Inoculated`,
        sep = " ") %>%
  unite(`Seroconversion by HAI or MN, but not PCR Positive (% of Infected) (% of Inoculated)`,
        `Seroconversion by HAI or MN, but not PCR Positive`,
        `Percent HAI or MN, no PCR of Infected`,
        `Percent HAI or MN, no PCR of Inoculated`,
        sep = " ") %>%
  unite(`Seroconversion by HAI and MN, but not PCR Positive (% of Infected) (% of Inoculated)`,
        `Seroconversion by HAI and MN, but not PCR Positive`,
        `Percent HAI and MN, no PCR of Infected`,
        `Percent HAI and MN, no PCR of Inoculated`,
        sep = " ")

# Label the 4th row (the totals row, numbered 4 as a placeholder) as "Total", then move
# the Infected/Inoculated column (position 6) directly after the quarantine column
SI_Inf_PCR_and_Sero_by_Q_table1_Inf_Inoc[["Quarantine #"]][4] <- "Total"
SI_Inf_PCR_and_Sero_by_Q_table1_Inf_Inoc <- SI_Inf_PCR_and_Sero_by_Q_table1_Inf_Inoc[, c(1, 6, 2:5)]

# finish and print to box sync and prepare markdown file to draw from here. 
# write_csv(SI_Inf_PCR_and_Sero_by_Q_table1_Inf_Inoc, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_Inf_PCR_1plus_days_and_Sero_by_Q_table1_Inf_Inoc.csv")

#### But what if we don't want to know about the inoculated donors and the percent of the inoculated? ####
# Creating new version of the "SI_Inf_PCR_and_Sero_by_Q_table1_manuscript" df that drops the percent inoculated.

# deal with uniting the right percents with the right columns
# here only the "of infected" percents are united with the counts; the "of inoculated" percents are left out of the final table
# Display version of the by-Q table that reports only the percent of infected next to
# each category count: "<count> (<% of infected>)".
SI_Inf_PCR_and_Sero_by_Q_table1_Inf <- SI_Inf_PCR_and_Sero_by_Q_table1 %>%
  # "infected/inoculated", then append its percent
  unite(`Infected/Inoculated`,
        Number_Infected_Donors, Number_Inoculated_Donors,
        sep = "/") %>%
  unite(`Infected/Inoculated (%)`,
        `Infected/Inoculated`, `Percent Infected of Inoculated`,
        sep = " ") %>%
  # each category count followed by its parenthesized percent of infected
  unite(`Seroconversion by HAI or MN, plus PCR Positive (% of Infected)`,
        `Seroconversion by HAI or MN, plus PCR Positive`,
        `Percent HAI or MN, and PCR of Infected`,
        sep = " ") %>%
  unite(`Seroconversion by HAI and MN, plus PCR Positive (% of Infected)`,
        `Seroconversion by HAI and MN, plus PCR Positive`,
        `Percent HAI and MN, and PCR of Infected`,
        sep = " ") %>%
  unite(`Seroconversion by HAI or MN, but not PCR Positive (% of Infected)`,
        `Seroconversion by HAI or MN, but not PCR Positive`,
        `Percent HAI or MN, no PCR of Infected`,
        sep = " ") %>%
  unite(`Seroconversion by HAI and MN, but not PCR Positive (% of Infected)`,
        `Seroconversion by HAI and MN, but not PCR Positive`,
        `Percent HAI and MN, no PCR of Infected`,
        sep = " ")

# Label the 4th row (the totals row) as "Total". The column subset keeps the quarantine
# column, the Infected/Inoculated column (position 6), and the four category columns;
# any columns beyond position 6 are not carried into the final table.
SI_Inf_PCR_and_Sero_by_Q_table1_Inf[["Quarantine #"]][4] <- "Total"
SI_Inf_PCR_and_Sero_by_Q_table1_Inf <- SI_Inf_PCR_and_Sero_by_Q_table1_Inf[, c(1, 6, 2:5)]

# finish and print to box sync and prepare markdown file to draw from here. 
# write_csv(SI_Inf_PCR_and_Sero_by_Q_table1_Inf, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_Inf_PCR_1plus_days_and_Sero_by_Q_table1_Inf.csv")

#### Table 1 (donors): h) Number of seroconversion by HAI: MN: Either ####

# This was already done to get the number of infected donors for the first few columns in this Table 1
# Reworking here to tailor the current Table 1 columns in question

## HAI

# Qdata_HAI_pos is the list (generated in section a) above) with seroconversion by HAI (Glasgow serology)
# Group by Q day and summarize number of distinct SubjectIDs
# Per-quarantine counts of distinct subjects who seroconverted by HAI, by
# microneutralization (MN), and by either assay.

## HAI: distinct subjects in Qdata_HAI_pos per quarantine
Qdata_HAI_pos_table1 <- summarize(
  group_by(Qdata_HAI_pos, QuarantineNumber),
  Number_HAI_Positive = n_distinct(SubjectID)
)

## Microneuts: distinct subjects in Qdata_Microneut_pos per quarantine
Qdata_Microneut_pos_table1 <- summarize(
  group_by(Qdata_Microneut_pos, QuarantineNumber),
  Number_Microneut_Positive = n_distinct(SubjectID)
)

## Either HAI or MN: infected donors with a non-missing value in at least one of
## QuarantineNumber.x / QuarantineNumber.y
Pos_Either_HAI_or_MN_table1 <- Qdata_infected_donors %>%
  filter(!(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y))) %>%
  group_by(QuarantineNumber) %>%
  summarize(Positive_By_Either_HAI_or_MN = n_distinct(SubjectID))

# Add the three count columns to the cumulative Table 1 (HAI, then MN, then either).
# The matching fractions of infected donors are intentionally not computed for now:
#   Fraction_HAI_Positive_of_Infected = Number_HAI_Positive/Number_Infected_Donors
#   Fraction_MN_Positive_of_Infected = Number_Microneut_Positive/Number_Infected_Donors
#   Fraction_Positive_By_Either_HAI_or_MN = Positive_By_Either_HAI_or_MN/Number_Infected_Donors
Qdata_table1 <- Qdata_table1 %>%
  left_join(Qdata_HAI_pos_table1, by = "QuarantineNumber") %>%
  left_join(Qdata_Microneut_pos_table1, by = "QuarantineNumber") %>%
  left_join(Pos_Either_HAI_or_MN_table1, by = "QuarantineNumber")

#### Table 1 (donors) footnote: i) Number of those with greater immunity than expected prior to quarantine by HAI: MN: Both ####

# Definition of serosusceptible for this analysis, which will be included in the footnote of table 1 is from Alex Mann from email correspondence on September 28, 2018. He states:
# "An HI titre of ≤10 and/or an MN titre of <80 at baseline was retrospectively taken to indicate susceptibility to infection"
# Thus we will use this criteria to tell who among the inoculated donors was serosusceptible at baseline (entry to quarantine)
# We won't filter these individuals out, but we will note who among those above the thresholds (MN >= 80 or HAI > 10) seroconverted, since the likelihood of seroconversion among those above the thresholds is lower.
# Based on teleconference with team on October 12, 2018 we will use the term greater than anticipated immunity upon admission to Q

# Per-quarantine donor counts for the Table 1 footnote.

# Donors whose HAI_dayminus2_recodeNDA value is above 10
HI_susceptibility_table1_footnote <- Qdata %>%
  filter(Randomization_DorIRorCR == "D", HAI_dayminus2_recodeNDA > 10) %>%
  group_by(QuarantineNumber) %>%
  summarize(HI_greater_anticip_immunity_at_baseline = n_distinct(SubjectID))

# Donors whose screening-visit MN titer is 80 or higher
MN_susceptibility_table1_footnote <- Qdata %>%
  filter(Microneut_VisitType == "Screening",
         Randomization_DorIRorCR == "D",
         Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80) %>%
  group_by(QuarantineNumber) %>%
  summarize(MN_greater_anticip_immunity_at_baseline = n_distinct(SubjectID))

# Donors flagged as MN seroconverted on the "Q baseline" visit record
MN_seroconvert_between_screening_baseline_table1 <- Qdata %>%
  filter(Randomization_DorIRorCR == "D",
         Microneut_VisitType == "Q baseline",
         Microneut_Seroconvert == 1) %>%
  group_by(QuarantineNumber) %>%
  summarize(MN_seroconvert_between_screening_baseline = n_distinct(SubjectID))

# One row per quarantine present in the HAI counts, with the two MN counts attached
table1_footnote <- HI_susceptibility_table1_footnote %>%
  left_join(MN_susceptibility_table1_footnote, by = "QuarantineNumber") %>%
  left_join(MN_seroconvert_between_screening_baseline_table1, by = "QuarantineNumber")

# Looking into more detail on who exactly might have greater than anticip. immunity or may have seroconverted before admission to Q
# Need to check over the below to ensure that it matches the proper criteria for seroconversion, serosusceptible, serosuitable, and seropositive, etc. 
# Alex Mann has some good comments about this. 

# Which SubjectID's were these with low serosusceptible prior to Q (by HAI, retrospectively)?

# Old version commented out below followed by corrected version
#Qdata_HAIprior_SubjectIDs <- Qdata %>%
#filter(Randomization_DorIRorCR == "D" & HAI_dayminus2_recodeNDA > 10 & HAI_Seroconversion != 1) %>%
#group_by(QuarantineNumber) %>%
#distinct(SubjectID, .keep_all = TRUE) %>%
#select(QuarantineNumber, SubjectID, Randomization_DorIRorCR, HAI_dayminus2, HAI_day28, HAI_dayminus2_recodeNDA, 
#HAI_day28_recodeNDA, HAI_dayminus2_recodeNDA_x4, HAI_Seroconversion, Preliminary.HAI.Classification)

# One row per donor (within quarantine) whose HAI_dayminus2_recodeNDA value is above 10,
# keeping only the HAI-related columns. The first matching row per subject is retained.
HI_susceptibility_table1_footnote_SubIDs <- Qdata %>%
  filter(Randomization_DorIRorCR == "D", HAI_dayminus2_recodeNDA > 10) %>%
  group_by(QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  ungroup() %>%
  select(QuarantineNumber, SubjectID, Randomization_DorIRorCR,
         HAI_dayminus2, HAI_day28,
         HAI_dayminus2_recodeNDA, HAI_day28_recodeNDA, HAI_dayminus2_recodeNDA_x4,
         HAI_Seroconversion, Preliminary.HAI.Classification)

# Which SubjectID's were these with greater than antic immunity prior to Q (by MN, retrospectively)?

# 2 old versions commented out below followed by corrected version

#Qdata_MNprior <- Qdata %>%
# filter(Randomization_DorIRorCR == "D" & Microneut_VisitType == "Q baseline" & Microneut_Seroconvert == 1) 
#Qdata_MNprior_table1footnote <- Qdata_MNprior %>%
# group_by(QuarantineNumber) %>%
# summarize(NumberSeroconByMNprior = n_distinct(SubjectID))

# Qdata_MNprior_SubjectIDs <- Qdata %>%
# filter(Randomization_DorIRorCR == "D" & Microneut_VisitType == "Q baseline" & Microneut_Seroconvert == 1 & Microneutralization.Titer.to.A.Wisconsin.67.2005 >80) %>%
# group_by(QuarantineNumber) %>%
# distinct(SubjectID, .keep_all = TRUE) %>%
# select(QuarantineNumber, SubjectID, Randomization_DorIRorCR, Microneut_DrawDate, Microneut_VisitType, 
#     Microneutralization.Titer.to.A.Wisconsin.67.2005, Microneut_Seroconvert)

# One row per donor (within quarantine) whose screening-visit MN titer is 80 or higher.
# The seroconversion flag from the screening record is carried along under the name
# Microneut_Seroconvert_Screening.
MN_low_susceptibility_table1_footnote_SubIDs <- Qdata %>%
  filter(Randomization_DorIRorCR == "D",
         Microneut_VisitType == "Screening",
         Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80) %>%
  group_by(QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  ungroup() %>%
  select(QuarantineNumber, SubjectID, Randomization_DorIRorCR,
         Microneut_DrawDate, Microneut_VisitType,
         Microneutralization.Titer.to.A.Wisconsin.67.2005,
         Microneut_Seroconvert_Screening = Microneut_Seroconvert)

# But let's see who among these with higher than antic. immunity at baseline (admission to Q) seroconverted by MN
# Attach each donor's follow-up ("F/up") MN seroconversion flag to the list of donors with
# a screening MN titer of 80 or higher. The right join keeps every donor on that list, so
# a donor without a follow-up record gets NA in Microneut_Seroconvert_Followup.
# The join keys are left implicit; dplyr reports them as "SubjectID".
MN_susceptibility_table1_footnote_SubIDs <- Qdata %>%
  filter(Randomization_DorIRorCR == "D", Microneut_VisitType == "F/up") %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(SubjectID, Microneut_Seroconvert_Followup = Microneut_Seroconvert) %>%
  right_join(MN_low_susceptibility_table1_footnote_SubIDs)
## Joining, by = "SubjectID"
# Bind the greater than antic immunity by HAI and by MN together in a single table (table1 footnote)
# Combine the HAI list and the MN list of donors with greater than anticipated immunity
# into one table (one row per donor; a donor on both lists gets a single merged row),
# then derive 1/NA flags describing which list(s) each donor is on and whether they
# seroconverted anyway.
# The join keys are spelled out (they are the columns the two lists share) so the join
# does not depend on whatever column names happen to overlap.
Qdata_table1_footnote_subjectIDs <- HI_susceptibility_table1_footnote_SubIDs %>%
  full_join(MN_susceptibility_table1_footnote_SubIDs,
            by = c("QuarantineNumber", "SubjectID", "Randomization_DorIRorCR"))

# Placeholder columns on the full-data table. They are kept because this data frame is
# later written out as-is, so dropping them would change that file's columns.
Qdata_table1_footnote_subjectIDs$HAIandMNprior <- NA
Qdata_table1_footnote_subjectIDs$HAIprior <- NA
Qdata_table1_footnote_subjectIDs$MNprior <- NA
Qdata_table1_footnote_subjectIDs$LowSuscepHAI_converted_anyway <- NA
Qdata_table1_footnote_subjectIDs$LowSuscepMN_converted_anyway <- NA
Qdata_table1_footnote_subjectIDs$EitherHAIorMNprior <- NA

Table1_footnote_sero_SubjectID <- Qdata_table1_footnote_subjectIDs %>%
  mutate(
    # List membership is read from the very columns each list was filtered on
    # (HAI_dayminus2_recodeNDA > 10; screening MN titer >= 80). Those are guaranteed
    # non-missing for donors on the list and missing for donors who are not, whereas
    # proxies such as the follow-up seroconversion flag can be missing for a donor who
    # is on the list (e.g. no follow-up record), which would silently drop the flag.
    on_hai_list = !is.na(HAI_dayminus2_recodeNDA),
    on_mn_list = !is.na(Microneutralization.Titer.to.A.Wisconsin.67.2005),
    # Flags are 1 when the condition holds and NA otherwise (NAs become 0 downstream)
    HAIandMNprior = ifelse(on_hai_list & on_mn_list, 1, NA),
    HAIprior = ifelse(on_hai_list, 1, NA),
    MNprior = ifelse(on_mn_list, 1, NA),
    EitherHAIorMNprior = ifelse(on_mn_list | on_hai_list, 1, NA),
    # Seroconverted despite the elevated baseline titer, by the respective assay
    LowSuscepHAI_converted_anyway = ifelse(HAI_Seroconversion == 1, 1, NA),
    LowSuscepMN_converted_anyway = ifelse(Microneut_Seroconvert_Followup == 1, 1, NA)
  ) %>%
  select(QuarantineNumber, SubjectID, HAIprior, MNprior, HAIandMNprior, EitherHAIorMNprior,
         LowSuscepHAI_converted_anyway, LowSuscepMN_converted_anyway) %>%
  arrange(QuarantineNumber, SubjectID)

# We can see that it was always the case that if someone had greater than antic immunity at entry to Q and seroconverted, they seroconverted by the detection method (HAI or MN) by which they had greater than antic immunity at entry to Q
# We can make a note of this and then also consolidate the LowSuscepHAI_converted_anyway and LowSuscepMN_converted_anyway to a single column
# Consolidate the two "converted anyway" flags into a single Seroconverted_anyway flag:
# 1 when the donor seroconverted by HAI or by MN, NA otherwise.
# Placeholder column; rows that do not meet the condition keep this NA.
Table1_footnote_sero_SubjectID$Seroconverted_anyway <- NA

Table1_footnote_sero_anyway_SubjectID <- Table1_footnote_sero_SubjectID %>%
  # Both flags are compared to 1 explicitly; the MN flag was previously used bare and
  # only worked through implicit numeric-to-logical coercion.
  mutate(Seroconverted_anyway = ifelse(LowSuscepHAI_converted_anyway == 1 |
                                         LowSuscepMN_converted_anyway == 1,
                                       1, Seroconverted_anyway)) %>%
  select(QuarantineNumber, SubjectID, HAIprior, MNprior, HAIandMNprior, EitherHAIorMNprior, Seroconverted_anyway)

# But the text in the manuscript is also curious about PCR evidence of infection among those with greater than antic. immunity
# So we will add the PCR data onto this Table1_footnote_SubjectID_summary

# Number of study days (1-6) on which each footnote donor was PCR positive.
# Steps, in order: attach the full Qdata records (implicit join keys; dplyr reports
# c("QuarantineNumber", "SubjectID")), keep study days 1 through 6, keep the first
# record per subject and study day, then keep records with 0 < Ct < 38 and count the
# distinct positive days per subject.
Table1_footnote_PCR_Pos_SubjectIDs <- Table1_footnote_sero_anyway_SubjectID %>%
  left_join(Qdata) %>%
  select(QuarantineNumber, SubjectID,
         HAIprior, MNprior, HAIandMNprior, EitherHAIorMNprior, Seroconverted_anyway,
         StudyDay, InfA_Ct) %>%
  filter(StudyDay %in% 1:6) %>%
  distinct(SubjectID, StudyDay, .keep_all = TRUE) %>%
  filter(InfA_Ct > 0, InfA_Ct < 38) %>%
  group_by(SubjectID) %>%
  summarize(Number_StudyDays_PCR_Pos = n_distinct(StudyDay))

# Attach the positive-day counts to the serology flags (implicit join key; dplyr reports
# "SubjectID") and add two NA placeholder columns
Table1_footnote_SubjectID_sero_PCRpos_days <- Table1_footnote_sero_anyway_SubjectID %>%
  left_join(Table1_footnote_PCR_Pos_SubjectIDs) %>%
  mutate(PCR_positive = NA,
         PCR_positive_and_seroconverted = NA)

# PCR_positive: 1 when there is at least one PCR-positive study day.
# PCR_positive_and_seroconverted: 1 when PCR positive and seroconverted.
# Every remaining NA in the table is then set to 0.
Table1_footnote_SubjectID_summary <- Table1_footnote_SubjectID_sero_PCRpos_days %>%
  mutate(
    PCR_positive = ifelse(Number_StudyDays_PCR_Pos >= 1, 1, NA),
    PCR_positive_and_seroconverted = ifelse(PCR_positive == 1 & Seroconverted_anyway == 1, 1, NA)
  )
Table1_footnote_SubjectID_summary[is.na(Table1_footnote_SubjectID_summary)] <- 0
# This can be written out as part of SI but we will add some steps to clean up below

# Per-quarantine totals of each footnote flag (sum of the 0/1 subject-level values)
Table1_footnote_summary <- Table1_footnote_SubjectID_summary %>%
  group_by(QuarantineNumber) %>%
  summarise_at(
    vars(HAIprior, MNprior, HAIandMNprior, EitherHAIorMNprior,
         Seroconverted_anyway, PCR_positive, PCR_positive_and_seroconverted),
    sum, na.rm = TRUE
  )
# This can be written out as part of SI but some clean-up steps follow below

# Subject-level table: drop the two columns not reported per subject and switch to
# reader-friendly column names before it is written out
Table1_footnote_SubjectID_summary <- Table1_footnote_SubjectID_summary %>%
  select(-EitherHAIorMNprior, -PCR_positive_and_seroconverted) %>%
  rename(`Quarantine #` = QuarantineNumber,
         `Subject ID` = SubjectID,
         `Greater than Anticipated HAI` = HAIprior,
         `Greater than Anticipated MN` = MNprior,
         `Greater than Anticipated HAI and MN` = HAIandMNprior,
         `Seroconverted` = Seroconverted_anyway,
         `Days qPCR Positive` = Number_StudyDays_PCR_Pos,
         `Positive by qPCR` = PCR_positive)

# Before writing out the Table1_footnote_summary df, want to add row with totals, and make the column names better

# Append a row of column sums to the per-quarantine footnote summary. The sum also covers
# the quarantine-number column; that cell is overwritten with "Total" below.
Table1_footnote_summary_sums <- colSums(Table1_footnote_summary)
Table1_footnote_summary <- rbind(Table1_footnote_summary, Table1_footnote_summary_sums)

# Switch to reader-friendly column names
Table1_footnote_summary <- Table1_footnote_summary %>%
  rename(`Quarantine #` = QuarantineNumber,
         `Greater than Anticipated HAI` = HAIprior,
         `Greater than Anticipated MN` = MNprior,
         `Greater than Anticipated HAI and MN` = HAIandMNprior,
         `Greater than Anticipated HAI or MN` = EitherHAIorMNprior,
         `Seroconverted` = Seroconverted_anyway,
         `Positive by qPCR` = PCR_positive,
         `Seroconverted and Positive by qPCR` = PCR_positive_and_seroconverted)

# Label the 4th row (the appended sums) as "Total"
Table1_footnote_summary[["Quarantine #"]][4] <- "Total"

#### Writing out Table 1 Footnote to box sync directory ####

# Write the four Table 1 footnote data frames to the Analysis Results directory.
# The data frames and destination paths are paired by position.
invisible(Map(
  function(footnote_df, csv_path) write.csv(footnote_df, csv_path),
  list(
    Qdata_table1_footnote_subjectIDs,
    table1_footnote,
    Table1_footnote_SubjectID_summary,
    Table1_footnote_summary
  ),
  c(
    "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table1_Footnote_Full_Data.csv",
    "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table1_Footnote_Summary.csv",
    "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table1_Footnote_Reportable_Summary_SubjectIDs.csv",
    "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table1_Footnote_Reportable_Summary.csv"
  )
))

#### Column means, sums, and rounding for Table 1 ####
# Work on a copy of Qdata_table1 (it is arduous to recreate) and treat missing counts as 0
table1_manuscript <- Qdata_table1
table1_manuscript[is.na(table1_manuscript)] <- 0

# Append a totals row holding the sum of every column.
# bind_rows() is used to append the row; the earlier full_join() keyed on every column
# (including floating-point fractions) only appended the row because nothing matched.
# Plain functions replace the deprecated funs() helper.
table1_manuscript_sums <- bind_rows(
  table1_manuscript,
  summarise_all(table1_manuscript, sum)
)

# The summed "fraction" columns are meaningless, so recompute every fraction from the
# counts in each row (this makes the totals-row percents pooled percents), convert to
# percent (*100), and round everything to the nearest whole number.
table1_manuscript_sums_fractions <- table1_manuscript_sums %>%
  mutate(Fraction_Infected_of_Inoculated = (Number_Infected_Donors/Number_Inoculated_Donors)*100,
         Fraction_Symptomatic_V3_of_Infected = (Number_Symptomatic_V3/Number_Infected_Donors)*100,
         Fraction_ILI_V3_of_Infected = (Number_ILI_V3/Number_Infected_Donors)*100,
         Fraction_Febrile_Infected_of_Total_Infected = (Number_Febrile_Infected/Number_Infected_Donors)*100,
         Fraction_PCR_Infected_Donors_of_Infected = (Number_PCR_Infected_Donors/Number_Infected_Donors)*100,
         Fraction_Infected_by_PCR_and_Serology = (Number_Positive_PCR_and_Seroconversion/Number_Infected_Donors)*100) %>%
  mutate_all(round, 0)

# Now add parentheses to all of these fraction variables because they are presented in manuscript table as percents
# Wrap each percent column in parentheses, since the manuscript table shows them as "(NN)"
table1_manuscript_sums_fractions <- table1_manuscript_sums_fractions %>%
  mutate_at(
    vars(Fraction_Infected_of_Inoculated,
         Fraction_Symptomatic_V3_of_Infected,
         Fraction_ILI_V3_of_Infected,
         Fraction_Febrile_Infected_of_Total_Infected,
         Fraction_PCR_Infected_Donors_of_Infected,
         Fraction_Infected_by_PCR_and_Serology),
    function(pct) paste0("(", pct, ")")
  )

# Merge counts and percents into publishable cells. For example, Infected/Inoculated
# takes the infected count and the inoculated count and joins them with "/", and the
# percent is then appended after a space. Afterwards the columns are reordered (Febrile
# is placed before ILI) and the quarantine column gets its display name.
table1_manuscript_unite <- table1_manuscript_sums_fractions %>%
  unite(`Infected/Inoculated`, Number_Infected_Donors, Number_Inoculated_Donors, sep = "/") %>%
  unite(`Infected/Inoculated (%)`, `Infected/Inoculated`, Fraction_Infected_of_Inoculated, sep = " ") %>%
  unite(Symptomatic, Number_Symptomatic_V3, Fraction_Symptomatic_V3_of_Infected, sep = " ") %>%
  unite(ILI, Number_ILI_V3, Fraction_ILI_V3_of_Infected, sep = " ") %>%
  unite(Febrile, Number_Febrile_Infected, Fraction_Febrile_Infected_of_Total_Infected, sep = " ") %>%
  unite(`PCR Confirmed Infection`, Number_PCR_Infected_Donors, Fraction_PCR_Infected_Donors_of_Infected, sep = " ") %>%
  unite(`PCR Confirmed Infection and Seroconversion`, Number_Positive_PCR_and_Seroconversion, Fraction_Infected_by_PCR_and_Serology, sep = " ") %>%
  unite(`Seroconversion by HAI : MN : Either`, Number_HAI_Positive, Number_Microneut_Positive, Positive_By_Either_HAI_or_MN, sep = " : ") %>%
  select(1:3, 5, 4, 6:8) %>%
  rename(`Quarantine #` = QuarantineNumber)

# Label the 4th row (the totals row) as "Total"
table1_manuscript_unite[["Quarantine #"]][4] <- "Total"

#### Writing out Table 1 to box sync directory ####

# Save the formatted Table 1 (POC infection criteria) as a csv in the Analysis Results folder
write.csv(
  x = table1_manuscript_unite,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table1_Manuscript.csv"
)

#### Writing out Table 1 to latex for direct translation of code to table image for paper

# Render Table 1 with striped rows and a grouped header row above the column names:
# the first 2 columns get a blank group label, the next 3 and the last 3 columns
# each get their own group label.
table1_manuscript_unite %>%
  kable() %>%
  kable_styling(bootstrap_options = "striped") %>%
  add_header_above(header = c(
    " " = 2,
    "Laboratory Confirmed Infection & Illness (% of Infected)" = 3,
    "Laboratory Confirmed Infection Criteria (% of Infected)" = 3
  ))
## Laboratory Confirmed Infection & Illness (% of Infected)
## Laboratory Confirmed Infection Criteria (% of Infected)
## Quarantine # Infected/Inoculated (%) Symptomatic Febrile ILI PCR Confirmed Infection PCR Confirmed Infection and Seroconversion Seroconversion by HAI : MN : Either
## 1 15/20 (75) 11 (73) 4 (27) 8 (53) 13 (87) 12 (80) 12 : 14 : 14
## 2 11/12 (92) 7 (64) 0 (0) 5 (45) 11 (100) 9 (82) 9 : 7 : 9
## 3 18/20 (90) 16 (89) 2 (11) 14 (78) 17 (94) 13 (72) 14 : 11 : 14
## Total 44/52 (85) 34 (77) 6 (14) 27 (61) 41 (93) 34 (77) 35 : 32 : 37
# Interactive view of Table 1: column filters on top, 10 rows per page, automatic column widths
datatable(
  data = table1_manuscript_unite,
  options = list(pageLength = 10, autoWidth = TRUE),
  filter = "top"
)

# LaTeX source for Table 1 (comment = FALSE leaves out the xtable-generated comment header)
print(x = xtable(x = table1_manuscript_unite), comment = FALSE)
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllllllll}
##   \hline
##  & Quarantine \# & Infected/Inoculated (\%) & Symptomatic & Febrile & ILI & PCR Confirmed Infection & PCR Confirmed Infection and Seroconversion & Seroconversion by HAI : MN : Either \\ 
##   \hline
## 1 & 1 & 15/20 (75) & 11 (73) & 4 (27) & 8 (53) & 13 (87) & 12 (80) & 12 : 14 : 14 \\ 
##   2 & 2 & 11/12 (92) & 7 (64) & 0 (0) & 5 (45) & 11 (100) & 9 (82) & 9 : 7 : 9 \\ 
##   3 & 3 & 18/20 (90) & 16 (89) & 2 (11) & 14 (78) & 17 (94) & 13 (72) & 14 : 11 : 14 \\ 
##   4 & Total & 44/52 (85) & 34 (77) & 6 (14) & 27 (61) & 41 (93) & 34 (77) & 35 : 32 : 37 \\ 
##    \hline
## \end{tabular}
## \end{table}
#### * TABLE 3 ---------------------------####
#### Overview of Table 3 in the paper ####
# Table 3 is "Recipient status". It gives for each of the 3 quarantines and for IR and CR:
# a) Number of infected/ number of exposed (and %)
# b) Number of symptomatic (and % of exposed)
# c) Number of symptomatic, non-ILI (and % of exposed)
# d) Number of ILI (and % of exposed)
# e) Number of febrile (and % of exposed)
# f) Number of PCR confirmed infection (and % of exposed)
# g) Number of PCR confirmed infection and seroconversion (and % of exposed)
# h) Number of seroconversion by HAI: MN: Either (and % of exposed)

#### Table 3: a1) IR: Number of infected/ number of exposed (and %) ####

# number of exposed IR

# One row per subject randomized to "IR" (first row found for each SubjectID is kept)
Exposed_IR <- distinct(
  filter(Qdata, Randomization_DorIRorCR == "IR"),
  SubjectID,
  .keep_all = TRUE
)

# Count of distinct exposed IR subjects within each quarantine
Exposed_IR_table3 <- summarize(
  group_by(Exposed_IR, QuarantineNumber),
  NumberExposedIR = n_distinct(SubjectID)
)
print(Exposed_IR_table3)
## # A tibble: 3 x 2
##   QuarantineNumber NumberExposedIR
##              <int>           <int>
## 1                1              10
## 2                2              10
## 3                3              20
# number of infected IR

# positive by PCR under the POC criteria: one or more study days with a positive PCR result
# (a row counts as positive when InfA_Ct is present, below 38, and not equal to 0)
# build the per-subject count of distinct positive study days; it is merged with the seroconversion lists further down
Qdata_pcr_pos1_or_more_days_IR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    !is.na(InfA_Ct),
    InfA_Ct < 38,
    InfA_Ct != 0
  ) %>%
  group_by(SubjectID) %>%
  summarize(NumberDaysPosPCR_IR = n_distinct(StudyDay))
print(Qdata_pcr_pos1_or_more_days_IR)
## # A tibble: 0 x 2
## # ... with 2 variables: SubjectID <int>, NumberDaysPosPCR_IR <int>
# let's get the list with seroconversion by Microneuts (CDC serology)

# First only select the subjectIDs that were serosusceptible by MN at baseline (<80 at baseline)
# Upon the October 12, 2018 conference call with the team, decided to not exclude based on this criteria
#Qdata_Microneut_susceptible <- Qdata %>%
#filter(Randomization_DorIRorCR == "IR" & Microneut_VisitType == "Q baseline" & Microneutralization.Titer.to.A.Wisconsin.67.2005 < 80) %>%
#distinct(SubjectID, .keep_all = FALSE)

# IR subjects flagged as seroconverting by microneutralization on a "F/up" visit row,
# reduced to one row per subject with their quarantine number
Qdata_Microneut_pos_IR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    Microneut_VisitType == "F/up",
    Microneut_Seroconvert == 1
  ) %>%
  select(SubjectID, QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE)
print(Qdata_Microneut_pos_IR)
## [1] SubjectID        QuarantineNumber
## <0 rows> (or 0-length row.names)
# let's get the list with seroconversion by HAI (Glasgow serology)

# First only select the subjectIDs that were serosusceptible by HAI at baseline (<=10 at baseline)
# Following the October 12, 2018 conference call with the team, decided to not exclude based on this criteria
#Qdata_HAI_susceptible <- Qdata %>%
#filter(Randomization_DorIRorCR == "IR" & HAI_dayminus2_recodeNDA <= 10) %>%
#distinct(SubjectID, .keep_all = FALSE)

# IR subjects flagged as seroconverting by HAI, one row per subject with their quarantine number
Qdata_HAI_pos_IR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    HAI_Seroconversion == 1
  ) %>%
  select(SubjectID, QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE)
print(Qdata_HAI_pos_IR)
## [1] SubjectID        QuarantineNumber
## <0 rows> (or 0-length row.names)
# Union of the three positivity lists (HAI, microneut, PCR): a subject appearing in any
# of them gets a row. QuarantineNumber.x comes from the HAI list and QuarantineNumber.y
# from the microneut list; each is NA when the subject is absent from that list.
Qdata_infected_IR <- Qdata_HAI_pos_IR %>%
  full_join(Qdata_Microneut_pos_IR, by = "SubjectID") %>%
  full_join(Qdata_pcr_pos1_or_more_days_IR, by = "SubjectID") %>%
  arrange(SubjectID)
print(Qdata_infected_IR)
## [1] SubjectID           QuarantineNumber.x  QuarantineNumber.y 
## [4] NumberDaysPosPCR_IR
## <0 rows> (or 0-length row.names)
# Identify whom among the pcr positive individuals with only a single day of PCR positivity did not also seroconvert to confirm infection
# Qdata_1pcrpos_nosero_IR <- Qdata_infected_IR %>%
#  filter(NumberDaysPosPCR_IR == 1) %>%
#  filter(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y))

# For the subjects with exactly one PCR-positive day, look up which study day it was.
# Step 1: number of positive PCR rows per subject and study day (result stays grouped by SubjectID)
Qdata_pcr_pos1_or_more_days_studydays_IR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    !is.na(InfA_Ct),
    InfA_Ct < 38,
    InfA_Ct != 0
  ) %>%
  group_by(SubjectID, StudyDay) %>%
  summarize(count = n())

# Step 2: attach the positive study day(s) to the single-day-positive subjects; the row count is not needed
Qdata_pcr_single_day_infected_IR <- Qdata_infected_IR %>%
  filter(NumberDaysPosPCR_IR == 1) %>%
  left_join(Qdata_pcr_pos1_or_more_days_studydays_IR, by = "SubjectID") %>%
  select(-count)

# Remove those who were only 1 day pcr positive and no seroconversion (protocol criteria for positivity)
# Qdata_infected_IR <- Qdata_infected_IR %>% 
#  anti_join(Qdata_1pcrpos_nosero_IR, by = c("SubjectID" = "SubjectID"))

# Number of infected IR (by any criterion) per quarantine.
# The merged list only carries quarantine numbers from the serology lists, so first build
# a SubjectID -> QuarantineNumber lookup covering every subject with a non-missing ID
Qdata_QuarantineNumbers <- Qdata %>%
  filter(!is.na(SubjectID)) %>%
  select(SubjectID, QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE)

# Attach the complete quarantine number to each infected IR subject
Qdata_infected_IR <- left_join(
  Qdata_infected_IR,
  Qdata_QuarantineNumbers,
  by = "SubjectID"
)

# Distinct infected IR subjects within each quarantine
Qdata_infected_IR_table3 <- summarize(
  group_by(Qdata_infected_IR, QuarantineNumber),
  NumberInfectedIR = n_distinct(SubjectID)
)
print(Qdata_infected_IR_table3)
## # A tibble: 0 x 2
## # ... with 2 variables: QuarantineNumber <int>, NumberInfectedIR <int>
#### Generation of Table3_IR for paper ####

# Summary table per quarantine: number exposed, number infected, and fraction infected of exposed.
# Qdata_infected_IR_table3 only has rows for quarantines with at least one infected IR,
# so the left join yields NA for quarantines with none. A missing count here means
# zero infected, so it is set to 0 before the fraction is computed (otherwise both the
# count and the fraction would print as NA instead of 0).
Qdata_table3_IR <- Exposed_IR_table3 %>%
  left_join(Qdata_infected_IR_table3, by = "QuarantineNumber") %>%
  mutate(
    NumberInfectedIR = coalesce(NumberInfectedIR, 0L),
    Fraction_Inf_over_ExpIR = NumberInfectedIR / NumberExposedIR
  )
print(Qdata_table3_IR)
## # A tibble: 3 x 4
##   QuarantineNumber NumberExposedIR NumberInfectedIR Fraction_Inf_over_ExpIR
##              <int>           <int>            <int>                   <dbl>
## 1                1              10                0                       0
## 2                2              10                0                       0
## 3                3              20                0                       0
#### Table 3: a2) CR: Number of infected/ number of exposed (and %) ####

# number of exposed CR

# One row per subject randomized to "CR" (first row found for each SubjectID is kept)
Exposed_CR <- distinct(
  filter(Qdata, Randomization_DorIRorCR == "CR"),
  SubjectID,
  .keep_all = TRUE
)

# Count of distinct exposed CR subjects within each quarantine
Exposed_CR_table3 <- summarize(
  group_by(Exposed_CR, QuarantineNumber),
  NumberExposedCR = n_distinct(SubjectID)
)
print(Exposed_CR_table3)
## # A tibble: 3 x 2
##   QuarantineNumber NumberExposedCR
##              <int>           <int>
## 1                1              11
## 2                2               9
## 3                3              15
# number of infected CR

# positive by PCR under the POC criteria: one or more study days with a positive PCR result
# (a row counts as positive when InfA_Ct is present, below 38, and not equal to 0)
# build the per-subject count of distinct positive study days; it is merged with the seroconversion lists further down
Qdata_pcr_pos1_or_more_days_CR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "CR",
    !is.na(InfA_Ct),
    InfA_Ct < 38,
    InfA_Ct != 0
  ) %>%
  group_by(SubjectID) %>%
  summarize(NumberDaysPosPCR_CR = n_distinct(StudyDay))
print(Qdata_pcr_pos1_or_more_days_CR)
## # A tibble: 2 x 2
##   SubjectID NumberDaysPosPCR_CR
##       <int>               <int>
## 1       236                   1
## 2       242                   1
# let's get the list with seroconversion by Microneuts (CDC serology)

# First only select the subjectIDs that were serosusceptible by MN at baseline (<80 at baseline)
# Upon the October 12, 2018 conference call with the team, decided to not exclude based on this criteria
#Qdata_Microneut_susceptible <- Qdata %>%
#filter(Randomization_DorIRorCR == "CR" & Microneut_VisitType == "Q baseline" & Microneutralization.Titer.to.A.Wisconsin.67.2005 < 80) %>%
#distinct(SubjectID, .keep_all = FALSE)

# CR subjects flagged as seroconverting by microneutralization on a "F/up" visit row,
# reduced to one row per subject with their quarantine number
Qdata_Microneut_pos_CR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "CR",
    Microneut_VisitType == "F/up",
    Microneut_Seroconvert == 1
  ) %>%
  select(SubjectID, QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE)
print(Qdata_Microneut_pos_CR)
##   SubjectID QuarantineNumber
## 1       233                2
# let's get the list with seroconversion by HAI (Glasgow serology)

# First only select the subjectIDs that were serosusceptible by HAI at baseline (<=10 at baseline)
# Upon the October 12, 2018 conference call with the team, decided to not exclude based on this criteria
#Qdata_HAI_susceptible <- Qdata %>%
#filter(Randomization_DorIRorCR == "CR" & HAI_dayminus2_recodeNDA <= 10) %>%
#distinct(SubjectID, .keep_all = FALSE)

# CR subjects flagged as seroconverting by HAI, one row per subject with their quarantine number
Qdata_HAI_pos_CR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "CR",
    HAI_Seroconversion == 1
  ) %>%
  select(SubjectID, QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE)
print(Qdata_HAI_pos_CR)
##   SubjectID QuarantineNumber
## 1       233                2
# Union of the three positivity lists (HAI, microneut, PCR): a subject appearing in any
# of them gets a row. QuarantineNumber.x comes from the HAI list and QuarantineNumber.y
# from the microneut list; each is NA when the subject is absent from that list.
Qdata_infected_CR <- Qdata_HAI_pos_CR %>%
  full_join(Qdata_Microneut_pos_CR, by = "SubjectID") %>%
  full_join(Qdata_pcr_pos1_or_more_days_CR, by = "SubjectID") %>%
  arrange(SubjectID)
print(Qdata_infected_CR)
##   SubjectID QuarantineNumber.x QuarantineNumber.y NumberDaysPosPCR_CR
## 1       233                  2                  2                  NA
## 2       236                 NA                 NA                   1
## 3       242                 NA                 NA                   1
# Identify whom among the pcr positive individuals with only a single day of PCR positivity did not also seroconvert to confirm infection
# Qdata_1pcrpos_nosero_CR <- Qdata_infected_CR %>%
#  filter(NumberDaysPosPCR_CR ==1) %>%
#  filter(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y))

# For the subjects with exactly one PCR-positive day, look up which study day it was.
# Step 1: number of positive PCR rows per subject and study day (result stays grouped by SubjectID)
Qdata_pcr_pos1_or_more_days_studydays_CR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "CR",
    !is.na(InfA_Ct),
    InfA_Ct < 38,
    InfA_Ct != 0
  ) %>%
  group_by(SubjectID, StudyDay) %>%
  summarize(count = n())

# Step 2: attach the positive study day(s) to the single-day-positive subjects; the row count is not needed
Qdata_PCR_single_day_infected_CR <- Qdata_infected_CR %>%
  filter(NumberDaysPosPCR_CR == 1) %>%
  left_join(Qdata_pcr_pos1_or_more_days_studydays_CR, by = "SubjectID") %>%
  select(-count)

# Remove those who were only 1 day pcr positive and no seroconversion (protocol criteria for positivity)
# Qdata_infected_CR <- Qdata_infected_CR %>% 
#  anti_join(Qdata_1pcrpos_nosero_CR, by = c("SubjectID" = "SubjectID"))

# Number of infected CR (by any criterion) per quarantine.
# The merged list only carries quarantine numbers from the serology lists, so first build
# a SubjectID -> QuarantineNumber lookup covering every subject with a non-missing ID
Qdata_QuarantineNumbers <- Qdata %>%
  filter(!is.na(SubjectID)) %>%
  select(SubjectID, QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE)

# Attach the complete quarantine number to each infected CR subject
Qdata_infected_CR <- left_join(
  Qdata_infected_CR,
  Qdata_QuarantineNumbers,
  by = "SubjectID"
)

# Distinct infected CR subjects within each quarantine
Qdata_infected_CR_table3 <- summarize(
  group_by(Qdata_infected_CR, QuarantineNumber),
  NumberInfectedCR = n_distinct(SubjectID)
)
print(Qdata_infected_CR_table3)
## # A tibble: 1 x 2
##   QuarantineNumber NumberInfectedCR
##              <int>            <int>
## 1                2                3
#### Generation of Table3_CR for paper ####

# Summary table per quarantine: number exposed, number infected, and fraction infected of exposed.
# Qdata_infected_CR_table3 only has rows for quarantines with at least one infected CR,
# so the left join yields NA for quarantines with none. A missing count here means
# zero infected, so it is set to 0 before the fraction is computed (otherwise both the
# count and the fraction would print as NA instead of 0).
Qdata_table3_CR <- Exposed_CR_table3 %>%
  left_join(Qdata_infected_CR_table3, by = "QuarantineNumber") %>%
  mutate(
    NumberInfectedCR = coalesce(NumberInfectedCR, 0L),
    Fraction_Inf_over_ExpCR = NumberInfectedCR / NumberExposedCR
  )
print(Qdata_table3_CR)
## # A tibble: 3 x 4
##   QuarantineNumber NumberExposedCR NumberInfectedCR Fraction_Inf_over_ExpCR
##              <int>           <int>            <int>                   <dbl>
## 1                1              11                0                   0
## 2                2               9                3                   0.333
## 3                3              15                0                   0
#### Table 3: b1) IR: Number of symptomatic (and % of exposed) ####

## Implementing Version 2 of "Symptomatic afebrile" that we used in Table 1 (see above)

# “Symptomatic_V2_Afebrile”: “Evidence of at least 2 symptoms of any grade that do not necessarily need to persist for consecutive study days, nor persist for the same consecutive study days, but where each of the symptoms appeared on at least two different study days.”

# Going to implement "symptomatic" for afebrile to make a well-defined milder criteria for "symptomatic afebrile"

# First need to manipulate the dataset to prepare for the loop logic that was created to do this analysis
# Combine symptom severity measures (grades 1, 2, and 3) because grade >1 doesn't matter for this definition of symptomatic afebrile
# Symptomatic_IR_exposed_grade123 <- Exposed_IR %>%
#   select(SubjectID, QuarantineNumber) %>%
#   left_join(Qdata, by = c("SubjectID" = "SubjectID", "QuarantineNumber" = "QuarantineNumber")) %>%
#   filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | 
#            StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 |
#            StudyDay == 9 | StudyDay == 10) %>%
#   mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, 
#          LRI = cough+SOB, 
#          SystemicI = headache+muscleAches+malaise) %>%
#   mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>%
#   mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, 
#          stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, 
#          sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, 
#          soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3,
#          DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, 
#          DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, 
#          DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, 
#          DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3,
#          cough123 = cough==1 | cough==2 | cough==3, 
#          SOB123 = SOB==1 | SOB==2 | SOB==3,
#          headache123 = headache==1 | headache==2 | headache==3, 
#          muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, 
#          malaise123 = malaise==1 | malaise==2 | malaise==3) %>%
#   mutate(Febrile = as.numeric(Febrile),
#          runnyNose123 = as.numeric(runnyNose123), 
#          stuffyNose123 = as.numeric(stuffyNose123), 
#          sneezing123 = as.numeric(sneezing123), 
#          soreThroat123 = as.numeric(soreThroat123),
#          DPENasalDischarge123 = as.numeric(DPENasalDischarge123), 
#          DPEOtits123 = as.numeric(DPEOtits123), 
#          DPESinusTenderness123 = as.numeric(DPESinusTenderness123), 
#          DPEPharyngitis123 = as.numeric(DPEPharyngitis123),
#          cough123 = as.numeric(cough123), 
#          SOB123 = as.numeric(SOB123),
#          headache123 = as.numeric(headache123), 
#          muscleAches123 = as.numeric(muscleAches123), 
#          malaise123 = as.numeric(malaise123)) %>%
#   group_by(SubjectID, StudyDay, QuarantineNumber) %>%
#   summarize(Febrile = max(Febrile),
#             runnyNose123 = max(runnyNose123), 
#             stuffyNose123 = max(stuffyNose123), 
#             sneezing123 = max(sneezing123), 
#             soreThroat123 = max(soreThroat123),
#             DPENasalDischarge123 = max(DPENasalDischarge123), 
#             DPEOtits123 = max(DPEOtits123), 
#             DPESinusTenderness123 = max(DPESinusTenderness123), 
#             DPEPharyngitis123 = max(DPEPharyngitis123),
#             cough123 = max(cough123), 
#             SOB123 = max(SOB123),
#             headache123 = max(headache123), 
#             muscleAches123 = max(muscleAches123), 
#             malaise123 = max(malaise123)) %>%
#   ungroup()
# # The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was at least one symptoms (of any grade) detection per study day
# 
# # Now can search for recipients that meet the definition of "symptomatic afebrile"
# # However we will deal with the "afebrile" component later
# # First applying the loop that will select those that meet the symptoms criteria apart from afebrile
# # But first need to create a new df that only has data from study days 1-10
# Symptomatic_IR_exposed_grade123_day1to10 <- Symptomatic_IR_exposed_grade123 %>%
#   filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 |
#            StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10)
# 
# sub <- unique(Symptomatic_IR_exposed_grade123_day1to10$SubjectID)
# c_sub <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   subid <- sub[i]
#   temp <- Symptomatic_IR_exposed_grade123_day1to10[Symptomatic_IR_exposed_grade123_day1to10$SubjectID == subid, ]
#   temp1<-temp[,4:17]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:3],temp1)
#   for (j in 1:(nrow(temp))) {
#     for (k in 5:16) {
#       for (l in (k+1):17){
#         if (sum(temp[,k], na.rm = TRUE)>=2 & sum(temp[,l], na.rm = TRUE)>=2) {
#           c_sub <- rbind(c_sub, subid)
#           token<-1
#           break
#         }
#       }
#       if (token==1){
#         break
#       } 
#     }
#     if (token==1){
#       break
#     }
#   }
# }
# # This yields 9 subjectIDs
# # However, if we want to exclude symptoms from contributing to the criteria if they appeared before day 1 we do
# sub <- unique(Symptomatic_IR_exposed_grade123$SubjectID)
# c_sub2 <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   subid <- sub[i]
#   temp <- Symptomatic_IR_exposed_grade123[Symptomatic_IR_exposed_grade123$SubjectID == subid, ]
#   temp1<-temp[,4:17]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:3],temp1)
#   for (j in 1:(nrow(temp))) {
#     for (k in 5:16) {
#       for (l in (k+1):17){
#         if (sum(temp[,k], na.rm = TRUE)>=2 & sum(temp[,l], na.rm = TRUE)>=2) {
#           sum1<-0
#           sum2<-0
#           for (m in 1:(tail(which(temp$StudyDay==-1), n=1))){
#             sum1<-sum1+temp[m,k]
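#             sum2<-sum1+temp[m,l] # NOTE(review): accumulates from sum1, not sum2 -- probably meant sum2<-sum2+temp[m,l]; confirm before re-enabling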
#           }
#           if (sum1==0 & sum2==0){
#             c_sub2 <- rbind(c_sub2, subid)
#             token<-1
#             break
#           }
#         }
#       }
#       if (token==1){
#         break
#       } 
#     }
#     if (token==1){
#       break
#     }
#   }
# }
# # This yields 8 subjectIDs, 1 fewer subjectID with symptoms (before applying the afebrile criteria) compared with the c_sub above
# # For now we will stick with the less stringent version and use c_sub of 9
# 
# # Now converting this vector of studyIDs to a df
# # Remember this is symptomatic version 2: a milder criteria for symptomatic, however it is symptomatic afebrile
# Symptomatic_afebrile_IR <- as.data.frame(c_sub) %>%
#   rename(SubjectID = "V1") 
# 
# # Now getting rid of the subjectIDs that were febrile
# # first find which ones were febrile.
# Qdata_exposed_febrile_IR <- Qdata %>%
#   filter(Randomization_DorIRorCR == "IR" & Tympanic.temp..degrees.C. >37.9) %>%
#   distinct(SubjectID, .keep_all = FALSE)
# 
# Symptomatic_afebrile_IR <- Symptomatic_afebrile_IR %>%
#   anti_join(Qdata_exposed_febrile_IR, by = c("SubjectID" = "SubjectID"))
# 
# # Now adding the QuarantineNumber on to the Symptomatic df 
# # Then we can sort by Q for the table3_IR
# Symptomatic_V2_Afebrile_IR_QuarantineNumber_table3 <- Symptomatic_afebrile_IR %>%
#   left_join(Qdata_QuarantineNumbers) %>%
#   group_by(QuarantineNumber) %>%
#   summarize(Number_Symptomatic_V2_Afebrile_IR = n_distinct(SubjectID))
# 
# # Add onto Table3_IR the number of symptomatic by version 2 criteria and % of infected
# # For the final version of table 3 we will use the Symptomatic V3, and thus will ignore this in the printed table
# #Qdata_table3_IR <- Qdata_table3_IR %>%
#   #left_join(Symptomatic_V2_Afebrile_IR_QuarantineNumber_table3) %>%
#   #mutate(Fraction_Symptomatic_V2_Afebrile_of_Exposed_IR = Number_Symptomatic_V2_Afebrile_IR/NumberExposedIR)
# #print(Qdata_table3_IR)

#### Table 3: b2) CR: Number of symptomatic (and % of exposed) ####

# # Symptomatic Version 2 afebrile (like what was done to the IR group above)
# # “Symptomatic_V2_Afebrile”: “Evidence of at least 2 symptoms of any grade that do not necessarily...
# # ...need to persist for consecutive study days, nor persist for the same consecutive study days, but ... 
# # ...where each of the symptoms appeared on at least two different study days.”
# # First need to manipulate the dataset to prepare for the loop logic that was created to do this analysis
# # Combine symptom severity measures (grades 1, 2, and 3) because grade >1 doesn't matter for this definition of symptomatic afebrile
# Symptomatic_CR_exposed_grade123 <- Exposed_CR %>%
#   select(SubjectID, QuarantineNumber) %>%
#   left_join(Qdata, by = c("SubjectID" = "SubjectID", "QuarantineNumber" = "QuarantineNumber")) %>%
#   filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | 
#            StudyDay == 3 | StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 |
#            StudyDay == 9 | StudyDay == 10) %>%
#   mutate(URI = runnyNose+stuffyNose+sneezing+soreThroat+DPENasalDischarge+DPEOtits+DPESinusTenderness+DPEPharyngitis, 
#          LRI = cough+SOB, 
#          SystemicI = headache+muscleAches+malaise) %>%
#   mutate(Febrile = Tympanic.temp..degrees.C.>37.9) %>%
#   mutate(runnyNose123 = runnyNose==1 | runnyNose==2 | runnyNose==3, 
#          stuffyNose123 = stuffyNose==1 | stuffyNose==2 | stuffyNose==3, 
#          sneezing123 = sneezing==1 | sneezing==2 | sneezing==3, 
#          soreThroat123 = soreThroat==1 | soreThroat==2 | soreThroat==3,
#          DPENasalDischarge123 = DPENasalDischarge==1 | DPENasalDischarge==2 | DPENasalDischarge==3, 
#          DPEOtits123 = DPEOtits==1 | DPEOtits==2 | DPEOtits==3, 
#          DPESinusTenderness123 = DPESinusTenderness==1 | DPESinusTenderness==2 | DPESinusTenderness==3, 
#          DPEPharyngitis123 = DPEPharyngitis==1 | DPEPharyngitis==2 | DPEPharyngitis==3,
#          cough123 = cough==1 | cough==2 | cough==3, 
#          SOB123 = SOB==1 | SOB==2 | SOB==3,
#          headache123 = headache==1 | headache==2 | headache==3, 
#          muscleAches123 = muscleAches==1 | muscleAches==2 | muscleAches==3, 
#          malaise123 = malaise==1 | malaise==2 | malaise==3) %>%
#   mutate(Febrile = as.numeric(Febrile),
#          runnyNose123 = as.numeric(runnyNose123), 
#          stuffyNose123 = as.numeric(stuffyNose123), 
#          sneezing123 = as.numeric(sneezing123), 
#          soreThroat123 = as.numeric(soreThroat123),
#          DPENasalDischarge123 = as.numeric(DPENasalDischarge123), 
#          DPEOtits123 = as.numeric(DPEOtits123), 
#          DPESinusTenderness123 = as.numeric(DPESinusTenderness123), 
#          DPEPharyngitis123 = as.numeric(DPEPharyngitis123),
#          cough123 = as.numeric(cough123), 
#          SOB123 = as.numeric(SOB123),
#          headache123 = as.numeric(headache123), 
#          muscleAches123 = as.numeric(muscleAches123), 
#          malaise123 = as.numeric(malaise123)) %>%
#   group_by(SubjectID, StudyDay, QuarantineNumber) %>%
#   summarize(Febrile = max(Febrile),
#             runnyNose123 = max(runnyNose123), 
#             stuffyNose123 = max(stuffyNose123), 
#             sneezing123 = max(sneezing123), 
#             soreThroat123 = max(soreThroat123),
#             DPENasalDischarge123 = max(DPENasalDischarge123), 
#             DPEOtits123 = max(DPEOtits123), 
#             DPESinusTenderness123 = max(DPESinusTenderness123), 
#             DPEPharyngitis123 = max(DPEPharyngitis123),
#             cough123 = max(cough123), 
#             SOB123 = max(SOB123),
#             headache123 = max(headache123), 
#             muscleAches123 = max(muscleAches123), 
#             malaise123 = max(malaise123)) %>%
#   ungroup()
# # The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was...
# # ... at least one symptoms (of any grade) detection per study day
# 
# # Now can implement criteria search for recipients that meet the definition of "symptomatic afebrile"
# # The loop will check for those who meet the symptom definition and we will add the afebrile piece later.
# # First we need to cut a df that only has data from study days 1-10
# Symptomatic_CR_exposed_grade123_day1to10 <- Symptomatic_CR_exposed_grade123 %>%
#   filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 |
#            StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10)
# 
# sub <- unique(Symptomatic_CR_exposed_grade123_day1to10$SubjectID)
# c_sub <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   subid <- sub[i]
#   temp <- Symptomatic_CR_exposed_grade123_day1to10[Symptomatic_CR_exposed_grade123_day1to10$SubjectID == subid, ]
#   temp1<-temp[,4:17]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:3],temp1)
#   for (j in 1:(nrow(temp))) {
#     for (k in 5:16) {
#       for (l in (k+1):17){
#         if (sum(temp[,k], na.rm = TRUE)>=2 & sum(temp[,l], na.rm = TRUE)>=2) {
#             c_sub <- rbind(c_sub, subid)
#             token<-1
#             break
#         }
#       }
#       if (token==1){
#         break
#       } 
#     }
#     if (token==1){
#       break
#     }
#   }
# }
# # This yields a vector c_sub of 11 subjectIDs
# # But if we wanted to apply a more stringent criteria where symptoms occuring before day1 were excluded from contributing to criteria
# # We could do a new loop to create vector c_sub2
# sub <- unique(Symptomatic_CR_exposed_grade123$SubjectID)
# c_sub2 <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   subid <- sub[i]
#   temp <- Symptomatic_CR_exposed_grade123[Symptomatic_CR_exposed_grade123$SubjectID == subid, ]
#   temp1<-temp[,4:17]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:3],temp1)
#   for (j in 1:(nrow(temp))) {
#     for (k in 5:16) {
#       for (l in (k+1):17){
#         if (sum(temp[,k], na.rm = TRUE)>=2 & sum(temp[,l], na.rm = TRUE)>=2) {
#           sum1<-0
#           sum2<-0
#           for (m in 1:(tail(which(temp$StudyDay==-1), n=1))){
#             sum1<-sum1+temp[m,k]
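#             sum2<-sum1+temp[m,l] # NOTE(review): accumulates from sum1, not sum2 -- probably meant sum2<-sum2+temp[m,l]; confirm before re-enabling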
#           }
#           if (sum1==0 & sum2==0){
#             c_sub2 <- rbind(c_sub2, subid)
#             token<-1
#             break
#           }
#         }
#       }
#       if (token==1){
#         break
#       } 
#     }
#     if (token==1){
#       break
#     }
#   }
# }
# # This yields a vector c_sub2 that has 11 subjectIDs, same as the c_sub vector that used less stringent criteria
# # For now we will keep the less stringent criteria
# # Now transforming this c_sub (the less stringent version) vector of studyIDs to a df and checking for afebrile
# # Remember this is symptomatic version 2: a milder criteria for symptomatic, however it is symptomatic afebrile
# Symptomatic_V2_CR <- as.data.frame(c_sub) %>%
#   rename(SubjectID = "V1") 
# 
# # Now getting rid of the subjectIDs that were febrile
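# # first find which ones were febrile.
# # NOTE(review): the filter below selects "IR" although the result is named and used for the CR group; it probably should be "CR" -- confirm before re-enabling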
# Qdata_exposed_febrile_CR <- Qdata %>%
#   filter(Randomization_DorIRorCR == "IR" & Tympanic.temp..degrees.C. >37.9) %>%
#   distinct(SubjectID, .keep_all = FALSE)
# 
# Symptomatic_V2_Afebrile_CR <- Symptomatic_V2_CR %>%
#   anti_join(Qdata_exposed_febrile_CR, by = c("SubjectID" = "SubjectID"))
# 
# # Now adding the QuarantineNumber on to the Symptomatic df 
# # Then we can sort by Q for the table3
# Symptomatic_V2_Afebrile_CR_QuarantineNumber_table3 <- Symptomatic_V2_Afebrile_CR %>%
#   left_join(Qdata_QuarantineNumbers) %>%
#   group_by(QuarantineNumber) %>%
#   summarize(Number_Symptomatic_V2_Afebrile_CR = n_distinct(SubjectID))
# 
# # Add onto Table3 the number of symptomatic by version 2 criteria and % of infected
# # For the final version of table 3 we will use the Symptomatic V3, and thus will ignore this in the printed table
# #Qdata_table3_CR <- Qdata_table3_CR %>%
#   #left_join(Symptomatic_V2_Afebrile_CR_QuarantineNumber_table3, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_Symptomatic_V2_Afebrile_of_ExposedCR = Number_Symptomatic_V2_Afebrile_CR/NumberExposedCR)
# #print(Qdata_table3_CR)

#### Table 3: c1) IR: Number of symptomatic, non-ILI (and % of exposed) ####

# This category will not be used for this paper.

#### Table 3: c2) CR: Number of symptomatic, non-ILI (and % of exposed) ####

# This category will not be used for this paper.

#### Table 3: IR Symptomatic version 3 (to match Killingley, 2012) ####
## Implementing a new version of "symptomatic" based on October 12, 2018 webex conference with the team
## The purpose of this version of symptomatic is so that we are consistent with the definitions from...
## ... the proof-of-concept study (Killingley, 2012 JID)

# Thus, this version 3 of symptomatic for IR is:
# "Any respiratory symptom that occurs at all over 2 consecutive days, or occurs for 3/3 (am, early pm, late pm) symptom measurements on a single day, where respiratory symptoms include runny nose, stuffy nose, sneezing, sore throat, cough, and shortness of breath"

# First we are going to cut a new df that has only the 6 respiratory symptoms of interest
# (and also to include fever, just in case of future analyses)
# Per-measurement symptom table for IR participants.
# Keeps rows with StudyDay in -3..10 and Microneut_VisitType == "Q baseline",
# then codes fever (tympanic temp > 37.9) and each of the six respiratory
# symptoms as 1 when graded 1, 2 or 3 and 0 otherwise (NA stays NA).
# Only the columns retained by select() are derived; the composite scores and
# non-respiratory indicators that used to be computed here were dropped by
# select() without ever being read.
Symptomatic_IR_V3_day1to10 <- Qdata %>%
  filter(Randomization_DorIRorCR == "IR",
         StudyDay %in% seq(-3, 10),
         Microneut_VisitType == "Q baseline") %>%
  mutate(Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
         runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
         stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
         sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
         soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
         cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
         SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3)) %>%
  select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
         runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>%
  # One row per distinct SDC_time within each subject / study day / quarantine
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  distinct(SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, StudyDay) %>%
  ungroup()
# This is great but the way the data is put together, this leaves out the day -3 through day0 data
# Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0
# Then we will bind it back to the "Symptomatic_IR_V3" that was just created.
# Pre-exposure (StudyDay -3..0) symptom measurements for the exposed IR subjects,
# coded the same way as Symptomatic_IR_V3_day1to10 so the two can be stacked.
# The join key is stated explicitly (SubjectID is the only column kept from
# Exposed_IR), and only the columns retained by select() are derived.
Symptomatic_IR_V3_before_day1 <- Exposed_IR %>%
  select(SubjectID) %>%
  left_join(Qdata, by = "SubjectID") %>%
  filter(StudyDay %in% seq(-3, 0)) %>%
  mutate(Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
         runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
         stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
         sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
         soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
         cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
         SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3)) %>%
  select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
         runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>%
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  distinct(SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, StudyDay) %>%
  ungroup()

# Now binding together and sorting
Symptomatic_IR_V3 <- bind_rows(Symptomatic_IR_V3_day1to10, Symptomatic_IR_V3_before_day1) %>%
  arrange(SubjectID, StudyDay)

# We will hold onto the above work for the future, but for now use df Symptomatic_IR_V3_day1to10

# Filter those with three measurements positive in a single study day for any of the respiratory symptoms
# Single-day criterion (IR): a subject qualifies when, for at least one study
# day and one respiratory symptom, the positive indicators recorded that day
# sum to exactly 3. Missing indicators count as 0.
# Columns are addressed by name rather than position, and the result is always
# a one-column matrix (possibly with zero rows) so that as.data.frame(c_sub)
# yields a "V1" column even when nobody qualifies.
sub <- unique(Symptomatic_IR_V3_day1to10$SubjectID)
resp_symptoms <- c("runnyNose123", "stuffyNose123", "sneezing123",
                   "soreThroat123", "cough123", "SOB123")
meets_single_day <- vapply(seq_along(sub), function(i) {
  temp <- Symptomatic_IR_V3_day1to10[Symptomatic_IR_V3_day1to10$SubjectID == sub[i], ]
  flags <- as.matrix(temp[, resp_symptoms])
  flags[is.na(flags)] <- 0
  # rowsum() gives one row of per-symptom totals for each study day
  any(rowsum(flags, group = temp$StudyDay) == 3)
}, logical(1))
c_sub <- matrix(sub[meets_single_day], ncol = 1)
# This yields a c_sub vector of 5 subjectIDs
# However if we wanted to ignore the symptoms from the criteria where there was observation before day 1, we would do a new loop
# Note that here we must use the "Symptomatic_IR_V3" df because it includes the data from before day1
# Stringent single-day criterion (IR): as the single-day rule, but a symptom
# only counts if that same symptom was never positive on StudyDay <= 0.
# Pre-exposure rows are picked by StudyDay value instead of "rows 1 up to the
# last day-0 row", so a subject without a day-0 record no longer raises an
# error and the check does not depend on row order.
sub <- unique(Symptomatic_IR_V3$SubjectID)
resp_symptoms <- c("runnyNose123", "stuffyNose123", "sneezing123",
                   "soreThroat123", "cough123", "SOB123")
meets_single_day_strict <- vapply(seq_along(sub), function(i) {
  temp <- Symptomatic_IR_V3[Symptomatic_IR_V3$SubjectID == sub[i], ]
  flags <- as.matrix(temp[, resp_symptoms])
  flags[is.na(flags)] <- 0
  # Per symptom: was there any study day whose indicators sum to exactly 3?
  full_day <- colSums(rowsum(flags, group = temp$StudyDay) == 3) > 0
  # Per symptom: was it positive at any measurement on or before day 0?
  seen_before_day1 <- colSums(flags[temp$StudyDay <= 0, , drop = FALSE]) > 0
  any(full_day & !seen_before_day1)
}, logical(1))
c_sub2 <- matrix(sub[meets_single_day_strict], ncol = 1)
# This yields a c_sub2 vector with 3 subjectIDs (2 less than the c_sub) and implements the criteria where we eliminated the Sx appearing before day1
# But for now we will use the less stringent criteria
# Now get the list of subject IDs from c_sub (as opposed to the c_sub2 version)
# One row per subject meeting the single-day criterion; the IDs sit in the
# "V1" column of as.data.frame(c_sub) and are renamed to SubjectID.
Symptomatic_IR_singleday <- distinct(
  rename(as.data.frame(c_sub), SubjectID = "V1"),
  SubjectID
)

# Now use a loop to classify those with at least 1 respiratory symptom on two consec days
# For this we should use the "Symptomatic_IR_exposed_grade123" df that marks with indicator of 1 when any of the 3 symptom measurements in a day showed evidence of symptoms of any grade.
# This df was created in the first version of symptomatic for IR - will recreate here because we are commenting out earlier versions
# Combine symptom severity measures (grades 1, 2, and 3) because grade >1 doesn't matter for this definition of symptomatic afebrile
# Daily symptom indicators for the exposed IR subjects, StudyDay -3..10.
# Each symptom is coded 1 when graded 1, 2 or 3 at a measurement, then reduced
# to the daily maximum, i.e. 1 if it was present at any measurement that day.
# max() is applied without na.rm, so a missing measurement can make the daily
# value NA.
Symptomatic_IR_exposed_grade123 <- Exposed_IR %>%
  select(SubjectID, QuarantineNumber) %>%
  left_join(Qdata, by = c("SubjectID", "QuarantineNumber")) %>%
  filter(StudyDay %in% seq(-3, 10)) %>%
  mutate(
    # Composite scores; not carried through the summary below
    URI = runnyNose + stuffyNose + sneezing + soreThroat + DPENasalDischarge + DPEOtits + DPESinusTenderness + DPEPharyngitis,
    LRI = cough + SOB,
    SystemicI = headache + muscleAches + malaise,
    Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
    runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
    stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
    sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
    soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
    DPENasalDischarge123 = as.numeric(DPENasalDischarge == 1 | DPENasalDischarge == 2 | DPENasalDischarge == 3),
    DPEOtits123 = as.numeric(DPEOtits == 1 | DPEOtits == 2 | DPEOtits == 3),
    DPESinusTenderness123 = as.numeric(DPESinusTenderness == 1 | DPESinusTenderness == 2 | DPESinusTenderness == 3),
    DPEPharyngitis123 = as.numeric(DPEPharyngitis == 1 | DPEPharyngitis == 2 | DPEPharyngitis == 3),
    cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
    SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3),
    headache123 = as.numeric(headache == 1 | headache == 2 | headache == 3),
    muscleAches123 = as.numeric(muscleAches == 1 | muscleAches == 2 | muscleAches == 3),
    malaise123 = as.numeric(malaise == 1 | malaise == 2 | malaise == 3)
  ) %>%
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  summarise_at(
    vars(Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123,
         DPENasalDischarge123, DPEOtits123, DPESinusTenderness123, DPEPharyngitis123,
         cough123, SOB123, headache123, muscleAches123, malaise123),
    max
  ) %>%
  ungroup()
# The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was at least one symptoms (of any grade) detection per study day

# First cut the "resp" version of the "Symptomatic_IR_exposed_grade123" df to the variables of interest and proper scale for the loop
# Respiratory-only view: identifiers, fever and the six respiratory indicators
Symptomatic_IR_exposed_grade123_resp <- select(
  Symptomatic_IR_exposed_grade123,
  SubjectID, StudyDay, QuarantineNumber, Febrile,
  runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123
)
# Same view restricted to study days 1 through 10
Symptomatic_IR_exposed_grade123_resp_day1to10 <- filter(
  Symptomatic_IR_exposed_grade123_resp,
  StudyDay %in% 1:10
)

# Two-consecutive-day criterion (IR, study days 1-10): a subject qualifies when
# the same respiratory symptom is positive on two adjacent rows whose StudyDay
# values differ by exactly 1. Missing indicators count as 0.
# A subject with fewer than two rows is simply not flagged (the former
# 1:(nrow - 1) index raised an error in that case), and the result is always a
# one-column matrix so that as.data.frame(c_sub) yields a "V1" column.
sub <- unique(Symptomatic_IR_exposed_grade123_resp_day1to10$SubjectID)
resp_symptoms <- c("runnyNose123", "stuffyNose123", "sneezing123",
                   "soreThroat123", "cough123", "SOB123")
meets_two_day <- vapply(seq_along(sub), function(i) {
  temp <- Symptomatic_IR_exposed_grade123_resp_day1to10[Symptomatic_IR_exposed_grade123_resp_day1to10$SubjectID == sub[i], ]
  flags <- as.matrix(temp[, resp_symptoms])
  flags[is.na(flags)] <- 0
  n_days <- nrow(flags)
  if (n_days < 2) {
    return(FALSE)
  }
  # Row r of both_days compares row r with row r + 1 of flags
  consecutive <- diff(temp$StudyDay) == 1
  both_days <- flags[-n_days, , drop = FALSE] + flags[-1, , drop = FALSE] == 2
  any(both_days[consecutive, , drop = FALSE])
}, logical(1))
c_sub <- matrix(sub[meets_two_day], ncol = 1)
# This yields a c_sub vector of 11 subjectIDs
# If we use the more stringent criteria we use the below loop instead and get c_sub2
# Stringent two-consecutive-day criterion (IR): as the two-day rule, but a
# symptom only counts if that same symptom was never positive on StudyDay <= 0.
# Fix: the previous loop appended the subject to c_sub2 even when the
# pre-day-1 check failed, so the stringent rule was never actually applied.
# The logic now matches the corresponding CR loop. Pre-exposure rows are picked
# by StudyDay value, so a subject without a day-0 row no longer raises an error.
sub <- unique(Symptomatic_IR_exposed_grade123_resp$SubjectID)
resp_symptoms <- c("runnyNose123", "stuffyNose123", "sneezing123",
                   "soreThroat123", "cough123", "SOB123")
meets_two_day_strict <- vapply(seq_along(sub), function(i) {
  temp <- Symptomatic_IR_exposed_grade123_resp[Symptomatic_IR_exposed_grade123_resp$SubjectID == sub[i], ]
  flags <- as.matrix(temp[, resp_symptoms])
  flags[is.na(flags)] <- 0
  n_days <- nrow(flags)
  if (n_days < 2) {
    return(FALSE)
  }
  consecutive <- diff(temp$StudyDay) == 1
  both_days <- flags[-n_days, , drop = FALSE] + flags[-1, , drop = FALSE] == 2
  # Per symptom: positive on some pair of consecutive study days?
  two_day <- colSums(both_days[consecutive, , drop = FALSE]) > 0
  # Per symptom: positive on any day on or before day 0?
  seen_before_day1 <- colSums(flags[temp$StudyDay <= 0, , drop = FALSE]) > 0
  any(two_day & !seen_before_day1)
}, logical(1))
c_sub2 <- matrix(sub[meets_two_day_strict], ncol = 1)
# NOTE(review): an earlier run reported 11 subjectIDs for c_sub2 (same as c_sub), but that
# run added subjects regardless of the pre-day-1 check; re-run to obtain the stringent count.
# However for now we will stick with the original (less stringent) definition and go with the c_sub of 11 subjectIDs reported above
# Rename "V1" as SubjectID using the less stringent c_sub
# IDs meeting the two-consecutive-day criterion; as.data.frame(c_sub) holds
# them in a column named "V1".
Symptomatic_IR_twodays <- rename(as.data.frame(c_sub), SubjectID = "V1")

# Combine the Symptomatic_IR_twodays df and the Symptomatic_IR_singleday df
# (subjects meeting either criterion), sorted by SubjectID
Symptomatic_IR_V3_combined <- arrange(
  full_join(Symptomatic_IR_twodays, Symptomatic_IR_singleday),
  SubjectID
)
## Joining, by = "SubjectID"
# But the above definition of symptomatic doesn't make any mention of febrile illness
# Let's check to see if the febrile are already accounted for among the group of symptomatic version 3
# Subject IDs with at least one febrile day (Febrile == 1) that are NOT already
# in the version-3 symptomatic list; one row per febrile subject-day.
Symptomatic_by_fever_IR <- anti_join(
  select(filter(Symptomatic_IR_exposed_grade123, Febrile == 1), SubjectID),
  Symptomatic_IR_V3_combined,
  by = "SubjectID"
)
# Returned 0 subject IDs, thus adding fever to the analysis doesn't add anything. However we should still be clear about definitions for the paper

# Replace the subject-level list with a count of distinct symptomatic (V3)
# subjects per quarantine.
Symptomatic_IR_V3_combined <- summarise(
  group_by(
    left_join(Symptomatic_IR_V3_combined, Qdata_QuarantineNumbers, by = "SubjectID"),
    QuarantineNumber
  ),
  Number_IR_Symptomatic_V3 = n_distinct(SubjectID)
)
# Add onto Table3_IR the number symptomatic by version 3 criteria and its share of exposed IR.
# Quarantines with no row in the summary get NA for both new columns.
Qdata_table3_IR <- mutate(
  left_join(Qdata_table3_IR, Symptomatic_IR_V3_combined, by = "QuarantineNumber"),
  Fraction_IR_Symptomatic_V3_of_ExposedIR = Number_IR_Symptomatic_V3 / NumberExposedIR
)
print(Qdata_table3_IR)
## # A tibble: 3 x 6
##   QuarantineNumber NumberExposedIR NumberInfectedIR Fraction_Inf_ov…
##              <int>           <int>            <int>            <dbl>
## 1                1              10               NA               NA
## 2                2              10               NA               NA
## 3                3              20               NA               NA
## # ... with 2 more variables: Number_IR_Symptomatic_V3 <int>,
## #   Fraction_IR_Symptomatic_V3_of_ExposedIR <dbl>
#### Table 3: CR Symptomatic version 3 (to match Killingley, 2012) ####

## The purpose of this version of symptomatic is so that we are consistent with the definitions from the proof-of-concept study (Killingley, 2012 JID)

# Thus, this version 3 of symptomatic for CR is:
# "Any respiratory symptom that occurs at all over 2 consecutive days, or occurs for 3/3 (am, early pm, late pm) symptom measurements on a single day, where respiratory symptoms include runny nose, stuffy nose, sneezing, sore throat, cough, and shortness of breath"

# First we are going to cut a new df that has only the 6 respiratory symptoms of interest
# (and also to include fever, just in case of future analyses)
# Per-measurement symptom table for CR participants.
# Keeps rows with StudyDay in -3..10 and Microneut_VisitType == "Q baseline",
# then codes fever (tympanic temp > 37.9) and each of the six respiratory
# symptoms as 1 when graded 1, 2 or 3 and 0 otherwise (NA stays NA).
# Only the columns retained by select() are derived; the composite scores and
# non-respiratory indicators that used to be computed here were dropped by
# select() without ever being read.
Symptomatic_CR_V3_day1to10 <- Qdata %>%
  filter(Randomization_DorIRorCR == "CR",
         StudyDay %in% seq(-3, 10),
         Microneut_VisitType == "Q baseline") %>%
  mutate(Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
         runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
         stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
         sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
         soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
         cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
         SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3)) %>%
  select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
         runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>%
  # One row per distinct SDC_time within each subject / study day / quarantine
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  distinct(SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, StudyDay) %>%
  ungroup()
# This is great but the way the data is put together, this leaves out the day -3 through day0 data
# Therefore, as a quick fix, we will cut a new dataset that only filters in the data from day -3 through day0
# Then we will bind it back to the "Symptomatic_CR_V3" that was just created.
# Pre-exposure (StudyDay -3..0) symptom measurements for the exposed CR subjects,
# coded the same way as Symptomatic_CR_V3_day1to10 so the two can be stacked.
# The join key is stated explicitly (SubjectID is the only column kept from
# Exposed_CR), and only the columns retained by select() are derived.
Symptomatic_CR_before_day1 <- Exposed_CR %>%
  select(SubjectID) %>%
  left_join(Qdata, by = "SubjectID") %>%
  filter(StudyDay %in% seq(-3, 0)) %>%
  mutate(Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
         runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
         stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
         sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
         soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
         cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
         SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3)) %>%
  select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
         runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123) %>%
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  distinct(SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, StudyDay) %>%
  ungroup()

# Now binding together and sorting (bind_rows, as in the IR section)
Symptomatic_CR_V3 <- bind_rows(Symptomatic_CR_V3_day1to10, Symptomatic_CR_before_day1) %>%
  arrange(SubjectID, StudyDay)

# We will hold onto the above work for the future, but for now use the Symptomatic_CR_V3_day1to10 df

# Filter those with three measurements positive in a single study day for any of the respiratory symptoms
# Single-day criterion (CR): a subject qualifies when, for at least one study
# day and one respiratory symptom, the positive indicators recorded that day
# sum to exactly 3. Missing indicators count as 0.
# Columns are addressed by name rather than position, and the result is always
# a one-column matrix (possibly with zero rows) so that as.data.frame(c_sub)
# yields a "V1" column even when nobody qualifies.
sub <- unique(Symptomatic_CR_V3_day1to10$SubjectID)
resp_symptoms <- c("runnyNose123", "stuffyNose123", "sneezing123",
                   "soreThroat123", "cough123", "SOB123")
meets_single_day <- vapply(seq_along(sub), function(i) {
  temp <- Symptomatic_CR_V3_day1to10[Symptomatic_CR_V3_day1to10$SubjectID == sub[i], ]
  flags <- as.matrix(temp[, resp_symptoms])
  flags[is.na(flags)] <- 0
  # rowsum() gives one row of per-symptom totals for each study day
  any(rowsum(flags, group = temp$StudyDay) == 3)
}, logical(1))
c_sub <- matrix(sub[meets_single_day], ncol = 1)
# This yields a c_sub vector with 8 subjectIDs, however if we want to employ the stringent criteria where Sx are removed from the classification criteria if they appear before day1, then use the next loop.
# Stringent single-day criterion (CR): as the single-day rule, but a symptom
# only counts if that same symptom was never positive on StudyDay <= 0.
# Pre-exposure rows are picked by StudyDay value instead of "rows 1 up to the
# last day-0 row", so a subject without a day-0 record no longer raises an
# error and the check does not depend on row order.
sub <- unique(Symptomatic_CR_V3$SubjectID)
resp_symptoms <- c("runnyNose123", "stuffyNose123", "sneezing123",
                   "soreThroat123", "cough123", "SOB123")
meets_single_day_strict <- vapply(seq_along(sub), function(i) {
  temp <- Symptomatic_CR_V3[Symptomatic_CR_V3$SubjectID == sub[i], ]
  flags <- as.matrix(temp[, resp_symptoms])
  flags[is.na(flags)] <- 0
  # Per symptom: was there any study day whose indicators sum to exactly 3?
  full_day <- colSums(rowsum(flags, group = temp$StudyDay) == 3) > 0
  # Per symptom: was it positive at any measurement on or before day 0?
  seen_before_day1 <- colSums(flags[temp$StudyDay <= 0, , drop = FALSE]) > 0
  any(full_day & !seen_before_day1)
}, logical(1))
c_sub2 <- matrix(sub[meets_single_day_strict], ncol = 1)
# This yields a c_sub2 for 6 subjectIDs (2 fewer than the c_sub)
# However, for now, we will go with the less stringent criteria and use the c_sub
# Now get the list of subject IDs from c_sub (and not the more stringent c_sub2 list of subject IDs)
# One row per subject meeting the single-day criterion; the IDs sit in the
# "V1" column of as.data.frame(c_sub) and are renamed to SubjectID.
Symptomatic_V3_CR_singleday <- distinct(
  rename(as.data.frame(c_sub), SubjectID = "V1"),
  SubjectID
)

# Now use a loop to classify those with any sort of respiratory symptom on two consecutive days
# For this we should use the "Symptomatic_CR_exposed_grade123" df that marks with indicator of 1 when any of the 3 symptom measurements in a day showed evidence of symptoms of any grade.
# This df was created in the first version of symptomatic for CR

# First (re)build the "Symptomatic_CR_exposed_grade123" df; it is cut down to the variables of interest for the loop further below
# Daily symptom indicators for the exposed CR subjects, StudyDay -3..10.
# Each symptom is coded 1 when graded 1, 2 or 3 at a measurement, then reduced
# to the daily maximum, i.e. 1 if it was present at any measurement that day.
# max() is applied without na.rm, so a missing measurement can make the daily
# value NA.
Symptomatic_CR_exposed_grade123 <- Exposed_CR %>%
  select(SubjectID, QuarantineNumber) %>%
  left_join(Qdata, by = c("SubjectID", "QuarantineNumber")) %>%
  filter(StudyDay %in% seq(-3, 10)) %>%
  mutate(
    # Composite scores; not carried through the summary below
    URI = runnyNose + stuffyNose + sneezing + soreThroat + DPENasalDischarge + DPEOtits + DPESinusTenderness + DPEPharyngitis,
    LRI = cough + SOB,
    SystemicI = headache + muscleAches + malaise,
    Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
    runnyNose123 = as.numeric(runnyNose == 1 | runnyNose == 2 | runnyNose == 3),
    stuffyNose123 = as.numeric(stuffyNose == 1 | stuffyNose == 2 | stuffyNose == 3),
    sneezing123 = as.numeric(sneezing == 1 | sneezing == 2 | sneezing == 3),
    soreThroat123 = as.numeric(soreThroat == 1 | soreThroat == 2 | soreThroat == 3),
    DPENasalDischarge123 = as.numeric(DPENasalDischarge == 1 | DPENasalDischarge == 2 | DPENasalDischarge == 3),
    DPEOtits123 = as.numeric(DPEOtits == 1 | DPEOtits == 2 | DPEOtits == 3),
    DPESinusTenderness123 = as.numeric(DPESinusTenderness == 1 | DPESinusTenderness == 2 | DPESinusTenderness == 3),
    DPEPharyngitis123 = as.numeric(DPEPharyngitis == 1 | DPEPharyngitis == 2 | DPEPharyngitis == 3),
    cough123 = as.numeric(cough == 1 | cough == 2 | cough == 3),
    SOB123 = as.numeric(SOB == 1 | SOB == 2 | SOB == 3),
    headache123 = as.numeric(headache == 1 | headache == 2 | headache == 3),
    muscleAches123 = as.numeric(muscleAches == 1 | muscleAches == 2 | muscleAches == 3),
    malaise123 = as.numeric(malaise == 1 | malaise == 2 | malaise == 3)
  ) %>%
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  summarise_at(
    vars(Febrile, runnyNose123, stuffyNose123, sneezing123, soreThroat123,
         DPENasalDischarge123, DPEOtits123, DPESinusTenderness123, DPEPharyngitis123,
         cough123, SOB123, headache123, muscleAches123, malaise123),
    max
  ) %>%
  ungroup()
# The above gets us to a dataset where symptoms with grade 1, 2, or 3 are summarized by whether there was at least one symptoms (of any grade) detection per study day

# Respiratory-only view (identifiers, fever, six respiratory indicators)
# restricted to study days 1 through 10
Symptomatic_CR_exposed_grade123_day1to10 <- filter(
  select(
    Symptomatic_CR_exposed_grade123,
    SubjectID, StudyDay, QuarantineNumber, Febrile,
    runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123
  ),
  StudyDay %in% 1:10
)

# Two-consecutive-day criterion (CR, study days 1-10): a subject qualifies when
# the same respiratory symptom is positive on two adjacent rows whose StudyDay
# values differ by exactly 1. Missing indicators count as 0.
# A subject with fewer than two rows is simply not flagged (the former
# 1:(nrow - 1) index raised an error in that case), and the result is always a
# one-column matrix so that as.data.frame(c_sub) yields a "V1" column.
sub <- unique(Symptomatic_CR_exposed_grade123_day1to10$SubjectID)
resp_symptoms <- c("runnyNose123", "stuffyNose123", "sneezing123",
                   "soreThroat123", "cough123", "SOB123")
meets_two_day <- vapply(seq_along(sub), function(i) {
  temp <- Symptomatic_CR_exposed_grade123_day1to10[Symptomatic_CR_exposed_grade123_day1to10$SubjectID == sub[i], ]
  flags <- as.matrix(temp[, resp_symptoms])
  flags[is.na(flags)] <- 0
  n_days <- nrow(flags)
  if (n_days < 2) {
    return(FALSE)
  }
  # Row r of both_days compares row r with row r + 1 of flags
  consecutive <- diff(temp$StudyDay) == 1
  both_days <- flags[-n_days, , drop = FALSE] + flags[-1, , drop = FALSE] == 2
  any(both_days[consecutive, , drop = FALSE])
}, logical(1))
c_sub <- matrix(sub[meets_two_day], ncol = 1)
# This yields a c_sub with 12 subjectIDs, however if we want to use the more stringent criteria, where a Sx that appears before day 1 is removed from classification criteria, we use the following df and loop
# Stringent two-consecutive-day criterion (CR): as the two-day rule, but a
# symptom only counts if that same symptom was never positive on StudyDay <= 0.
# Fix: this loop reads the full Symptomatic_CR_exposed_grade123 table, where
# column positions 5:10 are runnyNose, stuffyNose, sneezing, soreThroat,
# DPENasalDischarge and DPEOtits. Indexing by position therefore skipped cough
# and SOB and tested two physical-exam indicators instead. The six respiratory
# indicators are now selected by name.
# Pre-exposure rows are picked by StudyDay value, so a subject without a day-0
# row no longer raises an error.
sub <- unique(Symptomatic_CR_exposed_grade123$SubjectID)
resp_symptoms <- c("runnyNose123", "stuffyNose123", "sneezing123",
                   "soreThroat123", "cough123", "SOB123")
meets_two_day_strict <- vapply(seq_along(sub), function(i) {
  temp <- Symptomatic_CR_exposed_grade123[Symptomatic_CR_exposed_grade123$SubjectID == sub[i], ]
  flags <- as.matrix(temp[, resp_symptoms])
  flags[is.na(flags)] <- 0
  n_days <- nrow(flags)
  if (n_days < 2) {
    return(FALSE)
  }
  consecutive <- diff(temp$StudyDay) == 1
  both_days <- flags[-n_days, , drop = FALSE] + flags[-1, , drop = FALSE] == 2
  # Per symptom: positive on some pair of consecutive study days?
  two_day <- colSums(both_days[consecutive, , drop = FALSE]) > 0
  # Per symptom: positive on any day on or before day 0?
  seen_before_day1 <- colSums(flags[temp$StudyDay <= 0, , drop = FALSE]) > 0
  any(two_day & !seen_before_day1)
}, logical(1))
c_sub2 <- matrix(sub[meets_two_day_strict], ncol = 1)
# NOTE(review): an earlier run reported 10 subjectIDs for c_sub2 (2 fewer than the 12 in c_sub),
# but that run tested the wrong columns (see above); re-run to confirm the stringent count.
# Even so, for now we will use the less stringent criteria for this classification
# Rename "V1" as SubjectID from the c_sub vector of 12
# IDs meeting the two-consecutive-day criterion; as.data.frame(c_sub) holds
# them in a column named "V1".
Symptomatic_V3_CR_twodays <- rename(as.data.frame(c_sub), SubjectID = "V1")

# Combine the Symptomatic_V3_CR_twodays df and the Symptomatic_V3_CR_singleday df
# (subjects meeting either criterion), sorted by SubjectID
Symptomatic_V3_CR_combined <- arrange(
  full_join(Symptomatic_V3_CR_twodays, Symptomatic_V3_CR_singleday, by = "SubjectID"),
  SubjectID
)

# But the above definition of symptomatic doesn't make any mention of febrile illness
# Let's check to see if the febrile are already accounted for among the group of symptomatic version 3
# Subject IDs with at least one febrile day (Febrile == 1) that are NOT already
# in the version-3 symptomatic list; one row per febrile subject-day.
Symptomatic_by_fever_CR <- anti_join(
  select(filter(Symptomatic_CR_exposed_grade123, Febrile == 1), SubjectID),
  Symptomatic_V3_CR_combined,
  by = "SubjectID"
)
# Returned 0 subject IDs, thus adding fever to the analysis doesn't add anything. However we should still be clear about definitions for the paper

# Replace the subject-level list with a per-quarantine count of distinct
# symptomatic subjects (note: the name Symptomatic_V3_CR_combined is reused)
Symptomatic_V3_CR_combined <- summarize(
  group_by(
    left_join(Symptomatic_V3_CR_combined, Qdata_QuarantineNumbers, by = "SubjectID"),
    QuarantineNumber
  ),
  Number_Symptomatic_V3_CR = n_distinct(SubjectID)
)
# Append the version 3 symptomatic count to Table3_CR, then express it as a
# fraction of the exposed CR in each quarantine
Qdata_table3_CR <- left_join(Qdata_table3_CR, Symptomatic_V3_CR_combined, by = "QuarantineNumber")
Qdata_table3_CR <- mutate(
  Qdata_table3_CR,
  Fraction_Symptomatic_V3_CR_of_ExposedCR = Number_Symptomatic_V3_CR / NumberExposedCR
)
print(Qdata_table3_CR)
## # A tibble: 3 x 6
##   QuarantineNumber NumberExposedCR NumberInfectedCR Fraction_Inf_ov…
##              <int>           <int>            <int>            <dbl>
## 1                1              11               NA           NA    
## 2                2               9                3            0.333
## 3                3              15               NA           NA    
## # ... with 2 more variables: Number_Symptomatic_V3_CR <int>,
## #   Fraction_Symptomatic_V3_CR_of_ExposedCR <dbl>
# Important note about the data here: subject 203 did not have any symptom scores reported at all in the raw data (although they should have).
# Upon investigating this discrepancy with Alex Mann and Ben Killingley, we found that 203 did, in fact, have symptom scores of 0 for all of the self-reported symptoms on each day of symptom surveillance.
# I could move this note to the EMIT_Quarantine_Main_work_with_clean_files where we are summarizing the data some, but actually I didn't catch this error until later on when I was checking the data before putting this table together. 
# For now we will keep the note here. 
# Also - important to note that the raw data will not change because of this. Rather, the null symptom scores for 203 are essentially already accounted for here by the NAs reported for 203's self-reported symptom data. 
# This note serves to remind us that yes, in fact, the NAs can be interpreted as symptom scores of 0.

#### Table 3: d1a) IR: First classification of ILI (and % of exposed) ####

# # Operationally, this means evidence of fever >100F (>37.9C) & any evidence of cough or sore throat or DPE Pharyngitis
# 
# # First, cut the dataset to only the infected donors who meet the definition for fever
# # Note, none of the volunteers registered a fever on any of the study days prior to inoculation day
# ILIdata <- Exposed_IR %>%
#   select(SubjectID, QuarantineNumber) %>%
#   left_join(Qdata) %>%
#   select(SubjectID, QuarantineNumber, StudyDay, Sx_Date, SDC_time, Tympanic.temp..degrees.C., cough, soreThroat, DPEPharyngitis) %>%
#   filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | 
#            StudyDay == 4 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>%
#   group_by(SubjectID, StudyDay, SDC_time) %>%
#   distinct(SDC_time, .keep_all = TRUE) %>%
#   arrange(SubjectID, StudyDay) %>%
#   ungroup()
# 
# # Let's consolidate the sore throat and pharyngitis variables to make one cumulative variable (soreThroat or DPEPharyngitis)
# ILIdata_day1to10 <- ILIdata %>%
#   mutate(st = cough>=1 | soreThroat>=1 | DPEPharyngitis>=1, st = as.numeric(st)) %>%
#   filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 |
#            StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10)
#   
# sub <- unique(ILIdata_day1to10$SubjectID)
# c_sub <- c()
# token_t<-0
# token_sx<-0
# for (i in 1:length(sub)) {
#   token_t<-0
#   token_sx<-0
#   subid <- sub[i]
#   temp <- ILIdata_day1to10[ILIdata_day1to10$SubjectID == subid, ]
#   temp1<-temp[,6:10]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:5],temp1)
#   for (j in 1:(nrow(temp))) {
#     if (!is.na(temp$Tympanic.temp..degrees.C.[j])) {
#       if (temp$Tympanic.temp..degrees.C.[j] > 37.9) {
#         token_t<-1
#       }
#     }
#     if (sum(temp$cough[j], temp$st[j], na.rm = TRUE) >=1) {
#       token_sx<-1
#     }
#     if (token_t == 1 & token_sx == 1){
#       c_sub <- rbind(c_sub, subid)
#       break
#     }
#   }
# }
# # Note: the above code may not work properly and would require additional verification and potential troubleshooting.
# # It could just be that there were 0 instances that met the criteria and for that reason the c_sub comes back null
# # ... however, since the definition of ILI has changed based on the October 12, 2018 call, we will move to work on the new definition
# # Note: the above code does not check for the case that someone had fever, cough, or sore throat...
# # ... prior to inoculation day.
# # Future iterations of this code would do well to implement logic that would filter those that met ILI criteria
# 
# # Now adding this vector of studyIDs to the table3_IR
# # ILI_febrile_IR <- as.data.frame(c_sub) %>%
#   # rename(SubjectID = "V1") 
# 
# # Create vector with 0s in place (in the case that there were 0 instances that met the ILI definition for IR)
# #m <- matrix(0, ncol = 2, nrow = 3)
# #ILI_febrile_table3_IR <- as.data.frame(m) %>%
#   #rename(QuarantineNumber = V1, NumberILI_febrile_IR = V2)
# 
# # Now adding the QuarantineNumber on to the Febrile ILI df 
# # Then we can sort by Q for the table3
# # ILI_febrile_table3_IR <- ILI_febrile_IR %>%
#   # left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>%
#   # group_by(QuarantineNumber) %>%
#   # summarize(NumberILI_febrile_IR = n_distinct(SubjectID))
# 
# # Add onto Table3 the number of 1st version of ILI criteria and % of infected (this is outdated, pre-October 12, 2018 definition)
# #Qdata_table3_IR <- Qdata_table3_IR %>%
#   #left_join(ILI_febrile_table3_IR, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_ILI_febrile_of_ExpIR = NumberILI_febrile_IR/NumberExposedIR)
# #print(Qdata_table3_IR)

#### Table 3: d1b) IR: Second classification of ILI (afebrile ILI) (and % of exposed) ####

# # This definition of afebrile ILI is: symptom of grade >=1 for cough or soreThroat (or DPEPharyngitis)
# # This code eliminates those who were symptomatic for cough or soreThroat before Day0
# # note: using the "ILIdata" df, which was created in the ILI_Version 1 for IR code above
# # The below code is draft and does not work, so I will comment it out
# # Note that the attempt below uses the more stringent criteria (excluding Sx that occurred before day1 from criteria)
# # We will use the new definition of ILI, which comes from the Oct 12, 2018 EMIT team webex conference
# #sub <- unique(ILIdata$SubjectID)
# #c_sub <- c()
# #token_c<-0
# #token_st<-0
# #for (i in 1:length(sub)) {
#   #token_c<-0
#   #token_st<-0
#   #subid <- sub[i]
#   #temp <- ILIdata[ILIdata$SubjectID == subid, ]
#   #temp1<-temp[,6:9]
#   #temp1[is.na(temp1)]<-0
#   #temp<-cbind(temp[,1:5],temp1)
#   #for (j in 1:(nrow(temp))) {
#     #if (temp$cough[j] >=1) {
#       #sum1<-0
#       #for (k in 1:(tail(which(temp$StudyDay==-1), n=1))) {
#         #sum1 <- sum1+temp$cough[k]
#         #if (sum1 == 0) {
#           #token_c<-1
#         #}
#       #}
#     #}
#     #if (temp$st[j] >= 1) {
#       #sum2<-0
#       #for (l in 1:(tail(which(temp$StudyDay==-1), n=1))) {
#         #sum2 <- sum2+temp$st[l]
#         #if (sum2 == 0) {
#           #token_st<-1
#         #}
#       #}
#     #}
#   #}
#   #if (token_c + token_st >=1) {
#     #c_sub <- rbind(c_sub, subid)
#   #}
# #}
# 
# # Now adding this vector of studyIDs to the table3_IR
# #ILI_afebrile_IR <- as.data.frame(c_sub) %>%
#   #rename(SubjectID = "V1") 
# 
# # Now adding the QuarantineNumber on to the ILI afebrile df 
# # Then we can sort by Q for the table3
# #ILI_afebrile_table3_IR <- ILI_afebrile_IR %>%
#   #left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>%
#   #group_by(QuarantineNumber) %>%
#   #summarize(NumberILI_afebrile_IR = n_distinct(SubjectID))
# 
# # Add onto Table3 the number of ILI afebrile and % of infected
# #Qdata_table3_IR <- Qdata_table3_IR %>%
#   #left_join(ILI_afebrile_table3_IR, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_ILI_afebrile_of_ExpIR = NumberILI_afebrile_IR/NumberExposedIR)
# #print(Qdata_table3_IR)

#### Table 3: d1c) IR: Third version of classification of ILI (to match Killingley et al., 2012) (and % of infected) ####

# This definition of ILI is: "an illness lasting >=24 hours with either (1) fever >37.9°C plus at least 1 respiratory symptom or (2) >=2 symptoms, at least 1 of which must be respiratory."
# Where "respiratory symptom" means evidence of any grade of runny nose, stuffy nose, sneeze, sore throat, cough, shortness of breath
# Where "lasting >=24 hours" means evidence of the symptom over all three instances of symptom measurements for a single day, or evidence of the symptom over two days at any frequency (1-3/3 instances of symptom recordings)

# First, let's program the first criteria (fever >37.9C plus at least 1 respiratory symptom)
# To do this, we can:
# a) create the set of subject IDs that meet the fever criteria, and then check them for
# b) evidence of three instances during a single day, or
# c) evidence of any frequency of instances >=1 for 2 consecutive days
# Then, we can deal with the second criteria for ILI (>=2 symptoms one of which being a respiratory)

# Find the SubjectIDs from among the exposed IR, that had fever
# First check to see if anyone had fever before day 1. 
# IR subjects with any tympanic temperature above 37.9 on study days -3 to 0
Qdata_IR_febrile_pre_day1 <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    StudyDay %in% seq(-3, 0),
    Tympanic.temp..degrees.C. > 37.9
  ) %>%
  distinct(SubjectID)
# As it turns out, none of the IR had fever before day 1

# Same screen, now over study days 1-10
Qdata_IR_febrile_day1to10 <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    StudyDay %in% seq(1, 10),
    Tympanic.temp..degrees.C. > 37.9
  ) %>%
  distinct(SubjectID)
# As it turns out, none of the IR ever had fever during study days 1-10

# Criteria 1 is taken directly from the febrile (day 1-10) list; no
# respiratory-symptom check is applied at this point
ILI_V3_IR_criteria1 <- Qdata_IR_febrile_day1to10

# Now we can move to the second criteria for ILI for the IR

## Plan for implementing the second criteria for ILI (>= 2 symptoms for >=24 hours, 1 of which is respiratory) and merging with the first criteria for ILI
# To do this, first we will filter those subject IDs without fever.
# Then we will see who among those without fever had respiratory symptom on a single day plus at least one other symptom on the same single day
# Then we will see who among those without fever had a respiratory symptom at frequency >=1 over 2 days plus at least one other symptom at frequency >=1 for the same 2 days
# Then we will add those subject IDs together to form ILI_IR_criteria2
# Since no subjects met ILI_IR_criteria1, ILI_IR will in effect consist of the ILI_IR_criteria2 subjects alone (the two criteria dfs are still merged for completeness)

# First, keep only the exposed IR subject IDs that do NOT appear in the
# febrile (day 1-10) list, i.e. those who never registered a fever on days 1-10
Qdata_IR_afebrile_day1to10 <- anti_join(
  select(Exposed_IR, SubjectID),
  Qdata_IR_febrile_day1to10
)
## Joining, by = "SubjectID"
# Now we will see who among those without fever had respiratory symptom on a single day plus at least one other symptom on the same single day

# First we are going to cut the a new df that has the 6 respiratory symptoms of interest plus the 3 non-resp symptoms, and fever 
# for only those in the "Qdata_IR_afebrile_day1to10" df
# Per-measurement symptom flags for the afebrile exposed IR.
# Each symptom score is turned into a 0/1 flag (1 = grade 1, 2 or 3), fever is
# flagged as tympanic temperature > 37.9, and one row is kept per distinct
# SDC_time within each SubjectID / StudyDay / QuarantineNumber group.
# The URI / LRI / SystemicI sums and the DPE* flags that used to be computed here
# were dropped by the select() below and never used, so they are no longer built.

# 1 when the score is 1, 2 or 3; 0 for any other score; NA when the score is NA
grade123_flag <- function(score) {
  as.numeric(score == 1 | score == 2 | score == 3)
}

ILI_V3_IR_afebrile_day1to10 <- Qdata_IR_afebrile_day1to10 %>%
  left_join(Qdata) %>%
  filter(StudyDay %in% seq(-3, 10)) %>%
  # Per the note that follows this block, this visit-type filter is what removes
  # the day -3 through day 0 rows from the result
  filter(Microneut_VisitType == "Q baseline") %>%
  mutate(
    Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
    runnyNose123 = grade123_flag(runnyNose),
    stuffyNose123 = grade123_flag(stuffyNose),
    sneezing123 = grade123_flag(sneezing),
    soreThroat123 = grade123_flag(soreThroat),
    cough123 = grade123_flag(cough),
    SOB123 = grade123_flag(SOB),
    headache123 = grade123_flag(headache),
    muscleAches123 = grade123_flag(muscleAches),
    malaise123 = grade123_flag(malaise)
  ) %>%
  # Column order matters: the loops further down address these by position
  # (1-5 identifiers, 6 Febrile, 7-12 respiratory, 13-15 other symptoms)
  select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
         runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
         headache123, muscleAches123, malaise123) %>%
  group_by(SubjectID, StudyDay, QuarantineNumber) %>%
  distinct(SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, StudyDay) %>%
  ungroup()
## Joining, by = "SubjectID"
# The visit-type filter used for ILI_V3_IR_afebrile_day1to10 leaves out the
# day -3 through day 0 data. As a quick fix, cut a second dataset holding only
# study days -3 to 0 (same flag columns, same column order) so that it can be
# bound back onto ILI_V3_IR_afebrile_day1to10.
# The URI / LRI / SystemicI sums and the DPE* flags that used to be computed here
# were dropped by the select() below and never used, so they are no longer built.

# 1 when the score is 1, 2 or 3; 0 for any other score; NA when the score is NA
grade123_flag <- function(score) {
  as.numeric(score == 1 | score == 2 | score == 3)
}

ILI_V3_IR_afebrile_before_day1 <- Qdata_IR_afebrile_day1to10 %>%
  left_join(Qdata) %>%
  filter(StudyDay %in% seq(-3, 0)) %>%
  mutate(
    Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
    runnyNose123 = grade123_flag(runnyNose),
    stuffyNose123 = grade123_flag(stuffyNose),
    sneezing123 = grade123_flag(sneezing),
    soreThroat123 = grade123_flag(soreThroat),
    cough123 = grade123_flag(cough),
    SOB123 = grade123_flag(SOB),
    headache123 = grade123_flag(headache),
    muscleAches123 = grade123_flag(muscleAches),
    malaise123 = grade123_flag(malaise)
  ) %>%
  select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
         runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
         headache123, muscleAches123, malaise123)
## Joining, by = "SubjectID"
# Now binding together and sorting
# Stack the day 1-10 rows on top of the pre-day-1 rows, then order the result
# by subject and study day
ILI_V3_IR_afebrile <- arrange(
  bind_rows(ILI_V3_IR_afebrile_day1to10, ILI_V3_IR_afebrile_before_day1),
  SubjectID, StudyDay
)

# But, the current definition is just for post day0 so we will filter just day1-10 "ILI_V3_IR_afebrile_day1to10" df

# Loop to get subjectIDs where there were 2 symptoms (one of which respiratory), each observed 3 times on the same day
# Single-day ILI scan (criteria 2, part 1) for the afebrile exposed IR.
# For each subject and each study day, look for a pair of symptom columns
# (l = one of the respiratory columns 7:12, m = any later column up to 15)
# whose flags on that day sum to 6 in total; the first such pair puts the
# subject into c_sub. NA in columns 6:15 is recoded to 0 first.
# NOTE(review): "== 6" corresponds to 3 + 3 only if each day has exactly three
# measurement rows -- confirm against the data.
# The former "for (k in 1:nrow(temp2))" loop never used k and only repeated the
# identical pair search once per row, so it has been removed.
sub <- unique(ILI_V3_IR_afebrile_day1to10$SubjectID)
c_sub <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- ILI_V3_IR_afebrile_day1to10[ILI_V3_IR_afebrile_day1to10$SubjectID == subid, ]
  temp1 <- temp[, 6:15]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:5], temp1)
  stud <- unique(temp$StudyDay)
  for (j in seq_along(stud)) {
    studyday <- stud[j]
    temp2 <- temp[temp$StudyDay == studyday, ]
    for (l in 7:12) {
      for (m in (l + 1):15) {
        if (sum(temp2[, l]) + sum(temp2[, m]) == 6) {
          token <- 1
          c_sub <- rbind(c_sub, subid)
          break
        }
      }
      # token == 1 means the subject already qualified; unwind the loops
      if (token == 1) {
        break
      }
    }
    if (token == 1) {
      break
    }
  }
}
# This yields a c_sub of 2 subjectIDs, but if we wanted to exclude symptoms that appeared before day 1 we could do a new loop
# Stringent single-day ILI scan for the afebrile exposed IR.
# Same pair search as the less stringent scan (respiratory column l in 7:12,
# later column m up to 15, flags on one study day summing to 6), but the pair
# only counts when BOTH columns sum to 0 over every row up to and including the
# last StudyDay == 0 row, i.e. neither symptom was recorded before day 1.
# The former "for (k in 1:nrow(temp2))" loop never used k and only repeated the
# identical pair search once per row, so it has been removed.
sub <- unique(ILI_V3_IR_afebrile$SubjectID)
c_sub2 <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- ILI_V3_IR_afebrile[ILI_V3_IR_afebrile$SubjectID == subid, ]
  temp1 <- temp[, 6:15]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:5], temp1)
  # Pre-day-1 window: rows 1 .. last StudyDay == 0 row. Computed once per subject;
  # a subject with no StudyDay == 0 row gets an empty window (sums of 0) instead
  # of raising an error.
  last_day0 <- tail(which(temp$StudyDay == 0), n = 1)
  pre_rows <- seq_len(if (length(last_day0) == 0) 0 else last_day0)
  stud <- unique(temp$StudyDay)
  for (j in seq_along(stud)) {
    studyday <- stud[j]
    temp2 <- temp[temp$StudyDay == studyday, ]
    for (l in 7:12) {
      for (m in (l + 1):15) {
        if (sum(temp2[, l]) + sum(temp2[, m]) == 6) {
          sum1 <- sum(temp[pre_rows, l])
          sum2 <- sum(temp[pre_rows, m])
          if (sum1 == 0 && sum2 == 0) {
            token <- 1
            c_sub2 <- rbind(c_sub2, subid)
            break
          }
        }
      }
      # token == 1 means the subject already qualified; unwind the loops
      if (token == 1) {
        break
      }
    }
    if (token == 1) {
      break
    }
  }
}
# This c_sub2 vector holds 2 subjectIDs, just like the less stringent c_sub
# For now we will take the less stringent criteria and move the 2 subjectIDs from c_sub into a df
# Now get the df of subject IDs from c_sub
# c_sub was grown with rbind(), so as.data.frame() yields a single column
# named "V1"; relabel it as SubjectID.
ILI_V3_IR_criteria2_singleday <- rename(as.data.frame(c_sub), SubjectID = V1)
# This is the output for the first part (single day) of the second ILI criteria

# Now implement the second part of the second ILI criteria: >=2 Sx (with >=1 resp) at any frequency over the same 2 consecutive study days

# Create an "IR_grade123_afebrile" df by collapsing the three study day values into 1, and select only the 9 symptoms that will be used as part of this analysis (the DPE aren't used here)
# The 9 are: runny nose, stuffy nose, sneeze, sore throat, cough, SOB, headache, muscleache, malaise
# Also remember to select the afebrile group (those who were never febrile, n = 40, which is actually all the IR)
# Collapse the repeated measurements of each SubjectID / StudyDay /
# QuarantineNumber group into a single row holding the per-column maximum, so a
# flag is 1 for the day if it was 1 at any measurement that day.
# NOTE(review): max() is called without na.rm, so a day containing any NA for a
# column yields NA for that column; the loops that consume this df recode NA to 0.
IR_grade123_afebrile_day1to10 <- summarise_at(
  group_by(ILI_V3_IR_afebrile_day1to10, SubjectID, StudyDay, QuarantineNumber),
  vars(Febrile,
       runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
       headache123, muscleAches123, malaise123),
  max
)
# Bind with the data that comes before day 1 to get complete dataset
# Note that the data from before day 1 doesn't have to collapsed to a single measurement per day because we aren't scanning these in the criteria
# Rather, we are interested in seeing whether there were any symptoms before day 1, in order to implement a more stringent criteria for classifying symptoms
# Daily (collapsed) day 1-10 rows plus the uncollapsed pre-day-1 rows, reduced
# to the identifier, Febrile and nine symptom-flag columns and ordered by
# subject and study day
ILI_V3_IR_grade123_afebrile <- IR_grade123_afebrile_day1to10 %>%
  bind_rows(ILI_V3_IR_afebrile_before_day1) %>%
  select(
    SubjectID, StudyDay, QuarantineNumber, Febrile,
    runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
    headache123, muscleAches123, malaise123
  ) %>%
  arrange(SubjectID, StudyDay)

# Columns 5-10 are respiratory symptoms and columns 11-13 are the other symptoms that matter for this definition
# Do a loop for 2 or more symptoms one of which is respiratory on with the Sx's occuring on 2 consecutive days at any frequency >=1
# Two-consecutive-day ILI scan (criteria 2, part 2) for the afebrile exposed IR.
# For each subject, walk adjacent row pairs (j, j + 1) and look for a pair of
# symptom columns (k = respiratory column 5:10, l = any later column up to 13)
# that are both flagged on both rows, where the two rows are exactly one study
# day apart. The first hit puts the subject into c_sub.
# NA in columns 4:13 is recoded to 0 first.
sub <- unique(IR_grade123_afebrile_day1to10$SubjectID)
c_sub <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- IR_grade123_afebrile_day1to10[IR_grade123_afebrile_day1to10$SubjectID == subid, ]
  temp1 <- temp[, 4:13]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:3], temp1)
  # seq_len() gives zero iterations for a single-row subject, whereas
  # 1:(nrow(temp) - 1) would have produced c(1, 0) and indexed past the end.
  for (j in seq_len(nrow(temp) - 1)) {
    for (k in 5:10) {
      for (l in (k + 1):13) {
        if (temp[j, k] + temp[j, l] + temp[j + 1, k] + temp[j + 1, l] == 4) {
          if (temp$StudyDay[j + 1] == temp$StudyDay[j] + 1) {
            c_sub <- rbind(c_sub, subid)
            token <- 1
            break
          }
        }
      }
      # token == 1 means the subject already qualified; unwind the loops
      if (token == 1) {
        break
      }
    }
    if (token == 1) {
      break
    }
  }
}
# This yields a c_sub vector of 5 subjectIDs, however if we want to exclude Sx that occured before day0 we use a new loop
# Stringent two-consecutive-day ILI scan for the afebrile exposed IR.
# Same search as the less stringent scan (respiratory column k in 5:10, later
# column l up to 13, both flagged on two rows one study day apart), but the pair
# only counts when BOTH columns sum to 0 over every row up to and including the
# last StudyDay == -1 row.
# Fix: the second accumulator used to be written as sum2 <- sum1 + temp[m, l],
# which discarded the running total for column l and only looked at its last
# pre-window row; each column is now summed independently over the window.
# NOTE(review): the window here ends at StudyDay == -1, whereas the other
# stringent scans in this file end at StudyDay == 0 -- confirm which is intended.
sub <- unique(ILI_V3_IR_grade123_afebrile$SubjectID)
c_sub2 <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- ILI_V3_IR_grade123_afebrile[ILI_V3_IR_grade123_afebrile$SubjectID == subid, ]
  temp1 <- temp[, 4:13]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:3], temp1)
  # Pre-exposure window: rows 1 .. last StudyDay == -1 row. Computed once per
  # subject; a subject with no StudyDay == -1 row gets an empty window (sums of 0)
  # instead of raising an error.
  last_pre <- tail(which(temp$StudyDay == -1), n = 1)
  pre_rows <- seq_len(if (length(last_pre) == 0) 0 else last_pre)
  # seq_len() gives zero iterations for a single-row subject, whereas
  # 1:(nrow(temp) - 1) would have produced c(1, 0) and indexed past the end.
  for (j in seq_len(nrow(temp) - 1)) {
    for (k in 5:10) {
      for (l in (k + 1):13) {
        if (temp[j, k] + temp[j, l] + temp[j + 1, k] + temp[j + 1, l] == 4) {
          if (temp$StudyDay[j + 1] == temp$StudyDay[j] + 1) {
            sum1 <- sum(temp[pre_rows, k])
            sum2 <- sum(temp[pre_rows, l])
            if (sum1 == 0 && sum2 == 0) {
              c_sub2 <- rbind(c_sub2, subid)
              token <- 1
              break
            }
          }
        }
      }
      # token == 1 means the subject already qualified; unwind the loops
      if (token == 1) {
        break
      }
    }
    if (token == 1) {
      break
    }
  }
}
# This yields the same 5 subjectIDs as c_sub
# For now we will use the less stringent criteria (c_sub) and thus convert c_sub into a df for future manipulation
# Criteria 2 = subjects from the two-consecutive-day scan (c_sub) UNION subjects
# from the single-day scan (ILI_V3_IR_criteria2_singleday).
# Fix: the single-day list was built but never merged, so a subject who met
# criteria 2 only through the single-day route was dropped from the ILI count.
# distinct() keeps one row per subject when someone qualifies by both routes.
ILI_V3_IR_criteria2 <- as.data.frame(c_sub) %>%
  rename(SubjectID = "V1") %>%
  full_join(ILI_V3_IR_criteria2_singleday, by = "SubjectID") %>%
  distinct(SubjectID)

# merge together the ILI criteria 1 and 2 dfs
ILI_V3_IR <- full_join(ILI_V3_IR_criteria1, ILI_V3_IR_criteria2)
## Joining, by = "SubjectID"
# Now adding the QuarantineNumber on to the ILI df 
# Then we can sort by Q for the table1
# Attach each ILI subject's quarantine number and count distinct subjects per
# quarantine
ILI_V3_IR_table3 <- summarize(
  group_by(
    left_join(ILI_V3_IR, Qdata_QuarantineNumbers, by = "SubjectID"),
    QuarantineNumber
  ),
  Number_ILI_V3_IR = n_distinct(SubjectID)
)

# Append the ILI (version 3) count to Table3_IR, then express it as a fraction
# of the exposed IR in each quarantine
Qdata_table3_IR <- left_join(Qdata_table3_IR, ILI_V3_IR_table3, by = "QuarantineNumber")
Qdata_table3_IR <- mutate(
  Qdata_table3_IR,
  Fraction_ILI_V3_IR_of_ExposedIR = Number_ILI_V3_IR / NumberExposedIR
)
print(Qdata_table3_IR)
## # A tibble: 3 x 8
##   QuarantineNumber NumberExposedIR NumberInfectedIR Fraction_Inf_ov…
##              <int>           <int>            <int>            <dbl>
## 1                1              10               NA               NA
## 2                2              10               NA               NA
## 3                3              20               NA               NA
## # ... with 4 more variables: Number_IR_Symptomatic_V3 <int>,
## #   Fraction_IR_Symptomatic_V3_of_ExposedIR <dbl>, Number_ILI_V3_IR <int>,
## #   Fraction_ILI_V3_IR_of_ExposedIR <dbl>
#### Table 3: d2a) CR: Number of ILI (and % of exposed) ####

# # Operationally, this means evidence of fever >100F (>37.9C) & any evidence of cough or sore throat or DPE Pharyngitis
# # Note that >100F could really be implemented as >=37.8 but the EMIT team in the UK consistently uses >37.9C so we will follow suit
# 
# # First, cut the dataset to only the infected donors who meet the definition for fever
# # Note, none of the volunteers registered a fever on any of the study days prior to inoculation day
# ILIdata_CR <- Exposed_CR %>%
#   select(SubjectID, QuarantineNumber) %>%
#   left_join(Qdata) %>%
#   select(SubjectID, QuarantineNumber, StudyDay, Sx_Date, SDC_time, Tympanic.temp..degrees.C., cough, soreThroat, DPEPharyngitis) %>%
#   filter(StudyDay == -3 | StudyDay == -2 | StudyDay == -1 | StudyDay == 0 | StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | 
#            StudyDay == 3 | StudyDay == 5 | StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10) %>%
#   group_by(SubjectID, StudyDay, SDC_time) %>%
#   distinct(SDC_time, .keep_all = TRUE) %>%
#   arrange(SubjectID, StudyDay) %>%
#   ungroup()
# 
# # Let's consolidate the sore throat and pharyngitis variables to make one cumulative variable (soreThroat or DPEPharyngitis)
# ILIdata_CR <- ILIdata_CR %>%
#   mutate(st = cough>=1 | soreThroat>=1 | DPEPharyngitis>=1, st = as.numeric(st))
# 
# # First need to create a df with study data from only study days 1-10
# ILIdata_CR_day1to10 <- ILIdata_CR %>%
#   filter(StudyDay == 1 | StudyDay == 2 | StudyDay == 3 | StudyDay == 4 | StudyDay == 5 |
#            StudyDay == 6 | StudyDay == 7 | StudyDay == 8 | StudyDay == 9 | StudyDay == 10)
# 
# sub <- unique(ILIdata_CR_day1to10$SubjectID)
# c_sub <- c()
# token_t<-0
# token_s<-0
# token<-0
# for (i in 1:length(sub)) {
#   token<-0
#   token_t<-0
#   token_s<-0
#   subid <- sub[i]
#   temp <- ILIdata_CR_day1to10[ILIdata_CR_day1to10$SubjectID == subid, ]
#   for (j in 1:(nrow(temp))) {
#     if (!is.na(temp$Tympanic.temp..degrees.C.[j])) {
#       if (temp$Tympanic.temp..degrees.C.[j] >37.9) {
#         token_t<-1
#       }
#     }
#     if (sum(temp$cough[j], temp$st[j], na.rm = TRUE) >=1) {
#       token_s<-1
#     }
#     if (token_t==1 & token_s==1){
#       token<-1
#       c_sub <- rbind(c_sub, subid)
#       break
#     }
#   }
#   if (token==1) {
#     break
#   }
# }
# # But also note: there were 0 instances where this criteria was met.
# # Note: the above code does not check for the case that someone had fever, cough, or sore throat prior to studyday1.
# # Future iterations to include more generalized cases of this code would do well to implement logic that would filter ...
# # ... those that met ILI criteria, accounting for the case where symptoms appeared before StudyDay == 1.
# 
# # Now adding this vector of studyIDs to the table3_CR
# #ILI_febrile_CR <- as.data.frame(c_sub) %>%
#   #rename(SubjectID = "V1") 
# # 0 instances so let's fill it in as 0
# ILI_febrile_CR <- as.data.frame(c(NA,NA,NA)) %>%
#   rename(SubjectID = "c(NA, NA, NA)")
# 
# # Now adding the QuarantineNumber on to the Febrile ILI df 
# # Then we can sort by Q for the table3
# ILI_febrile_table3_CR <- ILI_febrile_CR %>%
#   left_join(Qdata_QuarantineNumbers) %>%
#   group_by(QuarantineNumber) %>%
#   summarize(Number_ILI_febrile_CR = n_distinct(SubjectID))
# 
# # Add onto Table3 the number of ILI by version 1 criteria and % of infected
# # For the final version of table 3 we will use the ILI V3, and thus will ignore this in the printed table
# #Qdata_table3_CR <- Qdata_table3_CR %>%
#   #left_join(ILI_febrile_table3_CR, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_ILI_febrile_of_ExpCR = Number_ILI_febrile_CR/NumberExposedCR)
# #print(Qdata_table3_CR)

#### Table 3: d2b) CR: Second classification of ILI (afebrile ILI) (and % of exposed) ####

# # This definition of afebrile ILI is: symptom of grade >=1 for cough or soreThroat (or DPEPharyngitis)
# sub <- unique(ILIdata_CR_day1to10$SubjectID)
# c_sub <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token_c<-0
#   token_st<-0
#   subid <- sub[i]
#   temp <- ILIdata_CR_day1to10[ILIdata_CR_day1to10$SubjectID == subid, ]
#   temp1<-temp[,6:9]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:5],temp1)
#   for (j in 1:(nrow(temp))) {
#     if (sum(temp$cough[j]) >=1) {
#       token_c<-1
#     }
#     if (sum(temp$st[j]) >= 1) {
#       token_st<-1
#     }
#   }
#   if (token_c + token_st >=1) {
#     c_sub <- rbind(c_sub, subid)
#   }
# }
# # This yields a c_sub vector of 5 subjectIDs, however if we use code to eliminate those who were ...
# # ...symptomatic for cough or soreThroat before Day0 then we get...
# sub <- unique(ILIdata_CR$SubjectID)
# c_sub2 <- c()
# token<-0
# for (i in 1:length(sub)) {
#   token_c<-0
#   token_st<-0
#   subid <- sub[i]
#   temp <- ILIdata_CR[ILIdata_CR$SubjectID == subid, ]
#   temp1<-temp[,6:9]
#   temp1[is.na(temp1)]<-0
#   temp<-cbind(temp[,1:5],temp1)
#   for (j in 1:(nrow(temp))) {
#     if (sum(temp$cough[j]) >=1) {
#       sum1<-0
#       for (k in 1:(tail(which(temp$StudyDay==-1), n=1))) {
#         sum1 <- sum1+temp$cough[k]
#         if (sum1 == 0) {
#           token_c<-1
#         }
#       }
#     }
#     if (sum(temp$st[j]) >= 1) {
#       sum2<-0
#       for (l in 1:(tail(which(temp$StudyDay==-1), n=1))) {
#         sum2 <- sum2+temp$st[l]
#         if (sum2 == 0) {
#           token_st<-1
#         }
#       }
#     }
#   }
#   if (token_c + token_st >=1) {
#     c_sub2 <- rbind(c_sub2, subid)
#   }
# }
# # This yielded 5 subjectIDs (same as the less stringent criteria). However, for now we will use the less stringent criteria.
# # Now adding this vector of studyIDs to the table3_CR
# ILI_afebrile_CR <- as.data.frame(c_sub) %>%
#   rename(SubjectID = "V1") 
# 
# # Taking out the febrile
# ILI_afebrile_CR <- ILI_afebrile_CR %>%
#   anti_join(ILI_febrile_CR, by = c("SubjectID" = "SubjectID"))
# 
# # Now adding the QuarantineNumber on to the ILI afebrile df 
# # Then we can sort by Q for the table3
# ILI_afebrile_table3_CR <- ILI_afebrile_CR %>%
#   left_join(Qdata_QuarantineNumbers, by = c("SubjectID" = "SubjectID")) %>%
#   group_by(QuarantineNumber) %>%
#   summarize(Number_ILI_afebrile_CR = n_distinct(SubjectID))
# 
# # Add onto Table3 the number of ILI afebrile and % of infected
# # For the final version of table 3 we will use the Symptomatic V3, and thus will ignore this in the printed table
# #Qdata_table3_CR <- Qdata_table3_CR %>%
#   #left_join(ILI_afebrile_table3_CR, by = c("QuarantineNumber" = "QuarantineNumber")) %>%
#   #mutate(Fraction_ILI_afebrile_of_ExpCR = Number_ILI_afebrile_CR/NumberExposedCR)
# #print(Qdata_table3_CR)

#### Table 3: d1c) CR: Third version of classification of ILI (to match Killingley et al., 2012) (and % of infected) ####

# This definition of ILI is: "an illness lasting >=24 hours with either (1) fever >37.9°C plus at least 1 respiratory symptom or (2) >=2 symptoms, at least 1 of which must be respiratory."
# Where "respiratory symptom" means evidence of any grade of runny nose, stuffy nose, sneeze, sore throat, cough, shortness of breath
# Where "lasting >=24 hours" means evidence of the symptom over all three instances of symptom measurements for a single day, or evidence of the symptom over two days at any frequency (1-3/3 instances of symptom recordings)

# First, let's program the first criteria (fever >37.9C plus at least 1 respiratory symptom)
# To do this, we can:
# a) create the set of subject IDs that meet the fever criteria, and then check them for
# b) evidence of three instances during a single day, or
# c) evidence of any frequency of instances >=1 for 2 consecutive days
# Then, we can deal with the second criteria for ILI (>=2 symptoms one of which being a respiratory)

# Find the SubjectIDs from among the exposed CR, that had fever
# First check to see if anyone had fever before day 1. 
# CR subjects with at least one tympanic temperature reading above 37.9 C
# during the pre-exposure window (study days -3 through 0).
Qdata_CR_febrile_pre_day1 <- Qdata %>%
  filter(Randomization_DorIRorCR == "CR",
         StudyDay %in% -3:0,
         Tympanic.temp..degrees.C. > 37.9) %>%
  distinct(SubjectID)
# As it turns out, none of the CR had fever before day 1

# Same screen applied to study days 1 through 10: CR subjects with at least
# one tympanic temperature reading above 37.9 C.
Qdata_CR_febrile_day1to10 <- Qdata %>%
  filter(Randomization_DorIRorCR == "CR",
         StudyDay %in% 1:10,
         Tympanic.temp..degrees.C. > 37.9) %>%
  distinct(SubjectID)
# As it turns out, none of the CR ever had fever during study days 1-10

# ILI criterion 1 (fever plus a respiratory symptom) can only be met by the
# febrile subjects, so the day 1-10 febrile set is carried forward as-is.
ILI_V3_CR_criteria1 <- Qdata_CR_febrile_day1to10

# Now we can move to the second criteria for ILI for the CR

## Plan for implementing the second criteria for ILI (>= 2 symptoms for >=24 hours, 1 of which is respiratory) and merging with the first criteria for ILI
# To do this, first we will filter those subject IDs without fever.
# Then we will see who among those without fever had respiratory symptom on a single day plus at least one other symptom on the same single day
# Then we will see who among those without fever had respiratory symptom at frequency >=1 over 2 days plus at least one other resp symptom at freq >=1 for same 2 days
# Then we will add those subject IDs together to form ILI_CR_criteria2
# Since no subjects met ILI_CR_criteria1, combining ILI_CR_criteria1 and ILI_CR_criteria2 to make ILI_CR will yield just the criteria2 subjects

# Subject IDs of the exposed CR who were NOT febrile on study days 1-10:
# take the SubjectID column of Exposed_CR and drop every ID that appears in
# the day 1-10 febrile set (join key is inferred from the shared column).
Qdata_CR_afebrile_day1to10 <- anti_join(
  select(Exposed_CR, SubjectID),
  Qdata_CR_febrile_day1to10
)
## Joining, by = "SubjectID"
# Now we will see who among those without fever had respiratory symptom on a single day plus at least one other respiratory symptom on the same single day

# Build the per-observation symptom table for the afebrile CR subjects
# (those in the "Qdata_CR_afebrile_day1to10" df): one row per subject, study
# day and SDC_time, carrying a fever indicator plus 0/1 indicators for the
# 6 respiratory symptoms (runny nose, stuffy nose, sneezing, sore throat,
# cough, SOB) and the 3 non-respiratory symptoms (headache, muscle aches,
# malaise).

# Helper shared by the two cuts below.
# Input:  rows of Qdata (already restricted to the subjects/days of interest).
# Output: an ungrouped tibble with columns, in this order,
#   SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
#   runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
#   headache123, muscleAches123, malaise123
# keeping the first row for each SubjectID / StudyDay / QuarantineNumber /
# SDC_time combination, sorted by SubjectID then StudyDay.
# The later loops index these columns by position, so the order matters.
# The URI/LRI/SystemicI totals and the DPE* indicators that the earlier
# version computed here were dropped by select() and never used, so they are
# no longer calculated.
ili_v3_cr_symptom_flags <- function(df) {
  # 1 when the score is 1, 2 or 3; 0 for any other non-missing score;
  # NA when the score is missing.
  graded <- function(score) {
    as.numeric(score == 1 | score == 2 | score == 3)
  }
  df %>%
    mutate(Febrile = as.numeric(Tympanic.temp..degrees.C. > 37.9),
           runnyNose123 = graded(runnyNose),
           stuffyNose123 = graded(stuffyNose),
           sneezing123 = graded(sneezing),
           soreThroat123 = graded(soreThroat),
           cough123 = graded(cough),
           SOB123 = graded(SOB),
           headache123 = graded(headache),
           muscleAches123 = graded(muscleAches),
           malaise123 = graded(malaise)) %>%
    select(SubjectID, StudyDay, Sx_Date, SDC_time, QuarantineNumber, Febrile,
           runnyNose123, stuffyNose123, sneezing123, soreThroat123, cough123, SOB123,
           headache123, muscleAches123, malaise123) %>%
    group_by(SubjectID, StudyDay, QuarantineNumber) %>%
    distinct(SDC_time, .keep_all = TRUE) %>%
    arrange(SubjectID, StudyDay) %>%
    ungroup()
}

# Cut 1: rows flagged Microneut_VisitType == "Q baseline" within study days
# -3 through 10.
ILI_V3_CR_afebrile_day1to10 <- Qdata_CR_afebrile_day1to10 %>%
  left_join(Qdata) %>%
  filter(StudyDay %in% -3:10) %>%
  filter(Microneut_VisitType == "Q baseline") %>%
  ili_v3_cr_symptom_flags()
## Joining, by = "SubjectID"
# This is great but the way the data is put together, this leaves out the day -3 through day0 data
# (NOTE(review): that exclusion is a property of the data under the "Q baseline" filter, not of the
# StudyDay filter above -- confirm it still holds if the dataset is refreshed.)
# Therefore, as a quick fix, we cut a second dataset that keeps only the data from day -3 through day0
# (without the visit-type filter) and bind it back to the df that was just created.
ILI_V3_CR_before_day1_afebrile <- Qdata_CR_afebrile_day1to10 %>%
  left_join(Qdata) %>%
  filter(StudyDay %in% -3:0) %>%
  ili_v3_cr_symptom_flags()
## Joining, by = "SubjectID"
# Now binding together and sorting
ILI_V3_CR_afebrile <- bind_rows(ILI_V3_CR_afebrile_day1to10, ILI_V3_CR_before_day1_afebrile) %>%
  arrange(SubjectID, StudyDay)

# But, the current definition is just for post day0 so we will filter just day1-10: use "ILI_V3_CR_afebrile_day1to10" df
# Loop to get subjectIDs where there were 2 symptoms (one of which respiratory), each observed 3 times on the same day
# Single-day arm of ILI criterion 2: collect the subject IDs that have at least
# one study day on which a respiratory symptom and one other symptom together
# total 6 positive recordings (i.e. 3 + 3 when a day has three recordings).
#
# Column positions in ILI_V3_CR_afebrile_day1to10 (fixed by the select() that
# builds it): 1-5 identifiers, 6 Febrile, 7-12 respiratory indicators,
# 13-15 non-respiratory indicators.
#
# Result: c_sub, a one-column matrix grown with rbind() (one row per subject),
# which is what the later as.data.frame(c_sub) / rename("V1") step expects.
# The earlier version also looped over every row of the day's data without
# using the row index, repeating the same scan; that loop has been removed.
sub <- unique(ILI_V3_CR_afebrile_day1to10$SubjectID)
c_sub <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- ILI_V3_CR_afebrile_day1to10[ILI_V3_CR_afebrile_day1to10$SubjectID == subid, ]
  # Missing indicators count as "not present"
  temp1 <- temp[, 6:15]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:5], temp1)
  stud <- unique(temp$StudyDay)
  for (j in seq_along(stud)) {
    studyday <- stud[j]
    temp2 <- temp[temp$StudyDay == studyday, ]
    # Number of positive recordings per symptom on this study day
    # (positions 1-6 respiratory, 7-9 non-respiratory)
    day_totals <- colSums(temp2[, 7:15])
    for (l in 1:6) {
      # Pair respiratory symptom l with every later symptom column
      if (any(day_totals[l] + day_totals[(l + 1):9] == 6)) {
        token <- 1
        break
      }
    }
    if (token == 1) {
      break
    }
  }
  if (token == 1) {
    c_sub <- rbind(c_sub, subid)
  }
}
# This yields a c_sub vector of 3 subjectIDs
# More stringent variant of the single-day arm: a symptom pair only counts if
# neither symptom of the pair was recorded at all on or before study day 0.
#
# Uses ILI_V3_CR_afebrile (days -3 through 10, sorted by SubjectID, StudyDay);
# column positions are the same as above: 7-12 respiratory, 13-15 other.
#
# Changes from the earlier version:
#  * each subject is appended to c_sub2 at most once (previously there was no
#    break at the study-day level, so a subject could be appended again on a
#    later day);
#  * the baseline is taken as all rows with StudyDay <= 0, which matches
#    "rows 1 through the last day-0 row" for the sorted data but no longer
#    errors for a subject without a day-0 row.
sub <- unique(ILI_V3_CR_afebrile$SubjectID)
c_sub2 <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- ILI_V3_CR_afebrile[ILI_V3_CR_afebrile$SubjectID == subid, ]
  # Missing indicators count as "not present"
  temp1 <- temp[, 6:15]
  temp1[is.na(temp1)] <- 0
  temp <- bind_cols(temp[, 1:5], temp1)
  # TRUE for each symptom that was never recorded on or before day 0
  baseline_rows <- which(temp$StudyDay <= 0)
  baseline_clear <- colSums(temp[baseline_rows, 7:15]) == 0
  stud <- unique(temp$StudyDay)
  for (j in seq_along(stud)) {
    studyday <- stud[j]
    temp2 <- temp[temp$StudyDay == studyday, ]
    # Number of positive recordings per symptom on this study day
    day_totals <- colSums(temp2[, 7:15])
    for (l in 1:6) {
      partners <- (l + 1):9
      pair_hits <- day_totals[l] + day_totals[partners] == 6 &
        baseline_clear[partners]
      if (baseline_clear[l] && any(pair_hits)) {
        token <- 1
        break
      }
    }
    if (token == 1) {
      break
    }
  }
  if (token == 1) {
    c_sub2 <- rbind(c_sub2, subid)
  }
}
# The earlier run of this step yielded a c_sub2 vector of 3 subjectIDs, the same as the less stringent
# criteria (re-run to confirm after the duplicate-append fix above).
# For now we will use the less stringent criteria
# Now get the df of subject IDs from the less stringent c_sub
# Turn the one-column c_sub matrix from the single-day loop into a data frame
# whose only column is named SubjectID.
ILI_V3_infected_donors_criteria2_singleday <- rename(
  as.data.frame(c_sub),
  SubjectID = "V1"
)
# This is the output for the first part (single day) of the second ILI criteria

# Now implement the second part of the second ILI criteria: >=2 Sx (with >=1 resp) at any frequency over the same 2 consecutive study days

# Create an "CR_grade123_afebrile" df by collapsing the three study day values into 1, and select only the 9 symptoms that will be used as part of this analysis (the DPE aren't used here)
# The 9 are: runny nose, stuffy nose, sneeze, sore throat, cough, SOB, headache, muscleache, malaise
# Also remember to select the afebrile group (those who were never febrile, n = 35, which is actually all the CR)

# Collapse the within-day recordings to a single row per subject and study
# day: each indicator becomes the maximum of that day's values, i.e. 1 if the
# symptom (or fever) was recorded at least once that day.
# NOTE(review): max() is called without na.rm, so a day with any missing
# recording for a symptom yields NA for that symptom; the loops below then
# turn NA into 0. Confirm that is the intended handling.
CR_grade123_afebrile <- summarise(
  group_by(ILI_V3_CR_afebrile, SubjectID, StudyDay, QuarantineNumber),
  Febrile = max(Febrile),
  runnyNose123 = max(runnyNose123),
  stuffyNose123 = max(stuffyNose123),
  sneezing123 = max(sneezing123),
  soreThroat123 = max(soreThroat123),
  cough123 = max(cough123),
  SOB123 = max(SOB123),
  headache123 = max(headache123),
  muscleAches123 = max(muscleAches123),
  malaise123 = max(malaise123)
)

# Resulting column positions: 1-3 identifiers, 4 Febrile, 5-10 respiratory
# symptoms, 11-13 the other symptoms that matter for this definition.
# Next: loop for 2 or more symptoms, one of which is respiratory.
# The first pass ignores whether the symptoms were already positive before day 1.

# Restrict to study days 1-10 for that first pass
CR_grade123_afebrile_day1to10 <- CR_grade123_afebrile %>%
  filter(StudyDay %in% 1:10)

# Now grabbing the subjectIDs that meet the criteria for ILI over 2 consecutive study days (with at least 1 Sx as Resp)
# Two-day arm of ILI criterion 2: collect the subject IDs that have two
# consecutive study days (within days 1-10) on which the same >=2 symptoms,
# at least 1 of them respiratory, were both present.
#
# Column positions in CR_grade123_afebrile_day1to10: 1-3 identifiers,
# 4 Febrile, 5-10 respiratory indicators, 11-13 non-respiratory indicators.
# After NA -> 0 every indicator is 0 or 1, so "all four cells sum to 4" for a
# symptom pair is the same as "both symptoms are 1 on both days".
#
# Result: c_sub, a one-column matrix grown with rbind() (one row per subject).
# seq_len() is used so that a subject with a single day of data is skipped
# instead of indexing past the end of the table.
sub <- unique(CR_grade123_afebrile_day1to10$SubjectID)
c_sub <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- CR_grade123_afebrile_day1to10[CR_grade123_afebrile_day1to10$SubjectID == subid, ]
  # Missing indicators count as "not present"
  temp1 <- temp[, 4:13]
  temp1[is.na(temp1)] <- 0
  temp <- ungroup(bind_cols(temp[, 1:3], temp1))
  for (j in seq_len(max(nrow(temp) - 1, 0))) {
    # Only adjacent rows that are truly consecutive study days qualify
    if (!isTRUE(temp$StudyDay[j + 1] == temp$StudyDay[j] + 1)) {
      next
    }
    # TRUE for each symptom present on both days
    # (positions 1-6 respiratory, 7-9 non-respiratory)
    both_days <- unlist(temp[j, 5:13]) + unlist(temp[j + 1, 5:13]) == 2
    if (any(both_days[1:6]) && sum(both_days) >= 2) {
      c_sub <- rbind(c_sub, subid)
      token <- 1
      break
    }
  }
}
# The loop above yielded a c_sub of 9 subjectIDs. A more stringent variant
# excludes symptoms that had already occurred during the baseline days from
# the classification criteria:
# a symptom pair only counts if neither symptom was present on any day up to
# and including study day -1.
# NOTE(review): the baseline here stops at day -1, whereas the single-day
# stringent loop uses day 0 -- confirm which cut-off is intended.
#
# Uses CR_grade123_afebrile (one row per subject and study day, all days);
# column positions: 1-3 identifiers, 4 Febrile, 5-10 respiratory, 11-13 other.
#
# Fix: the earlier version accumulated the second symptom's baseline total as
# `sum2 <- sum1 + ...`, so it never summed the second symptom over the
# baseline days. Each symptom's baseline total is now computed on its own.
# The baseline is taken as all rows with StudyDay <= -1, which matches
# "rows 1 through the last day -1 row" for the sorted data and does not error
# when a subject has no day -1 row.
sub <- unique(CR_grade123_afebrile$SubjectID)
c_sub2 <- c()
token <- 0
for (i in seq_along(sub)) {
  token <- 0
  subid <- sub[i]
  temp <- CR_grade123_afebrile[CR_grade123_afebrile$SubjectID == subid, ]
  # Missing indicators count as "not present"
  temp1 <- temp[, 4:13]
  temp1[is.na(temp1)] <- 0
  temp <- ungroup(bind_cols(temp[, 1:3], temp1))
  # TRUE for each symptom that was never present during the baseline days
  baseline_rows <- which(temp$StudyDay <= -1)
  baseline_clear <- colSums(temp[baseline_rows, 5:13]) == 0
  for (j in seq_len(max(nrow(temp) - 1, 0))) {
    # Only adjacent rows that are truly consecutive study days qualify
    if (!isTRUE(temp$StudyDay[j + 1] == temp$StudyDay[j] + 1)) {
      next
    }
    # Symptoms present on both days AND absent throughout the baseline
    # (positions 1-6 respiratory, 7-9 non-respiratory)
    both_days <- unlist(temp[j, 5:13]) + unlist(temp[j + 1, 5:13]) == 2
    eligible <- both_days & baseline_clear
    if (any(eligible[1:6]) && sum(eligible) >= 2) {
      c_sub2 <- rbind(c_sub2, subid)
      token <- 1
      break
    }
  }
}
# The count recorded earlier for c_sub2 (8 subjectIDs, 1 less than the c_sub) was obtained before the
# sum2 fix above; re-run to confirm. Either way we go with the less stringent criteria for now and use
# the c_sub of 9 subjectIDs
# Get this ILI second criteria bit into a df using the less stringent c_sub vector of 9 subjectIDs
# Two-day arm result: wrap the one-column c_sub matrix in a data frame with a
# single SubjectID column.
ILI_V3_CR_criteria2_2days <- rename(as.data.frame(c_sub), SubjectID = "V1")

# Criterion 2 = union of the single-day and two-day subject lists
ILI_V3_CR_criteria2 <- ILI_V3_infected_donors_criteria2_singleday %>%
  full_join(ILI_V3_CR_criteria2_2days)
## Joining, by = "SubjectID"
# ILI (version 3) = union of criterion 1 and criterion 2
ILI_V3_CR <- ILI_V3_CR_criteria1 %>%
  full_join(ILI_V3_CR_criteria2)
## Joining, by = "SubjectID"
# Attach each subject's QuarantineNumber and count distinct ILI subjects per
# quarantine, for use as a Table 3 column.
ILI_CR_V3_table3 <- summarise(
  group_by(left_join(ILI_V3_CR, Qdata_QuarantineNumbers), QuarantineNumber),
  Number_ILI_CR_V3 = n_distinct(SubjectID)
)
## Joining, by = "SubjectID"
# Append the ILI count to the running CR Table 3 and express it as a fraction
# of the exposed CR in each quarantine.
Qdata_table3_CR <- left_join(Qdata_table3_CR, ILI_CR_V3_table3,
                             by = "QuarantineNumber") %>%
  mutate(Fraction_ILI_V3_CR_of_ExposedCR = Number_ILI_CR_V3 / NumberExposedCR)
print(Qdata_table3_CR)
## # A tibble: 3 x 8
##   QuarantineNumber NumberExposedCR NumberInfectedCR Fraction_Inf_ov…
##              <int>           <int>            <int>            <dbl>
## 1                1              11               NA           NA    
## 2                2               9                3            0.333
## 3                3              15               NA           NA    
## # ... with 4 more variables: Number_Symptomatic_V3_CR <int>,
## #   Fraction_Symptomatic_V3_CR_of_ExposedCR <dbl>, Number_ILI_CR_V3 <int>,
## #   Fraction_ILI_V3_CR_of_ExposedCR <dbl>
#### Table 3: e1) IR: Number of febrile (and % of exposed) ####

# Use the list of exposed IR to do this analysis
# All Qdata rows for IR subjects where the tympanic temperature exceeded 37.9 C
# (no study-day restriction is applied here).
Qdata_exposed_febrile_IR <- filter(Qdata,
                                   Randomization_DorIRorCR == "IR",
                                   Tympanic.temp..degrees.C. > 37.9)

# Distinct febrile IR subjects per quarantine
Qdata_exposed_febrile_table3_IR <- summarise(
  group_by(Qdata_exposed_febrile_IR, QuarantineNumber),
  Number_Febrile_IR = n_distinct(SubjectID)
)

# Append the febrile count to the running IR Table 3, plus the fraction of the
# exposed IR it represents.
Qdata_table3_IR <- left_join(Qdata_table3_IR, Qdata_exposed_febrile_table3_IR,
                             by = "QuarantineNumber") %>%
  mutate(Fraction_Febrile_over_ExposedIR = Number_Febrile_IR / NumberExposedIR)

#### Table 3: e2) CR: Number of febrile (and % of exposed) ####

# Use the list of exposed CR to do this analysis
# All Qdata rows for CR subjects where the tympanic temperature exceeded 37.9 C
# (no study-day restriction is applied here).
Qdata_exposed_febrile_CR <- filter(Qdata,
                                   Randomization_DorIRorCR == "CR",
                                   Tympanic.temp..degrees.C. > 37.9)

# Distinct febrile CR subjects per quarantine
Qdata_exposed_febrile_table3_CR <- summarise(
  group_by(Qdata_exposed_febrile_CR, QuarantineNumber),
  Number_Febrile_CR = n_distinct(SubjectID)
)

# Append the febrile count to the running CR Table 3, plus the fraction of the
# exposed CR it represents.
Qdata_table3_CR <- left_join(Qdata_table3_CR, Qdata_exposed_febrile_table3_CR,
                             by = "QuarantineNumber") %>%
  mutate(Fraction_Febrile_over_ExpCR = Number_Febrile_CR / NumberExposedCR)

#### Table 3: f1) IR: Number of PCR confirmed infection (and % of exposed) ####

# This was already done to get the number of infected donors for the first few columns in this Table 3
# Redo what was done earlier, but tweaking for the purpose of this column in the table 3

# Get list of SubjectID and the number of days each was positive by PCR
# IR subjects with a positive PCR result on at least one study day.
# A result counts as positive when InfA_Ct is non-missing, below 38 and not 0.
# Output: one row per SubjectID with NumberDaysPosPCR_IR = number of distinct
# study days that had a positive result.
# Fix: the final filter previously referred to `NumberDaysPosPCR`, a column
# that does not exist in this pipeline; it now uses the `_IR` column created
# by summarize(), matching the CR version of this step.
Qdata_pcr_pos1_or_more_days_IR <- Qdata %>%
  filter(Randomization_DorIRorCR == "IR") %>%
  filter(!is.na(InfA_Ct)) %>%
  filter(InfA_Ct < 38 & InfA_Ct != 0) %>%
  group_by(SubjectID) %>%
  summarize(NumberDaysPosPCR_IR = n_distinct(StudyDay)) %>%
  filter(NumberDaysPosPCR_IR >= 1)
print(Qdata_pcr_pos1_or_more_days_IR)
## # A tibble: 0 x 2
## # ... with 2 variables: SubjectID <int>, NumberDaysPosPCR_IR <int>
# Add the Q numbers to the list of SubjectIDs and the number of PCR positive days and summarize by Q
# Note: there was not data on which to add Q numbers here. Running code anyways because to deal with generalized case
# Attach the quarantine number to each PCR-positive IR subject and count
# distinct subjects per quarantine.
Qdata_pcr_pos1_or_more_days_table3_IR <- summarise(
  group_by(
    left_join(Qdata_pcr_pos1_or_more_days_IR, Qdata_QuarantineNumbers,
              by = "SubjectID"),
    QuarantineNumber
  ),
  Number_PCR_Infected_IR = n_distinct(SubjectID)
)
print(Qdata_pcr_pos1_or_more_days_table3_IR)
## # A tibble: 0 x 2
## # ... with 2 variables: QuarantineNumber <int>,
## #   Number_PCR_Infected_IR <int>
# Append the PCR-positive count to the running IR Table 3, plus the fraction of
# the exposed IR it represents.
Qdata_table3_IR <- left_join(Qdata_table3_IR,
                             Qdata_pcr_pos1_or_more_days_table3_IR,
                             by = "QuarantineNumber") %>%
  mutate(Fraction_PCR_Infected_over_ExposedIR = Number_PCR_Infected_IR / NumberExposedIR)
print(Qdata_table3_IR)
## # A tibble: 3 x 12
##   QuarantineNumber NumberExposedIR NumberInfectedIR Fraction_Inf_ov…
##              <int>           <int>            <int>            <dbl>
## 1                1              10               NA               NA
## 2                2              10               NA               NA
## 3                3              20               NA               NA
## # ... with 8 more variables: Number_IR_Symptomatic_V3 <int>,
## #   Fraction_IR_Symptomatic_V3_of_ExposedIR <dbl>, Number_ILI_V3_IR <int>,
## #   Fraction_ILI_V3_IR_of_ExposedIR <dbl>, Number_Febrile_IR <int>,
## #   Fraction_Febrile_over_ExposedIR <dbl>, Number_PCR_Infected_IR <int>,
## #   Fraction_PCR_Infected_over_ExposedIR <dbl>
#### Table 3: f2) CR: Number of PCR confirmed infection (and % of exposed) ####

# This was already done to get the number of infected donors for the first few columns in this Table 3
# Redo what was done earlier, but tweaking for the purpose of this column in the table 3

# Get list of SubjectID and the number of days each was positive by PCR
# CR subjects with a positive PCR result on at least one study day.
# A result counts as positive when InfA_Ct is non-missing, below 38 and not 0.
# Output: one row per SubjectID with NumberDaysPosPCR_CR = number of distinct
# study days that had a positive result.
Qdata_pcr_pos1_or_more_days_CR <- Qdata %>%
  filter(Randomization_DorIRorCR == "CR",
         !is.na(InfA_Ct),
         InfA_Ct < 38,
         InfA_Ct != 0) %>%
  group_by(SubjectID) %>%
  summarise(NumberDaysPosPCR_CR = n_distinct(StudyDay)) %>%
  filter(NumberDaysPosPCR_CR >= 1)
print(Qdata_pcr_pos1_or_more_days_CR)
## # A tibble: 2 x 2
##   SubjectID NumberDaysPosPCR_CR
##       <int>               <int>
## 1       236                   1
## 2       242                   1
# Add the Q numbers to the list of SubjectIDs and the number of PCR positive days and summarize by Q
# Attach the quarantine number to each PCR-positive CR subject and count
# distinct subjects per quarantine.
Qdata_pcr_pos1_or_more_days_table3_CR <- summarise(
  group_by(
    left_join(Qdata_pcr_pos1_or_more_days_CR, Qdata_QuarantineNumbers,
              by = "SubjectID"),
    QuarantineNumber
  ),
  Number_PCR_Infected_CR = n_distinct(SubjectID)
)
print(Qdata_pcr_pos1_or_more_days_table3_CR)
## # A tibble: 1 x 2
##   QuarantineNumber Number_PCR_Infected_CR
##              <int>                  <int>
## 1                2                      2
# Append the PCR-positive count to the running CR Table 3 (table3_CR), plus the
# fraction of the exposed CR it represents.
Qdata_table3_CR <- left_join(Qdata_table3_CR,
                             Qdata_pcr_pos1_or_more_days_table3_CR,
                             by = "QuarantineNumber") %>%
  mutate(Fraction_PCR_Inf_over_ExpCR = Number_PCR_Infected_CR / NumberExposedCR)
print(Qdata_table3_CR)
## # A tibble: 3 x 12
##   QuarantineNumber NumberExposedCR NumberInfectedCR Fraction_Inf_ov…
##              <int>           <int>            <int>            <dbl>
## 1                1              11               NA           NA    
## 2                2               9                3            0.333
## 3                3              15               NA           NA    
## # ... with 8 more variables: Number_Symptomatic_V3_CR <int>,
## #   Fraction_Symptomatic_V3_CR_of_ExposedCR <dbl>, Number_ILI_CR_V3 <int>,
## #   Fraction_ILI_V3_CR_of_ExposedCR <dbl>, Number_Febrile_CR <int>,
## #   Fraction_Febrile_over_ExpCR <dbl>, Number_PCR_Infected_CR <int>,
## #   Fraction_PCR_Inf_over_ExpCR <dbl>
#### Detailed report on the CRs who were PCR positive ####
## There were no CRs that were PCR positive but there were some that were PCR positive for a single day
## Let's look at the serology data and respiratory symptom data for these individuals

# First confirm who was PCR positive for a single day
# Serology rows for the CR observations that were PCR positive
# (InfA_Ct non-missing, below 38 and not 0).
# Steps: de-duplicate on draw date / visit type / symptom date / SDC_time,
# drop rows without a draw date, sort by subject and draw date, keep the
# subject, quarantine-through-microneut-titer and HAI columns, then keep the
# first row per subject and visit type.
# NOTE(review): the first distinct() does not include SubjectID, so two
# subjects sharing the same draw date, visit type, Sx_Date and SDC_time would
# be collapsed into one row -- confirm this cannot happen in the data.
PCR_Pos_CR_Serology_Report <- Qdata %>%
  filter(Randomization_DorIRorCR == "CR",
         !is.na(InfA_Ct),
         InfA_Ct < 38,
         InfA_Ct != 0) %>%
  distinct(Microneut_DrawDate, Microneut_VisitType, Sx_Date, SDC_time, .keep_all = TRUE) %>%
  filter(!is.na(Microneut_DrawDate)) %>%
  arrange(SubjectID, Microneut_DrawDate) %>%
  select(SubjectID, QuarantineNumber: Microneutralization.Titer.to.A.Wisconsin.67.2005, HAI_dayminus2: HAI_day28_recodeNDA) %>%
  distinct(SubjectID, Microneut_VisitType, .keep_all = TRUE)
# This gives us the serology and PCR data, on the days that were PCR positive, for the 2 CR with positive PCR
# Now, let's get the data for MN and then the data for HAI and prepare it for tabular representation

# MN Serology table for #236 and #242
# Microneutralization view of the serology report: give the columns
# presentation-ready names, then keep the span from the subject ID through the
# MN titer column.
PCR_Pos_CR_Serology_Report_MN <- PCR_Pos_CR_Serology_Report %>%
  rename(`Subject ID` = SubjectID,
         `Quarantine Number` = QuarantineNumber,
         `Draw Date` = Microneut_DrawDate,
         `MN Visit` = Microneut_VisitType,
         `MN Titer to A/WI/67/2005` = Microneutralization.Titer.to.A.Wisconsin.67.2005) %>%
  select(`Subject ID`:`MN Titer to A/WI/67/2005`)
# Write out this df for future RMarkdown reporting
# write.csv(PCR_Pos_CR_Serology_Report_MN, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_PCR_Pos_1plus_days_CR_Serology_Report_MN.csv")
# HAI Serology table for the PCR-positive CR in PCR_Pos_CR_Serology_Report
# (#236 and #242 per the PCR summary printed above): one row per subject with
# the day -2 HAI value, its recoded-nondetect version and the day 28 HAI value,
# under presentation-ready column names.
PCR_Pos_CR_Serology_Report_HAI <- PCR_Pos_CR_Serology_Report %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(SubjectID:QuarantineNumber, HAI_dayminus2, HAI_dayminus2_recodeNDA, HAI_day28) %>%
  rename(`Subject ID` = SubjectID,
         `Quarantine Number` = QuarantineNumber,
         `HAI 2 Days Before Entry to Q` = HAI_dayminus2,
         `HAI 2 Days Before Entry to Q (Recoded Nondetect)` = HAI_dayminus2_recodeNDA,
         `HAI Day 28` = HAI_day28)
# Write out this df for future RMarkdown reporting
# write.csv(PCR_Pos_CR_Serology_Report_HAI, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_PCR_Pos_1plus_days_CR_Serology_Report_HAI.csv")

# Now let's get the symptoms profiles for each of these CR with positive PCR
# Symptom profiles for the two PCR-positive CR subjects (236 and 242): one row
# per subject, symptom date and SDC_time, with a total respiratory score
# (runny nose + stuffy nose + sneezing + sore throat + cough + SOB).
#
# Fixes:
#  * `SubjectID == c(236, 242)` recycled the two IDs down the column, so a row
#    was kept only when its ID happened to line up with the alternating
#    236/242 pattern (R warned about the length mismatch). `%in%` keeps every
#    row for either subject.
#  * distinct() now includes SubjectID so that the two subjects' recordings
#    taken at the same date/time are not collapsed into a single row.
# The result is left grouped by StudyDay, as before.
Positive_PCR_CR_Symptoms <- Qdata %>%
  filter(SubjectID %in% c(236, 242)) %>%
  select(SubjectID:QuarantineNumber, StudyDay:Tympanic.temp..degrees.C.) %>%
  filter(!is.na(SDC_time)) %>%
  distinct(SubjectID, Sx_Date, SDC_time, .keep_all = TRUE) %>%
  arrange(SubjectID, Sx_Date, SDC_time) %>%
  group_by(StudyDay) %>%
  mutate(Total_Respiratory_Score = runnyNose + stuffyNose + sneezing + soreThroat + cough + SOB)
# The earlier `==` version reported: no symptom scores (respiratory or any others) above 0,
# so no plot was made. That conclusion was drawn from the partially filtered rows -- re-check it
# against the full set of rows now returned for both subjects.

#### Table 3: g1) IR: Number of PCR confirmed infection and seroconversion (and % of exposed) ####

# This was already done to get the number of infected IR for the first few columns in this Table 3

# From Qdata_infected_IR (built earlier in the file), keep rows with at least
# one PCR-positive day and a quarantine number in either of the two joined
# quarantine-number columns.
Inf_PCR_and_Sero_IR <- Qdata_infected_IR %>%
  filter(NumberDaysPosPCR_IR >= 1,
         !(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y)))

# Distinct qualifying subjects per quarantine
Inf_PCR_and_Sero_table_IR <- summarise(
  group_by(Inf_PCR_and_Sero_IR, QuarantineNumber),
  Number_PCR_and_Sero_Positive_IR = n_distinct(SubjectID)
)

# Append the count to the running IR Table 3, plus the fraction of the exposed
# IR it represents.
Qdata_table3_IR <- left_join(Qdata_table3_IR, Inf_PCR_and_Sero_table_IR,
                             by = "QuarantineNumber") %>%
  mutate(Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR = Number_PCR_and_Sero_Positive_IR / NumberExposedIR)

#### Table 3: g2) CR: Number of PCR confirmed infection and seroconversion (and % of exposed) ####

# This was already done to get the number of infected CR for the first few columns in this Table 3

# From Qdata_infected_CR (built earlier in the file), keep rows with at least
# one PCR-positive day and a quarantine number in either of the two joined
# quarantine-number columns.
Inf_PCR_and_Sero_CR <- Qdata_infected_CR %>%
  filter(NumberDaysPosPCR_CR >= 1,
         !(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y)))

# Distinct qualifying subjects per quarantine
Inf_PCR_and_Sero_table3_CR <- summarise(
  group_by(Inf_PCR_and_Sero_CR, QuarantineNumber),
  Number_PCR_and_Sero_Positive_CR = n_distinct(SubjectID)
)

# Append the count to the running CR Table 3, plus the fraction of the exposed
# CR it represents.
Qdata_table3_CR <- left_join(Qdata_table3_CR, Inf_PCR_and_Sero_table3_CR,
                             by = "QuarantineNumber") %>%
  mutate(Fraction_Inf_PCR_and_Sero_Positive_CR = Number_PCR_and_Sero_Positive_CR / NumberExposedCR)

#### Table 3: h1) IR: Number of seroconversion by HAI: MN: Either (and % of exposed) ####

# This was already done to get the number of infected IR for the first few columns in this Table 1
# Reworking here to tailor the current Table 3 columns in question

## HAI

# Qdata_HAI_pos is the list (generated in section a) above) with seroconversion by HAI (Glasgow serology)
# Group by Q day and summarize number of distinct SubjectIDs
# HAI: distinct subjects in Qdata_HAI_pos_IR per quarantine
Qdata_HAI_pos_table3_IR <- summarise(
  group_by(Qdata_HAI_pos_IR, QuarantineNumber),
  Number_HAI_Positive_IR = n_distinct(SubjectID)
)

# Append the HAI count to the running IR Table 3. The fraction of exposed
# (Number_HAI_Positive_IR/NumberExposedIR) is not needed for the final table,
# so it is not computed.
Qdata_table3_IR <- left_join(Qdata_table3_IR, Qdata_HAI_pos_table3_IR,
                             by = "QuarantineNumber")

## Microneuts

# Distinct subjects in Qdata_Microneut_pos_IR per quarantine
Qdata_Microneut_pos_table3_IR <- summarise(
  group_by(Qdata_Microneut_pos_IR, QuarantineNumber),
  Number_Microneut_Positive_IR = n_distinct(SubjectID)
)

# Append the microneut count to the running IR Table 3. The fraction of exposed
# (Number_Microneut_Positive_IR/NumberExposedIR) is not needed for the final
# table, so it is not computed.
Qdata_table3_IR <- left_join(Qdata_table3_IR, Qdata_Microneut_pos_table3_IR,
                             by = "QuarantineNumber")

## Either HAI or MN

# Distinct subjects in Qdata_infected_IR that carry a quarantine number in
# either of the two joined quarantine-number columns, counted per quarantine.
Pos_Either_HAI_or_MN_table3_IR <- Qdata_infected_IR %>%
  filter(!(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y))) %>%
  group_by(QuarantineNumber) %>%
  summarise(Pos_Either_HAI_or_MN_IR = n_distinct(SubjectID))

# Append the either-assay count to the running IR Table 3. The fraction of
# exposed (Pos_Either_HAI_or_MN_IR/NumberExposedIR) is not needed for the final
# table, so it is not computed.
Qdata_table3_IR <- left_join(Qdata_table3_IR, Pos_Either_HAI_or_MN_table3_IR,
                             by = "QuarantineNumber")

#### Table 3: h2) CR: Number of seroconversion by HAI: MN: Either (and % of exposed) ####

# This was already done to get the number of infected CR for the first few columns in this Table 1
# Reworking here to tailor the current Table 3 columns in question

## HAI

# Qdata_HAI_pos is the list (generated in section a) above) with seroconversion by HAI (Glasgow serology)
# Group by Q day and summarize number of distinct SubjectIDs
# HAI: distinct subjects in Qdata_HAI_pos_CR per quarantine
Qdata_HAI_pos_table3_CR <- summarise(
  group_by(Qdata_HAI_pos_CR, QuarantineNumber),
  Number_HAI_Positive_CR = n_distinct(SubjectID)
)
print(Qdata_HAI_pos_table3_CR)
## # A tibble: 1 x 2
##   QuarantineNumber Number_HAI_Positive_CR
##              <int>                  <int>
## 1                2                      1
# Append the HAI count to the running CR Table 3. The fraction of exposed
# (Number_HAI_Positive_CR/NumberExposedCR) is not needed for the final table,
# so it is not computed.
Qdata_table3_CR <- left_join(Qdata_table3_CR, Qdata_HAI_pos_table3_CR,
                             by = "QuarantineNumber")
print(Qdata_table3_CR)
## # A tibble: 3 x 15
##   QuarantineNumber NumberExposedCR NumberInfectedCR Fraction_Inf_ov…
##              <int>           <int>            <int>            <dbl>
## 1                1              11               NA           NA    
## 2                2               9                3            0.333
## 3                3              15               NA           NA    
## # ... with 11 more variables: Number_Symptomatic_V3_CR <int>,
## #   Fraction_Symptomatic_V3_CR_of_ExposedCR <dbl>, Number_ILI_CR_V3 <int>,
## #   Fraction_ILI_V3_CR_of_ExposedCR <dbl>, Number_Febrile_CR <int>,
## #   Fraction_Febrile_over_ExpCR <dbl>, Number_PCR_Infected_CR <int>,
## #   Fraction_PCR_Inf_over_ExpCR <dbl>,
## #   Number_PCR_and_Sero_Positive_CR <int>,
## #   Fraction_Inf_PCR_and_Sero_Positive_CR <dbl>,
## #   Number_HAI_Positive_CR <int>
## Microneuts

# Per-quarantine count of distinct control recipients (CR) who seroconverted by microneutralization.
# Qdata_Microneut_pos_CR is the list generated in section a) above (CDC serology).
Qdata_Microneut_pos_table3_CR <- summarize(
  group_by(Qdata_Microneut_pos_CR, QuarantineNumber),
  Number_Microneut_Positive_CR = n_distinct(SubjectID)
)
print(Qdata_Microneut_pos_table3_CR)
## # A tibble: 1 x 2
##   QuarantineNumber Number_Microneut_Positive_CR
##              <int>                        <int>
## 1                2                            1
# Attach the MN seroconversion counts to the cumulative CR Table 3 data, matching on quarantine.
# The final Table 3 does not show this percentage, so the fraction step stays disabled:
# mutate(Fraction_MN_Positive_of_ExposedCR = Number_Microneut_Positive_CR/NumberExposedCR)
Qdata_table3_CR <- left_join(
  Qdata_table3_CR,
  Qdata_Microneut_pos_table3_CR,
  by = "QuarantineNumber"
)
print(Qdata_table3_CR)
## # A tibble: 3 x 16
##   QuarantineNumber NumberExposedCR NumberInfectedCR Fraction_Inf_ov…
##              <int>           <int>            <int>            <dbl>
## 1                1              11               NA           NA    
## 2                2               9                3            0.333
## 3                3              15               NA           NA    
## # ... with 12 more variables: Number_Symptomatic_V3_CR <int>,
## #   Fraction_Symptomatic_V3_CR_of_ExposedCR <dbl>, Number_ILI_CR_V3 <int>,
## #   Fraction_ILI_V3_CR_of_ExposedCR <dbl>, Number_Febrile_CR <int>,
## #   Fraction_Febrile_over_ExpCR <dbl>, Number_PCR_Infected_CR <int>,
## #   Fraction_PCR_Inf_over_ExpCR <dbl>,
## #   Number_PCR_and_Sero_Positive_CR <int>,
## #   Fraction_Inf_PCR_and_Sero_Positive_CR <dbl>,
## #   Number_HAI_Positive_CR <int>, Number_Microneut_Positive_CR <int>
## Either HAI or MN

# Per-quarantine count of distinct CR subjects that have a non-missing QuarantineNumber.x or
# QuarantineNumber.y in Qdata_infected_CR (built earlier in the script).
# NOTE(review): the .x/.y columns presumably come from the HAI and MN tables joined upstream,
# so a non-missing value marks seroconversion by that assay -- confirm against that join.
Pos_Either_HAI_or_MN_table3_CR <- summarize(
  group_by(
    filter(Qdata_infected_CR, !(is.na(QuarantineNumber.x) & is.na(QuarantineNumber.y))),
    QuarantineNumber
  ),
  Positive_Either_HAI_or_MN_CR = n_distinct(SubjectID)
)
print(Pos_Either_HAI_or_MN_table3_CR)
## # A tibble: 1 x 2
##   QuarantineNumber Positive_Either_HAI_or_MN_CR
##              <int>                        <int>
## 1                2                            1
# Attach the "either HAI or MN" seroconversion counts to the cumulative CR Table 3 data,
# matching rows on QuarantineNumber.
# The final Table 3 does not show this percentage, so the fraction step stays disabled:
# mutate(Fraction_Positive_Either_HAI_or_MN_of_ExposedCR = Positive_Either_HAI_or_MN_CR/NumberExposedCR)
Qdata_table3_CR <- left_join(
  Qdata_table3_CR,
  Pos_Either_HAI_or_MN_table3_CR,
  by = "QuarantineNumber"
)

#### Detailed report on the CR who seroconverted ####
## The above shows that there was a single CR who seroconverted (by both MN and HAI evidence)
## Let's do a detailed summary of the serology and symptoms data associated with this CR: #233

# First let's do the report for serology (we know the PCR is negative for #233)

# Serology rows for subject 233: one row per microneutralization visit type.
# Steps, in order (the order matters because distinct() keeps the first row it meets):
#   1. keep only subject 233;
#   2. de-duplicate on draw date / MN visit type / symptom date / SDC_time;
#   3. sort by MN draw date;
#   4. keep SubjectID, the columns from QuarantineNumber through the MN titer, and the HAI
#      columns from HAI_dayminus2 through HAI_day28_recodeNDA;
#   5. drop rows with no MN draw date;
#   6. keep the first remaining row of each MN visit type.
# NOTE(review): arrange() sorts Microneut_DrawDate as stored. If read.csv left it as a
# character/factor column the order is lexical, not chronological -- confirm the date format.
seroconversion_CR_serology_report <- Qdata %>%
  filter(SubjectID == 233) %>%
  distinct(Microneut_DrawDate, Microneut_VisitType, Sx_Date, SDC_time, .keep_all = TRUE) %>%
  arrange(Microneut_DrawDate) %>%
  select(SubjectID, QuarantineNumber: Microneutralization.Titer.to.A.Wisconsin.67.2005, HAI_dayminus2: HAI_day28_recodeNDA) %>%
  filter(!is.na(Microneut_DrawDate)) %>%
  distinct(Microneut_VisitType, .keep_all = TRUE)
# Need to make one table for the MN data and one table for the HAI data
# MN Serology table for #233
# MN view of the serology report: keep the leading columns (SubjectID through the MN titer)
# and give them display-ready names for the supplementary report.
seroconversion_CR_serology_report_MN <- rename(
  select(
    seroconversion_CR_serology_report,
    SubjectID:Microneutralization.Titer.to.A.Wisconsin.67.2005
  ),
  `Subject ID` = SubjectID,
  `Quarantine Number` = QuarantineNumber,
  `Draw Date` = Microneut_DrawDate,
  `MN Visit` = Microneut_VisitType,
  `MN Titer to A/WI/67/2005` = Microneutralization.Titer.to.A.Wisconsin.67.2005
)
# Write out this df for future RMarkdown reporting
# write.csv(seroconversion_CR_serology_report_MN, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Seroconverted_CR_MN_Serology.csv")
# HAI Serology table for #233
# HAI view of the serology report: a single row for the subject (HAI values repeat across the
# MN visit rows), restricted to the ID columns and the HAI titers, with display-ready names.
# SubjectID and QuarantineNumber are the first two (adjacent) columns of the serology report,
# so naming them individually selects the same columns as the SubjectID:QuarantineNumber range.
seroconversion_CR_serology_report_HAI <- seroconversion_CR_serology_report %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(
    `Subject ID` = SubjectID,
    `Quarantine Number` = QuarantineNumber,
    `HAI 2 Days Before Entry to Q` = HAI_dayminus2,
    `HAI 2 Days Before Entry to Q (Recoded Nondetect)` = HAI_dayminus2_recodeNDA,
    `HAI Day 28` = HAI_day28
  )
# Write out this df for future RMarkdown reporting
# write.csv(seroconversion_CR_serology_report_HAI, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Seroconverted_CR_HAI_Serology.csv")

# Now that we have tables for MN and HAI serology, let's do the report for symptoms for #233

# Symptom records for subject 233 up to and including study day 10, one row per
# (Sx_Date, SDC_time) combination.
# Respiratory_Total is the sum of the six respiratory symptom scores (runny nose, stuffy nose,
# sneezing, sore throat, cough, shortness of breath) divided by 3.
# NOTE(review): the division by 3 presumably reflects three symptom recordings per study day,
# so that summing Respiratory_Total by StudyDay downstream yields a daily average -- confirm.
# NOTE(review): a missing value in any of the six scores makes Respiratory_Total NA for that row.
Seroconversion_CR_Symptoms <- Qdata %>%
  filter(SubjectID == 233) %>%
  select(SubjectID:QuarantineNumber, StudyDay:Tympanic.temp..degrees.C.) %>%
  distinct(Sx_Date, SDC_time, .keep_all = TRUE) %>%
  filter(StudyDay <= 10) %>%
  mutate(Respiratory_Total = (runnyNose + stuffyNose + sneezing + soreThroat + cough + SOB)/3) 
# Note that we divided the respiratory symptoms total by 3 to get the average score per day for each resp score then summed by all the respiratory scores.
# Write out this df for future RMarkdown reporting
# write.csv(Seroconversion_CR_Symptoms, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Seroconverted_CR_Symptoms.csv")

# One row per study day: the sum of that day's Respiratory_Total values (data for the line plot).
Seroconversion_CR_Symptoms_Line <- summarise(
  group_by(Seroconversion_CR_Symptoms, StudyDay),
  `Total Respiratory Score` = sum(Respiratory_Total)
)
# Write out this df for future RMarkdown reporting
# write.csv(Seroconversion_CR_Symptoms_Line, "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Seroconverted_CR_Symptoms_Line.csv")

# We will not report on temperature because there was no fever for any of the Recipients in the study

# Line plot of the daily total respiratory score for #233 (seroconverted by MN and HAI).
# group = 1 joins all points into a single line; open white-filled circles mark each day.
p <- ggplot(
  data = Seroconversion_CR_Symptoms_Line,
  mapping = aes(x = StudyDay, y = `Total Respiratory Score`, group = 1)
) +
  geom_line() +
  geom_point(shape = 21, size = 3, fill = "white") +
  theme_bw() +
  labs(x = "Study Day", y = "Total Respiratory Score")
p

#### Table 3: i1) IR: Number with greater than antic. immunity prior to quarantine by HAI: MN: Both (and % of exposed) ####

# Definition of serosusceptible for this analysis, which will be included in the footnote of table 1 is from Alex Mann from email correspondence on September 28, 2018. He states:
# "An HI titre of ≤10 and/or an MN titre of <80 at baseline was retrospectively taken to indicate susceptibility to infection"
# Thus we will use this criteria to tell who among the inoculated donors was serosusceptible at baseline (entry to quarantine)
# We won't filter these individuals, but we will note who among those who above the MN of 80 (>=80) and HAI of 10 (>10) thresholds seroconverted, since the likelihood of seroconversion among those above the thresholds is lower.
# Based on teleconference with team on October 12, 2018 we will use the term greater than anticipated immunity upon admission to Q

# IR subjects per quarantine with greater-than-anticipated HAI immunity at day -2
# (recoded HAI titer > 10).
HI_low_susceptibility_table3_footnote_IR <- Qdata %>%
  filter(Randomization_DorIRorCR == "IR", HAI_dayminus2_recodeNDA > 10) %>%
  group_by(QuarantineNumber) %>%
  summarize(low_HI_susceptibility_at_baseline_IR = n_distinct(SubjectID))

# IR subjects per quarantine with greater-than-anticipated MN immunity at the screening visit
# (MN titer >= 80).
MN_low_susceptibility_table3_footnote_IR <- Qdata %>%
  filter(
    Microneut_VisitType == "Screening",
    Randomization_DorIRorCR == "IR",
    Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80
  ) %>%
  group_by(QuarantineNumber) %>%
  summarize(low_MN_susceptibility_at_baseline_IR = n_distinct(SubjectID))

# IR subjects per quarantine flagged as MN-seroconverted on the "Q baseline" visit row,
# i.e. between screening and entry to quarantine.
MN_seroconvert_between_screening_baseline_table3_footnote_IR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    Microneut_VisitType == "Q baseline",
    Microneut_Seroconvert == 1
  ) %>%
  group_by(QuarantineNumber) %>%
  summarize(MN_seroconvert_between_screening_baseline_IR = n_distinct(SubjectID))

# Combine the three counts per quarantine. The HAI and MN counts are full-joined so a quarantine
# present in only one of them is kept; the screening-to-baseline count is then left-joined on.
table3_footnote_IR <- HI_low_susceptibility_table3_footnote_IR %>%
  full_join(MN_low_susceptibility_table3_footnote_IR, by = "QuarantineNumber") %>%
  left_join(MN_seroconvert_between_screening_baseline_table3_footnote_IR, by = "QuarantineNumber")

# Looking into more detail on who exactly might not be serosusceptible or may have seroconverted
# Need to check over the below to ensure that it matches the proper criteria for seroconversion, serosusceptible, serosuitable, and seropositive, etc. 
# Alex Mann has some good comments about this. 

# Which SubjectID's were these with greater than anticipated immunity prior to Q (by HAI, retrospectively)?

# IR subjects (one row each, within quarantine) whose recoded day -2 HAI titer was > 10,
# with the HAI columns needed to judge later seroconversion.
HI_low_susceptibility_table3_footnote_SubIDs_IR <- Qdata %>%
  filter(Randomization_DorIRorCR == "IR", HAI_dayminus2_recodeNDA > 10) %>%
  group_by(QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(
    SubjectID, QuarantineNumber, Randomization_DorIRorCR,
    HAI_dayminus2, HAI_day28, HAI_dayminus2_recodeNDA, HAI_day28_recodeNDA,
    HAI_dayminus2_recodeNDA_x4, HAI_Seroconversion, Preliminary.HAI.Classification
  )

# Which SubjectID's had greater than anticipated immunity prior to Q by MN (retrospectively)?
# IR subjects (one row each, within quarantine) whose screening MN titer was >= 80. The
# seroconversion flag on the screening row is carried along under a screening-specific name.
MN_low_susceptibility_table3_footnote_SubIDs_IR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    Microneut_VisitType == "Screening",
    Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80
  ) %>%
  group_by(QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(
    SubjectID, QuarantineNumber, Randomization_DorIRorCR,
    Microneut_DrawDate, Microneut_VisitType,
    Microneutralization.Titer.to.A.Wisconsin.67.2005,
    Microneut_Seroconvert_Screening = Microneut_Seroconvert
  )

# IR subjects flagged as MN-seroconverted at the follow-up ("F/up") visit, right-joined onto the
# IR subjects with screening MN titer >= 80. Every elevated-titer subject is kept, and
# Microneut_Seroconvert_Followup is NA for those with no follow-up seroconversion.
# The join is left implicit (natural join on the shared column) as in the recorded run.
MN_low_susceptibility_table3_footnote_SubIDs_IR_seroconversion <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "IR",
    Microneut_VisitType == "F/up",
    Microneut_Seroconvert == 1
  ) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(SubjectID, Microneut_Seroconvert_Followup = Microneut_Seroconvert) %>%
  right_join(MN_low_susceptibility_table3_footnote_SubIDs_IR)
## Joining, by = "SubjectID"
# There were no IR subjectIDs that seroconverted by MN, and there were no IR subjectIDs that seroconverted after having lower than expected immunity

# Bind the seroconversion prior by HAI and by MN together in a single table (table1 footnote)
# Union of the IR subjects flagged by HAI and by MN (natural full join on the shared columns),
# plus four placeholder indicator columns, initialised to NA, that the summary step fills in.
Qdata_table3_footnote_subjectIDs_IR <- HI_low_susceptibility_table3_footnote_SubIDs_IR %>%
  full_join(MN_low_susceptibility_table3_footnote_SubIDs_IR_seroconversion) %>%
  ## Joining, by = c("SubjectID", "QuarantineNumber", "Randomization_DorIRorCR")
  mutate(
    HAIandMNprior = NA,
    HAIonlyprior = NA,
    MNonlyprior = NA,
    Low_Suscept_Converted_Anyways_HAIorMN = NA
  )
# Shows that there were a total of 5 IR subjectIDs (1 by HAI and 4 by MN) and none of them had evidence of seroconversion

# Per-subject 0/1 indicators for the IR footnote:
#   HAIonlyprior  - HAI > 10 and MN below 80 (or MN missing)
#   MNonlyprior   - MN >= 80 and HAI at or below 10 (or HAI missing)
#   HAIandMNprior - HAI > 10 and MN >= 80
#   Low_Suscept_Converted_Anyways_HAIorMN - flagged at baseline and later seroconverted by
#     HAI or by MN at follow-up
# The four indicators depend only on the source columns, so they are computed in one mutate().
# NOTE(review): the MN arm of the last indicator tests Microneut_Seroconvert_Screening == 1
# (the seroconversion flag on the screening row) rather than the screening titer being >= 80.
# If that flag is never 1 at screening, MN-flagged subjects can never be counted -- confirm intent.
Table3_footnote_SubjectID_summary_IR <- Qdata_table3_footnote_subjectIDs_IR %>%
  mutate(
    HAIonlyprior = if_else(
      HAI_dayminus2_recodeNDA > 10 &
        (Microneutralization.Titer.to.A.Wisconsin.67.2005 < 80 |
           is.na(Microneutralization.Titer.to.A.Wisconsin.67.2005)),
      1, 0
    ),
    MNonlyprior = if_else(
      Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80 &
        (HAI_dayminus2_recodeNDA <= 10 | is.na(HAI_dayminus2_recodeNDA)),
      1, 0
    ),
    HAIandMNprior = if_else(
      HAI_dayminus2_recodeNDA > 10 &
        Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80,
      1, 0
    ),
    Low_Suscept_Converted_Anyways_HAIorMN = if_else(
      (HAI_dayminus2_recodeNDA > 10 &
         (HAI_Seroconversion == 1 | Microneut_Seroconvert_Followup == 1)) |
        (Microneut_Seroconvert_Screening == 1 &
           (HAI_Seroconversion == 1 | Microneut_Seroconvert_Followup == 1)),
      1, 0
    )
  ) %>%
  select(
    QuarantineNumber, SubjectID,
    HAIonlyprior, MNonlyprior, HAIandMNprior, Low_Suscept_Converted_Anyways_HAIorMN
  )
# Indicators whose condition evaluated to NA (missing inputs) count as 0
Table3_footnote_SubjectID_summary_IR[is.na(Table3_footnote_SubjectID_summary_IR)] <- 0

# Reportable IR footnote: per quarantine, how many subjects fall in each baseline-immunity
# category and how many of them went on to seroconvert.
Table3_IR_Footnote_Reportable_Summary <- rename(
  summarise(
    group_by(Table3_footnote_SubjectID_summary_IR, QuarantineNumber),
    `Greater than Anticipated HAI` = sum(HAIonlyprior),
    `Greater than Anticipated MN` = sum(MNonlyprior),
    `Greater than Anticipated HAI and MN` = sum(HAIandMNprior),
    Seroconverted = sum(Low_Suscept_Converted_Anyways_HAIorMN)
  ),
  `Quarantine #` = QuarantineNumber
)

#### Writing out Table 3_IR Footnote to box sync directory ####

# IR footnote information
# Full per-subject footnote data (titers, flags, placeholder columns)
write.csv(
  Qdata_table3_footnote_subjectIDs_IR,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_IR_Footnote_Full_Data.csv"
)
# Per-quarantine counts by HAI, MN and screening-to-baseline MN seroconversion
write.csv(
  table3_footnote_IR,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_IR_Footnote_Summary.csv"
)
# Per-subject 0/1 indicators
write.csv(
  Table3_footnote_SubjectID_summary_IR,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_IR_Footnote_Reportable_Summary_SubjectIDs.csv"
)
# Per-quarantine reportable summary
write.csv(
  Table3_IR_Footnote_Reportable_Summary,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_IR_Footnote_Reportable_Summary.csv"
)

#### Table 3: i2) CR: Number with greater than antic. immunity prior to quarantine by HAI: MN: Both (and % of exposed) ####

# Definition of serosusceptible for this analysis, which will be included in the footnote of table 1 is from Alex Mann from email correspondence on September 28, 2018. He states:
# "An HI titre of ≤10 and/or an MN titre of <80 at baseline was retrospectively taken to indicate susceptibility to infection"
# Thus we will use this criteria to tell who among the inoculated donors was serosusceptible at baseline (entry to quarantine)
# Filter the number who were not serosusceptible at baseline

# CR subjects per quarantine whose recoded day -2 HAI titer was > 10.
HI_susceptibility_table3_footnote_CR <- Qdata %>%
  filter(Randomization_DorIRorCR == "CR", HAI_dayminus2_recodeNDA > 10) %>%
  group_by(QuarantineNumber) %>%
  summarize(not_HI_susceptibile_at_baseline_CR = n_distinct(SubjectID))

# CR subjects per quarantine whose screening MN titer was >= 80.
MN_susceptibility_table3_footnote_CR <- Qdata %>%
  filter(
    Microneut_VisitType == "Screening",
    Randomization_DorIRorCR == "CR",
    Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80
  ) %>%
  group_by(QuarantineNumber) %>%
  summarize(not_MN_susceptible_at_baseline_CR = n_distinct(SubjectID))

# CR subjects per quarantine flagged as MN-seroconverted on the "Q baseline" visit row,
# i.e. between screening and entry to quarantine.
MN_seroconvert_between_screening_baseline_table3_footnote_CR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "CR",
    Microneut_VisitType == "Q baseline",
    Microneut_Seroconvert == 1
  ) %>%
  group_by(QuarantineNumber) %>%
  summarize(MN_seroconvert_between_screening_baseline_CR = n_distinct(SubjectID))

# Combine the three CR counts per quarantine.
# The HAI and MN counts are FULL-joined (as in the parallel IR block): each table only has rows
# for quarantines with at least one flagged subject, so a left join from the HAI table would
# silently drop any quarantine that has MN-flagged subjects but no HAI-flagged ones.
# The screening-to-baseline seroconversion count is then left-joined onto that union.
table3_footnote_CR <- full_join(HI_susceptibility_table3_footnote_CR, 
                                MN_susceptibility_table3_footnote_CR, 
                                by = c("QuarantineNumber"= "QuarantineNumber")) %>%
  left_join(MN_seroconvert_between_screening_baseline_table3_footnote_CR, 
            by = c("QuarantineNumber"= "QuarantineNumber"))

# Looking into more detail on who exactly might not be serosusceptible or may have seroconverted
# Need to check over the below to ensure that it matches the proper criteria for seroconversion, serosusceptible, serosuitable, and seropositive, etc. 
# Alex Mann has some good comments about this. 

# Which SubjectID's were these with not serosusceptible prior to Q (by HAI, retrospectively)?

# CR subjects (one row each, within quarantine) whose recoded day -2 HAI titer was > 10,
# with the HAI columns needed to judge later seroconversion.
HI_susceptibility_table3_footnote_SubIDs_CR <- Qdata %>%
  filter(Randomization_DorIRorCR == "CR", HAI_dayminus2_recodeNDA > 10) %>%
  group_by(QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(
    SubjectID, QuarantineNumber, Randomization_DorIRorCR,
    HAI_dayminus2, HAI_day28, HAI_dayminus2_recodeNDA, HAI_day28_recodeNDA,
    HAI_dayminus2_recodeNDA_x4, HAI_Seroconversion, Preliminary.HAI.Classification
  )

# Which SubjectID's were not serosusceptible prior to Q by MN (retrospectively)?
# CR subjects (one row each, within quarantine) whose screening MN titer was >= 80. The
# seroconversion flag on the screening row is carried along under a screening-specific name.
MN_susceptibility_table3_footnote_SubIDs_CR <- Qdata %>%
  filter(
    Randomization_DorIRorCR == "CR",
    Microneut_VisitType == "Screening",
    Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80
  ) %>%
  group_by(QuarantineNumber) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(
    SubjectID, QuarantineNumber, Randomization_DorIRorCR,
    Microneut_DrawDate, Microneut_VisitType,
    Microneutralization.Titer.to.A.Wisconsin.67.2005,
    Microneut_Seroconvert_Screening = Microneut_Seroconvert
  )

# CR subjects flagged as MN-seroconverted at the follow-up ("F/up") visit, right-joined onto the
# CR subjects whose screening MN titer was >= 80. Every elevated-titer CR subject is kept, and
# Microneut_Seroconvert_Followup is NA for those with no follow-up seroconversion.
MN_low_susceptibility_table3_footnote_SubIDs_CR_seroconversion <- Qdata %>%
  filter(Randomization_DorIRorCR == "CR" & Microneut_VisitType == "F/up") %>%
  filter(Microneut_Seroconvert == 1) %>%
  distinct(SubjectID, .keep_all = TRUE) %>%
  select(SubjectID, Microneut_Seroconvert) %>%
  rename(Microneut_Seroconvert_Followup = Microneut_Seroconvert) %>%
  # Fix: join onto the CR elevated-titer list. The previous code joined onto the IR list
  # (MN_low_susceptibility_table3_footnote_SubIDs_IR), which pulled IR subjects into the CR
  # footnote tables and dropped the CR subjects with elevated screening MN titers.
  right_join(MN_susceptibility_table3_footnote_SubIDs_CR)
## Joining, by = "SubjectID"
# There was 1 CR subjectIDs that seroconverted by MN, and there were no CR subjectIDs that seroconverted after having lower than expected immunity

# Bind the seroconversion prior by HAI and by MN together in a single table (table1 footnote)
# Union of the CR subjects flagged by HAI and by MN (natural full join on the shared columns),
# plus four placeholder indicator columns, initialised to NA, that the summary step fills in.
Qdata_table3_footnote_subjectIDs_CR <- HI_susceptibility_table3_footnote_SubIDs_CR %>%
  full_join(MN_low_susceptibility_table3_footnote_SubIDs_CR_seroconversion) %>%
  ## Joining, by = c("SubjectID", "QuarantineNumber", "Randomization_DorIRorCR")
  mutate(
    HAIandMNprior = NA,
    HAIonlyprior = NA,
    MNonlyprior = NA,
    LowSuscept_Seroconverted_Anyways = NA
  )

# Per-subject 0/1 indicators for the CR footnote:
#   HAIandMNprior - HAI > 10 and MN >= 80
#   HAIonlyprior  - HAI > 10 and MN below 80 (or MN missing)
#   MNonlyprior   - MN >= 80 and HAI at or below 10 (or HAI missing)
#   LowSuscept_Seroconverted_Anyways - flagged at baseline and later seroconverted by HAI or
#     by MN at follow-up
# The four indicators depend only on the source columns, so they are computed in one mutate().
# NOTE(review): the MN arm of the last indicator tests Microneut_Seroconvert_Screening == 1
# (the seroconversion flag on the screening row) rather than the screening titer being >= 80.
# If that flag is never 1 at screening, MN-flagged subjects can never be counted -- confirm intent.
Table3_footnote_SubjectID_summary_CR <- Qdata_table3_footnote_subjectIDs_CR %>%
  mutate(
    HAIandMNprior = if_else(
      HAI_dayminus2_recodeNDA > 10 &
        Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80,
      1, 0
    ),
    HAIonlyprior = if_else(
      HAI_dayminus2_recodeNDA > 10 &
        (Microneutralization.Titer.to.A.Wisconsin.67.2005 < 80 |
           is.na(Microneutralization.Titer.to.A.Wisconsin.67.2005)),
      1, 0
    ),
    MNonlyprior = if_else(
      Microneutralization.Titer.to.A.Wisconsin.67.2005 >= 80 &
        (HAI_dayminus2_recodeNDA <= 10 | is.na(HAI_dayminus2_recodeNDA)),
      1, 0
    ),
    LowSuscept_Seroconverted_Anyways = if_else(
      (HAI_dayminus2_recodeNDA > 10 &
         (HAI_Seroconversion == 1 | Microneut_Seroconvert_Followup == 1)) |
        (Microneut_Seroconvert_Screening == 1 &
           (HAI_Seroconversion == 1 | Microneut_Seroconvert_Followup == 1)),
      1, 0
    )
  ) %>%
  select(
    QuarantineNumber, SubjectID,
    HAIonlyprior, MNonlyprior, HAIandMNprior, LowSuscept_Seroconverted_Anyways
  )
# Indicators whose condition evaluated to NA (missing inputs) count as 0
Table3_footnote_SubjectID_summary_CR[is.na(Table3_footnote_SubjectID_summary_CR)] <- 0

# Reportable CR footnote: per quarantine, how many subjects fall in each baseline-immunity
# category and how many of them went on to seroconvert.
Table3_CR_Footnote_Reportable_Summary <- rename(
  summarise(
    group_by(Table3_footnote_SubjectID_summary_CR, QuarantineNumber),
    `Greater than Anticipated HAI` = sum(HAIonlyprior),
    `Greater than Anticipated MN` = sum(MNonlyprior),
    `Greater than Anticipated HAI and MN` = sum(HAIandMNprior),
    Seroconverted = sum(LowSuscept_Seroconverted_Anyways)
  ),
  `Quarantine #` = QuarantineNumber
)

#### Writing out Table 3_CR Footnote to box sync directory ####
# CR footnote information
# Full per-subject footnote data (titers, flags, placeholder columns)
write.csv(
  Qdata_table3_footnote_subjectIDs_CR,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_CR_Footnote_Full_Data.csv"
)
# Per-quarantine counts by HAI, MN and screening-to-baseline MN seroconversion
write.csv(
  table3_footnote_CR,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_CR_Footnote_Summary.csv"
)
# Per-subject 0/1 indicators
write.csv(
  Table3_footnote_SubjectID_summary_CR,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_CR_Footnote_Reportable_Summary_SubjectID.csv"
)
# Per-quarantine reportable summary
write.csv(
  Table3_CR_Footnote_Reportable_Summary,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_CR_Footnote_Reportable_Summary.csv"
)

#### Column means, sums and rounding for Table 3_IR ####

## IR
# First convert all the NAs to 0.
Qdata_table3_IR[is.na(Qdata_table3_IR)] <- 0 

# Working copy for the manuscript table, so Qdata_table3_IR (arduous to recreate) is kept as is
Table3_IR_manuscript <- Qdata_table3_IR

# One-row tibble holding the column-wise sum of every column (QuarantineNumber and the fraction
# columns are summed too; they are overwritten downstream).
# `sum` is passed directly: the funs() wrapper is deprecated since dplyr 0.8.0.
Table3_IR_manuscript_sums <-  Table3_IR_manuscript %>%
  summarise_all(sum)
# Append the totals row beneath the per-quarantine rows. This is a natural full join on all
# shared columns: the totals row matches no existing row, so it is added as the last row.
Table3_IR_manuscript_sums <- Table3_IR_manuscript %>%
  full_join(Table3_IR_manuscript_sums)
## Joining, by = c("QuarantineNumber", "NumberExposedIR", "NumberInfectedIR", "Fraction_Inf_over_ExpIR", "Number_IR_Symptomatic_V3", "Fraction_IR_Symptomatic_V3_of_ExposedIR", "Number_ILI_V3_IR", "Fraction_ILI_V3_IR_of_ExposedIR", "Number_Febrile_IR", "Fraction_Febrile_over_ExposedIR", "Number_PCR_Infected_IR", "Fraction_PCR_Infected_over_ExposedIR", "Number_PCR_and_Sero_Positive_IR", "Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR", "Number_HAI_Positive_IR", "Number_Microneut_Positive_IR", "Pos_Either_HAI_or_MN_IR")
# This gets all of the columns to be summed
# Now let's work on changing the sums in all of the "percent" columns to the proper fraction 
# (also perform the *100 transformation to percent)
# (also round everything to the nearest whole number)
# Recompute every percentage column from its count and the number exposed. This also repairs
# the totals row, whose "fraction" cells held meaningless sums of per-quarantine fractions.
# Every column is then rounded to a whole number.
# round() is passed directly with digits = 0: the funs() wrapper is deprecated since dplyr 0.8.0.
Table3_IR_manuscript_sums_fractions <- Table3_IR_manuscript_sums %>%
  mutate(Fraction_Inf_over_ExpIR = (NumberInfectedIR/NumberExposedIR)*100,
         Fraction_IR_Symptomatic_V3_of_ExposedIR = (Number_IR_Symptomatic_V3/NumberExposedIR)*100,
         Fraction_ILI_V3_IR_of_ExposedIR = (Number_ILI_V3_IR/NumberExposedIR)*100,
         Fraction_Febrile_over_ExposedIR = (Number_Febrile_IR/NumberExposedIR)*100,
         Fraction_PCR_Infected_over_ExposedIR = (Number_PCR_Infected_IR/NumberExposedIR)*100,
         Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR = (Number_PCR_and_Sero_Positive_IR/NumberExposedIR)*100) %>%
  mutate_all(round, digits = 0)

# Now add parentheses to all of these fraction variables because they are presented in manuscript table as percents
# Wrap each percentage column in parentheses, e.g. 20 becomes "(20)", turning those columns
# into character columns ready to be pasted after their counts.
Table3_IR_manuscript_sums_fractions <- Table3_IR_manuscript_sums_fractions %>%
  mutate_at(
    vars(
      Fraction_Inf_over_ExpIR,
      Fraction_IR_Symptomatic_V3_of_ExposedIR,
      Fraction_ILI_V3_IR_of_ExposedIR,
      Fraction_Febrile_over_ExposedIR,
      Fraction_PCR_Infected_over_ExposedIR,
      Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR
    ),
    function(percent) paste0("(", percent, ")")
  )

# Now bring columns together into more publishable arrangement of data in the display of the table

# Collapse each count with its percentage into one display column ("n (pct)"), label every row
# with the recipient classification (needed to keep rows identifiable once the IR and CR tables
# are merged into a single Table 3), and put the columns in manuscript order.
# Columns are selected by name rather than by position so the layout does not depend on the
# order in which unite() happens to leave them.
Table3_IR_manuscript_unite <- Table3_IR_manuscript_sums_fractions %>%
  unite(`Infected/Exposed`, NumberInfectedIR, NumberExposedIR, sep = "/", remove = TRUE) %>%
  unite(`Infected/Exposed (%)`, `Infected/Exposed`, Fraction_Inf_over_ExpIR, sep = " ", remove = TRUE) %>%
  unite(Symptomatic, Number_IR_Symptomatic_V3, Fraction_IR_Symptomatic_V3_of_ExposedIR, sep = " ", remove = TRUE) %>%
  unite(ILI, Number_ILI_V3_IR, Fraction_ILI_V3_IR_of_ExposedIR, sep = " ", remove = TRUE) %>%
  unite(Febrile, Number_Febrile_IR, Fraction_Febrile_over_ExposedIR, sep = " ", remove = TRUE) %>%
  unite(`PCR Confirmed Infection`, Number_PCR_Infected_IR, Fraction_PCR_Infected_over_ExposedIR, sep = " ", remove = TRUE) %>%
  unite(`PCR Confirmed Infection and Seroconversion`, Number_PCR_and_Sero_Positive_IR, Fraction_Inf_PCR_and_Sero_Positive_of_ExposedIR, sep = " ", remove = TRUE) %>%
  unite(`Seroconversion by HAI : MN : Either`, Number_HAI_Positive_IR, Number_Microneut_Positive_IR, Pos_Either_HAI_or_MN_IR, sep = " : ", remove = TRUE) %>%
  rename('Quarantine #' = QuarantineNumber) %>%
  mutate(`Recipient Classification` = "Intervention (IR)") %>%
  select(`Quarantine #`,
         `Recipient Classification`,
         `Infected/Exposed (%)`,
         Symptomatic,
         Febrile,
         ILI,
         `PCR Confirmed Infection`,
         `PCR Confirmed Infection and Seroconversion`,
         `Seroconversion by HAI : MN : Either`)

# The totals row is always the last row; label it "Total" (this turns the column into character).
# Using nrow() instead of a hard-coded 4 keeps this correct if the number of quarantines changes.
Table3_IR_manuscript_unite$`Quarantine #`[nrow(Table3_IR_manuscript_unite)] <- "Total"

#### Writing out Table 3_IR to box sync directory ####

# Persist the formatted IR half of Table 3
write.csv(
  Table3_IR_manuscript_unite,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_IR_Manuscript.csv"
)

#### Writing out Table 3_IR to latex for direct translation of code to table image for paper

# Render the IR table with striped rows and a spanning header row: 3 unlabeled columns, then
# 3 columns under each of the two group headers (9 columns in total).
# NOTE(review): both group headers read "% of Infected", but the percentages in this table are
# computed against the number exposed -- confirm the intended wording.
add_header_above(
  kable_styling(kable(Table3_IR_manuscript_unite), "striped"),
  c(" " = 3, "Laboratory Confirmed Infection & Illness (% of Infected)" = 3, 
    "Laboratory Confirmed Infection Criteria (% of Infected)" = 3)
)
## Laboratory Confirmed Infection & Illness (% of Infected)
## Laboratory Confirmed Infection Criteria (% of Infected)
## Quarantine # Recipient Classification Infected/Exposed (%) Symptomatic Febrile ILI PCR Confirmed Infection PCR Confirmed Infection and Seroconversion Seroconversion by HAI : MN : Either
## 1 Intervention (IR) 0/10 (0) 2 (20) 0 (0) 1 (10) 0 (0) 0 (0) 0 : 0 : 0
## 2 Intervention (IR) 0/10 (0) 3 (30) 0 (0) 2 (20) 0 (0) 0 (0) 0 : 0 : 0
## 3 Intervention (IR) 0/20 (0) 6 (30) 0 (0) 2 (10) 0 (0) 0 (0) 0 : 0 : 0
## Total Intervention (IR) 0/40 (0) 11 (28) 0 (0) 5 (12) 0 (0) 0 (0) 0 : 0 : 0
# Interactive view of the IR table: per-column filter boxes on top, 10 rows per page
Table3_IR_manuscript_unite %>%
  datatable(
    filter = "top",
    options = list(pageLength = 10, autoWidth = TRUE)
  )
# LaTeX source for the same table, without xtable's timestamp comment header
print(
  xtable(Table3_IR_manuscript_unite),
  comment = FALSE
)
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlllllllll}
##   \hline
##  & Quarantine \# & Recipient Classification & Infected/Exposed (\%) & Symptomatic & Febrile & ILI & PCR Confirmed Infection & PCR Confirmed Infection and Seroconversion & Seroconversion by HAI : MN : Either \\ 
##   \hline
## 1 & 1 & Intervention (IR) & 0/10 (0) & 2 (20) & 0 (0) & 1 (10) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   2 & 2 & Intervention (IR) & 0/10 (0) & 3 (30) & 0 (0) & 2 (20) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   3 & 3 & Intervention (IR) & 0/20 (0) & 6 (30) & 0 (0) & 2 (10) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   4 & Total & Intervention (IR) & 0/40 (0) & 11 (28) & 0 (0) & 5 (12) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##    \hline
## \end{tabular}
## \end{table}
#### Column means, sums and rounding for Table 3_CR ####

## CR
# First convert all the NAs to 0.
Qdata_table3_CR[is.na(Qdata_table3_CR)] <- 0 

# Working copy for the manuscript table, so Qdata_table3_CR (arduous to recreate) is kept as is
table3_CR_manuscript <- Qdata_table3_CR

# One-row tibble holding the column-wise sum of every column (QuarantineNumber and the fraction
# columns are summed too; they are overwritten downstream).
# `sum` is passed directly: the funs() wrapper is deprecated since dplyr 0.8.0.
table3_CR_manuscript_sums <-  table3_CR_manuscript %>%
  summarise_all(sum)
# Append the totals row beneath the per-quarantine rows. This is a natural full join on all
# shared columns: the totals row matches no existing row, so it is added as the last row.
table3_CR_manuscript_sums <- table3_CR_manuscript %>%
  full_join(table3_CR_manuscript_sums)
## Joining, by = c("QuarantineNumber", "NumberExposedCR", "NumberInfectedCR", "Fraction_Inf_over_ExpCR", "Number_Symptomatic_V3_CR", "Fraction_Symptomatic_V3_CR_of_ExposedCR", "Number_ILI_CR_V3", "Fraction_ILI_V3_CR_of_ExposedCR", "Number_Febrile_CR", "Fraction_Febrile_over_ExpCR", "Number_PCR_Infected_CR", "Fraction_PCR_Inf_over_ExpCR", "Number_PCR_and_Sero_Positive_CR", "Fraction_Inf_PCR_and_Sero_Positive_CR", "Number_HAI_Positive_CR", "Number_Microneut_Positive_CR", "Positive_Either_HAI_or_MN_CR")
# This gets all of the columns to be summed
# Now let's work on changing the sums in all of the "percent" columns to the proper fraction 
# (also perform the *100 transformation to percent)
# (also round everything to the nearest whole number)
# Recompute every percentage column from its count and the number exposed. This also repairs
# the totals row, whose "fraction" cells held meaningless sums of per-quarantine fractions.
# Every column is then rounded to a whole number.
# round() is passed directly with digits = 0: the funs() wrapper is deprecated since dplyr 0.8.0.
table3_CR_manuscript_sums_fractions <- table3_CR_manuscript_sums %>%
  mutate(Fraction_Inf_over_ExpCR = (NumberInfectedCR/NumberExposedCR)*100,
         Fraction_Symptomatic_V3_CR_of_ExposedCR = (Number_Symptomatic_V3_CR/NumberExposedCR)*100,
         Fraction_ILI_V3_CR_of_ExposedCR = (Number_ILI_CR_V3/NumberExposedCR)*100,
         Fraction_Febrile_over_ExpCR = (Number_Febrile_CR/NumberExposedCR)*100,
         Fraction_PCR_Inf_over_ExpCR = (Number_PCR_Infected_CR/NumberExposedCR)*100,
         Fraction_Inf_PCR_and_Sero_Positive_CR = (Number_PCR_and_Sero_Positive_CR/NumberExposedCR)*100) %>%
  mutate_all(round, digits = 0)

# Now add parentheses to all of these fraction variables because they are presented in manuscript table as percents
# Wrap each percentage column in parentheses, e.g. 33 becomes "(33)", turning those columns
# into character columns ready to be pasted after their counts.
table3_CR_manuscript_sums_fractions <- table3_CR_manuscript_sums_fractions %>%
  mutate_at(
    vars(
      Fraction_Inf_over_ExpCR,
      Fraction_Symptomatic_V3_CR_of_ExposedCR,
      Fraction_ILI_V3_CR_of_ExposedCR,
      Fraction_Febrile_over_ExpCR,
      Fraction_PCR_Inf_over_ExpCR,
      Fraction_Inf_PCR_and_Sero_Positive_CR
    ),
    function(percent) paste0("(", percent, ")")
  )

# Collapse the count/percent pairs into single display columns for the
# published layout: "infected/exposed (pct)" for the attack rate, "n (pct)"
# for each outcome, and "HAI : MN : Either" for the seroconversion counts.
# unite() removes its input columns by default, so only the combined
# columns remain afterwards.

table3_CR_manuscript_unite <- table3_CR_manuscript_sums_fractions %>%
  unite("Infected/Exposed", NumberInfectedCR, NumberExposedCR, sep = "/") %>%
  unite("Infected/Exposed (%)", `Infected/Exposed`, Fraction_Inf_over_ExpCR, sep = " ") %>%
  unite("Symptomatic", Number_Symptomatic_V3_CR, Fraction_Symptomatic_V3_CR_of_ExposedCR, sep = " ") %>%
  unite("ILI", Number_ILI_CR_V3, Fraction_ILI_V3_CR_of_ExposedCR, sep = " ") %>%
  unite("Febrile", Number_Febrile_CR, Fraction_Febrile_over_ExpCR, sep = " ") %>%
  unite("PCR Confirmed Infection", Number_PCR_Infected_CR, Fraction_PCR_Inf_over_ExpCR, sep = " ") %>%
  unite("PCR Confirmed Infection and Seroconversion", Number_PCR_and_Sero_Positive_CR, Fraction_Inf_PCR_and_Sero_Positive_CR, sep = " ") %>%
  unite("Seroconversion by HAI : MN : Either", Number_HAI_Positive_CR, Number_Microneut_Positive_CR, Positive_Either_HAI_or_MN_CR, sep = " : ") %>%
  rename(`Quarantine #` = QuarantineNumber) %>%
  # Swap the 4th and 5th columns; the first eight columns are kept, in this order.
  .[, c(1:3, 5, 4, 6:8)]

# Label the 4th row of the Quarantine # column as "Total"
table3_CR_manuscript_unite[["Quarantine #"]][4] <- "Total"

# Tag every row with its recipient classification (Control (CR)) so the rows
# stay identifiable once this table is merged with the IR group to make a
# single Table 3.
table3_CR_manuscript_unite[["Recipient Classification"]] <- "Control (CR)"
# The new column is appended as column 9; move it to sit right after column 1.
table3_CR_manuscript_unite <- table3_CR_manuscript_unite[, c(1, 9, 2:8)]

#### Writing out Table 3_CR to box sync directory ####

# Save the formatted control-recipient table as a CSV (write.csv defaults,
# so row names are written as the first column).
write.csv(
  x = table3_CR_manuscript_unite,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_CR_Manuscript.csv"
)

#### Writing out Table 3_CR to latex for direct translation of code to table image for paper

# Render the control-recipient table with spanning headers: none over the
# first three columns, one over the next three (Symptomatic, Febrile, ILI),
# and one over the last three (PCR / serology criteria).
# The percentages in this table are computed as count / NumberExposedCR * 100
# (e.g. 4 symptomatic of 11 exposed = 36), so the spanning headers are
# labelled "% of Exposed" rather than "% of Infected".
kable(table3_CR_manuscript_unite) %>%
  kable_styling("striped") %>%
  add_header_above(c(
    " " = 3,
    "Laboratory Confirmed Infection & Illness (% of Exposed)" = 3,
    "Laboratory Confirmed Infection Criteria (% of Exposed)" = 3
  ))
Laboratory Confirmed Infection & Illness (% of Infected)
Laboratory Confirmed Infection Criteria (% of Infected)
Quarantine # Recipient Classification Infected/Exposed (%) Symptomatic Febrile ILI PCR Confirmed Infection PCR Confirmed Infection and Seroconversion Seroconversion by HAI : MN : Either
1 Control (CR) 0/11 (0) 4 (36) 0 (0) 3 (27) 0 (0) 0 (0) 0 : 0 : 0
2 Control (CR) 3/9 (33) 2 (22) 0 (0) 2 (22) 2 (22) 0 (0) 1 : 1 : 1
3 Control (CR) 0/15 (0) 6 (40) 0 (0) 4 (27) 0 (0) 0 (0) 0 : 0 : 0
Total Control (CR) 3/35 (9) 12 (34) 0 (0) 9 (26) 2 (6) 0 (0) 1 : 1 : 1
# Interactive view of the CR table: per-column filters on top, 10 rows per page.
datatable(
  data = table3_CR_manuscript_unite,
  filter = "top",
  options = list(pageLength = 10, autoWidth = TRUE)
)
# Emit the LaTeX source for the same table, without xtable's generated-by comment.
print(
  x = xtable(table3_CR_manuscript_unite),
  comment = FALSE
)
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlllllllll}
##   \hline
##  & Quarantine \# & Recipient Classification & Infected/Exposed (\%) & Symptomatic & Febrile & ILI & PCR Confirmed Infection & PCR Confirmed Infection and Seroconversion & Seroconversion by HAI : MN : Either \\ 
##   \hline
## 1 & 1 & Control (CR) & 0/11 (0) & 4 (36) & 0 (0) & 3 (27) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   2 & 2 & Control (CR) & 3/9 (33) & 2 (22) & 0 (0) & 2 (22) & 2 (22) & 0 (0) & 1 : 1 : 1 \\ 
##   3 & 3 & Control (CR) & 0/15 (0) & 6 (40) & 0 (0) & 4 (27) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   4 & Total & Control (CR) & 3/35 (9) & 12 (34) & 0 (0) & 9 (26) & 2 (6) & 0 (0) & 1 : 1 : 1 \\ 
##    \hline
## \end{tabular}
## \end{table}
#### Building a cumulative Table 3 and writing out to box sync directory and for translation to manuscript ####

# Combine the IR and CR tables. No `by` is supplied, so dplyr joins on every
# column the two tables share and reports that choice in a message. The result
# is then ordered by quarantine and, within each, by recipient classification.
table3 <- Table3_IR_manuscript_unite %>%
  full_join(table3_CR_manuscript_unite) %>%
  arrange(`Quarantine #`, `Recipient Classification`)
## Joining, by = c("Quarantine #", "Recipient Classification", "Infected/Exposed (%)", "Symptomatic", "Febrile", "ILI", "PCR Confirmed Infection", "PCR Confirmed Infection and Seroconversion", "Seroconversion by HAI : MN : Either")
# Render the combined table with the same spanning headers as the CR-only
# table (3 unlabelled columns, 3 illness columns, 3 infection-criteria columns).
# The rendered percentages are counts over the number exposed (e.g. IR
# quarantine 1: 2 symptomatic of 10 exposed = 20), so the headers are labelled
# "% of Exposed" rather than "% of Infected".
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
kable(table3) %>%
  kable_styling("striped") %>%
  add_header_above(c(
    " " = 3,
    "Laboratory Confirmed Infection & Illness (% of Exposed)" = 3,
    "Laboratory Confirmed Infection Criteria (% of Exposed)" = 3
  )) %>%
  kable_styling(full_width = FALSE) %>%
  # Bold the Quarantine # column and merge its repeated values into one cell.
  column_spec(1, bold = TRUE) %>%
  collapse_rows(columns = 1, valign = "top")
Laboratory Confirmed Infection & Illness (% of Infected)
Laboratory Confirmed Infection Criteria (% of Infected)
Quarantine # Recipient Classification Infected/Exposed (%) Symptomatic Febrile ILI PCR Confirmed Infection PCR Confirmed Infection and Seroconversion Seroconversion by HAI : MN : Either
1 Control (CR) 0/11 (0) 4 (36) 0 (0) 3 (27) 0 (0) 0 (0) 0 : 0 : 0
Intervention (IR) 0/10 (0) 2 (20) 0 (0) 1 (10) 0 (0) 0 (0) 0 : 0 : 0
2 Control (CR) 3/9 (33) 2 (22) 0 (0) 2 (22) 2 (22) 0 (0) 1 : 1 : 1
Intervention (IR) 0/10 (0) 3 (30) 0 (0) 2 (20) 0 (0) 0 (0) 0 : 0 : 0
3 Control (CR) 0/15 (0) 6 (40) 0 (0) 4 (27) 0 (0) 0 (0) 0 : 0 : 0
Intervention (IR) 0/20 (0) 6 (30) 0 (0) 2 (10) 0 (0) 0 (0) 0 : 0 : 0
Total Control (CR) 3/35 (9) 12 (34) 0 (0) 9 (26) 2 (6) 0 (0) 1 : 1 : 1
Intervention (IR) 0/40 (0) 11 (28) 0 (0) 5 (12) 0 (0) 0 (0) 0 : 0 : 0
# Interactive view of the combined table: per-column filters on top, 10 rows per page.
datatable(
  data = table3,
  filter = "top",
  options = list(pageLength = 10, autoWidth = TRUE)
)
# Emit the LaTeX source for the combined table, without xtable's generated-by comment.
print(
  x = xtable(table3),
  comment = FALSE
)
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlllllllll}
##   \hline
##  & Quarantine \# & Recipient Classification & Infected/Exposed (\%) & Symptomatic & Febrile & ILI & PCR Confirmed Infection & PCR Confirmed Infection and Seroconversion & Seroconversion by HAI : MN : Either \\ 
##   \hline
## 1 & 1 & Control (CR) & 0/11 (0) & 4 (36) & 0 (0) & 3 (27) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   2 & 1 & Intervention (IR) & 0/10 (0) & 2 (20) & 0 (0) & 1 (10) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   3 & 2 & Control (CR) & 3/9 (33) & 2 (22) & 0 (0) & 2 (22) & 2 (22) & 0 (0) & 1 : 1 : 1 \\ 
##   4 & 2 & Intervention (IR) & 0/10 (0) & 3 (30) & 0 (0) & 2 (20) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   5 & 3 & Control (CR) & 0/15 (0) & 6 (40) & 0 (0) & 4 (27) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   6 & 3 & Intervention (IR) & 0/20 (0) & 6 (30) & 0 (0) & 2 (10) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##   7 & Total & Control (CR) & 3/35 (9) & 12 (34) & 0 (0) & 9 (26) & 2 (6) & 0 (0) & 1 : 1 : 1 \\ 
##   8 & Total & Intervention (IR) & 0/40 (0) & 11 (28) & 0 (0) & 5 (12) & 0 (0) & 0 (0) & 0 : 0 : 0 \\ 
##    \hline
## \end{tabular}
## \end{table}
# Table3: save the combined IR + CR table as a CSV (write.csv defaults, so
# row names are written as the first column).
write.csv(
  x = table3,
  file = "/Users/jbueno/Box Sync/EMIT/EMIT_Data_Analysis_Jake/EMIT_Quarantine/Analysis Results/SI_POC_Criteria_Table3_Manuscript.csv"
)