BMJ_paper_figure_3

###--READ HERE FIRST--###
#This is the code from this paper: O'Sullivan et al Temporal trends in use of tests in UK primary care, 2000-15: 
# cont. retrospective analysis of 250 million tests BMJ 2018; 363 doi: https://doi.org/10.1136/bmj.k4666
# Specifically, this is the code used for figure 3 and the associated statistical analysis

################----------------------------------------------------------------------------------------################
################---------------------------Histograms for Total test use--------------------------------################
################----------------------------------------------------------------------------------------################

setwd("H:/DPhil/Chapters/5. CPRD Factors associated with appropriate and inappropriate UK testing/Post ISAC approval/Data_Final_Oct/Total tests/NEW Dec 2017")

tests_per_year <- readRDS("total_tests_combined_dec17.rds")

##################################################################################################-------Year: 2000-------##################################################################################################
###--Restrict dF to just year 2000--###
tests_per_year_2000 <- tests_per_year[,1:2]

###--Stratify tests into 1st, 2nd,3rd etc test for year--### 
library(dplyr)

hist_2000 <- tests_per_year_2000 %>%
  group_by(tot_tests_2000) %>%
  summarise(total = n())

###--Exclude patients that got zero tests--###
hist_2000_1 <- hist_2000[2:nrow(hist_2000),] #Delete patients that got 0 tests

###--Generate total number of tests for 2000--###
sum_2000 <- sum(hist_2000_1$total)

####------Create percentages for each test-------#####
hist_2000_1$percentage <- (hist_2000_1$total/sum_2000)*100
hist_2000_1 <- as.data.frame(hist_2000_1)

#Restrict to 100 
hist_2000_1 <- hist_2000_1[1:100,]

##################################################################################################-------Year: 2004/5-------##################################################################################################

setwd("H:/DPhil/Chapters/5. CPRD Factors associated with appropriate and inappropriate UK testing/Post ISAC approval/Data_Final_Oct/Total tests/NEW Dec 2017")

tests_per_year <- readRDS("total_tests_combined_dec17.rds")

###--Restrict dF to just year 2004--###
tests <- tests_per_year[,c(1,6)]

###--Stratify tests into 1st, 2nd,3rd etc test for year--### 
library(dplyr)

hist_2004 <- tests %>%
  group_by(tot_tests_2004) %>%
  summarise(total = n())

###--Exclude patients that got zero tests--###
hist_2004 <- hist_2004[2:nrow(hist_2004),] #Delete patients that got 0 tests

###--Generate total number of tests for 2004--###
sum_2004 <- sum(hist_2004$total)

####------Create percentages for each test-------#####
hist_2004$percentage <- (hist_2004$total/sum_2004)*100
hist_2004 <- as.data.frame(hist_2004)

#Restrict to 100 
hist_2004 <- hist_2004[1:100,]

##################################################################################################-------Year: 2015-------##################################################################################################
setwd("H:/DPhil/Chapters/5. CPRD Factors associated with appropriate and inappropriate UK testing/Post ISAC approval/Data_Final_Oct/Total tests/NEW Dec 2017")

tests_per_year <- readRDS("total_tests_combined_dec17.rds")

###--Restrict dF to just year 2015--###
tests <- tests_per_year[,c(1,17)]

###--Stratify tests into 1st, 2nd,3rd etc test for year--### 
library(dplyr)

hist <- tests %>%
  group_by(tot_tests_2015) %>%
  summarise(total = n())

###--Exclude patients that got zero tests--###
hist_2015 <- hist[2:nrow(hist),] #Delete patients that got 0 tests

###--Generate total number of tests for 2015--###
sum_2015 <- sum(hist_2015$total)

####------Create percentages for each test-------#####
hist_2015$percentage <- (hist_2015$total/sum_2015)*100
hist_2015 <- as.data.frame(hist_2015)

#Restrict to 100
hist_2015 <- hist_2015[1:100,]


##################################################################################################-------Plotting-------##################################################################################################
library(reshape2)

data <- cbind(hist_2000_1, hist_2004, hist_2015)
data <- data[,c(1,3,6,9)]
colnames(data) <- c("no_tests", "2000", "2004", "2015")
data_m <- melt(data, id.vars = "no_tests")

#Line and graph lot
library(ggplot2)
#X axis normal
data_m$no_tests <- factor(data_m$no_tests, levels = unique(data_m$no_tests)) #Makes all x axis labels show
ggplot(data_m, aes(x = no_tests, y = value, group = variable, colour = variable)) + geom_line() + geom_point() + 
  labs(x="Annual number of tests per patient", y = "Percentage of all tests (%)") + 
  ggtitle("Percentage of all tests ordered \nby annual number of tests received per patient") + theme(plot.title = element_text(hjust = 0.5)) + labs(colour = "Year")

#Logged x axis
data_m$no_tests <- as.numeric(data_m$no_tests)
ggplot(data_m, aes(x = no_tests, y = value, group = variable, colour = variable)) + geom_line() + geom_point() + 
  labs(x="Annual number of tests per patient (logged)", y = "Percentage of all tests (%)") + 
  ggtitle("Percentage of all tests ordered \nby annual number of tests received per patient") + theme(plot.title = element_text(hjust = 0.5)) + labs(colour = "Year") +
  scale_x_log10()

#Save as DPI 300
setwd("H:/DPhil/Chapters/5. CPRD Factors associated with appropriate and inappropriate UK testing/Post ISAC approval/Paper 1 Temporal Total + specific test use/Lancet_Submission/Figures")
tiff("Figure_3.tiff", height = 15, width = 15, units = "cm", res = 300)
ggplot(data_m, aes(x = no_tests, y = value, group = variable, colour = variable)) + geom_line() + geom_point() + 
  labs(x="Annual number of tests per patient (logged)", y = "Percentage of all tests (%)") + 
  theme(plot.title = element_text(hjust = 0.5)) + labs(colour = "Year") +
  scale_x_log10()
dev.off()


##################################################################################################-------Chi square test to compare proportions-------##################################################################################################
#Create percentage with >10 tests for each year
greater_than_10_2000 <- sum(hist_2000_1[11:100,2])
greater_than_10_2000_p <- greater_than_10_2000/sum_2000 #10% of tests were on patients than recieved greater than 10 tests

greater_than_10_2004 <- sum(hist_2004[11:100,2])
greater_than_10_2004_p <- greater_than_10_2004/sum_2004 #21% of tests were on patients than recieved greater than 10 tests

greater_than_10_2015 <- sum(hist_2015[11:100,2])
greater_than_10_2015_p <- greater_than_10_2015/sum_2015 #33% 

#Chi squared tests
prop.test(c(greater_than_10_2000, greater_than_10_2015), c(sum_2000, sum_2015), correct = FALSE)
prop.test(c(greater_than_10_2004, greater_than_10_2015), c(sum_2004, sum_2015), correct = FALSE)


#See 'Histograms_Dec_2017_update' R script for data that made up Table 5
####Creating dataframe for chi-squared test#### Formula is prop.test(c(numerator 2000, numerator 2015), c(denominator 2000, denominator 2015))
#####-----Patients that received 1-10 tests-------#####
#2000 vs. 2015
chi_sqrd_1 <- prop.test(c(hist_2000_1_10[1,2], hist15[1,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_2 <- prop.test(c(hist_2000_1_10[2,2], hist15[2,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_3 <- prop.test(c(hist_2000_1_10[3,2], hist15[3,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_4 <- prop.test(c(hist_2000_1_10[4,2], hist15[4,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_5 <- prop.test(c(hist_2000_1_10[5,2], hist15[5,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_6 <- prop.test(c(hist_2000_1_10[6,2], hist15[6,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_7 <- prop.test(c(hist_2000_1_10[7,2], hist15[7,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_8 <- prop.test(c(hist_2000_1_10[8,2], hist15[8,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_9 <- prop.test(c(hist_2000_1_10[9,2], hist15[9,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_10 <- prop.test(c(hist_2000_1_10[10,2], hist15[10,2]),c(sum_2000, sum_2015), correct = FALSE)

#2004 vs. 2015
chi_sqrd_1_1 <- prop.test(c(hist04[1,2], hist15[1,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_2_2 <- prop.test(c(hist04[2,2], hist15[2,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_3_3 <- prop.test(c(hist04[3,2], hist15[3,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_4_4 <- prop.test(c(hist04[4,2], hist15[4,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_5_5 <- prop.test(c(hist04[5,2], hist15[5,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_6_6 <- prop.test(c(hist04[6,2], hist15[6,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_7_7 <- prop.test(c(hist04[7,2], hist15[7,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_8_8 <- prop.test(c(hist04[8,2], hist15[8,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_9_9 <- prop.test(c(hist04[9,2], hist15[9,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_10_10 <- prop.test(c(hist04[10,2], hist15[10,2]),c(sum_2004, sum_2015), correct = FALSE)