-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathBMJ_paper_figure_3
170 lines (127 loc) · 8.97 KB
/
BMJ_paper_figure_3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
###--READ HERE FIRST--###
#This is the code from this paper: O'Sullivan et al Temporal trends in use of tests in UK primary care, 2000-15:
# cont. retrospective analysis of 250 million tests BMJ 2018; 363 doi: https://doi.org/10.1136/bmj.k4666
# Specifically, this is the code used for figure 3 and the associated statistical analysis
################----------------------------------------------------------------------------------------################
################---------------------------Histograms for Total test use--------------------------------################
################----------------------------------------------------------------------------------------################
setwd("H:/DPhil/Chapters/5. CPRD Factors associated with appropriate and inappropriate UK testing/Post ISAC approval/Data_Final_Oct/Total tests/NEW Dec 2017")
tests_per_year <- readRDS("total_tests_combined_dec17.rds")
##################################################################################################-------Year: 2000-------##################################################################################################
###--Restrict dF to just year 2000--###
tests_per_year_2000 <- tests_per_year[,1:2]
###--Stratify tests into 1st, 2nd,3rd etc test for year--###
library(dplyr)
hist_2000 <- tests_per_year_2000 %>%
group_by(tot_tests_2000) %>%
summarise(total = n())
###--Exclude patients that got zero tests--###
hist_2000_1 <- hist_2000[2:nrow(hist_2000),] #Delete patients that got 0 tests
###--Generate total number of tests for 2000--###
sum_2000 <- sum(hist_2000_1$total)
####------Create percentages for each test-------#####
hist_2000_1$percentage <- (hist_2000_1$total/sum_2000)*100
hist_2000_1 <- as.data.frame(hist_2000_1)
#Restrict to 100
hist_2000_1 <- hist_2000_1[1:100,]
##################################################################################################-------Year: 2004/5-------##################################################################################################
setwd("H:/DPhil/Chapters/5. CPRD Factors associated with appropriate and inappropriate UK testing/Post ISAC approval/Data_Final_Oct/Total tests/NEW Dec 2017")
tests_per_year <- readRDS("total_tests_combined_dec17.rds")
###--Restrict dF to just year 2004--###
tests <- tests_per_year[,c(1,6)]
###--Stratify tests into 1st, 2nd,3rd etc test for year--###
library(dplyr)
hist_2004 <- tests %>%
group_by(tot_tests_2004) %>%
summarise(total = n())
###--Exclude patients that got zero tests--###
hist_2004 <- hist_2004[2:nrow(hist_2004),] #Delete patients that got 0 tests
###--Generate total number of tests for 2004--###
sum_2004 <- sum(hist_2004$total)
####------Create percentages for each test-------#####
hist_2004$percentage <- (hist_2004$total/sum_2004)*100
hist_2004 <- as.data.frame(hist_2004)
#Restrict to 100
hist_2004 <- hist_2004[1:100,]
##################################################################################################-------Year: 2015-------##################################################################################################
setwd("H:/DPhil/Chapters/5. CPRD Factors associated with appropriate and inappropriate UK testing/Post ISAC approval/Data_Final_Oct/Total tests/NEW Dec 2017")
tests_per_year <- readRDS("total_tests_combined_dec17.rds")
###--Restrict dF to just year 2015--###
tests <- tests_per_year[,c(1,17)]
###--Stratify tests into 1st, 2nd,3rd etc test for year--###
library(dplyr)
hist <- tests %>%
group_by(tot_tests_2015) %>%
summarise(total = n())
###--Exclude patients that got zero tests--###
hist_2015 <- hist[2:nrow(hist),] #Delete patients that got 0 tests
###--Generate total number of tests for 2015--###
sum_2015 <- sum(hist_2015$total)
####------Create percentages for each test-------#####
hist_2015$percentage <- (hist_2015$total/sum_2015)*100
hist_2015 <- as.data.frame(hist_2015)
#Restrict to 100
hist_2015 <- hist_2015[1:100,]
##################################################################################################-------Plotting-------##################################################################################################
library(reshape2)
data <- cbind(hist_2000_1, hist_2004, hist_2015)
data <- data[,c(1,3,6,9)]
colnames(data) <- c("no_tests", "2000", "2004", "2015")
data_m <- melt(data, id.vars = "no_tests")
#Line and graph lot
library(ggplot2)
#X axis normal
data_m$no_tests <- factor(data_m$no_tests, levels = unique(data_m$no_tests)) #Makes all x axis labels show
ggplot(data_m, aes(x = no_tests, y = value, group = variable, colour = variable)) + geom_line() + geom_point() +
labs(x="Annual number of tests per patient", y = "Percentage of all tests (%)") +
ggtitle("Percentage of all tests ordered \nby annual number of tests received per patient") + theme(plot.title = element_text(hjust = 0.5)) + labs(colour = "Year")
#Logged x axis
data_m$no_tests <- as.numeric(data_m$no_tests)
ggplot(data_m, aes(x = no_tests, y = value, group = variable, colour = variable)) + geom_line() + geom_point() +
labs(x="Annual number of tests per patient (logged)", y = "Percentage of all tests (%)") +
ggtitle("Percentage of all tests ordered \nby annual number of tests received per patient") + theme(plot.title = element_text(hjust = 0.5)) + labs(colour = "Year") +
scale_x_log10()
#Save as DPI 300
setwd("H:/DPhil/Chapters/5. CPRD Factors associated with appropriate and inappropriate UK testing/Post ISAC approval/Paper 1 Temporal Total + specific test use/Lancet_Submission/Figures")
tiff("Figure_3.tiff", height = 15, width = 15, units = "cm", res = 300)
ggplot(data_m, aes(x = no_tests, y = value, group = variable, colour = variable)) + geom_line() + geom_point() +
labs(x="Annual number of tests per patient (logged)", y = "Percentage of all tests (%)") +
theme(plot.title = element_text(hjust = 0.5)) + labs(colour = "Year") +
scale_x_log10()
dev.off()
##################################################################################################-------Chi square test to compare proportions-------##################################################################################################
#Create percentage with >10 tests for each year
greater_than_10_2000 <- sum(hist_2000_1[11:100,2])
greater_than_10_2000_p <- greater_than_10_2000/sum_2000 #10% of tests were on patients than recieved greater than 10 tests
greater_than_10_2004 <- sum(hist_2004[11:100,2])
greater_than_10_2004_p <- greater_than_10_2004/sum_2004 #21% of tests were on patients than recieved greater than 10 tests
greater_than_10_2015 <- sum(hist_2015[11:100,2])
greater_than_10_2015_p <- greater_than_10_2015/sum_2015 #33%
#Chi squared tests
prop.test(c(greater_than_10_2000, greater_than_10_2015), c(sum_2000, sum_2015), correct = FALSE)
prop.test(c(greater_than_10_2004, greater_than_10_2015), c(sum_2004, sum_2015), correct = FALSE)
#See 'Histograms_Dec_2017_update' R script for data that made up Table 5
####Creating dataframe for chi-squared test#### Formula is prop.test(c(numerator 2000, numerator 2015), c(denominator 2000, denominator 2015))
#####-----Patients that received 1-10 tests-------#####
#2000 vs. 2015
chi_sqrd_1 <- prop.test(c(hist_2000_1_10[1,2], hist15[1,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_2 <- prop.test(c(hist_2000_1_10[2,2], hist15[2,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_3 <- prop.test(c(hist_2000_1_10[3,2], hist15[3,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_4 <- prop.test(c(hist_2000_1_10[4,2], hist15[4,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_5 <- prop.test(c(hist_2000_1_10[5,2], hist15[5,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_6 <- prop.test(c(hist_2000_1_10[6,2], hist15[6,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_7 <- prop.test(c(hist_2000_1_10[7,2], hist15[7,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_8 <- prop.test(c(hist_2000_1_10[8,2], hist15[8,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_9 <- prop.test(c(hist_2000_1_10[9,2], hist15[9,2]),c(sum_2000, sum_2015), correct = FALSE)
chi_sqrd_10 <- prop.test(c(hist_2000_1_10[10,2], hist15[10,2]),c(sum_2000, sum_2015), correct = FALSE)
#2004 vs. 2015
chi_sqrd_1_1 <- prop.test(c(hist04[1,2], hist15[1,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_2_2 <- prop.test(c(hist04[2,2], hist15[2,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_3_3 <- prop.test(c(hist04[3,2], hist15[3,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_4_4 <- prop.test(c(hist04[4,2], hist15[4,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_5_5 <- prop.test(c(hist04[5,2], hist15[5,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_6_6 <- prop.test(c(hist04[6,2], hist15[6,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_7_7 <- prop.test(c(hist04[7,2], hist15[7,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_8_8 <- prop.test(c(hist04[8,2], hist15[8,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_9_9 <- prop.test(c(hist04[9,2], hist15[9,2]),c(sum_2004, sum_2015), correct = FALSE)
chi_sqrd_10_10 <- prop.test(c(hist04[10,2], hist15[10,2]),c(sum_2004, sum_2015), correct = FALSE)