-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path020compare.R
97 lines (66 loc) · 3.16 KB
/
020compare.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Compare daily multimedia audit logs between EMu & filer01
# Import EMu log ####
# select only edits of Main OR Supplementary multimedia:
# keep rows whose AudNewValue starts with "Multimedia" or "Supplementary_tab",
# drop the first column, then remove exact duplicate rows
emu2 <- unique(
  emu2[grepl("^Multimedia|^Supplementary_tab", emu2$AudNewValue), -1]
)
# reshape to one column per AudColumnName; missing combinations become ""
emu2 <- spread(emu2, key = "AudColumnName", value = "AudNewValue", fill = "")
# log includes multiple updates to the same irn
# left join: every emu1 row is kept, matched to emu2 on eaudit_key
# (TRUE rather than T, because T is an ordinary variable that can be reassigned)
emu <- merge(emu1, emu2,
             by = "eaudit_key",
             all.x = TRUE)
# Import Filer log ####
# split "Delete"/"Create" edits
filerDeleted <- filerBU[filerBU$category == "Delete", ]
# keep events whose type contains "File"; only the event type and the source
# path are used below
filer <- filerBU[grepl("File", filerBU$event.type),
                 c("event.type", "path.from")]
# order by path, then event type, and drop exact duplicate rows
filer <- unique(filer[order(filer$path.from, filer$event.type), ])
# uncomment the filter on the next line to strip supplementary directories & files
filerMain <- filer # [!grepl("/supplementary", filer$path.from),]
# convert directory paths to irn's
# remove the multimedia root directory string from each path
filerMain$key <- gsub("/Multimedia/emufmnh/multimedia/", "", filerMain$path.from)
# split on "/": first two components, then everything else (extra = "merge")
filerMain <- separate(filerMain, key,
                      c("irn1", "irn2", "MulIdentifier"),
                      sep = "/",
                      extra = "merge")
# keep only rows whose second path component contains a digit
filerMain <- filerMain[grepl("\\d+", filerMain$irn2), ]
# irn is the two directory components concatenated with no separator
filerMain <- unite(filerMain, irn, c(irn1, irn2), sep = "")
# spread
# number the rows within each run of identical irn values (rows are already
# ordered by path). The rle() component is named `lengths`; `$length` only
# worked through partial matching.
filerMain$seq <- sequence(rle(filerMain$irn)$lengths)
# label each row "<event.type><n>" so it can become a column name
filerMain$seq <- paste0(filerMain$event.type, filerMain$seq)
# one row per irn, one column per event label, holding the MulIdentifier
filerMain2 <- unique(filerMain[, c("irn", "MulIdentifier", "seq")])
filerMain2 <- spread(filerMain2,
                     key = seq,
                     value = MulIdentifier)
# Compare EMu & Filer logs ####
# EMu audit rows whose AudKey has no matching irn in the filer log
compareLogs <- emu[!emu$AudKey %in% filerMain2$irn, ]
# This shouldn't be a thing (NROW should always == 0):
# filer rows whose irn has no matching AudKey in the EMu log
compareLogs2 <- filerMain2[!filerMain2$irn %in% emu$AudKey, ]
# Counts of created & deleted files:
# distinct irn values with at least one event label containing "Create" /
# "Delete". paste0() is used so that no space is inserted before the number;
# paste() would give "Filer-Created- 5" and a Count of " 5" after the split.
countCreated <- paste0("Filer-Created-",
                       NROW(unique(filerMain$irn[grepl("Create", filerMain$seq)])))
countDeleted <- paste0("Filer-Deleted-",
                       NROW(unique(filerMain$irn[grepl("Delete", filerMain$seq)])))
countMissing <- paste0("Filer-Missing-",
                       NROW(compareLogs))
# three-row recap (Missing, Created, Deleted), each stamped with the latest
# timeEMu$ctime formatted as "YYYY-MM-DD Day"
FilerRecap <- data.frame("FilesOnFiler" = rbind(countMissing, countCreated, countDeleted),
                         "EMuLogDate" = format(max(timeEMu$ctime), "%Y-%m-%d %a"),
                         stringsAsFactors = FALSE)
# split the first column on "-" into Where / Action / Count
FilerRecap <- separate(FilerRecap, 1,
                       c("Where", "Action", "Count"),
                       sep = "-")
# Output recap & missing files ####
# Wrapped in local() so the helper variable below does not leak into the
# global environment shared with the other scripts.
local({
  # Read OUT_DIR once; fall back to the working directory when it is unset or
  # empty (previously this led to dir.create("")).
  outDir <- Sys.getenv("OUT_DIR", unset = ".")
  if (!nzchar(outDir)) {
    outDir <- "."
  }
  # Drop trailing separators (but never the whole string) so that file.path()
  # inserts exactly one. Paths used to be built with paste0(), which silently
  # wrote to "<OUT_DIR>FilerRecap.csv" when OUT_DIR had no trailing slash.
  outDir <- sub("(?<=.)[/\\\\]+$", "", outDir, perl = TRUE)
  if (!dir.exists(outDir)) {
    # recursive = TRUE so missing parent directories are created as well
    dir.create(outDir, recursive = TRUE)
  }
  # Uncomment format() line to datestamp the FilerRecap.csv
  write.csv(FilerRecap,
            file = file.path(outDir,
                             paste0("FilerRecap",
                                    # format(max(timeEMu$ctime), "%Y%m%d_%a"),
                                    ".csv")),
            row.names = FALSE)
  # Only write the missing-files report when there is something to report;
  # the file name carries the date of the latest timeEMu$ctime.
  if (NROW(compareLogs) > 0) {
    write.csv(compareLogs,
              file = file.path(outDir,
                               paste0("checkMissingFiles_",
                                      format(max(timeEMu$ctime), "%Y%m%d_%a"),
                                      ".csv")),
              row.names = FALSE)
  }
})