forked from ekfriis/farmout
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathModifyJobReport.py
executable file
·212 lines (171 loc) · 4.9 KB
/
ModifyJobReport.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/usr/bin/env python
"""
Dan 2008-10-29: copied this from
CRAB_2_3_1/external/ProdCommon/FwkJobRep/ModifyJobReport.py and
updated for use with farmout jobs.
_ModifyJobReport.py
Example of how to use the FwkJobRep package to update a job report post processing
"""
import os, string
import sys
import popen2
import getopt
from ProdCommon.FwkJobRep.ReportParser import readJobReport
# Directory for temporary files: /scratch when this process can write to it,
# otherwise /tmp.
if os.access("/scratch", os.W_OK):
    tmp_file_path = "/scratch"
else:
    tmp_file_path = "/tmp"
def readCksum(filename):
    """
    _readCksum_

    Run the cksum command on a file and return the checksum value.

    The file name is passed to cksum as a single argument (no shell is
    involved), so names containing spaces or shell metacharacters are
    handled correctly.

    Returns the first whitespace-separated field of the cksum output as a
    string, or None if the command could not be started, exited with a
    non-zero status, or produced no output.
    """
    # Imported here so the function carries its own dependency.
    import subprocess

    try:
        proc = subprocess.Popen(
            # "--" stops option parsing so a name starting with "-" is
            # still treated as a file.
            ["cksum", "--", filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError:
        # cksum is missing or not executable: report "no checksum", as the
        # shell-based version did through a non-zero exit status.
        return None

    # communicate() drains both pipes and waits for the process to exit,
    # replacing the busy poll() loop.
    output, _ = proc.communicate()
    if proc.returncode:
        return None

    fields = output.split()
    if not fields:
        return None
    return fields[0]
def fileSize(filename):
    """
    _fileSize_

    Return the size of the named file as reported by os.stat().
    """
    stat_result = os.stat(filename)
    return stat_result.st_size
def addFileStats(file):
    """
    _addFileStats_

    Record size and checksum information on a file entry.

    Reads the entry's 'PFN', stores the file size under 'Size', and
    registers the cksum value through the entry's addChecksum() method.
    """
    physical_name = file['PFN']
    file['Size'] = fileSize(physical_name)
    file.addChecksum('cksum', readCksum(physical_name))
def ShowUsage():
    """Write the command-line usage summary to standard error."""
    # The leading and trailing empty entries reproduce the newline at the
    # start and end of the usage text.
    usage_lines = (
        "",
        "USAGE: ModifyJobReport.py OPTIONS",
        "OPTIONS:",
        "--input-fjr",
        "--output-fjr",
        "--PrimaryDataset",
        "--DataTier",
        "--ProcessedDataset",
        "--ApplicationFamily",
        "--ApplicationName",
        "--ApplicationVersion",
        "--PSetHash",
        "--SEName",
        "--pfn-path",
        "--lfn-path",
        "--strip-input-file-info (do not publish info about input files)",
        "",
    )
    sys.stderr.write("\n".join(usage_lines))
if __name__ == '__main__':
    # Load the job report named by --input-fjr, point every output file at
    # its stage-out location, attach size/checksum and dataset information,
    # and write the modified report to --output-fjr.

    long_options = [
        "help",
        "input-fjr=",
        "output-fjr=",
        "PrimaryDataset=",
        "DataTier=",
        "ProcessedDataset=",
        "ApplicationFamily=",
        "ApplicationName=",
        "ApplicationVersion=",
        "PSetHash=",
        "SEName=",
        "pfn-path=",
        "lfn-path=",
        "strip-input-file-info"
    ]

    try:
        options, args = getopt.getopt(sys.argv[1:], "h", long_options)
    except getopt.GetoptError:
        # Report a bad command line as a usage error, not a traceback.
        sys.stderr.write("ERROR: %s\n" % sys.exc_info()[1])
        ShowUsage()
        sys.exit(2)

    # Every value option starts as None so an omitted option can be
    # reported by name instead of surfacing later as a NameError.
    inputReport = None
    outputReport = None
    PrimaryDataset = None
    DataTier = None
    ProcessedDataset = None
    ApplicationFamily = None
    ApplicationName = None
    ApplicationVersion = None
    PSetHash = None
    SEName = None
    pfn_path = None
    lfn_path = None
    strip_input_file_info = 0

    for option, value in options:
        if option == "--help" or option == "-h":
            ShowUsage()
            sys.exit(0)
        elif option == "--input-fjr":
            inputReport = value
        elif option == "--output-fjr":
            outputReport = value
        elif option == "--PrimaryDataset":
            PrimaryDataset = value
        elif option == "--DataTier":
            DataTier = value
        elif option == "--ProcessedDataset":
            ProcessedDataset = value
        elif option == "--ApplicationFamily":
            ApplicationFamily = value
        elif option == "--ApplicationName":
            ApplicationName = value
        elif option == "--ApplicationVersion":
            ApplicationVersion = value
        elif option == "--PSetHash":
            PSetHash = value
        elif option == "--SEName":
            SEName = value
        elif option == "--pfn-path":
            pfn_path = value
        elif option == "--lfn-path":
            lfn_path = value
        elif option == "--strip-input-file-info":
            strip_input_file_info = 1
        else:
            sys.stderr.write("Unexpected option: " + str(option) + "\n")
            sys.exit(2)

    if inputReport is None:
        sys.stderr.write("ERROR: missing required option --input-fjr\n")
        ShowUsage()
        sys.exit(2)

    reports = readJobReport(inputReport)

    # report is an instance of FwkJobRep.FwkJobReport class
    # can be N in a file, so a list is always returned
    # here I am assuming just one report per file for simplicity
    if len(reports) != 1:
        # Format both values in a single % operation; applying % to the
        # trailing "\n" alone raised TypeError instead of printing this.
        sys.stderr.write("ERROR: Found %d reports in %s\n"
                         % (len(reports), inputReport))
        sys.exit(1)
    report = reports[-1]

    if len(report.files) == 0:
        print("no output file to modify")
        sys.exit(1)

    # The remaining options are only needed from here on. They are checked
    # after the empty-report exit above so that path keeps its exit status.
    required = (
        ("--output-fjr", outputReport),
        ("--PrimaryDataset", PrimaryDataset),
        ("--DataTier", DataTier),
        ("--ProcessedDataset", ProcessedDataset),
        ("--ApplicationFamily", ApplicationFamily),
        ("--ApplicationName", ApplicationName),
        ("--ApplicationVersion", ApplicationVersion),
        ("--PSetHash", PSetHash),
        ("--SEName", SEName),
        ("--pfn-path", pfn_path),
        ("--lfn-path", lfn_path),
    )
    missing = [name for name, given in required if given is None]
    if missing:
        sys.stderr.write("ERROR: missing required option(s): "
                         + ", ".join(missing) + "\n")
        ShowUsage()
        sys.exit(2)

    # CRAB requires this status == "Success"
    # would be nice to know if the job _really_ succeeded
    report.status = "Success"

    # NOTE, ExitCode in the job report is 50117, which means
    # "could not update exit code in job report"
    # I think this is the default set by cmssw and it is
    # supposed to be overridden by the job wrapper.
    # Currently we are not setting it. Perhaps we
    # should save the exit code and file cksum in
    # a log file (or poke it into the FJR) at runtime
    # and then fix it up here if necessary. That way,
    # the worker node does not need access to the python
    # code for parsing FJRs.

    for f in report.files:
        fname = f['PFN']
        # If the file is not directly under pfn_path, look for it in a
        # numbered subdirectory (pfn_path/1 .. pfn_path/99) and use the
        # first one that contains it.
        if not os.path.exists(os.path.join(pfn_path, fname)):
            for i in range(1, 100):
                if os.path.exists(os.path.join(pfn_path, str(i), fname)):
                    fname = os.path.join(str(i), fname)
                    break

        f['PFN'] = os.path.join(pfn_path, fname)
        f['LFN'] = os.path.join(lfn_path, fname)
        f['SEName'] = SEName

        # Generate per file stats
        addFileStats(f)

        if strip_input_file_info:
            f.inputFiles = []

        datasetinfo = f.newDataset()
        datasetinfo['PrimaryDataset'] = PrimaryDataset
        datasetinfo['DataTier'] = DataTier
        datasetinfo['ProcessedDataset'] = ProcessedDataset
        datasetinfo['ApplicationFamily'] = ApplicationFamily
        datasetinfo['ApplicationName'] = ApplicationName
        datasetinfo['ApplicationVersion'] = ApplicationVersion
        datasetinfo['PSetHash'] = PSetHash

    if strip_input_file_info:
        report.inputFiles = []

    # After modifying the report, save it to a file.
    report.write(outputReport)
    print("Wrote modified report to " + outputReport)