forked from legend-exp/g4simple
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathg4sh5.py
114 lines (92 loc) · 3.63 KB
/
g4sh5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import sys, h5py
import numpy as np
import pandas as pd
'''
g4sh5 is a python module of convenience functions for reading step-wise g4simple
output stored in hdf5 files
Easy way to import it from anywhere: set the PYTHONPATH envvar:
export PYTHONPATH="$PYTHONPATH:/path/to/g4simple/"
'''
def get_n_rows(g4sntuple):
    ''' get the number of rows (steps) in a step-wise g4sntuple
    Uses the length of the 'event' column's pages dataset as the row count,
    since every step carries an event number.
    Parameters
    ----------
    g4sntuple : h5py.Dataset
        The g4simple ntuple. Should be written in step-wise mode.
    Returns
    -------
    n_rows : int
        The number of rows (steps) in g4sntuple
    Example
    -------
    >>> g4sfile = h5py.File('g4simpleout.hdf5', 'r')
    >>> g4sntuple = g4sfile['default_ntuples/g4sntuple']
    >>> get_n_rows(g4sntuple)
    227539 # may vary
    '''
    event_pages = g4sntuple['event/pages']
    return event_pages.shape[0]
def get_datasets(g4sntuple, fields):
    ''' get a dictionary of hdf5 datasets for the specified fields
    Note: performs no data reads from the file. Actual reads are performed when
    you subsequently access / use the returned datasets.
    Parameters
    ----------
    g4sntuple : h5py.Dataset
        The g4simple ntuple. Should be written in step-wise mode.
    fields : list of str
        A list of names of the fields to be accessed in the ntuple
    Returns
    -------
    datasets : dict of h5py datasets
        A dictionary of the requested h5py datasets, keyed by their names
    Example
    -------
    >>> g4sfile = h5py.File('g4simpleout.hdf5', 'r')
    >>> g4sntuple = g4sfile['default_ntuples/g4sntuple']
    >>> datasets = get_datasets(g4sntuple, ['event', 'step', 'pid', 'KE'])
    >>> datasets['step'][:10]
    array([0, 0, 0, 0, 1, 0, 5, 0, 1, 0], dtype=int32) # may vary
    '''
    # each field's data lives under its 'pages' sub-dataset
    return {name: g4sntuple[name]['pages'] for name in fields}
def get_dataframe(datasets, selection=None):
    ''' get a pandas dataframe from selected data in the provided datasets
    Note 1: only reads selected data from disk. If selection = None, all data is
    read into memory.
    Note 2: reads in data, then copies it into ndarrays used to build the
    dataframe. To avoid the copying, instead of using this function, work with
    the datasets directly, or force single reads into your own buffers using
    h5py.Dataset.read_direct
    Parameters
    ----------
    datasets : dict of h5py.Datasets
        The datasets from which to build the dataframe, as returned by
        get_datasets
    selection : slice object (optional)
        A slice object specifying a read of just a subset of the data. Allows
        one to cycle through data in a file too large to fit in memory
    Returns
    -------
    dataframe : pandas.DataFrame
        A pandas dataframe containing the selected data from the requested
        datasets
    Example
    -------
    >>> g4sfile = h5py.File('g4simpleout.hdf5', 'r')
    >>> g4sntuple = g4sfile['default_ntuples/g4sntuple']
    >>> datasets = get_datasets(g4sntuple, ['event', 'step', 'pid', 'x', 'y', 'z'])
    >>> dataframe = get_dataframe(datasets, slice(10,15))
    >>> dataframe
       event  step  pid         x          y          z
    0      3     1   11 -24.25767 -13.368400  31.085745
    1      3     0   11 -24.25767 -13.368400  31.085744
    2      3     1   11 -24.25767 -13.368401  31.085745
    3      3     0   11 -24.25767 -13.368400  31.085744
    4      3     1   11 -24.25767 -13.368400  31.085744 # may vary
    '''
    # slice(None) selects everything, so both branches collapse into one read
    if selection is None:
        selection = slice(None)
    array_dict = {key: np.array(ds[selection]) for key, ds in datasets.items()}
    return pd.DataFrame(array_dict)