updating to the cloud
AstroMason committed Oct 29, 2020
1 parent bff9956 commit c4b90cf
Showing 75 changed files with 8,671 additions and 638 deletions.
4 changes: 3 additions & 1 deletion Lv0_dirs.py
@@ -12,8 +12,10 @@ def global_par():
"""
Defining global variables for the directories
"""
global BASE_DIR, NICER_DATADIR, NICERSOFT_DATADIR, NGC300
global BASE_DIR, NICER_DATADIR, NICERSOFT_DATADIR, NGC300, NGC300_2020, NGC300_XMM
BASE_DIR = '/Users/masonng/Documents/MIT/Research/'
NICER_DATADIR = '/Volumes/Samsung_T5/NICER-data/'
NICERSOFT_DATADIR = '/Volumes/Samsung_T5/NICERsoft_outputs/'
NGC300 = '/Volumes/Samsung_T5/NGC300_ULX/'
NGC300_2020 = '/Volumes/Samsung_T5/n300_ulx_2020/'
NGC300_XMM = '/Volumes/Samsung_T5/NGC300_XMMdata/'
35 changes: 35 additions & 0 deletions Lv0_get_swift_data.py
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Tues April 21 7:10pm 2020
Program that takes in a FITS table from the HEASARC archive of observation IDs
and pointing modes (for Swift), then uses the Perl script download_wget.pl to
obtain the data files.
"""
from __future__ import division, print_function
import numpy as np
from astropy.io import fits
import Lv0_dirs,Lv1_data_gtis,Lv2_presto_subroutines,Lv2_mkdir
import os
from tqdm import tqdm
import subprocess
import pathlib

Lv0_dirs.global_par()

def download_txt(txtfile):
    """
    Given the text file of download instructions, take the URLs of where the data
    are stored within the HEASARC archive, and use download_wget.pl to retrieve them.
    txtfile - path to the text file of download instructions
    """
    contents = open(txtfile,'r').read().split('\n')
    urls = [contents[i].split(' ')[-1] for i in range(len(contents)-1)]

    for i in tqdm(range(len(urls))):
        subprocess.run(['perl','/Volumes/Samsung_T5/download_wget.pl',urls[i]])

if __name__ == "__main__":
    textfile = '/Volumes/Samsung_T5/NGC300_ULX_Swift/ngc300x1-ulx1_photon_pointing.txt'
    download_txt(textfile)
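For reference, download_txt assumes the URL is the last whitespace-separated token on each non-empty line of the instructions file. A minimal sketch of that parsing step, with a made-up line format and URL (the real wget instructions from HEASARC may look different):

# Hypothetical example of the parsing in download_txt (illustrative values only).
sample_lines = [
    "wget -q -nH --cut-dirs=5 -r -c -N https://heasarc.gsfc.nasa.gov/FTP/swift/data/obs/2020_01/00034070101/",
    "",  # split('\n') leaves a trailing empty string, hence the range(len(contents)-1) above
]
urls = [sample_lines[i].split(' ')[-1] for i in range(len(sample_lines)-1)]
print(urls)  # ['https://heasarc.gsfc.nasa.gov/FTP/swift/data/obs/2020_01/00034070101/']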
11 changes: 8 additions & 3 deletions Lv0_psrpipe.py
@@ -10,6 +10,7 @@
import numpy as np
import Lv0_dirs
from astropy.io import fits
from tqdm import tqdm
import os
import subprocess

@@ -54,6 +55,10 @@ def psrpipe(eventfile,flags):
    return

if __name__ == "__main__":
    obsid = '1034070101'
    eventfile = Lv0_dirs.NICER_DATADIR + obsid + '/xti/event_cl/ni' + obsid+'_0mpu7_cl.evt'
    psrpipe(eventfile,['--emin','0.3','--emax','12.0'])
    #obsid = '1034070101'
    #eventfile = Lv0_dirs.NICER_DATADIR + obsid + '/xti/event_cl/ni' + obsid+'_0mpu7_cl.evt'
    #psrpipe(eventfile,['--emin','0.3','--emax','12.0'])

    eventfiles = [Lv0_dirs.NICER_DATADIR + str(i) + '/xti/event_cl/ni' + str(i) + '_0mpu7_cl.evt' for i in range(1030180101,1030180188)]
    for i in tqdm(range(len(eventfiles))):
        psrpipe(eventfiles[i],['--emin','0.3','--emax','12.0','--nounderfilt'])
2 changes: 1 addition & 1 deletion Lv0_scp.py
@@ -36,7 +36,7 @@ def scp(obsid):

if __name__ == "__main__":
    #obsids = ['203420020' + str(i) for i in range(1,6)] + ['103419010' + str(i) for i in range(1,5)] + ['1034200' + str(i) for i in range(201,241)]
    obsids = ['00' + str(i) for i in range(34070101,34070105)]
    obsids = ['201001023' + str(i) for i in range(6,7)]
    for i in range(len(obsids)):
        scp(obsids[i])

8 changes: 4 additions & 4 deletions Lv1_data_bin.py
@@ -41,7 +41,7 @@ def binning_t(eventfile,par_list,tbin_size,t1,t2):
    startt = int(t1)
    endt = int(t2)

    t_bins = np.linspace(startt,endt,(endt-startt)*1/tbin_size+1) #getting an array of time values for the bins
    t_bins = np.linspace(startt,endt,int((endt-startt)*1/tbin_size+1)) #getting an array of time values for the bins
    summed_data, bin_edges, binnumber = stats.binned_statistic(truncated_t,counts,statistic='sum',bins=t_bins) #binning the counts in the data

    print("The data is binned by " + str(tbin_size) + 's')
@@ -81,13 +81,13 @@ def binning_E(eventfile,par_list,tbin_size,Ebin_size,E1,E2):
    startt = int(truncated_t[0])
    endt = np.ceil(truncated_t[-1])

    t_bins = np.linspace(startt,endt,(endt-startt)*1/tbin_size+1) #getting an array of time values for the bins
    t_bins = np.linspace(startt,endt,int((endt-startt)*1/tbin_size+1)) #getting an array of time values for the bins
    summed_data_t, bin_edges, binnumber = stats.binned_statistic(truncated_t,counts,statistic='sum',bins=t_bins) #binning the time values in the data

    if E1 < 1: #if less than 1keV, the binning for 0.3-1keV is slightly different.
        E_bins = np.linspace(E1,E2,(E2-E1)*1/Ebin_size+2) #getting an array of energy values for the bins
        E_bins = np.linspace(E1,E2,int((E2-E1)*1/Ebin_size+2)) #getting an array of energy values for the bins
    else:
        E_bins = np.linspace(E1,E2,(E2-E1)*1/Ebin_size+1) #getting an array of energy values for the bins
        E_bins = np.linspace(E1,E2,int((E2-E1)*1/Ebin_size+1)) #getting an array of energy values for the bins
    summed_data_E, bin_edges, binnumber = stats.binned_statistic(truncated_E,counts,statistic='sum',bins=E_bins) #binning the energy values in the data

    print("The data is binned by " + str(tbin_size) + 's, and ' + str(Ebin_size) + 'keV')
65 changes: 65 additions & 0 deletions Lv1_data_gtis.py
@@ -0,0 +1,65 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 22 4:55pm 2020
Script to manipulate GTIs where necessary. In the Feb 22 2020 edition, consecutive
GTIs that are only a short time apart are combined, so that acceleration searches
can be run on the individual (merged) GTIs.
Reference for restarting a for loop (used in GTI_bunching):
https://stackoverflow.com/questions/3704918/python-way-to-restart-a-for-loop-similar-to-continue-for-while-loops
"""
from __future__ import division, print_function
from astropy.io import fits
import numpy as np
import pathlib
import subprocess
import Lv0_dirs,Lv0_fits2dict

Lv0_dirs.global_par() #obtaining the global parameters

def GTI_bunching(eventfile,gap,gtifile):
    """
    To bunch up GTIs which are separated by less than a few seconds.
    eventfile - path to the event file. Will extract ObsID from this for the NICER files.
    gap - maximum allowed separation between the end time of one GTI and the start time of the next
    gtifile - name of the GTI file to create
    """
    parent_folder = str(pathlib.Path(eventfile).parent)
    gtifile_path = parent_folder + '/' + gtifile
    gtis = list(fits.open(eventfile)[2].data)
    N = len(gtis)

    should_restart = True
    while should_restart: #to restart the for loop if I reached a pair of GTIs that can be combined
        should_restart = False
        for i in range(N-1): #variable N is there as we need to UPDATE the length of the GTIs each time we delete one!
            if (gtis[i+1][0]-gtis[i][1]) <= gap: #if (start time of next GTI - end time of previous GTI) is less than 5s, say
                new_start = gtis[i][0] #defined here, because "i" will change after deleting the 2 previous GTIs!
                new_end = gtis[i+1][1]
                del gtis[i]
                del gtis[i] #use the SAME index since the list after the first deletion will have N-1 items!
                gtis.insert(i,(new_start,new_end))
                N = len(gtis)
                should_restart = True
                break
            else:
                N = len(gtis)
                continue

    gtitxt = open(parent_folder + '/bunchedgtis.txt','w')
    for i in range(len(gtis)):
        gtitxt.write(str(gtis[i][0]) + ' ' + str(gtis[i][1]) + '\n')
    gtitxt.close()

    gti_col_file = Lv0_dirs.NICER_DATADIR + 'gti_columns.txt'
    gti_header_file = Lv0_dirs.NICER_DATADIR + 'gti_header.txt'
    ftcreate_cmd = ['ftcreate',gti_col_file,parent_folder+'/bunchedgtis.txt',gtifile_path,'headfile='+gti_header_file,'extname="GTI"','clobber=yes']
    subprocess.run(ftcreate_cmd)

if __name__ == "__main__":
eventfile = Lv0_dirs.NICER_DATADIR + 'xtej1739/2002131540_bary.evt'
gtifile = Lv0_dirs.NICER_DATADIR + 'xtej1739/bunched.gti'
GTI_bunching(eventfile,5,gtifile)
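The merging logic in GTI_bunching (scan the GTIs, merge any pair closer than gap, and restart the scan after each merge) can be checked on a toy list; a compact sketch of the same idea with made-up times, not the function itself:

# Toy check of the GTI-bunching idea: merge consecutive GTIs separated by <= gap seconds.
gap = 5
gtis = [(0.0, 10.0), (12.0, 20.0), (40.0, 50.0), (53.0, 60.0), (100.0, 110.0)]

merged = True
while merged:
    merged = False
    for i in range(len(gtis)-1):
        if gtis[i+1][0] - gtis[i][1] <= gap:
            gtis[i:i+2] = [(gtis[i][0], gtis[i+1][1])]  # replace the pair with one merged GTI
            merged = True
            break

print(gtis)  # [(0.0, 20.0), (40.0, 60.0), (100.0, 110.0)]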
161 changes: 64 additions & 97 deletions Lv1_ngc300_binning.py
@@ -10,121 +10,88 @@
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from astropy.io import fits
import subprocess
import pathlib
import glob
import Lv0_dirs

Lv0_dirs.global_par()

binsize_s = 1
binsize_s = 5
bin_size = str(binsize_s).zfill(2) + 'd' #bin width in days, as a zero-padded string (e.g. '05d')
bgsub_type = 'xsbgsub'

RGcms = Lv0_dirs.NGC300 + 'n300_ulx.bgsub_cl50_RGcms.ffphot'
RGnorm = Lv0_dirs.NGC300 + 'n300_ulx.bgsub_cl50_RGnorm.ffphot'
RGerror = Lv0_dirs.NGC300 + 'n300_ulx.bgsub_cl50_RGerr_norm.ffphot'
#RGcms = Lv0_dirs.NGC300_2020 + 'n300_ulx.bgsub_cl50_g2020.fffphot' NOT USING for 2020. No fffphot file for this anyways
norm_fffphot = Lv0_dirs.NGC300_2020 + 'n300_ulx.' + bgsub_type + '_cl50_g2020norm.fffphot'

soft1,soft2,A_band,B_band,C_band,D_band,inband,time = np.genfromtxt(RGnorm,usecols=(3,4,5,6,7,8,9,11),unpack=True)
soft1_err,soft2_err,A_err,B_err,C_err,D_err,inband_err,time_error = np.genfromtxt(RGerror,usecols=(3,4,5,6,7,8,9,11),unpack=True)
# time_error does not mean error in the time value, I just mean the time value found in the error text file
filename = np.genfromtxt(RGnorm,dtype='str',usecols=(0),unpack=True)
filename_err = np.genfromtxt(RGerror,dtype='str',usecols=(0),unpack=True)
### extracting photometry/count rates for soft_1 (0.2-0.3 keV), soft_2 (0.3-0.4 keV),
### A (0.4-1 keV), B (1-2 keV), C (2-4 keV), D (4-12) keV, in-band (A+B+C+D = 0.4-12 keV) bands, plus MJD
time = np.genfromtxt(norm_fffphot,dtype='float64',usecols=(11),unpack=True)
filenames = np.genfromtxt(norm_fffphot,dtype='str',usecols=(0),unpack=True)

time_bins = np.arange(58239,58606,binsize_s)
filename_dict,soft1_dict,soft2_dict,A_dict,B_dict,C_dict,D_dict,inband_dict = {},{},{},{},{},{},{},{}
filename_err_dict,soft1_err_dict,soft2_err_dict,A_err_dict,B_err_dict,C_err_dict,D_err_dict,inband_err_dict = {},{},{},{},{},{},{},{}
time = np.floor(time) #can't use int ; basically round the time values down to the integer value

#### bin up the counts and associated errors into dictionaries!
for i in tqdm(range(len(time_bins))):
    ## so for each time bin, gather up the count rates (and errors) in a dictionary
    filename_dict[time_bins[i]] = filename[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    soft1_dict[time_bins[i]] = soft1[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    soft2_dict[time_bins[i]] = soft2[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    A_dict[time_bins[i]] = A_band[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    B_dict[time_bins[i]] = B_band[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    C_dict[time_bins[i]] = C_band[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    D_dict[time_bins[i]] = D_band[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    inband_dict[time_bins[i]] = inband[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]

    filename_err_dict[time_bins[i]] = filename_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    soft1_err_dict[time_bins[i]] = soft1_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    soft2_err_dict[time_bins[i]] = soft2_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    A_err_dict[time_bins[i]] = A_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    B_err_dict[time_bins[i]] = B_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    C_err_dict[time_bins[i]] = C_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    D_err_dict[time_bins[i]] = D_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    inband_err_dict[time_bins[i]] = inband_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]

### doing the calculations for weighted averages and associated uncertainties
def get_binned_data(counts_dict,err_dict):
"""
Given the dictionaries where the data (counts and uncertainty) are already
binned, put them into lists!
counts_dict - dictionary where the keys are MJD values, and the entries correspond
to the counts/rate for a given MJD
err_dict - dictionary where the keys are MJD values, and the entries correspond
to the UNCERTAINTY in the counts/rate for a given MJD
"""
if type(counts_dict) != dict and type(err_dict) != dict:
raise TypeError("Make sure that counts_dict and err_dict are actually dictionaries!")

binned_MJD = []
binned_counts = []
binned_uncertainty = []
binned_pha_files = []

dict_keys = sorted(counts_dict.keys())
for i in range(len(dict_keys)):
counts = counts_dict[dict_keys[i]] #get count rates corresponding to a given MJD
if len(counts) != 0:
pha_string = ''
pha_list = filename_dict[dict_keys[i]]
for j in range(len(pha_list)):
if j != len(pha_list)-1:
pha_string += pha_list[j] + ','
else:
pha_string += pha_list[j]
binned_pha_files.append(pha_string)

unc = err_dict[dict_keys[i]] #get errors corresponding to a given MJD
weights = 1/unc**2 #calculating the weights
weighted_ave = sum(weights*counts)/sum(weights) #weighted average
weighted_ave_unc = 1/np.sqrt(sum(weights)) #uncertainty for the weighted average
binned_MJD.append(dict_keys[i])
binned_counts.append(weighted_ave)
binned_uncertainty.append(weighted_ave_unc)

return binned_pha_files,binned_MJD,binned_counts,binned_uncertainty

def binned_text():
def binned_text(bin_size):
"""
Given the MJDs, binned counts, and associated uncertainties, put them into a text file
No arguments because I'll put all the bands in here
"""
    binned_pha_files,binned_MJD,binned_counts_soft1,binned_unc_soft1 = get_binned_data(soft1_dict,soft1_err_dict)
    binned_pha_files,binned_MJD,binned_counts_soft2,binned_unc_soft2 = get_binned_data(soft2_dict,soft2_err_dict)
    binned_pha_files,binned_MJD,binned_counts_A,binned_unc_A = get_binned_data(A_dict,A_err_dict)
    binned_pha_files,binned_MJD,binned_counts_B,binned_unc_B = get_binned_data(B_dict,B_err_dict)
    binned_pha_files,binned_MJD,binned_counts_C,binned_unc_C = get_binned_data(C_dict,C_err_dict)
    binned_pha_files,binned_MJD,binned_counts_D,binned_unc_D = get_binned_data(D_dict,D_err_dict)
    binned_pha_files,binned_MJD,binned_counts_inband,binned_unc_inband = get_binned_data(inband_dict,inband_err_dict)

    counts_file = '/Volumes/Samsung_T5/NGC300_ULX/n300_ulx.bgsub_cl50_RGnorm_' + bin_size + '.ffphot'
    output_file = open(counts_file,'w')

    for i in range(len(binned_MJD)):
        output_file.write(str(binned_MJD[i]) + ' ' + str(round(binned_counts_soft1[i],4)) + ' ' + str(round(binned_counts_soft2[i],4)) + ' ' + str(round(binned_counts_A[i],4)) + ' ' + str(round(binned_counts_B[i],4)) + ' ' + str(round(binned_counts_C[i],4)) + ' ' + str(round(binned_counts_D[i],4)) + ' ' + str(round(binned_counts_inband[i],4)) + ' ' + binned_pha_files[i] + '\n')
    output_file.close()

    unc_file = '/Volumes/Samsung_T5/NGC300_ULX/n300_ulx.bgsub_cl50_RGerr_' + bin_size + '.ffphot'
    output_file = open(unc_file,'w')

    for i in range(len(binned_MJD)):
        output_file.write(str(binned_MJD[i]) + ' ' + str(round(binned_unc_soft1[i],4)) + ' ' + str(round(binned_unc_soft2[i],4)) + ' ' + str(round(binned_unc_A[i],4)) + ' ' + str(round(binned_unc_B[i],4)) + ' ' + str(round(binned_unc_C[i],4)) + ' ' + str(round(binned_unc_D[i],4)) + ' ' + str(round(binned_unc_inband[i],4)) + ' ' + binned_pha_files[i] + '\n')
    output_file.close()
    E_bins_low = np.array([20-1,30-1,40-1,100-1,200-1,400-1,40-1,1300-1])
    E_bins_high = np.array([30-1,40-1,100-1,200-1,400-1,1200-1,1200-1,1501-1])

    bgsub_files = sorted(glob.glob(Lv0_dirs.NGC300_2020 + 'spectra_' + bin_size + '/58*_' + bgsub_type + '*_cl50.pha'))

    output_text = open(Lv0_dirs.NGC300_2020 + 'n300_ulx.' + bgsub_type + '_cl50_g2020norm_' + str(binsize_s).zfill(2) + 'd.fffphot','w')
    output_text_err = open(Lv0_dirs.NGC300_2020 + 'n300_ulx.' + bgsub_type + '_cl50_g2020err_norm_' + str(binsize_s).zfill(2) + 'd.fffphot','w')

    for i in tqdm(range(len(bgsub_files))): #for each averaged spectrum
        mjd = str(pathlib.Path(bgsub_files[i]).name)[:5]
        files_in_interval = filenames[(time>=float(mjd))&(time<float(mjd)+binsize_s)]

        if bgsub_type == 'bgsub':
            filtered_files = [Lv0_dirs.NGC300_2020 + 'bgsub_cl50/pha/' + files_in_interval[a] for a in range(len(files_in_interval))]
        elif bgsub_type == 'xbgsub':
            filtered_files = [Lv0_dirs.NGC300_2020 + 'bgsub_cl50/xpha/' + files_in_interval[a] for a in range(len(files_in_interval))]
        elif bgsub_type == 'xsbgsub':
            filtered_files = [Lv0_dirs.NGC300_2020 + 'bgsub_cl50/xspha/' + files_in_interval[a] for a in range(len(files_in_interval))]

        rates_all = []
        errs_all = []
        for j in range(len(E_bins_low)): #for each energy band
            rates_all.append(sum(fits.open(bgsub_files[i])[1].data['RATE'][E_bins_low[j]:E_bins_high[j]]))
            errs = fits.open(bgsub_files[i])[1].data['STAT_ERR'][E_bins_low[j]:E_bins_high[j]]
            errs_all.append(np.sqrt(np.mean(errs**2) * len(errs)))

        rate_1 = mjd #for the MJD
        rate_2 = '' #for the count rates in each band
        rate_3 = '' #for the list of files that goes into each bin
        for j in range(len(rates_all)):
            rate_2 += str(round(rates_all[j],4)) + ' '
        for j in range(len(filtered_files)):
            if j != len(filtered_files)-1:
                rate_3 += filtered_files[j] + ','
            else:
                rate_3 += filtered_files[j]
        output_text.write(rate_1 + ' ' + rate_2 + rate_3 + '\n')

        errs_1 = mjd #for the MJD
        errs_2 = '' #for the uncertainties in each band
        errs_3 = '' #for the list of files that goes into each bin
        for j in range(len(errs_all)):
            errs_2 += str(round(errs_all[j],4)) + ' '
        for j in range(len(filtered_files)):
            if j != len(filtered_files)-1:
                errs_3 += filtered_files[j] + ','
            else:
                errs_3 += filtered_files[j]
        output_text_err.write(errs_1 + ' ' + errs_2 + errs_3 + '\n')

    output_text.close()
    output_text_err.close()


if __name__ == "__main__":
    binned_text()
    binned_text(bin_size)
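One detail in the per-band loop above: np.sqrt(np.mean(errs**2) * len(errs)) is algebraically the quadrature sum sqrt(sum(errs**2)), i.e. the usual propagation of independent per-channel uncertainties when the RATE values are summed over a band. A quick check with illustrative numbers:

import numpy as np

errs = np.array([0.010, 0.020, 0.015])        # illustrative per-channel STAT_ERR values
print(np.sqrt(np.mean(errs**2) * len(errs)))  # ~0.0269
print(np.sqrt(np.sum(errs**2)))               # identical: the two expressions are equivalent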