updating to the cloud
AstroMason committed Oct 29, 2020
1 parent bff9956 commit c4b90cf
Showing 75 changed files with 8,671 additions and 638 deletions.
4 changes: 3 additions & 1 deletion Lv0_dirs.py
@@ -12,8 +12,10 @@ def global_par():
"""
Defining global variables for the directories
"""
global BASE_DIR, NICER_DATADIR, NICERSOFT_DATADIR, NGC300
global BASE_DIR, NICER_DATADIR, NICERSOFT_DATADIR, NGC300, NGC300_2020, NGC300_XMM
BASE_DIR = '/Users/masonng/Documents/MIT/Research/'
NICER_DATADIR = '/Volumes/Samsung_T5/NICER-data/'
NICERSOFT_DATADIR = '/Volumes/Samsung_T5/NICERsoft_outputs/'
NGC300 = '/Volumes/Samsung_T5/NGC300_ULX/'
NGC300_2020 = '/Volumes/Samsung_T5/n300_ulx_2020/'
NGC300_XMM = '/Volumes/Samsung_T5/NGC300_XMMdata/'
35 changes: 35 additions & 0 deletions Lv0_get_swift_data.py
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Tues April 21 7:10pm 2020
Program that takes in a FITS table from the HEASARC archive of observation IDs
and pointing modes (for Swift), then uses the Perl script download_wget.pl to
obtain the data files.
"""
from __future__ import division, print_function
import numpy as np
from astropy.io import fits
import Lv0_dirs,Lv1_data_gtis,Lv2_presto_subroutines,Lv2_mkdir
import os
from tqdm import tqdm
import subprocess
import pathlib

Lv0_dirs.global_par()

def download_txt(txtfile):
    """
    Given the text file of download instructions, take the URLs of where the data
    are stored within the HEASARC archive, and use download_wget.pl to retrieve them.
    txtfile - path to the text file of download instructions
    """
    contents = open(txtfile,'r').read().split('\n')
    urls = [contents[i].split(' ')[-1] for i in range(len(contents)-1)]

    for i in tqdm(range(len(urls))):
        subprocess.run(['perl','/Volumes/Samsung_T5/download_wget.pl',urls[i]])

if __name__ == "__main__":
    textfile = '/Volumes/Samsung_T5/NGC300_ULX_Swift/ngc300x1-ulx1_photon_pointing.txt'
    download_txt(textfile)
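For reference, download_txt assumes the URL is the last whitespace-separated token on each non-empty line of the instructions file. A minimal sketch of that parsing step, with a made-up line format and URL (the real wget instructions from HEASARC may look different):

# Hypothetical example of the parsing in download_txt (illustrative values only).
sample_lines = [
    "wget -q -nH --cut-dirs=5 -r -c -N https://heasarc.gsfc.nasa.gov/FTP/swift/data/obs/2020_01/00034070101/",
    "",  # split('\n') leaves a trailing empty string, hence the range(len(contents)-1) above
]
urls = [sample_lines[i].split(' ')[-1] for i in range(len(sample_lines)-1)]
print(urls)  # ['https://heasarc.gsfc.nasa.gov/FTP/swift/data/obs/2020_01/00034070101/']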
11 changes: 8 additions & 3 deletions Lv0_psrpipe.py
@@ -10,6 +10,7 @@
import numpy as np
import Lv0_dirs
from astropy.io import fits
from tqdm import tqdm
import os
import subprocess

@@ -54,6 +55,10 @@ def psrpipe(eventfile,flags):
    return

if __name__ == "__main__":
    obsid = '1034070101'
    eventfile = Lv0_dirs.NICER_DATADIR + obsid + '/xti/event_cl/ni' + obsid+'_0mpu7_cl.evt'
    psrpipe(eventfile,['--emin','0.3','--emax','12.0'])
    #obsid = '1034070101'
    #eventfile = Lv0_dirs.NICER_DATADIR + obsid + '/xti/event_cl/ni' + obsid+'_0mpu7_cl.evt'
    #psrpipe(eventfile,['--emin','0.3','--emax','12.0'])

    eventfiles = [Lv0_dirs.NICER_DATADIR + str(i) + '/xti/event_cl/ni' + str(i) + '_0mpu7_cl.evt' for i in range(1030180101,1030180188)]
    for i in tqdm(range(len(eventfiles))):
        psrpipe(eventfiles[i],['--emin','0.3','--emax','12.0','--nounderfilt'])
2 changes: 1 addition & 1 deletion Lv0_scp.py
@@ -36,7 +36,7 @@ def scp(obsid):

if __name__ == "__main__":
    #obsids = ['203420020' + str(i) for i in range(1,6)] + ['103419010' + str(i) for i in range(1,5)] + ['1034200' + str(i) for i in range(201,241)]
    obsids = ['00' + str(i) for i in range(34070101,34070105)]
    obsids = ['201001023' + str(i) for i in range(6,7)]
    for i in range(len(obsids)):
        scp(obsids[i])

8 changes: 4 additions & 4 deletions Lv1_data_bin.py
@@ -41,7 +41,7 @@ def binning_t(eventfile,par_list,tbin_size,t1,t2):
    startt = int(t1)
    endt = int(t2)

    t_bins = np.linspace(startt,endt,(endt-startt)*1/tbin_size+1) #getting an array of time values for the bins
    t_bins = np.linspace(startt,endt,int((endt-startt)*1/tbin_size+1)) #getting an array of time values for the bins
    summed_data, bin_edges, binnumber = stats.binned_statistic(truncated_t,counts,statistic='sum',bins=t_bins) #binning the counts in the data

    print("The data is binned by " + str(tbin_size) + 's')
@@ -81,13 +81,13 @@ def binning_E(eventfile,par_list,tbin_size,Ebin_size,E1,E2):
    startt = int(truncated_t[0])
    endt = np.ceil(truncated_t[-1])

    t_bins = np.linspace(startt,endt,(endt-startt)*1/tbin_size+1) #getting an array of time values for the bins
    t_bins = np.linspace(startt,endt,int((endt-startt)*1/tbin_size+1)) #getting an array of time values for the bins
    summed_data_t, bin_edges, binnumber = stats.binned_statistic(truncated_t,counts,statistic='sum',bins=t_bins) #binning the time values in the data

    if E1 < 1: #if less than 1keV, the binning for 0.3-1keV is slightly different.
        E_bins = np.linspace(E1,E2,(E2-E1)*1/Ebin_size+2) #getting an array of energy values for the bins
        E_bins = np.linspace(E1,E2,int((E2-E1)*1/Ebin_size+2)) #getting an array of energy values for the bins
    else:
        E_bins = np.linspace(E1,E2,(E2-E1)*1/Ebin_size+1) #getting an array of energy values for the bins
        E_bins = np.linspace(E1,E2,int((E2-E1)*1/Ebin_size+1)) #getting an array of energy values for the bins
    summed_data_E, bin_edges, binnumber = stats.binned_statistic(truncated_E,counts,statistic='sum',bins=E_bins) #binning the energy values in the data

    print("The data is binned by " + str(tbin_size) + 's, and ' + str(Ebin_size) + 'keV')
65 changes: 65 additions & 0 deletions Lv1_data_gtis.py
@@ -0,0 +1,65 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 22 4:55pm 2020
Script to manipulate GTIs where necessary. In the Feb 22 2020 edition, consecutive
GTIs that are only a short time apart are combined, so that acceleration searches
can be run on the individual (merged) GTIs.
Reference for restarting a for loop (used in GTI_bunching):
https://stackoverflow.com/questions/3704918/python-way-to-restart-a-for-loop-similar-to-continue-for-while-loops
"""
from __future__ import division, print_function
from astropy.io import fits
import numpy as np
import pathlib
import subprocess
import Lv0_dirs,Lv0_fits2dict

Lv0_dirs.global_par() #obtaining the global parameters

def GTI_bunching(eventfile,gap,gtifile):
    """
    To bunch up GTIs which are separated by less than a few seconds.
    eventfile - path to the event file. Will extract ObsID from this for the NICER files.
    gap - maximum allowed separation between the end time of one GTI and the start time of the next
    gtifile - name of the GTI file to create
    """
    parent_folder = str(pathlib.Path(eventfile).parent)
    gtifile_path = parent_folder + '/' + gtifile
    gtis = list(fits.open(eventfile)[2].data)
    N = len(gtis)

    should_restart = True
    while should_restart: #to restart the for loop if I reached a pair of GTIs that can be combined
        should_restart = False
        for i in range(N-1): #variable N is there as we need to UPDATE the length of the GTIs each time we delete one!
            if (gtis[i+1][0]-gtis[i][1]) <= gap: #if (start time of next GTI - end time of previous GTI) is less than 5s, say
                new_start = gtis[i][0] #defined here, because "i" will change after deleting the 2 previous GTIs!
                new_end = gtis[i+1][1]
                del gtis[i]
                del gtis[i] #use the SAME index since the list after the first deletion will have N-1 items!
                gtis.insert(i,(new_start,new_end))
                N = len(gtis)
                should_restart = True
                break
            else:
                N = len(gtis)
                continue

    gtitxt = open(parent_folder + '/bunchedgtis.txt','w')
    for i in range(len(gtis)):
        gtitxt.write(str(gtis[i][0]) + ' ' + str(gtis[i][1]) + '\n')
    gtitxt.close()

    gti_col_file = Lv0_dirs.NICER_DATADIR + 'gti_columns.txt'
    gti_header_file = Lv0_dirs.NICER_DATADIR + 'gti_header.txt'
    ftcreate_cmd = ['ftcreate',gti_col_file,parent_folder+'/bunchedgtis.txt',gtifile_path,'headfile='+gti_header_file,'extname="GTI"','clobber=yes']
    subprocess.run(ftcreate_cmd)

if __name__ == "__main__":
eventfile = Lv0_dirs.NICER_DATADIR + 'xtej1739/2002131540_bary.evt'
gtifile = Lv0_dirs.NICER_DATADIR + 'xtej1739/bunched.gti'
GTI_bunching(eventfile,5,gtifile)
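The merging logic in GTI_bunching (scan the GTIs, merge any pair closer than gap, and restart the scan after each merge) can be checked on a toy list; a compact sketch of the same idea with made-up times, not the function itself:

# Toy check of the GTI-bunching idea: merge consecutive GTIs separated by <= gap seconds.
gap = 5
gtis = [(0.0, 10.0), (12.0, 20.0), (40.0, 50.0), (53.0, 60.0), (100.0, 110.0)]

merged = True
while merged:
    merged = False
    for i in range(len(gtis)-1):
        if gtis[i+1][0] - gtis[i][1] <= gap:
            gtis[i:i+2] = [(gtis[i][0], gtis[i+1][1])]  # replace the pair with one merged GTI
            merged = True
            break

print(gtis)  # [(0.0, 20.0), (40.0, 60.0), (100.0, 110.0)]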
161 changes: 64 additions & 97 deletions Lv1_ngc300_binning.py
@@ -10,121 +10,88 @@
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from astropy.io import fits
import subprocess
import pathlib
import glob
import Lv0_dirs

Lv0_dirs.global_par()

binsize_s = 1
binsize_s = 5
bin_size = str(binsize_s).zfill(2) + 'd' #bin width in days, as a zero-padded string (e.g. '05d')
bgsub_type = 'xsbgsub'

RGcms = Lv0_dirs.NGC300 + 'n300_ulx.bgsub_cl50_RGcms.ffphot'
RGnorm = Lv0_dirs.NGC300 + 'n300_ulx.bgsub_cl50_RGnorm.ffphot'
RGerror = Lv0_dirs.NGC300 + 'n300_ulx.bgsub_cl50_RGerr_norm.ffphot'
#RGcms = Lv0_dirs.NGC300_2020 + 'n300_ulx.bgsub_cl50_g2020.fffphot' NOT USING for 2020. No fffphot file for this anyways
norm_fffphot = Lv0_dirs.NGC300_2020 + 'n300_ulx.' + bgsub_type + '_cl50_g2020norm.fffphot'

soft1,soft2,A_band,B_band,C_band,D_band,inband,time = np.genfromtxt(RGnorm,usecols=(3,4,5,6,7,8,9,11),unpack=True)
soft1_err,soft2_err,A_err,B_err,C_err,D_err,inband_err,time_error = np.genfromtxt(RGerror,usecols=(3,4,5,6,7,8,9,11),unpack=True)
# time_error does not mean error in the time value, I just mean the time value found in the error text file
filename = np.genfromtxt(RGnorm,dtype='str',usecols=(0),unpack=True)
filename_err = np.genfromtxt(RGerror,dtype='str',usecols=(0),unpack=True)
### extracting photometry/count rates for soft_1 (0.2-0.3 keV), soft_2 (0.3-0.4 keV),
### A (0.4-1 keV), B (1-2 keV), C (2-4 keV), D (4-12) keV, in-band (A+B+C+D = 0.4-12 keV) bands, plus MJD
time = np.genfromtxt(norm_fffphot,dtype='float64',usecols=(11),unpack=True)
filenames = np.genfromtxt(norm_fffphot,dtype='str',usecols=(0),unpack=True)

time_bins = np.arange(58239,58606,binsize_s)
filename_dict,soft1_dict,soft2_dict,A_dict,B_dict,C_dict,D_dict,inband_dict = {},{},{},{},{},{},{},{}
filename_err_dict,soft1_err_dict,soft2_err_dict,A_err_dict,B_err_dict,C_err_dict,D_err_dict,inband_err_dict = {},{},{},{},{},{},{},{}
time = np.floor(time) #can't use int ; basically round the time values down to the integer value

#### bin up the counts and associated errors into dictionaries!
for i in tqdm(range(len(time_bins))):
    ## so for each time bin, gather up the count rates (and errors) in a dictionary
    filename_dict[time_bins[i]] = filename[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    soft1_dict[time_bins[i]] = soft1[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    soft2_dict[time_bins[i]] = soft2[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    A_dict[time_bins[i]] = A_band[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    B_dict[time_bins[i]] = B_band[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    C_dict[time_bins[i]] = C_band[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    D_dict[time_bins[i]] = D_band[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    inband_dict[time_bins[i]] = inband[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]

    filename_err_dict[time_bins[i]] = filename_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    soft1_err_dict[time_bins[i]] = soft1_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    soft2_err_dict[time_bins[i]] = soft2_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    A_err_dict[time_bins[i]] = A_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    B_err_dict[time_bins[i]] = B_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    C_err_dict[time_bins[i]] = C_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    D_err_dict[time_bins[i]] = D_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]
    inband_err_dict[time_bins[i]] = inband_err[(time>=time_bins[i])&(time<time_bins[i]+binsize_s)]

### doing the calculations for weighted averages and associated uncertainties
def get_binned_data(counts_dict,err_dict):
"""
Given the dictionaries where the data (counts and uncertainty) are already
binned, put them into lists!
counts_dict - dictionary where the keys are MJD values, and the entries correspond
to the counts/rate for a given MJD
err_dict - dictionary where the keys are MJD values, and the entries correspond
to the UNCERTAINTY in the counts/rate for a given MJD
"""
if type(counts_dict) != dict and type(err_dict) != dict:
raise TypeError("Make sure that counts_dict and err_dict are actually dictionaries!")

binned_MJD = []
binned_counts = []
binned_uncertainty = []
binned_pha_files = []

dict_keys = sorted(counts_dict.keys())
for i in range(len(dict_keys)):
counts = counts_dict[dict_keys[i]] #get count rates corresponding to a given MJD
if len(counts) != 0:
pha_string = ''
pha_list = filename_dict[dict_keys[i]]
for j in range(len(pha_list)):
if j != len(pha_list)-1:
pha_string += pha_list[j] + ','
else:
pha_string += pha_list[j]
binned_pha_files.append(pha_string)

unc = err_dict[dict_keys[i]] #get errors corresponding to a given MJD
weights = 1/unc**2 #calculating the weights
weighted_ave = sum(weights*counts)/sum(weights) #weighted average
weighted_ave_unc = 1/np.sqrt(sum(weights)) #uncertainty for the weighted average
binned_MJD.append(dict_keys[i])
binned_counts.append(weighted_ave)
binned_uncertainty.append(weighted_ave_unc)

return binned_pha_files,binned_MJD,binned_counts,binned_uncertainty

def binned_text():
def binned_text(bin_size):
"""
Given the MJDs, binned counts, and associated uncertainties, put them into a text file
No arguments because I'll put all the bands in here
"""
    binned_pha_files,binned_MJD,binned_counts_soft1,binned_unc_soft1 = get_binned_data(soft1_dict,soft1_err_dict)
    binned_pha_files,binned_MJD,binned_counts_soft2,binned_unc_soft2 = get_binned_data(soft2_dict,soft2_err_dict)
    binned_pha_files,binned_MJD,binned_counts_A,binned_unc_A = get_binned_data(A_dict,A_err_dict)
    binned_pha_files,binned_MJD,binned_counts_B,binned_unc_B = get_binned_data(B_dict,B_err_dict)
    binned_pha_files,binned_MJD,binned_counts_C,binned_unc_C = get_binned_data(C_dict,C_err_dict)
    binned_pha_files,binned_MJD,binned_counts_D,binned_unc_D = get_binned_data(D_dict,D_err_dict)
    binned_pha_files,binned_MJD,binned_counts_inband,binned_unc_inband = get_binned_data(inband_dict,inband_err_dict)

    counts_file = '/Volumes/Samsung_T5/NGC300_ULX/n300_ulx.bgsub_cl50_RGnorm_' + bin_size + '.ffphot'
    output_file = open(counts_file,'w')

    for i in range(len(binned_MJD)):
        output_file.write(str(binned_MJD[i]) + ' ' + str(round(binned_counts_soft1[i],4)) + ' ' + str(round(binned_counts_soft2[i],4)) + ' ' + str(round(binned_counts_A[i],4)) + ' ' + str(round(binned_counts_B[i],4)) + ' ' + str(round(binned_counts_C[i],4)) + ' ' + str(round(binned_counts_D[i],4)) + ' ' + str(round(binned_counts_inband[i],4)) + ' ' + binned_pha_files[i] + '\n')
    output_file.close()

    unc_file = '/Volumes/Samsung_T5/NGC300_ULX/n300_ulx.bgsub_cl50_RGerr_' + bin_size + '.ffphot'
    output_file = open(unc_file,'w')

    for i in range(len(binned_MJD)):
        output_file.write(str(binned_MJD[i]) + ' ' + str(round(binned_unc_soft1[i],4)) + ' ' + str(round(binned_unc_soft2[i],4)) + ' ' + str(round(binned_unc_A[i],4)) + ' ' + str(round(binned_unc_B[i],4)) + ' ' + str(round(binned_unc_C[i],4)) + ' ' + str(round(binned_unc_D[i],4)) + ' ' + str(round(binned_unc_inband[i],4)) + ' ' + binned_pha_files[i] + '\n')
    output_file.close()
    E_bins_low = np.array([20-1,30-1,40-1,100-1,200-1,400-1,40-1,1300-1])
    E_bins_high = np.array([30-1,40-1,100-1,200-1,400-1,1200-1,1200-1,1501-1])

    bgsub_files = sorted(glob.glob(Lv0_dirs.NGC300_2020 + 'spectra_' + bin_size + '/58*_' + bgsub_type + '*_cl50.pha'))

    output_text = open(Lv0_dirs.NGC300_2020 + 'n300_ulx.' + bgsub_type + '_cl50_g2020norm_' + str(binsize_s).zfill(2) + 'd.fffphot','w')
    output_text_err = open(Lv0_dirs.NGC300_2020 + 'n300_ulx.' + bgsub_type + '_cl50_g2020err_norm_' + str(binsize_s).zfill(2) + 'd.fffphot','w')

    for i in tqdm(range(len(bgsub_files))): #for each averaged spectrum
        mjd = str(pathlib.Path(bgsub_files[i]).name)[:5]
        files_in_interval = filenames[(time>=float(mjd))&(time<float(mjd)+binsize_s)]

        if bgsub_type == 'bgsub':
            filtered_files = [Lv0_dirs.NGC300_2020 + 'bgsub_cl50/pha/' + files_in_interval[a] for a in range(len(files_in_interval))]
        elif bgsub_type == 'xbgsub':
            filtered_files = [Lv0_dirs.NGC300_2020 + 'bgsub_cl50/xpha/' + files_in_interval[a] for a in range(len(files_in_interval))]
        elif bgsub_type == 'xsbgsub':
            filtered_files = [Lv0_dirs.NGC300_2020 + 'bgsub_cl50/xspha/' + files_in_interval[a] for a in range(len(files_in_interval))]

        rates_all = []
        errs_all = []
        for j in range(len(E_bins_low)): #for each energy band
            rates_all.append(sum(fits.open(bgsub_files[i])[1].data['RATE'][E_bins_low[j]:E_bins_high[j]]))
            errs = fits.open(bgsub_files[i])[1].data['STAT_ERR'][E_bins_low[j]:E_bins_high[j]]
            errs_all.append(np.sqrt(np.mean(errs**2) * len(errs)))

        rate_1 = mjd #for the MJD
        rate_2 = '' #for the count rates in each band
        rate_3 = '' #for the list of files that goes into each bin
        for j in range(len(rates_all)):
            rate_2 += str(round(rates_all[j],4)) + ' '
        for j in range(len(filtered_files)):
            if j != len(filtered_files)-1:
                rate_3 += filtered_files[j] + ','
            else:
                rate_3 += filtered_files[j]
        output_text.write(rate_1 + ' ' + rate_2 + rate_3 + '\n')

        errs_1 = mjd #for the MJD
        errs_2 = '' #for the uncertainties in each band
        errs_3 = '' #for the list of files that goes into each bin
        for j in range(len(errs_all)):
            errs_2 += str(round(errs_all[j],4)) + ' '
        for j in range(len(filtered_files)):
            if j != len(filtered_files)-1:
                errs_3 += filtered_files[j] + ','
            else:
                errs_3 += filtered_files[j]
        output_text_err.write(errs_1 + ' ' + errs_2 + errs_3 + '\n')

    output_text.close()
    output_text_err.close()


if __name__ == "__main__":
    binned_text()
    binned_text(bin_size)
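One detail in the per-band loop above: np.sqrt(np.mean(errs**2) * len(errs)) is algebraically the quadrature sum sqrt(sum(errs**2)), i.e. the usual propagation of independent per-channel uncertainties when the RATE values are summed over a band. A quick check with illustrative numbers:

import numpy as np

errs = np.array([0.010, 0.020, 0.015])        # illustrative per-channel STAT_ERR values
print(np.sqrt(np.mean(errs**2) * len(errs)))  # ~0.0269
print(np.sqrt(np.sum(errs**2)))               # identical: the two expressions are equivalent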