From e702249f9e36f412ddfdefab50105a05d7c02cb6 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 24 Oct 2018 16:00:43 -0600 Subject: [PATCH 01/89] Updated readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5901cf3..6375cd3 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This package is a little difficult to create a working python interpreter for. First, get [Anaconda](anaconda.org) and [git](https://git-scm.com/), these tools are important here. -Next, create your environment +Next, create your environment. ``` conda create -n irri python=3.6``` @@ -18,4 +18,4 @@ Then get the latest gdal: Then the latest master branch of rasterio: -```pip install git+https://github.com/mapbox/rasterio.git``` \ No newline at end of file +```pip install git+https://github.com/mapbox/rasterio.git``` From 9938d42826570f50b3e6473118a21fe083dd7a3f Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 9 Nov 2018 14:14:44 -0700 Subject: [PATCH 02/89] Added grid_raster_extract to facilitate getting training data for conv. 
nets --- pixel_classification/compose_array.py | 103 +++++++++++++++++++++++-- pixel_classification/prepare_images.py | 15 +++- pixel_classification/runner.py | 15 ++-- pixel_classification/runspec.py | 2 +- 4 files changed, 118 insertions(+), 17 deletions(-) diff --git a/pixel_classification/compose_array.py b/pixel_classification/compose_array.py index 02189de..50ee46c 100644 --- a/pixel_classification/compose_array.py +++ b/pixel_classification/compose_array.py @@ -24,7 +24,7 @@ from warnings import warn from fiona import open as fopen -from numpy import linspace, max, nan, unique +from numpy import linspace, max, nan, unique, ndarray from numpy.random import shuffle from pandas import DataFrame, Series from pyproj import Proj, transform @@ -60,7 +60,7 @@ class PixelTrainingArray(object): def __init__(self, root=None, geography=None, paths_map=None, masks=None, instances=None, from_dict=None, pkl_path=None, - overwrite_array=False, overwrite_points=False): + overwrite_array=False, overwrite_points=False, kernel_size=None): """ @@ -101,12 +101,15 @@ def __init__(self, root=None, geography=None, paths_map=None, masks=None, self.path_row_dir = os.path.join(self.root, str(geography.path), str(geography.row)) self.year_dir = os.path.join(self.path_row_dir, str(geography.year)) self.is_binary = None + self.kernel_size = kernel_size self.features = None self.data = None self.target_values = None self.m_instances = instances + if self.kernel_size is not None: + self.extracted_points = DataFrame(columns = ['FID', 'X', 'Y', 'POINT_TYPE']) self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) self.object_id = 0 @@ -128,7 +131,10 @@ def extract_sample(self, save_points=True): self.save_sample_points() if self.overwrite_array: - self.populate_data_array() + if self.kernel_size is not None: + self.populate_raster_data_array() + else: + self.populate_data_array() def create_sample_points(self): """ Create a clipped training set from polygon shapefiles. 
@@ -212,6 +218,33 @@ def populate_data_array(self): self._check_targets(targets) + def populate_raster_data_array(self): + + for key, val in self.paths_map.items(): + s = self._grid_raster_extract(val, _name=key) + print('Extracting {}'.format(key)) + self.extracted_points = self.extracted_points.join(s, how='outer') + + for key, val in self.masks.items(): + s = self._grid_raster_extract(val, _name=key) + print('Extracting {}'.format(key)) + self.extracted_points = self.extracted_points.join(s, how='outer') + + data_array, targets = self._purge_raster_array() + data = {'df': data_array, + 'features': data_array.columns.values, + 'data': data_array.values, + 'target_values': targets, + 'paths_map': self.paths_map} + + print('feature dimensions: {}'.format(data_array.shape)) + for key, val in data.items(): + setattr(self, key, val) + + self.to_pickle(data) + + self._check_targets(targets) + def save_sample_points(self): points_schema = { @@ -247,12 +280,45 @@ def from_pickle(self, path=None): self._check_targets(self.target_values) - def _purge_array(self): - - data_array = deepcopy(self.extracted_points) + def _purge_raster_array(self): + data_array = deepcopy(self.extracted_points) # extracted pixels would + # be a better name target_vals = Series(data_array.POINT_TYPE.values, name='POINT_TYPE') data_array.drop(['X', 'Y', 'FID', 'POINT_TYPE'], axis=1, inplace=True) + try: + for msk in self.masks.keys(): + # TODO: make the below calculation vectorized + for idx, sub_raster in enumerate(data_array[msk]): + if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 1.: + data_array.loc[idx, :] = nan # make whole row NaN + except TypeError as e: # sub_raster is nan. Am I accidentally making it NaN? 
+ print(sub_raster, msk, idx) + data_array.loc[idx, :] = nan + + try: + for bnd in self.paths_map.keys(): + for idx, sub_raster in enumerate(data_array[bnd]): + if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 0.: + data_array.loc[idx, :] = nan + except TypeError as e: + print(sub_raster, msk, idx) + data_array.loc[idx, :] = nan + + data_array = data_array.join(target_vals, how='outer') + data_array.dropna(axis=0, inplace=True) + data_array.drop(self.masks, axis=1, inplace=True) + target_vals = data_array.POINT_TYPE.values + + data_array = data_array.drop(['POINT_TYPE'], + axis=1, inplace=False) + return data_array, target_vals + + def _purge_array(self): + data_array = deepcopy(self.extracted_points) # extracted pixels would + # be a better name + target_vals = Series(data_array.POINT_TYPE.values, name='POINT_TYPE') + data_array.drop(['X', 'Y', 'FID', 'POINT_TYPE'], axis=1, inplace=True) for msk in self.masks.keys(): data_array[data_array[msk] == 1.] = nan @@ -294,7 +360,6 @@ def _check_targets(self, target_vals): self.is_binary = False def _point_raster_extract(self, raster, _name): - with rasopen(raster, 'r') as rsrc: rass_arr = rsrc.read() rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) @@ -309,6 +374,30 @@ def _point_raster_extract(self, raster, _name): s[ind] = float(raster_val) except IndexError: s[ind] = None + return s + + def _grid_raster_extract(self, raster, _name): + """Open the raster. Store the points in a Series - a labeled + numpy array. Then in _purge array, we iterate over the masks + and the paths_map and drop pixels where masks = 1 and pixels where bound = 0. 
+ The + """ + with rasopen(raster, 'r') as rsrc: + rass_arr = rsrc.read() + rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) + affine = rsrc.transform + + s = Series(index=range(0, self.extracted_points.shape[0]), name=_name, dtype=object) + for ind, row in self.extracted_points.iterrows(): + x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) + c, r = ~affine * (x, y) + try: + ofs = self.kernel_size // 2 + rr = int(r); cc = int(c) + raster_subgrid = rass_arr[rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] # possible issues: edges of image + s[ind] = raster_subgrid + except IndexError: + s[ind] = None return s diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index 7380859..4ef7245 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -99,6 +99,10 @@ def build_evaluating(self): self.paths_map, self.masks = self._order_images() def get_cdl(self): + """download cdl and make a mask, save to the + root directory with filename cdl_mask.tif. + The cdl is reprojected here. + """ self.cdl_mask = os.path.join(self.root, 'cdl_mask.tif') if not os.path.isfile(self.cdl_mask): print('get {}'.format(self.cdl_mask)) @@ -110,6 +114,10 @@ def get_cdl(self): self.exclude_rasters.append(self.cdl_mask) def get_landsat(self, fmask=False): + """Download from internet and select scenes from n_landsat + g.download() then saves the selected scenes into + the root directory. + """ g = GoogleDownload(self.start, self.end, self.sat, path=self.path, row=self.row, output_path=self.root, max_cloud_percent=self.max_cloud) @@ -125,6 +133,11 @@ def get_landsat(self, fmask=False): [self._make_fmask(d) for d in self.image_dirs] def get_terrain(self): + """Get digital elevation maps from amazon web services + save in the project root directory with filenames enumerated + in the next three lines. 
+ + """ slope_name = os.path.join(self.root, 'slope.tif') aspect_name = os.path.join(self.root, 'aspect.tif') @@ -185,7 +198,7 @@ def _make_fmask(self, image_dir): f.save_array(cloud, self.dst_path_cloud) f.save_array(water, self.dst_path_water) - def _orgainize_directory(self): + def _organize_directory(self): dst_dir = os.path.join(self.root, str(self.path), str(self.row), str(self.year)) if not os.path.isdir(dst_dir): diff --git a/pixel_classification/runner.py b/pixel_classification/runner.py index afdb000..bdadcbe 100644 --- a/pixel_classification/runner.py +++ b/pixel_classification/runner.py @@ -29,14 +29,10 @@ from pixel_classification.classify import classify_multiproc from pixel_classification.target_path_rows import get_path_rows -OBJECT_MAP = { # 'MT': Montana, - 'NV': Nevada, - 'OR': Oregon, - 'UT': Utah, - 'WA': Washington} - +OBJECT_MAP = { 'MT': Montana} def concatenate_training_data(existing, training_array): + existing_array = existing['data'] add_array = training_array.data new_array = vstack((existing_array, add_array)) @@ -80,8 +76,8 @@ def model_training_scenes(project, n_images, training, model): i = ImageStack(root=project_state_dir, satellite=geo.sat, path=geo.path, row=geo.row, n_landsat=n_images, year=geo.year, max_cloud_pct=70) i.build_training() - p = Pta(root=i.root, geography=geo, paths_map=i.paths_map, instances=10, masks=i.masks, - overwrite_array=True, overwrite_points=True, pkl_path=geo_data_path) + p = Pta(root=i.root, geography=geo, paths_map=i.paths_map, instances=10000, masks=i.masks, + overwrite_array=True, overwrite_points=True, pkl_path=geo_data_path, kernel_size=5) p.extract_sample() if first: @@ -143,4 +139,7 @@ def run_targets(directory, model): model_training_scenes(t_project_dir, n_images, training_dir, model_name) + + + # ========================= EOF ==================================================================== diff --git a/pixel_classification/runspec.py b/pixel_classification/runspec.py index 
80789a7..c5d6221 100644 --- a/pixel_classification/runspec.py +++ b/pixel_classification/runspec.py @@ -62,7 +62,7 @@ def __init__(self, root): 1: {'ltype': 'dryland', 'path': None}, - 2: {'ltype': 'forrest', 'path': None}, + 2: {'ltype': 'forest', 'path': None}, 3: {'ltype': 'other', 'path': None}} From 4daf20afdebbd5e261266a249c42719503ecd03a Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 12 Dec 2018 13:06:26 -0700 Subject: [PATCH 03/89] Changed GoogleDownload to accept lat/lon, started work a new run file that is oriented --- pixel_classification/classify.py | 4 +- pixel_classification/compose_array.py | 7 +- pixel_classification/prepare_images.py | 45 ++++++++-- pixel_classification/runner.py | 13 +-- pixel_classification/runner_from_shapefile.py | 85 +++++++++++++++++++ .../tf_multilayer_perceptron.py | 8 +- 6 files changed, 137 insertions(+), 25 deletions(-) create mode 100644 pixel_classification/runner_from_shapefile.py diff --git a/pixel_classification/classify.py b/pixel_classification/classify.py index 39c6d3a..3b0cd09 100644 --- a/pixel_classification/classify.py +++ b/pixel_classification/classify.py @@ -223,10 +223,10 @@ def _get_stack_channels(self): first = False else: try: - stack[i, :, :] = self.normalize_image_channel(arr) + stack[i, :, :] = arr except ValueError: arr = warp_single_image(self.feature_ras, first_geo) - stack[i, :, :] = self.normalize_image_channel(arr) + stack[i, :, :] = arr return stack diff --git a/pixel_classification/compose_array.py b/pixel_classification/compose_array.py index 50ee46c..c615fed 100644 --- a/pixel_classification/compose_array.py +++ b/pixel_classification/compose_array.py @@ -108,8 +108,6 @@ def __init__(self, root=None, geography=None, paths_map=None, masks=None, self.target_values = None self.m_instances = instances - if self.kernel_size is not None: - self.extracted_points = DataFrame(columns = ['FID', 'X', 'Y', 'POINT_TYPE']) self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 
'POINT_TYPE']) self.object_id = 0 @@ -291,7 +289,7 @@ def _purge_raster_array(self): for idx, sub_raster in enumerate(data_array[msk]): if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 1.: data_array.loc[idx, :] = nan # make whole row NaN - except TypeError as e: # sub_raster is nan. Am I accidentally making it NaN? + except TypeError as e: print(sub_raster, msk, idx) data_array.loc[idx, :] = nan @@ -377,7 +375,8 @@ def _point_raster_extract(self, raster, _name): return s def _grid_raster_extract(self, raster, _name): - """Open the raster. Store the points in a Series - a labeled + """ + Open the raster. Store the points in a Series - a labeled numpy array. Then in _purge array, we iterate over the masks and the paths_map and drop pixels where masks = 1 and pixels where bound = 0. The diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index 4ef7245..e576592 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -30,17 +30,17 @@ from bounds import RasterBounds from dem import AwsDem from ssebop_app.image import get_image - +from rasterio import open as rasopen, float32 from pixel_classification.crop_data_layer import CropDataLayer as Cdl from pixel_classification.runspec import landsat_rasters, static_rasters, ancillary_rasters, mask_rasters - +from sklearn.preprocessing import StandardScaler class ImageStack(object): """ Prepare a stack of images from Landsat, terrain, etc. Save stack in identical geometry. 
""" - def __init__(self, satellite, path, row, root=None, max_cloud_pct=None, start=None, end=None, + def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None, max_cloud_pct=None, start=None, end=None, year=None, n_landsat=None): self.landsat_mapping = {'LT5': Landsat5, 'LE7': Landsat7, 'LC8': Landsat8} @@ -52,6 +52,8 @@ def __init__(self, satellite, path, row, root=None, max_cloud_pct=None, start=No self.path = path self.row = row + self.lat = lat + self.lon = lon self.year = year self.max_cloud = max_cloud_pct @@ -96,7 +98,8 @@ def build_evaluating(self): self.get_et() self.get_terrain() self.get_cdl() - self.paths_map, self.masks = self._order_images() + self.paths_map, self.masks = self._order_images() # paths map is just path-> location + # in filesystem. def get_cdl(self): """download cdl and make a mask, save to the @@ -118,8 +121,14 @@ def get_landsat(self, fmask=False): g.download() then saves the selected scenes into the root directory. """ - g = GoogleDownload(self.start, self.end, self.sat, path=self.path, row=self.row, - output_path=self.root, max_cloud_percent=self.max_cloud) + if self.lat is None: + g = GoogleDownload(self.start, self.end, self.sat, path=self.path, row=self.row, + output_path=self.root, max_cloud_percent=self.max_cloud) + else: + g = GoogleDownload(self.start, self.end, self.sat, latitude=self.lat, longitude=self.lon, + output_path=self.root, max_cloud_percent=self.max_cloud) + self.path = g.p + self.row = g.r g.select_scenes(self.n) self.scenes = g.selected_scenes @@ -164,6 +173,7 @@ def get_et(self): for i, d in enumerate(self.image_dirs): l = self.landsat_mapping[self.sat_abv](d) _id = l.landsat_scene_id + print(self.path, self.row) get_image(image_dir=d, parent_dir=self.root, image_exists=True, image_id=_id, satellite=self.sat, path=self.path, row=self.row, image_date=l.date_acquired, landsat_object=self.landsat, overwrite=False) @@ -213,7 +223,6 @@ def _organize_directory(self): return dst_dir def 
_order_images(self): - band_dct = OrderedDict() mask_dct = OrderedDict() @@ -239,6 +248,7 @@ def _order_images(self): bands.sort() for p in bands: band_dct[os.path.basename(p).split('.')[0]] = p + self._normalize_and_save_image(p) masks = [os.path.join(self.root, sc, x) for x in paths if x.endswith(mask_rasters())] for m in masks: @@ -249,9 +259,30 @@ def _order_images(self): static_files = [x for x in files if x.endswith(static_rasters())] for st in static_files: band_dct[os.path.basename(st).split('.')[0]] = os.path.join(self.root, st) + self._normalize_and_save_image(os.path.join(self.root, st)) return band_dct, mask_dct + @staticmethod + def _normalize_and_save_image(fname): + norm = True + with rasopen(fname, 'r') as rsrc: + if "normalized" not in rsrc.tags(): + norm = False + rass_arr = rsrc.read() + rass_arr = rass_arr.astype(float32) + profile = rsrc.profile + profile.update(dtype=float32) + rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) + scaler = StandardScaler() # z-normalization + scaler.fit(rass_arr) + rass_arr = scaler.transform(rass_arr) + if not norm: + with rasopen(fname, 'w', **profile) as dst: + dst.write(rass_arr, 1) + print("Normalizing", fname) + dst.update_tags(normalized=True) + if __name__ == '__main__': pass diff --git a/pixel_classification/runner.py b/pixel_classification/runner.py index bdadcbe..5638c05 100644 --- a/pixel_classification/runner.py +++ b/pixel_classification/runner.py @@ -77,7 +77,7 @@ def model_training_scenes(project, n_images, training, model): n_landsat=n_images, year=geo.year, max_cloud_pct=70) i.build_training() p = Pta(root=i.root, geography=geo, paths_map=i.paths_map, instances=10000, masks=i.masks, - overwrite_array=True, overwrite_points=True, pkl_path=geo_data_path, kernel_size=5) + overwrite_array=True, overwrite_points=True, pkl_path=geo_data_path) p.extract_sample() if first: @@ -90,14 +90,14 @@ def model_training_scenes(project, n_images, training, model): print('Shape {}: {}'.format(key, 
p.data.shape)) p = Pta(from_dict=training_data) - p.to_pickle(training_data, os.path.join(project, 'data.pkl')) + p.to_pickle(training_data, os.path.join(project, 'data_kernel31.pkl'.format())) mlp(p, model) print('Model saved to {}'.format(model)) def classify_scene(path, row, sat, year, eval_directory, model, n_images, result=None): print('Time: {}'.format(datetime.now())) - print('Classfiy path {} row {} sat {} year {}'.format(path, row, sat, year)) + print('Classify path {} row {} sat {} year {}'.format(path, row, sat, year)) sub = os.path.join(eval_directory, '{}_{}_{}'.format(path, row, year)) if not os.path.isdir(sub): os.mkdir(sub) @@ -112,7 +112,7 @@ def classify_scene(path, row, sat, year, eval_directory, model, n_images, result path_row_year_dir = '{}_{}_{}'.format(path, row, year) result = os.path.join(eval_directory, path_row_year_dir, tif) - classify_multiproc(model, stack_data=i, mask=i.cdl_mask, result=result) + classify_multiproc(model, stack_data=i, mask=i.cdl_mask, result=result, array_outfile="a_recognizable_name") print('Time: {}'.format(datetime.now())) @@ -136,9 +136,12 @@ def run_targets(directory, model): model_data = os.path.join(abspath, 'model_data') model_name = os.path.join(model_data, 'model-mt.ckpt'.format(n_images)) t_project_dir = os.path.join(model_data, 'allstates_3') - + c_project_dir = os.path.join(model_data, 'stacks') model_training_scenes(t_project_dir, n_images, training_dir, model_name) + # classify_scene(path=37, row=28, sat=8, year=2017,eval_directory=c_project_dir, + # n_images=3, model=model_name) + diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py new file mode 100644 index 0000000..e2d52df --- /dev/null +++ b/pixel_classification/runner_from_shapefile.py @@ -0,0 +1,85 @@ +import fiona +from rasterio import float32, open as rasopen +from prepare_images import ImageStack +from sat_image.warped_vrt import warp_single_image + +def get_shapefile_center(shapefile): + 
'''Since ImageStack can deal with lat/lon when + downloading from the Internet, I need to get + a central location of the shapefile''' + + with fiona.open(shapefile) as src: + bounds = src.bounds # returns minx, miny. maxx, maxy + (minx, miny, maxx, maxy) = bounds + latc = (maxy + miny)/2 + lonc = (minx + maxx)/2 + + return latc, lonc + +def download_images(project_directory, shapefile, year): + + lat, lon = get_shapefile_center(shapefile) + + image_stack = ImageStack(satellite=8, lat=lat, lon=lon, root=project_directory, + max_cloud_pct=70, n_landsat=3, year=year) + + image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is + # a cloud mask. Evaluating excludes a cloud mask + # need to save paths_map and masks + return image_stack + + +def create_master_raster(image_stack): + + paths_map = image_stack.paths_map + first = True + stack = None + + for i, feat in enumerate(paths_map.keys()): # ordered dict ensures accuracy here. + + feature_raster = paths_map[feat] # maps bands to their location in filesystem. + + with rasopen(feature_raster, mode='r') as src: + arr = src.read() + raster_geo = src.meta.copy() + + if first: + geo = deepcopy(raster_geo) + empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) + stack = empty + stack[i, :, :] = arr + first = False + else: + try: + stack[i, :, :] = arr + except ValueError: + arr = warp_single_image(self.feature_ras, first_geo) + stack[i, :, :] = arr + + with rasopen("master_raster.tif", mode='w', **geo) as dst: + dst.write(stack) + + return "master_raster.tif" # TODO: replace with a sensible name. + +def create_training_data(project_directory, shapefile, year, lat, lon): + # why don't we create a master masked raster? + # then use the same image for evaluation and + # training. 
+ image_stack = download_images(project_directory, shapefile, year) + +if __name__ == "__main__": + + shpfile = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_3728.shp' + + iss = download_images("../model_data/allstates_3/MT", shpfile, 2017) + string = create_master_raster(iss) + + + + + + + + + + diff --git a/pixel_classification/tf_multilayer_perceptron.py b/pixel_classification/tf_multilayer_perceptron.py index 4f3a323..920cef4 100644 --- a/pixel_classification/tf_multilayer_perceptron.py +++ b/pixel_classification/tf_multilayer_perceptron.py @@ -33,7 +33,7 @@ def mlp(data, model_path): :return: """ - x = normalize_feature_array(data.data) + x = data.data labels = data.target_values y = get_dummies(labels.reshape((labels.shape[0],))).values N = len(unique(data.target_values)) @@ -101,12 +101,6 @@ def multilayer_perceptron(x, weights, biases): return out_layer -def normalize_feature_array(data): - scaler = StandardScaler() - scaler = scaler.fit(data) - data = scaler.transform(data) - return data - def get_size(start_path='.'): """ Size of data directory in GB. 
From 30a76df428f414cd3f6ecbbb90e8e5f9f0e41289 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 26 Dec 2018 10:52:27 -0700 Subject: [PATCH 04/89] Made a new runfile --- pixel_classification/runner_from_shapefile.py | 252 ++++++++++++++---- 1 file changed, 202 insertions(+), 50 deletions(-) diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index e2d52df..ab03663 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -1,33 +1,16 @@ +import glob +import os +from collections import defaultdict import fiona +from lxml import html +from requests import get +from copy import deepcopy +from numpy import zeros +import re from rasterio import float32, open as rasopen +from compose_array_single_shapefile import PixelTrainingArraySingleShapefile from prepare_images import ImageStack -from sat_image.warped_vrt import warp_single_image - -def get_shapefile_center(shapefile): - '''Since ImageStack can deal with lat/lon when - downloading from the Internet, I need to get - a central location of the shapefile''' - - with fiona.open(shapefile) as src: - bounds = src.bounds # returns minx, miny. maxx, maxy - (minx, miny, maxx, maxy) = bounds - latc = (maxy + miny)/2 - lonc = (minx + maxx)/2 - - return latc, lonc - -def download_images(project_directory, shapefile, year): - - lat, lon = get_shapefile_center(shapefile) - - image_stack = ImageStack(satellite=8, lat=lat, lon=lon, root=project_directory, - max_cloud_pct=70, n_landsat=3, year=year) - - image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is - # a cloud mask. 
Evaluating excludes a cloud mask - # need to save paths_map and masks - return image_stack - +import sat_image def create_master_raster(image_stack): @@ -43,38 +26,207 @@ def create_master_raster(image_stack): arr = src.read() raster_geo = src.meta.copy() - if first: - geo = deepcopy(raster_geo) - empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) - stack = empty + if first: + first_geo = deepcopy(raster_geo) + empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) + stack = empty + stack[i, :, :] = arr + first = False + else: + try: stack[i, :, :] = arr - first = False - else: - try: - stack[i, :, :] = arr - except ValueError: - arr = warp_single_image(self.feature_ras, first_geo) - stack[i, :, :] = arr + except ValueError: + import pprint + pprint.pprint(first_geo) + # error was thrown here b/c source raster didn't have crs + arr = sat_image.warped_vrt.warp_single_image(feature_raster, first_geo) + stack[i, :, :] = arr + + # print(stack.shape) + first_geo.update(count=len(paths_map.keys())) - with rasopen("master_raster.tif", mode='w', **geo) as dst: - dst.write(stack) + with rasopen("master_raster.tif", mode='w', **first_geo) as dst: + dst.write(stack) return "master_raster.tif" # TODO: replace with a sensible name. + +def get_path_row(lat, lon): + """ + :param lat: Latitude float + :param lon: Longitude float + 'convert_pr_to_ll' [path, row to coordinates] + :return: lat, lon tuple or path, row tuple + """ + conversion_type = 'convert_ll_to_pr' + base = 'https://landsat.usgs.gov/landsat/lat_long_converter/tools_latlong.php' + unk_number = 1508518830987 + + full_url = '{}?rs={}&rsargs[]={}&rsargs[]={}&rsargs[]=1&rsrnd={}'.format(base, + conversion_type, + lat, lon, + unk_number) + r = get(full_url) + tree = html.fromstring(r.text) + + # remember to view source html to build xpath + # i.e. 
inspect element > network > find GET with relevant PARAMS + # > go to GET URL > view source HTML + p_string = tree.xpath('//table/tr[1]/td[2]/text()') + path = int(re.search(r'\d+', p_string[0]).group()) + + r_string = tree.xpath('//table/tr[1]/td[4]/text()') + row = int(re.search(r'\d+', r_string[0]).group()) + + return path, row + +def get_shapefile_lat_lon(shapefile): + with fiona.open(shapefile, "r") as src: + minx, miny, maxx, maxy = src.bounds + latc = (maxy + miny) / 2 + lonc = (maxx + minx) / 2 + + return latc, lonc + +def get_shapefile_path_rows(shapefile): + '''Since ImageStack can deal with lat/lon when + downloading from the Internet, I need to get + a central location of the shapefile. However, + since shapefiles may extend over multiple p/rs, + I need to figure out all the path/rows. + These methods work; however, they don't account + for the overlapping nature of the wrs2 tiles. + This means that many, many images may be downloaded. + ''' + + dct = defaultdict(list) + from shapely.geometry import shape + with fiona.open(shapefile, "r") as src: + for feat in src: + pt = shape(feat['geometry']).centroid + pt = pt.coords[0] + p, r = get_path_row(pt[1], pt[0]) + pr = str(p) + "_" + str(r) + dct[pr].append(feat) + return dct + +def split_shapefile(path, data_directory, base_shapefile, pr_dict): + ''' + Multiple p/rs can be contained in one + shapefile. 
To ease the extraction of + training data, we need an automated way + to look through all shapefile attributes and assign + them a path/row, then save all the polygons in different + files (if they're not in the same p/r) + ''' + + prefix = os.path.splitext(base_shapefile)[0] + + with fiona.open(os.path.join(path, base_shapefile), "r") as src: + meta = deepcopy(src.meta) + + for pr in pr_dict.keys(): + out = prefix + "_" + pr + ".shp" + print(out) + with fiona.open(os.path.join(data_directory, out), 'w', **meta) as dst: + for poly in pr_dict[pr]: + dst.write(poly) + return + +def download_images(project_directory, path, row, year): + + image_stack = ImageStack(satellite=8, path=path, row=row, root=project_directory, + max_cloud_pct=70, n_landsat=3, year=year) + + image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is + # a cloud mask. + return image_stack + -def create_training_data(project_directory, shapefile, year, lat, lon): - # why don't we create a master masked raster? - # then use the same image for evaluation and - # training. 
- image_stack = download_images(project_directory, shapefile, year) - -if __name__ == "__main__": +def create_training_data(shapefile, shapefile_directory, image_directory, class_code, + kernel_size, instances, training_directory, year): + + latc, lonc = get_shapefile_lat_lon(os.path.join(shapefile_directory, shapefile)) + p, r = get_path_row(latc, lonc) + suff = str(p) + '_' + str(r) + "_" + str(year) + landsat_dir = os.path.join(image_directory, suff) + + if not os.path.isdir(landsat_dir): + os.mkdir(landsat_dir) + ims = download_images(landsat_dir, p, r, year) + else: + print("Images may have been downloaded for {}_{}_{}".format(p, r, year)) + print("Check to make sure they're all there.") + ims = download_images(landsat_dir, p, r, year) - shpfile = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_3728.shp' + shp = os.path.join(shapefile_directory, shapefile) + pta = PixelTrainingArraySingleShapefile(root=landsat_dir, shapefile_path=shp, + paths_map=ims.paths_map, masks=ims.masks, class_code=class_code, + instances=instances, kernel_size=kernel_size, path=p, row=r) - iss = download_images("../model_data/allstates_3/MT", shpfile, 2017) - string = create_master_raster(iss) + pta.extract_sample() + prefix = os.path.splitext(shapefile)[0] + prefix += "_data_kernel_{}".format(kernel_size) + pta.to_pickle(pta.data, os.path.join(training_directory, prefix)) + + print("Data saved to ".format(training_directory)) + + +if __name__ == "__main__": + # Workflow: + # split all shapefiles up into separate path/rows. + # Then for each (split) shapefile, download the + # requisite image data (landsat -> et etc). + # Then, for each split shapefile, + # download training data and save in the training data directory. + # TODO: 1. Make a program to train the model + # 2. Figure out how to make the model evaluate on every path/row + # that's present in the training data. + # The code broke on an SSEBop run, something + # that I have no control over. At least I think I don't. 
+ # shpfile = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_other_3728.shp' + # path = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/' + # pr_dct = get_shapefile_path_rows(shpfile) + + train_dir = '../training_data' + data_directory = '../shapefile_data/' + image_directory = '../image_data' + + for f in glob.glob(data_directory + "*.shp"): + if "sample_points" not in f: + shp = os.path.basename(f) + print(shp) + create_training_data(shp, data_directory, image_directory, 3, 17, 10, + train_dir, 2013) + + # + # data_directory = '../shapefile_data/' + # shpfile = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_other_3728.shp' + # pr_dct = get_shapefile_path_rows(shpfile) + # image_directory = '../image_data' + # split_shapefile(path, data_directory, 'MT_other_3728.shp', pr_dct) + +# # now create imagestacks from each shapefile in the split shapefile directory. + + # # shapefile_year_map = empty + + # year = 2013 # I think? + # for f in glob.glob(data_directory + '*.shp'): + # latc, lonc = get_shapefile_lat_lon(f) + # p, r = get_path_row(latc, lonc) + # suff = str(p) + '_' + str(r) + "_" + str(year) + # landsat_dir = os.path.join(image_directory, suff) + # if not os.path.isdir(landsat_dir): + # os.mkdir(landsat_dir) + # ims = download_images(landsat_dir, p, r, 2013) + # else: + # print("Images may have been downloaded for {}_{}_{}".format(p, r, year)) + # print(i) +# project_directory, shapefile, year) + + +# download_images From e7104ff880b31635b6a86fcc96dffd99be329382 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 11 Jan 2019 10:57:11 -0700 Subject: [PATCH 05/89] Reworking data pipeline --- pixel_classification/classify.py | 2 + pixel_classification/compose_array.py | 4 +- .../compose_array_single_shapefile.py | 414 ++++++++++++++++++ pixel_classification/data_utils.py | 222 ++++++++++ pixel_classification/keras_cnn.py | 101 +++++ pixel_classification/prepare_images.py | 3 +- pixel_classification/runner_from_shapefile.py | 300 
++++--------- 7 files changed, 832 insertions(+), 214 deletions(-) create mode 100644 pixel_classification/compose_array_single_shapefile.py create mode 100644 pixel_classification/data_utils.py create mode 100644 pixel_classification/keras_cnn.py diff --git a/pixel_classification/classify.py b/pixel_classification/classify.py index 3b0cd09..a2656e8 100644 --- a/pixel_classification/classify.py +++ b/pixel_classification/classify.py @@ -225,6 +225,8 @@ def _get_stack_channels(self): try: stack[i, :, :] = arr except ValueError: + import pprint + pprint.pprint(first_geo) arr = warp_single_image(self.feature_ras, first_geo) stack[i, :, :] = arr diff --git a/pixel_classification/compose_array.py b/pixel_classification/compose_array.py index c615fed..7cbf0e8 100644 --- a/pixel_classification/compose_array.py +++ b/pixel_classification/compose_array.py @@ -155,7 +155,7 @@ def create_sample_points(self): for class_code, _dict in self.geography.attributes.items(): print(_dict['ltype']) - polygons = self._get_polygons(_dict['path']) + polygons = self._get_polygons(_dict['path']) # this is a hardcoded shapefile name. _dict['instance_count'] = 0 if len(polygons) > self.m_instances: @@ -379,8 +379,8 @@ def _grid_raster_extract(self, raster, _name): Open the raster. Store the points in a Series - a labeled numpy array. Then in _purge array, we iterate over the masks and the paths_map and drop pixels where masks = 1 and pixels where bound = 0. 
- The """ + with rasopen(raster, 'r') as rsrc: rass_arr = rsrc.read() rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) diff --git a/pixel_classification/compose_array_single_shapefile.py b/pixel_classification/compose_array_single_shapefile.py new file mode 100644 index 0000000..cb24ed1 --- /dev/null +++ b/pixel_classification/compose_array_single_shapefile.py @@ -0,0 +1,414 @@ +# ============================================================================================= +# Copyright 2018 dgketchum +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================================= + +import os +import sys + +abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(abspath) +import pickle +from copy import deepcopy +from warnings import warn + +from fiona import open as fopen +from numpy import linspace, max, nan, unique, ndarray, swapaxes, zeros +import h5py +from numpy.random import shuffle +from pandas import DataFrame, Series +import warnings +from pyproj import Proj, transform +from rasterio import open as rasopen +from shapely.geometry import shape, Point, mapping +from shapely.ops import unary_union +loc = os.path.dirname(__file__) +WRS_2 = loc.replace('pixel_classification', + os.path.join('spatial_data', 'wrs2_descending.shp')) + +''' +This script contains a class meant to gather data from rasters using a polygon shapefile. 
+The high-level method `extract_sample` will return an object ready for a +learning algorithm. +''' +loc = os.path.dirname(__file__) +WRS_2 = loc.replace('pixel_classification', + os.path.join('spatial_data', 'wrs2_descending.shp')) + +class NoCoordinateReferenceError(Exception): + pass + + +class UnexpectedCoordinateReferenceSystemError(Exception): + pass + + +class PTASingleShapefile: + + def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, + row=None, masked_raster=None, training_directory=None, paths_map=None, masks=None, + instances=None, overwrite_points=None, kernel_size=None): + self.shapefile_path = shapefile_path + self.path = path + self.object_id = 0 + self.paths_map = paths_map + self.masks = masks + self.row = row + self.training_directory = training_directory + self.overwrite_points=overwrite_points + self.class_code = class_code + self.crs = self._get_crs() + self.m_instances = instances + self.master_raster = master_raster + self.masked_raster = masked_raster + if masked_raster is not None: + print(masked_raster, "Masked raster present.") + self.data = None + self.kernel_size = kernel_size + self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) + + def extract_sample(self, save_points=True): + + out = os.path.splitext(self.shapefile_path)[0] + out += "_sample_points.shp" + if os.path.isfile(out) and not self.overwrite_points: + print("sample points already created") + self._populate_array_from_points(out) + else: + self.create_sample_points() + if save_points: + self.save_sample_points() + if self.master_raster is not None: + self.training_data_from_master_raster() + else: + self.populate_raster_data_array() + + def _populate_array_from_points(self, fname): + + with fopen(fname, 'r') as src: + for feat in src: + coords = feat['geometry']['coordinates'] + val = feat['properties']['POINT_TYPE'] + self._add_entry(coords, val=val) + + def _create_hdf_file(self): + n = 
os.path.basename(self.shapefile_path)[:-4] + "_train.h5" + to_save = os.path.join(self.training_directory, n) + hdf5 = h5py.File(to_save, 'a') + shape = (self.m_instances, self.kernel_size, self.kernel_size, len(self.paths_map.keys())) + hdf5.create_dataset("cc:{}".format(self.class_code), shape, compression='gzip') + return hdf5 + + def _verify_point(self, x, y): + """ Check to see if x, y is masked. """ + pass + + + def training_data_from_master_raster(self): + + ofs = self.kernel_size // 2 + #TODO: Make saving data easier. + # Query how much memory I have left? + + + sz = 10000 # some heuristic that indicates when I run out of memory + tmp_arr = zeros((sz, len(self.paths_map.keys()), self.kernel_size, self.kernel_size)) + + n = os.path.basename(self.shapefile_path)[:-4] + "_train.h5" + to_save = os.path.join(self.training_directory, n) + hdf5 = h5py.File(to_save, 'a') + shape = (self.m_instances, len(self.paths_map.keys()), self.kernel_size, self.kernel_size) + dset = hdf5.create_dataset("cc:{}".format(self.class_code), shape, compression='gzip') + + with rasopen(self.master_raster, 'r') as rsrc: + rass_arr = rsrc.read() + affine = rsrc.transform + + j = 0 + p = 0 + for ind, row in self.extracted_points.iterrows(): + p = ind + # iterate through extracted points. 
+ if j == sz: + print("Writing to disk...") + dset[ind-j:ind, :, :, :] = tmp_arr + j = 0 + if ind % 1000 == 0: + print("Step:", ind) + x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) + c, r = ~affine * (x, y) + try: + rr = int(r); cc = int(c) + raster_subgrid = rass_arr[:, rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] + # possible issues: edges of image + # rs = swapaxes(raster_subgrid, 0, 2) # for N, H, W, C format + tmp_arr[j, :, :, :] = raster_subgrid + j += 1 + + except IndexError as e: + print(e) + tmp_arr[j, :, :, :] = nan + j += 1 + + print("Writing to disk...") + dset[p-j:p, :, :, :] = tmp_arr + hdf5.close() + + def create_sample_points(self): + """ Create a clipped training set from polygon shapefiles. + + This complicated-looking function finds the wrs_2 descending Landsat tile corresponding + to the path row provided, gets the bounding box and profile (aka meta) from + compose_array.get_tile_geometry, clips the training data to the landsat tile, then perform + s a union to reduce the number of polygon objects. + The dict object this uses has a template in pixel_classification.runspec.py. + Approach is to loop through the polygons, create a random grid of points over the + extent of each polygon, random shuffle order of points, loop over points, check if + point is within polygon, and if within, create a sample point. + + If a relatively simple geometry is available, use create_negative_sample_points(), though if + there are > 10**4 polygons, it will probably hang on unary_union(). """ + print("Making sample pts") + + polygons = self._get_polygons(self.shapefile_path) + instance_count = 0 + + if len(polygons) < 2: + warnings.warn("You have < 2 polygons in shapefile {}. 
".format(os.path.basename(self.shapefile_path), Warning)) + + if len(polygons) > self.m_instances: + areas = zip(polygons, [x.area for x in polygons]) + srt = sorted(areas, key=lambda x: x[1], reverse=True) + polygons = [x for x, y in srt[:self.m_instances]] + + polygons = unary_union(polygons) + if not isinstance(polygons, list): + polygons = [polygons] # for the case of a single polygon. + + positive_area = sum([x.area for x in polygons]) # the sum of all + # the areas. + class_count = 0 + + for i, poly in enumerate(polygons): + if class_count >= self.m_instances: + print("Broke b/c class_count >= m_instances") + break + fractional_area = poly.area / positive_area # percent of + # total area that this polygon occupies + required_points = max([1, fractional_area * self.m_instances]) # how + # many points overall that are required to evenly + # sample from each polygon, based on area. + poly_pt_ct = 0 + #while poly_pt_ct < required_points: # I wasn't getting enough points. + # I feel like I was/am doing something wrong. 
+ x_range, y_range = self._random_points(poly.bounds) + for coord in zip(x_range, y_range): + if Point(coord[0], coord[1]).within(poly): + self._add_entry(coord, val=self.class_code) + poly_pt_ct += 1 + instance_count += 1 + if instance_count % 1000 == 0: + print("Instances:", instance_count) + + if instance_count > self.m_instances: + print("Broke b/c instance_count > m_instances") + break + + if poly_pt_ct >= required_points: + print("Broke b/c poly_pt_ct > required_points") + break + + class_count += poly_pt_ct + + print("Final number of points: ", self.object_id) + + def populate_raster_data_array(self, save=True): + + for key, val in self.paths_map.items(): + s = self._grid_raster_extract(val, _name=key) + print('Extracting {}'.format(key)) + self.extracted_points = self.extracted_points.join(s, how='outer') + + for key, val in self.masks.items(): + s = self._grid_raster_extract(val, _name=key) + print('Extracting {}'.format(key)) + self.extracted_points = self.extracted_points.join(s, how='outer') + + data_array, targets = self._purge_raster_array() + data = {'df': data_array, + 'features': data_array.columns.values, + 'data': data_array.values, + 'target_values': targets, + 'paths_map': self.paths_map} + + print('feature dimensions: {}'.format(data_array.shape)) + + for key, val in data.items(): + setattr(self, key, val) + + + def _purge_raster_array(self): + data_array = deepcopy(self.extracted_points) + target_vals = Series(data_array.POINT_TYPE.values, name='POINT_TYPE') + data_array.drop(['X', 'Y', 'FID', 'POINT_TYPE'], axis=1, inplace=True) + try: + for msk in self.masks.keys(): + for idx, sub_raster in enumerate(data_array[msk]): + if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 1.: + data_array.loc[idx, :] = nan # make whole row NaN + except TypeError as e: + print(sub_raster, msk, idx) + data_array.loc[idx, :] = nan + + try: + for bnd in self.paths_map.keys(): + for idx, sub_raster in enumerate(data_array[bnd]): + if 
sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 0.: + data_array.loc[idx, :] = nan + except TypeError as e: + print(sub_raster, msk, idx) + data_array.loc[idx, :] = nan + + data_array = data_array.join(target_vals, how='outer') + + data_array.dropna(axis=0, inplace=True) + data_array.drop(self.masks, axis=1, inplace=True) + target_vals = data_array.POINT_TYPE.values + + data_array = data_array.drop(['POINT_TYPE'], + axis=1, inplace=False) + return data_array, target_vals + + def _random_points(self, coords): + min_x, max_x = coords[0], coords[2] + min_y, max_y = coords[1], coords[3] + x_range = linspace(min_x, max_x, num=10 * self.m_instances) + y_range = linspace(min_y, max_y, num=10 * self.m_instances) + shuffle(x_range), shuffle(y_range) + return x_range, y_range + + def _add_entry(self, coord, val=0): + + self.extracted_points = self.extracted_points.append({'FID': int(self.object_id), + 'X': coord[0], + 'Y': coord[1], + 'POINT_TYPE': val}, + ignore_index=True) + self.object_id += 1 + + def _geo_point_to_projected_coords(self, x, y): + + in_crs = Proj(init='epsg:4326') + out_crs = Proj(init=self.crs['init']) + x, y = transform(in_crs, out_crs, x, y) + return x, y + + def _grid_raster_extract(self, raster, _name): + """ + Open the raster. Store the points in a Series - a labeled + numpy array. Then in _purge array, we iterate over the masks + and the paths_map and drop pixels where masks = 1 and pixels where bound = 0. 
+ """ + + with rasopen(raster, 'r') as rsrc: + rass_arr = rsrc.read() + rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) + affine = rsrc.transform + + s = Series(index=range(0, self.extracted_points.shape[0]), name=_name, dtype=object) + for ind, row in self.extracted_points.iterrows(): + x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) + c, r = ~affine * (x, y) + try: + ofs = self.kernel_size // 2 + rr = int(r); cc = int(c) + raster_subgrid = rass_arr[rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] # possible issues: edges of image + s[ind] = raster_subgrid + except IndexError: + s[ind] = None + + return s + + def _get_polygons(self, vector): + with fopen(vector, 'r') as src: + crs = src.crs + if not crs: + raise NoCoordinateReferenceError( + 'Provided shapefile has no reference data.') + if crs['init'] != 'epsg:4326': + raise UnexpectedCoordinateReferenceSystemError( + 'Provided shapefile should be in unprojected (geographic)' + 'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format( + vector)) + clipped = src.filter(mask=self.tile_bbox) + polys = [] + bad_geo_count = 0 + for feat in clipped: + try: + geo = shape(feat['geometry']) + polys.append(geo) + except AttributeError: + bad_geo_count += 1 + + return polys + + @property + def tile_bbox(self): + with fopen(WRS_2, 'r') as wrs: + for feature in wrs: + fp = feature['properties'] + if fp['PATH'] == self.path and fp['ROW'] == self.row: + bbox = feature['geometry'] + return bbox + + def _get_crs(self): + for key, val in self.paths_map.items(): + with rasopen(val, 'r') as src: + crs = src.crs + break + return crs + + + def save_sample_points(self): + + points_schema = { + 'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]), + 'geometry': 'Point'} + meta = self.tile_geometry.copy() + meta['schema'] = points_schema + + out = os.path.splitext(self.shapefile_path)[0] + out += "_sample_points.shp" + + with fopen(out, 'w', **meta) as output: + for index, row in 
self.extracted_points.iterrows(): + props = dict([('FID', row['FID']), ('POINT_TYPE', row['POINT_TYPE'])]) + pt = Point(row['X'], row['Y']) + output.write({'properties': props, + 'geometry': mapping(pt)}) + return None + + @property + def tile_geometry(self): + with fopen(WRS_2, 'r') as wrs: + wrs_meta = wrs.meta.copy() + return wrs_meta + + def to_pickle(self, data, path): + + with open(path, 'wb') as handle: + pickle.dump(data, handle, protocol=2) + + return path + diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py new file mode 100644 index 0000000..7749924 --- /dev/null +++ b/pixel_classification/data_utils.py @@ -0,0 +1,222 @@ +from shapely.geometry import shape +import glob +import os +from collections import defaultdict +import fiona +from lxml import html +from requests import get +from copy import deepcopy +from numpy import zeros +import re +from rasterio import float32, open as rasopen +from prepare_images import ImageStack +import sat_image + +WRS_2 = '../spatial_data/wrs2_descending.shp' + +def create_master_masked_raster(image_stack, path, row, year, raster_directory): + masks = image_stack.masks + if not masks: + return None + + first = True + stack = None + + for i, feat in enumerate(masks.keys()): # ordered dict ensures accuracy here. + + mask_raster = masks[feat] # maps bands to their location in filesystem. 
+ + with rasopen(mask_raster, mode='r') as src: + arr = src.read() + raster_geo = src.meta.copy() + + if first: + first_geo = deepcopy(raster_geo) + print(first_geo, "FIRST_GEO") + empty = zeros((len(masks.keys()), arr.shape[1], arr.shape[2]), float32) + stack = empty + stack[i, :, :] = arr + first = False + else: + try: + stack[i, :, :] = arr + except ValueError: + import pprint + pprint.pprint(first_geo) + # error was thrown here b/c source raster didn't have crs + arr = sat_image.warped_vrt.warp_single_image(mask_raster, first_geo) + stack[i, :, :] = arr + + first_geo.update(count=len(masks.keys())) + + fname = "master_mask_raster_{}_{}_{}.tif".format(path, row, year) + pth = os.path.join(raster_directory, fname) + + with rasopen(pth, mode='w', **first_geo) as dst: + dst.write(stack) + + return pth + + +def create_master_raster(image_stack, path, row, year, raster_directory): + fname = "master_raster_{}_{}_{}.tif".format(path, row, year) + pth = os.path.join(raster_directory, fname) + if os.path.isfile(pth): + print("Master raster already created for {}_{}_{}.".format(path, row, year)) + return pth + + paths_map = image_stack.paths_map + first = True + stack = None + + for i, feat in enumerate(paths_map.keys()): # ordered dict ensures accuracy here. + + feature_raster = paths_map[feat] # maps bands to their location in filesystem. 
+ + with rasopen(feature_raster, mode='r') as src: + arr = src.read() + raster_geo = src.meta.copy() + + if first: + first_geo = deepcopy(raster_geo) + empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) + stack = empty + stack[i, :, :] = arr + first = False + else: + try: + stack[i, :, :] = arr + except ValueError: + # import pprint + # pprint.pprint(first_geo) + # error was thrown here b/c source raster didn't have crs + arr = sat_image.warped_vrt.warp_single_image(feature_raster, first_geo) + stack[i, :, :] = arr + + first_geo.update(count=len(paths_map.keys())) + + with rasopen(pth, mode='w', **first_geo) as dst: + dst.write(stack) + + return pth + + +def get_path_row(lat, lon): + """ + :param lat: Latitude float + :param lon: Longitude float + 'convert_pr_to_ll' [path, row to coordinates] + :return: lat, lon tuple or path, row tuple + """ + conversion_type = 'convert_ll_to_pr' + base = 'https://landsat.usgs.gov/landsat/lat_long_converter/tools_latlong.php' + unk_number = 1508518830987 + + full_url = '{}?rs={}&rsargs[]={}&rsargs[]={}&rsargs[]=1&rsrnd={}'.format(base, + conversion_type, + lat, lon, + unk_number) + r = get(full_url) + tree = html.fromstring(r.text) + + # remember to view source html to build xpath + # i.e. 
inspect element > network > find GET with relevant PARAMS + # > go to GET URL > view source HTML + p_string = tree.xpath('//table/tr[1]/td[2]/text()') + path = int(re.search(r'\d+', p_string[0]).group()) + + r_string = tree.xpath('//table/tr[1]/td[4]/text()') + row = int(re.search(r'\d+', r_string[0]).group()) + + return path, row + +def get_shapefile_lat_lon(shapefile): + with fiona.open(shapefile, "r") as src: + minx, miny, maxx, maxy = src.bounds + latc = (maxy + miny) / 2 + lonc = (maxx + minx) / 2 + + return latc, lonc + + +def download_images(project_directory, path, row, year, satellite=8): + + image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, + max_cloud_pct=70, n_landsat=3, year=year) + + image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is + # a cloud mask. + return image_stack + + +def get_pr(poly, wrs2): + ls = [] + for feature in wrs2: + tile = shape(feature['geometry']) + if poly.within(tile): + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + ls.append(str(p) + "_" + str(r)) + return ls + +def split_shapefile(base, base_shapefile, data_directory): + """ Shapefiles may deal with data over multiple path/rows. + Data directory: where the split shapefiles will be saved.""" + # TODO: un-hardcode this directory + wrs2 = fiona.open("../spatial_data/wrs2_descending_usa.shp") + + dct = defaultdict(list) + shapefile_mapping = defaultdict(list) + + with fiona.open(base + base_shapefile, "r") as src: + meta = deepcopy(src.meta) + for feat in src: + feat_id = int(feat['id']) + shapefile_mapping[feat_id] = feat + poly = shape(feat['geometry']) + prs = get_pr(poly, wrs2) + dct[feat_id] = prs + wrs2.close() + id_mapping = defaultdict(list) + for key in dct: + for e in dct[key]: + id_mapping[e].append(key) + # Now find the unique values between the lists. 
+ for key1 in id_mapping: + for key2 in id_mapping: + if key1 != key2: + res = set(id_mapping[key2]) - set(id_mapping[key1]) + # above line gives the keys that are present + # in the second list that do not appear in the first list. + # By doing this for all path/rows, we can get all of the unique path/rows. + # Still need to test this. + id_mapping[key2] = list(sorted(res)) + + prefix = os.path.splitext(base_shapefile)[0] + for key in id_mapping: + if len(id_mapping[key]): + out = prefix + "_" + key + ".shp" + print("Split shapefile saving to:", os.path.join(data_directory, out)) + with fiona.open(os.path.join(data_directory, out), 'w', **meta) as dst: + for feat_id in id_mapping[key]: + poly = shapefile_mapping[feat_id] + dst.write(poly) + + return + +def get_shapefile_path_row(shapefile): + """This function assumes that the original + shapefile has already been split, and relies on + the naming convention to get the path and row.""" + # strip extension + # TODO: Find some way to update shapefile metadata + shp = shapefile[-9:-4].split("_") + return int(shp[0]), int(shp[1]) + +if __name__ == "__main__": + pass + # base = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/" + # base_shapefile = "MT_Huntley_Main_2013_3728.shp" + # data_directory = "shapefile_data/" + # split_shapefile(base, base_shapefile, data_directory) diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py new file mode 100644 index 0000000..986be39 --- /dev/null +++ b/pixel_classification/keras_cnn.py @@ -0,0 +1,101 @@ +import h5py +import glob +import tensorflow as tf +import numpy as np + +N_INSTANCES_IRRIGATED = 30000 +N_INSTANCES_NOT = 10000 + +def keras_model(kernel_size): + model = tf.keras.Sequential() + # Must define the input shape in the first layer of the neural network + model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', + input_shape=(kernel_size, kernel_size, 3))) + 
model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) + model.add(tf.keras.layers.Dropout(0.3)) + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')) + model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) model.add(tf.keras.layers.Dropout(0.3)) + model.add(tf.keras.layers.Flatten()) + model.add(tf.keras.layers.Dense(256, activation='relu')) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Dense(2, activation='softmax')) + # Take a look at the model summary + model.summary() + return model + +def train_model(kernel_size): + + from sklearn.model_selection import train_test_split + + x_train, x_test, y_train, y_test = train_test_split(features, labels, + test_size=0.1, random_state=42) + + model = keras_model(kernel_size) + model.compile(loss='binary_crossentropy', + optimizer='adam', + metrics=['accuracy']) + model.fit(x_train, + y_train, + batch_size=128, + epochs=10, + validation_data=(x_test, y_test)) + score = model.evaluate(x_test, y_test, verbose=0) + print('\n', 'Test accuracy:', score[1]) + return model + +def make_one_hot(labels, n_classes): + ret = np.zeros((len(labels), n_classes)) + for i, e in enumerate(labels): + ret[i, e] = 1 + return ret + +def generate_labels_and_features(filename, class_code, index_1, index_2, n_classes=2): + # approach: + # I have n files containing training data on disk. + # Loop through all classes and sample a subset + # of each file. This actually shouldn't be that hard. + # Then, shuffle the data (in memory?) and split it + # into training and test sets. + with h5py.File(filename, 'r') as f: + data = f['cc:'+str(class_code)] + labels = [class_code]*(index_2-index_1) + labels = make_one_hot(labels, n_classes=n_classes) + + return data[index_1:index_2, :, :, :] # this is an assumption about the shape of the data + +def shuffle_data(training_directory, suffix='.h5'): + # Make piles, and shuffle that way. + # Reference that website. 
+ # approach: + # for each (h5) file in directory: + # open it, and make piles with it (in parallel) + # then combine each litle pile into a large pile, + # but iterate through the littler piles when + # creating the big pile + return None + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index e576592..f32d35a 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -95,7 +95,7 @@ def build_training(self): def build_evaluating(self): self.get_landsat(fmask=False) self.profile = self.landsat.rasterio_geometry - self.get_et() + # self.get_et() self.get_terrain() self.get_cdl() self.paths_map, self.masks = self._order_images() # paths map is just path-> location @@ -129,6 +129,7 @@ def get_landsat(self, fmask=False): output_path=self.root, max_cloud_percent=self.max_cloud) self.path = g.p self.row = g.r + print("Path:", self.path, "Row:", self.row) g.select_scenes(self.n) self.scenes = g.selected_scenes diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index ab03663..2640123 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -1,234 +1,112 @@ +import warnings import glob import os -from collections import defaultdict -import fiona -from lxml import html -from requests import get -from copy import deepcopy -from numpy import zeros -import re -from rasterio import float32, open as rasopen -from compose_array_single_shapefile import PixelTrainingArraySingleShapefile -from prepare_images import ImageStack -import sat_image - -def create_master_raster(image_stack): - - paths_map = image_stack.paths_map - first = True - stack = None - - for i, feat in enumerate(paths_map.keys()): # ordered dict ensures accuracy here. - - feature_raster = paths_map[feat] # maps bands to their location in filesystem. 
- - with rasopen(feature_raster, mode='r') as src: - arr = src.read() - raster_geo = src.meta.copy() - - if first: - first_geo = deepcopy(raster_geo) - empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) - stack = empty - stack[i, :, :] = arr - first = False - else: - try: - stack[i, :, :] = arr - except ValueError: - import pprint - pprint.pprint(first_geo) - # error was thrown here b/c source raster didn't have crs - arr = sat_image.warped_vrt.warp_single_image(feature_raster, first_geo) - stack[i, :, :] = arr - - # print(stack.shape) - first_geo.update(count=len(paths_map.keys())) - - with rasopen("master_raster.tif", mode='w', **first_geo) as dst: - dst.write(stack) - - return "master_raster.tif" # TODO: replace with a sensible name. - -def get_path_row(lat, lon): - """ - :param lat: Latitude float - :param lon: Longitude float - 'convert_pr_to_ll' [path, row to coordinates] - :return: lat, lon tuple or path, row tuple - """ - conversion_type = 'convert_ll_to_pr' - base = 'https://landsat.usgs.gov/landsat/lat_long_converter/tools_latlong.php' - unk_number = 1508518830987 - - full_url = '{}?rs={}&rsargs[]={}&rsargs[]={}&rsargs[]=1&rsrnd={}'.format(base, - conversion_type, - lat, lon, - unk_number) - r = get(full_url) - tree = html.fromstring(r.text) - - # remember to view source html to build xpath - # i.e. 
inspect element > network > find GET with relevant PARAMS - # > go to GET URL > view source HTML - p_string = tree.xpath('//table/tr[1]/td[2]/text()') - path = int(re.search(r'\d+', p_string[0]).group()) - - r_string = tree.xpath('//table/tr[1]/td[4]/text()') - row = int(re.search(r'\d+', r_string[0]).group()) - - return path, row - -def get_shapefile_lat_lon(shapefile): - with fiona.open(shapefile, "r") as src: - minx, miny, maxx, maxy = src.bounds - latc = (maxy + miny) / 2 - lonc = (maxx + minx) / 2 - - return latc, lonc - -def get_shapefile_path_rows(shapefile): - '''Since ImageStack can deal with lat/lon when - downloading from the Internet, I need to get - a central location of the shapefile. However, - since shapefiles may extend over multiple p/rs, - I need to figure out all the path/rows. - These methods work; however, they don't account - for the overlapping nature of the wrs2 tiles. - This means that many, many images may be downloaded. - ''' - - dct = defaultdict(list) - from shapely.geometry import shape - with fiona.open(shapefile, "r") as src: - for feat in src: - pt = shape(feat['geometry']).centroid - pt = pt.coords[0] - p, r = get_path_row(pt[1], pt[0]) - pr = str(p) + "_" + str(r) - dct[pr].append(feat) - return dct - -def split_shapefile(path, data_directory, base_shapefile, pr_dict): - ''' - Multiple p/rs can be contained in one - shapefile. 
To ease the extraction of - training data, we need an automated way - to look through all shapefile attributes and assign - them a path/row, then save all the polygons in different - files (if they're not in the same p/r) - ''' - - prefix = os.path.splitext(base_shapefile)[0] - - with fiona.open(os.path.join(path, base_shapefile), "r") as src: - meta = deepcopy(src.meta) - - for pr in pr_dict.keys(): - out = prefix + "_" + pr + ".shp" - print(out) - with fiona.open(os.path.join(data_directory, out), 'w', **meta) as dst: - for poly in pr_dict[pr]: - dst.write(poly) - return - -def download_images(project_directory, path, row, year): - - image_stack = ImageStack(satellite=8, path=path, row=row, root=project_directory, - max_cloud_pct=70, n_landsat=3, year=year) - - image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is - # a cloud mask. - return image_stack +from numpy import save as nsave +from compose_array_single_shapefile import PTASingleShapefile +from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster, create_master_masked_raster - def create_training_data(shapefile, shapefile_directory, image_directory, class_code, - kernel_size, instances, training_directory, year): + kernel_size, instances, training_directory, year, raster_directory, save=True): - latc, lonc = get_shapefile_lat_lon(os.path.join(shapefile_directory, shapefile)) - p, r = get_path_row(latc, lonc) - suff = str(p) + '_' + str(r) + "_" + str(year) - landsat_dir = os.path.join(image_directory, suff) + p, r = get_shapefile_path_row(shapefile) + suff = str(p) + '_' + str(r) + "_" + str(year) + landsat_dir = os.path.join(image_directory, suff) + satellite = 8 + if year < 2013: + satellite = 7 if not os.path.isdir(landsat_dir): os.mkdir(landsat_dir) - ims = download_images(landsat_dir, p, r, year) + ims = download_images(landsat_dir, p, r, year, satellite) else: print("Images may have been downloaded for {}_{}_{}".format(p, 
r, year)) print("Check to make sure they're all there.") - ims = download_images(landsat_dir, p, r, year) + ims = download_images(landsat_dir, p, r, year, satellite) + + ms = create_master_raster(ims, p, r, year, raster_directory) + mms = create_master_masked_raster(ims, p, r, year, raster_directory) - shp = os.path.join(shapefile_directory, shapefile) - pta = PixelTrainingArraySingleShapefile(root=landsat_dir, shapefile_path=shp, - paths_map=ims.paths_map, masks=ims.masks, class_code=class_code, - instances=instances, kernel_size=kernel_size, path=p, row=r) + shp_path = os.path.join(shapefile_directory, shapefile) + pta = PTASingleShapefile(shapefile_path=shp_path, master_raster=ms, + training_directory=training_directory, overwrite_points=True, class_code=class_code, + path=p, row=r, paths_map=ims.paths_map, masks=ims.masks, + instances=instances, kernel_size=kernel_size) pta.extract_sample() - prefix = os.path.splitext(shapefile)[0] - prefix += "_data_kernel_{}".format(kernel_size) - pta.to_pickle(pta.data, os.path.join(training_directory, prefix)) +irrigated = {'MT_Sun_River_2013':2013, + "MT_Huntley_Main_2013":2013} - print("Data saved to ".format(training_directory)) - - -if __name__ == "__main__": - - # Workflow: - # split all shapefiles up into separate path/rows. - # Then for each (split) shapefile, download the - # requisite image data (landsat -> et etc). - # Then, for each split shapefile, - # download training data and save in the training data directory. - # TODO: 1. Make a program to train the model - # 2. Figure out how to make the model evaluate on every path/row - # that's present in the training data. - # The code broke on an SSEBop run, something - # that I have no control over. At least I think I don't. 
- # shpfile = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_other_3728.shp' - # path = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/' - # pr_dct = get_shapefile_path_rows(shpfile) - - train_dir = '../training_data' - data_directory = '../shapefile_data/' - image_directory = '../image_data' +def get_all_shapefiles(to_match, year, data_directory, irrigated): + ''' Get all shapefiles in same p/r as to_match ''' + p, r = get_shapefile_path_row(os.path.join(data_directory, to_match)) + ls = [] for f in glob.glob(data_directory + "*.shp"): if "sample_points" not in f: - shp = os.path.basename(f) - print(shp) - create_training_data(shp, data_directory, image_directory, 3, 17, 10, - train_dir, 2013) - - # - # data_directory = '../shapefile_data/' - # shpfile = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_other_3728.shp' - # pr_dct = get_shapefile_path_rows(shpfile) - # image_directory = '../image_data' - # split_shapefile(path, data_directory, 'MT_other_3728.shp', pr_dct) - -# # now create imagestacks from each shapefile in the split shapefile directory. - - # # shapefile_year_map = empty - - # year = 2013 # I think? 
- # for f in glob.glob(data_directory + '*.shp'): - # latc, lonc = get_shapefile_lat_lon(f) - # p, r = get_path_row(latc, lonc) - # suff = str(p) + '_' + str(r) + "_" + str(year) - # landsat_dir = os.path.join(image_directory, suff) - # if not os.path.isdir(landsat_dir): - # os.mkdir(landsat_dir) - # ims = download_images(landsat_dir, p, r, 2013) - # else: - # print("Images may have been downloaded for {}_{}_{}".format(p, r, year)) - # print(i) -# project_directory, shapefile, year) - - -# download_images - + pp, rr = get_shapefile_path_row(f) + if pp == p and rr == r: + oup = False + for key in irrigated: + if key in f: + oup = True + if not oup: + ls.append(f) + + return ls + + +if __name__ == "__main__": + shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/' + train_dir = 'training_data/' + data_directory = 'shapefile_data/' + image_directory = 'image_data/' + raster_dir = 'master_rasters' + kernel_size = 57 + + # for f in glob.glob(shp_dir + "*.shp"): + # fname = os.path.basename(f) + # split_shapefile(shp_dir, fname, data_directory) + + from pprint import pprint + for irr in irrigated: + for f in glob.glob(data_directory + "*.shp"): + if "sample" not in f: + if irr in f: + shp = os.path.basename(f) + others = get_all_shapefiles(shp, irrigated[irr], data_directory, irrigated) + pprint(others) + year = irrigated[irr] + class_code = 1 + instances = 30000 + shp = os.path.basename(f) + n = shp[:-4] + "_train.h5" + if not os.path.isfile(os.path.join(train_dir, n)): + print("Creating training data for {}".format(n)) + create_training_data(shp, data_directory, image_directory, + class_code=class_code, kernel_size=kernel_size, instances=instances, training_directory=train_dir, year=year, raster_directory=raster_dir) + else: + print("Training data already created for", n) + + for ff in others: + shp = os.path.basename(ff) + year = 2017 + class_code = 0 + n = shp[:-4] + "_train.h5" + if not os.path.isfile(os.path.join(train_dir, n)): + print("Creating 
training data for {}".format(n)) + create_training_data(shp, data_directory, image_directory, + class_code=class_code, kernel_size=kernel_size, instances=10000, training_directory=train_dir, year=year, raster_directory=raster_dir) + else: + print("Training data already created for", n) + + + + + + + From d445543d82ec9c95271c38a2b11e6b6ffa28d6ae Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 11 Jan 2019 11:00:32 -0700 Subject: [PATCH 06/89] Updating gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5ba1244..8689bda 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python +.condaauto env/ build/ develop-eggs/ @@ -110,4 +111,4 @@ scene_list/ spatial_data/MT tests/data gee_training.py -model_data/ \ No newline at end of file +model_data/ From 456b40295b83d9a12571b7898869155c6abaaa11 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 11 Jan 2019 11:02:05 -0700 Subject: [PATCH 07/89] Removed comment --- pixel_classification/compose_array_single_shapefile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pixel_classification/compose_array_single_shapefile.py b/pixel_classification/compose_array_single_shapefile.py index cb24ed1..cb603a6 100644 --- a/pixel_classification/compose_array_single_shapefile.py +++ b/pixel_classification/compose_array_single_shapefile.py @@ -211,7 +211,6 @@ def create_sample_points(self): # sample from each polygon, based on area. poly_pt_ct = 0 #while poly_pt_ct < required_points: # I wasn't getting enough points. - # I feel like I was/am doing something wrong. 
x_range, y_range = self._random_points(poly.bounds) for coord in zip(x_range, y_range): if Point(coord[0], coord[1]).within(poly): From 55bef6d94d22014e0f1ce7aafd89a1d2516efcec Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 17 Jan 2019 16:54:04 -0700 Subject: [PATCH 08/89] Reworking data pipeline v2 --- .../compose_array_single_shapefile.py | 173 ++++++++---------- pixel_classification/data_shuffle.py | 42 +++++ pixel_classification/data_utils.py | 88 +++++---- pixel_classification/prepare_images.py | 16 +- pixel_classification/runner.py | 8 +- pixel_classification/runner_from_shapefile.py | 133 +++++++++----- 6 files changed, 273 insertions(+), 187 deletions(-) create mode 100644 pixel_classification/data_shuffle.py diff --git a/pixel_classification/compose_array_single_shapefile.py b/pixel_classification/compose_array_single_shapefile.py index cb603a6..582cdab 100644 --- a/pixel_classification/compose_array_single_shapefile.py +++ b/pixel_classification/compose_array_single_shapefile.py @@ -22,9 +22,9 @@ import pickle from copy import deepcopy from warnings import warn - +from datetime import datetime from fiona import open as fopen -from numpy import linspace, max, nan, unique, ndarray, swapaxes, zeros +from numpy import linspace, max, nan, unique, ndarray, swapaxes, zeros, asarray import h5py from numpy.random import shuffle from pandas import DataFrame, Series @@ -35,16 +35,16 @@ from shapely.ops import unary_union loc = os.path.dirname(__file__) WRS_2 = loc.replace('pixel_classification', - os.path.join('spatial_data', 'wrs2_descending.shp')) + os.path.join('spatial_data', 'wrs2_descending.shp')) ''' -This script contains a class meant to gather data from rasters using a polygon shapefile. -The high-level method `extract_sample` will return an object ready for a -learning algorithm. +This script contains a class meant to gather data from rasters using a polygon shapefile. 
+The high-level method `extract_sample` will return an object ready for a +learning algorithm. ''' loc = os.path.dirname(__file__) WRS_2 = loc.replace('pixel_classification', - os.path.join('spatial_data', 'wrs2_descending.shp')) + os.path.join('spatial_data', 'wrs2_descending.shp')) class NoCoordinateReferenceError(Exception): pass @@ -53,15 +53,15 @@ class NoCoordinateReferenceError(Exception): class UnexpectedCoordinateReferenceSystemError(Exception): pass - class PTASingleShapefile: - def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, - row=None, masked_raster=None, training_directory=None, paths_map=None, masks=None, - instances=None, overwrite_points=None, kernel_size=None): + def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, + row=None, masked_raster=None, training_directory=None, paths_map=None, masks=None, + instances=None, overwrite_points=None, kernel_size=None, data_filename=None): self.shapefile_path = shapefile_path self.path = path self.object_id = 0 + self.data_filename = data_filename self.paths_map = paths_map self.masks = masks self.row = row @@ -102,68 +102,55 @@ def _populate_array_from_points(self, fname): val = feat['properties']['POINT_TYPE'] self._add_entry(coords, val=val) - def _create_hdf_file(self): - n = os.path.basename(self.shapefile_path)[:-4] + "_train.h5" - to_save = os.path.join(self.training_directory, n) - hdf5 = h5py.File(to_save, 'a') - shape = (self.m_instances, self.kernel_size, self.kernel_size, len(self.paths_map.keys())) - hdf5.create_dataset("cc:{}".format(self.class_code), shape, compression='gzip') - return hdf5 - def _verify_point(self, x, y): """ Check to see if x, y is masked. 
""" - pass - + return None + + def _dump_data(self, data): + n = os.path.basename(self.shapefile_path)[:-4] + "_train.h5" + if self.data_filename is None: + to_save = os.path.join(self.training_directory, n) + else: + to_save = self.data_filename + with h5py.File(to_save, 'a') as f: + dset = f.create_dataset("cc:{}:{}".format(self.class_code, + str(datetime.now())), data=data) def training_data_from_master_raster(self): ofs = self.kernel_size // 2 - #TODO: Make saving data easier. # Query how much memory I have left? - + sz = 5000 # some heuristic that indicates when I run out of memory - sz = 10000 # some heuristic that indicates when I run out of memory - tmp_arr = zeros((sz, len(self.paths_map.keys()), self.kernel_size, self.kernel_size)) - - n = os.path.basename(self.shapefile_path)[:-4] + "_train.h5" - to_save = os.path.join(self.training_directory, n) - hdf5 = h5py.File(to_save, 'a') - shape = (self.m_instances, len(self.paths_map.keys()), self.kernel_size, self.kernel_size) - dset = hdf5.create_dataset("cc:{}".format(self.class_code), shape, compression='gzip') + tmp_arr = [] with rasopen(self.master_raster, 'r') as rsrc: rass_arr = rsrc.read() affine = rsrc.transform - j = 0 - p = 0 for ind, row in self.extracted_points.iterrows(): - p = ind - # iterate through extracted points. - if j == sz: + # iterate through extracted points. 
+ if (ind+1) % sz == 0: print("Writing to disk...") - dset[ind-j:ind, :, :, :] = tmp_arr - j = 0 - if ind % 1000 == 0: - print("Step:", ind) + qq = asarray(tmp_arr) + del tmp_arr + self._dump_data(qq) + tmp_arr = [] + x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) c, r = ~affine * (x, y) try: rr = int(r); cc = int(c) - raster_subgrid = rass_arr[:, rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] - # possible issues: edges of image - # rs = swapaxes(raster_subgrid, 0, 2) # for N, H, W, C format - tmp_arr[j, :, :, :] = raster_subgrid - j += 1 + raster_subgrid = rass_arr[:, rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] + tmp_arr.append(raster_subgrid) except IndexError as e: print(e) - tmp_arr[j, :, :, :] = nan - j += 1 - print("Writing to disk...") - dset[p-j:p, :, :, :] = tmp_arr - hdf5.close() + if len(tmp_arr): + print("Writing to disk...") + qq = asarray(tmp_arr) + self._dump_data(qq) def create_sample_points(self): """ Create a clipped training set from polygon shapefiles. @@ -173,16 +160,17 @@ def create_sample_points(self): compose_array.get_tile_geometry, clips the training data to the landsat tile, then perform s a union to reduce the number of polygon objects. The dict object this uses has a template in pixel_classification.runspec.py. - Approach is to loop through the polygons, create a random grid of points over the - extent of each polygon, random shuffle order of points, loop over points, check if + Approach is to loop through the polygons, create a random grid of points over the + extent of each polygon, random shuffle order of points, loop over points, check if point is within polygon, and if within, create a sample point. If a relatively simple geometry is available, use create_negative_sample_points(), though if there are > 10**4 polygons, it will probably hang on unary_union(). 
""" - print("Making sample pts") - polygons = self._get_polygons(self.shapefile_path) + polygons = self._get_polygons(self.shapefile_path) instance_count = 0 + print("Making sample points. You have {} polygons".format(len(polygons))) + print("N_instances:", self.m_instances) if len(polygons) < 2: warnings.warn("You have < 2 polygons in shapefile {}. ".format(os.path.basename(self.shapefile_path), Warning)) @@ -192,7 +180,8 @@ def create_sample_points(self): srt = sorted(areas, key=lambda x: x[1], reverse=True) polygons = [x for x, y in srt[:self.m_instances]] - polygons = unary_union(polygons) + #polygons = unary_union(polygons) # this + # can be very inefficient in tse where if not isinstance(polygons, list): polygons = [polygons] # for the case of a single polygon. @@ -202,35 +191,29 @@ def create_sample_points(self): for i, poly in enumerate(polygons): if class_count >= self.m_instances: - print("Broke b/c class_count >= m_instances") break fractional_area = poly.area / positive_area # percent of - # total area that this polygon occupies + # total area that this polygon occupies required_points = max([1, fractional_area * self.m_instances]) # how - # many points overall that are required to evenly - # sample from each polygon, based on area. + # many points overall that are required to evenly + # sample from each polygon, based on area. poly_pt_ct = 0 - #while poly_pt_ct < required_points: # I wasn't getting enough points. + # while poly_pt_ct < required_points: # I wasn't getting enough points. 
x_range, y_range = self._random_points(poly.bounds) for coord in zip(x_range, y_range): - if Point(coord[0], coord[1]).within(poly): + if instance_count >= self.m_instances: + break + if Point(coord[0], coord[1]).within(poly): self._add_entry(coord, val=self.class_code) poly_pt_ct += 1 instance_count += 1 - if instance_count % 1000 == 0: - print("Instances:", instance_count) - - if instance_count > self.m_instances: - print("Broke b/c instance_count > m_instances") - break - + # print(instance_count) if poly_pt_ct >= required_points: - print("Broke b/c poly_pt_ct > required_points") break class_count += poly_pt_ct + print("Final instance count:", instance_count) - print("Final number of points: ", self.object_id) def populate_raster_data_array(self, save=True): @@ -258,7 +241,7 @@ def populate_raster_data_array(self, save=True): def _purge_raster_array(self): - data_array = deepcopy(self.extracted_points) + data_array = deepcopy(self.extracted_points) target_vals = Series(data_array.POINT_TYPE.values, name='POINT_TYPE') data_array.drop(['X', 'Y', 'FID', 'POINT_TYPE'], axis=1, inplace=True) try: @@ -286,7 +269,7 @@ def _purge_raster_array(self): target_vals = data_array.POINT_TYPE.values data_array = data_array.drop(['POINT_TYPE'], - axis=1, inplace=False) + axis=1, inplace=False) return data_array, target_vals def _random_points(self, coords): @@ -300,10 +283,10 @@ def _random_points(self, coords): def _add_entry(self, coord, val=0): self.extracted_points = self.extracted_points.append({'FID': int(self.object_id), - 'X': coord[0], - 'Y': coord[1], - 'POINT_TYPE': val}, - ignore_index=True) + 'X': coord[0], + 'Y': coord[1], + 'POINT_TYPE': val}, + ignore_index=True) self.object_id += 1 def _geo_point_to_projected_coords(self, x, y): @@ -344,12 +327,12 @@ def _get_polygons(self, vector): crs = src.crs if not crs: raise NoCoordinateReferenceError( - 'Provided shapefile has no reference data.') - if crs['init'] != 'epsg:4326': - raise 
UnexpectedCoordinateReferenceSystemError( - 'Provided shapefile should be in unprojected (geographic)' - 'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format( - vector)) + 'Provided shapefile has no reference data.') + if crs['init'] != 'epsg:4326': + raise UnexpectedCoordinateReferenceSystemError( + 'Provided shapefile should be in unprojected (geographic)' + 'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format( + vector)) clipped = src.filter(mask=self.tile_bbox) polys = [] bad_geo_count = 0 @@ -377,32 +360,32 @@ def _get_crs(self): crs = src.crs break return crs - - + + def save_sample_points(self): - + points_schema = { - 'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]), - 'geometry': 'Point'} + 'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]), + 'geometry': 'Point'} meta = self.tile_geometry.copy() meta['schema'] = points_schema - + out = os.path.splitext(self.shapefile_path)[0] out += "_sample_points.shp" - + with fopen(out, 'w', **meta) as output: for index, row in self.extracted_points.iterrows(): props = dict([('FID', row['FID']), ('POINT_TYPE', row['POINT_TYPE'])]) pt = Point(row['X'], row['Y']) output.write({'properties': props, - 'geometry': mapping(pt)}) + 'geometry': mapping(pt)}) return None - @property - def tile_geometry(self): - with fopen(WRS_2, 'r') as wrs: - wrs_meta = wrs.meta.copy() - return wrs_meta + @property + def tile_geometry(self): + with fopen(WRS_2, 'r') as wrs: + wrs_meta = wrs.meta.copy() + return wrs_meta def to_pickle(self, data, path): diff --git a/pixel_classification/data_shuffle.py b/pixel_classification/data_shuffle.py new file mode 100644 index 0000000..7ebf042 --- /dev/null +++ b/pixel_classification/data_shuffle.py @@ -0,0 +1,42 @@ +import h5py +import numpy as np + +def next_batch(file_map): + '''File map: {class_code:{files:[], instances:int}}''' + for class_code in file_map: + files = file_map[class_code]['files'] + n_instances = file_map[class_code]['instances'] + + 
+ + +def load_sample(required_instances, fnames, class_code): + ''' Fnames: filenames of all files of class_code class + required_instances: number of instances of training data required ''' + total_instances, num_files = get_total_instances(fnames) + random_sample = np.random.randint(0, total_instances, required_instances) + random_sample.sort() + ls = [] + last = 0 + offset = 0 + for f in fnames: + with h5py.File(f, 'f') as hdf5: + for key in hdf5: + if hdf5[key].shape[0]: + frac_membership = int((hdf5[key].shape[0] / total_instances)*required_instances) + indices = sorted_sample[last:frac_membership] - sorted_sample[last] + last = frac_membership + ls.append(hdf5[key][indices, :, :, :]) + ls = np.asarray(ls) + return ls, np.ones((len(ls)))*class_code + +def get_total_instances(fnames): + total_instances = 0 + num_keys = 0 + for f in fnames: + with h5py.File(f, 'r') as hdf5: + for key in hdf5: + if hdf5[key].shape[0]: + total_instances += hdf5[key].shape[0] + num_keys += 1 + return total_instances, num_keys diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 7749924..18d64e7 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -163,52 +163,74 @@ def get_pr(poly, wrs2): def split_shapefile(base, base_shapefile, data_directory): """ Shapefiles may deal with data over multiple path/rows. 
Data directory: where the split shapefiles will be saved.""" - # TODO: un-hardcode this directory - wrs2 = fiona.open("../spatial_data/wrs2_descending_usa.shp") + path_row = defaultdict(list) + id_mapping = {} + wrs2 = fiona.open('../spatial_data/wrs2_descending_usa.shp', 'r') - dct = defaultdict(list) - shapefile_mapping = defaultdict(list) - - with fiona.open(base + base_shapefile, "r") as src: + with fiona.open(os.path.join(base, base_shapefile), "r") as src: meta = deepcopy(src.meta) for feat in src: - feat_id = int(feat['id']) - shapefile_mapping[feat_id] = feat + idd = feat['id'] + id_mapping[idd] = feat poly = shape(feat['geometry']) prs = get_pr(poly, wrs2) - dct[feat_id] = prs + for p in prs: + path_row[p].append(idd) + wrs2.close() - id_mapping = defaultdict(list) - for key in dct: - for e in dct[key]: - id_mapping[e].append(key) - # Now find the unique values between the lists. - for key1 in id_mapping: - for key2 in id_mapping: - if key1 != key2: - res = set(id_mapping[key2]) - set(id_mapping[key1]) - # above line gives the keys that are present - # in the second list that do not appear in the first list. - # By doing this for all path/rows, we can get all of the unique path/rows. - # Still need to test this. - id_mapping[key2] = list(sorted(res)) + + # I have all path/rows and their corresponding features + # I need to figure out the unique features in each path row. + # How should I treat the non-unique features? + # Create a set of non-unique features and their + # corresponding path/row. Also create + # a set of unique features. Then iterate over the + # unique set and for each non-unique feature + # place it in the path/row with the greatest number of + # unique points. 
+ non_unique = defaultdict(list) + unique = defaultdict(list) + for key in path_row: + ls = path_row[key] # all features in a given path/row + placeholder = ls + for key1 in path_row: + if key != key1: + ls1 = path_row[key1] + # find unique keys in ls + placeholder = set(placeholder) - set(ls1) #all + # features present in placeholder that are not + # present in ls1; i.e. unique keys + unique[key] = list(placeholder) + nu = set(ls) - set(placeholder) # all features present + # in ls that are not present in placeholder (non-unique) + for idd in list(nu): + non_unique[idd].append(key) + + for key in non_unique: # unique ids + pr = None + hi = 0 + for pathrow in non_unique[key]: # path/rows corresponding to non + # unique features + if len(unique[pathrow]) > hi: + pr = pathrow + hi = len(unique[pathrow]) + if pr is not None: + unique[pr].append(key) prefix = os.path.splitext(base_shapefile)[0] - for key in id_mapping: - if len(id_mapping[key]): - out = prefix + "_" + key + ".shp" - print("Split shapefile saving to:", os.path.join(data_directory, out)) + for key in unique: + out = prefix + "_" + key + ".shp" + if len(unique[key]): with fiona.open(os.path.join(data_directory, out), 'w', **meta) as dst: - for feat_id in id_mapping[key]: - poly = shapefile_mapping[feat_id] - dst.write(poly) - - return + print("Split shapefile saving to:", + os.path.join(data_directory, out)) + for feat in unique[key]: + dst.write(id_mapping[feat]) def get_shapefile_path_row(shapefile): """This function assumes that the original shapefile has already been split, and relies on - the naming convention to get the path and row.""" + the naming convention to get the path and row. 
""" # strip extension # TODO: Find some way to update shapefile metadata shp = shapefile[-9:-4].split("_") diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index f32d35a..beafa26 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -95,7 +95,7 @@ def build_training(self): def build_evaluating(self): self.get_landsat(fmask=False) self.profile = self.landsat.rasterio_geometry - # self.get_et() + #self.get_et() self.get_terrain() self.get_cdl() self.paths_map, self.masks = self._order_images() # paths map is just path-> location @@ -268,8 +268,9 @@ def _order_images(self): def _normalize_and_save_image(fname): norm = True with rasopen(fname, 'r') as rsrc: - if "normalized" not in rsrc.tags(): - norm = False + if "normalized" in rsrc.tags(): + return + else: rass_arr = rsrc.read() rass_arr = rass_arr.astype(float32) profile = rsrc.profile @@ -278,11 +279,10 @@ def _normalize_and_save_image(fname): scaler = StandardScaler() # z-normalization scaler.fit(rass_arr) rass_arr = scaler.transform(rass_arr) - if not norm: - with rasopen(fname, 'w', **profile) as dst: - dst.write(rass_arr, 1) - print("Normalizing", fname) - dst.update_tags(normalized=True) + with rasopen(fname, 'w', **profile) as dst: + dst.write(rass_arr, 1) + print("Normalized", fname) + dst.update_tags(normalized=True) if __name__ == '__main__': diff --git a/pixel_classification/runner.py b/pixel_classification/runner.py index 5638c05..0812c57 100644 --- a/pixel_classification/runner.py +++ b/pixel_classification/runner.py @@ -102,6 +102,10 @@ def classify_scene(path, row, sat, year, eval_directory, model, n_images, result if not os.path.isdir(sub): os.mkdir(sub) + from pixel_classification.runner_from_shapefile import get_shapefile_center + + # lat, lon = get_shapefile_center('/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_3927.shp') + i = ImageStack(root=sub, satellite=sat, path=path, row=row, 
n_landsat=n_images, year=year, max_cloud_pct=70) i.build_evaluating() @@ -139,8 +143,8 @@ def run_targets(directory, model): c_project_dir = os.path.join(model_data, 'stacks') model_training_scenes(t_project_dir, n_images, training_dir, model_name) - # classify_scene(path=37, row=28, sat=8, year=2017,eval_directory=c_project_dir, - # n_images=3, model=model_name) + classify_scene(path=39, row=27, sat=8, year=2015, eval_directory=c_project_dir, + n_images=3, model=model_name) diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 2640123..81b00b2 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -1,15 +1,17 @@ import warnings import glob import os +from multiprocessing import Pool from numpy import save as nsave from compose_array_single_shapefile import PTASingleShapefile +from fiona import open as fopen +from shapely.geometry import shape from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster, create_master_masked_raster def create_training_data(shapefile, shapefile_directory, image_directory, class_code, kernel_size, instances, training_directory, year, raster_directory, save=True): - - p, r = get_shapefile_path_row(shapefile) + p, r = get_shapefile_path_row(shapefile) suff = str(p) + '_' + str(r) + "_" + str(year) landsat_dir = os.path.join(image_directory, suff) satellite = 8 @@ -19,23 +21,24 @@ def create_training_data(shapefile, shapefile_directory, image_directory, class_ os.mkdir(landsat_dir) ims = download_images(landsat_dir, p, r, year, satellite) else: - print("Images may have been downloaded for {}_{}_{}".format(p, r, year)) - print("Check to make sure they're all there.") ims = download_images(landsat_dir, p, r, year, satellite) - + + # print("Paths:", len(ims.paths_map)) + if len(ims.paths_map) > 36: + print("AAAAAAAHHHH") + print(len(ims.paths_map), shapefile) + ms = 
create_master_raster(ims, p, r, year, raster_directory) mms = create_master_masked_raster(ims, p, r, year, raster_directory) shp_path = os.path.join(shapefile_directory, shapefile) pta = PTASingleShapefile(shapefile_path=shp_path, master_raster=ms, - training_directory=training_directory, overwrite_points=True, class_code=class_code, - path=p, row=r, paths_map=ims.paths_map, masks=ims.masks, + training_directory=training_directory, overwrite_points=False, class_code=class_code, + path=p, row=r, paths_map=ims.paths_map, masks=ims.masks, instances=instances, kernel_size=kernel_size) pta.extract_sample() -irrigated = {'MT_Sun_River_2013':2013, - "MT_Huntley_Main_2013":2013} def get_all_shapefiles(to_match, year, data_directory, irrigated): @@ -52,55 +55,87 @@ def get_all_shapefiles(to_match, year, data_directory, irrigated): oup = True if not oup: ls.append(f) - return ls +def shapefile_area(shapefile): + summ = 0 + with fopen(shapefile, "r") as src: + for feat in src: + poly = shape(feat['geometry']) + summ += poly.area + return summ + +def get_total_area(data_directory, filenames): + ''' Gets the total area of the polygons + in the files in filenames ''' + tot = 0 + for f in glob.glob(data_directory + "*.shp"): + if "sample" not in f: + for f2 in filenames: + if f2 in f: + tot += shapefile_area(f) + return tot + +def required_points(shapefile, total_area, total_instances): + area = shapefile_area(shapefile) + frac = area / total_area + return int(total_instances * frac) + +def extract_data(data_directory, names, n_instances): + + def is_it(f, names): + for e in names: + if e in f: + return True + return False + + total_area = get_total_area(data_directory, names) # units? 
+ for f in glob.glob(data_directory + "*.shp"): + if is_it(f, names) and 'sample' not in f: + req_points = required_points(f, total_area, n_instances) + ff = os.path.basename(f) + create_training_data(ff, data_directory, image_directory, 0, 41, + req_points, train_dir, 2013, raster_dir) + + return None + +def go(f): + data_directory = 'split_shapefiles_west/' + shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup' + fname = os.path.basename(f) + split_shapefile(shp_dir, fname, data_directory) + if __name__ == "__main__": - - shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/' + + irrigated = ['MT_Sun_River_2013', "MT_Huntley_Main_2013"] + other = ['other'] + fallow = ['Fallow'] + forest = ['Forrest'] + train_dir = 'training_data/' - data_directory = 'shapefile_data/' + data_directory = 'shapefile_data_western_us/' image_directory = 'image_data/' raster_dir = 'master_rasters' kernel_size = 57 - # for f in glob.glob(shp_dir + "*.shp"): - # fname = os.path.basename(f) - # split_shapefile(shp_dir, fname, data_directory) - - from pprint import pprint - for irr in irrigated: - for f in glob.glob(data_directory + "*.shp"): - if "sample" not in f: - if irr in f: - shp = os.path.basename(f) - others = get_all_shapefiles(shp, irrigated[irr], data_directory, irrigated) - pprint(others) - year = irrigated[irr] - class_code = 1 - instances = 30000 - shp = os.path.basename(f) - n = shp[:-4] + "_train.h5" - if not os.path.isfile(os.path.join(train_dir, n)): - print("Creating training data for {}".format(n)) - create_training_data(shp, data_directory, image_directory, - class_code=class_code, kernel_size=kernel_size, instances=instances, training_directory=train_dir, year=year, raster_directory=raster_dir) - else: - print("Training data already created for", n) - - for ff in others: - shp = os.path.basename(ff) - year = 2017 - class_code = 0 - n = shp[:-4] + "_train.h5" - if not os.path.isfile(os.path.join(train_dir, n)): - 
print("Creating training data for {}".format(n)) - create_training_data(shp, data_directory, image_directory, - class_code=class_code, kernel_size=kernel_size, instances=10000, training_directory=train_dir, year=year, raster_directory=raster_dir) - else: - print("Training data already created for", n) - + + shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup/' + fnames = [f for f in glob.glob(shp_dir + "*.shp") if 'reproj' in f] + print(fnames) + # print(os.cpu_count()) + instances = [1e5]*4 + dd = [data_directory]*4 + names = [irrigated, other, fallow, forest] + # note: the extraction of training data took 6h 29m + #with Pool(processes=None) as pool: + # pool.starmap(extract_data, zip(dd, names, instances)) + + with Pool(os.cpu_count()) as pool: + pool.map(go, fnames) + # 12 minutes to 5 and a half. + + From 62676a1ce6eafab945b776e43dad49bb48ed6788 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 18 Jan 2019 12:56:33 -0700 Subject: [PATCH 09/89] Input pipeline mostly done. 
split_shapefiles needs to be improved --- pixel_classification/keras_cnn.py | 99 ++++++++----------- pixel_classification/runner_from_shapefile.py | 12 --- pixel_classification/shuffle_data.py | 58 +++++++++++ 3 files changed, 100 insertions(+), 69 deletions(-) create mode 100644 pixel_classification/shuffle_data.py diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py index 986be39..532c67a 100644 --- a/pixel_classification/keras_cnn.py +++ b/pixel_classification/keras_cnn.py @@ -1,37 +1,36 @@ import h5py -import glob +from glob import glob import tensorflow as tf import numpy as np +from shuffle_data import next_batch -N_INSTANCES_IRRIGATED = 30000 -N_INSTANCES_NOT = 10000 - -def keras_model(kernel_size): +def keras_model(kernel_size, n_classes): model = tf.keras.Sequential() # Must define the input shape in the first layer of the neural network model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', - input_shape=(kernel_size, kernel_size, 3))) + input_shape=(36, kernel_size, kernel_size))) model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) model.add(tf.keras.layers.Dropout(0.3)) model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')) - model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) model.add(tf.keras.layers.Dropout(0.3)) + model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) + model.add(tf.keras.layers.Dropout(0.3)) model.add(tf.keras.layers.Flatten()) model.add(tf.keras.layers.Dense(256, activation='relu')) model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Dense(2, activation='softmax')) + model.add(tf.keras.layers.Dense(n_classes, activation='softmax')) # Take a look at the model summary model.summary() return model -def train_model(kernel_size): +def train_model(kernel_size, features, labels, n_classes=4): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split(features, labels, 
test_size=0.1, random_state=42) - model = keras_model(kernel_size) - model.compile(loss='binary_crossentropy', + model = keras_model(kernel_size, n_classes) + model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) model.fit(x_train, @@ -46,56 +45,42 @@ def train_model(kernel_size): def make_one_hot(labels, n_classes): ret = np.zeros((len(labels), n_classes)) for i, e in enumerate(labels): - ret[i, e] = 1 + ret[i, int(e)] = 1 return ret -def generate_labels_and_features(filename, class_code, index_1, index_2, n_classes=2): - # approach: - # I have n files containing training data on disk. - # Loop through all classes and sample a subset - # of each file. This actually shouldn't be that hard. - # Then, shuffle the data (in memory?) and split it - # into training and test sets. - with h5py.File(filename, 'r') as f: - data = f['cc:'+str(class_code)] - labels = [class_code]*(index_2-index_1) - labels = make_one_hot(labels, n_classes=n_classes) - - return data[index_1:index_2, :, :, :] # this is an assumption about the shape of the data - -def shuffle_data(training_directory, suffix='.h5'): - # Make piles, and shuffle that way. - # Reference that website. 
- # approach: - # for each (h5) file in directory: - # open it, and make piles with it (in parallel) - # then combine each litle pile into a large pile, - # but iterate through the littler piles when - # creating the big pile - return None - - - - - - - - - +def get_next_batch(file_map, n_classes=4): + features, labels = next_batch(file_map) + labels = make_one_hot(labels, n_classes) + return features, labels + +def is_it(f, targets): + for e in targets: + if e in f and 'sample' not in f: + return True + return False + + +if __name__ == '__main__': + train_dir = 'training_data/' + irrigated = ['MT_Sun_River_2013', "MT_Huntley_Main_2013"] + other = ['other'] + fallow = ['Fallow'] + forest = ['Forrest'] + n = 10000 + irr = {'files':[f for f in glob(train_dir + "*.h5") if is_it(f, irrigated)], 'instances':n} + fall = {'files':[f for f in glob(train_dir + "*.h5") if is_it(f, fallow)], 'instances':n} + forest_ = {'files':[f for f in glob(train_dir + "*.h5") if is_it(f, forest)], 'instances':n} + other_ = {'files':[f for f in glob(train_dir + "*.h5") if is_it(f, other)], 'instances':n} + #fall = [f for f in glob(shp_dir) if is_it(f, fallow)] + #forest_ = [f for f in glob(shp_dir) if is_it(f, forest)] + #other_ = [f for f in glob(shp_dir) if is_it(f, other)] + file_map = {0: irr, 1:fall, 2:forest_, 3:other_} - - - - - - - - - - - - + for i in range(2): + features, labels = get_next_batch(file_map) + print(features.shape, labels.shape) + train_model(41, features, labels) diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 81b00b2..038d0d3 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -136,15 +136,3 @@ def go(f): # 12 minutes to 5 and a half. 
- - - - - - - - - - - - diff --git a/pixel_classification/shuffle_data.py b/pixel_classification/shuffle_data.py new file mode 100644 index 0000000..e2c4a03 --- /dev/null +++ b/pixel_classification/shuffle_data.py @@ -0,0 +1,58 @@ +import h5py +import numpy as np + +def next_batch(file_map): + '''File map: {class_code:{files:[], instances:int}}''' + features = [] + labels = [] + for class_code in file_map: + files = file_map[class_code]['files'] + n_instances = file_map[class_code]['instances'] + f = load_sample(n_instances, files) + l = np.ones(f.shape[0])*class_code + labels.append(l) + features.append(f) + feat_flat = [itm for sublist in features for itm in sublist] + labels_flat = [itm for sublist in labels for itm in sublist] + labels_flat = np.asarray(labels_flat) + features_flat = np.asarray(feat_flat) + return features_flat, labels_flat + +def load_sample(required_instances, fnames): + ''' Fnames: filenames of all files of class_code class + required_instances: number of instances of training data required ''' + total_instances, num_files = get_total_instances(fnames) + random_sample = np.random.choice(total_instances, required_instances, replace=False) + random_sample.sort() + ls = [] + last = 0 + offset = 0 + for f in fnames: + with h5py.File(f, 'r') as hdf5: + for key in hdf5: + if hdf5[key].shape[0]: + last = offset + offset += hdf5[key].shape[0] + indices = random_sample[random_sample < offset] + indices = indices[indices > last] + try: + ls.append(hdf5[key][indices-last, :, :, :]) + except UnboundLocalError as e: + pass + + flattened = [e for sublist in ls for e in sublist] + return np.asarray(flattened) + +def get_total_instances(fnames): + total_instances = 0 + num_keys = 0 + for f in fnames: + with h5py.File(f, 'r') as hdf5: + for key in hdf5: + if hdf5[key].shape[0]: + total_instances += hdf5[key].shape[0] + num_keys += 1 + return total_instances, num_keys + +if __name__ == '__main__': + pass From 52b4ac5ffa1dce375a0ad54164ff0420465e87b8 Mon Sep 
17 00:00:00 2001 From: Thomas Colligan Date: Tue, 22 Jan 2019 12:41:08 -0700 Subject: [PATCH 10/89] Working on shuffling the dataset out-of-core --- pixel_classification/keras_cnn.py | 59 +++++++++++++--------------- pixel_classification/shuffle_data.py | 42 ++++++++++---------- 2 files changed, 48 insertions(+), 53 deletions(-) diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py index 532c67a..89f3b37 100644 --- a/pixel_classification/keras_cnn.py +++ b/pixel_classification/keras_cnn.py @@ -1,8 +1,9 @@ import h5py from glob import glob import tensorflow as tf +from sklearn.model_selection import train_test_split import numpy as np -from shuffle_data import next_batch +from shuffle_data import one_epoch def keras_model(kernel_size, n_classes): model = tf.keras.Sequential() @@ -20,35 +21,34 @@ def keras_model(kernel_size, n_classes): model.add(tf.keras.layers.Dense(n_classes, activation='softmax')) # Take a look at the model summary model.summary() + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) return model -def train_model(kernel_size, features, labels, n_classes=4): - - from sklearn.model_selection import train_test_split +def train_next_batch(model, features, labels, n_classes=4, epochs=5, batch_size=128): + # shuffle the labels again x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.1, random_state=42) - - model = keras_model(kernel_size, n_classes) - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) model.fit(x_train, y_train, - batch_size=128, - epochs=10, + batch_size=batch_size, + epochs=epochs, validation_data=(x_test, y_test)) - score = model.evaluate(x_test, y_test, verbose=0) - print('\n', 'Test accuracy:', score[1]) return model + +def evaluate_model(features, labels): + score = model.evaluate(features, labels, verbose=0) + print('\n', 'Test accuracy:', score[1], '\n') + def make_one_hot(labels, 
n_classes): ret = np.zeros((len(labels), n_classes)) for i, e in enumerate(labels): ret[i, int(e)] = 1 return ret - def get_next_batch(file_map, n_classes=4): features, labels = next_batch(file_map) labels = make_one_hot(labels, n_classes) @@ -63,24 +63,21 @@ def is_it(f, targets): if __name__ == '__main__': train_dir = 'training_data/' - irrigated = ['MT_Sun_River_2013', "MT_Huntley_Main_2013"] - other = ['other'] - fallow = ['Fallow'] - forest = ['Forrest'] - n = 10000 - irr = {'files':[f for f in glob(train_dir + "*.h5") if is_it(f, irrigated)], 'instances':n} - fall = {'files':[f for f in glob(train_dir + "*.h5") if is_it(f, fallow)], 'instances':n} - forest_ = {'files':[f for f in glob(train_dir + "*.h5") if is_it(f, forest)], 'instances':n} - other_ = {'files':[f for f in glob(train_dir + "*.h5") if is_it(f, other)], 'instances':n} - #fall = [f for f in glob(shp_dir) if is_it(f, fallow)] - #forest_ = [f for f in glob(shp_dir) if is_it(f, forest)] - #other_ = [f for f in glob(shp_dir) if is_it(f, other)] + n_epochs = 40 + model = keras_model(41, 4) + + for i in range(n_epochs): + + random_indices = np.random.choice(total_instances, total_instances, repeat=False) + + + + if i > 0: + evaluate_model(features, labels) + + train_next_batch(model, features, labels, epochs=1) + - file_map = {0: irr, 1:fall, 2:forest_, 3:other_} - for i in range(2): - features, labels = get_next_batch(file_map) - print(features.shape, labels.shape) - train_model(41, features, labels) diff --git a/pixel_classification/shuffle_data.py b/pixel_classification/shuffle_data.py index e2c4a03..8170c2b 100644 --- a/pixel_classification/shuffle_data.py +++ b/pixel_classification/shuffle_data.py @@ -1,29 +1,24 @@ import h5py +from collections import defaultdict import numpy as np -def next_batch(file_map): - '''File map: {class_code:{files:[], instances:int}}''' - features = [] - labels = [] - for class_code in file_map: - files = file_map[class_code]['files'] - n_instances = 
file_map[class_code]['instances'] - f = load_sample(n_instances, files) - l = np.ones(f.shape[0])*class_code - labels.append(l) - features.append(f) - feat_flat = [itm for sublist in features for itm in sublist] - labels_flat = [itm for sublist in labels for itm in sublist] - labels_flat = np.asarray(labels_flat) - features_flat = np.asarray(feat_flat) - return features_flat, labels_flat - -def load_sample(required_instances, fnames): +random_indices = np.random.choice(total_instances, total_instances, repeat=False) + +def one_epoch(filenames, random_indices, class_code, chunk_size=5000): + ''' Filename is the name of the data file, + instances the number of instances that can fit in memory. + ''' + if not isinstance(filenames, list): + filenames = [filenames] + for i in range(0, indices.shape[0], chunk_size): + ret = load_sample(filenames, random_indices[i:i+chunk_size]) + yield ret + + +def load_sample(fnames, random_indices): ''' Fnames: filenames of all files of class_code class required_instances: number of instances of training data required ''' - total_instances, num_files = get_total_instances(fnames) - random_sample = np.random.choice(total_instances, required_instances, replace=False) - random_sample.sort() + random_indices.sort() ls = [] last = 0 offset = 0 @@ -33,16 +28,19 @@ def load_sample(required_instances, fnames): if hdf5[key].shape[0]: last = offset offset += hdf5[key].shape[0] - indices = random_sample[random_sample < offset] + indices = random_indices[random_indices < offset] indices = indices[indices > last] try: ls.append(hdf5[key][indices-last, :, :, :]) except UnboundLocalError as e: + # When the index array is empty. 
This is + # an unhandled exception in the hdf5 library pass flattened = [e for sublist in ls for e in sublist] return np.asarray(flattened) + def get_total_instances(fnames): total_instances = 0 num_keys = 0 From 725cdf7420c356b39db7f73b14d30b1def9e67e1 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 22 Jan 2019 13:24:53 -0700 Subject: [PATCH 11/89] Finished shuffling data --- pixel_classification/data_shuffle.py | 42 ---------------------------- pixel_classification/keras_cnn.py | 39 +++++++++++++++----------- 2 files changed, 22 insertions(+), 59 deletions(-) delete mode 100644 pixel_classification/data_shuffle.py diff --git a/pixel_classification/data_shuffle.py b/pixel_classification/data_shuffle.py deleted file mode 100644 index 7ebf042..0000000 --- a/pixel_classification/data_shuffle.py +++ /dev/null @@ -1,42 +0,0 @@ -import h5py -import numpy as np - -def next_batch(file_map): - '''File map: {class_code:{files:[], instances:int}}''' - for class_code in file_map: - files = file_map[class_code]['files'] - n_instances = file_map[class_code]['instances'] - - - - -def load_sample(required_instances, fnames, class_code): - ''' Fnames: filenames of all files of class_code class - required_instances: number of instances of training data required ''' - total_instances, num_files = get_total_instances(fnames) - random_sample = np.random.randint(0, total_instances, required_instances) - random_sample.sort() - ls = [] - last = 0 - offset = 0 - for f in fnames: - with h5py.File(f, 'f') as hdf5: - for key in hdf5: - if hdf5[key].shape[0]: - frac_membership = int((hdf5[key].shape[0] / total_instances)*required_instances) - indices = sorted_sample[last:frac_membership] - sorted_sample[last] - last = frac_membership - ls.append(hdf5[key][indices, :, :, :]) - ls = np.asarray(ls) - return ls, np.ones((len(ls)))*class_code - -def get_total_instances(fnames): - total_instances = 0 - num_keys = 0 - for f in fnames: - with h5py.File(f, 'r') as hdf5: - for key in hdf5: - 
if hdf5[key].shape[0]: - total_instances += hdf5[key].shape[0] - num_keys += 1 - return total_instances, num_keys diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py index 89f3b37..935e75b 100644 --- a/pixel_classification/keras_cnn.py +++ b/pixel_classification/keras_cnn.py @@ -60,24 +60,29 @@ def is_it(f, targets): return True return False +def fnames(class_code): + return "training_data/class_{}_train.h5".format(class_code) if __name__ == '__main__': train_dir = 'training_data/' - - n_epochs = 40 - model = keras_model(41, 4) - + n_epochs = 10 + model = keras_model(57, 4) + total_instances = 100000 for i in range(n_epochs): - - random_indices = np.random.choice(total_instances, total_instances, repeat=False) - - - - if i > 0: - evaluate_model(features, labels) - - train_next_batch(model, features, labels, epochs=1) - - - - + random_indices = np.random.choice(total_instances, total_instances, replace=False) + irr = one_epoch(fnames(0), random_indices, 0) + fallow = one_epoch(fnames(1), random_indices, 1) + forest = one_epoch(fnames(2), random_indices, 2) + other = one_epoch(fnames(3), random_indices, 3) + + j = 0 + for irr, fall, fo, ot in zip(irr, fallow, forest, other): + d1, l1 = irr[0], irr[1] + d2, l2 = fall[0], fall[1] + d3, l3 = fo[0], fo[1] + d4, l4 = ot[0], ot[1] + features = np.concatenate((d1, d2, d3, d4)) + labels = np.concatenate((l1, l2, l3, l4)) + labels = make_one_hot(labels, 4) + train_next_batch(model, features, labels, epochs=1) + print("\nCustom epoch {}/{}\n".format(i, n_epochs)) From 689ebfcd3500efe31034c76d656da4f5926abc26 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 22 Jan 2019 14:18:37 -0700 Subject: [PATCH 12/89] Fetching data from disk is the bottleneck --- README.md | 1 + .../compose_array_single_shapefile.py | 2 +- pixel_classification/keras_cnn.py | 27 ++++++++++++++++--- pixel_classification/runner_from_shapefile.py | 19 ++++++------- pixel_classification/shuffle_data.py | 11 +++++--- 5
files changed, 43 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 6375cd3..1586322 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,4 @@ Then get the latest gdal: Then the latest master branch of rasterio: ```pip install git+https://github.com/mapbox/rasterio.git``` +Don't install latest version of rasterio. Install rasterio version=1.0a12. diff --git a/pixel_classification/compose_array_single_shapefile.py b/pixel_classification/compose_array_single_shapefile.py index 582cdab..b84c351 100644 --- a/pixel_classification/compose_array_single_shapefile.py +++ b/pixel_classification/compose_array_single_shapefile.py @@ -107,7 +107,7 @@ def _verify_point(self, x, y): return None def _dump_data(self, data): - n = os.path.basename(self.shapefile_path)[:-4] + "_train.h5" + n = "class_{}_train.h5".format(self.class_code) if self.data_filename is None: to_save = os.path.join(self.training_directory, n) else: diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py index 935e75b..6a8754e 100644 --- a/pixel_classification/keras_cnn.py +++ b/pixel_classification/keras_cnn.py @@ -20,7 +20,7 @@ def keras_model(kernel_size, n_classes): model.add(tf.keras.layers.Dropout(0.5)) model.add(tf.keras.layers.Dense(n_classes, activation='softmax')) # Take a look at the model summary - model.summary() + # model.summary() model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) @@ -63,10 +63,31 @@ def is_it(f, targets): def fnames(class_code): return "training_data/class_{}_train.h5".format(class_code) +# Yield the concatenated training array? 
+ +def generate(): + total_instances = 100000 + random_indices = np.random.choice(total_instances, total_instances, replace=False) + irr = one_epoch(fnames(0), random_indices, 0) + fallow = one_epoch(fnames(1), random_indices, 1) + forest = one_epoch(fnames(2), random_indices, 2) + other = one_epoch(fnames(3), random_indices, 3) + j = 0 + for irr, fall, fo, ot in zip(irr, fallow, forest, other): + d1, l1 = irr[0], irr[1] + d2, l2 = fall[0], fall[1] + d3, l3 = fo[0], fo[1] + d4, l4 = ot[0], ot[1] + features = np.concatenate((d1, d2, d3, d4)) + labels = np.concatenate((l1, l2, l3, l4)) + yield (features, labels) + if __name__ == '__main__': train_dir = 'training_data/' n_epochs = 10 model = keras_model(57, 4) +# model.fit_generator(generate(), epochs=2, use_multiprocessing=True, steps_per_epoch=25) + total_instances = 100000 for i in range(n_epochs): random_indices = np.random.choice(total_instances, total_instances, replace=False) @@ -75,7 +96,6 @@ def fnames(class_code): forest = one_epoch(fnames(2), random_indices, 2) other = one_epoch(fnames(3), random_indices, 3) - j = 0 for irr, fall, fo, ot in zip(irr, fallow, forest, other): d1, l1 = irr[0], irr[1] d2, l2 = fall[0], fall[1] @@ -83,6 +103,7 @@ def fnames(class_code): d4, l4 = ot[0], ot[1] features = np.concatenate((d1, d2, d3, d4)) labels = np.concatenate((l1, l2, l3, l4)) - labels = make_one_hot(labels, 4) + # labels = make_one_hot(labels, 4) train_next_batch(model, features, labels, epochs=1) + print("\nCustom epoch {}/{}\n".format(i, n_epochs)) diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 038d0d3..e81f85b 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -81,7 +81,7 @@ def required_points(shapefile, total_area, total_instances): frac = area / total_area return int(total_instances * frac) -def extract_data(data_directory, names, n_instances): +def extract_data(data_directory, 
names, n_instances, class_code): def is_it(f, names): for e in names: @@ -94,9 +94,8 @@ def is_it(f, names): if is_it(f, names) and 'sample' not in f: req_points = required_points(f, total_area, n_instances) ff = os.path.basename(f) - create_training_data(ff, data_directory, image_directory, 0, 41, - req_points, train_dir, 2013, raster_dir) - + create_training_data(ff, data_directory, image_directory, + class_code, 57, req_points, train_dir, 2013, raster_dir) return None def go(f): @@ -124,15 +123,17 @@ def go(f): fnames = [f for f in glob.glob(shp_dir + "*.shp") if 'reproj' in f] print(fnames) # print(os.cpu_count()) - instances = [1e5]*4 + instances = [50000, 1e5, 1e5, 1e5] + class_code = [0, 1, 2, 3] dd = [data_directory]*4 names = [irrigated, other, fallow, forest] # note: the extraction of training data took 6h 29m - #with Pool(processes=None) as pool: - # pool.starmap(extract_data, zip(dd, names, instances)) + extract_data(dd[0], fallow, 1e5, 2) +# with Pool(8) as pool: +# pool.starmap(extract_data, zip(dd, names, instances, class_code)) - with Pool(os.cpu_count()) as pool: - pool.map(go, fnames) + #with Pool(os.cpu_count()) as pool: + # pool.map(go, fnames) # 12 minutes to 5 and a half. diff --git a/pixel_classification/shuffle_data.py b/pixel_classification/shuffle_data.py index 8170c2b..468d9c3 100644 --- a/pixel_classification/shuffle_data.py +++ b/pixel_classification/shuffle_data.py @@ -2,18 +2,21 @@ from collections import defaultdict import numpy as np -random_indices = np.random.choice(total_instances, total_instances, repeat=False) - def one_epoch(filenames, random_indices, class_code, chunk_size=5000): ''' Filename is the name of the data file, instances the number of instances that can fit in memory. 
''' if not isinstance(filenames, list): filenames = [filenames] - for i in range(0, indices.shape[0], chunk_size): + for i in range(0, random_indices.shape[0], chunk_size): ret = load_sample(filenames, random_indices[i:i+chunk_size]) - yield ret + yield ret, make_one_hot(np.ones((ret.shape[0]))*class_code, 4) +def make_one_hot(labels, n_classes): + ret = np.zeros((len(labels), n_classes)) + for i, e in enumerate(labels): + ret[i, int(e)] = 1 + return ret def load_sample(fnames, random_indices): ''' Fnames: filenames of all files of class_code class From 6d37d0a4314b15827e33bbabda0866796323c0db Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Mon, 28 Jan 2019 09:22:01 -0700 Subject: [PATCH 13/89] Added evaluation of images --- .../compose_array_single_shapefile.py | 16 ++- pixel_classification/evaluate_image.py | 129 ++++++++++++++++++ pixel_classification/keras_cnn.py | 59 ++++---- pixel_classification/runner_from_shapefile.py | 38 +++--- pixel_classification/shuffle_data.py | 4 +- 5 files changed, 194 insertions(+), 52 deletions(-) create mode 100644 pixel_classification/evaluate_image.py diff --git a/pixel_classification/compose_array_single_shapefile.py b/pixel_classification/compose_array_single_shapefile.py index b84c351..ef14d5e 100644 --- a/pixel_classification/compose_array_single_shapefile.py +++ b/pixel_classification/compose_array_single_shapefile.py @@ -35,7 +35,7 @@ from shapely.ops import unary_union loc = os.path.dirname(__file__) WRS_2 = loc.replace('pixel_classification', - os.path.join('spatial_data', 'wrs2_descending.shp')) + os.path.join('spatial_data', 'wrs2_usa_descending.shp')) ''' This script contains a class meant to gather data from rasters using a polygon shapefile. 
@@ -57,7 +57,7 @@ class PTASingleShapefile: def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, row=None, masked_raster=None, training_directory=None, paths_map=None, masks=None, - instances=None, overwrite_points=None, kernel_size=None, data_filename=None): + instances=None, sz=1000, overwrite_points=None, kernel_size=None, data_filename=None): self.shapefile_path = shapefile_path self.path = path self.object_id = 0 @@ -70,6 +70,7 @@ def __init__(self, master_raster=None, shapefile_path=None, class_code=None, pat self.class_code = class_code self.crs = self._get_crs() self.m_instances = instances + self.sz = sz self.master_raster = master_raster self.masked_raster = masked_raster if masked_raster is not None: @@ -113,17 +114,15 @@ def _dump_data(self, data): else: to_save = self.data_filename with h5py.File(to_save, 'a') as f: - dset = f.create_dataset("cc:{}:{}".format(self.class_code, + pref = os.path.basename(self.shapefile_path) + dset = f.create_dataset("{}_{}".format(pref, str(datetime.now())), data=data) def training_data_from_master_raster(self): ofs = self.kernel_size // 2 - # Query how much memory I have left? - sz = 5000 # some heuristic that indicates when I run out of memory - + sz = self.sz # some heuristic that indicates when I run out of memory tmp_arr = [] - with rasopen(self.master_raster, 'r') as rsrc: rass_arr = rsrc.read() affine = rsrc.transform @@ -135,6 +134,7 @@ def training_data_from_master_raster(self): qq = asarray(tmp_arr) del tmp_arr self._dump_data(qq) + del qq tmp_arr = [] x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) @@ -151,6 +151,8 @@ def training_data_from_master_raster(self): print("Writing to disk...") qq = asarray(tmp_arr) self._dump_data(qq) + del qq + del tmp_arr def create_sample_points(self): """ Create a clipped training set from polygon shapefiles. 
diff --git a/pixel_classification/evaluate_image.py b/pixel_classification/evaluate_image.py new file mode 100644 index 0000000..dc89e3b --- /dev/null +++ b/pixel_classification/evaluate_image.py @@ -0,0 +1,129 @@ +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +from sys import stdout +#os.environ["CUDA_VISIBLE_DEVICES"]="-1" +import numpy as np +import multiprocessing +multiprocessing.set_start_method('spawn', force=True) +from rasterio import open as rasopen +from glob import glob +import numpy.ma as ma +import tensorflow as tf +from tensorflow.keras.models import load_model + +def get_weights(path): + model = tf.keras.models.load_model(path) + return model.weights + +def sub_img_list(im, kernel_size): + ofs = kernel_size // 2 + ls = [] + for i in range(kernel_size, im.shape[1]): + sub_imgs = np.zeros((im.shape[2]-kernel_size, 36, kernel_size, kernel_size)) + k = 0 + for j in range(kernel_size, im.shape[2]): + sub_img = im[:, i-kernel_size:i, j-kernel_size:j] + sub_imgs[k, :, :, :] = sub_img + k += 1 + + ls.append(sub_imgs) + if i % 2 == 0: + yield ls + +class Result: + + def __init__(self, data, idx): + self.data = data + self.idx = idx + +def write_raster(data, name, raster_geo): + + raster_geo['dtype'] = data.dtype + raster_geo['count'] = 1 + with rasopen(name, 'w', **raster_geo) as dst: + dst.write(data) + return None + +def split_image(image, kernel_size): + num_rows = image.shape[1] // os.cpu_count() + leftover = image.shape[1] % os.cpu_count() + ids = [] + arrs = [] + j = 0 + for idx, i in enumerate(range(kernel_size, image.shape[1], num_rows)): + arrs.append(image[:, i-kernel_size:i+num_rows+kernel_size:, :]) + ids.append(j) + j += 1 + + arrs.append(image[ :, image.shape[1]-leftover-kernel_size:, :]) + ids.append(j) + return arrs, ids + +def pool_job(path, image, ids): + model = Network(path) + while True: + eval_image(image, model, ids) + queue.put(os.getpid()) + +def is_target(f, targets): + + for ff in targets: + if ff in f: + return True + return 
False + +def get_prev_mask(target): + + for f in glob('evaluated_images/' + "*.npy"): + if target in f and 'running' in f: + return f + return None + +def eval_image(im, msk, idd): + model_path = 'models/model_kernel_41' + model = load_model(model_path) + kernel_size = 41 + mask = np.zeros((im.shape[1], im.shape[2])) + if msk is not None: + msk = np.load(msk) + mask[:msk.shape[0], :] = msk + begin = msk.shape[0] + del msk + else: + begin = kernel_size + ofs = kernel_size // 2 + for i in range(begin, im.shape[1]): + sub_imgs = np.zeros((im.shape[2]-kernel_size, 36, kernel_size, kernel_size)) + k = 0 + for j in range(kernel_size, im.shape[2]): + sub_img = im[:, i-kernel_size:i, j-kernel_size:j] + sub_imgs[k, :, :, :] = sub_img + k += 1 + + result = model.predict(sub_imgs) + result = np.argmax(result, axis=1) + mask[i-ofs, kernel_size - ofs: -(kernel_size-ofs-1)] = result + if i % 100 == 0: + np.save("evaluated_images/{}_running_eval".format(idd), mask[:i, :]) + stdout.write("\r{:.5f}".format(float(i)/im.shape[1])) + + np.save("evaluated_images/eval_{}".format(idd), mask) + return Result(mask, idd) + + +if __name__ == '__main__': + + path = 'models/model_kernel_41' + targets = ['38_27_2013', '40_26_2013', '40_27_2013', '39_27_2013', + '39_26_2013'] + i = 0 + kernel_size = 41 + for f in glob("master_rasters/to_eval/" + "*.tif"): + stdout.write("\rEvaluating image {}\n".format(f)) + with rasopen(f, 'r') as src: + raster_geo = src.meta.copy() + im = src.read() + eval_image(im, None, os.path.basename(f)) + + + diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py index 6a8754e..c257538 100644 --- a/pixel_classification/keras_cnn.py +++ b/pixel_classification/keras_cnn.py @@ -1,4 +1,5 @@ import h5py +import os from glob import glob import tensorflow as tf from sklearn.model_selection import train_test_split @@ -30,7 +31,7 @@ def train_next_batch(model, features, labels, n_classes=4, epochs=5, batch_size= # shuffle the labels again x_train, 
x_test, y_train, y_test = train_test_split(features, labels, - test_size=0.1, random_state=42) + test_size=0.01, random_state=42) model.fit(x_train, y_train, batch_size=batch_size, @@ -80,30 +81,40 @@ def generate(): d4, l4 = ot[0], ot[1] features = np.concatenate((d1, d2, d3, d4)) labels = np.concatenate((l1, l2, l3, l4)) - yield (features, labels) + p = np.random.permutation(features.shape[0]) + yield (features[p], labels[p]) if __name__ == '__main__': train_dir = 'training_data/' - n_epochs = 10 - model = keras_model(57, 4) -# model.fit_generator(generate(), epochs=2, use_multiprocessing=True, steps_per_epoch=25) + model_dir = 'models/' + n_epochs = 1 + kernel_size = 41 + model = keras_model(kernel_size, 4) # kernel and n_classes + model.fit_generator(generate(), steps_per_epoch=200, epochs=2, verbose=1, + use_multiprocessing=True) + # possible: fit_generator from keras. + # model_name = 'model_kernel_{}'.format(kernel_size) + # total_instances = 100000 + # for i in range(n_epochs): + # random_indices = np.random.choice(total_instances, total_instances, replace=False) + # irr = one_epoch(fnames(0), random_indices, 0) + # fallow = one_epoch(fnames(1), random_indices, 1) + # forest = one_epoch(fnames(2), random_indices, 2) + # other = one_epoch(fnames(3), random_indices, 3) + + # for irr, fall, fo, ot in zip(irr, fallow, forest, other): + # d1, l1 = irr[0], irr[1] + # d2, l2 = fall[0], fall[1] + # d3, l3 = fo[0], fo[1] + # d4, l4 = ot[0], ot[1] + # features = np.concatenate((d1, d2, d3, d4)) + # labels = np.concatenate((l1, l2, l3, l4)) + # train_next_batch(model, features, labels, epochs=1) + + # print("\nCustom epoch {}/{}\n".format(i+1, n_epochs)) + + # model_path = os.path.join(model_dir, model_name) + # if not os.path.isfile(model_path): + # model.save(model_path) + - total_instances = 100000 - for i in range(n_epochs): - random_indices = np.random.choice(total_instances, total_instances, replace=False) - irr = one_epoch(fnames(0), random_indices, 0) - fallow 
= one_epoch(fnames(1), random_indices, 1) - forest = one_epoch(fnames(2), random_indices, 2) - other = one_epoch(fnames(3), random_indices, 3) - - for irr, fall, fo, ot in zip(irr, fallow, forest, other): - d1, l1 = irr[0], irr[1] - d2, l2 = fall[0], fall[1] - d3, l3 = fo[0], fo[1] - d4, l4 = ot[0], ot[1] - features = np.concatenate((d1, d2, d3, d4)) - labels = np.concatenate((l1, l2, l3, l4)) - # labels = make_one_hot(labels, 4) - train_next_batch(model, features, labels, epochs=1) - - print("\nCustom epoch {}/{}\n".format(i, n_epochs)) diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index e81f85b..15d4669 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -1,6 +1,7 @@ import warnings import glob import os +import gc from multiprocessing import Pool from numpy import save as nsave from compose_array_single_shapefile import PTASingleShapefile @@ -9,7 +10,7 @@ from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster, create_master_masked_raster def create_training_data(shapefile, shapefile_directory, image_directory, class_code, - kernel_size, instances, training_directory, year, raster_directory, save=True): + kernel_size, instances, training_directory, year, raster_directory, chunk_size=2000, save=True): p, r = get_shapefile_path_row(shapefile) suff = str(p) + '_' + str(r) + "_" + str(year) @@ -23,11 +24,6 @@ def create_training_data(shapefile, shapefile_directory, image_directory, class_ else: ims = download_images(landsat_dir, p, r, year, satellite) - # print("Paths:", len(ims.paths_map)) - if len(ims.paths_map) > 36: - print("AAAAAAAHHHH") - print(len(ims.paths_map), shapefile) - ms = create_master_raster(ims, p, r, year, raster_directory) mms = create_master_masked_raster(ims, p, r, year, raster_directory) @@ -35,12 +31,10 @@ def create_training_data(shapefile, shapefile_directory, 
image_directory, class_ pta = PTASingleShapefile(shapefile_path=shp_path, master_raster=ms, training_directory=training_directory, overwrite_points=False, class_code=class_code, path=p, row=r, paths_map=ims.paths_map, masks=ims.masks, - instances=instances, kernel_size=kernel_size) + instances=instances, kernel_size=kernel_size, sz=chunk_size) pta.extract_sample() - - def get_all_shapefiles(to_match, year, data_directory, irrigated): ''' Get all shapefiles in same p/r as to_match ''' p, r = get_shapefile_path_row(os.path.join(data_directory, to_match)) @@ -81,7 +75,7 @@ def required_points(shapefile, total_area, total_instances): frac = area / total_area return int(total_instances * frac) -def extract_data(data_directory, names, n_instances, class_code): +def extract_data(data_directory, names, n_instances, class_code, kernel_size): def is_it(f, names): for e in names: @@ -95,8 +89,8 @@ def is_it(f, names): req_points = required_points(f, total_area, n_instances) ff = os.path.basename(f) create_training_data(ff, data_directory, image_directory, - class_code, 57, req_points, train_dir, 2013, raster_dir) - return None + class_code, kernel_size, req_points, train_dir, 2013, raster_dir) + gc.collect() def go(f): data_directory = 'split_shapefiles_west/' @@ -121,16 +115,22 @@ def go(f): shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup/' fnames = [f for f in glob.glob(shp_dir + "*.shp") if 'reproj' in f] - print(fnames) - # print(os.cpu_count()) instances = [50000, 1e5, 1e5, 1e5] + i2 = [1e5, 1e5] class_code = [0, 1, 2, 3] - dd = [data_directory]*4 - names = [irrigated, other, fallow, forest] + c2 = [2, 3] + dd = [data_directory]*2 + dd2 = dd.copy() + ks = [41]*2 + ks2 = ks.copy() + names = [irrigated, other] + names2 = [fallow, forest] # note: the extraction of training data took 6h 29m - extract_data(dd[0], fallow, 1e5, 2) -# with Pool(8) as pool: -# pool.starmap(extract_data, zip(dd, names, instances, class_code)) + # extract_data(dd[0], 
fallow, 1e5, 2, 41) + with Pool() as pool: + pool.starmap(extract_data, zip(dd, names, instances, class_code, ks)) + with Pool() as pool: + pool.starmap(extract_data, zip(dd2, names2, i2, c2, ks2)) #with Pool(os.cpu_count()) as pool: # pool.map(go, fnames) diff --git a/pixel_classification/shuffle_data.py b/pixel_classification/shuffle_data.py index 468d9c3..1255bef 100644 --- a/pixel_classification/shuffle_data.py +++ b/pixel_classification/shuffle_data.py @@ -2,9 +2,9 @@ from collections import defaultdict import numpy as np -def one_epoch(filenames, random_indices, class_code, chunk_size=5000): +def one_epoch(filenames, random_indices, class_code, chunk_size=500): ''' Filename is the name of the data file, - instances the number of instances that can fit in memory. + chunk_size the number of instances that can fit in memory. ''' if not isinstance(filenames, list): filenames = [filenames] From e023e4da6dc9152fd4de1e3ac264019ab3b0c697 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Mon, 28 Jan 2019 14:26:31 -0700 Subject: [PATCH 14/89] Black boxes are masked --- pixel_classification/classify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixel_classification/classify.py b/pixel_classification/classify.py index a2656e8..170fd0d 100644 --- a/pixel_classification/classify.py +++ b/pixel_classification/classify.py @@ -122,7 +122,7 @@ def get_stack(self, image_data, outfile=None, mask_path=None): self.final_shape = 1, stack.shape[1], stack.shape[2] stack = stack.reshape((stack.shape[0], stack.shape[1] * stack.shape[2])) - stack[stack == 0.] = np.nan + stack[stack == 0.] 
= np.nan # for "borders" if mask_path: ms = self.mask.shape From 6bc505ee7a7aeeb8cd70b9252a92d37c24b2ba8b Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 30 Jan 2019 15:26:29 -0700 Subject: [PATCH 15/89] improved split shapefile --- pixel_classification/data_utils.py | 73 ++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 18d64e7..0c95f5e 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -149,6 +149,24 @@ def download_images(project_directory, path, row, year, satellite=8): return image_stack +def construct_tree(wrs2): + from sklearn.neighbors import KDTree + centroids = [] + path_rows = [] # a mapping + features = [] + for feature in wrs2: + tile = shape(feature['geometry']) + centroid = tile.centroid + centroids.append(centroid) + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + path_rows.append(str(p) + "_" + str(r)) + features.append(feature) + + tree = KDTree(centroids) + return tree, path_rows, features + def get_pr(poly, wrs2): ls = [] for feature in wrs2: @@ -160,13 +178,31 @@ def get_pr(poly, wrs2): ls.append(str(p) + "_" + str(r)) return ls +def get_pr_subset(poly, tiles): + ls = [] + for feature in tiles: + tile = shape(feature['geometry']) + if poly.within(tile): + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + ls.append(str(p) + "_" + str(r)) + return ls + def split_shapefile(base, base_shapefile, data_directory): """ Shapefiles may deal with data over multiple path/rows. - Data directory: where the split shapefiles will be saved.""" + Data directory: where the split shapefiles will be saved. 
+ base: directory containing base_shapefile.""" path_row = defaultdict(list) id_mapping = {} wrs2 = fiona.open('../spatial_data/wrs2_descending_usa.shp', 'r') + tree, path_rows, features = construct_tree(wrs2) + wrs2.close() + + + import time + start = time.time() with fiona.open(os.path.join(base, base_shapefile), "r") as src: meta = deepcopy(src.meta) for feat in src: @@ -178,7 +214,11 @@ def split_shapefile(base, base_shapefile, data_directory): path_row[p].append(idd) wrs2.close() + print(time.time() - start) + start = time.time() + # TODO: Solve this more efficiently. + # DOUBLE TODO: Solve this more efficiently. # I have all path/rows and their corresponding features # I need to figure out the unique features in each path row. # How should I treat the non-unique features? @@ -188,11 +228,11 @@ def split_shapefile(base, base_shapefile, data_directory): # unique set and for each non-unique feature # place it in the path/row with the greatest number of # unique points. - non_unique = defaultdict(list) + non_unique_ids = defaultdict(list) unique = defaultdict(list) for key in path_row: ls = path_row[key] # all features in a given path/row - placeholder = ls + placeholder = ls.copy() for key1 in path_row: if key != key1: ls1 = path_row[key1] @@ -201,24 +241,35 @@ def split_shapefile(base, base_shapefile, data_directory): # features present in placeholder that are not # present in ls1; i.e. 
unique keys unique[key] = list(placeholder) - nu = set(ls) - set(placeholder) # all features present - # in ls that are not present in placeholder (non-unique) - for idd in list(nu): - non_unique[idd].append(key) + if len(ls) != len(placeholder): + nu = set(ls) - set(placeholder) # all features present in ls that are not present in placeholder (non-unique) + for idd in list(nu): + non_unique_ids[idd].append(key) - for key in non_unique: # unique ids + print(time.time() - start) + start = time.time() + match_key = [] + for key in non_unique_ids: # unique ids pr = None hi = 0 - for pathrow in non_unique[key]: # path/rows corresponding to non + for pathrow in non_unique_ids[key]: # path/rows corresponding to non # unique features if len(unique[pathrow]) > hi: pr = pathrow hi = len(unique[pathrow]) + if pr is not None: unique[pr].append(key) + else: + choice = non_unique_ids[key].sort() + unique[choice].append(key) + print(time.time() - start) prefix = os.path.splitext(base_shapefile)[0] for key in unique: + if key is None: + print(key, unique[key]) + continue out = prefix + "_" + key + ".shp" if len(unique[key]): with fiona.open(os.path.join(data_directory, out), 'w', **meta) as dst: @@ -238,7 +289,3 @@ def get_shapefile_path_row(shapefile): if __name__ == "__main__": pass - # base = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/" - # base_shapefile = "MT_Huntley_Main_2013_3728.shp" - # data_directory = "shapefile_data/" - # split_shapefile(base, base_shapefile, data_directory) From 12a50e607b38a1d4f40f73fc9c93c323ab0ba3d3 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 30 Jan 2019 17:23:53 -0700 Subject: [PATCH 16/89] Improved split_shapefile method --- pixel_classification/data_utils.py | 79 ++++++------------- pixel_classification/runner.py | 4 - pixel_classification/runner_from_shapefile.py | 19 ++--- 3 files changed, 32 insertions(+), 70 deletions(-) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 
0c95f5e..922dfe3 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -6,13 +6,12 @@ from lxml import html from requests import get from copy import deepcopy -from numpy import zeros -import re +from numpy import zeros, asarray, array, reshape from rasterio import float32, open as rasopen from prepare_images import ImageStack +from sklearn.neighbors import KDTree import sat_image -WRS_2 = '../spatial_data/wrs2_descending.shp' def create_master_masked_raster(image_stack, path, row, year, raster_directory): masks = image_stack.masks @@ -41,9 +40,6 @@ def create_master_masked_raster(image_stack, path, row, year, raster_directory): try: stack[i, :, :] = arr except ValueError: - import pprint - pprint.pprint(first_geo) - # error was thrown here b/c source raster didn't have crs arr = sat_image.warped_vrt.warp_single_image(mask_raster, first_geo) stack[i, :, :] = arr @@ -100,36 +96,6 @@ def create_master_raster(image_stack, path, row, year, raster_directory): return pth - -def get_path_row(lat, lon): - """ - :param lat: Latitude float - :param lon: Longitude float - 'convert_pr_to_ll' [path, row to coordinates] - :return: lat, lon tuple or path, row tuple - """ - conversion_type = 'convert_ll_to_pr' - base = 'https://landsat.usgs.gov/landsat/lat_long_converter/tools_latlong.php' - unk_number = 1508518830987 - - full_url = '{}?rs={}&rsargs[]={}&rsargs[]={}&rsargs[]=1&rsrnd={}'.format(base, - conversion_type, - lat, lon, - unk_number) - r = get(full_url) - tree = html.fromstring(r.text) - - # remember to view source html to build xpath - # i.e. 
inspect element > network > find GET with relevant PARAMS - # > go to GET URL > view source HTML - p_string = tree.xpath('//table/tr[1]/td[2]/text()') - path = int(re.search(r'\d+', p_string[0]).group()) - - r_string = tree.xpath('//table/tr[1]/td[4]/text()') - row = int(re.search(r'\d+', r_string[0]).group()) - - return path, row - def get_shapefile_lat_lon(shapefile): with fiona.open(shapefile, "r") as src: minx, miny, maxx, maxy = src.bounds @@ -138,7 +104,6 @@ def get_shapefile_lat_lon(shapefile): return latc, lonc - def download_images(project_directory, path, row, year, satellite=8): image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, @@ -148,24 +113,22 @@ def download_images(project_directory, path, row, year, satellite=8): # a cloud mask. return image_stack - -def construct_tree(wrs2): - from sklearn.neighbors import KDTree +def construct_kdtree(wrs2): centroids = [] path_rows = [] # a mapping features = [] for feature in wrs2: tile = shape(feature['geometry']) - centroid = tile.centroid - centroids.append(centroid) + centroid = tile.centroid.coords[0] + centroids.append([centroid[0], centroid[1]]) z = feature['properties'] p = z['PATH'] r = z['ROW'] path_rows.append(str(p) + "_" + str(r)) features.append(feature) - tree = KDTree(centroids) - return tree, path_rows, features + tree = KDTree(asarray(centroids)) + return tree, asarray(path_rows), asarray(features) def get_pr(poly, wrs2): ls = [] @@ -190,33 +153,36 @@ def get_pr_subset(poly, tiles): return ls def split_shapefile(base, base_shapefile, data_directory): - """ Shapefiles may deal with data over multiple path/rows. + """Previous method took ~25 minutes to get all path/rows. + Now, with kdtree, 25 seconds. + Shapefiles may deal with data over multiple path/rows. Data directory: where the split shapefiles will be saved. 
base: directory containing base_shapefile.""" path_row = defaultdict(list) id_mapping = {} wrs2 = fiona.open('../spatial_data/wrs2_descending_usa.shp', 'r') - tree, path_rows, features = construct_tree(wrs2) + tree, path_rows, features = construct_kdtree(wrs2) wrs2.close() - - - import time - start = time.time() + cent_arr = array([0, 0]) with fiona.open(os.path.join(base, base_shapefile), "r") as src: meta = deepcopy(src.meta) for feat in src: idd = feat['id'] id_mapping[idd] = feat poly = shape(feat['geometry']) - prs = get_pr(poly, wrs2) + centroid = poly.centroid.coords[0] + cent_arr[0] = centroid[0] + cent_arr[1] = centroid[1] + centroid = cent_arr.reshape(1, -1) + dist, ind = tree.query(centroid, k=10) + tiles = features[ind[0]] + prs = get_pr_subset(poly, tiles) for p in prs: path_row[p].append(idd) wrs2.close() - print(time.time() - start) - start = time.time() # TODO: Solve this more efficiently. # DOUBLE TODO: Solve this more efficiently. # I have all path/rows and their corresponding features @@ -246,8 +212,6 @@ def split_shapefile(base, base_shapefile, data_directory): for idd in list(nu): non_unique_ids[idd].append(key) - print(time.time() - start) - start = time.time() match_key = [] for key in non_unique_ids: # unique ids pr = None @@ -261,10 +225,11 @@ def split_shapefile(base, base_shapefile, data_directory): if pr is not None: unique[pr].append(key) else: - choice = non_unique_ids[key].sort() + choice = non_unique_ids[key] + choice.sort() + choice = choice[0] unique[choice].append(key) - print(time.time() - start) prefix = os.path.splitext(base_shapefile)[0] for key in unique: if key is None: diff --git a/pixel_classification/runner.py b/pixel_classification/runner.py index 0812c57..bce2242 100644 --- a/pixel_classification/runner.py +++ b/pixel_classification/runner.py @@ -102,10 +102,6 @@ def classify_scene(path, row, sat, year, eval_directory, model, n_images, result if not os.path.isdir(sub): os.mkdir(sub) - from 
pixel_classification.runner_from_shapefile import get_shapefile_center - - # lat, lon = get_shapefile_center('/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_3927.shp') - i = ImageStack(root=sub, satellite=sat, path=path, row=row, n_landsat=n_images, year=year, max_cloud_pct=70) i.build_evaluating() diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 15d4669..1743e29 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -127,13 +127,14 @@ def go(f): names2 = [fallow, forest] # note: the extraction of training data took 6h 29m # extract_data(dd[0], fallow, 1e5, 2, 41) - with Pool() as pool: - pool.starmap(extract_data, zip(dd, names, instances, class_code, ks)) - with Pool() as pool: - pool.starmap(extract_data, zip(dd2, names2, i2, c2, ks2)) - - #with Pool(os.cpu_count()) as pool: + # with Pool() as pool: + # pool.starmap(extract_data, zip(dd, names, instances, class_code, ks)) + # with Pool() as pool: + # pool.starmap(extract_data, zip(dd2, names2, i2, c2, ks2)) + + fnames = [f for f in glob.glob(shp_dir + "*.shp") if 'reproj' in f and 'irri' in f and 'un' not + in f] + go(fnames[0]) + #with Pool() as pool: # pool.map(go, fnames) - # 12 minutes to 5 and a half. - - + # 12 minutes to 5 and a half. 
From 632647a0aea6d2b8ad083f0fdf613ade5a7431da Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Mon, 4 Feb 2019 20:04:59 -0700 Subject: [PATCH 17/89] Added wrs2 over CONUS --- pixel_classification/keras_cnn.py | 74 +++++++++++---------------- pixel_classification/shuffle_data.py | 8 ++- spatial_data/wrs2_descending_usa.cpg | 1 + spatial_data/wrs2_descending_usa.dbf | Bin 0 -> 78148 bytes spatial_data/wrs2_descending_usa.prj | 1 + spatial_data/wrs2_descending_usa.shp | Bin 0 -> 199604 bytes spatial_data/wrs2_descending_usa.shx | Bin 0 -> 5964 bytes 7 files changed, 34 insertions(+), 50 deletions(-) create mode 100644 spatial_data/wrs2_descending_usa.cpg create mode 100644 spatial_data/wrs2_descending_usa.dbf create mode 100644 spatial_data/wrs2_descending_usa.prj create mode 100644 spatial_data/wrs2_descending_usa.shp create mode 100644 spatial_data/wrs2_descending_usa.shx diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py index c257538..18f4a2e 100644 --- a/pixel_classification/keras_cnn.py +++ b/pixel_classification/keras_cnn.py @@ -66,55 +66,39 @@ def fnames(class_code): # Yield the concatenated training array? 
-def generate(): - total_instances = 100000 - random_indices = np.random.choice(total_instances, total_instances, replace=False) - irr = one_epoch(fnames(0), random_indices, 0) - fallow = one_epoch(fnames(1), random_indices, 1) - forest = one_epoch(fnames(2), random_indices, 2) - other = one_epoch(fnames(3), random_indices, 3) - j = 0 - for irr, fall, fo, ot in zip(irr, fallow, forest, other): - d1, l1 = irr[0], irr[1] - d2, l2 = fall[0], fall[1] - d3, l3 = fo[0], fo[1] - d4, l4 = ot[0], ot[1] - features = np.concatenate((d1, d2, d3, d4)) - labels = np.concatenate((l1, l2, l3, l4)) - p = np.random.permutation(features.shape[0]) - yield (features[p], labels[p]) - if __name__ == '__main__': train_dir = 'training_data/' model_dir = 'models/' n_epochs = 1 kernel_size = 41 - model = keras_model(kernel_size, 4) # kernel and n_classes - model.fit_generator(generate(), steps_per_epoch=200, epochs=2, verbose=1, - use_multiprocessing=True) - # possible: fit_generator from keras. - # model_name = 'model_kernel_{}'.format(kernel_size) - # total_instances = 100000 - # for i in range(n_epochs): - # random_indices = np.random.choice(total_instances, total_instances, replace=False) - # irr = one_epoch(fnames(0), random_indices, 0) - # fallow = one_epoch(fnames(1), random_indices, 1) - # forest = one_epoch(fnames(2), random_indices, 2) - # other = one_epoch(fnames(3), random_indices, 3) - - # for irr, fall, fo, ot in zip(irr, fallow, forest, other): - # d1, l1 = irr[0], irr[1] - # d2, l2 = fall[0], fall[1] - # d3, l3 = fo[0], fo[1] - # d4, l4 = ot[0], ot[1] - # features = np.concatenate((d1, d2, d3, d4)) - # labels = np.concatenate((l1, l2, l3, l4)) - # train_next_batch(model, features, labels, epochs=1) - - # print("\nCustom epoch {}/{}\n".format(i+1, n_epochs)) - - # model_path = os.path.join(model_dir, model_name) - # if not os.path.isfile(model_path): - # model.save(model_path) + model_name = 'model_kernel_{}'.format(kernel_size) + total_instances = 100000 + for i in 
range(n_epochs): + random_indices = np.random.choice(total_instances, total_instances, replace=False) + cs = 5342 + irr = one_epoch(fnames(0), random_indices, 0, chunk_size=cs) + fallow = one_epoch(fnames(1), random_indices, 1, chunk_size=cs) + forest = one_epoch(fnames(2), random_indices, 2, chunk_size=cs) + other = one_epoch(fnames(3), random_indices, 3, chunk_size=cs) + + for irr, fall, fo, ot in zip(irr, fallow, forest, other): + d1, l1 = irr[0], irr[1] + print(d1.shape) + d2, l2 = fall[0], fall[1] + print(d2.shape) + d3, l3 = fo[0], fo[1] + print(d3.shape) + d4, l4 = ot[0], ot[1] + print(d4.shape) + #features = np.concatenate((d1, d2, d3, d4)) + #labels = np.concatenate((l1, l2, l3, l4)) + #train_next_batch(model, features, labels, epochs=1) + + print("\nCustom epoch {}/{}\n".format(i+1, n_epochs)) + break + + model_path = os.path.join(model_dir, model_name) + if not os.path.isfile(model_path): + model.save(model_path) diff --git a/pixel_classification/shuffle_data.py b/pixel_classification/shuffle_data.py index 1255bef..ff15bcd 100644 --- a/pixel_classification/shuffle_data.py +++ b/pixel_classification/shuffle_data.py @@ -2,7 +2,7 @@ from collections import defaultdict import numpy as np -def one_epoch(filenames, random_indices, class_code, chunk_size=500): +def one_epoch(filenames, random_indices, class_code, chunk_size=500, n_classes=4): ''' Filename is the name of the data file, chunk_size the number of instances that can fit in memory. 
''' @@ -10,7 +10,7 @@ def one_epoch(filenames, random_indices, class_code, chunk_size=500): filenames = [filenames] for i in range(0, random_indices.shape[0], chunk_size): ret = load_sample(filenames, random_indices[i:i+chunk_size]) - yield ret, make_one_hot(np.ones((ret.shape[0]))*class_code, 4) + yield ret, make_one_hot(np.ones((ret.shape[0]))*class_code, n_classes) def make_one_hot(labels, n_classes): ret = np.zeros((len(labels), n_classes)) @@ -32,12 +32,10 @@ def load_sample(fnames, random_indices): last = offset offset += hdf5[key].shape[0] indices = random_indices[random_indices < offset] - indices = indices[indices > last] + indices = indices[indices >= last] try: ls.append(hdf5[key][indices-last, :, :, :]) except UnboundLocalError as e: - # When the index array is empty. This is - # an unhandled exception in the hdf5 library pass flattened = [e for sublist in ls for e in sublist] diff --git a/spatial_data/wrs2_descending_usa.cpg b/spatial_data/wrs2_descending_usa.cpg new file mode 100644 index 0000000..cd89cb9 --- /dev/null +++ b/spatial_data/wrs2_descending_usa.cpg @@ -0,0 +1 @@ +ISO-8859-1 \ No newline at end of file diff --git a/spatial_data/wrs2_descending_usa.dbf b/spatial_data/wrs2_descending_usa.dbf new file mode 100644 index 0000000000000000000000000000000000000000..a97d85e9b88a5d672be900e857d812cba5acef58 GIT binary patch literal 78148 zcmb`Q&5|@bl9qcIgIQp#*s-cBD69O>T(-?Q24gVY&FR6$SoNM8;QF`_?uF-lIk@lIFzWj+NTvE8?E?30g|L?#1hyVL;{_uzY^LPL256@nI{O#92#`}NL-~ae; z$8Y`R*Wdo>pMU+AUw`|u-Pk3+JZS)8prd`T4)|SG)MjAOGdQ#Q*&JzwaNp^xL2R zHQ#yq`9J^pmtW%x|ERxzm*0-Rzx?rk{^Ni9<6r;EfBk!!VtD=a*Z=-Me*M!w>KOif zRsDbX4GO6wf1Q?Z-9NuhkH@v%I^L$@p}q6f{v1wUd?)3{@%GEd;p2F`JnDBU zoIbC&+kGJ2J|ADVOB{u-9PjmuEY766Ns@fOk*@s^jdZ!+&&N6zh1=)te)&4rTgQHG zS)54^lO#Wmk08aj;y|a*%h%<6sl%Fdy&Z17%1-^a$>J6YO zJAEGSkMlskeV%UjM~9#Kr&*jyCzB*UPNyIZZ35|VrK<^=uiNL5_ZFo7i&GS5(#0gn zkJBYc!%;;#obUbO0eAa6oN2rSf1LWKS)55%lO*|mkoqBXtkdWHcDwas@z(wGa6g@^ 
zl8?8!IFgQ!uj7@@-Q>rykuG1uQ6b%Kr$tqnBpXp(`*^F`tKv+$nIxGu;xe`gB-x05 zZ9-}`qAK~=HLl`Jx|<}KHsbQNuDvZAaqJgy6=xCvYEP1XZl}xFaQ^6ojTkM|4K)vs zg-&&D$6FL964n8dW!i|#p&EdSQjgfO5yxAdTNP&##v77k+K9^`zGaR`*ofZI^zQ8w z8*wSaaJ<*KRdFUAO_EF-aXHi>%-XgQ$9tVy6=#x6_!lNbq-G;7Tw2Q)q~mqn_l}SA zwZ4V{&L329CY?=^{5YLkk2utI7&>8j#Uk$GQ%E zT#p)~o*Ffdy_a8(RgqO3N#~2kindSM=4e{kzmsS;L(#<5v z95rs2>Pev!qsDr^_xM_j?w4boTNP*0-6Y8zHE!dzgHDVZ<7`FHsW@vo)kJUYK%R?qXrcpugjC-L}I+4oh)NmZOlN0TIT)VPh~EA1p}ztZV+|5|qKx&kj) z0KaC^$t1}fHE!4VR{M!a$LqD)T<>Dbnbp>1cX?8rNoSKJbJVz9>pC>+^7-|+jeYO& z`8X|_Zt;sMj->11i)%mS$D!H1$D^(elg^)-mJhY}_`II3eOi0#=Aw!->0y%O$D#G| z$784|s_J$=KbAFmIDZ~5_nZZN9KOD?7J6|c-ELour78J^uBX%SH0ng7TpxW#hn@$f zOQ$|9YdT}NlOp^RK*nfgF>U90QpKpDD73uiNbgzsqhtG5C#hG+8Nsro9BX=HVW{0oO z>-o;`*N?{(Un^^5fvS z5u`YuIAz(0Yt@h6l#RGv`{8Tei75z8XPxdQNrsJpl-FTHoj*@^>MywdbH{Al(@RIi zs@qu>(pM)b8J)&m4kfOuSB=>S=9O!h6UVhG=)JR;W zN7eG;b^W{^9^Ke|EPj#2k;J@9AB5!lzkD1&F1Jg3D-L}`fqVBR^zQZZ@*qx7>{5?| zJ?(p9@Igqv6r{18JYyN#ZWRkF=a%>zb>r8l(-9-g>@}okEa>#;ewTk^KX;r1Hb1vi zFXvXIr#iu4v`pz4i%DypG-Ij5ioTk$lyfW6)9XnLJlv3;v6z%&fK_62XU3A#gZ7qZ zEd63G=T_0nr`MAhc(@@wV=<{WwGM1Qw@)MWmSSO#K3yv3R-{mVYA1QPkR&~0F)7CY z9n_@bZ5TB?V_CSv<=l!CRX>yPa3M*0#$wWV<+iBXpl6G^RdbeeD^gVbOk$D@Nz#6< zNyAl2?Hvx&Y?-SmuN)n|&ZCMm2@e;Nr2SlzqNBFTgM^>E2wC&C{oEP}Ays`>aVBAe zAW446&owEHbVETpEQ8~XbU1hOe(V#nD$XQ4Tu74kb4BWbJENMPJJe|NbE`Saxm88J zvJvudAxVD7&owE|r>R8kF}3SmTJOsHJl7=eqg>99UL18gUFChIwD)=ZxH1b5M*HKK z7myY%|5;!LuQz^aK9He$4K8p`n$mc69;xz*utF!Ix097)&v z7dASj{amckB0OykqD^fRTCfU!uWJ~C+=I1V>1lfnsDT=dB7n3CI z=b9AXG6xH=gP$v%t|m#^&oybZdza4_er~FXB3a+PIO}vXNz#63dUQ%h-O%YV`hT)UtCcQW zqf`3MT=8YrdvPWmOp;7%#26R-yZza=M#u85AVo#aq@ziaX^pTmE7F#YI94q{dOF`z zy`4>xOdIi7Mdr6{#Ib4tQdG#S)5Rpov=NV?^}k~yj#UegqC#fU)g;NZ5rm_L+S|4f z#~P6#J+*tPy_-prX(Jwe*fw$7Mi6L@bAuGcS*N>6l4&CztH}JB84Js-3q6VN5g(~( z%HdefKrcoYjOsxrriUp%jzmXwJz*}EuU)dW4$jsJX|Lw=L3U4Q9=KhywP0ZGLfWe_ zDXmyTZAYlIM4PWHj_o7{d0HXu)tEGn8un_2cEUjYG+XN+uLhE(y&99oQDZw311}|2 zs^X**-w{dDUX4l1ex_&+SwKDFpkq1pmP1>V|v{*j%nQr&JO8pV^ 
zsWlRVm`6wozkHa~+gGUVzE`O~B0aT6Vi5BPnSJl}HCm(XzE>epk)B#3F^GAD%)ZAY z+_jcsyYE#epk)rCyz9$CJ(U95qR;!P+6CQO*h=g?GhbhI3cAPt#<6%rLGl$muRE(%G~9<@p9>*>hK z%+PjBVn}SNj#D8~k)F;Y#2_vj(jK)*<2<58M{|xJIt@9A6%rLGOxdgxaZyN;_NYx7 zj%ts(Pa}ggu;`j@INZNMr%!+3WCC-&LG-vW-x56rGxl zxRvMNqoNaYfW>kN@7km87kGt41$|{B=)KZR(jK*RT6t=_L`PxRMp!yJYSPfpJ|LZ{ zQkt610_p)0;!5p(>GaA*DF25GNRrhdTi9E94(;bht?>5*M7_xbntZ}VFE5)+l5YFe z)K08EiX{1QW;NV8jbkpP%OeMab{I%si+orzHQM{=l*O538^Jo3NURf@v>tOA3Cn%< zA%C9PL?%0#O(An6BnL3h(0?6U9y1vnTkdLBKv|)_r&q2Rs1g#jtkYw>cG^g|UcLKi zW)lo@nnKcVf>b>zbh_LhLp*}(>2|-CBZwk($s7sSQ3A!5r}0Vouw{ZOpe)kU(OufuMp))Z zxQ-IM%+@MWrD&mTI+UYM7cxNaEY~$@@AE|DCkHe42ND2w$kr0$VUnaHR3?q{i1X)0 zgbGE6J* zVvrFQ5@!u-B+^)WuOd`6frgZ|*NbEAi9yCpNJpql8f#B^VaLAMB%K^DujvwpPjk_W zGl^UoNRp11n^d;N?X*i?7z{GPLOK>E5|(}W+9fXx1{q-?9SbulG!NV!@p3W92n*?0 zm`S6jrdSvo)fQ?xFRU+h&^TG&y*RfM8DWql9SbvQoP}*#Bg{(HcQ4K)GQuE9rZqbE zbt~(!X^l91)^{(?q=!k8X^lwMT-228-i0klLybN#?~fXDIegYaFHR)#h|tOM;}}=& zYV}UJZs%Lpk559mZbY`@ggAV*VlU1lU%AS4!&%cDR=s?@avdcY=N67o^-fZu0cOk3 zqXY*M)QM1q^xWutIN>Y}wYNu6xG_6mu?PQs9$8<{lBgEV8=*iTrY z0r^kSKw~?J!A`=Eo*S7o+H%c}-p!#BLm}lRJvTCG-6OPDDEqOS&~u|Q;3!g`R&mxz z7p$TLT)f)3Xj0mi)-gOc8s`?Por@J3Q0+x`nn~`dcP;p(v zYvBb~aV9A_RXJ25;Xj!a--_e39I8Q2M+y4&n+gp`r>9W@F^FP^bdAAn@(D1k|98$q_#sFSm` z`a;V$vb9*(AXy=#J!(j2nr!*nC0h#yL7b5Gs7)GLe;1Eh47yxM>ezkn+(U#`&9UX0 zJNhko)IZuQBnH`9A?;C1r!n^xop#99;wAuOYlX~FgT(7)GeLTPuaFolXN1gAIXs+qbi|VBmCy%u(aqBPLeBqed>% zHa#6%&h=EeoHfg+;;a*ST97397kzI%67G<#1w)&Wn#@sS?bT@1824-7yR#UYbCZP| zy(rE)*+wvGNGGzj+{@QC;)`h1@SaAE#U`^#(Gn?)=1j7UVAK$)jT%IWqSv)Q9yKzw zpY6mHuQy$#Xo>W6BvifGMp)*kvG!_S`GmF9!L&bZaxacLU9Zlb3TdB^+I#d+t~Ibv zSP33_>;EH%3I^FzA?*{I6z9`wia_$=G_DGHuh3Ur;3$&4wHIfd$ew~EX`j%f(Hd>% zM^xgJ-1r!rXA;>{kRG4$|XP@C)@t8uZEy&p0(d&U(Nma$Vz67 zJ7-S4IO)W)A|y+;Nj6em=E1w;0kDL&pdT*mtY^rKw+_dOM)%@Kx|78I_(=KjK_)xf z#K!y%1t_<6E`B42irq%!P=$1a%B1D_I~2N+(fN%WDi~x>g>;0E|3 z$(+8}h8k_TIn{B# zk(=XtEnrKOp+nq=xzXjoZA$78)}XCO4mIi($wg>j;aq2N)JY6h+(X)bGAX{*YO489 z{jhp>br-Kn$ie%3k(&d9759+#pG+ERN?y&zUA!tGN2I5tyBMsyhqV7>QVu*#Df>@7 
zX{G5j9Nl-x z&4EGZ4r%|%q;Y(5)NQmIKi|0sgRGp8_Mc1|YKq!ZZq87n_MiHaP9@|}dt8@(&9z5v z4kStYPbQ7ylUF3m^!PA}y0HVM9aD#|K2*h-#03jU(*Bc4s9O(@Rj5tS zP<|%KIygfIQnL|j51>Weh8lI$ZK+80p%+m%F>E6&?LVOti-%b){@bE%gN1U`Eob0T zr20@5XPs;#@SjwB%q&f+1~6%lsGD@Mjj(jo&7^AIB%R8fHEmnet-e;FHmUc@Mkp(X z`w&Vx>Sof=qv_>4T>z=s4?oKsM{s2fev39S8*nh#{x+*MvWWe`x2z_ z>G^%#>TBD3Bw;XWq>NDm(%8!tb?eg;pTy7eSe!>J64@**n^^5sfrh4oK~r~#?k*Gg&Bc>4}LFvw#G8KVZIvSLAEW;R}99yNM`ZY8fM zZWR4w>U)eAB1y)m0co(?^u71@NQz+_VHu+aq_vGuM(3bs8#U^!3bje6u!gfvwh@dP z(kX1j%2V6iX+?HP8QUt3qzj%h87v{aa!fDhHVUnKhukHPg|ut(SVB6FMWoeA*X9~E zI#qA2;-nMtevu@-Mr~4jD|B)^V(@fgPgj18U{P9?UK~l>kwzIaAsvr^bX~30(52tc zn1R8%dq~G4Od8us@rZGi)jLe9o)pwetI~_JPOP{?lEk6&sBvAb)$Ecn1A|qW%!GWaDCmplO`^v=?U*88eV19gi?6+qWHjd-vFObW{v-WkNbSYEoT?M(q(D z6@y%vkdAzqRHoCUO}j@*Nz@hfn#H;H$d!R4nRf5G%0Sz+dmKLdP%lm-t{q60Y4@&U z5LBaVD{p21(MhWs`dQ>Hym@7G}Sl z6vdfDP7@?a&z4OZ$6Uu)MxUz1SQhQn>o%Q|nM7t2BuU3uOzQ2cQ{9fS5HsmIy|NK} zMEf>PsB`$l*LIWK7eHD7>h~0eH&?O|C1b*d}VPa*+%d&I%)rtNxgl8 z#QvwD?U>e*9Nynrc>7tLNwyJuEK=Br3`ECiveSy3#9lwVeP!X5t?8+UHb&$ti!;eK zf{#TC8?hRnd>5~*ly*Spg{9S|&-855DvqQRYbSJskd9DMd)%2ePtbq8=a>^&yogY- z{lDI-jmCmLwGm>F?Ge%uDwBG(=;+>aoR>+%XWrMUXj1Gtgvw!Iru407E4ikH*(#!GjGKp zZWJ=D5nB$HmG_NYw@SBt8Cw>EEVqzpjZW#91aH$C^?vPJ*OR5=up+JU zEVnZSyCaHDPv?6g`FgT+9M+_)^;SFfbH}|x&cx_X;VkEt`QCFsmpdynH%jTb5s%II z%1L}QL%X-uP@_M8!X6~Dazc7;WYQQn+Kw%kb1R*m&SAwMHz%b1T$8d>W$oeTcDGD# z{`|=U+|_|D7t(&NN#jVk9a}EvRysvBkG)(Bl1f6_&o!xrZ7XFzHv{fX&7VJc00v=* zkT?Oqe3&#|x!bYja&F~ogfg>EESx}+w4ZCz(9fEmJI-S4=aymM@O52OaVC+u2T9U? zu1I~R6SepLu30h2+zV+x*Q7X~)+Xq*!;=SKkiQqwey&MX7opP*Pac3_8)0cb*QB(} zO{rb-_aNCuSUR?B(r6>LW6R~-s_(tB5zOsWdy@8ZO&Vb2eCGCmybRGeNxm`-f zmQ7mQi1+jN*uJIcXh?h1ka$F3^tx;(^7qCWv`3AaJajkurAI9WF^`ZrYMjRSB)$Co z{5>&I@%MAD``(UMI(C?<+51d93AbRtlnLX z8r%7MFccRJnWM%jm&J;-!!r(G5FHJfqsFPGR}HmC{+<{L7d4rq#t93xd}-8pd)F)s zU9e5&sBywJEnhFAqcynHR#|1B#U(QJ2*pJqN#>}rx^`sC-_G9?gScqO95vS7XB#z! 
zaoAbkeF9oHpOQ{-iOf3LMkqRpPWbM!5i9Srtw;`S_v8FMX=58Ba=ZUrK35J?tS11($?MIx1%{?kOdUd(HxV;zPC%JAPn+#+wZ%1>)AX6};qd6vxJ;Jf&ArqtRTr8K0Zjd&)7v~;9jvypS zM{`UXXJL-!^nOg0I#aMeVUNS7RqVx?M2;XNNk?-`%I@3R>1Av=E1ARR0Md&yiA+IA zl8)w>ly#A$tv!<4(Ht?z6b$KTj!9V;jkHUqAPjN@LpoE?q^yf1y&pG%frU+(7M4Ae z4H0rPLz*+~O_s%}os=U8Pckj+>Is3^@*T43$igO?6VlNf5|nv7>7dhgCZynZa+gT#CNYYU$ti!+IQI!Kb9 z!NYlX}pYLAF--G0&U$?h#4Qe5dw zio-_hO+7-sJ5x|e{zZ@IeOAY{-Ro|d-qPN?#Uqs}%qvouvYCYM4oNcY-o55jI%wFv zydc|KE#h0|vC^3op?dD;!oYVAnRah+!|Nbn_c9SCNoX~<`qL=cJ#y-5tal$qV@*6-JYhvHx@7L*#63ER8`i^ zdKRa4BA!CuvvkDQI<0NQ`w`!>pK6;jMh(o~I9v9p(Yv%%Qx4UkoThI?e8nJ#DrAfr zkV5mQE=PRFRsWU`TUA!uv=xVa+RMcthbko1t#*R69tq!ns;wC8)eae>2BdW_*Qhbj zJ!z3-*(UVX2U!8nIZ z5gT@<_Zau0f44uInHU_t)T`o5A|uTAH{#2>2bxH$70F$)wT=qXL9(P8 zP2%>tgHDh*m!a**3p+e|CG%AI{6!Tfok+_RN%G@No+Uc*bkF>q|L&0&rYSg4f{>0R zLaH!+@EFS9&I=QR_&`WU5>2YQFl&#zFfoWEhIAy+r0PjQI^Rx%PuTLpGRC2Fro7gB zacn0sh$Mz|B+;bN?!C%VMACh8hMvFL z30w}lNKtKb?UAPiNz$`0lj2*Q^!}3&VUVX4(!RS%<49=Vy~YLOTg2-H5ka(5DuG)L*)rdj1R!IBqCZ)Y>2Wr0iP@`UV&)iKNzHC($ z=XTN!U!1c=J24A0seWgaeD~Yv>3An_4<^)BBtc_vn@P42`0nZv%^KC0vu<{H#(}i4 zjj*)uZk>j!wE6DcQ5XNm&p3djY^{*?-A$^dj@o1Y(-`6LUe;Oy$CatxTE(&UPOJyv zHifkB4ynEzwI|b3)p8H)-9=uk0*Y^}PoVm#>LUIqdS(p7wGvh~|W( z-?;D9mrJMjeRna4=7h{qqf)y%>Fv9%!BDU*WR4mOpFeuU4%u2Tbip>6qeeOUrPE2b zb&CJ_52J=3-5Kwd!>)FUjyv@|MSLMi=BQC$F47L!S}=(ChRji;9Q{sW)Txf9cd_L> zF}*Ikg>P5IStnw>kR)@|s4o|3hiokv#Ck*Ks8P3O>m*jXN`a;mv%MZNs}wETiH7iN z*2y+P5npt|ch{AhUoO%Ycdi+1p+*fhe#RF#d|m5RoJqD3!suU+>UTbrMvb?#wWv4S z2+JHb);{6p3jcR9hW4bp=2;@l&M z??IAuJi?^>UF#7qqoa!sm);4aD9$7zdypg@k1#2Jm!$VIo5UbG8q)Cyk%nuhJ;QXB zx%&9T5@J}|iy%a3K)q&h*6CrAq%)gL%HOpEZRU=0_#BaYaUzk$gie;uY%(c-m!$Wf zPbh|Mgk{)0YA>6*k+!YTH=Yo|a~;`r7c#9;|6L<(Tcdg`1ZF0^-?8?@VApKOv_|#i z2KvU8TaQo>DGba^VuvXt$-Hvw%Ry3J*m%!avLDGMsKCL~yRAiA#}EKRd0`=)7e060|VTMax7wY|%a8GW3UoWfR|Ndypm zHyauZ51^6y@ARwfxDhk@PI_vM#L%$Rq&$E|>c0!pa}S^!VrKXG%HpIGA3(B{2Vl~) z%DNRdVn&~!D$XRb+#pHH18AiFyQUL&ko(=Z5i|NuisDRS*$I-QJb*^(ziTAcsK=P) 
z&!0s>9G1vONcp)UJyv(BXuW)xTez1m(k~W4kSvZmi9zHeWcG3%@LqM%bb3FtNep5h zA+wi1x~pvBb}z4xs4m>6SFRXDK0;{5kZ91_={ks{R6%rNc>6HtE{+%*=`D3&^ zT)A7lywaIOdV1xGL3A`^_VUMY*V%}UF0250{(LVh3>l#&qnBU4`i+aCowhPMDwh_#6N4w$ZC^N)( zEux%NoO^_AgrcML2#wbgJLEcDSeDgULnIl>aaVynOX79E*&KDo(Y> zPBD=rb)Uqh(@<0Nh#hjM$eHIpR3T%0B4oL~p12QHZwV>&^BgJ|+?>NpGJPUiQ*8q5SZ=cvK;;B^}+er*^s6xj0gih=0iScQ)h2C*M#m8$A zPp#raVtj&R8RHYAI)sn3OAZwba;QSa_ylRZo_5Khf94bhXihMLu{;t*DE;&>%$e{|Um}Mg^Tc1Ud&mD58V30i(QZdU$8s`z4 zHiDpDnu}hXYme+HNRnwIc;>;PN4J>e@7f3sAIE(evlN5fm?6_f+!oOVAz!Va!Ph#-Qfspp7O&X3W-~GU|uNTdPP7G{46jN@rnk-Ipu03MQ zkR3CZ&7&UZ_NbTMXMOs@qZUJpK~37DHfbC+_P7f&F(PFFg|znxi3h4JYl_<2 zAqxlw3;QAMeVVlHdoS+UR40TZl;>8(v7N*q;1SZ^r%CI+w?h^X3|&S|+WR!A+Sg}l zTR!gMw)feSoyz4@#>~^c2LoH4GHv;7wTnXD=WUF!ICr!r4;;SEt%_6aVar95Oj~}V z!!KWZc%Nbr`VX15eDyR4`rZy%KrqMx3YoV2#;jublK1)hceF2;QyDX1^yk`>D=1s8 z+GDn?9#d`V}Mn2baa@P+m>H41}{>LPgR^r zwh>r#k(e!8r@9V zkSi0?-X|oIsIoKo?@RA_0LSy;Q4P?3zMUVz zjJ}heT39jg)I!<=Fe%O_e!U&dVMbr1EKWM{0VGR%049wV)+>_h2o*Q9?-zL800=)f z7fDAFw^hUmNok*u<;X)i#ibKI;W)Eg<%QKP-$jbcekQRR4o}iPp-JOBVmmKvu`jTS zGl_L@NRrVb(5d$%skPlBDosW@#c9qYR>L7lMvs8hds2|zzxz9PBw#f>Wb}w5QG;lq z_Gg!j85pdChqO?kFa>P70GA{g>dHL z)K2(>B1zgOG%3fmc5M5E!&;3zwa2AG*O2PmsyLI#n1Li|pU|Z8QH-)r*bl4s>=V{@ zdPsF{Rh&s=%s`T~PiWH6CiLZxCBb+hQ2@}-O!p5B!Ituz_c-qVp#4BH4xXUv!sbsPtR zbb0h^Q}06hS}aK|?m<$;shyaEizlfki#DC=WX;+oH;3$A0h$D661S@kZQ7rfJ33rr z9tm^IBOXyMZ!eBIi9v2o$Q%jJy{SW|9ddJEked^d)(oA7)^8)>7^Bb~?JwQfRYH!` zdpf#{L2gdS90|{(-4^M2hIDnD3WYNzw@fhW9pDLT5i(OiW@ zMS42Ai$QKq$Q%jJT*u3oM?!Aux_Egc%zRWGzRsvcB?N`h97z}7i6y0dLP*zi7vk8P(Hst6=TXI(^!N}- z($O4~25U+#!N#4PA(c(8;zVNS6C_LRe^TvTM_=h>)7yCD>IFs1-2}#mf3nmSg zhl_{x$YI8!JKA6FwQE*RWz=*pjwDu|l|vQM5h_UB(`DKEx=gS4q3znf&$@`@RCdzS zz9$Bm7$F^@GHGn5R}rc*#E|mbdU4i?Obkepj!>CY7mCl+5vslt$a^h9RhJ!y&vWa= znM4i-BuPi8Od8v1mmDe>?=f6Eud}tv?m_A{B8xN0HbU82x`B=@>?0?K(N3HD-63@wk;R#0 z8=-71NX-Ly^wdxV^VpcdxBTisn4+8t5`!G7ke*qZl(n8D9KGBOn>um7 z58!|?lWJ375TYO zM?x{ko(k!irAcX-Q>XW1EMkyD71A?Hlg3fQGs`hr-)5HewYpK0bc(8<+eyoV&ZMH9 
zm|2>XmN|7&j3sSo)E*aW4N4ro>br_FiD@1rNyk`3TID3}5n~a9$b3l0SWK!uYX$a* zvB1D9OX(PkNoko|?Ri(y7%^3h<*VP?w*o6_r&l%tuS_IK$5>2S+X(Ism5V(M?z=HBITr2c*XsBf@uQR}%Jd@zSoO#jOXoBOhYOG&JeRhe_+}>HWxu z7-V~dq>kOoxy97t^kWU6=EMF-k&kmXg%$3Q8~^lr5`(<3kdAyjX4qd z=n6EQ^gMtX#%e2)bb5L{iGeC1DYPDu)0<9uKQ9ah8DSxFd|G>IuOc7)602k|bV~ni zZYL6&AW7!<^vGoN(uwhDT)k7|<7atcFl-|%b9{Q>4=i6ApZazkf1>ee5v;Fphje;6 z=1Lpe2+JIwR#c^sns+0KvnMh+Dua;QSu`xI%lPPj`B6%2ByLfZQ@DKrnw z_V7N%AcrcXy-$;}0UFBQ=Mb$Y6XQsBbDI3_a%yG$sf`eW?5U6pyRs3~-ZJaq4^I;XtzdIFmaHi51;wq4wua z=IvqFb9}fkTRzm+2%u$e?ZuJA6Hlntlph~tYeBl^Wgfg;vbFFV$<_+#SeQv;?J?aa zb8OjmZ9id-4_wHZqO~YaIw@NVlB8o{Cgm01lzJ5ltE4VS*^0e5lgQSBB*_Uy?Tr?8 z+Zuf%TMGtxS|QUKU8^UhPKOgFDq2l{h*0U#omrtMlJ(t-vrgpyK$1*rG#-_KfLPe2 za?FCnBkQ<8+KaVUg zBt3sVAttlEzLOV*HtrFYx(`(&4d+i}o}k}2-&^gyW4$_|RUCEVK{{xYGIZibiP@9d z&)a(LK>vAW6Y(~(7DC#qIiE>c8T;NYnN2WQWC&@mMx?%z&Pwf)*#v{Eg^>1YOv+l% z+S}n?KKu8k5pqCh7g1i(=5ZL)xn`DX)$sy}xr&4DuC1+N&`sFUcg~`qvQQ z_kMn-bQt6`g|t^=(z=(k+a+Cy24`?2;+H0fu2l71#qoL)gYauedo?DF<5P>er9lXt z6m_dhtU_%sqi$l5(-hKPjY-2%@c`c6xhMviO(E^om{ccg1=@P<(DNO2>!zeaZ7-v4 zV%SDl+N&{XZ6la}j`rPNP2Vn1p*E48j!z=lMp)Xb5ozTC>=AVn$u`2$Q8$x@nre4S z&roFRdq2-?BFtnPVd44Ici)-Yf3vYYP_2T1cUsa zkU45xd2jjB9?7@w&;x@kppZFgtUa}tdn9}9_4fx8r*zmMIs+|eJ7;%Wq}QlLTCH^Lk_F^DQ|tM8tn;(5E4xP! 
zX|-0PETC~+;OBRL7lXC^kdAzqG|s{l`QQQbi=OX0DOHOo)Rwc}UL0#r4A%BTI`U!C zI1Ae)3kU}JKOr6YFlp=&yJP{uAPXp@BOfAFR!Xa|U9x~+kpC0Xkq?uij-zE9`RM(a zciY|HGajIJ%KGlbxt+-Wfh6h3he_l3w8vgnG06W3={T%Oqo=k<999hSe?mGAYf|>v zR(+cmmS|AccQ4MhNB$2a$!1~IBbL{Wa&tDaA31#1cP~yPEG#6;W?@AtD`urOEi8x6 z`tHS6SD~mIUoF+(;o-La+_HxgbhmlapxXayR z%zhJ8#hFBA6C_E`mQ5OaxnnGKZH7+V&$5;wX`Ei)S)56F;DGa+Ah9ZB(l`=2#?q5k zg!0{?@|gU8G3(9ZOkzR_Nz$`rkuv1%(^oIw+C9eXH@?faqd1euY=R`|*|JITt@yRG zfci92?>Zi_2%+QVv`$^`SwmLvU(C!#hK*mNm)R+H*r1Xgg1V@ zz0*pY2=bN1ndIwc)S%%(B#*O^WIt=iH|)GecaqfTOwwGh(L z9FuyrG?Fr#20i_>)S4pm!>ZdV&LpxHAW1r!1Bp4CbxGmzsm40LtMRGwNTf)eM-^uh znN5%+V|?Pj@vT6$XignV?`nKH(z5ZYbX!5gluaZ?CJt>G@Q0 zD1=J46)CEICXv$wNs@o+d;0H+L^NllEA#f=6ZY$tg^+UjWhRl?#Gxf)eB!@-3=oo0 zgFC-fo9ivcCng`IdZpWn6o{FmBrWGRLGty)f2#p>RFO6^n=0@xQt)RInN5%+V|?Pj z!}&u}ynh(87zq#e-bJ_90K~wAhm7%w{|^03q>VjY^$-B*6jeX#WE(*%NGJAmxkn7= z4{0l!v%Ll&ZEPbfV|?PxwT-|dS=7^+FH+{H z!CmrJq!()di^Erazgh!;MD#x-ov_v3=#_2fv3x8NA*(p*#0mxjXG*3=CS7uSLTI*` z$3o1qYN3iViL5%`b%=?A?u*Tpdr2c%q>cMxLyF@n4jaXpL{=RnNk?-`8po&2Xby+3 zYrTpyiL5%`-$4U(PUP*qXokbKQO zVwFoSr*E0B!ysZ5(y?WeM!V;%y1{pE zIlkp&Q)Ss(dvVr@*fJzZ$CgbR?Ve)Gmto~tS#|6L&C8X;XMgF%nM7TBAF*aQWJ_Mktc?-HWqMM3o^)rZu`}=qGS5W6O)fXMID8;!Gm8 z3`sJr(LF;yMSB0qqGAwR4w=@7eZR}fd$|j}3~>*>a!F>7oODtI*3aTZC&gjiBgX5= zS#>$(Y3HD&Wq*>TF81F1*Q&}8IJOV`@gtUddURa_|E7o|P@0Dfqdo5EiFIT2-S$n-W zlDHWUHZG;-dyp<;-T=M^^SyDzca)${Y$`Os#5n^7Gl?u9c#@v)nUrHd>co66!$IvW zM+v%7>kSWL=KF9(J>G_^XNVnCpzv_d)PRUBpoF%sXA4VcE}WjL8f3xM+r1iho_O+^WQv_Dr2G6h3AN?=ls z0Y!TI83$r$VZTX72}H_9ZJ>w}3^nRZ!G1AsM+wBRjj(i-z@+Fmo%HtpT#;-eEFC2< zX|xf$WC}vEjj(i-z@*VeI7;wtrr^<;f*~CxFllWgo<<4Cc8==pp@ElC0tM-a7=?6{ z01}&M@(}*({PNL8c+}`&mv8Kvg+bgXWR4p5MaW$!jT-lKA=)`;)VTNPXr%{Ar{}RS z7(|Rh=BTl_olxlXew08AB1R!|)L7kD0iAZ(HOmt#i5rE?QR6<&gCOnjGzAz6wl$ff z#+{kc^7U%hY|d8IIMs)$IF1@(Fj$1lQRAMC8aQnE{XY7>Ui<7{tF5wvJ{<|gAhsMb zM~(a7E4A42J*RPvu2Ex=Vp{2ekkV0^bt0+^Nis)`wfCvm@_oFXJZdbi-0q#!YR60> zwhT!!M~$`jso3(gmz8L95Bk(i!lcvFkx&fV2+JHb?!A3oSKG1W?My+DY$Ggl)VOD( 
zR*zt^J?>^yemPri;|nNKma>X-?b$}C>h#a^y|wqbnO#COr)E=C97z}KG5LET?bR?> z{>mUzobqON38b1$RdFVfxd%y-VO}~xs^P#aI@BxskV>>zWF|6^n=M(nr zirN<4&y^MV13H5M? zNKb9K7{ok6I`UysU57#1A@Tu($VW&=K1{0X(4<|m>R^yh7t&dECJl4&eX{C^ni3ri z>8v`FYOD^OE~m%1uJK?Xd8D7Y2;nS>Q;$$I2a;skJ<=9TNbxGh!jIbPJG(JitJJGcVVNL z*r@QoZ`k|Z{_bDSdB-#E#CN|}_s;ufZkaM>Rm%M9uk5t@`Q^)$ser#tie^VE)T(1Q zx`V*~Yf`lEX_4k7)%9hM_Dvmk)xkwLzw}7j{6Gb} zeP(FqN$p()v0`;?A2oiSZF~yqmK=S$=1r`NfNi&2xz+a_l+PC3oR(L|m!%G_`{~z6 z7s32~2cue09=6T6PISDB(ErrT^-I4hSjYLhQuRz-h1i1?x*q$YVA6#pNcHS9kRb8xL3z+lK;X)1sWK@FRcoqghG?vom<(8S&Re*jClsWczpp z^JwqaJ=oPnPz+u^CVhfJvW?;gv&zB2d8jpoU0Bn_myK+>q-1`IvoJP&ZHq(W6zq)2 z&JhCuyDkr=KhJ$$26cN>E3A}y$XUR));*8;ra^hu@`H7AJNvS%mK(<@jynrI#%?+N ztCKHl5MkP)<|$`^;-I`WE_i0$5O~9}DUXP)LT5wko}T}2_qff8N1Clsu&ENMRs$-QcNVa1t5M~aSAz1+Q*V?*{d`%S>}SoZR&o{^U0=3ktFJG!+!$Rk zxtg;;aZuiNw~JX<7I?(5DUXN^u$L*b)LEc*7d}bq8hQ$@k4hG-&LbzmYE0W^O;0N% z+bDkMMGp>sJy>7v;dO1eFIzI-Ai=tYlTg0$tfuOd3bwArp+D0ByDks0SqFnILft~6 zu?HMIoCIw9@J>>kGn9`MwlQu3zgxX2IlkJ*Nm&25N}9_sUpBAjwHoIFoCJ!4@>cm* zM*ls)8;(tRL~KLP6z@)a<|I(N|K#5PR{n*8rPn;#;JlfW@Z*JQ&21>7Z4^InAU+y6 z4~qMZ#;H<#na74I&aH+z3WxhtE7#_^f*pHar%4WA*X5yXFQ0S4Q1{3xUn|98M*-U& z&#rIk2j%5;oIg*W>&u+R#w$-PcNEs7Uvqvr$CuT+TsHXTDo26hpu7cSZ(8pKyy4iC zN5uBVv}2{MmQDh-drg~(J*JfRWyQ^}?5;S%QFw0MkcH^Uwo&}RK}F=DOE~nN<;zxI z&bo4Izk~3~ZvB(v<+NqMuFFHa4c~1mLf!l8?5sOHb`Y>_yZISEJ3@J6!&bk|gSu^g7rcC7>L6fSk%m`L&aKe=wgdcLy?eu|o!dAFx2~<; zJMok+)9cmn_RtOv0>wdjtDe_(eg^P{V^bax+d<=L1q$XMP`d}OjkBI-2jgXpJ*d=k z5Z>y+167c1qxiwJO2|WUH+Vz-^ksX`k3PP7p}o+mV$x!V5MKsQOP@bud;f=r>T}}) z2SD8u!9yF|Ibbhf+rF!}Y`Y8Ps^QgU&imoZnz3FbjgH$3Zx24G-U!Ou%l-+}WZMfS zsXJ7^AN#Vg>Ng=VvAqSo{)zEDpxh!X^x^H1y@h+W@ybn)GygeuU9_`Cir*5=~0_?iiL)rAHW4c1!4fAuo8_6? 
z-*-@M6yGOjcTGQbqI~UeeLZ`@v+=#!pA7t1eZvVmH&wP5C=SY-<;~@eejD6Zgj5mteKKAtzUFR zb*$wt6Q$$_RmzDi)1Zu!eQr8n{i6r2f?=I=eikKxtk;QxI@x)y!cq@-KoskY-F8Yx z0m_!`UPt)>7R08!V0C)eeplglXSv!%V`c>ks zy<>WyWE;f~98^UfSO|E94ob=A8nEYW+`egn|MVw23RP#mfx17MyVZLO{v)rYY~l((49lMV>L8~T%!N5qEy zB(>Y>(yp)}R|47NmyRb6EOZujop`sZ=hZ;THi{oO5MKfOz8F`y(`WExrQ~xx@digS zH&F8Fba^=6_1t+i)J1<1+oC^NQf>OZMM+9#uJtEZcC@yN zmgc}4`jeDL#0J>8KS}Mj%k5Ac^f{23rX-$v5B{VWGB_0mO14q_&`S+CI1kh7-OK&4 zS}FNl+?V?lDEV}{Je2!l-scn4)ep?WYVxt;yX8{ijF9F(_j@XCq<-q4?^e3ywXY7kCRI-cqF8QJ0PmX$*yI}oirQ}ai9F(_hMWI8_0B`6|QXUZ-`jgb| z@}KJ@?;EOQdCS%@Z}2A}V*C&sBilyt0|&K`2k=l28dncuu}OMo=Jn|<@OyNKl6C9v zI4~A`VO<{fs!#W92z7Ha22U0JL2Uc75Xgh_Vxyy}q1A$z`tSP*r@=q0@gS%1sj5LN z$8e0R=nqmHl(*SKUjN<W&w}ub1r=L>Mij13zX3| ziXS+r4aYDs=!B=i+$E+#l3&f=Su>UY@vB8=L;S0Ey`1eVsJpfCv)r5DM`K&`tLZqD zm)$ErZafRTpwCWu!m-hBrgp8Kc5TG%Or_*EM~IHs2Bn0Z;seNa;NW)7JNeB#)GkQ! zExSS}Y65+s?<$^ev~z63D>;Esw~OcU&qm-UV_Wo_jZ-{xQ+oxm2SqXO9#yax`V{yr zH-vJvmuFgS2EUo&fV*>?U8^8zEF2sCVrti{Y`4t)Ov(N})53c|XwFlz~Y@oNmQ>DoE%eXQ#us5>Km~fz{Q0^k^u_NH1;i3iM+}<8KRV7g)%Wsizbe`K zFPW3JKkX$zbg$R>n^Nkl{=p4>+@bFU)zMAs8-1%Zwakm|$1?cn!NauDM{P23PPn9@XQsbQrC8}`;Q$I_vMny^H#!h ziTOH?HymLrWHf|H#2Rv61_mo@&4<1p9QcBgIW{R-uC?l3a|WojospD{tw*oYlo?pH_1!@PcKTYl%$J&t^HA6k#R-nAN!4%qb z7!R@0@xg`$aB#no&zbmyOR*P!x6TRsXI$xd_oe4csC)Tatza>Z#I_h$YP#C2dk()B zzHu2}6Mke&!`SpF4x!=0A3uoEJ(=5ce(%QtZxmnwA77^=Z8UNH( zXz}3Jgh7_!(%4nrIG|Ct_TQ*f6eK7?)6WU`iPCcX?=dWrwS|mwO0V&Va4Gg$4{pPqJz-L8r~7_9+M@los!(@B z%zf2sh%2!z#+74k4hfAJ7S8(VT~2HPab#!f{9F~3%hcH5DbDp%9F#Xc&j`F>TuFIE zY#3KkyJy*NIk@G17&B~tJ?)mGi;!3cu5>8lHyDZ^IB0}C_=?WQf^aGJDxIeflVUqv z9)90AbLS1z{k(1Zt;O(N0^4F-X~{0`Jr2KLiQm2`US7w4!nN zX^1N+&X-r4Klrs>#zMyj8;s%L=WD|XF#2t{6!*n(CnQ{oy>!plx|T`1H$vUhSKjUN zfw&LbV%(P=qk6OMO*m^<^wL>B-BB>j8F;1>lvBei=7`@B`1t_b7}rrdaBPh0C@(4R zTfa2#7tTgBnme~3)=?M^kD7H*#%~}LAK2Iw4*tC6@7{_LQtZQHl!$OCw$i;n{~nyE z^n`!8lHQ z{3fJ_OIWY$A2N45l=tjeFxVTuaZr33AT{mJLndqhaFjrf_U+=_51b`tGGzmN8t z`=eatZwKK1ITl-}@#!t_=jEYrsW0psWP2fKIfbm2X*zv 
z3sx}~Jq2tV_RzWfSSY(+`uT8vnu=vF+iidEny1iF`Oc*Yl*h$1)<1L8Q+U73s`x{q ziWwJ-voxLHC4~AN)FePTx8~Voqa-ik8a!4eB*T+SToezXnQ+Rn(c^ty#|dMB7EhVt>P zUEN%Es@VSIN25;|dkXd?X0P*hs93aN`nnm-JOzq_^2YsV;0?#7JR&w8S1ID4{vfg#6 z?n8wz`_-8R3wo(pc!hAoGqR@g zeWb+2g|w~_MfKI}QFf(bt7%{lu^%XD1xKF9BY$`7RI|&M;IE}Z0I$VJ?`3a{#JYIovZXd-3qaA9te^`D{v5!KSdUBG* z!){bIdN#ku{tasOM~lCeW1Y;wDQecX+PUSOfTu>M_OEiBrDp9<`c=9; z`~R;CdL4KH?KW^7AXoIdK>KzE&9$q&QX!mq{PFw-H#N&`Ij(gyTz4MpcARV0SRN0# z=kpxn{{KAxAOB^_n8In{_XCen^VIA_4Y<3n`jkF{Ox27(WBvYnKR9HZa%m2AL(UDY z_49|1fNi;+<)@F3f2XPCkH>0ua#r6>fki$-N3X=Bvyaql;g`y-C;ssf=y{;LJBkw= zz&nmj&%>ORd23!SQ?rMCf>JNSb@P0V(fy8a9n=oKH2l4_La1gauuZ^IW49eE#Or`R zPr(-M54QgQ>k4^(Bv>AP|J+CTJVrglI9$yJt~0pt?Yd9tJWYhm4YeOQG=pQ3ct3ET zqpF5^i@|ZCPif2;sxI|s!FJu}tM|aYsqd<2Sk27=wa#Vul+O1;+YX}82+9v4O7u=t z*08*sT`z{M^AT1W?f!YCl7?*@;g--h(?_5mR{*vNa}Euz zeCz-1ZqvD^(G$47d;VNLs&KsaJkM84wo&|GzB%$R-VRhD9W`upUbsufK-zZ*!}>g$D47YI|7CcC>miWffr8<>BL{jWGpK zw^f~`pQb%bI*G4`>2&!m5g3_3)W`8ROWsf_A>IvrXz2?1&V|6 z#{DPY4acTDBDPs6FfnA|BT&2lzF3%f?YWxO=%q>=u*SPI&U~SkY@_&r0}JEp^3Z70!52NCZl$gdK6mQiEnr*zK812+&55sBBQ>nk zuGU$LJ9!J$(ke_{76#6B$^Ks|DdI@lbZ;plg6m6sUfrB>41Ajg&(y)U&%DY^WsZTsJPJ*W-%m0R+>Z_HV!VK=pY<*`Xqq9-iSu#mT_FNl65#X)&< z6^RAjaBRvWpPvS7twUk<_otUY?XKw5EZ~5JhCM796c^&?B@|77NsBhJZ4^InV2M0% zUD0-pD_jn5++^D*e&C=j987#)#EU_C zwJ655UgzC}Z2089e}#sb8&uu5;gCDO9>k9g;-62d#p=IRqgbAP*p$t`-2`mQ^(#(tP%oV&2$VlTJ%Ric>Nu`a%~D!2=my$0zA6=~S*_?(4L7Pt!=7p{t~ z{Y%5rO8QpbmhLVz-r1(cneQ4_(Rfl;@15?#P{TC)WGJ6_*rtqfx4XcsH}4ZCjOH%;vC<(UmzY67;MNyXpxJG%*pb@H=wOOHVLWYM;52~DHe)vdK>xO%t= z<~|iaRfBSJ|FI2P`nU-rtPAERR*zyKS{^A6^tu@;z7XNMLEGtdRM@A+tz&SWcU2l{ zU{c^F*jz3*Q#{W$%6^%RfalWrVGV<~>BSG%{{OsD{(#!{aB%;LKa*zrl+HcB?kHS1 z)qC5(iatz~AI{#|=|9iM&&Rg6-U@XG9IrcVkC~N#Z4JZ#70T_)HO$PP<--iJ2QQ58 zVkMM&aI5BWD32c(KJjaJD`CQpZ9YY#ec0!Sz#BfxIthMpvzJYWa`Z&ww?1n+3AUo( zZj?N>|1z^LFQELY_ejrKfCaJf^Er{N>1u><&Uiw?QF|puGG~i!;_a9R!Ml^0vAs z5COd5*px>J+n%6rhl)D>pWQjfQka#M59{CP$mX_*9RxmK3uRp2LGc3z9g&BUW`Dnr zsuUp2gR3Wk8r0uknyc33!L5Jo*chmb`YLRT^WYPD-k7*bKY;bn>Z?ptldnIj7$E7Z 
z`b6(Ey7k_+fsmzhoQ554~8y!S9Qz&R~UUfHY5TDhg9a z`~PR29OrGCBzuZ`CV&U;VKA9JrM{J!IvZOU=bSG~xrl{L0y0PCkr z;QA_xgYx!xICzM_8_v;F9uXVP%Tv2eEB$C-z0{x0X_~1y33`40y?U9yWE;f~9CSh+ zc;CGPq`7MTU3!VXG#9RWJ-q4A@BVD4+a+YSrKqpMwm8pzfW7X2$UcBgXtStBHt6+h zH7^LN2j%Q>w*KNgJHK$?d?a&T_NPy79)xo}+`I>#LF>k4&0-srPM)azqgoQL1&-O^xf zSO5$A)8?whGY8?<=$>OQh6XUz_q+8)eHFz)d260BYIq~y4d>x0kBAND;i=t$f6|I; z-u7pj3E_uajyeeZy9<<2Uq$f)2VIZ{@l$iX_yB2cd$hN4y+OD9rMYrl9#-EwKcObn z-Lvi7Fi~HHZE+r6|Jk>a9%BPo_u+;o4_Z11L$0bW>>LxoE}AuVj{%Jm#X)&{ri2w< zz#GofQyvi;&dXD~i;VZToBze171*4qKBa;~sZRZ?zhoQ54;)w{4_9G{TKh!-(%d?~ zM+^O>xpG||M%5drv4pyt+AA)K`YLRT^YEF6hu+w*Fo2DIHgSzr9O&<(E*sul5WpTA z^*UMv^YRo2<&D38fj87wQ63Q+&f8PFhnd($xYP__Ap^!2uLb=+Uk?Ih)K^jbz(H5! zAq(8Mm0JTOjS`PZ3<4w#lr9fvOs2#vgSssaok|hcEnr*JSDg!+ZB)D^fE5I5h6Eh7 z7l!Xl+UEx41MzA3YS8{u9F#XMl>y#Re?@sjY^cAYcDrWZKmMdm02@$#{!AsTTX+uc zUYezB8^sSCbVDA@#Yvqr0g{G^f76G)boE#GzPRo?%D6w&ZF8zeo~XaVwy3{KUR1*| z<8%PCy4c?P59qJ{%ztCi49XooT;}>Kii7gT*BJnBI5y=Gv7tVT+THPUntQf$0IR6g zXNgM~C%DMAQT)Jx4f0UBb|gU3IB~56^o1HK-Rt4=llM(CAA$d{D!;0z$HKO#&w4Ry z+9yN!-R41&*$xju$f~F`k9r8a85n-O1R5xcgYxDgP67dMI5y=Gv7vs8+U;0F?`I=b z05e&9y1I|KAc#{*yP=HQGm0NL=#D&a4bQg#Npr>Lu=)i^8Y*2LLMyj72!pzfimGf* z0R0uVMLm{H-Fo{^d<|d@OL{!-Vqqs(-8^N}63SouUA&S38Y+r|@-}?y><0sYH`I<% z9uXVrxv1TnXUu4@BLV!CFlCK7peL)DV7;YRqHG(*4;=JB9w5?ux2jrSN&m%T6nH1- z>c8;5sPiSuF&*kw+*rTZ4)j~t7WG{3o;TcA6MhfLUedYxA<$g4cON{kYG3AW*6C^# zXs{>_%3Bu97nlKWI5y=Gv7sJ}+8ycmXmt!c$G0{gzir+qTY>whsR5F06hCm#6L~0o z#|BHuiMcq}z*AtZ{-U?t=L*24lJM1k0-4(V)-Idb8=x4!?VD{UgY;N`8 zE<$vc>PYnWU^f5Ghf`|MT~QpAw=try4tPV|73C4Jq3(*>-SuGbU9SkRrB$mQkKJ7a zK5y4gwvFNk4tgOEqKx%p{SZl;#OE5JFVsHi@^HX4=+k?si@Gapi@K{`1Fe=b_}y&E z;uGsYceOpibGd815Vp)jF-_E6Q5=*vak=Jb;0?#7JR&yKT~WJZYwp;UKP8x%6qo9* zxUO$%uw)y>4;SAE_Nt*FrYk%?xIp=QtnndFUi?o;CzaSP?tMyA&VvD!{?* z6sL5}{X?Xfi_b}|50+v&T{~yTL|<(Tbv1K8z3C0{B(}wP^7ika?{@o!uzP*mM|z3t zbDD2zuo}ug%naY3hcf>z-z7xqo8rQ;rFfCpX*uWhqU5u|tX;yD=jTAz#r=hI!4h_g z4{WrDgU8L{C)T~gLZq0AYh%s?OEH`7bzLpG>BTKjH#|I(>$0#d#*0DEzn0w;8^Y>p 
zby?4B|32ypWz=O+9F!M+et{Q^7b#DO4dX>>_eq10jwzY=`LS58tVOxwB*R}J={5?B_)zRv<+Bx@n z|7KPQTcDi9bz2k%<&A%T0p4(I$|JQCv10sKxA@@cFM1(tab)5C+Xoy<*N0S)ZKL>s z0|z+xbrhU?l8SlFW$Y9m*yxCMa!p5eh!it%?G^NeF`2HN zExp4o`$FBNd1-E9Jcw;E9*hePI=_^KFw5-vuG?U}&D&p_w*5X4!oIE9RYP2FLvfVq zPvo(v9f$?vzfsxU49fNjVHMunx3L197=OOo$=E4Au+a(aTp9zrliUxH;ywN?$2LTY zxpeL9Fu%Go=3WS^7UtG865==7FSQfKfUg5`U({|K3*$FxM|g7< zN2=!rD34mGF`EKw=qWz1(FN_~bLmw>rI<#P5%d`uBE?L)cBUTw<1@8NC@b&$`q~AE zzi7YIPQFeA?8Nws+Kppj{6%rk_>ej`DJ6uJw3t2cB&;)OA}*ku7b0Or>=-Xnd|;z1 z+W8Ka!%b@uD#c^`T@HO?Or&e4MZHgsQ=snoFD}CqAs)lF7>`w2`*MT@{64)|^W36V zcEXrGb-VXz9?BlN&iJ$%;xYc*0G_aKii_HXSTO!-_WX%q?M)%WegzUFaJPx)b0`}Lqlt5^K{(NPs!7xm1$;RZ$N zz7|Jhu9o_-inXC!G{(|u1!(KsmlZj81N&UpocE{&`Y@}Le^zdp3p&u&v)n{oqWLhR zNYe-v^L=~l{Vf;|xkr6iP=r&^$aX3=YYltcE}z_&qdG zC26$8^N4!MCe96v+Cbg<4k;~*Kp%!}Q6Dzo@7H{xc_jPV$g2JAUp~T;XQ_2Bm_#!5 z$1}#`L8nB|1Ld9D@mU_5o`*(DWci-D6*Km5oXGR3iHRrjK_52yvUTRbHE^CAJZLl(^kMvYzE&l1 zL+uA@+~MHm9BkII+4~Px*VbtzNNlmJCxUOQo zj{UY62>LL7o^Plm+bDkEzyo>k7lY@Kk&=druPM6^lI$$%@^_tQ5K{?Qa zVO!LPxgYGYASo`A-Kq9z_)E}-ojJa7BO4LPYS-_(av5l>C=SXSk6(c|9Gmip*iauv z?e?wSx=XWS6%$fNk1;a$Db?qes3hAce&E0pd4PW*){B%hRD2HLuS(KriT4%GM@?H@ zJMsOJNVebk(MM4qCN>q@qCPCH^L$q$_p|UfioN@rfgTLoqCTu*NY9q-Z$+{#9riw%GQ~@1u=(2b z={F-;)ZwaShk!ne;-I|obxFV*>cc3Hhz<2&)b0!Y&HEo18p)<6rzJ&$K8*VnF|utG zKXBlKJa8ZKbEKpXn}+>cc3Hhz<2&)NbQxCBE)+BiX~*7Z*KV zGi?Oym^g0g*CrbQQs@8ne+M?QBz^P2E{>nrwjcV{;@EKf%ud>>>~7 ze{j8CGd0^%7-l6Q#es{vM=Ah#*HnH3` z&_n1IRQ{Y1l=ly7aw&GGhp?^KxNS8k?~OF8mJ4O<|3b>^OiFEySODdR)? 
zff_&Lk!xvhX{5Cte6GAiUQ43OquYdvn>;~B`5>nJDskOQ^m3mncR+7f?@yeq=wAxI zYlXIezR-WgzWH+){MqbviwAwWp^^OA=s)*IwYm;EOUrtU^ttM-j=BewuJX7FN)PzpaNMrGP0eHi>rt(VtX4m1o z;ChD=pNjSsKg#{hx_m8c31G#z2C-vYW47kw<=Dzm?20z7c~Nb?ccFfi6xUdu9((Rm znJ5YWq)DcGr{!x{HDw$WYv^@BuLC|04%Y#$d!g5bxNV7eU0iW- z|6vGm&6Kt&b55?;uqVRdJn_1lTcv1P*U=i*T{|9fk8uq>&y;^SeF1P}i06@G=^iEd z?tCtKlIEXr&CnY=lWn0c#x>X$sZUF54%Q5aSc5Wly|P{tP~}U zP0z#Nmd&sI{Q~D*8`oGhYp>r2c*3|Q$7?q0SO7iD>M2&Q<6cr`KMR!O0Eo`odV!_%th_4$`M@eH} z%<200Wh9&zZCr%d(4TK>urHza8;umltX(66b^yg1yn;o!#-{Z9Y{EL|T4|{HllKgp!gYw4L z{{V03&r==|8~XFq?!5O6cct1!u^_EK&;7w(vTYPUaG*pUxc2Qzl;qFzZyG(LB%fNB zhk$m5$97S!YF!>;2AeId2X)b( z$F}ItEAwwGw0{@HR%`wFfz|`RFDrbK+7|AECZ@llS<){#f6m3*z_Al=^4A#&#C=ERxIp({{T@7`O?c7FQlw zCPcH9Uwy}kYYE&2G<0eW<#H`bvK~R1$Ncfp4Dp#gHYsQZi+yuQ&@h$pvh-O6_r^OA1wGI4Swv1-c;wIh3Zo+Ge`U`ut zie~kKKEG}ZYbCHf+RKT6z5mgkZhJ>2ogNV_M5G<)8n>WN={FQ8`=vEY83kqKSv|da zanV9awa6}OQ~L-NuN-cA9Lf{ZzkPeMT(Vynq_rRWC;Uj`!SA7P@OK|Ulwx|_px~_^}WB46&2_#L9e(C9@ka7w9=r}9%W;@u2 zoRzIy0W>!~p2g%hf^xSw>rFpfy9&c#Yq|z)aymupl3$IAQd^K}9H zeI&hxE)Nqj{vN*tbupI!wneSTh29^YRfgYfw0pm%nmk?5B}O4>>M0J&8~4G0H`Izy z9uXUAMX24k6uV}GJ@8>)w7CQZd45Upd*~x+MJRsYARKw%`(dWY`|XQciA}lhBk4PI zdB{zRs2dM;acu&&MXg4qAx%v;&s8uZ?b?Kq30KDE&rwLZ1Sk&58=tcR-cVaYc|>f0 zooh>|-R?6E=q>&0!)&#=1o+;L#Xgd@gyM%@BH-Zng(&~7zE$3DpX=oQ_(*yWT^`Q) zelXRCx|mA<+oG1F_6k*ja*Kj>(&iHQlPsvGWGSRv0u%@3EppB6&L4p{T$@07L~N)v zp>_w1sd>uISRw8G%K2<6+eYyN2mO!-t{*-R`8l+?1Zo%BUx{w=?_2^wucoD@L0!xx zfNfE0a`R~7@l)p%tWtbwE`cIP{W{H{jJX6T4$9lMfuOPm-cV~oc|>feHKBI@ys@{^ z30noc2TJ#T<=UcNvTYPUaG*jS_`cL(@_yRl7W!qE_5G)|uwU{AuV10C2eme53bsXU z;rPTsPn*E+p4z>_YghU5xpPP$TcypJ(sJ6Q1OD9unfu2nMvq}nx4PY(7f-hdWaXbd z*I>`MzM(Z`dpRt$c*nXSu1(<7GYMJofP_XW1a_r;*FCafL& zZmWG?bna0zHaI;{+AEympuF+9c;F4+7nDcDhVKh%_sAi6QIjhNO1WpM3}`-NY!%ry ziXS-8AP-P8A@iubpLQ~&Gul+?-+P6}>`EzI33c&(fo<`9aWdJfeD0AzHj0(zo~gaK zIG{R|G4~9`L3!hQ+W~L*zMwoJHo(r`7u4>8wXdEYZ3E{?`@XQ~4r&2Q**1zFdWnLA zzc09s?0%r6--A`6Ij1cCy;t~vp-uEGp)S5Jur0nX4(*$Ne*C>aR$cqPNM92xY`PmL 
zy)P&Z%3C>c5hC!0?+eN!V#D_Zwfoxwy9|v-pp<)t@2})3+eYyN2hqp_?51sc@_V54 zzSvU@l=1F?(lcB4zW7yU-|!<)7vC4y7T*_h?qsdV{}#xGXmigjkFfsi1Z8|*P#lyu zF%`rP;0@mwlt;vd?+a@8vJcN+J|7$?t!3kLPeWweD1P9eKk@)si@pwl{nc^3*W+@K zj0pNdeV{H6tpfeMWl9bKk;Qs?5sdGLS34Brg9_LeSEgIKiE@_;s*`}AP*fx$DysfpEm5` zYrA45^!1P2Gb20cEw+cc7TVl1*cP>kndg%yt+!ONFWTHQxv6>)d2Qsq!YK~Q8-FhX zk2p5v5wW3mk=pGXbvN(h#z2;>%{{}{E^P{wbe9xAa4-;g$n+9wt6b#$w8az~b)hd^ z?IL~?ZxC3gS}4@T+%eb|wTmUK^ID`kD_I?FE}8XJtC&}SGUkq_sf|;Nta3S0|$eUhl#(t56kT*@3##|?AD(O{ExN~ zd8p`fw`>m7#T-1?7PW;BO@?>+6rp5&vP*ODJWmu3CPEo=@K79-x9gBgEb{CW%2?mKPug<5%m?7+R!96SdF*u4PCsD-5XfrG)w1Habd<^8z%{NR&7NuQ|u zeE1dqrrA=cyFr_S2iv0du~yIRakb(1bJ`p{PETx)`;1jeId~`z${SyM2E3s*lJbby zP#a0@K3;gY+UKG`DF;vK-W`FGu9M;i4u&8Pe_`vKI}7Cfy1AYa`a(UUE)R7r2Clma zb#W~lwnc5?SXYy?cJO;h!fDH+9)j?G(UlXa^ObCFhI0x$bT-W1k*|05YCuc`2u=%(N_Ww>TUCS0=Y`jff zd@UQrL3zuCv~hh!9<}?ZQyvi;u4SWkpUl5d=LI~+Eth)yo;BQ7;QpzrY#YT79K<3I zm26i8tcLs`xZgP6#})e0-3uMBhm<#6Q_`Vs(vLNhufpEv*cSIfUwCu9ep&dv;-66G z%YAwZ9Y!zqj)j~V-==LQyvi;?wwBU?)}4Mz5_hRx4e6{ zU}L$S0@qqV8EvEZfrDYl1Aj(;P?ml_oZVgEd(gqN>NlkQC*`$gRa?w>-Lf7=7wD1P8zxX6V#$Iic_ z>IE^Lx2K^^>39AyO2+530K3RTRXD`Ily2rNYeU@!?j{d?!);3U)`2pgSA;Shhw{=K zwkOF0Ua)V<6W18P*kbC`W~;gj)UKIcnU{vngPbp#7x%JT*$9(dAv+yBPY^rB2ard= z!Q-gjAlGX?Ti##3bX`>FKj&E@jn@8Z26bChPB9Vp&d0Vm&l2y^$HoAD-#9Jz)YyG-8=t8`4Q2h2ZTv$IBkFIU8w0F#$vT=IJp)qTHd$6E_>T~HI#Ahe8h(H zEVJB<$89(t$~I~D&R-vNHRA1uQ1)88cYYykyo;}6Sat2*`8@YcUD-B@A2=9^Jn(gSm*oBB zxgNLnzxU3suw|gD8Pvr!oY)rUSv;4-w)%51jJay}&hNS2xoy&gFcwhF%R^klNpVo# zxL*gn;XDiF5wYPs3$?rNqLRQ1)?w`7wV(ts_bu1sa50NpsiY z62SA&7tVF)-v17JYN@lK?i5?sy5ipX*cRtm=s1)YzP18*!M-U^)DFY~SoyrlXM^)? 
z41&X8&%XDQE5Vvh&Ks1u#vSq#Q+&{BG#vfJ=Nyl{D#`oX^L;LrVbWZNXeaK?O~-k1 zdwt&>@nLLQxzHusES*X<%ZXvqoDBAj^DJ$~nOr(wF`RY%xt8yhPjT^ik2mtzhy~|O z==cC_3>^G?aUFU~dH;FtQvx=ei_kq^+kUrf@e=Cd-uc)T=RtNlRS16CBAng%Rl0Y6 z`;6+=!=a38G{x;=?tvZHH?@oMf>?0g<5f3Php^>g?3MkkiN?bmOLJR58Rz^dKCp2t z9Q=9Bb)=A=0OvWlUT#^KG}oZ}oa&YtUT!|r9Uk@Jhqz}Cw#9jlb|0$!Z3e$rYi+yS zC6KqqW}c&kEq?;S1OtDirgzrg!lo3D-H0~-_3PQD*RO1Kou$BF{% z&tcMBfv%l>uV0<@4(i_7Q}K$J(+u0oQI%xbv@lPc3X{b zcIx%(sg|%Nlke3DWt@kg_`t?-XeYmRHik>FIzLC%!==Yl^I35oEoZ`>A&T>8hzOHdcKS?hm+9}0TDP_j4fV!>s7V}&Q*cP=q zgSs9!+XJ}?I<;)rk3p`4RMSptdO+Ez@9EsrpuOMK)O2yh1}fHfbH)d84MT3vKSMeh zso1cQcczbqH4A+0YJE9Z!f17))R|DmH4KOiwK`4RH|!1F*^kAaG5j+GawUwn>l5L5 zwI92y)!y^>1(Z>%lS1%=X%mo#Qf+~pPoPwn)=$#E>++Cu;pSmyFBSWs&6R*{QG4&O zbY{6|PZisv&6RNGZT#vI4>?x?#X))F_apFz+Iz|)VngjcwfkzQWv}7y`!U-krMVLL z_r(wWB<(%L4;)N{BSZ8v_*_o3O46Hi4bQuNlAc}U0k!wdHy3X`t%3Dy?Uv?>+WUrG zGn5X{_K#M3kA0!`KI_fz=(B_;tJ9u;2qo0v1aHI1^ z4QQYjmYAO~g}!cYDM%1=PGH}ty>Hzt{`H}Hum}G5PDe(uCRR1`;n}vHs=InHLf+~u8g&xvvs07lriT7+Jf5q=~Z6M z$#;acN7|ed=lY*~aehW5bJyma;F{W5a?Xi<)jPbcwJ(yTjgL1F*UD1+fx{#?^2Fl8iZJPWq#dL$HZI% zM{icWS=CC-Y|MWK|8!CabyQ)emH}2X^M&=s!47Yk^bgv(K_DN@6M@l&-3QjvLv^f+hY44GH)ZWwcO!;sX__O>}q@;h>eXi_xG|v>E?za-_A3tDCF}6kR{kjvXMs>!i*^A7* z&x0Z7!~vVjHZK$8oD=jsP~Q2xoV%QJf}V$3Gp&O4y2?2x28@8E~}-S6AurJYH<6KTFN;=aZuj)nlH!|gX#F1L=4P8E|r8Ni?2jz|XTfiHxL7+S$He7>1?Vfw}McAiF zYL;hPnsb7$NlB7zqxgY?WaNS8TdXDLW#H?mCjL9;gztLenSM|=I`G$l_OPZH+u|C8 zFEazS3^0T>M?0RHZ|>qPTze4|bD$>VsOV{EI}+9uQyi2xzBUqg!!-z$N5qC}5UAbi z^{2bff%~wTHs=J_an0e=0cmF-#{><`v{S`bQ492Ff9=gP<+WbHLRd zYgnTel>2LQz)>8OHy$SeZ#XvP5wT%hL+x(+_SDuXdK>yQY}Wb5xLRqxgY?S#U7% zy~)>$F48bQ7nbN=y02VQ4deS)LF^&EH!o~1FgqP?V72OyXuTXP%Fs&LHPq}XXAO`-#9kQc|1zjfXI19bkD=YCbjQ0g1YZBzf{}~ zIZv=H=0JHrx7x#3n_zA2pU0PH{dE&Y?LB))56V~iwXIqM@_3XezP}y?Yl??khHuIP z+`Tj&=Q^ap_5FL;4KW7_#YxXApTCFmiuqaSdFJyhz(a;M2g-nM*Pr^t$vIF`J$9K+ zRmnL}&{oWWLV3TiN%NN2q{elI97=%|?<;m>YfTt0AvkCV{k>-3`C;r?vUqQ!O2!Jd%&gO=3I>g~@S zOlfqdB7Ar06fnD$9+cyd-|&;ZZ&3We!Cd5l$9m)Befaq| 
zv1@+PHv?UN`bMMIi_bybf>#sPh&cwZEq;%PG0!{M5`K5{Y&|P^nWGTW%G*^jrv~tb-zzAOhz-9_P`k?-cbN0xGw`6*@pJ#9&`wXOzI0%Z< zJ{1af`7^y<4Q~&H?;8{cxFeTt2?4;{d3OIR>yTe$Qy8UYGIgl0O^u zv^2-S#L#k244{1BXK$Wkfa0LM^>+l10eHi)DUXN^zjsi((}EJ8d%3~$W_`|u&tAQS z%wyU0P2FYND1P8z0rJ3ovseDoH-j}lJHFc9$6xv;q07U}i7hwXfV!y5#J2doqlxe7Y9n>8hcZw8Jj#Q*i7^pL zVvYf9i@MDBonQP-h2Ov3Yko2rQiCbVFB!GEUI4RPHEyb?%cM9cZ>zU#yBh(#;n0q3Au7QI|>a0|%+d1J}N_m-penn-u4qy4+vV zM(XmgZD^vBXaeRN*#11#vb|UB2au_<5E1|AI5>2&PvWVQ2MPtNYcXTp6?)^@z$PDH*R5| z>NV)_ur2EFdYP<$xuZic>!)`)u?6VvekV0mu7h%!8XG(ZK$+s8yol)nhX60A!=pSQ zHq_x!JMHHcyUtk_B;``zIigoUdEeA=R~=jgiXS*whCGzUWOBZNQY}r8q=nPv;bhal z{o42jOFBGki#oh7Rm=A(1HWI<>hP|te(yBRCz!F2$L^vIkK&-baors7hB`dTBVt1x z9<@9EwQqCN<3Um`1wL;D<&ok-h!kgm;s*|v!@;k69@`F+^AqrWG>-;JS~-yiyzUck z*YdWF4VH2#(0(ZoeBKXuK;0eX1;>IRxXzB+F>F(bQN6q%)cmh##{<~z8rYM^@dHHwO99(rT4=w`gV|keE`@h>)_9GA zGV1gw4$2EZzrYK|#gr$+hH){qd$^4%W{^oRyWDW@+=5s~f$J5aj5(!RM!T6uc^(Y(5=a{Ff+(cfw>eY4$2$< zE(E+`TugaHY#0|)yNz#!cIjvr%$90%DR3PLl+iYdA2?VA2fyz5*iVBctsI}5v<;SG zQ{DSJBd7hZAy5}{DPUWSi|II&7yjM?UNG*ZJmJ_F_forZI$oOU7!oYyQsDD6p)z)g z4{ThGc5)5U?_eqJuX-5U zKpAr>P#jzbpC^w+?LaIT=T2PuaAL2q!BQ>-{(O&@u~U3tV+PvE_c5v?=O^I#;>QF_ zv8Aq^M~&B?-%>k-c{yb9Tne;bYA4^H1nk5(mD-JCVVp|sc%L((aQ32LcJ}+BEVFcb zfn!Y#mf}>zj&U``2R5!jJNdQJR?bhr_tS^IF?Q6oGwsRV3zwm8C2cMRY>RPg&*g!) zi!DRgbI;OT3I-N>>KRb}tbERMDe&)lz!UaOaZ$Sv3&yE-9!EpQZU^2fM{^w?&pEIo zSc!q`yP&Q%LCva=mSn4UJ50_~UD$@4LTo!A$(o8rc? 
zG49-S^PqZKPB7b7{a}@sW!CvkYsWwD>*bo%B8@c8^9CBl@u3_jd3Nlv(4ct zlYO2AOSu&IJjio+E@^WqP<&wHdN?w~?^S$F9FzB%=ieeK%KPApcA^e%ZO6e^`#@dX zs}kFyUgmwi^O}egDrv9E#<{O9=O0r^dsVVFb2hG*lOHy$k^s3Ew0l+dpZLZn0Q8Hfmnoiavp3g5-h&@)LA^}Mf{oh+ z(A{BfhcD`CmVwY7^)kN?&Ip^)OU3fEdsXuHy`9VrwI8U-goFDr{M%Q7yiYyP18e*5 zJ@|Wno@`wQ>RxI7&`jKeAKRi{W+a=o*7B{2RSF$sehc>CFJoS4R+taGYWJF?I4JL2 z4+A+Ma1VZZ9uOPqWvJalhQ^-uidC_8+CBLBc^)R)M)3m&8;}S7t;$f|gWplSnTP&+ z5B_blqU_E=-RK#;4JyDM{MZ)tGVN|1Z}ProBpVRddR;5ngTHgwn@M*KB3a1qxwDpd z`Un&U<&Ecfg4`LXm!Ui&Hq^^dyYKu=cyG5t#q70v@N>Oly1WNJ#Sa{8L>~Az#4hqa z^<1O6{NH=Sca5eO9JxXV4pJ_v~zNIn}gJCwUKkii7gT_a+72 zP%lGyL~N*+p>~^i+$zdArD7J^Jv;e)z-g7VA1TET9Be`!cq|Yg?^Dm$LY!1dnicW+ zg7dk5J^eR_LS6l?4?cGSy$rTRz08x!A%`ydN3wCdT4ybWJ^06tKJR+iFOpr;?%7Fk zP~Ny73cR6ShVqEmP%lI6ZnC#^>-TUUCTjQK=X>_U^@VyFiXTkdj6CprJWk%Ho@@DD zLtdJH=w(bUM|Ertb&U(gS(<`=9owQ_W?pi^l5LPv;Y!W3$wo9Aw5n_y#Sa{0ArD;pnj-I0 z&-at9EboJ_dp*Rw`quOS)WtnJu`TLlo_QT!)d_w-rrm>o@dNWV3fKc5_u!{EC~sW% z1-zkNhVqEmP%lI6UN=lpb&!R;2S3j-)<(9C;s*}4z|l{9@A5fM3r*=eI;XS1wY#8~ z;n(OqYpxT)b#`w*8HF_hT~4E^b{#u*w-!E3zcubQ=;xdU1>e7VyR*RkV9?v~_dWE5 zeVe|Rvg&e`E`rh4Q%jzV#AmH)4{Gy$NsdC`*N(TEeVPUvQTh1(XMMO~NZgszP#5=N z$F^g{4R>H~_LG%6_4V#7@6B!%Jl&-2`ctal`=7IQdZ-~2fUW#@UXn;Q(5_rM=| zX>(i0SkTdEb2xcqu5U6B`d>Y$Zr56{2R_A3dFJy6z_TdT^8X7tp9t@03-?c94}8kg z$Q|n<9z9b_I(+WOzmV4?Jk)ni?P#E3Ep|WZCF;wm{XpM#IPydu`98TD<$dY-T=i2~ zTP{8y8o(j`d9(>AdG%e^vDf%>@W-C=8Y#C|z5XA{Y^|hWi?q4D9vj;=@q_Ce{Xae* zXCjZqbGia=xCj2c8A10x!FfUb_0*oqUoGdzd*IhR)S}*gz>0g|BX-o`@0(+voU%b9 z<@Or*^QpdJCbZY)_S&`M!o-4=@*e(!Z?g)S(=<|Uug+}ai3v&a9{BXSpw|J{oWXT~ zI(&LvnACbPRJ<-whp*q`)#-=!pk37F_WGM&c4lC04fE6H_CoGahfmKl9juHn-Q1kO@71-_%HP1w9Xx zcWwvp&UNH|3W1)7PX;4rHeMj_fq$S{(uw}S6UG%@#@CuZ1)gvZe6$7Q3VK~Rd|tO) zitGQ~b`R=eT!C#du9y(ncTS1f z|M>a}u&kEvYiw-A?iK^ZLIp(+VggEcr`_F+-F0p3PV5}(+6lHIiYVCK@vSqQea?IL zkN@ZM@Z4waJ9B33b#l+_y;rp8ruh{UUW|GBqI$Fl()8Bfo;%-TQ8juJQMxbZsyB zU3x!C_bWJla1c*C$eNoyjC%PpNB3@&{%vo{L#O*MOY}xxbIq@yvE*0SE&5=Y)ID0* zYktMqY+?H^bu+X8I1bL6{N4g@wv8>MRj$o&OeU*uPC{NP{@ 
z@gTot)){r;3f>S()YB(1NZ!G@w>0~J^i$cSR9EO^z=R5 zC*b$WM$ph8za**1_?!Y-5Q&-}BIEEU8*{&l-?w-d+T@oIHu|1knOt~gS|y|Q|Ht2H zLq|9mHSlHLNGroOjvpNC#h*~0lX6d@M2tWMeD@B;oD{j{y9IM697!pP7+sYIin~s2 zwj<^{YV~`M+?da+j+3&x>T;zu#{N3%VZ9&q>FHRgCijn8zrJ9ztfh7pj}fO2Y?@Wu z1@-O6+g4G~j_o|8R8`cgU)Qz#>FwEL^my7SQ9W1V#3GZ%|f9a&`%er!YH)t%3 zu+KTa4B~Ccw!OtrN%Xpt(*546Ev_o8!=E z$UA+OJM8UIysK02$!JkbtB)`D_s}Np{t5TF@B#A|Xt{r6*NRTcU+dX+qmrUUnQP^m z_dvaU&L61Vhd=q=mwWED4DA4Ujh%?rYyGR|fz}hd)xUh-1AS|aoRZ!Pb@pj2)!l!2 z^WKeaHDkove2p!Jqt1S|+D(pzq8*y6pue3m!FCTX0tg16Wwvf&M=TWJf$KoBvFDv_E z$SCM4+&;R-^X=_u(L>W!Sa!PN&VF~Gd8FwyaQi{ce*DSzeu!FRaFJ0HTDB9s{}bOq z>n4M@*?w$s92_7XP`RpU=L1GfXc^1!!&{HR zP_HNApkl+ zM*IiI!FiMMTHuZ1KRAzsjp9GJ-MmBc*A3kKH{^ck0AFqS{t@;{NUga@i1I{d4KWIV=7RQ ze&*9wK6=cC>Gklh|LNm*(URo^8urB{o3!~ok|KMP} zNi8JsM)4z@N5V$&Bi!zeb_09H#ro)Z6&dSsK6+jS#}5vU5D#*Xww_TFI`7`~L(fk1 z(PKDFd2rwKqQzJArFz#imf}awj!y1U6~EWe^btPf>TztWwNdYygLzIWHOY^>ISqOlY|a^`Z>kb9^ZK2)oB?brQg+oot&ZTBr!&8y%zIB#kR z@>uXj@gtl^!bb5U-0s9T?OaEk_7Q=jLJqb&;;6`d?lYP#sCUirgM(wl!(^1R9oyNc z39ZUy+&tx@$9S0XFy};M%5d~u-FNPwr_~)58cXpbiyJNMzA*-M+@I8Iy35K@Njqs# zrYzc*EXz9gLS6;O!FiMKMes)PC7egXM)4)w?tz6nkN)}CN4&Bo_raGZEhOGqvoYt)36_b85K8ec;1%|f4j&n!aUCU497^y=M8p|KQSGO}Zw)15KC zqVjk9Vu=G(H90%u3`hVb!=H@&SHi|dlcIPSbX2Toc822z2ZDHz&-$fCP3S?8l}Y~XqsNSxUJnjM8XeD$zEuvMnt2;_ zq-ZR~e{^wc-nHuzUvb+%s)zRx2j$i3208aFM*Zob3)3P`I4B$k=S}8yf;Wm+;XD#H zidW%wrx$VEl?`+de?5bD!ffV zU&^bXu@s-;@H9nn#qZy>yowc*`cIp_+gH!4;5ayMG7kd0QTz($k+4y`3b)%Pp?+vx z8=R+3^HL`zD;e6yXv_QezM^pa;NT?jfNZl(MK1a3F(I;+a-+=aUH81PqE8|8rFz#i zmf}Z#zw@3q{33K=hGx{eZqq!+jB6Ks#dNLSHOIkulku+LjpAK6kA#ilTe#h)BY%9E z=;|xZeI7f}zXtL?)H<(jhHV@_I5dK+h!X;?=l@EftP~^Y(g0bE`z~M)5D4N5V$& zFWheD;kMmMcl8zZ0{se1M_z^G6K#rr;rPM9Y2rcd+5Pm@a}Q)qg)Xo~t2u4T!|c?+ z1-Z~S=Y|r0)VvBBOL-Ncw=U)wf!gUWPAqDa|De5+Z^Ey`iQj$ode`8@jzM`mB?^?cJ(WYA796vZXLp;b(p~6N@XjMML2k!*RZ7}7* zYjD|)chGla-#zSQeMU17YF?2 z#V_0HRYBsOR{vPW562mES6DQ2-2=3#{xM-Azk5Taldn>;1_>{%{_(bY1Fl)t3=%K2 z`o}WPqn2SC#}5w95)T=Bz(IO$g8W9Qk$L^&CS$&zocIhlwfe_2mi+Em>vKQD@q2j7 
zjQYopcQ-$~{4_|ff6Q@k-ef!*cq6}?^GMjp@8))=XDOUk+%-s4PxUNaxlKDo?sKC} zc^4c%I5>wtBMv{OQ6pH^NbDG-`@ZUZL2>xMCPXKDqi^lsJIBsB*fwL|7H#so`8=E# z8GjC5$gk!+(cI)$bGt@(7fS8WFGy_d`!=c79Ifs!+LTwp@d5gI{DrCOr*hAtno%QI z?)ml&`j1~t@d$jLlqM$&w45I#_DpMg*bViDr5$Kf-b2T!`x5?CHR>VLT;zvyTr$2W zyHWp`umGm?qxt;!zZdW)?U1#TTNyQnWi7FJLAr0Mo-eY43%33oBZa+o~N7VbU^J@t$s1LQ|@n7G3pl+7V?An{D5{5fAYGMF+ZV3 z4PaRtBtA&@Elsbx{Ws3dtBk%>znI37Uwf$RrVHgmP={5kUu-*g*ZS?j!FpbVj0c1r zAs`s zd&hA^*8Z2#m+BYOSn^wa&g|?Nir*b;WYjM%9+S2=3^kwaYFys97k(?p!FiGKK;VV^ zR?ZV)Bfpi~ebUcn#QJ7rAm z4x(y!OZIy_iBBi;X(A>G^w+b>X*`3@=M3$DtP?O_F(ZMF3X2116wI4*C_WR z(5CvuvJO9ZBEOU4qPfZMKDuBZyf_W#|Im)lASU(;C!&2OCYs+ zaV+xvOwZ?{Fa3`to(&fHf}LdE13xaeQ^o_qPC6F1o8}@vlG`yXcb1n4?SsWPhquj6 z&v#HH)(!?%!cKlK#|Im)k)86|c^s_!Uh*8Z3)X!*Q#-3wXf^pO`fg0vC+io}Sn?~k z_?({J2*3aN+3VHA91e=V?*+eI$-#R4VwtA^p2%l^PxR( zZj|MC_>CMNY`ji(X5^aST0@18##I$X>efdD>%N<*o#n!}wrKDpSlmoMd#DNW@o6mi zjeH(%cShczacs^5%}stGx1;oxH*dC#3>NE$To~8|exck48)abU_+aA={K@CB%yE5b z)bP#NE6=<>@y@UfccajEXum!+8kX#+&{(SDb#BJnGw)x7iOLrnC(kO=QJJvncg+%L zw^=(mdP1d+%B8rLRtK+#>HE=2?(AD^(584!8F!Al%2dbe*xCF4+W{0lRmZ~TP%#2h#0S?(``XR1%kc_&+_J~8L1X7vibsq4Z-7p-nrMqR)# zy;gIBAv301nuUv-MgA$T)*a*agT6%k36)0~>Y?&@y7N!t|PB>U~4|^4GQ{ z$E2Ze(SoIFwm|(e8cX%i))u%>vKD?{XBizlbDy&^`a+PyS=9A@`{zU7HK>O+rdmXq zmEirBhKrvE&UL0j_k|2YrQjg>WOJ+wJ1+{(_td7<;> z?AgJ00@`#uvX$zgabEwH92vj&c$l!&>Y>T_DG{b~mD{;@P3LD}dOb9{4sLuOKcpP$ zp+$wpPU)Q|Ty%LcdTdqHL*x8`+MD>3*R#F)5ZY_hM$X6q4A)~m)$>66@-05MpV$v^ z)D)lE!S`pUUki*n(Zyd58{7uRqB_xZY>H3a`ODn=UVMbOVAg4PzxD3Q@cZo+x$KS* z4>tr^FW=~{?Ady}X)heVlzmwlpIV^myUpi@;CO2fHcCMp@T)Q3XH*=F`0^sRD>wY< zuE;q1fkvI^e_DHv>Vo#m2Knl?LA)y2Lh-5jzBFl*>j~~htxmMFXVK-)s~L5oWxSQ8 z!A0t~v8(HQM2JONooH@9sJVr|Wc7NMwRT<@wVUPIPu0xpL_4oul=#XvLiE$>MCaMi zs<~*3`qgunUOeaGuB^PcaODAKqfYeJx?QFRMns5eTAgV5zKb;Gy7~BHSa0w~b)xg{ zT0iU?&I{Fvb`ZhSYh8`d>qPf1dZ6+FVC_`9@Xryb6HVADKDBVWX>(_%M~Kr7z5M2* zPV|S%(~1^Ji_q&t`?X2S^DY^A0?VrX3PGJ~@1WEo>&`{!b)q*+YBqNC=?J|}G`}wR zbs+Qga2-&5D!(o!g!nJ(3H#cuI_F}s++Df0cb@Ij2}YghK06mU`T4-Uas?hHOmkO= 
zdx}rx=b7`5({~$x@_vw5N*i^eEi^E&c~lX@Lcd}{Hu zlzPv=6U8Z$Efk;1udDFUi+ig)Q{LOTrSqmP|Hr5kEuVjjjXKfXesFjPe`D19LDt4> zW7JZXzUQLM>qL+2VV}Py`cj=}8mq^rmVN8v*g8_Qd%1JErle+CAby)-;)A z)KZo)^`DH~GgBUZFa4ai68d&-QmDq{$F2&ErTElIC6yD-Gm)PW7rV8@GgoCn^4H(N zGa^Nm5*r^Ke&MQc9Go|q2ieW26U})fY!si$?Y215xOl_Jh%v{7V{3{$xtBTHq-BMc|Ddy zev0N_zTGip^Z<;dI?r@0@-K%+q;9)ZDoVI(b)Wl9Z~VgkPyQv1CI9l>((p;m@%w4bzr5=E zX~$jEr6>RL+V1aDc7b>DFZp@kyzjqlf7}Z^H`n}2`8-3Lu<`RyOLYKop2@#_Tsv** zO0?;`EqQV5!a}sE-Zt4v{w3$tZGF+o=Way`8_mDmS{9Kuwi%WP zXAUR7;rLWGifbf1vg;od>zb)z+vPEq(W!k$O#Q)ANuccl0bf^rd>+ zG?waZw_X4BWlsDauUtAf3iY-<-dx-~19_r@e#{V0QE&US zWsyAX8%BxmqlPu!je6T0CqJ*!uflnydfWUw*W0!0@)z)Mxc`kYR$HBw#Sz`#^?<&> zn!*XAtHaN>8@8<4nm>_3NlXvhf%(W*s<+K~cb&g)kKKz%JwM>k{9k#ty}`Gl7OyYY zrQSvgdB2=3;ySaTaH80}r_BU&%rbH`a2L4@U zR@zCC^|qcE$M(LJ{^ZNmXod0w#uQ!{)%|R=Lh^xavFR>2mn zZuyq3*|Ns3j#lV8>UOEmx@Tzb)vg=49=yh|J*0iUF&EIL>nPrRpYsRc-(}vjU`6Cv z^tf_j(a1Z|imhMRIyKLNxT8FaFAuxND&Ugpq;vjo4pQ(ZpYQVBF+5t)WBt|QGf$%x z%0*Dm1Fg3dzT2$R9Q3_mKDtSNVHdA&ugHT2XSa^70+?t zh4Me77B+05JP3ZB{93)d)+daoJcuRM*)Dy$9IcdW-|*J;=njhf9z~nD;r4@?r}!JA zKHt?+u`fq>>U+r%YS8>SkN@mTlV9aDrBKXM^rd)j8cX}kD?+XfDvI9+iYJxxnzvQn z_84>CbGWA-@BMS`_JR%}o_gGTdz+OF2L^kJ23nlFjFS!W)aNc#WbTtVv`dFpuTltg z6A2q&%YACM))9Blo%ax*v^e*(!@789d*vaTnPu=@?W%RpPb{zQBv_IdB~$C zA1pCy&CA%MmmdGw*CrlXw^=#3ANo?fH;tuz>jg6`nx9_mDe`Oa-b04IkAL%zr7ResJ)Nc#!9E zzfo&m#vJ9%ypI0OJB?GbpfAOH(^%RU@3q6Kp!Gga(Pqd{zf((ED+8Mxh?>6FQxw0P zB_I)X^f?aBoBS>TZ?tdDc_eJKZ_e!=_;0j-Eh|r9rp0^jziyV4wT@vM#}5vk6A!t+ z$90Z}76I)oAG$a7cXaK{>*!~3co?`1eW{KE ziMEWJvh#5V&;RV3(|zHdH`aVI`quvWGF6TDrm?hdy*$qR(~DQ0qM=Cq>e}wB(U;=AX)NulAAIN9?r}~pVd)=RG#T;UV^XFq zbw<08vwwXx-kam#ya`BtSb;a%cjr73HrjXRc5g48AdXJ(6sZCGrpF=PTgKH*^wi_M zIewV-f_T`pwv$q%l2L13y-i+>_tf{wO?miuWK48H^xaZV$arrWOZ(pcj-Fg_Kt(U{ zCLkl;J1TVaR#CxAEE_Xd#(Q%doVV?FLI=5mH`=G?JQ6nAr{{JjagM*jEgS^&kjau_ErgWvJzISfQ!_CIs7bKzYdaaH=jir70H%00__s8#N zvSrlKueASo{Y6cUI{F+3=gmxYpurpM+jAZX8|~Y3yO&%D7}Mmir%2xx7CSdtQEom* zK^C+r-kakG2d{{SyEp~qJ&aoOvPSzM&;RV((|z$i%gf?r(3j%9X)NvAdsSR*8|&^R 
z0*7V9d!O&_-tfMgmzbf&dvhF|w{M?JnnBx9i-So+711+GiK+ z>&tb>Tb_DM0>=*yUK0<_{>b|3YSfySYwn4jdd#)y^^h{tDrW-vy4}dWX;{UU3XP?B z@6Jt%Y_q}d6z^UCi>SAxidm#DnyMpus>f(R-VGI~j#zQOva|5BD85yIP=c)n)s0D~R`| zu@vv!rR>iylcC9Qr}6r2@Ex%9ECfhiEaPXFRcv`gF`BZ4WP)zhXJT}AePN0};Qy#jWIG7~`eaC)X zHBPOgPh%f1Q4)F)E>ZUw& z$zHj3C-l8}WZ2|7SM3xUTYf`Y`?_fNnD#~d-R336hwj+ml4z%-4ZiiaD%y9G?2`PR z*eM(b=S|k118+1p=aI14TO}@emcO~e?M|+jb6Zb5Uz$F+;ch+8PLbcIXv=*?*v9dL zgZIS4g6PSS!Kb0q^>_BDpQUY;TRyL>4>$1=PXy6f&q3@9;Ox%y^5o6ZSQOwy+A>bdWoRNm-nvt*hC4f za>VV+9WPNfzenE5W=$22gYyQ@SJ;9#nw#@T*dz~iZ53|!B%j1FsWEtO2i#Cr?QWu& zs|$dg4cj<=aPSd-GX7YujXN8f4GmB4s@rRl-+$^4P~3a->5s}pqi^!EYEoZ<#!?XV~nb*^#z66b>JckZn+xfii<}ds;eThrmPMu3e zd)n4@QeT4O;JnE10`Nk44xA^#MtKh0ZqH(2YlkQLiTbOCPYr0_PQjN?@oqQ$^gIWS z9~^up9^_u=bVGwd=I|!?>A47|JX|^&cfAYxcFZ#6<^tq7&{)cI`1NG{c6$=PRfKPV*;@I~LeF>Ql)Xc!n@xjJ#_>=2qvc`X^ zzn+UAV~%Yz*OwU3E@*3Y^rbus8cXX%gXc6)sr=Mm&!hPAvi0%ZDgJsM1;@d8k$Ujp zh1QEWPlQchFJg9w70U5wLx8{NTP8zaLdJOn8n$u#;NUy)Aoq}=(Lw7)@}1)Euje|L z-rra52YB2-U(%PLv31pj64VR$=fS+#LHPZJrZ16jZ}^ocXm&Ky@+>$G&YO%M0B@9M z!FeQXv|hyR{?sSOuxvy8#aO@oZ#;x!MttBC1@7=X!cKeI9M|*#Wa%-i?_?LKpeZ7a{ zgN?t)PPwP)YG^jdm>(RA)?7^OELYyVXkGNZ9iQ`RBJ%rbEUowOdAQv&-W+z*u{jSk zH?8MzyDAkL@a@eNf06L)`2;KEUC4N}tN!|W4#x)@f8#Gq-3O8Xx)>S_Qe)t6Ond<;-v?gL|2ggl)X?^5pt8uw@W(yD-2WR9t$o=r_ z#<6KGTEE%UrFDb9qXKk23aJkOJ85oiH=iFi{KcQVzwlqYlMRgp`R>h{xjw|w2{zUL zEP~$F{O)~+V?Bk&(t1i|w>n4O#2#C=Jdh%%Po7O`(4>UKehj2UQ*UbIEyK#WH*}iG})IRo#JRaJFo#Qhz z%c8=U=TpX*4-M6QhKxOpkWBrZyY}Yar=TzC>Css7cd{LAe75|MP?25J)5|?*OP7m- zLq*;n9i_h`X$0DD@l!xwxuWg@!zVEk9lKu{jC4Xnx@0-&i z@Oy&h?+l$I<~=_Us_W@-9Go}#y$;@JZq6fNBY%h6UFB}IK9j45iVA;5*Rn+3w|rh$ z8n$u#;2}HlAbs!chNgx5M$KzzW|;EOV9@@rspv~}b!jZcE$q|FRbPl3oXmrMTshc^e({z8EUELgaFH7Eb)y=$%yDquJ6t~QIBy1G7 z%|EQ%dAeLj4+ z-tNv#^rg6E8cT7@UG4Judf@l%T7BWLhG8kaYefjp&8hQ6Vn>DJ;JnH867WWIa~=sB z#VvEYD?hjSQ1)!N7$G)*e z+{`dn_J=`IucO_*#CdAGEa`Q~^)|HSK035Q2#d`3fL;fmA9mz{9r8OzYM|XfthTyH z<=0Tj&{T;O^4kU4AnN&~^{t~tBmc>YzUkp%r(G8}RA_8IHMkJ%+;zSV8h0J>@ZO8e 
z8g6N*ocLMhz!08u^YN_y0_d$Y}Oszu+AIWb^9o^JuCIC;D~%j zncoXrDBtmreJN3(IV{3d1_}0c< ziOe(SW?x*#?iGHlOsejl5l4$SaH+de)m`x#wlq8zyivYmkpWxg6vTO`HpW=8Vc)Hh8{=K!LP5)n?>pQ zn(t0n96MFn(Bn9`<@?~}=_<5+X z*Cp1bZj{)p<;gbxc4M%59Za-;rNe=N^3aPDcDN-Npqmc{B?FWwqK+hQUevr@Hm4;@8%wc?Q8?duw)`wjB@$jz{g;|C9ghzD7_ zG~LjwkhwUWGS}mHl+8839(_rVgT|8IaOIzimR-`KL|;vh<4v1U9fv)S()|XGgYzcy zU%?ycac~|98~F{~?$4)>Rk|@aO7|P&cjpkpHjW=WsE-VJU&u9(+=gbwr{9S~(g$U( z$C2=_{k0kBOMU~5CBI?MxYo(P%%jC9&2Q-Vc31D0xuSKQKaPX*Ccme^8~F{KN5V#a z1GoEwRS~!HxSmMoPwID|O@0H%4<0Ot2YHV}6NUT+nM=1cO8<5@y)VABarN^=U-BDh zEcp#le-n=-L6_!+<~IynY3^~YnxUt`ad6&%$;=EqlHb62By8k2aJ#Esifo(fc$BX5 zC-Vl;CfhiE@KA(!kZY1A+8wCcKCI|5XivHQD%l?zg{C|>hCXVWhQ8!C&{*;t4xU|e zCJMjr)cl5>;;Z{O=!B5pz;STirqb=5VL7Vc$%p(Kx1))uOesp}QBU7V7yHE3iqQx++ zevgC8tmKMlm(cPlrJoWQt>^ju3|-#3E!u6h`aNU|<@wE@c3}Al*fz)Yd#=+dsE_jW zSl^;0qV+sKdHtYm-D^#jBXwMqO^xep@aY?^=hG2BBhN33nz|>{_pV&~nHH_*uE`o$ z#i5b%bJP19sKui4NO^uYuk8F@9DQ?^AG5g?>d4So%JZvUuS}gg(6Kr4pthOcZf7O3 zjzyWb(6Omrxn|v!s3Y?spy`gw;5}T+Kji0u^Zw{h=a|0W`GQ%sHEJCh8E1nwVf(+% z1LgUd2fu54q@Qu#P8Lq@8;>^Cks(_t&yVvOxua!Bc)e&bWkG{eHODzC2_5kUt8Z|1 z?MFcDFkk3AY58(t*;0EI1U}01ONu?LEFB)L=eKeGa6K2N^B{fCJ%(0{^r72luCvq4 zGE0S$;FWZCnihKdEdpanXNQhO`CoIZdtJX8A1&@{Iy;YhO}ih0HtFot^6oR@@~UVN zpyhv=$M2Zx8W$~+a|MrAb#^?XT4tNOGFnX3bateUJ=&D}7V_ehO-r<$j@o#rIy;-Y z%o8dPl>fCe>+kEa(0$68CEdIn^1sM7%Ky5YFwFc6Y~4R++|i1cos{&ZV_I&-{owGq z@B}q)s$R|`$$5jLMR1FwP1St27wLTlj}DC%>7r|PHGfX#O@$fUmFu{1Tw%;l`E%pT zJo&H;=Rwc2V*KGel)!nA&lOp_?yR9DBcGWend|I4uadPvU-bQIQ+B7SvqNJkzij`E zs0LI1jTX*H+k!?RpKNmRDFa8LO*%XLyz%oSuXmg$$}i*RZAzD!%?IOtBb}Z58}=K9m6B@FRIxa)>C4)+fa-+POI!x{QAt2PdJ zlJ?G9*O$5K-COjzj^8z%o=?u7%@17j76&#BcKiWd9vNS7)tGzUyh49Bqa9SrHGKr) zhg2`JVrICFx7e=5z1?oT)hRZ>TaW*ead?4-Z5%&%C`CLRSvb4kFbhM2MRMrx zt?x;gzOTz?S=6{6`tED_&3r!Mzi2G&hm_ksIbeNZA3bj_b=vUUHwqd0G8_lz?aZIu z!#{vG+7ID85;oco;dW15__R{_QLsC;_3{0P|H`cflA=xVUmQPpC`~-5a$)7_8yYN4 zN{hgXBfa$)EmIyUWc@L{82VDaA&sT@uim#0E^kxMN6gmp4L|+t@@|5)k9b|Yht!wh zI5=;Cty-kr1#j}bi1;whBVhw}86U>&w)GK*^Tm0K%CC;hO@qG7eWZweMVsQoIDYU@ 
zhIn|C43Vl%h6am_MP2Ew=lq%S@V;WBN~_U#?(CTMtzFtEG?wDS0>+1Y{kNl!*!#qO z<}vp+$^qs3*$QZf`3)Pd<{NSxoVWOVeYZ{nZAdlVM&|ryDKT2^dis3Tl!SQvQZw=6QbLlx&?h9Hg zG?wDWrUzb`7}m{445>Y@zzO6VK09)K@X@Y5;=~AhFE!tg2W7etqGBlRr$12qMwW=L{uUe--4x1;A%GJVE7WSLyBf`JjDWU4i za2%XB8E*pKD1MCdNZ2TTjN5IsXk^&~f4oJQrZ01-JEFb+YPKMLjN=Cn<%tLR?Yhd) zU~yIpg5p>d<7LXjkEdCt??>OJdDCRRA&sT@v95piHZ6tU4|rz1(7d*zGGRtaSD!c^ zk+Ad0{hrX5;W#*NGL90w(cGLz!bb6A-0qhLm$@`4>m%xG`Z99gv7BKW#}6JV5D#+i z?XaQ2BJa^Mnd{4R*yTDd0ev@GyI)l6_|jO4FI(j8^}Zv1k9$0NRfGPml%JjAt%o1- z5f{F-I`RYgh8zdyEu}FY(%_9^+c=MejpEI?-5;H;8XaotBkb0lzwo|OtBktec!vRQuNneJ>QvBJPZ+#oDOY{-D zw0y%h&Rr7TC-~_4G8_lzZ9LM6#(+1PoAXH6R@oQ0P2=s+W%*V`j?3nPG|cE;$W- z85&FRY7?vVC~z=`uQ>hShvg0zMH#)SMcaI6*DhH2((PF16`Ju~SogM1=*X^SjkhlsoS}Ir&fZ(en*Ce(+F*c*xlEGc;JBY+{!M z$D*7+Qy%KCEmkLcc|%`@#!`Ho{mx$rG39(k;QVGOHnmzRIeTyRy;#=Jm*F@#Z!#YY zywTj8N5V$&bKLHAReCq^S%d3U)0dI?Z)lUg495>1suBdwdM3s(E+ zF>a{RPzmKEae-1+H~R0An2+UXur$%8uASf4ErY_ z)yY>Ze>cbLH*)qk4$hm@aRP5NH|LSCQNAI!`(t@`&whAL4_I}--P@ea71c#>z;m5! zczRV)+}W$LeL|G6d?efTlc@x7by9Ci?o#Pq>m5x~?=<-XCZmtAF89I^VuX{yi@95J`*%dZ!Zk`H3 z!hCK2Q<2bh1YJdkB zk0Enulpx(Vl{Jed1?s-9$^*q?+_tGPZ7%xu>yov-sxL!h$?xX#a9-p(D0m^in)5_+ zlV8p4nq;-dd(zfGu~E~P$%s=6)Uk7Xu(KxMhpFo=R_iTz)AVd)ye-;`u+-DI zN*jgagPpa>PPvA>*wAp1F(+>Vb>Gv}&hrH)b$_r3I#=`kY_GwO<;UfAO1@zy`LWz? 
znv48cZb#AQJ*(a+5G33+y&1W0R?xso*vaqa_+Y0M?3CA~yoRCiLw>FFE%X0Ze}=A0 zKJO*#$7L4O2@=kWd)hqP@2JSJXw$LoIGq3Z6M91VdW~6W1wWYPB0pHh9e_9TV>wR3 zLVhftA9mCsJ7lcg)gV1LNY<3L`mcTr*}-vNdeQLZ<_0sDz{U{VMD8k{8GX~ekh+GcGRWoPUgU+2kE|-{B{L4^6gBo zyP;)zt^I<&qz^-5$*+t}+HGxx-wUkD(1*D^?5T5HT9B>}BjbW#2OXQ+#d#qtTzGOputI-Sg*1S52R$vw@xCgPqo7r(847W@xTRjSd`(d^b}&S6wb)ZIRW`hvCQN zcFOkx?4)CHyJ;@+1Gyb*lh+0o9v>tsX!O_&nThxjzEC$*SZzh z0CWO1Z5xgcb~Ye8<(`;JuAmdf+k3WZ-g1&jObC%;<>-B6B0K9?E>>vPfE%y$L*$#i_$w{Pt7 zea`sFVfwzk>rjVLh0vybd-;ByWMIub!}W;`+O%&^w$Q$PTElDGw_ApZ7Yo-dO2oc> zT;aY2vpa@~^ICmgxi5t_?c48W>a-lEP=cTc{q)d1{wMmBvzi-Vxb%+Z4g?TeLc>lO`7rw7wXo z*XOO<;zr{ohYZ{umo?wybudh?m&v`AI?K#V35xh=}SUuhT_s?%<9O@b$CuDFR`D4{0NFmzt*Z_ zt}+cHgx`j?8={uDD@$)(vW~*BNQZ}xO>ybgX#wJ!TZBl|ba>3pE_Uti8X=N4Wa#kN z{!{qGn7wLz_!Z*PW&VtXp~F+$tV+JI zXxndhw^ifP$rkx8xa_VBd~|f-i&5dC!#`Cf@3`o$TsSe}XO2_hBKTz&zb8Z8GxAVQ zhwEH)dzuj7_BLEB)O2{b{ot_)cuZEGD>8O%lA(c1gdPq)f* zJ6t?o8S_0N#9gsNTdMfKHG&z4QEnNHwO*!`!apc|Y+dbR&H(cZ` z^*Lc2;?iY&oS89KE;RuQyluJSSYktccjdnQv6V$}Ue=vlI>o7xyJGP6jFU6(P zSc*%pSaw+A+%ljK~>HIvD3A=T{p|7FC)6ca) zwwd6G;?kEU*raC%Pn4fPwoqI;zpkEjdugfid}{RY<7p{wO4~`Pt&Z3re`=4F%Xq}4 z%lC9+gBxx?c(etNW7PXWY7aayv})utyHVykJX!r_ooR=@q{BmFDK5R0xkJH-#}UF` z)8T0pSS_#`bf73Mo#WuV%Xn56J73>zZ1< z!!xW?+LR)>ke}i;y!WDxZVJc2d6Vn*;Em?yJQ6mFOXqfHvD#j-|J(@SHGk0Hk~Q2i z;)CWHIy@XdcxVQ*II-oMCam34V= zeNkLG#}6Lti3b@&+Rf0ak!zqyc&E4{3Iw^hraavIRkWB7`jQS0jitDBG4w>ivCyL; z9iHoFefr<+V(9R29Go}#dUSizq>oI6=)8f+476=NN zfV@xA;o&$qZ&D`(yir^_=aH~cTspV=_L>PTmRE`tcQw6JnGaOi(Ba|u!9z>pK|WJg z8d^1St+k?|g=5M??A_UB3(=S2#Az()@T|;v_f=*5PX4EV>E~|kR{)0=C(dzj-elep zcq1Ji&Ld$X9Ug9XtrMNgooHd`@W}YqmWFK{KX_2UgZ%!Gw(K{wXXHB@$0BVTl?U?k zWuCx3gj+^r={Y&snSUK-x3;Fq+r#~yqM->mib6@lG<$Z)U`T6GW2gUpCi4?arKcDcC z{ov67^a%C)L$29hF|=jm9+Q9OIy~#DUzkw}eaX+KvE=6uofO|P5ISI_^Xlc;w#&Cm zk-83#_X_7$*T6gZ`TRU^-kYDAHMl=`CO=<3&(J16|NlA<Za;W*1dn6X>)99OZ!Py5+8pv5CR^sZ zBdhDTZCDX~DGr#%(thNFoH<*&?eh>NPh{whv@f=A;jFzLy6%W)v!@S6`~zK(d>>>y z^@@jyX1f-7h~}E^NK#*X87(yC?sE9-jt6Ly?g(K6Y`K5A@ztXB*|U3y&zkN?lPW7J 
zx2*0V4&}-H~gREXUSL@X&QfI1bL6+~)>w6bHQ2aVSjzxQzrteGT=iC+c=xY-=O~wJ!SlW-Q zoekSKe(=zic#wMw`3!9iSz{;KL*K(RvU&1LXL`=^{o!bbb2-0q$KoDSdgMe}}zPad#xL><`qGb7NxWLeg^7xq^<4$hlh6{q3j!5huZ zc_eJ|`Guv=0ueJ;W<54!DH+LAu66-%IBB!9#oEL25-f7}^~29$n?3@2Q&d za4C6djU@D?IA9t}`?VD(#x3&0?}N2C;Cl7UZcS96e^Ii9j05I4IB)X34&G=#m-9&2 zX#bYm{p>~WD&LQJhlK+k#ocL4E^QrsmB3x9Go|~-vr)h zZq6fNqy1lQ_larw$IrRvAtq&=J;iU4gJP#{u-*62_kTHl@Zdx|jDeI>xdDbYhbnRP z;I4t#TWmltf>O1E#UG-SEE>T$pv2j|Vd1QeCP zBkc!s9tj)m2Xnip96n#L|7Q=8t6WALFtR1Q>wWRi_lG%t@Zd~5oGgLFvPeH6bNsip zS61(V(8NE!LcYt9V`s|4w+fwa{*3Sw&Tq$;OuFA*p|Pq=Ml#yziB|D-@q5-MkG2#} zZm(3@8$7#GxSuHYuD#O*z5cG-P*3)KDI9R0X2^7|YsE!h!o?1K~O^ldPE*Zk~= zZ=|sl@919K{qN}se!}HMJP31-YDLY z^GMhz-jUmVs_o}>bJKlA%Yx}0)jIQP>fqcLzIrSt#}6Jl5)ZE-iIlkBsKqX0Y}0)8 zm_t(@?t5FLUO?Z!M;@Z>=Ut<jYo zExmo+zm>on#Y1u)2^+;ja=Y8-Iy=5*13z8gFDWZB4I3J^as1%Hm3a7?69LhwetHa} zd`{H=Z=HEsr=C)0SpPQYd&o7*#GKjMDm0e-i~FUYjUM(C&x1qORxfk4Rnn|e`|nBd z)BOvMgYzcW;lZ1HADn8da2^R8uuK1f+kM>r^4~}=oF`4!Q0{kl8@6%$;K7Y}khT7^ z8nxW7#vpsc(@+1#H@z?7n{}*w6Ma376?oAKd4@EW{0pChLq879;x7hObaZ})I`nQ? 
zQ=Nfn;|AO;K*vP-&cAvMZek|`mKi$7L-D+FcB(y2slH&&t z?!<$v=~%(2}FoqDrV zY1Z$`8FlJ84$hn02Lo^9UvM4?8~GR9?w(UFWSchMPuDe+ar6uDyg({MwVtgNjvqXD z5D)SiL~VbHp_DZv=K1O0_@+E8@%??WAo|`;+Ff6*OHX6TzgQ9911G2RK_-u8u zqoN$F+;TN)|DP;>aN}O&8FCz)H@Q{}-pIe;JQ6nYFSy++0`9FU9`7g4Mr7z3%6M?J z$u^E3Ja~c!8D}i#_BLw0SA+~ttKEM3H@?aP#m&vSQMJf(^!?hY;lwBKFK8_J7knPh zi+s<57xEuCPc%3A58SRF0hfkGU-J{88y7F1mcvnzd1Kf8bpL_lgPmT0AEvIe$bWr| zTJQ26yy~Za!>e{u+$x_(IqNZ})N4O6E4g;maeS}K=QY}tw@1e&|Ke;?w!G_l`HPwD zkMI40yh4skem8p=HS!4y`3ro0*x`*iq#ZIBbBa;R{e2`H#8-a$H@K-C9QWzzU*6Wv z?Ju^CeS6q?zJns)M`%;LrRC=>6T0KrRJWeyB7Z@~Wxy`_-sg4_7W&@j^TQ4w*debw zxreZt)rL3!Z=HF%?mAr>RoE7N3s}s2ujcX5So(fny!vOcL96`59W7q7!kR2kKE(Np z#JT5W9v`<$)~}ju%q7ns=BDp+&Qp`bEcPEO`-}KB1MJOH6-BPARyDA5e6Z7(?38Ph zhy3-JN4eLIV^K_{>HSq}WV8Ft4*H9Q+ny}CfV?|?TyCe_w}PE?EN(Z)O>S-*+ zYx-MWKmFvozv!jKYtFx7Ha!d46t5}c*T54Uo8zLnDPEJ?`Qe6IxW>TxV8i>BMbX}+ z)v4$BV5dLXDesANe?1pX?q&G+>$z^G=W}Wm=YYQGOLgjLEalnpdAQxOegf>KJUY$; z%}sf9+>YL6+g#@1nYYq8qfWg%US9({#|JwDV5fY}NQ}^4pxvnhVo$*G^`i$I^VBI%U?V#`9bFbzt_!oUIf3^t71;3ESl3)19`T2{}`29nA zyOD+Pdz_b}*Y{l^qKeh8&BNdq)^iwPUwTf6o(H$9(ebY4vqMCvmIt@u?S_AyXBoBr zC;tAF*AZ>XgClI@7v`T_!_(<~u<+9A@z+>%ukp+hA!4$Y2PfCPOB%Lu{NN#ocv#&= zJqsavu9$q57tg#Nf0L#UKfOZV^}X&%{V^I#e&L6-(raB4LPVHWkAH{L!s3o-Q$2o; zgY%Z5Ut-M7c_eJ)7jnCE`1qU6un!T1t#2&6*}7duJ`CEF2gmV)hhXAC=0<&jra@Jx z^v!ImEOf-vvssAlo0(n@gVOp}xQM<~kDtbpU$`N0=&zOd{i;@ve|(py?A1Po==JzH z4$hm5p9XK_7jhm68~KIY?$$fcC!Yxm5nFy|)Z>@W#c;zmjvqXPfCu^hl{I^cgzA2w z+{+FP(S0-1=h)ukUCm<9m-NSIEcu0e9?px5rw1?O_i>(RZeWvsAGa%Hwx!FAu_5Bl zy>2NZp0vrRYdtb{ZGe8REF_cFE9xxm=J-|L0yd2}?E{Jv4o z?w?p|9V%{W`eR+PZWuQlZPFj(IAlHi!lC+H+z!G*exApxW|KnJgoy5T>l&w2Z=(!V z9iO!!I(Ci^c80-D`J9uriaLhszK@KxTOFeNR;Jf=Ia})=lhOCo)mPH5qp{@IeIHk_ z{zMn(UHnLGn`>EXW!1wH`HP{wASCB`wSEl8!FiGA6TFaL$9W=bZv(d>Ry8f8VD|)Qi zf_)>7A3Q|RbuZ_h5UTq|o8RN32gf4c%k;WGJ-_3-yy&|yJyiO2G?x52J`d+bzPG>& z9h>vS?I0}V=gGXvzactSr;DX;hM*lEKc~B^8TP3dKG+#a=UYBIpG@GSF}&=DzH_}|!)V>>BZexF?WU<-6bUaY*( z8MQ>st9>iAWR{^rk)HggW^S}cYV~8~du^sM_YaFU2fxk;7jBvk1!1Fjvr9I+_g}0W 
zE-Gre5g$&3O)b_YTs+kD6J*>7+GHEY4<4e32lds%Csw^6%4LmR@-1~9$uTbb>3tAxI3n(hFNCEWqrk{zs9 z^@YG?6A zN{2z{8~ro;wq2GDGQP*rE~p09q0Q&vb~lp&JI1j&58MtJUjr=i`x$x)b6$^VY17KU z>aga3byKvXqJvh92X>ARc6NrHaq7B_jE&uDXbQ-0rrSnth-xRr&GoG@;=nq@_fZ~8 zLc&|81~UwOg)R?wP8f}2k-h>QoAOvX#7vv#vL#a2R~WLUB)zr1-UG{hj*+&{w$e=lqSA zIDe#vkoNh>jM<36BYg$=ofl}#)yEzlK6snwZk4bOc`W~g{_ap0=Y{eY3XX8PANa3v zywbDJ#(sPpspqi}cFJQp7Mc3xVsfN7rRA~ozdCV9r$>?Ek9ngeUp%1qu&%>;%R7;x z|M8QVzJE@~NYVV|rnJOq?h0{Fc`W=qbN+E2yW$ke`$1wU zZ0IYVUi2+Bg0)MzEXUg3g z*#!C{i|18qJsSE7{5){pr5)g%@>uwJSQEFRh1bPMU0>luO4WMm{YiN&uB9prS9!YF ztZ$nR$Y&y3D366-S9BdcP^=EUMjlI(adw4f&59HOn!bXpdoah)SK#)8$8O+pjCw!F zTAXbnA&%jb7J?X$*B30!nw znPL|uO6Gqt)CYQE90%uJ=7WNF%46Z@fv`~?3%9#Y^`xZb0&$~Ld)RbC9*ew2PDBb< zb%A8Ko5Jyfhwj9KjOmLsGzjDx1@0BfB{BVeN?tjvddj}qg*=aqRZ90%u3#ujxu#l;#L1Tx0@rIAZw%0uMaIEx_kC0#ZeOL;6F z6UrsWPK?rZ+4|SNmSgb*Ltla8;JnGaCh$giESyKeMtLmU?qNF%-FsKm&{vS(rNs@~ zIDYWZlX#G8FY65r0_lU|Sd>d*%7gFjS(CS-FX=1LSjuCu>$UKoGWeZz*_`4YSms%W zdrtcv<~TTS^1BJV(cGLz!bW*4-0niBI#f@xjnZ}5q#gsVFUn)#_`yRj;z8cy(72#) zZ~1J&u_%|sl!syUR~71qzND`}W67^be^Gt#?^Doc(exF5Oby8M_9ULmnqR?jaNeYj zCwQZ|Igf;m{0eTjqwBYCmO)XvE}P5;L7Qyj_`ySO;z6#JCmR|BaxEt?bA5%gPWAuh zL|^hNXe{{^E#g0q91lGM@+)k%cF3~|dI#iJa2%XB`JMrfS$EsiFmwktm+6`M1^OPRHJt%jw+d~_Yfmc~m5?88(jAaGHHD%@+&lZdwUF0d z$=CPt>Ri#{Z~DGBRW~>*F5Mej-1;1)&quaWUVApjCtWANcIV({8*)PjVBFdC=RF_e zde-XYdmO(cQc|NtyjH(n?jt>m(zz?CzMwFF^4T&y)x7qEfJhjQ#U@Ut1@Ijzziv zud`2df9V!2hHfh|xxI^%5)<%n-bl1b7hs2SveBy+s3))4$#L@YD%Ux1UI`mN&(>b4 zL+XMD(ghfF`>xd*oCiCs!C0get4op_r4hrs)s6dYC_d%l#v}R&-KEeeYRWJrCK2 zO{>|hnv=r$!+Ge3^B}*UWRBq*H{Jh~u|i$l{^NhrzDk3JdveWE`_8J;SM@h(Ecu@i z%~yr=!0&b)QYTic)j&DA|JKmquieCIJByVc?ckpdinn}t*G=qwy!EUb^Z@6Ndh5^? z?K1;gEQndvP?7mucZ|75wXwc%7VSEZ&pS+oFHG3TAD!Q;^t_Y6y7%IZDru1ol|567 zTP2Qm6FY{K>RI$G1% zZTlySyIB0I%9+OJ>nR)u=k3yH1jT|knw#^eV~h9yeDFv8|FL_6*_JKE7PyI%6%TK! 
zGaPu#QP~x3vW?>h4+Dq?xfkVP)V`N7^Ygs)y$w?y22TtP+lsyvZ%1QkU&Xq+|M*AF z-XeKx!$H2s+A3QL|7e>J?ZCIGH3}haY*ave&f8Yr!Y(DY(2L&fl+Cs8j4p-t+?VgB zR!5v-`3-69>((~v`ajzHZE01spA1f48I8C_!bbZl)~_FzI62=-G+Z@&sv2i#AOFgx z;Q=pEWNdig$a!rQH5|9rK`(vJh~ozj1BnMU#lg3`QTsk=-lT~E`@QtN4^tk_3_r2Y z6@4k6hsM&rikbV^v!lCti=tXSPxsLJrH^;@*5mCs4$fOv^~5B!DOTMz0CEx>!t67nDQW+xSe~5zLd{H zV`<-|@R(E6t?_$)EuZI8_tn`0CVA`mJpY4(@#Y`f^wt^hMteS-N5V$?Fx+mJpng%s ze|m}k$JSMVb^cTIRZaJ`{@FS!nB z9{S#hDGnR=TuSJJzVBa-Uaji-(^%S6Zdp{uQ~Wc(vMc9+)=GnuJFWYaHGJdt z1BYRRgPg;n76Nh78eeMWa;TN6Ac!~=e z-%jrHp-sMV`+>u7!a-`XJn+=_I#jvpj!iSy^}qP1vF}&(wM>5}b^U29?aO?hzrVE? zey`Z9OumND^{>{YNtOBcJw^5wWp4LIJ`cBp;})pi@W74sZ8(l(8|~9@zYo;Ow|=#k zr^vTu*gl`^4jK6y-iB}7e&7&AI7lt_@18n`E??q?;b%NO|A&u9^M`(w+gvSxzQe-a z_^Z19G?w^w9;roNcl_olsssi+ygdlI{@>)XJM@_9Ip zWE=7GxZmvxOe|7!pr0k;`*aUrgZU@Ir=HCD}Iyc9WY$LuP_dBp*LgRIt@x9XWc~t3(kS(5iJ`cAaIE*43 zJUSs;O)<3ZJyfE~CQqH?XNp7kf|wDL(Kk-Dxl z*S~d8)`?DN&n%bx>mvAt+zyW0Ahqxya3g*p$B}F!J|Xw}mgD6t$4+>P@#gEI%`Cy2 zTxMBn(MeC83(4&V4x<5@~4K-^>_7nS~wTl@%b8@jD`Lqw}a#MUf5J=58UY797nQ^_=eo? 
z8eJTG+><=T&kM(*YrSZpG(X&FapgyzI#-h04;;o24)PgCjR?}dmut=sJ#|i?DGs(1 z{bNU=?}_=l@+dp(GTskp6W_4f_~~7~^zjnUn)KVAYrmbcX4#L^?a}@ffBu20>(A}r zxZPQc6?ouAd_;~T*+zUs?srz_+zUJ7J-*@5yL|nj>yMdHVu>G~x^LWm;4qeO=#mTB zrKsgY9KuTWU>oC|PulvXIIPIivy%(@_8JmBUe)!dvBWogy7JD2U8rZ%_jiEPCFuH3 zQO*{tjP|#QAKR+B{@f0Zo2(BH-00jKN3xChiQMn!U#FhfiucQ?Y+o*9%hf!iP9EOt zE&sg(~$M_32{=!$&^{27KXH3rB$1S+2znHUS{NHo6 z+bTUaz51LV?RA>2f6~;zeb)aOy8eD6W1p8R=`V(Ad`79qR>IKrAG!G4WN)&(!Xbp2^8@fibtcIo%X&tG)bbp4+kDU_|P zFLd_rq)1(VZU@Iru5$u6`K;pH97nQ^_>A1|T^Dm#s(ai|Jgio&*(2vR8Fh)cB}QR#06Us3MoyzKH3i5P*ec^_et!5d z3BSneCEuZY4Q+h6my;`VUH@DYikJGk$6x0o(pZX%g9>kKxG>RQ%+PfG9h!KBj76Jt z{WEmmjbn4a=-d?da-25CE_%?lroXV(bp7Q%lBHogw-0_!hM#hMP--UL@YgweaxK*2 ze|7z7exKmq>E0~#9d~lzJ2lUY#!}oH{r!32dDs0#jc3ir-&)aH3Fs78tu)#TH9jJ@ zgX1Ev4{)Klm*YgXQQXV@wyT=wa%vkq-?p1$Rt;&b$hsTt4Bxo@z+nnqZ+VZS76QeF z@|xgS6nm=gDgF5@)jQkq`zcY@^uF^m?rd%U7Zy`Yugg++K}KX)ML1a}&K{yH_#l0^D(*dML@qleoiMprDlnpp|vly*<=0n z*w56@xOE3txi&|AiNN+1LlH;v<8nXccLjb@9LfErb5R`0?T+8u_M*#Se_^iG1(3F) zO?hf$JH@@+KKMC<{FK+;%h1}F*XWSH9^0AvnXCNK)&0>ochfC<)jTsAOL67KJ0aWF zcm{~VE~%ZG=d@RP=O}7xh4xJ?&rI$o0w;)JV3Y}EAZr>-{Jl^t!;v!C%h$IORzhMxVFUyH`ALYwpkTW$Tk z{YUd)@ky&AFrae#V`I=JoxA9hm39n3TfTQuM_|Q>pwJ#@Q(b}Q>sOqtgmy$@W$haE z_ra+%&o>L!^$9&KKb$#+HtF1vucUM5lQwCNqv#@*YWjiMlkMBS{MtqItN-ZB;?d4Z zMqW-AA^B({oR#9{#g`{$CUi}RDtU7yC5X#(}piR3`^Z8!Pqc(qPEjir76`y(BEW4i%&t!{jQ8<%HvM7!*h zba%B*d_v8tANP41c3+<|J7l0&u&!s%?c~>0&JS^2Y2Tk;XSxogXYX60z?_8*3>?0l z*i_?kW#fG0EA9Jp+^Zg#ViQszSj?z&IzBL^y)x)g?gDp*i$Y!4hhc_D*kD@?!(#_S@5l@p~FtL6MwaQg*w?TR|yrzH2&(f zX;&t?nTP89)iU<`ep{9e70H^8eD6OIZ=Pih75PQ=)hf?7+I>;;BYzEDbgN;V=8c1% zeRPAAm37uZXTH*{RW2zZVtd3>*D@P4{rmR@RvyF4#7#iEZTR4NDz90-*WurlHqJka z#DGV<<H9ptHnL9g&isxnP*SY=6O}Y8L=YHEx zh7LQw4jgxRe*$;nuk!2Qw>l(eK*3O5zkjB79~2Ag?3~irUD>n!$SM^l(&Hyzh`-A3 zD;hs`)Y^bo;IAg!DbVizb=;?=AC#R49d@~na5F?4EeK-%Rq#le|G;rRARMdWD08gF z7+U4>U2`o&=a8!JC5o?VK1~=EiN0=Qb~>B`f0f1(fA#d&`DeyL_upUBVc+jGv{>s= zh7LQogX1pwslc81tNc2UZNy*Yepj8Dx;-{qsHlB7-LgCQtMWQ$H+r}4?S;1eWvBY1Uf4pd;FY9n`=Ks|H1^BC3&y9Aj 
zhjy|5ykeF?hn?HOag*y|z>Ut$aU|P_zsmid{GZvqd36jOc9}m=*YJ(o4;&T}4sxz? z&d@5Cb7kwyb=Y$cTsATYeMyI%#u9&Z<=TG^GtR<~9@o6`zjRfuy(?XG#hFm?BJ{^5 zRfnD1!Eux8&A^TLs~ktNjrgnF@4G$xUViZo6_$Z#qFzIXUGf`!4d1x^z+n;LAZyvZ zF|^9%Hxb7o4yoz=a4*-_o!im3&rq?iPF>fGJYuwozZzuoWcZoap(0w-VGk&F-KH4Y zja_YX41$(9w}a!>RwZx&H##@Rk!&OWD);-=_4wi~aX$_3aiwUUuP%y=_s}N(Dz_gv zEG8Ue&Lp((iQ_4`^&>OaVfT9-d1(;(o_628r!w;5Xe{wpZ6b=DU6m6$#&vf;o-n~h z@k-cK>t_x_hn?HOag*~e;70sajw9Jd{8jFEdjEfUEmnn!lbQ~@jDyi8-?;t2VF}?N zW6UbZ#ZWgT=61|@=2xQT2&5+mBB#&veyE;fM7})eOZ-(DOZ?U4y<1M7WF96CYdY*5 z)3%jd3te>5Vdr*m+@wx6aHMl{9Oe56wh@1o``vxc?e7PVgo?SE4!g{oJQ}L|#_b0V zO9_Vuzee{s)ZEZ2m)!Wnp}H2jDGqs0J>9n(eTNU|XVIWkM}@|c4!ijm%k}p7-TXr1 z7gAIejPaO$K#*WnGakkK9%n?v?)I2*P)Siox`>G^j+4O=?^=F>Gjsu@15BA zCfZbQjeMo}l;i3=gw;22Y^B6Il%hwHg&>UAiM|J1*Z)#AH1n-Q+prz&_VdvM;R;UZ3} zPZhPa(x)sj;d*_lzGE89UYigue6_ep&TA8q=X4Q&h>ISo?-cO&DK6T%XX9zdSK<16 z*v>!c`H)Hx`ti=**_0(WY^C~CWIM%0z2Z*yPqvNF>r++!uj|)P8;sZT`g&Y;?P*sZ zym2iqIz0W4_xI8fdR+AWkyYH)65vm3amvUXZ~raG8KK8T-?#ND7?K10Z!Ip8eBf*m zI#%m)?kc?)ZHkM?KZ=WvH;Jg*;C8qk7o9JDt>&hU;d)#|*i&4z$~Mxx$x!gIwK`-R ze_Y3CTo(CW%HSqP=yQL`(ccxW$0nxlrMRRimhn9!^tgz|Qe5<7VOaW!9ueZR78g~G z%4X5AdxTz}ieCqgyX3_KcZ!Snb=YG4YfztR5qexC--EahC@#ur=T%U}iQ;4Oh2kQ9 zUlGm}7o~@<-~0YexcI2WMe@D&&d7P>{sYGt;5b%&9^@KoGC1tR@RnTZnvo0Vh-+T$ z`Gl32!>c$_T;yX`?_EFirFk%orFn2l-6gwX9!Kc&;L5{yj@|YsLZ1hZ>Kit~>TrY} zPds(qvNro6Tth8>ka>LvjdOSMDtW*GZHg!4cM!JGJoxsyC2iI@Md)>yEPj6W$k8`K zp9jl%R6oNvZa;8X3HQ=e9OU2FNI^9Sw6BwK*oKvwfrzym)VufzbYyTAgT94TsW)Y6q`JJ58(MaQ5W05gl7a zDr2L+eR~Y;->w&YM~8YuDlw^@Mjk}&McTPf^&-*Un|=J2M`+8~$ zeS6~7if-+df}!7EdX6?~IFNlSZZF$!*`&R0uWf+qlNz;+cD5$Li?h@++T>r2Mk$T9 zdqs-uzMo%}E7M+)HFha<2w$hNmEWG6UPX(^L!#Er=Mu42|r-`vF3{~9(@ zwU2j~vp#aMjJv3#ar32CVp+pCZa;8{B^=~F!*zF^U+`xN3NDv+*ZB*k?}D_urMB!t z-#smV%Gww-mbei=Cgr{S{+he6(=>pOh5Z>>3T>*5!R_F3jo=8B9OBUf*{4ZHBsU3@6nE9d0=P71e!ZLnQ(DY)PM{a!Tc6zncG&K_~%Ly}{L255-k8@C@gtRWl@?wghK zQ$eE+hve&Yao6(#Oz($Ai;~8^M&F#?>sz(%;Hc18;$B?5_P1w#{Qfm1SK>)`N2Omt zRI|teoy3)=qbHn!1~9jS<0iRiz>RVeIF4i+aWlBzS7#i_TX~$jh|zKqq!ugMl#{^i 
z2M%iq2UYrPbSB>micYz0*-1?6kdc#c zdgs^PS<(KiVI-0y*}L)~Iy-9=Wd#)zD2ug3dF(*Wl7 z1BZ2lLms3z)kIwu;%3Nol~wLKf5H?8v#|S}s-W*ZEhmA-5;r4aZTcEt)M@#x4j<>MkyNbz4^bpo1c{*U={5 zxc$IkJ>gLQWqh^gM-6>>*-i(KykcrEFYx$Gad2*1smMe0^{c34%Tc|pLSu*5 z7<2qyE&R3f-aNK4aU{MyU0Q#c-6Y0aEUR&{s9J*{OTLHGx<`A9 zDt{`CEQy)}2aVgoag!Wj;6|K6jw9Jd-%alKp(0lkKezQ3 zdo}I+A^po1NN#8N#_b0V8wm%QA8uyo%SW%Nvj2A*Z~fb4dOvjd+jdG4`cf_*jivAA z_E?*^BKSSN>c)}oN03W5+RD%FpSP}^&+Xv2sWjg};6~p~jw9Jd-%alK-tE8LB6@j? zF zwOJdy6dE zcQdzIlxeP{J? znd2jD`)AbVIu!V;>517s;(}J2i`&6*lix+)Mtn<-BiTkANbYyLtsP_5n_e&Dc;aFDwFTMRvX`HbfIUu}KD;pAGg8!gb6YID(8;!LKr$6W*@Qc>6q2^ z2ewjbehnY{auaF_X|=hy9UM2g*9P2(Q^|28+lWKS{eHCV<|W@6K6-5~`F%l~IF{Ug z;1EwZNS@CLADyQt*EOsEuQos7P&~d&QXKkHZ7v#1oJPBM=2Hse_sd#sE@4-?YT$7n zy*3xOgX8ws0R?Y>8*wl>j%1tso^)=daKHDv45|prD?FSCq2?urQ zZS+GQoyRD7L^u}lB2C{9=E?bI4M$(9%|&C0bD30W!>(Nq@SZ$h$Mts(d&RlJxMdB{ zuA6d6YWs6LIBqic4!F^|IgVr-aWc8z<3?K_xDn{1*XEMC(Lsi9+@-V-b(i6o)~Zr+?X->LcE5`Dvl%^3ho0L@v2IEzugkFTE9hJ{Y-tX$w9s9{Lry z6)KXd=JIhnIBxRW4czG597nQ^IGo(?1uIrWS`Wkd7M(2MW`SHobukic;&5{Nfx}MV z&`({9lzK?{4ZVG-b2-#U=T)jW@d2A!L1 zq1-xte)zE)e#rYy#v*RM8T%MBY?YB;cDVJ#yC6LPvqR+hncjCxepHKij=uA=neDB8 z#a5xQQcDEwW^bqWj=}HwqVi|Ca>G`+755x9h>__=eD!D zx%?$+B5<63*lizq;689_I%WCKCANy3-{EYH(%I& z$Mb=rzm^jy_m0jP=bm$O>BzBYQ;h?%jpFzJx?DUlrBP8>@#}f1@}PlLaT8g zYnF5}eB<^5hdqRY%)>2Y)B%uu61PA-hE}gPeShr>jf=2EU#f9HV<~?Bee_82Sp|$5 z2m34AP8o|f)i~gGaNH!<8@N&Y&T%B$D1PUD7pncI-}#||BEMGS0H&Ln4GYxQCb<2; zVJ~{f`$6)G>KJtZ$5m&x^-I4G5o<^Mj zxgUHmP>+#SKk2)}&l7Os@sAZrfufsMb3pDxJqpz0VLCR&@4bDW7u(W3NSt&p7jzQw zI=4%5xjGp2704Ef$NBl;#{rlpub0%3pJM3zXYhIg^%&RmdRY{{(c?7wp44g>&{&GE zEBhs^K0Y}}l+kJ!#O*#fsuS9jGdN0pK;Z|)&)hFMH^t8!r{ecU#I*T?_nuaxK=KaJ zrgdj-AN)KBKjrf&c_66sK=HGT1%C(XF|6tHIpn`0?fau|-Fa7ks9HEQmg48lSr4|( zhTrqov|C>3iGwm_?U$L^QTL&lR-=I1!EuqA9>9g-XO0uuM)5QEdy~z`rfbU>H40>o zQ#r#oZa;81MAuv1Yex*-f5{8Qu_%T$z20@L|J_bL6eM;iy%Kvu+lC*P;~>8?z=4j% zap88;xhX!DIi5C#ZPkAqaEwMfss3hJlYnfe_?_DaKM#|iGQae0kRC(Ief*|DdW>r7 zXX{^%*8|b_&XTSZ)to{aOY!l+3U-#UcY=ifpAVx?LhGi%llHrEqJ6f=(6*}9jjVkR 
zoGAX~cG0;h{^fpkdggJgm}ih!cewADaJN=U#=f&*JGT#h9w9%azQE@oJ^q!?jz^Fl zgPQtz?d9m}m(Z7L&CytjfBAX1-|{;JzbU@uIMBH%zU6-Hy8Cv|?Ea|L^y<|_E7Zc1 zoI|wbeK6l%;r7ALqxAgBeAGyz9)ZjUe-fMk)3CTaDT6T_Tb zl&SNNuJF$iBD@R#%vTS!7G#cGR>L=LKX4F)gM7D-3DJ3%k{4zcqVqaUamaZqE@&_M zrr7y!wFak@#uBG=?3&;Ww?~JF#K}oRj#lWXWc}vm&=2j%BGGH7R_>^9J2-AKX8^bn zr@jgoxJ~r&Q*~S2TR%_5+7wz#&$R`{cVHI{ZCreP5C^)IH;w z9h7;Eg73vH_d5yhE7d5-K4h$|i)EPb+32(}9Jw}6zIQsX1jo8pq}k%y$mOA9%XI*> z+bFH8ShWokVa2A88-U!NHMvXNS?LreMi)8SE(W?*iMuE%2ln<2(7&Qux zj5@b*YrZg1a>2iXAJW`2YIx=`Y81q{6{vU!ZK_cq-#_q0zAsRt;AHW`b$y2zH41+B zUpCd|K&bExm{a5FaQBRQS_eaQETaF*HG9I#P_cN_a1<4SKg@sNcpMNWt2oNJ#u%e+ zz!-Hhu`g8T=c)G-aT&86D<@i94%IdOPnqbWz> zAJr(Bah=fL5b`#uArt_ocQ|CvOWAJEwbX@W z6FKAJ?MCQ3S*uY%V=2eWcY5lWz<*()f>xuT%<&F~o&JXDH43;L95*@t25xk2jw9Jd zIbPgv>+(Nd{hEgN>+(|dhoDA*OOBVH}X zk!&MwEB9O6KbR$CSC}Z!B4k9j)vk)<_$Gwuyj^ZTa5zIatWyhtI2bzq@>$#&rsr9i z-VaGJPm~MjOEn5;Eai9|^?mJTi{FQ8HN39OuuisU6|UFp;&yP{c795N6tW0rexp(2lY3S>z z%_C{7p5ryCDmLbkH!vb&URi4Ek?aM~rg+%QQFD{J7akXPDL!1B_8*-vIJ|?>%j|sd6=;v@;#Mwi zWC!I{wc{hVZwnV*UHc4)fEEUnP|mMLn{tz4di{Mq4{ftT8|I(G?@)C)?TmKAwV zXzz)>yXn_f)YQ4Z+(Z2x6cdLYfsSm8yq;dxWXD|WpVU|T(^+}M71I~ez*=7={jWS z>Kb()mC!79=4j*wX|*Hu`y#gVfhaG3q|Z7$4`Nx)i3@VbSyF=bz#^->|i0vl^%=QR+(WkM>LOK5A}P!UK5~ z2kwtM?ywv+d^|cGS&AAGGN&DFswq+YV7BFLfm1=Pro@{u-lcBB&X}mnXL_QhME+Cl zYP*~W7bTumuiFX7o922lS1fD`Dbn-1T9boprJ5Z5KHbif7r)QdYI3}DtkWU-1Lk*K zhn231ypV=g#XsbI7%u!Y961jBzLD$dxNpdJejn{_*`!`a*iSV%(vR1Ap#zn=@m~Hy74L%ac8m2HSzyTp0ubm`VxpVm$jejWPE3}D6cQRMkS7~cK^x6Vs z8?D=~DB(JAOK}fjm)lYl2k(A;_D%IVHS*ATbaI`rvEduHA2?hg9ON_o*h60{4;ypO zZg|7YHSsqOET3&N`W{-@slKX-Ph)AFzeW3EwQoQ25W&NS`$ol(Y^u4k!+)V0`B*n`Ys>h=6VR z4d2Pdld(_WF?zuMFV==8KDUG8wh(1YZU8qrH^-4|qkRPKchKx=zaqBbeXumjVOhsk zisTo>md^E9rSWtVy|>lMX%3!==%)Ze&BGEaFF*}sG%D#V+S0IxNN34Y`W&2FbRE06Q9Ns zkM3A}$sAem`*}?h|C{>;*A^k3x+XrigX4B9PdD>kz>Ut$aU|P_N5}nkj<8?(=$VIj z7#}=&ezKx$K?$mu=N>voklPO&ZV?XK(m>dWGIZl5R|Cf)E}JP1r=qVlJdVC;D_`eR zd2}?EcytvGPW7&h-#15{v^vy5QDSm7bss<6Q_Rvd@wpuww~Hy~PP77UbZ(9#*+x7% 
z?sut|#i!W(^$>UMUVrNV9^IXyNXA2(cy!!;;BcF8P}6Zn%{O%8bDWw}Z|ENno$F?b z!`_<3sz#x&+x6_5RZV;vOFX(}*MFUVI1f7V3!6P_1Wo)L^ESHJqn#Rj;fkt>&+Xv2 zwN?W=;6^+KW;|;H0@3T|FUFwXT zyvb9%DOmfes)^6-;JA%e6P8pQmk*6z1Kc={WE=7BxZeZ(8*I3S_qgTK4Bns2tFCAG z#_b0VcL@i%)-cG>OPBg#JA5=1;(OCIjM*I2WBFAC|_=z2w z7W|+wGeX;=O3o=*pD4KZ$ow`>+A1<$aW!=4-Cp%lXo-fNdIopHSLa@t;?TTd+@}2KOS~@{OT4d>9uqq~-VLqx35$-dJ>aA? zh+NgFYl5FxR>^&m%KPGWaNOj&0&pYV7sruoBiD(z*iuy>!|J8~ozVY|BVsVA*e!A8>KQ6~XuKxoE;*fD%=v=Tx z^18SmZx3w?{?W=$*P@p=IT*H*?Zi9d_QB7Gth zLv1>&5A)NtW|0! zLDQ0#`wpSQOstFLyOW8HHV<@*HxSA)dO#g#!|d! zQD$Xe8T?+{;p@va$qq_Fy}9diL!W=1&-6`+h!?pX95<+v`ywr(4BFpT z?<}bD#<(4lciqa+z2|KK?!Oth;r`!t| zYUsPmwf_5lddy|&XOjk<3eOtiFI*h9NG*DPT<)i=9SA=ue&c@AxhQ_)esp_O$+q`* zKXK;!{w+14?G=$EKLd)XyTRp+&c&ZyrsH zp2kuRghZXf)7Mt;iNiA{#SyWEe#u_$IT^|SN_mpp4X`ipE%ucQ_|KQ8xE z=I_8yIu`ev+fC=Dc&y5s$hb%=f00;VU)~orlnlNk+7y40?G!I^`{3tu^0U86`WF6r zZjk(z*UDUretVWye^#UK*{^N-k3c*|V<{e+HLFRjcKCftW%J8v)mkdQ8+QHD^{Bsi z*mmltRfsF(djmL8yv6OJb5p#<{oHW5-uZft{^Esobjq^#EfmR9ax!e^_QB5=@H1K6 zPm{Wo#|=Gfd9L(eool3CPvT|SZd%yTu}Fw08(r;pP>{Rwd)QB_qTtm%Z0Ec10(e>3 zpVw}jw@QduP{^ad2wcttx-5vfZoHhtR7gjXCED9oogt3=xgDN3Bw|x2^Km@aa7xM8syqadNy`9{w z)u03DOWNBsmUvkWzLs~q`YA-$-uAtFxW6wn$iqIKsxS(?EPfq0?(!Z2?!?RD*WvNZ zgdp&m&Na^dVQWT76kOCCUf0~_u?aC`*_<$P4`v6eIRspY-ZCv)xXvFX0|C!+5a^M^H5?QI%MysTa4 z1Ds;YhKk$)JuGgmaZ_5&Sb%>Dq6(igfZa;8%O*qIsQ3pexTE3fCfkS(5Ygzy}W$Jr@)?wZ)Z2qhX`bJJ| zRVW9vw`nZ#vU)vUbS$tHG^q#HTw4v=+XK3as_pEdNuEAy`V#2&aXUC}GEWe=(YZN} zWE=6axZnA*b(**1Vu-H2C-syrg$SwJP|Z!@_5+7EgoCVSA8F`Q%NXGT^!Ka%f6c)* zi<;jViN1+WNgq{S7L6rdR%6Tk>o!J!*Q)Wdyzlfpzdzj2-sX02+@y{kaHDf`9LYA~ zWpTeddu?4=zbY-w;wpXB^)G&WU`@8E$4u#Av!n8^nN(Kcum|T z^gVU8m1RR{Z_`-fWxeuGn%EM*Kb`YCcwz%rC2{Pk_m-1F#kH828-1a@&F$d0NxnC5 zBVHEAk!&Mg7WX^ih}oq;+)oag_O@KN!E;KyEN(w=ct<$MZ#(qQiI*igqoob~Yf~JO zZ|s}20DaGB+S@dicv%@5J92u=ns9v{7I%B>wcWAcIcf8*>DxyfyNBbi>QSOe(TDDe zYef6*jk<=53nK7dbyio<#Ejajk8yhKXD|T1?u0!Ci^lAC=9eICwN#o&n@JzoX5^tJExc>;$-SPFkLX+|sqS@*|{yN&Uh=JUt>tE&WDF*K_; 
zCokDtVsE%UAMbyDbl6AeB-4DH-xvHokntVv1Im-&_eD^-gGKJcKbns_IUerTaUk%} z=Hs?s?^>O09j?#E345B4^Xts<$94Qf&x5ojx1sqg=R~fVYd+UKckk&%^riVYjivec zq_K$!Pjf|xWKHvVbA)GdPiSA$e4Jkgj=S^&xYK-`UkB?l&b4P957#xHQ%m}Usm~|P z$7fhyJ|{N)-d>&hx$P44rTI9GrTKVPpVZykp)pSL@!3a%Z3owj z5UF$f_w>M=mfOK`m-9K`PV;eo9mqDCk8{6&Wq)-08t!kJkIU=44}3e#H*P<0_(C{H zPQG7+K4+D4;yvKdW-=ddW`5JXJ^IpooW|08{9v!zb^G~7h#uN}{CKMO0)L+feLl|Z z;J8UGbl^tk<~WjVG#}@F`_Fyz@AxykFBc9OTFSyLBhT@Lkpsf*2M%8e2g#`&W$0wf zZ{E{zeNJq8KP)l7={N^{_h|ER8cXx>xqID;9T*uQ?6mp#hVX1Q7e_>h=b;(3aJU^D zH<>pB+-N?|aU|PlKFx8(au7}1u&FAw!`kiw1K!o_N&EMsE6xwud zE35gPYNAc^d9sb>p%34_uIJl1LQK-;-z{>B@*f67=<`sS*E7)YjoS|#z5@q2|B@OB z{)UFR^zCbe*b?l~sRp>Trt_~E_Pg(&N8c`*-aCyYz4xAVs`?Da?|(GC_ruluG{5E- zDaL4e?-eWkv^mr^Qjp$zfgp#~`_N7||F?&Olf+<*k1qFi*l6gxTbzyh(Iro$IIrou-@E*y^E0$b-<|sp9DmUL)Ixo!PnOo$_YHflm3{h5yGF z7KO3T?q9!ARQV5X!uHwZ!m2*|^lW}#yS#T3W8Qt5UVTuT3_XZ<#<>s9iYn9-?WaTE ztWtGm$u`<&&vzTy^`Mj@i(>Vm(e&Fzna0pSw z@!A>M+md6n%q?RM08MmL999&%-mEJ660e)a(ms4vA4jtxZQaGh*Z+>Ja&uI!I8+@L z+s0kzb#ps7ZW;PYhORirk!+)VdhWMH!_+$g2i?R9O;;RhY7^=la??2m+-MKJ3_(wFB_(*O|UB27?@)8}7HO{`zvaRy+^zEFT zetPM313K=V{vY$HBWWWCAzsxmx$Nu5Xk$qi{Tr$A2_5F4(|m- z=*k*e%aU6Y=%sUlOz(%l^;LVeLSNF6rm@6FD!OU;%SmOtMR~3MLcpfY@8^{E)}L=~ z2gmJMV+ig8H{v649LYA~BXPgu9?$PqdxDp!r@b#OmxiL^L@%9>#O((TzX^xA^@i52 z+|1BgmK^i(UYWipKd%_&Q5t=z{sN7q_vGiceTQ|l^%mAz{e}J2Lu(AT@z(1va633| za^4Nx=pD^*B-`jc$^8zhccJB}4e(p5zaaN7HhSsrNp3%I_(M2s@;+bxq_3g1oG)?1 z@H6YZ^mmo%{g4`+yyX!3(tDD|(tEPjfY^iYeY}N*_MSYq*tMV)+Ejmm+re>@`7gkY z-jf_hvJJM&_ayhbz~>G363%)F^Y@45zeXO2oY$W7(%+NZe&Fzza7Y*d0luMz*7D`4 zAa0)V%Je-s&)uO`JM_(6c9pEZKx64Wd1_%;$=*Y}#V4))Lbr;KUz{H7t=C`Rc5vLL zY{W$YZp3HeIFfDjp5%USUEZs`?;9@>yY*V%(dZF=pcbBavwhbo&#XTOiW?_Q2xt>(YbSmHAsP8?dV34V9&m{EU0>Fir7Y`(WJ zTh;8v4d_U7J2-BYRDpHiMtmlYBiTlLChm7V^X#>1=Yd{-pQ?$u`nOh~Dn8~O+N6WU z?Kd;as@_>LZqK47H9?D;I6q-2eyv*M*4nV$6o+5cTK<`UzW25I3pAGaOhY1rJ06Jl z7J*v*h3C98@C@gsP0J|vLP)j?7X+m396`)R;p*NBYk&-#v+ce+n7tg_GqOrth8aAoN zqGRX0#aG)h`5M-8P*%IY9{A@h>LIi!BlBOl9UQlNU52<;0B*#0;y99R#CPI;uko1B 
z<56pG@zLf~feFZek@*gehHu<{;Go_R5(laM``laS49RB{$1>$R(SBn24vk}@(3fzL%5kH=zEqRjj?G_9FsUywp z;JBd*MVkS@jrdm_N3xChR^0EQO~-Vy+T|?n5Tv}e4|3YJlZ&k$k+2;cI-CxUpxu0j=-~o<4 zdj1QygX32Esn36VfgAC$IF4i+@v*qy9!-nw{(He&BoDawYUq;|irg!{=&f_fxc$H( z7vYfo)l7>I&=IHnmmue}x$JN(%0V&3;jqQ9v2W2g=f?m3wuEjpjivk-ztXQi1>yI6 zra(qPelL%9!L~cSueNKh%q%fx*i6(@i1{?n;}5i=xg8ugsn-tN=-eDfvW;}B zx!*xAZHkw|dwM~(FBh`qYObhLq1R}WZ`^+1ph~*R_k(;#&oH#2&3g@tT90=XX;GWv zU^QY*N>%hdI(+jcE9hF&Skk$kQfh_$i0P=e@Z#}X<-V=b^`DFRpJ}*1x_m#Z>RfX> zIBq3-bo@FSxY4;ej$|9@U~|9kl<4Nt8t<0|53jq|SzxQk`6=3@gU#&+4tWWO7Q=lu zHCl~2H7IYEUdBd|xtYa%1ZGz$ez4sXhrge@#5O?Rn1bfFJ%Vf$8Y}1VXm7|i;ON#^ z+#fT?&2{Q(qol^z?<|D&uK1e|r}Vc`xE&m~jj!8U{{wDxZjK|_ChI({v{ks@39Zv& zA6fZ`h+Yk|t*T_BbXEo2@xCG7xc$IEP4$rXgM79v49)C}T#JBA^TJ+bOWmoUZoA2H_iK16B0y}`^uR3~%}eE(86aM3df<|$H!DD2 zW8(G$hx~+tjP=_Yn%Rn0VgF01~`A~Aty}O~AEj7w^1n6raDh}$+qy8n#t7*{$eH#q49yf2lQ%0T; z+O!Vy|IWj3$ynDnw8lA3bZ%N_;eKsQb1D4hMu4cZDN^cvOFg|?0s1-%w-0_6gxSIB zWT~LKi9N;8%$D`daV%QvQ2itwYL0*uS!1xM1z@52i>5@E&W1U@}KTG zDrZ-QmrcgGNH3huP3tt=PPyLQ)zBX2=Z7DKaAxU;T!UY4XmCp{tnUH(T7{_}+|I8# zf7@UFFHoF_y7KHybw@?+KbA6%MaQOflpS3MOnkLAP&E0uM(TNUzcTcp4J~o9h1Ny* z`Qb-l_#y8*xfXrg(8QKp?vk17dDnk6UkG6I~FMJ{CeS(E4sC^ zdq$U(IU-Qc+nQ1mJ2>!z)-kwWbZ%P5;5hZHIxKxelR)v({??cZ{aP#V!OX0wVLP`E zeyUU@`M#Ap^GTY1H)=-7J-o)5>vDS`u2}Vk$T=Vmew&6?gb?(_?`5;=S404 zWB)Kv*YoCfa9pHrCvc&442~1oM(Y^dZ@b>_zBKg;6b{RJH+s2O^95~Mqv7@ghoZni zUT;}H`)8oOj&WpFnL0@xf%;m4>3haD$J#a(egx`z-u$>62l<@<4zw=8aiMe3x&-$_ z^1}uiw$-~FwC5h$q~}ex(>e;b4}KOSKV|G%)X>D1+=&5!`dWdhpDtJ5v|53_q~}dz zX`SNKx8&tXMGQS}yL~C*w~-@6GpuiS(V2aTn52!0;! 
zxBN!IZ(3*IIMBIioq_vxX#4CM7vu2V(e%6}UvF!mzRtkygP*DdoV;)4z0*2KkHzIR z+7hV8`lfznUG(kBUGyzjboS>`X7&n=rF919o&#={!0)fp`n*pdvQ zXJuX%w?pPtSqACnqI1(a0{81(^sw~#rvgR7+Cefe>%~>`2ft1S>b7(H;ActlGb0x+ zNRPE8M+e8ESl-mnrGxz@7Yhs$x89sN&;&XfG?vy8_<6YBa(@JV)3G@Y+z+ya)(sXt zjcz0pIzojqD_1cTi@27N})}B53-H;9?z63h2O*0VW*1x?gf2zrO|+c4_5|@ z2O95K=6kFP*10U)e&A4waFF+AIYYxdgVPhNbAQ$AP3x&2dW;@*3w?)cykiEY=5}!0WL^nyBi=E`k!&O0G50&Rljm}?L%}-lSjPTn zAGkmLPsHO63b!9Ps3~c&>by|;(lA8Gz3_Rq$zz4}AXk&d;~4J#gK4}M(rnI(Sz^rw>5w(cF2E_sI3*@E$V z`jx+^>N8%BpSLIm?S#-8`7fh=cxqG=547i6#9#c<_y60?ahA^!a3)(h?vnQcUr+^e zZtUw03dhN(?)Nm$JHg`Y(b*^Mq5qhnFB`05F({w)-lg?IM2Mze$^8e8WdM&*am=kg z9L~sH8;!U72x;g{3ze?f0=k+smUK0fORYY%9CZyg z)`$q4y~kNOc(cKaCTOSp{nCH!erM%K9h;+t8ik14#gbPVQKZZeux;|=yJY(Xiw?-_g^a7XU(duRgag! zj|~>2tI2V-uHxZc{d2I`SpU(N#iN}wIH+h7R&6|;E*<{c6-gTd>ZtVwKJQul@DYw*+OC}B(K}n8ak1HgyLFA#m zCDM??SbO~gHQ(UHyrR`UL8G7YO!s%_RIfL%?Le=Tvr|HaP5FXJH;~s#*b|SQUuTX#u48$69%St8Y-sAsZ|eNa>kSOI zbu__l|DFXojCupL`YcF!44jBZ-}%ddn_j?)@+rs{;?eW_YUQ+k#Z;V$N1tm$ z59@;GLq+x7Rvji>cgy(JT?iFNA+fmTs++?72aXkh<5=~1kXi^s3{8HSt9mvxW8Fcm zwSYb}UpO$N_|9eMd-i#I2Q{yi#uAS{t@`P-gu!9L_uY;)SCQBHe)txrp@YJ79zC~% z<1Xiez@2#X{5p_r#G~hak1Vm*Y8LKqsy867bGnhU%k2ja6$uBq_7`Jl^2>M9FC(|q z6o)G#yVmzW-|p47e$9@&RvJq@`jp=0Z-z&QiC?8RgjPphYwWg_gCkZL^#-^d95)%y z12^K)a~#Pw;?Z-z6LPnUEmgy)Hz2?N7KU%!e&A4vaLCXoFf{pP?oTyC^WPK)tKY7> zmx?ekuldQqnaFFUvBaaF(rm87fB3zSo#R~h=dQ}FN<+q9IT9uk*8Lp0_pPhK?clh{ zc`tAy9zDmAY?Hh`OE-o4?RdJtfibQ|y#cw-?PmDK?FSB(2?v=QnQUnC%bX~e%*#u@#y!A8J+9r<1k^n{fWiqMy|@j`{U0$p`E*V5xYUiYvp!u z++-XK+=xfdaU|P_N6-DPzWl@DvbdirzOop3Hq|8~uEKLlJbG?Fa4;tvB=-X~A1JR? z=J*WEyxzd6H7~ntLSL#|MPn(i_3_a|O>X@O)8nM+FCPTeLYv|uZU@Ir&fkC=<+XAg z$u`Ps<$gD~969FiqA)$qlKG)a4Bxo@z@ZA^AT{2~ASX=S)STNfQGCh$ zo_jox<;TP@J-(FrOMAj}-?;t2p(^1Z->D4@P5w2GP?_3|ccGdjiP~kRIN1Fx@%|P1 zQhZ5cDZcco7Ib$IelM-Xmql!PHD6vI7;Et*w}ay*^Q3?qotxuGwo!b^{r>d$eePm6 z!u0r3<~yKG@g=t(I8*};az36hhX~hWD*4U69;U}^Dh?E1%JtXw$e+~W%i0HmtGsXy z*W*h%7R8sBdL44N!|(B0d>OyEz{*1yPw{2><~6hLK%3&r{3VNSh(vp#7GH9^InFY! 
z0nQX(a@@%miZ3}%n-0(F-Yq3eoYUgVb3+gze+<*HIJNXtwYav>ch}-e?muv>4tRt* z|ByMfp++qNnOpoJOpoDI94WpGa&#Tu6Mai)@gun* zKWg!1!NXT;n|Hu-sl}K4I&j>-Jt-b`3b;~yDc@&kQ+&y<1Nj-D#g|>@A6s4+{YPo> z<(J~){b|ln&u7{Hd=fs_c73>_;Sh9q-K|`!}a*m>r<)7?l?Zh zm&aCL%$tflUWzX{ez*=b=sL)p$e~8<0U5jJGHN53UI+VP%^Sp_@B8&%bH=B)&(JAC zo9Z_7-*I`)P_$j`FYX`xr@b<_VS~g4Ly#}*+U0tI9L~zCpKDzHBF}no+AQ%LadFoA zZ@2A3p7pQMQH^&+J1g8yeqCi;h3iUn8~AnZb8u?^uE62o!0ThJwmK_!dbL`ZWRKj? zA~(iVN1Q!u;=pso>V^yD=9}Q{I3M{+bsISDGgr>?i7Oi}0@kmAsR}r*^~n^LtcZSo%qOzrb_M%JNMmWA?{dd^2Z#OcEV4Yc zn|;LHQ7M$qYUZ`{&Z3J?)Pyshj>?dWR_&G6Ey4W^-?;t2 zp(f!Vk6qf()IV6GP`<=Io%Ovw)9+02#lsIhN8d*NGptm;G>xTw#EaFkv@eR^3t25F zcx-uV#coRW=6y?fh+`w|JZ^$>%i7wQFDRz;$D>p%Vc(EeXq|Hhu|Am8_h-Ek2l)I zsda;CEbTMi*=_eZ4K){9{;qY^@~MMT?V+b%RkTCC-~CUm7r^b{xPj%ny@Y2kmcj$U!Nh5)9%sU$BSD?FSCE2nV?r=4NQ>ui4$?=kcwb z^}Rt;94@qY8r&Uy6Ske{r}Cv~Eba3(XjACvLRSxA+rP;1eO3<2(nce;=0W>HjdIR? zP-mFi!Eq~ir0z0H;70qB97nQ^_9eOB)h1pk{N{XT@k!%Lzk7;=oeQ1yJydQ#aIhjA zq{e#>LsMTqqc|4r6`JA@J7T=kB=p@}$Nhp@FM!6MRy@9rbq&>IHzoX65j@v%XKs z?FSCE2?wxYYki+!XzEvo?9!!|z%&!S^yf*@ZeP%s_|i0%_9@LztV{F4@AEajbaaUo zOV*C}(D~Bb4vyRXvLi$412;N1$B}HKeN686qUx!!O|wB4KkM9SKByNU=Ywd=eL?W0 zx&6SwnsD$?i#tafn))x}AwZQi^Lhb)_s%vG=u3QQ8cX|_yMETF{%eJYsIT#*ExuxTw&fK3?Uhje5hdjvOOW*F$wO51_bE6Nd}*m~iuNd(aMVKK_5+7{ zgoES;J@e2x#gc2_?xAy*P2ZD>&&lfF(3kkqG?w_%rP?)I(jCA1AIsoN*IpRj@%U2@ z;i2)Rxg8ug$^Qdx#Fyqcl5ND7=6=uG{C(URyvL7feCdSz{kP8OY52zN2M+ZKhYT*U zht4Ukq$V}tSj1U2#i3AS|G<-|Cq#T{8cTfXR)hXTTj2LC8ejVU{0kM9{K9uO_bkbm z=5}!0WL*y6M(5@@l5ND7=6)YJ>CnH=1P_tl`=H<4tj(1H>cSt|#Fysw1BV8LgM3F9 z@zgoRl6#7G6>*kLaY!)xxUdfTraf3I`O-9&_|mQ0zIrvOu%{^hG=new)_TR+TxjRj z_|n`Cj+@L&1#Wb1jw9Jdd};3Y^%C_ync=+*(dtJ$?`wHu3flS7 zIyM+nr>!E_XQvspDIA}V`fwa=>ztOpuE@uxa}y6Z)v?wMw|72bvfsIKId>sn`~0ep z#ftih?*4gwKmTcyQE$7LVLP`Eem0`(E#JXcj2a4Z&8$e~^&uWLyOPoqeFx6>4tWJ0 zFpVW1@QHliV=iCv6~Wmrji>_N@7Uh`3m-?@tl)-@-O%QCa9m_PYT!aVV2%^nMm%8d z_oj1)y6tc5E1nK)E*3hsQRKX`so@*9A2>879ORnlM_-*wo52Olygo#me6FS2qc7Em zps~aQ9`?iIdj+ZMq_Z3_JjIpdS&qw 
z&-d=QJ8Bs<79=Ncq^};stM57TvuwS9U&GLs>O;_2T0e+9l2W3OrJuN`)ratT5z{KY zrcocFib}`w zsnOrosG%S^I?=v*jBffqPw~&2nhSkTTzN0+L(o`SAGomot!4H$e&Uu^A7X&n*PIKT z{KTT5oafZ_0d5D!MP6UvLhAz@C$f#!2e{wgt0q*RcGy>3Dl===yS}Xz$+te@tFI4m z`+t=ZDfjXO^riX`G?vx}ihQ=e?2X^QYxN;qKGrPh z6zZq*khvWkH#t88ZnQqYaU|PleSrJjXHvqk=l6ZZSf7C@odnh+)IvB9HD9nk!0iVP z&FH?D=N@g;Sdg3+9E)Of)BCJV+x3CW@#S zU>AbjV4z}m>#EqT*rC5QM}Im7``_-DXFoid`8uWFxDm0Z6(`je}UTvKRc42 zLUZGSrxxRiGjv>=VrbL#w78fzIum{KtNIW$mg4u=DVye2!0#@-zc{teYb7lz?Pjsz zJoKJaeTZ&~APjJ#YjeA}Ut|l#>&dN(lv>;axT*RO;(X0sx1HMuKRc11;;i+lr^e+K z97kM>xWcA>R*(D?k@mz>HdFN>_;tCT;u`RiuEqW4cGKJxe>ac|TCNNB)bt?)|GuAY zE7?x_1Kd9NDUqMzS^o*`7d1u|f6IeBwHVmc&qH@JLLZ|q=|j+1ioaI|K3F~#zc*0z zA@UY74_Ke&sp&(A_XcpH_?p{Ab5nfH{oJwsR@)&HJT-j?u^%*1x1HMuKds=W`1>o) zt`6!o<3+BMxn7fA@srLe^1Rp_w-$Y={w9qj-cYN3+x{Mg{sPtCJXOJB@}&KK8gD4L z)`I%?=IXix+aC@2^KOox{9UbkDtJNAV<6s8sLPQ$qtT8j=3PnAB_P{~H?*i^?KxKp z`N;#;7QOiue5tnwV%I(H;3s!>+utTQ(m@i>RY%=7Za;A7OgM@04b*+(_5%lN!aulO&UwQp~I!k zz0c(Gm&4Tho2e&{M%~S+*Td&_aNIFJF2GFj2C-5vDw$dKVco2Clh`1wR-Te8F)i9mUu&3|3#Pf!td>O)=653 zdiZSzeO%YQs=s_qt%uL;;JAtR18^hW5XX^hBi<1A`}HQLDdTWIQ9XQ7KOfI2@rJnl zz@aPQAf9n(5)f}l>>cd$)3`gP?}w)+=g!%Qz7^k$t6y_`FQc9>+Qb`j99QZ~d$d!E zotav8QZFgM(so)y2|5w4S6}FddidN9j+@Ar0&c_`;y99R#2ey%m*~EBY|uSFd9qrM zQ{;Et_tSji_5+7*go8M%_SS30i`>k+(0<9L4r!y*isQPWFV!KXvBVon?DF*INz|X; zmRV<7zNOS~6*wvrGlgRi}lwD3rdcB77hw^wCW{pr1&jDOb->vL}@ z8~3#@+HK2x%e@u0i2EA0iSK|4PEyS_EqpyH2go+ZXEr`;;Ut~#{`1>CRIgVr@EQW% zsp=cIA2@Ue4)Mx*if2XAYuSr$s)5!ZjQ6yRp->}`g7dL z7x7(%y8axe#_KN~9<(z+tLrcLPrCy&je}Kx27izr255Etx&Oe?4iKg(IEt7E+!(6s zFZQ&yBL_esOCZ-mc~2@yP0GKbRmOXbIt{+R%IClS@rg4^YHD@;YbEBt-|mi{Kd`Y> zNJKiGgRW{_f5CGBPmAV?ojg2`vM!X5;5a_2#YC(N@zBpLyLx7;CvcA_c=9Xq5pHa= zAKn$VQeA(to$?VzMGZ)=Ffvd+sp@Ridsi?p9`)g=uD?a{mDq?uf%5R^h6RG46V~w3 zE5GJ0dR_l-d#_m(fer!H_2>5mzYoMdNk_e|Kff=Y<|}+bxi6@$|Aodys|+fp*Y!XC zsBXsdCjnai17T132z;G6{#eHzSQha-2wM{M+V$ewE>q75P}Y(7fP44UTmEK!pjOwP z#!@~){z3T$<;3sL)bHCPPn?&`TpOr;&+>KPxC=jkJLMzrbub*SRyWZnP|IT(lPk$W 
zxeq8G!E^8NH=)3Z@>0kb%17Y$Ro)kuz9~3UKEm_(9y}gjzQJz>-xKfS?An`z7$u`Sc)sATXze+0v(CGs?Ju)&AUF$zY?g$ z72FPvyZ9~z?i5$>bs*a)uHb%m>sF`aC*0pn)wn{evzzW4w;wq4Bpk#U#CN^+z4*3w z&c3eyiNZAt_D5feD`+gm6;|#OUe5g%sMYniN!!$8%vbQ)RGlqu2ggnL3EU{I;5d?P z6jyM+)5f+t@ML13rn4o!pQ3c%xc$JP7vUgsrOWHJ?*%tvLiTn2D-@~ucPsi*TtQy7Q($ zO=nAdmnH>jI$PX+;Lw|J5IJ=1^xF4A({^K^_HA$aJzeyAsj$CogS5DU#!_7Iakj`pD3;n`PX<(DX!r5 z0|y7-Al|d0Mo40i#x)k-to4I5F0<)-R(x-*3(|PkE^k7v=EPXa>!oWE?>cj3@*j`& zL0Vq#?X#ur?qNLfuDgad*>MSN;$74AFU)-_d|G?YMGfWaL2~!0*P=FoPds^W znGLzn{!+SYdFNa`B(ZOOO}A}-a>BjsXj?q_U{yE53n_z>l*Z zvX%$Qksb0bumIndeA`?4*Tr|&?W9WA9(cAL69m0IBY*eV`5#j%Vx7J{*)&(t9kB1x ze0?O=vAp^07)SWquuxoL<~+1jT>+s>AFE^M)n`IYW&R?a_DA-wY$u(lad1)Lt+4M} zh4zD>Q^4`VIylk&EcTjz1Zf%s##$ZO>oe4^^{rJp`qqt{@oI2vccYFi+N960^Dt8<2jJ~ATL1T&c zkUOuGFg46gPE++dCTvLy+&#ig)9c`NaNIswp+YKfBi;kYk!&O01NS@rdX~+DG*|gc zU`V&o(CcWa6cT*ms_`DU{lLMEaOibC*6??}UaMWi7`PU36-;qhR%T<$H}s`C^fZ=u z2iM;8%czOp8>)5arJw(`4vvGqf~wcS?cliGc0*y|5qb@Ljw9Iy+XYX8`&}k~j^xt0 z-Q-UB>&m4bb~W;z&?cS)w;wq4Asjrv9=tzur(UaFoYCdVz7GBT8!ewtMqkqFps~b@ zcvLGlpcxz#J9UdOq-Gpgo9JEr~hB1&Ebw}a!RZ2t@bZp545IFfC|o8W#|{_Y-i zw3?f|Le=ZAP(B!{>%MXOfrC5YFj6^uzv8BG3my%_*J{=5>(JK-zT#X8eGjj4YO3T_ z&{*P49BiK)X?5964jvuhehPUNXOes0Ty@D!E^|4jr^1`yc5vK8UNUea-UP>yY$M(T z_xrT-pR?_)-Q>Sjjd>OMd{7;?i|!k@A2@gr4jxlm-#(36{=}Oo7=kLLoz*sMH+?Rg zu7q7Uj=offp2iYyV#!R~Ip0yse?scKyi=cAOU2F}^m0XeT>URomAndW2ggmEvjR8b zO>i8^HsVcizrR*59DL5tO+N4M<<<~&=$lX6FGcz5zH$43gD2tOwexu9Dl@%SyV#rb zbnoJ-377CgKL* zM!X7+BiTkg3hsBzg^wp~C%DOGNuv+An^_xq^k}=D*to7&4%EeG_5%km!a>vyuc+5* zf7}~i&*REJk$ZN#(Se*fJtpwYGEZt^qFeY4|{S0VnstZ>uv zD!Bc?!9X~OZ@<=ht#*-{xXexCN|@dcP4<3#S_OTNdxn;~gZghYmUs`1rp_$gy%lsK z85c`xc$JPFX2$F!Cz`>Vr5YIR?45$u$p3^Z+ z!f`FiZ7{{5-c_q()zP>5iv0y7)S;)blvnY3Q(Vw;Xc}y6miY4JfezARDec++pL_!PyZQ0{;;Q4(m+Fqw*viU{j5^~5 zigxlJh~KI1`0;xU6Le@|V`@LDZg$)@yT%n#-rXvW>fs4(u^@9r{w;aT-f;cXf;MdnV%dI~|R6 z$Im7IUX?k?pw%7cc5vLpJ|l3Wd;^Xn*+y|U_j~YL|M(V}UUJP0=kis%+8BAbXj9#B zZa;7c1P&s1b$6a=XO-Yl1#E 
z*#a{~9L?v4AHn!V_#ytLJ~n8ad+}~=oPFJKZuftWR_`)*f*)$#adA$B_TqU($F!{0 z)mRq-*QU7naK|!Vt3NW}Y};6O+*r3-pNsoRwon|*=Z7Et$d6viCh>2B7S{?a9SvG+ zsoZzOFX48_{IVEyaF{`MRO^6?yyfBgwdmRu_bv<09dYcZLH_LjX81M4ibCHPeo-9D z{Ulo`j^*>ij}VwAo*!`!qLw?wr6M*QYS3as)92@0KbLx$=u36QX)ML1sXKDj9FN~S z*6y&Z@)K)mxntYG6UunYZEIiNxEFEh3@h9@@Ppz`?ibBXaVN*grb|q~sCfpthgvsW z^hcX)=k~$R{^X}Pn`^4q<`x`^7`zLVKkBf})X%mfS|0Ff0&NTL)V%|dpUuh-r- z=7QlF=!e}__+skkmqwPEchGm!w0?Fhvb;E)p@+PT>ZDPZuq0lmc_3{dTVvV#W^c*qPUOSMRQZ!$Ndzz$o4Q4@w&@l=qgC)Vkq<=VYPV&h3MrgW;z*Hxhq)59&3gjc4WA*UKK^ zcCE!1^rd=cG?wa_MMjscItjl|RnL2E7T27bcmQ>~)p}+zi$bQ`#`|jg&SL-S=R3XH zR}N75oPu|by5CgKZ11e)^V8AJJkUn)JIOYxXO>(!dBm@FzH+KsC%i_-4NjFjeC3ym z)-O)Md9nB|K%45Bar=S85a1B6e9s79uIn|T#o3*E_Vu#Ge$q8x?OffztVQTYjHPpR zx)zGMd3XG7|JPVA``MSXFV0~+ovS-9d)o5|+I!S`*>rt6SLb$foW=PtaHe`@ z9Cz}C&eb_i4=rqWhlTmdXVrRU;<*{AWAP<)-^+bhe6@3Rvd`pPeJCK56&%Hx`%AqB zw2_OIeZB0swFORvqc7FVrmuIM zb)~CYt^;SPXC~qVw8is>de?j%uv@lcIDV&kW-bdNMq8r2U#)Xqxcl(kCDCrD*54*y zsh%0fb$5}(qFv|u%86<{Gf__sZK`K>Nm?@H{vKbgo*B)5BP)7z1=P!~>AF4R?_FOx zcQ>`3+3C2qe^>wZ)#{lIwB2;SV5V;O%*2vCPrvrn>WgtZ`MQetCDxVdnela| z`>pcxjZf>Lo>}qcabupJ)p6LG-k@aVefoUlE7ddOxS#xOvu1ySuiQuFU$p--=SQ7H zUpcpW{w(%`(58B3G=Hs>NzP?a&+PKJu2Fxm4rVoS$6113!STa73K5M=+x%puT%-#)&dNFvFEYoQVNY^`-%dQ?B0Z~bo)zdXyQ_7}@1?E1nG&tnEvIV} zPk6+qCV~B<{N)R3-SUpthkkxAQLkG*YR$_9f54Zgy5-Z0eTj0jK|OEJFsBUgv42Fr z_Db*SFXzu}c{me%Y?1HKMXy_aHkphGMD<@pV!aNOo23r9HXb<3M29!q>t-CyG+6ZXUt=IhMy$2x{#3B~guYE z<`(b1M%mXbFF9o7hPN;MHJ&hyC7y8KnvSD$>559;8(#;G zyLirlJMo11I=BzmI(XL_e~n+h-6JFMFK{BB@V$qH=8ObR#1|)D#NW?4|Np+KHuB^} z1!v+3&yGFvyna8uZn^k79in5y{RfUCf#XEwc@W=(CH1=H;+-9oeckd-{=xkZpfA-e zr?JEno=~Ff?i;9+Pj$17yBN-op{209mqD~33I=5<}8r08TU8U zEuUW&6(yGHb<4T^z+n{OAfBnVdTnpWy; zyvEWG4JNnN>y~pnIBud225=*uFvpQ>Bc3q#yY-i4?yZvXTrVrzbW|+|BQNW;zdR~! 
zMe+4D93*Z(a0n+HL~Rjoy|%Z|NIL1SadnlyGqeva$5ss4fxi7(6{|fRb<1fi@r3P) z^sn&15Fq!8pStD0=e;HS6~8>*dIiXMSEn6D-EwXR$4%s;0yp9ba~#Pw;t6xVyWMGU zu1luBeB=Irm{)5szf$P4SlDgHyZqBhc?wM=XP-1#P<_$qx?gT zBiTmzhum*@-sEo)&F~zm`G;aZvbpXXw;wo+Asj?3ykD>FEuQ(NdhKvi93EB6@!b=B zDgThhQvRXS?BtPmP$>8n5Ol*DTjp++&6#rZUr;{4QNJLb*5 z79f{a#vZwXHpTgL{j(~6p4-iF7V#x;rZ}JDPQFl_&v9z7>2Z~5v+y0D z#`)qonyq8e=Q9GdIG^k@iSr|YqpW;?h#bLx0b1;;q_f_enSI^z^u`HIW}+{} z`81Z|{9Um#pI-bMAfHt0mdEz2P^drJRJVNo#;VUH;7+<&d>uIMV=f05QTt` zwpbr%h*@4~^d<8m@^4jLEW(-M{88!MT#A1P(BgcKAMWSzv<@QYtCU`Q+nDnZpvBV4 z`;FrKGk@p2Igh?nx17dOod0ckW!pvgo$8jCt~@1lS4q8Y`Qckq_X@f7dh7R_Z9aSl zd8AZFoZHFQRq(d3t`z6p*cn#}Df;f!0BAIc1%rF0`uZ z2mcTMp4OrG{>QGD%LYq{TgtRk@(AW$I^1ONf5DP#`wpd*{F06bPuu<~6)Y83`R^hw zK%2PzacM{;$r&tBzQs|C(EO=Cf+Wg2$knDysem`&=aPm%I{EE=fndqzN%e&C z*iRwsC_kXu%g%Lo)(e(?s`chMeprWzSO*ao-N^AMSAp`*+FA08*|pym4tC0x@4a@$ z&hNR&?}W9#B6IEVgm zIx*homy;ZJ{&e^PoZq|r+}!ujPbcsWhV>bAw3}qUH!JW&IN!P^Zo# zA=Mo10rMA@QuyB74vt&EgnJz#fE&%taU|R5oSXZ-uIBpM=QcUXD^h|?hmL)l9 z=j_~m;1ESPh&>7yz1F;c!tLhoH#%u&)uuT7o0wyCW%PaXe0+k!_olJhx%Zr`b&<}_ zvR%(QGp8=?A}zPSKWmedv;484RmM%6gL6ALZb~`l62Ohl$vKW>8=aGLzipzs54Jh) zBb4!z zLs=;jM2 zIa%dI>@Rar=S8WWwQbn)&=qGxb{Yq6Y1AC+)1<6o=~P`d3(wzI9c;H;tup z?_S=S`SauVO6|&S_PUAk884Ze4r;ji^iw2AM{?FSB12nTWYu}-fwFP>4e?Ca=fRXb4f1p01N>*&*1 zI+wROcOHt=YFp_w&3)J zD$a8I_2(~q?A2Kkyt=BoZ`^+1FqLo+_w+Gm?JQiJvEo{E)^2(~jLmplpdtFESbm?b z@V#j)ott;gJ@voXqj(=g)mq_q#7g>l^jhHKBhFeKeQpQGty%KUhJ%3{&CPKn+vuF1 z`@QMppmQTTILkLwzPE_qJLtW!a>|?x1BXkwD_iK@2s7@o8r(h!uHrN^qsSL zg{#8%rm=LcKi_fj+Hm~-bIlzv!>uIm1&clR+``|NG`oEY-<#XPahvLiBwXM|=l&c= zvW?FDx!?2tX8d$@cb4;1G}h7Iw62#_(nI%++YcP36ApcQoXgkYy|c!NR^(`Q_Hq87 zbAQ@L8~L{V)i&r$b@XX0o%?rKlCnDSowGb8)mTUW!^*@vS#O=?*)gR=9er*G$L(UV zvxg1;#_lGy9pTj?gZc)8p8_R>wL>E(#&hQm>m{;m`^5zi1yX-sy1g zsSXm^M*Q9k$NIfg(FP_Uqxj$XZov*!gp(!3TJjdi4w;wpnBpk#u z*+Z`_jyLSvH@G*&Jr8cODGvFatNRZ`-{TRRMBQ{6OLfzOK3MKOVdo+r$uQPUx4v_2 zp*7l6H=WzTaT9rzz>VhSIFfBtH=X;v`|Ya}GjTt)O}*jRXntEse2?HcrMl_de&8^R za1eWUelGIjfAhxwD&I!hR1&P~6lZy6B^1<8Z~H$uJUIWPF#ny6W 
zs;$KRo@+U-R1G|bqX#w5y}EK6V}3E(v=$VPX zjc0nangQEQ?}tm11J*Y~--bRv&bqv9CDB+V{m~8W0uw%b?l;Os-k#ohVE$jNq;;P4 zf0aTzbOSu*=2NiTgO(4+|p~((dX`R1eS7Ad zacd#sW*SSp_lZ}21RL=CUA4}BpSTBupVab@SDKsKD|Pm{9UM2_5<0+JE5la9jl68Msg`JI9IUR^Z67q?^S3n*Ov;-Aeo2 z<=UA>y}Ej3k@<4KUE5#a_QB6sTuJyT&J2U}TJ?5gqZ`@pbJt>gWxdJI%jv)S??d1A z+ss~fhE5-irTqn$G|&2efgbYb{q?(DaOo=b8BpVGE3{uo-n-9Xe}UU!th27y^ryLL z|A6~t*LY;&`#bLP994%;)GEB|uI(Ri``~9B%oqDfqLxLJUW*=YhP|b6EsEhy@9T;~ zcLsJv-wpnEM4f#aOZx}k#?4)BHOWK%mzB{i--<5M=3a}#uTS)l{{-X~b@sU(92c=) z4_s(&juY8N`v=_bjBov{hJAOJ?W=v9`gvFvN#Oqj?ZkrJYJ_%?xc$IkKH(rV7@$=^ z`v)Qx{^p+T{(;lkEd@WKFV)$nv9y0+c`l>lLj3+tt+Vfb=JM2NXcx3odHLK9j+>~{ z0^EqV&v7K%X#arwZ5LIgTh-DYazg7CUz9rgf`?Z|_l?^R92U@hFXld=*Q%eQY+B=5 z6vLa|_Xk#&sd^rL59W6~o(>&88cX{Jd>)RA_`3yM=-M17?g!aI`vpROqPcEsjnhjz zS)jdpckIC4$Oq^4!Ow-{r{Ln=@X)yLg1gW(`#Sr_UVaPCL|>}2Ph)9+AkUXieogUv zfLdpN^8C&=3$J@NfPT^rURe}*K9`e@TUw7m&#AN*WGehRKg1-*8?IP=4`D26umbA9Zqsue6f zt)6l>B#uT?L;fpKl|jKCLDKdTI#_Q(=_uPzukY;5ID5{;#N?38vB3MDu9lm}O= zdgW)0PEygQ+is>e^OPUhPXDmFStlb86gbhfxm`3j(dmSZiAN*WKeu`%d8Vi(@EU-W5q2)ZA`kAM7R)Q1yrXI@EqyzR5Xe{Mh^Le=6 zi;(X60)A6IHOGPGrhIDd$LmsF3&L*V??{i&hp+DBNCqFVX%6U7Pk}63RDj@Y&B(KEAr; z?kwzKaJxi*Xj9PKWDD)D@cH4#3b-J?myCZ)eYCjHxYyq|+k9xBI-WZ>mhsW_)o3im zh2@uBis@0>NB&mX^Vzrtwo==zzJ^z&eB{Ri)3e4SE}Yvwz5DOaedXNi@&+pT(9b&6 zG5`3fuUyqHW01=eTdABv$U|Gi+lT{mlvwLr9qsd${T@4`O}0^7IDN(KW%XS8$}Tpk z#jheC+H1DdFnmm3xkO5y)0uy4Bylb_wy)+Jw;wpHBpfOz#migyXt9%c_eb>AVl(Bw zrTBPygEj51pfBmK(O8NLL;sw6)3~LNroUF#r%T`JXp{aLw}azm)N}FC=H@t(Z4?)B zznu@q?6X+iSAIOgsJ|xiP2&4%zH$43!z#i-aFqJ^Xt9%sQCIcVVlz`5KK!oIZwva8 z{u+&?xUkBrT3sgL_Zw?C#wq{V+Mw??zgQ6$(pZWM z`8*sK!D9q26c=)wXl~dh;zI7%-F3I-4}R8HzI%Vb({brtjrqOL`)amx``~9ht|a^v z@4WdwT5Kh_eYh5#{hIoDx@sev2k1-sYc!VP!rwn<6qp+4BOh1e!lz|iFBs6ic*H}* zh1?EBj_pX8k2V+ggKVKVaZirdS32eOkw3Rx-|}hAuF{u3DWhMZP5Im0KKPjcKgD}a za2j{`Xt5L2nZ|X>m$_w}ay%))Tl;T*z@E+bAyNe#e)Y8uq4!k90LTwwIAw|Cgm>+-m%}dqV2w~ z^3&=Qe;t_SJTg+(k2z$K{(HqZKeq&>+#_8ZM8tB= z+3TK!Y)k8U4Sh-XgvJu@_;$bGEnfJYbWdL9Th^)PLqARTgxkS!6Lr9W8}W`gj$|A0 
zj=A4WF12bnb(Ei+`ftDpuP(i$8cIMOuKULA2M+5A2f=mwscVCX+Swzs*FD*Dc4+e; z^d;UgjV0c3y~ZteKY`8&>7G=rGo#Z#=#Y@^3AcmeCgKX|(-!Hzar=QoB5)9Y4@8ZNcyNQ2KVI(5B%`KFfGoc8pj~46_h3@N z*tAqb%TxN7B_#qs-lr^Pv%ZEhw_{@A5dezc1($Xj*RmS&Rh zW1)WSi17AXV$jb0&>(bDXm0TyOl>atjvbXb*gHVJIwbx1lDo~Nq^!budV~bX_4d@N zeQsbgWBmcN$#!lZ{M%mXD7WbRmE%pU4x8MuI4^dxdgl^k}tuZ$@q8%7ySQQOh$@ZcO!Mj#d ziQ5N1H^R?&B~B6F7T{zKg#63Ha7SZp>ipU3o@{ICY$yYsH{~-HdvxaAnq!#vdZVd9 z%N(V|7t?Bgc!+$8?PFFqTIXo2Gl6T1_aNGHhx)fVni?Rl4NC7_KG9LyyR~}fPxry2 z4X@vHIkZ9c8UDtX!u1EV+Vs-u+m*NA(=69VLd%__83@)<|1=r#ZE>Htk&Mw}b2 z(rtAe5?8PY+N3ulzH8x&h;xz8II3A^%gqG?w0y=r^D2hT>ZR+Ri2Yax9gAwOAJbw^1_amldHUiV~vp=%`<`Xhfr%?EUS*sPF0IP0n1 zJr-O#q4^gTe+J?XP-Roy_?O8JaryU17Qn+io9-b6klzc2WGAmY;pI#&F? z2>IMM>Oc5L`Ha0S{qD`&is!h_OgrVi>vCb-$~+Tv-4nuI>_?45UMpK?jz88hiJk{x zi?y!pA!*X1yY|*9 z8=(udbbgI46Ohly*MZ|M`~dEh&&byyEoo$W^6vmm_vGK(UpJNefbtn1#}{8W8#s~f z3Hd_#jQqZub)({T1!u}<95}o9o_{i)M^*Pk@UTt>$j6J|#d6I-;{F52&A?H7PYbQK zP+c2DR(ab`ON zfx`~Mp^>r)Ssa=v$|m%@p2oAT+0gjPMo-bqHUHXh^rg6h#!_4{_UNT5Uy223amDa` zmA4f~o8k&?2ggn9V*odro8u^W7O;)t3huY(@?xitYzfrj3XvzZ75ZeVZ`^+1uoF1Q z%J-Ali}wi9a;n50=d(a9$4j{%DBosr@;%o?^vzpwVp8WFJtP`S`DZU{beYi)zaLMn zm&1Km52;4AcH!gQgXEf3>oi<_u!pqA!Oz^=I!N|wTx{@6e|zcL)||B}qkW=FWbIAP3@(oJr5TC`WLu+Ki^Ows)z9$;ve(^mF+R)pY2P?_cm9}AbDv0z8lkG zdq~x?3SV4>>nHyy)V>nx%=9{3?{h7?Ao<^`eeaYyGaNsx!!B9}k;@&WYrz=z@O7;k z({*T2q3+lXeRLh2pLxo%~t9O%h9i2?OrFRWO zgS7mv3mfk~e>6Bqew@!cTG7!d;2Gjm1?_jLj*c<^Pq(ddT4wB(&>-3FXbUGrM<=xJ z5zEyI&iO-&R)&twh#xf?mT80gCg-dC6_A%izENJ%>ho&@UH-$m$3`8keA!Og(s0xA zc6EbftFOf;*Fav^=U>m-FRvFQdv`qA+6j4BD;_VsS)^f*Jn}?;Gey^=f$|3yZOZFP z>wNuDJm#mou5QK6d)LQ0kd6+=59_cS>kzLz4?@dnk**aZp7jR0mW{FwRA**Wxq5SR zpzrql<)i55qId+A1T`!C_>FDrvw(@8 zUpM={`_*&7V=0c#@_2S z?Ika#*qW<9Od;#%_qfSf8$y7s>?}eQ`{d{_`?RJU9{HD2FiX1*+y~x zvt9H2OI&u4t9H+Eah;)=^s0??r&GFv+tg z!Xd^Pn2Wd7qlqihF!Cz+pe(up#1Xowiq8wKE`5Bes0@Iy#?@JxpkdzNDi=W9j^={(_ae{4cx8 zWmO%WC2M--S#!x%)6wB}aNHXHtMca?aHI1rjw9Iy+r{}7_j|~W7O{>UuuiIuj@Z-c zsQbq42Mz}a2l1@Ean;U<1P8mlt9IsOdOu{28MIqQU&^bbv2?!G@zmq+68Js8s-shR 
z`?2@!U%P5LI@}J9+tl+eCpH2%I$Ps7l5KR}#r=+0J-2t3x2vY36Q=xO>#O_5?FSAA z35P35S8uuH*R^I^l}A;6T#L@2OmT=TVphcgeMv`$#?txM=Vw`Vk@?)@+7U(_or?FSBr z2nVruQ(f1Z5qpT^Ts3Z@DGo`ak{17}rt9d?SmH%~YdYSu8GbjYIyxak-q((WHWBIQ za633|=QBF3dkox&7s+uX+lUv*{qA+Sb2GCQuJTzmuW}Ajr;njcoKJ2)a5zjj?D2rW zR7YKF=GrZY2`qQjxQnJZ9IF|5It6{#4vQQ047^AhOT5VQOSkvTiQi|XH*USNzO_`U zcKttL9o*zgwJduKM$KSu2ghyZjwi1c0ypAKavaGv;z@G9lP)(mcRuJU`)pjga#kK| zNyLqZT(!JPZa;81LO6(+&t2D=5zpuW{4F!7cTBvG_MXEwd_dn;?=9VwddD=Dc#~^) z9jxC3zxT@OP&x%VI#1dUKDD%un_T3`+z4wh`}=`#mZ7PL+orUFBMri=KS6&`J_{N1t3Z-X*smI2X-4Bt2T3@e^zNDi=V~KYe+Wk+}jWgZk-)2S~oeP<>Lo;T$X*xRG4vw41g9L7r zSIKcC+lZIR{jQXw_K_)t+~j-Pe5b~xNz(WVxU&lDzH$43!*RkvoVBjiwPu7yi+T1s zI?GxeaO#h~lvhb(i5FSLZOF)!HEwd32&0b9u!M?3>YzPa)zRU0aNOc&-~|EP#6AG> zQ8|ue8}T-|-yhDEy!WUk^bEgFn&MF#{M4;?eMZ&NedG262bpmAJQp97hut(T>r+RNxeSdRtmp0=plNxENe?x2?k-Q+L6 z7q7;Hx5@3`xCwq7aHF|7j$|A0I=SEBX-@VdBvkU9zM$#eQT&XIy9Deoz_n0n%;pX(@s@Kr}eASs~VvF zO4ZTfc5vLp{vdE8-Y3VAY$M($_uKYcx#ZCvZgM$QM@M|0qD{VW`+>tr!eO@EYqRFi z)+4TF&&h>*Z^t{IxR<6l6#qW8Llg8R9UU4=yv}0jpZ6_G@qq8Ua+27n~Q=Toi zgX1Rjs(>5K&2c2#hzH93UShVm(_Oq@`oA~o=m=gC-s|KWw;wp1A{@k7!U|n$MtrN> z_0+hVrZ}wolk373^z~S9^GYtni!@en3efhdV7bIFMwj<~PTCf?}xr0^@P(QYz1QSe5|HsXz1 zed*?s_nxP`{)Y3<=6j*j6QBHjb`CGOf4M>l&%hfM@0pysZ`^+1kW4t-yYk)n=}}#4 zMsOhid1f1TpY8nq%@gz`9UU4=ard1?w{4vedG26htt4esIs3W za>O2bY1~TV*>(0hI)*A^taqX>>FCf{io5wd92aqp4_qj&<~Y&Z6jyVUbpYWYAf zxw)#NBm5YoUyH6yad&X-i=iifdTBa3+%AzXl&a57w!j1tNAvmN$65R$)=RuY%jw!T z#xoEvEjCsBpnSln#V36pqc8D3X)MLX_o};&8(9`w3vP}4dm;btSP!#JerSKMRs5}z z4=Cyn!w-skxnDFl#l0M-DtoP~b)W4eH&u0e#C|&3#QWs-!OwFrU+fRMw*eczrLK)5 z&NOFvX|bp2d#b_E994#(@1*1rZyZohh{jUfThe^K`|uV9x#QFJQMZTN=xg8uAu|B|s;$Dsu*+y|M_j}omABnyT(MPhn`P9 zkK^qrYtKLT(sXo0y!p~gizDgU6!%VZYvI3Spg|sRvHUni=ZD)R{_Z&F8bM?W#j$*T z_;HceOK7i5)3t1jx#C`0Y-qY(LCT>4hBab<&CRn_4U zwxUh>dt^Jsz1%+dd71naYkv@0MPDGi2rV12MwPPH;o1A7)lO&hB^@3bOL1jzxkG#B z9WZG5d@sw%lM17q5_hh~Cd5Y~{|-1&+{o>sxhZbsehOUL>9&>{`5`p~ZPMZ4_QB69 ze+{owQev3Rd_)T#k$ARXixRCp?Y2LH{ 
z^1B)|9Ui6ZX9C*z!Y|&kI^slTAN;&ZehQBDD}xpn3J#}>L5s~ypU*0t>{Gg;@A=O* zBA<`OQe4>NqE)W9FAZ{kHJ@+L@L}i8(WZPpZije(T+`>Gxhd}Bem$FdEaY~mL4Mm} zuK%*kj*`e*9irRL?Sr4!;HRv7?+8q$=$a=+&YG^lqWDSY_GMzc#x6o%svApV>D>PH z`z{UcPS*8dPH(>BIudQthdGz`VWXZybbXi?xkGCdL!0XNhVMJ)95UEfZlLP&Y_C@( z=tHQk4Vz}#iC7hWA9D4*ti|qR5v!RdGf%fXj9!-@|Di* z{qrpwIKQ~BR*%>A=l8P#^?c>lsy>X!H$|Jy?H%jC?vUlE>#PXAGpe4VS5+9L z>!NV{unyO09R$~YnXYjnI4yN`4HnaN_-(LlaT|R}ABM(K-Pn#l@5ipe?^~ry2gm=h zle&-ZwEx&rU)iF!&kZoDrT5K##nqYNEALk8-4=-p%JXcRuU5yE+sW5ee3xKdsctM^ zXTpK%#xAb(tZAMg9fzh@CS2_3>??OldF8u(MGuL5rMj^kcbk_>J#TmPmAzDbm|ZU) z@36Io?v+|MR^%z6O?6{weySVm6tcLjCDwuJ#&Z0y4mYq4Y0C2;IJ9oM)`+O-5tF^H zN!vAVS1n!dr|Fvfvg)!hWS5`3Mb$M4DeYUscfX(fSJgFf`91tp&MSU$&=cFtH0YWL z-tJXDjR!2=+sG>*U6WPKUi?~{Q`a@Q{rY@`du{#Y1*)#eK9`p*gJCQ2fXQ~^0ejWX zyP>12zkEXF0T*46So;L@G5+Rn-s+o^qZD5=pwcRPe~rhyIbVl4F3tVrLUPT7rr-g; z581b{L1SIlWI$A%7N4LGv!dC%RSG}+P2DZl$1C~E>r_6t$mgr5>zc$yf9&<80(49Q zM>bLT;g>q4?681;!~@>8XI)V22S53-bSO>X0UwAh=9BlNpQdX<*b@(U%Kr6z7RDmq z=uL=wI`nHe{#eJGSi*Q^9fd7Zb?pvuW`D^~A>K8)bZrp@N7^qB&YW504*EJw*lB$pJYX72JmB1a z`w#pG9Uad%JJwuV;~;%==(T^(HsCsM*gzNXfVmwUcfs=o?!*J;>p-><519MyzUZQJ z@fQAa$D_aM_K#H8xuxzKw;wp%A{@jw^c`LMLkT;NHOpSt`8}?i;rsINT;2M6C8x*ZvUSXujF&nskjETKOvalCBAjB_8nAtV6f5vivn& zlk3xZmMDZa>6&mmIBud|7jPpUFvpQ>BOWmK+k9k`nE^BWwfsV{Pcc*XjoS|#?hp>m zlz$ew_J{bJI6Zq^lmALwaIK8KL&D_)jT-lsXe{x7H&`F2(7L>?YqGS$rianxbX^l} z2ggm+j{t7O1LioAZNvlSeph%n<-m5_Pu2sjmMHMKmn3vh@SGA4nA;B=?h+2-jI_0` z{ULG`w&HJ!a%hO$NYnSj(oY?RWuPzVn$TF{0Y?vcJ+K&lU#04rEI2yhYDgY9kS?7F|kapv{|hkJyBh!tIs)1q=^ z#WQ~ufBUlG0S6X!+8%+v!~>?W!~@OGMj zD#lV=MAsr+lT+5dC&x?)(DX|WhVCr#6XQwOq;|>6VQ}{`%T@&Q)C#@f_9Bu{i&Hbo8Oi!o=?spVlcHu(~JO6rZM_@VegxZHiCH zSBg(LuG6Q*v>eqrK+`W3d824ke7fe~q%G$CkuRynd4w~?r@t;QDb#q5uIIt=!~Oh_ z?q`w1Q#4q5f^zLqe%+0^_@jd*;uM?S&lkSSZJwg7iWAK|7`)>37ST$JF$5?%QdEaT(f~8lg9)^eu(5AV= zPWG_;iZSC#3T)OksycYzV*eyCSlTne<6mf9J8Af-jU7`41xv)^7Ig*S z3-Or`ULG+oH9S}z6d!*2y{dOBDn0(@yKgri=@36nZcrdw z+@uoO>Yi3E+uRP7a+lk2wA||^kC*N4>9z8Z+o@h2bXx4d-+l6n2xXV0L$?_pBlqnVq!{Cd|@! 
zf5hFje|PPluKm-s|JmF>G&}pGo6!j4X6KZ-J+xmB?iZS!n{CwHu_9~DqZtoJw{X?^?a)@`yVjcETe;@T{vmg0vo)0hJe9HY`4)>=s<>xSA zcHu4E!+yzfu6uE+T;TT4fRS1MX7`eDFkzO<{Ug{8L$gb3iWC(48FK-`Jv4?N-ynkG-USD?oF;12Q`_sY(4zB?pc zE_Sc#DI>QaN3L>*=0eOK8hKo(dZFsWR3FBA%&ds>@qLl@E8>2Lug1?QKXHqzoGV`g`3@OU))BeT&7h@U$*!^mvRL>R(@ z?pW-?cKV2cT%a`;69iuBN&=Z z&Vl%S@)j63dx-lXuFHc7vxn0?ewh8CS$(05yN~ev(CiWR!zm*q=Occ!3P$i4jGIl} z=sp&@)7T%GP1_Gcv&SdN!)}A#w}JhrKQY}UjHl0bXLOR>ANiTxq~7<*9v(lr)D5%b z3Rq`0OZ8c*H?od=Bm3b~I{zu1Kbz+xKb!L@_k&M!{}|lv&KczSIjh|7^px{qX!cCH zTkB`$ObLcYd+l>^?sn zMrO|+gmJS4Yuz7Afsxrlz0X3u&kI?|hh{IVa2NHHZ@4dN{)?KwSo0Ta{!7LF-AkIk zg!4nQCEMH|R>+<1(rQ>|wv_dl*~>g1@ynbKmvKMjmra)mi2RSL$nEZ%yg%}9a(}pPj7+$1 zO@MV~Z?PUTThH@p1d;!Fx_m~SaGOx$cTGECX!eV0xyRiQgZO3x>+tR2o_~9r+g#!K z=FJ|zqxw6noKzr zcs}9}G{24W5w~$a)Y}ev{2_j4{==Q__TiHEL;iPpFbfc;=5@u{I7f% z!Y21{foyUAT_RiE&&y>SjGO&ufqP_>`$Z-U&AwRX9?h03-T(HNYusa+e@yeg)ch|s z|G4HK*Zi+E|0~YNJDgyhs{d>Cf6ad6Px5@q`S5?-kMjO?7Fd#;3KJFtqhSP3!q9@G zM)$O2IoIuwDtUi=*D*sbf#^>zf(Z*w&-VCq-#_T&`v;x4Kl;w7l)Ky%%}>$%Gc~`n z=6BZoE}Gv(^HViHmGg0K*IshD+pVu$?VdG2u5;4{%VxJb_s2QiH@WF$a;tmxNV&t! 
z7zOd38LY=FIEUv`?uUF2?cam@Gv@u^x%IN$&E)-&&uoL3e_o~J`$QJ>Y=ChK&gXuJ z&))|V7W8WMxOXiKE$Ds7&EolpvyQrb*pIjm=fiBh?*-g1vfzT%Fm6H4WcR{+|L(#j zw{L-LaW5*7oR9PRl}qh^vG%{1`=j2U_oI8=OU6Uo_Y&1}RnKJ|`AgYPb^ZXIKY-^W ze;Maf?g#Vw%VqB6X>tz4-24^Ai?and%2Z>uxxg(Dw3NZ`U+}b+=8L(M?6&TQ^@;;78L6K!*u^)yg%|qd>`8GUR^5>xyAMJ zsC!L=Jn5D+y4NPlxo#=<$GN2oAl~gd)?<+8Q|<@L^1WWR(!HMVgYU1O=MLxlARazd z9)LLahRHCppgax6A@2uoT;ll=e4m5`H)VT#Q!5ND7+EekAMwqVQtwlt_o>kP+`{*X zET~L|ak$mJmHQ#CQh$~DZ&Uwm>L0~^%o)}0-p>0YzWtC}T`!NicQi=7@90L4@8o>+ z-8t7ClPVWLyw6zHsrtvNe;oUfAIJ0IU7Sz3AB=H-+Unk2AlJZ{1>@5o=8j+F-jgYp zLG(>vozC(6y(yCKgT5N|*QozK_T&5eScemnU<6mfga!BWeu!%aK=jwHa~~Kio83u8 za+CXD8H`&nne%BI#5oVud;KBaALrFINWIU)df)nF&)0K4zI!B9E^w!0$R+Nhz2tIt zYG1k9eN6j5rv0aJf82K(>oE%+=lO^q*Zc;~N8G^uP;c1l@e`G@)tz1-*SIsXVcdcl w?e0vTpRnM`c`&jd%!eUta%Zt0@vIiNu|)EGtWo2r@4Rhf5%fCr~m)} literal 0 HcmV?d00001 From 7e0cdcaa76b18ae7711dbd1f0c1bc43e9c328822 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 12 Feb 2019 14:26:17 -0700 Subject: [PATCH 18/89] Adding GridMet --- .../compose_array_single_shapefile.py | 301 +++++++++--------- pixel_classification/data_utils.py | 12 - pixel_classification/prepare_images.py | 21 +- pixel_classification/runner_from_shapefile.py | 130 +++----- 4 files changed, 218 insertions(+), 246 deletions(-) diff --git a/pixel_classification/compose_array_single_shapefile.py b/pixel_classification/compose_array_single_shapefile.py index ef14d5e..4639f1e 100644 --- a/pixel_classification/compose_array_single_shapefile.py +++ b/pixel_classification/compose_array_single_shapefile.py @@ -33,6 +33,7 @@ from rasterio import open as rasopen from shapely.geometry import shape, Point, mapping from shapely.ops import unary_union +from data_utils import get_shapefile_path_row loc = os.path.dirname(__file__) WRS_2 = loc.replace('pixel_classification', os.path.join('spatial_data', 'wrs2_usa_descending.shp')) @@ -53,6 +54,156 @@ class NoCoordinateReferenceError(Exception): class 
UnexpectedCoordinateReferenceSystemError(Exception): pass + +class ShapefileSamplePoints: + + def __init__(self, shapefile_path=None, sample_point_directory=None, m_instances=None): + if sample_point_directory is None: + self.outfile = os.path.splitext(shapefile_path)[0] + self.outfile += "_sample_points.shp" + else: + self.outfile = sample_point_directory + self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) + self.m_instances = m_instances + self.object_id = 0 + self.shapefile_path = shapefile_path + self.path, self.row = get_shapefile_path_row(shapefile_path) + + def _random_points(self, coords): + min_x, max_x = coords[0], coords[2] + min_y, max_y = coords[1], coords[3] + x_range = linspace(min_x, max_x, num=2 * self.m_instances) + y_range = linspace(min_y, max_y, num=2 * self.m_instances) + shuffle(x_range), shuffle(y_range) + return x_range, y_range + + def _add_entry(self, coord, val=0): + # TODO: Encode class_code in shapefile schema. + self.extracted_points = self.extracted_points.append({'FID': int(self.object_id), + 'X': coord[0], + 'Y': coord[1], + 'POINT_TYPE': val}, + ignore_index=True) + self.object_id += 1 + + def save_sample_points(self): + + points_schema = { + 'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]), + 'geometry': 'Point'} + meta = self.tile_geometry.copy() + meta['schema'] = points_schema + + with fopen(self.outfile, 'w', **meta) as output: + for index, row in self.extracted_points.iterrows(): + props = dict([('FID', row['FID']), ('POINT_TYPE', row['POINT_TYPE'])]) + pt = Point(row['X'], row['Y']) + output.write({'properties': props, + 'geometry': mapping(pt)}) + return None + + def _get_polygons(self, vector): + with fopen(vector, 'r') as src: + crs = src.crs + if not crs: + raise NoCoordinateReferenceError( + 'Provided shapefile has no reference data.') + if crs['init'] != 'epsg:4326': + raise UnexpectedCoordinateReferenceSystemError( + 'Provided shapefile should be in unprojected (geographic)' 
+ 'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format( + vector)) + clipped = src.filter(mask=self.tile_bbox) + polys = [] + bad_geo_count = 0 + for feat in clipped: + try: + geo = shape(feat['geometry']) + polys.append(geo) + except AttributeError: + bad_geo_count += 1 + + return polys + + def create_sample_points(self, save_points=True): + """ Create a clipped training set from polygon shapefiles. + + This complicated-looking function finds the wrs_2 descending Landsat tile corresponding + to the path row provided, gets the bounding box and profile (aka meta) from + compose_array.get_tile_geometry, clips the training data to the landsat tile, then perform + s a union to reduce the number of polygon objects. + The dict object this uses has a template in pixel_classification.runspec.py. + Approach is to loop through the polygons, create a random grid of points over the + extent of each polygon, random shuffle order of points, loop over points, check if + point is within polygon, and if within, create a sample point. + + If a relatively simple geometry is available, use create_negative_sample_points(), though if + there are > 10**4 polygons, it will probably hang on unary_union(). """ + + polygons = self._get_polygons(self.shapefile_path) + instance_count = 0 + print("Making sample points. You have {} polygons".format(len(polygons))) + print("N_instances:", self.m_instances) + + if len(polygons) > self.m_instances: + areas = zip(polygons, [x.area for x in polygons]) + srt = sorted(areas, key=lambda x: x[1], reverse=True) + polygons = [x for x, y in srt[:self.m_instances]] + + if not isinstance(polygons, list): + polygons = [polygons] # for the case of a single polygon. + + positive_area = sum([x.area for x in polygons]) # the sum of all + # the areas. 
+ class_count = 0 + + for i, poly in enumerate(polygons): + if class_count >= self.m_instances: + break + fractional_area = poly.area / positive_area # percent of + # total area that this polygon occupies + required_points = max([1, fractional_area * self.m_instances]) # how + # many points overall that are required to evenly + # sample from each polygon, based on area. + poly_pt_ct = 0 + x_range, y_range = self._random_points(poly.bounds) + for coord in zip(x_range, y_range): + if instance_count >= self.m_instances: + break + if Point(coord[0], coord[1]).within(poly): + self._add_entry(coord) + poly_pt_ct += 1 + instance_count += 1 + # print(instance_count) + if poly_pt_ct >= required_points: + break + class_count += poly_pt_ct + + if save_points: + self.save_sample_points() + + @property + def tile_bbox(self): + with fopen(WRS_2, 'r') as wrs: + for feature in wrs: + fp = feature['properties'] + if fp['PATH'] == self.path and fp['ROW'] == self.row: + bbox = feature['geometry'] + return bbox + + def _get_crs(self): + for key, val in self.paths_map.items(): + with rasopen(val, 'r') as src: + crs = src.crs + break + return crs + + @property + def tile_geometry(self): + with fopen(WRS_2, 'r') as wrs: + wrs_meta = wrs.meta.copy() + return wrs_meta + class PTASingleShapefile: def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, @@ -72,24 +223,23 @@ def __init__(self, master_raster=None, shapefile_path=None, class_code=None, pat self.m_instances = instances self.sz = sz self.master_raster = master_raster - self.masked_raster = masked_raster - if masked_raster is not None: - print(masked_raster, "Masked raster present.") self.data = None self.kernel_size = kernel_size self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) def extract_sample(self, save_points=True): + # TODO: Pare down this class' methods. 
+ # Because of the large data size, pickling output data + # (and therefore using a one-band at a time extraction approach) + # is not feasible. out = os.path.splitext(self.shapefile_path)[0] out += "_sample_points.shp" - if os.path.isfile(out) and not self.overwrite_points: + if os.path.isfile(out): print("sample points already created") self._populate_array_from_points(out) else: - self.create_sample_points() - if save_points: - self.save_sample_points() + print("Sample points not detected at {}".format(out)) if self.master_raster is not None: self.training_data_from_master_raster() else: @@ -103,10 +253,6 @@ def _populate_array_from_points(self, fname): val = feat['properties']['POINT_TYPE'] self._add_entry(coords, val=val) - def _verify_point(self, x, y): - """ Check to see if x, y is masked. """ - return None - def _dump_data(self, data): n = "class_{}_train.h5".format(self.class_code) if self.data_filename is None: @@ -154,69 +300,6 @@ def training_data_from_master_raster(self): del qq del tmp_arr - def create_sample_points(self): - """ Create a clipped training set from polygon shapefiles. - - This complicated-looking function finds the wrs_2 descending Landsat tile corresponding - to the path row provided, gets the bounding box and profile (aka meta) from - compose_array.get_tile_geometry, clips the training data to the landsat tile, then perform - s a union to reduce the number of polygon objects. - The dict object this uses has a template in pixel_classification.runspec.py. - Approach is to loop through the polygons, create a random grid of points over the - extent of each polygon, random shuffle order of points, loop over points, check if - point is within polygon, and if within, create a sample point. - - If a relatively simple geometry is available, use create_negative_sample_points(), though if - there are > 10**4 polygons, it will probably hang on unary_union(). 
""" - - polygons = self._get_polygons(self.shapefile_path) - instance_count = 0 - print("Making sample points. You have {} polygons".format(len(polygons))) - print("N_instances:", self.m_instances) - - if len(polygons) < 2: - warnings.warn("You have < 2 polygons in shapefile {}. ".format(os.path.basename(self.shapefile_path), Warning)) - - if len(polygons) > self.m_instances: - areas = zip(polygons, [x.area for x in polygons]) - srt = sorted(areas, key=lambda x: x[1], reverse=True) - polygons = [x for x, y in srt[:self.m_instances]] - - #polygons = unary_union(polygons) # this - # can be very inefficient in tse where - if not isinstance(polygons, list): - polygons = [polygons] # for the case of a single polygon. - - positive_area = sum([x.area for x in polygons]) # the sum of all - # the areas. - class_count = 0 - - for i, poly in enumerate(polygons): - if class_count >= self.m_instances: - break - fractional_area = poly.area / positive_area # percent of - # total area that this polygon occupies - required_points = max([1, fractional_area * self.m_instances]) # how - # many points overall that are required to evenly - # sample from each polygon, based on area. - poly_pt_ct = 0 - # while poly_pt_ct < required_points: # I wasn't getting enough points. 
- x_range, y_range = self._random_points(poly.bounds) - for coord in zip(x_range, y_range): - if instance_count >= self.m_instances: - break - if Point(coord[0], coord[1]).within(poly): - self._add_entry(coord, val=self.class_code) - poly_pt_ct += 1 - instance_count += 1 - # print(instance_count) - if poly_pt_ct >= required_points: - break - - class_count += poly_pt_ct - print("Final instance count:", instance_count) - - def populate_raster_data_array(self, save=True): for key, val in self.paths_map.items(): @@ -241,7 +324,6 @@ def populate_raster_data_array(self, save=True): for key, val in data.items(): setattr(self, key, val) - def _purge_raster_array(self): data_array = deepcopy(self.extracted_points) target_vals = Series(data_array.POINT_TYPE.values, name='POINT_TYPE') @@ -261,7 +343,6 @@ def _purge_raster_array(self): if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 0.: data_array.loc[idx, :] = nan except TypeError as e: - print(sub_raster, msk, idx) data_array.loc[idx, :] = nan data_array = data_array.join(target_vals, how='outer') @@ -274,23 +355,6 @@ def _purge_raster_array(self): axis=1, inplace=False) return data_array, target_vals - def _random_points(self, coords): - min_x, max_x = coords[0], coords[2] - min_y, max_y = coords[1], coords[3] - x_range = linspace(min_x, max_x, num=10 * self.m_instances) - y_range = linspace(min_y, max_y, num=10 * self.m_instances) - shuffle(x_range), shuffle(y_range) - return x_range, y_range - - def _add_entry(self, coord, val=0): - - self.extracted_points = self.extracted_points.append({'FID': int(self.object_id), - 'X': coord[0], - 'Y': coord[1], - 'POINT_TYPE': val}, - ignore_index=True) - self.object_id += 1 - def _geo_point_to_projected_coords(self, x, y): in_crs = Proj(init='epsg:4326') @@ -324,29 +388,6 @@ def _grid_raster_extract(self, raster, _name): return s - def _get_polygons(self, vector): - with fopen(vector, 'r') as src: - crs = src.crs - if not crs: - raise NoCoordinateReferenceError( - 
'Provided shapefile has no reference data.') - if crs['init'] != 'epsg:4326': - raise UnexpectedCoordinateReferenceSystemError( - 'Provided shapefile should be in unprojected (geographic)' - 'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format( - vector)) - clipped = src.filter(mask=self.tile_bbox) - polys = [] - bad_geo_count = 0 - for feat in clipped: - try: - geo = shape(feat['geometry']) - polys.append(geo) - except AttributeError: - bad_geo_count += 1 - - return polys - @property def tile_bbox(self): with fopen(WRS_2, 'r') as wrs: @@ -363,36 +404,8 @@ def _get_crs(self): break return crs - - def save_sample_points(self): - - points_schema = { - 'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]), - 'geometry': 'Point'} - meta = self.tile_geometry.copy() - meta['schema'] = points_schema - - out = os.path.splitext(self.shapefile_path)[0] - out += "_sample_points.shp" - - with fopen(out, 'w', **meta) as output: - for index, row in self.extracted_points.iterrows(): - props = dict([('FID', row['FID']), ('POINT_TYPE', row['POINT_TYPE'])]) - pt = Point(row['X'], row['Y']) - output.write({'properties': props, - 'geometry': mapping(pt)}) - return None - @property def tile_geometry(self): with fopen(WRS_2, 'r') as wrs: wrs_meta = wrs.meta.copy() return wrs_meta - - def to_pickle(self, data, path): - - with open(path, 'wb') as handle: - pickle.dump(data, handle, protocol=2) - - return path - diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 922dfe3..c6a6107 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -183,17 +183,6 @@ def split_shapefile(base, base_shapefile, data_directory): wrs2.close() - # TODO: Solve this more efficiently. - # DOUBLE TODO: Solve this more efficiently. - # I have all path/rows and their corresponding features - # I need to figure out the unique features in each path row. - # How should I treat the non-unique features? 
- # Create a set of non-unique features and their - # corresponding path/row. Also create - # a set of unique features. Then iterate over the - # unique set and for each non-unique feature - # place it in the path/row with the greatest number of - # unique points. non_unique_ids = defaultdict(list) unique = defaultdict(list) for key in path_row: @@ -233,7 +222,6 @@ def split_shapefile(base, base_shapefile, data_directory): prefix = os.path.splitext(base_shapefile)[0] for key in unique: if key is None: - print(key, unique[key]) continue out = prefix + "_" + key + ".shp" if len(unique[key]): diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index beafa26..c1227ff 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -22,12 +22,12 @@ sys.path.append(abspath) from numpy import mean, datetime64 from collections import OrderedDict - from landsat.google_download import GoogleDownload from sat_image.image import Landsat5, Landsat7, Landsat8 from sat_image.fmask import Fmask from sat_image.warped_vrt import warp_vrt -from bounds import RasterBounds +from met.thredds import GridMet, TopoWX +from bounds import RasterBounds, GeoBounds from dem import AwsDem from ssebop_app.image import get_image from rasterio import open as rasopen, float32 @@ -87,6 +87,7 @@ def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None def build_training(self): self.get_landsat(fmask=True) self.profile = self.landsat.rasterio_geometry + self.get_precip() self.get_et() self.get_terrain() self.get_cdl() @@ -129,7 +130,6 @@ def get_landsat(self, fmask=False): output_path=self.root, max_cloud_percent=self.max_cloud) self.path = g.p self.row = g.r - print("Path:", self.path, "Row:", self.row) g.select_scenes(self.n) self.scenes = g.selected_scenes @@ -142,6 +142,20 @@ def get_landsat(self, fmask=False): if fmask: [self._make_fmask(d) for d in self.image_dirs] + def get_precip(self): + poly = 
self.landsat.get_tile_geometry() + print(type(poly)) + dates = self.scenes['DATE_ACQUIRED'].values + # Assuming these are date strings. Or datetime objects. + bounds = poly.bounds + for date in dates: + print("Date", date) + print(type(date)) + gm = GridMet(variable='pr', bounds=GeoBounds(wsen=bounds), date=date) + out = gm.get_data_subset() + outfile = os.path.join(self.root, 'GridMet{}.tif'.format(date)) + gm.save_raster(out, self.landsat.rasterio_geometry, outfile) + def get_terrain(self): """Get digital elevation maps from amazon web services save in the project root directory with filenames enumerated @@ -174,7 +188,6 @@ def get_et(self): for i, d in enumerate(self.image_dirs): l = self.landsat_mapping[self.sat_abv](d) _id = l.landsat_scene_id - print(self.path, self.row) get_image(image_dir=d, parent_dir=self.root, image_exists=True, image_id=_id, satellite=self.sat, path=self.path, row=self.row, image_date=l.date_acquired, landsat_object=self.landsat, overwrite=False) diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 1743e29..f57a437 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -1,17 +1,16 @@ import warnings -import glob import os -import gc +import glob from multiprocessing import Pool from numpy import save as nsave -from compose_array_single_shapefile import PTASingleShapefile +from compose_array_single_shapefile import PTASingleShapefile, ShapefileSamplePoints from fiona import open as fopen from shapely.geometry import shape -from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster, create_master_masked_raster - -def create_training_data(shapefile, shapefile_directory, image_directory, class_code, - kernel_size, instances, training_directory, year, raster_directory, chunk_size=2000, save=True): +from data_utils import download_images, get_shapefile_path_row, split_shapefile, 
create_master_raster +def download_images_over_shapefile(shapefile, image_directory, year, master_raster_directory): + '''Downloads p/r corresponding to the location of + the shapefile, and creates master raster''' p, r = get_shapefile_path_row(shapefile) suff = str(p) + '_' + str(r) + "_" + str(year) landsat_dir = os.path.join(image_directory, suff) @@ -24,32 +23,15 @@ def create_training_data(shapefile, shapefile_directory, image_directory, class_ else: ims = download_images(landsat_dir, p, r, year, satellite) - ms = create_master_raster(ims, p, r, year, raster_directory) - mms = create_master_masked_raster(ims, p, r, year, raster_directory) + ms = create_master_raster(ims, p, r, year, master_raster_directory) + + return ims - shp_path = os.path.join(shapefile_directory, shapefile) - pta = PTASingleShapefile(shapefile_path=shp_path, master_raster=ms, - training_directory=training_directory, overwrite_points=False, class_code=class_code, - path=p, row=r, paths_map=ims.paths_map, masks=ims.masks, - instances=instances, kernel_size=kernel_size, sz=chunk_size) - - pta.extract_sample() - -def get_all_shapefiles(to_match, year, data_directory, irrigated): - ''' Get all shapefiles in same p/r as to_match ''' - p, r = get_shapefile_path_row(os.path.join(data_directory, to_match)) - ls = [] - for f in glob.glob(data_directory + "*.shp"): - if "sample_points" not in f: - pp, rr = get_shapefile_path_row(f) - if pp == p and rr == r: - oup = False - for key in irrigated: - if key in f: - oup = True - if not oup: - ls.append(f) - return ls +def create_sample_points_from_shapefile(shapefile_path, instances): + '''Hopefully this can be nicely parallelized.''' + ssp = ShapefileSamplePoints(shapefile_path, m_instances=instances) + ssp.create_sample_points(save_points=True) + return ssp.outfile def shapefile_area(shapefile): summ = 0 @@ -61,7 +43,9 @@ def shapefile_area(shapefile): def get_total_area(data_directory, filenames): ''' Gets the total area of the polygons - in the files 
in filenames ''' + in the files in filenames + TODO: Get an equal-area projection''' + tot = 0 for f in glob.glob(data_directory + "*.shp"): if "sample" not in f: @@ -75,24 +59,7 @@ def required_points(shapefile, total_area, total_instances): frac = area / total_area return int(total_instances * frac) -def extract_data(data_directory, names, n_instances, class_code, kernel_size): - - def is_it(f, names): - for e in names: - if e in f: - return True - return False - - total_area = get_total_area(data_directory, names) # units? - for f in glob.glob(data_directory + "*.shp"): - if is_it(f, names) and 'sample' not in f: - req_points = required_points(f, total_area, n_instances) - ff = os.path.basename(f) - create_training_data(ff, data_directory, image_directory, - class_code, kernel_size, req_points, train_dir, 2013, raster_dir) - gc.collect() - -def go(f): +def split_shapefiles_multiproc(f): data_directory = 'split_shapefiles_west/' shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup' fname = os.path.basename(f) @@ -101,40 +68,31 @@ def go(f): if __name__ == "__main__": - irrigated = ['MT_Sun_River_2013', "MT_Huntley_Main_2013"] - other = ['other'] - fallow = ['Fallow'] - forest = ['Forrest'] - - train_dir = 'training_data/' - data_directory = 'shapefile_data_western_us/' image_directory = 'image_data/' - raster_dir = 'master_rasters' - kernel_size = 57 - - - shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup/' - fnames = [f for f in glob.glob(shp_dir + "*.shp") if 'reproj' in f] - instances = [50000, 1e5, 1e5, 1e5] - i2 = [1e5, 1e5] - class_code = [0, 1, 2, 3] - c2 = [2, 3] - dd = [data_directory]*2 - dd2 = dd.copy() - ks = [41]*2 - ks2 = ks.copy() - names = [irrigated, other] - names2 = [fallow, forest] - # note: the extraction of training data took 6h 29m - # extract_data(dd[0], fallow, 1e5, 2, 41) - # with Pool() as pool: - # pool.starmap(extract_data, zip(dd, names, instances, class_code, ks)) + p = 34 + r = 
28 + year = 2013 + satellite = 8 + + ims = download_images(image_directory, p, r, year, satellite) + + # irrigated = ['MT_Sun_River_2013', "MT_Huntley_Main_2013"] + # other = ['other'] + # fallow = ['Fallow'] + # forest = ['Forrest'] + + # train_dir = 'training_data/' + # data_directory = 'split_shapefiles_west/' + # image_directory = 'image_data/' + # raster_dir = 'master_rasters' + # kernel_size = 57 + + # fnames = [f for f in glob.glob(data_directory + "*.shp")] + + # instances = 10000 + # instances = [instances]*len(fnames) + # with Pool() as pool: - # pool.starmap(extract_data, zip(dd2, names2, i2, c2, ks2)) - - fnames = [f for f in glob.glob(shp_dir + "*.shp") if 'reproj' in f and 'irri' in f and 'un' not - in f] - go(fnames[0]) - #with Pool() as pool: - # pool.map(go, fnames) - # 12 minutes to 5 and a half. + # out = pool.starmap(create_sample_points_from_shapefile, zip(fnames, instances)) + + From a6f4cf8cce3e844804d42936f8bad114811264b6 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 13 Feb 2019 11:28:05 -0700 Subject: [PATCH 19/89] Adding gridmet --- pixel_classification/data_utils.py | 4 ++-- pixel_classification/prepare_images.py | 20 ++++++++++++------- pixel_classification/runner_from_shapefile.py | 3 +++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index c6a6107..460698b 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -104,10 +104,10 @@ def get_shapefile_lat_lon(shapefile): return latc, lonc -def download_images(project_directory, path, row, year, satellite=8): +def download_images(project_directory, path, row, year, satellite=8, n_landsat=3): image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, - max_cloud_pct=70, n_landsat=3, year=year) + max_cloud_pct=70, n_landsat=n_landsat, year=year) image_stack.build_evaluating() # the difference b/t build_training() and 
build_eval() is # a cloud mask. diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index c1227ff..7c01b96 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -22,9 +22,11 @@ sys.path.append(abspath) from numpy import mean, datetime64 from collections import OrderedDict +from datetime import datetime from landsat.google_download import GoogleDownload from sat_image.image import Landsat5, Landsat7, Landsat8 from sat_image.fmask import Fmask +from shapely.geometry import shape from sat_image.warped_vrt import warp_vrt from met.thredds import GridMet, TopoWX from bounds import RasterBounds, GeoBounds @@ -97,6 +99,7 @@ def build_evaluating(self): self.get_landsat(fmask=False) self.profile = self.landsat.rasterio_geometry #self.get_et() + self.get_precip() self.get_terrain() self.get_cdl() self.paths_map, self.masks = self._order_images() # paths map is just path-> location @@ -144,20 +147,23 @@ def get_landsat(self, fmask=False): def get_precip(self): poly = self.landsat.get_tile_geometry() - print(type(poly)) dates = self.scenes['DATE_ACQUIRED'].values - # Assuming these are date strings. Or datetime objects. - bounds = poly.bounds + b = poly[0]['coordinates'][0] + # Change the coordinate system + bb = shape(poly[0]).bounds + # Ask david + bb = (60.5284298033, 29.318572496, 75.1580277851, 38.4862816432) for date in dates: - print("Date", date) - print(type(date)) - gm = GridMet(variable='pr', bounds=GeoBounds(wsen=bounds), date=date) + d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. 
+ bds = GeoBounds(wsen=bb) + gm = GridMet(variable='pr', bounds=bds, target_profile=self.profile, date=d) out = gm.get_data_subset() outfile = os.path.join(self.root, 'GridMet{}.tif'.format(date)) gm.save_raster(out, self.landsat.rasterio_geometry, outfile) def get_terrain(self): - """Get digital elevation maps from amazon web services + """ + Get digital elevation maps from amazon web services save in the project root directory with filenames enumerated in the next three lines. diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index f57a437..641e1e0 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -73,6 +73,9 @@ def split_shapefiles_multiproc(f): r = 28 year = 2013 satellite = 8 + image_directory += str(p) + "_" + str(r) + "_" + str(year) + if not os.path.isdir(image_directory): + os.mkdir(image_directory) ims = download_images(image_directory, p, r, year, satellite) From d45dacb6c02ef5e6700af18c85e09a2f8616a767 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 13 Feb 2019 14:37:24 -0700 Subject: [PATCH 20/89] Gridmet integration v3 --- pixel_classification/prepare_images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index 7c01b96..85a8b36 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -152,7 +152,7 @@ def get_precip(self): # Change the coordinate system bb = shape(poly[0]).bounds # Ask david - bb = (60.5284298033, 29.318572496, 75.1580277851, 38.4862816432) + bb = (-124.84, -66.88, 24.89, 49.38) # bbox of usa for sanity check for date in dates: d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. 
bds = GeoBounds(wsen=bb) From 2650a9acf1cfcbf72a98d25a15744cbd412348cc Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 14 Feb 2019 14:04:31 -0700 Subject: [PATCH 21/89] Formatting input data for network --- pixel_classification/data_utils.py | 76 +++++++++++++++---- pixel_classification/prepare_images.py | 2 +- pixel_classification/runner_from_shapefile.py | 53 ++++++------- 3 files changed, 85 insertions(+), 46 deletions(-) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 460698b..a97f2f3 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -6,12 +6,35 @@ from lxml import html from requests import get from copy import deepcopy -from numpy import zeros, asarray, array, reshape +from numpy import zeros, asarray, array, reshape, nan from rasterio import float32, open as rasopen +from rasterio.mask import mask from prepare_images import ImageStack from sklearn.neighbors import KDTree -import sat_image - +from sat_image.warped_vrt import warp_single_image +import geopandas as gpd +import json + +NO_DATA = nan + +def get_features(gdf): + tmp = json.loads(gdf.to_json()) + features = [feature['geometry'] for feature in tmp['features']] + return features + +def generate_class_mask(shapefile, master_raster): + ''' Generates a mask with class_val everywhere + shapefile data is present and a no_data value everywhere else. + no_data is -1 in this case, as it is never a valid class label. + Switching coordinate reference systems is important here, or + else the masking won't work. 
+ ''' + shp = gpd.read_file(shapefile) + with rasopen(master_raster, 'r') as src: + shp = shp.to_crs(src.crs) + features = get_features(shp) + out_image, out_transform = mask(src, shapes=features, nodata=nan) + return out_image def create_master_masked_raster(image_stack, path, row, year, raster_directory): masks = image_stack.masks @@ -31,7 +54,6 @@ def create_master_masked_raster(image_stack, path, row, year, raster_directory): if first: first_geo = deepcopy(raster_geo) - print(first_geo, "FIRST_GEO") empty = zeros((len(masks.keys()), arr.shape[1], arr.shape[2]), float32) stack = empty stack[i, :, :] = arr @@ -40,7 +62,7 @@ def create_master_masked_raster(image_stack, path, row, year, raster_directory): try: stack[i, :, :] = arr except ValueError: - arr = sat_image.warped_vrt.warp_single_image(mask_raster, first_geo) + arr = warp_single_image(mask_raster, first_geo) stack[i, :, :] = arr first_geo.update(count=len(masks.keys())) @@ -57,9 +79,26 @@ def create_master_masked_raster(image_stack, path, row, year, raster_directory): def create_master_raster(image_stack, path, row, year, raster_directory): fname = "master_raster_{}_{}_{}.tif".format(path, row, year) pth = os.path.join(raster_directory, fname) + mask_fname = "class_mask_{}_{}_{}.tif".format(path, row, year) + mask_path = os.path.join(raster_directory, mask_fname) if os.path.isfile(pth): print("Master raster already created for {}_{}_{}.".format(path, row, year)) - return pth + if os.path.isfile(mask_path): + print('Class mask template already created') + return pth + else: + print("Creating class mask template.") + with rasopen(pth, 'r') as src: + meta = src.meta.copy() + h = meta['height'] + w = meta['width'] + + meta.update(count=1, dtype=float32) + + with rasopen(mask_path, 'w', **meta) as msk: + out = zeros((h, w)).astype(float32) + msk.write(out, 1) + return pth paths_map = image_stack.paths_map first = True @@ -83,12 +122,15 @@ def create_master_raster(image_stack, path, row, year, 
raster_directory): try: stack[i, :, :] = arr except ValueError: - # import pprint - # pprint.pprint(first_geo) - # error was thrown here b/c source raster didn't have crs - arr = sat_image.warped_vrt.warp_single_image(feature_raster, first_geo) + # error can be thrown here if source raster doesn't have crs + arr = warp_single_image(feature_raster, first_geo) stack[i, :, :] = arr + first_geo.update(count=1) + msk_out = zeros((stack.shape[1], stack.shape[0])) + with rasopen(mask_path, mode='w', **first_geo) as msk: + msk.write(msk_out) + first_geo.update(count=len(paths_map.keys())) with rasopen(pth, mode='w', **first_geo) as dst: @@ -97,6 +139,7 @@ def create_master_raster(image_stack, path, row, year, raster_directory): return pth def get_shapefile_lat_lon(shapefile): + ''' Center of shapefile''' with fiona.open(shapefile, "r") as src: minx, miny, maxx, maxy = src.bounds latc = (maxy + miny) / 2 @@ -142,6 +185,8 @@ def get_pr(poly, wrs2): return ls def get_pr_subset(poly, tiles): + ''' Use when you only want to iterate + over a subset of wrs2 tiles.''' ls = [] for feature in tiles: tile = shape(feature['geometry']) @@ -153,13 +198,15 @@ def get_pr_subset(poly, tiles): return ls def split_shapefile(base, base_shapefile, data_directory): - """Previous method took ~25 minutes to get all path/rows. - Now, with kdtree, 25 seconds. + """ Shapefiles may deal with data over multiple path/rows. + This is a method to get the minimum number of + path/rows required to cover all features. Data directory: where the split shapefiles will be saved. base: directory containing base_shapefile.""" path_row = defaultdict(list) id_mapping = {} + # TODO: un hardcode this directory. 
wrs2 = fiona.open('../spatial_data/wrs2_descending_usa.shp', 'r') tree, path_rows, features = construct_kdtree(wrs2) wrs2.close() @@ -181,8 +228,6 @@ def split_shapefile(base, base_shapefile, data_directory): for p in prs: path_row[p].append(idd) - wrs2.close() - non_unique_ids = defaultdict(list) unique = defaultdict(list) for key in path_row: @@ -226,8 +271,7 @@ def split_shapefile(base, base_shapefile, data_directory): out = prefix + "_" + key + ".shp" if len(unique[key]): with fiona.open(os.path.join(data_directory, out), 'w', **meta) as dst: - print("Split shapefile saving to:", - os.path.join(data_directory, out)) + print("Saving split shapefile to: {}".format(os.path.join(data_directory, out))) for feat in unique[key]: dst.write(id_mapping[feat]) diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index 85a8b36..39f08e4 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -99,7 +99,7 @@ def build_evaluating(self): self.get_landsat(fmask=False) self.profile = self.landsat.rasterio_geometry #self.get_et() - self.get_precip() + #self.get_precip() self.get_terrain() self.get_cdl() self.paths_map, self.masks = self._order_images() # paths map is just path-> location diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 641e1e0..38080ee 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -27,7 +27,25 @@ def download_images_over_shapefile(shapefile, image_directory, year, master_rast return ims -def create_sample_points_from_shapefile(shapefile_path, instances): +def download_from_pr(p, r, image_directory, year, master_raster_directory): + '''Downloads p/r corresponding to the location of + the shapefile, and creates master raster''' + suff = str(p) + '_' + str(r) + "_" + str(year) + landsat_dir = os.path.join(image_directory, suff) + satellite = 8 + if year < 
2013: + satellite = 7 + if not os.path.isdir(landsat_dir): + os.mkdir(landsat_dir) + ims = download_images(landsat_dir, p, r, year, satellite) + else: + ims = download_images(landsat_dir, p, r, year, satellite) + + ms = create_master_raster(ims, p, r, year, master_raster_directory) + + return ims + +def sample_points_from_shapefile(shapefile_path, instances): '''Hopefully this can be nicely parallelized.''' ssp = ShapefileSamplePoints(shapefile_path, m_instances=instances) ssp.create_sample_points(save_points=True) @@ -65,37 +83,14 @@ def split_shapefiles_multiproc(f): fname = os.path.basename(f) split_shapefile(shp_dir, fname, data_directory) +# Need a function that takes a targets dict if __name__ == "__main__": image_directory = 'image_data/' - p = 34 - r = 28 + master = 'master_rasters/' + p = 39 + r = 27 year = 2013 satellite = 8 - image_directory += str(p) + "_" + str(r) + "_" + str(year) - if not os.path.isdir(image_directory): - os.mkdir(image_directory) - - ims = download_images(image_directory, p, r, year, satellite) - - # irrigated = ['MT_Sun_River_2013', "MT_Huntley_Main_2013"] - # other = ['other'] - # fallow = ['Fallow'] - # forest = ['Forrest'] - - # train_dir = 'training_data/' - # data_directory = 'split_shapefiles_west/' - # image_directory = 'image_data/' - # raster_dir = 'master_rasters' - # kernel_size = 57 - - # fnames = [f for f in glob.glob(data_directory + "*.shp")] - - # instances = 10000 - # instances = [instances]*len(fnames) - - # with Pool() as pool: - # out = pool.starmap(create_sample_points_from_shapefile, zip(fnames, instances)) - - + ims = download_from_pr(p, r, image_directory, year, master) From 31bc520e8c94356a7097e10edf0922ea074a26d2 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 14 Feb 2019 14:04:45 -0700 Subject: [PATCH 22/89] Fully conv. 
network --- pixel_classification/fully_conv.py | 197 +++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 pixel_classification/fully_conv.py diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py new file mode 100644 index 0000000..60f35f7 --- /dev/null +++ b/pixel_classification/fully_conv.py @@ -0,0 +1,197 @@ +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' +import keras.backend as K +import matplotlib.pyplot as plt +import numpy as np +import time +import tensorflow as tf +from glob import glob +from skimage import transform, util +from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, +Concatenate, Dropout, UpSampling2D) +from tensorflow.keras.models import Model +from tensorflow.keras.callbacks import TensorBoard +from data_utils import generate_class_mask, get_shapefile_path_row + +NO_DATA = np.nan + +def custom_objective(y_true, y_pred): + '''I want to mask all values that + are not data, given a y_true + that has NODATA values. 
''' + y_true = tf.reshape(y_true, (1080*1920, 2)) + y_pred = tf.reshape(y_pred, (1080*1920, 2)) + masked = tf.not_equal(y_true, NO_DATA) + y_true_mask = tf.boolean_mask(y_true, masked) + y_pred_mask = tf.boolean_mask(y_pred, masked) + return tf.keras.losses.binary_crossentropy(y_true_mask, y_pred_mask) + +def fcnn_functional(image_shape, n_classes): + + x = Input(image_shape) + + c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) + c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) + mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) + + c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(mp1) + c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(c2) + mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) + mp2 = Dropout(0.5)(mp2) + + c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(mp2) + c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(c3) + mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) + + last_conv = Conv2D(filters=256, kernel_size=(3,3), activation='relu', padding='same')(mp3) + + u1 = UpSampling2D(size=(2, 2))(last_conv) + u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) + u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) + + u1_c3 = Concatenate()([c3, u1]) + + u2 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1_c3) + u2 = UpSampling2D(size=(2, 2))(u2) + u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) + u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) + u2 = Dropout(0.5)(u2) + + u2_c2 = Concatenate()([u2, c2]) + u2_c2 = Dropout(0.5)(u2_c2) + + c4 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u2_c2) + u3 = UpSampling2D(size=(2, 2))(c4) + u3 = Conv2D(filters=32, kernel_size=(3,3), 
activation='relu', padding='same')(u3) + + u3_c1 = Concatenate()([u3, c1]) + + c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) + + model = Model(inputs=x, outputs=c5) + model.summary() + return model + +def one_hot_encoding(class_mask, n_classes): + '''Assumes classes range from 0 -> (n-1)''' + shp = class_mask.shape + out = np.ones((shp[0], shp[1], n_classes))*NO_DATA + for i in range(n_classes): + out[:, :, i][class_mask == i] = 1 + return out + +def rotation(image, angle): + return transform.rotate(image, angle, mode='constant', cval=NO_DATA) + +def random_noise(image): + return util.random_noise(image) + +def h_flip(image): + return image[:, ::-1] + +def augment_data(image, class_mask): + '''Randomly augments an image.''' + if np.random.randint(2): + deg = np.random.uniform(-25, 25) + image = rotation(image, deg) + class_mask = rotation(class_mask, deg) + if np.random.randint(2): + image = random_noise(image) + if np.random.randint(2): + image = h_flip(image) + class_mask = h_flip(class_mask) + if np.random.randint(2): + image = np.flipud(image) + class_mask = np.flipud(class_mask) + return image, class_mask + +def generate(image_directory, box_size): + while True: + for f in glob(image_directory + "*.json"): + jpg = f[:-13] + ".jpg" + class_mask, input_image = generate_class_mask(f, jpg, box_size=box_size) + if class_mask is None: + continue + if np.random.randint(2): + input_image, class_mask = augment_data(input_image, class_mask) + + X, y = preprocess_training_data(input_image, class_mask) + + yield X, y + +def create_model(image_shape, n_classes): + model = fcnn_functional(image_shape, n_classes) + model.compile(loss=custom_objective, + optimizer='adam', + metrics=['accuracy']) + return model + +def train_model(train_directory, test_directory, image_shape, box_size=6, epochs=15): + n_classes = 2 + model = create_model(image_shape, n_classes) + tb = TensorBoard(log_dir='graphs/') + n_augmented = 0 + 
train_generator = generate(train_directory, box_size) + test_generator = generate(test_directory, box_size) + model.fit_generator(train_generator, + steps_per_epoch=50, + epochs=epochs, + verbose=1, + callbacks=[tb], + validation_data=test_generator, + validation_steps=4, + use_multiprocessing=True) + return model + +def all_matching_shapefiles(to_match, shapefile_directory): + out = [] + pr = get_shapefile_path_row(to_match) + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if get_shapefile_path_row(f) == pr: + out.append(f) + return out + +if __name__ == '__main__': + # Steps: + # 1. split shapefiles + # 2. download all images + # 3. Get all shapefiles corresponding to a given + # p/r. If there aren't n_classes shapefiles + # for a given path/row, randomly sample from + # places outside the annotated data that is present. + # else: generate a class mask with class balance, + # where all of the pixels are drawn from irrigated + # areas and only some from other areas. + # 4. Input this into the model. + # Here assume steps 3 and 4 are done and then synthesize + # the steps into one coherent file. + # need an easier way to specify year. 
+ + shapefile_directory = 'shapefile_data/backup' + image_directory = 'master_rasters' + target = 'irrigated' + fallow = 'Fallow' + forest = 'Forrest' + other = 'other' + target_dict = {target:0, fallow:1, forest:2, other:3} + year = 2013 + done = set() + train_raster = 'master_raster_' + mask_raster = 'class_mask_' + + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if target in f: + out = all_matching_shapefiles(f, shapefile_directory) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + else: + for shp in out: + mask = generate_class_mask(shp, mask_file) + plt.imshow(mask[0, :, :]) + plt.colorbar() + plt.show() From 0a3ced10126612df33c85883712ffd011e629a11 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 15 Feb 2019 09:44:23 -0700 Subject: [PATCH 23/89] Training data generated in fully_conv --- .../compose_array_single_shapefile.py | 10 +-- pixel_classification/data_utils.py | 5 +- pixel_classification/fully_conv.py | 79 +++++++++++++++---- pixel_classification/runner_from_shapefile.py | 5 +- 4 files changed, 74 insertions(+), 25 deletions(-) diff --git a/pixel_classification/compose_array_single_shapefile.py b/pixel_classification/compose_array_single_shapefile.py index 4639f1e..4ce57ef 100644 --- a/pixel_classification/compose_array_single_shapefile.py +++ b/pixel_classification/compose_array_single_shapefile.py @@ -58,11 +58,11 @@ class UnexpectedCoordinateReferenceSystemError(Exception): class ShapefileSamplePoints: def __init__(self, shapefile_path=None, sample_point_directory=None, m_instances=None): - if sample_point_directory is None: - self.outfile = os.path.splitext(shapefile_path)[0] - self.outfile += "_sample_points.shp" - else: - self.outfile = sample_point_directory 
+ self.outfile = os.path.splitext(shapefile_path)[0] + self.outfile += "_sample_points.shp" + if sample_point_directory: + self.outfile = os.path.join(sample_point_directory, self.outfile) + self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) self.m_instances = m_instances self.object_id = 0 diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index a97f2f3..7355086 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -15,7 +15,7 @@ import geopandas as gpd import json -NO_DATA = nan +NO_DATA = -1 def get_features(gdf): tmp = json.loads(gdf.to_json()) @@ -32,8 +32,9 @@ def generate_class_mask(shapefile, master_raster): shp = gpd.read_file(shapefile) with rasopen(master_raster, 'r') as src: shp = shp.to_crs(src.crs) + arr = src.read() features = get_features(shp) - out_image, out_transform = mask(src, shapes=features, nodata=nan) + out_image, out_transform = mask(src, shapes=features, nodata=NO_DATA) return out_image def create_master_masked_raster(image_stack, path, row, year, raster_directory): diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index 60f35f7..974a039 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -13,8 +13,10 @@ from tensorflow.keras.models import Model from tensorflow.keras.callbacks import TensorBoard from data_utils import generate_class_mask, get_shapefile_path_row +from multiprocessing import Pool +from rasterio import open as rasopen -NO_DATA = np.nan +NO_DATA = -1 def custom_objective(y_true, y_pred): '''I want to mask all values that @@ -148,10 +150,61 @@ def all_matching_shapefiles(to_match, shapefile_directory): out = [] pr = get_shapefile_path_row(to_match) for f in glob(os.path.join(shapefile_directory, "*.shp")): - if get_shapefile_path_row(f) == pr: + if get_shapefile_path_row(f) == pr and to_match not in f: out.append(f) return out +def 
generate_binary_train(shapefile_directory, image_directory, box_size): + + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if target in f: + all_matches = all_matching_shapefiles(f, shapefile_directory) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive error handling. + else: + target_mask = generate_class_mask(f, mask_file) + class_mask = np.ones((n_classes, target_mask.shape[1], target_mask.shape[2]))*NO_DATA + class_mask[1, :, :] = target_mask + + required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) + masks = [] + for match in all_matches: + msk = generate_class_mask(match, mask_file) + samp = random_sample(msk, required_instances, box_size) + masks.append(samp) + + for i, s in enumerate(masks): + class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 + # May need to do some preprocessing. + yield class_mask, load_raster(master_raster) + +def random_sample(class_mask, n_instances, box_size, class_code=0): + out = np.where(class_mask != NO_DATA) + # returns (elements from class_mask, indices_x, indices_y) + out_x = out[1] + out_y = out[2] + indices = np.random.choice(len(out_x), size=n_instances, replace=False) + out_x = out_x[indices] + out_y = out_y[indices] + class_mask[:, :, :] = NO_DATA + if box_size == 0: + class_mask[:, out_x, out_y] = class_code + else: + ofs = box_size // 2 + for x, y in zip(out_x, out_y): + class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = class_code + return class_mask + +def load_raster(master_raster): + with rasopen(master_raster, 'r') as src: + arr = src.read() + return arr + if __name__ == '__main__': # Steps: # 1. 
split shapefiles @@ -167,8 +220,10 @@ def all_matching_shapefiles(to_match, shapefile_directory): # Here assume steps 3 and 4 are done and then synthesize # the steps into one coherent file. # need an easier way to specify year. + # Let's do a binary classification model. shapefile_directory = 'shapefile_data/backup' + sample_dir = os.path.join(shapefile_directory, 'sample_points') image_directory = 'master_rasters' target = 'irrigated' fallow = 'Fallow' @@ -179,19 +234,9 @@ def all_matching_shapefiles(to_match, shapefile_directory): done = set() train_raster = 'master_raster_' mask_raster = 'class_mask_' + n_classes = 2 + box_size = 6 + + + - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if target in f: - out = all_matching_shapefiles(f, shapefile_directory) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - else: - for shp in out: - mask = generate_class_mask(shp, mask_file) - plt.imshow(mask[0, :, :]) - plt.colorbar() - plt.show() diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 38080ee..8299aa9 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -88,9 +88,12 @@ def split_shapefiles_multiproc(f): if __name__ == "__main__": image_directory = 'image_data/' + shp = 'shapefile_data/backup' master = 'master_rasters/' p = 39 r = 27 year = 2013 + satellite = 8 - ims = download_from_pr(p, r, image_directory, year, master) + for f in glob.glob(os.path.join(shp, "*.shp")): + download_images_over_shapefile(f, image_directory, year, master) From 3aac282362d7ef3e699c3507a20665b75ea7cf94 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 15 Feb 2019 14:39:43 -0700 
Subject: [PATCH 24/89] Gridmet working, but hacked together. --- pixel_classification/prepare_images.py | 42 ++++++++++++++----- pixel_classification/runner_from_shapefile.py | 6 +-- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index 39f08e4..1c26055 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -26,24 +26,27 @@ from landsat.google_download import GoogleDownload from sat_image.image import Landsat5, Landsat7, Landsat8 from sat_image.fmask import Fmask -from shapely.geometry import shape from sat_image.warped_vrt import warp_vrt from met.thredds import GridMet, TopoWX from bounds import RasterBounds, GeoBounds from dem import AwsDem from ssebop_app.image import get_image +from functools import partial +from pyproj import Proj, transform as pytransform +from shapely.geometry import shape, Polygon, mapping +from shapely.ops import transform from rasterio import open as rasopen, float32 from pixel_classification.crop_data_layer import CropDataLayer as Cdl from pixel_classification.runspec import landsat_rasters, static_rasters, ancillary_rasters, mask_rasters from sklearn.preprocessing import StandardScaler +from geopandas.geodataframe import GeoDataFrame class ImageStack(object): """ Prepare a stack of images from Landsat, terrain, etc. Save stack in identical geometry. 
""" - def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None, max_cloud_pct=None, start=None, end=None, - year=None, n_landsat=None): + def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None, max_cloud_pct=None, start=None, end=None, year=None, n_landsat=None): self.landsat_mapping = {'LT5': Landsat5, 'LE7': Landsat7, 'LC8': Landsat8} self.landsat_mapping_abv = {5: 'LT5', 7: 'LE7', 8: 'LC8'} @@ -99,7 +102,7 @@ def build_evaluating(self): self.get_landsat(fmask=False) self.profile = self.landsat.rasterio_geometry #self.get_et() - #self.get_precip() + self.get_precip() self.get_terrain() self.get_cdl() self.paths_map, self.masks = self._order_images() # paths map is just path-> location @@ -146,17 +149,36 @@ def get_landsat(self, fmask=False): [self._make_fmask(d) for d in self.image_dirs] def get_precip(self): - poly = self.landsat.get_tile_geometry() + poly_in = self.landsat.get_tile_geometry() + poly_in = Polygon(poly_in[0]['coordinates'][0]) + project = partial( + pytransform, + Proj(self.profile['crs']), + Proj(init='epsg:32612')) + for_bounds = partial( + pytransform, + Proj(self.profile['crs']), + Proj(init='epsg:4326')) dates = self.scenes['DATE_ACQUIRED'].values - b = poly[0]['coordinates'][0] # Change the coordinate system - bb = shape(poly[0]).bounds # Ask david - bb = (-124.84, -66.88, 24.89, 49.38) # bbox of usa for sanity check + poly = transform(project, poly_in) + poly_bounds = transform(for_bounds, poly_in) + poly = Polygon(poly.exterior.coords) + from rasterio.crs import CRS + geometry = [mapping(poly)] + geometry[0]['crs'] = CRS({'init':'epsg:32612'}) + feat = {'type': 'Polygon', 'coordinates': list(poly.exterior.coords)} + bounds = poly.bounds + print(bounds) + bounds = (bounds[2], bounds[1], bounds[0], bounds[3]) + bounds = (-124.84, -66.88, 24.89, 49.38) # bbox of usa for sanity check + bounds = poly_bounds.bounds for date in dates: d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to 
a nicer format. - bds = GeoBounds(wsen=bb) - gm = GridMet(variable='pr', bounds=bds, target_profile=self.profile, date=d) + bds = GeoBounds(wsen=bounds) + gm = GridMet(variable='pr', clip_feature=geometry, + bbox=bds, target_profile=self.profile, date=d) out = gm.get_data_subset() outfile = os.path.join(self.root, 'GridMet{}.tif'.format(date)) gm.save_raster(out, self.landsat.rasterio_geometry, outfile) diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 8299aa9..41f4e7c 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -10,7 +10,9 @@ def download_images_over_shapefile(shapefile, image_directory, year, master_raster_directory): '''Downloads p/r corresponding to the location of - the shapefile, and creates master raster''' + the shapefile, and creates master raster. + Image_directory: where to save the raw images. + mr_directory: " " master_rasters.''' p, r = get_shapefile_path_row(shapefile) suff = str(p) + '_' + str(r) + "_" + str(year) landsat_dir = os.path.join(image_directory, suff) @@ -90,8 +92,6 @@ def split_shapefiles_multiproc(f): image_directory = 'image_data/' shp = 'shapefile_data/backup' master = 'master_rasters/' - p = 39 - r = 27 year = 2013 satellite = 8 From f8959c6daa69c50356e7af994f29bac53ddfd37e Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 15 Feb 2019 15:19:26 -0700 Subject: [PATCH 25/89] Added precip information. 
--- pixel_classification/data_utils.py | 2 +- pixel_classification/fully_conv.py | 8 +++++++- pixel_classification/prepare_images.py | 19 ++++++++++--------- pixel_classification/runner_from_shapefile.py | 10 +++++++++- pixel_classification/runspec.py | 5 ++++- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 7355086..748b7af 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -128,7 +128,7 @@ def create_master_raster(image_stack, path, row, year, raster_directory): stack[i, :, :] = arr first_geo.update(count=1) - msk_out = zeros((stack.shape[1], stack.shape[0])) + msk_out = zeros((1, stack.shape[1], stack.shape[2])).astype(float32) with rasopen(mask_path, mode='w', **first_geo) as msk: msk.write(msk_out) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index 974a039..23448ef 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -238,5 +238,11 @@ def load_raster(master_raster): box_size = 6 - + for i, j in generate_binary_train(shapefile_directory, image_directory, box_size): + + fig, ax = plt.subplots(ncols=2) + ax[0].imshow(i[0, :, :]) + ax[1].imshow(i[1, :, :]) + plt.show() + diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index 1c26055..cbf03ea 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -36,8 +36,9 @@ from shapely.geometry import shape, Polygon, mapping from shapely.ops import transform from rasterio import open as rasopen, float32 +from rasterio.crs import CRS from pixel_classification.crop_data_layer import CropDataLayer as Cdl -from pixel_classification.runspec import landsat_rasters, static_rasters, ancillary_rasters, mask_rasters +from pixel_classification.runspec import landsat_rasters, static_rasters, ancillary_rasters, mask_rasters, climate_rasters 
from sklearn.preprocessing import StandardScaler from geopandas.geodataframe import GeoDataFrame @@ -161,18 +162,15 @@ def get_precip(self): Proj(init='epsg:4326')) dates = self.scenes['DATE_ACQUIRED'].values # Change the coordinate system - # Ask david + # The issue: the CRSs for the bounding box and for the mask are different. + # In _project, the incorrect CRS was making it throw an error. + # the fix? Inputting bounds in a unprojected CRS and + # a projected shape for masking. poly = transform(project, poly_in) poly_bounds = transform(for_bounds, poly_in) poly = Polygon(poly.exterior.coords) - from rasterio.crs import CRS geometry = [mapping(poly)] geometry[0]['crs'] = CRS({'init':'epsg:32612'}) - feat = {'type': 'Polygon', 'coordinates': list(poly.exterior.coords)} - bounds = poly.bounds - print(bounds) - bounds = (bounds[2], bounds[1], bounds[0], bounds[3]) - bounds = (-124.84, -66.88, 24.89, 49.38) # bbox of usa for sanity check bounds = poly_bounds.bounds for date in dates: d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. 
@@ -283,10 +281,13 @@ def _order_images(self): s = d for sc in scenes: + paths = os.listdir(os.path.join(self.root, sc)) + c = climate_rasters(self.root) b = [os.path.join(self.root, sc, x) for x in paths if x.endswith(landsat_rasters()[self.sat])] a = [os.path.join(self.root, sc, x) for x in paths if x.endswith(ancillary_rasters())] - bands = a + b + bands = a + b + c + bands.sort() for p in bands: band_dct[os.path.basename(p).split('.')[0]] = p diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index 41f4e7c..def279a 100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -94,6 +94,14 @@ def split_shapefiles_multiproc(f): master = 'master_rasters/' year = 2013 + template = "{}_{}_{}" + + done = set() + satellite = 8 for f in glob.glob(os.path.join(shp, "*.shp")): - download_images_over_shapefile(f, image_directory, year, master) + p, r = get_shapefile_path_row(f) + t = template.format(p,r,year) + if t not in done: + done.add(t) + download_images_over_shapefile(f, image_directory, year, master) diff --git a/pixel_classification/runspec.py b/pixel_classification/runspec.py index c5d6221..41fe743 100644 --- a/pixel_classification/runspec.py +++ b/pixel_classification/runspec.py @@ -16,7 +16,7 @@ import os import sys - +from glob import glob abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(abspath) @@ -35,6 +35,9 @@ def landsat_rasters(): 'B7.TIF', 'B8.TIF', 'B9.TIF', 'B10.TIF', 'B11.TIF')} return b +def climate_rasters(root): + return [f for f in glob(os.path.join(root, "*.tif")) if 'GridMet' in f] + def ancillary_rasters(): a = ('lst.tif', 'ssebop_etrf.tif') From 9f9ec0f5db86b75f034de3b71b2729806b2f3893 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sat, 16 Feb 2019 13:51:38 -0700 Subject: [PATCH 26/89] Fully conv. 
pipeline almost built --- pixel_classification/fully_conv.py | 106 +++++++++++++++-------------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index 23448ef..aa1d0a5 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -1,5 +1,5 @@ import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #os.environ['CUDA_VISIBLE_DEVICES'] = '-1' import keras.backend as K import matplotlib.pyplot as plt @@ -17,21 +17,23 @@ from rasterio import open as rasopen NO_DATA = -1 +MAX_POOL_SHP = 8 +CHUNK_SIZE = 1000 def custom_objective(y_true, y_pred): '''I want to mask all values that are not data, given a y_true that has NODATA values. ''' - y_true = tf.reshape(y_true, (1080*1920, 2)) - y_pred = tf.reshape(y_pred, (1080*1920, 2)) + y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], 2)) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], 2)) masked = tf.not_equal(y_true, NO_DATA) y_true_mask = tf.boolean_mask(y_true, masked) y_pred_mask = tf.boolean_mask(y_pred, masked) return tf.keras.losses.binary_crossentropy(y_true_mask, y_pred_mask) -def fcnn_functional(image_shape, n_classes): +def fcnn_functional(n_classes): - x = Input(image_shape) + x = Input((None, None, 36)) c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) @@ -108,43 +110,27 @@ def augment_data(image, class_mask): class_mask = np.flipud(class_mask) return image, class_mask -def generate(image_directory, box_size): - while True: - for f in glob(image_directory + "*.json"): - jpg = f[:-13] + ".jpg" - class_mask, input_image = generate_class_mask(f, jpg, box_size=box_size) - if class_mask is None: - continue - if np.random.randint(2): - input_image, class_mask = augment_data(input_image, class_mask) - - X, y = 
preprocess_training_data(input_image, class_mask) - - yield X, y - -def create_model(image_shape, n_classes): - model = fcnn_functional(image_shape, n_classes) +def preprocess_data(master, mask): + shp = master.shape + rows = shp[1]; cols = shp[2] + cut_rows = rows % MAX_POOL_SHP + cut_cols = cols % MAX_POOL_SHP + out_m = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_m[0, :, :, :] = master[:, cut_rows:, cut_cols:] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, cut_rows:, cut_cols:] + out_m = np.swapaxes(out_m, 1, 3) + out_mask = np.swapaxes(out_mask, 1, 3) + return out_m, out_mask + +def create_model(n_classes): + model = fcnn_functional(n_classes) model.compile(loss=custom_objective, optimizer='adam', metrics=['accuracy']) return model -def train_model(train_directory, test_directory, image_shape, box_size=6, epochs=15): - n_classes = 2 - model = create_model(image_shape, n_classes) - tb = TensorBoard(log_dir='graphs/') - n_augmented = 0 - train_generator = generate(train_directory, box_size) - test_generator = generate(test_directory, box_size) - model.fit_generator(train_generator, - steps_per_epoch=50, - epochs=epochs, - verbose=1, - callbacks=[tb], - validation_data=test_generator, - validation_steps=4, - use_multiprocessing=True) - return model def all_matching_shapefiles(to_match, shapefile_directory): out = [] @@ -154,8 +140,9 @@ def all_matching_shapefiles(to_match, shapefile_directory): out.append(f) return out -def generate_binary_train(shapefile_directory, image_directory, box_size): +def generate_binary_train(shapefile_directory, image_directory, box_size, target): + #while True: for f in glob(os.path.join(shapefile_directory, "*.shp")): if target in f: all_matches = all_matching_shapefiles(f, shapefile_directory) @@ -181,7 +168,17 @@ def generate_binary_train(shapefile_directory, image_directory, box_size): for i, s in enumerate(masks): class_mask[0, :, 
:][s[0, :, :] != NO_DATA] = 1 # May need to do some preprocessing. - yield class_mask, load_raster(master_raster) + master = load_raster(master_raster) + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_master, sub_mask = preprocess_data(sub_master, sub_mask) + if np.all(sub_mask == NO_DATA): + continue + else: + yield sub_master, sub_mask + def random_sample(class_mask, n_instances, box_size, class_code=0): out = np.where(class_mask != NO_DATA) @@ -205,6 +202,24 @@ def load_raster(master_raster): arr = src.read() return arr +def train_model(shapefile_directory, image_directory, box_size=6, epochs=15): + # image shape will change here, so it must be + # inferred at runtime. + n_classes = 2 + model = create_model(n_classes) + tb = TensorBoard(log_dir='graphs/') + n_augmented = 0 + train_generator = generate_binary_train(shapefile_directory, image_directory, box_size, + 'irrigated') + model.fit_generator(train_generator, + steps_per_epoch=13, + epochs=epochs, + verbose=1, + callbacks=[tb], + use_multiprocessing=False) + return model + + if __name__ == '__main__': # Steps: # 1. split shapefiles @@ -221,10 +236,9 @@ def load_raster(master_raster): # the steps into one coherent file. # need an easier way to specify year. # Let's do a binary classification model. 
- shapefile_directory = 'shapefile_data/backup' sample_dir = os.path.join(shapefile_directory, 'sample_points') - image_directory = 'master_rasters' + image_directory = 'master_rasters/backup' target = 'irrigated' fallow = 'Fallow' forest = 'Forrest' @@ -237,12 +251,4 @@ def load_raster(master_raster): n_classes = 2 box_size = 6 - - for i, j in generate_binary_train(shapefile_directory, image_directory, box_size): - - fig, ax = plt.subplots(ncols=2) - ax[0].imshow(i[0, :, :]) - ax[1].imshow(i[1, :, :]) - plt.show() - - + train_model(shapefile_directory, image_directory) From b1f1d0c39fedc66d5904fca2e015249afe596139 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sat, 16 Feb 2019 14:41:19 -0700 Subject: [PATCH 27/89] Model is training on GPU --- pixel_classification/fully_conv.py | 107 ++++++++++++++++++++++++----- 1 file changed, 90 insertions(+), 17 deletions(-) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index aa1d0a5..d618602 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -17,8 +17,8 @@ from rasterio import open as rasopen NO_DATA = -1 -MAX_POOL_SHP = 8 -CHUNK_SIZE = 1000 +MAX_POOLS = 3 +CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. 
def custom_objective(y_true, y_pred): '''I want to mask all values that @@ -33,7 +33,7 @@ def custom_objective(y_true, y_pred): def fcnn_functional(n_classes): - x = Input((None, None, 36)) + x = Input((None, None, 39)) c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) @@ -113,13 +113,31 @@ def augment_data(image, class_mask): def preprocess_data(master, mask): shp = master.shape rows = shp[1]; cols = shp[2] - cut_rows = rows % MAX_POOL_SHP - cut_cols = cols % MAX_POOL_SHP + cut_rows = rows % (2**MAX_POOLS) + cut_cols = cols % (2**MAX_POOLS) out_m = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_m[0, :, :, :] = master[:, cut_rows:, cut_cols:] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, cut_rows:, cut_cols:] + + if cut_cols != 0 and cut_rows != 0: + out_m[0, :, :, :] = master[:, :-cut_rows, :-cut_cols] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :-cut_rows, :-cut_cols] + elif cut_cols == 0 and cut_rows != 0: + out_m[0, :, :, :] = master[:, :-cut_rows, :] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :-cut_rows, :] + elif cut_cols != 0 and cut_rows == 0: + out_m[0, :, :, :] = master[:, :, :-cut_cols] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :, :-cut_cols] + else: + out_m[0, :, :, :] = master[:, :, :] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :, :] + out_m = np.swapaxes(out_m, 1, 3) out_mask = np.swapaxes(out_mask, 1, 3) return out_m, out_mask @@ -140,9 +158,8 @@ def all_matching_shapefiles(to_match, shapefile_directory): out.append(f) 
return out -def generate_binary_train(shapefile_directory, image_directory, box_size, target): +def instances_per_epoch(shapefile_directory, image_directory, box_size, target): - #while True: for f in glob(os.path.join(shapefile_directory, "*.shp")): if target in f: all_matches = all_matching_shapefiles(f, shapefile_directory) @@ -157,14 +174,12 @@ def generate_binary_train(shapefile_directory, image_directory, box_size, target target_mask = generate_class_mask(f, mask_file) class_mask = np.ones((n_classes, target_mask.shape[1], target_mask.shape[2]))*NO_DATA class_mask[1, :, :] = target_mask - required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) masks = [] for match in all_matches: msk = generate_class_mask(match, mask_file) samp = random_sample(msk, required_instances, box_size) masks.append(samp) - for i, s in enumerate(masks): class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 # May need to do some preprocessing. @@ -180,6 +195,44 @@ def generate_binary_train(shapefile_directory, image_directory, box_size, target yield sub_master, sub_mask +def generate_binary_train(shapefile_directory, image_directory, box_size, target): + + while True: + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if target in f: + all_matches = all_matching_shapefiles(f, shapefile_directory) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive error handling. 
+ else: + target_mask = generate_class_mask(f, mask_file) + class_mask = np.ones((n_classes, target_mask.shape[1], target_mask.shape[2]))*NO_DATA + class_mask[1, :, :] = target_mask + required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) + masks = [] + for match in all_matches: + msk = generate_class_mask(match, mask_file) + samp = random_sample(msk, required_instances, box_size) + masks.append(samp) + for i, s in enumerate(masks): + class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 + # May need to do some preprocessing. + master = load_raster(master_raster) + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_master, sub_mask = preprocess_data(sub_master, sub_mask) + if np.all(sub_mask == NO_DATA): + continue + else: + yield sub_master, sub_mask + + def random_sample(class_mask, n_instances, box_size, class_code=0): out = np.where(class_mask != NO_DATA) # returns (elements from class_mask, indices_x, indices_y) @@ -202,7 +255,22 @@ def load_raster(master_raster): arr = src.read() return arr -def train_model(shapefile_directory, image_directory, box_size=6, epochs=15): +def evaluate_image(master_raster, model): + + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive error handling. + else: + master = load_raster(master_raster) + out = np.zeros(master.shape) + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_master, sub_mask = preprocess_data(sub_master, sub_mask) + + +def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size=6, epochs=3): # image shape will change here, so it must be # inferred at runtime. 
n_classes = 2 @@ -212,7 +280,7 @@ def train_model(shapefile_directory, image_directory, box_size=6, epochs=15): train_generator = generate_binary_train(shapefile_directory, image_directory, box_size, 'irrigated') model.fit_generator(train_generator, - steps_per_epoch=13, + steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=1, callbacks=[tb], @@ -238,7 +306,7 @@ def train_model(shapefile_directory, image_directory, box_size=6, epochs=15): # Let's do a binary classification model. shapefile_directory = 'shapefile_data/backup' sample_dir = os.path.join(shapefile_directory, 'sample_points') - image_directory = 'master_rasters/backup' + image_directory = 'master_rasters/' target = 'irrigated' fallow = 'Fallow' forest = 'Forrest' @@ -251,4 +319,9 @@ def train_model(shapefile_directory, image_directory, box_size=6, epochs=15): n_classes = 2 box_size = 6 - train_model(shapefile_directory, image_directory) + # k = 0 + # for i in instances_per_epoch(shapefile_directory, image_directory, box_size, 'irrigated'): + # k += 1 + # print("INSTANCES:", k) + + train_model(shapefile_directory, 76, image_directory) From 7cf24235c713737c9f85a8a0a097bdae9e26ec11 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 17 Feb 2019 10:09:21 -0700 Subject: [PATCH 28/89] Need to work on class balance issue. --- pixel_classification/fully_conv.py | 35 ++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index d618602..7b51178 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -19,6 +19,7 @@ NO_DATA = -1 MAX_POOLS = 3 CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. +NUM_CLASSES = 2 def custom_objective(y_true, y_pred): '''I want to mask all values that @@ -210,7 +211,7 @@ def generate_binary_train(shapefile_directory, image_directory, box_size, target # TODO: More extensive error handling. 
else: target_mask = generate_class_mask(f, mask_file) - class_mask = np.ones((n_classes, target_mask.shape[1], target_mask.shape[2]))*NO_DATA + class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA class_mask[1, :, :] = target_mask required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) masks = [] @@ -262,18 +263,28 @@ def evaluate_image(master_raster, model): # TODO: More extensive error handling. else: master = load_raster(master_raster) - out = np.zeros(master.shape) + class_mask = np.zeros((2, master.shape[1], master.shape[2])) + out = np.zeros((master.shape[1], master.shape[2])) for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] sub_master, sub_mask = preprocess_data(sub_master, sub_mask) + preds = model.predict(sub_master) + preds = preds[0, :, :, :] + fig, ax = plt.subplots(ncols=2) + i1 = ax[0].imshow(preds[:, :, 0]) + i2 = ax[1].imshow(preds[:, :, 1]) + fig.colorbar(i1, ax=ax[0]) + fig.colorbar(i2, ax=ax[1]) + plt.show() + out[i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] = np.argmax(preds, axis=2) + return out def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size=6, epochs=3): # image shape will change here, so it must be # inferred at runtime. 
- n_classes = 2 model = create_model(n_classes) tb = TensorBoard(log_dir='graphs/') n_augmented = 0 @@ -323,5 +334,21 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= # for i in instances_per_epoch(shapefile_directory, image_directory, box_size, 'irrigated'): # k += 1 # print("INSTANCES:", k) + pth = 'test_model.h5' + if not os.path.isfile(pth): + model = train_model(shapefile_directory, 75, image_directory, epochs=1) + model.save(pth) + else: + model = tf.keras.models.load_model(pth, + custom_objects={'custom_objective':custom_objective}) + + for f in glob(os.path.join(image_directory, "*.tif")): + if "class" not in f: + out = evaluate_image(f, model) + plt.imshow(out) + plt.show() + + + + - train_model(shapefile_directory, 76, image_directory) From 533c5f34ef10a9678144a17b8b332711d8dc6bd0 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 17 Feb 2019 13:11:21 -0700 Subject: [PATCH 29/89] Had to balance classes somehow, so I oversampled from the irrigated class --- pixel_classification/fully_conv.py | 150 +++++++++++++++++++++++------ 1 file changed, 120 insertions(+), 30 deletions(-) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index 7b51178..5b06434 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -78,6 +78,21 @@ def fcnn_functional(n_classes): model.summary() return model +def fcnn_model(n_classes): + model = tf.keras.Sequential() + # Must define the input shape in the first layer of the neural network + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=8, padding='same', activation='relu', + input_shape=(None, None, 39))) + model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=4, padding='same', activation='relu')) + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=4, padding='same', activation='relu')) + model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) + 
model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', + activation='softmax')) # 1x1 convolutions for pixel-wise prediciton. + # Take a look at the model summary + #model.summary() + return model + def one_hot_encoding(class_mask, n_classes): '''Assumes classes range from 0 -> (n-1)''' shp = class_mask.shape @@ -144,7 +159,7 @@ def preprocess_data(master, mask): return out_m, out_mask def create_model(n_classes): - model = fcnn_functional(n_classes) + model = fcnn_model(n_classes) model.compile(loss=custom_objective, optimizer='adam', metrics=['accuracy']) @@ -196,6 +211,108 @@ def instances_per_epoch(shapefile_directory, image_directory, box_size, target): yield sub_master, sub_mask +def generate_balanced_data(shapefile_directory, image_directory, box_size, target): + ''' This is pretty much for binary classification.''' + #while True: + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if target in f: + all_matches = all_matching_shapefiles(f, shapefile_directory) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive error handling. 
+ else: + target_mask = generate_class_mask(f, mask_file) + class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA + class_mask[1, :, :] = target_mask + required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) + masks = [] + for match in all_matches: + msk = generate_class_mask(match, mask_file) + samp = random_sample(msk, required_instances, box_size) + masks.append(samp) + for i, s in enumerate(masks): + class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 + + master = load_raster(master_raster) + print(f) + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + if np.all(sub_mask == NO_DATA): + continue + else: + n_negative = len(np.where(sub_mask[0, :, :] != NO_DATA)[1]) + positive = np.where(target_mask[:, :] != NO_DATA) + sorted_x = sorted(positive[1]) + sorted_y = sorted(positive[2]) + l = len(sorted_x) // 2 + center_x = sorted_x[l] + center_y = sorted_y[l] + ofs = CHUNK_SIZE // 2 + sub_positive = target_mask[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] + sub_master_positive = master[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] + required_instances = min(len(np.where(sub_positive[0, :, :] != NO_DATA)[1]), n_negative) + sub_negative = random_sample(sub_mask[0, :, :], required_instances, + box_size=0, class_code=1) + sub_master_negative = sub_master + sub_positive = random_sample(sub_positive[0, :, :], required_instances, + box_size=0, class_code=1) + one_hot_pos = np.ones((2, sub_positive.shape[0], sub_positive.shape[1]))*NO_DATA + one_hot_neg = np.ones((2, sub_negative.shape[0], sub_negative.shape[1]))*NO_DATA + one_hot_pos[1, :, :] = sub_positive + one_hot_neg[0, :, :] = sub_negative + sub_mas_pos, class_mask_pos = preprocess_data(sub_master_positive, + one_hot_pos) + sub_mas_neg, 
class_mask_neg = preprocess_data(sub_master_negative, + one_hot_neg) + ims = [sub_mas_pos, sub_mas_neg] + class_masks = [class_mask_pos, class_mask_neg] + for ii, jj in zip(ims, class_masks): + yield ii, jj + + + +def random_sample(class_mask, n_instances, box_size, class_code=1): + out = np.where(class_mask != NO_DATA) + class_mask = class_mask.copy() + # returns (indices_z, indices_x, indices_y) + try: + out_x = out[1] + out_y = out[2] + except IndexError as e: + out_x = out[0] + out_y = out[1] + + indices = np.random.choice(len(out_x), size=n_instances, replace=False) + out_x = out_x[indices] + out_y = out_y[indices] + + try: + class_mask[:, :, :] = NO_DATA + if box_size == 0: + class_mask[0, out_x, out_y] = class_code + else: + ofs = box_size // 2 + for x, y in zip(out_x, out_y): + class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = class_code + + except IndexError as e: + class_mask[:, :] = NO_DATA + if box_size == 0: + class_mask[out_x, out_y] = class_code + else: + ofs = box_size // 2 + for x, y in zip(out_x, out_y): + class_mask[x-ofs:x+ofs, y-ofs:y+ofs] = class_code + + return class_mask + + def generate_binary_train(shapefile_directory, image_directory, box_size, target): while True: @@ -234,23 +351,6 @@ def generate_binary_train(shapefile_directory, image_directory, box_size, target yield sub_master, sub_mask -def random_sample(class_mask, n_instances, box_size, class_code=0): - out = np.where(class_mask != NO_DATA) - # returns (elements from class_mask, indices_x, indices_y) - out_x = out[1] - out_y = out[2] - indices = np.random.choice(len(out_x), size=n_instances, replace=False) - out_x = out_x[indices] - out_y = out_y[indices] - class_mask[:, :, :] = NO_DATA - if box_size == 0: - class_mask[:, out_x, out_y] = class_code - else: - ofs = box_size // 2 - for x, y in zip(out_x, out_y): - class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = class_code - return class_mask - def load_raster(master_raster): with rasopen(master_raster, 'r') as src: arr = src.read() @@ 
-288,7 +388,7 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= model = create_model(n_classes) tb = TensorBoard(log_dir='graphs/') n_augmented = 0 - train_generator = generate_binary_train(shapefile_directory, image_directory, box_size, + train_generator = generate_balanced_data(shapefile_directory, image_directory, box_size, 'irrigated') model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, @@ -328,15 +428,10 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= train_raster = 'master_raster_' mask_raster = 'class_mask_' n_classes = 2 - box_size = 6 - # k = 0 - # for i in instances_per_epoch(shapefile_directory, image_directory, box_size, 'irrigated'): - # k += 1 - # print("INSTANCES:", k) pth = 'test_model.h5' if not os.path.isfile(pth): - model = train_model(shapefile_directory, 75, image_directory, epochs=1) + model = train_model(shapefile_directory, 76, image_directory, epochs=1) model.save(pth) else: model = tf.keras.models.load_model(pth, @@ -347,8 +442,3 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= out = evaluate_image(f, model) plt.imshow(out) plt.show() - - - - - From fe4c9383348bdc96db20b83d4b7925bc9c577bbf Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 17 Feb 2019 14:26:18 -0700 Subject: [PATCH 30/89] Working pipeline. 
Accuracy not so great.j --- pixel_classification/fully_conv.py | 184 ++++++++++++++++------------- 1 file changed, 104 insertions(+), 80 deletions(-) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index 5b06434..f000278 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -1,5 +1,5 @@ import os -#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #os.environ['CUDA_VISIBLE_DEVICES'] = '-1' import keras.backend as K import matplotlib.pyplot as plt @@ -75,7 +75,7 @@ def fcnn_functional(n_classes): c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) model = Model(inputs=x, outputs=c5) - model.summary() + #model.summary() return model def fcnn_model(n_classes): @@ -126,7 +126,7 @@ def augment_data(image, class_mask): class_mask = np.flipud(class_mask) return image, class_mask -def preprocess_data(master, mask): +def preprocess_data(master, mask, return_cuts=False): shp = master.shape rows = shp[1]; cols = shp[2] cut_rows = rows % (2**MAX_POOLS) @@ -156,10 +156,13 @@ def preprocess_data(master, mask): out_m = np.swapaxes(out_m, 1, 3) out_mask = np.swapaxes(out_mask, 1, 3) + if return_cuts: + return out_m, out_mask, cut_rows, cut_cols + return out_m, out_mask def create_model(n_classes): - model = fcnn_model(n_classes) + model = fcnn_functional(n_classes) model.compile(loss=custom_objective, optimizer='adam', metrics=['accuracy']) @@ -213,67 +216,66 @@ def instances_per_epoch(shapefile_directory, image_directory, box_size, target): def generate_balanced_data(shapefile_directory, image_directory, box_size, target): ''' This is pretty much for binary classification.''' - #while True: - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if target in f: - all_matches = all_matching_shapefiles(f, shapefile_directory) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = 
os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive error handling. - else: - target_mask = generate_class_mask(f, mask_file) - class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA - class_mask[1, :, :] = target_mask - required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) - masks = [] - for match in all_matches: - msk = generate_class_mask(match, mask_file) - samp = random_sample(msk, required_instances, box_size) - masks.append(samp) - for i, s in enumerate(masks): - class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 + while True: + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if target in f: + all_matches = all_matching_shapefiles(f, shapefile_directory) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive error handling. 
+ else: + target_mask = generate_class_mask(f, mask_file) + class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA + class_mask[1, :, :] = target_mask + required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) + masks = [] + for match in all_matches: + msk = generate_class_mask(match, mask_file) + samp = random_sample(msk, required_instances, box_size) + masks.append(samp) + for i, s in enumerate(masks): + class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 - master = load_raster(master_raster) - print(f) - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if np.all(sub_mask == NO_DATA): - continue - else: - n_negative = len(np.where(sub_mask[0, :, :] != NO_DATA)[1]) - positive = np.where(target_mask[:, :] != NO_DATA) - sorted_x = sorted(positive[1]) - sorted_y = sorted(positive[2]) - l = len(sorted_x) // 2 - center_x = sorted_x[l] - center_y = sorted_y[l] - ofs = CHUNK_SIZE // 2 - sub_positive = target_mask[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] - sub_master_positive = master[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] - required_instances = min(len(np.where(sub_positive[0, :, :] != NO_DATA)[1]), n_negative) - sub_negative = random_sample(sub_mask[0, :, :], required_instances, - box_size=0, class_code=1) - sub_master_negative = sub_master - sub_positive = random_sample(sub_positive[0, :, :], required_instances, - box_size=0, class_code=1) - one_hot_pos = np.ones((2, sub_positive.shape[0], sub_positive.shape[1]))*NO_DATA - one_hot_neg = np.ones((2, sub_negative.shape[0], sub_negative.shape[1]))*NO_DATA - one_hot_pos[1, :, :] = sub_positive - one_hot_neg[0, :, :] = sub_negative - sub_mas_pos, class_mask_pos = preprocess_data(sub_master_positive, - one_hot_pos) - sub_mas_neg, class_mask_neg 
= preprocess_data(sub_master_negative, - one_hot_neg) - ims = [sub_mas_pos, sub_mas_neg] - class_masks = [class_mask_pos, class_mask_neg] - for ii, jj in zip(ims, class_masks): - yield ii, jj + master = load_raster(master_raster) + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + if np.all(sub_mask == NO_DATA): + continue + else: + n_negative = len(np.where(sub_mask[0, :, :] != NO_DATA)[1]) + positive = np.where(target_mask[:, :] != NO_DATA) + sorted_x = sorted(positive[1]) + sorted_y = sorted(positive[2]) + l = len(sorted_x) // 2 + center_x = sorted_x[l] + center_y = sorted_y[l] + ofs = CHUNK_SIZE // 2 + sub_positive = target_mask[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] + sub_master_positive = master[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] + required_instances = min(len(np.where(sub_positive[0, :, :] != NO_DATA)[1]), n_negative) + sub_negative = random_sample(sub_mask[0, :, :], required_instances, + box_size=0, class_code=1) + sub_master_negative = sub_master + sub_positive = random_sample(sub_positive[0, :, :], required_instances, + box_size=0, class_code=1) + one_hot_pos = np.ones((2, sub_positive.shape[0], sub_positive.shape[1]))*NO_DATA + one_hot_neg = np.ones((2, sub_negative.shape[0], sub_negative.shape[1]))*NO_DATA + one_hot_pos[1, :, :] = sub_positive + one_hot_neg[0, :, :] = sub_negative + sub_mas_pos, class_mask_pos = preprocess_data(sub_master_positive, + one_hot_pos) + sub_mas_neg, class_mask_neg = preprocess_data(sub_master_negative, + one_hot_neg) + ims = [sub_mas_pos, sub_mas_neg] + class_masks = [class_mask_pos, class_mask_neg] + for ii, jj in zip(ims, class_masks): + yield ii, jj @@ -354,33 +356,51 @@ def generate_binary_train(shapefile_directory, image_directory, box_size, target def load_raster(master_raster): with 
rasopen(master_raster, 'r') as src: arr = src.read() - return arr + meta = src.meta.copy() + return arr, meta -def evaluate_image(master_raster, model): +def evaluate_image(master_raster, model, outfile=None): if not os.path.isfile(master_raster): print("Master raster not created for {}".format(suffix)) # TODO: More extensive error handling. else: - master = load_raster(master_raster) + master, meta = load_raster(master_raster) class_mask = np.zeros((2, master.shape[1], master.shape[2])) - out = np.zeros((master.shape[1], master.shape[2])) + out = np.zeros((master.shape[2], master.shape[1])) for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_master, sub_mask = preprocess_data(sub_master, sub_mask) + sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, return_cuts=True) preds = model.predict(sub_master) preds = preds[0, :, :, :] - fig, ax = plt.subplots(ncols=2) - i1 = ax[0].imshow(preds[:, :, 0]) - i2 = ax[1].imshow(preds[:, :, 1]) - fig.colorbar(i1, ax=ax[0]) - fig.colorbar(i2, ax=ax[1]) - plt.show() - out[i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] = np.argmax(preds, axis=2) + preds = np.argmax(preds, axis=2) + if cut_cols == 0 and cut_rows == 0: + out[j:j+CHUNK_SIZE, i:i+CHUNK_SIZE] = preds + elif cut_cols == 0 and cut_rows != 0: + ofs = master.shape[1]-cut_rows + out[j:j+CHUNK_SIZE, i:ofs] = preds + elif cut_cols != 0 and cut_rows == 0: + ofs = master.shape[2]-cut_cols + out[j:ofs, i:i+CHUNK_SIZE] = preds + elif cut_cols != 0 and cut_rows != 0: + ofs_col = master.shape[2]-cut_cols + ofs_row = master.shape[1]-cut_rows + out[j:ofs_col, i:ofs_row] = preds + else: + print("whatcha got goin on here?") + print("Percent done: {:.3f}".format(i / master.shape[1])) + out = np.swapaxes(out, 0, 1) + + if outfile: + save_raster(out, outfile, meta) return out +def save_raster(arr, outfile, meta): + 
meta.update(count=1) + with rasopen(outfile, 'w', **meta) as dst: + dst.write(arr) def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size=6, epochs=3): # image shape will change here, so it must be @@ -428,10 +448,11 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= train_raster = 'master_raster_' mask_raster = 'class_mask_' n_classes = 2 + out_directory = 'fully_conv_evaluated_images/' pth = 'test_model.h5' if not os.path.isfile(pth): - model = train_model(shapefile_directory, 76, image_directory, epochs=1) + model = train_model(shapefile_directory, 76, image_directory, epochs=2) model.save(pth) else: model = tf.keras.models.load_model(pth, @@ -439,6 +460,9 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= for f in glob(os.path.join(image_directory, "*.tif")): if "class" not in f: - out = evaluate_image(f, model) - plt.imshow(out) - plt.show() + print(os.path.splitext(f)[0]) + + # out = evaluate_image(f, model) + # plt.imshow(out) + # plt.colorbar() + # plt.show() From e3ed9ca0b952bfa8fee59165f7d4045ff1e34c61 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 17 Feb 2019 16:14:48 -0700 Subject: [PATCH 31/89] Added clip_raster function --- pixel_classification/data_utils.py | 1 - pixel_classification/fully_conv.py | 63 ++++++++++++++++++++++++++---- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 748b7af..e35b8a3 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -32,7 +32,6 @@ def generate_class_mask(shapefile, master_raster): shp = gpd.read_file(shapefile) with rasopen(master_raster, 'r') as src: shp = shp.to_crs(src.crs) - arr = src.read() features = get_features(shp) out_image, out_transform = mask(src, shapes=features, nodata=NO_DATA) return out_image diff --git a/pixel_classification/fully_conv.py 
b/pixel_classification/fully_conv.py index f000278..e70c2ea 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -15,11 +15,17 @@ from data_utils import generate_class_mask, get_shapefile_path_row from multiprocessing import Pool from rasterio import open as rasopen +from rasterio.mask import mask +from shapely.geometry import shape +from fiona import open as fopen +import json +import geopandas as gpd NO_DATA = -1 MAX_POOLS = 3 CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. NUM_CLASSES = 2 +WRS2 = '../spatial_data/wrs2_descending_usa.shp' def custom_objective(y_true, y_pred): '''I want to mask all values that @@ -375,7 +381,7 @@ def evaluate_image(master_raster, model, outfile=None): sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, return_cuts=True) preds = model.predict(sub_master) preds = preds[0, :, :, :] - preds = np.argmax(preds, axis=2) + preds = preds[:, :, 1] #np.argmax(preds, axis=2) if cut_cols == 0 and cut_rows == 0: out[j:j+CHUNK_SIZE, i:i+CHUNK_SIZE] = preds elif cut_cols == 0 and cut_rows != 0: @@ -391,7 +397,14 @@ def evaluate_image(master_raster, model, outfile=None): else: print("whatcha got goin on here?") print("Percent done: {:.3f}".format(i / master.shape[1])) + + plt.imshow(preds) + plt.colorbar() + plt.show() out = np.swapaxes(out, 0, 1) + out[out == 0] = np.nan + arr = np.expand_dims(arr, axis=0) + arr = arr.astype(np.float32) if outfile: save_raster(out, outfile, meta) @@ -418,6 +431,27 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= use_multiprocessing=False) return model +def get_features(gdf, path, row): + tmp = json.loads(gdf.to_json()) + features = [] + for feature in tmp['features']: + if feature['properties']['PATH'] == path and feature['properties']['ROW'] == row: + features.append(feature['geometry']) + return features + +def clip_raster(evaluated, path, row, outfile=None): + + shp = gpd.read_file(WRS2) + + 
with rasopen(evaluated, 'r') as src: + print(src.crs) + shp = shp.to_crs(src.crs) + meta = src.meta.copy() + features = get_features(shp, path, row) + out_image, out_transform = mask(src, shapes=features, nodata=np.nan) + + if outfile: + save_raster(out_image, outfile, meta) if __name__ == '__main__': # Steps: @@ -448,7 +482,19 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= train_raster = 'master_raster_' mask_raster = 'class_mask_' n_classes = 2 - out_directory = 'fully_conv_evaluated_images/' + out_directory = 'evaluated_images_fully_conv/' + + # for f in glob(os.path.join(out_directory, "*.tif")): + # if 'clipped' not in f: + # out = os.path.basename(f) + # os.path.split(out)[1] + # out = out[out.find("_")+1:] + # path = out[:2] + # row = out[3:5] + # out = os.path.splitext(out)[0] + # out = 'eval_clipped_' + out + ".tif" + # out = os.path.join(out_directory, out) + # clip_raster(f, int(path), int(row), outfile=out) pth = 'test_model.h5' if not os.path.isfile(pth): @@ -460,9 +506,12 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= for f in glob(os.path.join(image_directory, "*.tif")): if "class" not in f: - print(os.path.splitext(f)[0]) + out = os.path.basename(f) + os.path.split(out)[1] + out = out[out.find("_")+1:] + out = out[out.find("_"):] + out = os.path.splitext(out)[0] + out = 'eval_probability' + out + ".tif" + out = os.path.join(out_directory, out) + evaluate_image(f, model, out) - # out = evaluate_image(f, model) - # plt.imshow(out) - # plt.colorbar() - # plt.show() From d220ebbdd130fdcec24df932eb63898ca9e16cc0 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Mon, 18 Feb 2019 14:02:50 -0700 Subject: [PATCH 32/89] Path/row/year in tif metadata --- pixel_classification/data_utils.py | 42 ++------------------ pixel_classification/fully_conv.py | 62 +++++++++++++++--------------- pixel_classification/keras_cnn.py | 38 ++++++------------ 3 files changed, 45 insertions(+), 97 
deletions(-) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index e35b8a3..5049b7e 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -36,45 +36,6 @@ def generate_class_mask(shapefile, master_raster): out_image, out_transform = mask(src, shapes=features, nodata=NO_DATA) return out_image -def create_master_masked_raster(image_stack, path, row, year, raster_directory): - masks = image_stack.masks - if not masks: - return None - - first = True - stack = None - - for i, feat in enumerate(masks.keys()): # ordered dict ensures accuracy here. - - mask_raster = masks[feat] # maps bands to their location in filesystem. - - with rasopen(mask_raster, mode='r') as src: - arr = src.read() - raster_geo = src.meta.copy() - - if first: - first_geo = deepcopy(raster_geo) - empty = zeros((len(masks.keys()), arr.shape[1], arr.shape[2]), float32) - stack = empty - stack[i, :, :] = arr - first = False - else: - try: - stack[i, :, :] = arr - except ValueError: - arr = warp_single_image(mask_raster, first_geo) - stack[i, :, :] = arr - - first_geo.update(count=len(masks.keys())) - - fname = "master_mask_raster_{}_{}_{}.tif".format(path, row, year) - pth = os.path.join(raster_directory, fname) - - with rasopen(pth, mode='w', **first_geo) as dst: - dst.write(stack) - - return pth - def create_master_raster(image_stack, path, row, year, raster_directory): fname = "master_raster_{}_{}_{}.tif".format(path, row, year) @@ -134,6 +95,9 @@ def create_master_raster(image_stack, path, row, year, raster_directory): first_geo.update(count=len(paths_map.keys())) with rasopen(pth, mode='w', **first_geo) as dst: + dst.update_tags(path=path) + dst.update_tags(row=row) + dst.update_tags(year=year) dst.write(stack) return pth diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index e70c2ea..725b8df 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -81,7 
+81,7 @@ def fcnn_functional(n_classes): c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) model = Model(inputs=x, outputs=c5) - #model.summary() + model.summary() return model def fcnn_model(n_classes): @@ -398,14 +398,10 @@ def evaluate_image(master_raster, model, outfile=None): print("whatcha got goin on here?") print("Percent done: {:.3f}".format(i / master.shape[1])) - plt.imshow(preds) - plt.colorbar() - plt.show() out = np.swapaxes(out, 0, 1) out[out == 0] = np.nan - arr = np.expand_dims(arr, axis=0) - arr = arr.astype(np.float32) - + out = np.expand_dims(out, axis=0) + out = out.astype(np.float32) if outfile: save_raster(out, outfile, meta) return out @@ -450,6 +446,7 @@ def clip_raster(evaluated, path, row, outfile=None): features = get_features(shp, path, row) out_image, out_transform = mask(src, shapes=features, nodata=np.nan) + out_image[out_image == 0] = np.nan if outfile: save_raster(out_image, outfile, meta) @@ -484,34 +481,35 @@ def clip_raster(evaluated, path, row, outfile=None): n_classes = 2 out_directory = 'evaluated_images_fully_conv/' - # for f in glob(os.path.join(out_directory, "*.tif")): - # if 'clipped' not in f: - # out = os.path.basename(f) - # os.path.split(out)[1] - # out = out[out.find("_")+1:] - # path = out[:2] - # row = out[3:5] - # out = os.path.splitext(out)[0] - # out = 'eval_clipped_' + out + ".tif" - # out = os.path.join(out_directory, out) - # clip_raster(f, int(path), int(row), outfile=out) - - pth = 'test_model.h5' - if not os.path.isfile(pth): - model = train_model(shapefile_directory, 76, image_directory, epochs=2) - model.save(pth) - else: - model = tf.keras.models.load_model(pth, - custom_objects={'custom_objective':custom_objective}) - - for f in glob(os.path.join(image_directory, "*.tif")): - if "class" not in f: + for f in glob(os.path.join(out_directory, "*.tif")): + if 'probab' in f: out = os.path.basename(f) os.path.split(out)[1] out = out[out.find("_")+1:] - out = 
out[out.find("_"):] + out = out[out.find("_")+1:] + path = out[:2] + row = out[3:5] out = os.path.splitext(out)[0] - out = 'eval_probability' + out + ".tif" + out = 'eval_clipped_' + out + ".tif" out = os.path.join(out_directory, out) - evaluate_image(f, model, out) + clip_raster(f, int(path), int(row), outfile=f) + + # pth = 'test_model.h5' + # if not os.path.isfile(pth): + # model = train_model(shapefile_directory, 76, image_directory, epochs=2) + # model.save(pth) + # else: + # model = tf.keras.models.load_model(pth, + # custom_objects={'custom_objective':custom_objective}) + + # for f in glob(os.path.join(image_directory, "*.tif")): + # if "class" not in f: + # out = os.path.basename(f) + # os.path.split(out)[1] + # out = out[out.find("_")+1:] + # out = out[out.find("_"):] + # out = os.path.splitext(out)[0] + # out = 'eval_probability' + out + ".tif" + # out = os.path.join(out_directory, out) + # evaluate_image(f, model, out) diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py index 18f4a2e..cb33b0b 100644 --- a/pixel_classification/keras_cnn.py +++ b/pixel_classification/keras_cnn.py @@ -3,6 +3,7 @@ from glob import glob import tensorflow as tf from sklearn.model_selection import train_test_split +from tensorflow.keras.callbacks import TensorBoard import numpy as np from shuffle_data import one_epoch @@ -21,7 +22,7 @@ def keras_model(kernel_size, n_classes): model.add(tf.keras.layers.Dropout(0.5)) model.add(tf.keras.layers.Dense(n_classes, activation='softmax')) # Take a look at the model summary - # model.summary() + model.summary() model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) @@ -30,13 +31,16 @@ def keras_model(kernel_size, n_classes): def train_next_batch(model, features, labels, n_classes=4, epochs=5, batch_size=128): # shuffle the labels again + + tb = TensorBoard(log_dir='graphs/cnn/') x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.01, 
random_state=42) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, - validation_data=(x_test, y_test)) + validation_data=(x_test, y_test), + callbacks=[tb]) return model @@ -73,31 +77,13 @@ def fnames(class_code): kernel_size = 41 model_name = 'model_kernel_{}'.format(kernel_size) total_instances = 100000 - for i in range(n_epochs): - random_indices = np.random.choice(total_instances, total_instances, replace=False) - cs = 5342 - irr = one_epoch(fnames(0), random_indices, 0, chunk_size=cs) - fallow = one_epoch(fnames(1), random_indices, 1, chunk_size=cs) - forest = one_epoch(fnames(2), random_indices, 2, chunk_size=cs) - other = one_epoch(fnames(3), random_indices, 3, chunk_size=cs) - - for irr, fall, fo, ot in zip(irr, fallow, forest, other): - d1, l1 = irr[0], irr[1] - print(d1.shape) - d2, l2 = fall[0], fall[1] - print(d2.shape) - d3, l3 = fo[0], fo[1] - print(d3.shape) - d4, l4 = ot[0], ot[1] - print(d4.shape) - #features = np.concatenate((d1, d2, d3, d4)) - #labels = np.concatenate((l1, l2, l3, l4)) - #train_next_batch(model, features, labels, epochs=1) - - print("\nCustom epoch {}/{}\n".format(i+1, n_epochs)) - break - + model_path = os.path.join(model_dir, model_name) + model = keras_model(41, 2) + model = tf.keras.models.load_model(model_path) + features = np.zeros((128, 36, 41, 41)) + labels = np.zeros((128, 4)) + train_next_batch(model, features, labels) if not os.path.isfile(model_path): model.save(model_path) From c5c478bdb66ea19878facb3be6d943944161e432 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 19 Feb 2019 08:59:30 -0700 Subject: [PATCH 33/89] Removed get_cdl, no need --- pixel_classification/prepare_images.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index cbf03ea..522f756 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -96,7 +96,6 @@ def build_training(self): self.get_precip() 
self.get_et() self.get_terrain() - self.get_cdl() self.paths_map, self.masks = self._order_images() def build_evaluating(self): @@ -105,7 +104,6 @@ def build_evaluating(self): #self.get_et() self.get_precip() self.get_terrain() - self.get_cdl() self.paths_map, self.masks = self._order_images() # paths map is just path-> location # in filesystem. From ee4404ac134a26d2f2da1bb58635a6c858037a74 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 21 Feb 2019 12:32:46 -0700 Subject: [PATCH 34/89] Refactor, move generators to data_generators. Investigating image discontinuities --- pixel_classification/data_generators.py | 228 ++++++++++++++++ pixel_classification/data_utils.py | 16 +- pixel_classification/fully_conv.py | 332 ++++-------------------- pixel_classification/runspec.py | 2 +- 4 files changed, 285 insertions(+), 293 deletions(-) create mode 100644 pixel_classification/data_generators.py diff --git a/pixel_classification/data_generators.py b/pixel_classification/data_generators.py new file mode 100644 index 0000000..b785eb2 --- /dev/null +++ b/pixel_classification/data_generators.py @@ -0,0 +1,228 @@ +import numpy as np +import os +from glob import glob +from data_utils import generate_class_mask, get_shapefile_path_row +from rasterio import open as rasopen + +NO_DATA = -1 +MAX_POOLS = 3 +CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. 
+NUM_CLASSES = 2 + +def random_sample(class_mask, n_instances, box_size, class_code=1): + out = np.where(class_mask != NO_DATA) + class_mask = class_mask.copy() + try: + out_x = out[1] + out_y = out[2] + except IndexError as e: + out_x = out[0] + out_y = out[1] + + indices = np.random.choice(len(out_x), size=n_instances, replace=False) + out_x = out_x[indices] + out_y = out_y[indices] + + try: + class_mask[:, :, :] = NO_DATA + if box_size == 0: + class_mask[0, out_x, out_y] = class_code + else: + ofs = box_size // 2 + for x, y in zip(out_x, out_y): + class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = class_code + + except IndexError as e: + class_mask[:, :] = NO_DATA + if box_size == 0: + class_mask[out_x, out_y] = class_code + else: + ofs = box_size // 2 + for x, y in zip(out_x, out_y): + class_mask[x-ofs:x+ofs, y-ofs:y+ofs] = class_code + + return class_mask + +def load_raster(master_raster): + with rasopen(master_raster, 'r') as src: + arr = src.read() + meta = src.meta.copy() + return arr, meta + +def generate_binary_train(shapefile_directory, image_directory, box_size, target): + + while True: + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if target in f: + all_matches = all_matching_shapefiles(f, shapefile_directory) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive error handling. 
+ else: + target_mask = generate_class_mask(f, mask_file) + class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA + class_mask[1, :, :] = target_mask + required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) + masks = [] + for match in all_matches: + msk = generate_class_mask(match, mask_file) + samp = random_sample(msk, required_instances, box_size) + masks.append(samp) + for i, s in enumerate(masks): + class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 + # May need to do some preprocessing. + master = load_raster(master_raster) + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_master, sub_mask = preprocess_data(sub_master, sub_mask) + if np.all(sub_mask == NO_DATA): + continue + else: + yield sub_master, sub_mask + + +def all_matching_shapefiles(to_match, shapefile_directory): + out = [] + pr = get_shapefile_path_row(to_match) + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if get_shapefile_path_row(f) == pr and to_match not in f: + out.append(f) + return out + +def generate_balanced_data(shapefile_directory, image_directory, box_size, target, year=2013): + train_raster = 'master_raster_' # templates + mask_raster = 'class_mask_' + ''' This is pretty much for binary classification.''' + while True: + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if target in f: + all_matches = all_matching_shapefiles(f, shapefile_directory) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive error handling. 
+ else: + target_mask = generate_class_mask(f, mask_file) + class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA + class_mask[1, :, :] = target_mask + required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) + masks = [] + for match in all_matches: + msk = generate_class_mask(match, mask_file) + #samp = random_sample(msk, required_instances, box_size) + #masks.append(samp) + masks.append(msk) + + for i, s in enumerate(masks): + class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 + + master, meta = load_raster(master_raster) + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + if np.all(sub_mask == NO_DATA): + continue + else: + n_negative = len(np.where(sub_mask[0, :, :] != NO_DATA)[1]) + positive = np.where(target_mask[:, :] != NO_DATA) + sorted_x = sorted(positive[1]) + sorted_y = sorted(positive[2]) + l = len(sorted_x) // 2 + center_x = sorted_x[l] + center_y = sorted_y[l] + ofs = CHUNK_SIZE // 2 + sub_positive = target_mask[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] + sub_master_positive = master[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] + required_instances = min(len(np.where(sub_positive[0, :, :] != NO_DATA)[1]), n_negative) + sub_negative = random_sample(sub_mask[0, :, :], required_instances, + box_size=0, class_code=1) + sub_master_negative = sub_master + sub_positive = random_sample(sub_positive[0, :, :], required_instances, + box_size=0, class_code=1) + one_hot_pos = np.ones((2, sub_positive.shape[0], sub_positive.shape[1]))*NO_DATA + one_hot_neg = np.ones((2, sub_negative.shape[0], sub_negative.shape[1]))*NO_DATA + one_hot_pos[1, :, :] = sub_positive + one_hot_neg[0, :, :] = sub_negative + sub_mas_pos, class_mask_pos = preprocess_data(sub_master_positive, + one_hot_pos) + 
sub_mas_neg, class_mask_neg = preprocess_data(sub_master_negative, + one_hot_neg) + ims = [sub_mas_pos, sub_mas_neg] + class_masks = [class_mask_pos, class_mask_neg] + for ii, jj in zip(ims, class_masks): + yield ii, jj + + +def rotation(image, angle): + return transform.rotate(image, angle, mode='constant', cval=NO_DATA) + +def random_noise(image): + return util.random_noise(image) + +def h_flip(image): + return image[:, ::-1] + +def augment_data(image, class_mask): + '''Randomly augments an image.''' + if np.random.randint(2): + deg = np.random.uniform(-25, 25) + image = rotation(image, deg) + class_mask = rotation(class_mask, deg) + if np.random.randint(2): + image = random_noise(image) + if np.random.randint(2): + image = h_flip(image) + class_mask = h_flip(class_mask) + if np.random.randint(2): + image = np.flipud(image) + class_mask = np.flipud(class_mask) + return image, class_mask + + +def preprocess_data(master, mask, return_cuts=False): + shp = master.shape + rows = shp[1]; cols = shp[2] + cut_rows = rows % (2**MAX_POOLS) + cut_cols = cols % (2**MAX_POOLS) + out_m = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + + if cut_cols != 0 and cut_rows != 0: + out_m[0, :, :, :] = master[:, :-cut_rows, :-cut_cols] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :-cut_rows, :-cut_cols] + elif cut_cols == 0 and cut_rows != 0: + out_m[0, :, :, :] = master[:, :-cut_rows, :] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :-cut_rows, :] + elif cut_cols != 0 and cut_rows == 0: + out_m[0, :, :, :] = master[:, :, :-cut_cols] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :, :-cut_cols] + else: + out_m[0, :, :, :] = master[:, :, :] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + 
out_mask[0, :, :, :] = mask[:, :, :] + + out_m = np.swapaxes(out_m, 1, 3) + out_mask = np.swapaxes(out_mask, 1, 3) + if return_cuts: + return out_m, out_mask, cut_rows, cut_cols + + return out_m, out_mask + +if __name__ == '__main__': + pass diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 5049b7e..407cd8c 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -1,19 +1,19 @@ -from shapely.geometry import shape import glob import os -from collections import defaultdict +import geopandas as gpd +import json import fiona from lxml import html from requests import get from copy import deepcopy from numpy import zeros, asarray, array, reshape, nan +from shapely.geometry import shape +from collections import defaultdict from rasterio import float32, open as rasopen from rasterio.mask import mask from prepare_images import ImageStack from sklearn.neighbors import KDTree from sat_image.warped_vrt import warp_single_image -import geopandas as gpd -import json NO_DATA = -1 @@ -74,7 +74,7 @@ def create_master_raster(image_stack, path, row, year, raster_directory): raster_geo = src.meta.copy() if first: - first_geo = deepcopy(raster_geo) + first_geo = raster_geo.copy() empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) stack = empty stack[i, :, :] = arr @@ -84,6 +84,9 @@ def create_master_raster(image_stack, path, row, year, raster_directory): stack[i, :, :] = arr except ValueError: # error can be thrown here if source raster doesn't have crs + # OR ! Because rasterio version. + # However, deepcopy becomes an issue with the latest + # version of rasterio. 
arr = warp_single_image(feature_raster, first_geo) stack[i, :, :] = arr @@ -95,9 +98,6 @@ def create_master_raster(image_stack, path, row, year, raster_directory): first_geo.update(count=len(paths_map.keys())) with rasopen(pth, mode='w', **first_geo) as dst: - dst.update_tags(path=path) - dst.update_tags(row=row) - dst.update_tags(year=year) dst.write(stack) return pth diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index 725b8df..e3d8695 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -1,18 +1,18 @@ import os +os.environ['KERAS_BACKEND'] = 'tensorflow' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #os.environ['CUDA_VISIBLE_DEVICES'] = '-1' +import tensorflow as tf import keras.backend as K import matplotlib.pyplot as plt import numpy as np import time -import tensorflow as tf from glob import glob from skimage import transform, util from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, Dropout, UpSampling2D) from tensorflow.keras.models import Model from tensorflow.keras.callbacks import TensorBoard -from data_utils import generate_class_mask, get_shapefile_path_row from multiprocessing import Pool from rasterio import open as rasopen from rasterio.mask import mask @@ -20,6 +20,7 @@ from fiona import open as fopen import json import geopandas as gpd +from data_generators import generate_balanced_data, load_raster, preprocess_data NO_DATA = -1 MAX_POOLS = 3 @@ -40,7 +41,7 @@ def custom_objective(y_true, y_pred): def fcnn_functional(n_classes): - x = Input((None, None, 39)) + x = Input((None, None, 36)) c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) @@ -81,7 +82,7 @@ def fcnn_functional(n_classes): c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) model = Model(inputs=x, outputs=c5) - 
model.summary() + #model.summary() return model def fcnn_model(n_classes): @@ -107,66 +108,6 @@ def one_hot_encoding(class_mask, n_classes): out[:, :, i][class_mask == i] = 1 return out -def rotation(image, angle): - return transform.rotate(image, angle, mode='constant', cval=NO_DATA) - -def random_noise(image): - return util.random_noise(image) - -def h_flip(image): - return image[:, ::-1] - -def augment_data(image, class_mask): - '''Randomly augments an image.''' - if np.random.randint(2): - deg = np.random.uniform(-25, 25) - image = rotation(image, deg) - class_mask = rotation(class_mask, deg) - if np.random.randint(2): - image = random_noise(image) - if np.random.randint(2): - image = h_flip(image) - class_mask = h_flip(class_mask) - if np.random.randint(2): - image = np.flipud(image) - class_mask = np.flipud(class_mask) - return image, class_mask - -def preprocess_data(master, mask, return_cuts=False): - shp = master.shape - rows = shp[1]; cols = shp[2] - cut_rows = rows % (2**MAX_POOLS) - cut_cols = cols % (2**MAX_POOLS) - out_m = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - - if cut_cols != 0 and cut_rows != 0: - out_m[0, :, :, :] = master[:, :-cut_rows, :-cut_cols] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :-cut_rows, :-cut_cols] - elif cut_cols == 0 and cut_rows != 0: - out_m[0, :, :, :] = master[:, :-cut_rows, :] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :-cut_rows, :] - elif cut_cols != 0 and cut_rows == 0: - out_m[0, :, :, :] = master[:, :, :-cut_cols] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :, :-cut_cols] - else: - out_m[0, :, :, :] = master[:, :, :] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :, :] - - out_m 
= np.swapaxes(out_m, 1, 3) - out_mask = np.swapaxes(out_mask, 1, 3) - if return_cuts: - return out_m, out_mask, cut_rows, cut_cols - - return out_m, out_mask - def create_model(n_classes): model = fcnn_functional(n_classes) model.compile(loss=custom_objective, @@ -174,197 +115,6 @@ def create_model(n_classes): metrics=['accuracy']) return model - -def all_matching_shapefiles(to_match, shapefile_directory): - out = [] - pr = get_shapefile_path_row(to_match) - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if get_shapefile_path_row(f) == pr and to_match not in f: - out.append(f) - return out - -def instances_per_epoch(shapefile_directory, image_directory, box_size, target): - - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if target in f: - all_matches = all_matching_shapefiles(f, shapefile_directory) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive error handling. - else: - target_mask = generate_class_mask(f, mask_file) - class_mask = np.ones((n_classes, target_mask.shape[1], target_mask.shape[2]))*NO_DATA - class_mask[1, :, :] = target_mask - required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) - masks = [] - for match in all_matches: - msk = generate_class_mask(match, mask_file) - samp = random_sample(msk, required_instances, box_size) - masks.append(samp) - for i, s in enumerate(masks): - class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 - # May need to do some preprocessing. 
- master = load_raster(master_raster) - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_master, sub_mask = preprocess_data(sub_master, sub_mask) - if np.all(sub_mask == NO_DATA): - continue - else: - yield sub_master, sub_mask - - -def generate_balanced_data(shapefile_directory, image_directory, box_size, target): - ''' This is pretty much for binary classification.''' - while True: - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if target in f: - all_matches = all_matching_shapefiles(f, shapefile_directory) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive error handling. 
- else: - target_mask = generate_class_mask(f, mask_file) - class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA - class_mask[1, :, :] = target_mask - required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) - masks = [] - for match in all_matches: - msk = generate_class_mask(match, mask_file) - samp = random_sample(msk, required_instances, box_size) - masks.append(samp) - for i, s in enumerate(masks): - class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 - - master = load_raster(master_raster) - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if np.all(sub_mask == NO_DATA): - continue - else: - n_negative = len(np.where(sub_mask[0, :, :] != NO_DATA)[1]) - positive = np.where(target_mask[:, :] != NO_DATA) - sorted_x = sorted(positive[1]) - sorted_y = sorted(positive[2]) - l = len(sorted_x) // 2 - center_x = sorted_x[l] - center_y = sorted_y[l] - ofs = CHUNK_SIZE // 2 - sub_positive = target_mask[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] - sub_master_positive = master[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] - required_instances = min(len(np.where(sub_positive[0, :, :] != NO_DATA)[1]), n_negative) - sub_negative = random_sample(sub_mask[0, :, :], required_instances, - box_size=0, class_code=1) - sub_master_negative = sub_master - sub_positive = random_sample(sub_positive[0, :, :], required_instances, - box_size=0, class_code=1) - one_hot_pos = np.ones((2, sub_positive.shape[0], sub_positive.shape[1]))*NO_DATA - one_hot_neg = np.ones((2, sub_negative.shape[0], sub_negative.shape[1]))*NO_DATA - one_hot_pos[1, :, :] = sub_positive - one_hot_neg[0, :, :] = sub_negative - sub_mas_pos, class_mask_pos = preprocess_data(sub_master_positive, - one_hot_pos) - sub_mas_neg, class_mask_neg = 
preprocess_data(sub_master_negative, - one_hot_neg) - ims = [sub_mas_pos, sub_mas_neg] - class_masks = [class_mask_pos, class_mask_neg] - for ii, jj in zip(ims, class_masks): - yield ii, jj - - - -def random_sample(class_mask, n_instances, box_size, class_code=1): - out = np.where(class_mask != NO_DATA) - class_mask = class_mask.copy() - # returns (indices_z, indices_x, indices_y) - try: - out_x = out[1] - out_y = out[2] - except IndexError as e: - out_x = out[0] - out_y = out[1] - - indices = np.random.choice(len(out_x), size=n_instances, replace=False) - out_x = out_x[indices] - out_y = out_y[indices] - - try: - class_mask[:, :, :] = NO_DATA - if box_size == 0: - class_mask[0, out_x, out_y] = class_code - else: - ofs = box_size // 2 - for x, y in zip(out_x, out_y): - class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = class_code - - except IndexError as e: - class_mask[:, :] = NO_DATA - if box_size == 0: - class_mask[out_x, out_y] = class_code - else: - ofs = box_size // 2 - for x, y in zip(out_x, out_y): - class_mask[x-ofs:x+ofs, y-ofs:y+ofs] = class_code - - return class_mask - - -def generate_binary_train(shapefile_directory, image_directory, box_size, target): - - while True: - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if target in f: - all_matches = all_matching_shapefiles(f, shapefile_directory) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive error handling. 
- else: - target_mask = generate_class_mask(f, mask_file) - class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA - class_mask[1, :, :] = target_mask - required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) - masks = [] - for match in all_matches: - msk = generate_class_mask(match, mask_file) - samp = random_sample(msk, required_instances, box_size) - masks.append(samp) - for i, s in enumerate(masks): - class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 - # May need to do some preprocessing. - master = load_raster(master_raster) - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_master, sub_mask = preprocess_data(sub_master, sub_mask) - if np.all(sub_mask == NO_DATA): - continue - else: - yield sub_master, sub_mask - - -def load_raster(master_raster): - with rasopen(master_raster, 'r') as src: - arr = src.read() - meta = src.meta.copy() - return arr, meta - def evaluate_image(master_raster, model, outfile=None): if not os.path.isfile(master_raster): @@ -372,8 +122,11 @@ def evaluate_image(master_raster, model, outfile=None): # TODO: More extensive error handling. 
else: master, meta = load_raster(master_raster) + class_mask = np.zeros((2, master.shape[1], master.shape[2])) out = np.zeros((master.shape[2], master.shape[1])) + x_plot =[] + y_plot =[] for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] @@ -382,6 +135,20 @@ def evaluate_image(master_raster, model, outfile=None): preds = model.predict(sub_master) preds = preds[0, :, :, :] preds = preds[:, :, 1] #np.argmax(preds, axis=2) + # fig, ax = plt.subplots(ncols=2) + # ax[0].imshow(master[38, :, :]) + # x_plot.append([i, i+CHUNK_SIZE, i, i+CHUNK_SIZE]) + # y_plot.append([j, j+CHUNK_SIZE, j+CHUNK_SIZE, j]) + # ax[0].plot(x_plot, y_plot, 'rx') + # ax[1].imshow(preds) + # plt.show() + # plot_or_not = input('Plot the master?') + # if plot_or_not == 'y': + # for q in range(sub_master.shape[3]): + # plt.figure() + # plt.imshow(sub_master[0, :, :, q]) + # plt.show() + if cut_cols == 0 and cut_rows == 0: out[j:j+CHUNK_SIZE, i:i+CHUNK_SIZE] = preds elif cut_cols == 0 and cut_rows != 0: @@ -396,6 +163,7 @@ def evaluate_image(master_raster, model, outfile=None): out[j:ofs_col, i:ofs_row] = preds else: print("whatcha got goin on here?") + print("Percent done: {:.3f}".format(i / master.shape[1])) out = np.swapaxes(out, 0, 1) @@ -440,7 +208,6 @@ def clip_raster(evaluated, path, row, outfile=None): shp = gpd.read_file(WRS2) with rasopen(evaluated, 'r') as src: - print(src.crs) shp = shp.to_crs(src.crs) meta = src.meta.copy() features = get_features(shp, path, row) @@ -481,35 +248,32 @@ def clip_raster(evaluated, path, row, outfile=None): n_classes = 2 out_directory = 'evaluated_images_fully_conv/' - for f in glob(os.path.join(out_directory, "*.tif")): - if 'probab' in f: - out = os.path.basename(f) - os.path.split(out)[1] - out = out[out.find("_")+1:] - out = out[out.find("_")+1:] - path = out[:2] - row = out[3:5] - out = os.path.splitext(out)[0] - out = 'eval_clipped_' + out + ".tif" - out 
= os.path.join(out_directory, out) - clip_raster(f, int(path), int(row), outfile=f) - - # pth = 'test_model.h5' - # if not os.path.isfile(pth): - # model = train_model(shapefile_directory, 76, image_directory, epochs=2) - # model.save(pth) - # else: - # model = tf.keras.models.load_model(pth, - # custom_objects={'custom_objective':custom_objective}) - - # for f in glob(os.path.join(image_directory, "*.tif")): - # if "class" not in f: + # for f in glob(os.path.join('oversamplin', "*.tif")): + # if 'corrected_master' in f: # out = os.path.basename(f) # os.path.split(out)[1] # out = out[out.find("_")+1:] - # out = out[out.find("_"):] - # out = os.path.splitext(out)[0] - # out = 'eval_probability' + out + ".tif" - # out = os.path.join(out_directory, out) - # evaluate_image(f, model, out) + # out = out[out.find("_")+1:] + # path = out[:2] + # row = out[3:5] + # clip_raster(f, int(path), int(row), outfile=f) + + pth = 'oversamplin/model_no_precip.h5' + if not os.path.isfile(pth): + model = train_model(shapefile_directory, 76, image_directory, epochs=3) + model.save(pth) + else: + model = tf.keras.models.load_model(pth, + custom_objects={'custom_objective':custom_objective}) + + for f in glob(os.path.join(image_directory, "*.tif")): + if "class" not in f: + out = os.path.basename(f) + os.path.split(out)[1] + out = out[out.find("_")+1:] + out = out[out.find("_"):] + out = os.path.splitext(out)[0] + out = 'no_precip' + out + ".tif" + out = os.path.join('oversamplin', out) + evaluate_image(f, model, out) diff --git a/pixel_classification/runspec.py b/pixel_classification/runspec.py index 41fe743..d18aabf 100644 --- a/pixel_classification/runspec.py +++ b/pixel_classification/runspec.py @@ -36,7 +36,7 @@ def landsat_rasters(): return b def climate_rasters(root): - return [f for f in glob(os.path.join(root, "*.tif")) if 'GridMet' in f] + return [f for f in glob(os.path.join(root, "*.tif")) if 'precip' in f] def ancillary_rasters(): From 3a4077cc5a627f52554e1074936a0f3b72d110e7 
Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 21 Feb 2019 15:10:07 -0700 Subject: [PATCH 35/89] Shapefiles can intersect multiple path/rows. For more training data, we want to download all --- pixel_classification/data_utils.py | 47 +++++++++++++++++-- pixel_classification/prepare_images.py | 35 +++++++------- pixel_classification/runner_from_shapefile.py | 22 +++++---- 3 files changed, 73 insertions(+), 31 deletions(-) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py index 407cd8c..eb6e788 100644 --- a/pixel_classification/data_utils.py +++ b/pixel_classification/data_utils.py @@ -2,7 +2,7 @@ import os import geopandas as gpd import json -import fiona +from fiona import open as fopen from lxml import html from requests import get from copy import deepcopy @@ -104,7 +104,7 @@ def create_master_raster(image_stack, path, row, year, raster_directory): def get_shapefile_lat_lon(shapefile): ''' Center of shapefile''' - with fiona.open(shapefile, "r") as src: + with fopen(shapefile, "r") as src: minx, miny, maxx, maxy = src.bounds latc = (maxy + miny) / 2 lonc = (maxx + minx) / 2 @@ -161,6 +161,43 @@ def get_pr_subset(poly, tiles): ls.append(str(p) + "_" + str(r)) return ls +def filter_shapefile(shapefile, out_directory): + """ Shapefiles may span multiple path/rows. + For training, we want all of the data available. + This function filters the polygons contained in + the shapefile into separate files for each path/row + contained in the shapefile. 
""" + path_row_map = defaultdict(list) + wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') + tree, path_rows, features = construct_kdtree(wrs2) + wrs2.close() + + cent_arr = array([0, 0]) + with fopen(shapefile, "r") as src: + meta = deepcopy(src.meta) + for feat in src: + poly = shape(feat['geometry']) + centroid = poly.centroid.coords[0] + cent_arr[0] = centroid[0] + cent_arr[1] = centroid[1] + centroid = cent_arr.reshape(1, -1) + dist, ind = tree.query(centroid, k=10) + tiles = features[ind[0]] + prs = get_pr_subset(poly, tiles) + for p in prs: + path_row_map[p].append(feat) + + outfile = os.path.basename(shapefile) + outfile = os.path.splitext(outfile)[0] + + for path_row in path_row_map: + out = outfile + path_row + ".shp" + with fopen(os.path.join(out_directory, out), 'w', **meta) as dst: + print("Saving {}".format(out)) + for feat in path_row_map[path_row]: + dst.write(feat) + + def split_shapefile(base, base_shapefile, data_directory): """ Shapefiles may deal with data over multiple path/rows. @@ -171,12 +208,12 @@ def split_shapefile(base, base_shapefile, data_directory): path_row = defaultdict(list) id_mapping = {} # TODO: un hardcode this directory. 
- wrs2 = fiona.open('../spatial_data/wrs2_descending_usa.shp', 'r') + wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') tree, path_rows, features = construct_kdtree(wrs2) wrs2.close() cent_arr = array([0, 0]) - with fiona.open(os.path.join(base, base_shapefile), "r") as src: + with fopen(os.path.join(base, base_shapefile), "r") as src: meta = deepcopy(src.meta) for feat in src: idd = feat['id'] @@ -234,7 +271,7 @@ def split_shapefile(base, base_shapefile, data_directory): continue out = prefix + "_" + key + ".shp" if len(unique[key]): - with fiona.open(os.path.join(data_directory, out), 'w', **meta) as dst: + with fopen(os.path.join(data_directory, out), 'w', **meta) as dst: print("Saving split shapefile to: {}".format(os.path.join(data_directory, out))) for feat in unique[key]: dst.write(id_mapping[feat]) diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index 522f756..3ccbe2f 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -102,7 +102,7 @@ def build_evaluating(self): self.get_landsat(fmask=False) self.profile = self.landsat.rasterio_geometry #self.get_et() - self.get_precip() + #self.get_precip() self.get_terrain() self.paths_map, self.masks = self._order_images() # paths map is just path-> location # in filesystem. @@ -133,8 +133,6 @@ def get_landsat(self, fmask=False): else: g = GoogleDownload(self.start, self.end, self.sat, latitude=self.lat, longitude=self.lon, output_path=self.root, max_cloud_percent=self.max_cloud) - self.path = g.p - self.row = g.r g.select_scenes(self.n) self.scenes = g.selected_scenes @@ -171,13 +169,16 @@ def get_precip(self): geometry[0]['crs'] = CRS({'init':'epsg:32612'}) bounds = poly_bounds.bounds for date in dates: - d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. 
- bds = GeoBounds(wsen=bounds) - gm = GridMet(variable='pr', clip_feature=geometry, - bbox=bds, target_profile=self.profile, date=d) - out = gm.get_data_subset() - outfile = os.path.join(self.root, 'GridMet{}.tif'.format(date)) - gm.save_raster(out, self.landsat.rasterio_geometry, outfile) + outfile = os.path.join(self.root, 'precip_{}.tif'.format(date)) + if not os.path.isfile(outfile): + print("Get {}".format(outfile)) + d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. + bds = GeoBounds(wsen=bounds) + gm = GridMet(variable='pr', clip_feature=geometry, + bbox=bds, target_profile=self.profile, date=d) + out = gm.get_data_subset() + gm.save_raster(out, self.landsat.rasterio_geometry, outfile) + def get_terrain(self): """ @@ -281,10 +282,10 @@ def _order_images(self): for sc in scenes: paths = os.listdir(os.path.join(self.root, sc)) - c = climate_rasters(self.root) + #c = climate_rasters(self.root) b = [os.path.join(self.root, sc, x) for x in paths if x.endswith(landsat_rasters()[self.sat])] a = [os.path.join(self.root, sc, x) for x in paths if x.endswith(ancillary_rasters())] - bands = a + b + c + bands = a + b# + c bands.sort() for p in bands: @@ -313,16 +314,16 @@ def _normalize_and_save_image(fname): else: rass_arr = rsrc.read() rass_arr = rass_arr.astype(float32) - profile = rsrc.profile + profile = rsrc.profile.copy() profile.update(dtype=float32) rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) scaler = StandardScaler() # z-normalization scaler.fit(rass_arr) rass_arr = scaler.transform(rass_arr) - with rasopen(fname, 'w', **profile) as dst: - dst.write(rass_arr, 1) - print("Normalized", fname) - dst.update_tags(normalized=True) + with rasopen(fname, 'w', **profile) as dst: + dst.write(rass_arr, 1) + print("Normalized", fname) + dst.update_tags(normalized=True) if __name__ == '__main__': diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py index def279a..cde589c 
100644 --- a/pixel_classification/runner_from_shapefile.py +++ b/pixel_classification/runner_from_shapefile.py @@ -6,7 +6,8 @@ from compose_array_single_shapefile import PTASingleShapefile, ShapefileSamplePoints from fiona import open as fopen from shapely.geometry import shape -from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster +from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster, filter_shapefile + def download_images_over_shapefile(shapefile, image_directory, year, master_raster_directory): '''Downloads p/r corresponding to the location of @@ -85,19 +86,13 @@ def split_shapefiles_multiproc(f): fname = os.path.basename(f) split_shapefile(shp_dir, fname, data_directory) -# Need a function that takes a targets dict - -if __name__ == "__main__": - +def dl_all_ims(): image_directory = 'image_data/' - shp = 'shapefile_data/backup' + shp = 'shapefile_data/' master = 'master_rasters/' year = 2013 - template = "{}_{}_{}" - done = set() - satellite = 8 for f in glob.glob(os.path.join(shp, "*.shp")): p, r = get_shapefile_path_row(f) @@ -105,3 +100,12 @@ def split_shapefiles_multiproc(f): if t not in done: done.add(t) download_images_over_shapefile(f, image_directory, year, master) + +if __name__ == "__main__": + # out_shapefile_directory = 'shapefile_data' + # shp = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/" + # for f in glob.glob(shp + "*.shp"): + # filter_shapefile(f, out_shapefile_directory) + dl_all_ims() + + From f7b9423a62f6c6fdbbfb881d16b1b3d0a9ac785e Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 22 Feb 2019 15:45:29 -0700 Subject: [PATCH 36/89] Working datagen pipeline --- pixel_classification/data_generators.py | 133 +++++++++++++++++------- pixel_classification/fully_conv.py | 29 +++--- 2 files changed, 110 insertions(+), 52 deletions(-) diff --git a/pixel_classification/data_generators.py b/pixel_classification/data_generators.py 
index b785eb2..812307f 100644 --- a/pixel_classification/data_generators.py +++ b/pixel_classification/data_generators.py @@ -3,11 +3,13 @@ from glob import glob from data_utils import generate_class_mask, get_shapefile_path_row from rasterio import open as rasopen +import time +import pickle NO_DATA = -1 MAX_POOLS = 3 CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. -NUM_CLASSES = 2 +NUM_CLASSES = 4 def random_sample(class_mask, n_instances, box_size, class_code=1): out = np.where(class_mask != NO_DATA) @@ -49,43 +51,88 @@ def load_raster(master_raster): meta = src.meta.copy() return arr, meta -def generate_binary_train(shapefile_directory, image_directory, box_size, target): - while True: - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if target in f: - all_matches = all_matching_shapefiles(f, shapefile_directory) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive error handling. - else: - target_mask = generate_class_mask(f, mask_file) - class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA - class_mask[1, :, :] = target_mask - required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) - masks = [] - for match in all_matches: - msk = generate_class_mask(match, mask_file) - samp = random_sample(msk, required_instances, box_size) - masks.append(samp) - for i, s in enumerate(masks): - class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 - # May need to do some preprocessing. 
- master = load_raster(master_raster) - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_master, sub_mask = preprocess_data(sub_master, sub_mask) - if np.all(sub_mask == NO_DATA): - continue - else: - yield sub_master, sub_mask +def assign_class_code(target_dict, shapefilename): + + for key in target_dict: + if key in shapefilename: + return target_dict[key] + print("{} has no known match in target_dict.".format(shapefilename)) + return None + + +class DataMask(object): + + def __init__(self, mask, class_code): + self.mask = mask + self.class_code = class_code + + +class DataTile(object): + + def __init__(self, data, class_mask, class_code): + self.dict = {} + self.dict['data'] = data + self.dict['class_mask'] = class_mask + self.dict['class_code'] = class_code + + def to_pickle(self, training_directory): + if not os.path.isdir(training_directory): + os.mkdir(training_directory) + template = os.path.join(training_directory, + 'class_{}_data/'.format(self.dict['class_code'])) + if not os.path.isdir(template): + os.mkdir(template) + # Need to save the dict object with a unique filename + outfile = os.path.join(template, str(int(time.time())) + ".pkl") + with open(outfile, 'wb') as f: + pickle.dump(self.dict, f, protocol=pickle.HIGHEST_PROTOCOL) + + def set_data(self, data): + self.dict['data'] = data + + def set_code(self, class_code): + self.dict['class_code'] = class_code + + def set_class_mask(self, class_mask): + self.dict['class_mask'] = class_mask + + +def create_training_data(target_dict, shapefile_directory, image_directory, training_directory): + ''' target_dict: {filename or string in filename : class_code} ''' + done = set() + train_raster = 'master_raster_' + mask_raster = 'class_mask_' + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if f not in done: + all_matches = 
all_matching_shapefiles(f, shapefile_directory) + done.add(f) + for match in all_matches: + done.add(match) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) # for rasterio.mask.mask + # this file is projected the same as the shapefile. + master, meta = load_raster(master_raster) + masks = [] + for match in all_matches: + msk = generate_class_mask(match, mask_file) + cc = assign_class_code(target_dict, match) + if cc is not None: + dm = DataMask(msk, cc) + masks.append(dm) + + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_masks = [] + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + for msk in masks: + s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + if not np.all(s == NO_DATA): + dt = DataTile(sub_master, s, msk.class_code) + dt.to_pickle(training_directory) def all_matching_shapefiles(to_match, shapefile_directory): out = [] @@ -225,4 +272,16 @@ def preprocess_data(master, mask, return_cuts=False): return out_m, out_mask if __name__ == '__main__': - pass + shapefile_directory = 'shapefile_data/' + image_directory = 'master_rasters/' + irr1 = 'Huntley' + irr2 = 'Sun_River' + fallow = 'Fallow' + forest = 'Forrest' + other = 'other' + target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} + year = 2013 + done = set() + n_classes = 2 + training_directory = 'training_data' + create_training_data(target_dict, shapefile_directory, image_directory, training_directory) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index e3d8695..cb267bb 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -167,7 +167,7 @@ def evaluate_image(master_raster, model, outfile=None): print("Percent done: {:.3f}".format(i / master.shape[1])) out = np.swapaxes(out, 0, 1) - out[out == 0] 
= np.nan + #out[out == 0] = np.nan out = np.expand_dims(out, axis=0) out = out.astype(np.float32) if outfile: @@ -248,20 +248,9 @@ def clip_raster(evaluated, path, row, outfile=None): n_classes = 2 out_directory = 'evaluated_images_fully_conv/' - # for f in glob(os.path.join('oversamplin', "*.tif")): - # if 'corrected_master' in f: - # out = os.path.basename(f) - # os.path.split(out)[1] - # out = out[out.find("_")+1:] - # out = out[out.find("_")+1:] - # path = out[:2] - # row = out[3:5] - # clip_raster(f, int(path), int(row), outfile=f) - - - pth = 'oversamplin/model_no_precip.h5' + pth = 'oversamplin/model_no_precip10epochs.h5' if not os.path.isfile(pth): - model = train_model(shapefile_directory, 76, image_directory, epochs=3) + model = train_model(shapefile_directory, 78, image_directory, epochs=7) model.save(pth) else: model = tf.keras.models.load_model(pth, @@ -274,6 +263,16 @@ def clip_raster(evaluated, path, row, outfile=None): out = out[out.find("_")+1:] out = out[out.find("_"):] out = os.path.splitext(out)[0] - out = 'no_precip' + out + ".tif" + out = 'no_precip10epochs' + out + ".tif" out = os.path.join('oversamplin', out) evaluate_image(f, model, out) + + for f in glob(os.path.join('oversamplin', "*.tif")): + if 'no_precip' in f: + out = os.path.basename(f) + os.path.split(out)[1] + out = out[out.find("_")+1:] + out = out[out.find("_")+1:] + path = out[:2] + row = out[3:5] + clip_raster(f, int(path), int(row), outfile=f) From 5c4da83a3ce8d5bc37a8bb74d14137d0523e18ab Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 22 Feb 2019 20:55:18 -0700 Subject: [PATCH 37/89] working on multiclass pipeline --- pixel_classification/data_generators.py | 91 ++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/pixel_classification/data_generators.py b/pixel_classification/data_generators.py index 812307f..a70d44a 100644 --- a/pixel_classification/data_generators.py +++ b/pixel_classification/data_generators.py @@ -1,17 +1,21 @@ 
import numpy as np import os +import time +import pickle +import matplotlib.pyplot as plt from glob import glob +from random import sample from data_utils import generate_class_mask, get_shapefile_path_row from rasterio import open as rasopen -import time -import pickle NO_DATA = -1 MAX_POOLS = 3 CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. NUM_CLASSES = 4 -def random_sample(class_mask, n_instances, box_size, class_code=1): +def random_sample(class_mask, n_instances, box_size=0, fill_value=1): + if box_size: + n_instances /= box_size out = np.where(class_mask != NO_DATA) class_mask = class_mask.copy() try: @@ -28,20 +32,20 @@ def random_sample(class_mask, n_instances, box_size, class_code=1): try: class_mask[:, :, :] = NO_DATA if box_size == 0: - class_mask[0, out_x, out_y] = class_code + class_mask[0, out_x, out_y] = fill_value else: ofs = box_size // 2 for x, y in zip(out_x, out_y): - class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = class_code + class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = fill_value except IndexError as e: class_mask[:, :] = NO_DATA if box_size == 0: - class_mask[out_x, out_y] = class_code + class_mask[out_x, out_y] = fill_value else: ofs = box_size // 2 for x, y in zip(out_x, out_y): - class_mask[x-ofs:x+ofs, y-ofs:y+ofs] = class_code + class_mask[x-ofs:x+ofs, y-ofs:y+ofs] = fill_value return class_mask @@ -52,7 +56,6 @@ def load_raster(master_raster): return arr, meta - def assign_class_code(target_dict, shapefilename): for key in target_dict: @@ -209,6 +212,74 @@ def generate_balanced_data(shapefile_directory, image_directory, box_size, targe for ii, jj in zip(ims, class_masks): yield ii, jj +class DataGen: + + def __init__(self, class_filename): + self.file_list = None + self.class_filename = class_filename + self._get_files() + self.n_files = len(self.file_list) + self.first = True + self.idx = 0 + + def _get_files(self): + self.file_list = [x[2] for x in os.walk(self.class_filename)][0] + self.file_list = 
[os.path.join(self.class_filename, x) for x in self.file_list] + + def next(self): + if self.idx == self.n_files: + self.first = True + if self.first: + self.first = False + self.idx = 0 + self.shuffled = sample(self.file_list, self.n_files) + out = self.shuffled[self.idx] + self.idx += 1 + else: + out = self.shuffled[self.idx] + self.idx += 1 + return self._from_pickle(out) + + def _from_pickle(self, filename): + with open(filename, 'rb') as f: + data = pickle.load(f) + return data + + +def generate_training_data(n_classes, training_directory): + ''' Assumes data is stored in training_directory + in subdirectories labeled class_n_train ''' + class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] + generators = [] + for d in class_dirs: + generators.append(DataGen(d)) + # I want to + # a. shuffle the filenames to draw. + # b. Define one epoch to be when we've iterated over all + # examples of the class with the most training examples. + min_samples = np.inf + data = [] + for gen in generators: + out = gen.next() + data.append(out) + n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + if n_samples < min_samples: + min_samples = n_samples + + for subset in data: + samp = random_sample(subset['class_mask'], min_samples) + one_hot = np.ones((NUM_CLASSES, samp.shape[1], samp.shape[2]))*NO_DATA + one_hot[int(subset['class_code']), :, :] = samp + subset['class_mask'] = one_hot + + + # need to preprocess data. + # This means ... + # Sample min() random examples from each data chunk. + # return the associated master_raster chunk and + # the class_mask chunk, in one-hot form. 
+ + def rotation(image, angle): return transform.rotate(image, angle, mode='constant', cval=NO_DATA) @@ -219,6 +290,7 @@ def random_noise(image): def h_flip(image): return image[:, ::-1] + def augment_data(image, class_mask): '''Randomly augments an image.''' if np.random.randint(2): @@ -284,4 +356,5 @@ def preprocess_data(master, mask, return_cuts=False): done = set() n_classes = 2 training_directory = 'training_data' - create_training_data(target_dict, shapefile_directory, image_directory, training_directory) + #create_training_data(target_dict, shapefile_directory, image_directory, training_directory) + generate_training_data(4, training_directory) From ceae6dbb26a54e1c7fa06956bb5e0e42e2096de7 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sat, 23 Feb 2019 14:19:02 -0700 Subject: [PATCH 38/89] Working multiclass pipeline. Built in accuracy metrics do not reflect true accuracy, so they're omitted. --- pixel_classification/data_generators.py | 66 ++++++++------- pixel_classification/fully_conv.py | 103 +++++++++++++----------- 2 files changed, 94 insertions(+), 75 deletions(-) diff --git a/pixel_classification/data_generators.py b/pixel_classification/data_generators.py index a70d44a..a4b28c2 100644 --- a/pixel_classification/data_generators.py +++ b/pixel_classification/data_generators.py @@ -4,7 +4,7 @@ import pickle import matplotlib.pyplot as plt from glob import glob -from random import sample +from random import sample, shuffle from data_utils import generate_class_mask, get_shapefile_path_row from rasterio import open as rasopen @@ -129,7 +129,6 @@ def create_training_data(target_dict, shapefile_directory, image_directory, trai for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): - sub_masks = [] sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] for msk in masks: s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] @@ -246,9 +245,10 @@ def _from_pickle(self, filename): return data -def 
generate_training_data(n_classes, training_directory): +def generate_training_data(training_directory, box_size=6): ''' Assumes data is stored in training_directory - in subdirectories labeled class_n_train ''' + in subdirectories labeled class_n_train + and that n_classes is a global variable.''' class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] generators = [] for d in class_dirs: @@ -257,28 +257,38 @@ def generate_training_data(n_classes, training_directory): # a. shuffle the filenames to draw. # b. Define one epoch to be when we've iterated over all # examples of the class with the most training examples. - min_samples = np.inf - data = [] - for gen in generators: - out = gen.next() - data.append(out) - n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) - if n_samples < min_samples: - min_samples = n_samples - - for subset in data: - samp = random_sample(subset['class_mask'], min_samples) - one_hot = np.ones((NUM_CLASSES, samp.shape[1], samp.shape[2]))*NO_DATA - one_hot[int(subset['class_code']), :, :] = samp - subset['class_mask'] = one_hot - - - # need to preprocess data. - # This means ... - # Sample min() random examples from each data chunk. - # return the associated master_raster chunk and - # the class_mask chunk, in one-hot form. 
- + while True: + min_samples = np.inf + data = [] + for gen in generators: + out = gen.next().copy() + data.append(out) + n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + if n_samples < min_samples: + min_samples = n_samples + for subset in data: + #min_samples = len(np.where(subset != NO_DATA)[0]) + samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, + fill_value=1) + one_hot = np.ones((NUM_CLASSES, samp.shape[1], samp.shape[2]))*NO_DATA + one_hot[int(subset['class_code']), :, :] = samp + for i in range(NUM_CLASSES): + if i != int(subset['class_code']): + one_hot[i, :, :][samp[0, :, :] != NO_DATA] = 0 + subset['class_mask'] = one_hot + + masters = [] + masks = [] + for subset in data: + master, mask = preprocess_data(subset['data'], subset['class_mask']) + masters.append(master) + masks.append(mask) + + outt = list(zip(masters, masks)) + shuffle(outt) + for ms, msk in outt: + msk = msk.astype(np.int32) + yield ms, msk def rotation(image, angle): @@ -353,8 +363,6 @@ def preprocess_data(master, mask, return_cuts=False): other = 'other' target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} year = 2013 - done = set() - n_classes = 2 training_directory = 'training_data' #create_training_data(target_dict, shapefile_directory, image_directory, training_directory) - generate_training_data(4, training_directory) + generate_training_data(training_directory) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index cb267bb..53501f3 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -4,9 +4,13 @@ #os.environ['CUDA_VISIBLE_DEVICES'] = '-1' import tensorflow as tf import keras.backend as K +#tf.enable_eager_execution() import matplotlib.pyplot as plt import numpy as np import time +import json +import geopandas as gpd +import sys from glob import glob from skimage import transform, util from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, 
Conv2DTranspose, @@ -18,26 +22,37 @@ from rasterio.mask import mask from shapely.geometry import shape from fiona import open as fopen -import json -import geopandas as gpd -from data_generators import generate_balanced_data, load_raster, preprocess_data +from data_generators import generate_training_data, load_raster, preprocess_data NO_DATA = -1 MAX_POOLS = 3 CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. -NUM_CLASSES = 2 +NUM_CLASSES = 4 WRS2 = '../spatial_data/wrs2_descending_usa.shp' def custom_objective(y_true, y_pred): '''I want to mask all values that are not data, given a y_true - that has NODATA values. ''' - y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], 2)) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], 2)) + that has NODATA values. The boolean mask + operation is failing. It should output + a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) + tensor.''' + # fig, ax = plt.subplots(ncols=2) + # boo = y_true.numpy() + # boo2 = y_pred.numpy() + # ax[0].imshow(boo[0, :, :, 0]) + # ax[1].imshow(boo2[0, :, :, 0]) + # plt.show() + # Let's see the argmax of y_true, y_pred. 
+ y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) masked = tf.not_equal(y_true, NO_DATA) - y_true_mask = tf.boolean_mask(y_true, masked) - y_pred_mask = tf.boolean_mask(y_pred, masked) - return tf.keras.losses.binary_crossentropy(y_true_mask, y_pred_mask) + indices = tf.where(masked) + indices = tf.to_int32(indices) + indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) + y_true_mask = tf.gather_nd(params=y_true, indices=indices) + y_pred_mask = tf.gather_nd(params=y_pred, indices=indices) + return tf.keras.losses.categorical_crossentropy(y_true_mask, y_pred_mask) def fcnn_functional(n_classes): @@ -89,7 +104,7 @@ def fcnn_model(n_classes): model = tf.keras.Sequential() # Must define the input shape in the first layer of the neural network model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=8, padding='same', activation='relu', - input_shape=(None, None, 39))) + input_shape=(None, None, 36))) model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=4, padding='same', activation='relu')) model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=4, padding='same', activation='relu')) model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) @@ -100,20 +115,6 @@ def fcnn_model(n_classes): #model.summary() return model -def one_hot_encoding(class_mask, n_classes): - '''Assumes classes range from 0 -> (n-1)''' - shp = class_mask.shape - out = np.ones((shp[0], shp[1], n_classes))*NO_DATA - for i in range(n_classes): - out[:, :, i][class_mask == i] = 1 - return out - -def create_model(n_classes): - model = fcnn_functional(n_classes) - model.compile(loss=custom_objective, - optimizer='adam', - metrics=['accuracy']) - return model def evaluate_image(master_raster, model, outfile=None): @@ -134,7 +135,7 @@ def evaluate_image(master_raster, model, outfile=None): sub_master, sub_mask, cut_rows, cut_cols = 
preprocess_data(sub_master, sub_mask, return_cuts=True) preds = model.predict(sub_master) preds = preds[0, :, :, :] - preds = preds[:, :, 1] #np.argmax(preds, axis=2) + preds = np.argmax(preds, axis=2) # fig, ax = plt.subplots(ncols=2) # ax[0].imshow(master[38, :, :]) # x_plot.append([i, i+CHUNK_SIZE, i, i+CHUNK_SIZE]) @@ -164,7 +165,7 @@ def evaluate_image(master_raster, model, outfile=None): else: print("whatcha got goin on here?") - print("Percent done: {:.3f}".format(i / master.shape[1])) + sys.stdout.write("Percent done: {:.4f}\r".format(i / master.shape[1])) out = np.swapaxes(out, 0, 1) #out[out == 0] = np.nan @@ -179,14 +180,12 @@ def save_raster(arr, outfile, meta): with rasopen(outfile, 'w', **meta) as dst: dst.write(arr) -def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size=6, epochs=3): +def train_model(training_directory, steps_per_epoch, image_directory, box_size=0, epochs=3): # image shape will change here, so it must be # inferred at runtime. - model = create_model(n_classes) + model = create_model(NUM_CLASSES) tb = TensorBoard(log_dir='graphs/') - n_augmented = 0 - train_generator = generate_balanced_data(shapefile_directory, image_directory, box_size, - 'irrigated') + train_generator = generate_training_data(training_directory, box_size) model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs, @@ -195,6 +194,12 @@ def train_model(shapefile_directory, steps_per_epoch, image_directory, box_size= use_multiprocessing=False) return model +def create_model(n_classes): + model = fcnn_functional(n_classes) + model.compile(loss=custom_objective, + optimizer=tf.keras.optimizers.Adam()) + return model + def get_features(gdf, path, row): tmp = json.loads(gdf.to_json()) features = [] @@ -213,10 +218,22 @@ def clip_raster(evaluated, path, row, outfile=None): features = get_features(shp, path, row) out_image, out_transform = mask(src, shapes=features, nodata=np.nan) - out_image[out_image == 0] = np.nan + 
#out_image[out_image != 0] = np.nan if outfile: save_raster(out_image, outfile, meta) + +def clip_rasters(evaluated_tif_dir, include_string): + for f in glob(os.path.join(evaluated_tif_dir, "*.tif")): + if include_string in f: + out = os.path.basename(f) + os.path.split(out)[1] + out = out[out.find("_")+1:] + out = out[out.find("_")+1:] + path = out[:2] + row = out[3:5] + clip_raster(f, int(path), int(row), outfile=f) + if __name__ == '__main__': # Steps: # 1. split shapefiles @@ -247,10 +264,12 @@ def clip_raster(evaluated, path, row, outfile=None): mask_raster = 'class_mask_' n_classes = 2 out_directory = 'evaluated_images_fully_conv/' + training_directory = 'training_data' - pth = 'oversamplin/model_no_precip10epochs.h5' + m_dir = 'data_from_disk/balance/all_classes' + pth = os.path.join(m_dir, "model_axx.h5") if not os.path.isfile(pth): - model = train_model(shapefile_directory, 78, image_directory, epochs=7) + model = train_model(training_directory, 87, image_directory, epochs=5) model.save(pth) else: model = tf.keras.models.load_model(pth, @@ -263,16 +282,8 @@ def clip_raster(evaluated, path, row, outfile=None): out = out[out.find("_")+1:] out = out[out.find("_"):] out = os.path.splitext(out)[0] - out = 'no_precip10epochs' + out + ".tif" - out = os.path.join('oversamplin', out) + out = 'evaluated_master' + out + ".tif" + out = os.path.join(m_dir, out) evaluate_image(f, model, out) - for f in glob(os.path.join('oversamplin', "*.tif")): - if 'no_precip' in f: - out = os.path.basename(f) - os.path.split(out)[1] - out = out[out.find("_")+1:] - out = out[out.find("_")+1:] - path = out[:2] - row = out[3:5] - clip_raster(f, int(path), int(row), outfile=f) + clip_rasters(m_dir, "evaluated") From 361aad00dcbf5dd432a31fb955c35fee16803e94 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sat, 23 Feb 2019 16:51:15 -0700 Subject: [PATCH 39/89] Need to implement IoU for some test data. 
--- pixel_classification/data_generators.py | 6 +-- pixel_classification/fully_conv.py | 60 +++++++++++-------------- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/pixel_classification/data_generators.py b/pixel_classification/data_generators.py index a4b28c2..fd70f9d 100644 --- a/pixel_classification/data_generators.py +++ b/pixel_classification/data_generators.py @@ -245,7 +245,7 @@ def _from_pickle(self, filename): return data -def generate_training_data(training_directory, box_size=6): +def generate_training_data(training_directory, box_size=0): ''' Assumes data is stored in training_directory in subdirectories labeled class_n_train and that n_classes is a global variable.''' @@ -257,6 +257,7 @@ def generate_training_data(training_directory, box_size=6): # a. shuffle the filenames to draw. # b. Define one epoch to be when we've iterated over all # examples of the class with the most training examples. + # TODO: Apply image augmentation. while True: min_samples = np.inf data = [] @@ -267,7 +268,6 @@ def generate_training_data(training_directory, box_size=6): if n_samples < min_samples: min_samples = n_samples for subset in data: - #min_samples = len(np.where(subset != NO_DATA)[0]) samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, fill_value=1) one_hot = np.ones((NUM_CLASSES, samp.shape[1], samp.shape[2]))*NO_DATA @@ -365,4 +365,4 @@ def preprocess_data(master, mask, return_cuts=False): year = 2013 training_directory = 'training_data' #create_training_data(target_dict, shapefile_directory, image_directory, training_directory) - generate_training_data(training_directory) + #generate_training_data(training_directory) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index 53501f3..97b31b2 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -116,7 +116,7 @@ def fcnn_model(n_classes): return model -def evaluate_image(master_raster, model, 
outfile=None): +def evaluate_image(master_raster, model, outfile=None, ii=None): if not os.path.isfile(master_raster): print("Master raster not created for {}".format(suffix)) @@ -125,7 +125,7 @@ def evaluate_image(master_raster, model, outfile=None): master, meta = load_raster(master_raster) class_mask = np.zeros((2, master.shape[1], master.shape[2])) - out = np.zeros((master.shape[2], master.shape[1])) + out = np.zeros((NUM_CLASSES, master.shape[2], master.shape[1])) x_plot =[] y_plot =[] for i in range(0, master.shape[1], CHUNK_SIZE): @@ -135,52 +135,39 @@ def evaluate_image(master_raster, model, outfile=None): sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, return_cuts=True) preds = model.predict(sub_master) preds = preds[0, :, :, :] - preds = np.argmax(preds, axis=2) - # fig, ax = plt.subplots(ncols=2) - # ax[0].imshow(master[38, :, :]) - # x_plot.append([i, i+CHUNK_SIZE, i, i+CHUNK_SIZE]) - # y_plot.append([j, j+CHUNK_SIZE, j+CHUNK_SIZE, j]) - # ax[0].plot(x_plot, y_plot, 'rx') - # ax[1].imshow(preds) - # plt.show() - # plot_or_not = input('Plot the master?') - # if plot_or_not == 'y': - # for q in range(sub_master.shape[3]): - # plt.figure() - # plt.imshow(sub_master[0, :, :, q]) - # plt.show() + preds = np.swapaxes(preds, 1, 2) + preds = np.swapaxes(preds, 0, 1) + #preds = np.argmax(preds, axis=2) if cut_cols == 0 and cut_rows == 0: - out[j:j+CHUNK_SIZE, i:i+CHUNK_SIZE] = preds + out[:,j:j+CHUNK_SIZE, i:i+CHUNK_SIZE] = preds elif cut_cols == 0 and cut_rows != 0: ofs = master.shape[1]-cut_rows - out[j:j+CHUNK_SIZE, i:ofs] = preds + out[:, j:j+CHUNK_SIZE, i:ofs] = preds elif cut_cols != 0 and cut_rows == 0: ofs = master.shape[2]-cut_cols - out[j:ofs, i:i+CHUNK_SIZE] = preds + out[:, j:ofs, i:i+CHUNK_SIZE] = preds elif cut_cols != 0 and cut_rows != 0: ofs_col = master.shape[2]-cut_cols ofs_row = master.shape[1]-cut_rows - out[j:ofs_col, i:ofs_row] = preds + out[:, j:ofs_col, i:ofs_row] = preds else: print("whatcha got goin on 
here?") - sys.stdout.write("Percent done: {:.4f}\r".format(i / master.shape[1])) + sys.stdout.write("ii: {} Percent done: {:.4f}\r".format(ii, i / master.shape[1])) - out = np.swapaxes(out, 0, 1) - #out[out == 0] = np.nan - out = np.expand_dims(out, axis=0) + out = np.swapaxes(out, 1, 2) out = out.astype(np.float32) if outfile: save_raster(out, outfile, meta) return out def save_raster(arr, outfile, meta): - meta.update(count=1) + meta.update(count=NUM_CLASSES) with rasopen(outfile, 'w', **meta) as dst: dst.write(arr) -def train_model(training_directory, steps_per_epoch, image_directory, box_size=0, epochs=3): +def train_model(training_directory, steps_per_epoch, box_size=0, epochs=3): # image shape will change here, so it must be # inferred at runtime. model = create_model(NUM_CLASSES) @@ -191,7 +178,7 @@ def train_model(training_directory, steps_per_epoch, image_directory, box_size=0 epochs=epochs, verbose=1, callbacks=[tb], - use_multiprocessing=False) + use_multiprocessing=True) return model def create_model(n_classes): @@ -222,18 +209,19 @@ def clip_raster(evaluated, path, row, outfile=None): if outfile: save_raster(out_image, outfile, meta) - def clip_rasters(evaluated_tif_dir, include_string): for f in glob(os.path.join(evaluated_tif_dir, "*.tif")): if include_string in f: out = os.path.basename(f) os.path.split(out)[1] out = out[out.find("_")+1:] - out = out[out.find("_")+1:] + #out = out[out.find("_")+1:] path = out[:2] row = out[3:5] clip_raster(f, int(path), int(row), outfile=f) +# TODO: Implement IoU so I can actually see how my model is doing. + if __name__ == '__main__': # Steps: # 1. 
split shapefiles @@ -267,14 +255,14 @@ def clip_rasters(evaluated_tif_dir, include_string): training_directory = 'training_data' m_dir = 'data_from_disk/balance/all_classes' - pth = os.path.join(m_dir, "model_axx.h5") + pth = os.path.join(m_dir, "model_adam30epochs.h5") if not os.path.isfile(pth): - model = train_model(training_directory, 87, image_directory, epochs=5) + model = train_model(training_directory, 87, epochs=30) model.save(pth) else: model = tf.keras.models.load_model(pth, custom_objects={'custom_objective':custom_objective}) - + ii = 0 for f in glob(os.path.join(image_directory, "*.tif")): if "class" not in f: out = os.path.basename(f) @@ -282,8 +270,10 @@ def clip_rasters(evaluated_tif_dir, include_string): out = out[out.find("_")+1:] out = out[out.find("_"):] out = os.path.splitext(out)[0] - out = 'evaluated_master' + out + ".tif" + out = '30epochstestadamevaluated' + out + ".tif" + #out = 'testing' + out + ".tif" out = os.path.join(m_dir, out) - evaluate_image(f, model, out) + ii += 1 + evaluate_image(f, model, outfile=out, ii=ii) + clip_rasters(m_dir, "test") - clip_rasters(m_dir, "evaluated") From 897cf7ad45218599ece42391e52d809a8615c378 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 24 Feb 2019 11:22:30 -0700 Subject: [PATCH 40/89] Starting to compare binary vs multiclass 2D conv. nets --- pixel_classification/data_generators.py | 26 +++- pixel_classification/fully_conv.py | 174 +++++++----------------- pixel_classification/models.py | 151 ++++++++++++++++++++ 3 files changed, 217 insertions(+), 134 deletions(-) create mode 100644 pixel_classification/models.py diff --git a/pixel_classification/data_generators.py b/pixel_classification/data_generators.py index fd70f9d..ec29673 100644 --- a/pixel_classification/data_generators.py +++ b/pixel_classification/data_generators.py @@ -9,7 +9,7 @@ from rasterio import open as rasopen NO_DATA = -1 -MAX_POOLS = 3 +MAX_POOLS = 5 CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. 
NUM_CLASSES = 4 @@ -120,6 +120,7 @@ def create_training_data(target_dict, shapefile_directory, image_directory, trai # this file is projected the same as the shapefile. master, meta = load_raster(master_raster) masks = [] + all_matches.append(f) for match in all_matches: msk = generate_class_mask(match, mask_file) cc = assign_class_code(target_dict, match) @@ -284,9 +285,8 @@ def generate_training_data(training_directory, box_size=0): masters.append(master) masks.append(mask) - outt = list(zip(masters, masks)) - shuffle(outt) - for ms, msk in outt: + # Shuffle order of data here? + for ms, msk in zip(masters, masks): msk = msk.astype(np.int32) yield ms, msk @@ -363,6 +363,18 @@ def preprocess_data(master, mask, return_cuts=False): other = 'other' target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} year = 2013 - training_directory = 'training_data' - #create_training_data(target_dict, shapefile_directory, image_directory, training_directory) - #generate_training_data(training_directory) + train_dir = 'training_data/train/' + shp_train = 'shapefile_data/train/' + # create_training_data(target_dict, shp_train, image_directory, train_dir) + # print("Created training data") + test_dir = 'training_data/test/' + shp_test = 'shapefile_data/test/' + # create_training_data(target_dict, shp_test, image_directory, test_dir) + j = 0 + for k in generate_training_data(train_dir): + j += 1 + print("Train steps:", j) + j = 0 + for k in generate_training_data(test_dir): + j += 1 + print("Test steps:", j) diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py index 97b31b2..b796633 100644 --- a/pixel_classification/fully_conv.py +++ b/pixel_classification/fully_conv.py @@ -1,35 +1,43 @@ import os -os.environ['KERAS_BACKEND'] = 'tensorflow' -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #os.environ['CUDA_VISIBLE_DEVICES'] = '-1' import tensorflow as tf import keras.backend as K #tf.enable_eager_execution() 
import matplotlib.pyplot as plt import numpy as np -import time import json import geopandas as gpd import sys from glob import glob from skimage import transform, util -from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, -Concatenate, Dropout, UpSampling2D) -from tensorflow.keras.models import Model from tensorflow.keras.callbacks import TensorBoard -from multiprocessing import Pool from rasterio import open as rasopen from rasterio.mask import mask from shapely.geometry import shape from fiona import open as fopen from data_generators import generate_training_data, load_raster, preprocess_data +from models import fcnn_functional NO_DATA = -1 -MAX_POOLS = 3 -CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. +MAX_POOLS = 5 +CHUNK_SIZE = 1248 # some value that is divisible by 2^3. NUM_CLASSES = 4 WRS2 = '../spatial_data/wrs2_descending_usa.shp' +def m_acc(y_true, y_pred): + ''' Calculate accuracy from masked data. + The built-in accuracy metric uses all data (masked & unmasked).''' + y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) + masked = tf.not_equal(y_true, NO_DATA) + indices = tf.where(masked) + indices = tf.to_int32(indices) + indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) + y_true_masked = tf.gather_nd(params=y_true, indices=indices) + y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) + return K.cast(K.equal(K.argmax(y_true_masked, axis=-1), K.argmax(y_pred_masked, axis=-1)), K.floatx()) + def custom_objective(y_true, y_pred): '''I want to mask all values that are not data, given a y_true @@ -37,97 +45,27 @@ def custom_objective(y_true, y_pred): operation is failing. 
It should output a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) tensor.''' - # fig, ax = plt.subplots(ncols=2) - # boo = y_true.numpy() - # boo2 = y_pred.numpy() - # ax[0].imshow(boo[0, :, :, 0]) - # ax[1].imshow(boo2[0, :, :, 0]) - # plt.show() - # Let's see the argmax of y_true, y_pred. + # Dice coefficient? y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) masked = tf.not_equal(y_true, NO_DATA) indices = tf.where(masked) indices = tf.to_int32(indices) indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) - y_true_mask = tf.gather_nd(params=y_true, indices=indices) - y_pred_mask = tf.gather_nd(params=y_pred, indices=indices) - return tf.keras.losses.categorical_crossentropy(y_true_mask, y_pred_mask) - -def fcnn_functional(n_classes): - - x = Input((None, None, 36)) - - c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) - c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) - mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) - - c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(mp1) - c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(c2) - mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - mp2 = Dropout(0.5)(mp2) - - c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(mp2) - c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(c3) - mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) - - last_conv = Conv2D(filters=256, kernel_size=(3,3), activation='relu', padding='same')(mp3) - - u1 = UpSampling2D(size=(2, 2))(last_conv) - u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) - u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) - - u1_c3 = Concatenate()([c3, u1]) - - u2 = Conv2D(filters=128, 
kernel_size=(3,3), activation='relu', padding='same')(u1_c3) - u2 = UpSampling2D(size=(2, 2))(u2) - u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = Dropout(0.5)(u2) - - u2_c2 = Concatenate()([u2, c2]) - u2_c2 = Dropout(0.5)(u2_c2) - - c4 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u2_c2) - u3 = UpSampling2D(size=(2, 2))(c4) - u3 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u3) - - u3_c1 = Concatenate()([u3, c1]) - - c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) - - model = Model(inputs=x, outputs=c5) - #model.summary() - return model - -def fcnn_model(n_classes): - model = tf.keras.Sequential() - # Must define the input shape in the first layer of the neural network - model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=8, padding='same', activation='relu', - input_shape=(None, None, 36))) - model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=4, padding='same', activation='relu')) - model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=4, padding='same', activation='relu')) - model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) - model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', - activation='softmax')) # 1x1 convolutions for pixel-wise prediciton. 
- # Take a look at the model summary - #model.summary() - return model - + y_true_masked = tf.gather_nd(params=y_true, indices=indices) + y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) + return tf.keras.losses.categorical_crossentropy(y_true_masked, y_pred_masked) def evaluate_image(master_raster, model, outfile=None, ii=None): if not os.path.isfile(master_raster): print("Master raster not created for {}".format(suffix)) - # TODO: More extensive error handling. + # TODO: More extensive handling of this case. else: master, meta = load_raster(master_raster) - - class_mask = np.zeros((2, master.shape[1], master.shape[2])) - out = np.zeros((NUM_CLASSES, master.shape[2], master.shape[1])) - x_plot =[] - y_plot =[] + class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder + out = np.zeros((master.shape[1], master.shape[2], NUM_CLASSES)) + for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] @@ -135,9 +73,6 @@ def evaluate_image(master_raster, model, outfile=None, ii=None): sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, return_cuts=True) preds = model.predict(sub_master) preds = preds[0, :, :, :] - preds = np.swapaxes(preds, 1, 2) - preds = np.swapaxes(preds, 0, 1) - #preds = np.argmax(preds, axis=2) if cut_cols == 0 and cut_rows == 0: out[:,j:j+CHUNK_SIZE, i:i+CHUNK_SIZE] = preds @@ -154,9 +89,9 @@ def evaluate_image(master_raster, model, outfile=None, ii=None): else: print("whatcha got goin on here?") - sys.stdout.write("ii: {} Percent done: {:.4f}\r".format(ii, i / master.shape[1])) + sys.stdout.write("N eval: {}. 
Percent done: {:.4f}\r".format(ii, i / master.shape[1])) - out = np.swapaxes(out, 1, 2) + out = np.swapaxes(out, 0, 2) out = out.astype(np.float32) if outfile: save_raster(out, outfile, meta) @@ -170,10 +105,18 @@ def save_raster(arr, outfile, meta): def train_model(training_directory, steps_per_epoch, box_size=0, epochs=3): # image shape will change here, so it must be # inferred at runtime. + ''' This function assumes that train/test data are + subdirectories of training_directory, with + the names train/test.''' model = create_model(NUM_CLASSES) tb = TensorBoard(log_dir='graphs/') - train_generator = generate_training_data(training_directory, box_size) + train = os.path.join(training_directory, 'train') + test = os.path.join(training_directory, 'test') + train_generator = generate_training_data(train, box_size) + test_generator = generate_training_data(test, box_size) model.fit_generator(train_generator, + validation_data=test_generator, + validation_steps=31, steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=1, @@ -181,12 +124,15 @@ def train_model(training_directory, steps_per_epoch, box_size=0, epochs=3): use_multiprocessing=True) return model + def create_model(n_classes): model = fcnn_functional(n_classes) model.compile(loss=custom_objective, - optimizer=tf.keras.optimizers.Adam()) + metrics=[m_acc], + optimizer='adam') return model + def get_features(gdf, path, row): tmp = json.loads(gdf.to_json()) features = [] @@ -223,57 +169,31 @@ def clip_rasters(evaluated_tif_dir, include_string): # TODO: Implement IoU so I can actually see how my model is doing. if __name__ == '__main__': - # Steps: - # 1. split shapefiles - # 2. download all images - # 3. Get all shapefiles corresponding to a given - # p/r. If there aren't n_classes shapefiles - # for a given path/row, randomly sample from - # places outside the annotated data that is present. 
- # else: generate a class mask with class balance, - # where all of the pixels are drawn from irrigated - # areas and only some from other areas. - # 4. Input this into the model. - # Here assume steps 3 and 4 are done and then synthesize - # the steps into one coherent file. - # need an easier way to specify year. - # Let's do a binary classification model. + shapefile_directory = 'shapefile_data/backup' - sample_dir = os.path.join(shapefile_directory, 'sample_points') image_directory = 'master_rasters/' - target = 'irrigated' - fallow = 'Fallow' - forest = 'Forrest' - other = 'other' - target_dict = {target:0, fallow:1, forest:2, other:3} - year = 2013 - done = set() - train_raster = 'master_raster_' - mask_raster = 'class_mask_' - n_classes = 2 - out_directory = 'evaluated_images_fully_conv/' training_directory = 'training_data' - m_dir = 'data_from_disk/balance/all_classes' - pth = os.path.join(m_dir, "model_adam30epochs.h5") + m_dir = 'eval_test' + pth = os.path.join(m_dir, "model_acctst.h5") if not os.path.isfile(pth): - model = train_model(training_directory, 87, epochs=30) + model = train_model(training_directory, 109, epochs=2) model.save(pth) else: model = tf.keras.models.load_model(pth, custom_objects={'custom_objective':custom_objective}) ii = 0 for f in glob(os.path.join(image_directory, "*.tif")): - if "class" not in f: + if "class" not in f and "37_28" in f: out = os.path.basename(f) os.path.split(out)[1] out = out[out.find("_")+1:] out = out[out.find("_"):] out = os.path.splitext(out)[0] - out = '30epochstestadamevaluated' + out + ".tif" + out = 'complexfcnn_multiclass' + out + ".tif" #out = 'testing' + out + ".tif" out = os.path.join(m_dir, out) ii += 1 evaluate_image(f, model, outfile=out, ii=ii) - clip_rasters(m_dir, "test") + clip_rasters(m_dir, "37_28") diff --git a/pixel_classification/models.py b/pixel_classification/models.py new file mode 100644 index 0000000..d3df459 --- /dev/null +++ b/pixel_classification/models.py @@ -0,0 +1,151 @@ 
+import os +os.environ['KERAS_BACKEND'] = 'tensorflow' +import keras.backend as K +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, Dropout, UpSampling2D) + +def fcnn_model(n_classes): + model = tf.keras.Sequential() + # Must define the input shape in the first layer of the neural network + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=8, padding='same', activation='relu', + input_shape=(None, None, 36))) + model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=4, padding='same', activation='relu')) + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=4, padding='same', activation='relu')) + model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', + activation='softmax')) # 1x1 convolutions for pixel-wise prediciton. 
+ # Take a look at the model summary + #model.summary() + return model + +def fcnn_functional_small(n_classes): + x = Input((None, None, 36)) + + c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) + c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) + mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) + + c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(mp1) + c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(c2) + mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) + mp2 = Dropout(0.5)(mp2) + + c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(mp2) + c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(c3) + mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) + + last_conv = Conv2D(filters=256, kernel_size=(3,3), activation='relu', padding='same')(mp3) + + u1 = UpSampling2D(size=(2, 2))(last_conv) + u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) + u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) + + u1_c3 = Concatenate()([c3, u1]) + + u2 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1_c3) + u2 = UpSampling2D(size=(2, 2))(u2) + u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) + u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) + u2 = Dropout(0.5)(u2) + + u2_c2 = Concatenate()([u2, c2]) + u2_c2 = Dropout(0.5)(u2_c2) + + c4 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u2_c2) + u3 = UpSampling2D(size=(2, 2))(c4) + u3 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u3) + + u3_c1 = Concatenate()([u3, c1]) + + c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) + + model = Model(inputs=x, outputs=c5) + #model.summary() + return model + + 
+def fcnn_functional(n_classes): + + x = Input((None, None, 36)) + base = 2 + exp = 4 + + c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(x) + c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c1) + mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) + + exp+=1 + + c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp1) + c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c2) + mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) + mp2 = Dropout(0.5)(mp2) + + exp+=1 + + c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp2) + c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c3) + mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) + + exp+=1 + + c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp3) + c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c4) + mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) + + exp+=1 + + c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp4) + c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c5) + mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c5) + + last_conv = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp4) + + u1 = UpSampling2D(size=(2, 2))(last_conv) + u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) + u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) + + exp-=1 + + u1_c5 = Concatenate()([c5, u1]) + + u2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1_c5) + u2 = UpSampling2D(size=(2, 2))(u2) + u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) + u2 = Conv2D(filters=base**exp, kernel_size=(3, 
3), activation='relu', padding='same')(u2) + u2 = Dropout(0.5)(u2) + + u2_c4 = Concatenate()([u2, c4]) + + exp-=1 + + u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u2_c4) + u3 = UpSampling2D(size=(2, 2))(u3) + u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3) + + u3_c3 = Concatenate()([u3, c3]) + + exp-=1 + + u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3_c3) + u4 = UpSampling2D(size=(2, 2))(u4) + u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4) + + u4_c2 = Concatenate()([u4, c2]) + + exp-=1 + + u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4_c2) + u5 = UpSampling2D(size=(2, 2))(u5) + u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) + u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='softmax', padding='same')(u5) + + u5_c1 = Concatenate()([u5, c1]) + + u6 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u5_c1) + + model = Model(inputs=x, outputs=u6) + #model.summary() + return model From 200f74243fc25ef2a410de061edbe9b405e74e95 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Mon, 25 Feb 2019 15:11:06 -0700 Subject: [PATCH 41/89] Fixed a bug in the saving of training data --- pixel_classification/data_generators.py | 132 +++++++----------------- 1 file changed, 39 insertions(+), 93 deletions(-) diff --git a/pixel_classification/data_generators.py b/pixel_classification/data_generators.py index ec29673..74a636d 100644 --- a/pixel_classification/data_generators.py +++ b/pixel_classification/data_generators.py @@ -2,16 +2,23 @@ import os import time import pickle +import sys import matplotlib.pyplot as plt from glob import glob from random import sample, shuffle from data_utils import generate_class_mask, get_shapefile_path_row from rasterio import open as rasopen +from warnings 
import warn NO_DATA = -1 -MAX_POOLS = 5 +try: + MAX_POOLS = int(os.environ["MAX_POOLS"]) + print("MAX_POOLS", MAX_POOLS) +except: + warn("MAX_POOLS environment variable has not been set. Defaulting to 5.") + MAX_POOLS = 5 CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. -NUM_CLASSES = 4 +NUM_CLASSES = 2 def random_sample(class_mask, n_instances, box_size=0, fill_value=1): if box_size: @@ -57,7 +64,6 @@ def load_raster(master_raster): def assign_class_code(target_dict, shapefilename): - for key in target_dict: if key in shapefilename: return target_dict[key] @@ -87,10 +93,12 @@ def to_pickle(self, training_directory): 'class_{}_data/'.format(self.dict['class_code'])) if not os.path.isdir(template): os.mkdir(template) - # Need to save the dict object with a unique filename - outfile = os.path.join(template, str(int(time.time())) + ".pkl") - with open(outfile, 'wb') as f: - pickle.dump(self.dict, f, protocol=pickle.HIGHEST_PROTOCOL) + outfile = os.path.join(template, str(time.time()) + ".pkl") + if not os.path.isfile(outfile): + with open(outfile, 'wb') as f: + pickle.dump(self.dict, f, protocol=pickle.HIGHEST_PROTOCOL) + else: + print("What? 
Contact administrator.") def set_data(self, data): self.dict['data'] = data @@ -102,9 +110,11 @@ def set_class_mask(self, class_mask): self.dict['class_mask'] = class_mask -def create_training_data(target_dict, shapefile_directory, image_directory, training_directory): +def create_training_data(target_dict, shapefile_directory, image_directory, training_directory, + count): ''' target_dict: {filename or string in filename : class_code} ''' done = set() + year = 2013 train_raster = 'master_raster_' mask_raster = 'class_mask_' for f in glob(os.path.join(shapefile_directory, "*.shp")): @@ -127,15 +137,20 @@ def create_training_data(target_dict, shapefile_directory, image_directory, trai if cc is not None: dm = DataMask(msk, cc) masks.append(dm) - + for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + q = 0 for msk in masks: s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] if not np.all(s == NO_DATA): + q += 1 dt = DataTile(sub_master, s, msk.class_code) dt.to_pickle(training_directory) + count += 1 + + return count def all_matching_shapefiles(to_match, shapefile_directory): out = [] @@ -145,72 +160,6 @@ def all_matching_shapefiles(to_match, shapefile_directory): out.append(f) return out -def generate_balanced_data(shapefile_directory, image_directory, box_size, target, year=2013): - train_raster = 'master_raster_' # templates - mask_raster = 'class_mask_' - ''' This is pretty much for binary classification.''' - while True: - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if target in f: - all_matches = all_matching_shapefiles(f, shapefile_directory) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) - if not os.path.isfile(master_raster): - print("Master raster not created for 
{}".format(suffix)) - # TODO: More extensive error handling. - else: - target_mask = generate_class_mask(f, mask_file) - class_mask = np.ones((NUM_CLASSES, target_mask.shape[1], target_mask.shape[2]))*NO_DATA - class_mask[1, :, :] = target_mask - required_instances = len(np.where(target_mask != NO_DATA)[0]) // (box_size*len(all_matches)) - masks = [] - for match in all_matches: - msk = generate_class_mask(match, mask_file) - #samp = random_sample(msk, required_instances, box_size) - #masks.append(samp) - masks.append(msk) - - for i, s in enumerate(masks): - class_mask[0, :, :][s[0, :, :] != NO_DATA] = 1 - - master, meta = load_raster(master_raster) - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if np.all(sub_mask == NO_DATA): - continue - else: - n_negative = len(np.where(sub_mask[0, :, :] != NO_DATA)[1]) - positive = np.where(target_mask[:, :] != NO_DATA) - sorted_x = sorted(positive[1]) - sorted_y = sorted(positive[2]) - l = len(sorted_x) // 2 - center_x = sorted_x[l] - center_y = sorted_y[l] - ofs = CHUNK_SIZE // 2 - sub_positive = target_mask[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] - sub_master_positive = master[:, center_x - ofs: center_x + ofs, center_y - ofs: center_y + ofs] - required_instances = min(len(np.where(sub_positive[0, :, :] != NO_DATA)[1]), n_negative) - sub_negative = random_sample(sub_mask[0, :, :], required_instances, - box_size=0, class_code=1) - sub_master_negative = sub_master - sub_positive = random_sample(sub_positive[0, :, :], required_instances, - box_size=0, class_code=1) - one_hot_pos = np.ones((2, sub_positive.shape[0], sub_positive.shape[1]))*NO_DATA - one_hot_neg = np.ones((2, sub_negative.shape[0], sub_negative.shape[1]))*NO_DATA - one_hot_pos[1, :, :] = sub_positive - one_hot_neg[0, :, :] = sub_negative - sub_mas_pos, class_mask_pos = 
preprocess_data(sub_master_positive, - one_hot_pos) - sub_mas_neg, class_mask_neg = preprocess_data(sub_master_negative, - one_hot_neg) - ims = [sub_mas_pos, sub_mas_neg] - class_masks = [class_mask_pos, class_mask_neg] - for ii, jj in zip(ims, class_masks): - yield ii, jj class DataGen: @@ -307,8 +256,6 @@ def augment_data(image, class_mask): deg = np.random.uniform(-25, 25) image = rotation(image, deg) class_mask = rotation(class_mask, deg) - if np.random.randint(2): - image = random_noise(image) if np.random.randint(2): image = h_flip(image) class_mask = h_flip(class_mask) @@ -321,8 +268,12 @@ def augment_data(image, class_mask): def preprocess_data(master, mask, return_cuts=False): shp = master.shape rows = shp[1]; cols = shp[2] - cut_rows = rows % (2**MAX_POOLS) - cut_cols = cols % (2**MAX_POOLS) + if MAX_POOLS != 0: + cut_rows = rows % (2**MAX_POOLS) + cut_cols = cols % (2**MAX_POOLS) + else: + cut_rows = 0 + cut_cols = 0 out_m = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) if cut_cols != 0 and cut_rows != 0: @@ -361,20 +312,15 @@ def preprocess_data(master, mask, return_cuts=False): fallow = 'Fallow' forest = 'Forrest' other = 'other' - target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} + target_dict = {irr2:0, irr1:0, fallow:1, forest:1, other:1} year = 2013 - train_dir = 'training_data/train/' + train_dir = 'training_data/binary/train/' shp_train = 'shapefile_data/train/' - # create_training_data(target_dict, shp_train, image_directory, train_dir) - # print("Created training data") - test_dir = 'training_data/test/' + count = 0 + count = create_training_data(target_dict, shp_train, image_directory, train_dir, count) + print("You have {} instances per training epoch.".format(count)) + test_dir = 'training_data/binary/test/' shp_test = 'shapefile_data/test/' - # create_training_data(target_dict, shp_test, image_directory, test_dir) - j = 0 - for k in generate_training_data(train_dir): - j += 1 - print("Train steps:", j) - j = 0 - for 
k in generate_training_data(test_dir): - j += 1 - print("Test steps:", j) + count = 0 + count = create_training_data(target_dict, shp_test, image_directory, test_dir, count) + print("You have {} instances per test epoch.".format(count)) From beeb188106d793de39dad14638ad274bb46a294d Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 3 Mar 2019 10:20:25 -0700 Subject: [PATCH 42/89] Changing project setup --- .../compose_array_single_shapefile.py | 411 ++++++++++++++++++ fully-conv-classification/data_generators.py | 348 +++++++++++++++ fully-conv-classification/data_utils.py | 308 +++++++++++++ fully-conv-classification/fully_conv.py | 279 ++++++++++++ fully-conv-classification/keras_cnn.py | 90 ++++ fully-conv-classification/models.py | 154 +++++++ fully-conv-classification/path_map.pkl | Bin 0 -> 6 bytes fully-conv-classification/prepare_images.py | 309 +++++++++++++ .../runner_from_shapefile.py | 120 +++++ fully-conv-classification/runspec.py | 185 ++++++++ fully-conv-classification/shuffle_data.py | 57 +++ 11 files changed, 2261 insertions(+) create mode 100644 fully-conv-classification/compose_array_single_shapefile.py create mode 100644 fully-conv-classification/data_generators.py create mode 100644 fully-conv-classification/data_utils.py create mode 100644 fully-conv-classification/fully_conv.py create mode 100644 fully-conv-classification/keras_cnn.py create mode 100644 fully-conv-classification/models.py create mode 100644 fully-conv-classification/path_map.pkl create mode 100644 fully-conv-classification/prepare_images.py create mode 100644 fully-conv-classification/runner_from_shapefile.py create mode 100644 fully-conv-classification/runspec.py create mode 100644 fully-conv-classification/shuffle_data.py diff --git a/fully-conv-classification/compose_array_single_shapefile.py b/fully-conv-classification/compose_array_single_shapefile.py new file mode 100644 index 0000000..4ce57ef --- /dev/null +++ 
b/fully-conv-classification/compose_array_single_shapefile.py @@ -0,0 +1,411 @@ +# ============================================================================================= +# Copyright 2018 dgketchum +# +# Licensed under the Apache License, Version 2. (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================================= + +import os +import sys + +abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(abspath) +import pickle +from copy import deepcopy +from warnings import warn +from datetime import datetime +from fiona import open as fopen +from numpy import linspace, max, nan, unique, ndarray, swapaxes, zeros, asarray +import h5py +from numpy.random import shuffle +from pandas import DataFrame, Series +import warnings +from pyproj import Proj, transform +from rasterio import open as rasopen +from shapely.geometry import shape, Point, mapping +from shapely.ops import unary_union +from data_utils import get_shapefile_path_row +loc = os.path.dirname(__file__) +WRS_2 = loc.replace('pixel_classification', + os.path.join('spatial_data', 'wrs2_usa_descending.shp')) + +''' +This script contains a class meant to gather data from rasters using a polygon shapefile. +The high-level method `extract_sample` will return an object ready for a +learning algorithm. 
+''' +loc = os.path.dirname(__file__) +WRS_2 = loc.replace('pixel_classification', + os.path.join('spatial_data', 'wrs2_descending.shp')) + +class NoCoordinateReferenceError(Exception): + pass + + +class UnexpectedCoordinateReferenceSystemError(Exception): + pass + + +class ShapefileSamplePoints: + + def __init__(self, shapefile_path=None, sample_point_directory=None, m_instances=None): + self.outfile = os.path.splitext(shapefile_path)[0] + self.outfile += "_sample_points.shp" + if sample_point_directory: + self.outfile = os.path.join(sample_point_directory, self.outfile) + + self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) + self.m_instances = m_instances + self.object_id = 0 + self.shapefile_path = shapefile_path + self.path, self.row = get_shapefile_path_row(shapefile_path) + + def _random_points(self, coords): + min_x, max_x = coords[0], coords[2] + min_y, max_y = coords[1], coords[3] + x_range = linspace(min_x, max_x, num=2 * self.m_instances) + y_range = linspace(min_y, max_y, num=2 * self.m_instances) + shuffle(x_range), shuffle(y_range) + return x_range, y_range + + def _add_entry(self, coord, val=0): + # TODO: Encode class_code in shapefile schema. 
+ self.extracted_points = self.extracted_points.append({'FID': int(self.object_id), + 'X': coord[0], + 'Y': coord[1], + 'POINT_TYPE': val}, + ignore_index=True) + self.object_id += 1 + + def save_sample_points(self): + + points_schema = { + 'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]), + 'geometry': 'Point'} + meta = self.tile_geometry.copy() + meta['schema'] = points_schema + + with fopen(self.outfile, 'w', **meta) as output: + for index, row in self.extracted_points.iterrows(): + props = dict([('FID', row['FID']), ('POINT_TYPE', row['POINT_TYPE'])]) + pt = Point(row['X'], row['Y']) + output.write({'properties': props, + 'geometry': mapping(pt)}) + return None + + def _get_polygons(self, vector): + with fopen(vector, 'r') as src: + crs = src.crs + if not crs: + raise NoCoordinateReferenceError( + 'Provided shapefile has no reference data.') + if crs['init'] != 'epsg:4326': + raise UnexpectedCoordinateReferenceSystemError( + 'Provided shapefile should be in unprojected (geographic)' + 'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format( + vector)) + clipped = src.filter(mask=self.tile_bbox) + polys = [] + bad_geo_count = 0 + for feat in clipped: + try: + geo = shape(feat['geometry']) + polys.append(geo) + except AttributeError: + bad_geo_count += 1 + + return polys + + def create_sample_points(self, save_points=True): + """ Create a clipped training set from polygon shapefiles. + + This complicated-looking function finds the wrs_2 descending Landsat tile corresponding + to the path row provided, gets the bounding box and profile (aka meta) from + compose_array.get_tile_geometry, clips the training data to the landsat tile, then perform + s a union to reduce the number of polygon objects. + The dict object this uses has a template in pixel_classification.runspec.py. 
+ Approach is to loop through the polygons, create a random grid of points over the + extent of each polygon, random shuffle order of points, loop over points, check if + point is within polygon, and if within, create a sample point. + + If a relatively simple geometry is available, use create_negative_sample_points(), though if + there are > 10**4 polygons, it will probably hang on unary_union(). """ + + polygons = self._get_polygons(self.shapefile_path) + instance_count = 0 + print("Making sample points. You have {} polygons".format(len(polygons))) + print("N_instances:", self.m_instances) + + if len(polygons) > self.m_instances: + areas = zip(polygons, [x.area for x in polygons]) + srt = sorted(areas, key=lambda x: x[1], reverse=True) + polygons = [x for x, y in srt[:self.m_instances]] + + if not isinstance(polygons, list): + polygons = [polygons] # for the case of a single polygon. + + positive_area = sum([x.area for x in polygons]) # the sum of all + # the areas. + class_count = 0 + + for i, poly in enumerate(polygons): + if class_count >= self.m_instances: + break + fractional_area = poly.area / positive_area # percent of + # total area that this polygon occupies + required_points = max([1, fractional_area * self.m_instances]) # how + # many points overall that are required to evenly + # sample from each polygon, based on area. 
+ poly_pt_ct = 0 + x_range, y_range = self._random_points(poly.bounds) + for coord in zip(x_range, y_range): + if instance_count >= self.m_instances: + break + if Point(coord[0], coord[1]).within(poly): + self._add_entry(coord) + poly_pt_ct += 1 + instance_count += 1 + # print(instance_count) + if poly_pt_ct >= required_points: + break + class_count += poly_pt_ct + + if save_points: + self.save_sample_points() + + @property + def tile_bbox(self): + with fopen(WRS_2, 'r') as wrs: + for feature in wrs: + fp = feature['properties'] + if fp['PATH'] == self.path and fp['ROW'] == self.row: + bbox = feature['geometry'] + return bbox + + def _get_crs(self): + for key, val in self.paths_map.items(): + with rasopen(val, 'r') as src: + crs = src.crs + break + return crs + + @property + def tile_geometry(self): + with fopen(WRS_2, 'r') as wrs: + wrs_meta = wrs.meta.copy() + return wrs_meta + +class PTASingleShapefile: + + def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, + row=None, masked_raster=None, training_directory=None, paths_map=None, masks=None, + instances=None, sz=1000, overwrite_points=None, kernel_size=None, data_filename=None): + self.shapefile_path = shapefile_path + self.path = path + self.object_id = 0 + self.data_filename = data_filename + self.paths_map = paths_map + self.masks = masks + self.row = row + self.training_directory = training_directory + self.overwrite_points=overwrite_points + self.class_code = class_code + self.crs = self._get_crs() + self.m_instances = instances + self.sz = sz + self.master_raster = master_raster + self.data = None + self.kernel_size = kernel_size + self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) + + def extract_sample(self, save_points=True): + # TODO: Pare down this class' methods. + # Because of the large data size, pickling output data + # (and therefore using a one-band at a time extraction approach) + # is not feasible. 
+ + out = os.path.splitext(self.shapefile_path)[0] + out += "_sample_points.shp" + if os.path.isfile(out): + print("sample points already created") + self._populate_array_from_points(out) + else: + print("Sample points not detected at {}".format(out)) + if self.master_raster is not None: + self.training_data_from_master_raster() + else: + self.populate_raster_data_array() + + def _populate_array_from_points(self, fname): + + with fopen(fname, 'r') as src: + for feat in src: + coords = feat['geometry']['coordinates'] + val = feat['properties']['POINT_TYPE'] + self._add_entry(coords, val=val) + + def _dump_data(self, data): + n = "class_{}_train.h5".format(self.class_code) + if self.data_filename is None: + to_save = os.path.join(self.training_directory, n) + else: + to_save = self.data_filename + with h5py.File(to_save, 'a') as f: + pref = os.path.basename(self.shapefile_path) + dset = f.create_dataset("{}_{}".format(pref, + str(datetime.now())), data=data) + + def training_data_from_master_raster(self): + + ofs = self.kernel_size // 2 + sz = self.sz # some heuristic that indicates when I run out of memory + tmp_arr = [] + with rasopen(self.master_raster, 'r') as rsrc: + rass_arr = rsrc.read() + affine = rsrc.transform + + for ind, row in self.extracted_points.iterrows(): + # iterate through extracted points. 
+ if (ind+1) % sz == 0: + print("Writing to disk...") + qq = asarray(tmp_arr) + del tmp_arr + self._dump_data(qq) + del qq + tmp_arr = [] + + x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) + c, r = ~affine * (x, y) + try: + rr = int(r); cc = int(c) + raster_subgrid = rass_arr[:, rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] + tmp_arr.append(raster_subgrid) + + except IndexError as e: + print(e) + + if len(tmp_arr): + print("Writing to disk...") + qq = asarray(tmp_arr) + self._dump_data(qq) + del qq + del tmp_arr + + def populate_raster_data_array(self, save=True): + + for key, val in self.paths_map.items(): + s = self._grid_raster_extract(val, _name=key) + print('Extracting {}'.format(key)) + self.extracted_points = self.extracted_points.join(s, how='outer') + + for key, val in self.masks.items(): + s = self._grid_raster_extract(val, _name=key) + print('Extracting {}'.format(key)) + self.extracted_points = self.extracted_points.join(s, how='outer') + + data_array, targets = self._purge_raster_array() + data = {'df': data_array, + 'features': data_array.columns.values, + 'data': data_array.values, + 'target_values': targets, + 'paths_map': self.paths_map} + + print('feature dimensions: {}'.format(data_array.shape)) + + for key, val in data.items(): + setattr(self, key, val) + + def _purge_raster_array(self): + data_array = deepcopy(self.extracted_points) + target_vals = Series(data_array.POINT_TYPE.values, name='POINT_TYPE') + data_array.drop(['X', 'Y', 'FID', 'POINT_TYPE'], axis=1, inplace=True) + try: + for msk in self.masks.keys(): + for idx, sub_raster in enumerate(data_array[msk]): + if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 1.: + data_array.loc[idx, :] = nan # make whole row NaN + except TypeError as e: + print(sub_raster, msk, idx) + data_array.loc[idx, :] = nan + + try: + for bnd in self.paths_map.keys(): + for idx, sub_raster in enumerate(data_array[bnd]): + if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 0.: + 
data_array.loc[idx, :] = nan + except TypeError as e: + data_array.loc[idx, :] = nan + + data_array = data_array.join(target_vals, how='outer') + + data_array.dropna(axis=0, inplace=True) + data_array.drop(self.masks, axis=1, inplace=True) + target_vals = data_array.POINT_TYPE.values + + data_array = data_array.drop(['POINT_TYPE'], + axis=1, inplace=False) + return data_array, target_vals + + def _geo_point_to_projected_coords(self, x, y): + + in_crs = Proj(init='epsg:4326') + out_crs = Proj(init=self.crs['init']) + x, y = transform(in_crs, out_crs, x, y) + return x, y + + def _grid_raster_extract(self, raster, _name): + """ + Open the raster. Store the points in a Series - a labeled + numpy array. Then in _purge array, we iterate over the masks + and the paths_map and drop pixels where masks = 1 and pixels where bound = 0. + """ + + with rasopen(raster, 'r') as rsrc: + rass_arr = rsrc.read() + rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) + affine = rsrc.transform + + s = Series(index=range(0, self.extracted_points.shape[0]), name=_name, dtype=object) + for ind, row in self.extracted_points.iterrows(): + x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) + c, r = ~affine * (x, y) + try: + ofs = self.kernel_size // 2 + rr = int(r); cc = int(c) + raster_subgrid = rass_arr[rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] # possible issues: edges of image + s[ind] = raster_subgrid + except IndexError: + s[ind] = None + + return s + + @property + def tile_bbox(self): + with fopen(WRS_2, 'r') as wrs: + for feature in wrs: + fp = feature['properties'] + if fp['PATH'] == self.path and fp['ROW'] == self.row: + bbox = feature['geometry'] + return bbox + + def _get_crs(self): + for key, val in self.paths_map.items(): + with rasopen(val, 'r') as src: + crs = src.crs + break + return crs + + @property + def tile_geometry(self): + with fopen(WRS_2, 'r') as wrs: + wrs_meta = wrs.meta.copy() + return wrs_meta diff --git 
a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py new file mode 100644 index 0000000..8d3c204 --- /dev/null +++ b/fully-conv-classification/data_generators.py @@ -0,0 +1,348 @@ +import numpy as np +import os +import time +import pickle +import sys +import matplotlib.pyplot as plt +from glob import glob +from random import sample, shuffle +from sklearn.utils.class_weight import compute_class_weight +from data_utils import generate_class_mask, get_shapefile_path_row +from rasterio import open as rasopen +from warnings import warn +from skimage import transform + +NO_DATA = -1 +CHUNK_SIZE = 608 # some value that is evenly divisible by 2^MAX_POOLS. +NUM_CLASSES = 4 + +def random_sample(class_mask, n_instances, box_size=0, fill_value=1): + if box_size: + n_instances /= box_size + + out = np.where(class_mask != NO_DATA) + class_mask = class_mask.copy() + try: + out_x = out[1] + out_y = out[2] + except IndexError as e: + out_x = out[0] + out_y = out[1] + + indices = np.random.choice(len(out_x), size=n_instances, replace=False) + out_x = out_x[indices] + out_y = out_y[indices] + + try: + class_mask[:, :, :] = NO_DATA + if box_size == 0: + class_mask[0, out_x, out_y] = fill_value + else: + ofs = box_size // 2 + for x, y in zip(out_x, out_y): + class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = fill_value + + except IndexError as e: + class_mask[:, :] = NO_DATA + if box_size == 0: + class_mask[out_x, out_y] = fill_value + else: + ofs = box_size // 2 + for x, y in zip(out_x, out_y): + class_mask[x-ofs:x+ofs, y-ofs:y+ofs] = fill_value + + return class_mask + +def load_raster(master_raster): + with rasopen(master_raster, 'r') as src: + arr = src.read() + meta = src.meta.copy() + return arr, meta + + +def assign_class_code(target_dict, shapefilename): + for key in target_dict: + if key in shapefilename: + return target_dict[key] + print("{} has no known match in target_dict.".format(shapefilename)) + return None + + +class DataMask(object): 
+ + def __init__(self, mask, class_code): + self.mask = mask + self.class_code = class_code + + +class DataTile(object): + + def __init__(self, data, class_mask, class_code): + self.dict = {} + self.dict['data'] = data + self.dict['class_mask'] = class_mask + self.dict['class_code'] = class_code + + def to_pickle(self, training_directory): + if not os.path.isdir(training_directory): + os.mkdir(training_directory) + template = os.path.join(training_directory, + 'class_{}_data/'.format(self.dict['class_code'])) + if not os.path.isdir(template): + os.mkdir(template) + outfile = os.path.join(template, str(time.time()) + ".pkl") + if not os.path.isfile(outfile): + with open(outfile, 'wb') as f: + pickle.dump(self.dict, f, protocol=pickle.HIGHEST_PROTOCOL) + else: + print("What? Contact administrator.") + + def set_data(self, data): + self.dict['data'] = data + + def set_code(self, class_code): + self.dict['class_code'] = class_code + + def set_class_mask(self, class_mask): + self.dict['class_mask'] = class_mask + + +def create_training_data(target_dict, shapefile_directory, image_directory, training_directory, + count, save=True): + ''' target_dict: {filename or string in filename : class_code} ''' + done = set() + pixel_dict = {} # counts number of pixels present in each class. + for class_code in target_dict.values(): + pixel_dict[class_code] = 0 + year = 2013 + train_raster = 'master_raster_' + mask_raster = 'class_mask_' + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if f not in done: + all_matches = all_matching_shapefiles(f, shapefile_directory) + done.add(f) + for match in all_matches: + done.add(match) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + master_raster = os.path.join(image_directory, train_raster + suffix) + mask_file = os.path.join(image_directory, mask_raster + suffix) # for rasterio.mask.mask + # this file is projected the same as the shapefile. 
+ masks = [] + all_matches.append(f) + shp = None + for match in all_matches: + msk = generate_class_mask(match, mask_file) + shp = msk.shape + cc = assign_class_code(target_dict, match) + if cc is not None: + dm = DataMask(msk, cc) + masks.append(dm) + if save: + master, meta = load_raster(master_raster) + else: + master = np.zeros(shp) + + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + for msk in masks: + s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + if not np.all(s == NO_DATA): + pixel_dict[msk.class_code] += len(np.where(s != NO_DATA)[0]) + count += 1 + if save: + dt = DataTile(sub_master, s, msk.class_code) + dt.to_pickle(training_directory) + return count, pixel_dict + + +def all_matching_shapefiles(to_match, shapefile_directory): + out = [] + pr = get_shapefile_path_row(to_match) + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if get_shapefile_path_row(f) == pr and to_match not in f: + out.append(f) + return out + + +class DataGen: + + def __init__(self, class_filename): + self.file_list = None + self.class_filename = class_filename + self._get_files() + self.n_files = len(self.file_list) + self.idx = 0 + + def _get_files(self): + self.file_list = [x[2] for x in os.walk(self.class_filename)][0] + self.file_list = [os.path.join(self.class_filename, x) for x in self.file_list] + + def next(self): + if self.idx == self.n_files or self.idx == 0: + self.idx = 0 + self.shuffled = sample(self.file_list, self.n_files) + out = self.shuffled[self.idx] + self.idx += 1 + else: + out = self.shuffled[self.idx] + self.idx += 1 + return self._from_pickle(out) + + def _from_pickle(self, filename): + with open(filename, 'rb') as f: + data = pickle.load(f) + return data + + +def generate_training_data(training_directory, max_pools, random_sample=True, train=True, box_size=0): + ''' Assumes data is stored in training_directory + in subdirectories 
labeled class_n_train + and that n_classes is a global variable.''' + class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] + generators = [] + for d in class_dirs: + generators.append(DataGen(d)) + # TODO: Apply image augmentation. + while True: + min_samples = np.inf + data = [] + for gen in generators: + out = gen.next().copy() + data.append(out) + n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + if n_samples < min_samples: + min_samples = n_samples + for subset in data: + if random_sample: + samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, + fill_value=1) + else: + samp = subset['class_mask'] + samp[samp != NO_DATA] = 1 + one_hot = np.ones((NUM_CLASSES, samp.shape[1], samp.shape[2]))*NO_DATA + one_hot[int(subset['class_code']), :, :] = samp + for i in range(NUM_CLASSES): + if i != int(subset['class_code']): + one_hot[i, :, :][samp[0, :, :] != NO_DATA] = 0 + subset['class_mask'] = one_hot + + masters = [] + masks = [] + for subset in data: + master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) + masters.append(master) + masks.append(mask) + + if train: + augmented_masters = [] + augmented_masks = [] + for master, mask in zip(masters, masks): + ms, msk = augment_data(master, mask) + augmented_masters.append(ms) + augmented_masks.append(msk) + + masters += augmented_masters + masks += augmented_masks + + # Shuffle order of data here? 
+ for ms, msk in zip(masters, masks): + yield ms, msk + + +def rotation(image, angle): + return transform.rotate(image, angle, mode='constant', cval=NO_DATA) + +def h_flip(image): + return image[:, ::-1] + +def augment_data(image, class_mask): + '''Randomly augments an image.''' + # if np.random.randint(2): + # deg = np.random.uniform(-25, 25) + # image = rotation(image, deg) + # class_mask = rotation(class_mask, deg) + if np.random.randint(2): + image = h_flip(image) + class_mask = h_flip(class_mask) + if np.random.randint(2): + image = np.flipud(image) + class_mask = np.flipud(class_mask) + return image, class_mask + + +def preprocess_data(master, mask, max_pools, return_cuts=False): + ''' This function preprocesses data in such a way + that downscaling it by 2 max_pools times will result + in an input that plays nicely with the FCNN expecting it. + Master, mask in this example are tiles from the original image.''' + shp = master.shape + rows = shp[1]; cols = shp[2] + if max_pools != 0: + cut_rows = rows % (2**max_pools) + cut_cols = cols % (2**max_pools) + else: + cut_rows = 0 + cut_cols = 0 + out_m = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + + if cut_cols != 0 and cut_rows != 0: + out_m[0, :, :, :] = master[:, :-cut_rows, :-cut_cols] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :-cut_rows, :-cut_cols] + elif cut_cols == 0 and cut_rows != 0: + out_m[0, :, :, :] = master[:, :-cut_rows, :] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :-cut_rows, :] + elif cut_cols != 0 and cut_rows == 0: + out_m[0, :, :, :] = master[:, :, :-cut_cols] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :, :-cut_cols] + else: + out_m[0, :, :, :] = master[:, :, :] + shp = mask.shape + out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, 
shp[2] - cut_cols)) + out_mask[0, :, :, :] = mask[:, :, :] + + out_m = np.swapaxes(out_m, 1, 3) + out_mask = np.swapaxes(out_mask, 1, 3) + if return_cuts: + return out_m, out_mask, cut_rows, cut_cols + + return out_m, out_mask + + +if __name__ == '__main__': + shapefile_directory = 'shapefile_data/' + image_directory = 'master_rasters/' + irr1 = 'Huntley' + irr2 = 'Sun_River' + fallow = 'Fallow' + forest = 'Forrest' + other = 'other' + target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} + year = 2013 + train_dir = 'training_data/multiclass/train/' + shp_train = 'shapefile_data/train/' + count = 0 + save = False + count, pixel_dict = create_training_data(target_dict, shp_train, image_directory, train_dir, + count, save=save) + print("You have {} instances per training epoch.".format(count)) + print("And {} instances in each class.".format(pixel_dict)) + + max_weight = max(pixel_dict.values()) + for key in pixel_dict: + print(key, max_weight / pixel_dict[key]) + tot = 0 + test_dir = 'training_data/multiclass/test/' + shp_test = 'shapefile_data/test/' + count = 0 + count, pixel_dict = create_training_data(target_dict, shp_test, image_directory, test_dir, + count, save=save) + print("You have {} instances per test epoch.".format(count)) + print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py new file mode 100644 index 0000000..a797517 --- /dev/null +++ b/fully-conv-classification/data_utils.py @@ -0,0 +1,308 @@ +import glob +import os +import geopandas as gpd +import json +from fiona import open as fopen +from lxml import html +from requests import get +from copy import deepcopy +from numpy import zeros, asarray, array, reshape, nan +from shapely.geometry import shape +from collections import defaultdict +from rasterio import float32, open as rasopen +from rasterio.mask import mask +from prepare_images import ImageStack +from sklearn.neighbors import KDTree 
+from sat_image.warped_vrt import warp_single_image + +NO_DATA = -1 + +def get_features(gdf): + tmp = json.loads(gdf.to_json()) + features = [feature['geometry'] for feature in tmp['features']] + return features + +def generate_class_mask(shapefile, master_raster): + ''' Generates a mask with class_val everywhere + shapefile data is present and a no_data value everywhere else. + no_data is -1 in this case, as it is never a valid class label. + Switching coordinate reference systems is important here, or + else the masking won't work. + ''' + shp = gpd.read_file(shapefile) + with rasopen(master_raster, 'r') as src: + shp = shp.to_crs(src.crs) + features = get_features(shp) + out_image, out_transform = mask(src, shapes=features, nodata=NO_DATA) + return out_image + + +def create_master_raster(image_stack, path, row, year, raster_directory): + fname = "master_raster_{}_{}_{}.tif".format(path, row, year) + pth = os.path.join(raster_directory, fname) + mask_fname = "class_mask_{}_{}_{}.tif".format(path, row, year) + mask_path = os.path.join(raster_directory, mask_fname) + if os.path.isfile(pth): + print("Master raster already created for {}_{}_{}.".format(path, row, year)) + if os.path.isfile(mask_path): + print('Class mask template already created') + return pth + else: + print("Creating class mask template.") + with rasopen(pth, 'r') as src: + meta = src.meta.copy() + h = meta['height'] + w = meta['width'] + + meta.update(count=1, dtype=float32) + + with rasopen(mask_path, 'w', **meta) as msk: + out = zeros((h, w)).astype(float32) + msk.write(out, 1) + return pth + + paths_map = image_stack.paths_map + first = True + stack = None + + for i, feat in enumerate(paths_map.keys()): # ordered dict ensures accuracy here. + + feature_raster = paths_map[feat] # maps bands to their location in filesystem. 
+ + with rasopen(feature_raster, mode='r') as src: + arr = src.read() + raster_geo = src.meta.copy() + + if first: + first_geo = raster_geo.copy() + empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) + stack = empty + stack[i, :, :] = arr + first = False + else: + try: + stack[i, :, :] = arr + except ValueError: + # error can be thrown here if source raster doesn't have crs + # OR ! Because rasterio version. + # However, deepcopy becomes an issue with the latest + # version of rasterio. + arr = warp_single_image(feature_raster, first_geo) + stack[i, :, :] = arr + + first_geo.update(count=1) + msk_out = zeros((1, stack.shape[1], stack.shape[2])).astype(float32) + with rasopen(mask_path, mode='w', **first_geo) as msk: + msk.write(msk_out) + + first_geo.update(count=len(paths_map.keys())) + + with rasopen(pth, mode='w', **first_geo) as dst: + dst.write(stack) + + return pth + +def get_shapefile_lat_lon(shapefile): + ''' Center of shapefile''' + with fopen(shapefile, "r") as src: + minx, miny, maxx, maxy = src.bounds + latc = (maxy + miny) / 2 + lonc = (maxx + minx) / 2 + + return latc, lonc + +def normalize_and_save_image(fname): + norm = True + with rasopen(fname, 'r') as rsrc: + if "normalized" in rsrc.tags(): + return + else: + rass_arr = rsrc.read() + rass_arr = rass_arr.astype(float32) + profile = rsrc.profile.copy() + profile.update(dtype=float32) + rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) + scaler = StandardScaler() # z-normalization + scaler.fit(rass_arr) + rass_arr = scaler.transform(rass_arr) + with rasopen(fname, 'w', **profile) as dst: + dst.write(rass_arr, 1) + print("Normalized", fname) + dst.update_tags(normalized=True) + +def download_images(project_directory, path, row, year, satellite=8, n_landsat=3): + + image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, + max_cloud_pct=70, n_landsat=n_landsat, year=year) + + image_stack.build_evaluating() # the difference b/t 
build_training() and build_eval() is + # a cloud mask. + return image_stack + +def construct_kdtree(wrs2): + centroids = [] + path_rows = [] # a mapping + features = [] + for feature in wrs2: + tile = shape(feature['geometry']) + centroid = tile.centroid.coords[0] + centroids.append([centroid[0], centroid[1]]) + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + path_rows.append(str(p) + "_" + str(r)) + features.append(feature) + + tree = KDTree(asarray(centroids)) + return tree, asarray(path_rows), asarray(features) + +def get_pr(poly, wrs2): + ls = [] + for feature in wrs2: + tile = shape(feature['geometry']) + if poly.within(tile): + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + ls.append(str(p) + "_" + str(r)) + return ls + +def get_pr_subset(poly, tiles): + ''' Use when you only want to iterate + over a subset of wrs2 tiles.''' + ls = [] + for feature in tiles: + tile = shape(feature['geometry']) + if poly.within(tile): + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + ls.append(str(p) + "_" + str(r)) + return ls + +def filter_shapefile(shapefile, out_directory): + """ Shapefiles may span multiple path/rows. + For training, we want all of the data available. + This function filters the polygons contained in + the shapefile into separate files for each path/row + contained in the shapefile. 
""" + path_row_map = defaultdict(list) + wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') + tree, path_rows, features = construct_kdtree(wrs2) + wrs2.close() + + cent_arr = array([0, 0]) + with fopen(shapefile, "r") as src: + meta = deepcopy(src.meta) + for feat in src: + poly = shape(feat['geometry']) + centroid = poly.centroid.coords[0] + cent_arr[0] = centroid[0] + cent_arr[1] = centroid[1] + centroid = cent_arr.reshape(1, -1) + dist, ind = tree.query(centroid, k=10) + tiles = features[ind[0]] + prs = get_pr_subset(poly, tiles) + for p in prs: + path_row_map[p].append(feat) + + outfile = os.path.basename(shapefile) + outfile = os.path.splitext(outfile)[0] + + for path_row in path_row_map: + out = outfile + path_row + ".shp" + with fopen(os.path.join(out_directory, out), 'w', **meta) as dst: + print("Saving {}".format(out)) + for feat in path_row_map[path_row]: + dst.write(feat) + + +def split_shapefile(base, base_shapefile, data_directory): + """ + Shapefiles may deal with data over multiple path/rows. + This is a method to get the minimum number of + path/rows required to cover all features. + Data directory: where the split shapefiles will be saved. + base: directory containing base_shapefile.""" + path_row = defaultdict(list) + id_mapping = {} + # TODO: un hardcode this directory. 
+ wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') + tree, path_rows, features = construct_kdtree(wrs2) + wrs2.close() + + cent_arr = array([0, 0]) + with fopen(os.path.join(base, base_shapefile), "r") as src: + meta = deepcopy(src.meta) + for feat in src: + idd = feat['id'] + id_mapping[idd] = feat + poly = shape(feat['geometry']) + centroid = poly.centroid.coords[0] + cent_arr[0] = centroid[0] + cent_arr[1] = centroid[1] + centroid = cent_arr.reshape(1, -1) + dist, ind = tree.query(centroid, k=10) + tiles = features[ind[0]] + prs = get_pr_subset(poly, tiles) + for p in prs: + path_row[p].append(idd) + + non_unique_ids = defaultdict(list) + unique = defaultdict(list) + for key in path_row: + ls = path_row[key] # all features in a given path/row + placeholder = ls.copy() + for key1 in path_row: + if key != key1: + ls1 = path_row[key1] + # find unique keys in ls + placeholder = set(placeholder) - set(ls1) #all + # features present in placeholder that are not + # present in ls1; i.e. 
unique keys + unique[key] = list(placeholder) + if len(ls) != len(placeholder): + nu = set(ls) - set(placeholder) # all features present in ls that are not present in placeholder (non-unique) + for idd in list(nu): + non_unique_ids[idd].append(key) + + match_key = [] + for key in non_unique_ids: # unique ids + pr = None + hi = 0 + for pathrow in non_unique_ids[key]: # path/rows corresponding to non + # unique features + if len(unique[pathrow]) > hi: + pr = pathrow + hi = len(unique[pathrow]) + + if pr is not None: + unique[pr].append(key) + else: + choice = non_unique_ids[key] + choice.sort() + choice = choice[0] + unique[choice].append(key) + + prefix = os.path.splitext(base_shapefile)[0] + for key in unique: + if key is None: + continue + out = prefix + "_" + key + ".shp" + if len(unique[key]): + with fopen(os.path.join(data_directory, out), 'w', **meta) as dst: + print("Saving split shapefile to: {}".format(os.path.join(data_directory, out))) + for feat in unique[key]: + dst.write(id_mapping[feat]) + +def get_shapefile_path_row(shapefile): + """This function assumes that the original + shapefile has already been split, and relies on + the naming convention to get the path and row. 
""" + # strip extension + # TODO: Find some way to update shapefile metadata + shp = shapefile[-9:-4].split("_") + return int(shp[0]), int(shp[1]) + +if __name__ == "__main__": + pass diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py new file mode 100644 index 0000000..3816acc --- /dev/null +++ b/fully-conv-classification/fully_conv.py @@ -0,0 +1,279 @@ +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +import keras.backend as K +import tensorflow as tf +#tf.enable_eager_execution() +import matplotlib.pyplot as plt +import numpy as np +import json +import geopandas as gpd +import sys +from glob import glob +from skimage import transform, util +from sklearn.metrics import confusion_matrix +from tensorflow.keras.callbacks import TensorBoard +from rasterio import open as rasopen +from rasterio.mask import mask +from shapely.geometry import shape +from fiona import open as fopen +from data_generators import generate_training_data, load_raster, preprocess_data +from data_utils import generate_class_mask +from models import fcnn_functional, fcnn_model, fcnn_functional_small + +NO_DATA = -1 +CHUNK_SIZE = 608 # some value that is divisible by 2^MAX_POOLS. +NUM_CLASSES = 4 +WRS2 = '../spatial_data/wrs2_descending_usa.shp' + +def m_acc(y_true, y_pred): + ''' Calculate accuracy from masked data. 
+ The built-in accuracy metric uses all data (masked & unmasked).''' + y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) + masked = tf.not_equal(y_true, NO_DATA) + indices = tf.where(masked) + indices = tf.to_int32(indices) + indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) + y_true_masked = tf.gather_nd(params=y_true, indices=indices) + y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) + return K.cast(K.equal(K.argmax(y_true_masked, axis=-1), K.argmax(y_pred_masked, axis=-1)), K.floatx()) + +def custom_objective_binary(y_true, y_pred): + '''I want to mask all values that + are not data, given a y_true + that has NODATA values. The boolean mask + operation is failing. It should output + a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) + tensor.''' + y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) + masked = tf.not_equal(y_true, NO_DATA) + indices = tf.where(masked) + indices = tf.to_int32(indices) + indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) + y_true_masked = tf.gather_nd(params=y_true, indices=indices) + y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) + return tf.keras.losses.binary_crossentropy(y_true_masked, y_pred_masked) + +def custom_objective(y_true, y_pred): + '''I want to mask all values that + are not data, given a y_true + that has NODATA values. The boolean mask + operation is failing. 
It should output + a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) + tensor.''' + y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) + masked = tf.not_equal(y_true, NO_DATA) + indices = tf.where(masked) + indices = tf.to_int32(indices) + indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) + y_true_masked = tf.gather_nd(params=y_true, indices=indices) + y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) + return tf.keras.losses.categorical_crossentropy(y_true_masked, y_pred_masked) + + +def evaluate_image(master_raster, model, max_pools, outfile=None, ii=None): + + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive handling of this case. + else: + master, meta = load_raster(master_raster) + class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder + out = np.zeros((master.shape[2], master.shape[1], NUM_CLASSES)) + + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, + max_pools, return_cuts=True) + preds = model.predict(sub_master) + preds = preds[0, :, :, :] + + if cut_cols == 0 and cut_rows == 0: + out[j:j+CHUNK_SIZE, i:i+CHUNK_SIZE, :] = preds + elif cut_cols == 0 and cut_rows != 0: + ofs = master.shape[1]-cut_rows + out[j:j+CHUNK_SIZE, i:ofs, :] = preds + elif cut_cols != 0 and cut_rows == 0: + ofs = master.shape[2]-cut_cols + out[j:ofs, i:i+CHUNK_SIZE, :] = preds + elif cut_cols != 0 and cut_rows != 0: + ofs_col = master.shape[2]-cut_cols + ofs_row = master.shape[1]-cut_rows + out[j:ofs_col, i:ofs_row, :] = preds + else: + print("whatcha got goin on here?") + + sys.stdout.write("N 
eval: {}. Percent done: {:.4f}\r".format(ii, i / master.shape[1])) + + out = np.swapaxes(out, 0, 2) + out = out.astype(np.float32) + if outfile: + save_raster(out, outfile, meta) + return out + +def save_raster(arr, outfile, meta, count=NUM_CLASSES): + meta.update(count=count) + with rasopen(outfile, 'w', **meta) as dst: + dst.write(arr) + + +def get_features(gdf, path, row): + tmp = json.loads(gdf.to_json()) + features = [] + for feature in tmp['features']: + if feature['properties']['PATH'] == path and feature['properties']['ROW'] == row: + features.append(feature['geometry']) + return features + +def clip_raster(evaluated, path, row, outfile=None): + + shp = gpd.read_file(WRS2) + + with rasopen(evaluated, 'r') as src: + shp = shp.to_crs(src.crs) + meta = src.meta.copy() + features = get_features(shp, path, row) + out_image, out_transform = mask(src, shapes=features, nodata=np.nan) + + if outfile: + save_raster(out_image, outfile, meta) + +def clip_rasters(evaluated_tif_dir, include_string): + for f in glob(os.path.join(evaluated_tif_dir, "*.tif")): + if include_string in f: + out = os.path.basename(f) + out = out[out.find("_")+1:] + out = out[out.find("_")+1:] + out = out[out.find("_")+1:] + path = out[:2] + row = out[3:5] + clip_raster(f, int(path), int(row), outfile=f) + +def evaluate_images(image_directory, include_string, max_pools, exclude_string, prefix, save_dir): + ii = 0 + for f in glob(os.path.join(image_directory, "*.tif")): + if exclude_string not in f and include_string in f: + out = os.path.basename(f) + os.path.split(out)[1] + out = out[out.find("_"):] + out = os.path.splitext(out)[0] + out = prefix + out + ".tif" + out = os.path.join(save_dir, out) + ii += 1 + evaluate_image(f, model, max_pools=max_pools, outfile=out, ii=ii) + +def compute_iou(y_pred, y_true): + ''' This is slow. 
''' + y_pred = y_pred.flatten() + y_true = y_true.flatten() + current = confusion_matrix(y_true, y_pred, labels=[0, 1, 2, 3]) + print(current) + # compute mean iou + intersection = np.diag(current) + ground_truth_set = current.sum(axis=1) + predicted_set = current.sum(axis=0) + union = ground_truth_set + predicted_set - intersection + IoU = intersection / union.astype(np.float32) + return np.mean(IoU) + +def get_iou(): + shpfiles = [ + 'shapefile_data/test/MT_Huntley_Main_2013_372837_28.shp', + 'shapefile_data/test/MT_FLU_2017_Fallow_372837_28.shp', + 'shapefile_data/test/MT_FLU_2017_Forrest_372837_28.shp', + 'shapefile_data/test/MT_other_372837_28.shp'] + + m_dir = 'eval_test/all_ims/' + ls = [] + mask = image_directory + 'class_mask_37_28_2013.tif' + for f in shpfiles: + msk = generate_class_mask(f, mask) + msk[msk != NO_DATA] = 1 + ls.append(msk) + y_true = np.vstack(ls) + indices = np.where(y_true != NO_DATA) + y_true = y_true[:, indices[1], indices[2]] + y_true = np.argmax(y_true, axis=0) + for f in glob(m_dir + "*.tif"): + y_pred, meta = load_raster(f) + y_pred = y_pred[:, indices[1], indices[2]] + y_pred = np.round(y_pred) + y_pred.astype(np.int32) + y_pred = np.argmax(y_pred, axis=0) + print(f, compute_iou(y_pred, y_true)) + +def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, epochs=3): + ''' This function assumes that train/test data are + subdirectories of training_directory, with + the names train/test.''' + model = model(NUM_CLASSES) + if NUM_CLASSES <= 2: + model.compile(loss=custom_objective_binary, + metrics=[m_acc], + optimizer='adam') + else: + model.compile(loss=custom_objective, + metrics=['accuracy', m_acc], + optimizer=tf.keras.optimizers.Adam(lr=0.0001)) + + tb = TensorBoard(log_dir='graphs/30epochssimple/') + train = os.path.join(training_directory, 'train') + test = os.path.join(training_directory, 'test') + train_generator = generate_training_data(train, max_pools, random_sample=False, + 
train=True, box_size=box_size) + test_generator = generate_training_data(test, max_pools, random_sample=False, + train=False, box_size=box_size) + model.fit_generator(train_generator, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=1, + callbacks=[tb], + class_weight=[31.0, 1, 2.16, 67.76], + use_multiprocessing=True) + #validation_data=test_generator, + #validation_steps=valid_steps, + + return model + +if __name__ == '__main__': + + image_directory = 'master_rasters/' + training_directory = 'training_data/multiclass/' + m_dir = 'eval_test/multiclass/' + + #get_iou() + # models = [fcnn_functional, fcnn_functional_small, fcnn_model] + # save_dirs = [os.path.join(m_dir, "complex_fcnn"), os.path.join(m_dir, "simple_fcnn"), + # os.path.join(m_dir, 'no_pools')] + # model_names = ["multiclass_complex_fcnn.h5", 'multiclass_simple_fcnn.h5', + # 'multiclass_no_pools_fcnn.h5'] + # raster_names = ["complex_fcnnmulticlass", "simple_fcnnmulticlass", "no_poolsmulticlass"] + + models = [fcnn_functional] + save_dirs = [os.path.join(m_dir, "augmented/")] + model_names = ["complex_fcnn_augmented.h5"] + raster_names = ["class_weightscomplexaugmented"] + i = 1 + max_pools = 5 + for model_func, save_dir, model_name, raster_name in zip(models, save_dirs, model_names, raster_names): + pth = os.path.join(save_dir, model_name) + if not os.path.isfile(pth): + model = train_model(training_directory, model_func, steps_per_epoch=764, + valid_steps=246, max_pools=max_pools, epochs=5) + model.save(pth) + else: + model = tf.keras.models.load_model(pth, + custom_objects={'m_acc':m_acc, 'custom_objective':custom_objective}) + + evaluate_images(image_directory, include_string="37_28", + exclude_string="class", max_pools=max_pools, prefix=raster_name, save_dir=save_dir) + clip_rasters(save_dir, "37_28") + if i == 2: + max_pools = 3 + if i == 3: + max_pools = 0 + i += 1 diff --git a/fully-conv-classification/keras_cnn.py b/fully-conv-classification/keras_cnn.py new file mode 100644 
index 0000000..cb33b0b --- /dev/null +++ b/fully-conv-classification/keras_cnn.py @@ -0,0 +1,90 @@ +import h5py +import os +from glob import glob +import tensorflow as tf +from sklearn.model_selection import train_test_split +from tensorflow.keras.callbacks import TensorBoard +import numpy as np +from shuffle_data import one_epoch + +def keras_model(kernel_size, n_classes): + model = tf.keras.Sequential() + # Must define the input shape in the first layer of the neural network + model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', + input_shape=(36, kernel_size, kernel_size))) + model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) + model.add(tf.keras.layers.Dropout(0.3)) + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')) + model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) + model.add(tf.keras.layers.Dropout(0.3)) + model.add(tf.keras.layers.Flatten()) + model.add(tf.keras.layers.Dense(256, activation='relu')) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Dense(n_classes, activation='softmax')) + # Take a look at the model summary + model.summary() + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) + return model + +def train_next_batch(model, features, labels, n_classes=4, epochs=5, batch_size=128): + + # shuffle the labels again + + tb = TensorBoard(log_dir='graphs/cnn/') + x_train, x_test, y_train, y_test = train_test_split(features, labels, + test_size=0.01, random_state=42) + model.fit(x_train, + y_train, + batch_size=batch_size, + epochs=epochs, + validation_data=(x_test, y_test), + callbacks=[tb]) + return model + + +def evaluate_model(features, labels): + score = model.evaluate(features, labels, verbose=0) + print('\n', 'Test accuracy:', score[1], '\n') + +def make_one_hot(labels, n_classes): + ret = np.zeros((len(labels), n_classes)) + for i, e in enumerate(labels): + ret[i, int(e)] = 1 + return ret + 
+def get_next_batch(file_map, n_classes=4): + features, labels = next_batch(file_map) + labels = make_one_hot(labels, n_classes) + return features, labels + +def is_it(f, targets): + for e in targets: + if e in f and 'sample' not in f: + return True + return False + +def fnames(class_code): + return "training_data/class_{}_train.h5".format(class_code) + +# Yield the concatenated training array? + +if __name__ == '__main__': + train_dir = 'training_data/' + model_dir = 'models/' + n_epochs = 1 + kernel_size = 41 + model_name = 'model_kernel_{}'.format(kernel_size) + total_instances = 100000 + + model_path = os.path.join(model_dir, model_name) + model = keras_model(41, 2) + model = tf.keras.models.load_model(model_path) + features = np.zeros((128, 36, 41, 41)) + labels = np.zeros((128, 4)) + train_next_batch(model, features, labels) + if not os.path.isfile(model_path): + model.save(model_path) + + diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py new file mode 100644 index 0000000..11cbac7 --- /dev/null +++ b/fully-conv-classification/models.py @@ -0,0 +1,154 @@ +import os +os.environ['KERAS_BACKEND'] = 'tensorflow' +import keras.backend as K +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, Dropout, UpSampling2D) + +def fcnn_model(n_classes): + model = tf.keras.Sequential() + # Must define the input shape in the first layer of the neural network + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=8, padding='same', activation='relu', + input_shape=(None, None, 36))) + model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=4, padding='same', activation='relu')) + model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=4, padding='same', activation='relu')) + model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) + model.add(tf.keras.layers.Dropout(0.5)) + 
model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', + activation='softmax')) # 1x1 convolutions for pixel-wise prediciton. + # Take a look at the model summary + #model.summary() + return model + +def fcnn_functional_small(n_classes): + x = Input((None, None, 36)) + + c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) + c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) + mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) + + c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(mp1) + c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(c2) + mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) + mp2 = Dropout(0.5)(mp2) + + c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(mp2) + c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(c3) + mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) + + last_conv = Conv2D(filters=256, kernel_size=(3,3), activation='relu', padding='same')(mp3) + + u1 = UpSampling2D(size=(2, 2))(last_conv) + u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) + u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) + + u1_c3 = Concatenate()([c3, u1]) + + u2 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1_c3) + u2 = UpSampling2D(size=(2, 2))(u2) + u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) + u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) + u2 = Dropout(0.5)(u2) + + u2_c2 = Concatenate()([u2, c2]) + u2_c2 = Dropout(0.5)(u2_c2) + + c4 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u2_c2) + u3 = UpSampling2D(size=(2, 2))(c4) + u3 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u3) + + u3_c1 = Concatenate()([u3, c1]) + + c5 = 
Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) + + model = Model(inputs=x, outputs=c5) + #model.summary() + return model + + +def fcnn_functional(n_classes): + + x = Input((None, None, 36)) + base = 2 + # exp from 4 to 5. + exp = 6 + c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(x) + c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c1) + mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) + + exp+=1 + + c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp1) + c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c2) + mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) + # mp2 = Dropout(0.5)(mp2) + + exp+=1 + + c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp2) + c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c3) + mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) + #Jkj mp3 = Dropout(0.5)(mp3) + + exp+=1 + + c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp3) + c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c4) + mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) + # mp4 = Dropout(0.5)(mp4) + + exp+=1 + + c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp4) + c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c5) + mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c5) + + last_conv = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp4) + + u1 = UpSampling2D(size=(2, 2))(last_conv) + u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) + u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) + + exp-=1 + + u1_c5 = Concatenate()([c5, u1]) + + u2 = 
Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1_c5) + u2 = UpSampling2D(size=(2, 2))(u2) + u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) + u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) + # u2 = Dropout(0.5)(u2) + + u2_c4 = Concatenate()([u2, c4]) + + exp-=1 + + u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u2_c4) + u3 = UpSampling2D(size=(2, 2))(u3) + u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3) + #u3 = Dropout(0.5)(u3) + + u3_c3 = Concatenate()([u3, c3]) + + exp-=1 + + u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3_c3) + u4 = UpSampling2D(size=(2, 2))(u4) + u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4) + + u4_c2 = Concatenate()([u4, c2]) + + exp-=1 + + u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4_c2) + u5 = UpSampling2D(size=(2, 2))(u5) + u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) + u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) + + u5_c1 = Concatenate()([u5, c1]) + + u6 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u5_c1) + + model = Model(inputs=x, outputs=u6) + #model.summary() + return model diff --git a/fully-conv-classification/path_map.pkl b/fully-conv-classification/path_map.pkl new file mode 100644 index 0000000000000000000000000000000000000000..482c0e338367d0aeb4ddc43602ac727276f77052 GIT binary patch literal 6 NcmZo*jxA)+0{{jJ0e}Di literal 0 HcmV?d00001 diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py new file mode 100644 index 0000000..dba7c95 --- /dev/null +++ b/fully-conv-classification/prepare_images.py @@ -0,0 +1,309 @@ +# 
============================================================================================= +# Copyright 2018 dgketchum +# +# Licensed under the Apache License, Version 2 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================================= + + +import os +import sys + +abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(abspath) +from numpy import mean, datetime64 +from collections import OrderedDict +from datetime import datetime +from landsat.google_download import GoogleDownload +from sat_image.image import Landsat5, Landsat7, Landsat8 +from sat_image.fmask import Fmask +from sat_image.warped_vrt import warp_vrt +from met.thredds import GridMet, TopoWX +from bounds import RasterBounds, GeoBounds +from dem import AwsDem +from ssebop_app.image import get_image +from functools import partial +from pyproj import Proj, transform as pytransform +from shapely.geometry import shape, Polygon, mapping +from shapely.ops import transform +from rasterio import open as rasopen, float32 +from rasterio.crs import CRS +from pixel_classification.crop_data_layer import CropDataLayer as Cdl +from pixel_classification.runspec import landsat_rasters, static_rasters, ancillary_rasters, mask_rasters, climate_rasters +from sklearn.preprocessing import StandardScaler +from geopandas.geodataframe import GeoDataFrame + +class ImageStack(object): + """ + Prepare a stack of images from Landsat, terrain, etc. 
Save stack in identical geometry. + """ + + def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None, max_cloud_pct=None, start=None, end=None, year=None, n_landsat=None): + + self.landsat_mapping = {'LT5': Landsat5, 'LE7': Landsat7, 'LC8': Landsat8} + self.landsat_mapping_abv = {5: 'LT5', 7: 'LE7', 8: 'LC8'} + + self.sat = satellite + self.sat_abv = self.landsat_mapping_abv[self.sat] + self.sat_obj = self.landsat_mapping[self.sat_abv] + + self.path = path + self.row = row + self.lat = lat + self.lon = lon + self.year = year + + self.max_cloud = max_cloud_pct + self.start = start + self.end = end + self.root = root + + self.profile = None + self.dst_path_cloud = None + self.dst_path_water = None + self.landsat = None + self.scenes = None + self.image_dirs = None + self.image_paths = None + self.stack_features = None + self.paths_map = None + self.masks = None + + self.cdl_tif = None + self.cdl_mask = None + + self.n = n_landsat + + self.ancillary_rasters = [] + self.exclude_rasters = [] + + if year and not start and not end: + self.start = '{}-05-01'.format(self.year) + self.end = '{}-10-15'.format(self.year) + + def build_training(self): + self.get_landsat(fmask=True) + self.profile = self.landsat.rasterio_geometry + self.get_precip() + self.get_et() + self.get_terrain() + self.paths_map, self.masks = self._order_images() + + def build_evaluating(self): + self.get_landsat(fmask=False) + self.profile = self.landsat.rasterio_geometry + #self.get_et() + #self.get_precip() + self.get_terrain() + self.paths_map, self.masks = self._order_images() # paths map is just path-> location + # in filesystem. + + def get_cdl(self): + """download cdl and make a mask, save to the + root directory with filename cdl_mask.tif. + The cdl is reprojected here. 
+ """ + self.cdl_mask = os.path.join(self.root, 'cdl_mask.tif') + if not os.path.isfile(self.cdl_mask): + print('get {}'.format(self.cdl_mask)) + polygon = self.landsat.get_tile_geometry() + cdl = Cdl(year=self.year, target_profile=self.landsat.profile) + cdl.get_mask(clip_geometry=polygon, out_file=self.cdl_mask) + else: + print('{} exists'.format(self.cdl_mask)) + self.exclude_rasters.append(self.cdl_mask) + + def get_landsat(self, fmask=False): + """Download from internet and select scenes from n_landsat + g.download() then saves the selected scenes into + the root directory. + """ + if self.lat is None: + g = GoogleDownload(self.start, self.end, self.sat, path=self.path, row=self.row, + output_path=self.root, max_cloud_percent=self.max_cloud) + else: + g = GoogleDownload(self.start, self.end, self.sat, latitude=self.lat, longitude=self.lon, + output_path=self.root, max_cloud_percent=self.max_cloud) + + g.select_scenes(self.n) + self.scenes = g.selected_scenes + g.download(list_type='selected') + + self.image_dirs = [x[0] for x in os.walk(self.root) if + os.path.basename(x[0])[:3] in self.landsat_mapping.keys()] + + self._get_geography() + if fmask: + [self._make_fmask(d) for d in self.image_dirs] + + def get_precip(self): + poly_in = self.landsat.get_tile_geometry() + poly_in = Polygon(poly_in[0]['coordinates'][0]) + project = partial( + pytransform, + Proj(self.profile['crs']), + Proj(init='epsg:32612')) + for_bounds = partial( + pytransform, + Proj(self.profile['crs']), + Proj(init='epsg:4326')) + dates = self.scenes['DATE_ACQUIRED'].values + # Change the coordinate system + # The issue: the CRSs for the bounding box and for the mask are different. + # In _project, the incorrect CRS was making it throw an error. + # the fix? Inputting bounds in a unprojected CRS and + # a projected shape for masking. 
+ poly = transform(project, poly_in) + poly_bounds = transform(for_bounds, poly_in) + poly = Polygon(poly.exterior.coords) + geometry = [mapping(poly)] + geometry[0]['crs'] = CRS({'init':'epsg:32612'}) + bounds = poly_bounds.bounds + for date in dates: + outfile = os.path.join(self.root, 'precip_{}.tif'.format(date)) + if not os.path.isfile(outfile): + print("Get {}".format(outfile)) + d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. + bds = GeoBounds(wsen=bounds) + gm = GridMet(variable='pr', clip_feature=geometry, + bbox=bds, target_profile=self.profile, date=d) + out = gm.get_data_subset() + gm.save_raster(out, self.landsat.rasterio_geometry, outfile) + + + def get_terrain(self): + """ + Get digital elevation maps from amazon web services + save in the project root directory with filenames enumerated + in the next three lines. + + """ + + slope_name = os.path.join(self.root, 'slope.tif') + aspect_name = os.path.join(self.root, 'aspect.tif') + dif_elev = os.path.join(self.root, 'elevation_diff.tif') + + check = [os.path.isfile(x) for x in [slope_name, aspect_name, dif_elev]] + + if False in check: + polygon = self.landsat.get_tile_geometry() + bb = RasterBounds(affine_transform=self.profile['transform'], + profile=self.profile, latlon=True) + dem = AwsDem(zoom=10, target_profile=self.profile, bounds=bb, + clip_object=polygon) + + dem.terrain(attribute='slope', + out_file=slope_name, save_and_return=True) + dem.terrain(attribute='aspect', + out_file=aspect_name, save_and_return=True) + elev = dem.terrain(attribute='elevation') + elev = elev - mean(elev) + dem.save(elev, geometry=dem.target_profile, output_filename=dif_elev) + + def get_et(self): + for i, d in enumerate(self.image_dirs): + l = self.landsat_mapping[self.sat_abv](d) + _id = l.landsat_scene_id + get_image(image_dir=d, parent_dir=self.root, image_exists=True, image_id=_id, + satellite=self.sat, path=self.path, row=self.row, image_date=l.date_acquired, + 
landsat_object=self.landsat, overwrite=False) + + def warp_vrt(self): + warp_vrt(self.root, delete_extra=False, use_band_map=False, remove_bqa=True) + + def _get_geography(self): + + master = self.image_dirs[0] + self.landsat = self.landsat_mapping[self.sat_abv](master) + + def _make_fmask(self, image_dir): + s = os.path.basename(image_dir) + self.dst_path_cloud = os.path.join(image_dir, '{}_cloud_fmask.tif'.format(s)) + self.dst_path_water = os.path.join(image_dir, '{}_water_fmask.tif'.format(s)) + + if os.path.isfile(self.dst_path_cloud) and os.path.isfile(self.dst_path_water): + print('{} and {} exist for {}'.format(os.path.basename(self.dst_path_cloud), + os.path.basename(self.dst_path_water), + image_dir)) + + else: + print('fmask for {}'.format(image_dir)) + lst_image = self.landsat_mapping[self.sat_abv](image_dir) + + f = Fmask(lst_image) + + c, shadow, water = f.cloud_mask() + cloud = c | shadow + + f.save_array(cloud, self.dst_path_cloud) + f.save_array(water, self.dst_path_water) + + def _organize_directory(self): + dst_dir = os.path.join(self.root, str(self.path), str(self.row), + str(self.year)) + if not os.path.isdir(dst_dir): + + try: + os.makedirs(dst_dir) + print('Made {}'.format(dst_dir)) + + except Exception: + pass + + return dst_dir + + def _order_images(self): + band_dct = OrderedDict() + mask_dct = OrderedDict() + + if not self.image_dirs: + raise NotImplementedError('must build stack with "build_all" before listing rasters') + + dates = self.scenes['DATE_ACQUIRED'].values + scenes = self.scenes['SCENE_ID'].values + s = datetime64('{}-01-01'.format(self.year)) + for d in dates: + try: + assert d > s + except AssertionError: + print('Scene dates appear to not increase monotonically') + raise NotImplementedError + s = d + + for sc in scenes: + + paths = os.listdir(os.path.join(self.root, sc)) + #c = climate_rasters(self.root) + b = [os.path.join(self.root, sc, x) for x in paths if x.endswith(landsat_rasters()[self.sat])] + a = 
[os.path.join(self.root, sc, x) for x in paths if x.endswith(ancillary_rasters())] + bands = a + b# + c + + bands.sort() + for p in bands: + band_dct[os.path.basename(p).split('.')[0]] = p + + masks = [os.path.join(self.root, sc, x) for x in paths if x.endswith(mask_rasters())] + for m in masks: + mask_dct[os.path.basename(m).split('.')[0]] = m + + dir_list = os.listdir(self.root) + files = [x for x in dir_list if os.path.isfile(os.path.join(self.root, x))] + static_files = [x for x in files if x.endswith(static_rasters())] + for st in static_files: + band_dct[os.path.basename(st).split('.')[0]] = os.path.join(self.root, st) + + return band_dct, mask_dct + +if __name__ == '__main__': + pass + +# ========================= EOF ==================================================================== diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py new file mode 100644 index 0000000..49cf263 --- /dev/null +++ b/fully-conv-classification/runner_from_shapefile.py @@ -0,0 +1,120 @@ +import warnings +import os +import glob +import pickle +from multiprocessing import Pool +from numpy import save as nsave +from compose_array_single_shapefile import PTASingleShapefile, ShapefileSamplePoints +from fiona import open as fopen +from shapely.geometry import shape +from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster, filter_shapefile + + +def download_images_over_shapefile(shapefile, image_directory, year, master_raster_directory): + '''Downloads p/r corresponding to the location of + the shapefile, and creates master raster. + Image_directory: where to save the raw images. 
+ mr_directory: " " master_rasters.''' + p, r = get_shapefile_path_row(shapefile) + suff = str(p) + '_' + str(r) + "_" + str(year) + landsat_dir = os.path.join(image_directory, suff) + satellite = 8 + if year < 2013: + satellite = 7 + if not os.path.isdir(landsat_dir): + os.mkdir(landsat_dir) + ims = download_images(landsat_dir, p, r, year, satellite) + else: + ims = download_images(landsat_dir, p, r, year, satellite) + + ms = create_master_raster(ims, p, r, year, master_raster_directory) + + return ims + +def download_from_pr(p, r, image_directory, year, master_raster_directory): + '''Downloads p/r corresponding to the location of + the shapefile, and creates master raster''' + suff = str(p) + '_' + str(r) + "_" + str(year) + landsat_dir = os.path.join(image_directory, suff) + satellite = 8 + if year < 2013: + satellite = 7 + if not os.path.isdir(landsat_dir): + os.mkdir(landsat_dir) + ims = download_images(landsat_dir, p, r, year, satellite) + else: + ims = download_images(landsat_dir, p, r, year, satellite) + + ms = create_master_raster(ims, p, r, year, master_raster_directory) + + return ims + +def sample_points_from_shapefile(shapefile_path, instances): + ssp = ShapefileSamplePoints(shapefile_path, m_instances=instances) + ssp.create_sample_points(save_points=True) + return ssp.outfile + +def shapefile_area(shapefile): + summ = 0 + with fopen(shapefile, "r") as src: + for feat in src: + poly = shape(feat['geometry']) + summ += poly.area + return summ + +def get_total_area(data_directory, filenames): + ''' Gets the total area of the polygons + in the files in filenames + TODO: Get an equal-area projection''' + + tot = 0 + for f in glob.glob(data_directory + "*.shp"): + if "sample" not in f: + for f2 in filenames: + if f2 in f: + tot += shapefile_area(f) + return tot + +def required_points(shapefile, total_area, total_instances): + area = shapefile_area(shapefile) + frac = area / total_area + return int(total_instances * frac) + +def 
split_shapefiles_multiproc(f): + data_directory = 'split_shapefiles_west/' + shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup' + fname = os.path.basename(f) + split_shapefile(shp_dir, fname, data_directory) + +def download_all_images(image_directory, shapefile_directory, year=2013): + ''' Downloads all images over each shapefile in + shapefile directory ''' + template = "{}_{}_{}" + done = set() + satellite = 8 + all_paths = [] + for f in glob.glob(os.path.join(shapefile_directory, "*.shp")): + p, r = get_shapefile_path_row(f) + t = template.format(p, r, year) + if t not in done: + done.add(t) + ims = download_images_over_shapefile(f, image_directory, year, master) + all_paths.append(ims.paths_map) + + with open("path_map.pkl", 'wb') as f: + pickle.dump(all_paths, f) + + return all_paths + + +if __name__ == "__main__": + # out_shapefile_directory = 'shapefile_data' + # shp = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/" + # for f in glob.glob(shp + "*.shp"): + # filter_shapefile(f, out_shapefile_directory) + + image_directory = 'image_data/' + shapefile_directory = 'shapefile_data/all_shapefiles' + master = 'master_rasters/' + year = 2013 + all_paths = download_all_images(image_directory, shapefile_directory, year) diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py new file mode 100644 index 0000000..d18aabf --- /dev/null +++ b/fully-conv-classification/runspec.py @@ -0,0 +1,185 @@ +# ============================================================================================= +# Copyright 2018 dgketchum +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================================= + +import os +import sys +from glob import glob +abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(abspath) + + +def landsat_rasters(): + b = {1: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF'), + 2: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF'), + 3: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF'), + 4: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF'), + 5: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF'), + + 7: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', + 'B6_VCID_1.TIF', 'B6_VCID_2.TIF', 'B7.TIF', 'B8.TIF'), + + 8: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', + 'B7.TIF', 'B8.TIF', 'B9.TIF', 'B10.TIF', 'B11.TIF')} + return b + +def climate_rasters(root): + return [f for f in glob(os.path.join(root, "*.tif")) if 'precip' in f] + + +def ancillary_rasters(): + a = ('lst.tif', 'ssebop_etrf.tif') + return a + + +def static_rasters(): + a = ('slope.tif', 'aspect.tif', 'elevation_diff.tif') + return a + + +def mask_rasters(): + m = ('cloud_fmask.tif', 'water_fmask.tif') + return m + + +class TrainingAssignments(object): + def __init__(self, root): + self.attribute_list = ['forest', 'fallow', 'irrigated', 'other'] + + self.root = root + self.shapes = None + + self.attributes = {0: {'ltype': 'irrigated', 'path': None}, + + 1: {'ltype': 'dryland', 'path': None}, + + 2: {'ltype': 
'forest', 'path': None}, + + 3: {'ltype': 'other', 'path': None}} + + def add_paths(self): + for key, vector in enumerate(self.shapes): + self.attributes[key]['path'] = os.path.join(self.root, vector) + + +class Idaho(TrainingAssignments): + + def __init__(self, root): + TrainingAssignments.__init__(self, root) + + self.shapes = ['ID_2011_Irrigated_WGS84_4030.shp', 'non_irrigated_ESPA_2011_100_200_ac.shp', + 'ID_Public_forest_4030.shp', 'ID_Public_other_4030.shp'] + self.add_paths() + + self.path = 40 + self.row = 30 + self.year = 2011 + self.sat = 5 + + +class Montana(TrainingAssignments): + + def __init__(self, root): + TrainingAssignments.__init__(self, root) + + self.shapes = ['MT_Huntley_Main_2013_3728.shp', 'MT_FLU_2017_Fallow_3728.shp', + 'MT_FLU_2017_Forrest_3728.shp', 'MT_other_3728.shp'] + self.add_paths() + + self.path = 37 + self.row = 28 + self.year = 2013 + self.sat = 8 + + +class Nevada(TrainingAssignments): + + def __init__(self, root): + TrainingAssignments.__init__(self, root) + + self.shapes = ['2015_IRR_ACRE_WGS84.shp', 'NV_fallow.shp', + 'NV_forest.shp', 'NV_other.shp'] + self.add_paths() + + self.path = 41 + self.row = 32 + self.year = 2015 + self.sat = 8 + + +class Oregon(TrainingAssignments): + + def __init__(self, root): + TrainingAssignments.__init__(self, root) + + self.shapes = ['harney_irrigated_2016.shp', 'harney_fallow_2016.shp', + 'OR_forest.shp', 'OR_other.shp'] + self.add_paths() + + self.path = 43 + self.row = 30 + self.year = 2016 + self.sat = 8 + + +class Utah(TrainingAssignments): + + def __init__(self, root): + TrainingAssignments.__init__(self, root) + + self.shapes = ['UT_Irrigated_3732_2014.shp', 'UT_UnirrigatedAg_3732.shp', + 'UT_forest.shp', 'UT_other.shp'] + self.add_paths() + + self.path = 37 + self.row = 32 + self.year = 2014 + self.sat = 8 + + +class Washington(TrainingAssignments): + + def __init__(self, root): + TrainingAssignments.__init__(self, root) + + self.shapes = ['WA_2017_irrigated_4427.shp', 
'WA_2017_unirrigated_ag_4427.shp', + 'WA_Forest_Practices_Applications_4427.shp', 'WA_other_4427.shp'] + self.add_paths() + + self.path = 44 + self.row = 27 + self.year = 2017 + self.sat = 8 + + +class MontanaTest(Montana): + def __init__(self): + Montana.__init__(self) + + for code, _dict in self.attributes.items(): + _dict['path'] = _dict['path'].replace(os.path.join('spatial_data', 'MT'), + os.path.join('tests', 'data', 'pixel_extract_test', + )) + _dict['path'] = _dict['path'].replace('.shp', '_clip.shp') + + self.unique_classes = len(self.attributes.keys()) + + self.sample_negative = False + + +if __name__ == '__main__': + pass + +# ========================= EOF ================================================================ diff --git a/fully-conv-classification/shuffle_data.py b/fully-conv-classification/shuffle_data.py new file mode 100644 index 0000000..ff15bcd --- /dev/null +++ b/fully-conv-classification/shuffle_data.py @@ -0,0 +1,57 @@ +import h5py +from collections import defaultdict +import numpy as np + +def one_epoch(filenames, random_indices, class_code, chunk_size=500, n_classes=4): + ''' Filename is the name of the data file, + chunk_size the number of instances that can fit in memory. 
+ ''' + if not isinstance(filenames, list): + filenames = [filenames] + for i in range(0, random_indices.shape[0], chunk_size): + ret = load_sample(filenames, random_indices[i:i+chunk_size]) + yield ret, make_one_hot(np.ones((ret.shape[0]))*class_code, n_classes) + +def make_one_hot(labels, n_classes): + ret = np.zeros((len(labels), n_classes)) + for i, e in enumerate(labels): + ret[i, int(e)] = 1 + return ret + +def load_sample(fnames, random_indices): + ''' Fnames: filenames of all files of class_code class + required_instances: number of instances of training data required ''' + random_indices.sort() + ls = [] + last = 0 + offset = 0 + for f in fnames: + with h5py.File(f, 'r') as hdf5: + for key in hdf5: + if hdf5[key].shape[0]: + last = offset + offset += hdf5[key].shape[0] + indices = random_indices[random_indices < offset] + indices = indices[indices >= last] + try: + ls.append(hdf5[key][indices-last, :, :, :]) + except UnboundLocalError as e: + pass + + flattened = [e for sublist in ls for e in sublist] + return np.asarray(flattened) + + +def get_total_instances(fnames): + total_instances = 0 + num_keys = 0 + for f in fnames: + with h5py.File(f, 'r') as hdf5: + for key in hdf5: + if hdf5[key].shape[0]: + total_instances += hdf5[key].shape[0] + num_keys += 1 + return total_instances, num_keys + +if __name__ == '__main__': + pass From cf11329a32a6bb35420478a67d83e2d7f72d47f7 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 3 Mar 2019 10:22:34 -0700 Subject: [PATCH 43/89] Changing project organization --- pixel_classification/classify.py | 2 +- .../compose_array_single_shapefile.py | 411 ------------------ pixel_classification/data_generators.py | 326 -------------- pixel_classification/data_utils.py | 289 ------------ pixel_classification/evaluate_image.py | 129 ------ pixel_classification/fully_conv.py | 199 --------- pixel_classification/keras_cnn.py | 90 ---- pixel_classification/models.py | 151 ------- pixel_classification/prepare_images.py | 23 - 
pixel_classification/runner_from_shapefile.py | 111 ----- pixel_classification/shuffle_data.py | 57 --- 11 files changed, 1 insertion(+), 1787 deletions(-) delete mode 100644 pixel_classification/compose_array_single_shapefile.py delete mode 100644 pixel_classification/data_generators.py delete mode 100644 pixel_classification/data_utils.py delete mode 100644 pixel_classification/evaluate_image.py delete mode 100644 pixel_classification/fully_conv.py delete mode 100644 pixel_classification/keras_cnn.py delete mode 100644 pixel_classification/models.py delete mode 100644 pixel_classification/runner_from_shapefile.py delete mode 100644 pixel_classification/shuffle_data.py diff --git a/pixel_classification/classify.py b/pixel_classification/classify.py index 170fd0d..fdb030f 100644 --- a/pixel_classification/classify.py +++ b/pixel_classification/classify.py @@ -20,7 +20,7 @@ from datetime import datetime from multiprocessing import cpu_count -from multiprocess.pool import Pool +from multiprocessing.pool import Pool from numpy.core.multiarray import concatenate sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) diff --git a/pixel_classification/compose_array_single_shapefile.py b/pixel_classification/compose_array_single_shapefile.py deleted file mode 100644 index 4ce57ef..0000000 --- a/pixel_classification/compose_array_single_shapefile.py +++ /dev/null @@ -1,411 +0,0 @@ -# ============================================================================================= -# Copyright 2018 dgketchum -# -# Licensed under the Apache License, Version 2. (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================================= - -import os -import sys - -abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(abspath) -import pickle -from copy import deepcopy -from warnings import warn -from datetime import datetime -from fiona import open as fopen -from numpy import linspace, max, nan, unique, ndarray, swapaxes, zeros, asarray -import h5py -from numpy.random import shuffle -from pandas import DataFrame, Series -import warnings -from pyproj import Proj, transform -from rasterio import open as rasopen -from shapely.geometry import shape, Point, mapping -from shapely.ops import unary_union -from data_utils import get_shapefile_path_row -loc = os.path.dirname(__file__) -WRS_2 = loc.replace('pixel_classification', - os.path.join('spatial_data', 'wrs2_usa_descending.shp')) - -''' -This script contains a class meant to gather data from rasters using a polygon shapefile. -The high-level method `extract_sample` will return an object ready for a -learning algorithm. 
-''' -loc = os.path.dirname(__file__) -WRS_2 = loc.replace('pixel_classification', - os.path.join('spatial_data', 'wrs2_descending.shp')) - -class NoCoordinateReferenceError(Exception): - pass - - -class UnexpectedCoordinateReferenceSystemError(Exception): - pass - - -class ShapefileSamplePoints: - - def __init__(self, shapefile_path=None, sample_point_directory=None, m_instances=None): - self.outfile = os.path.splitext(shapefile_path)[0] - self.outfile += "_sample_points.shp" - if sample_point_directory: - self.outfile = os.path.join(sample_point_directory, self.outfile) - - self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) - self.m_instances = m_instances - self.object_id = 0 - self.shapefile_path = shapefile_path - self.path, self.row = get_shapefile_path_row(shapefile_path) - - def _random_points(self, coords): - min_x, max_x = coords[0], coords[2] - min_y, max_y = coords[1], coords[3] - x_range = linspace(min_x, max_x, num=2 * self.m_instances) - y_range = linspace(min_y, max_y, num=2 * self.m_instances) - shuffle(x_range), shuffle(y_range) - return x_range, y_range - - def _add_entry(self, coord, val=0): - # TODO: Encode class_code in shapefile schema. 
- self.extracted_points = self.extracted_points.append({'FID': int(self.object_id), - 'X': coord[0], - 'Y': coord[1], - 'POINT_TYPE': val}, - ignore_index=True) - self.object_id += 1 - - def save_sample_points(self): - - points_schema = { - 'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]), - 'geometry': 'Point'} - meta = self.tile_geometry.copy() - meta['schema'] = points_schema - - with fopen(self.outfile, 'w', **meta) as output: - for index, row in self.extracted_points.iterrows(): - props = dict([('FID', row['FID']), ('POINT_TYPE', row['POINT_TYPE'])]) - pt = Point(row['X'], row['Y']) - output.write({'properties': props, - 'geometry': mapping(pt)}) - return None - - def _get_polygons(self, vector): - with fopen(vector, 'r') as src: - crs = src.crs - if not crs: - raise NoCoordinateReferenceError( - 'Provided shapefile has no reference data.') - if crs['init'] != 'epsg:4326': - raise UnexpectedCoordinateReferenceSystemError( - 'Provided shapefile should be in unprojected (geographic)' - 'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format( - vector)) - clipped = src.filter(mask=self.tile_bbox) - polys = [] - bad_geo_count = 0 - for feat in clipped: - try: - geo = shape(feat['geometry']) - polys.append(geo) - except AttributeError: - bad_geo_count += 1 - - return polys - - def create_sample_points(self, save_points=True): - """ Create a clipped training set from polygon shapefiles. - - This complicated-looking function finds the wrs_2 descending Landsat tile corresponding - to the path row provided, gets the bounding box and profile (aka meta) from - compose_array.get_tile_geometry, clips the training data to the landsat tile, then perform - s a union to reduce the number of polygon objects. - The dict object this uses has a template in pixel_classification.runspec.py. 
- Approach is to loop through the polygons, create a random grid of points over the - extent of each polygon, random shuffle order of points, loop over points, check if - point is within polygon, and if within, create a sample point. - - If a relatively simple geometry is available, use create_negative_sample_points(), though if - there are > 10**4 polygons, it will probably hang on unary_union(). """ - - polygons = self._get_polygons(self.shapefile_path) - instance_count = 0 - print("Making sample points. You have {} polygons".format(len(polygons))) - print("N_instances:", self.m_instances) - - if len(polygons) > self.m_instances: - areas = zip(polygons, [x.area for x in polygons]) - srt = sorted(areas, key=lambda x: x[1], reverse=True) - polygons = [x for x, y in srt[:self.m_instances]] - - if not isinstance(polygons, list): - polygons = [polygons] # for the case of a single polygon. - - positive_area = sum([x.area for x in polygons]) # the sum of all - # the areas. - class_count = 0 - - for i, poly in enumerate(polygons): - if class_count >= self.m_instances: - break - fractional_area = poly.area / positive_area # percent of - # total area that this polygon occupies - required_points = max([1, fractional_area * self.m_instances]) # how - # many points overall that are required to evenly - # sample from each polygon, based on area. 
- poly_pt_ct = 0 - x_range, y_range = self._random_points(poly.bounds) - for coord in zip(x_range, y_range): - if instance_count >= self.m_instances: - break - if Point(coord[0], coord[1]).within(poly): - self._add_entry(coord) - poly_pt_ct += 1 - instance_count += 1 - # print(instance_count) - if poly_pt_ct >= required_points: - break - class_count += poly_pt_ct - - if save_points: - self.save_sample_points() - - @property - def tile_bbox(self): - with fopen(WRS_2, 'r') as wrs: - for feature in wrs: - fp = feature['properties'] - if fp['PATH'] == self.path and fp['ROW'] == self.row: - bbox = feature['geometry'] - return bbox - - def _get_crs(self): - for key, val in self.paths_map.items(): - with rasopen(val, 'r') as src: - crs = src.crs - break - return crs - - @property - def tile_geometry(self): - with fopen(WRS_2, 'r') as wrs: - wrs_meta = wrs.meta.copy() - return wrs_meta - -class PTASingleShapefile: - - def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, - row=None, masked_raster=None, training_directory=None, paths_map=None, masks=None, - instances=None, sz=1000, overwrite_points=None, kernel_size=None, data_filename=None): - self.shapefile_path = shapefile_path - self.path = path - self.object_id = 0 - self.data_filename = data_filename - self.paths_map = paths_map - self.masks = masks - self.row = row - self.training_directory = training_directory - self.overwrite_points=overwrite_points - self.class_code = class_code - self.crs = self._get_crs() - self.m_instances = instances - self.sz = sz - self.master_raster = master_raster - self.data = None - self.kernel_size = kernel_size - self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) - - def extract_sample(self, save_points=True): - # TODO: Pare down this class' methods. - # Because of the large data size, pickling output data - # (and therefore using a one-band at a time extraction approach) - # is not feasible. 
- - out = os.path.splitext(self.shapefile_path)[0] - out += "_sample_points.shp" - if os.path.isfile(out): - print("sample points already created") - self._populate_array_from_points(out) - else: - print("Sample points not detected at {}".format(out)) - if self.master_raster is not None: - self.training_data_from_master_raster() - else: - self.populate_raster_data_array() - - def _populate_array_from_points(self, fname): - - with fopen(fname, 'r') as src: - for feat in src: - coords = feat['geometry']['coordinates'] - val = feat['properties']['POINT_TYPE'] - self._add_entry(coords, val=val) - - def _dump_data(self, data): - n = "class_{}_train.h5".format(self.class_code) - if self.data_filename is None: - to_save = os.path.join(self.training_directory, n) - else: - to_save = self.data_filename - with h5py.File(to_save, 'a') as f: - pref = os.path.basename(self.shapefile_path) - dset = f.create_dataset("{}_{}".format(pref, - str(datetime.now())), data=data) - - def training_data_from_master_raster(self): - - ofs = self.kernel_size // 2 - sz = self.sz # some heuristic that indicates when I run out of memory - tmp_arr = [] - with rasopen(self.master_raster, 'r') as rsrc: - rass_arr = rsrc.read() - affine = rsrc.transform - - for ind, row in self.extracted_points.iterrows(): - # iterate through extracted points. 
- if (ind+1) % sz == 0: - print("Writing to disk...") - qq = asarray(tmp_arr) - del tmp_arr - self._dump_data(qq) - del qq - tmp_arr = [] - - x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) - c, r = ~affine * (x, y) - try: - rr = int(r); cc = int(c) - raster_subgrid = rass_arr[:, rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] - tmp_arr.append(raster_subgrid) - - except IndexError as e: - print(e) - - if len(tmp_arr): - print("Writing to disk...") - qq = asarray(tmp_arr) - self._dump_data(qq) - del qq - del tmp_arr - - def populate_raster_data_array(self, save=True): - - for key, val in self.paths_map.items(): - s = self._grid_raster_extract(val, _name=key) - print('Extracting {}'.format(key)) - self.extracted_points = self.extracted_points.join(s, how='outer') - - for key, val in self.masks.items(): - s = self._grid_raster_extract(val, _name=key) - print('Extracting {}'.format(key)) - self.extracted_points = self.extracted_points.join(s, how='outer') - - data_array, targets = self._purge_raster_array() - data = {'df': data_array, - 'features': data_array.columns.values, - 'data': data_array.values, - 'target_values': targets, - 'paths_map': self.paths_map} - - print('feature dimensions: {}'.format(data_array.shape)) - - for key, val in data.items(): - setattr(self, key, val) - - def _purge_raster_array(self): - data_array = deepcopy(self.extracted_points) - target_vals = Series(data_array.POINT_TYPE.values, name='POINT_TYPE') - data_array.drop(['X', 'Y', 'FID', 'POINT_TYPE'], axis=1, inplace=True) - try: - for msk in self.masks.keys(): - for idx, sub_raster in enumerate(data_array[msk]): - if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 1.: - data_array.loc[idx, :] = nan # make whole row NaN - except TypeError as e: - print(sub_raster, msk, idx) - data_array.loc[idx, :] = nan - - try: - for bnd in self.paths_map.keys(): - for idx, sub_raster in enumerate(data_array[bnd]): - if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 0.: - 
data_array.loc[idx, :] = nan - except TypeError as e: - data_array.loc[idx, :] = nan - - data_array = data_array.join(target_vals, how='outer') - - data_array.dropna(axis=0, inplace=True) - data_array.drop(self.masks, axis=1, inplace=True) - target_vals = data_array.POINT_TYPE.values - - data_array = data_array.drop(['POINT_TYPE'], - axis=1, inplace=False) - return data_array, target_vals - - def _geo_point_to_projected_coords(self, x, y): - - in_crs = Proj(init='epsg:4326') - out_crs = Proj(init=self.crs['init']) - x, y = transform(in_crs, out_crs, x, y) - return x, y - - def _grid_raster_extract(self, raster, _name): - """ - Open the raster. Store the points in a Series - a labeled - numpy array. Then in _purge array, we iterate over the masks - and the paths_map and drop pixels where masks = 1 and pixels where bound = 0. - """ - - with rasopen(raster, 'r') as rsrc: - rass_arr = rsrc.read() - rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) - affine = rsrc.transform - - s = Series(index=range(0, self.extracted_points.shape[0]), name=_name, dtype=object) - for ind, row in self.extracted_points.iterrows(): - x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) - c, r = ~affine * (x, y) - try: - ofs = self.kernel_size // 2 - rr = int(r); cc = int(c) - raster_subgrid = rass_arr[rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] # possible issues: edges of image - s[ind] = raster_subgrid - except IndexError: - s[ind] = None - - return s - - @property - def tile_bbox(self): - with fopen(WRS_2, 'r') as wrs: - for feature in wrs: - fp = feature['properties'] - if fp['PATH'] == self.path and fp['ROW'] == self.row: - bbox = feature['geometry'] - return bbox - - def _get_crs(self): - for key, val in self.paths_map.items(): - with rasopen(val, 'r') as src: - crs = src.crs - break - return crs - - @property - def tile_geometry(self): - with fopen(WRS_2, 'r') as wrs: - wrs_meta = wrs.meta.copy() - return wrs_meta diff --git a/pixel_classification/data_generators.py 
b/pixel_classification/data_generators.py deleted file mode 100644 index 74a636d..0000000 --- a/pixel_classification/data_generators.py +++ /dev/null @@ -1,326 +0,0 @@ -import numpy as np -import os -import time -import pickle -import sys -import matplotlib.pyplot as plt -from glob import glob -from random import sample, shuffle -from data_utils import generate_class_mask, get_shapefile_path_row -from rasterio import open as rasopen -from warnings import warn - -NO_DATA = -1 -try: - MAX_POOLS = int(os.environ["MAX_POOLS"]) - print("MAX_POOLS", MAX_POOLS) -except: - warn("MAX_POOLS environment variable has not been set. Defaulting to 5.") - MAX_POOLS = 5 -CHUNK_SIZE = 1248 # some value that is evenly divisible by 2^3. -NUM_CLASSES = 2 - -def random_sample(class_mask, n_instances, box_size=0, fill_value=1): - if box_size: - n_instances /= box_size - out = np.where(class_mask != NO_DATA) - class_mask = class_mask.copy() - try: - out_x = out[1] - out_y = out[2] - except IndexError as e: - out_x = out[0] - out_y = out[1] - - indices = np.random.choice(len(out_x), size=n_instances, replace=False) - out_x = out_x[indices] - out_y = out_y[indices] - - try: - class_mask[:, :, :] = NO_DATA - if box_size == 0: - class_mask[0, out_x, out_y] = fill_value - else: - ofs = box_size // 2 - for x, y in zip(out_x, out_y): - class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = fill_value - - except IndexError as e: - class_mask[:, :] = NO_DATA - if box_size == 0: - class_mask[out_x, out_y] = fill_value - else: - ofs = box_size // 2 - for x, y in zip(out_x, out_y): - class_mask[x-ofs:x+ofs, y-ofs:y+ofs] = fill_value - - return class_mask - -def load_raster(master_raster): - with rasopen(master_raster, 'r') as src: - arr = src.read() - meta = src.meta.copy() - return arr, meta - - -def assign_class_code(target_dict, shapefilename): - for key in target_dict: - if key in shapefilename: - return target_dict[key] - print("{} has no known match in target_dict.".format(shapefilename)) - return None 
- - -class DataMask(object): - - def __init__(self, mask, class_code): - self.mask = mask - self.class_code = class_code - - -class DataTile(object): - - def __init__(self, data, class_mask, class_code): - self.dict = {} - self.dict['data'] = data - self.dict['class_mask'] = class_mask - self.dict['class_code'] = class_code - - def to_pickle(self, training_directory): - if not os.path.isdir(training_directory): - os.mkdir(training_directory) - template = os.path.join(training_directory, - 'class_{}_data/'.format(self.dict['class_code'])) - if not os.path.isdir(template): - os.mkdir(template) - outfile = os.path.join(template, str(time.time()) + ".pkl") - if not os.path.isfile(outfile): - with open(outfile, 'wb') as f: - pickle.dump(self.dict, f, protocol=pickle.HIGHEST_PROTOCOL) - else: - print("What? Contact administrator.") - - def set_data(self, data): - self.dict['data'] = data - - def set_code(self, class_code): - self.dict['class_code'] = class_code - - def set_class_mask(self, class_mask): - self.dict['class_mask'] = class_mask - - -def create_training_data(target_dict, shapefile_directory, image_directory, training_directory, - count): - ''' target_dict: {filename or string in filename : class_code} ''' - done = set() - year = 2013 - train_raster = 'master_raster_' - mask_raster = 'class_mask_' - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if f not in done: - all_matches = all_matching_shapefiles(f, shapefile_directory) - done.add(f) - for match in all_matches: - done.add(match) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) # for rasterio.mask.mask - # this file is projected the same as the shapefile. 
- master, meta = load_raster(master_raster) - masks = [] - all_matches.append(f) - for match in all_matches: - msk = generate_class_mask(match, mask_file) - cc = assign_class_code(target_dict, match) - if cc is not None: - dm = DataMask(msk, cc) - masks.append(dm) - - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - q = 0 - for msk in masks: - s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if not np.all(s == NO_DATA): - q += 1 - dt = DataTile(sub_master, s, msk.class_code) - dt.to_pickle(training_directory) - count += 1 - - return count - -def all_matching_shapefiles(to_match, shapefile_directory): - out = [] - pr = get_shapefile_path_row(to_match) - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if get_shapefile_path_row(f) == pr and to_match not in f: - out.append(f) - return out - - -class DataGen: - - def __init__(self, class_filename): - self.file_list = None - self.class_filename = class_filename - self._get_files() - self.n_files = len(self.file_list) - self.first = True - self.idx = 0 - - def _get_files(self): - self.file_list = [x[2] for x in os.walk(self.class_filename)][0] - self.file_list = [os.path.join(self.class_filename, x) for x in self.file_list] - - def next(self): - if self.idx == self.n_files: - self.first = True - if self.first: - self.first = False - self.idx = 0 - self.shuffled = sample(self.file_list, self.n_files) - out = self.shuffled[self.idx] - self.idx += 1 - else: - out = self.shuffled[self.idx] - self.idx += 1 - return self._from_pickle(out) - - def _from_pickle(self, filename): - with open(filename, 'rb') as f: - data = pickle.load(f) - return data - - -def generate_training_data(training_directory, box_size=0): - ''' Assumes data is stored in training_directory - in subdirectories labeled class_n_train - and that n_classes is a global variable.''' - class_dirs = [os.path.join(training_directory, x) for x in 
os.listdir(training_directory)] - generators = [] - for d in class_dirs: - generators.append(DataGen(d)) - # I want to - # a. shuffle the filenames to draw. - # b. Define one epoch to be when we've iterated over all - # examples of the class with the most training examples. - # TODO: Apply image augmentation. - while True: - min_samples = np.inf - data = [] - for gen in generators: - out = gen.next().copy() - data.append(out) - n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) - if n_samples < min_samples: - min_samples = n_samples - for subset in data: - samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, - fill_value=1) - one_hot = np.ones((NUM_CLASSES, samp.shape[1], samp.shape[2]))*NO_DATA - one_hot[int(subset['class_code']), :, :] = samp - for i in range(NUM_CLASSES): - if i != int(subset['class_code']): - one_hot[i, :, :][samp[0, :, :] != NO_DATA] = 0 - subset['class_mask'] = one_hot - - masters = [] - masks = [] - for subset in data: - master, mask = preprocess_data(subset['data'], subset['class_mask']) - masters.append(master) - masks.append(mask) - - # Shuffle order of data here? 
- for ms, msk in zip(masters, masks): - msk = msk.astype(np.int32) - yield ms, msk - - -def rotation(image, angle): - return transform.rotate(image, angle, mode='constant', cval=NO_DATA) - -def random_noise(image): - return util.random_noise(image) - -def h_flip(image): - return image[:, ::-1] - - -def augment_data(image, class_mask): - '''Randomly augments an image.''' - if np.random.randint(2): - deg = np.random.uniform(-25, 25) - image = rotation(image, deg) - class_mask = rotation(class_mask, deg) - if np.random.randint(2): - image = h_flip(image) - class_mask = h_flip(class_mask) - if np.random.randint(2): - image = np.flipud(image) - class_mask = np.flipud(class_mask) - return image, class_mask - - -def preprocess_data(master, mask, return_cuts=False): - shp = master.shape - rows = shp[1]; cols = shp[2] - if MAX_POOLS != 0: - cut_rows = rows % (2**MAX_POOLS) - cut_cols = cols % (2**MAX_POOLS) - else: - cut_rows = 0 - cut_cols = 0 - out_m = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - - if cut_cols != 0 and cut_rows != 0: - out_m[0, :, :, :] = master[:, :-cut_rows, :-cut_cols] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :-cut_rows, :-cut_cols] - elif cut_cols == 0 and cut_rows != 0: - out_m[0, :, :, :] = master[:, :-cut_rows, :] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :-cut_rows, :] - elif cut_cols != 0 and cut_rows == 0: - out_m[0, :, :, :] = master[:, :, :-cut_cols] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :, :-cut_cols] - else: - out_m[0, :, :, :] = master[:, :, :] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :, :] - - out_m = np.swapaxes(out_m, 1, 3) - out_mask = np.swapaxes(out_mask, 1, 3) - if return_cuts: - 
return out_m, out_mask, cut_rows, cut_cols - - return out_m, out_mask - -if __name__ == '__main__': - shapefile_directory = 'shapefile_data/' - image_directory = 'master_rasters/' - irr1 = 'Huntley' - irr2 = 'Sun_River' - fallow = 'Fallow' - forest = 'Forrest' - other = 'other' - target_dict = {irr2:0, irr1:0, fallow:1, forest:1, other:1} - year = 2013 - train_dir = 'training_data/binary/train/' - shp_train = 'shapefile_data/train/' - count = 0 - count = create_training_data(target_dict, shp_train, image_directory, train_dir, count) - print("You have {} instances per training epoch.".format(count)) - test_dir = 'training_data/binary/test/' - shp_test = 'shapefile_data/test/' - count = 0 - count = create_training_data(target_dict, shp_test, image_directory, test_dir, count) - print("You have {} instances per test epoch.".format(count)) diff --git a/pixel_classification/data_utils.py b/pixel_classification/data_utils.py deleted file mode 100644 index eb6e788..0000000 --- a/pixel_classification/data_utils.py +++ /dev/null @@ -1,289 +0,0 @@ -import glob -import os -import geopandas as gpd -import json -from fiona import open as fopen -from lxml import html -from requests import get -from copy import deepcopy -from numpy import zeros, asarray, array, reshape, nan -from shapely.geometry import shape -from collections import defaultdict -from rasterio import float32, open as rasopen -from rasterio.mask import mask -from prepare_images import ImageStack -from sklearn.neighbors import KDTree -from sat_image.warped_vrt import warp_single_image - -NO_DATA = -1 - -def get_features(gdf): - tmp = json.loads(gdf.to_json()) - features = [feature['geometry'] for feature in tmp['features']] - return features - -def generate_class_mask(shapefile, master_raster): - ''' Generates a mask with class_val everywhere - shapefile data is present and a no_data value everywhere else. - no_data is -1 in this case, as it is never a valid class label. 
- Switching coordinate reference systems is important here, or - else the masking won't work. - ''' - shp = gpd.read_file(shapefile) - with rasopen(master_raster, 'r') as src: - shp = shp.to_crs(src.crs) - features = get_features(shp) - out_image, out_transform = mask(src, shapes=features, nodata=NO_DATA) - return out_image - - -def create_master_raster(image_stack, path, row, year, raster_directory): - fname = "master_raster_{}_{}_{}.tif".format(path, row, year) - pth = os.path.join(raster_directory, fname) - mask_fname = "class_mask_{}_{}_{}.tif".format(path, row, year) - mask_path = os.path.join(raster_directory, mask_fname) - if os.path.isfile(pth): - print("Master raster already created for {}_{}_{}.".format(path, row, year)) - if os.path.isfile(mask_path): - print('Class mask template already created') - return pth - else: - print("Creating class mask template.") - with rasopen(pth, 'r') as src: - meta = src.meta.copy() - h = meta['height'] - w = meta['width'] - - meta.update(count=1, dtype=float32) - - with rasopen(mask_path, 'w', **meta) as msk: - out = zeros((h, w)).astype(float32) - msk.write(out, 1) - return pth - - paths_map = image_stack.paths_map - first = True - stack = None - - for i, feat in enumerate(paths_map.keys()): # ordered dict ensures accuracy here. - - feature_raster = paths_map[feat] # maps bands to their location in filesystem. - - with rasopen(feature_raster, mode='r') as src: - arr = src.read() - raster_geo = src.meta.copy() - - if first: - first_geo = raster_geo.copy() - empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) - stack = empty - stack[i, :, :] = arr - first = False - else: - try: - stack[i, :, :] = arr - except ValueError: - # error can be thrown here if source raster doesn't have crs - # OR ! Because rasterio version. - # However, deepcopy becomes an issue with the latest - # version of rasterio. 
- arr = warp_single_image(feature_raster, first_geo) - stack[i, :, :] = arr - - first_geo.update(count=1) - msk_out = zeros((1, stack.shape[1], stack.shape[2])).astype(float32) - with rasopen(mask_path, mode='w', **first_geo) as msk: - msk.write(msk_out) - - first_geo.update(count=len(paths_map.keys())) - - with rasopen(pth, mode='w', **first_geo) as dst: - dst.write(stack) - - return pth - -def get_shapefile_lat_lon(shapefile): - ''' Center of shapefile''' - with fopen(shapefile, "r") as src: - minx, miny, maxx, maxy = src.bounds - latc = (maxy + miny) / 2 - lonc = (maxx + minx) / 2 - - return latc, lonc - -def download_images(project_directory, path, row, year, satellite=8, n_landsat=3): - - image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, - max_cloud_pct=70, n_landsat=n_landsat, year=year) - - image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is - # a cloud mask. - return image_stack - -def construct_kdtree(wrs2): - centroids = [] - path_rows = [] # a mapping - features = [] - for feature in wrs2: - tile = shape(feature['geometry']) - centroid = tile.centroid.coords[0] - centroids.append([centroid[0], centroid[1]]) - z = feature['properties'] - p = z['PATH'] - r = z['ROW'] - path_rows.append(str(p) + "_" + str(r)) - features.append(feature) - - tree = KDTree(asarray(centroids)) - return tree, asarray(path_rows), asarray(features) - -def get_pr(poly, wrs2): - ls = [] - for feature in wrs2: - tile = shape(feature['geometry']) - if poly.within(tile): - z = feature['properties'] - p = z['PATH'] - r = z['ROW'] - ls.append(str(p) + "_" + str(r)) - return ls - -def get_pr_subset(poly, tiles): - ''' Use when you only want to iterate - over a subset of wrs2 tiles.''' - ls = [] - for feature in tiles: - tile = shape(feature['geometry']) - if poly.within(tile): - z = feature['properties'] - p = z['PATH'] - r = z['ROW'] - ls.append(str(p) + "_" + str(r)) - return ls - -def 
filter_shapefile(shapefile, out_directory): - """ Shapefiles may span multiple path/rows. - For training, we want all of the data available. - This function filters the polygons contained in - the shapefile into separate files for each path/row - contained in the shapefile. """ - path_row_map = defaultdict(list) - wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') - tree, path_rows, features = construct_kdtree(wrs2) - wrs2.close() - - cent_arr = array([0, 0]) - with fopen(shapefile, "r") as src: - meta = deepcopy(src.meta) - for feat in src: - poly = shape(feat['geometry']) - centroid = poly.centroid.coords[0] - cent_arr[0] = centroid[0] - cent_arr[1] = centroid[1] - centroid = cent_arr.reshape(1, -1) - dist, ind = tree.query(centroid, k=10) - tiles = features[ind[0]] - prs = get_pr_subset(poly, tiles) - for p in prs: - path_row_map[p].append(feat) - - outfile = os.path.basename(shapefile) - outfile = os.path.splitext(outfile)[0] - - for path_row in path_row_map: - out = outfile + path_row + ".shp" - with fopen(os.path.join(out_directory, out), 'w', **meta) as dst: - print("Saving {}".format(out)) - for feat in path_row_map[path_row]: - dst.write(feat) - - -def split_shapefile(base, base_shapefile, data_directory): - """ - Shapefiles may deal with data over multiple path/rows. - This is a method to get the minimum number of - path/rows required to cover all features. - Data directory: where the split shapefiles will be saved. - base: directory containing base_shapefile.""" - path_row = defaultdict(list) - id_mapping = {} - # TODO: un hardcode this directory. 
- wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') - tree, path_rows, features = construct_kdtree(wrs2) - wrs2.close() - - cent_arr = array([0, 0]) - with fopen(os.path.join(base, base_shapefile), "r") as src: - meta = deepcopy(src.meta) - for feat in src: - idd = feat['id'] - id_mapping[idd] = feat - poly = shape(feat['geometry']) - centroid = poly.centroid.coords[0] - cent_arr[0] = centroid[0] - cent_arr[1] = centroid[1] - centroid = cent_arr.reshape(1, -1) - dist, ind = tree.query(centroid, k=10) - tiles = features[ind[0]] - prs = get_pr_subset(poly, tiles) - for p in prs: - path_row[p].append(idd) - - non_unique_ids = defaultdict(list) - unique = defaultdict(list) - for key in path_row: - ls = path_row[key] # all features in a given path/row - placeholder = ls.copy() - for key1 in path_row: - if key != key1: - ls1 = path_row[key1] - # find unique keys in ls - placeholder = set(placeholder) - set(ls1) #all - # features present in placeholder that are not - # present in ls1; i.e. 
unique keys - unique[key] = list(placeholder) - if len(ls) != len(placeholder): - nu = set(ls) - set(placeholder) # all features present in ls that are not present in placeholder (non-unique) - for idd in list(nu): - non_unique_ids[idd].append(key) - - match_key = [] - for key in non_unique_ids: # unique ids - pr = None - hi = 0 - for pathrow in non_unique_ids[key]: # path/rows corresponding to non - # unique features - if len(unique[pathrow]) > hi: - pr = pathrow - hi = len(unique[pathrow]) - - if pr is not None: - unique[pr].append(key) - else: - choice = non_unique_ids[key] - choice.sort() - choice = choice[0] - unique[choice].append(key) - - prefix = os.path.splitext(base_shapefile)[0] - for key in unique: - if key is None: - continue - out = prefix + "_" + key + ".shp" - if len(unique[key]): - with fopen(os.path.join(data_directory, out), 'w', **meta) as dst: - print("Saving split shapefile to: {}".format(os.path.join(data_directory, out))) - for feat in unique[key]: - dst.write(id_mapping[feat]) - -def get_shapefile_path_row(shapefile): - """This function assumes that the original - shapefile has already been split, and relies on - the naming convention to get the path and row. 
""" - # strip extension - # TODO: Find some way to update shapefile metadata - shp = shapefile[-9:-4].split("_") - return int(shp[0]), int(shp[1]) - -if __name__ == "__main__": - pass diff --git a/pixel_classification/evaluate_image.py b/pixel_classification/evaluate_image.py deleted file mode 100644 index dc89e3b..0000000 --- a/pixel_classification/evaluate_image.py +++ /dev/null @@ -1,129 +0,0 @@ -import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' -from sys import stdout -#os.environ["CUDA_VISIBLE_DEVICES"]="-1" -import numpy as np -import multiprocessing -multiprocessing.set_start_method('spawn', force=True) -from rasterio import open as rasopen -from glob import glob -import numpy.ma as ma -import tensorflow as tf -from tensorflow.keras.models import load_model - -def get_weights(path): - model = tf.keras.models.load_model(path) - return model.weights - -def sub_img_list(im, kernel_size): - ofs = kernel_size // 2 - ls = [] - for i in range(kernel_size, im.shape[1]): - sub_imgs = np.zeros((im.shape[2]-kernel_size, 36, kernel_size, kernel_size)) - k = 0 - for j in range(kernel_size, im.shape[2]): - sub_img = im[:, i-kernel_size:i, j-kernel_size:j] - sub_imgs[k, :, :, :] = sub_img - k += 1 - - ls.append(sub_imgs) - if i % 2 == 0: - yield ls - -class Result: - - def __init__(self, data, idx): - self.data = data - self.idx = idx - -def write_raster(data, name, raster_geo): - - raster_geo['dtype'] = data.dtype - raster_geo['count'] = 1 - with rasopen(name, 'w', **raster_geo) as dst: - dst.write(data) - return None - -def split_image(image, kernel_size): - num_rows = image.shape[1] // os.cpu_count() - leftover = image.shape[1] % os.cpu_count() - ids = [] - arrs = [] - j = 0 - for idx, i in enumerate(range(kernel_size, image.shape[1], num_rows)): - arrs.append(image[:, i-kernel_size:i+num_rows+kernel_size:, :]) - ids.append(j) - j += 1 - - arrs.append(image[ :, image.shape[1]-leftover-kernel_size:, :]) - ids.append(j) - return arrs, ids - -def pool_job(path, image, 
ids): - model = Network(path) - while True: - eval_image(image, model, ids) - queue.put(os.getpid()) - -def is_target(f, targets): - - for ff in targets: - if ff in f: - return True - return False - -def get_prev_mask(target): - - for f in glob('evaluated_images/' + "*.npy"): - if target in f and 'running' in f: - return f - return None - -def eval_image(im, msk, idd): - model_path = 'models/model_kernel_41' - model = load_model(model_path) - kernel_size = 41 - mask = np.zeros((im.shape[1], im.shape[2])) - if msk is not None: - msk = np.load(msk) - mask[:msk.shape[0], :] = msk - begin = msk.shape[0] - del msk - else: - begin = kernel_size - ofs = kernel_size // 2 - for i in range(begin, im.shape[1]): - sub_imgs = np.zeros((im.shape[2]-kernel_size, 36, kernel_size, kernel_size)) - k = 0 - for j in range(kernel_size, im.shape[2]): - sub_img = im[:, i-kernel_size:i, j-kernel_size:j] - sub_imgs[k, :, :, :] = sub_img - k += 1 - - result = model.predict(sub_imgs) - result = np.argmax(result, axis=1) - mask[i-ofs, kernel_size - ofs: -(kernel_size-ofs-1)] = result - if i % 100 == 0: - np.save("evaluated_images/{}_running_eval".format(idd), mask[:i, :]) - stdout.write("\r{:.5f}".format(float(i)/im.shape[1])) - - np.save("evaluated_images/eval_{}".format(idd), mask) - return Result(mask, idd) - - -if __name__ == '__main__': - - path = 'models/model_kernel_41' - targets = ['38_27_2013', '40_26_2013', '40_27_2013', '39_27_2013', - '39_26_2013'] - i = 0 - kernel_size = 41 - for f in glob("master_rasters/to_eval/" + "*.tif"): - stdout.write("\rEvaluating image {}\n".format(f)) - with rasopen(f, 'r') as src: - raster_geo = src.meta.copy() - im = src.read() - eval_image(im, None, os.path.basename(f)) - - - diff --git a/pixel_classification/fully_conv.py b/pixel_classification/fully_conv.py deleted file mode 100644 index b796633..0000000 --- a/pixel_classification/fully_conv.py +++ /dev/null @@ -1,199 +0,0 @@ -import os -#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
-#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' -import tensorflow as tf -import keras.backend as K -#tf.enable_eager_execution() -import matplotlib.pyplot as plt -import numpy as np -import json -import geopandas as gpd -import sys -from glob import glob -from skimage import transform, util -from tensorflow.keras.callbacks import TensorBoard -from rasterio import open as rasopen -from rasterio.mask import mask -from shapely.geometry import shape -from fiona import open as fopen -from data_generators import generate_training_data, load_raster, preprocess_data -from models import fcnn_functional - -NO_DATA = -1 -MAX_POOLS = 5 -CHUNK_SIZE = 1248 # some value that is divisible by 2^3. -NUM_CLASSES = 4 -WRS2 = '../spatial_data/wrs2_descending_usa.shp' - -def m_acc(y_true, y_pred): - ''' Calculate accuracy from masked data. - The built-in accuracy metric uses all data (masked & unmasked).''' - y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) - masked = tf.not_equal(y_true, NO_DATA) - indices = tf.where(masked) - indices = tf.to_int32(indices) - indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) - y_true_masked = tf.gather_nd(params=y_true, indices=indices) - y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) - return K.cast(K.equal(K.argmax(y_true_masked, axis=-1), K.argmax(y_pred_masked, axis=-1)), K.floatx()) - -def custom_objective(y_true, y_pred): - '''I want to mask all values that - are not data, given a y_true - that has NODATA values. The boolean mask - operation is failing. It should output - a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) - tensor.''' - # Dice coefficient? 
- y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) - masked = tf.not_equal(y_true, NO_DATA) - indices = tf.where(masked) - indices = tf.to_int32(indices) - indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) - y_true_masked = tf.gather_nd(params=y_true, indices=indices) - y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) - return tf.keras.losses.categorical_crossentropy(y_true_masked, y_pred_masked) - -def evaluate_image(master_raster, model, outfile=None, ii=None): - - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive handling of this case. - else: - master, meta = load_raster(master_raster) - class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder - out = np.zeros((master.shape[1], master.shape[2], NUM_CLASSES)) - - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, return_cuts=True) - preds = model.predict(sub_master) - preds = preds[0, :, :, :] - - if cut_cols == 0 and cut_rows == 0: - out[:,j:j+CHUNK_SIZE, i:i+CHUNK_SIZE] = preds - elif cut_cols == 0 and cut_rows != 0: - ofs = master.shape[1]-cut_rows - out[:, j:j+CHUNK_SIZE, i:ofs] = preds - elif cut_cols != 0 and cut_rows == 0: - ofs = master.shape[2]-cut_cols - out[:, j:ofs, i:i+CHUNK_SIZE] = preds - elif cut_cols != 0 and cut_rows != 0: - ofs_col = master.shape[2]-cut_cols - ofs_row = master.shape[1]-cut_rows - out[:, j:ofs_col, i:ofs_row] = preds - else: - print("whatcha got goin on here?") - - sys.stdout.write("N eval: {}. 
Percent done: {:.4f}\r".format(ii, i / master.shape[1])) - - out = np.swapaxes(out, 0, 2) - out = out.astype(np.float32) - if outfile: - save_raster(out, outfile, meta) - return out - -def save_raster(arr, outfile, meta): - meta.update(count=NUM_CLASSES) - with rasopen(outfile, 'w', **meta) as dst: - dst.write(arr) - -def train_model(training_directory, steps_per_epoch, box_size=0, epochs=3): - # image shape will change here, so it must be - # inferred at runtime. - ''' This function assumes that train/test data are - subdirectories of training_directory, with - the names train/test.''' - model = create_model(NUM_CLASSES) - tb = TensorBoard(log_dir='graphs/') - train = os.path.join(training_directory, 'train') - test = os.path.join(training_directory, 'test') - train_generator = generate_training_data(train, box_size) - test_generator = generate_training_data(test, box_size) - model.fit_generator(train_generator, - validation_data=test_generator, - validation_steps=31, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=1, - callbacks=[tb], - use_multiprocessing=True) - return model - - -def create_model(n_classes): - model = fcnn_functional(n_classes) - model.compile(loss=custom_objective, - metrics=[m_acc], - optimizer='adam') - return model - - -def get_features(gdf, path, row): - tmp = json.loads(gdf.to_json()) - features = [] - for feature in tmp['features']: - if feature['properties']['PATH'] == path and feature['properties']['ROW'] == row: - features.append(feature['geometry']) - return features - -def clip_raster(evaluated, path, row, outfile=None): - - shp = gpd.read_file(WRS2) - - with rasopen(evaluated, 'r') as src: - shp = shp.to_crs(src.crs) - meta = src.meta.copy() - features = get_features(shp, path, row) - out_image, out_transform = mask(src, shapes=features, nodata=np.nan) - - #out_image[out_image != 0] = np.nan - if outfile: - save_raster(out_image, outfile, meta) - -def clip_rasters(evaluated_tif_dir, include_string): - for f in 
glob(os.path.join(evaluated_tif_dir, "*.tif")): - if include_string in f: - out = os.path.basename(f) - os.path.split(out)[1] - out = out[out.find("_")+1:] - #out = out[out.find("_")+1:] - path = out[:2] - row = out[3:5] - clip_raster(f, int(path), int(row), outfile=f) - -# TODO: Implement IoU so I can actually see how my model is doing. - -if __name__ == '__main__': - - shapefile_directory = 'shapefile_data/backup' - image_directory = 'master_rasters/' - training_directory = 'training_data' - - m_dir = 'eval_test' - pth = os.path.join(m_dir, "model_acctst.h5") - if not os.path.isfile(pth): - model = train_model(training_directory, 109, epochs=2) - model.save(pth) - else: - model = tf.keras.models.load_model(pth, - custom_objects={'custom_objective':custom_objective}) - ii = 0 - for f in glob(os.path.join(image_directory, "*.tif")): - if "class" not in f and "37_28" in f: - out = os.path.basename(f) - os.path.split(out)[1] - out = out[out.find("_")+1:] - out = out[out.find("_"):] - out = os.path.splitext(out)[0] - out = 'complexfcnn_multiclass' + out + ".tif" - #out = 'testing' + out + ".tif" - out = os.path.join(m_dir, out) - ii += 1 - evaluate_image(f, model, outfile=out, ii=ii) - clip_rasters(m_dir, "37_28") - diff --git a/pixel_classification/keras_cnn.py b/pixel_classification/keras_cnn.py deleted file mode 100644 index cb33b0b..0000000 --- a/pixel_classification/keras_cnn.py +++ /dev/null @@ -1,90 +0,0 @@ -import h5py -import os -from glob import glob -import tensorflow as tf -from sklearn.model_selection import train_test_split -from tensorflow.keras.callbacks import TensorBoard -import numpy as np -from shuffle_data import one_epoch - -def keras_model(kernel_size, n_classes): - model = tf.keras.Sequential() - # Must define the input shape in the first layer of the neural network - model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', - input_shape=(36, kernel_size, kernel_size))) - 
model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) - model.add(tf.keras.layers.Dropout(0.3)) - model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')) - model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) - model.add(tf.keras.layers.Dropout(0.3)) - model.add(tf.keras.layers.Flatten()) - model.add(tf.keras.layers.Dense(256, activation='relu')) - model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Dense(n_classes, activation='softmax')) - # Take a look at the model summary - model.summary() - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - return model - -def train_next_batch(model, features, labels, n_classes=4, epochs=5, batch_size=128): - - # shuffle the labels again - - tb = TensorBoard(log_dir='graphs/cnn/') - x_train, x_test, y_train, y_test = train_test_split(features, labels, - test_size=0.01, random_state=42) - model.fit(x_train, - y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - callbacks=[tb]) - return model - - -def evaluate_model(features, labels): - score = model.evaluate(features, labels, verbose=0) - print('\n', 'Test accuracy:', score[1], '\n') - -def make_one_hot(labels, n_classes): - ret = np.zeros((len(labels), n_classes)) - for i, e in enumerate(labels): - ret[i, int(e)] = 1 - return ret - -def get_next_batch(file_map, n_classes=4): - features, labels = next_batch(file_map) - labels = make_one_hot(labels, n_classes) - return features, labels - -def is_it(f, targets): - for e in targets: - if e in f and 'sample' not in f: - return True - return False - -def fnames(class_code): - return "training_data/class_{}_train.h5".format(class_code) - -# Yield the concatenated training array? 
- -if __name__ == '__main__': - train_dir = 'training_data/' - model_dir = 'models/' - n_epochs = 1 - kernel_size = 41 - model_name = 'model_kernel_{}'.format(kernel_size) - total_instances = 100000 - - model_path = os.path.join(model_dir, model_name) - model = keras_model(41, 2) - model = tf.keras.models.load_model(model_path) - features = np.zeros((128, 36, 41, 41)) - labels = np.zeros((128, 4)) - train_next_batch(model, features, labels) - if not os.path.isfile(model_path): - model.save(model_path) - - diff --git a/pixel_classification/models.py b/pixel_classification/models.py deleted file mode 100644 index d3df459..0000000 --- a/pixel_classification/models.py +++ /dev/null @@ -1,151 +0,0 @@ -import os -os.environ['KERAS_BACKEND'] = 'tensorflow' -import keras.backend as K -import tensorflow as tf -from tensorflow.keras.models import Model -from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, Dropout, UpSampling2D) - -def fcnn_model(n_classes): - model = tf.keras.Sequential() - # Must define the input shape in the first layer of the neural network - model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=8, padding='same', activation='relu', - input_shape=(None, None, 36))) - model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=4, padding='same', activation='relu')) - model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=4, padding='same', activation='relu')) - model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) - model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', - activation='softmax')) # 1x1 convolutions for pixel-wise prediciton. 
- # Take a look at the model summary - #model.summary() - return model - -def fcnn_functional_small(n_classes): - x = Input((None, None, 36)) - - c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) - c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) - mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) - - c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(mp1) - c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(c2) - mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - mp2 = Dropout(0.5)(mp2) - - c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(mp2) - c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(c3) - mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) - - last_conv = Conv2D(filters=256, kernel_size=(3,3), activation='relu', padding='same')(mp3) - - u1 = UpSampling2D(size=(2, 2))(last_conv) - u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) - u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) - - u1_c3 = Concatenate()([c3, u1]) - - u2 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1_c3) - u2 = UpSampling2D(size=(2, 2))(u2) - u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = Dropout(0.5)(u2) - - u2_c2 = Concatenate()([u2, c2]) - u2_c2 = Dropout(0.5)(u2_c2) - - c4 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u2_c2) - u3 = UpSampling2D(size=(2, 2))(c4) - u3 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u3) - - u3_c1 = Concatenate()([u3, c1]) - - c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) - - model = Model(inputs=x, outputs=c5) - #model.summary() - return model - - 
-def fcnn_functional(n_classes): - - x = Input((None, None, 36)) - base = 2 - exp = 4 - - c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(x) - c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c1) - mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) - - exp+=1 - - c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp1) - c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c2) - mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - mp2 = Dropout(0.5)(mp2) - - exp+=1 - - c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp2) - c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c3) - mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) - - exp+=1 - - c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp3) - c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c4) - mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) - - exp+=1 - - c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp4) - c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c5) - mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c5) - - last_conv = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp4) - - u1 = UpSampling2D(size=(2, 2))(last_conv) - u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) - u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) - - exp-=1 - - u1_c5 = Concatenate()([c5, u1]) - - u2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1_c5) - u2 = UpSampling2D(size=(2, 2))(u2) - u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = Conv2D(filters=base**exp, kernel_size=(3, 
3), activation='relu', padding='same')(u2) - u2 = Dropout(0.5)(u2) - - u2_c4 = Concatenate()([u2, c4]) - - exp-=1 - - u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u2_c4) - u3 = UpSampling2D(size=(2, 2))(u3) - u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3) - - u3_c3 = Concatenate()([u3, c3]) - - exp-=1 - - u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3_c3) - u4 = UpSampling2D(size=(2, 2))(u4) - u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4) - - u4_c2 = Concatenate()([u4, c2]) - - exp-=1 - - u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4_c2) - u5 = UpSampling2D(size=(2, 2))(u5) - u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) - u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='softmax', padding='same')(u5) - - u5_c1 = Concatenate()([u5, c1]) - - u6 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u5_c1) - - model = Model(inputs=x, outputs=u6) - #model.summary() - return model diff --git a/pixel_classification/prepare_images.py b/pixel_classification/prepare_images.py index 3ccbe2f..dba7c95 100644 --- a/pixel_classification/prepare_images.py +++ b/pixel_classification/prepare_images.py @@ -290,7 +290,6 @@ def _order_images(self): bands.sort() for p in bands: band_dct[os.path.basename(p).split('.')[0]] = p - self._normalize_and_save_image(p) masks = [os.path.join(self.root, sc, x) for x in paths if x.endswith(mask_rasters())] for m in masks: @@ -301,31 +300,9 @@ def _order_images(self): static_files = [x for x in files if x.endswith(static_rasters())] for st in static_files: band_dct[os.path.basename(st).split('.')[0]] = os.path.join(self.root, st) - self._normalize_and_save_image(os.path.join(self.root, st)) return band_dct, mask_dct - @staticmethod - def 
_normalize_and_save_image(fname): - norm = True - with rasopen(fname, 'r') as rsrc: - if "normalized" in rsrc.tags(): - return - else: - rass_arr = rsrc.read() - rass_arr = rass_arr.astype(float32) - profile = rsrc.profile.copy() - profile.update(dtype=float32) - rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) - scaler = StandardScaler() # z-normalization - scaler.fit(rass_arr) - rass_arr = scaler.transform(rass_arr) - with rasopen(fname, 'w', **profile) as dst: - dst.write(rass_arr, 1) - print("Normalized", fname) - dst.update_tags(normalized=True) - - if __name__ == '__main__': pass diff --git a/pixel_classification/runner_from_shapefile.py b/pixel_classification/runner_from_shapefile.py deleted file mode 100644 index cde589c..0000000 --- a/pixel_classification/runner_from_shapefile.py +++ /dev/null @@ -1,111 +0,0 @@ -import warnings -import os -import glob -from multiprocessing import Pool -from numpy import save as nsave -from compose_array_single_shapefile import PTASingleShapefile, ShapefileSamplePoints -from fiona import open as fopen -from shapely.geometry import shape -from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster, filter_shapefile - - -def download_images_over_shapefile(shapefile, image_directory, year, master_raster_directory): - '''Downloads p/r corresponding to the location of - the shapefile, and creates master raster. - Image_directory: where to save the raw images. 
- mr_directory: " " master_rasters.''' - p, r = get_shapefile_path_row(shapefile) - suff = str(p) + '_' + str(r) + "_" + str(year) - landsat_dir = os.path.join(image_directory, suff) - satellite = 8 - if year < 2013: - satellite = 7 - if not os.path.isdir(landsat_dir): - os.mkdir(landsat_dir) - ims = download_images(landsat_dir, p, r, year, satellite) - else: - ims = download_images(landsat_dir, p, r, year, satellite) - - ms = create_master_raster(ims, p, r, year, master_raster_directory) - - return ims - -def download_from_pr(p, r, image_directory, year, master_raster_directory): - '''Downloads p/r corresponding to the location of - the shapefile, and creates master raster''' - suff = str(p) + '_' + str(r) + "_" + str(year) - landsat_dir = os.path.join(image_directory, suff) - satellite = 8 - if year < 2013: - satellite = 7 - if not os.path.isdir(landsat_dir): - os.mkdir(landsat_dir) - ims = download_images(landsat_dir, p, r, year, satellite) - else: - ims = download_images(landsat_dir, p, r, year, satellite) - - ms = create_master_raster(ims, p, r, year, master_raster_directory) - - return ims - -def sample_points_from_shapefile(shapefile_path, instances): - '''Hopefully this can be nicely parallelized.''' - ssp = ShapefileSamplePoints(shapefile_path, m_instances=instances) - ssp.create_sample_points(save_points=True) - return ssp.outfile - -def shapefile_area(shapefile): - summ = 0 - with fopen(shapefile, "r") as src: - for feat in src: - poly = shape(feat['geometry']) - summ += poly.area - return summ - -def get_total_area(data_directory, filenames): - ''' Gets the total area of the polygons - in the files in filenames - TODO: Get an equal-area projection''' - - tot = 0 - for f in glob.glob(data_directory + "*.shp"): - if "sample" not in f: - for f2 in filenames: - if f2 in f: - tot += shapefile_area(f) - return tot - -def required_points(shapefile, total_area, total_instances): - area = shapefile_area(shapefile) - frac = area / total_area - return 
int(total_instances * frac) - -def split_shapefiles_multiproc(f): - data_directory = 'split_shapefiles_west/' - shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup' - fname = os.path.basename(f) - split_shapefile(shp_dir, fname, data_directory) - -def dl_all_ims(): - image_directory = 'image_data/' - shp = 'shapefile_data/' - master = 'master_rasters/' - year = 2013 - template = "{}_{}_{}" - done = set() - satellite = 8 - for f in glob.glob(os.path.join(shp, "*.shp")): - p, r = get_shapefile_path_row(f) - t = template.format(p,r,year) - if t not in done: - done.add(t) - download_images_over_shapefile(f, image_directory, year, master) - -if __name__ == "__main__": - # out_shapefile_directory = 'shapefile_data' - # shp = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/" - # for f in glob.glob(shp + "*.shp"): - # filter_shapefile(f, out_shapefile_directory) - dl_all_ims() - - diff --git a/pixel_classification/shuffle_data.py b/pixel_classification/shuffle_data.py deleted file mode 100644 index ff15bcd..0000000 --- a/pixel_classification/shuffle_data.py +++ /dev/null @@ -1,57 +0,0 @@ -import h5py -from collections import defaultdict -import numpy as np - -def one_epoch(filenames, random_indices, class_code, chunk_size=500, n_classes=4): - ''' Filename is the name of the data file, - chunk_size the number of instances that can fit in memory. 
- ''' - if not isinstance(filenames, list): - filenames = [filenames] - for i in range(0, random_indices.shape[0], chunk_size): - ret = load_sample(filenames, random_indices[i:i+chunk_size]) - yield ret, make_one_hot(np.ones((ret.shape[0]))*class_code, n_classes) - -def make_one_hot(labels, n_classes): - ret = np.zeros((len(labels), n_classes)) - for i, e in enumerate(labels): - ret[i, int(e)] = 1 - return ret - -def load_sample(fnames, random_indices): - ''' Fnames: filenames of all files of class_code class - required_instances: number of instances of training data required ''' - random_indices.sort() - ls = [] - last = 0 - offset = 0 - for f in fnames: - with h5py.File(f, 'r') as hdf5: - for key in hdf5: - if hdf5[key].shape[0]: - last = offset - offset += hdf5[key].shape[0] - indices = random_indices[random_indices < offset] - indices = indices[indices >= last] - try: - ls.append(hdf5[key][indices-last, :, :, :]) - except UnboundLocalError as e: - pass - - flattened = [e for sublist in ls for e in sublist] - return np.asarray(flattened) - - -def get_total_instances(fnames): - total_instances = 0 - num_keys = 0 - for f in fnames: - with h5py.File(f, 'r') as hdf5: - for key in hdf5: - if hdf5[key].shape[0]: - total_instances += hdf5[key].shape[0] - num_keys += 1 - return total_instances, num_keys - -if __name__ == '__main__': - pass From 3003d96969a1521bc4d6b0ba84307ee98f27f27e Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 3 Mar 2019 10:26:12 -0700 Subject: [PATCH 44/89] Making sure remote is different --- test | 1 + 1 file changed, 1 insertion(+) create mode 100644 test diff --git a/test b/test new file mode 100644 index 0000000..f9d7946 --- /dev/null +++ b/test @@ -0,0 +1 @@ +Just to check. 
From 88ef6030e4f85fa8dfff22f855037fc262d1806c Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 3 Mar 2019 10:31:19 -0700 Subject: [PATCH 45/89] Changing organization --- pixel_classification/checkpoint | 2 -- .../classifier.data-00000-of-00001 | Bin 216056 -> 0 bytes pixel_classification/classifier.index | Bin 490 -> 0 bytes pixel_classification/classifier.meta | Bin 46620 -> 0 bytes test | 1 - 5 files changed, 3 deletions(-) delete mode 100644 pixel_classification/checkpoint delete mode 100644 pixel_classification/classifier.data-00000-of-00001 delete mode 100644 pixel_classification/classifier.index delete mode 100644 pixel_classification/classifier.meta delete mode 100644 test diff --git a/pixel_classification/checkpoint b/pixel_classification/checkpoint deleted file mode 100644 index 23ca61b..0000000 --- a/pixel_classification/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "classifier" -all_model_checkpoint_paths: "classifier" diff --git a/pixel_classification/classifier.data-00000-of-00001 b/pixel_classification/classifier.data-00000-of-00001 deleted file mode 100644 index b24a38612b24022946df41b1f63778e95e3bfccb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 216056 zcmWKX_g{}+6u{HaPNhPkvPx)Bs?YO$?s-BgRHF3F9@#UIO-e~allIV{M2j>(_dJDY zNJ2&;36(^m5c&28++XhNo_o%Dzt54BlgG=KlsLicRFwU)2xzwwKuMA4^QdmHR44&s zg$Z1tk23atTT558ekGeycEI}Jy_hnT2qsha!<9+>=zX986uQN^yIF5=(%eh5MX?hK z&+O$ds*d6%{8C7s>UGqun@sAy`+{2QJ2FNtl$!5hsMKCLxL!~JD-P}y{jM}6EgmOO z*b~U7PH)CrHT^hnzZ?Q;^2pc>QTc=Vb=HAxXq%B z9Cq*o=E6-p?)nvy>c`XVMz5jO&IRkYx!^RchJi{C4CA-s{pmIsl;sTtX?}R-bvGD_ zErWFp+H|OADwpMF!7V&ojH+-4UZgY=nay8B?Hhhmtu;aDiI&*GOyGXDFMw~GT#?UF zBFZl97mnwS4no(b2xLTojx%*i#JuI zAVu(l{@UV%lImx%`Tbk6X5I(tUT4B3eL9c9E+e_OKQpl1Z#2I8kA<_%=EV8_EAU$- zPyW?CfFIx9;mx}A>*O%F+#`ZH(t)J*sXwkZxdUel#)I|we0udmKP`9FE$?3P zg`Qq(2XTLo;pVw*;5T**ziCqwHU>}PkFGYtTMbHF?ha+{+T5!!IZ>SN-8Plxej3Nm z&M@F_?|3FWC36^F{*(lpN=;_A&U>(l3Wb*sC&KLdA>50%Nl=>#J=^RIwN^a3SmZ

Ni0`UPM*in>4)P3H=>Iw#PA~K1*({ikl4~Na8uH(YG zvtai7BQ1Uy2^A&DI8vS`!INI#*N0hnTX!P&Aax!uNfL-y(InocQ4aT<>BO=v#l+lk z9G%`Z1Om@8l)0lsf17uKd8i$D_v90rY(Qki%(%4Jv$$Tbo~}xp3?>nAG~w|p@NKAt zc?M#<;$R1=U;lwOt1>{XWeoOhxlRLaCF5+DLO6BNhQE@L0sXJj;mE=7_{nT9{m`Yv zpDVcrO6}25nHvvYqS-uzdbaYoR7%37980Q-$}lFnr-GVAiU^)N>Sa`z^secR3V&pHuSK#*&%gX2FcCiz`?G|30c%`(?K-2PTwpf|Zl?1_SBdapw*Xv4_*14DLo03x^)f3M~ zK%Ss0<^X%iF0|~b{RX;kW4=fk%8QbSTlw0%+QMX&u+qF>BQ`1gF~gQmV*j-EGKMn> zEq829WBZRf3d(2vC-@VbU-I^6O_|XI!!nh5k4oEW%LLLV?y^@*X9<_~s1_^fEiQ|G z_mM5Vqe70}8Ee_&^p6djQ&Pq&T`l3p*Na-so>&$oCzYmLvu7frPL@xylwjYFDJ)CK zBV~P4-<758JySZ^VqI3X*`(CR%BO6F_GL>&K^4QfjuPaB>X!0Uy8P8Gwp3#6R5ta| zH9^_n8OFG{sC-AYLQy5-Q>vx>s%)j(ZOc=0)-v17zFGPYonjmpt!HJjpO@_$I#RYj z_c0@GB4fE|WtpJpr*YYiR|Tcr;85zmuv@?-&$Oh42LuWlDnhxG-IftmL8T3aZ1C<^ zZd!U?>n{5qsW15BHq~;#(4CzyeqKuir!Tg;9yYBm%A}xm%J^h>~{f~ zH=POk&sVVefR?~M*s;{U@N9X@gx1phCjYW)a(v3vx}bs58cUmUuX@8W_sC#TU_@NG!{4yda1&!T zvtdT5jL((w4%ska zbWC8re|yUsXN#4#pMF>tHS)XQ>Vcp#K3Q0H?2Wi3ESp+BBTa+dy4Ozd@OBIv+&W#5 z^8F}#QR}kclYWlCHF9*BZ2Z@TCN;c$+)T%&mQ&P8d z$)~BM%g>OqT+OF~Em0Ds1siXaagq})0+J^)@1J%FVkTTFpVeT>zL(Au_@_h&97CF! 
z^OAmyq^TX7ke^yQvBa0X-5bPy_?#v9@L@JP7IoOtnpmCSL;(UgsN%j%p>eo9NwaV`T#rdvm zP4PPRLehuQ6-xigGZO`>s9{N6t*>7kh;`B2av8Hf~nRO=wt>p?#POcHN%JN=$)Gle^ z*X$2P97!)xTbC(#WhXB%wXbBGyi!Y@Z5>KyFORc)>sZS4Zp$oJPDm=_@7*Z5sK%7G zdm0J7wRe7ZV&_eZ>%M+2ZPS}?IW#IzVK=mzmP!A|&D=M+v`M9T3j6+{WnkpJVh6uVrppZDj5j%ooaWl1%tsZ>Fw2 zmgzd##)f1pV~(G{&DL+bBn+w30p;ceM6UfKlWlvG4c5^{#VMorO%$X7?Q-p1BUvPUd4prR-A*m<4ZDBmxT zGkYeObF7ZFo|w;+R4A~$>^F9ewhSvZf?g5k{MlC!+tdoXDXg9Xc1S2F$WJ6X>Ssm$$kZB|;ZRuC8DE&Lhb%+4>qAi(E7 z!hM?-Fz0VtuoHf)XExOD7pjOAG81K0+1Gt*S>ugW?21-y$rPkoc@mhig)tgc#z-aK zWo+L`39RD^1XizH1j-s-%ztC$*c+10jNqpQxqmK~4GTA6U!B^+?x~Al)vGI5`|Q!g z_>2k}i2G|PXnMuA%}o}}FF3{ga^KBF4z6ZGP2Vy6+{5gdp8HJCqkdL#ceilVvJkem z|F}>(cOUzKI0%DpHZmXL-!WSbNC`iLe`HjqW--pi(!%?FiGmcxD{R_ERmT3*a(2Qh zOSbxkt}rhyh{tl_4c%%gXX0^dt-8J|LPw)4zL;iBm$n6DWQ z?3vAh?7}UJ*epW{cE@2o)?&;!R+weND8DLX*(0|Zi#99ajcu;XQ7v`mo8%Mb)OcS; zzkfi$GZyUDp5=_bMLi?T(_jYnZ4+7=2C#aovzQmQ3x(fQzq5rM{({qM-37Uuzp%IE zkFs9|;sm{#*=$r%G;60ii+vk7o-ujd%&Mx!vizYWc8ttoW}*F%FwJ4QaH-CJ%;e4k z!n=FM2+5<_>^5Nq`#VubSZR>Y*gvabqia*x@6qk7irsAicViQy;5&<5F?@eYzT(2o zPlfE5X}Zk&-9}8^+GXsbk}P(?lU$~?c@O*J?kk3~@(>C<0+?&Xdd!M^6Jg3NO<@Ho zV0KhAGL}^{h{wv?cSOn;rA@ai>7%r4#PBlnbZC?qJ(I8Te*3A@r9 zTDD7YFV7G6IWJ!c6Uz3IU77cxB6bZn6ul)QS5!iUd=GZl6pMPirg2Hln*4=V*J)?5 zFUV?UV7A79$YfO^oU8f;aAH0F=;?xbeh%oDrNhHfs!)AzwD9=)R*}n#U+C1PPK;tA zh{uFKB&l=?s2rI`xZIh%#-?nt{)QKvj}C+}6Rf!{X5C;Uy4aQIXm&j>}EMeQ;AJ}kZ zi0WFqbKmALq&;pPeQQt!JL`fV^mQ!mInj#xRpa>AulI7t12)6bLm4<=Rt2lHo$2Z8 z{`AYuIP$RnSNWv)QnZ#1gYJxN-1_TwFlDqQ20LVvgGvQN&wM;}sY^mh8*#2xD+!$q z7V&MTT!qzfQ{Yy$A>Usr!G#R`LKqzZbB7)aV~lIiHf#i^|4e~DIPV{r8(7ntK@F_T z;xM-(oW!2Vpm!thq30Yam}f15jusvIpz=EGn5F>+!?m{Gvkq_@cbOO+-+(p)b74~k;;E)hP9wfBG!QUHTL2ozF^5Ni0tpxw3Ux;@SR)fRL5K%$lWsI94 z#peXuawRnpSgBEjEBXbzLXiwn9}xl}GI3xPx|j>t`I}yqDh2N+p(ttSi~b{|2s8UG zjHr&IOI98SHz)?P&&v@v=z)yvaPKnr9KKm_75%?l6UFaYhDIiC5F%RyOLpzYeJ3ZO z>|$@K*}0S}iaLiX*GJRhW^u@Nek+ud)`97Yv*6wQ63az$*bWFyMp&Ot0`l*M#?E;kH(x$wnt`_izuC zx{3;Cp0?zQB(Grj=Q@&m;U&6^TuCu@KF>uy6i!NyAYO0pL2}f2h{(zjURr1)TBI3; 
z^DG|Yz~)9Ul@$(eo^6+5a3& zcFS>ssD*ro>rc2TSwXe)CAsd^r>L`*A|5#l)OL9>oS>D+cYnfprAR#MbFi@YKC(Oe z!RysY{<60-axb3Y>fCWyXAmn|ep;V1Dd`Y-L{#JH#~V?%!Um4m*TKrpXJ{4nTD0!L zX5sZ)_0(YWNE|RZjfo$w;J+mgV01$dDd|9b^Dz`gt7wo;0Yg`%7z>R*o(HkyJQyb* zL=&`oiC4}H-eu)8c(}4iG}8MtTvW;y&N5w%L7AFtSZ)!#jF2M?FU2oAQUxvp1?0la z``~n_mvH|L3FnXNpsANiAyax9XSC-FxGfsR-9MNPCX$x;uHhrqshr6FyQRcSJg>ri zmxp=EiyCtUZ+`_56bXE)O+;))rHAN3gx8K1?Q%YY~UV^h{I?G>%Wn<5aS+JzjhlH!2 zEx&YV7QeAShQ=*V#LM>KaQ}B1##fg^>cJs=H6;+;8)c#9sw$4%UtYdr-~*hHD}cv) zaM%1fUJGvh zmE~a<)gj2Clv*swp?mbjp&zkgFo(R=g&AVLu(|KM}+8|APxxQ~}Oy zrS~fGai56cZjx2RRQeMMa#t6M2gks#Ejs)hElr$bdst-iGzzvCtfUrCWq6(V23Ytw z77FeO;FiTXOkAwMZJc|9gsJopy9ZZ^q1p)A%*l|(3kC3GAPFt)&qL+id$|6>GupD` z8gcs(jm{_ZVf>y)sC-L{JW*;zL30`v^J$<5=j(%kZa1DuNyQtp1Hty*M{IST4Qi3Q zG4FmYWKB$m?fud8>56pvz_JTH+&@raY{vVaeum8PdvLX?U9`v21;4;4{2Z^ur#>pA zX5)^FR_=R8eyv%93olo*%}|F=FX`i;ttA~Tp8uPrp-P~S@EQj+1s zF1L`JjWw`qq&ohnoke}twBYua@t6d2xn1k_VPexWVV;g1c5H4Wdo7%}cPksn-qs9~ zwZ|U=U&=tsoufNo&0c%{-kMh0suF;2S})Ka zY&Y&dvz`Q)3{Z_{i@~y{3g*9Q05iQNsy#;wL*8m}*~5&k?dK`!!4e2^?@SAM|QAI+w3Qjqs_wM%F)Uxfltt zE8O7+$rjGLQa}zon801#_JVM8U*e*d4*2Y61Tl@8O&XDh&(>1HgBH(lW@axDpeL`m zG7OI{G=@5RZSHIX1M~d%bK6%};YYth%q4RH0qVEE#TOK46)=#2= z@zFHkdoQ%U{D$La-+`Dhd*RgBQkuLZ6<4)CCTyjb-cF)>kEqMs_)U@VYnS% zJe4DpLrt)SW}}&00$yX!5$nNRH0-8mruv?GT{5Cuc2=Q$$RactJVUe3nW0ksCcO34 zjri`Nu&L}4`J8%+s)?I`)~(YR8y190|Ecl^%uKnC?+3VI?cq55nkH`0ctDEH}oZNkKkQ?dEB`@EWErD1#0a{e@JIG1Z9S9*dr zR(j*S``x1a{3y`9)dQ#h-6Y~oQ?MdzE$`lZ3r~|MusqMAvyw7dHzt$*`zFp!zBL11 zG+)6#pEr}d$!AdM(MZ1U^ltp%W`V~F{a6(%Ylv}u3BP`h<@`c&@$9leoVmYR^li5! zZd?17&VRm@|FEJ$cvrFrI~5mmVJ4nr)ye^oU%MMd)+eCe=4Mi@ats>BCx~Wxib&z! 
zNV?dqu-tRybx;w$1=~H*xR4E|PL}WJ?Md}zGF>%f&bLzRAJRs@thWs)q)#;%@*tSbAN`6Xj7!Hc_IprZ>P}YOiNYP~UtwO(RER$E zO(aZw4ogO!#`E$&!O1ffvWL9{?3hfni=)9ne>={-5yt8H&&I-?S0JIcnYjFA`4jr@ zacPS!-%yZ8H&+9i`J__^?QV3LlK?5Z#BrL8JZkEVAzO1t^07-^LH_nYoMikQ?j6wQ z$Biq6x0z32S6>7MtkwgU3Tb||^lk!>{#Qe0 z-B`^3KEY7?9V!qywv{lMV&q8PRBn6mXZoOOENFRaa`TUkhsl+?(Em(=pYF1TyRUVU zTvwV4b=52+KApnl_W{0|_#T%SB*2N!bK#VE4b2k`(O`pK+99I|*6#xN!zb_3qz_Yh z&HJB0%1;r8rqAG>h@FGU6^F{wzVoolF&2+zRl)b)pHOqz76KQgcn#fY+!ikmZGVb$ zJ6mi}FLMt3jam!yd=$C!9f7dtY7~^8b|%Y4&ExFGXW)973)uB11Hae*0qf`qu+(He zf9KT!k@NX9kPrNdYFVm$(kg`azaKz+(RRMnR=-S1G6f#_n{duIRN;!C6i%34qi0%d z%KMxHQ2Z(-?cd~R?jS2Vu*r+xnJC4bi4DOccHbez{xBhD=3|)l1uRLCz(v0%;{8vj zDJy#tt{EN@{rFl&;;K^N@I?uJ)gl?L#V`PB4&A~TU1PbpxY301cZbL021&u_mGJGE zF)#H!9Z#FN!KEuG-NP$W@^i`yTmGuEsyEF!}{S>$pSI1N1Z`!frQY=!kPv(sTK!t#VbgSIj;e!%(;l#T;bOXX?nMlNDcmG!Id0DL5HJh&Bd-E@ zw0)BapZ*Hq-}o`0FI>&3e5z(jQfqL=^@Z4=@`j!?e1wy_HR!jclQ4J3F44rc4mz{E z8aFA4^97?@$=(DD&@+o0dwo8SEJi3b!U1RXMhAVH|wScIab>XMXb{goU z#3{j1upf$t*01KM^G%Vi$mEIbTR-Zt{S;^0UxjT2^Pu}z9-5hHptMF9SZ>zh2h-&^ zX7wcu8|{WxKfEBIcRu*qsPgky{euf}oM}AnwwI<&&Y|G$)CrC?Y4pD(naqa9*L3#nXwrOaH_Lty3Bk^Hv!9P(D+36obYpp)IBz}ol{i7Wa; z4u@F7u6Mv)nZUre6kS;9k|6TVyo}pTgGEL`aqu`C(fZmo_%kP|fZhr0 zoB0R3GCzf^j*ezPTS_TjI)niTGAQksI3UM%nQo|9bY-SIEhl54cj*ZaP z=L2t!-yowyuL#>`wP2NJhVXmt3+(rjAoG*lIQLI8sq>@9uKJSminZb|9%b5nmcD*Z(Ph5$kL7bc7pFx?j^N=p147X;5 zli6!Wg48i9u>Y5cN_D~LaXb^he*O=m=0@WZn_$e$DWz@$O(OY-Y!N~MH zG<}rkd&fQpB*s{0rz_f@YXq0_uh9ohyYNFu5BVs&70zuwMK-pDpnab=Y~6Mnb}0W8 zW~_TbhZrgDY5g;@k0H3w>lZrcJR-7&KqV*j6aOz7B<+w8gGarhW+|@l^7Cu>Byi?_ ze^7zQv3;V-;rv_@IY7gczLEfmujHw$8?UuD3Y8U|;MC`FaHJ~?$9Y*pgGVE_r}+Uy zG*ib|eQeTJ;#Iy{W9N+i@~QkZs1u{mRP~L{ou|XAnG5lH$VQ~?V`2E?E3b*TM<-3p zg0&N@U`$9QDBX#t_Gwo^-|Z9x+$@LDe|2HkCNq%Vsl%CcEQNn72;K^^z}7Y~?(a%N z+UC$nj8}R=)3jr7GVwD_dMU=&i+Vd`Dg1OOs~_x=`(TioCsXn8asY6^b|NgL>t7tY}&U4k6XlUGD^){ni&QI0q4> zH60>}kqRkd==qF!~^x&wNh3L}N3%r6o#CxVg&f{2`k{*m*LpN~yo(n*=Qo$&dC$3Wx z;ck~Qe|v5Zi7GllZ>%jr+sH_fzuEz~Z+-#u!%jh|tGw`SwjS!Ai=tkG9Diwh9yNE~ 
zj*rhJ%sn3*ypc57J$ki9*U(G$yzGhj-6K25S>Jii|J|7eV9+J7^-*u9r`$u*m0no``k&z*WZ9$a#LZuN)9}*drXXcQt9qfXVAkz zm;9LjhL(-JD!LXnLv-@xS?qH*!Kn{k(9?;1WZKj(4d3`JD?wSS-D=To;@w0S{^bE3DDFObS_5*dr`IvEg6#aMKj|}>I z;k~;JtZYK7fH}z881gYQcBAN+n#knzG*k_)6LNVP z-1}KG;pzUR#J(jPBiHG3X>Sg~JlRpiM>kFMyY4<*_KL+OEC3B31BhvEAZ}h7$c-)n zKkMUU%syR^luyX(L{xILs1r{ z^=H4pDjCA{uX4_l=h(XjLsIr`%PX}nnm zv5|Ge!mk<1#sAR0o+-fNFUl%N^52R*u-{OgGbq%B#6@3)ACF3skE3^@11SNKVmA1A zso_@7dU987ny?}v9`AIjQ`PaZSRVEfZ@#jCn@w-Pxjq?c5|pv>Up>kuSJ4aI!Q`<* zAYBvM2a~t&#Cd~t;5YLs9=^1OPR|J=>sk+B)!tojwM~W_*LnlJ+AQ$VKm@+%R|5O* z!yKtb424oJQ3={{QO060bk^k+{(4~soePIZ9@HMpM>Fj(x;~S~6$%&F=*eBQ=V~M* zZuTZ`T1G?CqjEY}vmNa_d?E1sBIs4u;LJ+@1DAPYg>Dk@&{R+)`ab(IG%rcR+S~0A z&1vJWMq{erTuZi=?5DcY9jM!xh1DYiA?%KKdVCc;ghq)6xcmL^z0^I1pv6Mm`JWky_SA)*Yrk=> zqB?vXF^VtV`T-04GRi{+I;ri{%Mhc!m3ufc0s=MkuzkTJ$TJ>3dq-3FrCEzRRrUfm z-207EKYelOQzsNJz5-1-=TSAg1#FE2$e&6v_X8O{b5DgV zJ@XLPW`@Chk6S{G>5Jg~=I3xLahRV)et~^fYWx(pi+C^ZkchK6ht*#xOunzr#ePd7 zK~A2?l*+@^X@jD16a8Uk&UiSne;gA4~T5oD7gDi3?0so z;JhJ}G$@GEY3H=@%Zzu_bAJsUj9JBk^EOeRrvX<{mQCZd4nu&-CQvmi5`GJ+g@YFj zaa)57Y_5%jKZajP)AwxHs(ut)uH3-MQ&Z7IEeFRIXW`I_xv;HDihudMmM}5fuyyHc zAmgqQiF?mUlFdaZpECmOwgl6*7cXIPcreKKoPp;XYiP9P6ke`lEB;|rKqT6Wk6MGE zZ^kY@!l4aMv5b3CdceJGRa2FTK@w({I$aX&n^=N{VS9Ye2tGv%?4=nlk z2(pzg(FKHobjLIdQ#FDqwk1S9Vh1xLP_671H%Y zU8jBFL5nWec|Hm^CC5Vl?;98v6G^7_J|F?&Pf;^tD->*r1^H2`n8IHmW>Xb8GiC)$ z9-fgqxE1Xny!ARbRCJ~T~(M#Vu)d1XRo4s(Mu27N+KIZouMr4D0G3{Y(^Jx(TRA=ka# z0RG%qhpSWuF>IPDy=A4r-m-DO7tx(nL>3 zSD;3*=;k#JZm-!+&Me!91yV9#6aJPeCd}aDRkUH|-#QrGB*rOMstC8e{3lA8bd(qc zY{C^gmO`4uVp!`cB$^Im(I)K)-MsBOo|Za>8DnMmgA(1KoZ|}yM^C`I)TPKRl;bPK zKa=}-A5=$_Q19sB(0?2NpJKy^L_;9IAZaO;oj-PqBHcM;}AUWB|* zb6lf!7c0)0gNa!#JeU>^KX4K_7QY0Oze!M=CxbbE*6_3Im0+t-^zdGiE()p(aoJ?{ix?iAwNvA4@hV!}xCR&^}zaKvt@ zTJ(4Igq99{Ff-2wqTxYPGm&aeN+3@r{D9?A2O+EQ51sbW63Z_KL($P}8Wz=%&siM(Mm)^#i?|8;cIX{?dwXir!oeE%c6$4U=Q_+SOV#$df?ln!5{E{M8w|YiT1PGiD670zC3IJX{86T 
z`M?PN%|dU~K5_+g^$huq`)k6EyFp1s?%^`{|mc)CF3pd06Et${%u@?*wRbKSG^!AHO9EvU>yOUKG=WyB1sb_q50Rp7(5}11O{Z%sHC|# z^Ry5CkSn3JhYmsU)OMk)sRC!e;WB)4wWZn1tYJ!1AJttpm)!iSKsq9Ml;+OR*0q_q zZ?i9!X`dp6qiSGIxF7Cv-48uO6_B#}9?tW6hqGT?hbt*paOS}epqnqj8yy|Pwe4HV z?MMs-$?ACW((Dp-{rMKgj1=FZ6gd!G~x4ZT^BW9NQH&IrqtD8BP>qSfHU%sL?7~pbH)pQ-gDP! z?(|fI%2;zgtMM_I1o4RX?~8;rI|I?g_yy^^EH=x4J**q4K zTuP})_Iu2qF_XVAV+rrOEE4a$>7%A|KEs;>+xY`gTJZSS1*mt($KLz0fbXaCr)$E| z$Lf)2zefpJs4WHc{dsW2)e5%EHQ_pol0=Et!BE=TNj&Y_apnSFl3i%YJ7@kwr#uT- zr!k4Y@puO;J8KGaKaS!5SQNoE$7E9Y*OY(j`9Cl56BtWiD**020yyi-$a{F@J@QX>Tz~z7v-{R)UU6Z-uFq8_FGfJ!oFjhjcYsK>la(s^W)!}%y#t-XUsRz_h{<{0H z+gXEi=zCJMu>|vHjYN|kTL|sU##*x?{N4Tsmt7I#N^UrCqX&1x_zX8*Q>+x1cSWMy z1A9(Q+70XW6WU|6fM1wVK`XB+a}R30!6iM8x^G_3^=PKSYI`eGtQ!P}`~m3I*A_{O z$B`@Te<4}33SQD8ns@RSI!;pJMGKFj$FvB{``iOYF%EQu#RRPB+bs$TFB8s9dqDf! zJMg62Ghx{Z6Mk#!axAYHM2Wm8*t_>D{HeHr1v*;gdAUDe{+bLNt^w+dRRiB?6;=8gkRC$5NZxRdD0o zT&PJd1;=A88!B+YN%k_q-r+adSRJ*ttD zhc5d%(X}fP#!Q!>3u*`Hjp>H)+asLBZ+(oD_J4!w8GxF%@NljL0nB)B)(k@WWJ zC=9uu4g*bIV3kIOXExqJozGHkaY!&Z^e7zN7fi)TU4b}0CIqXNyP-nPbiAcn4<;Ye z$cMWRU=I^P-KM!fZBYRpOLzkg^EP4C&Lw1><3rINhl6nSNH%1Mi$UB=V>k~z(Ead_ z@Y>%Hd^k}H?Yuw2-yku_`#YPESu5e~(rUClc?P9EX=AMa8>s6%k87)waYc0j)#|>B ze1J6`vPhuXk~&biDIXO=tx@uIEwL=H!NFr$u(Mi>KXQ}hYlBO0=)^?$v|~HEJM|LR zm2KF$d_P)6RLDT&ViHi|d6v<2$d+#!-HZJ_0TD(+UDPMSVB@*1pwNHtwT zom~rIiq6^cjo<$fyP>(94mTSze{84lF#>=0ZYGI4o`}qJT7_>%M1x^(_VC*f$C-ta zuzeS+X(|5spt8_Ht)4tT9D!%rv{18f8G4@c z8D>0Bh|M(vJk&CsSDfZcyi1qB?}O{9wwpGDpBfYu)kGug=E*w5!nfxjV-0mW4;{0l*Fg!ILpaB9pJv&oLXp zsdXVNkdnj_|1mr}WP}rTnsU-Z88A?PtUSV%fmqAwsIvJfZV69?w;68aZ-9^RNvZ)p zbP0p;aWY_B>Iwc1r9}U^5%1PvU8dRoCJ%>dX~ zT!6pUxCk3kFX7fljX27-4=Fz$R|NFZk8_JLsY?crIZXgZse>qEu?l|1YvO|=Dxi5Z z687y%fx_?8VABtIaJ(2RdV8=%G^$`E*Q4~1m>tfiXKO;?y{{6@l;+@6X#xDzlPAg6 z=V12TY&2T?9Dg|ailjDeD-YZ@o#XZI3)#zmaCdVd&X5zK`wmt9%h(s?i=vjXy9;-5 z*Zzy4KMuH%P!R(M#+RU@u{GXy4uE(rvOK*xmhfZ7aAd-MSk!q|Bzek;OpA5FquCs&CV=M(%@yavY0{({_ACz!c= zB1(2`5GD$LqQ%}Cu#@$HBg>uOlg=x0YWZpMB{&ew$x`yjMjVtUhv26VA4Iz!nsOfv 
z6mgIBB9z?{2M2atfrsf0!;a+*%xW1$(_e+tGtX9I+>?u--zy7;#fN>b`EUB+mN_;y zj^ffU_>tVG*L3QnFK{y?0pj1Eg(p=zaQQJUZt>gW;A{Gj%0GKdb$4tK#*fb}pBA|U zlK#1(*OF+m@0$j9V9i3xG53)x@V%sb^_`Tsc=?soX>0|CRxL8IsBO#S2+DyQ>d0t2rc3CiIE;KN9Bf4WzP@pn1A3NI8x|#$y3B-g%9vY~6|ZN|#~7 zhDJQ)mn7JpQihEUb)|KeMui`TD_X3dg z+*oYOkb~IaJgYOM8A5KW5e*dM?ebLcru#^Ajf)(9&%SVNuPzR6qSB>H-hv?II$`2n zVBog`{L#Ndqfd_{(R)(iVc!R6F6*OKSs~!`ejnO3tMSciyx~_@EPlQD4}Yy6Lajt6 zk>&ljNWO4jVa`)Y(`z8PGgZ_rP~uEZoF;?X05T21!<9||wa|W#`~RhZL30_I5gQ4Q zP6EBESxC>WoX_ROBe}ToxzMxfF17CN6HT+r!jRFS7;?Q62TSjgMzhh}qTa2jAbtYs zuY}|Kh6K9k>sxy7nI_t#*22uTS*Nh5-SQ4uplUqh<4c1A0|bZ88`%imOAL}RaBlr^1>&i!9#*TVO({B<=|`Imu*om^0$w;qD@MsRmycF>=Z zBZp@{Hy|&y1ZVHFVJqa)Y4KYf{^7K8u-=kKK3clK`MpUfVfKYy9$A6zTO)>D-g0zU z8-Xv06I?t$88wB@eDJ|x&z1WP2J{^HlGR##W*YG0#tx#nVLqChh2e;xdEDPW<3xQ8 zQlj`@elW??iMn*%<8Pmm!Ku#|qipmh?u^!T-1g!&nHrKpG*U<41g%m0x1TN~@Te?= z6}$jfz8t5T>u~8sVW`DU<7Yfq;&-_!(s9qe;I+Vhrr2~Xmm-#ro1cVWwX7Yt$fpT1 zsTO9^dg{Ah3}i*=Br(qryMqR>uzd;HIMWD=2X>&=469+^Y=-?$pO;rY2*Nuj75Oo* zC!&pl9EAS+f)hM{Lui5>*Qu666D_j9di6HYsGG(;+ui~1TB1-P>nd#C;J|P?U+LiE zKM2avpuRJjhV#eq#g&^V#*XK|HO6E9jeh~sRX}&KLZc(XK+h$JZkmG)4AIE z^u2@__oJ;w_pkDNnyl~SQuVGs5c z?B%56eWB%P058D{_(O9pL7?n^!pZ74(KNvcx3>DhAeq2zQyfPjxE@~Yy+xmGc?qdg z8(_p9E4p>oV_5cX6mR+^6MiSw;`H`=aC6rbZZ>-gq=RzNzT2J?^hokLB?`FlX&S11 z+>5(A3&8(MCpIYU#T@NeQ2Y2F@f5fTgWCM~#055dxO)j)EnkdprxfG2ONzwvq8Hp7 ze+`c1BntV9*MN6iilyDd`^2`_;g)ecce_0Zu4a@9o3qAZe3LIp&wGVaWiOB)J3{!{ z4GmZ|Ee5ZDcm?A9k#KU1P^e}mi=CxG5Tfb~7t>r>g`@FUmQ)TWj5TP%U=lRWXFk`0cSf?3p9-l{cNdl3c?T!X5GNj6+&Brp zbNs3`E%a;rA2g|{gozsu!z*7Cx+vx(Y*Jqi6Q@VvUQ!4SGX|)AWgsTsA40R#a2(m| zNvEqeqV}6fT*}!xtX=;W3KiAJQ;CT};ep*~(iDYyJ13Cb7F#YQJpp?ZJn>5HQ{fx* zqjL^r&}aLq_|v81IsNolxVvnL&|kb5{>*6ppQH1R=kopjxV=k=WQ8artD?dCI=4be zQre=D5-r+&w2+;VRhgNgNXkgOuX86t3n@e?N=lN3wCH>Pet*CJyC3iSy03H2^YuKi zfA2*+>k*1~F9|bCzDU5Qvm)%&54ZT6IyB*W>kAB%P{tYcy?B4W7{f}Oh4D;jW|>(w z%-H`Iz9$Xve?M5kBz;^4LP0G!mQ_j~90}&H57r0Szh%7I!!1~~X9FA5xEU)J9AmAf z*)dm6Ok!#>)mfr8jjizAgs0wWFd@8BzVJmupb8wbr*Q{1Yz;vp<6!=cb83wD;)9Hz 
ze+0<%*1{tq!xRr`um^=Np_6+zFEqdw7O72ViwBg^bj1o{sWgYQPgG)-nKpvBpdwQ( z*bSHSVwv~tk?fLn%CO?j0%q1`fBY^GfUjNuk@%-EbpBr%rd8=LnY8*j99XRhts2@G z+^>jsaWmMb+4&dta$7841^H(Viwvc8G8dkt(BE~Qd zaf2om3Y!u?}n12S6-pR8Y3-`b|!%6Iq zOMbLd=?5$!2VjDYG;{dCMhLHakIWnfD(A6qVf{ur@a{jxhaU~!GrjSv-4b56M*?0u z<^_`bI_czRX}n$cr5GFeB5HU15`=x2$<$1KNcS{!@Rq$ECnl#X>4wo(zRcl%-r1SA zArxDP*TZreY*RxG6{pdT1_fsRFLkiq_5!kQZenF$eL|UA5}=iG6?B|FV7tdwY!HaV z&tqOxy5uh4AXqabF-F9qb?ZYTYNrfkCfYhaVZ^|dX9nYrenq~%~Y?)PuP_9zOh ztP`6alm;$B>E!swGMM<+!z$`i8*GzX3x97Pf{t15ATTwR9n`HPenVl@S9dRJ?g}Im zL>^*Ya~9q|_y+#ex&;_VHu=+KvQu;P&l zb5&!b83WhV*GoSHQeeL1rSUBzq5@Z?6 zDeU(CFi^{N#+<*gbojCvRsQ6_-u=?e%WBw4EpF^!^^%hC`9KDq8?J}p@g$I1cptQv zJ>`k_RzO)q3Y3(8$1c~~{4?W&*tgn~cJ45M(sRAo9#M^V1-|i&RLe=|&KqD*;lPHU zSqXo`?_$kh8#u=alDci{p>aD82Fwzu)0RQBIXMN4AMC;>9xu-0`4F_TMT7x)spLV-gc2!+mf466povdRpR*&>G=>&&8bGwVPSTM(;)7h`iq2p`a3M&1vfrIf#Lx9u|^eL-Jb9RQXf>G?)2feEXafJ-I-KgayGlHvW?u^ znNK$C%12x0dB7h~0U62h|)EIU23PZ`{Ftk_>Kja76VPip=8dg={F4 z(CqDNQP3d^X74+MK}j3I;Fcs_6Fi9?v&5N|s_{f)RXMJjqrxmW`yFP+-+_}pi-}zI zVU$w*O#XWuk00$mz-(xy0uBMF`5_lx-_ymFx1WH>bQv%Y4JC6GLtySrFUDEwJJ_w; z2P#9sV12oimof01*zd6q?oyZ#OT6hhAtXo7({Uw=_<}#jnOe+dY zr?897=fg(&9}e#ifd%Q3B)_W**P9$f1WNRop)o7&GcgUH)pXX zPNz_6l_IM@F3NoLUBjf!$$~8TvoKbE7#0Ww^K<9_;T?6Z0MW~nDZle2F>^i-6I~;z z&I(bepB7591a|SQ=2|lc4S!kbrk{Yix+E0Z!u4ks4w5NKQS?FfRQB*LD=MDr#ou(W zjNdY?6rZ%8L-_-$WSN*Yb9%J`bJ#MF-T%cA?A9tWNxROI+zHt z>P4WRBp8!>yl8|P<3bx?P zX$xtiN;$u9aEydLk|1>#0vBpdEaK;msZ!sjVmR~eE_rH|&9hP#V>g{$jz_+9K$mR~ zJohlhAio_ntHF)mY!uHPZrp@iruaw3#(+?`zgyyb|+gFo$;L9)Z3_2Huz4 zBnE>kP&cEQI3!Y7Y@A90lZ6*h_`=tW8CWy^tyY3)xB6t58xO4cWWW=vkt+V3zFn_xEa(vIDufJjoLFN5pO|j z@MaIAkLwVv{FDNJkI!Ke{~X5A?5F${SqXS^(K$4UumtAvJ-&P9O;B(BMn#Hqaop}A zH^1(Mx3A|x&!<|{U%Ua&PK`nt6Eo5}z6c|iO0Z4B-e4RWNhLeWQ0dbvSpM}AOx_TH z2Q%;UD}2sDQ|4UegS2p*cq*0m&R`bjRr*By9T$PU(|KGTT8gpuf{^%l4EUZBxa3VF zj$CU5`}P*FbKnEsF=M6Porm8IQF#8o3VSH*D=jq&g7sgx8%{+OYG}pd*=s^rT$X_) zt`t4WF2i0eZ%kLK#Mi}9WJ}p=o?li9wys;lrUd(=)95VvqCN?w`_IwA0v)*RHZgnhK~$brT5(Td`Ozn>p*(Q_$BshorQS{C+UZd 
zqwpiB1-g1uN&2WGeikZ%mq8U|qwZ1IEZ0c$xE^%PCM|RpRA9A_J?2L)_Td+9aw0wn z*U;1O7}U=-s5|?4Fhv&oe{n1$3__#VVqZfAl z?4XXO|1iXKG86Dum)Wf~ojYshLZ5IHlGoGNf=EL!y7!(WCuM_3L=sH9n+C><&M_Yp z7QhtlF09$qiW+iHk$86cs2VY+W$79PK%Mt)f| z!N7MuxwcsiWd>XDMSB5WuS&+YG99Yru$gSZ67sb@gqeTnH+=QE%Cis4qcsgutj+8J za=;-Lbbk3`r{X`}@)B+4rAi^^ySjmrwyNy%?M=A&=2YV4Tv^?I@C2JO`w0f0ZHC<@ zv2>zu6A4^?2-Rv8k!|F1TzAvaT2q`AE!HLH>!eBQsZ$_l6KX4_5 zzPWavV=_uPcHIbGFGoDzb^~6Y9)VTLC2;T01&F&BiuO0I!nM)_SQQ%z;S)89>DkYy z1l4qdPZ$+f)BrP%LmJ+$$***X#}LI{a!X(~``fP&wkbS;I_YNKEM;*#Sno*o1$?C^ z8ak?#w&#QSI(^o7!Y+t8wiOTEOaqOH6%dnm4DE`$KAR`GB1CbT+}OE==-%h5Kg_>N}S zr6Ee@sdV(NC-2(#U!oX!oANA*QJ+p6IL!o6QWTjUG$p1+Jao$RPKu3G^E-}aS^V49^ z`a91z`Z|uc?!>%aF=qR)1Wn&GK-65i$-*`Ze&h(3={tN2OP*L!hptG-I%mw!elN_t z^*_&Xp#Pz}CO21Ueh6K2rZMYp#6o0^Au}BNfv(f_!M>hcTvxl8Zqi7^&XD`4H#3=} zCIrI=>1V`gHbeg%v?J@eSwc?6Bb+xF2OV2>5`nd!kxjo@ZRop*hBz7EzUMlmP16+9 zpV|6?iZOC$kK@hcX?)B*YMHGP5fJl%D~H*#tzI5<{xRT z!gmk)XnaWt(5>?5zjzJlxE2gTFW%sv(>?J0Q6p~76k|=?KY-8ERw||Hh%2NogKDHD zRS&L${`ynU_3|iGy^O?iCTIexz0T`@ZMR-A`_&_9&_N<>jM(t_){C(8)$h!nw2sB$UkP`2G`C6gR=W_h*vO1C!tt8 za=ZX{TBd-5#Xqv0tGA9WR%49TD=}$q8{oizK_ zcCjArJ?eus@rL+tizk<#FQO38gSv`(*qSXwuWk^8oj)!TZzUso++T~gaKHKHV@L-s(UW09eU@X(gT)ZBfScj2&4$_6HJOa%vLJ4*&dAEC z(sjQc!3R}!P*V@W;%b2CRaKVueZ>&{^$P#WmNw}7FADpmgy=EfeA*^mjQT&rNQ+(& z?UgG;^{KUV@WXwQnxzO&`{dXJ4Oa>|#_Vf-GthhKgn=Fc%$zqBkgd<)>q+hKT`HR9 zEPg`IINM;|elByVxE2$a#?y21L+IE2kDeZ!%tnSuFwxs*pxvou1ZoH_H^r2;;;$*`8Un=#$L2%Z(p#zh4Q&>UyWJbV6&|Lx6G4Dr|sif{Mh zyi-?6$D}N>X6;c3tsKUKpI=c6rCWIK-YyvW@s<|cT!ALPVo4a6tqTu50{b@v<9nq# zI2X_j4&`6y5nB+7xH09v4XNohammaDR}d%1&@C6Vfl@X5Um@6H`h#IgwqmXbBPEp zJ)KIs;BK|e=wwu8KGL~8GHfxEOe9uRk%EJlh-R8QG}H+*kJk@T>#)6K?M?xb)RT^p z_r5`Aj{udUZ=lKdF^1Q_Le&sQnCqg9kIp&sXBw$7AM_${{rV7g_0+k{=TR$MWUNAx ztOl%leBaf(353M(phV~}yxVe+3iX8J_o+Y0X>CuQ%3;oR z+Mvbyjl1H6sPpj7!;Dz~0@%3nBbbW`g36gGpp)MZD(UvbHaZxeSgPXOS{dfbfg)m% zaRUVTS>SK0$k<>wSD&WvtJ9x>$(dlx`|mK+{JB8vg_L1l_9i%Ikpjo@1|}Vq09(6f zlx}3WP64^?%hQJ1hg|z8Ho{3+iZ&e<+5O 
zcF?aLyYO~qE1rIsKzCanL5+x;w0iYU^lB+2vs4tBN6+Sx!)-sv)mJ4v$)kx-W7|po zoqGaF2aMr^<#E`)n#*y{J`OpPySTiQ0vlhPgVNb7soJoLPUmv-+rpCR>y1OOO87ij zoeSU{TPeZh?@EPVu5+<#kmDhCe<0nGmiYERe-QK1fV9>|vTUstPIGw#eQ)C6a8C<$ zP`wUyuP@`-v?H)!JRAjH?nl${Z5Z8Nh}WOB!n##^^Ab9|e zb&T>Ai(_GK-qh+f!~e(<$5CG2%0Ti(KpIcf3Oip>D6_g zsKJB%_-Te4`>Mm8gk9zwz#8dv$&J%|-9@qdFIS$^cO6xb$n_q#$RuNukR)4K_Xqke zHsPFUMJSn&4$Lllc-|tz{#6i!d)s*6`fni^Dkq@6^7wsA5y zQ#dEd*mv0CFK)ydGI=Y+3YMY0_*YnIWD50V!py-#;Y9h}Nvwad2u3nIaS_B&vxzP+ z>Ky@npSZdHX_jtWnMZy6@1g1YFzOPr5p>S9lf41e^wYR76Jun^nwpmr-;N-3Q)xyc zmqQ>v`7L1AM{p_)rJGjlqg@3v;KLFtP%l*CMSZ+Pcy|l%*}Y?!D=bYKU4~%dn>NTc z*nnmUvM5$?8BUcV@1Lmw{W&%fhomxT(XPp8l{|rcDt3jGs1NZBr`Z6Fj-|Rfdr9pr zf8L=XQ6}wr6XDVCw8Nd~UIyRzy4|X(k?|>wP>GP!2=iBeuLL<5JA!wWnZThc7rw zH)mUAgNEM}@ZrsSMDpDy+}z;IWPbWax@)hZV$yQ5$G)2Gl}QC>;XlN7P7ZI~f!%n? zFATRgSJOuawdqkFm*IHr4!n<(F#OaQe%KX_PtW9IS=R@Y>0d}@4U56ptz3MWP)vD+ z5%8#5kV*d93a9I?Lf-ZJ&~_*g-oHJ-<>7L0uVEgb#zhlK3RyMT`0}uPMk7I7Y!@N0=Ww95_oo0bR$QihP&mAv5O@@^COgyTT z&QG2G9WuYo#2EpVw58<=c>GsJ{-&3qXKE4L8oY>qD=gUgxHTIh%T94zPL$t5$|L5 zLrBh!#&?s2a8HURQ=WX9_UZg4o1;EK|GekC5$-;!x^OeyMx(&Ze=Fqd)8U;21>85` z4jvo1$jh?zgpOfz)SK@L>BT}Yv{@Sp`66uFk4;3(RE>brQ~op?8Dd6`)3kgkHq@yT zl{I+uc)1)uKII?Il5znb$wM^hj}hGBFGVHJ`EWI+j`n^1LJwcAz>Cuvc&+so{>zNU zCt}gCETfw~c8Ejq2PzB|pTvY8l3>ZrJMYv-Jvdcf4$%3T4oSpO zbFDr+opPF5wi%$4lO2f4oQCN5$;|cAN94HPR+u3C3_g9AhNR>R;Qsm;@%G<|B^{^m z2z-E(@w16=MH)#IT!aFXOks#`K%!+?@_l4Fu09wAKJO`N2$aIb{u+F!)`M?Ow~{s7 zyQ8+4@q+mv!>(Y=@01U2WogPPMvN0nuc5!5(8o_h}*Yl8eFeWMdsZ= zp_UxT+8xQ|7k4l#uQ$_>BUP}xmXD9dmRh-ahl8uNF@EwEgTz;SvSM%&PF*L;SllpS zJ$JXjP>2!Z{pK0|C@6swi&(N~`E#qi3zb>^=9ws})JBW0rc$G|gSh#j3Q8D$q!adp zgDS_r2h9S|*t8YjZal)17d*>({p7J$^9OZdC~0{(0q$E*VzknSAW2CZe|{O^&v}r_ zyA^nuie0&gQ}$?q{F)+ge02d%kNd(e2_5Fnvk!P+`cnR!c}CQI!c@j%Up)0^WtenS zhWi0gG-iPcew>j7DYI9@{$K|(Au1J5=joxFTO6ca&cnf^-O&Gtr55$Sh;?-(B`<2Q zuzMjTo?mHqW)?2^b^vsG&eAn&6(OVND0*IPBwM$vg*o&IA*;gB^*cqQb!qq|-w%!~ ztb^qr3n9%X3)gFiK~(%2ESc6vhE55xdXZeT<)9d>w_nRI`uiU&-f*6}a_nQw5pmQN 
zGT_%6D8rMV?}+E}Z0^}EB0sjxVD${+prAkuOKM^vDRDA}8E=Gt4Wksowb^9{^5CFA z7ubjE;3lCoeq4$b)vQ~|xUX6O2eyA83loe$spbHDaWBQFiTbpE;}v-R`xvY>m;@4_ z0oxD6LZ|2o+87y&iw^Pe)mfyAoxouFt>Jn7tT_iTak0C!esCu()GMJ1;gIO#0 zRD)fh^tLruf9Iiq&K2U5lfd7>2(oSNE=-+DK4kpLrS2<4p!JC#dD}G~jsEO_Q|mIQ zynzO}JEF?%bBM+CkLmE*@-3be&|#Mtoq#HvY*_zEjOq0#Cs5N5zYP&`dPZ@$@B+_H zq8r<9E(dGtW4wx&raakK36OEh46k|31gpc-8EN7BBz;3~wYPO3#yyjw%k{2VX_jul zIJrz7UvWAXG#BDX<1-w04S>_aqIAn9XWqm(4KTVS!Yba3C8m}`WTZlq(f$bRN@;OC z$z@MGKQ94=hxOQXM4XX{4S_cuKR}PmbMQ`lK-(#Y@q>RSq>78Mu2Z*RbCNpehVkOt z!xZ+OM=ixk*J+Z52$L=J048<*M#+t#%*m!B>=M~p*b{q!e%QF06=+X^__ln0@>*4P zL0vCdAF>&1cfBNqMn|k{=dkECnMH%@9dOUR4Iil-Vnp{y;+hVw*IK<4GRvxPj#@l6 z-1vac29(HBrL&M~@`-#^X}SP z441D2)iedhU+n;ypA!ylJ7VEoTNS1kn!uLI^SDDSiqu6k(85P0FekNxbBx7-VaH4> ziDGB2?^}<``%c2tZW-{@{DHxoKmifoft(jAqJ98F6 zZN+x{!S&Q6eUK)KcH?iqe}rf9le{Qf$(m|$PAlEJ-+iDX>RaSM3mt-6{Aey09hO^#%TQu;qH0=!OF+N%+=3`!8xz6x;hRQt_1 z22tVXTzu;+!RvTGNc1g2*jUwzaO7|tP4W1^%gy>iq})y6Wi88?fpP_iX^7@!9G()cb@EE|F}f-jPd4 z!-eJ8=x~TgJC~v8GEdm{RRZrSh_exvnPhh<3pc!W!mRac;j7MJzQ=$B>At@aNBFvY z6Gvr2wkfdE`!3LCpL|Rl^`QklS3viqAb$2}1%GWdP|~TOi%a;J{fLhR#_hb|q_0%e zQ01v$9C_<^ZJ27>C4IWyxi?QJPnTO`Y z!tE0VFr?Xt9ooSd{4&Lf} zae37r@;I~!H0(k#{KpEW?N%l{xRVD;!;>)k;Wbn++E33bj8+$fH{;8n@vtgH0xSLR zQ{}lAuu*&m`Z>t6_V=!#dy6%`6g9+kF`@Wm<#7nje}bM*_0Yz=o`Rq-J9U~L+RZmW zWtsQz-mZsU$$P{X_s!)s?AndjU$S&Cs0cT1n}_jr15kY95`KRf4^cn1GfVyNL9L<_ z$?2bf`1Lx@S-zVVpLm8Lw@uMLsvRFNWAts;Z(Orw87j&>g6v=Vd|P8Lp3*!^Mq|cT zunEf}c9ZMj+Qg^u`F0vKeD}eZS29S)pci&rw1J~3TcPpCUG(UCMQ8YDl5geC;I(}j zto)wN+wLC8mr=QlLd{QkPd=Z3ATHODz+A%34!1Ee`x^-vqev4T@oicW-0t4MCnEkd zU|SxYyZt>mSu3`W}yC32c={Hakzk-A{uuv=}j$1xe;D!`$GF+*{9?lJgS@-9WRG$M7 zMpX!E4bvpk4`k%HJw%OaKuL)`)(fk#625bA{$f|WIy-^B@_h~-MYdq5nnFcZSAf5P zA;$acLfNi)jMKZ>?A@(FG}H17HCo_KzI07tW_0J1debpd@za^zdus;0H2fLk{&b`H zo~v~Gs3@Cn^_qm3I52%rWFh~SGw`JpdF$r}l7EaaQ!}p%2REicb%M54d1pNlTyKU} zD(Z0SPAADb$$2-;;|ZzDCGAhgNJz#anrN=gE>O~ed*Wx|wc$3a7uHjl%83o2ey5%8 zl=H=ErD(dqwula!mciC@^O(p6OU%0R7Rv2}Iahcm$YM2|f7eGe;RaSJti*S7^{}8b 
z8a;j`;5z%)bj{EeDp+<4pILoFkw7il(jUdSPPz!J3c_z&mts_2H2&_=XBN%1Lc5ne zkl3&kJm+xEDXD2-`(Yt2C zQ@0vaDqYDguQ+@?ZVL;trn1%d?Qz6DmF&B8pLl&9Azjlgz=JQvOue2$70Uc@_v5?7 z-s3;mG}D4n6#NCVO7_5?nFDZnI`=&nT#m8p!f6ywk1-6lVRvR6!NJiZbXLwIIJNgN zj1v*^adtBKsG|+qA=NNdAed(Fu7cX1dL(&58ea362?EPr!@JQfcxGn`rq!mvyF3+M zsnrb}`JzXjDg|;L3dD|R&fu%O5iZX6rXIT1OCT%LE7qyeYF<1|loL9xL z-?P}OaY3-NzMg!(@RN)k4Wo-*MZ){jDjc5>0Yuyu5`L|r28;5E-d+N0-&u1^`)kr{ zX^g@VD{#fjmSh)-NJk68a_v6TaXU*L~ z6|Ug5y)|$oT@Fs~-hz9gE*h3XfWXc=MIr))6%lxsK5nmFHPm|y%NYN!#PJ< z>PALyT^`Qqk)$5=L;O$=V|dW!hGH_eAgX=|KmFkjMSm&yy+SA2PF_a z?KJqPq|WxbhU1FmSMWmfIPD!&1F;=CRR7UY-h5kCY&q--2Y&t{9+yuN!LWMx7NZ8| z`qwg7?uDWLgju*Y&Cz9i4B9N{$6Q0h==jmA&5rF|6_TWq()_d6+ zTsx(grr9he(>O-qHIs+N$+ftXb5*RGLQume5*9X5`2DaFdVO0-W{*0Xx}EbB`G(32J`Zk0ZLN47<4xhZ{2KyDAYjS)`4I zm}_AD8fg;0?4gpBoCxb9LZ98k4{h7CK=sP-Y* z;gLq381KP2>0+?hC&rWWxB{hi0?da`mFSrk0MiRZn4Iwhh+KRc7G*ucr6U1!pJOz> za^K3jm`gH~w*+FCO))ioITL+1bYOZ0irZ?|NvUoBw;b*3;k3p}huhO%M2iDNW(deWG`C$N0{ z4F+-Rt+X2@KyIZm6VZB^NLpy&$?r#)o(Fj_yCRO1b&8{Gh6Vg{Gh}8*oQLfpIZ(8( z7p704xNjAY`MjnDTx@P&=e~7t{M2OTmg;A`(`U-o+~v++{}X5v+DrNrJs>yF2Hr&k z(wGPVw&^tkavPFC?V2t$dt73sOf%u!{#|IfU?%xgYv zo^5qAUHOl^8}|fIT780N;5t z*`hCk5NvZCl^RNL$dL2-?Mo*eL;6r^K8^DNcVMP>D2Tkg1Bsbc_|UwVyyN(x7gLw8 z?SB`u7f+lc8grZ>XHGWEA6X4o8n%;vv>xW|?BpjMd;tG+zmVvSH+dhO8=5I3#k?a(+x}ft9hm2l|;! 
zXG&MOFpn!dQTL!M`*?mF{n(v}-8-MrPw5j_*e?I7Pv}e5*=lSl?$FpB+Au0AOJUPStE-1!ZH)9dq zxx^N7_(nu=qy`GoHpBU#1}lkNX;|Vgo4vjx3E#b1!5SBx<{N)MM?UK)^H&9w@Q06S zf^TIlPPi(`%1BzVQ*$Zq_&1R)kK4&s4^L$(u08_ElltVyELYZXM=Rb~t;bjzPGNMW zjlhyqzo}HuQBbpyz%ct(axT%Ie10B=_HjbY1ji&QpB0Wu;j)kzuoSM!48Zw&D>zS6 z3#?09!IQ{%0ctFzYE#B}XO3xe`QvmryUZ0FriIY`o3xncN#{Y`_b%E`e?wPpz6Q_g zG}t#BTR)TAD_ML~g07l;2#XHt!d&kh{=9}#l$n%7QWtT%A5NGttx`J7;C?+;w@Vfj zva9}YmjkqXo&x=uMogIOOlEhUHB9vGhd*2f$Vq3ACf^t!gXIrFY6rJ}V%(k94c+1S zhxjq8&dei03LLZjp&E?RXR`CQr9#UuHD>5p7cIAs1;u^uiP-L~a96kyVpP(w)|t!f z|91!XB=4t&qq=O-pgy$8F>q_bdU)ltnrXRm9OdgY8M||uF!ew`HGP*y{<|ZIDgwgz z+0O`yUKukljBjJZ{zDjXEsx}-zk|kE4>|X3I|({gjEh6sv3s%&UHM@e@ePlm>bAdd zp@=GED2u|z8&XvC&Nt#>#(7GeH{hakE=+en=bgL9@xH>8KRc@hr|MXM-?lNzyw|~B zO)>Cvcrp_*b^%T}MPaS}Ef`<1iGAe$4!g#s5o5-1Yz6}#>W%P;moj^TC&uX7o?vzQ z4+CFgE?#<+1IxsCyqP|?;hN!OjEb_tNAs20Pc92sbuQWKsQ3*Hwe#r?*I@9sT1;A` zL!jaGYBn{YRhNYJU*pN?-;BT=NoA-3nxQnuEeYHJYshy6`%PsI?Ne0TE%fk11 zZfsw%I82nbhuj(Jr17u;m>Q~}>8yjyv1abOx|Yw2=kDB3FLuzs^Bl3F!3f?PCDD;Z zy~sP>iF;IE(wnWpfSwnw0Ad=m|W+|PAyk#zErD?!Uuv*COP@2sycKLxh-%jBqF0Z|R zyD*fFB;xNY`*DgvHmav)5R>{GKCklW!2J^*B!yo_Dy8p`=M+x?c(+tOo5~GJ8^G;8Y-qoLU=s4H-~e-ELa_k zcYd=tLn9o1Y^s2Ho5pZcIACx{5~HO0f$x6s5Lyp-K~H-feQ@X=bnKddUp8C>shhXR zcj-`g;k6qa8>f&Cr*RVdPYBPPuOKeD9%SG09;!F(EG#Omr8?JSur$F8Zr|y`o)hOm zN@JKDm}W>smrR7+4(WI@(VXY(pv#J=iZi7~AINOpW@>n8KYN>R$1JK|!|kbk2^*7j zvEp+R{M6B8$5fY+JFcP(5n_4eD`Z*KlvV71ziwmpmbduK;D%M;tR`Z^VFpE(gYa6R z6H3#8=gw$7!xuo=kw0XxuhcUs8;HQ`jDz@bxjWxS!iFDP5MpU< zU<4k=%jwc_6S(&E2HX*f#(k+GmZt-!v*#Aq!q~_|YU6nuRsZCIxsEr+yp6}pyLFiH z)oY;@ z>kOumJs@Y^jNL6W8NvHaNP-Jdxj&k$l6uLLOH4#4E~H=fcR@4fk7{ZewA!gY4Btee z(N6ROd}#`#&6Qc$-9L@FTGNeJJH9x%2_+ z5C1|dTOLlj%lY7}lkvW-9hTpDMix1IrJ3?I{4+Y;2JMht~JrfTN%@=q`8T-F)UqQvRgElASy_8^PkD$Cg;XS)aXa zx&)FJYcczdzrn86emLvw9X!2w1^%ATxxIeZaE@9-P^cGU=lccXg!C}h6oPS>V+>9$ z6@dSWKSS{r6Btv~hb@uHY>!L=sm#bAvLW)c^Uf-^&nXbjrV26hB}-5)qJ-LC`AW}s z^B}PGG|4o(3mHD&_)cw!AbDsNjdCBt=;Oyhg{!MA-~B`L>_T|z?FHUNleqjq9o;ar 
z74K|V2LU$>X^x2^D_ngKCU3MvUqP=uBa=_;kF=q2%>~S0-atI(z*ORL0SS94xiqmD-g4(?wSPLq9L*zPf2Xsa z&D@T>JxwqzSb)XFhREiZV)B)X^vuBvIPSckUp%{>O6JU`ZsL48kQjl5JHKM@gEI>) zH>Q%MY#>9PCc=*es3Nm-nFIe@ z#Q=!-{DiwonpU9?Id;sITYziufIoOI7G^vzwL1QvF8$4&C7l(b%qkyoa<<;qlE3UG z&w5$}I#*6)v*Qpdyymhd6V@}khQFY9;#C^**o<|FSjHEW%!9#T$``p9j14Bop=jopf&g3dYAF^Or*o{xZd~<=2hUF4#mrz9U!Ok1 zKbIB8?N*IPhwELqW8YL- zgN;AifH4@rr%#2MSym^pu3`velp_{07p&=6%zK)XwhB&cUILoGyU@fT0Yu;9XaE8JPO9#cJwapHf@^x7#Oer>%Mn#OI!?Iz`L!fO-zaz-y~=kdYs&|h%# zU0JQOIvr;w?M9ik++J~8RcNSIVXJ4FGAf$yQFhla(z3gN*XFdHIeGK}G~@=rrQ>Vi zZdomUvru77k8<{zB^?Ik@Kfm)zLY0Q=OpaC>2d$iT;! zIOLg0YTf3u?}SZg$j60@-X}Gf^+J`&*Lq3v{EKLRcOD(O-R#$?AgpEHy7A0CI@0- zpU`JFkGyNBfzd8mdRZI>P;r6=Ki;R3o}y%4h3tKeeY zc8E%RL>K5@ggYm`)0v8bY-++V{IEDeYio9}34wvtE|V_M^7l=oWBM;l_HCp4H7vOP z0*ec;w7{vvERcRFN{(L1A(PZ}*$n{#@HU|m6^G2|<18Yzop&Hr{~O22%7_S|WYbb1nfLP?4U~+QBq{BwB~g;>jI3l- zw!X?Jg^2rkj-tq@h$tGAl$6p?lHd9LpSZ4jKIc5o`~7-}pXV7pNr3sU|AXq21N_t6 zT)Zu4t>xEEXD~sk8FybbU{q$@fcrOe$%y8xs)YsNptg4qm;c<#7=*~M4TI4rDdGYV z!(Eu(m&G4f+zaW-N;vCe654LhhG*SH&{ARuG2b1rJhvS;Bt{Vb!ABU%PG=*2j1nKt zAt=1_C^uUv#gvcSnPPka58qe|r7}Os?vS5U+sGG>2L|B8Ej~=^3{`f^U<=f(bH@;~ zsR-h6xVIpf>y+wabjcRZ`xJs+!Y?r;^*C8fZllrDUyvg+iFR|nR(k;nCd7P*yiiyM zV!zZlXUcTOkSB#}#rEU5x+|pXRXOhK42D0tcd6IjMw04OfpS7Sp>EX+I`Hf?->H2C zZ94f83^vx$^xM~=p5q^kdlbXMGcKr6Btg0|{{xH7lDt8`4q9xIg!U4S;J-Kv%|o_x zjtj0oxW5I~ed*vjzNhG;A6pr1?gK9r845bwGuhNFd>mUN$OLH0K*?fJnCt5Z^Fym? 
z;YMXfA+dz6m@tGMf;!Bz_}y@r9dSs4g}vTk^__Xkze zxdFj0---QNAJos+XARPpV%H@;*-(*+cZx?a-t7l;9x8(&pD;9YR6#NGH!ydr5i687 z2V%x<;DWD#Sl3mIuGL|9m*XoO8&ZH}8QS<+x|2H1p3Io5U9hx$F@O&SCK2H)P56pq zX5`zNpq#}vG=Gu=l_SaY*HKv%QhGog{+ysMKRDn$T{*bRyN?Ics>$0ur(i;6I26oL zh1(^ZN9EOHxDYLd53|1EndAkqg5$0Pu1Uh4x(syQp@41N@8js05PFYy;QFXgthqc7 z$NuQEDk70&!toN!Rrdwj@QNm@H<0CeJut|VgQcz4XyY=@P51&B^)3UY%}qHK1aI+yj32WFCtzC$yXY?%lF zACRF#tFh9QV4p}PP3QgO?U#8=^5nH?*q?I#$(k=Xxx5?( zOv|9ka|-80b|RE7$#!-HU_i4vk$%2{+2eklX5U)Jb$~kXs`**ow4GC!w?%*H>?0;n zxiyA~|02!a79XJ{8F#RyCW!w>S`AkfHsQR{b=WYukSN7P5$6_T^MAj#fJ|ImRlQ0r z=y!MWCntU2XOHr$4AL68WzZ(x`0^zu7!O84^L^A6!~Nf;)*)MdWy`@$33?~H$(1({pT zCD^)UoMY@fhUQmoXn3R$dGa1qE29s>QZ>=+wieAiEy~z#a3?V{#qn~j3VWh@6^YmA zqf3lBV3pKjcD76ot`AJX)g{AlPq=}GwCG?_$VxQ7H2MvCX;!6;wYJ{!mDx_hqb0jycOwM z=%0TJ#@RHaTO^@OM21#+9f0stKVdlcJ=c=mhVzfT#?Tl=R%p^~cp$^l!C$h-&izCr zcIz`z{x!rda3Y-T>81_UNz{DB4Y>4*QvKZtwDRIM9Dfpz&$u&gJ-rDpcTNP!A|I68 z`GqLHxre`-X0Wc|bu_o52W>qX$wYStY>3nWyPyd$|IAZ*VA@r_vG6?T54Q#J{{$Gl zcWJPRvtHg=nMPjf6yuUf+&%LAOXT=%WoFKRG=519LycKk;FbOYKT5G!Q4~hk^EtQ9 zNzTF9nvBw?lo)K^0PU}9pk_%T2=|%bwKGz<==)iaG)V)o_A6-WHi=)M{0L>4b=0`v zI~=|JkEiyw2+a3y!LiUz%X10~AXVZtSwQL#T@=~cZNaqd!US;0E&`jXYcOoUWrC|4 zsE^fkevzOwhPC$b0!vCEaFz{D%?_j&w^~7K`F5OtIsrwc*K#w99PF*gp_elV{AeK{ z&=U!}bZwB7-Nbi#JIK;nmL84y2TSV>A!$sNH^ccdOxj|Pd)LH5$NjS;G$)b@1ex%v z-xq?_E^{22@&fNhPk~zvC#kwl8kr+qM~-Kj;zpBqe9uK!_&%Y=7`*!diOsu=%h~;G zaVEA00?43;E?hBDVx%8&nbQeBuwl_#5Pocp=A}EZ zH71=ta^QMWGQh}mt^|W`b>x@9eh9OxCr|Z4sFwCGy69E}q^KK^U2;#c>aP-H3%f%{ z^53eM^NrAF985NGJ$ymN6_1~AW_~TtgXeh}d=Cps)}j>BR6^D-C*&kHb- z5UP;cgjTeuel7Ai~b-ZHgnk;1xiF0 zjiK-BKDx$L9v@lv&28EwQ8O6BEr}d!VF}OgY7?zBc~8TW_hb8-O#C&qg>z`ybAFaK z)E(o&xK|-s?0UzKd&5GX4;Rwwnu|8dkMX0JJH{W|1$laJK}#VDt2UdE>$TiIXOSBb zx|;=UFJ+i%Iqq=H>LaOHCCYp-7h$g+Tn*h1B4OK44LEY`5Crfl!F7@iQ(erxFDHZO z-2O)pec~lKcc})hr{{o(=PVjmlFP3Q?C1RBMST9i7(Eu8Mg}6su<&j;=$%?euiS4Y zy}WneaYYxOY%2qqJ~eFGH3B+{@npr)Otk*XC*S`lGgsQ)LYR;XFDrz52~VPttuzML zat-#&EPZz3urqmca00tzngP}RERUO3Z>BDPKk>aLMxn7Kk71H;Vp`u9^c!A{uWjRK 
z>aB^)I5)4gt#2V86@+2=_H26gP8X@sTtMdVYjE{o2hqxkB`Jyj(UT`Vne}4QWWlt@ zP(4|VefU|6{j0=vb5ndlqPC0oV*%H_3wliNy%WT?!6jh4N61hb5UC-McZsBM@H4tFZ5z zgP_nc6s32FlP(etUR~l$(t<+Z_ddn*qWKWwGZ~ogZy@>lOXz$z0kTbO@rtt<+x2Dz z8Gq3P0{tg&!KI_z9IFKm6}`uV55D;Oo(NmH-I!+u3T*5eb=Y*DhuQLykn!>cvHny= z)bl0KH1814JIxh-FRUf)PhSH*d`OO3oq$#c3uu@i1coPs(WG=OG>h2tvyPcT_%jXu z>^HWg!|M!w;aHH3kyF^`4TjiaX$n7oHw8ruvpO_hK?Sg z=|5M~p)Kn{ZTnAN_x@z;4C02tm+s>4`_^Q{G7_xMim_b^oLBvpPSvN}VhD5oj$SLP zNM!m7e);OB^q<)j{IXY(t?pigBNxwL*lYvP-t&U|SSHCD9_4aZH{XNT2R@$giGpLA zN2uqE5rp|BOuH5f7fNQZe&++RX#H7eC4jF3{@`@5#xS{FQZ{JDe%L6+itcT}U;88Y z!Xxr1qx2D~4_rm7$j1=&T@vRVj-(?iXF$}HGvIseIenZfj*Gejn0H32={P&Ui&lDs z^~TE}^kE-$?7fY*t0chk=tnZe(V2)Ew1L1CQTE=y7P8`A3{}(CpgP>O@yNYcRBNpv zQ@^iZHQem*)aJ$L_2dEfGu;EbTfUfh{uIbf6lDwqrPx)jGVIxPQdp~-%WPHif#WI1 zX>-SCT(+hH6iiI{pDN7p@}dV+RojsMXJrhxmI{F5eo-iGN~K$RKGO#+oZG>8Gcw-A zMAtV29LKfrw_#h=0}VMC_>Xfe3@7krS7<~0Z$a3iV~(ZPNAbz13jE#lo94Xy3me>A zanH3E1vkc!_e4|F3pXJ=iPIOB&pdKBA zknf%g|25qP88#Cnl5=5?-eg9`3F%twCH4-NY3%9|qMc{Y6b$TvH7WY^Zq9S^v)YU< zn6&}8vZ48F3+b$TQVicS8;%;O@rC!F#+NC&?6LI)AY;A}@1qL)%}W8izYRgd z{dsh&QysaJ&Y)gRI@mA$TB)&ZGZ>UcLg*GQhXAASWA;b>*;;PTcS@P@-j$1|H3XRc z2lACJY4y-!)XVK{Io?Eq0=?tHIZHg(5Q&vP@mQGxdp^FLI=>mVoc}@?^Ve6y{-tZ- z?yv~hb|wHXQ5Lni-hoEI97aNm;|k~yM$>L4)7jEOetA!1Vr5g|Mw1|(D%dz0gkD!}ljQ@=_>p_Y%^N8L zUejvkYKj#rJ3I|KW71&Wgln*P>m26tZ95wI_7f_(>7#z{ESmM^2(^F;{_bEyHYegf zA7^D#oq09zxo#?WTPoxBOqR<-Tmx0rdFb!XF&CCLui8=3I4C z*a*bt$e`fdmzL4bA5;1LL%7XwJ_gU@c4;n4AmsUNV1nJ5Um4^4pXK9_H}wL{Gm!;X z`^y+^lF7drAcneKewckofC({7B0;gEd>fy7FubF}D74C9{ly|$b@dIEi{k$MygJW4o<@thuJiLYBtV(y0(9J>$gX;wP5-Q{#H8?E`s?fyTwZz+Iz}Av)Qz+FxGosh ze3f9oj$XriyDA{?ohTSMoPZ*)M4XIfbhi5cna{eGj>ZUAWhPsE+82W#uUQ7B4x6#yeHz`NSw&QhTEWwGA>3C`V5i`IF2_6x zoR*a1(O1#L`|$;I5SGB9Dl=ZliIwm&-jz7~cb825`WM%7*_#;ICA9F+L6R%N<aTtepxyvTL3 z{)KtuPFiZoo^FlFeg*bWPrj9Z4q8Occgb2EGy`EBGxOv8fIcqA& zu3KPE#E!{fihDSGT=vaUzI}i!UA5h!A!joN^cHch$UE43(3P1cD+yZ-K7wO1w=Xbc zpiQBbK1eWN{7sD^DAx?0ai9O19Z67CQvtb-UA!)>|7hnddz3DEh+{6^JTZJ^X|x;yC 
zzfxm%-Isl++NF)hIOn^Z>JxleUw{?9dGNM<8|jo5WjE#jrBM%qVfCIIs-Yu^;fs@r z(n@z`ROKOG*Wo5fTKyH0+e1)lkuf%`YQ#vU0CFEsz$r;@KzTT4HT5cz zF|q-(<12WwzDfA;&2?%yw3~U%rh>*gF&L?crIF{g;ntdQNWQ5AYm5uv%o=VcVEYm_ zT8`82ID^LBO~tpdV30E zrg`A^*fm(act6f-?1H2?S&o%GlWyv&qwgjugTu4}qP2YyZe8`6gqO#|^)qQ0R^<#< zonf$T$28m_vL0{EUqSSkaJ;`zjImV8Ln@d|U;7EOzN8$Z?)ft^skI!RbCmy~O_(Wd z@`C`oK00C1&?} zK0D-bYDzNrhlInweqZWj7sWp`I|*K8HZb>G(6>6Sc3-#6Nwpbp1bJM&{Xl zm^R3LuDlIl%D-aHsn$mJIytfZLk+ZXTP0-nim==7yahWQV7oT&g>ow)eDCsuPDN1&IvuTcRd+a*~Ci_*I@@|wt`o}Qqnhl8?(a44g?+;;f0S5?1G^s0c9g;|w{rgd<~Q&q z`8Yh_W(O~20x_*sjcIA(a;s)<=6YMqH;u)>ami%vHfK>LkbpG{DPe ze|fzx7lPTH>5wV&6J?3PCFPt&*uozOx=l$jdTdTJJ$=$zNvxt z*esH9GaOeP{z#ci|M(9tp66R6_fX->R!oT64%}zdz?4Q+!eNmhthb3kt?7fjBMvE$ z5~htO6z7s6uB&)y*Ddh9!)3?1*OSNd48W@GF0~7qk1n!%aecN5$vNl`>D2=0e^m?| zdbTrLHVK2P!xg+C7Xwwt#Bk9SEp(h?jA|ET;ci?5ns2yEt^Llz%GX^a;Fu4It5L#T z)vF-=?=Y?0oq(sJHsR%ENpOxXhm>YTcJJj#vZk4!W~KsT-{=7k2E@pbKpT=BcL^80 zD9K?2SZ)pscgKD(v|J)~iDkRv9e9Rp#~d zjB_BD`M81)Eeep-PhtHa3YIO7CHX3o;qA<6^jCj4q;rggy_LmK((g(2=Z_KJQ_~sQ zFKeN4`&RnLqzsF^_0B*0m8DYl*Rg_#44gM;Z!AEHTlO^l$V~aRn!9@nPJgGu)admV! zXwJS}{uPeBRHx%NZ}3((7&G6~1#pH%Gz=SWhL0Pyz+LAqsVd|AlD8+Ytu7bfUAz!! 
zF&xAinPf{zk#&|0rDx%2T{{HNn}c>9Ye7QEj)`E-!@HRr4?$TQuZ9V*C9&&pbp3r0 z5!7KLx(DIB!A2wtqrk1Wp0so4^|M{us`9#n$(LnsK{_fAPFS{4(UL^if8GY)3ZBEY zU#BoxRzJ95l`#9CSTemRIT4j}EqYd2aDxS!GH+W5CxZa#eN+9?YxKVYL0*K2T zgXVSGWER&$jLE13)OD$FAvN(9CJz=!vX z@v?IqIu6fd5B?Y;$|kvRGUgqrsQ&~NhkxLY$Uvw*dXqrJBKG64NPK(2n%$wI$8-hU z;hv+NX`i?=nsmfN)S3|3KTrvsP7NTwG^6tI@%wb)6g5ojo{356%51amQHVP;7hGDu zMO(Mf@W8Uziuwav$O*e28W~ zm$80XUe$v15!{=57FSsxq)jh+$Z*RjNq?0AE!C4 zfApc00h*m%K$q^)V>au1qQEsSD?ITm==_$zs%Z)QpDsIbZlD`n*>Dx6J1t=NtAtsZ zTmj7bC&<5dSOKTH7W1CzhmqX>WU<-G6rPQz)1?Jm=OL$?CaqXQem7ZwU)n!-!tMPH zPg4@MMHa@kDKkgM)ws^Y6nHe+NQ7Q>@<$T-sqM81tdPil{-csiG2F_z8!MGz zlCl6M^iSh@C|}{7z71@)J^(*c(`k;p2_vvF4<}FuKnJAHJ9g%ZdY_{;&vy z`AOs8EJuD#;X&{&SiK|GmitQxVRSxoal4B&-AXec?Es{(`FCxQX}U z-cgmHWO`4&6od;Op|*1^mU^n;w&l@iK5skBshLZh&9Y(f6d!a|Vxds?A1$d{imRhv zK-}FN;+I|noP!Ksze|J`1r{p4w31WHQedUE4LHm!f?MzVu=KS!E?Lq^8b5Sm&i5ev za!d|NU5&tOcL|th|0SQk-K2HHjUaYZ0yp^&V61H?Z+58)yZP%*ayvkn9hvThdR!mq z-QZI8PgXM6q;vjk@j38!stU95q7zZPmj|IM0x{P~ggx`&GZg13@>Yb;$Me4{XtKy8 z)UxF||9cIXC7KHGO>_ygoPLj9mnUJep$DGn4n@HnLw5R{*Z57#n99h8!s~}(vVa&A|q4BzNh?We4eqSRY zXYE69zQr-*>`fRq0aFM@t)dERvv zTmE~)KlCgVrWASOM)9eb9V-ndngp1mf95l4V?tzx{7;BJ6@vFg+*zt8fy+Jm(Px7M zZ`lvjR|Dc|Cdl{2F#WlEw0L zON?D#K}Qt?n1qaNtn%8+R8(;VPkx;ueRB0Vq`6IG16|(Yit|(9jg2MC9CZMpUlVvY zg=CpEb=<=zTqBo)=7DFH4BN7-0(2tI!WQRM^w8B}ypVYwT&BLKg8V9h6`NHVb)`wH-N`ZfK+J-%wtGed z&wb*Dtny=|ZL;vvQ9Ia^YRMCmiN;Zf>qLz4V2FAm)Gac@oSI_n->3)c;497*na6n$ znu(B+G3)ws3)4Hi4UUMvrLA2~=&ASslCmx`*Oy*K{ms{D)Z=H+E4C89_iO;GD1W}s zHUrjrF_-C?SHrh18YH)-nG?6{d;Ax|&18D%AZ$(HoK};p@Xrp4sh5^kO|zRy*3WKb z5(Up-LuU?T?~TM4&tr)s=LOm1uLPfNe!;5_T#n~_KdHRgLY8+UJ?y!LIkeOqvX>%l zAh$qs{X!_0t%OVC*Ws3D0Po?9cFcG=8=a3#L`Cg|usuSH4a;mSQy%wKy?T*I3U#kFNP`Rz51Pb`dC`la+oAvdFD zBI&yFUXa?kqSB^lA06#|2i9w%neE%_sr&7TkZ<;lYHbY#(+5A{!NxO8$L_;ea*vxI zrX7r*qjY#7XaBcc(c8p1JTEC;AM+&)UP2DL5}nK z3H*mUzF%Vh1zc)DF_;L2e(!xdIpZSC-MN8xb3!tAZ(?87<8%UjZBG*G zuZx%o(bnXwU?9dkcnb}gZ+P@y9!afJgKGDC94Fx?Zr{GyvP^dzEN6%@O|mBJkkb+- 
zL|BKNr*;tsM53`*`3Bi_rh$Lwh$@?SFC9)dRluaXyO;rkQlho?4LLSvAM9%^hMf!Y zP)tyr=ep`Q{`NIslb_un+wG3w(1t+z@+6|$%y+bK=oxkMEnDJAD2K&qm5wQm?t$A)aB3Ocpycd@@zrvW1L(d1eJwz zm=94O;Nu|~M*i6Z=E(2!ko`f6t-GZF5j)TEF0KxNtuiqvKH)4zc`GvV8ew=c*@pS8 z#{;kKU>q25V{hbULCwq;$Yzv-vxzoHG4{*97!AXk;uXqeCRyFZ0z4}Ed`qbEFip-D0nZ{o%;3B=LY9((4fFj7282>Egl zL&pEnikMW^LDcV^EF|zN1(IrWXd2=oXtF@%q ziQoiw^D=2E$2abdPavtgU(>r0MZC_KdZ<2f4yTGW(Q5Gr#3N!pnan+3+?ziM1G^2` zBW~dwBTAZgxQAP}XISPeBV4H+Hp4B2ki3;i>2QgGT>K5DMy`avtfR zmLUw!=Ej1A8TWnct^rvu&bWz{SGAw%hLtn6^JM--;rN+b@Lzo`-8Sbi##zrIqM<{4 z)tm;pwABwcc7*ZzdtT7>O2L@AgvGB`MbHouhQX3uuxG_VD2|9`oXWK^ba@L{ooU6b za@Dvc>KLPb-iuMN&_;uFW= zm6$mv8jenpVb&?P;lr!uXw{`n#!_#Qw6Ed#uDXOwocWAbe|#zEj?98pLmG_JOe1zd zz&~iZkcz$Q-$3uJO4??m!*?n?Mg0C6;aV3NHpA``lw6KQ*Kld(MEDg#1ZCNqrJK22 z+H$(_jU`*~`52kHQAV- z^teBj8S{pqvPdF0dV;+XoK5X@O+cI5uPr{%Kmu-hpqXAKvOQB6m&uc$clti+p(~9d zIg)Vufi5eav>v)HMS@zSKJB(YjKL?xpj5H6s(4`^h%3lrSl$vy4)?(3Od+=4bpkuS zp%(A0)I)M$Dvdb-tmfvMMBKUv{hRiZ(rSpj5WL+rSZ3oVw z2_S97d2&Agg>}LKFmp-+jcR&G>>n6G^S4bzU`P}O?bIv5HJXs|bir z^o8xelHjKBfX&aU6ECcX?cMfzdgp-K4eLKkuT zYK#6l`^oipmL%=?Vp3-9jZ?>cm?yg)QKcn`AXbo%=IeJd^_?5AVDtrrA3g9bb#*p%CU1;_QLG!#+T2ie5ktNTFfus%S4|5%&kNN1T(TnnJ z?`e~&G1H|k3+V>eXu8Zi=;HF%%4|GgQ+J@4ntg2@ez3VYU2b*f+Wq=31(;8gm)E z<7o?X8y3R1rxv)l@*#A;d`Z77nhw67-H4>5awsl^sJ19l*UkC*ZG+8S}wP6pLmYg#G_nBCzw>1Nn$?ACa3ojbeAw zOFFnU1>80YL$2onP;SyhjWZ26eP1ZPb&i1H8?{8r)CMcdLM_v@w(;dIox;7}OCW3A zb@Hk^2lX%itr}5Yja7>z*s1|TCQopbmsAvpZoeoV&nhClRmJqKdk{>yCdyscyLn%0 zFOvCjIT&wJOvA4gaz2MdxafBt#}5vIl9m^4$?C>9qhZK+eF#UDufna*PE}&9DH#9! 
zEzZ~@52xuDVtZAZ*A(1B+omRha2>S{kAULWii}0%M3O8i&PJSx!tay0Gb%g*rA{=`gI*>) z%~{jQf~=XSyX`D|51PT3Ke!aUyHm;S#zVOJ&`w<9pTNu6qy@HmNu;XQ0etUEG8>b$ z*qD?h^yA+iFkHJG_eC?%HRm|4P|ATG`CeQW7|U9&*$MZ=J7IfXK0Prl4gb`~LPOSX z{CKCD)cg6sGJ%(vbjT1zq&pzE|1!t(KFDR2WZ=vO4Gfi7gp2gQfKs$GjBI%VeM262 zKinP*E@k3`T}!e0$UhX;l7j+&EhcI4QCj|d7i`u`!6wIg{=Q@K?4`}esrc7!@+aaS zRp}67C#s0EVM@}tZ+aGt*WZSuSypV{kp|v_c2%$}^ru1r;iwoLjRm~h@P6Vo0DV=o zpZ|{c2+eVROa*n}vaLFTYE1vWwJ@zrntgisCW$2XamFr!n`9<|((O8O!)c5z2va1! zs*23HV{71mQ7wctr@->pI=C&ykag!tQ|GN8EZUB|hLGx+(7Pa;*StuC{qgq&oSahz z0-1LB>E155z_Df;^=^=wQa3pKmUAx;UqIP=zv=6P+>V%=QK|TQqFC!m8nuI)@#$ND zGmqXe+ucL{aygb&rX|?u@eayd_7igUJ>P?kB#uJ4cw8=qoDQCcdp8>MC0~T#$?GEE zuzNL#AG}Yj|H$ExVmPa>Hy8L)cTleH3;1<@!SacmPrJty+~(V$iNi}?zq%^2%nbHo z%`f1IThm^lCsQNpU$>;d0O>3a6mfe{cAYVT7gwRVnC&|9XI_d#sg7;u(4SU z&1~d}I@c|a)=VJtt)z1NuO>hotXnA>NtB$p8JD zC*t-Guf=;qX}cxevrP$_5>D}C{QS@&J&R-r2qJIsBKA@3beuBZfO)iJDNO(K1TJi> z1)V=i*cL0sI8F>lKj}d_>9iitHn$xnTkfRxKDF>@;$-@2#dGvq6N(!>HbQM)KH=01 zc>Uun`oZre9nk!Q);7I7r?@f@X*vMozdz7c=P5Y&Arj3?9GGgelOQU)9FyclS$S_! 
zrYVfO2CcdS*{AcVMn@s}^l%jJ`cA%>3_tWJisd zEsBkF#7l@V64}TGXR8ADya8IGJJ8;^1CG|J(R7gjTq?hW$y3dQLepll@$Fl(Hvb^U zPVFFvM^u;(MXHSSc%a3ez5+7x{u}ptQiw;W5ECKn24X=9Z0IS5rWiVa!tqRevP72s zvPhBb|5?aaTFqmpaC@6GuZ5|>g0+m9*==G?RB-C|t0ex6I83O?BEIJmiHSu9L^RjH zHl-faYCcZ4^<3t1u&41r<4NpUGk}9{vY=LTAC-2Q$KK)o9^B44V7Y!NV2Uy$1+z${ zM-rS)(}yW3_Q0rULF`UB_Q!Ay-px7M^#v(ey2UFw&D_=Jj!XJcuAE(gwumu{)eMw1!|4VH~_Hz_^PIkhhbR zNoigt-9Bc@&Pwybsb93AI(r{>e6#?om`(hxdAoQ4BW2WjvjiJrI+?kX?+XVd^kM3~ zgB-6_5uW~(W*YxQaE_e{9Gq80weRO*;3aWP$S;Fajy|;N=Q+6k(T7m*Ov zi*(_{dR&&FMDySGS{M#klLWWpPGdbs{Bt&;4;7QdgG`rc&kJ5XK5i&EebAdgib2A>3Gy#59j#kz9R0|k= z1>MMF{+U8m73%)A!Bw~AU~8zE`3NLi92>PIkOe`II)RNJ{$z=?!{sA z8Xe{d-3^&`&p{~l8vVR#F4>p*pi+NmA=doqj%ZBPMMmABE>lR5ZE z%O6kfQ-_bq)=&zI!SkspQGIF!?`!v=^5}JNSYpi7bfy#Eo71t`e!MF2@omoMupO+v zagO$e+fZKFfVr|+bnXn!LsD!n629)7#imyG!0dQ+*zoxmZ29a0hkkDW2lJ~i)!-)ob@O33UfB$Cp95i; zd=RcYBTKhL%|#)<7htt}26{H{ixk`D7f zD={Wg>#;v@0($GNVv8<0}W}(c>ww%Ox7NUao`RcN@Um&6C>!r^45sZxFmH zh(vXL0O*=bFH*hk`Q7S%qF3)P8nelQ zJrR|TtFAf2`{%LHs~vz=+n&I8;b^QI+K(ACN_mV$J)P#Ykp2C$3bfgu^!(F4E;I0d z-}WOCxipFM(73|;1H~l!$OFFK8*5CMErP+z3t>r=IQHXSBEfbh8MYN<3TS>wnTHylMp4Gh-J!?*o$*YN%(dZD0qAx zguI$?UAG=4toJ5K`ulN)R|n1r^Tdp2SNYyX7P#6zjf!pj3DbMbVBx_UI+oE0Y2SKb z#+VV@+ffBiq+%K2FJ~ZeQVj29TmTA;s9|>5Yx?JkAQa>tCR+ZNiPUmc$nu^IdlwOu z%gZ4;-@YNQO_80@_zd(`@1gh0K60I|zZg^{1AWCGa2_KK7sQSE%I(vk^I`$SXj#yv z{bw*ksRk#04Fl!2N7&BYGiEi~QjhR;GP)HZFdl}zWJkriX&W=;&SDM zYeDI4F&r`w;T=g|gfCnJVB@r6`evpr_cbZ>U*v9I5J^z<#iB!kIxWATz*S z-|yFvweJp4S2=r}daQ;w*E|=`Usqw5H=d%(dLo#j6bR=IuE04ya?GR63)yrDz#lIo z7(aIpbSs|&UV*o9)Baav%XB_GB7%&9k{@1_)@4ovm(h#I+KBW$X;!=BC5cS^Zn@up zbG}K>Vn?1k@Z1ka6VZ!d@Zn!B*K6+}C+@u>JF2@e*8V>KR_Q#l?28K)ab0);z1!sZ z%wj~d7|8q=47JaLc*dDim=hNa7~RvVAiLNBe+UafT;L2=?Zhjh5PyXzrYBMD7k1Pv zkmJqm9mAdc>DYd318H1z1>JpJ@WgLxIMvetQqO+Df$%ZB@4!d>Q-QEG`x3Qw@F&AWpo(^isfO#t$4h3F_jk(+fTeEhoGoHPL+~=Dvw-WL7v=x zMbCdYjPblKkQEkXHVE6v=VVu0Y#ClouLZuukLiBWr##&v#D;ge`BUdw30b9#O#C|*ti1siGTRykJT zl0B|@o=faj8=`fuBhxzR8oFH5M#U}PVQL8<8a~g&Mssg`{L{)o0l5sR|2=n(0c=jM*O 
zQN18&EX1fJNH9GniDa?q5eU->!}B*TVc&gWX36w0vbbRmm^4nsJK^u?f&Wo-9{yN< zZyZNtud*`ANXW=4h38y1>Whkqltcq*DWa&PY)U1vq9~P7DOBcjuDfL=i55{QO46Q6 z`rW_(;K9pz&bhA7=lzz|WPcay*y@Y6V`k7&T(*DA_EKpJoTDs!`|kj}Fn7e3RTnYq zO%90B^%(73f)~4Hz_P>-RPPPP0z1X=*m4-gT;~DJTBpiR>z~4^oqkT{LkC>dn2g40 z8gQ+&lhYJ#We&`m!iY0r7_@aY#Cy!8L&ty7*O{*&ra7A0we>)c)gS64x*t4wst}Uo zz%C6IX5RR>(Swn(nDg-=v6lbGx4yp=4HUI;a_w`7vb_VII)`yX)_S_9QVqvTD>3#! z7g5^h!`ztV$Um4Bhe45b7|Na5iZ`#pob%gJEi{Xt;<5~q#Dn>bbFb0bdn(LlpG(+i ze-UU9#U<(A5-EQ=)`_8Vbr?Fgnjx1!d&b$D^w z1L9-I@uoFzz(zY!wkjy@9E;^-Yi&$>g>IOKy+IlLIZyu2bt~FSSc6-GwtR9`vVvWhdBJa{W^Tv zv>F4O>?G_heR8r63q=j)X{bCHwTeOYbh?HROcb$iUzMIrfxPx{YD8soK z)zI_z8kCz$VKub}uZ~#0`n4%&DnGya*Wa2Yda z{?p%%$C{@RVy}WO_swxU^E7FTmu0U$O6R^`)zF=rpWK_X8GU5V!kvg%xaPBzskC*# zDvtfs&Z>jZS3!y`NigqsJm?9$#l|n`)Hd1;&IP2CTLGK#n)-6a?%@Z}n!ca9TggKH zz;YP!H$iGyM8uQkfcC2CI1um#&|mef)gmG|5mo7hH#fM zV&bw9Q`whbdRUEbzj*>=N^#B|Pdi3e`7oRi>xHy$D`3s#Z~Poq8M`(V*`_^-!aq-R z(7L-Cx3y=IK@`DTyXIp+n<7!Tw1)TZZZT{xZ^Y;qxp23jiu?R|(Erb-zSp>t965Ir zqyn$vsr>=qTfG_+19SNI%L8!cz(ia-bvhRAoKF6G&CRb>50PEn{V-v=87$M-!gdA3 z|A?LI(-rDb^4Rn*K}y2kt_Y;(u`*U=Ax1I zT0EZ9N-wEwz#a<=I-#}(xL*lP@c0jITzE@PyoVEAKScQt zJK>)^_xVM)L0TCDcjP!P%Ax@{;c^~L#*CPK89(9j4USbDw-3bYKl0N9Q;hT$2exY(x{e`bo%L!Ij|tkr;tnLdhZ-Sdz+qkuIfli}YpD{h~{_3w_#p_JVO z+7(;R|Fxn3vhS}(4D;qsonQ`yJ9px@fH%0IZ3^Bth(swhIkqcQ5QWTI=~QyC!8^qq zLmOVgiJcm#e{%&&M##Z*Qxhi6?QDIXdpY^J&J%Oj`!dV6J|a%TedLq63g|r2d1Mmv*ZnYBYTd?FE6aJckGcJD^n7N6 z=R){%EfjC&#DYoh6FiX>i^?F)jE9HO?i*+LTVy}OwrCwHWhBj7oSVn~ad{7iBj4fS z^R-y@d^xl8y&Zfjtc3)vOxmeo#2C)e;x}qmV4P{AEpMg|NVhh?=#Ghu$8vl8^=~FF z&@Y0+H!|V(?0^J!BhW%R}QlUMP?7&W243McJlTUV(BLh!08v@A;$^xQcz^I6aVm{9ExGuz6aotS53x!KY)_CHtZ>r zq$-Q@AYir}p79;WB}cMwr>P#}YG;Vz#!Ae@b(R=?`T*`~*1%==2hgmt7iRU?!9r0X zzWkmI@XGHSRlBIjJni;jYMU-Y(QZkyu(%)mZ8p-vA!oWKiu(@apM_~d^N;nsH6R>lgKw) z!B5znjrm56FvWkgVfcD4%}e@4m&zKjlWQ#St`+BOT)Q7fmW09DZ5MH0>p3W6<o!DX?v%m6 z_-@dySc<&yLH?$u9oT>4A)P$U0=Ac?^3LwkWvl1Ou=D+s@WSJw2KQoVc0er!?iC!O 
zvg2-;_s@fI7Pv%Dii@!KM>RpL_dDlx_r^=^nz*r62KVX|qo4Is#<~0ieEc3~`P+k;kQhKNItJR#dRNVHE8{?G@FOLi(rjBqI7BL(A&Gb9u~`lCa2huQ zvHX|^QL4(Urk5OcZ!p4gmpV9;mQJ$uPSIPw7vV#R1X@Sg<1D|85OqAWp;N4h|IKq8 zewF*uz6u84bsR$1i!w}qmK}Qjtj9@~lC0>|IqdkcSX6&=j;9$BhhiPgUN`9Wl+y7q`T(owBoC0o+E*MK|rDl=!wJSN^SpdEkLo`)WVV8e84E?H=gft$5 z25)I5W2Gus_le^>E{BpVtA%|*^349&C3vti7vEJZ!KC^#;NfS^JQ6>va zNWVx7-4ej?ye&R28HL-6S=(u@x2S$kJa~DU!izt~Y%d!FT?;1L-a9YI1ewM_NZ4%% zxM{#71$-hE%ir^-Db2&ecc<}wQww_RYDT-8dQ7S46owq}1A*){^r_oExV`Qe+5Mh# z_!NfYl~e`B-q4o$+Pf5#5)Tl+>2);z$rk4D%1z`@+*OQM3%eol^Cc?lER1hUM{%1uAF4uI(X{nC z{)L5Qk*;u^!4_C{5&wxv)GxY|$c%Q8 z=JaX|erHc=xO+bR&_(-mYGEw;6!kV!XM)$&;Tty@XuFhz?NThXxb=gbmkp`a?go82 zj*CoOS&8nrsyh?N17E@?>eyyJ?MG6S5%1?=oFp~DJ^vDR@qeti;~S-%R)$13RA_)se6RfR%p10my90*dEcC(mQ1GjA?U#5vXT z>f|RmalDJ^Hn06Y(fh^cIrmLG2~rwETzL=+KdZvj{X#58zU>p@SHox(LexA?xhFT9`0d_Ey5A}Ap2;2MTRFGW6bB(@<`m8sE-1oOSJgq+ z&0hL&u_1HKbR7nLlfkC#JMoI}SrYfC7)6{o?#@Ih+&x@GAEX@Q3k3{Q>9}`f$>mgf zc-lccc_a=cuMLq+B2vV5eI%M0&tz&upOf0XA5i+oU&M@~V4XY}PrXe>tHmcF{z4Ll z?)!|lDr|6}E6YnLJpn4*z0)<`jc+s637Q||;&i{;@M!EjS@&u_lkQmwE2D}q=!r$e2IH|i4UvwAJ?sf_O1HnBg)VK{-@8Hf| ztr*nVP|DjKbPkq88sVgxR(P}szk^)|z{TrgQ7}!*s#1p>l#=da)gU@P&*hgkN(P+(r27QHj?62ps zV5byL=1=83dT+O)y!S~oBP?QYhm+mlj98!tRCz@QQaE z*GDTOO?w7;B`ka`-3YG=IR=O2ZjzjmOO7j;qMe*3$Gta(rW%eL_#+jXBU5Nr$U

qEV~LkrPUjw8|1(CG8I_(fwGfIuCQ*zLQU< z+?Z0{6WZZt#I6nM2D5vQQOhHZw01Y~!WMY5p?6)`j~R>UbF+yUH~S+NlUkVJtVv?# z-a^qZFZ4X2%riN?22Zc-!Yf?9YDL0I`ea`Y?EYZRW zIF-(NAHr-Iv0*Y2@4%v&BFyFF4EXa>8wIjMSc|K_(X_^sdG?_Kt-rj)oRwJ+I{746 zybQxK0UhS5OA2>S(n8tA-8_aR?018!JU?CzPg`Lbk^LHse^M;jn;%kP+J0kj6}4tg z?bT&R$Ry0%G(vw=0Y-Ot{hw5+{!uxoyfK%FUEshxU8llCg}A^9M{X|>nFVEX=5R1H zmTr_8hlIzR(|F+t-svxsnco9bh~<0{_RXec>=o}Mwyi8462AM>j~jrtxaEV!q84^Y ztC)$3XvF7@moYFknVdPE3IDX_v(`o`@cPek_#0Y={^8TuBOE)|`EM3Zuu)`Aq&cDM zb|)r(&qdhs;uO@~zQ}g@G-Fb`1EWxKfLPi;f)x{O*xpeex-*N%`pfh2Lk`0xa_9U@ z`8>8jEe~e|oFVGxIKQH|2J7sY2VCKvS$-d2W}>CE36KWsK!u7*H63% zcTslozoTs8g;%hx?;bw9PyvrR-0^9q9Xqg6^!l3_OQ0rV9V@$1iRnDrjQ1BlMN5|v z>@zxmLb=mObovzLYMB|#cv%k}D&5@tU?~V$Zev^?E3hZ8MzYouGO1!s3{DA8VuJ^T zn9`HstcTnfy=Re45@&Nx?kYd<89e|$J{NHP_FK4U`!Qygo(waX62}HkoWyPskzqI0 zIr5a%w=u;LIh^n49-3x9q6dDRW^{|Vyh&^dwtbRdj#@5f!ZW17$nPn?XH_jmIzPs< zu9wMYjd74DD&<&%KcSIr#)bRmFs|RGf{=?F_s+2b=~IKq*i?{dM#o8{g&q@p{sRbT zB{4P+_1Nr@{~$juAEm#_FxPUm>X%q6vTF>lG4ysGPB4j}yro$n8m|h9zqr2XJwJ3! zu3)2tKS00LI6gBuLjAcs;ZO71Am}W}lsRmJ9^(Uy;VDseQN=-Q+&!CBe#XMOZ!COl ziXolqDQv?>VN8s$r%rjIjCHyJPnvYl>neOa`E)Ku^!1P;>Fa!f-Q##TX)^X6v4-m< zmpBjY8*H*N$GxInghUCkkBKnzr(u+FmzV>^2aEYt_0!o|p~Y=pyk z$05ur3Z5)E+i))IIeoENj@@qH3{JEUFCF`ai~S7wb;(*d+{a}N(pKZKVO6&I*IE8< zZl)?7S3{j%$m9A^F?RB787kQnL=6M`pvUkOQJ6E4^m)yO>H!&Mb*jn5EfX+)lNi%pWk-|~TEHa! 
zDvBxXM`mLfga?7d(mTbVq#=t(j;W1~ENadt!@ zuO-KV-1wc17p7|B)ui9xv7WoT%{~u_f=!e?nT(IminAh(%5a{08#fhALcgMNVt68w zl)Glb{b_sgJ2!`JHOYjbZi2G;+aTYM+ZQTK$E97BBuO|Mdajn?uVIdpT|WlvLnLsi zbSWH3H-@7FPSCjMEeiiArvpubc-o=@st?Cwqt*VZMUieetq^^olZ%?k)idkt6tf zFrBaUK^(0`ZAqW!p@z$h6P7fH!5)D&@;vem<_9{GE$0mxkGM}XP_hD@x-Wp&RF09a zu@?4CZ{@mlCUmXOGU^pK0bVE^Mt1Ty-kl~}bbUHNU3VR#y@D=yr|UjwE}DWG=X)^h z$`DQ-;yOp3H^Jxwmt(8jPlqhBf$vub_4kKx_}d+fQpm;q2VY`cQ8BcLeMWW4Hc0jM zqUrtwevc#{ggF+xP^uKECFby2UWQ7i74p6unZe|pT?k7w`XM}ub12Y!d{;Psdemcy^{9&4)(gszxYx(=W%k%1N!>CZ068p991?jZBfT2q7iMiH1cC7gq zt+EqloLju`#u^9y^Dj$bTTcdcFWiHVf>KdI`Y~Os{E7@^HIm~kCTOoM1l2VikbhGh zBeWEWltKsY{gnc|rLiaT8~N(1aR60cgWl@nNj$egw}i0;S`r6Ijda3xf*Iv z)wUn=XU>P+QzIa$S_9(h)%opOJV+j$!8;&30urBRGL{nOF>~r}{2^GwQ&y>_uj9`^ z_1AE)JMjx>Zx3H@WeQ5{>clOt-r~jsu`vF2V|gs3 zH{DcV(q9L1C|RCaE~v_Gox7br5&H{wd9$e*SG6sbAEcwb`@kq8oi#4A!kzQOVD)Nc z*u%{+{bbZJs{Jt7XzYaDQA6;xIRQ?3>4JNo8X8!dF)?wpjKp;q z&1R#L#3xj`@Du81GI;30Thj1Z5!>|7QiCQxn6)(nnop;~#jZFoXVyTYMFsiuS&uz= zb1pM?xRK*#c;al`Z1VVXJW&XX#Ks8DkG*^iZscu9gte;6d3%-oA$jB%{(E5qH$!fKJa=E?Liu#0m~&d> zt^)ByAu!EY2%ADO8Cw%IJZ-NA!xG;hVjY+FH5ntuTR-8HeR+^$`IF>icwqA(X%vcR z;?-YTfKo5oZLKP6@%grH8Y#X8L)a(SK5s3MCw8!)I23&{3NM zDLv+RcJ?FQ>_!PTN!5a5%w&)$PD&saw17E#)edqNZ$J&L;~2VZ1*!Z!%4NevS;MXB z6wf%o9L3{A*H4vd$*e$;cNvuTwi-^0=0R@pHGGxzn{>1}Gg>x|{JnjaXjEo`8{6c0 z9tPh~Z_XvCIQfyDmMNrhR!iW}eita}ucV&KTJV(DFr2(yN++M1%+8h(V)Vsc5smC> zSk05-S6xcMx?g+XT#qLnebRz=@5-}wJs0U5o)%;NYCq4)IvuVrX(uz@jnd5W268Vd znBH>jqWs)+_<3A{sacqazjmF%YnvY6e@!D?cefIPmn^~m23`P}vIA;kP4V)^@3cp+I*718cZO@HE&+Wh9=c~JPUHOIjaP24}I4sCJV^t29HJ8Ag zq4WH!mYnzG4IkQb}G_QZ&A2^}45RNaI$l}RX%rkgKc^`M-GLh5N`<)z<))9`9 z#bx;5S_g3znMmgG#PQe)6aM8d> z8f6X=WAoYeZ6mx#Oa_XU%98`XOZXp$)N$58Fk!Ohqq{Bx(wK#9kFMb!_j)YDS$OBx zG0fnYIOglLm_L$T`2N{j@>sT)ns{>W1FL>is4_&WxK6ryc@Gs)+)QqNTE>4Tl}Shc z*@Lt2PjtN6VEZNf62`w012eAceO^PDz5Y@iKQp1|6dcR#YNxQ4WAao?{1)bK`b*Tq zS!m<@f(IqD$n5)haKmyR*9$Gdd+c$hU8NaczEEa*k_zF(o(6C?#&!~h5h8;C~c-04wIPs$vQMP+Md~L&gICLJ25XeJSLjP 
z+nFOz#jz-NCi+b{Kn0WT;?*Cq*yb6G2gA%kN$Vu%uTtSBzD&iu$#Ezo$>qE~edZzHbz z5ln)L)hSPo@G}mTspRex^_)s#?BVOh9YMgz4#tBeUQQxn=5JT^y?@~ zDr~!HANNoVff3z!?wq}Zhmv||+FgA}SUis>%iRUv2kNk`OZ&*Rp^0qt-c;zhc9z;V zs)6a{1bU&z1cX23z^0T**i+DBy>}7=nlsw@F8qTaa&bRII~udTH#XAA^e>99c?VaI z=`+vQiNYSS=cs$(DV@da0$!1+^j_Giirv53QJA>89il^`Xj>eM z&T{kdYH2VG9J#|=VrI%7?OQ|B{w$;cW(P2LD+^+YV?^9UpB?fM!G6y#WJG^9_&Tez zOFIt0&?Y(X8FGiwR&H^#xU8X`+Xdgw7G$2jI0~1|80^%Sz;} zg%&#l*d%HEyQG<~CijyzPv!(rDGg}W^bqx4T;rDvn6X>Bg}Jw+3QlQeVfpqxk}0Fg zKFwH5CFc%fkkJHaedB^NTsMIHG(O(!AEX&YJSZ$HgW^O9GNZ2>I-B>yc3(q~+t3Yi zre~1O+(ynwJwU}P-{^}#bzJXc%-Z{u(Qj)Hvc;QPV9t{B5cfil=@e+jS?a~Ot*(J5 zY^4L8_s-(4y*e0IwhD7QrxJlB`MkPy=5VxBlIWhliK%{K%<4t|_!?Fguy*ZPdeHO? z6?RU9!((xH+gFdB)oI1oZ9^=%(?z2PZ(;Y>`6%JuN(x?GzCVNtq3wOsg<7_$}>qPQlr6VKhBC5u!f{Ggf)G;qGi@ zwAyG$Vuv!((jW-m^rn&{D~r)y%aZzK#1S%(hCS_ECxu@^EI$^|Nh{Yevkzq9__r*4 zlQ@a>`F$EEm!;rA`Ke@=uqxeoweK< z<4ow^e9xB5YyooZD0W@jMDBwR1{9H1`KjE)I?`wV~-#-6>`1un+WzJN{;#_F8Q%7w7qYh-pUs?9u zNfw7!6+r9RY5WJ-6TsunIFSnJCELcHqtlZsxM902e7YjR25YSV|0J$A>%I}gxI35S zCGAAT|AtK1wRFt1Dc8Oo%1E= z)eiDK-^i2if4X>PvPGaLv4&ic7h$pwDnr(*$!xRUK3HvQ%iP;-$+;WfliTr1;Cf** zmmjMk`%VP%X9a9Qwygm>#P{LOjin@GP8*y*Bty$j{i4oKe>a#*XTa^X++H`Pj<-#$ zl3aOPNqtwGB#wS(h>>pwBxn91BGsIOMLr4Er(MIvKR05;;>EQ7=NbA?g%57uAM>_~ zuLq<2wfIsc1J5*{fb3QzW0(BNxsXOG!p*@a_SR5=imQ0tzW|>tj)s;2b!O>sA|wo6 zhn(UZ`k!31E#%ArgTZ@H_o;>Nr(?icytst(Hmaj-QU+!V9D#GgPi^=02{QS@BgAR% zF6uD5nQqZn1d%JvD5s%~Cp(g;sL=>7;6e-$N<52N+k!#rV?KlrXu*VoH*6%Nn((Wg zD$^k^!m7zV<^T5}1#(6_;jx<(Q_wbpcd<~N`KYf!6P^r^8C!*!WS8fVT^|XRL$k2Q zbwBJ}a*aGN@kIHi094ioF#m4@{q}klj)%SH9NDQjA^b9b&S&mtv2X>4YFTuf@)3tB zb8!AY2}nO21&Xqp!SF*U+z@zwF59%GTkGA$#0@F9`s5gz zkp(<8>1nXk;R~89Rz;FJgTB^eX#Xd~WXI=t*5xN%@*O!Z7uT~8a$RcGdi)JIywG5r!$%=)i4S(KI1b%h7p~8kyWgfgg2=z#SQ%E! 
zGteIg^|XDsWRExVcX=MJTyTey;eYt?*+jbi#1jdYrX&K9Djr4wQ*k(3=Lr_=_Aq{0f>~d{;PTT$&~M`fC88DRys{0& zWh&@sjvhUoVg?hX=b*vhT2y_+@#w7raEWIsB-|W>;Q=>hYtC!lXJ#w-hFil})nPhq z<{L6=NtRhMGBlQ zQ(zN0HjW#Y$vZr*j`2I3m|@!{dNEOf(Nm}eo4#^n%uk|nM<{;>m(X(_n8shtaVa#{ zH}I#vZbbKRNv7qs5Z1i@N(WB}u!2@O(5{%y_n8+5=O%?isl{5x-JPXB)_g-5ha>3J zEDqJzt6_sOxEwoU45&5)iB4hKn4P^d^(Es8Tnd9sL^|{+j^`;213GpRbMv$Ie2t*kkNe%f^gV zX(*%;4;F%opz1%np^qm8rV;?JceubTpBiFaO-VrDY1nFX7)Q4OFC=OPTYl6ImYN0D zTOLtnavn}+E6<3q745aud_g4d?509o?U#i9F~?}Zk>|AC?*$Eh@tbxD36e^Xf=NNc zc#~rXXRViJeKZwudFUW9+H`@mth#}j!5e_I-RF-xYGPdG3eef@$IhuMfTcwf*t{J( z$ZE?ga0B@e`g)tK`bQI#wZ4NNh9}~5Hi2BMuOzm?uVKz4VGuT~mghBj3S)Iv z7wuvRE4b?DZ3q%nWFyRF z*`pT}@jd4gR&cDMdnYvD1*rsE)mCX%O0gg3E{!Kb+$?&{wM^LR;mGX2Ta2nT1(@1i zP85R#@at+vkPLDmrW@55kK^&QZT%!Xb@My=lplg4NxihiNQ>+fO$1SY27N4L*+qU! z@%inWpim(R%hDEr_+lN_fn7cEF4r}`=KqJeNtZKeQ z&Q=S<#ZFCT>9m!6646I?c!bg8IjT&^@Og5z+l(n)EWw)nu_w$EeL%*5JlMb1V%v@dArk%^){o-u6I{c-qaRt#x_QFQ$7bz+r7Sn|7$<3(qYoS81@d)}A8 z%Xo6Mw36b~BH}jnBef1N#KGqZD4;|*zF7qIYMR7lDMP`{*pb)#SCXlEw-eiXtDxK0 z47xAtuxH|hv7ovKrRRr|a$jYb8wj{OE&yc?HNw!8IqVYOL>4q3kkH~%e3ml-7K09& zv-fzB9sj{0|6cn3%>{CN=XN5&?U}THu4A9=GKE@cZ*p{z1uNRog2xxBv)K=&nR_#O zXrtc{WnWOJFin657mM+ax)0qXqY1MNnqYL!XR_ww4p?e&8f)zYVU>#wewqFPuBepq z7vyKd`DM-2{e&br7#d~sNAn28PPhr#S+3|4B>{p03B>891;f9i#YBqQk&CP*J1CZd zhX$VGM5j95GifWr7rcx~tGJ%tiBE9Iej&%C9)x*&pI~(EFcd9_#>V;(s;P4Y)5x0+NHwe%=K2Z=DX5m9 z3fVqE#Ve_Y$1`YJ@*x z5`x-t+c2+p89d`Fu*X&BfspK3Qh$NK>!UKvNLVVCRrs*2`g+*Gt0w%69Quo?#vL}f z)C$xX4V%5tSU(Zu-bBOqMmf&^mduLn)_}XxQoL<}vzT&?Tv%}NINqM#2eMPofa*>= ze5sv{5>lD$7h`AaG4_IoTND@}PW}xcHcZFqOls%fPa2r%T&8#&YxW$4O|T9+m;9y1 zdnd!G-*Is0^Ie49O0c`3%eHgTM1FISFB`CZD(KgSV)Gw+?yfYGOJpbHr0yI zF?IS(@qcw-I^z!+(^0&zlV=rPP8b=06(rBLi^kP;3HHF zR+sc4NkN_U=%Cbf`aCvb_c6Gf@CTmkcn>RfEMj(ld<5l_AHkY*7WcI*g1XfqT>ikw zcKrAgD0WmQagtM+zLZzE>Ha7gOqkBOYo79kw)|>Xaj+Klc}g>81#fw^8sF$z`RkbI z&$V>E)>7G)-H@wd4trb*Nq^cz#xcQ#8MD}d{IeW?{``4ZZ>$faSyJr7ZQ{(N%ZF&- z#VM>Y-sUNmJ79gi0(&OJ#^zdg1UmJ;qj8$bRPIU*NmiVPAqn#M>WUF6Z`g_T>NCMs 
z-VnVv^?<-01y+dL8QiQnjQmVT@EEj%fJOzFYpjf>3l0j2aN=!<vtJskFV)K)_Xmpbzt^Hg!-vBlr(PxL#tFT2S2nrKljge%>)0k@F?NG|7%y+wAkqiV$`@i#Nihix%Y@LqF8JJO4DY)n;fj`KkX$_i z4uf*+{>qa?>yZOW?0iTAH&0^fIcNIG_$kcqG;YVvWWn1Brm$T2HEnx5LX`G!?n&YI za8>OF(RS6t@FfI;MiW4~EDk@-*XMQ%zj39Q99%b_4{tXvfScP*aL&Ua9QZmFJi=e_ zXKpP)v6Vk@Y0zyH>Ww5{6S*v#ju|OFBE!71`(WdkeFQHTO=YBP-_T24r{Ii0G$~$i z1E0?C;q|*$P^CJ1RzBB`eAKyy`RDX(-EjuVKYa{+y|zget~a8o)G8ieB9!| z{VydX)6K%)Ah`7b)DS_|RBs9BYVE}2wVmK0dH}(D?+LL0c?(!jMi5XB=Y zC7$qm*n%+gh8rS&g^>%%>Ck5xOWTUI(W2o1FQ)qi>@d)DM@d6-!G-U8VV=|`IiZesEj=<__ zJLbT`nP5J*5fUS|V$z()@FSh;?^@O1{gRnzl`O$t8`fn8B6Q%yrjsZFN3m={95{1# z$=ag5ho8v{-;m4=N)&D%$bbRTUIFf07#Bp98-Yop9U2bZBVcX5mgVSXt** zsv)n>oCxBxZZ4ffx2PCP7tBMo)Eb;(w47Yo91qGp1+bh|f9Y_4!U&MG@WJ$6cB z(`YQ|<6L`IyA@zYo;o|>4|j(qJJ9H^5ei%gfGcf{c=G#p67%snyfU5-`o9F&sm}GF z`17qzkjNLXzs@-2t)7;}T zZ41{ra9IFJjGf#~&!95nmiC==q#p&@BPO`?Z5h!|`c7x|>w%hSAftLY5tI6EVv{T9 zm#vCqe|=Ad^pYwf8GHhfFTnXZHdlnRPfls5#HXvx?HcUhiu-kjp;K@1gkWT2OZ-Ig02pr zt1ArI(~n`GRUa>Zy$;jylgCcna}4q+_j}G&1Gz+Tnx!(0nVU9~^IotddhG@H9T`p2 z6SJv{$5|LjsDjt=L%ioaM=To7B~?oOwE5C|NPZXuQO^x=+wlfcyIPvP5@Lu+t5e~z zO%@6q{Em;pLg4rIL-evx78&?11F>Cl?AbXH7_h~L=)OyaJ*S6x)nO~)@@_3Qr zny?I~Rk+jK{F&^^U)6B?tqS|-ei__2rH`wkOHrz*n~H9}g!~t8=?u*{$f=NLtZQ_z z{J9Y;=d28jT{{{%c5-~g7E@O!?4}O>~f?sBmsH@ma6ehj{ugoT1%EvqON$>^ybYeDhpxOrWm(2x@ zX=lhuJt4;1%?7vF3&5;`ePCU@lp$xIf@QQgW4u5br`bEh(W$Q?NKqG?R5*I-u^V{# zzn|2pp$@L*2jjg?Nw#r+I2f1?p`U9DOlv#IqjS`mxwFsk*crKS-a>&WXPSc0=?r=< z%N>q|-o$xvqx_uMt5kR9ESPe1DmlV2qng4@L2!3F2EWipy{KX^`x6iQ%WXj1uY+GI zSp)t?X&AouC-`$M-8C&|ke@Dr8m6B~K(;3NqGv*PJebUS`)T3hv9nNVahG0cs-o0F zf+k%%P6Q?g0zV*;W^x{P*3lf5PyC|^_z~B>%{QCo*%g=(5%?`|?Hg4a2TAFb=DalH6Jg@C}R{VcM2IQ67SzII_2){~q zgSpT-a{ZS#Hrooa>mEwcC{Y9Y`)M6de%=riRviF=WeZ`|9|nJKZYHN5?1WfP87gFK zhe|G1xLF~LoDK|zDTxaq@>V!*mR4fl7>2M{)^l0pGo5(;ZXw1#JHmUJcLGj`bKSl& zU6^Pk$vky02hCSJbSqqicf3r{!)p}YnvC&3HNGW5DyneKV;IW?PvaJ?GBlXk!#}rk z8qYK8iW@pFiL`yDb?xsp3NCDeJt1Z4Y);Z(dmyX&zp{UznVoM{z80gdJ4exfPU zH^{LoJEVAF8Ls$t{X%ed>4B`0M%<7q#oo$%3mY2jV00iDIC2SQ*$wl;rzx{*Ru!U} 
zkuEu|H-jk|vtnxx&ci)xKaoe%U1`54w^#b=kH!^!ARL?r;%Cm0$hII*NhpND*IMx4 zR{&jicLsbAY^B{AWAuqi3Zbjxapr}GARe6s)xu_A^jC=0FQV#oQ1ga^sv>kUxCarHIL zC*Ycscd5ZNfBM?Uf<6|?rE_1$lG$8e^LS7VUEVIjm>tWap0ngo@eCn`&sL#ZbrxY4 zn39BpYOvq17;Zn`0yf^);rkC?_~{u#H`WI5JgT|7e9Kep6N$mEU$Q{gegX(@)I*!q zL-f&?8WP&L3*Nc@!k2S2`TPMH5PW1r{bIJ!fw@)xW9Uj8YHFf*D(zBf(sTA-}xF`SOd!nWE)*lC6+JOO)j_Pbl@A72c zUC$l-zrO^M&gb*_inC>UM$o}8)3K4VE~VnI+uFRywyu^SUb@H?3M+*fdpKuxz>{ z!Ju3`-r)|PxTJ{|itQHiH zG8Xv!dMfVVYDnB>Ch^yNf6wK3kCT3yrpvcdSRwhn`<=M6?*t#N8OaZ^oFjSu_^y~u zOcw{hQPS4;S`vM(65nU*C7%AJlM8(&q%Wy^g2L_9^75fw2#M&gv4BB|~R;oma%=0EcZ+|w;`dD;s17oga5jEnJoo7HcV(6E_i0|?JAQ8GGdpY~ zZI_e<8GB76tK~ZR>Mys5uQ@J7T8A5Xi6e9*Vds*>-P?1}!sRkA>S|r-Y4`c!;jYmV zGn4I-rk^7u{k!h)rO#X?r@6fn_fP+k2M^csKgrHz#+yRP%hxrMJ@bcobFB5H&Ob~f zuVX4jCo_)oIThg&U&AuV3oje-v#vq@HJfGp(Yt&3iSKJALl58ae6=bhYksegq|^(f z^Y6;^qG3YbTRqI1y?O^fas5o*Qk7;2XW3)^mVZ&wYp3>0-W09lMP8QW?mTvza$h{6qaaf6U%ezG-rZ)6z=~I=o?YEJ1$h29-ixR}Mv=2)pCbuOgI^uXk35wFWA+lV6ffBy?zbuJhG!ide zKbQYnbVPE^GMc|;x~$@+6DD+Va9;J@_B$3izh4m$FN}rc3U}`|}1pDF#XBtD@#-D!OO`s@33%>fdFn^kbCofbkZH3-cjk=~yvwNL z+w_JAK6*Cc#)OpOFi-6jcWYUJPl)-2AX0zR51=uL_w<$FS*?d9UxwrOt@p6_`GJLe_h~=4 zr#bEtd6>uZRM;T!oAr~=&EF%|F4`!vuqfqE-l#5JU2;?$F7xahxqG5O@v4JhP3~Dq zqx=NvM*cV%M@UtA!Y4s;D#=~aZDc3C+VYtHHA0)W{{>ItuqlnlZB~{xcoCk_$U1Ha z?-k!cmW8)lem;*iUCEC;>??Sxmng|lyq_#n_cE`3s8OtU zd%E=Ouek!77QV!x@Ht-`uOfL&7f3!6AwNH~j}iN?lx$S}$V)0y5xl%}g{K<0j6XNw zpQK5i!<+bwE1l6XReE9KFUf{W6_T(;zxndL+xS%_5?;*kE3R^u4u6^CBmV}wUa$%t zmA<;_$4|tH(ml&2@^4t&;038H6PUBnl7odgJl}`MBw_ozBs(`$NnJkgk$!p;Ad1hr zz)Pz&ki3{%DY43%Ecrf9L89pAA~mxNm84D>CB1iUx4_hU3g0!;LHam}FJ4?y%3WIa zO}uXZA|A|D6?~2uDNQL^B(94vmDe}~`XyO94ec}V7`aEGsTeJ#KHxTfHodMZCl;>%~_?(y|rZkBvX z8YQq-I3!gR*h`OD&f-_iKOl7+yunW&Ia7Qv&p>M9VI#|mtmbaAR}}bs_u?&gP3M1z ztCLvh83_dY6{J7>H%a!J+>$gn=t?Y1Udi%Du8HKe+9WX-cSxUzo7l*GDS~rTIMOvw z5_!{d_ekc*@N|POWs;7j7XA!hGpWtBaZ&}%LaD-~2FbO;soaeo-jZW)o{6*_wFFnu z60xr7NnZB&F_IgRagtOS4%mvn^S3jYpZ$HjB<=iR{wWl z_(_*yedR>S59-@M8~r8WcisIfvMypGl~&j8-yocK=k^4 
z7HnH{Pnh>d1J!U7fY;gk_-z6g?b>;R{P6gOgFf8HPE-M9%R+7yQ|5A)_hf?`xf|)T z%_fZSxH`{JJWnXo6f!=lr6ge00uHom5Pk8Q2=>Q~LC4;kpo7&h>~f;OvnR8&vtpg}wRW@B)qDo(>r7;>xMl|AQG(9Bm3H?szbMDz> zqQ)5k)J^jvc%%0lD0UekcDx=HSgyh@p=#WTCohA+U?_o57?ms|tpn9prF^Ug8DjJ_NqMhy#{i1L>Q;gZU5h zSdMOquxGa%?JWH2bm5399O!eUQm<0H{LNRgZefwAU;hzkR*u1(=wxa#_Bm%->j>0f zdmD6jIl&=EZ}vyUduVYYPIOxGiZ=U262sIcR1Y#x*Q7+M^*4vxJiUf3-MfVR{dWSH z-|hf=W?I0mAGt(pumvy5(2?1J_mh}#D^ie|L^kal!QECn1W%^qqQASG$i8s}bY=1) zdg8_-qTVoyO2)|}t#eISF+7Xm48C;Q+P;<9rq+OWy{dqU8b@KpEH1mD^#bP_PmTl! zEk?h0#v$`!32I4;6D80zpg(#I`fSEQmSGKWQs)`?h1UfgcfNyZQz?vXeM3IIvmrZF zkFjam&deS2#c2H>Pg3-@3kCP?M;C<$P|_haGWz`y^wv2S1<7qCwyW%!Pfkkc-Og&{ zEVQ8W4ie&S7(zaBQ`r<#bNYDTAU#)w@dR)T(D#Ov{Bl7a7jyBPAU(q257g%SyK*}- z2ssyW1ML3h%+TfG%uJpkF%S$BmC?!!?5YP5_CLw79B-07d={+;NrOvP$!5JU z0m}D|Bu%%*bGmiLh_>)rN$LFz>@j5poe?!rbb0SpRN3?tq|AK)doC;_L#uxgucN(m z|&QBRvSGll0){5IzU;~goaMgJ*i{;fwWzn>=}T{rmaiX$Gb$wt}xdWGlz z#GvwCZxR!YM1?NLS+%5CV65p)?ImW+*=~&Om<`Bd^cEQTa537RIuD`YYEG!@bL2N~ zPpw>jF-Oj%vZhT~* z`@T+~f9Hzu>tX|VZ zCBxG%5t9#zz}h(ydVM_$Uw5d(2OHGUf5ETFh20lH8~`g#`z;05{=dVc_hIIOKH-+;DCha*gZ8 z`#a_VqeuS4^4?}Dtj>nMOYRcOr9tqn$xj$iJfdcD5<|MQOoa_56KQV53*p)Ir-|eD z6WC{BAvvy|!_l26C23cR;M0@G*qVd0aqQ|LP#NQc-S*A``F$sZ({J6vjjJx>Qt^Le zPr9tubOf=R%M)l%Ac2xsFW}=sby7e5DBT$?U{a2lvu842;G;9F(WZ#WPIYrE@F;J$ zJsMGJ43|{7pk4RcIiWoPsPgxG;i7H&WUR42S*KM(RLXTh=9BU8e6s=4vYp52PSRpb zBxhLfqoYA-|0D_q_TH7^fz^0#^ z;zK*BZchN@7TVL2f+!HbTbCr(rotn!=b_)27%(Zwk*u)2=d}F9cIJzTISM;$3akG; zhSP+Tk%w*wQ7ciP1=C{j;o=Tra;6NL?TUsY9RC9I*KM34wuR_>bdx%zhmc|nbq?@} zcz7&ZtyD&)e>egH1le%sO;dK`B26@@Y%=uMo&*zO+Ck~Nm9S{fE&3_D9a;M5!s7U7 ztTKNI$ZOVxde=J9E@gSrJF5y`QN0U%dnUlx&wZe$+kU0<6L+Q%R zoLPnA*x-)4L@;xdP}8#%W~NlI;;>KniDV3^&#B^U2@PUbxarm^Hf+KE&j+qq@3EzQ zeH-D#t2VIUbp>-RxA35hybgIVSfLBL6Hdd^dYULoc-!fy^InwY z6weH#JCiko=fJ@7AfoAL41-@g6RWhFB$pY9^!+-nMAb z19F2MI++Q^PR^nI#|2E#+#x*Ua~g^i-GI}AV^Ls#7<#~s;Y>{$Mp4HPaXu|S4WeGT z3#B@fsrA{1q?(jb3BA8h^k;3gf6(Fqe2Jd?NpRG^yMvD|}dO zEGg2|M}1qhf#=W$Xm1@uv#KY7w&1m>O~ne9Uoa&5yl&Ep)dgU*O*?VBD=9 
z-*b#EO=PfQCa3=2ZgQb04eF8IP+rXe73}SBvNhcn41s4c^P7x-e0ut`_M0h*; z97)j4gVqr(tk?nIs=InHron+^O_H7exJ21|{sk*-yN)%-@M&y;78&b!o?Q@AM&o~d zgSIX9NUQS?v%F@2b-meSMH?jzJm*nEX^v^=?_)l2ux-L`!%RR*@e?$ zScO2@2<#hj4zKXtMFh{k6Z77&9IlEQYydNv6K?wg2uzx;#g8|6{+^|4es?yOLAR~}vNKM?}O6?A|z z73>+FN(u~jz&w2dRc<8!f1fpky>qbk+qnXUXEz49-4wtJ9vZLh z;V=zdDrO9Evrc2MXf9JmyXYq*C-bL{BTw_Z$3hAydA_LoJko$M` z<2Sz*(Rw2_CiE=BJwbRt{;en7?XJNLP1oS{ZT?*AKXMoI$YKwX{zwqYw>OB6lXXHn z#UD^^+Bj6aaR(b&p}+(mVexV2b#y3b620>=2;9z$#qRkQXnooi(weA@w;ryA+f^3A zsNHRj&SlQP@Z2S#!>7?Gt#btN`C&(AUoB_dD}p$eP6WcP;`s=})T{d$2)NN*Ykux1TCpU6X1Y6rPYz=62+AS3BoP_RGsOH$%G|l7 z6v#>MIGJh7G1>E{lK9iAjM2+9==(2=gs0?Dt?_e(H(iaOn+C&7U2~4G&7a{y`Bh++ zG*l#K(gg!|e5%bi{Z7m6C&N=4>x6c&VnKn91t!-|*YxaC$6Y;FC? zIo{@huO(WM@1IJs;>2U%6ZaJeSa*UA|4liuN`pjyPOwm`{~eH9w2*QNIfPYxMO>2# zL{a-+f%5B#SnFvr)%Uv0*|k?5#jBTr=?2qa*Y|~Nq{(AwVI3>VpI%3&x2up9FNElI z%rO+P*_U<|$_!;rma)GMTM+fnhf)63Y+!xU8A>1Ik)*U_Y@Vpej9qC$oxio?IL>(T zURjPCaX=Z}HaU)Vov*+%71C*a@@zVMTuk=7?IExKRl;<~biB7Llc{_-j-$VR2eW!> zC*C|a9*)u44flIGvEGl}IIRPf`145{q*qa-XYtSsztb=8D z`T$JwdIDQ+T!d>}bK#^p?Ib9=AG^!NvoM9rxNPx4b-Nr$#*$9t_Am<7Ewe*rTDNik zGb`kxz5%suo<`(@r!m&DS#-_6JQR_pMe8(T$#Hu#@})V2HMTXTYu)BilL3mqtCVpj z?^U2{crIulu>cP&W=O?;dtqrbkNY_A6!G#)2P@QOGvTuX7_n&=?lNj4yIDnsiO2)S z!XC2ef;!nY@f;E#JON$)>mXvKQW*SxJgxUxK(fd3IODPuL{TkgN$?yN>v-tV&Nb$u z#Scy)-JxpGI-CQe_85`wU=_-YYNA1{O58l9O`J_-N#yp?Sh7J^75;4;g>KxJgBNFA z1mchYFg@og%2qf_Tz(j!XS`N@8Yhw}UmR=Md+02zt^Y(@9*&5gi@gFMK>QkoCSY8|jX8C)syv zP}Jsr{CVCP5dAcb)UTL`I}B&i7oF#T?o(6Z5zR-DAv3{a{o8o`>D7Qszrjey`RK=3 zYwq)K9iVxBI&Hc*5AnW!!jl|Lrl9QwM^0w<=F(guiXJ%??-qW60WW{T3;+4E z9gjbtq?vnRgV=+f8xc$9#Tx>@y9>Z-G8vs8nnPULAdXzd!*!oVLDS`XKx&yBQgOV; zvGrWXfxW|cNmVeNa5jZ499_hTN>6~_(hngm|FYT-i+D&fC7C{xm{9a3gfZTAo@`E9 zNL%k8AY(R{2+NlpM>9T7pmHVpLaAah=zUQISDUAj0+K2Gx~7Mz3?v}-xdWK!{|Xsu z-@?_Wo+D1s6}WHXEuz(vMUOAK=G1mI7xX{7iVh}TC)MU#$jM`)f%{T-QLW)b^sQS7 z$7Eln&3!7+^Vd^OsK<8lWs3;vPMm?`)FDYcH3^l-k3{l+$3Z`}Gx$@fHg+oJ694%u z-rcJRejQSSCVKJg$=L7U_t;$0yZ@X}+k*oQgLcr?8y@6JmKN<;c~oRsVu%$h2Jq9~ 
zO;mTGH>c*OGIBKz0++vTfQtEcY-92Ww81w()Htj`|9#$uORsJ~>2f(}?~jSlU{eA& zJIR+VQK`ZA3iT0go{l{_-JtW^OC;w5pYv^C2>rCurj7;W*sJ_H{=NisW@)`2i&J~ALy-5MI~k1sqXu^qA5o$Kx|Po+9vOS7%GQ?3}ynQ?cd;eNfUhP zmIH^*s8I!0mwaqn!5YmnVuW-aa?qbmY>nzr7`p+5@1Ku5pC9E&54Lc2&Od_ZY0M`s zNu!xcmn1Ua8;Bm?7)#}Zy~4j2!m-_?7CS>UL96Nov(c8$OVhYN*fc9UHXnaCW#|`#p!rYY&bQUr6?-du?#7djD+jf z?t(*RbIA;KN2=L&gsdp-W@fBRbgI%{Ocd)YNI0zlMO}00o4MoQtfgz9bWR4)2$lW2 z?W@of8#|PD_#<2qSx&W7Bs8lp1-vr9$jGB7$}K#wr7ZX_v+(A7QSpg|2mo=&k!SY4f!8gk8J`~ zke6Dx@K5hMp?|v~Sh?^f2s?KJD>aTrGd2(6S9SY|m7O8r$aTR5tBxQTWX6ru_7k3d zph9=fs6eaEzTxIA?{NC$K92Ffy@UuBtrdzOJu9EfY$H4N@jQYGQ2BYWJQ@P^$bkL zdpv?LqjLcLyE2k)RhUC6AC7l2Lvvy24t2776adz$oy>|}F$lOG;p7x2LWyUp0QKoa zYOliKbZ#Q~Bt1k8hUQ@{wTmiRy*%4VHu_dI~FC7r;B&n_#KvJ(l053^nI<3afW*!jfGpae2c7 z@TX!cYa&#F#Wm&REz>Ey(6|a5wVXp|l?qAn>T1&cHcrGVh!utvRN z9W?{h7oP@?Qe9zrb22e1`A}Fe5XGJJmx_TP9`m%kJD z6*0oV;v;Z4bQy9hn?)A&%oByRp905!2ceD%RiwF~5z2kwI7Gi{f%Pl@aAF7#=2>Fm zk(MJYb)Uy7?$clz=1)Vx@j@I`Mp5+Njp(wWF^m%27b>M)hSkGCFfoSWXHv&Agc60o5j4!Dvs6Y{KwbG#vTi z9%&et1LcI-_#$H?v}~Ix)D64Oiatci@Jb*2-h3?W@RS$*d1a4g>pTG|$<9#kMiKV6 z(xf+imQp+ar%brB6&M-mK`zh)Vr%*iI4MRG3*!i2R~!T5U2cI0SwHCjKEjwB3=gdn z!G4=#RHsIpR^1bUD9)h6i_;(2Sw1VszztJaxpg}6<1ZI|>plq%-uVc2|2zP^H>?z; zWE>G{&|!}1mB%1$o)$GTdq&(ot-$HhIPT+j7nmCG7`U&`1sm=VoYZtnIK!Zwqfn)S z-%hy69M6TEp?U8~kFbccqr748q@|qt&}n$#5^X3Kl;h~U#*ak7?rTc+lVo_d6K$5w zN&R70=-yj?)Lg!SM#jkKKXY%uWPdI4CiNe@<(xzcUOz$MyaF7dY)qnhT!rA)Yg+lB z7XO(rmi>JDI=cMs5m#mHQ>K4C#lI_-qfy#JY`dH%)P63a>CdZZR{bazC%m91%`>t5 zqGVR1dTcFg5=SbBl#u-3C6uE*2_=FyC?2yAyX$4LLB@!!Z5#)G>8*uNTrRS&RF{e# zpYg@Zt;^X5Ka!A?7tDFLLZACplM91BbV2{JDiU_|G!maMpfAWJWO0vx|K8-H<$m7q z<<4M~*XNB&Oi#cW#_qKqU(eIZV1mbLodN&t{(%A>4uGp7WtKHaA)|L*rgO*YGZQ8X zIU7DGqMJkSkwJtK(Q$i0V(&xxegA%Hv(T67+$cj9Bkw_Pu81a|8wuO@9)l6%%HiM` zWoWkIDVP&6!bquYXWJpXjF3RkWN71+cO(lchlCXA>E6iS%fXcIT;E@g+ z^pzm6`k69{QC~#|8z&>}q{Fn@evmO6t45jeBT075W;im}5X-f8Lcfn6YikF(pnGa6 z`gLF-Dz9l`eleRFxFmz@(d(ce-y5NXTT#r-mVDMlwT*e<{tiU?)>7|m6`FT@m2lV3 
z`=oy2UU={npjj6@q0{_*WLAKz?w@`_2Sy>NrS=rLT|G?_f0x72ZDnx4crNkV`Uk;^ zLHhLNb9~58iM?$uz}oLhDK?@crkEoGo{f%zVy;C9F32Qp8y*mI+1)tBU(Xz0;8vS2nkLd(*Y31Jy%2d_Qf9jS ztvI$l$v9>09XuF!j;QqQ20r$Ly&Sy&Kb8LggmFhnLsIx)+({{ATj6 zUEzqvzQWDW3jQ%@t^MzZ3kq^ACXC-_T+lR#3w#`z^l{0+A$26<9<&*G778&jZrku3_kCS!qb+W6Mo6OfEK=-3935wlGkpUs6n1f zbHDE-M}NJ;hJ`2LVGo9Jo^rf2c{s5-k<1AP6+xc3%+hGj2-ay>a|2bLT z-731RqfNfP>xNr%-$JCQ#h#jZ6Y_?*>h7ZlghX87nxi3qF_vDDWDE0$MeN`HA`{ zeVr2(v3H49+brfsg)z;u(W1j2l~B{qmH5Tzcx0@mOqY0EB;WcU;Xkh-Cw%cRIW{f? zKDd#`eBPZ2SffkAuQUS%L1(z^ZUJ)MgQ0l9my~8YQ@b@mr2E$%j>oVcBkW8NGT9}l zxkd z!{V8yB*MuEUQ5@9zb~d>Pc>&eu;LU5v!BAYvv2UXQUiL%Z!GqV#~jmMYZ?`MhkU$} zMAGlLh@?|uYF!pvlZfNWbaE~PCjW`yul7dJZDbGEjDAl&mZZVHLNifmu{vv%yB_SR z%S7q(cnHc5Q@_|L+^cCPS@)(eQne}#3FgfgTC?B54wG7Px+NVy&ZAJu;u%arDPcrzI=&7pD2-j zn*y{lQJyi=Q6pIK3EKF43u;+%5!q#M$$N($qFAf9M}qoDhu3Xrq?BQh%aiyCD8{zUFHTJRwpS?k^= z{gNbd(;t!U|7?+ZUM_Uov5Zvo-hm4@$iREYowW>YueN1l^z)cWlb&%@XRalL4IoJ^9#HwlKa^Yi6>LAF zOf(aE!L=lJ)ET7AHg7l&(>EW6yM8;WR^1s&tZRWbHy`wUrn5mqi$;`X<58KD)w_x;5~v z-a6E|#F0LoA;K2#58h1F}0jTl{Tfk?7N&mfFonBH8N|^4t#p>(M5$T$o2sjHO*-hW=`^BwtYMZT$HUz{rDLu|4TK!V0(#Ve@-VU3VpDn zq=p%*%H;;%Y2oCjy3nErmzjMh6?nft@PxML5uN-@ZlE zifcrooTf36oi{jM_Dg7geJOk2*%C<8P2rRiGf>adMxc5;6D{dKLN^Q>&;^yr%(90k zXmUsxU7UOjHTtf`&G*hCy{!)PrS5DJJH5X4i|$D{Wz z-yttSv`|q;iII=J0t+LylCEE=^s7NAnRQbi7AfCk-1DXoPh`u`kV2qUxkhy0&_!5v zX+Gyc{3|La&G-@7ZRAQc^9`8gX^+X$AM=H`R5yZjw@&6*;1r~I z^%AP`91pjK)X`l**0f+!1F_1mVkh5FV1D44xMNBxQ~l3TcpU6tcFju13%;I%*R$N= zgrXU&*5ye!Y5fwcXDrWnm>xt=ntw1xb54sigH_@Bv{>Z!%?b(Azd_t~3#M&-F6(8U za2keWq<}F7xT?O|m02mi~_WPI!-Zvw~G4Xpc6-ZXb~()Uimx#(J8x*=`jI zo46JCnEDZ;@eF?cbpzMWT3I;cd=MsH;WByCeVLovFB8-AnC{K#K;_4S>Ez+F#H{c? 
z9HbhDyezU{*69iKt$s7?WB-uDZhL_+P)q0@U@C%d_3809`|+&YOsIC#NaX1=o7sEO zA3DztK*hgX@LiRul+QiKP8JO^QC>X9YwHLZdYDDxRp!CG&q>Vc-DYs^orA>b&`U4| zRH2<~jlw9IDs<&-DO`Q$ zI-aNf4V!J{up^)@=}Zsdw%2-srnH;X@9=asudoQza({E=>_Wia^O`WeH4v&uGMN5t zXF+Iq6gczhJV6j3=2JdOHbvAf$CP>S)1E63Yr)j~U0B0)9~88Yc34utM|W1su*SuO z!u4feaA;PH3@e-f(}dONlG!HIt*t7)-I*h*_ALDfb(@rpU6 zu)U7pPJCNvqPCNy`Bj6U5P2DIa2h%={~~_SH;V*$=W*63zk^NFb|LS>85fz0-;S6FoG#kHm$61b@lqNb%CZ7kQ-1@&R|nyV za1Xj1r?9SA%Gskd1{OLxAvX&{JWF)~x^;dlHDDf+okRYN^guiLbitD@U)+Q57aHQw z)&uBygC-3O9Z&l9qFTRuyI@#t2q9K_@avYBjAeg4D40VyPs8q_#F9#(-j8myXIUjY zfX))zm+{nei79Edxeca_c#V2*B+BN(x12pfT@W{8HB9tgg&%x8MaL{vg{N-5 z=RBJ=nf!gMfYvWk$4gB;iJz+~DhJ7MXu(S8JbxTnBz_503i9#2;t(v*+vjxRQWZeY z_pn{#egeZWm&uPkr-f7FdV!}LqNVHYN!9Ew(r_n1gfy!;(xEX#8nv7rSDDV4!*oLH zpGU!jqvK)FEgLq;l)yUa8qv&?_EcQzfn68wLN5*{AyCYvj#H=9zx0v4&hUT>iBPOnA+%vK`Baic$e$^K^yh9W89dv?!(KL3$x~2FIpNn}4 z;m~vU8uZos5qXyABeKAIK>gPJ=*VAVq`$rw#!JS4+u>s9;Wh($FB5ApWnLU2JQ#qA zcK6Em`+Yd1+!@FAMzIgN{*u#Xx#Z6EC|vPI4NrJ|hvX#Md^q87E3x(+ zfeJF$l1|@5;dW1k6aTV<6-A#R1+!KOU%o#^)w`5LnQP{w6sZ9mH#HDO?Umw1ZaVbq zAzNB7{UtLZe>)=&d9v-gH)~I2kT=@}mKx1jH|2i~jArNenM_at5rE zX!6`pTy#kbPWn@hSKr03XSp$2lbrzFisOjy8fEs@izc=#U>?i~iiPc~N3maihKSrA z{pAQ;IjsHFVJOhM;*{yba9x%}z;%g`4Bbm7C!dXFlvL8_W1G8hg+($fhEvh}r}LoU z4>63Lzkr6AJ|S@Qb8s(YEDczvkCh`@$SVUqG}k8w+b-Hn3skf49up z{V;@9!K2X++>X1RYtTuyZJck5J7Ly#ZT9<&jmXga98A<|C*_LXFm-|sz5BokMQsR& z9yuK#>Xrhh@8D8$)Lw}^y0wo@FNnZ-ah7<)lYFXsNRFDtAEPz1=5YEOj=)S$9W=}I zJ(#=^knyKRu;uxw0IScVtg@`OroChQrflH6wrK+2U#Ft+7Ce&q?Xqxx*Jq-#l1oyx zTEME+MI=4#6j99j1E&vWlfY}XXq%lo3F>VnYF7%dgFKhyB|i|dz9Qrw9Uz>pJ_@;3 zt8n&Ti<9;9im134Ah|a|`23iuT*doqq4is9(tAxCd0XwmPn5fG|A%ZC=#l~2vaXJxN4{5Xw^oJRZJ{{{=PYLH@n7i(;|pDD4n1i{u672(YZ_oznHdro>}8jSw9is=07B9|`KA&W&ZSk69`Qpv?I~ zG_$k}_H334uW9%RLqm;7?FTt>s_Y5<+gyjkI!D0ucll(@yb2V3Y8Es0j|z1^ca1qP zRmepGJHjr>!%y{&!sgx_7_K!BdHd+mz+hEi=;4Cu`!&t{weNo@;| z){!H0n&U|7u(OJ+SmuMque!<{Z#d31&bW+M%)U-d?_6hek8b1@3?yAMPEzEqpBPJ9 zVwJ#2U?C3Zyw1_KP)BpyPoU?dfz>ux$!MsCQ@cz|)72i)%9b%e<8U9@e%=x_{~AeW 
z-P4C52TRbZoPIb`T=uP}F@23$OBi}F&P zscFXufRX~ZH`fG0tp({E-gj4Kr1c4aMI-R))bWT{I1Xfw#khQ3G5i%n zsA!`H>TR1!YAXe>mp>W4zV;d)ymS#{u6V+^UGj;c6!@QMeYnZb`iXPHoTZ>cQ1gxHHVS05wO+eb2r{w(}-ydTC%!%2p{fb_o#Wd^`W z+J&A0%~LG+dDVkkUa5zkj5bGo*Hoe1#b=bCkwc%(-by}&DYD$hPf_T%R?fura@@|W zYHT{LmGQYf3%?4x4_m{v;JU3Tto^o4ME!yQi)+*wbSM{nK6jNNksn0z3#UQ%t|O=i zEk)zs&xP}RN1=;jwb0h?edOEcMtJ?zSNy~HIM5B~WuLiaF?s#xQPRrsM3%|O+}3`E z!cEMX?awsnGJAVQmd(v1=S?SQSe2-E54OFRgP` zV}sV(kX=4%B~~y9U#hkqe=se zuL@hA{KWi_SW#)60uyy+r&E&cXq1x?0N-|pQr{WpSQ+t^s}WR%_igSV-qVxFq_wG_ z$S9O)9oym<5Tz@8S2zRK#c44i>F(_2_;~m!K@9u4I_d8CMkd70mE=_hU@3?Z4e>tV z?C1Hwa~4A!Tjj;6a5|cQ?K7yi_XYXdFGO<=%D5|8a`1nO&NQs1uZ_daQX!QjNhOs` znQE`$BoQH%BuNM%BuSD?l{9HCiIOBqLXp~Qt$mVIk|c8?%8(*7_$LYPdB5&Y=eqVf zd#&|6_wRnb_I&3C+x1uA6^cI^1&1UlWm`>{IGqb{%Sa-vA15kjvN56J{&A z(OvOLBEQ!`bl>e3mU<#s7?5!3iesiiPs@J(*I*7Tbku}?{|;a!R}Hwn@;tc59Ru&2 zol(%ZN!W~th-;@memNpg)K^+Zzw}K6i@n0=G^0<@-BKA<=sB>w#k0xSFQchr^EG^V z)n~{*ip5?h=g|)zg33-@d@4R8eH9cf4ijcZ&lJ5{@PMw_>Wj4GVIb@M^uqB7?mG2|civ1!sN5xd{_2 zKPph}_-;kf|N_0;9K$o#l2NPJG-xuSJIP(YXOk9U3S2(lk1@EwWT!hUb(Qb zCyUpMdBwlGvzE=+=mD)Rgd?kTDRO(+6sXA^#fJIc*#`MMHtkU>^7_}p#nzrc1v{V6 zI~C*E+`)BpL32L*{NyM*+L{EHt{N+Hi8%?3B@9o%4~n)<8jC#11gy8=04eex1d4$h zS+4E~mUhw*HO`O`)tQV!hu#EXJJlteQD>xBG0%qm{QVcGq&;O-r^8{Hu!fl4%>WHB z0T~*v;ia^#ISh`{%|Dh2hH2rbtH+l1^>)JW-w?%WN0a|neFcH*Kk}hAiNfps-|6Z3 z87wqT1Wqs8Nq@YY#LlW$GT1u|y?b^`d^@iia-N~|U{nZ`-5&NL z>%ZC9P1h9mt;oRD_t)`%lH&07?n}TlQ7G7pe<$wKIMbZi6~u> z^1>>y$^ms~nbi-wrH8>tzieW4=}zfPhY)x%?G|fu{s@U&D(tQsCaFzd5uO|}$HV(4 zpoOjTpxf#{687pE!rQ`e!K67X|L6t0-uMh|FfgExH_pT}RvTfDi&Jrz%TikQpo>-D zyY%aVOhL!Zk}mx=Pq32Q1ueD*(HxUr`1jNa)WXLB-{TFOx=bILyDdf(`u!+6w#pQB zZ`py%9!KEJ-svnQP+k~$F$X2wxy596{DY3$-N3(9YA7u77|}bx*xq%@Olp(`lKab{ z{1?U?Kqru)&Vu0Yk~t?ztSDbtwKt=hNx~{ z4EMRNfac5}PK)m+f^zEyeCj|xO{f$|qALeC?kPuBH*W}~QAW%zCl40iAIaq7r-|%$ zg;SH}G;a3AC1Qh{6WM+9U38U+EPCzTD6XD<5`HnfO8S+L@uyQyh;zEM`CrX};MMEZ zBwA~-5S-a*k$5Z~E*)qQdFC|YO`oh8^pu}6+PQqfwJ|bq*Sb8Qb$BH zrX0tIvLvD3q6gGvbO`t%=T6(FDWYfkW0Af^Fl#aT$#1MPrYWHhvBSPUaOA5@Y-M3X 
zJB}iHHzkH;c*=^LI+5ib)vH|1`O%^W zKPL!QV~=uBR=TWwZ55a8p^B;|9z~7fOTslROMLZNKXci!i{&^1fvaCg4#)VMGKfPGXuW@%AUCjqcL`RYUd6VI z-xdgU%(78l+8$ak@)#&seirZ7mlN{l5LTtpi)wGa;RcsoL5q$Vvaui3So;n{yE=bC zhlB*?y#EBWJ2G38M@zxnW%Fsm;$HMjGKo~}qZMx1_l3W*WDjt79nZ{O%Cdp+?n*#W=nPDOwF&yqGU0s6+I^Ssm*!T+8NTehfxr8RkiATJLp zo@2|9Ln#YcCWR8$H;el(HNu{|zI2F7I@?+!29M8L!EGCDfZ3i8q-4#0;pt0nngL71 zr@W{zu_poQe_xKdz#6z%sa%LTbwd>Nz*#)FN0!xYGQ_eqHfVz0RX8I~fxcH1(+h$V zuAeuQZM}aBPE4GR#>A?Kf>;ME_&SDGwrinl$6HuNp*tHeImufrbQ1!~tg*s@r7%J7 z7WPfJ%4>C9!hHH=AWLLXuK#?{^v_A~zUfcgLcW2upRBNL>OQ3TX_Dx)S+n@5(gbK5 ztb|OCCqa)hA9;`0uec*ePr_+iuCov}6#bnP4PAB&MY7Iy!U?tx&rDg0u6t=h$y{pO zY+Qv>XD8xuZ{{(B&{7=SornFenozI%E?C*t0?(X37mwMMOsl%3g-^%c(EObR!li%+ zns2yDSYUpd=vPHi?H?7;C^H8|UUwoPJC0A5WZ<;y zxvV!{O=vP|MKO!-F-yZ2kb9Q}bU#?3f#wKOu>28ocqPl)D<<0wv5 zn8niMzLSeblg8V7-lQEqm;Alu%@hq{?zOh=*!6>C-E1~gS$&r#YJSq zwMZ~cvjuy-KTT^~ib>lAE2z5p1G){$gg3nk?2c0@q`y|M8E-k!FrzR!?3*#jdhI0+ z`>e&AZ=pfeX813=AA1%lz?A)M!2D$}&01W9OsoLDlWf2?F1U$3_LhiVj8dWI zV=GAc(@eQjRu+iPN-+dK32zu zGV|m}`Yv$=n({jb->^C2Yy5I-5mQ-H&)$7rhMMX}kf?1BkY{!w^X`u1bNrqG53fn6bEgj5uNuOA zi}Da1)pmo$p}JhL;v)PVoF^|PcwhzB*)TL4(mM@ptZM09qVh2UHXVA1ZJ*4dWa2Nh zdEqU>G%Li8E?WE^c7Q8hVlDXWy#OChtpfit^vIXWk*Ix@5pYocj})vbhSq~AtRN7f z%F9Ej$FXVfQC}L+>8vLHaxcM)7cP=K#8;tk$wS_B{8nZ%tGm?kz)UbKBbB+X-NMoe zr_lFfyf z8}xOVfW6*}MY;cs(Q*3~$hE5vyjOh8RxM3nefy5n=s)k-!?Y4~rFsGgy!{ty+HcYs-SRAET;9G?op) z-pf1bx;_&uQl5(U*H5NTU@TVmzKevTqj0p@GL~@U0P}6`p>fmNg!|pA*}TjBEc?S; zsu(j&RH8-9ae;kx%sf&d8>GM*niZwX$%Pt z3Zc)3go%OXA7rx6kwoqM4*zbqgi_whPQI^*+(h&EG9%v7wTUr=M-M;|4 zd%5xb#-l)=fZ%GNhHH;Z5GDpzKoGup;a-u?B@NUyHxxs*w%9DR_Fo2dFJ+VLeGRS=}T#lJIw!rN3yB zXk2y;{p6VkZB_OG=_%=)@j+=p`K$qrqcI}K?+a+u<_GNgxLrbm`c%4|H-ew5`}yFG zWcd1;8a#Am1rEE~1+sN(z&8U%^>WUk!7pk!deuA@HX#ESsAh?dtJ#x)8Yw!vp#)BO zkD=GQ97*0p(z~@82O?X_$Z3y8Ec4_kT%bP>kKHp8EnIM@?9DFevdG$N{PzSZSntik zDRR%Lo+XD5G-x96!!xLA-y>+C8%N-W3?#B04zrHy^0m&Z(ZlmYna%Ed)KvZ@shd9^ z$#NTT=i_6dQUBd1dZlOCK%5`;pSjHPaOG(JjxohKb+XKNH?>%BU$rczt%7&ZS3{l> 
zR!~8qQ24WDK3=>jpR)YRtZjD$Te;>gI5zGaeHyQU8cXFF{v`|N$m=7OH5Z|Fi5wJ` zml35;D@jjg6e`{1!uOmtWqCvY2=2?OxW2}H@KK-=TCgn?)PKxHj_ns%kNQ~lK(`9Z zY3ye2C*EWW7u*}pK8l(B8OcTTbpY?+VQAslVa#ge2MeFq3q%J-AoyXr zEm-X`8Q&?aBQv7s1{R+lMO6pRsYH0iwQ}8O>k&|4ND<>FOkrV{?$CvkzlgO< zB+vQ)oh5gdWo@hH%0}xU|KI~aeJTgnwkHcJ^f`==C>QJA)f0Z^Z-bIy2KagZDfD~N z0FxhlDROO=5wCBxrMCad5Xf2xeJ$R?I{mA(eBoJAJk<%u>78aKuP<0Q9+x9N^UXyq zWyVO}=>!`1bPwe2C}DSo2Q&Yt@$^$wGBXUbKvt{oa9tkHamHFilKvFp00R?xKQtaT zkBo)q??ecRdl0qGizAxxC((;|2{U7?j-CIOi?`oXf{{E5bKCuYxg3Tu}&<>TbI4Sd35}XdLhMNDN8Ik!#c`W(7|q5YSP|8Wwu^K z#fJLy6H%g{tw+)8W-+4fK66@Fv>u+^o`Q`f+;_mz{d9+~3o9FGgkxU6p$**)bVHvN z-Pm*wZG4^u1BWePdYf*8hF$;Y&7a4?vPlSLjIk2C^nRp{?{$e|$9FmfZV|8cyob6@ zOY_mzcVKA6Wq|1cb~w#SsC1bpsxaI{&e>Mcx}SM;rFkZpA$0}i{M-yG7W%;3p);X< z!7ucAQV6pf7=`A^G{N@)9jw_-0Y8&XC9{90@H;2Ei3e=I5WiJZVWi%Ey2j+0Sb2dm z!m%FkVE7#HB)3AWcvYKRKd}!ipIZ&=Op00CYCFldJCAg{>=j9E(-eIUSEXG|TcP#5 z6U20~2Kg^Gka<+2x> z?`tYpXgQ3&KKlTdY!+b96ojR;wNd2g9c8TtJH_lo2yqF#B=~I3!*Qca=<8*3@H3UM z$Yys9`j%^m3OyH*Ga0Xt+e>vgtY8kQx`xqKOJina{(~l;k)xCUOhSJadEyS&gQDx> zRjH9d1Zyojgy$tgOZ$0aEc7Q{$FKk~`rmhM{5jdOSUXMHHKiXWMSGx>${eBH1K`ic zJLo~hQ*7#mR95LPD%DKxrys{l+#YX?SljnRK%6F`to}>z#;TF9as{P5`~1kguvFAA zD9v?jAIr{HeiSxK8o*xnr@*fxGT_?dA>c1eMgg@^to`L(3Vp8OdY@>P^e2bKp8XGc zIvpT)gLri4<#HC+H;1~1%7O!`44%HB&QzaY15GV6MIPTxp?j_z_~)vHt9mNHy6dy> z<@{Z69dW1rIbt@YU_5PKZVJmYPU0b&xfG>+<0iKp@@JJ$EXz)^8pY#YA{x_9k-F&9BTMt+j zNuhxU?m$oR6X;V57qY%Yz;^pWart@^VR=CatY5E;1J)*^x891v+TS&zgM0o8CWFp2 zt@jpMK0O)EJ#rf=be*TSwJ34vw8X=A9A?9>3ZSFU8rHNKimd->qv*x^kti+&%EuJ4 z*{SZ#Y{@~oWFU?0b8tmt9kYP+0)}_KDd#ur&A|FYrD&6x8ninEq3ZaZk;88Zd7E@u=nKhu@KL5vX&&8jB${>_=A+4}?@7Yv_au8x51C_e zR5bI90A2-#z-gx=u;$D2xTG?IRv7G%)DVYYcBO?1-{(@34HM|XC702-tvRq^%vz>5 zr#uE2hXBqOne zYkKI;^w&EDlw<0A@iL-JJG zJ?JV7%^Sh)43iP^9gOJ8X~x)3#|_=HQ2`A@enR`DPQ?A=6Hu-kN4;O{pkBkKW?CAdd_sW+k85Ij6kI_?WtFmu|@QbJ0R1b5Ly*x!Xf!eDAjobnDVF! 
z_6%aii7jE+FCF$_>_+T%FoWVx?c~GbVPu)JHAzxQhYcxfEZkME!ScU%;gCcXWM4Z6 zdpXBqok9gYtiRf0xCkFxrPG8(Jj2i#UJqffi1L-n|G zRPSLkNwFMDizK{r)m#}CJVG4~86AM^HaSqIs_QUey&d$9EhPNulgMo<&yeR`@?zg! zwqWBRdC{-HbiRet568#z3p)m()dFq0y=FRHQ+!@ z2rajNg~bbB!T1OFXyFGx7~(${J@^CRkoC`~Q}#)kxGx)po_j=>8f!xS_(i%puasz| z>C%kVgRFKz4oFN=7oHy(D^xi9(mJ;msxz*HdF0Jtht|zx=EFOn)$nPh_5}=H{EX}sWP*z=H$c02&#H*HeKqqp52Rg#Ji1in4!mWlHwKz0uEoHC5~^% z@`>I;;OW^A8=Z&KM8mnj5izt@cLnbHvj%x@eh4b3f1&FG9t)=Rzqo~+U&*h%5GSO5 zC2f7S(8{Elym5Vxd5d!(Ag=~46khVRvuEJ*4~`SXsjmF>AuS-w^dB5;bB0d)8bQZq z6$+=5ZQ1wgi;xqnX=hY8Eoc}=gaj=-B4;nDG=HbIZ1boEIzFT;!i` z@)9aGbl}L($Ell53tF!3kFwq`M@xoWgG-#|6YtxTF@Mz`#BAC{^Gqh7MBjKeqGC8Z z$Q#m>H6P%C(nM@GYlcYw{c*Z}(_GdVk%<)pWGxex)s~n|sKE1f%x8szE?nU>DlSY5 zra2cjz^Zjt5^P|V;M+eO7q%{AeU&p=adn!6&+w+7-pA2r57*O~`Z~01)KM5dJ{FzY z^baPOByjNtrF8nJERvx%6z{UV2VKjg*tEUZgnOf6xcrA!Q1^2jz28?3m2gxAf@>H@N81bv#CBf}d^=$5Rt$ zlc0ZYVn2h2RHu;x<)fon?|56-*R&Y^eKLVM{5yt5F?ldSejfGhALK*T(pf`V6D+e= zr#;tlpiT8(iLdS^88~K0%FEB2pDPU&tdyMjm-hB-+TA%soFhk*VuvvsiN9%1!)E9L zPl;#Bhry`>@l+~#3M&S$IJKHE=pY&ZzIN)o)}DESm(&}gyfsMd+`LiP5|9eZ%YLBd ziPos)%Vt5|J4}>j_*;(ba+6#Sm86)4mE9`@_0h<(>PFI$w3)TUTk(5f~Z0GUGxFk5MWE*Ct$7JwKT&) z8JE4V#?@E%(&lU<>@uX{75NTKeISqKK0Pet_GeNv(#?9WECt!?oT=1o z2;pXD6n6MNDGW~HB$|JktiM`x`|?us!%-btd(FjKAuI67xTEyS-P5dkR4fYgUBI4+ zlVI3`&D3m^6>>l91RJaGQmL9IQ2wQ#?sHc~tuL#AtiGmrjY&IAI6aPNZCHSsTQ$Wx z>NTj+?<36ab3#IMFzh?$&UVdC5nky3z^{MrCSkuN+`OlRH$zr1T9Qc#PKzk5fL*zz(gd;M)pXH?A%`RGiy)rH{A8b9h&m2$Za}pDcHm93cny; zczZ2Wcs3m@DcA~X|LqflygU5)HBq3l8iPGq+3c2@JnL`13rZbJMA2geob@e)=H;G; zuQn6`GtJ>tFLI91P_IjWYpxMN&3NLz_deSiH9_DbCjqrq1iQ|!BOeoZXw^K2JE^b( z&#QY0`lAYfcj;9!R5}E?&Yy=B-j8Ds<{!r(K1TG`lqc8q-jlLFB|zEA6_)R|K$aRB ztYKFXKa*FYH_IrlJIA2mi9~!UMg~1v6H@l_&tu8Cw8(tWElU`Fak8l9;ZwTftPSSw zeMiGf2GDsf2>}ZOviW5qUNR~N#OxeTt8_VJaO^ERSfs}E(@JPV&=}-i_znxVw~I_d z=TLwpVGnnN`m{v7Tj^19b!nFFVfqm{@>X$SH+Wz^9;?LTkhD{^6eX_|+)5ew#R4tdl z6w6`x*-m)lHa4GRFKi#sYP(eExI#)a#&0F;IHvTNc^S#K^l_Mo? 
z(kI*phigLp#_4plIF;@AXibU>?~%IaYZ*T`h;6>M6tsHZ7l({I3YCg|s76;TtJdEM z=B>O7b9!5Vg|`u>q;N#&uT|m}?9LTCL|hU+KM`Pt#PQN!8jR*m&ljFI7m6kr%oaaf z@E?`2(8t4P-hg^{vfzJV2WZ-zP@1DI!c+VLm>sr)J{cA8yNRmkMci;?*K3Uqb^n5e zD?Tyo>(0K9q}0dIN$5Ucfc*fip&K` zzA|0n&rrc0w++}+vqap@of4^iIY>^q7ZN0iUUF%6IX_|i#h z%Yu{dN*3jm;JU<#OdYEd%_Bq0KB`m`=l!zC#b_hKNAiX6TNtN=HPR1z&NIikG3?;t zEnw)SS_+r;NOC@EY=z%Ym}I1n!e^a_QF5yAf^HODBppa@`X`_a-AX=Y{}{Hopj*(; zxXx8>+yVW*--C9hM}XnNQ>Z37g)N>c#l98u*efrX&A*(_ERz~xfV?X?`I({8(%r0D zC71*by96d~`2th^YoYSG>70TJ77hNI2gTsbuzH>}~LNj<4)oxwGj-vS3??g?JfouKo% zA&Xe_m(Le5a7-$i4MsKz&eb?TKJ2sSo%2*T1%keBDq>US%GXm9A?FISS<6(eK zjPPycH3_fCi#N0z3Tu?s!VN7$@!3O}Xsf4`(DJ!KB(p|Z?DXE19#*-9{=|nuOZ*&K z9luNkvucvpyAVHnx}LS|a|S6%vEW{Yv1n)bXq01d6qT2cfi~%qnpt%yJ0%}RlS<=R z&TI#CD%}&vXWhdGDs;&=_w#s1k|LdRbOj7H+ytv`2MhCNxX{;r6ZyLIvuNb-mo@1xO$J3ePwds!ICG_XrE6Csc4>{l|MPJU4rE<;@B2Syc1dG$7<`%M$oa{tgK1-N06@SR@QNU@n@c zYe7_JiRpQZXnJ#d5m-I<0SZ^J2aP@fP|9^K92Wc)E&7?lW;%X?3GeD*ecdCrZmJH> zyK|6it;piDmfMRnM%7FD*Q!wOff;=@{hqk5qZL|D!|(lUw z>FoLpPS6QcfMa7Mj`>l~Nz|$!2rP@h1Jw?sdngRZ%oqe#@A@G5S%|*k6dYplik79s z6lU7cfq9%X&U^v(m(g_Z`89x5$}?yHCt%bYBMoG~bD86$Ez z7$bOn)rUtdG|Ju&q`)GJ?dU`H4Rn2!sd%lK1OE6^M^N<6Vkr-g3o^=8WY5T2w(r#u zv^+70-IT2Z%b*g@n7s*YPmrZM=A;Wtjx40Z>>M#xIR+LAPRMi47r{YBinO~Pr!OPd zvrL0#xKM(gS|93W8M#Vm>AXrDI`l6)c`S)V|B%Ng8uEeCizajp$g$K;GjdfhfK&6+ z(7WV9=J)*pZ1meH!eBdix+#`4_Ss@@|4Y=8uEt)||8H$R3Vkzz*$VTY)MLIBQ864Y z(%a2jtniG$iXkUxOZ_eJLi>>{GTIk>XlW8uHWWaw1Z`+q>quv<&|Wb9em(m5%d?LE$6tPC<_*=4*(@T90MYzcN|4B>&U6=Q{76 zJqx|Qx|ZdSQwMo7e~5+aYRGXzH1+*@kInfP3SX$k!61dVK+k9baV@?r#N^#2RyQ7t z>rYdmzw|Lc2Yj&IpI5MIZ;{~VGv2asVS+gFwGz|o*oyISiiAIPaI0A!bvXBhb~diT zd2bu3LfhF%xVCFNef+5b`mB?uPYoC;sq7Z?)~v^2j#lWq=V6*27K)=s zG#~?)1T3{JiT$_XEzYXFfx8}RfobE8;l{bE@y3Hu`1nb0Rso!By`w%TwHlH zoz+WH5?%7&Ns{~tadOOLoZtVLWG_xY!iP3k?zEEq$ms*iQ0diR`6bW! 
z>>(m2495&dvNFlkE8!5${U~9w2b}2SRX0VJUN>n)iK(EYQz_T~47zQ$I*Uiv?JfZ$@2{&Oz06yI|9%JhJ50d-A5y4}Coo0dyZavul@b!Q?@nDJGm2V!pa^ zZ=GG(d6VTrZ^}6C$#6>dFIdHh<0RH)YR?XjRp#x#s|gXwVVv2$>HLs^c=)!^9xdn_ z3EsWUfFCsV;o+zuLSxJ_96DqVSL;+nG)Deqmc5U#_RbyLrn~o9$XI<;=v~ZtPDmCm z`<9W#mvhOW?M(nL&19Rz)zBgdqP1W%JzeIFGN0-JkLEDqI(QartJ5L^lSpDl>|zgs z0PUZqLZ&$VUPyz`c!hhGLxg{lIyt+g%HY* z+Raxu%|jaZU%?wq#?*e?a`xZQ3FO@(AK2HbNNpX`c~15;FE>9AoSxJH(_>X=}SKiX*t53yG@L=wQ=5nv~s0 z7pg9XCrl+y@FAD6ZgLY^?r9Buyoa&mEmAac@F7>8w*+WiTnn0>NgO13AA!8tAKsrC zfatBFP@9Fywq{DQ4f53Xhc0qlnF~Iv>LBg2 z!$DE)K^o+&3_oubF%!{4&T!aBiwOt!Fcqc?O9!Kbzz9kuKL1iL<#4ERXA5-FSPfOa zU?Al>5-Q|o0^ci#iLdVv>T}^D$!u9kNyjTV?`8$)9kqm~Pb8?-oF>T2ppwvEaEHW)nRa3}dj5GE^nGW5 zYU))fEs~>Goj*ZC&p9x7t}T50x&%sHvf}jr*;A?Wi*7n;jAGks?*m8vm}JDF1{*o6 ziQRa>RanB@l-~Eui;7jmK!3v z1Av z0=ghs51ntiNwL9Qe&LQDbk;7J$X)}&kD*yWL#GQK+-xB63IUR*VNH4)=8!+HFLL8A zr<1WS<_UL$MJp$@Otp)XsOv%u$*g``8%=m>ZUnRI?j=&)A*|q1DNXxrAxh0Ygv*DSaoau~M|)6p4bxDjlNb@_)hWA%;{= zcDV3WSwZ->@-w+o)dA;4oMAF9AB4R%#^9~W01FOO0ahL6+`>qSOZDVw>h*REvp8Hp z!b9G|{?>Oyn|Q+nn|&}a#Tl8cxI{Osx<)Hwce7j7(sT-15B?2!vTXc-UaxK?ZsJQo zz{W6|__Cina-rVTIZ)}p5Y)ZAo%`I8Np)=OS+tfq4fBy@xnpPQ;pv{#;*?uzH z^*{qnx7TFVQy0;#@#|po4{6jBpi3Q7UV(^%l=aP#%qOe)L0d*hL2p%v=eo>eKB~1A zEvuc-gr(C+;EL&dQsh^lV>c6x9TG>%m4B9gT``hbRFuFLha!ut7-_Pi^$48&z(msf zl!rM^3FP&dCer7=98E)h&^t;IHmIE;OAD{i-m5&!E*ZkADn_BYwcfbx+G;2j)l`~0 zTOK(s{8>_1UPnD1!~wr;yXduAP4s@tCtCY`HQq6qN2b%eC^MC12}u*!-^R1Z>jxB! 
z!upV9ZZO1>jJ}p!79Y8D9Nphi0#!g|X(JuV5(>wm9OWx8Vw^qto~xragwD_`8z2m$o@hc_@}6)<-XXggb*%kB7W5jrlGdju zL9d)F*qB0*O??&fp79bq+&2zNmB~^2)vnO8;T)TB;VjL|bb|YK74u^TMleytJ!Els zHRF2;vDs^l$_fmHii5L228jgzaVpsAW+R<<_b5H`UK-{H2>3Q}X0EcE=<6;EP;y`v zw#;1#A}5z}+>@caxyT5rHY%X|Nuh93XQI%4={)oFy9xal{2^aApNFbz(s=)0S}4=m znA;N*EU09gK+_R7q0G`7Ft}wo)QAoz#m}#BMHOz)L{eL>mM>)m&xdhYvOg@^OVXh2 zUt>P?Ob9&@w+^-59?B$xSD9J<47xxo32@m1bns6PY5!tP^E(A7)pem{a_lym*YSZi zUwld0Dy-ST32*2J<2$(EdMvWCbtye;IS0MT>0r;V90Q-ujb&Xc?3n+VVq&*Zl_btx z40KJZX%;ajQk+p~MSBb_9P=M+Pm7~}Z#Dwq?Kg63lDtqL@lRT8X(KZe#PIw{ALhr9 zAV)b6R(+d!xZ46RK@MbGpNv~JpQj?j+C$pM(_rmKWtN$^rlia!J$D7nh#f1tfp)feKL(;>V zLhC~(!Is)&Hg}B%+H&zeeUtkS$ox5uf{hn3+Yg2o)BOxl-JE*9+;###_qi^VsxU+8 zF~x ztx+fH^LI1Vv~Ezn;|!gWITY$Aex^6hXVN;y3|Jo3T`ybHHQq{(Jd9xp`?Xp7ZC!Ng6Az^i*Fwj~ zANhh!)i8HOK2mU<2=^r>5FO=tNM-pNP-t@z7#0SBdoA%?P|9O=qudWw2s3^IQ!RSc;;6+x_J2~tzXg#vegw} z@WB^EDZ&wYiWAu*aEZ>=T>%Sif0y`a-J+k=t5C(1O)U7<8{WOw48f7(1$<;NSYhu6 zKF&)<3)VK%&(A|>+lkTe*ynxtNT&&=O4>Rp(1eLWX7a)ywfZVGkoM8ePGwBX-} z?a)I?av!pr0W{JN!?LYwsN&izR{n6Yh3%v^Zgd``;UlwQy-X62${R{AD=$Wsd_1nT ziv?rtvPt@}Y~Y&MN$zd@4(mpq2K}Ekh)7qF-gbY9HpxE#u67S;?Vo1&FH=SAb5%k7 z?05>lcGV3QJ4#E?Oz-8371W4=7st96P&)IVKC@W7k8cb4Aes5*0u(zrQa<|{bUUDf z&ULRML7m58$mCscR7fSOzn6v!FT`2YE!C$wjz#Qdh(z+T%8b(zxigDhl8lue;0FSw z#U((KdObVJrQ9=tEUJhFO1x`U&SOx;uY4-VPeyvu$tBay{71Z2y#cN^BWRxdXD&nA zhFw)3N^zt-&`>^t*5{Xl8mBNxMmn75(w71v}`mU7t1-?tO8EWT&UfZcs$5n^f2L&@KWJj|Q zNnLus`5EEP<~^|3={{e0_7eBQew$=n_?DXCN8~g(0SlIekaH(BsfW!TIC^+2(u!9i zEufRv4GQ47`&+n-m>Hbh&?HdizX${z`^j$BoRoC2<>9CD2_)t}C4N2(q76rrKq6A7 z(tBSEZzhiCsx6|4Q*sauz3am-@KT2tbk2ZdWu725CyE*NPl04-zOdw1GD<&wlrB7| zLcONvL2tKu0avd(0I&X)M+@AWfZHB>T6C|3%B(CTmmVEP#orGy;V@5B6wjd6pT~fuYdkWI zmf@9Dys67<6L?QQods^T<6Jwga@SW%rb-;jfxnx*gtmLjgmtpw3N2q2tez+#H40L zVX1MnDjj_A2&P7)lfUUHq&oisr!Y+$Z?GsJ!>l!_)VvKyw=xMuzWN1vhRC6G**EZm zeHRSUSVTqp3F-8aMxOGUB)(x0F#ml6N)0#Uyo=`0r*E=KYrW(JDfLLsv5s*18`iPP zLVKaUdjy-TGY_U#+t9MaTH@9@iH=XML;12Z@&6b)^S7G1Fbt>Cq`6d@B}tMrp0(CK 
z35gP-kdRO$Q~D~I(mY8rG%AvzQc|S$T5BJQBne4KLXwmr2}v2g{mc0S&b6<-*LvUQ zx$h0jLs{Rpvg+htp+rAKA380Wz^S(G0pM3Q{Fm0jj_BWkPNDkDmCHWxaK=}#ZR|@j z2Or_K)tnM2mTqEuykxnK?K7}sPn)1f77(2~?x9B(Ihgcj(`CcS zU}SRwEHsIP?P?LA`@1oAU(?5yu`|H$*+=k-K|Rjt-V0Wu?T?_?r-W!tEP}=2OK|&= zGi2d!LnKXt87cn{z^UUKHomtUwvI@VwTX9 za!!bm+YO(fq^3Q(dt3vL!Nu^?-2^zr>o1k5PG?G6LfK!kA92``0qXL{SU4atZ08ce z(y41d!nQHp?2&OVIbUZ)#DmAc8T}m~ATO7;ELn)oP=E}%OC(lcg$#$As~km+c*6lB zc>RDe@sqwJ2sRF&)7wAeM7>rZ+j9~focI$Qe4jy?LU}y!Z5H)}59mZr3)EeghS{$f zWP0%*HZNWPW{b&Eg-K`0pz~=|wYUPFpW{z1S}mtNh9Nj|jwUk8zKZL@#0a}71lLS- zwf`?N7CPNC!A%3^40c@xK0H^BejC?d5m{A6^Ix;y& zgWG878c2>zJ4br+5m&&%SvKl$MwhPQ(JUKT=5Y z)kZLP$5}z`95dK3-+&QoI|*fNb|NW-0q|b9j8uI~Cquep$aYZ*@;-f(hQG?hfy>vT zp6$+H_v;|^Ftich-+q`3O^kZG!)S*Gc%jP~7?}AFJkQSsap9GuQdb)r&ex#_J+x4%<`L*$@DO`X<@=KV ziJ;9NmcE@mmv;!a(mA%@DX^YN^3ztL6Y5v!zYlJtB#4z~f8vB7>8Y$WARQvSX1A3iuU`BA$(h z&q7Smkqc#T-h}&byVzZ_(n+1nY_~$fU+HkJXeB6HCxdF_L*WQoOe7aH3K-P}YCb6k z5AfRaMCmx1d&idBqbWfhoHxSo=q{W(szJhb4UoR~5==-nzmNVP!FtB1Vb7dgvhT!K zaMaX++i_D2MkwEeWhJHd9(#|$c%hKV*!UC%&h~j_3MkqLXRT6jIy+`k_ zS0j?**+5y}m@FQQrqi<45-$e_C_OP9_|7ipoPH$OH$|xcZCj;aD><_5KTjNQ z62}yzX)xXsG`1n~$R&1xJB1_VckB(~GC-v46+y@1=~$*=lE_Q>F!8h2 zg$~w!F#f z^%990&&5{R#bi}kJ-O#`76-dRq-z|7FWfm!F1K^o?z9FbPv3)7tw7-MavL)+_c#bL z1p+gXBVy}4VDQJ)XpPxc=sY`)XeH~?@Gd7-Eay|WyQ%W)vrW(oeb{{yvymH z42ITVHTGJQDOkO&7U=%m2TqKN@Kk+W^x0Z~b1SUJhU4L{XTYZOs+`K}6xi$Wm=z6K;6o!Wg2{33Nxq>6X*?ZHayP1o%$~Ej z?p^~n>B^|u$^a7Ox?AV z&vt4E@3*?qt&u~55BLult)B%t^ICy9nNODg(F5T{Ymok!gE-Fe9awsqg_%a4OulJ7 z^lYDttegMiT`)I^y6Ytpkarhv)$4=I<{{FlcD4%6x`G_8eis<5D@X0d3D|S?LDJZ4 z1U1y!Y1N}NEV)AfzR#{Ct9V!9e`XTgU6UK!;*ZH-*DptOHTgDFoYD)K8}3+e=?YwR z5)$pI8elXonhbf~1ewlNbny=lH1JvtQYyrvXDV`B z*^f_L{KMzm1g!jp1m>h(HRyRV7Rw8dBKO*-=uvx)VCd{j7?$xED(|&}Rclre>71i@ z%TE>La_IyF7q^hq+Ve2vXems4{G2uMJItOhUd3H0UO<{}N`it56DYKb=W0x3?O(@x z!Ka7D0#-$cQ)NDrabY~Sezh-CU~^S)>c2`{FW5`;t7V{^GN$1gSHNMPD)_P~7=(F0 zgMXbG(c0E6a9OM`e6>}RempmoOCE{0yr3fOCu8_#9<$$p!bwr$Sflv{ z{2lunC**seogK`+?_T4qLq|p>~2C 
zcEB%LV9+3-Y5wh`=B^x9(>RtMHL~HRCruZHeaMF&wtUBVN}pgwX+0^AZbNJyKV!ZY zit}X+$yCn(PCDi&z7pZj=T}M*sk3x2E%6rWJZr{5g(^(;=tR-VF9q;Kgdr#6&_~su zns8SqN^x#2$8cbNG1t3(9iAT^NeWvV;Y+{QC_e?E_(!_P+fs)7Sk}OaQX09CmNMYB zbqt#A|C&qIejuu1Epg%i&&aT6k=M6rn0g;dyPA?=zOFWPRGP-Tb394QX)DTK zaRqK#^$}e@7y#$3-HC2(Q9=|`*lA%1_0RsG6L@}6w~-cner`O|>!J@2R9bN@wz9-$ zOEKN@DxPtuNwhB;CyOfUJCWQie`1xfh0Yi#Cif2qP$^eEt|;<3+EK;t%3a^nS=ujw z@5U$=?&r^ShwPw~XCA0N&%0t*X2O8HkGSMQ1IOi93BF$!LhmEzX`O);ah%gbb54GP ztJEh^cF8!PbC&nH-Mc`heSE`dYfPmk$Nf27&2D_KbcS$ga}m-t`78(-et~YhpG3Z| zY6O`Z9}`ELM%=w*GUz_yNG>{sp`cIp%=a_1sr#!qA{~5;$Z7VFU^t5mOy0zXs7;1* zdnYq1242EyCK?UAI%IF%^%HL1twYPJrlCs}ezePcr*N&N4vky7h@?%LjEMCn+-8*k zy`Dd@zZcO4T|&n(HhK)Y_{d3E(U#4O+a<#Y9yZ)^oZOv%1UAJt(it`CNaKbTDNl|fpIx)Lnn{}kAyU<7;7KJhe>@*S zp6k4=`5Eq;J(;O`ZpgLvj3eH1;oO{zjri$9YwoEvOT07+Np+PFsrmY$%Bz3z{&iVI zz0iVdHH0X{Ur(g*HBs2+Ax|Q=b8z41d{`;%gg)IJPwS`g4Ep{c^wi7=z8%n`!oz1t ztV<-*I8Bx#zI@+L3gWH^3^zU#8#Dpq$< z?>Zam5FUwKO;lh_voSN=)+e};XvSRFVn-g9#ldvDVe%zrg!Jd%CSsIUgZ2T#HA@O79=P!kr7E62-oT%oCJ61MzNAoTN%BaJ_$(U!p&Ht4^(LQB21 zc*}Ydd`x{73~}VMBSqz8Z$<&htj-XrFOTI~>*CR`rFV(6ZWF=tzhP}~h-up3f_D$> z6DccAMqu-iN%%>k^K(f#b&(E_WJFN>zIAep)boOvt@8;mzHyOZdV-z{&BMY|X z`N67R^U3))f3WAZS~x9I5-8g6J1eJ;Ku>ZRNm@S><@;F*#_xQO$+HEd#DVX~DGm#c z^-PC99p)kn>puF+M4dbL;wiqn^%1$7W{5kxV!$gk1=_o8F`1ubMQ3_IdeK83e}DRp zOq_OI(6=d`xE-#ApB732i8ME|^1=-Gbb%WDE_EHZPa8!o`akg%zNeaB@&UV5-b8J$ z&8oldipM+C*TVh%dFb07Q!V#z4r(};0QU7PgypG6VOY{CI#;fnWSXw! 
zxf>8(@au%VCKE_{<^Z0QXi1zaH<5!YlHuE{5rFLfC@{28AroIV;h_oRQC3!#;Fx(O zvGUnTyb`KO^A|0)rFkmcoFj(TILQI`@*Y97_B~KXPQap7(d<(?thdsKrd`=UZ^?W-|w(e3E`DM-bo~G2|WrOkbzdv*Fo+@XO^JM}2d)bOrts4&qk9L47(bYgWxP({~ zSU@A`RlKVy8#MSxku_~YI8EyW%+J0qi0eFzG~a#)>)l4!%3o37+B1GmQ)OZEh}9!D zx2?c#e!gM6mgiT$nFCF{6p{NpRq{b27`lI(jl`Z_fYFk7Nu@9a*QYNgIR7k>5kFxU z?Dz?H{?TT~&OHWx$t6RTGIP8q@ieKew1(SwH-&}90jxacCKN*s!{OBi8@K7B+$To#a{8>@AWsqz*&vP(Cj}re0d!b%|8QR}=jLdo2z&eWk zvRkbE22}DS?GI9MuyXbmkV_{}^Rh^R;(=AfF2NOc=%kPZB`e|Ij-70l>KP!u{uFl2 z;djEnmg3jLQFtI_D)5rO!-|RhSAAxbceGJs(z$&X{vuSvnKT+D+vmW6fLh#Zr9`&9 z3j{9~@$13%u#H0@=7VnpppFgKvKyhF@NVK*hE%aD9b6H14$qUIUZa zF-P6#@3jX>(>rNA9PSP>&&bhF|IR_#ych7tq(o?X_$sa~m7_Y(kFfPye!(=w18jyy z1z!7Z1?fs(3WfZ;SMXp6r1=yJ3Z9MyrN=$-2{%KyFG?5AbU6)*B8KtVg)gC$l?AXE zX<+*l{*i4gA^IbSapc$%!MvGT)L_*F;??#J*c6rl`OZALAo#M~rPT$X^y?QeyfRO) zK_Y{QRisvRB@LjC?PE}KY$5CpO0!?Y_mf@$cjCBS3tt&BAPu5PxU_T>m|d_WJJQo& zyLTT5SXO55cq&~mCGsAaJ$MIyYcr+8q2mNwt{nodi+7Xd3Po_REdjM{oGr_UeOE?>vKeP2p=aw6yuKLmaWVu}7~WAv`+Bwiu6fEaB^g3?#p zN!gl>F#60f_MOW+P&R!n5Noev$3HrW#T7EZi)u-({PR`8LXB>6OSy{e`|>4Nk?@`#GFEs;Eq{M!Y|H|=4ar-`@$`M@hu=ZaHq{s8J@qGo%p$z3U^kK} zeru1?62PzR{_uj06%&+E1#cxDLf-!Iu+n28&7W0F3UyP7$IMGK{MjZX<9iotGfn93 ze@0-z`bS8(UIBN?oy51s@I8f!%XH1eU~HLu-#+^43Y1qZj`sAeX5@gHXz|vMfSAlh zvB@HMAm}4Z%nN|`7v#YCX)7?wselfPP7=w%C}6}R#`-%$k=)BnC{sQaYc|bCw@xde zYLy3QH^}eWdTRNH(`WtU&ev(kz!=%6J4{e(kNoLM2 zqml9wwhveO|WG#f;)DFds??kVx)z@5$T zs>2JU^G+QaFFS%C?3oLama8DefNwM~^c>t);t8F9Dbh(-4=^&<#sE7kgO5MS;(`x3 zku#S0bt*T2@%K;MsP%uWLHJo9bN2<9oG+l?RKJlwR)y>Z z<0(R$iS|rjW1FBo)f`$%9LIm3C=qdull0nbRRWamF|BtSz|1dm=;-DsTASktIotR4 z`5rsKeTNeuNZSk?diMp5Hop{HU1|i1gEI-yc?JX8)AtJ&M+>=mvw&$C7P%rs%FA5;>I^h&pV}^0}(# zRPy8}dOoI~^?WD>0|dtOP|+!@JYI@keyo5yKCm>ZNCX76x%j5uE9mj#3oIO!A_Ko8 z_yf%`n5B_Ucm80R4Uyw<_=y=rCo_tcFI_1t>N;tUlKaWByks~WuZL^xltl%XHV|&d z2e5U63sfDxMgzy~LFd3&gr_!>eeRF>t`z?bQZON%UxtB9lOGYw+E-OI$CUQp&cfmN z4d`9F6|J394cY|@smNs#c35pf#kWXvM(y@srcW*~d7y$)Cfvc=>$KpOfKhT+J(lc0 zaT8UJHG)N9d88*_gC6oq#V<7U(ARNeiJOIxB<#(@`@_bt&c>1OQte(=Lw+suwKf?H 
z{Y(&a-=B=0C;kF%Hy|n>_Ji%I$H|tp`m|uXEl@UkWFHthfi`V)C5K*aq}u0{0QpwV z9dWt{f9Jd47zr`>vatkgyXg<^G>s*5R(8Nyv9@^h*i7=onS;ArL(#x65UBSJ;(6ZJ zf#dd>0@0astk1sb@EB?R_lHw{ zUxcGWs_;&}zq`lDm_6tp4Tn<0!M>S4faC5$de8qD+%0vVYbp8I3f2eSp6?}6%mOY-?(DBy?%((j9At@nlUyN8`HEMy7O z@Sq5uFkFJxDGA^tKC8R%S0?d%_yoK2S+}K+7t@wRGyFgz2dN&O1lpobp(T1*IDX@9 zg34Dw(+{`l`KOWCSH(+Uaa)JB%#=iFWiFgiZVo39Oa)B6wxadb_mJ_5Yv9?{HdwRZBWx`qFmH7R z$ui*gX1V4lAR-$^H=V)_r*89^iG48DPnt9}`Bmr0*>W~EZCGrcqTs^gtu$Fbgqxw_ zPrBu&L;twB*t4(%M`m>p8GI5od|bx(cJ8Z6e7+Nd>=d%wR2;S!$#SJ98iB>K3Rw2y zi{LkX1eba!Ff!@9KjrEoIKc3Ct+E!%t#gogiqx%&X|#dpA&)1?SDAy)>e+k zbrb6q{1LnW}zKZYPy5I@GyaGU+Q5se-FyF+J+n#_JNaPcWCU^ zSbXk^C$)_H%r22v=Q?JlLPHMQhd4*0*eAS0cb5fr@90A}y!Fs}AWH@uPEj>4dtxf9 z$|PQyg{y%Zef7i)HhrxVBrK|h%~w-Va`AHbW6?yk#CjiHHGe&oi%fy;Q^JV0)CT+| zD-1QSOarr5c%sqOw_yLr4^XjB3obj*19z0Xr->`X;aI^ExFn{9b^H)lEuNkWN2;}G zRf9B`7@7`E(S0I5{SJ|-3L=sL=^)^{BeUw=L>hKojahLd77tIl077{;ibm-$a9$Bj zSC?%87FSN=YdRch*Jvhb^|k`bmQQ$8Rujosq|`^8{1VVR|>P@_j{GAE%Rd`8$xEz9DkmT1nJSPN6-@A=C%*s@1ecl;}Q*CgS&` z?wbs;E{Z2=i)1-9%YW#Zoeb96*h|9KYLz1Uc)^?>2 z$^DO_7P1Om*mj2r*%QYVCe6Zi6(VRaE`?V6^+EkNkg&oSxTf15{ju;sO%>Us|C=~n zwl>=g=~7Gnd~PjlyGu zu*!CXtg!56T6zu1Htk6Qza?eV@r4zei?{L~h?#Kb(@?Iw_arh|c?#zImt=LkTi* zVk>hZGM#OCy@zgioIsZ@<};%!tg1%@8SqomF;;@_sS6%QGa;9<0lQ&C^^Vk3_}TLW zi8pMZhO=YQ(BpnOuCIw}_@GKp@A?I##0N2xwwj0;MbTjXd$ldZ3O@ZMP1^0HIFNac zXh}@xc5FLCRbJl~iph!#hdlJ)tt&s!-m)uPD`y3dn#pl~QBUbWVkxa@%b{Xndn!$x z9XYXqiL6O!B((fFN^bM>MCYO+q8QK%BaVfV^Z#PF)^mrrfkw>!j+aJ0t(!PAvR)Lc zYG(g(e+-8%A4g)N8UoQ9Lm`klO5$CX(14|fNsOBkvn2H^37;R19!gw-RtfwbuUp$z z>);Pyb2y9oExG|r?oA>d!6&Fu%w{^=E{hKaS0bzAJHu)f`E1rZj%*JqcWQlrJkBp;@Vv14sPLAUnyn&-MJ4>sJx|x-{z7vZ#7BIA8+_k zQ2`D$pQA0)5W4q$GE6|K=&*zXmtP}BrE3n;^3OkjqV*xvE69YKgVo@8&2E%`?j3b{ zFdJ)5R^kK+&p}$>4Z7)pCRE-$mm1eRCzp0VL$5UK!D9;+%yX-!_wMM>MTa=<1EV8c z>GB+BwFMH>8zV%%H=C5!%RsCBl4SR{g)~H0Ky}0)Lf4of=(=+zvy*#H^PE4EE4;sJ zuj70go>NTPmz#3li(26dG=u9eSjNQ`*uoWcsr0(yTl)827yZ3!5?X&Z0U3T*LeI!4 zL=LpU+48DPpTH7VJS_%?_%>i<{$!Rvdf><(+pD?SID6f!*WB@qy6B2n2+0*z(`$ZJ 
z)buCc#jZ(3`vy{}>F`Dr^YJcp&q;>O{7yUXtr$M$Zi0NL7g0x{4(c_y2mLCZ;*FC| zG4&D+FyOfY_C4$bV%JAAYran6!Y6r=!1!4DdHPEjac2f3SJpG}_m^q zo07qZ`Qd8gc}IXkk^wn=(_ZNPcr~rVQ^?A%V}J!etM^}I11>a_1F@m?RX+nVP}bi{ z*!{f|&`>?msQDC5>`4XR{n$X7CA;7`CjmTkT#A_}?hH>%9w$Uthyyxq;7Z9s11>XZ0<(T3fy*D|^Q`zhuSW?Tsc1rjAzuVc`)iUeS;~2d50Fi^nOxxM^Hh6{k3(Npv2ZX< z8p?mpWh^FWi}WA1z?OLjI9L0r+~%h$+&7(8I(aw`1mSpYe91BFuKXKVtc<2gN8TWV zZeu)olOno!W+rtH7~tY?}vjlV{pQ6pO>}C;9j5=u_Ajcpv}i z)8u~6%B58sd`N!XBjhuKg-g^|bCOAa;A5#+^dk5MGp|D#4=+`wV-AXQ-Ojg3*|-5J zK6xC|`tLc42%+3*yVWSDEfOu*dYOiOZD%D->#5gd{(Y9MNjFY^!q3k9slUfKCLcQ<8=mO+xB=Z;@}70+ye?ku-I^;tAKot7hbGOEhZYj+aNC(cVmvF4;a%rwPf;|=YLo&Kw@K2<&IZ(>iRfri zHnMwHDE!Z_9J-Hg#%6A}k>ArQteO-=yUH|S*w7cUZPqd-K6O7eYdk8{JA9tm|6vAQ zwqMAJO5e~~6HYPN!)YQ3M`K~sJbwS8XU6?rmd|&9Q^|Bsf5d3|L!VlE?w#{-G#Dj^ z7@I=0yjevweeM=&l{l9x>Wf6j4tAqut~N+0*8z{bwPUXH5v$*_Gq6fgDL36+0h%?Y zaX!g+U|&Hhbu(6?4P#uGe_}DsxJ%I$XfC)@6A+4p`134 zdeMBZQ{4Hsm*5IKs1zFQ;(( zBxa$3uTk8pqB1T|LJIja4xIlO53c#MGdJ~DItpuffZmIaBfGnMnG>E-Xx;E3W_v^k zGz}+en$aS?73JR)aE(%mct{V_smm!P9KBj^1d|xsC*k$gV zy*{<9y#r_CWW2R_gnph{#sqEpOgn>h=&dX{=8o8SQT{<2LCw9Ys;YWNp>k3MVfL%i zNc$)@?OdC1soz*m9K0akN2y?=N~%z!A{>Oqod6M!GO*#u2yz=KKuQ5UP?c>ECGUO9 zL1cvLkMZ<Hz6FTh(B9|@*>n2oxUooTnY0hdss&4eva;5POKitN-Rs3|!Lp@}(#d0KSC zm0GeRc?w;;9bVa;=l{n6aGu|Fcb%Ca+BN18 zEIz-7yST}en-!qQ{jPsN+fq)0@2RmI5BgMba_`oiqA7YS$;65W zsK|lBPxY%fQwa$)Lm>uTpUg2jW5(c{oia4^*%&Ui{06zeexYXWvJ4y7jw1GB&OO){ zIlSD5vQo>a%XTsRJMab#d$JdeT-BixF%4wfb3YnXC&e_ID1*~Baw6>_5gyuPOpKrPrd`YQ}zf$MHN)J zH?q1v)|0X6oJu5@f5iz&tuU=55?-8biqpQ113mk)sn1hoCM4RBM&8{Ax-0@9dVliwcMdb);DAm`#i5aSMeyXKBn`jah$<}{=&^s9DE<98;gZ$oVWeaT zcE4DQ2KB12Zu$-|IrIvR25!>jji9wq=+?zy?Y)8d!HD8Tf*I2u#*#($e<-ZzoIM0tvSOn7m%`HIZC|5 z=ky=7(01)u@=KVGLT0U^T95aj;~HkjDB>BG(lZlwpF}7kcRpOOG?UC4XD%!YIm?x@ zv2e2Q8NPo%9a%C6Xi52$r2Vg8aUpLK75wpiyR>s9W%w6Z>n9#$~@JCGY;i zy}8P;G7d1)&QFE835lW&>j%M}w+~Ugz8j+@w*{N#Z(HaP#i3LWorgJ+Ps zV8g9y_RbGA;rVVmGPtpdUMS}M#Kl##`DQ-niF--mzFQ#e;V!)Rjw4y5Z$TF}Nh6JW z@=)Up--Vv>k?!0UM~c>s=O+31(H)Zugx_CtOyI{yz~C{9&I?a-DK2_Y&gm_^DJG%@ 
z)rYB6MH1~dF=WTen{&<{@d6%^0!ymzkc~cTklBs@$knEHD8tXjPjBADU5j4CDUn{oF{Z)Z%wd>$cybp}7oR|92|Y-J+BbCN&=KN& zc@xU>*#k%8F|u?}6Ren_0_C1ZQ^P$!t0fa;$!W1f`u9Afmj8OHryRS0I>aM-^kjG+lSAnc=H`LS<*Xq8cmtI5LKRh3I!7=?f9aFu6D%1<+s;S zSp`;5(0_}By{d#+Uv}WO(lDSJf1JoUN1@d!$%Or>!DTh(+K=#gfcirr)HgQ+T4eBZ ziSa)`qklOl*&>ZvkLbWx)RR=7a^w2fw}1nEpJ>`rM?eppV34ecoLjUGFY1tEx_nmP zkH%~9>)hK=KpmiM&;)kx@;3bXO)9ZEb{~1ax=MN%E+iJXnau`G@T<`**3;31WY(S$ zn1)v&R(>oxaaJ37ig|$Je23Y4^c}R@KTQ6Pq(HY{t&G`+SPXe7nX16SsRQ*mfkKYCEF-PdMOIN^f&NP}goI-4d&!O+PIt0p9Md1B^ zSLwyt8>BY7gwydFWoma`#fD5c8?~{3?6(|GBI2q*>zcE;!O4Iw5Gm5JiOJB%7{kgV zBQ>M5u zzlU@i;%8OyoO=XC7{Xp+tn!=m5gIxIT zM6juPE!rm0iaI>n1!C)R*vjOMIOVhi)C%5AE=rK{rlkU?0f^AMky5XiMj=y5fHZuYj#&tR{}hbwiOBu*+`T&Vb*3wcUKh!8g52QF_A=xsykl4QZ$Hn1 za;AYwqj*zTA2w>vpq&A-^o>QX;MCsh$hp`Io;lY>Lkh2h6Nz<1O85dntuWRv^@l*( zehpb}Za}10Y0>SS4e)N1BHR#gk=~v#8J)?Gf@X_$(+IQUU|Z@a4(e!!kvH$*n$Kd@ z+NFnyV}m{VWu!!2xhr#NFERwT(r@9a1DqlZl?EAmU!vT1HDpcoR)U`rHgCrsex4uB7Iw;zzzRRG`a>p~ZTJ_an2tqvJH~-U zE-6?r`8#NdEFsHn954F%a%{%QPv zL~$ow*Ypa$w{Hc}YcU+Y#Jlz8t|mSA00}TRhiuJsG}3kq?hj(Cie>82S$?jxI6a!3 z_*{a^vOEXU!jh=(PI2~7-FUik!#S*0g>vHtVE#)T zbhmXYd{g)pt@vjIWc5^G!P<%N+_-sQWxfeXyB%XMraOl{Cx4mKD!UIPdwhvlLL&7K zSx)yVr1QOuc$9c%Egc6HQEB!Q)-k-DWpy6_72`V~^ym{<+tN-l1IMF}GxtIHgM+M7 z;BLnAcPAKn|A|f8`kc?#&t`VJw$kR2`)sg>B7M4QE-ccYOwul|WSqKGxWTw0@U}w* z8ZUYTIy>Hg&F=qkDd&yx(5|acP*yFNG{clqp8&?taGhYVPJ!Hp475Af$eBJ^3D?e3 zz$qa?OvY9IyTh@-Fy|X8v27H<5|bxgxmO^HbnnUIHEzFGs_E zCk1uxdN}6gXYSSaK58BziOiSG;X45Bpm;(oK+mNat2hOyxO{{w$=Cp5ELS7pNNO!VW}zLm!ep4&xk+ zl{D~G8WFUmft&*eQFJo_>+eTUWj6wLn*1Td5l-ljrhxt3-~lR!R%3EVh8)s*K_W)O z;g)GfK=*oi`snFJc!ZCSR4p7&*RA*r-1d9}Y8?;ZKcD}|;wONNQ1br_XaO7fK#!|BUgunm8IckH-iZ**ImFmJw4F%3V#v5nbu z;IoyW{LO1LJa7~+$)tLz={;0=%R(@)gfA0WkANGaW6?aRXmH)yom|pZg|D+ykVE1M zqG29S5~Dl7?0G@Z{ErP*o?%4RY>Ede+KMRFC=BMz`Whh0B7K=Jg)Z27r^_7!Sb;6aEj>ptr-{GE6X{Jp2foKk8*5r3-5 zo`6-9%Mk-tw|1btH%chkYR^2-QsY@pc62bx5xbWzuWpx4!8iozi#CG5`!7z_-uA`&pX()IgO3( z-Uc&@Gq7{iA<&Wfj0D*(CN|H%vBEdjFnfv!{j&0*4_rs^vNr|zyw7}6o}ms3b{0{M 
zOAFz-gxfT*>JE+gO7V`tr=(|VCN1{SpmyK3!}9DNIQi)f7%6*3&>Aug%1=Lm<`gKx zmrut+tuhs;KAH&~LT3>7nE9a2^am@+=y9=6Bw%ND8}@mrMTyZz(k~`}`D-R4VXYC3 zo*fPorK;>>_x!{c|M1ULodTRrG~v1P>&V*!(J0co6b2csWM`aJV#Wk1lFl)DRrzK? zaK)61NPqcGTE5H?4A0&|_8Td}rCaxt10lbWR)ZITx9v zzSw66frfkbfN;kz#H?!qlfN&Eb@SQAE_q-F)RXpeOZk1?Xy*z%Yxh&M?T0%Fid+NM zxLE-YgLuX}cMPp>xeDyG@7Vj!u0#4~mI@^I#A3;r6DYVI3hm+1aVCKm}ZvK!9 zefjJIG|hX5^#_&#*En?$IO!M~FV2ArCysJe3+_}8&c6XwJ$E3Jd;+a_qN!9sn_m_YS@VhXrHm2hEB#2Y$Jw7dYq?6)BhmxQx4dFMx-Ep# z%6EwIE&KfxZm0Yf=r;criw8CGHSrCgQji|uA4}J{RLEhVo zNVY-~7k^V-w9D5SFbXSC>4xW=&-#C&O5aBIN9{0o_COQL|DD2SWLF6l<0|l^oNZ*s zV>9Yt5XuM@n(3c~O(^BuBy{nTG-KwW1Qq23NZX)}EKW>?CwJXtj}3mH&x*!Scd^&@ zi^nK2(jj(mMxiBlZ1F>!=BP&-uAOHNTI&kdeV2jH#GKJTwY7xf`9u}L1Uhd0kK9l2 z;U+gcK;uvDMbD=+(~}l^Ku5I(b?aJ%I{Y$0N3tqN^88KG)f~~@zvIYjm7knpL!-TN zx~*Wr+J5RNB~E19pRD8x`Fw4@+b~I+ z83@-s5NBBOkv#%1E|QwY>Fqs%y^GXELBdnQ6kjVcRCEt32ugSsa5hvo879h;i*ZWs zcQ{`{NOmZCk+TSrS8L8QM^@W$H4D_J=Pn6iCf`Vz<6d;U`X(-XoH+W-C34N9Y22yJ ztyt@@0XOY5Kg&>+;l!9_l?U2#P|b|r(Da%YbIz*shJk!!?Md3CD3VHL-xIQjkR(Yeac1T`5)zUmNu|;zNs_c_ zd(ZnP_&nzvGxz=dUYBT7hBV)mt-)nsJ=!z;5&xYOOx!-)WdDYI;QhY@G4AGq=cq6T z%QPo(ulCgPa-Gd!)u$J*xU~d_G?vkUjaS(2@*$W+ClZh6Bhk&a>0*WTcJOQXVf^r- zfykAchT}ZifwlE}=EK)3qB~HBzC61JMk;^dk{b4imivCG(4AGtt$tVs9&W8eo_U`E ze@{dBJB!7t%Gdc7p=M}{c`f-k(u}0tbilU6hc)?74c^s;h+AGB0F8n7(DnE=tjxp5 zz+0{X*OeT_hv8Pzvu7%KgWC9Rttjk$t-#VT+W=1SPA7_77#&EOiad5I@%J{r5CE3S%ai^qzb zcBfi)seeYl5@zuMf>Z zz#8G1Bhbd-QT*hMB~Y&OK2)0CKsv6}gPcnqWU8G8YLxpzE(uv9y>sex_z*(6`DRe= zy$>(9XAv%25sFRcCR2|hO|)}=BK>P32UD(@!ezc9lv4Ab(06--{~i8ExAsb+y8OK~ z!!HAz3VO_rEl4GGxuJ}jk0v{AY7P#b5D4aMJPcO07$cu6mf+Y@3sO5FjznxchPFnQ zla2tPSCBOw7MzttT3$Z&x#p6hgD=o@>rxDElF3- zz6uqsCy1)7PFA#xQ6@phmmt}|nK;zVkEmUff^}}Hq(iC-jk|uDc$rIz9wqbWB>F}k zKFgpo&t8+CE6yNa!993stS`Ge*BELxt)}-5nUd`rhVU9X6&?-g0~w-XYG&Y$&JR?> za}Rc*FT;WS!iYMOGUP;X!yKM^wr>Wl=ycl3Z3*3I@PwI;rajI>#h{!pm+)T zx*Efk>MAJm@nT#PAxRo5jo=MSBW&)%@xj|?A|p>HViDOwbN?HlU-m~pZ-GtdwBshN zow^K0OASK1Mm;{tnL5?zoi#T<()wv 
zWU?cPHU4vj4nDgL)!#hCBNtl3z;#pc-KY0ZXYxK?y(vdbl9Cbm)R2HMdiVDr1e~vet2`7j6A*)g}l8+e;zmqH*NcB>D(Q~PQ}ki^_|UR z{F8;Sv_?&|+HE^6E?6&Ax=C$6Ag%l%N|ehwq+xD^LK zc?DdPVu^yq7h2yl0$VSg#AIihg5$s%Pk63DntCn+!>Hx7>)R+EztAEZK4dX%PlX(< ziW$k@nU0_PJOypOK6t|7W29Nhi;kW-iG&W<hAZL_nK|+my{J^ zrH=ymU7M;mti_+La_~oej#&E#af|j=!VYO)Zei&tVm79p>$@zgXB};lMfi|+Wr;|{L030TlX;o zcR5sbf3?M&Bn6l^%Pb^8T;bsNay8KUb41$=U|Pr`C|Shb7>@US&3ioh*3o-{Cre zQTgq$0$2S!4SO1lgocyP;gq2pa2=8$i651O`(PPK&PyZeSGsW8upM^({hg>C{7k#< zdBgJQ3$Sr(2x#&(x1_fYV8e4cj7-eg3a1ly2)E%Md~|9nTvaHCtYs7F55Fe7W=|ll z3-|;j!Y5$2KsESZ+5&R>@eujZ@S2IAe1r3IPodoJ8lX8nh&*sh12*G3iQboAc;S*5 z7^gK6pBJ)Sfu0{L{Ix#d-cwoV@lPSkv(1gEdWfOqWEC?0y$bAXQH6427Bl%@N7L5* z1jlPDa^=sD!Cg}*`ct!(etVKaL=j`bu=8T_dvyaaI`9aZ9@Yjb6AyE;sWQlesm4md zd4y|H1sS{wjN9@MswI|z5{ns-eJpUi*KL7;1}rhTZwKec-33s68l0%_3-5J@WAm;w zV8)nYxTJ|(tLTOUz6RL#vNAmyqDjAhRRhJL)>x{`mdanUBj2SGAxRodoK#bA%(p<` zZ@>y2jVQ)A)(zF2tANiApTqYb>_lc;W{~+O8!Gnt1*2PQqEY!X76i6Lz=9=fp|^eo z=<>ghGgB0(sPqy^89 zkd*qfxNjf^db_+s9f#KmSuk}=xkYJkZb>kC7j+IZ(nhd);SCrpFSrE;epkdQtpbK@ zJ!p%BLvqCdxDKF!$#L0p~4Nq&z2wB++E}AQ6LiCcb3hi3aK| z;Y4Wk2;Mih0lSBXi^lt`U?r;i%b=o#z_z`>-@c#&qqbh4vyy7)O4gFNkQVR!80yVfp1f%lGE?)`h2?Y*>^NSrGsi`F4hZ21`VwIp+WJNKh7k17yfa~}Qv z+0U6+Gtg!>FHXB`lFj!Ss!{i2G4ITC!2Mk4dApJgqkjsVf0MhcAG7f4R{-7Xe4R+LE$f=|Qjg&h`ZQn^!Y_W@oMkaxayM17$$pE*lOpT1*+cXXDFP zO;PGTAq)9=H_N1%vZBylvePz&e!L$=UW%>Q(P2yYt_N{!{@ys~9Ggn3-2Cy4g4wuZ z!C3C%+{y6%#%v_0DKU^IN8(gxcj9#J)08%)Vd15jJn}c;Wd_u+EGtK)^LnY(_E#ru z{0*8NQb8)qmO`^95^S(<9^_bk^S>K$_!-ReZ{wN^5pson!N3+FfE z#b)%dMgcu&91d+r6pFuLObz=WQq(wIVYbQw=RRFR3I|_;L6Ql{?JCkg_X67I8vy$h zOSn_tLRp4|KqcmzMd{rrm^N9JJtEmjtzVQ^#M&xRbyaJ4^2|E2_>D8W;>m4#=WQnF zZ|(y5J9mTDnH`|y*PofyEcnGBYlRc4?4jp2nLRR z_t~h_E1L9+qi)z~jvBKAU1pOm4Vq-n|=n%~M%$ z?Wx;fmYs=c)pLFF>s&q{5@Ts+Vi8hz@uRo5jzIYf9pR7XqmWFM`c4=aJTM==<4Uzu3RIzsoM zHAr>)I%0j?ij=L;0d61*H{8u*W;n_c{j(2%=AOIgjjjJ8ungpM3)JmMujR zx`(cJ)>RO>)kdPSdGf~|5p3E1cQ)KXiQ)IVs#J#yD^(vn~>vpkxn zrOpO{rkS|kFZ>b&$!bDY zDyId!o*@I-?}#%Jcs}YT(b)CSe5Pl0C8%HfmXBQSfJ4q7B=clSsEl_cl{lydQ^JLL 
zfNBDL@<1E?-4{iEN7fK`ypL4n-KiKB`ew&Pt-`Fz9Cp4_2P#_e*lKe)E4QeQIP{z2 zf)gvL=lgT4f$MturD7wT^O=Q@D+I>)fobG&24E#MqxsoPJ-Dit3tC(+Gm5VV7)i~) z0*f_O7)Cz9!xqNa145eJ{~bNp6a(Ex#{j*x`Ea1sk8h2g4g-#)fks-w#!tG0*KWR# z3uBA1rs_=5TIa)L_LFo5>YG}I3G1WY*#E#b{6 zv&gFJBiN`tqhMyk6xQxwCo_c7z_&NMskc}W=loWHNt5GQd$M#rp z&Gf6JB&r@)YF5&TF5kfcuMK!`c_d8nYKMkS^Ej*LCvgWz!+wY7(dv6r=%;W`_d9C@ z;}lY$MqNIZygiewaq_g(T9w70NKk`C8RO};w+bM-$DZ^qQl&SqO{d?j0Q@^bEVj89 z03*NmfoQW6)aleqCL}Q$3g1btBQg&ciTChZ_HM-)%d^B~QWN<%MHw*T@=IjWq>nZ| zmE*HUUJy+k{=q9JjwGV$tEfrfK>bRwgE!_x;Av$)aLRlQwsN^6wYljF*vd&z%~nxV zu;wr9s@K7rYK&4j+nKZXTtzRF(^T}u7z4B~m z_930D{(cviJq-e58*QNIVje$zo(;SC`xca#A_)e=N3h{EA3gb4j2`y65LkK_&9!bq zdh26=1N@Jjd~+|7lPp126{c`;-7lfvaRDm`2DgJ%6=YR&7(MyvA$VROxb_r!8O_LO zfJv7?Dr=PqyHivAZ!0EUr3*QUJyp2vm6iDKjb%bd zA`tim2Ed|+{_tbI5vjIc$!B(Ii(2z?{)xt-b(~ zrkfKxlU>Z9{aLYV^JGhj#qT+<8)9xwcap%63M1}+PJs0K`=XXzeeA#nA^Y9r1MPl~ zf$6b&AdNnxGLP&~}&+5Qm+pDmCLM2BuHP}OI zC4u8DOZaq??S#6RHi(GcFo9sLVJ3zMJQE-HZSG^+(Q+B~y!3y-|sUf0T zm>#~`m^H=FTgEsdD9zB2k!x!B}u0~ku$&vzbD5jlLmMaGpwR%PuO zG-;$hdpU0iz3<)*CO*n$)zwv?-Qk}=!n=u6Iod&w_D`cNy5ESk@PA5T^+mSJ(n@sm zbvb@(IF6dFY++rGs#&FcPGm0Mc*EKjRMDz!+kwTIW^vHN(_}+ouE4Kn(L%!rew?c+ z>o!5>Cx5iV`BoCUjcqVxzRgMyqmciW4day>}E1f#)E>60n z#XjGB26A&QL4RMKx4pUzg!|8>^Yu>B6Vn`FOwI=6wBG~NuQotK8v9tLzlUBgj-c|o z1jR3Sz&-3{P?F9N5me@M?62*lbj$>}!S@#lHVK1vZev7eX1oDwPI-{w2RBh@+)voD zbv#?QHjPXYIQy?GKMNkVOQhDS3ciwSXZ3bkpr5u2*`$~KU`0rPcth$Dx-3HA0)qybh1VC(!4St zu`IQsdOkWDk8#?5NHuHgBS zFu4fh?kKW>QN z*14P*`it&8NJjL+TUgj_!Fd5!RL&<;?}LZY*K2YpGW$H7?B9Wx zZ$C^cr!Ams;$Fj!*ky=$??>XJ^~62$2Z-8+E?{%t3Ry9;#hNx{C@p0Y?0xVX`km{B zYTND#`#{WB&yW|%J>3YWZQMz$wLHMsVKrFhcb=74kwH2PRdCtRB*;w5Lup^4d0M@n zHrFkNtT9K_1V&#O}!}?y_rS9BIH{C1h=7klFG=@Mi8&UOC(x z|4j@Mz56u{cxpePal3c$Rkak>@2j`mTe%euoO=S9d!7}NGa&NJXRgw419a3m zhWZZau~jDwxrZx4*cG-Bq7#;$ti0!S69BeD{@}u+XQO_9}FX*L|2Vs)>DnYLjThcP;GpkD+(C4=n37 z-Ab-v8?!D&T2y!ZAywJB5V(2ui#N^a%##GQShPvPohqLq5>FDd{xY=N`(u8_026J<*$z znKW)llPd3(C1bvvg3k}Gq@5OFu)0fMRG;z;oLb}{ocjh)h*u^IoM^^Y2`qrew$^ac 
zSBARfN+>HDV%5zR(Z)run3xOo z5B!JB6#sA=4t-++4aU$5TW9h=`q!a&i#VKARtF=}{z2)fS1k=TFCZ-vzBqD4B5z-l zOl-JV%D(bsBxGC3_)q$@N@pfLW;_Ztysj6=ue%`dd{}rr|IPK3i+zcDHl zkwrJR^0(9P5sO=S;>hmP{N3YD5@2k)lKY9>H)UdzrSZ+Du(ylws*p z5kGuTn=Kx=1mU(~_NE^LIK50(Lg5_H&M1LOW)taw^(MT^y&K@$U_8u>(PK|6@rAGU zjuzcIcUvqW8A{TfM%yYJVL zjqZU6?uD%hi^8GhFy3a(Oqz_Vpe!4@?3&IopTh)UE9uU_!xq&RA=&Llpa^7 z@)1;t!cpUI5xx5D3HH^*+!u9aaom zZb{Rs<)w2OhiRfrbAhS(Mv640%drkIiC~f7J&!1tga0i{#o58}f{Rxh+Z>+*=helq zcIUP5H6blvD6j|o%TJ1>R_wrAmZ#ux-|OkH^vH@eO7il}tfC%&-4kVqC^MzJN~aKyni@JBo%8S(E0X16wb z@bV#QHf4K7@GJwcM{R=0D`7MWs}Y`Kc{%7foIp2c3%{9_DW0zqO*MtS@!xkdp!5VQhTUVipCS82+t;mv5{xb!a`0pu6;oKvpCM%5 z>g{yt@saGjgl2O1+H&B%jCGs#pwg9T^%%|FxfaQgGSh@WyJiU|^4t}0(*`oqU-f&&~aNa5yc_b9Sl8j>1rHjNlgQK})J#u*Uw2RD)I|;Po?*k(7G`>Qj zr(HDQ^qoDt&z*$iZ-)b#Ccsa6KeKCf0qr~Vnj3bp6{RK|#~E*eX_4y$ULw`SvcKLG zG>r}h$^WIpuKBMSHH#6f;e$%1WVa6J*4RnXKLw%@cW$HIIqI}z&N}uwvrA;U&JiCh zY~fbdctfss8n|BF1sff6$kBb4@cdRyyh6ASItMO-^nwhV{^1h*qh(RqKl7%z^y75S ze_0&AHEuRLc=t4U`bCmm8mI^_9zTc*4>`jWpAmTSBY8A9Yz^DgB5+T)KH?)1IZZ6e6qDD0++K?q57CJoM-@Yvb*i4*9^kzPW&t%SG4Z!V03ZEe`1ka9rM#>8wF;z)8H44pg*|d$sZ8 zn}4xbVR;tQGeaKF_!~y!ljIqZ%n(%Tmcw@29)ME1mVdA`4SrUL6L&~mq`~jjz^$W; z(1q0FNN>+3y6kq2C~4P4zQb@3uT~91ixf>^>&vCEZg~hfvcdru`;B59Tvq>$rc zd9IN-wsH-+xvLu>(@QWA6OS0{R7|fpzJfl32Vn7_IexiBM=XmKNZxUI=sR;h zalM!(z6)1Cw+%Z$=nqXsdRCHHZrl#8c4v8$g5d%PC>135>z=?0@k{>KF`)cLve#H3EzcYiFD3e*M?5?8Mo^c{tn z-nkV8o3xRx+|vs0&j5txS(6Jv;pnSLFM24WLw9IzW2f%(78SafVzbTbaqF>_@PX$g zux{697`~~Wd})$|-!gV!l?_E~+OA{JeXlk<^@S7)aJH%3r(ZAjF?h#4*__DFcMN1T zUYsFDd(~Or%0{@Uw*r+ZDj|2N_uQ!TQk3~-6_ga*6n|}vkh{4)-ByrH#veb0vrdjh zv5QPtnS~ofGYpsDskMt~W91T7KU3SPYv7?Jt~kklwERbO8`^+-!&9;L_WL-i{Ruq1 zmqB)81Nj!Q4tpa$nfT2-M6=KN(4+I)xB{8GL@qTFv`U!LZ&NQ~>*N1msc9g*adING z=rAPTGN<5^=5EOEPCn3YNAsgwP)OLL?X<3oL z>dx6rXHAKqW@_hQrEU!4H$4r7KG&n_>sFHipkX$CQVPjHCZ7t1U&x0s#UX4DUd_(Y`{L%wOiycXpz-2BkdP_7kzCyNW1qrkn4UP6r z09JO7aPO73_*~;{P~@G--~2TVz5uH9>9RY_*Y$_N%?Hcq{Tq^W=GaWqEqPrWx80oi zB7FCLI>pe~@g>Ya(qG679^_=ZhQaZCcRq7&JbX}giF 
z>4z}|qKE^{{3ZWc(^VY)M~$`9+dx;QujERF9;#!4vS{Hc z6*Q>21)a8C0}DiHbU1f5ZR!|Hj*fPtf|CKM&7BRmyh~)uo$ML4l2CSL_Y2(lyc{@i z_W_-{nok+mf;Z0h0&w3R^k3^SG}JH6ZnVo3Uytnr*2Y0dJ#-GzC~SgbWDQ}=;}>vj zqB1luS_yY{pC)gz<}sngNqnA%h*h)mM$Y}R3@GKM(rNksd1 zmf?ldrj!1Lot$FiCXv+l7N&3O1lVA=iY+mZWfk`BBsX1!b9??<^x<&>*}nx4sU{a( z+Ia>U&WnK6eY0p+w-X3d`VTtqJq}yGb>R~;HN@f(O4Pw976;HKVwW*ToK&8H4vsko z!V7;w`_p3pLCVy1a0%pH$aYUI+BPfo?E>6IP_aL0ZvS+ zKw&!0bW(Xatgd}M2$aoQaAQgr$ z996-QHpgjmt^udzU@Qs_n2*PsdDFL{3jBebMEJoz2)un8if>-^gC=!zLHnmW$l8l5 zH{N*K^0{9t;T@f^9Jh`3UMePe529#Z)ee67p$x|Q)y{HF$BQsh-H$Zbtw7!3yJ17Y zY^d^e9)^4jbsnoiny0^~_VI<#{CbqAuw*Z*G+qOL_E$%>qU-$jK56(se4f_Irch~} z+jNF`2^|hviRUa^#QRkn2%g2$I4OBH+&aw#<#$scGP?=OZaaegYqs-wU&K^>$1m>k zv4zkke=jdl(!-liy}+fE9N>!{g(CGSx!l|9c&MLQJ95kK^Rw;vD$nvcOs&4yP-f4w1wDlOPw|ghoUyfcqCbq-y)> z@ruSt^z^E8NTISbj6Q8`{~N<5zUbv@fO-4JE*4{-0>Oq4o)47r{q zbg=A4VYzZC@@e-8oLBl7Z)bIsTU1JUIZ#gbFI=yiHuQ^a` z6oR*}V_@&VNM!%G7OF_@Wdm<8Y>!4flo{mc{lyk=IE!WLM$G2T_NTIUvy7p4K^RS* zUrbyN%;Ic6-^RRxC2U(elLm&^Rb2K}Cfw;?_)UKVRbLziTX_!@=~0JMCQBkuV+$BM zeG;fjEJmI6w`tYJ4NQS%DeWD79dCH%LYKQQC*Cv95#N{dVBp~i(9^3I&lr`B8)Ir| zx_1_I$-cqAw!X)%UvL_i7954&ilgAi`~A?Mr-elC@Pvb7UcNzo(_C`f= z3K_G|&S4|Cy)^^weH#w=9S_hr^Kx7nu@GM=T2D%wq{u#<54gimlUz)5#a`D__|RqN ziS5~ABwb@0EwryDrQPRXl6?fS%6~xAO_#xRD>>TYbd%w{g5be7FTu*6SD;Ml8CW|f z5tJ;FBb{^A!C!<}DKZ8=e7Xr=n=D1$pf+ov9Z0)A90n^?@`>^%M5@eHXvp(*?8Nv= zDt%A}{P)Ek80eY{yQxsztRBd3+8+=4WY0nC6PXpCUtJ*`pWN8UwWCm>S^`*a?LnBz zM?BPuhtq{j_u!5-tj69k#5iCW1QcAtLoHGwZeRgfI%Pd=- zFreNs18M8+Cw${G+;I68pFcZ|rYj0;sQ?GEg0e*WUl~3VJ&~=K;+URqpZNG!@)i9r zmZF{NZK$-NP2023*Q(JkCOjb^)JSp%`GK1aD_CG z`>s&g_;Q~(FlUM7ol+q$wc{*X@YIp;J-bo;BMEfaYygI<#&SIs>$ypO-_Wr@NvP1N zMcfutz=4Dy>NmEUI90S-#_hQPJxU;}2S$iOHffROpkh+>XbYQo?1so}!~*8vyGS

{w3IU&r*4O|80s27!* zoGF}8CUHGNumA5dU+AQx2t8b9!-7yHtn62fwy#Tv&l|JI1*rhu?`kj@xX+LS<9o>_ zKLc#?^c}R%I7*7_PQjw&scg{7BUF-VQkh0G6d_Z<4asjo`*uDgPr@hCeWRkt@&!TU zDttv&t&M{xqcQ8;JQebr2<~2zh}04lP)x~LBs*g)JS6O}bkZ_Oc}XE@@%RGxoB-BL zTM}9A;E=ZBRq)AmlGrHVHi?^QfUW-Yee^S)?VQ)>ptMKyD;`-J?ZNQHl7??_78=ZNve zF}zK+J-iWq0x21eLh|9d{PXVrL_fBd^1#8I1l~(RT{CWjgU5oLxvw15E$IgqTm^|bhNQ?5Cow(*13ACxO`+QkGaF&(h7C<2~xoJ#di^kWHDgxz*kz#1iKUUOd|igd4pX6v)SkL79TN%;Xp z9G;?nt=S~uM+rKfuEhGD3&DkFU!vjV!Ki#v4jL;z4%t7wP10NX@!Y*D`6WhS++O=r zbjsb^@SWp*tX7l^hej}{bxS5LPWHgx9<9V~b96-YHyu#-ktK|`TR)0b(`9{Z^~vie zyXb_AGm*u&dGrGP3tej>LD#Yuu=jciXnS6Qe@xH8W2*c5R;<|GkFEMzHU9j31fZ-t0yv~HIdanh` zlqayKb%cJmWfflW{WZuZ3y?~Z9^Vf8Ns@{>gx`6R^|_kO@U4ex!EyMY#|Qpd`4Dm5 zxtHV^EoW?`y~O&P_aF3CkwotfqwIb~JDnpVq75KVFKdj|HNffB}V(#t!M9O+|RDJASV0(54uu;cg zxaR`&Hhzp(JLs`o`ZMP8lf$4sbqX{6{1!Cz`WUv=`44G5WXmQkTqQE2kFnF>6HcPx z!FBbFB2YMb3Z!8jMB&5BwY10Qg0n0}E)uTySOQRq!_On#3b zt?7byWG}P&8=%O4b1u$Jnn{y-Dp;k5BduCyaC1|NAG02-c2Moj0%^VJrdXzZBhl-| zu;ctZL|KU=)_MxC*XJ3qsY4Ea}jPjW}u7 zQ8v2e61BVULl0VKAp0r96`gzxDto3#=cv_?M{dKI?@R;^RVQi3H*dJDjg$X5}`r*4Q03n;WrE`8F85uuc5% z>~vzdb|L3$lTL@jr642IVX3ryD()6ALJ?yrV{8{B@Byp1$T79FSZM_Ee!_5V7q>!V zxeZY3-bF0Co*}PPelk<%hw`DHw&A}&_tBF5rX&`)Q5=%c06a&#_fC-ByM~W#QO|whKYH~tc0+WEDj3i zXI8|D`o=4XUtVq`kDp4hIib<;?`IwOW9>$=bjxOvQZkB_^-!RGLBD{j{zcec^_Kl4 z-2qK9d!a{PKiu%MjDB&MMP;0)5Rr~>R-5||m6eQvgYwbrK*fFLRj3c^KDL;fGwv10 zZ)Ztnohd)nG8Ws{t3s{Wvh3p`ITW$`C%dCJUtGFI0%{C8qUNwWu)TT()R=Q0E|V^X zT|kL=76roz&}Iw51qQG~JhwdaIa)I+5ve#VW_Om}BY{2tu^l@C z*()`2BrkXhn_)bX)%v5rs(D&M-K;>mZB{i=w}|2+8WM@-K0V%OQ3(7>rAWtYY4ZD6 z5HcK;!NEV8@%-l|c;nXHqHALXSNBX0xcagso3?2a+kNgP@wBU@Z41iL$lu`t6DtI^ zcwA-v&0mhTyp)IM@2S#(kRV~6^PDRl_zZV`w;(a`lf+wvzJ;8IJAQ?WVf$|#G0InA zqy3jbn|%k+-;<|7eIq3^%}V(>P7ImBZ>P7Wg`4~wz)tZ1OqQ6YX~ z`vndgoWpT@%NU7sA>vCDD!8uCkUL{<2pSFQ=rUai>OCpCyh!SX$Y^;DtCssq;GQ(W z=;90Dk?tLwdUZMfJnJErvD+v5)|O@YL-{ccn`6d1&b5HIX9WOJSRSAwqu|&tO(1V$ zCY!D934R_~0RpsF5%*akNL5^mqA&g-22(b%t`w!vDheja z*Al+B8w76(!yYbY+5XfO@WN_o_GdyVO#3~jvc<1PoW1B4*XNkTZ;vr#7oJQb5^|&1 
z$@nPn+MI>f4)wt6R03OMvnbhm6$PlXjY6^9@R>+*Yah(LTDZ)488F6lUh+mD-hHFoB0}?U`7S7%Rn$u4* zecxN4dSYFL!@toqMW#~lap(ZuEq^(+1`3OF&wzqnXY}XwVQMiYnYhKrv9(90Q6d{k zD-^DSr+R-Y_HqfRI%^koYW|K}8oo0-tK(qKrzd23z&@_=u$m~(e+-=IHw)j1l4QsK zxDMtnRcB|6-UrK)7Loq@Nz|e63MVm;as9gALbga(i|m<^3O#iL!9GWUp_}IqB_;&n z4UW&~lL{k&JHDS=qu*5V@!)T`VAeGzp0^_Uxi=x3*M{5oeS+Qbw&c3mY{(h4S{`8A z@r~@4STUvoC|IQMnaXQnZ`LC+bXLq5ePBWMMN`@(Rwq#@aU^NRRk6fgc`&ErAap9&I4jyple;H*$T$(8UVU~1?s_(do1#}T90vAJVt^5Kct#L}Os&Xf^d&62ZPVkjMde+%^y-h+F>^WGb!3R_OsUro2EogWHfua{D#YKX@jj7&)#xI(Ou5P{$2XQD2$bANVXWPJxh#Jr| zB3y75Y~p6Mp5&{h0e0`2jcD+0AAWW14@%5RN5iRuFn`l8y#4WHRO`1A6$o6DRj=l< z%oG#&U&C3{EnY}oy{jR$--_{eaTc#8_l1}ab&!oNGr(2*6g;^55SegyD>~?R0sH=E zNo+SS=KP8rM0wGXTzm!#>#nY06?HsWO@V#6&19H>g%Ygnj8t5k$B=r^%XugLhos(x z!rHuP!rtR8SLJ>WRQ(NuCQJ*Cb{Qpp)htbS-d~5k$5@jQrY2&~)yvV^u)|=^*C=R? zQ~<+E(W%=W(dhqCbRPa({a+lHl`VUelA43R z1)}wX^_67ffhZX|?9E--(rh5AVIeu8ld;a3YQJ7OUlZ<F_@{Qgq|Z;} zYJE=ZCDOZTIp*+7fnZz+-g?|(;TMjA{rclTZpZGL>feq0!Pht7qv1|waO=k!O@~#O zUBDAa)N{VoEn@tq>z^|(TyFA@53BMIK3`C?$_scRx+fQ`YAfW;$z>oD{^DEagq?Y9TFFIz zYSs%Bb>D>1>fZdtcNA)SE={2HeKYY+Api@1`qicmo=g-Y@n&jR?)Z_-}%S5jH~(q%UYpY zYeB|;#e6%>RKZB&QKr1+Ud`>*akV;bk#M!&n}2C%FdrO+88*d+UH9CucAxGYSQ?j! 
zp$V46?~$yXZ7L)vZ@7+jS%b9&`gXPE(YN@+qY2n@=m@M5Pp!2-YmcFwVYP)H>cHr- zSAHW%Vrsu1udb;!%CCKp`ay8Itbi;{MwsQsH*k(^KEI*7k$>%Nk09$nJo)R` zTc$sM#eXkez~46aG4nR+HPKKXeCPk->t2}69A8|54K^zpibjjCH8&Khih`_)#nU!>Q?_uwye6k#WFut z9O7rp5QKy;t^Ji@$e+C_ku^yUr>o7zwWUWcLy~d>e_*x+KYCWOfCAoC9U1Xvb)rNV zbw^P_xy5pUfsZe!DE?Q|x8yIQT_eQ2Y%hU)@vk&%Oha{#kpgbFRl_Ok!qNI=Gd&qf zLH@e?B$hIX?fSA0MmP<{wC^|2DMeU)NQCu0@y2qYy$75{)n-bueH=(yNP+H*a3XoIv8AK zgeA_p6h4qkrEAjA@B0=EJuXC52ODtadI{dGVFi5n-3$AtPNc;13Rca{dn{FwuTqYz zHjbV-0uRJ)Xakcdn8eVY%n$8g8xl097M?J93WHwohT}uB6!r^Da82bEP73~UU zFuICE`1PVU)-dN#EZ&=Z9A}dGviWHIbS^m9m%)kq*U;QH3ZF)O2J0_Fkeet=Pd7-j zG76#?(_+ASx8^}FGagp<8_`l_2EX151m7h+;CCj&GU0AKhKIj`_x5Mu&EeBr|Aji1 zIoaao)wdxqc@JB;`#QC98OKP?n-u1lgZ)ciQo7wT82#@a#bjh+@cP$qZPN-of87ms z4LM-P$r21(wgN)w1$d7;1y#}7^uvc`6gv`VaFqwPkGNr0VFOln_k!{UIXa!YjrMfL zGV3k9crP1;kc;}lq`nq-`Ds66pc@N0YhswVu5|n*wup=!m%|PxJ)U>ydYbPyk%Es# z;j&}#v@IK=V~u1X!x2NS71k;4x|>&bjfFRClY z;>?n7Y(?}H+8V{D*(+?Y!O(+(Uf#xy`~4|;ngm{*V2Qe)dO$zrBrWaVj#q9=(TyTi zaCcP43lT3Vct;eujpX9bn)CSBdo{(|Hd1G5I~Kld=H>%utQ~B?$dhGIGwlyXF1i4^ zv7(4xw^1*B9hB-5c7z0g)GBj&+TTPOftRo~%Z^dENT)aEaa`w3EMAlS$H+7h?mwi3 zY5SGwy+;pb;RRs*3ShGOSt$9_1jgJxYv;E$Y|H{K8|`8YnRDi0-o#@}v-wewtBk?J zZu2+~awtUSt7Fh^U?)z?K)FLDjQ+!$xD6&iZ33;VAEBoG0qT56A15%{skFPCs;ns01p>#rq5Tk zal_0B=;pl;vr6mnfIxuD6BxAZ&B2{pBw3#g|KT%12s*y`1;Y>Id5K$ba~b?&T@C=gK0ydVL%ks?5O1y9PXyi^Aln@tmxjTVQ*|bgJC)i^)rN z#js8-7}1-^lNu?<6VdnK>8eze*q(svoIz_(I(Pc@sxaqh1C?Y*H{oE))h-65vx_CHZdU!QNhR+$D1z9tUkC ze!3hwPpHQ^qR(iZe>b_R$&prEI`)1SBOxbY+K2^ks&_8h&HsY0w_XEWwHedhqr(g!k3qn)SLgwJ-u!7p=$a{nP3Emxs9jfCM!5#(`i}39fMzLyS+r z70$*M}z~j4I$>H>Dd^&px#O7T? 
z@42C9YId9PD~raYDa&xq)h{r2O&y$y-;PS3WH8P4EIj1A(w+TMbf)D6jKsA zy!RDMUpqjhcT$j1wxt-w#Y>5r```s+%xeQ(D%cHnO;~@k# ze}#Gd`sf>(hyB&f@Tn*bb(c(|&1(~3PR8##IgnRa6?}& zjy-n_pUkqtBjZ-nk`EeqH)an`Sr!8tr*2^619RqMR3~~1Lh*LoLF5N~2i04rV5LMJ z`cK$R?n`YTbf+Wb7a8H1Tz?$xbHKM(+?lpy74R#b%!qUjLhj52%>3#OvFGLJUFv-} z@Yj$Y#;rhm^;0M_l7^c&&ScJ_JUV(rlj;7rl&XwvneY{>QS5#z2zH&tJT@PECSJm& z!>Oq1sZ8m9_7s~=6tc9PTE-v4YK1IPF3E-F_X8L1N#LwIvUu!2AI!h-2s9j= z@cEyW@GPhd_Xpe1wU7I#)435fueoEMemr&>tB`iO6aM#8f;>ZCg5};6wDEod-!7Tc zro5f#*dmI}Dl=#}q79Ngy>Oc0SJ0}HLD$`OG*L+lx8?XV7Ilp%C4U56PfeoJFGR38 zdK@~5gktN$cPOb{0FzW4G4QM`wkav2`i>L~Ult1sZWo}zq=&HKUk?m!^TJklMJ&F3 z9@@8>;Nk<%VSeItjQd#*F|$_EG|es)KeqtpmaQU%MCua?D?6LfYfgW;DxMT{wdpHGm8%vPAPXyMisYV?SF2fo)9=v@_QO63g>aaG> zHxZ*b!}2!^ z@Q3y;NbHCNU#>H{c2_z24;{y!;aX%-F9b@$$8d^45bPS80SWG9^h0+8u)UJl(svtX z$=j2(&2+r}<_3;u`slOFI`*PZ6g~4ig5~qyps~L!DMwU+SMvr86?%%E3NOJ(ECCk# zC^GNcWvISX0P`<&W9g0@ytiltX~mqx-|vNR|41;})Hvdz9xGaTV;!dWDPtj)a(;LN z_Lt;bO9wqEvVSy&)@|T=6$iGE)HMwnubB+)0w44*o=q9cALD7?Y4F}73-0MXz#hN9 zkW-tFC$^~5UaxGN_Ht>T-SJM6^QW{L6yxF>}i`zVYmYMd7h9x z?lwNZ&-FEZy30)48;*ynmZRdDt5EY%8&_~WI`{EU zQyfoD&%*+{Cc4)hhxNbgsO(M->~e1h^Go`)#OMHi;B#HD3+H3JRk)>lr5KhU*?@hU zgz>)kWc=c7fM27p;4YgwsQ3Gg)r=AyxOyH6@28QS@h9+G;z3{axVvZe1L_nMVCh0T z3{39DH2bsYYd~-KGBqji@Q*<_CVRMIqi-VT2Y;Cd(zs<7)ZKK8ihk@w?|U&csMZ4e_p~AJP#B8u7=ucWM9}i?d5k~Q z2C*27nN>R=DWDp!uX3c8)s7S}RD-9)Hlmc952|~Qqp+G|Xggymtvvq)&LpQ`w$g8S z&G8p2cf_NAr5e6XQzr5D`_R9`1I6n)AXLE=eHCU=_Zt;Fus@9X_k_#k*ZSezy_4yT zi#iI;nT7flr||5PYk2o4cUBBptY(dTwyZ)9-tf7 z1o&_A8u+nSlRB)e(XT*~WGg4YxqcNA7(3&dI3ennOF*k=Gn!a>k`mHvp(42yCr*jQ zusICYa_4{Z2}kmA^@YG&Q^{XlnVJ*UV(gAVST{=@)fUI#Igb)ZfB6buNLN9^jpLvw z=mO_ArP%u?26Hc70MVPvu&FB_c6Hj)GLE%mkQ{<`Tb9wGUDGJbFB`@R-I+?)=4<=ujv8b#nxc9C}Xxx=scFC^!A0-oDO zqLKGU&eN(zE+16k$j5kkuU?F&7k1!(K4WS9@nlTy_CqDRF>FPu9%f$J2fcdA;B#^t zT)(b>`=muM^LH=`AHGG$y3Szg(Hj)%uEZWVH3SfgExoA z}h(cSAaA3dg9h8e{i^z+aU{tc{U%%p)|L< zEd8KBNw1}>zQ#_o^jE5&-q$s-&1wpi9s0!>`%Q8y_dwvOn;QN_qM{t1$wOXlxnyc+rhL=(5$&eg*1~9gaKzT?7`(jiKG!cA4n7yNA)cG3hw=j%b1wI27_=E} 
zRFupdzv0(DXBrX-W7gWO=4C8<18H1Hy|udxjBpdvCJ2SiSHwVgLKQxU zUqDCQOkv-ui9D-4D=8vUhlIWSu)8>dM7M@udEpEmUrT|#dSbfee&rP;mT1G=|J03I zimJG}fv7e?17~@zqF17ike8#5nWk@8xrdjjBcPdH4{XAVF4nXq|0>Rk4yG?(^5L1Y zImS*CLQTzZ(!9G7>u(6tCu5H9K0^*&kM_}v%LnLeT?Wpx$;U9AO>{JiPv2$vcyss) zyZZe~Y_PqHUR=j|%h)a~1s_QKPZOVA&Op1ji{WkP3~pAN38`ATB=qDKUAMS~O&@`g z_?be7Gko!$4&kYU$xyXk6LSkEAW2G-qGubXn)yNM^bk-LDFMFkHTZ903w)M$V1>i$ zLE?iIOe(ZT(M|Qtrea?ZvkgFjv@gWPx`LAKJT$a2X2>UmQ=zB>E-uW9noY^*u+{f5T}X$ISk?aX!Y$B;i-C`)kL^K6vVUm(BWf zl$mBlkc)gjh=e!z{?J3+iCIGH3K12>b?^3#N1wHf^LXAHWxU_pNzVTfH z-N|y;t#}>YR?UU@&4By2t|683QO;K~6B}bz;PvRsXx_996Z#gTW>7XB(i3KbzzSFU zoGxbIir=p5`ay|cygY5pOLuz4Qx!~fqVaXLz-2n-r5W%h3OcyjO)zRd5Yiar?XZK zpFl2mDy*Ej)pF11Vz`h!4Mp^Zpyu}?Y%;h;{wq6CyCICO+qck>=}tI!w1V3Y>*9oz ziLfl|7dc+dW6sQ(h;0uIF`!Y8m22AscC$}n)vg7kKG?(?Ra4{z4Oil$-)#K9@?3ZCdzg%pTYlCc_)Ume9@J!cfHjfg$xe z6u@y>-l{KSbnlFS!|N?D%hm#Nvby11f()%-)alH+zvQ#9nM|gi#j>WQr10k|Y?$rI z=uI9Eis^QE$Z9&}zEProQf0u;lqJEh&v;yOA~USh%W-VY@IQ_w{j@=r?gWWY#$6jc zw$=kqIhUhfNGjLA9);JqZ|sUO?NBs2mU@~**kV-0%jM(PO)o0J|L_3wd(A|uOP+?- zI`_b4vku4cG!{sjakC1KyjOyI4#T(pJledo#T_)(YX6m_;?*mo_>N_ zCKTh>CEsX#+%70>93nOD&eVS5G@7MZBX9I1G)g*PxNHsXj=aw(yyJs$?=8@;nM`3L zcbIpA?Xm z;(8U@gLlDrkJo5dWJwM0)uGq2k1dRop(Ve}hzw?6$E20CvcUse--+@BMU~VPbKJ5Y z4pYH{a;77t6Zv1~py&5_bTnQI%hRS)yxAA*HFd-nhL_kKPFHE#+i#>*nU4mvf!_W; zfrU0FXZSH@|FbQK5Db+g!y$w!ceUu+4tyjUe44vb0M&5tFCqLFJG(2$X8zuBr~Q zs_M9WqBc8LuLZ3B?gY(fPrQ}40)n~hypNa#MvK-%lWze0YYoL-z62}ZUW@WFtmO|A zQ+B}P2^~G!#7LSLz}|a5Db}DAB45eC^!?ju+XH`^ekp;f#5Tc_DlXf)cLknUjd;ud z1BedBv7bxRY5Xn&lqr{G? zcqnHtt{dG6y}SzWE>ywG?)jHcSzZ zA8AI7;a>L7=UQwQ*Jk&0cfrj=sxWQ=fs<9yx91|sNj$~I>j5wBCaQ`5LCoMv=lmQG@so4q`NKk2d2L*$X$DQov=C4YO;uBF;CJr!(&*sAthAjlbGV%+d_J@y3~& z9(Tc;kzcoh39xFXf3Rc1o-qE-0t?lgQR7mOz#H^4xUDr-9KDqVCw z0}BmeiS%m_?Voaapg7p_s+X1?5T}gIQq=H66uSZrF-M~p;a9mtoNqXjIrPt&*{nSb z^&=`&KSc&kwfIrup>VW$?}FXiYH-wy4@+15hr`BmsA|nyCN5r*w{xx>9%9GhZ@Vr+Y3B?J{&s0vYVPoh77qQI6f^+>8#D zKp!`UVZ?z5%5rxGRvhrT)ExBUnNd-@J0`BVhG!PK!zqJlSYPmz^p6

A)uZ^8Fm9 zZQMc6?WW%zuUsh zqi=A#@FiL>=`pqq&0(G%K0uO9cIf-r54b{fn2#keJZUSgEqIDmY!p@$tpz=I8~AU< zHJCSKh);c2qY{o|_kRBcrH2oJ@!x&8F`|g$dIy8nMjQP3eh@yGdcfI~bGXLjJ?+2O zh|}ziEYHJK)_Z9K)mtb)lh7gfw@HK5ZYYH%T_em$Un14kdr^_kacXT#gGHmQpfNrS z6$@^{p^f8kZFvM+91uxWqvP?7^Iww9%A>a5(XdkXCX;le8k{*jw`QI>8IP-?{oQ*> zT1Oo9RL-H$Qd<<+Uk=H$hr!~D1m>#Efvw_UxS>g(;%+{I&@o%_yXYm{66BA4Lr+0Y zDF$^uhG6ugTVxq74rJc>j8WfIu14(&w_cqvv7g)9ri&ZcgJy=iVW(FNpty^aTm<0%`yL!vtAHQw7oz@fI_xe#LPyu`Vs-xBWy>P! z$-6!deLbgA?4S_|h2_J{LQXGel_H~8wOHJ95P!U~XMX%RLkm~!fVgfmvN@sa|jkxle? zD6SVqKl{HZ+hIj>Z=L}qk7PVI_a+li77G&`MQPV6Jsdat0ajdFfiIHsP|#M#1kTl< z#*wFJ9%%&l`8(S<`8S%u%@`^nQRl6X&IZ^n6uJm=Og%oxxb^&%^OzJXEc0pbE=Aig102KOaqC z)m~cAoF($?r`}xnv&a(j_wcFX#!>VwtAt>uakS^_BSz>7r*Cw%fW~DZ3OKhK@BV9` z7pxl8HHJY#d;~UrYDW-Kq^~I*)Vp552phNJ_mMXET9Qh=ObpG;6u}*DuF#8QW1Qoi zhF{Fm(39&+EtsQE2H&{*M#)p=)AHM(+4>82GS6utj6>xJj!ynZfwsDd(pafSP+0m7 z6`Tq%uIwhvFFZt}gTA=WtQ2)QUs~zXD-bk)9mh3N#<6FgLUUjWK2*`e32mp)$Rq~F z&ObwUicIO*%)R(NU5RWf9J#&IJa}o%fTP=d@(VGfqRk%gD}5DNh?(O-E(0gAUIceH zpC^+t?%kK&PI-1`K}w;7?8e`wrg%;hkuxII_i~gh{{_!#=FqjN3T%~!0}4GqLJ8dS z>~ydW^(>6w>hiZBK6MDsN8V(Ll5>gC8cRBh{a}Cd4vrPxiId$qecjR&e;Fu}sM}+x zQ8Z)Igp~1Dcm%ktFrYUhe`(RlaJiwARwh$BFdOFy zAA`M|hkTouCE4qmV;gT4yLV1L98*wbUmIpHUUz&@GwnMpnERQ&!W$4fBFt9h#DVdu znZ&!4N{21$VW@dMxI7BLWY;a&+%JOR+b3iAQzcl_MP#7Fpc1k^FOcULip)-3cGE>$d^=Vc)kB})jqd5ZIo<*b-iob+u~KMe_Mhec zqK)j$GskHWZzprFAscggB}kz2ABLQAVyG<@W|khrEnj1q5ze=vx9vNgJE25ji}`f9 zJc()=xpo&bZCbQ<9ByAW9@G?WL%~WpRIqu)k?p$hb@xT+7?vW(;%lh-b}>^dl!}r3 zeqerPVU}M8&c0cJ&p-n)AQ5tey>R;*MGS7;hFgk?q1ZPWASoY(w?xs_y&~9aa*;CX zF2fptz?>m*h_F(c>ov+?z}3S4mJHcH4k zvPXWllj-XMOZnxY)SjQkXb&Xf*&Z|eu;&wQzVR3?+x)@n+)Qh|!VAVGD6mpG@pRwe zCgoReK@sT(*t=jNUXb*pPi#AGIU0L5asoz_YIcZ@lB4+uDhE=fZMy;6=jc zZTdK{yop)z{s{WcSw|v2{Be%^W+q>FGU?o#iZ+Hbao_vxp!Gr+69O)wuVXoGKRE$c z2R?v;h0;it<51?kGdhH;qq9vW*^iY*gGIN&V@fcpsYJmkzjWaFFu3<|Ab2I}!F5`Q zIY$bK?-Yft73#30^B#4SThb(cJ@eq4B4muOB@B$iqLZ`W%wr8Iw0($c&K@DgR}6M! 
zs-odZE3CVyif`ts;iq4+tkahH*ds$~+~~~Bp$DKfOcoCw*anszGMsND zo?f07q5fetyl}}KFD&%KCkJNhcCZggbjXM*bUKRxLxE=CVl({^tm;j&6X=-HpdIo zd23}{khljW`4>QSbqwy_yMZ-Y!To6o((KJue0bNCff)-9!-ateTyM|>{1(oyv%Us3 zlM(ALUPXRj7#c6*&I4CR*6;9ZSkbu|^k*$$e(%nJgGcwGCf!Bn0}X*|kxTT?|)HvM;aF@9MbhWn=tp-9eK=%Yk1&~;=?=K~G8CxNh@F^=i^ zg}bFiSkqOR*!P9o3yzEx_mAh$;S!*-s0oe~0KF3FVzyelavaW`TwC=oOTk6IZNim&vF#;P6%At9a#3`5BU6iibZV>rpIu6-8#B_+<=<_r=r@wQCQhs3}5|EV|i2^YDPwL_UkO@%df}!?@@HoKpP`_ zm02fz3lHvoW(*$ep?AkV!0%Jha4@?Xe2dpxPN>enTZR7c^UEImvV9_54qt%XWxAwM zS^zm_wropL6z%s9f#KZotnRjPoE{{~jt1WY-SwBL;?F01ylpnVA6kR%$jI+x&T!|AEpIBLGdUW2ct~2Ky^uXqj`uQfJ$0P5CL8*Vo;p@! zkhYCj-M_5+ksS8a!*7;du@#h0wTy#sKEAy*7Z2Y3j*flY{9I*6gOcUAS$7@GG%ID_ zU%5cPGT+myEn9Jt;|ShyKZDMSK4fo^MmI+J7`0m!olVc?DM; zbfDsO!(`U^i($V0u5KCin3VsEU~;v`f~a3L_1~AKPun!$MoSYON)MrO zh4Z8)=wPnITVQds0vVlj$HgLsxY^()oBJ(?HeUc-=xfM6k?p4?Q<`w_L<)reGsMi7 zM`4taBE73MR4vc3L_1V)xgZR04@6_4L^G&&9;PL|i(vb|7tqOSV^o_6MT)buZAc&I zj9-BL#hG~bVFj5_`44{|^1;8`C$T=_Vc@eT3_nRlqSSyaZ+gxcYP+uwpMUMa7ah0Q zKUF%6iRX7bwQ&mmx^6*l<;P&9iwWBKsq-Y|`|y5=BCC4i7qoKwjy-OBU?gNSEmQY~ zgsurZ(Xu|gW?6w#*RG_f&rdLWUMJSJ#4B&{p2wC z9@vI{%f)E1*d1O_wK8wBMG5+DTS-Re^{FQ`3MJddV)=u6=o+TYFm+nYn1$`koUyAR z?C&JpH*Nr|zkkJTMpe`^p%eAJR^gVif2iYm2=kn3*nrtHQMjE&Pwwt|t@su%eNfTT zIx>x}tFqFe*c)Pzr zCYP^T!|iva{|T|q>8*4oJ_{W?UFhnVO5hB$oM%r4oK;mgU(H|q9W;q1jIRN^V=~lz za{?(zNixeLd(l5d2+d~=p_-&3S$2KHe?{{#_U%?M>U)P-$Cjg4$7wE?eh&`LD}aFS zBUownjP&pP;rc3+SVzxxSRr(ViSn345ve_3+a`v#>y7Z-E(2!MyW=SNIRfr@j)%)P zT`@ID9}A3jV6W9pkbFIbUGd{CeepdGcIFARwzeLxP5wwBbAw^Z^+CE*`V~Vbw_$Xq z3RW-r#JpIx45JJ)aopOO%-P#&pcM21%7!Kp|HCk|XS@$hNjZvo?>#VdRx`SNXn`cZ zOK4lah;F`VWwM`W@OFHYMFrh&Fwn*IC%oCrsJL%{g1|JG5iLZHGC(iOr$A=!AnP^A zWq>WX%=>nIbmuLi;22L_%ME%PojYmi+xeEJ6h;>Oa<4P)7{c7dTbnGJgKOfW}mv#K%DiR6MW|DxWOIhJ?xZFV~!$`*$GB zZNjbFJb)*qfuE(mkVK~)eV$N{jWaLdB=ZCGVB`Wf!$sh-gC|&PolUrAi9E$TIRR#E z^%&K=A4D5BBD}qTKVGf_c{3B-cY6-F#{EX`k9=A-uM30fO_`BPQ52czi6_s~1ut;2ooss(%j8E=_Z8vx0wiAbd z+r)rdzZH7L-hd^eTj0;@B>Yh=%5IP3V|K8x<@rA*?4|EdsWz~iu`XBvCWF#!^5{9( 
z!;=N0eMiZ({Sbw6``paN5GapQ#0M68QKNkr=%NTR4-(mTQt>1_(Gb->NwY0kHI&JZ z1+`Tru=}wW78Jz7I37K5J%s+-BZrU9G@*;Sm=~P>*N0dA!=Wnb7OPodKs)y; zLQOyt#xvC{y;TJhA?|J(7L4L)JrJ{j57Tyk#x0i5SoyVAk*9CO_DsLe`J=_D=GsqY z^o$Gi4Jo2M+r$37)_{R(lIS9J6MM3bqQAxfc4<1WbsEoM9b*b#T*y*QCtFH>1giI7o@uh?^fP!j4<2 zbXYqQ_bd*@P-8`Qr1>e^o-W5ePSnHQMceT3P6^!oBL|v)twlSSg~l9H_>o2>^9}+m zZ#8h9u_=pj|F{kKv?d1Y-F4vp)<$c3+`?lAq_-2P{OC+UibV)n^Nm|$d#5vf<$ zt^0Oa9(h=ZPhMo8kAEqsWeBsC_Nk~FnhC$J<-;U9&P$}+gXYcs%rP$pBNTOU>|qNm zi~E2xg(`7h>;>36c`DqG^CC0otH@Q=Xc$#^Khz?)c%R1!M~(aEwBSrN;9&I8RWD^SC*_%5R8qxv&77 z;!mN{Ms5y^SHO=NQ`wEJ524T{18O!0W5*U}#%3gx2^Gk|c9rvxt6jsfn_HOfNy9Yv z(H2a42c*2y7H=mtpyLJs9$4N_&kn}Wnw5c;efuKO?DRtB9j%3}gPw4O^C^Ze;54VS zOE|-C5XMyx;b-+qy0&2*23e-lqm_p-KgWbvPD3%=$erV>^SO6tCipuo$EVK6IDT~s z6KUp$o-53u(d07sO)f@}SXHXd*$=h7uSuQAv0$uD1}wD|@8zO^E=?%@r`>l^K3o2k?8Z1g#$E;c~>nXksQx3vwGV z{hTcB<`~n~ixqKkZUlA}RWYCQb!pqTUR>gq$|!tRrd=VOXd>bRe;WFj8wPc#@<)~) z{g?|gG#Pp}I}RU8>!aM{sW5HmF?Rm^2j)|5<0OgSIC>?8UKps8S8y~uoiml9=03nP z%FFQcj(VJ1UqTWa{{uJA2iP@v4Svr`K=1S__`LEWNRC;H)Y1u-hBCN8<|>GeW`G+0 z2H9!cGg~nXolYt6>_`y)tat%)W_F{<+RHdQ?FqWhRHgNyMjo5McS0WNq$gVDbyeZvquMZ{B#%M4WtbVLiC*Sn>=mthTrW)#U2w027j<_* zw7>)Oj^*N|{Pl3+oDh37sT|XN9%5vr8K&250_Zm7a%O;iCI7*>E#eTh;}#S3;3+(k z-GP~cahR_(i&j*QVTBT}ym)c1q}Rz0;>~wh`w; z6uSs&cA?C#A4_QZ;U?J9Zic!WGAU@q4RW*Cj$Mnl;)(t{_`^DuEbJ?J?Y4d>D3Ye5 zs+*{FwAu=|9Jl9EWE<2va6?5pGjFqqRr}&3`rv2}zdJ-p^Yk}5tkX=>hcscPix&P( zv!jI-wamEpOY!QaTeR@|7Me7^nApeN_-)TVJa)mH^09LT!1m1|wC2_AMc;@aZ zs=3jK;sb9`?VvLjEc%Wqi|QfSO__XF)c~$t3G3DBAYS+;E)C3Pa$9$?gL=hiy0Qps zW~Ae_@Mzc(H%e^@CA7YHCbMrq$x1I!A0_1hd)SFMt9*#DuXqky&rV}*OtA;G=#!M_ zF&TI7>E(&GPG&X1gburGrXWz*qm_2ppl^%a zyT;*4kv=RdxkdC-1N-N@W604HyrB=rs5WjLyU%9^g>7C!5edFz((nWyL~fzR&=Z(+ zP>R$V*0XIZpTfPLPe`G8F8tG$rGLv$lk4e9+V`1I@XH^!%SltnUv5|8nn!c5gz^%& zZ_dcFsWgr}5mo!a#7>SfXRXF(x}QLzd^6nfaKp#F5>VbcNY0OxS>Mrz=o(3IV3eUX zVgtD0Mhm)6cZRr#2CREwO;=r4;YF)xx^;FPMu=TQlUwq5YqliiUtNWgPNKXGf(kG=xP*U58kLt2%&nh>Jein^YU5UI(=SEul zSDL=ukmYq-oo5Xt&tNCh30^Ya;rC@}tls(>B|T&)NbU%`<8KN`{dW`_jL+gmju|1% 
z^*de9{KJg68er4V1r*ij!oRMo$zo?Zihq}-4~zCtv&t1ZH{J#%`1N3)m&^{H^1?v5 zNARz47_qyCr|u*}`Grv&pY#LNsh{QD>y5D-e`+!n7k&i4F#**1CJtvVzl|Oe1H7iw z=V;Ba1g|c&sUcx%VtI!Z4!f8xy zxOLhpmX&%1yx4{KD%%A=Uph}Ms>zU}^qRyg16ZrIXw^I4jn~nbi~JYIDJkbKDgGUV zU02hn!NHXJ_uG^6gs*tDs>`zTfhDVDu>m;ZEB#g5%`xHzQGFL5E;toop64yN^QnYB zpU~iW&b7tuUlK5KYZT4!QMU3bvt|Y-YOoXf{gZodPn4Ru+up|e=KXDU@X$+EXM0%d5cptejjF3V`dac3SwTgxnV-K&Ez z{P7b$4OvH0+g_5`8XtOedlNIq_%R-^@x_LBo}kb#Nx$THWT*5Cwkfre+uC62(EAM@ z>o?&1!!MZa*^fz<;}5{8`6Tx_76qH9)1tP`Y_y6Y?e11&*G`FtjMS+(Cieoh*16EM zS&~@lnS&uQLyTtaSWKjuBv7`Z<@Tp(u;?hZzBHpk;VaCq>;zWBz5u(f2tc-`mu1AR zL1^GaOglCi{a!JwjI#(nG)zLtb|pPZhT-1cZ zQ7@n?UzttK>%7e3USU+bijkIjVdEZ1?39ZZEpL792SHHu^ zT{EdMtd*R1Nbn5xX26lVpYX+x40N2IP2c;)*a@yzK@itPvln|{am+Pzt!PG>w^f|x zRYSQi3~}wI5V*7VG)5{IaoUO|+Y_RV_g3$v;djr-X5Se4V>*_?Zy69}2GLaM8!*eO zjfCs>qqDy~t2polwDL3Iv4$s3;BsD7UG?BRJCR;`&Bqw~9JFfx%gm@h!e%esORvSH zXwjDGY_(vNK3;W#!x8I1!l8ir+2i!-@>D38KaaE)UZ%IYKIl|xic>x{QPbZ68e_(VmY|Hi;;$avb7Je`Bb|U(OS=68~k-q^iMgjJ7_*PUF1l zl2>9N>KTt6{jJP4?bE~f|50?_@mxJ#7&o#dA{0fLnHk}pbM8mdP@z;LAsUiO^-ZO; z>|_%uAtQ=JNXSSe5)GuCh_qBxO2hB|{_fRy)$QJMp7T8Kr*9EW3ZKW*9^Xh&_jW?V z>vN3PL}6aR4nvDo`y|le;1(zw;dIvB>nVlHeJDHZ12+o(Z1xZyK z(3-;obNeC`9-P4Drnr%u@oKhlAd~g8?|@LD>(C+o9%eXfz>B^aR{3KvY~Q$!JOU4szTFrqTuq0sOKzb1);hTU<1TqB+Vcd9&2YC@307ZE zqs`?umgyd5%=+UDThk%SR&jd3F1dJKK}t1B9azs%+8j_{Wh47or z#PyFd;?M4J{lPo2)ximLt7XV#&OGvZEPzM82(X_5Q>ed51izN}VdP@2^EIV|GJR&! 
zcQmB@(m-?|Wil~X&1QBfk(sy|8}D)sTzzFR^~_aLoN7r^bvYgUejMHj9Ak#_b@B14 z>EsyiOk9;a720m%XlqNUr0qO+Rj+3~29j}7YzsV?$ItsFmjf4e?!XtjC*jM|+3X+H zUtr)6gI3}!nYT`7W#ZJ)h*(-X)ATSOnIKV?ptVC=>hf5A*)LJY5<+ zkM{&)cw2PF=%8B_YWKC!L8g>u=k`-=?Mj}UOc1a4dnDtPGepb6zJtY=-w>((3I6m3 z0_>Bb|ChT=sFB5Kf^TW+eq~-Q=NHjN%zyD>2*jW%9Py0{Gd~i zV-_l1qO7xLNU~lLZujVs{)&&pm%wGgZd!4gK?zNk6=sKapQ0_NZa{DaGV&hBV)XNz9#rtg~zsbP*FJ2&7bEWfaa<_qB#uaz~ZRlM0ie-WquA zzk|(cxPU6I+4%HqFqlXFz{9#SY@O|5-1U7bb!ke_I=?Gy+??|;*F75QXZWyE+l1J1 zr>R)zwuOE>9^{>U;zXwAyMZfgW*q03@LR1)`i83Z+A_Q(9~3^*gi~`J*wk!Km>#m3CjTcyO)sk~wa)Te4&AMVXC9ek#AUFdXEGg#NySk{3TM{%LgjEEll4l8ZbyBCIg*N8=Up<{oU0s-ydSfk0rjkJ@`@)O{j6|u` zBb>S9&&?qF3&HuZIvyI6V|iZH&=RIWVWtVlJZNU7{F+XBo)NhFyEul7E3+-VMdb6N z02O3j!xEis_-M^EO5dwPC--Zh{cJ@ztT>zUxj}}lws&;3MKs| zH2q%?cgRu1`Vdl}ovIVg$av--+H*i$K^=k@xC>CT)%`1%8t) ztm9hl@VgeV`4=>dpN|XQoyQa2P0+fUkABZg1%o_(&@j6SGq*m+ z1>1s{sy|!U>IZqq2%p2m)HKwe7y>7lC$u3bk1l`P$SCDZw0wJ74L?*EW7!r-)R@xF z7$!Y~j_SpXuhD5x?297}K{;&g>*Rg0Q)CklbLfunl@vZUjZGg#dTH{2CubwfYe?H> zkq~*4R(*KJtQlO7si})l)xa5_ZR$X|(Hm59*nrcNlyOA-1h4Z)B)Pk7W0lKHX!U)T zoaMGtd*LW*p4K99;Um~=%5_v6aA4J=N8sl3yL9-PKFG!W#2r0Zv=<9$ncqTusS}8L zg|c+-(;b?jo=8@o19qx8aE%xf&b!K@muH))H<6EdK(?#%zs>;@OTO1 zAl^y&-(=Y)*QZ!5rwM|i%PBMU0gB7TVD(EL80wwD*xR}^eVH3R(G8{_ZS(Q4`gvT4 z6LDq05N`R*!=6d}ygH*?jOHzYJudH1<(LDAU3P>8C#`8Xvk`>s=i?~>1MIUaWfh*O zlCiQlria{NLkF&~kJmdf`fiip`o~6ckjlY|?C%VJ<4If{ZqF92O(l`rWw6e4KJJ+s zL(C){tmK}-z2%X-Rijtw@C{+wdO?(z)tAMdJRgRCUv7bj3!5M=SO#+h_(_jnfZ|{H zu^F>2!NMP5$a@uwPq|&Hz-$NIy!(imQzeJ~W>qNpU<9wsGU2`(T=rd4mi}9`h1-#1 z=(gx`{HlE$CjE|K-`tyu_2XaRaN8r?9&m-X`TAtq7aj&}WfSpnR2r|s;iARUf$4Oa z$0Y|!ms7#;9!&P*k`qnwxFr`}qet}qeZdZkF<;~?d8zGJUX zj_l)qsq8Dqf1sry0CkE+maA90$x zJ;Czx^LCy&e;2%Z8%xrfoQF#_0dK2a=d|%k9MwVSQk%;xT$9DXgyksiEsxowqrlue zMi&rC@+&SGq~`g`cU z9%T_G(ZUl5x=o*iiy1aTfdQ|DA(B@EibtHpvUb_reoJc z`o7Sa>lj>)hpIl2ZA=VpC@esY;}2joDg_;R0~k3?ftoWHp{b<=Oe%Ved2{mU7k9gs zcaLUwU$~A9S2&Kw-$+VYs)>90t?H zmUx2Bj<2lYp-t48R}1VrHTFPqCcatJM3tu(;TPS56fr7Bf0TdYz#9`Be&YZyH{XCy 
zT!(b4nl?Ptna(8s?1L?;{3N+Lj9K}@ft_e^2|wrN;z$2Rw11xjvmGNz&xd=j9xq{n zcs;y+ODpvJwE(|OdJm@rFEC3dwm_;%D0BA4J1+koK}w$euy9EpZ~KXN)SBo;hn**o z&Z^(^N@oD?j$P&zcV1>`h-a}Nw3+&L_%kItIbT!bdhFz##ueOd{`BivS~$KF!@83| zLU$vtt2djR-UPAU_pE8oS{2%+F^~Q>snFFXH{AWl0biuc)6_K;ZY>_@qOUpOF ziTxbsD7BrwoloF&?h_c3n1_ykCQ%y4L|V4nj;e}mc-Fl;v1pS6ji1!V{`_Fry!R74 zyBmgccP@sb92eE~_&QV~9@FZ|`8nzZxIL{A4Y{u4OJmMd;$4kXSMi~r{X7ui<`RpR zemwcd2OSqIql~6mxY5@XjS45>+PHjtHJgXev>vf4d{=O#kt6JRH;&^{hOjbj8@#+U ziB_#0hW>YU2)$m|_$Qh@@IZ|k%#GmXzI^sS`$SgHM1y$;f0)#lgVducgV$3h!7`sG zsJ>2}wQ{c@@hjP|p?exGJ84Jno!>!>SsYH{x$*8!Y$9(iGb3O^1O+-YNc?;*Z_qE88Y|baR}Prs`h+I*Z%Ks> zS7NZOpdYS%%;e21slX~O2kuMy6g^ys5p&;T$cLlsxdU?8IC%oRjZkKEPlVw2W+N=E zssK~z519Y`IeC}~)0R>*cHN#(_T2Y%D0oF1HQE&M(^q1)C=XEbB6;Ro+z~un`UW3# z&0)iOj&VDsGNo@{L=V{iEUzYBf?`j`nb<=HJxd=PbauLM(2M{qbQTW%ljiN*LQMx9_H`;X{p2c=!GUs!Bzi%cL zrAOi`uJi0pV=sojDx*31h}#AavHw|LMpL0_P`f6Kcz`JF?Tiod_uok>t#CJyB;FCffcgoI+V8RiwkZv0)3M%c_1(9HH?m(VO*TcV8(p~xYw{0XUFX3?tx-h z6%@(KzIU4*J=??S&3vTs{tgv>=*4BcSe~C^2^)Ue%wnHqE3Gx}WIS8EvFOrDEaG%c z$<3FLw+Eor1Gy>mPqK{IwBFWn^d~ z-$Zs*rXD_vj-|I^K2R{+fme>YB)1(8D8IDPpej0;=8m+EDE;Z71!US$8MGQ z{@4|^w|6!B_r4hxYHkKKK^{mRQNaLvG3tFihTVr|u}8*wVO7%^Tx$`F%eY%*<_l?( ze;o@(e+J;@#CxdsnUA_>e8D9jYVe+(F6Dli%I##FpPuvCw54}K(+WFwx1|h@em%zB zmcsD&@2$M#r5kC9oewBo)kYIX2VUFO0LJ5+6zQ#-gifX(@lClqDhw|tzl0OK1n%GA z@|r;Q&zdRdbuJGz1#986`df_KJ_YkJf+uQq3FZBtW4YxTG90^rFV4I{>hWcJ1SX-_ zUU^vR6wUaXMWDasU#6nrNuU-g@WccYOH(L$9v*fC!e94NX@8{&#>HgeBU@>(t?+^y#$GrtEffRF2#QTq zaMjTb%mFPYsMzj^2C2tkZ7kuIAJ@P}(im&@a(=p-`Z&aWpU@)1=uK@Xhop^n}Ds>fxfOAkS%1^0;fKN-&DaQgVuU>=IRxG1&;tPa` zpMiL{0XWnbEA-jB{kh4@0`FI_ul$?U3sf%A2SnD(~w zu<_1OxRiVmmiG$drv_=9keh@2b_PMSR^sU3xifgFc@QwZmG@ByKP zrHqVyJ(DT(2uliYqV!)G$Wi`;N4qTH&;dSNXp#u8jlSZ`TfsOduK+EF!r>DrY9DZ8z(b7LI1#6aDT9m^_kSeY6@@mu zU`(oKknQ&s*!!(u4yS@mgY~VrPvI|smwh>;x&wzl02y9L@ zq4nFeF<2>pyZg+hSN+A<%y*YDjGG8K_vP^fzZ}!JtOHKw4l{*SLns^*fttbkIQDcY z?XGWQk{V;^bgU-xWb7K)=`Mhc57y8jn@zANNt)vtpM|S0i!s+(mD<^E7+k(jq9ksc=q(A(hK)Euh7j=L9vmvu7?{+on< z+xrKK%f2&%*BCYf>9iT0IO0p2 
zZY{+WxnRsZHi=HW;lt>^n_%60?soJ`n}X6B!9%7CGY@HyXXHzI)8>n+iW~>9yB+td z&Lp2F9qf_?*?1#xA8IX@rL{l1conZ=EKE-3(!HlM>GHl9uD3;&7aCni@_kb9M*9Lv zd4|#cG&@qY`pk61W^(UTbrNXnf+8-rUiKr9j;E|3@%T9U@qQxRY-+<5x*F86w;t=> zo`pwg8?mms5W}b2ky&ykedB&!N{%UMZ@$X0R{*AmJ;$)kDY(3H0lsN;A^uF{W|$Qi ztnw3PZWcw4XS$SH`5dkP4&i07htRD!h;>%)spI=Kj8>b%@=DUsWnBqevhk-eg=RdE z+Y9{GVYs?Y#$rFm)w{`a!SXZPac!+I8D~~t%e{#-gZ#iy$ePU+kfeppMNshJ56PHo zVps5S${$~dM`WIo*9rlyFLDS!-OWWV!G+QRad0Z(Ahi{3!FF#i?2wp&(tLNgEYf{O zct;-%2zQ~2M;*R&KY?@qRzY6NDY8`0q;1yMnD~80Jh2TnI4GHf_mT~9m1hCd;Sd62 z?~gHGS!a&dCrX3SpP|u9oEN@n9i^$5km>vHD1O9+x|5dB^Mk|e*_soqOSKF0I<%G) z&9fP;_u?3lbrglQdq89VI;_xAAim4`81vnTKDQLJIdg+(reFl?*1neBbqCPoZ<&}Z zQ^)nN9iZ=AmgjDVD76e`(czR%km>iJ%};Y+YcA&pY7C^}jzCh-%EDhpNjSbD7KKtg zX!=w&T2oNSx+s|9x50hn$FcInwJI_4=W|Su1r*)QY0ix=A!p_>?027t&t@3VS=&=| zicc9Yy)}T4hp9LX@8T?fSMusMfQ9mV(faBV}yF`Uk_5xCezR{Mf&g1Pdsow z0))P)0YcpYQ^|_`kt)IZ+zwT!tG~&mqw4$L+}%=|uELn0Jqd zhfa*3Uv(zdbS|N%A5}2qehVD)EWn3*T2S$eI`wSqM1joN)RK~q;Tz7;ytN6a&>Idx znv>Z(;)}?^VitxP2I8ung1n9Qy-9i1Ua;vH1J!AD?AssZ7E)Tja306R^%UipNgMCs z$}eJ6bzm~@guru-BN50tE|x&6H9`1LL>Kv@56}$-QRK6p%-i+yDxRDmPeGCI-gFJT?$Ck%lXdlrD^ln|VfxCK27W1zSH4z6-)B;}q4PH#e1OCpH<9W=<8 zWT<1F=NItaDG1VaW^AjYF6{Lw$1d$My!wrw-6hU(8b55J31*2T80%r_7az@wUOx%l z$6e@`@?C1tIDp4qjibi$5xjgl2><+5z#_rfjAmRut;$P+(9(FElEu=`w?gdE`-gG- z%Y5w7{D+5sCeaw5HLsBKEeF|N!#(5M$@GJj<(E+-ivW2g_QpRA(rV3QI_txE=>ylv z?B;yv-`Rt4*I&{6IfrP>-5)mGxdS(bF3__Qt`B5Q22$M%dg2>Eu~UkvaicWV)aKwF zFMYcFA(S2|MZn!3xAFKgEj&L`jvQE?{w*7XQ~KsqU=xXzGbCZ_S8+1t_WMsY%4lWO zMr%_-IgP~~$778Uc07is|5PdMi8j5;*^95w9e@b^_sArcQlwoF_8xR(N1W5Kn(J9& z6i(8r35Ph}zdCv!RmHa^7cHhQ&cr2;A3#sZ3)IOxMsiifC_lEAyDwY9bG>4A;iMUK zj6Vn5ln%4QzQQ>0r;_;hZN+V6=V|jEQQ8}=Ot&S%@WQIqB=uYn*<|iE<2e%p9u`A( zn+o1*_=kqOE0`W9F z`}nc=gE+j(X@buY3CysVJE){_H_Mqk9OUjpUlhA&S_sEDEA^reHtCdJaE&JGI`h;Q zNbw%@*>fFnzi7395!|XZ!Wq*8@KC%ePF!D(l6Jdj_bdbKc#(ndwk_Nf7o>P(K{)8E^w9WNqPc*@-3-vQ8C>(dXNsN z4|0s^0$f-mys z9Cv0Gxs1)i)iz!f*b;|ruF0&3eJR>?)Ir5mVVXScJVdGmf&F?rdMpqOCw29){k#&Q 
z*irUFiw%jLiNqMQ<80DTWZQOhGcC%Tr}o}6davh0MURXj;1b6po&SXNmlV>DCH|0~ zd>RdhPm=oLA{@Sufh(%@d9jxsQbg$~?$4aYipaBUmbDJ~wCkX&3WIWkdf3kOst;*w zrX!uQ?6Kdy5W?=mBJu0!^C|&%TzE~&?t!p<`&`^Unt^ZccW}B@F5O?-h~XoubZ>zu zrCgcGG9za&MZ64VarZ%=2j@}2X9%8!{^0!1KD>stp7efv9~dr?L?a7b-l_aFi$^)% z@RQpxtRFp3$x3-#C+i)wlL_IeWbml>^Fj7^feOx@>yEiWBAET+C3(a>1+n%}-tN)+ zC^cq7w@j+&OTc@aF>?rQydv2eQFF{%+YRY`0%*4~5-7Ew74S z_E}r_hE?;FH@A?)Kqr#|T}oQ*ESV}Azl^%?HH^3emgtdoVLfoGT&RY=cV zreluiaaJ|#8fI`is^_y4bap@wnK&DqV!H(SR-Lersz#KDEk!tgTHt)#`}qWE8%WZ>TuFM|ZG`LRTm$~u z?U2WDJQ`+ihH1M(8DnlP3Ed}5A2bq~OIC|nF|SHYs!zj>Em@e@9uH}$(X{Sb5~&`! z!}xvd;B9kTiqB<(+!2N&A5(>fT-dpULbkJ@o^sB44$gaOYR2ZMuDyq zs7mW2MW**+tY0>-B%q4Ts+emb8*`JcPs?Tgv)hU9^SN12K8@QsFJqtiDS~bOI3>9W z9%ZfKrM`)wQHo#(_t?;H`u;k=YAy-AkEWR}nRw151^3q}QRmW2RC?E*)IVABuF9>(h?12g5od^RYc9a+W>plA zjl>C)3SsWO6`=ibF?N|9XL5_q(%D=-c6!@wG&m#yb+cDck!3LstH@$Y0)WrJC`7*# zBUW$W(ci9;p#kf__2O>@h&<76i(YI&|`2Lh7 zyAOh>9lHdJ3jV{EFQM#5TRob|?Sa1h=h(#PD0XJDHe)Rz14Ud`W6jMO_&Y%alszP< z)@uf9TyvTP$_hZkdloXqt7)s;bj;(6!4(fz^2+(IQ1iYHP6)ljdKK(t*GU^-U`QJy z)0)c2Y?_WsS5Ksu(?4TWr#X9k>TAdxiNu7SaAdv0aHK+n~G!^aJcNw{_QM-9k*@qbb1Unp9$evq|PU$0|%j2 zSQlMI{^J?l)@0mn&VI7;k;F)cf?8CuRY(k-Uy#Qt{k@HrvK81NdmrYe zhp}q|vOx0KUmE4KXXg!>mRftecZRh&vfg>FlL=Q1=p1a@K)pl_&Ab7gWEzW zLx>-(+^Qj2`w6<*{hz-zfg(3{B5{#?HbEx1fgm7Fi7CM|^Da=USz z%{m+u+(~QXIG?TUI<(54$;t`5fqe`4c|Uh(kb{sdYm(OV8tIt{V zpwbvQ3@4WREXJG%x7d@%N>JZPjg(|J;eSgWz_r6#fFeDpy74=HNp@osnTwcrO#+YH zXaKMIrQqFCz>Wlb!AD*a!2Ev9+xR_?yN9R1({Ne5ds-Szebd1iW0>{?1)N`zK>PjP zqFL_~Uitfzv^i0MTCeG{@(x3s_Rg{{Hz!;EVupD)&JQz@T%iej zChq1drHV9;aWx|zq5BRBOy9{nc6$&@e#cR}mmhwv3!;ZYyU|Eoi9R3fW<@3+qQzRp zG$-H`3wq9wvc{TKy%vel+lr|or2|hasv+0)HkdBo3M*u?DM4ABEVp%#kMkwotg~0Z zWgCOn(t0_TT0Um&FK5MsW}u%yDz2@04n_Qf(2(~YljobV>z8KHp~Uq#hgSg$zc^9i z_9$*IGG%M;Uc|PKmaNgTsqEHACrMlD3Y?q-boNDq5H6eSi9egNG1_L3{ooXbgZH}#v&JZAdIqcFpp3rb z|KZn7EW6m$nH6oj%|tG1#Tbq;(d@7q8w>?WnNJn(cV)4?pX>45>z5z~y}Z=zdE~$C z2Br_kve(w%Wovb~_m^`5UX}TU#n;Zkt>^)iww7WwReV@u{X?Lr!^65wM{wU35w<{3 
zfH$#EA4M`=V4-UR+&HI8;&*qm_veXXg3e+R?fi|;)~WKM8}{PdvH&y`j$rpWe8y!t zTj*HpYTEd8AFpr3gr4>-0Mno!Fm9a9JEU}h$$l7wGdK;Yt)zrqx$Gns9@a)f{f9i2 z&2Q+_&>L#$djW&Hzk-4_$2v%O%eEW(qOE2V&-oD7wfN*OnXMmX->nRy!`*tgMD7PF zySIaDK0jUOm@&^nz(7?G$FY=-=IsKt))A0>l#fZ-eC)fqizqxa3HQ#@rqZRGDXf1v zcAYPP4cja5E|Ush!_PxctR<7&bP%V6d%&b-PX98uL%WG0Y{{>aC@C-*uicj-f6>Kw zdgU{moa~9_6&^5>t4iMl1z6p@!xjnQ-t=%pm&^TaVs5JGk()#`afft9690oc|Sit~UhlUY#VXDO;gbXAWa7r$pB-2Sdr? zG3L!^9E7`FWXg?vQQagQ8bY@)e;f@_;@KQ5cy)(kNM}MLEytJ{x#ZM$mYE;Bkv3>t z$K1)2*zIeULuf)JjW)f-CtJUO+NcJGKgz_mQ#>5^E<)b=BTVFtLT31>Fgm6S(=$&` zM(0;KrVhHIU0DR){Qe8)?>h%whX?RW-+Zbwdtl*hI+Nz-%*7AKr8#ey6_^FqGBU|) z;mxOT=54}GboP@Z0hJ)WzmX+x((M@Y7fp#B3b^mJP#`PjRoqa7dRrn4~V z%x-%8^$+IPt;MC>O{6l|mGj5@;yhnN?hgM9gr82p6<y;YaO`oXTytPGx)|ZAoj!? z7xKS_A3ML}zYFIfsr&}>Uz#-TD|iV1J&6R<3MJ6Xlmxd}QE+#)z|~upk?()U(Bgz5 ztnhtKN6v+y311Sktp5%(vLlg(MP0CQK?oE7{4wV@{RY{;8tAg72)vpQ1A%t(m}>kJ z+Bg=T|EnS#@vWhw3*@1&p$Qxd?C{5{S71By5d7)u1q)wx-8bz#-pMXF1^P!vWALigD2`a{9Y_Rvk zpO^QO+0KhlFl8lb*Lg8r`PXR6(;O81@PT<>nu1~xl9V*nhKXEmL?!PQXU~##fb+au`sn{7da8xOem57_RKQzzF?2hL;QPR+XofK5KFHsjO|DNKkd56EG?y=f6pbx3_Xa;1PG1BfW1^&BEI~dTpBE=CgVnmC zbgR&fW1I_ex< zPP#FPVg(qP$D!thGz<0HTKM9|FMPl4D`UFc4b*aragHa~g{*W8?g~0%+{P`0VWG^5 zmmU}qS`F#zu2A}qrTB2;BKC2_FzVKPL%}f%I+eTvyZ%kam6NVfVqF>LF5@TV?Q^g( zpbu|u)Z#jP!s*=CN<6yb53W}a#j+{I+&-`z(`;L*eeF#6tap@7d`!SliB`PY;Epe^ zL@+7rIk-4n#eAD5fUo^h@xZG@5P$muXK4I}K`S9jS$!6oBd+1`gi3UbzK)+dj^U)N zV#arW3W=IcVoqx=#df8GIB;MGX3MI<#{D-@>)`{uwD1mfiq=q%&|B+3TY_)DU{NE>Q9o}|b3y6AL}G|h>< zi2KsKaCVpvIl2mCf`|%jP!+^rV`G|{Hy3j2gXj$BgX_M05=L)k(p_Ud?73CR@yt1H z;oc;ipb|xMNBbZkCz=Uu+KXy$xz5z9PLw>j77Rv|ap`off5~T1|f8%EI| zR+3zRKGTsDLMui9U!2Wn((7(BHd|{b>Uk2nHU4ITe%n)7)I8WC8B8Ddc)&XWPGcMR z0N3)j(fFE0xaJMVPQ5I_PA^&w&!#)!3z>6xhQAc*T%yTL;~{O`x&&`6PNJ_>0<_6r z6eX@F!ulQZ=sTIy-F8YLTo{7%QgN!;(9VQ)RluHrX|%lL99%Nri9%@xbi7UzPLA#- zD?dBzUGM;M-g#2&`axRW5{zykTN%?ImndTJ42dX5LvPO{?z`-YC+F}O<->RIj%gFv zF1!G5PU_H>tLmuMw}ULWELFKuFf07M6<+Q+Kti^2XLOKCJL(< 
zQMU7QSfRve>k8KR=y4kS3vZ`i*>Ow{cUzA#*oML1bt(1$*R}K?*X1o6$ePXlhW%F? z@nTdr`dO`@-9r8NuKgRhU+e>m7F*J_2m<40vas!zI+G#dg_Z&c%Y{8idJD%F&1j@G zcI&b4a2Z6wP5fP44fYyS@#wo|t|zh#WL9({^ylF5MT?ovwpNrmA4u!=T4Thx2L6eQ zC#NeM7vyam8tro?ZO&JxxxtS4^V}Eeb#q`*Y!upkKZ4#`x-@}ThUecV(}ldvl$YIw zZPpVgD}4xBv$^LnOa~@L7}Dv6Zf-ZR!sdBbpu}@0-B~9>PF?c>G{q@lP=*41q_}QY zA<*j(B0KR#kZ3aj9rEwm`ULFN8f^TS)O= zER)O&z#yfIFw3EgTuoMB_vi{XO`c<2FB7D=lmTg7Bt7D1o$shC%k39t6M;hCUjFuNy0W)tIJcyA5f+I=3?mn_7Sc}r2f zFN<-XltS!bA!f6UEv{G>h`lQVFlEVPI1^Hfd9_cm`_x1F+FwdLxeh_z#vnH9}m^|eqIxlzz8Am=s^A$wzSK{c{-hms$k}1lrhpz09;_f=R zI6ZwU>To@Jwpa#VgMgaC`0r$V}`339c{f^i&@_|5yVj6oymN zuJ!bv`#THgAO1AN%}f%>DNNtiYQ|+o18F@B#)Wglz+%r@`lF@^woRd=hd%H_A&dOH z`@rDfLE0y3kIElXAU#x?Jvwa>v^^e!{Ry$?%;gvk8zj+q+;du~vKZI6oF&r@0(5Za z9l(W2px-bJi+*U(S{-3D+1~>{rKgbEt_J32Vl|k$Y0y!_EVy*!0NQ49-FhjL!ARpj z+No=c&PyJ`!S_y-wfY+^UK@sIJ_a+(_eWA>Og62Oi3TfkNxW~I07lvNj7Vw|UQeh8 zt!-&=pm8oq`Wa$oojGCeS*A=Nf|XLL2Yt<8GzhXrqd^7Q|91z}O55OzJIOfWT8aH@ z-0?}+d2G>rjQs5@ScR2|m}{sBapyyrWBqrb?CfG(mSK%ke}{wqw>Vna9gQyW`uHo= zAH^pQLd^0?Y;9CU(T{Ur(Lyn{@a{9F>;^xInh4{s{`>U%?_u!fZU#BY*{EcAnnjlp zI^}W(Ro@uXb(K}btKEjp0ihsztr1UGTf-H1PdMf6$IR`Q$C0RfSbOswO!@1N_NDx6 zrD;04Ma!UNuoU^Yn4%@uIlzsb?0P+Pw_L?Q_A;*9aeX>cDt!3ixZjMZ@x13hnp>3zl3Y=k5fE zRJ37=uFW8W^-(apW+#+=Z-Pv*bjI6x4=$PF2M+8_3w0@OW|lU?)yiCdXLcq;E&Gdx z3rp!uu`oDiuAqZvZHQ+iS)F4ppjOsQ52p>|x|&tEg6o$OJmrX00`suoNgnzHEoJQ8 zBbbmnQI1C{PA|5cW^$i@#^*X4u|X<=>(m^^Q@uH`c=1(S{=kL|wkt79zx0sGeO;Wj zQVL(qb%$k(3K@rb7p`mK8)N_RB#Qo@1Me8ke0@_!hj;MfKbgfec~S!GQ#gR9JnQga ztq?ob_7{tLc3Z5iNu@;BWk?ThUU^7&F9>pQE3g_GU`i}6o8&51?XmR1v54y0bS*Nk-aU96DDh7)6*bW z7`OsAF9>Ent45;l#f^k(U2x@-e-@QD9Z9l!GuHO%V3J}3l+Qng9-?m`TH^<1TIE4k zYbuoPzRwI_*a`OzSmEYkVHEXUM7|EqQ0XHEqDS>`-GUZaT6Y4Nu0?R)Z7r-AR)kyS z>+sE){j^Fp34gh#Fwpvet|WiMZkcrEQ1B!uL9QoA!vME_&1Z&wNl<#uZuHl=K~iTm zK$b5B42LJ+FM+49DTfag+})|c^cu~2v=$;fi*V8AG8~v+0)cy$!16y=VC&CN(DIy5 zYv;$4xtlAd@J2x;f#U?4AIG$S!(ejE7?+l9gz%3VpzGy=4|>%|;>Rtx?KTV3UK%66 z+Y#8mxekv^NtEkbCos25!AT-yh$ZRbNk|bj>+hYa7oT zt*J?tL&*$sOlu8spC`M?irul_oH^ 
zFshWx3GG~ovwEG-jPn6C?pcDtQJfdkMS%@@z;)}m2%`85GrU#viVS@c;m2lqe17`} z=Yv1SR&H-5mzs0fs$)W#iJQo2RU$V>^+C7oSM;$x0nZ+#gLG~_GhE<>g(=@aIz$8u z1fx*QVKOT{Qi;p|DB_DuSxR2%g(Df;seqf!ofc&PZ;BS>js~;A7m_S)f7nS*rd$Ur z?;%tEu!#K6T&JqwY2fK3LROD|qHB5!R!&ib9U}v<&Rh@MHdHZR3Jmbrf__{RVMYR< z#MsGaZj*jXA{MNRr3+3KD0?S~F^zZ)p*b?xHuo7Cly5;zn-Tay)^vG;5}gZN&5WJ9 zgDUnMDjv@aLirkJ2aS1R(0@vtDB zdv6N?v;USDw4YMNogH$t`{YNa?dD4yoUsSB>Q7^Ws30A^lm{2h2hb_gnPg4U8QjM) z0Ytfdzeo$m?c?Ci{aWTui5nOnafcGMYBV}EgWg(Ib9c-nDm*8E-^N|(TH;k$qwmfk z4?kl^u@F1%KxFQhXR&`rEorXfgS{zrSjaI?br~J{k|=>wMUzm-tPc;A9f7KZTjVPD zlpb9WWH)w|AaCPra#?T+#HK~k&A4&wmxBMT#MXnJD8*Q($QWbpVN4(@WMbC zI<)6O>fT-0SRu}F)6Syo)7`Y)Cmb!xlo?koPg=(PUI%`2j5eup*tI7aD@_=5fBzGu zzsAFYw0a1sR|WP^92kkm;OEaQZX95!ZTb^fs$~PqB&_gAjvzW$CPBEG2kfyafY3y1 zm|_!$d%XQ=TX{a$3ndJzKMa%E_zMhKIL1WUd4tWrPgMVTHzw)7VU(ZS5x=?_8Bjaa`oVmYMK<@CfFHYEV_|WSqoxv47lj z2%D7yAY_gNT|ZZj5+4tsT5L3ZHvI$kIYp>6eb_>v^*8B+Xw#2Xe;M0<87Llbf)dWY zXDoG`$RW-V4^DAIwQmD3Tci~;(h|r#$^mTRe6&5M2Jy?qB=+M|Sqy!=1v+cq!2X3xaMy!(OzaeXcHNv2rXxlX6?rkZA?-Q!be)BXEo1Q4 zLYZ75df1-6*{s3&By2I#pz-6g>Cp6xct0x}{2%^9^Un#4lgT6IiTopGD_;%>wyWdO zQ_{G^sDZO0*cYf`r3wj|4Zv7y$Lx`^3e#v?#+grTk=C-2Q z^)SVFvqglkJm<@~OOD~E8N)Fd;_rWq9zFN*-pvFi@P8DYhd)>E8^-OD%#hHsX^2wJ zeccBw+EcqUB<+NjmQl!zh>V7Ik*JK6ii%1l-xd|^p_I1v@A>@+Ue4#7=eh6edcU{b zqyg<$)B8J{q&F7_i0AwnIuv!~Nsg*=_wkp-Ja-$uC`IvJ*Uos@Q=JC?ok=(QD#FoB zbf`Y(plZ=4>h0*v?mrgu1O!M?b84V_>Jr)3UBfHyPN3e$u-C62dMvv@>n`ZCPxy0@ zXWS}wYM*KC=rCH_x(@3U3fSImBueutMV3}9+?P%cvyh*(8-u=r{WfA+DxKcHO>!ANi)IWgqn}5fqPL+D zH{R$(ZC@zM7B_kzq{xS{HkD4sZ9|8dJ2AOvsBE`l9A7ut>^vjnEceT@roR(2VXQkD z6WU^3$=`S~RE9c3J&1I|iFYs!+1CMNfhl;C0d4kAog*gpG z)rArG9Jc``e;12ykta4~|3}JVcCFLv6T56~LR-BT@Cdk#?{U4jY{+i*-eC=Mr3zje zvIfU@6W5I&O>M*caqR7L+S3q>b|q=p;I@To&ko13^<&BU!*-O2%-$Eheb8@a$>;U} zzG}V@BmHlYzVm;ixmyb*ah_;2tU&gi|EMJB67?;4Al0rcpjm&WV|dp&c%HkF?_{KRb%ux;{Ow z+(w3X+R4WoP4LTFiN;h$FpBqM=e$#F+G9V}PW(-s7Inkq_;yhG*b$G*@_B5(Gd$*D 
z4DG+H#x?&nVbp{jq!VdMzK87T!Idz|U2o0p6@vL{rxUoaUK3rm6=Lq?tRIcfR`zSX%0{+xJ7~3mBKzwA?2QPFi3cxcQjwef@Kq6)b%A4^|#>g)(2?6K2E+~E!fkU zJ!q>*qqFs*bL3>ag#1f9$oYrhkJoMBZU?RTnEM{SRrrT}CNX}!j-fGQy5p(QM9eAB zk)N-6N-I_yAx`ldEm$X*a860|c4-Jd`eZF%H{9tmqWQ4=xNZtQRhaPiD>1y(<}>wM z@4*k=G~#{g2Lui|hMc8-lJC2R{Q1o~IxN2Ny&@I3=j($g+iZndtL?G-Z#d0~+`y}Q zg~yOF>U}@SdVGeEZBC3>bJ$HnTi~0rA44cuNmwN`;S4 zbr_eXD8hT77fm}<#D|-PV@J+?tZdpU{GQ23s=f)!@o%8Ae;O_vTY$Ff-m&e9-SqFj z8bo^D#k>DvId|nVES+>hN>xwAlbrT^)uTXq{%{{O6jgX&n^9P~egJ2+?Zy+^^`MGH ze@RVaE;{7SJF_-mC3-G5^B9EWck9O`jPVQRjf|(?k5W;!9 z-*FRil#Qt=@GTYGsX*4^Fp;0_!y~?R#(c|nD5@xuj#ahr2lXd-yFjLVZODuE-=V{z z8@K4dDe|hX<}M#RP^*@PV&7*NR9G!I0;%-gaRBc&8iIc3E1}-Uo1TS;JNubt>492% z-t~GKnmz?$aCR&erdd-_e?__HoIrLq_oAsw-pNTX-qTU(4h{G@6?OZcO8>qZQ)Q|) zO`l?jw=Kg_E!b3kPfy9=Wpg?BO(7P?^y8JmicE73@y;i~a@wi}Iln$ax>29V(_b8< z)Ym8I{Sx5?I<%8URSVyc$5D)WU<~U^;r!QbtekNB9}a1MWzAVyoIm+KFPk?2>z*!z z**i^nz19INNl&92i!xaKODEcR(w7(e3%+NGF>h)cgV+^2*>;yb4H0unyC3_}IQ0u+ zw$Epa3}3nEktszTy8y*@9pHS>i(ma$$x8Kl2vv{f|LnS8@SFeyE**!_ua@$|ZUb1w zX*{etDbPeCQ`Fd0U{=&E_L(;xDM!3`sO?DhEv&+??^`fp#dT!3y~8`}BQ(P!iI12h zWB$`*e$XzCE{WdAoj?&TIXem=7w=N^8<86i(goK0;f-+#wVZ#?Zx?EDqW%$Vyn2N{ z>=R6j^Yds}AHlkAJ(x9DujJDk+^FDCUw%C86MCQDgI5O)QEIe?8pw@nvo+;8L3ATQMXoFQ-AF&&RB;>q^K zNp?M2z;_)sK*=TnQQwN?O`%{qYD`75y3@lyZgQOaY3ai#KSV7XLt~Z+{>^$VuE}P6 zI{Tm8S7imxPkPTUQ(sAb_dIZTeld;i)=qwTSsUZ#mdK}Vj*;5v3A|SA0h?ZFh5vHg z>FT7m>=W1%7Hp1T}gIWsmvIygfWapK-~K{(7anhl}e{tD%YS?Ng4K&eT(TJ*^H0#N*fzYg zxS3Wr7t*!PX~Jo_j$Ya|!%As9Uad-SUU#z{7irGKg@|-#<+suF+Bc5(UR@&T_BsPs zdW}f$6wE3)EQPJxN!LFWQ0$JsQghEUDx&r1OrQ1fzbqN;Hn!1aQZ8diXwg z4jG=FOvz∓z@9-_+cYm+*(emwIsgu{J#NMV>S@U%0O|)qzW!;pOyI+BB^OZ4PN8 ztw#6@BRo+lDMLHF3!Hki!?*`Al&q2tkyc`@Y#)U73F0!B0~De0T?#aCX210sSUfI_ z4qq@w>APr2d&UYpeVj#?%#$Ujo(c3OZwA`0)ZkN2o2cNIE^J3`=2{G=s9+VIaBhiU zPb$b=byF#LD6hW; zeY2Q@sG{o#X>UoVRh{78+J){P?9DxUWRmgAuQX@q4OBl;;^RY)<9z&hY1`M+JjgXx zD%D#K_V>k~VWqgCeVj~#!*HaFm{UA?No@QBK-M#GU2yaXiI~yi5y6Z3J(xGvHZ59 zrG1?q;AM9&jO`tRS!ET}d07{{k9|+kXQ9L}sG17jeJ?PsLhVh$ELTLU)P 
z^QARYlx5eEw$kG1gn5r!VN8J7vz%`YS=AB^mq)V8vfHxhKu!6rpoaDuWWq^p=J9yn zIoPb$nI32U#hsAllJ~}bl1h`O6xK2Zqkk@?`GJCwwjdRi{~YA4my*z^{gsZ2zHt}5 z@i-Wm&(HVd;Hyb3=~NqVx?8^3FIYKGejCao-fN=eQXi?Y`)e*&$l|0td&z#{Q~U_) zz-qM@QF&vGH2rK5r7W2Thhx8_!8H<&Ox5NJ)e&4h=nqTbiL5<34fh;Fc$cdRoDv7p z$g?}C-$6yF&%XoT>^zvIY9gWUY(AHgjj+C{QpEBT@T?>R@A475yS|uk`z@UAbcB<; zGQM>xp|z#vf$4DNgGYhSroM6}W)znAtytL~2JdVEeiwvHb zk-9 z-~^sHgu->FQ55?gs?KjyG`i80beCoCzU6?-VX~_jY@-Mo|AM%Tx zeWvcfap#3NVEYw)&9{)j=o-xLUWyNg?Wv-7IgeDW;PWcFvRXtGHuSjy<%^=n?!KI# zb~%bZ!fTvgS%X`W+OTFUG1Y5SooFimvjQXB4^rin&Gg969PO8@lit)>H0SR?Oc)x;-NqkA&P-iO*89V& zgZ`k}#zkU}Pm=6#hP|#FLVK+)&%J+Q$E5AT>WMAYj$Rl^dVtV1VgZk79zU=5Z!i6q67#mD*cuP29zh6LM z-WO_d&xE%5I`Xw-bJ7J9tP=1-5Qj0Np*JbNV3+|4fd6ZTWyMf1|XQOlv<*gW&BtldFRKH!j!X^jrtK5vLP?~86_ z_94o#6h81X>Qq`M)03rVC8K$7>C;+m{92?4mm%9wzgI_2i9Zjm=bG4iw;i|n;fRa= zNBBii2u?1&L}TW5VAbpk^3$lL&WO5$Qj)@xQ*?1d*hn#W$FCr zRC*Sxi1j)bVKFcsC(3?MMx8siek*g0=?v-j?KS!%6N391HNRR=ZyEor1fpBq}JD-+wIDu-zf^n zO6Z1*S_RU^>!SA{+~?z{(Alrt2r~tX{fT&Q4!xR1S!25Jw3-U;FNadg#1UK={6w64 z_2f>9k@V`{3traHg-5Td!{#HlsErfP^uFGxUvnS$?kO$$x5|SRo(Si7*(ueNeR9+E7 zPxe&fK%yC!Bt4S~KRo2*zXrHf>CH>0CXkg!0+wZ$Vna_S*(XKh^2}nLRW#3W$5pLJ zy*wX#EoQ-MURVCosXgws?8V3aBG0+onAT7E%FVs*aNsFjdEDYkgg>|DhS8^~;e9ka zb=Za#N>i}xr-ZaWmgw!V4j)bqK&RVpX!f^`7%;i5eDCH**nDV!R?<`C)H!f!buq^` z*kMr6e$GpeLa#jGCau;d!+%k5pYMT+x>-;(4~Ej-B(hxTjYumUI;J3a$(C!lt7;M= zeppC>ohR}wzaVi2vlaipmy{;F)iZwfMCF#J;MoPF^yvgSPB|-GP25Ay`E&6|Zx~j! 
zTF5ICT1X>v5*^8$f~5h9$nU(BeA~oR<&tu`d1C}^Je+~FXPd;1v>5H<`$;}&Ke=<_lscRInW> z4u@#7rGao|-=mb~&Qv4zqYKw{Gu~&emtxrX21Rh<(eK8J-3+lm<>Ye_{aS7&O@whH60so-NH2C4B0dF6#{Bg zWEkl{Hg==nO>t7pm)-O{Z?AO9Y`V0v-Bl^-tR~MfiWR&Y!q?*Sc=J7nBJw@R{8ki= zdpwO48Z>y*tu0c`>?j)KEL@%9{9YcYNw@Ev$GSa=vhhA+IqAf5_R~>CNNpO1x{eoT zVnsT>*cdTg#7^k%5!(2E0QIV|n#Q!#|1 zI!SOhTgB&p$J5It;pF~98CvQsJn{N*=kuPMvE!0Hw-$3x<6d@nlpKQ2yHqLh+c?tI zeT>x`cHw$dl(hZkL+O0?g)mf6M&5Q4tk^6VJ6CH-siPeRi=0VRqccpJ#?$3Td*LL1 zkZMmP($BIq)X#C@(>+vh=4`EVYR^GPvhc)MBXO6~6SEKh3Gn%NnC^8;r=KbP;S%{x z`fJjKhIbx9`;L7>W^N&=f5^fm^N%DKMIoSMFJCwr0iRYPm(~(Xt`;GbIPer!Z#sgG zEz{_FU0V!o`R%NArbH@A+Ky)j>?m)XGMpVM=<^2ylw|u$4SEffq}W}!!H!Cbb$wYB z)5vk!D*9oPL`5HVuy&&x1?W1Wa&H=}=a(SgHAku&*NDE`-JtV%5oR`JNY8c8lGQd7 ziPp_Wm2)mtUogbn%yW`L@K=sa^Wcq(=&{RAuJhA9GGF?JUX#|%|}oY^&kOZQ!1o3(b*u_wYY zS{f~lY!tcRBjMC>{V&PFHIn8`*2A4&;(Vewl8^74$-@_~7o5CJe8%9caO+0%yLS^i{d+-B)QoEm*s zS&57uZ76ns877@r4v&ffw5@C)J=I-{cJPPe)&B%LZ9N>*?Wz0hBu<~%m8M6Xp)Bz% z2L{dM5jB5k!L&|fRV!FFXD^{l=d`4KB1y8@cnUME^rS8%IGX?NN`tcoLHA5L+V?v{ zVW*Q>b%hIgEJ@=kvoPuT*!T3eDTO9_cjY5mc$$!b;7f_Le`6pZ!2%MyGKk=!0xY2fyZSk=kpku!cqC}#N^bAmVb10iA9_M4j>fo{dreqzgf!nJEi^OcZWFCDI851>O5>SMLivm$}a0@oc z5p-_TMjEu-9!p+NqB0e8>$dQvrU#lS*80%K$`)+h zmL<)M*vy0XJV!vfHL{P}D6Z`E3=R6)?0_J^!LK1=z+*HRFy)C zj!Hp?-6%6N9|4N-cst~RWZ3x^U69*k8ed2^P%TbUEdc;h?)4xZ~4fR5&%7 z-M?pG_J+gK@YBzw+1hHT_1{^Xly>XJ4OUrTt3B}UXvk`A+bN}OE@!wj&}HK&+`8t- zRU;$0vg`={%g>{N_6l;Z&mxbu{g$+0tr;%en!&$Ty_A~1^v2#kGw^Al@E%Ms;3;QL z!tzIZd0F8VN_?!?ZUW(Q#EM<*tpO+E^Fd?2m8xQS$JrV!Zm6 zj^KhS6ikUn_ksOn(<}Q(f8cox?)MSlM;iIx%$M{`RZn)BQ(`e;98v%oN1(@^2kfN%~^nZTc> zjKDSx!9oeBrCjl~{n6_Mqby!-o)gJO8=9m;=KJ}xG>xZBEut?kqB*s$oPECt5B0eu zZd!Jht-fz3&uP7IJ;+A%(_G|EZ+{`e;{_Y+>&d=5nmO*IzDvIK8PV5vmm@CB##i45 z7?)hfHQn3F1FuBjRzN>~_5PWp-Mbfxnl5rvVKhoV_2wJ*3|PhdK1M#@NRA6O%KJ*9 zcxd)#X^Zd^6i#r2*Kb9^S-t_6$B$&Uk{S3Dy_J8CtKju*k23l8#qr^tcexoy`_ErR`8Wy@WV-CWjiw^1horsS-UV=Fgb1_ z!J#8t)*p}}9rlsM4`9L)y0D_em#t*QN*#T&?8tGBt`)rEPpd%~5cI`^zaGqxklpC_+P$sepmBlc4pQcpzhmW~(n5reX(U_mzO&q!Dl%i;a8JpUpBP+#d1)6u 
zvbUSaF>RM08FY{n_pXBWtiG~G>38ngWhfn!`%rpIJK1N3J*SMnMo(7$<)ee6cv_Pk zr+JRTJFmsE;kEUgS7adH$V-L$=q5SsU68!ZVw~Vb2g#9kR&ryb0FN^BP9dsQ?Wm_uJ0(>$f1bx8ONxR`Z*dt282UHcY-<<0VeB+3HX? zk*8LbT~)53>z+`#eN$J|6}+VD13Sr+Ulqw$3L2qyE(sk~tzAYr+sLMOtkJWji2k`h zmp>G~l)BHHjCS*SK|}oR@++S4`lI)8>%FJT>27~uD6S3XB8~W-Nn-vXR-5(RmjLm#@$?duFUMpDq;?n5@LzD5vy_6Hif#EM$xxN z!5A~!L{@wvdZ$Yj-~IJch``@Zbt2CmD0 zK_#hra%hr+%gk6~*V}_VT$XCA3_5<$M^)6n@^N}!oI=0!ppu{Ty{JV#mOX}xWP`tHQc-_4D(_j_X zn;E)vU#X0pBP99lJR%+2jLGel$A2|<@m9OXnF8?TC+ZYb0-eq z+kNVA#QPyvt=fo5f0SAEd0o`152;)b+(%@hn$9fJW?5nYj7M|;cj~2Z#rj@IFZOS*OcM`0b zb3HgT<~WvGZH-5FQ_E`PmH(SNiS82r3w|l(+uPx<&qcny zE*zSddUBS(0e9X0AC6yHB;3&}WUF}*JhtVlRQAzAew*DB<(FE}^UihX1wE3dbexL` z1zY*uvderS;xMP4>JIbBE^VA<7NWc?n|l$ZAjijxCrlu36v*3N{@#r>l7?MAA% z>;nIZ^?YsBSh;;a6Dpq`l7KE{Z`gsd(1)% z+);pmH)451o-+@s-y@&UGjzFb7SG8aRb2KMYs;Z2x8yC&ml z{n*AD`+A|gc7UuE8_Cu^R={@NX4&KNDHIhs^Ro@U9QN!wx1R1Quitwd!!ozc4-y4;~s~^6@`W|XfxPF!wyQ*?V`Y^ezUI zyf0;z=*frnbmI>eRkZHaKi+UAj3>Bu=iI}ivG2zsxiNDkN3ArFhiw%3_!-aTKDO&+ z&FEoBiCZqeiRdaj4*QF4f=$wCW}aM*@7NnTLZ0?hd_$aXa_*C1bkDRc##trv*V*By znY5ofAFN^ZHD&bV^9;-vo({{=(|N>k!3`C8sNVsX@Ly7hEMGN6W9W5q@a!O;9aJpO zNqm7>&jk;Bdsmk#BU{;bw*@whI!12W9>`hzR=SAk_`AjyuCoGodi-;KmS2lgYeu;= zg?+&yR&#k9_g6A3Xve$6nO1jbciFeYN$Q~9!$lZlDY#uG5BA&13R{+Aj z*_>6obV1o#b9DURkC$UyX;ri}jZF%mgL_3cVofK`uGk^`E>40`W`TYeH=&aPhitUm1#?pt<6Q_@zRGy4#y4?aqDFWrTc(}*iJ z9f0}iany0>F1o!q8N+*>qp}NH_!l^m`}o-7UcF#mElA+ze|p^WmEZ;>wqeWEXHuDP zkL}HB$9C#l*m2B5jD6E2DZJ7}aYQY?Uk;a^cP@n6^Bmgh@d$0&TA;;!FlLTBOQ!dR zQ|6Np%pTf~D})oVRm*Oio8B4keh9DLtjTg>yQx?^Os@3% z(t0-4Y(mdtpD5^sD?GIR(xzE$*-iM)%J)B!hAq3z$>9g3HzIRa^gtJGrdOP26ik%f zHEyQgo##!ITShv%-!LeI1|ynUS;LI?CmRoFSSGnvMN#J*3v-a~4;Y&aVa zy)MN>2v#B=BiBP(eEd>(j0n9BJ8!{28vF)PYrHw+P9T3Ro-1em?8Os83VHvDO*CYV zBcB@5fwi`dg6FY%gl0s+^_Dt%hl$L)Nf`OxZorb*uI#vI2H(9Ci_nQ`5CS63x$zu> zJB31BoGXk5?WOzn1yFLGLG?d;*uky_7aN=4jnbHdhH}$xAMEu~p_b-c-rl_f^&Xpxi9Zh0g9Tl9_^@pVpZg0ob(8St znhp1Cx1MtsYDwXf6*;-%aKVVvhC!MRcbzwu9acI(p-!;%z25U1;na@#okZ3Pl`(qx 
zD)j#34kP^=(3o$EDHl?ub*4+O^m<=@GJYv;FFJ}RGjzE2z*Gv{xRzJsHIw#`G}L_4 z=QdY~haC~_yukSsl|oHsE9BoV{yCS8H)gGC7Etk<-LRcZ#)Tu$>@kY0g1VFS(V_HY!e#C-*PQRIJcQ+{>fAOr1Y>+ck=H(r z);!9gAz2?d(r6lmKaaxuge2TFf6HwnBgoz>5^I0o#ETaZvW1u>)xRx)!(@H7-D1a| zhHGQS(yeHpR1I}?C2ZNRgv!CMrN*S!7-8dqZPHmfDW2o4RESHTGI8_5Tx|Js z5iR+i+^M;k#zke4hjKYy81#{yY{Q&4ZLs1k-47w;h(rgoGk9ZKC6`6bB~{CCI8Rk) zWzmUrSnN+D_MRk_HM!U_Nr$diYhhyhiM)2}To}Yg@QcV3+(C5khrCb611n3B8PAb? ziaH?DG8GTnb>mw#KQYbOoR+!t#PXpX`S7*@9qNcLH3xkQ7{V( z2a&4FHT>DL5z9NB!_H}wF+TVP4V^dEsHUd?S9-mrY3HVkZ2KD8Qj7 z(E8E@oG-f~9e4dkCmL)7%Vs7muG);Fbq**Ke7uEew}?;uM0D~lz9AyRTbsjq!`7|H ze?5qg*Ssd#|1BT7wGM&V<~(&p0|o2+L-ScTai5CAs+)1-HTWzQhV`efQ!Kb?;Z;dZ zV=3>>Z5B+CEIRzs7qcsW(F$if?&N$01N!}t^kqd=iq6wv83aY&7Uuy#MyLfp6WXtN41p@YWWJL$zeR=;74}zZzpZZAHg5f zHEGAy6WDEYm9h&hd1Uu7c&1(m^@0DP?=VkXU+%){+k;tc-xRrOryW-wpT$*OqG@pD zbS_YJWvO%)GB>@$`)~0u=+GI7MXlNUYXY6KIf{YODDFI2IqqR>&#>ar)*re-$6HRLT-;gk>*pWbH#o=`J{C0%Tsp5Vf zRpRc_T8dHj<>K%^^dROFY~tF&eP0wlW<{g6T%Y@UbjGc`X^2^Sj8h5&q^G$0}HF5-vUUDBp7Yu{CUJ%`j4I{+`tr2`!2bW5=W7zHuyf$qRJLjd++Ldv9 zv0*Ii^lPN0=XL4GQX4sNZUlap{*`tR`yD z{133fu%jd$3Y4?A$4KuZ)L8xFSyIpaC0yILDS1;dU(ZTKR%I*RyO1~{`4t%)zJ%r@ z8mwUuL!Xd9_j(3V@4^{4-Nla-Y*fg9`2gBmf0`9^`*ZAyJM_m+WVlv6#Mull%rksR za}1_azvPR2QB_|!n73i3;5&XUt>^HOezd#$RGe)x=0moN*>cSu^nFo=OK;n9^-FQbt+_bXC+`p|^aE6Dx1J~hvFMUVNSi!Nq|DU5z8F3^A~Py7cvgXv zacU4@kY^+5olYnB(k`!`6AFe0E+Wi6Sog694_z0fRNhwk34W;hSr?(e;ojuBL#zUEq^vOq@QBH zCq7d{W~ajF=FEDluDp?zonYWv9??&vSJ7Tn{MZmx*)b zGjvLGkamn7$8D|k(fBzPoxk0oc^0bTd7BAKC1t+W^$Asfmk;MJcou~?#wV0xGg5bzFkR2u-du=n!u~!B1)RXJ9zROetznpG->Gw z{&U#~qkG(uDk|)0bft&fX!4%=N2^jm_D?q7^;OcDTaVql|4}RTFvzz((YD1F>wg45 z)$JwK2>!vTtm)1krbfURcS`+KO3K;8U_5ar0@gU=a_1TR*5w_K74xtc7J>~LIR>7h z%W56z3+oT&XjL4B-|h>g(@TyZC)=IlgQZokfw0W3L9R zZz{l|G0HqNEkp{`{EMi%*?hD{>}stC;QI=hqQ4}P_Kpnfj#`c(6LpaIZV?=thH$^> zWyp+JPH|3&a$U%AX^+Sixos<^6|tgcHLMM$1}N}=KB3TxZp3(fW4_p?kODiNMP`)+ z-~1OttF1><;;hS3v0D&!^n4)Iylj@b*{kB=hAC{j=nbqEi{F0_Gx^e^<#3yuK<>(i z$#zI9xXgdddtTnBeD(k6L}v|1&4Ll$v52OfGUK^sU0HVf;xeK{$3=bj1v+@N6br;0 
z;qSz2rbmU7}l`d=7Ryr@#%Hw48IhZl3< z+)U|In`4NZd6i7xMq!<`Ia~Z!B5im7L8?Q{IU_1s(n}HFfZ`RX2%m$4`4POu#Da_M z=F_& zpt3|AWBe4MQTkIlIrtYHvHwaPI)~u%j z^|0_wSR10pzlYM$`UWhWFa!s@H^XpITZEJx#Bx&L@9`K~%ZU(3(a-$~ab z?T1>lS!)+vv0e+?*rix$l}&CfsxBHG(s|0LEl6JE%=U2#9KC%Pk1M__e=1fTMYQEE zW)&2|&|@AS*t}1ka^R%oemn>R^mmY18yDeM&SAgh7RX*$g!214=)-bvvbFamE5%?w z=hcn#seA=!n3SA9Q8!!NxKj!n^Wmbr< zGNKHXB6-iHFPN+Gl&W?NmB+O%XB9E)J>1$JovkXw-S{%KTCyGyNptw!wu5}a@3Ul` zSwI%o4e?P$^pIb^bQBz% zX;d`l5!+2q#QLkl5KxzaY#TL~PpS`v3+beE*SjyR9=KCJF|ad@+GK&+9Bo?gd@pCP z1Kcf_!RWV^%fr2A@v?6^k6AoRQuEKCssWyq5LeI7%xWq6Z)=yLR>gFu#hrU?uVb~- z2KcxmgOt>EVPaCVG$lv@Cu4yWOB>SpHjr%o6iX^5yXa@<< znpiJfonK3J&=C ziT!h5k=ajz)Bc|vCHS50SO1XC$el>u_DTMe@X^`p%~K3;(dFm&%IVJQLGs12GLBd5 zLq<_OG541(>&|_N8&A~8Aw8M&7g$5XK?@F#Dye(1JC}}d50B_zim!YBlKM)yxN^Tew{E3I?`>XU>#qU$@b3#2|2~XC zT`Psh?=?;R)`zZ(Y|K<^bygKSqD{h)pi$$7q)aQ;*h%sbi)8HBaE?>O-6mSN63kDE z?B?StT=NrWcI$8qf1D!ic2_}EyAzn+!Id0$iu~2Ljkp$YoNiq>Mo$w$`Qm|vtfVe_ zTEhnM?Xwf`_kCB+Tigl;DVB0zCktG!>BLLFZNT|isrWa?5lu0Q{HKT5h3$w&)r+On zJE=W)8+ls#V;sauTVCPvDI4TyJf<=I{!-$QduaJFmcy55Q^=+{a#5H%9T439hQ6jq z4>{=k@u{2S*zOPo^8-B1K8jx3zDf@|cajSo-$U`c;C!n%A){vnoQ`-xy~I#{eE%qq zfB(rjrH>WA_B<#Bq$UeLoe}y-KhZJL1=SaIx$W`C;DP@9^II!wDDm5nh-VCmei= z2<=2{y3UFzJ_z*XF<%)ccG-#TnTxy9jKoo#(|$E#{05@-QUou4e~+Bz>EhGwp@@4{ zi@)mUc>lWNh)(sAzUWx+zFje}Zf?Uvf1aa#|9z$EYj;WanITVV8ix4+XP|AWOG!%q z$X!L5KRpe>z`mQ=_f8TPnJlLTL&P&Qm+)C|860xAQ-e}_^z${ujVr-$xfsL#^ZeLd z<%zVpW*3iX{}XTa?3Q*u&XvNPH07!JUubhz(GBZ*hBs{Mfa(`nC>FWS&ii}t{oY&f zG5ayhqeJ0teVQMn1oNn|_arCHB(79^1G@p52ohPHiH^egy-wVttGDv<>@x1w;})v! 
z9--Myr4*o&3`r#fDW3+SS@RUG&G(QN*a#X4$_i?0uBg$)-B*}B3G z{la{4W>P=?kE40vq&lc9n@2e#jO8)S!+6!ziFD|&F?_WS@~?A5r+2Ah&iWykP(m1a z`v$s&sPOocUF7T?Dj0e>O7_k?CM)(|I$NBF>c0~`~_v2wU zU@=)5q~N#kyT$#}pi-L$>}q??`9klrnBW(NFJnHk=~xeXc4-cBe^2M3pF9`^3T#&V z9K9U6^HldbJc_CH`bk-~mDwD?ZWm`Ar|wtyQ7oH-*~s#fY8BHrH>C%=}+@zI&LVQp*%Zss)^;fb_?)hwCG8W$!DK1b^fO_ z5<{N-roXdNq|;HBRPcO1wz|0scXJ2UAJCT4)%W4Qi4N#DvsfD5x*UCLwvo!b*T`{> zr-92mpkP)D6|OU+w-?`&olP)qtUZtN*<)ZlMIC3J*zvdAcH;fr3d&B|%+CW5`cHT| zkE!w^uMtS-c$GfyYoT{9v?b#XT2MbZ9;UxF=!MfkI*_~)Nut-IR{oaObq>V&@U6V% z;72KNS!bTD@{~4S2;-(}!in@vORgW;8dc+rc+lKtDvmou_qzn3gW`VlG5JJ#h1V&C z94YF=P(C@`Ld>0Vxv;S-`#n>JMV1Ei_qD@~DZ{b3tP>}ljgUU3euur!BvP3Fl`c*% zq>$-8&d%C8((9R@rSMIc;kK|6Z>|(dPq+>LXy=MKlij$#bemF5T;V(AH4O^B3d3K= zu%}C;^Vqo7Tt7R;Sy}B8T0ZKc)j|{A^W_>{%>08MtG);yWi8%6v*zxLgIQ;LsBD}y zniW)suwB^(YE?g)E5EkKi-Bm*3uaJ_q&N;SA(t0g}*}RCD5fE+7i1RNeq zrq?QAM^FR^AC7_$^^1@*`!(Fk@WM-le3D$aj9Mh5851{8GSzXyKT68n4ECwj$<>nV z`{w`o5Eg=B-xj)(HJ;-Wa_h{siZD7W5fk5V9k9BE_>Pv#r52dAF9I#Ja-c=?1GERM zVU`H*!$$6X?r%&i=bsqD?m6lN<;%ch%T&Bt#`PN&PTK$j!;ad^~;HWr_UvKIggBIB$& z`wrvcVo!*AsQ^=!*@1K36%tZ1C*asuJ{?z)ccB1h%-e>>&l_QMOge%R{lH> z#*ZuEWs?vHduC$qn>8pWtcQE}eheO-I(-z-jFS{8{k`3#=C5wN?kP&^ZeW zudT$3etH}m@-O75InbZpV%uG7%ALzS_nm)G zlV>8<%;R%>_W@XLeU9@6&Bl)pkD-xkF08R?2i9gjJYS?s_4hw9ZMTBy_TU%1`?(gL z%lcxK$#-~ZC`6N$y3jCF5k}rFq!yDM%+Un{ppke1rgY|7UE6imDroT#Y|K@l@TX}*Dm`$z6C*X`7B6$4hdGvMgr=pl*dUWrf)#3Ncs82}>bUVxN%IGs_ zkkclu1M|>)AD3HCe1KKl?DC42Dkc0lKsm1d?DHQ;0gHmjUS>AjATQ40Cr5=M%P`}} zcXU2rhNz%|M;$)k-k;G>+EIXGs~st$LzVN#9K(^q@%ZRT8146wqwcT!aLKAD`Y+-p z=!t(vz2|(m)bI-@MAYGwseV}Cu1YUBj=cV(3{Z_LfCsr=IJ)2lJ+c+vp+}tsokLphU;klD?^k$M24juHyFD1($NVW#H4~miOvT#h3l_Ue_aD1cb z0XG$13#y#>3|(CxD!{1p=q$gp5=|?tnjHngkaS|(L8)CCW1>;j~%_{#^M|W=jNram#r5SK;-?kKdw6~Zk zTWHH%EtWy}Fv#>oSi_WGahUDd4JmTju;SEN(DSv2`sio~@Q6W&8|&e6LK-N_t%p)I z1$1?YLQi!eW`0Wn)QC+$Xq92oZx%v_=}C}vaRWVWK|N4A3!go80PkbdAvn(zL^r&F z{K6LS^UH#)n77OrK_;Bak3y$7Z%B9V!1BDkAY_unJes3{6EEjLq0}B&lK72zWck&q 
zd(~u^Zg&h;ytIYNAW763qX{D;xuA3_foXDfr+I2_xcl2AChXQ7Y;LJw47HlU$Fh$3 zv2G*Y2@Xf^;6qToJp#^HN^&gEdgkSoHBd5Q1N*!ULHC&xXs^)%^U6LeQRM`bIUWdd z|IDE$QVKR}i9()362`UhF*j_)$|=l|J-;#iXlN*Ta0650l&&ow=Jq z>m5*cpVlHYsmO!A0BUV*}p~+D@T>V&<)osb4^|$q`ICTsjxT3%)&kh6W zANugIMiD$Di@>x&3H!Q6n6D1%XtVwhSrn<^sUQ)0p!WbGzO2HG3ze`^;~c!LipLp! zt)OVF31TbyaX49>(cHTo4b%%5^GtJ;2|5FUHUHuBRvB2hITqeM>4)P5GMI5Q8J9$! zroz>#Ar6NZR8D9M%`wf$y&kcbzhi+w>UmX>_YsQo(7rr z6B+S~8E|r6Hk@TtagKp81oj<=*QJl)wL%c=bhW_oTwmn)p-nJh+($-6;uUoGYUA`A zJvx+c2j2HG7>&L4K#JxtR!kJMJ{$%InK))#^D403TFwmKRHe)b7diG*IWvEF0r8i9 zMzh00pcc1^S@$T0>-)Y3i{ubykN9V>nUsXTDvpAXbsf{k8;1vFH1X`|D-bKI3Q5y1 zk+Z)Jo&IYXWgW!TOvlWW1F+El33M=d*wI>xX(@qB+yX7|vONF- zyYXB`zy){N?1GV@YNlsv8#D5t2xQAA;>vC6Ftj8X;C*hKXpI3U=8 z^)qcDC&U|Gc63tLe0!KCT8#YWYq)44uw%!_VV(4Jnvt7FId^&dt3^?~EnYRaFfp9| zoY$oj6UV7XndQ^p;jY!s!E{p|TK$|sS9bYhKvN_J?&0PH zm*TL{YZ7IZn(E7OBKw|!;wxMe2@ z<()@Yj~Ei#7(jI)51Ba?95=S*2JY9-hxQ42sCA*4mXx1Ig}rm=@rRkzdg>wO$=jm# zovWDiK@_Gfj|7j4MQ9zVN9$voC^Je9W*VL4X7oF7b!9!{H(r{)eYC}&*8=b`KbNK( zn9_xb!KggZ6mlDRICZ8zJ?40JpV|(fMebM7x?oAUk1SdKFHLlPX~Pb2nUpp$0qFG_ z)8{VE^*q4Ci?8Hya=NnBiZ?rPVYwYPPMC?lcY9H0cO;hnl*09CFX7XnZ1!WrW>WA- zh5csZ+2{L=@aIiydKVYPEeqP|S)UL3JTRtLU%377!>*hkX*CWA|G>`|#BrjyE_TjW z0lx{7bneq+=76U?yRlT9y5u;{qxDog7IYsr?Q^H-zgtNtdJNdzPvUuhPlvE`_E@9j zfZctn%&Bj-(4}I`bh$g==k2rU=}B(>lD(D}*PqL|Xmlu0zYhhE>&WJX6~kYp2skK-z#*qR^a_P>rXoS(%9_i zt>p49h4i2P!sJ_Gl=$!*cAez9_j;3Q{i7$uILYCpn8UQ;-8|-v$9M`~Hk*#KnSzeVYqPG)t3PaRA53P{yX?eR$w+FN*-3y$1W+nlyjh5U58NLc#ogT;y>D-kvDJX&)xy zzRXqZYtaq#=go1>eY={SKYt_$}V>r~e8j_FOOtiDyOmC{4z{d!T#ua!loE;-7vdCLfP zy769eJe1OHW9hz>A$`{4)?O-4ad2=4ZQI@jZ%y){M!AdONf%(du-h1LkmuhG(ZKL*)-nli%VJy4>^uCS6`mizd0!gJ&P`#?4r)l^V-@PaUF9 z#_8}>@i66X*-3YAR^WoM%Le?rUCy{R$5#4i* zmvNlE)yOfTEH&8*7Y)3?UyV~QR*-2)7+vQZl|i$G6SK#zD^O(=JRhS)TMY zB^&#ydax(B6TW}j&AO#15gEUL-zu5xg)c&!ALS-$achL0*Z}sF-4eVqs65<Ub#t8(FV%|DzRmWDIf4tiLa zLr)INVou$6TIUsl6E%C#y{Z#0$EQ-1tTel*DGVpSUkXLlBWSR1DwSRzBGm|Y)O=?^ znX9wu!6FatSVd{;OO}%M8xTgX!+~QNG~Y26ciZt;Wur*!=6VX6U85M0%Q<&n4TAZ~ 
zRb+0p8uwxcT8PC%RCfT|G&z?Nzog;#Uq&nxejsBp15i~{htj@#BqlYT4rCQTd~F!z zye=X8j5>6e7s2y0`{|HGHYMFIq@0SY=pOc${j00Mel1&r`wnklC5}vFO-yyDEcZUC zXHR8iMSak{wVn2^j3Ci1oHzP%GkkuliJdo9Xmo)iHedNe-Q6-IvdsZ6eVBq%?s$;z zj#H#}ss=5>57D%l-59!UIjiOxhSRI&W8)P~)<|bMYrtiH&#OyuPHQz@#P4*{Je3Dm z_clR_^AIa?r^YI^-3L2|zQUHPt%Q>fQ|p7xm_OjaJ9bx+zSgI(aut`E?C6(BQ#^6# zh8jDq&;eGp5^wApcT|myA)}OPvxri*)N7dr~vkLfri#}R%UctqIPvIQL zGdkIqj@OTVqM$}|(k(D$XC+0kMvB?+ZFB<)U8w+vIfz`fgUvrJj#7;Ycrs-L{{1b< z9-M57o)*>AD`LYQXm;T5(%!`rF6+V1Bgs_r`VQF~ABC0mTS$JD9EG10rQ7Dk_{m$I z+4)qOmCQQ>-apOg_bVq7ebS0oZ}70Dx*U)FehP`(oHG-zbR z7bqw)SC+}MBhW_A*4uHJ&Jdo?d{;7x*TEmQp}2U9HY+?Kld6Y*!z9OUSh)K;nYV1g z^3XqcbXbPXwwp`=yc9HLRY?&12t8Z6I7fsqTMY3KZCi_%^E63L>NN4q^T;5-oRQ<$ zwdPWpI6;NLc$xx98H^{vZ)5D{Sj}pp2g%)RC2o+^#3AD&;FMWN%Y&6DR9B9UXold+ zVRN!ob7s|-ar1}u)7fnoCGbh_R4m(boQ9jVsd=#m+SvP|h}9XUH){pP21(FbqsbH) zaF|X!;^To8l9V*~37q}%**#JcgkAqY;qnEx>%v$p^BO?5ndQ7u8mu0z!-TD!sD4h7 z?!Uc5)_3;efGpR4Y`%)hqkSl!APsZEtFUIrdZzi}J1SV-j2YXG;+-8_Pe-m8R%WWP z>%{BGw>uR6>ZS31%#uJ>(ht*Lmf><=5%7o$fEOhmOp5+(IAN+zPLsHOr!0To@bTLe zf761tX(Ung&oUb2pQg6|tazmr7Cgfv8BA(^0eza60)PAiQGWCnYzt3D)zoLWzWo42 zUp$FnT@|$0S&Wzd(U9Z0^4Re&I6uO~@$C5gVQl2s`Vq5->1*X31p5@c{MwYgx;+d& z9lXGGf6C$JSr;nv?#BcvA-3uzp{JV*-XD3gPX|RR*sjPd=?TF3((c~6vik= z)1q;WKdMg_-U)_TZ`l7`CjdOy*I0w+vtRlawtH^$S zDtND(OH=DQ$!~Tl`l@^bmB|Uzqj-^w^OlhP(K9HsMTaM{shR6&MPYXTQP$GIg7s0b zApNjV@|A5P5gUE1TAEBxju%pffegEQ&tp)YVTz`c8&JVk2YW2l*e6B>c+zVtmPAS8 z(W$27<8p)O?+y(2(5Bp*LL`5XW3);g#e2*B@n@?oJNJ19#dC8wk&hhf{qh3dwyt^f zqCOff-RXvJkCl0KPw!g24%>$bE~8)|{Dr(XSW;q#AvQeS&C}a4oy0adu)+mznfCBH z+;DqOOA;&)gE`+t=>ew%|Uf~9AEsmb zUcF#mM%XfERRs94fa|Z#vB7L-J*wQh2c^10@#e2^G&Rh|2B&c}%S`IUa>HW ztT>N;8&x47{u^Yxt0g_7c6{S9pF$$$Q|5=8=y1pha~z6s+PW}i@bGGwuxJJPnY_nO z8Z8umTOJf%rr{556SNpgVLt5;q0i337~mU$pOjA!9@D3VFZba5=CM$rVTQf(AMnMQ zNmSgeh|j(1K{`{9cu!PWAJwCvoi&eL9h`yVW<^3u3CEBZ<#G|VrnuClfn#fYx6Ira zhOgH4!vZZs+>qXbzvuslrE|Z5ary&L*?fW>@i{~iPr^aPV?1m5LyPkc_|d*rKgj%PO1+QQ{C!c4ERJ35Wd 
zrI#lQ;C1B&UQ2ZzZANX%6!hSM=z2=LQiU%iHt-JmaBCw^6GlIqN0#6>X?bY*-X7n{OyvD)52U{3k?gfQ3+ZR9GtJsAOxqGwC@SGFdaqrN z!$uRx^3V&~VxxgdUQy%_C(QUR{D4}1wzO71o1Wf?#XCwK_+m8|U6>V1#@h_ZbGtN8 ze}NE=+`5J7Hv#3!Tkwp=9F$ezx-mK4m}j*UH0HO%?0_L=tm;Bix;viTcA^DC{W=-{ z&?vfhZ8=uG4Z{;b^^mIZ5Kg$%;nN+HXo18Oifo*ZBF!e)6vb8JM@zARbwQ`hRF+L? z#6vw-;E~Nln#bnDyYI2Ez3e$wizh%$WF+e4DdDoSo~-X-YjWKg1Ghx|*jbqiSh3A| zjNT`ICeX2mzCPw7G`Pb0r9*gb*L!*>szAl!u28smBbsvi2*Izv!-!lL+PNz6MiONy z=vx|k%!^~wUYN26Rk@CcRX23#xuR&LH6}d&k7lk;rGpDaS+$-)n0k$m>u<|*9^F(- zs2sv~sR+TP#jt7v$8VhGOMesU5oh1R;AwpNx?6~pU4*GcWfs;8XJL4=8vAD#i+e4% zV*Zo0xN>+VZ|T_{?)l60WAf*qo3%0T@tNCJ!6BzH$S4f=oOy)-5y?2Y{Q>%R`toAx z%*kN29eZu~I}^G`f<`uVKsB!$qu)w^L-7jUMbm0LWBDHIsx(MyT^5e&4B))*FbY+V zg^;yzFx5GSvGX{_`KTWO>z@eEmX>3G!!vr{a~@X~-=pjSZ8mXpIQV*{V(AorIF&QV zs8Ab0iI?FWiKCQ=h`|MK8hZN+@fzJCp1r_d>dHA3dnz zm?o|%Bz(pbC+s{)+dbEymZu19Hh%*ab5a=H)Xk_cKLB}yQm}O31!Fy34zw?5Q^cQl zR>xFz*-K`xIlt3acogA=O!X6}_7f$^tux4V^EF06Ya>r%b|!e~Dj^*I1JC9@wt7FH z2j}D`F;k{;pDScb(jWH2w)GY~C9&-kad0N>5sOD*j_Wx(&j%IMxc+$|uz5#S7#(Lz zs#Bc67+^K}6^Nq(--JAGdt=)ePx9yZY6nze!E?+gd%7Z=E_;TuckCDnB4si=cNeE~ zd24}qH^h%R;Wu?Tl8-OsGLI~#9do0a)&ypfa0S-)?IqXvaM}+i(LH1bioBXj%j}b> z-S7v#cX-6U=K58xNqtz8G!dVfok0JqVbH)a=oh((?}0dd?N+?B z#~c;1_po=k{I`aE1GJUP(;ta=_^>4!MEr{Iv{yK^9oUY?qzrID>?}55uK|T>oo1W@ z>{yAp2CU*;VdkyebcTKZik@e+!4tkK2tVz|$;l1$bKx{{Ix!s#DktFDZXs&O)&}`8 zxA6OecWlpgG16VW8x7_zXLq@B=h@s|n51)_Q95=TqVsK0*=rlU3(lY&nYYP;<4~4~ zEJblwag?h{##u4`s9`D&TT&unDpX+RNgtxh75J~Y1yebP-QTDVZ2LDB51K8+_V@X? 
z)(>Kx99X5Ur;JEx;X6D;tE;n9``KJcyzRnCb!E-H) zTEt=NmMyUC%X#MarZ;rouY*~5B?U`vm*Mx>HteBuInd+A{kAi?Swf;N|6->fPfDW@ zTMReS?09Vo=Q1OQt_R?{p{4j+=RbJ9C6Tc#@MFA+%|XfDUQ86y;9v8dXt}bpmZSy-;E%%wW{O}XFMPP3 ztOGij$IZ9U%5xQ+j-Ev+^A0l^{coUee?I3nO=X&k=tfNt?Rz7@ z4PLpp`M*dk)4pxx7hneK-z#EP&n4WuxSC2i&YGNIIVOHJLZeryOncT_Jm!8EM7Jhk zX+adJOyryqdsbulS7pHGOHfO_81*JhCeOAfU_6isJsdY<^C2O&Mq~jDZ=S_E+oa&8 zS~HM*tV4CBWAJwKG;HrV0q-kptVFj4AqHnaajyyn$2VeJMIhFN>0tqvoxfEO#h#cK zOE0*dseH5?yY-z8UhIpfk^L^Hu%UuJ6|F%LR+9K`9Z)E95L6P@Vac6P{9%P~L-87u zEk6cIc^Y(m+k30aR@$uDgLkNzG7c}TvqanMdmuQvaWA1C#Ck$C+j-0`Up!s@@W;Q7U5>sBYJFK|-RQo?PTcn34H>Kk}1I|0URs@4XWRNXz0UwXs zaOUq^M%d1T1lL4ai==ZXt@@1F9}+?Dqh_OTw-_dv#6y2~3gDefI8-D_Sy>_!P_2PX zpDxEO*29{dID8yxj#5_-u(iq6DBhI?r%#V1SNDYwyfFac-`vOT*Fs=2yBhCzPDT2| zun*oDQPB-&XxzJiP0TT1A8K;>y9Y}dMZ@o8tJ)8#yGuLFIwb6!i z+pG-2xt|+x%4|2Zk9vdm{fIYTXcCFNab(S|F9I8{4|qW(3NEZG!22IpfyV^o4M8!N zV`@g*gOllMQYy;5{D5bI>d8fZBkY~w1>;Jb8U2;9IPs?wZ2RdCFUL1BODB|}XJHLi z3@k=3u3NqR_auDlmJD-r4uOS*0M1A(Czs@_5E*g+g0=2}@xKH-oIVb9WRGCvn|$(G zp~u>$iPMuE&)`0n^BmS|$1lsGpm}p5{;8J1lwu2vd8UX7)w7wkOMw=@TLrVfSzv{a zE|zGAV@U8L@JQx7q!)cab6gX?e>;kw3)ew)Z6YlFZ!-qXzChMRIabf}o6(PRwKuxP zK+PrFo_@y5g;yDV%S1G{ z)4{iU74VAY7|fg`jc-FP!if`n@SMl3H;d!2L8A)P=DOp@efuHis4lksSWiQ%$K!&l zK4`Ug6HK041RrI^u50ZGva{Pv=|F);y^8aAAUI) z0t;q_;g!;Z_`7B!)TQ{5vp@oic~$Vr_87cX_=qD#)_8%8@WREV=)08L!x@Z#UAl$1 z?p8E(D38TD1FqvaqaAeidors145rlI^XF-wlyfPNB}nY=OvUMWZbd%%+NBAsyNs%Wgv--^SJxOcGw zY9zW@fm{N|vcKB08ND_C(CVowxGji;tzNrefnONx95WNuydS}w;eD`ArT~8{y~64_ zF&Nf-kj`w_g|!>BD0IqVaGS4%qh~#7BTo{4h#iMHuOiT7jWFouCE&O{2ch8VJe;$C z5!SY!#nrWHz|Ok^CyyGSFXvO6WtNBjyh-%)<_G41(`tOPelfh<7><3KDirMh7;f#Z zMxoE=;0fomIltu~sOJ8M^?p0>E60Lr^Vep!vBzQR>tArPU7v&_4Dh69CFZFlVIvNg^zPv||DWi2g8`fFD>&*K0LB6( z{81hOi{__c(yMf=;n)m=1D`>C`X9{T91ZucoMlos?ZwqeJdAk|i1oXJxc;Xsg`XXR z$@4q0;C&^|wKv3+Q}NL2Qv?yq)p0Z~1DG3AsZY}Y&kdPDYv&Cx4IGDSTx-z2fMb_i ztif8BZD8o0OE#;V(8;0z+&j+W>C}6;sKyX~X+*$)^C40VIgUw8JM%&?7dBuh{+3?D zc+d5~3tN^$0}7L`qX(9Mn1s)x{VC4xCRk!b-zh+I3Ex 
z0_G7}?=2;-mH9Ajt1I3sD2Mh+E08$xjgsmtU~1)gE^|=N`D(i8&OvdE;C#h%Pd}i~ zx}m)3mx|b3we6Vy&XtlyG-&NsMNHZ)hb70J;P>ArVCdj|rf!)BqcPzfX-gx|yd zZ(`Jf7UVupjC8~e(W`emehX@4C{heh&3uFB9`B;rEy)yLp$mJ)3X$LI7%Vqaf-ej1 zV9%drn8o$fzlZkXm%ml`w`eX#`1(P`lTmoY`S0``{-W@*J2+5big)G%b$hw9VnlL?Yb|+jxzPK)EUDRZPUtQY3wn$(KTnWSUi>GX7YLS_}lr>29w&tI6qy~|uQi6Wr~Bg~zN zb*#s&Jg^Q6K-mFbG_8MRrDJrK@g1yUib^*a#7K_DUtT+`tBBb5eih!l;iqu#*~<5LH!QhaNCe2IkP*=mfKV4 z`>9d%vpGc;mlL_pnkO!bc!Z+Ou9S0j82j99*`LFYAmf5L?UNdhX1PE|ZUkXn&2zYU z(vBj##c^P#9qznqf+2O~_y`}7@x?{x^+^gOo=ha3QZlCL7;rq5r*NjW9JZ@T5F@2P zM>(IsYwy=MEtqqN=yTu8u{!k1!mxkqdm6s>8%>u^#Ih~(NjWVKc4=IOPLVP!jd=qh zl^m~Omn`0!{F(M{kLBK(5@64nDypuJqKZ>#jMkE!%%=tSIX`JM3PP)3c*Qsx=b=Cg z&dZVE`$-`4gX=eDyhOc6!|2a}Sl3RQ$8H!-B~vv4Wb7;?_oOYfVs{ntG-SzzyMS?5 zB>p%YM=#b}RuY`{GuhalIq1-&lJqrFA#s8c?H zY(2J8WSKXvR{fI&q1X81?mtkkOAkRxx@H_Qj^LuML!;i|gS;Z|#GA{I7k zon)7}Bw#1U4~lF2fcIQO@GN^5@2-g;(*k|`eY1@7fOj&D2TQQU;syLyG!~9%g`=bM zO>*HJ##+a_Y2wDotjmVEV9Bw053Y=W%j)kLc|SeYz}*TCaDC!ot8=&}a3Z@cjAOiq z7|J4cj%2!T#UbsQ6QvbfVtycKtqT8Pa%{ zUL4_g=+wn{|DMA0J9m@LIcY-N#7z7*nnY5W+O#oCA5N6F!m^Y8tLkkm{ue z9&%*4UZ3us@SqD{o?*Q9R*d5rP|Nb2Ot1*|8;sk4n{R)>(~m32Txl^}c=Zr7O6`%V zt}rsxZHU4?n4gJL< zrahed`VPA9`VQXu>0o6%9abx-&^=#0rgTu7m3(8s<(B87{gXkM_hkT#XPu{jz}572 zYAj=O!kU-i+XiFzo8fIE8NBEb$jqwKhumXA;MV1ZsRAQX9R3E?IjX$WfN+|kW>3<` z#**;n)%0ax5gl!osWG+#CmLd3y#U zGU8Ec-g&kvb2D}3Fc^dzv&1b?I?o;6)yL^)E{FPg?m;X!o}M5tl7kF&UroxQuzktaNiwT_9w$; zO$9R9bO2s^m~kGIDadMCu)-0mN#=|dzH6S!e%U-s{QC{e`QaZ-piV!fG>FoSIR_w0 zzXz3qkI>1V=Jc@J2*S5VVJP2?%KX+~)wFk*x#I!*D#DF=xvuTbIxAL3e1IlLj-jdd zbTJxsprQOcEPQT9XE2|pmsioH*JE%A=Tc@~&c~o7h4{SUD9wFy2#P@n{=(!9#V?Kt6G~dU_$}MR*CbTf3~FlH%?$G zp@w$ITb1+ zkJ7b3chDa*jprdKLgaiT6Q>8zOOZM>wdhA>{xv#gdjtNv+Y0v4C&AzH1eU+L3zHuo zfs@w7cr^S4eKM%T?x4%gnY;;Bk4wgtzJVaC_l{|O+fQ<(f0?wUtOdaW8A7s>oZ{x=0&UE6k1?`SWW|R*9g4~R7j_X#y zOpC3AC2B=9Ugk5VU))N&^>^Y`Dj>{i| zRCpn%MNg(QJxSKX!3fUuo3i&>j^fXM^WopJP_Z zX0Rx>zTz^g8@WAzI&p+=55Y_%g1vJ)g{(J)z+Z7;w)!8UQBf*|wYqcv@oB2fT7}&z 
z5+ps^1|O1^fNDbkhO2MG$IS*HzP*j{@sfZw>orM!?i(v`(qVhuhEU~)1n%J0bE}ei zppENa7?%>+3v*V7!4RHUehxI*j76`355Po!wMx)3gP-4JnTEx|*j_xBb{7|b8^iKm zOYWmo*;(}O@(FYoO`^&(o6tRR4zDzH9s5K3tCcyIRWo;SVj|B~V)%MV4D8n@t0i3b zz09A!IbOjVTaQ87;1_nBTqMOjIKa-TAdVNVOl9Z3;J4)2l%Xk$K|45J*)9dDI#SEc zfEk?R>qViPPBFHxFX0055K4I$Nj{3DnD#da6+`Xl(Rhw)zDJ6_NIhfCzN?~P+9zE0 zL=7!2ox~}(A3$cA0=KT>SR)P_;oYqkn0iHjc9x)Z;F?-53&&owU{xh3@X;h zlkn_luy5E6y<4y1`a_X$!p|Q!oifCLr?c6(CPU)qP6F$C7uKO9z;# zd$PfQ_ZhVLyow&&O~J^1E?+HeLU^qWpSx|wry&mbNFo)leimhG9?k~!y@ya%%M2y$ zCh$IKyh0<6Ew)c*8J@S2=l$6)#F%(w<4*NMc-FKSE7g|brM0~nXbil+Hj0!fKbLJQ zbc9uUqI5qy4nlNRVQw7{8oJGS%S-A}_&^K35|*Z?f$`YK-A@vi)sg3zCGf3h2b{Xz zWaZ%#hh0;0Vb-dHFxqn$wszT2_+TJR(Ho~Xp-1D@}@!0`gdQ=)+o z?)lFP{>Q+{`#PQD!`;So4i2)#`yP%Qeu8(@B2nmJG?otpV`_6RRs3>;gZrhZK&1iw zJHly5K8!ntyUf9=A&k5J14~a$fzFkg9G|BjLwx|>UO$Vwg3B=DUIKhsG8wm9=K!zW zjjCcfE1=>N9GS@Rg+{wEC0U5Q^qKQJtGm(J+o{lU_8au3a`U)0VVa~NfXT~NV)@Zx zM)grD{JvU=DiIg(){#ZjK5B~Tn)(#bxs$u+Y^678DVTciFtkN`vw^FHF)cg`ot-)$ zDXEGa_B*23{1jZpu^hd3J))0MqWHym8huUNiq}sS^k^9R zUfH9Ss5t5i*0NFUl60MWmPBlwPkLO|!$0!QcCK65A=ojQPv&KR; zzQq_=H7;w~X^VzBNbZNaxS9HO(3rh~a`ubkI46SP&t@??;rkq&#FS`3niRQ@bK-n} zYoT_QE}348quYC3;g+*9CA%n7dCN^s=*{tIUaq8T+56~o>|{E@&B)}HEZDY?0$Qzc z2^M6npp3RH^yPsYc^6re@6d7TIc<)!xp@cg!(O@@KAD6p-C#|W5Ka-T!Wk>i!ux0i z)-JLe6Zcx;!=MhRl+DM<=Pyw2DQ!%Nzljm!x!zUge=G|7V4q4ed~M955+ak zbq?{R{ZH}L584S{ubs+QH>>484-FMO6>H&hJQ;qxYNX)aufGi=6Gr&Ge(p_o=9&m< z-mT$}WE2R*Mn@YjUZ~{Pi%qgtDd`nljf&#eTP)%In>Eaj)h*zse)&|dy*Nrxet%-q z+Fiddg_r6!O^u1+SMF%yEvXjc-}`OZbZbaL&?kJHZ*e@X(ZOw*p!WA_>(%<|g6xnv z{B_%x^EGb>HI?nww!V}pCTMDn;O7~h;(blKa>@JB5P$Z<`2uGNV?o3>J;BBGM%H3; z{t7<#%LvA_9pH5w7;22w{(dP@NygehC`ItEcVVM!Zvwx`c!Ynyf3F~5^SGw_*V_e) zOO{zb(0An*1^Wt&UMpD7{VUq!IVn%@>R>&;p)P<}`3&o0525apzwX*9c;mg7R}kAMxM|kLf4p*m;8&u6pLRxt?{(Qg(7V+};ME-` zxOV(KUn(H}Qb1hJrPg#|f#Svkya8tc-(B$vdpM?ucVwxeVE?F;b?I;nFHvo*z-hBu z<5a(V!IbCC{C|!2`MX~|6#V#ixY3zC!IydVAD^Ej(zr7AtsvZ4gb(*G3f{jS$G?%R z$5-xXy>u}nSFn_Sk-zuRc0sc8bAbsjlfOOSwIIlLt)T4SB|*ibK*7qm62a%_L%hY~ 
zg$0Pce4R~e1d3&s8<$U!vfey6i+}Sg@%`Rc37)jR=HIUD6xft(;+;EtnOQ@Z3zqTFOYRLE6`cXEZNKFfe-+|H1ou!PWkLzRcR zkMR$S$2NB5uV|VbEhPwB_l|!->8fB>IOP7=7g3OA0*rKJxaptb*uNa3F# zYIV8-_jKK-B9(1i=;akOI%pTRn5+dSziy%RHxJ>4USs;%FPd5z24T6)+C;EWg*5NG zgDQZs`0sZuiDo|{KR3-llb3Ng^stDNo19D?VI_KP|3^G;(`b6$vlRIje&oIgqNuP_ zk!&;O@D@Byk`$gpNwX}uqVo@boiu{z^S#6;Gu=^B+E{9PKm&jFoGkIzh@~!(72JgP zBD~|#Mb4w+2_5YbjAdS5qn3Fkc;8)p$%lq0?0LNuhqY{^F(;qmCF3jUj)xapyTN?hJ*NL1K#?mDIOfb*d1UJ(aJYxDO z@fDf()c9&2r?N;HbMNor;ln4yU2Q_dJa3^p-!OFIQVpW6Q%LLYr*YOe4vUoL;|e(; zT@s*24b~c=FaMt56Gh3iWYQMSJja2Ko7|7h4ro#J{fi~*pPa<6#x~Oh%cXGL+q&54 zbsMKNUxkD%52MYuq4?v?L`vfmarA&XetuGyc-4NSqyLC$Ls=tTxHkrWm!80xrxL|e&t~sJ0Ss&1X`L@TXa(NzA8mB4I(vro~qQ*<~25xZe7Rgw#;vDTR zTa9A>9HGPH2$rQYX-?=7yf?lWXKMVQ=943_)AO73XaGm6cUNL9FI`FgBRxsegHI@1 zf#AV&Kk(fghNc8<$0ruwqR=A%cl;eEIh8gJ%i~4Z{cslEpQkUeyS0SQlOnA)^vFq0 z*7o3co-=E7wbe$BO$^0bU-kk?)#iTcRX{% zzif}c%(s|!~^zNpGSoAd>?-*B#yf+!q5x0+^ylYZtRo54qrc^}X zn+j^F<&1ethPg2^nHYU|jDO9ViqT^Y(%GLwljg;6tNcrGn{qu;=(|N@UL2urPItMz zjf5onR8v+h8QKj3w?BPr^_BU8Q%2 zFHv}MxD-ddEC^C+ZcNxcWI~;`tW4 zxkO{M>>oW^Xvt0gBcP53jZ`;s0c!75ArY?Ukm-!&B)XVKXBMhq&@+#C%^4@ri5Q1| z#4AZ$+ycZ;pYO*h9kpDOy_w{K&n+wvd(rzVmSLyt0KD(b1!@}5O+}TmB++5Dq_pNH zc8`dr(_A32U-BC7%Rfw~RXjy`4|Onia|Sijjip9!KI1x-Ns_lOy2SydG5BV7F}B=1 zfyS=(gvlEw(@|V1?!3Q?7Rn~mp3PJ7CTT-exz2IL?GTS@<7n!~VR2unggUJpz~8z` zaG$FuxiFlC`|T3wMxzC|ZjmA}-lI$U97{Rg!&mgi{5rH{bUtNGpJC7WC$Z5+HHm)A zSjmW;61;(zi}#;RrdMWvph3JdbU&{e52^9+UDE=p-FBaj*Sdov4Xw%3AOW>Iq9$1# zQiN^xRdK(~=Si|Mx6@sfN%%lB;+8L0CDYu3sOi&#^u2N_$4^QTdi3W}#n>}=%v);` z=oHKqkpq0bvKbm(9kdgyu-U%7q)74IJ-X}HFT zOWb6|g0y9L#@AZp|858MTdP32&wJzae-6k`Q5$8RE2ck27)d%Mnw)x~nk3g;il6*5(vO56uD&vFheK6j0JUAREkdpMBMA)`p$x7%3e zUnM6L*3tfrD{%d+`#7L?vbcM02|bfuNpF)4^p<2i$qmiKm+)lC=S@fOy_LpP;ml~< z`BINYo)yzA=ltnZ)fd?B@^4g}4N$=5Oggvw4i+vNBMCV!z?TOW;ad3&8swKxJHPqS zEoY4IYG8wT)$#3!2N$X0^{>glD#DW*i@-W^HUm@CFn<}Q-OuWqAE&ujRZLN*4`1>D12 zWulxtKy4RkW6P8FWIWSJrNh0rxhS8?L`}r?7g_2m`wjDre5qxKF}9u)MqT#bq{T#@ 
zY-<~YD|+ry-r``i_Nf);;8cg(1{O-(mrtcf^?RuLTQz*DIgGBf`i155pP<|!O>UC+ z3^Joxjyt@d8XHR-xaSp@(Flo>a_mf zJ9_M-3rU)gN3AZfc!^RxjGO7NY=cD&f? zE&aLJj`XRuieq<96>j+a9!GsxM03U^;OD|59B!dN-gHGszQ~5sqgkVH%n3E39s82r zb5z1jt9IhSvES(?I|tUA;S%&`Qf|sUDthq|H}?8VriW^Yuk2Kk z)CJb#%dhw+9{ z;tLaUe9>)A@AVD(=XWYTxYQV>dn-wZUnuU|Vu%;YsfZ^UWYE^t?z9e>({+|z)cJk} zj&wLi7kl5NH^0W)oEz~3VSD#W}&jC76QAQ74`N)nIYSM8b_wZPmTI#U5hHgtagL`yfV^8a?R`}eL1QCPdRxDiF?-22WLj$<=In_mDMzQFyt^^cy270)@LPo z-Y%xbe=4xjw;pbWo18?)P=#a`e@8<+1<9Tf_t3Edcd8~mlg|F;v4C$VWMo@BUUzi@N{C;9y)OSi=8JSCo!Q2u48gb7{RmrO{`o!tqN$P#|4;HT*;^KpZ^riO#?$&jE>aC+f zj+J|p%P(7Sa7#KT^S6e3yX-Re*L5u}_?U>czt)w+BwnIb$1U-6`+S_mmC=2&QE1g; zD@g^f7ss6pM{AaC;0`}ml*GshC^Dl+M%5gjSJ5O9(+g?z*QpZ5Kb2Oy-Jq5+C%B`+ zCO9VG6TT(?8E?Yn^juaQnxmXT)uk!O?e}Yiat@d9v9t%Kf0 zHiD_FImPwsokEjBrwY<<{$+mhPH}%ys}adh;)l(Q;;op|B`CT1gi*aWhOeKZ%_i#< za>c)nBm3wC=24r4@KS08r?9pXt=inn1ca*yEq}yvXN}{~+5cYgt}QWRjUNSaKst*q z7}vu48PUoz5nEYN<#bVH;R~Kj$g8GmdpVG^OG$L>_#8pm*R8C<4>@pAXO8GjToO}v z@ISWxMjpG~XM*UIm8zg(tt$H_%}qR8!x0*|i}+dQV;MJJOTOOmuEy?%jrPwX3z&$y zL@r%55h?7rzy$Rk;r}+N<{9GPyJHnj))hX z^UeOJNvpVh=1pjSQa!AfaEvI8SFq zsc|#GhY8nt_pV7Z={2jl`cZZpxsnfA=}JT7K17Zv#{ zFrKgVgef6qob%Fh)RrK_jM4C6^e2>ZeUT-oGw?GbsaIzgD^zf0!_qU$9$EexdW#7U zO67)&Q_#xle@w1x5u-eqEk65SIy}&Pgr{)gF>}#1w0WesD zW1<@$3O1dr=4Obi(aYRWsn_faV-grG0_r}%=H@8o(qge-YI_A|?ox@oM~~-sfBqw= zBUN1J!fMo>*~hCH{aDbQlg^dfXQ0T==RAcf9(!4%f-78GhMMI5@Jv_b3Gj+?ZeL?L zYC7!4fAQuh-@YwdIJvi+Reg4eC*u}re_Cmy$a^pZ^xjQil!I~vZkOW4J&Fh5oWog+ z+C>9);m$HHprQl?dss7JXN`CljxQ0&OiyjhiQ6tXziuV#QL+k*z(HB;2!XF=LrOmU+8m9xyLm zBWgWV&yVN(^SzHRAFrfK&_X3O|8j;$_1nbP|M<+=Bn$-QOV?BG&F0_&I! 
zQikYjpK{JEr4;Ry+s+HR_?4$`tS*c^!ZF;G(M)FhL+0u0DlYp{C3?!g&a~$(XI_$0 zZjVX{vLyQU1vjL5gm1?I&)#LCMLy#h`{q8u#|eko-v#r9r;{G?hpb&0jmHVxm)Znm z=$*&c$XL!;W}Mr=8BZia+oWhnTD!choyOAx0#TU6`YP!87lgxz_U2e%Is73+j)EvQ^^l-OcQW&M^fYlpyD1E9n90c4T;$!m ztPZNr7>K-YstI;HAHyCkP~h}?<ozT-#TLG<19T zeI2Rnp4AncS8y3hcQxh*1zr@0j)aRpdv1n;loV{pGx(4LSUn zcT++6N)yr1$FrH?9SUHWU&#f)QY7<5x*j&g%%xwYT$M^GioC1FAJaR7?R-GuK7FraiO7z6!!6g6-F_(<%!%I<3tp+SC5gjZsT|GtGT4)YGjfTFEByY zjOn>7et)|M^{;wUq8B{r+yI((5?$-|ZRUr*D&wwiD@VgCeFW*(RR!gyCm8)n3cO1p zL%b_SRctS<;GRA$MU0fGR@CyBah6Nroc1OmWA8gm&!%Xm>)$3O<8H8!kn2schsU!2 zy^zi~c9;so*s1(Sp5ujK=jL&`mflG4`T;}quL=s68HggB^ue3XvrM-_jMUd!!P%#j zqis1?nR}=AvEiB(T<(HWH1X9hru(50`(6;wMQ)EpzX!Yd6_?Z5mjRWW@uG5+E-K}D zS@rN+s`Hutx@5silN2WS?^HJFTR9gyu?z(`N#_=OjRdcHB_OtKCf2);EQF z@hAyxlX^5PlhT-m&SFmf_enH#BU7+Ve=`%QGQ?9XO5@v1EaLr_F^2VPs^pwDRUl2- zh9-@ddCb(4Rh&el3K;@NyB#ym^L`bC3tLb4urf-Y?fut7-rjA!LfH0{^_bhqn3@50 z=z0aWV|N*+9jO zDHauNH{rb+e}%QNui~bSEJqbq7R>FMJ5uI#HFy4EIkJ3V$kPcpCaiU|6FYu%hc@%S z^V&70u+Kl2bAnA}sPVg#;C0U%=GFN-OwKk>x$j$}wv8qfA?nAo~11X@Pj z=H)g6X3DQCjDL!`a74>1-dU5GLe($D+#cy2??6Ke6Rlg(G&M;T)ZEq+Eh{_3cR0F{ z*|<2534ZcH@N$J6|GtxesbCLt8*|gp0ROYJpEt_rHjX)&%I*-41~v=*MCCeJ%#)m>j1I4giy2*sd@INC^gR2S>yB02MCVFW zoan$9H2h$L9e%SX55 zyjY84dhw*WB}e^5#$+Z)n_J4)n6AZ~28RSc3cZ+FUQPlHDNl6D@E74_UxAcU zZ_9K9?PAZDm2#hai&5L4EHh?S7h@eI)vNyHXtAav<8*m3s}QkDY;Lwt3M1ReY*0PK zY_hK4?#?PltN!~UouL>hbT&Q6-SIkz!e(A-nk>j?OIKHO+A@`BAYYa-7^TU2DpYg3 z^DEKg6Ae=D!6oLMv|qe3sY045zxe5j_B?+dYq5To4b+SaV1`-|D`Q&DIf7DD?-a_P zUb{-l`l#f1h2^Mn^KYixdnaS_gk#OqMv1-~2k}}ziJ2awQ=EcFHPV0ikY_B8XPXwk zW#tw_(WT4+{(#9tLGpuU#?<~7&u59eRGCakzo$jXJAFivbF>u|e_ zJC|RA9;2~r`_~@ESLG-tBX6W3 z@E&97sLwc_u4gCB{3GmoqRHEASj|YRe>0Me0m7QiTNq^%HDSVtLUvc(2H|6$VBR^k zKt@3z$JTCl6FRNVWCSCNgzL{$a7#{9p<~(Om@B(OScMN+jB`k}a8HXobM}o9`^ef@ zv{UX4+ho2#AoHY%`RN%fJ{!9l{@LXuxc59uik`yT(_e+zchYRhO83mRiTI z&af3}7X~rjVVQ!(4VB!Xu1a(~Qd>|{nI#Bx{lPw))-25B{@90H$hJ4^?-rFUc@8#& z{N#1dRu)`aqsu95(nXgh9$?xJ@C5N4qru=4O_6`A9NS%ID(HXH$A)W<6v^mC*~wTu 
z;(ZVAV|${9g(GLkvO%v+nO5okoH(Hj>E2CZy!(5Y@`VjTx%)QkA(btH{<8CI>MZaZK(Zc2%=XG?HoM{&w_$NE3Yc%l3tRYu zso!3~<*Y47{%a@mTSxz7ukqx>sy0`_+g+0d#{?nF;Aw3^p-Tl%zVi?x6ZnjuCY@(j z)G`*`Y4&HFVs^4OhPQH-yeK3H_{i`6_?7v6UQt>xCW_j(+wz|5z0CY)SIvRtRcKe2 z0!tjM1dj`oxpg0tP`2u zlJ-XsQI0MftYq9;Ma+%#axO`#51$+l;oI(b%4Waa#YNcuhi31&$OLGavi>R++_j-n zDL=A}@xIW(v^|iXS8b%{DdUYy{zLJD7-uD(je< zGYag&qeJY=w^pLR>WDefmBFT3RdI8r`bbCHl0Rmb9BbKa$;HZ9pjRfzOxJo<)@Si5 z?&!EcB)`s2Fkn@|cTCq}$H?aj9HQn3(j=3ZawkL4*W$@Qc6`wSZyB?rr#OwOYBXT?%bwV5X_Co3)6ng_zG-x;EWcZ2nD=P;DQ^9&Q>ZD4 z@ZZaLv5NBL+^o1#bbiDVf$G+JX3?=OcChW8(A4EOb8*g6VOC=<*SRqd8E(GU6k(#p z{<=`j$>d1y6I1wXnc^H+KMj!ZU_z=qEy2!~o1pMAAR35};ZKQBqGgPdt`)H!PPK5K zl-(H8q6$N&j3p>(9O*dc4dv!VqZ>;BdGgeaq})!0)^g`inEN2!s-Z%T=S%NM?z*5O zHqWsCKtBenc7P8vu7R6=ML%W`YnNI8HR;=S9;pQa*%DxB<_`bBZurAP zk+|im5@MPN8v0F9fd?Rl)vo0F#3x`__ci=*q7tNpUVsa7T3LM`BhYx&6kOmr0{2Uk zVW-P}^hCiQ6?Ao?l;_&ezIYP)rxOlLmi>VC!wTS6l`C96N*z}cZM-8l0xHpoNS8MQ zyR;B&ZoL9N*fa%kH(7GIY%Y2B;3II$SdH4gbc3AJerWT@Szu1!ASfJ`a?w`a0}DoU z0j_B;{BFmPx33|IS`-gg3hGeo2Me-fs{^T2-vqsHnjo#h+2n?F1V2_I4f@<$h&r=O z$a7^2a^g-hy!L($T4irV=D*>Q2h#5r_jo6&Ek@*XmnW$)jD<;G0J?6hM<)7AAyrlL zq3oYD^r-3p4!0`7z0pJL9HtPS7D8gQ#+jtuNQF_>Zm26x2}q_6!WU*P;JK|5OaUse z`0PSxXrBc*vn)8pWjd(o3W186M$qU&44U<69lCPqGkTq<3_AC3f(afBNjic^OkNa- z9?=iicG=}N`gP1XBnB$T4 z@8*(e>nve^OBGz>VMwCvjY-7Mv2gnaWh4nx#4lGH;P}lc(ATvF75)Vz+HgL(6MPho z_}G9nrj)`A-wSADV=!FXJshs8!r3Y?g+TJa&T6RCGnxahJ zscDj1?-Ib?+bhuSMUXt}_9Vl_hv9B9MI+T2;=6txk+NUF2gV0Rg}Z}G>-^xvp4-4s zd>Vepo)6y~?}A?@B*9Z`0Qx2qhhljxX#QXZT>SPf`rgMQ`6IoFW#%~$e-XjqOjBr2!G8)4(H9F-MeL|s5c!> zwf8~iyKG5oEGCOQ7H{_h#-m3f@!vA3S5X_Gi;8dwKMA7qTfo?p z7*d64aOGPW6!hUWz8^M#>%O@&{;J=eDae{*9bR;&ZlCs?31truYa@LaU1t{JTH0?4YJfK4;z!RockQ2EDL zbnwCj5dKvOW+!a|3fn2@&^ZjTuo`7X{=`WeN0Li{DKOtD0vW8FMZ)Xt$hEPNFxVv- z&5!5+7Q<1vWhto87!rR|5279u1x4IuRP4(VXY5LhR;Pg3c{a!^ z-kwY?m`k3|QHEN%$#D1&1WU5~pyBXgn7_;s4YleJbk&eJ?6?X*hrg<7}C=G$@Ti2m|8VR5>lSKiG zCc`_zU(hG}I1ay5j_qR>!FzmPH0cr`^L*UM{#yxf_&^p)x@ZC9m({BWcc%n6FTyKCRuoV4pF_X0*4Z1(Wa}W1f^Jzr~QXuiG4OIxwaRiYzjf) 
z+f!jhKR{z2DHGy7hJ>#>2;0v_qI+?G9LjVhG9UdxgYE+$-nt$v(iQ`?;ZNYW?``YCc{V5>o_;oOTX9o0tmx0Q)_u_iHEZpdo z1ViUkqOP;j9?_bQK1RsF+qwW<*@{U26?gJ=g%i~Mz7%~`mCDTZE@ZKE{Q@lnXkwWz zar-!f)LpX%H@MNz{4YzC>@Zny!UBv-I0^>8CxK1TO~7H%VtD&>9eO0)j~Ss0Nmuy- zxHTvVKG}AI-SFfV_-}d=l&?I45~~!*Uqej-ZoLKrTkN2bj}e|V+Z4-8jD%Ob#~{Un zX5_xM1&NCp35RYZLfLSZ&;&%@rW}OP`?jNV&os&Cmj>j*u|&9a^dM zR`%qtJ>dR1U8rMh1N{z-1N+B}1@1*zpjSQ-+;iCrW=VlG4`k&~`!hKy>n;__ue*Yl zCE1huGB2{?h8&dt+W?EGtLi!Q5A+9hKDkg( zHwQw8WvK2sBv#mim>$>zEZ3iaN9Ws-zr}z|Y2FPoNRK->c}S<6pf zz`hEGEEnOFUDv`XS^Givt7y33PZAVcI3TIboQySeB1&@kY-BtSZqj1O7d1krjphN{x?(tY zT`O1`)&lK5tAKSkF2LBAUx1n6ZTR|;4gBYmirO3EK*H`YqWT2O<0f=Su9Ph}KMnDkec;!)t?2KtEgAb|E~$00gp-RWqFsJ0?)l}57u|v&YgZ%C z37H6W_2=CIqbB&@EX|J~? zQw{v#c5(uJISt6OP4mb!ohjgJ-zc_rq zEPTdlYi>qMCndtQaTW;WF@(G9Kt6S=z^co3sBz9=a68EWIXR`n8mDFGcZe@|n6Hj* zXoSKkw`QUbBLR7B??grn8G%~+*T8i>APrMFHVKm(k-^Z-$_=?R z7(>T|87O~=4y;`|1XHiPW+&{v3@1z3fPMYx;6~LSxbu(_%2r(uw>?ThZF3)io}vwC zdx8b*9aKf@- zd2u(HS>yJAm*)<{66H(CD191v_Jk!1rzOIJ;dV%4))cU|={tP;9)QN01`sM`JAYNF z1zx^Kpp1MyayX<1#Mcs#+4E@7ILjVx_;HwB+G7l(rU$`F{XJ;0^t?7Y-;G@GNQ4Kf zmm)VC4e;!=8VdhW58`KAqxrAW;nHgxQO4at_QgdZ{PX!g!~>_1+w*Ziy&(^Frm)1y z-jTdiJ_Os^8qm3mM>E4FcUSbLJ z#hEO||uJQvp;|{)3HjU)W{y zqu2)P8(@}x2N+H|3R{;SMcxwH!6=*jJ{XU{1la&`rm zM5$v(pwkM^07Iv!KDt}iEswD zLr21#+CmUKasn*VG6yYDTfn=H>7-s@MtTlLgEh6~z)Nz2UHp~@W(=PN=bz{!wb!|5 zxpyzxp_K&VLZtO)iH1}^jVCWEDfm)gi*ic=nXufI^cGsc&n*RLZrn_gb^sIq{{O&< z@Ftl4*_H$>g`{E75Qs{Tz6wKZOXgdQ8sdRzlMf;%t=@>*CG=PS( zGul^?1dP!VwEpfW5~wZJ;~N=Z8F3mivQg-PTFu>0MPY!txo|>YRp5Z)KrN8EN40;X;^sawE9dlmd;b|A74w3z0z^B-Rf+ zh=J;4_@^r$ZQhB3o?cCNBpUlMiFoLmBsAB=B4UoKk0_J1?W4b;}X-bd3(-wwaLZN_Eio%>=3>&H#2P zwy;!H7pfecgWPGIyIbga3py8?L`6vO_SkQ(YWA?(ETBYb=MmOHu@CP^e$-+iv zB-)-I3EIM5!uGq9K|}jyxTwtu{?j-EH{7;@L(fhidK(dq$KK>=T?AaNY=R1VtjL3Y z9`P?SljaxIL*YSfG;?YsioIF@zIkfGg=f-$^Ro9~p8W*4>#zWAQPU&OuUL>z|Kec% z?-W#>pb5QFcS`T`*T84HO_BdLTQbW55%#Aqd=hpD9e8Gmu3Wu^>=m2XrN>Nw%2Hc` z`|OBsw$%AsdmhSsR|6aToZ$O?pIFJn$3S1elF(_0SWi_2u_ogYGm|CVPA=sA+tXl1 
z!eltfH4vz+(1zCwGhoV|S~S#UPS#ZbGUI^-Ec;-Ll7H~Xe!e4-k-iqphxY#ihbJ6v diff --git a/pixel_classification/classifier.index b/pixel_classification/classifier.index deleted file mode 100644 index 438b1762236fe48d5c835ed26d7eac05438f8fc8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 490 zcmZQzVB=tvV&Y(AU=no7;9wMD6=LRC!=$l+(=_7YcX1|G5q-y$#9X+9!Uj%|9LtMj zdv+#~cteE510j&aNta2ijEqA0Kyel!CJq*b2{IZ2rgsBbEtyz_U`9b?7Jy`)bg*7y zXA%M%1QFf<5@u3<7R$gS84hw0C(s;ou&Wdf$Y@+>W-*<7>cAN$Rw;#Cty1cK8vqYbX1D+V diff --git a/pixel_classification/classifier.meta b/pixel_classification/classifier.meta deleted file mode 100644 index 66466195824ecadd2d98c00f403bc5bc28a30345..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 46620 zcmdsAX^bSvRqm{=zFznAbmts9lY7i=t#@~}*1O$ZeV2Uf*6iBY?2OklV~@e|6xH1| zJ>{vcYFAay&iG&#%jOaWa~Nzca|i>%U>RW{_<;~V5Fn61An^kv4q+q^SmF;z2+6!S zGa@o0D>5q^3~PBhE02ixUcB$U7cVaO!g>D_=b;MYFYN94li=MedDqF``}jA&zaPfG zr3V3T76>@^PjSs1_D=;24Nd!9c7 z-a&YImtQWx>`rsgSg9X&Uu}|CeSW7B-kE|zn?3@Vpf8ix_;rO`&%splu-|HTJLK04 z6!&|LJ-iH%fbP)%uaJP!qaopE;8bPND~uN>CZ}g+=T6Ko`lrr5P7t4K9`ClH4Sb;@olYk+xMY}FZ4^$>g+Y|Q4Q9#$yV|s zpngHEht{;u!Sp-!P(@mU`zqNsO5v>lFL(E^b#Dz05P4W@ySv{SkS0aJ1ODi^tOpG$x0=?%vTZu^U9k2faq8f7nI*H>C29 z=?`|42WP|TW?`l>McQe6+J{ehP`uo1bX5NQ^RU=!lDg1N)KR9b-Da#No`IP=&E{d9 z2w$mdxJk2X>YHJlv!BAuFH$>CRP8w!ztY?n%1XQg@e(K&sikuOm1)X7x&}a-b*1~R zL%0mZR~ox_Oc?4dtpd+eHS{C{D+OgxI5!5DVD^=EW4C#`+um#TqNvkBRyqw6%!;u; zL9tpm=fNjEIDHjS(>=I0z(0-N-j#0e0HKJ6lD+jyfA-g!WS zcvY%jX)zQNH|F1sGJQWxU2QV@P^Gzh zMi5}~5bb^I-nl7sxO1$kdZ%&F?Bku&ut4jnqcH0IHc`rRQ^6ukvG@9wNovyi=ok5w z5A9ZeaFH0gr_oq?gqt=@UTfUVSQ~oKbhW(#6W6GJQrs{)H(}_e(aAA~tBC6%khfz5 zwhU9(TKfmxmW+j@mWK|i3NT%nBwCI-{$`lvUl5lkA}8YM_OuYn@_7SUAi~xx#5{?4 z_FJ9O#5;(|^-n;t-}-Qq|Fmkt8Q|};NR2BQY$H!`MiY(RFpK#?c$j~Bsn_lIU+N5c z-NXAgT7%nYiP82VWz)Bsjloe5O&BUIG0KgdW}6yijLF*g5y26@wu1%$ZR{a27_rv5 z0&~{}-9r&Lr~-1HcR6v^4` zb_R`Br`hu-`-2_^%t65)E1ic~{0^NOwI=E#ViTT(nQz4iy8m+PPE$0ML{*5)^5n{M 
zE6dLmzlf|AO4FNTV`Fdh#>P}8$IBSS8%&}rs?tv|X7u%14oHx~qS!K; z5gIJ}b2XUu{IPNo%hw9`%4Z{m7okucgKvS;dcpm{-d^)=Qqix!Ri)cr^v`=x(#yUY z=e}c|dJrd6z1j;Aj!wXAb%rR9KR*4%LU~Sl`g5;)Wnz*^ATV+6%U}QM)8#4R+p(Jl zAI0EKYd-|7PlB$%f%qU+c;%s_@!CCT!U?T*pY-a3C`Ca`^9vrlq4i-GvvA|Kub7gQ zqCBY@5;P!X8KP+tU)0SCz6hVjT*HmqOT2^2JGuc<6X=YW@msZ9U%2Rn{9&)WzT0l} z`{=J`{3-1ESD(Le8;LNxz2vK5!iV?46jIb{qVt#i8A8#}`i7Q#3(jb*SzmFn2E-B& z#HwwQYz<7%Vm*1;gZIPP2n)GcgkSXF4QAANLHi zPDCsWgA)&dMUGbel$1#0a0gBr(qiU8w~dT4!xC|gNKqLv5nathXc(8&MKPXI(55Yc zA0j5A8#NK#z+@jiU5p{joujYKzUB)UV4M0cpf;oD3jQPGVAc1M{vpRdjF zkin}K(4Q&MOOko;B0jxdIX?4%#OG!nFkmpCRawez9=KRlv3cbOBSFIqOjRe+1PtG9 zS_YJ^#WKW`Z(0T+!erv4RoFMJ0;V!Z&|p{vN|k99m?Ahe*T*LsZj$i<|R8_Hg37sq>1h9n_@G%qrC|nEv z^qn@~8oWx{_o7XFkd6aGvM-0JBd`t^7~BshK6DJ>EVs#yfmFHn3`vaC%5 zK{D?%wP+Mji$npd>9KE%5bQnfB3wi0M+1ObHy-g_H=({34rs7}jX=$a1fpLOnwN_a ze{~j4SA9KQLIN5h6m<*)2SoK-J3iimD{R=9It)Ix5(pEQp;mZjn0^&`*nkwCrL2#_ zG!qJ@Z6s|xRg0owjS&{?m=mIE>L8hS!vxZeRa4nWN>DnyV(0oKRKM4QZJw~L?~-(E z9S6@yKDPcUCS-90n`C6`{rx4Df~{lT|FBL*#mP9VuO(LlJ*|~q3UCLWcA^CXt~(<8 z;|oVLQ=39ZG*O$G6vTHOXU}FLSlYe<$$@gP|CIMIlDF!%zXPhb|++iE@ z#93zkJ_K)cs*dtU>Z2DUaoBbh>Cw*8eLI7mYNE%jX~AFi;G!Fcn9V0C{UD9}14r#3 ztk_5tZ9fh_>A|uatz_J0i4N5GbbK>#=26yG20pln>RPIx=h| zS}#5hG+NN^!#wzR&sAyYh!z{z3R0he9dlF{MH&KRg#mj|d6rAhR3)AYB{e z=T`p}lZ!6w`pGk>9GjHOEJ!I~VJp4zTO$F)v+zvygcd*~#S^LgWhDoH`73GsC5~B; zPNRED!cpv4D2{eHbQ(YEXo8rttZ-+^^9J!9Gb1`r>X?1rOXUf1gotztS)bM2RaZuE z)&H5!Tau`ddP_&@h_`e!cUlXi@|8?%8GPj*rnQWf&@wvN7*}nkexVW?1-*~)c*I03 z!b!tKz`OWb2hmR$S$AJ!voOh2%#_xx5^(%r@;{s?Pcqdp-8cKZva4`XBuV>Av@arf zTNK^)>DnZA+l>K<@$$Kdz;McKl}uB5ADjxyBvgU@*IKnx*twlJMD93%)Wu<}5ZL76P9q&mE^ zlPSRISC>M%Tw)=iHuVbAB2AV~mM%pXKb-If|Lnm_qfr`3>DXE%$U`KCy1KMRsJEZM zg)JeoA0{)e>uoaiI-G?aJn7O})s<1wPB;Kt3(0V)kLg6XmTieFOXDW~L1Bvh+5#Oq zvSv>$1`R$_fVYfBkFTt=3-ZyYAO{gqZ%0I>A=q(v(SyJ9U^9zcifY;BK=T(0qzGs@CK$7qY23<=B(bpKYywjqo3$V$B+Q`>}Grzcr*3qRn&i&^w4 z#6RYYa^0S+s+~zv;rhqeHqs`j4azAlN~}emr9pj 
zU*Y#Ms_Ebe7iBH&_qs>8Ko>V6@aam|VC!Z^DF)T7U)ERPZwl~omUkQ}L#$4baV~-jiH|X!`g;$aO{gz1V2TI2sQStHs!cx1OVSTN0gwBS8Gbm*lWX{|$n>u~ z!1$HT4(=~0MLNYSL-ZtYWpqU{`V>6jKR%REj4GTo2D{D*iDDu+^H5?FAUYD;!8$}mf~Tz%m^_t}6*-Rlvb)}h5SC=JD{1*fWeNKl%1}uhP4$&D zJg-XdJ`GR#PZ;2hFMsHYR;ihQ!`h{gS-y0G2bpY>p0}g@^40x!0=aA?wOYt}-P;AX zNHgLOLayEk;W!OTPtyx&nlW}*KDH{4en(&7i!)ml@Df)*>q!;-S_NSSVk!%x1->{% zOh`sA==M1qS*{q#SVIE2_mWcx8mdqSo+KnPB&wn| z)j6iWsJ_mwwfPfD^ouDvWBa5|JfpMevk@O0{m26UqD#wN=Yt*REa1P0}rn zMpaZ7oWMCmk20|86mx6JI#ku*2+k{;Q!lkTtwF2NuCqN((vD z-OVuZt!|2~?VJ;{Fy2CexeT66R_xP7vA8)K`9b&a#F+y2>2SDy$5#blY_+8l8Q3L7 zY>I6&ocJa0whc9G^En!@ppt29s-cn@Lc=08TNPXt_^#w5s=}+k^IA$(;Pq;%;L?Od z_SoohJ4P~Z(ZYG6zJO2#A%Hkl;Ln&Uh!-``*y@hoZq3pbN%{m_6-4cEs=%MwXtC8Q zVgbN-^J%j09PV(x^bTeca0+#AiB7L#vmon@ex2==D>`OzLdSjK^4i4lBXGe^(j_u5 zGK7$*y>j2^@pnBqZ>`;Db5h+U77XJg8N(>l=*-rk+w1*^M|bX{rxvJt89KrTD~-TM z+v7||FB)ivlJx}-=7nSp#E}g>2)Th>^syR_q{gFd7%_>k+krVjrvW)Zr;V7Q)k@s` zFm`o#B{&`E8wGhEgU6DHI&SR!z6TE`)X&kEMsC4yevSiyINn5e^F)=#e#AEr2EXXR zLMUMag(1m7QZv|;{}_;XOIi#BiGjtCS{p&`Xrx{cH&q(l3wAADF#YDG#0y$U8jeJ= z0aNzvj;R;4)h@lD?FBP#Krd*0m2wi_g54C(Pi|HzEiCndtXd6$nk$o^Ik__VwyP)Y zMm#A{FUX}~;E(Lx#J!+RTsA?a!=B2AtuuquVCF=t8ORfK+K92?PF5@1h&RX( zKjaZFm_XEVV~=`4d;QW2+Fyy_Z#X};wsa3aqpeH5prBI2V0QfIJ8H{1Q^6Og7Zik< z8|uPeq=0idZEQZt$iDMjg+^ zB|uTdE5R_XtY-Qq%n2F|)exez5t3M?G|}IIF*&aw&P4x@=!MwtmZDPAj7X#Dxxhpg z9gPq+nb{qNU&1gt6pEo6bZcI2K*E^P;g}Fwl9y~eu;wI5;#RNc$(E$) z4M(?pUKoyr+B_N#a*9VW?p%(9xlw?tYsv1D5wqdiz|klTTb!qt#U8Gj2}&a7XNl3^ z!Y~v=5Q#>^Carj^X4scC$vwdb&}MK7A~u8Fx6zTnxl|!|4J`&MCbh-LMaa z_98BGiF?(W0MFk3F&+qTJ@G;)5JM?KLW3l!CAFumvEn^VLK9-bS?F$@u@G%4TiRA} zQWbbY4>c6+W~0}^bC&8oJdHE-B4(Er{_^oR={DVx(#8!OfEG*#PwWM{k6ft*%I)K# zooe^-k9WfyrH~)*YNzqlKYpg-#`ISTaE_LFbVyG%CAl4WV(pB)qzB1}9lx_V= zo45RPkGo5hF%D7Ch0avUn&}UvTrq_5jVWc!VDCfs7Y0QEZ z2~B)T)*Mq-PS(8o3up2f`l{Gg^O2&I#v?~nCQIXsEgbUagq^&}dxJb77A%Sz7z=W8 zf!?Brum|`)(m?ur{2{*2ZfNi0E%81dcoH`jZ}J=J`{b5<-z;DHU-1syB)8c6r*5j# zX0)3^r`b(-1)z^-`7UXRTy3f4ls{IRIOy&*+iDm#WkvU2<2++pBs(K`Nj-vCgn77c 
z1P^XM>D4~&O;6Fz?tBdo52^`ASqmK{-tXdlCIYkI9*gLjGzfjq;BT6;=6bW(W^V6$eiHuuz%zwEr-v@&ILd zH)$6GRRD806%h$XHHG7m;w-pRu;`zp6PxLaY~;=6AijoqKl&QZ-Dc-($7yxx#1fu@ ziu(XpFTL`f>T>!2u#%(ixL`1BQFN2(=2j;|6IJDX!3QM2M^8M?eyrK>a|UJWlCsd*M`&Bmt^On?!IavDq>Xq#vne-{%8Dg8^SnKBh>fOXo02l= zlTGqA^CcmPS@>y#ye&yy7^y|%x0O_6zQjjbc3Toi;ulkN+vzE@0oaz36Gi1Z%TW_@6vcImx0>M0sv4x6aCs=t}Us*P9 zte7_{=8aXO%!={be7r$#BT!n=f zdi$3f_sAB9%}&47?TGGS^qeMR2W5*^!9AE{Wuif#W#eLaMsN_RjHqDuXmEHms1NQR zHgVu!#%a^v*sZnkTw9;jh$)Kv`1v@=Z&F5RY{^*C4=%((7yd$6Z@#Qufr*zsbku13 z>`x#>D=Bjm8`qE-OsW(wHE?AH{WoBRx)p(&n!nf|urVIIzSi3%*8uN^DcVIvAti2y zrLF%;ItP)Q_+7l*Y;=73Z@@?umDrLyWN>)+F6?j_X1G!!y;48To!T5%vlbQ2tArXL z*&mdfp zSF3J-keIp-lLS*(Bv(~lgIh5}aU}<}wcO`8%}aV!F9ohXfamoht_Y#zZ@|e8IJPQo z7_uQy#TK3*A#6X#mMf%0M$A*NwU(n6hS(x~OaogLi7lndSf7(sD;2KRbL2|F7U`fS zVoRwvD;KdM!JvX;3pG~9)|$lDx@ccFe~;o~N?OseMbCl}E+{q5LZ7uNY;|tfLP8a6 zRRp$H^TXC^uGrehQ42$CkxYbvtyPIFrOJF2F!WoSIdY|7i=;6Uv8B|TFKZH8sIfY> zHYBz--GC7BTdO*@9NphgzqOV-wvbQ-TdM+FYc`MMhB?)5twpevzHb$3;Z}}X7-EYg zs|;+dNo*-q=Bt1qSKB#qrC^JsxDv6Y)SEAB5?iRTI<~eXwzf0+tu-B6Ys7Qr;J4Ou z#}*Q*U~5faYdt?~tw*qxJ|=t*oMK9XNt_(j@O7MtEX$R8D7#$iL0ql*Dxtzzo=FTm zjYo1u5}~mOliVpa=gZx75gMZ6GKfj4mIHaYk`ZL^>@0@+6HAVWeGkgW@lZMZoq z8x&Oswh;kYdIzScqLN||!pNM}P(c=x*VqU_rqmkS)5xlupm3&S7)Ity1(`LqAwj0p zoG*70WT?0X$SNVoRx^TZLkHPLP9WP9($B32hJ-30+Ylhz%nxLn`GQQzH3(tk0KK!$`WAlnik+s+SU+YykB z)F)H24?-BZfGj5Eu^oa;sWo3URKi6i`7knPD#)x^4+%1*=2%mZmF3Wrq2d}K+X_Lp zoiVAnt%J<57+X^DDopc1=4B!Nkx_P&kA@+o3d*(vS<`4RD;1`L;6<3DGfyb-)n)%Y z{k0pJD_&jZle(0dEssvDk3Ij;+qMd&uX=E5&}+0hjh%KAw}S1p$VK1(6VLx6JVgbH zH*RCubm|&gWI-;;lJV+tz8m1|m_3oiR4S1M2S+`R9k13c5dH$y< z{~xFR|L}|`JfSRz!B&ac{Wj3 Date: Mon, 4 Mar 2019 17:29:31 -0700 Subject: [PATCH 46/89] Working on custom objective function. Finished subtracting pixel-wise mean from all bands. 
--- .../compose_array_single_shapefile.py | 4 +- fully-conv-classification/data_generators.py | 64 ++++---- fully-conv-classification/data_utils.py | 149 ++++++++++++++---- fully-conv-classification/fully_conv.py | 110 ++++--------- fully-conv-classification/models.py | 6 +- fully-conv-classification/path_map.pkl | Bin 6 -> 0 bytes fully-conv-classification/prepare_images.py | 3 +- .../runner_from_shapefile.py | 135 +++++++++++----- fully-conv-classification/runspec.py | 1 + 9 files changed, 277 insertions(+), 195 deletions(-) delete mode 100644 fully-conv-classification/path_map.pkl diff --git a/fully-conv-classification/compose_array_single_shapefile.py b/fully-conv-classification/compose_array_single_shapefile.py index 4ce57ef..cdf2c4d 100644 --- a/fully-conv-classification/compose_array_single_shapefile.py +++ b/fully-conv-classification/compose_array_single_shapefile.py @@ -205,7 +205,9 @@ def tile_geometry(self): return wrs_meta class PTASingleShapefile: - + # TODO: This class can be easily modified + # for the extraction of pixel-wise data from landsat images + # on a large scale. 
def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, row=None, masked_raster=None, training_directory=None, paths_map=None, masks=None, instances=None, sz=1000, overwrite_points=None, kernel_size=None, data_filename=None): diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 8d3c204..819da37 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -107,7 +107,7 @@ def set_class_mask(self, class_mask): self.dict['class_mask'] = class_mask -def create_training_data(target_dict, shapefile_directory, image_directory, training_directory, +def extract_training_data(target_dict, shapefile_directory, image_directory, training_directory, count, save=True): ''' target_dict: {filename or string in filename : class_code} ''' done = set() @@ -127,7 +127,6 @@ def create_training_data(target_dict, shapefile_directory, image_directory, trai suffix = '{}_{}_{}.tif'.format(p, r, year) master_raster = os.path.join(image_directory, train_raster + suffix) mask_file = os.path.join(image_directory, mask_raster + suffix) # for rasterio.mask.mask - # this file is projected the same as the shapefile. masks = [] all_matches.append(f) shp = None @@ -201,54 +200,48 @@ def generate_training_data(training_directory, max_pools, random_sample=True, tr in subdirectories labeled class_n_train and that n_classes is a global variable.''' class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] + # ADD if statement in class dirs. generators = [] for d in class_dirs: generators.append(DataGen(d)) - # TODO: Apply image augmentation. 
+ q = 0 while True: min_samples = np.inf data = [] for gen in generators: out = gen.next().copy() data.append(out) - n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) - if n_samples < min_samples: - min_samples = n_samples + if random_sample: + n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + if n_samples < min_samples: + min_samples = n_samples + for subset in data: if random_sample: samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, - fill_value=1) + fill_value=subset['class_code']) else: samp = subset['class_mask'] - samp[samp != NO_DATA] = 1 - one_hot = np.ones((NUM_CLASSES, samp.shape[1], samp.shape[2]))*NO_DATA - one_hot[int(subset['class_code']), :, :] = samp - for i in range(NUM_CLASSES): - if i != int(subset['class_code']): - one_hot[i, :, :][samp[0, :, :] != NO_DATA] = 0 - subset['class_mask'] = one_hot + samp[samp != NO_DATA] = subset['class_code'] + + subset['class_mask'] = samp masters = [] masks = [] + first = True for subset in data: master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) - masters.append(master) - masks.append(mask) + if first: + shape = master.shape + first = False + if master.shape == shape: + masters.append(master[0, :, :, :]) + masks.append(mask[0, :, :, :]) - if train: - augmented_masters = [] - augmented_masks = [] - for master, mask in zip(masters, masks): - ms, msk = augment_data(master, mask) - augmented_masters.append(ms) - augmented_masks.append(msk) - masters += augmented_masters - masks += augmented_masks - - # Shuffle order of data here? 
- for ms, msk in zip(masters, masks): - yield ms, msk + yield np.asarray(masters, dtype=np.float32), np.asarray(masks, dtype=np.int32) + # for ms, msk in zip(masters, masks): + # yield ms, msk def rotation(image, angle): @@ -318,7 +311,8 @@ def preprocess_data(master, mask, max_pools, return_cuts=False): if __name__ == '__main__': shapefile_directory = 'shapefile_data/' - image_directory = 'master_rasters/' + image_train = 'master_rasters/train/' + image_test = 'master_rasters/test/' irr1 = 'Huntley' irr2 = 'Sun_River' fallow = 'Fallow' @@ -329,12 +323,12 @@ def preprocess_data(master, mask, max_pools, return_cuts=False): train_dir = 'training_data/multiclass/train/' shp_train = 'shapefile_data/train/' count = 0 - save = False - count, pixel_dict = create_training_data(target_dict, shp_train, image_directory, train_dir, - count, save=save) + save = True + count, pixel_dict = extract_training_data(target_dict, shp_train, image_train, train_dir, + count, save=save) + # Need to parallelize the extraction of training data. 
print("You have {} instances per training epoch.".format(count)) print("And {} instances in each class.".format(pixel_dict)) - max_weight = max(pixel_dict.values()) for key in pixel_dict: print(key, max_weight / pixel_dict[key]) @@ -342,7 +336,7 @@ def preprocess_data(master, mask, max_pools, return_cuts=False): test_dir = 'training_data/multiclass/test/' shp_test = 'shapefile_data/test/' count = 0 - count, pixel_dict = create_training_data(target_dict, shp_test, image_directory, test_dir, + count, pixel_dict = extract_training_data(target_dict, shp_test, image_test, test_dir, count, save=save) print("You have {} instances per test epoch.".format(count)) print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index a797517..ba5007b 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -15,14 +15,14 @@ from sklearn.neighbors import KDTree from sat_image.warped_vrt import warp_single_image -NO_DATA = -1 def get_features(gdf): tmp = json.loads(gdf.to_json()) features = [feature['geometry'] for feature in tmp['features']] return features -def generate_class_mask(shapefile, master_raster): + +def generate_class_mask(shapefile, master_raster, no_data=-1): ''' Generates a mask with class_val everywhere shapefile data is present and a no_data value everywhere else. no_data is -1 in this case, as it is never a valid class label. 
@@ -33,11 +33,15 @@ def generate_class_mask(shapefile, master_raster): with rasopen(master_raster, 'r') as src: shp = shp.to_crs(src.crs) features = get_features(shp) - out_image, out_transform = mask(src, shapes=features, nodata=NO_DATA) + out_image, out_transform = mask(src, shapes=features, nodata=no_data) return out_image -def create_master_raster(image_stack, path, row, year, raster_directory): +def create_master_raster(paths_map, path, row, year, raster_directory, mean_map): + """ Creates a master raster with depth given by the organization of the + paths_map. Paths map is a dictionary of lists, with keys the band names + (B1, B2...) and values the paths of the images in the filesystem + corresponding to that band. """ fname = "master_raster_{}_{}_{}.tif".format(path, row, year) pth = os.path.join(raster_directory, fname) mask_fname = "class_mask_{}_{}_{}.tif".format(path, row, year) @@ -45,7 +49,7 @@ def create_master_raster(image_stack, path, row, year, raster_directory): if os.path.isfile(pth): print("Master raster already created for {}_{}_{}.".format(path, row, year)) if os.path.isfile(mask_path): - print('Class mask template already created') + print('Class mask template already created for {}_{}_{}'.format(path, row, year)) return pth else: print("Creating class mask template.") @@ -61,47 +65,65 @@ def create_master_raster(image_stack, path, row, year, raster_directory): msk.write(out, 1) return pth - paths_map = image_stack.paths_map first = True stack = None - - for i, feat in enumerate(paths_map.keys()): # ordered dict ensures accuracy here. - - feature_raster = paths_map[feat] # maps bands to their location in filesystem. 
- - with rasopen(feature_raster, mode='r') as src: - arr = src.read() - raster_geo = src.meta.copy() - - if first: - first_geo = raster_geo.copy() - empty = zeros((len(paths_map.keys()), arr.shape[1], arr.shape[2]), float32) - stack = empty - stack[i, :, :] = arr - first = False - else: - try: - stack[i, :, :] = arr - except ValueError: - # error can be thrown here if source raster doesn't have crs - # OR ! Because rasterio version. - # However, deepcopy becomes an issue with the latest - # version of rasterio. - arr = warp_single_image(feature_raster, first_geo) - stack[i, :, :] = arr - - first_geo.update(count=1) - msk_out = zeros((1, stack.shape[1], stack.shape[2])).astype(float32) + num_rasters = 0 + for k in paths_map: + num_rasters += len(paths_map[k]) + + j = 0 + for feat in paths_map.keys(): + feature_rasters = paths_map[feat] # maps bands to their location in filesystem. + for feature_raster in feature_rasters: + band_mean = None + for band in mean_map: + if feature_raster.endswith(band): + band_mean = mean_map[band] + + if band_mean is None: + print("Band mean not found in mean_mapping for {}".format(feature_raster)) + return + + with rasopen(feature_raster, mode='r') as src: + arr = src.read().astype(type(band_mean)) + arr -= band_mean + raster_geo = src.meta.copy() + + if first: + first_geo = raster_geo.copy() + empty = zeros((num_rasters, arr.shape[1], arr.shape[2]), float32) + stack = empty + stack[j, :, :] = arr + j += 1 + first = False + else: + try: + stack[j, :, :] = arr + j += 1 + except ValueError: + # error can be thrown here if source raster doesn't have crs + # OR ! Because rasterio version. + # However, deepcopy becomes an issue with the latest + # version of rasterio. 
+ arr = warp_single_image(feature_raster, first_geo) + stack[j, :, :] = arr + j += 1 + + msk_out = zeros((1, stack.shape[1], stack.shape[2])) + first_geo.update(count=1, dtype=msk_out.dtype) with rasopen(mask_path, mode='w', **first_geo) as msk: msk.write(msk_out) - first_geo.update(count=len(paths_map.keys())) + first_geo.update(count=num_rasters, dtype=stack.dtype) with rasopen(pth, mode='w', **first_geo) as dst: dst.write(stack) + print("Master raster saved to {}.".format(pth)) + return pth + def get_shapefile_lat_lon(shapefile): ''' Center of shapefile''' with fopen(shapefile, "r") as src: @@ -111,6 +133,7 @@ def get_shapefile_lat_lon(shapefile): return latc, lonc + def normalize_and_save_image(fname): norm = True with rasopen(fname, 'r') as rsrc: @@ -130,6 +153,31 @@ def normalize_and_save_image(fname): print("Normalized", fname) dst.update_tags(normalized=True) + +def raster_sum(raster): + with rasopen(raster, 'r') as src: + arr_masked = src.read(1, masked=True) # get rid of nodata values, + s = arr_masked.sum() + count = arr_masked.count() + return s, count + + +def bandwise_mean(paths_list, band_name): + ''' Need to center the data to have + a zero mean. This means iterating over all images, + and taking the "band-wise" mean, then subtracting + that mean from the band. This mean should + also only be computed for the test set, but applied + to the training set. 
''' + n_pixels = 0 + pixel_value_sum = 0 + for filepath in paths_list: + p_sum, num_pix = raster_sum(filepath) + pixel_value_sum += p_sum + n_pixels += num_pix + return (pixel_value_sum / n_pixels, band_name) + + def download_images(project_directory, path, row, year, satellite=8, n_landsat=3): image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, @@ -295,6 +343,7 @@ def split_shapefile(base, base_shapefile, data_directory): for feat in unique[key]: dst.write(id_mapping[feat]) + def get_shapefile_path_row(shapefile): """This function assumes that the original shapefile has already been split, and relies on @@ -303,6 +352,36 @@ def get_shapefile_path_row(shapefile): # TODO: Find some way to update shapefile metadata shp = shapefile[-9:-4].split("_") return int(shp[0]), int(shp[1]) + + +def shapefile_area(shapefile): + summ = 0 + with fopen(shapefile, "r") as src: + for feat in src: + poly = shape(feat['geometry']) + summ += poly.area + return summ + + +def get_total_area(data_directory, filenames): + ''' Gets the total area of the polygons + in the files in filenames + TODO: Get an equal-area projection''' + + tot = 0 + for f in glob.glob(data_directory + "*.shp"): + if "sample" not in f: + for f2 in filenames: + if f2 in f: + tot += shapefile_area(f) + return tot + + +def required_points(shapefile, total_area, total_instances): + area = shapefile_area(shapefile) + frac = area / total_area + return int(total_instances * frac) + if __name__ == "__main__": pass diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 3816acc..26c28e5 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -25,36 +25,6 @@ NUM_CLASSES = 4 WRS2 = '../spatial_data/wrs2_descending_usa.shp' -def m_acc(y_true, y_pred): - ''' Calculate accuracy from masked data. 
- The built-in accuracy metric uses all data (masked & unmasked).''' - y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) - masked = tf.not_equal(y_true, NO_DATA) - indices = tf.where(masked) - indices = tf.to_int32(indices) - indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) - y_true_masked = tf.gather_nd(params=y_true, indices=indices) - y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) - return K.cast(K.equal(K.argmax(y_true_masked, axis=-1), K.argmax(y_pred_masked, axis=-1)), K.floatx()) - -def custom_objective_binary(y_true, y_pred): - '''I want to mask all values that - are not data, given a y_true - that has NODATA values. The boolean mask - operation is failing. It should output - a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) - tensor.''' - y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) - masked = tf.not_equal(y_true, NO_DATA) - indices = tf.where(masked) - indices = tf.to_int32(indices) - indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) - y_true_masked = tf.gather_nd(params=y_true, indices=indices) - y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) - return tf.keras.losses.binary_crossentropy(y_true_masked, y_pred_masked) - def custom_objective(y_true, y_pred): '''I want to mask all values that are not data, given a y_true @@ -62,15 +32,13 @@ def custom_objective(y_true, y_pred): operation is failing. 
It should output a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) tensor.''' - y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) - masked = tf.not_equal(y_true, NO_DATA) - indices = tf.where(masked) - indices = tf.to_int32(indices) - indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) - y_true_masked = tf.gather_nd(params=y_true, indices=indices) - y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) - return tf.keras.losses.categorical_crossentropy(y_true_masked, y_pred_masked) + y_true = tf.reshape(y_true, (K.shape(y_true)[0], K.shape(y_true)[1]*K.shape(y_true)[2])) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[0], K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) + y_true = tf.cast(y_true, tf.int32) + losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true) + mask = tf.not_equal(y_true, NO_DATA) + losses = tf.boolean_mask(losses, mask) + return tf.reduce_mean(losses) def evaluate_image(master_raster, model, max_pools, outfile=None, ii=None): @@ -153,10 +121,11 @@ def clip_rasters(evaluated_tif_dir, include_string): row = out[3:5] clip_raster(f, int(path), int(row), outfile=f) -def evaluate_images(image_directory, include_string, max_pools, exclude_string, prefix, save_dir): +def evaluate_images(image_directory, model, include_string, max_pools, exclude_string, prefix, save_dir): ii = 0 for f in glob(os.path.join(image_directory, "*.tif")): if exclude_string not in f and include_string in f: + print(f) out = os.path.basename(f) os.path.split(out)[1] out = out[out.find("_"):] @@ -213,12 +182,12 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo model = model(NUM_CLASSES) if NUM_CLASSES <= 2: model.compile(loss=custom_objective_binary, - metrics=[m_acc], + metrics=['accuracy'], optimizer='adam') else: model.compile(loss=custom_objective, - 
metrics=['accuracy', m_acc], - optimizer=tf.keras.optimizers.Adam(lr=0.0001)) + metrics=['accuracy'], + optimizer=tf.keras.optimizers.Adam(lr=1e-6)) tb = TensorBoard(log_dir='graphs/30epochssimple/') train = os.path.join(training_directory, 'train') @@ -229,51 +198,34 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo train=False, box_size=box_size) model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, + max_queue_size=2, epochs=epochs, verbose=1, - callbacks=[tb], class_weight=[31.0, 1, 2.16, 67.76], - use_multiprocessing=True) - #validation_data=test_generator, - #validation_steps=valid_steps, + use_multiprocessing=False) return model if __name__ == '__main__': - image_directory = 'master_rasters/' + image_directory = 'master_rasters/test/' training_directory = 'training_data/multiclass/' - m_dir = 'eval_test/multiclass/' + m_dir = 'compare_model_outputs/multiclass/' - #get_iou() - # models = [fcnn_functional, fcnn_functional_small, fcnn_model] - # save_dirs = [os.path.join(m_dir, "complex_fcnn"), os.path.join(m_dir, "simple_fcnn"), - # os.path.join(m_dir, 'no_pools')] - # model_names = ["multiclass_complex_fcnn.h5", 'multiclass_simple_fcnn.h5', - # 'multiclass_no_pools_fcnn.h5'] - # raster_names = ["complex_fcnnmulticlass", "simple_fcnnmulticlass", "no_poolsmulticlass"] - - models = [fcnn_functional] - save_dirs = [os.path.join(m_dir, "augmented/")] - model_names = ["complex_fcnn_augmented.h5"] - raster_names = ["class_weightscomplexaugmented"] - i = 1 max_pools = 5 - for model_func, save_dir, model_name, raster_name in zip(models, save_dirs, model_names, raster_names): - pth = os.path.join(save_dir, model_name) - if not os.path.isfile(pth): - model = train_model(training_directory, model_func, steps_per_epoch=764, - valid_steps=246, max_pools=max_pools, epochs=5) - model.save(pth) - else: - model = tf.keras.models.load_model(pth, - custom_objects={'m_acc':m_acc, 'custom_objective':custom_objective}) + model_name 
= 'tst.h5' + save_dir = 'models/' + pth = os.path.join(save_dir, model_name) + model_func = fcnn_model + if not os.path.isfile(pth): + model = train_model(training_directory, model_func, steps_per_epoch=764, + valid_steps=246, max_pools=max_pools, epochs=1) + model.save(pth) + else: + model = tf.keras.models.load_model(pth, + custom_objects={'custom_objective':custom_objective}) - evaluate_images(image_directory, include_string="37_28", - exclude_string="class", max_pools=max_pools, prefix=raster_name, save_dir=save_dir) - clip_rasters(save_dir, "37_28") - if i == 2: - max_pools = 3 - if i == 3: - max_pools = 0 - i += 1 + raster_name = 'doyouwork_' + evaluate_images(image_directory, model, include_string="37_28", + exclude_string="class", max_pools=max_pools, prefix=raster_name, save_dir=save_dir) + clip_rasters(save_dir, "37_28") diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 11cbac7..f4abb3d 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -15,7 +15,7 @@ def fcnn_model(n_classes): model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) model.add(tf.keras.layers.Dropout(0.5)) model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', - activation='softmax')) # 1x1 convolutions for pixel-wise prediciton. + activation='relu')) # 1x1 convolutions for pixel-wise prediciton. 
# Take a look at the model summary #model.summary() return model @@ -59,7 +59,7 @@ def fcnn_functional_small(n_classes): u3_c1 = Concatenate()([u3, c1]) - c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u3_c1) + c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='relu', padding='same')(u3_c1) model = Model(inputs=x, outputs=c5) #model.summary() @@ -147,7 +147,7 @@ def fcnn_functional(n_classes): u5_c1 = Concatenate()([u5, c1]) - u6 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='softmax', padding='same')(u5_c1) + u6 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='relu', padding='same')(u5_c1) model = Model(inputs=x, outputs=u6) #model.summary() diff --git a/fully-conv-classification/path_map.pkl b/fully-conv-classification/path_map.pkl deleted file mode 100644 index 482c0e338367d0aeb4ddc43602ac727276f77052..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6 NcmZo*jxA)+0{{jJ0e}Di diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index dba7c95..eb614ae 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -20,7 +20,7 @@ abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(abspath) -from numpy import mean, datetime64 +from numpy import datetime64 from collections import OrderedDict from datetime import datetime from landsat.google_download import GoogleDownload @@ -206,7 +206,6 @@ def get_terrain(self): dem.terrain(attribute='aspect', out_file=aspect_name, save_and_return=True) elev = dem.terrain(attribute='elevation') - elev = elev - mean(elev) dem.save(elev, geometry=dem.target_profile, output_filename=dif_elev) def get_et(self): diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 49cf263..1848960 100644 --- 
a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -1,13 +1,15 @@ -import warnings import os import glob import pickle -from multiprocessing import Pool +from multiprocessing import Pool from numpy import save as nsave from compose_array_single_shapefile import PTASingleShapefile, ShapefileSamplePoints from fiona import open as fopen +from collections import defaultdict from shapely.geometry import shape -from data_utils import download_images, get_shapefile_path_row, split_shapefile, create_master_raster, filter_shapefile +from data_utils import (download_images, get_shapefile_path_row, split_shapefile, + create_master_raster, filter_shapefile, bandwise_mean) +from runspec import landsat_rasters, static_rasters def download_images_over_shapefile(shapefile, image_directory, year, master_raster_directory): @@ -27,10 +29,9 @@ def download_images_over_shapefile(shapefile, image_directory, year, master_rast else: ims = download_images(landsat_dir, p, r, year, satellite) - ms = create_master_raster(ims, p, r, year, master_raster_directory) - return ims + def download_from_pr(p, r, image_directory, year, master_raster_directory): '''Downloads p/r corresponding to the location of the shapefile, and creates master raster''' @@ -45,40 +46,14 @@ def download_from_pr(p, r, image_directory, year, master_raster_directory): else: ims = download_images(landsat_dir, p, r, year, satellite) - ms = create_master_raster(ims, p, r, year, master_raster_directory) - return ims + def sample_points_from_shapefile(shapefile_path, instances): ssp = ShapefileSamplePoints(shapefile_path, m_instances=instances) ssp.create_sample_points(save_points=True) return ssp.outfile -def shapefile_area(shapefile): - summ = 0 - with fopen(shapefile, "r") as src: - for feat in src: - poly = shape(feat['geometry']) - summ += poly.area - return summ - -def get_total_area(data_directory, filenames): - ''' Gets the total area of the polygons - in the 
files in filenames - TODO: Get an equal-area projection''' - - tot = 0 - for f in glob.glob(data_directory + "*.shp"): - if "sample" not in f: - for f2 in filenames: - if f2 in f: - tot += shapefile_area(f) - return tot - -def required_points(shapefile, total_area, total_instances): - area = shapefile_area(shapefile) - frac = area / total_area - return int(total_instances * frac) def split_shapefiles_multiproc(f): data_directory = 'split_shapefiles_west/' @@ -86,6 +61,7 @@ def split_shapefiles_multiproc(f): fname = os.path.basename(f) split_shapefile(shp_dir, fname, data_directory) + def download_all_images(image_directory, shapefile_directory, year=2013): ''' Downloads all images over each shapefile in shapefile directory ''' @@ -99,12 +75,76 @@ def download_all_images(image_directory, shapefile_directory, year=2013): if t not in done: done.add(t) ims = download_images_over_shapefile(f, image_directory, year, master) - all_paths.append(ims.paths_map) - with open("path_map.pkl", 'wb') as f: - pickle.dump(all_paths, f) + print("Done downloading images for {}. Make sure there were no 503 codes returned".format(shapefile_directory)) + + +def all_rasters(image_directory, satellite=8): + ''' Recursively get all rasters in image_directory + and its subdirectories, and adds them to band_map. ''' + band_map = defaultdict(list) + for band in landsat_rasters()[satellite]: + band_map[band] = [] + for band in static_rasters(): + band_map[band] = [] + + extensions = (".tif", ".TIF") + for dirpath, dirnames, filenames in os.walk(image_directory): + for f in filenames: + if any(ext in f for ext in extensions): + for band in band_map: + if f.endswith(band): + band_map[band].append(os.path.join(dirpath, f)) + return band_map + +def raster_means(image_directory, satellite=8): + """ Gets all means of all images stored + in image_directory and its subdirectories. 
+ Images end with (.tif, .TIF) + Image_directory in a typical case would + be image_data/train/ """ + + outfile = os.path.join(image_directory, "mean_mapping.pkl") + if os.path.isfile(outfile): + with open(outfile, 'rb') as f: + mean_mapping = pickle.load(f) + return mean_mapping + + band_map = all_rasters(image_directory, satellite) + mean_mapping = {} + + paths_for_multiproc = [] + bands_for_multiproc = [] + for band in band_map: + bands_for_multiproc.append(band) + paths_for_multiproc.append(band_map[band]) + + with Pool() as pool: + means = pool.starmap(bandwise_mean, zip(paths_for_multiproc, bands_for_multiproc)) + + for mean, band in means: + mean_mapping[band] = mean + + with open(outfile, 'wb') as f: + pickle.dump(mean_mapping, f) + + return mean_mapping + - return all_paths +def create_all_master_rasters(image_directory, raster_save_directory, mean_mapping): + """ Creates a master raster for all images in image_directory. + Image directory is assumed to be a top-level directory that contains + all the path_row directories for test or train (image_data/test/path_row_year*/) + Image directory is image_data/test/ in this case.""" + dirs = os.listdir(image_directory) + for sub_dir in dirs: + out = os.path.join(image_directory, sub_dir) + if os.path.isdir(out): + paths_map = all_rasters(out) + path = sub_dir[:2] + row = sub_dir[3:5] + year = sub_dir[-4:] + create_master_raster(paths_map, path, row, year, raster_save_directory, mean_mapping) if __name__ == "__main__": @@ -112,9 +152,24 @@ def download_all_images(image_directory, shapefile_directory, year=2013): # shp = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/" # for f in glob.glob(shp + "*.shp"): # filter_shapefile(f, out_shapefile_directory) - - image_directory = 'image_data/' + # This project is becoming more complicated. + # Needs a test / train organization! + # 1. Filter shapefiles. + # 2. Download images over shapefiles + # 3. Create master rasters + # 4. Extract training data + # 5. 
Train network. + + image_train_directory = 'image_data/train' + image_test_directory = 'image_data/test' + image_dirs = [image_train_directory, image_test_directory] shapefile_directory = 'shapefile_data/all_shapefiles' - master = 'master_rasters/' + master_train = 'master_rasters/train' + master_test = 'master_rasters/test' + master_dirs = [master_train, master_test] year = 2013 - all_paths = download_all_images(image_directory, shapefile_directory, year) + #download_all_images(image_directory, shapefile_directory, year) + satellite = 8 + for im_dir, mas_dir in zip(image_dirs, master_dirs): + mean_map = raster_means(im_dir) + create_all_master_rasters(im_dir, mas_dir, mean_map) diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py index d18aabf..e9e8f96 100644 --- a/fully-conv-classification/runspec.py +++ b/fully-conv-classification/runspec.py @@ -35,6 +35,7 @@ def landsat_rasters(): 'B7.TIF', 'B8.TIF', 'B9.TIF', 'B10.TIF', 'B11.TIF')} return b + def climate_rasters(root): return [f for f in glob(os.path.join(root, "*.tif")) if 'precip' in f] From 126f84c76d3b93c6305d014ee2620c79fafa445b Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 8 Mar 2019 15:14:56 -0700 Subject: [PATCH 47/89] Added multiclass support, ability to take means of multiple images, and included cloud/water fmask in selection of training data --- fully-conv-classification/data_generators.py | 112 ++++++++++++------ fully-conv-classification/data_utils.py | 16 ++- fully-conv-classification/fully_conv.py | 102 ++++++++++------ fully-conv-classification/models.py | 82 ++++++++++--- fully-conv-classification/prepare_images.py | 2 +- .../runner_from_shapefile.py | 44 ++++--- 6 files changed, 243 insertions(+), 115 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 819da37..dee1d6e 100644 --- a/fully-conv-classification/data_generators.py +++ 
b/fully-conv-classification/data_generators.py @@ -7,10 +7,12 @@ from glob import glob from random import sample, shuffle from sklearn.utils.class_weight import compute_class_weight +from runspec import mask_rasters from data_utils import generate_class_mask, get_shapefile_path_row from rasterio import open as rasopen from warnings import warn from skimage import transform +from sat_image.warped_vrt import warp_single_image NO_DATA = -1 CHUNK_SIZE = 608 # some value that is evenly divisible by 2^MAX_POOLS. @@ -107,8 +109,36 @@ def set_class_mask(self, class_mask): self.dict['class_mask'] = class_mask -def extract_training_data(target_dict, shapefile_directory, image_directory, training_directory, - count, save=True): +def get_masks(image_directory): + ''' Returns all masks in the image directory.''' + paths = [] + for dirpath, dirnames, filenames in os.walk(image_directory): + for f in filenames: + for suffix in mask_rasters(): + if f.endswith(suffix): + paths.append(os.path.join(dirpath, f)) + out = None + first_geo = None + n_masks = len(paths) + first = True + for mask_file in paths: + mask, meta = load_raster(mask_file) + # mask value here is 1. + if first: + first = False + first_geo = meta.copy() + out = np.zeros((mask.shape[1], mask.shape[2])) + try: + out[mask[0] == 1] = 1 # 0 index is for removing the (1, n, m) dimension. + except ValueError as e: + print(e) + mask = warp_single_image(mask_file, first_geo) + out[mask[0] == 1] = 1 + return out + + +def extract_training_data(target_dict, shapefile_directory, image_directory, + master_raster_directory, training_directory, count, save=True): ''' target_dict: {filename or string in filename : class_code} ''' done = set() pixel_dict = {} # counts number of pixels present in each class. 
@@ -125,13 +155,15 @@ def extract_training_data(target_dict, shapefile_directory, image_directory, tra done.add(match) p, r = get_shapefile_path_row(f) suffix = '{}_{}_{}.tif'.format(p, r, year) - master_raster = os.path.join(image_directory, train_raster + suffix) - mask_file = os.path.join(image_directory, mask_raster + suffix) # for rasterio.mask.mask - masks = [] + fmask = get_masks(os.path.join(image_directory, suffix[:-4])) + master_raster = os.path.join(master_raster_directory, train_raster + suffix) + mask_file = os.path.join(master_raster_directory, mask_raster + suffix) # for rasterio.mask.mask + masks = [] # these are class masks for the labelling of data. all_matches.append(f) shp = None for match in all_matches: msk = generate_class_mask(match, mask_file) + msk[0][fmask == 1] = NO_DATA shp = msk.shape cc = assign_class_code(target_dict, match) if cc is not None: @@ -145,14 +177,15 @@ def extract_training_data(target_dict, shapefile_directory, image_directory, tra for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - for msk in masks: - s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if not np.all(s == NO_DATA): - pixel_dict[msk.class_code] += len(np.where(s != NO_DATA)[0]) - count += 1 - if save: - dt = DataTile(sub_master, s, msk.class_code) - dt.to_pickle(training_directory) + if sub_master.shape[1] == CHUNK_SIZE and sub_master.shape[2] == CHUNK_SIZE: + for msk in masks: + s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + if not np.all(s == NO_DATA): + pixel_dict[msk.class_code] += len(np.where(s != NO_DATA)[0]) + count += 1 + if save: + dt = DataTile(sub_master, s, msk.class_code) + dt.to_pickle(training_directory) return count, pixel_dict @@ -195,7 +228,7 @@ def _from_pickle(self, filename): return data -def generate_training_data(training_directory, max_pools, random_sample=True, train=True, box_size=0): +def 
generate_training_data(training_directory, max_pools, sample_random=True, box_size=0): ''' Assumes data is stored in training_directory in subdirectories labeled class_n_train and that n_classes is a global variable.''' @@ -204,20 +237,19 @@ def generate_training_data(training_directory, max_pools, random_sample=True, tr generators = [] for d in class_dirs: generators.append(DataGen(d)) - q = 0 while True: min_samples = np.inf data = [] for gen in generators: out = gen.next().copy() data.append(out) - if random_sample: + if sample_random: n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) if n_samples < min_samples: min_samples = n_samples for subset in data: - if random_sample: + if sample_random: samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, fill_value=subset['class_code']) else: @@ -229,19 +261,23 @@ def generate_training_data(training_directory, max_pools, random_sample=True, tr masters = [] masks = [] first = True + ccs = [] for subset in data: master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) - if first: + master = master[0, :, :, :] + mask = mask[0, :, :, 0] + if first: shape = master.shape first = False - if master.shape == shape: - masters.append(master[0, :, :, :]) - masks.append(mask[0, :, :, :]) + if master.shape == shape: # edges of images don't play very well. 
+ masters.append(master) + masks.append(mask) + + if len(masters) != 4: + print("This should not happen.") + continue - - yield np.asarray(masters, dtype=np.float32), np.asarray(masks, dtype=np.int32) - # for ms, msk in zip(masters, masks): - # yield ms, msk + yield np.asarray(masters, dtype=np.float32), np.asarray(masks) def rotation(image, angle): @@ -311,8 +347,10 @@ def preprocess_data(master, mask, max_pools, return_cuts=False): if __name__ == '__main__': shapefile_directory = 'shapefile_data/' - image_train = 'master_rasters/train/' - image_test = 'master_rasters/test/' + master_train = 'master_rasters/train/' + master_test = 'master_rasters/test/' + image_train = 'image_data/train/' + image_test = 'image_data/test/' irr1 = 'Huntley' irr2 = 'Sun_River' fallow = 'Fallow' @@ -324,19 +362,19 @@ def preprocess_data(master, mask, max_pools, return_cuts=False): shp_train = 'shapefile_data/train/' count = 0 save = True - count, pixel_dict = extract_training_data(target_dict, shp_train, image_train, train_dir, - count, save=save) - # Need to parallelize the extraction of training data. - print("You have {} instances per training epoch.".format(count)) - print("And {} instances in each class.".format(pixel_dict)) - max_weight = max(pixel_dict.values()) - for key in pixel_dict: - print(key, max_weight / pixel_dict[key]) + # count, pixel_dict = extract_training_data(target_dict, shp_train, image_train, + # master_train, train_dir, count, save=save) + # # Need to parallelize the extraction of training data. 
+ # print("You have {} instances per training epoch.".format(count)) + # print("And {} instances in each class.".format(pixel_dict)) + # max_weight = max(pixel_dict.values()) + # for key in pixel_dict: + # print(key, max_weight / pixel_dict[key]) tot = 0 test_dir = 'training_data/multiclass/test/' shp_test = 'shapefile_data/test/' count = 0 - count, pixel_dict = extract_training_data(target_dict, shp_test, image_test, test_dir, - count, save=save) + count, pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, + test_dir, count, save=save) print("You have {} instances per test epoch.".format(count)) print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index ba5007b..113b9c5 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -68,11 +68,13 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map) first = True stack = None num_rasters = 0 - for k in paths_map: - num_rasters += len(paths_map[k]) + for key in paths_map: + num_rasters += len(paths_map[key]) j = 0 - for feat in paths_map.keys(): + for feat in sorted(paths_map.keys()): # ensures the stack is in the same order each time. + # Ordering within bands is assured by sorting the list that + # each band corresponding to, as that's essentially sorting by date. feature_rasters = paths_map[feat] # maps bands to their location in filesystem. 
for feature_raster in feature_rasters: band_mean = None @@ -85,8 +87,7 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map) return with rasopen(feature_raster, mode='r') as src: - arr = src.read().astype(type(band_mean)) - arr -= band_mean + arr = src.read() raster_geo = src.meta.copy() if first: @@ -156,7 +157,7 @@ def normalize_and_save_image(fname): def raster_sum(raster): with rasopen(raster, 'r') as src: - arr_masked = src.read(1, masked=True) # get rid of nodata values, + arr_masked = src.read(1, masked=True) # get rid of nodata values s = arr_masked.sum() count = arr_masked.count() return s, count @@ -175,6 +176,9 @@ def bandwise_mean(paths_list, band_name): p_sum, num_pix = raster_sum(filepath) pixel_value_sum += p_sum n_pixels += num_pix + if n_pixels == 0: + print("0 non masked pixels.") + return 1 return (pixel_value_sum / n_pixels, band_name) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 26c28e5..43dd0cf 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -2,7 +2,7 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' import keras.backend as K import tensorflow as tf -#tf.enable_eager_execution() +tf.enable_eager_execution() import matplotlib.pyplot as plt import numpy as np import json @@ -18,7 +18,7 @@ from fiona import open as fopen from data_generators import generate_training_data, load_raster, preprocess_data from data_utils import generate_class_mask -from models import fcnn_functional, fcnn_model, fcnn_functional_small +from models import fcnn_functional, fcnn_model, fcnn_functional_small, unet NO_DATA = -1 CHUNK_SIZE = 608 # some value that is divisible by 2^MAX_POOLS. @@ -26,19 +26,28 @@ WRS2 = '../spatial_data/wrs2_descending_usa.shp' def custom_objective(y_true, y_pred): - '''I want to mask all values that - are not data, given a y_true - that has NODATA values. The boolean mask - operation is failing. 
It should output - a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) - tensor.''' - y_true = tf.reshape(y_true, (K.shape(y_true)[0], K.shape(y_true)[1]*K.shape(y_true)[2])) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[0], K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) - y_true = tf.cast(y_true, tf.int32) - losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true) + y_true_for_loss = y_true mask = tf.not_equal(y_true, NO_DATA) + y_true_for_loss = tf.where(mask, y_true, tf.zeros_like(y_true)) + y_pred_n = tf.nn.softmax(y_pred) + y_true_for_loss = tf.cast(y_true_for_loss, tf.int32) + losses = tf.keras.losses.sparse_categorical_crossentropy(y_true_for_loss, y_pred_n) + if np.any(np.isnan(losses.numpy())): + print("Nan value in loss.") losses = tf.boolean_mask(losses, mask) - return tf.reduce_mean(losses) + loss = tf.reduce_mean(losses) + return loss + + +def masked_acc(y_true, y_pred): + y_pred = tf.nn.softmax(y_pred) + y_pred = tf.argmax(y_pred, axis=3) + mask = tf.not_equal(y_true, NO_DATA) + y_true = tf.boolean_mask(y_true, mask) + y_pred = tf.boolean_mask(y_pred, mask) + y_true = tf.cast(y_true, tf.int64) + y_pred = tf.cast(y_pred, tf.int64) + return K.mean(tf.math.equal(y_true, y_pred)) def evaluate_image(master_raster, model, max_pools, outfile=None, ii=None): @@ -50,15 +59,28 @@ def evaluate_image(master_raster, model, max_pools, outfile=None, ii=None): master, meta = load_raster(master_raster) class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder out = np.zeros((master.shape[2], master.shape[1], NUM_CLASSES)) - + + # for i in range(master.shape[0]-2): + # fig, ax = plt.subplots(ncols=2) + # ax[0].imshow(master[i]) + # ax[1].imshow(master[i+1]) + # plt.show() + CHUNK_SIZE = 1248 for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] 
sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, max_pools, return_cuts=True) + print(sub_master.shape) preds = model.predict(sub_master) - preds = preds[0, :, :, :] + print('pred', preds.shape) + + preds_exp = np.exp(preds) + preds_softmaxed = preds_exp / np.sum(preds_exp, axis=3, keepdims=True) + if np.any(np.isnan(preds)): + print("Nan prediction.") + preds = preds_softmaxed[0, :, :, :] if cut_cols == 0 and cut_rows == 0: out[j:j+CHUNK_SIZE, i:i+CHUNK_SIZE, :] = preds @@ -182,27 +204,35 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo model = model(NUM_CLASSES) if NUM_CLASSES <= 2: model.compile(loss=custom_objective_binary, - metrics=['accuracy'], + metrics=[masked_acc], optimizer='adam') else: - model.compile(loss=custom_objective, - metrics=['accuracy'], - optimizer=tf.keras.optimizers.Adam(lr=1e-6)) + model.compile( + loss=custom_objective, + optimizer=tf.train.AdamOptimizer(learning_rate=1e-3), + metrics=[masked_acc] + ) - tb = TensorBoard(log_dir='graphs/30epochssimple/') + import time + graph_path = os.path.join('graphs/', str(int(time.time()))) + os.mkdir(graph_path) + tb = TensorBoard(log_dir=graph_path, write_images=True, batch_size=4) train = os.path.join(training_directory, 'train') test = os.path.join(training_directory, 'test') - train_generator = generate_training_data(train, max_pools, random_sample=False, - train=True, box_size=box_size) - test_generator = generate_training_data(test, max_pools, random_sample=False, - train=False, box_size=box_size) + train_generator = generate_training_data(train, max_pools, sample_random=False, + box_size=box_size) + test_generator = generate_training_data(test, max_pools, sample_random=False, + box_size=box_size) model.fit_generator(train_generator, + validation_data=test_generator, + validation_steps=valid_steps, steps_per_epoch=steps_per_epoch, - max_queue_size=2, epochs=epochs, + callbacks=[tb, tf.keras.callbacks.TerminateOnNaN()], 
verbose=1, - class_weight=[31.0, 1, 2.16, 67.76], - use_multiprocessing=False) + class_weight=[30.0, 1.0, 2.73, 72.8], + use_multiprocessing=True) + return model @@ -216,16 +246,20 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo model_name = 'tst.h5' save_dir = 'models/' pth = os.path.join(save_dir, model_name) - model_func = fcnn_model + model_func = fcnn_functional + steps_per_epoch = 179 + valid_steps = 2 + epochs = 3 if not os.path.isfile(pth): - model = train_model(training_directory, model_func, steps_per_epoch=764, - valid_steps=246, max_pools=max_pools, epochs=1) - model.save(pth) + model = train_model(training_directory, model_func, steps_per_epoch=steps_per_epoch, + valid_steps=valid_steps, max_pools=max_pools, epochs=epochs) + # model.save(pth) else: model = tf.keras.models.load_model(pth, custom_objects={'custom_objective':custom_objective}) - raster_name = 'doyouwork_' - evaluate_images(image_directory, model, include_string="37_28", + raster_name = 'noclouds_' + pr_to_eval = '37_28' + evaluate_images(image_directory, model, include_string=pr_to_eval, exclude_string="class", max_pools=max_pools, prefix=raster_name, save_dir=save_dir) - clip_rasters(save_dir, "37_28") + clip_rasters(save_dir, pr_to_eval) diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index f4abb3d..5d14dca 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -3,7 +3,8 @@ import keras.backend as K import tensorflow as tf from tensorflow.keras.models import Model -from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, Dropout, UpSampling2D) +from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, + Dropout, UpSampling2D, BatchNormalization) def fcnn_model(n_classes): model = tf.keras.Sequential() @@ -15,8 +16,7 @@ def fcnn_model(n_classes): model.add(tf.keras.layers.Conv2D(filters=16, 
kernel_size=2, padding='same', activation='relu')) model.add(tf.keras.layers.Dropout(0.5)) model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', - activation='relu')) # 1x1 convolutions for pixel-wise prediciton. - # Take a look at the model summary + activation='linear')) #model.summary() return model @@ -30,7 +30,7 @@ def fcnn_functional_small(n_classes): c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(mp1) c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(c2) mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - mp2 = Dropout(0.5)(mp2) + #mp2 = Dropout(0.5)(mp2) c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(mp2) c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(c3) @@ -48,7 +48,7 @@ def fcnn_functional_small(n_classes): u2 = UpSampling2D(size=(2, 2))(u2) u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = Dropout(0.5)(u2) + #u2 = Dropout(0.5)(u2) u2_c2 = Concatenate()([u2, c2]) u2_c2 = Dropout(0.5)(u2_c2) @@ -59,7 +59,7 @@ def fcnn_functional_small(n_classes): u3_c1 = Concatenate()([u3, c1]) - c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='relu', padding='same')(u3_c1) + c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='linear', padding='same')(u3_c1) model = Model(inputs=x, outputs=c5) #model.summary() @@ -71,7 +71,7 @@ def fcnn_functional(n_classes): x = Input((None, None, 36)) base = 2 # exp from 4 to 5. 
- exp = 6 + exp = 5 c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(x) c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c1) mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) @@ -81,21 +81,20 @@ def fcnn_functional(n_classes): c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp1) c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c2) mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - # mp2 = Dropout(0.5)(mp2) + #mp2 = Dropout(0.5)(mp2) exp+=1 c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp2) c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c3) mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) - #Jkj mp3 = Dropout(0.5)(mp3) + #mp3 = Dropout(0.5)(mp3) exp+=1 c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp3) c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c4) mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) - # mp4 = Dropout(0.5)(mp4) exp+=1 @@ -117,7 +116,7 @@ def fcnn_functional(n_classes): u2 = UpSampling2D(size=(2, 2))(u2) u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) - # u2 = Dropout(0.5)(u2) + #u2 = Dropout(0.5)(u2) u2_c4 = Concatenate()([u2, c4]) @@ -126,7 +125,7 @@ def fcnn_functional(n_classes): u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u2_c4) u3 = UpSampling2D(size=(2, 2))(u3) u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3) - #u3 = Dropout(0.5)(u3) + #u3 = Dropout(0.5)(u3) u3_c3 = Concatenate()([u3, c3]) @@ -135,6 +134,7 @@ def fcnn_functional(n_classes): u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3_c3) 
u4 = UpSampling2D(size=(2, 2))(u4) u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4) + u4 = BatchNormalization(axis=3)(u4) u4_c2 = Concatenate()([u4, c2]) @@ -144,11 +144,65 @@ def fcnn_functional(n_classes): u5 = UpSampling2D(size=(2, 2))(u5) u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) + u5 = BatchNormalization(axis=3)(u5) u5_c1 = Concatenate()([u5, c1]) - u6 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='relu', padding='same')(u5_c1) + u6 = Conv2D(filters=n_classes, kernel_size=(1, 1), activation='linear', padding='same')(u5_c1) + u6 = BatchNormalization(axis=3)(u6) model = Model(inputs=x, outputs=u6) - #model.summary() + model.summary() + return model + + +def unet(n_classes): + inputs = Input((None, None, 36)) + conv1 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs) + conv1 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv1) + pool1 = MaxPooling2D(pool_size=(2, 2))(conv1) + conv2 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool1) + conv2 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv2) + pool2 = MaxPooling2D(pool_size=(2, 2))(conv2) + conv3 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool2) + conv3 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv3) + pool3 = MaxPooling2D(pool_size=(2, 2))(conv3) + conv4 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool3) + conv4 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv4) + drop4 = Dropout(0.5)(conv4) + pool4 = MaxPooling2D(pool_size=(2, 2))(drop4) + + conv5 = Conv2D(512, 3, 
activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool4) + conv5 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv5) + drop5 = Dropout(0.5)(conv5) + + up6 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(drop5)) + merge6 = Concatenate()([drop4,up6]) + conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge6) + conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv6) + + up7 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv6)) + merge7 = Concatenate()([conv3, up7]) + conv7 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge7) + conv7 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv7) + + up8 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv7)) + merge8 = Concatenate()([conv2,up8]) + conv8 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge8) + conv8 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv8) + + up9 = Conv2D(32, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv8)) + merge9 = Concatenate()([conv1,up9]) + conv9 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge9) + conv9 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9) + conv9 = Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9) + conv10 = Conv2D(n_classes, 1, activation='linear')(conv9) + bn1 = BatchNormalization(axis=3)(conv10) + + model = Model(inputs=inputs, outputs=bn1) + + 
#model.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = ['accuracy']) + + model.summary() + return model diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index eb614ae..4f6cd62 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -99,7 +99,7 @@ def build_training(self): self.paths_map, self.masks = self._order_images() def build_evaluating(self): - self.get_landsat(fmask=False) + self.get_landsat(fmask=True) self.profile = self.landsat.rasterio_geometry #self.get_et() #self.get_precip() diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 1848960..b0acf41 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -1,18 +1,19 @@ import os import glob import pickle +from pprint import pprint from multiprocessing import Pool from numpy import save as nsave from compose_array_single_shapefile import PTASingleShapefile, ShapefileSamplePoints from fiona import open as fopen -from collections import defaultdict +from collections import defaultdict, OrderedDict from shapely.geometry import shape from data_utils import (download_images, get_shapefile_path_row, split_shapefile, create_master_raster, filter_shapefile, bandwise_mean) from runspec import landsat_rasters, static_rasters -def download_images_over_shapefile(shapefile, image_directory, year, master_raster_directory): +def download_images_over_shapefile(shapefile, image_directory, year): '''Downloads p/r corresponding to the location of the shapefile, and creates master raster. Image_directory: where to save the raw images. 
@@ -55,13 +56,6 @@ def sample_points_from_shapefile(shapefile_path, instances): return ssp.outfile -def split_shapefiles_multiproc(f): - data_directory = 'split_shapefiles_west/' - shp_dir = '/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup' - fname = os.path.basename(f) - split_shapefile(shp_dir, fname, data_directory) - - def download_all_images(image_directory, shapefile_directory, year=2013): ''' Downloads all images over each shapefile in shapefile directory ''' @@ -74,8 +68,7 @@ def download_all_images(image_directory, shapefile_directory, year=2013): t = template.format(p, r, year) if t not in done: done.add(t) - ims = download_images_over_shapefile(f, image_directory, year, master) - + ims = download_images_over_shapefile(f, image_directory, year) print("Done downloading images for {}. Make sure there were no 503 codes returned".format(shapefile_directory)) @@ -95,6 +88,10 @@ def all_rasters(image_directory, satellite=8): for band in band_map: if f.endswith(band): band_map[band].append(os.path.join(dirpath, f)) + + for band in band_map: + band_map[band] = sorted(band_map[band]) # ensures ordering within bands. + return band_map def raster_means(image_directory, satellite=8): @@ -102,7 +99,8 @@ def raster_means(image_directory, satellite=8): in image_directory and its subdirectories. 
Images end with (.tif, .TIF) Image_directory in a typical case would - be image_data/train/ """ + be image_data/train/ + Does this even need to be calculated?""" outfile = os.path.join(image_directory, "mean_mapping.pkl") if os.path.isfile(outfile): @@ -113,18 +111,12 @@ def raster_means(image_directory, satellite=8): band_map = all_rasters(image_directory, satellite) mean_mapping = {} - paths_for_multiproc = [] - bands_for_multiproc = [] for band in band_map: - bands_for_multiproc.append(band) - paths_for_multiproc.append(band_map[band]) - - with Pool() as pool: - means = pool.starmap(bandwise_mean, zip(paths_for_multiproc, bands_for_multiproc)) - - for mean, band in means: + mean, bnd = bandwise_mean(band_map[band], band) mean_mapping[band] = mean + pprint(mean_mapping) + with open(outfile, 'wb') as f: pickle.dump(mean_mapping, f) @@ -144,6 +136,8 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi path = sub_dir[:2] row = sub_dir[3:5] year = sub_dir[-4:] + from pprint import pprint + print(paths_map) create_master_raster(paths_map, path, row, year, raster_save_directory, mean_mapping) @@ -163,13 +157,17 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi image_train_directory = 'image_data/train' image_test_directory = 'image_data/test' image_dirs = [image_train_directory, image_test_directory] + shp_train = 'shapefile_data/train/' + shp_test = 'shapefile_data/test/' + shp_dirs = [shp_train, shp_test] shapefile_directory = 'shapefile_data/all_shapefiles' master_train = 'master_rasters/train' master_test = 'master_rasters/test' master_dirs = [master_train, master_test] year = 2013 - #download_all_images(image_directory, shapefile_directory, year) - satellite = 8 + + for s, i in zip(shp_dirs, image_dirs): + download_all_images(i, s, year) for im_dir, mas_dir in zip(image_dirs, master_dirs): mean_map = raster_means(im_dir) create_all_master_rasters(im_dir, mas_dir, mean_map) From 
ff3c2552b91bb3bbea0ab3631ebe7380464acabd Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 13 Mar 2019 18:01:08 -0600 Subject: [PATCH 48/89] Added climate rasters. Added unet architecture, and multiclass support. --- fully-conv-classification/data_generators.py | 103 +++++-- fully-conv-classification/data_utils.py | 36 ++- fully-conv-classification/fully_conv.py | 260 +++++++++++++----- fully-conv-classification/models.py | 169 ++++++++---- fully-conv-classification/prepare_images.py | 55 ++-- .../runner_from_shapefile.py | 68 +++-- fully-conv-classification/runspec.py | 6 +- 7 files changed, 505 insertions(+), 192 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index dee1d6e..838aa68 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -15,7 +15,6 @@ from sat_image.warped_vrt import warp_single_image NO_DATA = -1 -CHUNK_SIZE = 608 # some value that is evenly divisible by 2^MAX_POOLS. NUM_CLASSES = 4 def random_sample(class_mask, n_instances, box_size=0, fill_value=1): @@ -136,9 +135,62 @@ def get_masks(image_directory): out[mask[0] == 1] = 1 return out +def extract_training_data_unet(target_dict, shapefile_directory, image_directory, + master_raster_directory, training_directory, count, save=True, chunk_size=572): + ''' target_dict: {filename or string in filename : class_code} ''' + CHUNK_SIZE = 572 # input to unet. + # this means the class mask must be 388x388 - chop 184 from height/width. + done = set() + pixel_dict = {} # counts number of pixels present in each class. 
+ for class_code in target_dict.values(): + pixel_dict[class_code] = 0 + year = 2013 + train_raster = 'master_raster_' + mask_raster = 'class_mask_' + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if f not in done: + all_matches = all_matching_shapefiles(f, shapefile_directory) + done.add(f) + for match in all_matches: + done.add(match) + p, r = get_shapefile_path_row(f) + suffix = '{}_{}_{}.tif'.format(p, r, year) + fmask = get_masks(os.path.join(image_directory, suffix[:-4])) + master_raster = os.path.join(master_raster_directory, train_raster + suffix) + mask_file = os.path.join(master_raster_directory, mask_raster + suffix) # for rasterio.mask.mask + masks = [] # these are class masks for the labelling of data. + all_matches.append(f) + shp = None + for match in all_matches: + msk = generate_class_mask(match, mask_file) + msk[0][fmask == 1] = NO_DATA + shp = msk.shape + cc = assign_class_code(target_dict, match) + if cc is not None: + dm = DataMask(msk, cc) + masks.append(dm) + if save: + master, meta = load_raster(master_raster) + else: + master = np.zeros(shp) + + for i in range(0, master.shape[1], CHUNK_SIZE): + for j in range(0, master.shape[2], CHUNK_SIZE): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + if sub_master.shape[1] == CHUNK_SIZE and sub_master.shape[2] == CHUNK_SIZE: + for msk in masks: + s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + s = s[:, 92:-92, 92:-92] # clip edges for unet. 
+ if not np.all(s == NO_DATA): + pixel_dict[msk.class_code] += len(np.where(s != NO_DATA)[0]) + count += 1 + if save: + dt = DataTile(sub_master, s, msk.class_code) + dt.to_pickle(training_directory) + return count, pixel_dict def extract_training_data(target_dict, shapefile_directory, image_directory, - master_raster_directory, training_directory, count, save=True): + master_raster_directory, training_directory, count, save=True, chunk_size=572): ''' target_dict: {filename or string in filename : class_code} ''' done = set() pixel_dict = {} # counts number of pixels present in each class. @@ -147,6 +199,7 @@ def extract_training_data(target_dict, shapefile_directory, image_directory, year = 2013 train_raster = 'master_raster_' mask_raster = 'class_mask_' + CHUNK_SIZE = chunk_size for f in glob(os.path.join(shapefile_directory, "*.shp")): if f not in done: all_matches = all_matching_shapefiles(f, shapefile_directory) @@ -233,7 +286,6 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo in subdirectories labeled class_n_train and that n_classes is a global variable.''' class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] - # ADD if statement in class dirs. 
generators = [] for d in class_dirs: generators.append(DataGen(d)) @@ -248,6 +300,8 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo if n_samples < min_samples: min_samples = n_samples + first = False + one_hot = None for subset in data: if sample_random: samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, @@ -255,27 +309,25 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo else: samp = subset['class_mask'] samp[samp != NO_DATA] = subset['class_code'] + samp[samp == NO_DATA] = 4 subset['class_mask'] = samp masters = [] masks = [] + shape = None first = True - ccs = [] for subset in data: master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) master = master[0, :, :, :] - mask = mask[0, :, :, 0] + mask = mask[0, :, :, 0] if first: shape = master.shape first = False if master.shape == shape: # edges of images don't play very well. - masters.append(master) - masks.append(mask) - - if len(masters) != 4: - print("This should not happen.") - continue + if np.any(mask != NO_DATA): + masters.append(master) + masks.append(mask) yield np.asarray(masters, dtype=np.float32), np.asarray(masks) @@ -349,32 +401,33 @@ def preprocess_data(master, mask, max_pools, return_cuts=False): shapefile_directory = 'shapefile_data/' master_train = 'master_rasters/train/' master_test = 'master_rasters/test/' - image_train = 'image_data/train/' - image_test = 'image_data/test/' + image_train = 'image_data/train/' # for fmasks. + image_test = 'image_data/test/' # for fmasks. 
irr1 = 'Huntley' irr2 = 'Sun_River' fallow = 'Fallow' forest = 'Forrest' other = 'other' target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} - year = 2013 - train_dir = 'training_data/multiclass/train/' - shp_train = 'shapefile_data/train/' + train_dir = 'training_data/multiclass/train_3927/' + shp_train = 'shapefile_data/train_3927/' count = 0 save = True - # count, pixel_dict = extract_training_data(target_dict, shp_train, image_train, - # master_train, train_dir, count, save=save) - # # Need to parallelize the extraction of training data. - # print("You have {} instances per training epoch.".format(count)) - # print("And {} instances in each class.".format(pixel_dict)) - # max_weight = max(pixel_dict.values()) - # for key in pixel_dict: - # print(key, max_weight / pixel_dict[key]) + count, pixel_dict = extract_training_data_unet(target_dict, shp_train, image_train, + master_train, train_dir, count, save=save) + # Need to parallelize the extraction of training data. + # Or maybe not. It seems like parallelizing the opening/closing + # of rasters can stomp on the data. 
+ print("You have {} instances per training epoch.".format(count)) + print("And {} instances in each class.".format(pixel_dict)) + max_weight = max(pixel_dict.values()) + for key in pixel_dict: + print(key, max_weight / pixel_dict[key]) tot = 0 test_dir = 'training_data/multiclass/test/' shp_test = 'shapefile_data/test/' count = 0 - count, pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, + count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, test_dir, count, save=save) print("You have {} instances per test epoch.".format(count)) print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 113b9c5..1a01424 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -6,7 +6,7 @@ from lxml import html from requests import get from copy import deepcopy -from numpy import zeros, asarray, array, reshape, nan +from numpy import zeros, asarray, array, reshape, nan, sqrt, std from shapely.geometry import shape from collections import defaultdict from rasterio import float32, open as rasopen @@ -37,7 +37,7 @@ def generate_class_mask(shapefile, master_raster, no_data=-1): return out_image -def create_master_raster(paths_map, path, row, year, raster_directory, mean_map): +def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, stddev_map): """ Creates a master raster with depth given by the organization of the paths_map. Paths map is a dictionary of lists, with keys the band names (B1, B2...) 
and values the paths of the images in the filesystem @@ -81,6 +81,7 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map) for band in mean_map: if feature_raster.endswith(band): band_mean = mean_map[band] + band_std = stddev_map[band] if band_mean is None: print("Band mean not found in mean_mapping for {}".format(feature_raster)) @@ -90,6 +91,9 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map) arr = src.read() raster_geo = src.meta.copy() + #arr = (arr - band_mean) / band_std + arr = (arr - arr.mean()) / std(arr) + if first: first_geo = raster_geo.copy() empty = zeros((num_rasters, arr.shape[1], arr.shape[2]), float32) @@ -158,11 +162,35 @@ def normalize_and_save_image(fname): def raster_sum(raster): with rasopen(raster, 'r') as src: arr_masked = src.read(1, masked=True) # get rid of nodata values - s = arr_masked.sum() - count = arr_masked.count() + s = arr_masked.sum() + count = arr_masked.count() return s, count +def raster_squared_sum(raster, mean): + with rasopen(raster, 'r') as src: + arr_masked = src.read(1, masked=True) # get rid of nodata values + squared_diff = (arr_masked - mean)**2 + s = squared_diff.sum() + count = squared_diff.count() + return s, count + + +def bandwise_stddev(paths_list, band_name, band_mean): + ''' Calculate the stddev of the pixel + values in a given band through time.''' + n_pixels = 0 + pixel_value_squared_sum = 0 + for filepath in paths_list: + p_sum, num_pix = raster_squared_sum(filepath, band_mean) + pixel_value_squared_sum += p_sum + n_pixels += num_pix + if n_pixels == 0: + print("0 non masked pixels.") + return 1 + return (sqrt(pixel_value_squared_sum / n_pixels), band_name) + + def bandwise_mean(paths_list, band_name): ''' Need to center the data to have a zero mean. 
This means iterating over all images, diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 43dd0cf..9892083 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -1,5 +1,6 @@ import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +import time import keras.backend as K import tensorflow as tf tf.enable_eager_execution() @@ -21,22 +22,49 @@ from models import fcnn_functional, fcnn_model, fcnn_functional_small, unet NO_DATA = -1 -CHUNK_SIZE = 608 # some value that is divisible by 2^MAX_POOLS. -NUM_CLASSES = 4 +CHUNK_SIZE = 572 # some value that is divisible by 2^MAX_POOLS. +NUM_CLASSES = 5 WRS2 = '../spatial_data/wrs2_descending_usa.shp' def custom_objective(y_true, y_pred): y_true_for_loss = y_true mask = tf.not_equal(y_true, NO_DATA) y_true_for_loss = tf.where(mask, y_true, tf.zeros_like(y_true)) - y_pred_n = tf.nn.softmax(y_pred) y_true_for_loss = tf.cast(y_true_for_loss, tf.int32) - losses = tf.keras.losses.sparse_categorical_crossentropy(y_true_for_loss, y_pred_n) - if np.any(np.isnan(losses.numpy())): - print("Nan value in loss.") - losses = tf.boolean_mask(losses, mask) - loss = tf.reduce_mean(losses) - return loss + losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true_for_loss) + # the above line works in eager mode, but not otherwise. + # losses = tf.keras.losses.sparse_categorical_crossentropy(y_true_for_loss, y_pred) + out = tf.boolean_mask(losses, mask) + return out + +def weighted_loss(weight_map): + # All I need to do is multiply the output loss + # by the weights that I input. + # Loss is of shape n_classesxwidthxheight + # what does weight map have to be in this case? + # + def loss(y_true, y_pred): + losses = 0 + pass + pass + + +def custom_objective_v2(y_true, y_pred): + '''I want to mask all values that + are not data, given a y_true + that has NODATA values. The boolean mask + operation is failing. 
It should output + a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) + tensor.''' + y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) + masked = tf.not_equal(y_true, NO_DATA) + indices = tf.where(masked) + indices = tf.to_int32(indices) + indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) + y_true_masked = tf.gather_nd(params=y_true, indices=indices) + y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) + return tf.keras.losses.categorical_crossentropy(y_true_masked, y_pred_masked) def masked_acc(y_true, y_pred): @@ -50,6 +78,72 @@ def masked_acc(y_true, y_pred): return K.mean(tf.math.equal(y_true, y_pred)) +def m_acc(y_true, y_pred): + ''' Calculate accuracy from masked data. + The built-in accuracy metric uses all data (masked & unmasked).''' + y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) + y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) + masked = tf.not_equal(y_true, NO_DATA) + indices = tf.where(masked) + indices = tf.to_int32(indices) + indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) + y_true_masked = tf.gather_nd(params=y_true, indices=indices) + y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) + return K.cast(K.equal(K.argmax(y_true_masked, axis=-1), K.argmax(y_pred_masked, axis=-1)), K.floatx()) + + +def evaluate_image_unet(master_raster, model, max_pools, outfile=None, ii=None): + + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive handling of this case. 
+ else: + master, meta = load_raster(master_raster) + class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder + out = np.zeros((master.shape[2], master.shape[1], NUM_CLASSES)) + + CHUNK_SIZE = 572 + diff = 92 + stride = 388 + + for i in range(0, master.shape[1]-diff, stride): + for j in range(0, master.shape[2]-diff, stride): + sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] + sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, + max_pools, return_cuts=True) + if sub_master.shape[1] == 572 and sub_master.shape[2] == 572: + preds = model.predict(sub_master) + preds_exp = np.exp(preds) + preds_softmaxed = preds_exp / np.sum(preds_exp, axis=3, keepdims=True) + if np.any(np.isnan(preds)): + print("Nan prediction.") + preds = preds_softmaxed[0, :, :, :] + else: + continue + + if cut_cols == 0 and cut_rows == 0: + out[j+diff:j+CHUNK_SIZE-diff, i+diff:i+CHUNK_SIZE-diff, :] = preds + elif cut_cols == 0 and cut_rows != 0: + ofs = master.shape[1]-cut_rows + out[j+diff:j+CHUNK_SIZE-diff, i+diff:ofs-diff, :] = preds + elif cut_cols != 0 and cut_rows == 0: + ofs = master.shape[2]-cut_cols + out[j+diff:ofs-diff, i+diff:i+CHUNK_SIZE-diff, :] = preds + elif cut_cols != 0 and cut_rows != 0: + ofs_col = master.shape[2]-cut_cols + ofs_row = master.shape[1]-cut_rows + out[j+diff:ofs_col-diff, i+diff:ofs_row-diff, :] = preds + else: + print("whatcha got goin on here?") + + sys.stdout.write("N eval: {}. 
Percent done: {:.4f}\r".format(ii, i / master.shape[1])) + + out = np.swapaxes(out, 0, 2) + out = out.astype(np.float32) + if outfile: + save_raster(out, outfile, meta) + return out def evaluate_image(master_raster, model, max_pools, outfile=None, ii=None): if not os.path.isfile(master_raster): @@ -59,41 +153,37 @@ def evaluate_image(master_raster, model, max_pools, outfile=None, ii=None): master, meta = load_raster(master_raster) class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder out = np.zeros((master.shape[2], master.shape[1], NUM_CLASSES)) - - # for i in range(master.shape[0]-2): - # fig, ax = plt.subplots(ncols=2) - # ax[0].imshow(master[i]) - # ax[1].imshow(master[i+1]) - # plt.show() - CHUNK_SIZE = 1248 + + CHUNK_SIZE = 572 + for i in range(0, master.shape[1], CHUNK_SIZE): for j in range(0, master.shape[2], CHUNK_SIZE): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, max_pools, return_cuts=True) - print(sub_master.shape) - preds = model.predict(sub_master) - print('pred', preds.shape) - - preds_exp = np.exp(preds) - preds_softmaxed = preds_exp / np.sum(preds_exp, axis=3, keepdims=True) - if np.any(np.isnan(preds)): - print("Nan prediction.") - preds = preds_softmaxed[0, :, :, :] - + if sub_master.shape[1] == 572 and sub_master.shape[2] == 572: + preds = model.predict(sub_master) + preds_exp = np.exp(preds) + preds_softmaxed = preds_exp / np.sum(preds_exp, axis=3, keepdims=True) + if np.any(np.isnan(preds)): + print("Nan prediction.") + preds = preds_softmaxed[0, :, :, :] + else: + continue + oss = 92 if cut_cols == 0 and cut_rows == 0: - out[j:j+CHUNK_SIZE, i:i+CHUNK_SIZE, :] = preds + out[j+oss:j+CHUNK_SIZE-oss, i+oss:i+CHUNK_SIZE-oss, :] = preds elif cut_cols == 0 and cut_rows != 0: ofs = master.shape[1]-cut_rows - out[j:j+CHUNK_SIZE, i:ofs, :] = preds + out[j+oss:j+CHUNK_SIZE-oss, 
i+oss:ofs-oss, :] = preds elif cut_cols != 0 and cut_rows == 0: ofs = master.shape[2]-cut_cols - out[j:ofs, i:i+CHUNK_SIZE, :] = preds + out[j+oss:ofs-oss, i+oss:i+CHUNK_SIZE-oss, :] = preds elif cut_cols != 0 and cut_rows != 0: ofs_col = master.shape[2]-cut_cols ofs_row = master.shape[1]-cut_rows - out[j:ofs_col, i:ofs_row, :] = preds + out[j+oss:ofs_col-oss, i+oss:ofs_row-oss, :] = preds else: print("whatcha got goin on here?") @@ -155,7 +245,7 @@ def evaluate_images(image_directory, model, include_string, max_pools, exclude_s out = prefix + out + ".tif" out = os.path.join(save_dir, out) ii += 1 - evaluate_image(f, model, max_pools=max_pools, outfile=out, ii=ii) + evaluate_image_unet(f, model, max_pools=max_pools, outfile=out, ii=ii) def compute_iou(y_pred, y_true): ''' This is slow. ''' @@ -197,31 +287,36 @@ def get_iou(): y_pred = np.argmax(y_pred, axis=0) print(f, compute_iou(y_pred, y_true)) -def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, epochs=3): +def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, + epochs=3, random_sample=False, restore=False, learning_rate=1e-3): ''' This function assumes that train/test data are subdirectories of training_directory, with the names train/test.''' - model = model(NUM_CLASSES) + if not restore: + model = model(NUM_CLASSES) if NUM_CLASSES <= 2: model.compile(loss=custom_objective_binary, - metrics=[masked_acc], + metrics=[m_acc], optimizer='adam') else: + # model.compile( + # loss=custom_objective, + # optimizer='adam', + # metrics=[masked_acc] + # ) model.compile( loss=custom_objective, - optimizer=tf.train.AdamOptimizer(learning_rate=1e-3), + optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), metrics=[masked_acc] ) - - import time graph_path = os.path.join('graphs/', str(int(time.time()))) os.mkdir(graph_path) tb = TensorBoard(log_dir=graph_path, write_images=True, batch_size=4) train = 
os.path.join(training_directory, 'train') test = os.path.join(training_directory, 'test') - train_generator = generate_training_data(train, max_pools, sample_random=False, + train_generator = generate_training_data(train, max_pools, sample_random=random_sample, box_size=box_size) - test_generator = generate_training_data(test, max_pools, sample_random=False, + test_generator = generate_training_data(test, max_pools, sample_random=random_sample, box_size=box_size) model.fit_generator(train_generator, validation_data=test_generator, @@ -230,36 +325,77 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo epochs=epochs, callbacks=[tb, tf.keras.callbacks.TerminateOnNaN()], verbose=1, - class_weight=[30.0, 1.0, 2.73, 72.8], + class_weight=[25.923, 1.0, 2.79, 61.128, .75], use_multiprocessing=True) + return model, graph_path + + +def save_model_info(outfile, args): + template = '{}={}|' + with open(outfile, 'a') as f: + for key in args: + f.write(template.format(key, args[key])) + f.write("\n-------------------\n") + print("wrote run info to {}".format(outfile)) - return model if __name__ == '__main__': - image_directory = 'master_rasters/test/' training_directory = 'training_data/multiclass/' - m_dir = 'compare_model_outputs/multiclass/' - - max_pools = 5 - model_name = 'tst.h5' - save_dir = 'models/' - pth = os.path.join(save_dir, model_name) - model_func = fcnn_functional - steps_per_epoch = 179 - valid_steps = 2 - epochs = 3 - if not os.path.isfile(pth): - model = train_model(training_directory, model_func, steps_per_epoch=steps_per_epoch, - valid_steps=valid_steps, max_pools=max_pools, epochs=epochs) - # model.save(pth) + info_file = 'run_information.txt' + + max_pools = 0 + model_name = 'unet_{}.h5'.format(int(time.time())) + #model_name = 'unet_random_sample100.h5' + model_dir = 'models/' + info_path = os.path.join(model_dir, info_file) + model_save_path = os.path.join(model_dir, model_name) + + model_func = unet + + steps_per_epoch = 
157 #628 + valid_steps = 1 #233 + epochs = 1 + + train_more = False + eager = True + class_weights = True + learning_rate = 1e-4 + random_sample = False + augment = False + + raster_name = '5class' + pr_to_eval = '39_27' + image_directory = 'master_rasters/train/' + + param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, + 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, + 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, 'graph_path':None} + + evaluating = True + if not os.path.isfile(model_save_path): + model, graph_path = train_model(training_directory, model_func, + steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, + max_pools=max_pools, epochs=epochs, + random_sample=random_sample, learning_rate=learning_rate) + evaluating = False + model.save(model_save_path) else: - model = tf.keras.models.load_model(pth, + model = tf.keras.models.load_model(model_save_path, custom_objects={'custom_objective':custom_objective}) + if train_more: + model, graph_path = train_model(training_directory, model, steps_per_epoch=steps_per_epoch, + valid_steps=valid_steps, random_sample=random_sample, + max_pools=max_pools, epochs=epochs, restore=True) + model_name = 'unet_random_sample100.h5' + model.save(os.path.join(model_dir, model_name)) - raster_name = 'noclouds_' - pr_to_eval = '37_28' - evaluate_images(image_directory, model, include_string=pr_to_eval, - exclude_string="class", max_pools=max_pools, prefix=raster_name, save_dir=save_dir) - clip_rasters(save_dir, pr_to_eval) + if not evaluating or train_more: + param_dict['graph_path'] = graph_path + save_model_info(info_path, param_dict) + + evaluate_images(image_directory, model, include_string=pr_to_eval, + exclude_string="class", max_pools=max_pools, prefix=raster_name, + save_dir='compare_model_outputs/blurry/') + #clip_rasters('compare_model_outputs/blurry/', pr_to_eval) diff --git a/fully-conv-classification/models.py 
b/fully-conv-classification/models.py index 5d14dca..20ef49e 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -4,7 +4,7 @@ import tensorflow as tf from tensorflow.keras.models import Model from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, - Dropout, UpSampling2D, BatchNormalization) + Dropout, UpSampling2D, BatchNormalization, Cropping2D) def fcnn_model(n_classes): model = tf.keras.Sequential() @@ -16,7 +16,7 @@ def fcnn_model(n_classes): model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) model.add(tf.keras.layers.Dropout(0.5)) model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', - activation='linear')) + activation='softmax')) #model.summary() return model @@ -65,6 +65,106 @@ def fcnn_functional_small(n_classes): #model.summary() return model +def unet(n_classes, channel_depth=36): + x = Input((None, None, channel_depth)) + base = 2 + exp = 6 + + # 64 filters + c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(x) + c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c1) + mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) + + exp += 1 + # 128 filters + c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp1) + c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c3) + mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) + d1 = Dropout(0.5)(mp2) + + + exp += 1 + # 256 filters + c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(d1) + c6 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c5) + mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c6) + + exp += 1 + # 512 filters + c7 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp3) + c8 = Conv2D(filters=base**exp, 
kernel_size=(3,3), activation='relu', padding='valid')(c7) + mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c8) + d2 = Dropout(0.5)(mp4) + + exp += 1 + # 1024 filters + c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(d2) + c10 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c9) + + exp -= 1 + # 512 filters, making 1024 when concatenated with + # the corresponding layer from the contracting path. + u1 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + activation='relu')(c10) + + c8_cropped = Cropping2D(cropping=4)(c8) + concat_u1_c8 = Concatenate()([u1, c8_cropped]) + + # 512 filters + c11 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u1_c8) + + exp -= 1 + # 256 filters, making 512 when concatenated with the + # corresponding layer from the contracting path. + c12 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c11) + + u2 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + activation='relu')(c12) + + c6_cropped = Cropping2D(cropping=16)(c6) + concat_u2_c6 = Concatenate()([u2, c6_cropped]) + + # 256 filters + c13 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u2_c6) + bn1 = BatchNormalization(axis=3)(c13) + + exp -= 1 + # 128 filters, making 256 when concatenated with the + # corresponding layer from the contracting path. 
+ c14 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn1) + + u3 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + activation='relu')(c14) + + c4_cropped = Cropping2D(cropping=40)(c4) + concat_u3_c4 = Concatenate()([u3, c4_cropped]) + + # 128 filters + c15 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u3_c4) + + exp -= 1 + # 64 filters, making 128 when concatenated with the + # corresponding layer from the contracting path. + c16 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c15) + + u4 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + activation='relu')(c16) + + c2_cropped = Cropping2D(cropping=88)(c2) + concat_u4_c2 = Concatenate()([u4, c2_cropped]) + + c17 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u4_c2) + + c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(c17) + + last = Conv2D(filters=n_classes, kernel_size=1, activation='linear', padding='valid')(c18) + return Model(inputs=x, outputs=last) + def fcnn_functional(n_classes): @@ -116,7 +216,7 @@ def fcnn_functional(n_classes): u2 = UpSampling2D(size=(2, 2))(u2) u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) - #u2 = Dropout(0.5)(u2) + u2 = Dropout(0.5)(u2) u2_c4 = Concatenate()([u2, c4]) @@ -125,7 +225,7 @@ def fcnn_functional(n_classes): u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u2_c4) u3 = UpSampling2D(size=(2, 2))(u3) u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3) - #u3 = Dropout(0.5)(u3) + u3 = Dropout(0.5)(u3) u3_c3 = Concatenate()([u3, c3]) @@ -134,7 +234,7 @@ def fcnn_functional(n_classes): u4 = Conv2D(filters=base**exp, 
kernel_size=(3,3), activation='relu', padding='same')(u3_c3) u4 = UpSampling2D(size=(2, 2))(u4) u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4) - u4 = BatchNormalization(axis=3)(u4) + #u4 = BatchNormalization(axis=3)(u4) u4_c2 = Concatenate()([u4, c2]) @@ -144,65 +244,12 @@ def fcnn_functional(n_classes): u5 = UpSampling2D(size=(2, 2))(u5) u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) - u5 = BatchNormalization(axis=3)(u5) + #u5 = BatchNormalization(axis=3)(u5) u5_c1 = Concatenate()([u5, c1]) - u6 = Conv2D(filters=n_classes, kernel_size=(1, 1), activation='linear', padding='same')(u5_c1) - u6 = BatchNormalization(axis=3)(u6) + u6 = Conv2D(filters=n_classes, kernel_size=(3, 3), activation='softmax', padding='same')(u5_c1) model = Model(inputs=x, outputs=u6) - model.summary() - return model - - -def unet(n_classes): - inputs = Input((None, None, 36)) - conv1 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs) - conv1 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv1) - pool1 = MaxPooling2D(pool_size=(2, 2))(conv1) - conv2 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool1) - conv2 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv2) - pool2 = MaxPooling2D(pool_size=(2, 2))(conv2) - conv3 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool2) - conv3 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv3) - pool3 = MaxPooling2D(pool_size=(2, 2))(conv3) - conv4 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool3) - conv4 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 
'he_normal')(conv4) - drop4 = Dropout(0.5)(conv4) - pool4 = MaxPooling2D(pool_size=(2, 2))(drop4) - - conv5 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool4) - conv5 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv5) - drop5 = Dropout(0.5)(conv5) - - up6 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(drop5)) - merge6 = Concatenate()([drop4,up6]) - conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge6) - conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv6) - - up7 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv6)) - merge7 = Concatenate()([conv3, up7]) - conv7 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge7) - conv7 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv7) - - up8 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv7)) - merge8 = Concatenate()([conv2,up8]) - conv8 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge8) - conv8 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv8) - - up9 = Conv2D(32, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv8)) - merge9 = Concatenate()([conv1,up9]) - conv9 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(merge9) - conv9 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9) - conv9 = Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9) - conv10 = Conv2D(n_classes, 1, 
activation='linear')(conv9) - bn1 = BatchNormalization(axis=3)(conv10) - - model = Model(inputs=inputs, outputs=bn1) - - #model.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = ['accuracy']) - - model.summary() - + # model.summary() return model diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index 4f6cd62..b57db3f 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -1,4 +1,4 @@ -# ============================================================================================= +# ============================================================================================= # Copyright 2018 dgketchum # # Licensed under the Apache License, Version 2 (the "License"); @@ -47,7 +47,9 @@ class ImageStack(object): Prepare a stack of images from Landsat, terrain, etc. Save stack in identical geometry. """ - def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None, max_cloud_pct=None, start=None, end=None, year=None, n_landsat=None): + def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None, + max_cloud_pct=None, start=None, end=None, year=None, n_landsat=None, + climate_targets=None): self.landsat_mapping = {'LT5': Landsat5, 'LE7': Landsat7, 'LC8': Landsat8} self.landsat_mapping_abv = {5: 'LT5', 7: 'LE7', 8: 'LC8'} @@ -80,6 +82,9 @@ def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None self.cdl_tif = None self.cdl_mask = None + self.climate_targets = climate_targets + if not self.climate_targets: + self.climate_targets = ['pr', 'pet', 'rmin', 'rmax', 'tmmn', 'tmmx', 'bi', 'etr'] self.n = n_landsat @@ -93,16 +98,19 @@ def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None def build_training(self): self.get_landsat(fmask=True) self.profile = self.landsat.rasterio_geometry - self.get_precip() + self.get_climate() self.get_et() 
self.get_terrain() self.paths_map, self.masks = self._order_images() + def get_climate(self): + self.get_precip() + def build_evaluating(self): self.get_landsat(fmask=True) self.profile = self.landsat.rasterio_geometry - #self.get_et() - #self.get_precip() + #self.get_et() This doesn't work reliably. + self.get_climate() self.get_terrain() self.paths_map, self.masks = self._order_images() # paths map is just path-> location # in filesystem. @@ -145,18 +153,18 @@ def get_landsat(self, fmask=False): if fmask: [self._make_fmask(d) for d in self.image_dirs] - def get_precip(self): + def _get_bounds(self): poly_in = self.landsat.get_tile_geometry() poly_in = Polygon(poly_in[0]['coordinates'][0]) project = partial( pytransform, Proj(self.profile['crs']), - Proj(init='epsg:32612')) + Proj(self.profile['crs'])) + # The above is not needed. for_bounds = partial( pytransform, Proj(self.profile['crs']), Proj(init='epsg:4326')) - dates = self.scenes['DATE_ACQUIRED'].values # Change the coordinate system # The issue: the CRSs for the bounding box and for the mask are different. # In _project, the incorrect CRS was making it throw an error. @@ -166,25 +174,32 @@ def get_precip(self): poly_bounds = transform(for_bounds, poly_in) poly = Polygon(poly.exterior.coords) geometry = [mapping(poly)] - geometry[0]['crs'] = CRS({'init':'epsg:32612'}) + geometry[0]['crs'] = CRS(self.profile['crs']) bounds = poly_bounds.bounds - for date in dates: - outfile = os.path.join(self.root, 'precip_{}.tif'.format(date)) - if not os.path.isfile(outfile): - print("Get {}".format(outfile)) - d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. 
- bds = GeoBounds(wsen=bounds) - gm = GridMet(variable='pr', clip_feature=geometry, - bbox=bds, target_profile=self.profile, date=d) - out = gm.get_data_subset() - gm.save_raster(out, self.landsat.rasterio_geometry, outfile) + return bounds, geometry + + + def get_precip(self): + bounds, geometry = self._get_bounds() + dates = self.scenes['DATE_ACQUIRED'].values + for target in self.climate_targets: + for date in dates: + outfile = os.path.join(self.root, '{}_{}.tif'.format(date, target)) + if not os.path.isfile(outfile): + print("Get {}".format(os.path.basename(outfile))) + d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. + bds = GeoBounds(wsen=bounds) + gm = GridMet(variable=target, clip_feature=geometry, + bbox=bds, target_profile=self.profile, date=d) + out = gm.get_data_subset() + gm.save_raster(out, self.landsat.rasterio_geometry, outfile) def get_terrain(self): """ Get digital elevation maps from amazon web services save in the project root directory with filenames enumerated - in the next three lines. + in the next three lines (slope, aspect, elevation_diff). 
""" diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index b0acf41..be0198f 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -9,8 +9,8 @@ from collections import defaultdict, OrderedDict from shapely.geometry import shape from data_utils import (download_images, get_shapefile_path_row, split_shapefile, - create_master_raster, filter_shapefile, bandwise_mean) -from runspec import landsat_rasters, static_rasters + create_master_raster, filter_shapefile, bandwise_mean, bandwise_stddev) +from runspec import landsat_rasters, static_rasters, climate_rasters def download_images_over_shapefile(shapefile, image_directory, year): @@ -80,6 +80,8 @@ def all_rasters(image_directory, satellite=8): band_map[band] = [] for band in static_rasters(): band_map[band] = [] + for band in climate_rasters(): + band_map[band] = [] extensions = (".tif", ".TIF") for dirpath, dirnames, filenames in os.walk(image_directory): @@ -94,13 +96,16 @@ def all_rasters(image_directory, satellite=8): return band_map + def raster_means(image_directory, satellite=8): """ Gets all means of all images stored in image_directory and its subdirectories. Images end with (.tif, .TIF) Image_directory in a typical case would - be image_data/train/ - Does this even need to be calculated?""" + be project_root/image_data/train/. + This returns band_map, which is a dict of lists with + keys band names (B1, B2...) 
and values lists of + the locations of the rasters in the filesystem.""" outfile = os.path.join(image_directory, "mean_mapping.pkl") if os.path.isfile(outfile): @@ -115,15 +120,41 @@ def raster_means(image_directory, satellite=8): mean, bnd = bandwise_mean(band_map[band], band) mean_mapping[band] = mean - pprint(mean_mapping) - with open(outfile, 'wb') as f: pickle.dump(mean_mapping, f) return mean_mapping -def create_all_master_rasters(image_directory, raster_save_directory, mean_mapping): +def raster_stds(image_directory, mean_map, satellite=8): + + outfile = os.path.join(image_directory, "stddev_mapping.pkl") + if os.path.isfile(outfile): + with open(outfile, 'rb') as f: + stddev_mapping = pickle.load(f) + return stddev_mapping + + band_map = all_rasters(image_directory, satellite) # get all rasters + # in the image directory + stddev_mapping = {} + + for band in band_map.keys(): + std, bnd = bandwise_stddev(band_map[band], band, mean_map[band]) + stddev_mapping[band] = std + + with open(outfile, 'wb') as f: + pickle.dump(stddev_mapping, f) + + pprint('STDMAP') + pprint(stddev_mapping) + print("-------") + pprint('MEANMAP') + pprint(mean_map) + + return stddev_mapping + + +def create_all_master_rasters(image_directory, raster_save_directory, mean_mapping, stddev_mapping): """ Creates a master raster for all images in image_directory. 
Image directory is assumed to be a top-level directory that contains all the path_row directories for test or train (image_data/test/path_row_year*/) @@ -136,9 +167,8 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi path = sub_dir[:2] row = sub_dir[3:5] year = sub_dir[-4:] - from pprint import pprint - print(paths_map) - create_master_raster(paths_map, path, row, year, raster_save_directory, mean_mapping) + create_master_raster(paths_map, path, row, year, raster_save_directory, mean_mapping, + stddev_mapping) if __name__ == "__main__": @@ -147,21 +177,22 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi # for f in glob.glob(shp + "*.shp"): # filter_shapefile(f, out_shapefile_directory) # This project is becoming more complicated. - # Needs a test / train organization! + # Needs a test / train organization # 1. Filter shapefiles. # 2. Download images over shapefiles - # 3. Create master rasters - # 4. Extract training data - # 5. Train network. + # 3. Get all means/stddevs + # 4. Create master rasters + # 5. Extract training data + # 6. Train network. 
- image_train_directory = 'image_data/train' + image_train_directory = 'image_data/train/' image_test_directory = 'image_data/test' image_dirs = [image_train_directory, image_test_directory] shp_train = 'shapefile_data/train/' shp_test = 'shapefile_data/test/' shp_dirs = [shp_train, shp_test] shapefile_directory = 'shapefile_data/all_shapefiles' - master_train = 'master_rasters/train' + master_train = 'master_rasters/train/' master_test = 'master_rasters/test' master_dirs = [master_train, master_test] year = 2013 @@ -169,5 +200,6 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi for s, i in zip(shp_dirs, image_dirs): download_all_images(i, s, year) for im_dir, mas_dir in zip(image_dirs, master_dirs): - mean_map = raster_means(im_dir) - create_all_master_rasters(im_dir, mas_dir, mean_map) + mean_map = raster_means(image_train_directory) + stddev_map = raster_stds(image_train_directory, mean_map) + create_all_master_rasters(im_dir, mas_dir, mean_map, stddev_map) diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py index e9e8f96..680cf69 100644 --- a/fully-conv-classification/runspec.py +++ b/fully-conv-classification/runspec.py @@ -36,8 +36,10 @@ def landsat_rasters(): return b -def climate_rasters(root): - return [f for f in glob(os.path.join(root, "*.tif")) if 'precip' in f] +def climate_rasters(): + a = ('pr.tif', 'pet.tif', 'tmmn.tif', 'tmmx.tif', 'bi.tif', + 'etr.tif') + return a def ancillary_rasters(): From 281702b64a13f7095ec2a97396e08aa695aa8987 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 20 Mar 2019 09:35:06 -0600 Subject: [PATCH 49/89] Added weight map to improve prediction resolution and took an aggregate of climate rasters --- .../compose_array_single_shapefile.py | 2 +- fully-conv-classification/data_generators.py | 159 ++++---- fully-conv-classification/data_utils.py | 271 ++----------- fully-conv-classification/fully_conv.py | 356 ++++++++---------- 
fully-conv-classification/models.py | 244 +++++++++++- fully-conv-classification/prepare_images.py | 37 +- .../runner_from_shapefile.py | 72 +++- fully-conv-classification/runspec.py | 3 +- fully-conv-classification/shapefile_utils.py | 244 ++++++++++++ 9 files changed, 845 insertions(+), 543 deletions(-) create mode 100644 fully-conv-classification/shapefile_utils.py diff --git a/fully-conv-classification/compose_array_single_shapefile.py b/fully-conv-classification/compose_array_single_shapefile.py index cdf2c4d..81664a4 100644 --- a/fully-conv-classification/compose_array_single_shapefile.py +++ b/fully-conv-classification/compose_array_single_shapefile.py @@ -33,7 +33,7 @@ from rasterio import open as rasopen from shapely.geometry import shape, Point, mapping from shapely.ops import unary_union -from data_utils import get_shapefile_path_row +from shapefile_utils import get_shapefile_path_row loc = os.path.dirname(__file__) WRS_2 = loc.replace('pixel_classification', os.path.join('spatial_data', 'wrs2_usa_descending.shp')) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 838aa68..02b9fb0 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -2,13 +2,15 @@ import os import time import pickle -import sys import matplotlib.pyplot as plt from glob import glob from random import sample, shuffle -from sklearn.utils.class_weight import compute_class_weight +from skimage.segmentation import find_boundaries +from skimage.measure import label +from scipy.ndimage.morphology import distance_transform_edt from runspec import mask_rasters -from data_utils import generate_class_mask, get_shapefile_path_row +from data_utils import load_raster +from shapefile_utils import get_shapefile_path_row, generate_class_mask from rasterio import open as rasopen from warnings import warn from skimage import transform @@ -17,6 +19,17 @@ NO_DATA = -1 NUM_CLASSES = 4 + +def 
weight_map(mask, w0=10, sigma=10): + mask = mask.copy().astype(bool) + mask = ~mask # make the non-masked areas masked + distances = distance_transform_edt(mask) # ask where the closest masked pixel is + # distances are always positive, so 1-distances can be very negative. + # We're setting the e-folding time with sigma, and the + # border pixel value (y-intercept) with w0. + return w0*np.exp((1-distances) / sigma) + + def random_sample(class_mask, n_instances, box_size=0, fill_value=1): if box_size: n_instances /= box_size @@ -54,12 +67,6 @@ def random_sample(class_mask, n_instances, box_size=0, fill_value=1): return class_mask -def load_raster(master_raster): - with rasopen(master_raster, 'r') as src: - arr = src.read() - meta = src.meta.copy() - return arr, meta - def assign_class_code(target_dict, shapefilename): for key in target_dict: @@ -109,7 +116,7 @@ def set_class_mask(self, class_mask): def get_masks(image_directory): - ''' Returns all masks in the image directory.''' + ''' Returns all raster masks in the image directory.''' paths = [] for dirpath, dirnames, filenames in os.walk(image_directory): for f in filenames: @@ -135,10 +142,13 @@ def get_masks(image_directory): out[mask[0] == 1] = 1 return out + def extract_training_data_unet(target_dict, shapefile_directory, image_directory, master_raster_directory, training_directory, count, save=True, chunk_size=572): ''' target_dict: {filename or string in filename : class_code} ''' - CHUNK_SIZE = 572 # input to unet. + unet_output_size = 388 # input to unet. + unet_input_size = 572 # input to unet. + # this means the class mask must be 388x388 - chop 184 from height/width. done = set() pixel_dict = {} # counts number of pixels present in each class. 
@@ -174,13 +184,12 @@ def extract_training_data_unet(target_dict, shapefile_directory, image_directory else: master = np.zeros(shp) - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if sub_master.shape[1] == CHUNK_SIZE and sub_master.shape[2] == CHUNK_SIZE: + for i in range(92, master.shape[1], unet_output_size): + for j in range(92, master.shape[2], unet_output_size): + sub_master = master[:, i-92:i+unet_output_size+92, j-92:j+unet_output_size+92] + if sub_master.shape[1] == unet_input_size and sub_master.shape[2] == unet_input_size: for msk in masks: - s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - s = s[:, 92:-92, 92:-92] # clip edges for unet. + s = msk.mask[:, i:i+unet_output_size, j:j+unet_output_size] if not np.all(s == NO_DATA): pixel_dict[msk.class_code] += len(np.where(s != NO_DATA)[0]) count += 1 @@ -189,6 +198,7 @@ def extract_training_data_unet(target_dict, shapefile_directory, image_directory dt.to_pickle(training_directory) return count, pixel_dict + def extract_training_data(target_dict, shapefile_directory, image_directory, master_raster_directory, training_directory, count, save=True, chunk_size=572): ''' target_dict: {filename or string in filename : class_code} ''' @@ -281,69 +291,87 @@ def _from_pickle(self, filename): return data -def generate_training_data(training_directory, max_pools, sample_random=True, box_size=0): +def generate_training_data(training_directory, max_pools, sample_random=True, box_size=0, + batch_size=8, class_weights={}, threshold=0.9, w0=40, sigma=10, channels='all', + train=True): ''' Assumes data is stored in training_directory in subdirectories labeled class_n_train and that n_classes is a global variable.''' class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] generators = [] + border_class = len(class_weights.keys()) for d in class_dirs: generators.append(DataGen(d)) 
while True: - min_samples = np.inf - data = [] - for gen in generators: - out = gen.next().copy() - data.append(out) - if sample_random: - n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) - if n_samples < min_samples: - min_samples = n_samples - - first = False - one_hot = None - for subset in data: - if sample_random: - samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, - fill_value=subset['class_code']) - else: - samp = subset['class_mask'] - samp[samp != NO_DATA] = subset['class_code'] - samp[samp == NO_DATA] = 4 - - subset['class_mask'] = samp - masters = [] masks = [] - shape = None - first = True - for subset in data: - master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) - master = master[0, :, :, :] - mask = mask[0, :, :, 0] - if first: - shape = master.shape - first = False - if master.shape == shape: # edges of images don't play very well. - if np.any(mask != NO_DATA): - masters.append(master) - masks.append(mask) - - yield np.asarray(masters, dtype=np.float32), np.asarray(masks) + weightings = [] + for _ in range(batch_size // len(class_weights.keys())): + min_samples = np.inf + data = [] + for gen in generators: + out = gen.next().copy() + data.append(out) + if sample_random: + n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + if n_samples < min_samples: + min_samples = n_samples + + first = False + one_hot = None + for subset in data: + if sample_random: + samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, + fill_value=subset['class_code']) + else: + samp = subset['class_mask'] + samp[samp != NO_DATA] = subset['class_code'] + + subset['class_mask'] = samp + # The above lines correspond to a sparse encoding. 
+ shape = None + first = True + for subset in data: + master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) + if channels == 'all': + master = np.squeeze(master) + else: + master = master[:, :, :, channels] + master = np.squeeze(master) + mask = mask[0, :, :, 0] + mask[mask != -1] = 1 # make the mask binary. + mask[mask == -1] = 0 + weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map - + labels = weights.copy() + labels[labels >= threshold] = border_class + labels[mask == 1] = subset['class_code'] + weights[weights < threshold] = 0 # threshold the weight values arbitrarily + weights[mask == 1] = class_weights[subset['class_code']] + multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class+1)) + one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class+1)) + one_hot[:, :, border_class][labels == border_class] = 1 + one_hot[:, :, subset['class_code']][labels == subset['class_code']] = 1 + for i in range(border_class + 1): + multidim_weights[:, :, i] = weights + if not train: + multidim_weights[multidim_weights != 0] = 1 + masters.append(master) + masks.append(one_hot) + weightings.append(multidim_weights) + + yield [np.asarray(masters, dtype=np.float32), np.asarray(weightings)], np.asarray(masks) def rotation(image, angle): return transform.rotate(image, angle, mode='constant', cval=NO_DATA) + def h_flip(image): return image[:, ::-1] + def augment_data(image, class_mask): '''Randomly augments an image.''' - # if np.random.randint(2): - # deg = np.random.uniform(-25, 25) - # image = rotation(image, deg) - # class_mask = rotation(class_mask, deg) if np.random.randint(2): image = h_flip(image) class_mask = h_flip(class_mask) @@ -355,9 +383,8 @@ def augment_data(image, class_mask): def preprocess_data(master, mask, max_pools, return_cuts=False): ''' This function preprocesses data in such a way - that downscaling it by 2 max_pools times will result - in an input that plays nicely with the FCNN 
expecting it. - Master, mask in this example are tiles from the original image.''' + so it will work with a FCNN with an arbitrary number of max pools. + Master, mask in this function are tiles from the original image.''' shp = master.shape rows = shp[1]; cols = shp[2] if max_pools != 0: @@ -398,7 +425,7 @@ def preprocess_data(master, mask, max_pools, return_cuts=False): if __name__ == '__main__': - shapefile_directory = 'shapefile_data/' + master_train = 'master_rasters/train/' master_test = 'master_rasters/test/' image_train = 'image_data/train/' # for fmasks. @@ -409,8 +436,8 @@ def preprocess_data(master, mask, max_pools, return_cuts=False): forest = 'Forrest' other = 'other' target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} - train_dir = 'training_data/multiclass/train_3927/' - shp_train = 'shapefile_data/train_3927/' + train_dir = 'training_data/multiclass/train/' + shp_train = 'shapefile_data/train/' count = 0 save = True count, pixel_dict = extract_training_data_unet(target_dict, shp_train, image_train, diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 1a01424..669cb11 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -12,31 +12,9 @@ from rasterio import float32, open as rasopen from rasterio.mask import mask from prepare_images import ImageStack -from sklearn.neighbors import KDTree from sat_image.warped_vrt import warp_single_image -def get_features(gdf): - tmp = json.loads(gdf.to_json()) - features = [feature['geometry'] for feature in tmp['features']] - return features - - -def generate_class_mask(shapefile, master_raster, no_data=-1): - ''' Generates a mask with class_val everywhere - shapefile data is present and a no_data value everywhere else. - no_data is -1 in this case, as it is never a valid class label. - Switching coordinate reference systems is important here, or - else the masking won't work. 
- ''' - shp = gpd.read_file(shapefile) - with rasopen(master_raster, 'r') as src: - shp = shp.to_crs(src.crs) - features = get_features(shp) - out_image, out_transform = mask(src, shapes=features, nodata=no_data) - return out_image - - def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, stddev_map): """ Creates a master raster with depth given by the organization of the paths_map. Paths map is a dictionary of lists, with keys the band names @@ -74,7 +52,7 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, j = 0 for feat in sorted(paths_map.keys()): # ensures the stack is in the same order each time. # Ordering within bands is assured by sorting the list that - # each band corresponding to, as that's essentially sorting by date. + # each band corresponding to, as that's sorting by date. feature_rasters = paths_map[feat] # maps bands to their location in filesystem. for feature_raster in feature_rasters: band_mean = None @@ -91,8 +69,7 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, arr = src.read() raster_geo = src.meta.copy() - #arr = (arr - band_mean) / band_std - arr = (arr - arr.mean()) / std(arr) + arr = (arr - band_mean) / band_std if first: first_geo = raster_geo.copy() @@ -129,16 +106,6 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, return pth -def get_shapefile_lat_lon(shapefile): - ''' Center of shapefile''' - with fopen(shapefile, "r") as src: - minx, miny, maxx, maxy = src.bounds - latc = (maxy + miny) / 2 - lonc = (maxx + minx) / 2 - - return latc, lonc - - def normalize_and_save_image(fname): norm = True with rasopen(fname, 'r') as rsrc: @@ -219,201 +186,45 @@ def download_images(project_directory, path, row, year, satellite=8, n_landsat=3 # a cloud mask. 
return image_stack -def construct_kdtree(wrs2): - centroids = [] - path_rows = [] # a mapping - features = [] - for feature in wrs2: - tile = shape(feature['geometry']) - centroid = tile.centroid.coords[0] - centroids.append([centroid[0], centroid[1]]) - z = feature['properties'] - p = z['PATH'] - r = z['ROW'] - path_rows.append(str(p) + "_" + str(r)) - features.append(feature) - - tree = KDTree(asarray(centroids)) - return tree, asarray(path_rows), asarray(features) - -def get_pr(poly, wrs2): - ls = [] - for feature in wrs2: - tile = shape(feature['geometry']) - if poly.within(tile): - z = feature['properties'] - p = z['PATH'] - r = z['ROW'] - ls.append(str(p) + "_" + str(r)) - return ls - -def get_pr_subset(poly, tiles): - ''' Use when you only want to iterate - over a subset of wrs2 tiles.''' - ls = [] - for feature in tiles: - tile = shape(feature['geometry']) - if poly.within(tile): - z = feature['properties'] - p = z['PATH'] - r = z['ROW'] - ls.append(str(p) + "_" + str(r)) - return ls - -def filter_shapefile(shapefile, out_directory): - """ Shapefiles may span multiple path/rows. - For training, we want all of the data available. - This function filters the polygons contained in - the shapefile into separate files for each path/row - contained in the shapefile. 
""" - path_row_map = defaultdict(list) - wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') - tree, path_rows, features = construct_kdtree(wrs2) - wrs2.close() - - cent_arr = array([0, 0]) - with fopen(shapefile, "r") as src: - meta = deepcopy(src.meta) - for feat in src: - poly = shape(feat['geometry']) - centroid = poly.centroid.coords[0] - cent_arr[0] = centroid[0] - cent_arr[1] = centroid[1] - centroid = cent_arr.reshape(1, -1) - dist, ind = tree.query(centroid, k=10) - tiles = features[ind[0]] - prs = get_pr_subset(poly, tiles) - for p in prs: - path_row_map[p].append(feat) - - outfile = os.path.basename(shapefile) - outfile = os.path.splitext(outfile)[0] - - for path_row in path_row_map: - out = outfile + path_row + ".shp" - with fopen(os.path.join(out_directory, out), 'w', **meta) as dst: - print("Saving {}".format(out)) - for feat in path_row_map[path_row]: - dst.write(feat) - - -def split_shapefile(base, base_shapefile, data_directory): - """ - Shapefiles may deal with data over multiple path/rows. - This is a method to get the minimum number of - path/rows required to cover all features. - Data directory: where the split shapefiles will be saved. - base: directory containing base_shapefile.""" - path_row = defaultdict(list) - id_mapping = {} - # TODO: un hardcode this directory. 
- wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') - tree, path_rows, features = construct_kdtree(wrs2) - wrs2.close() - - cent_arr = array([0, 0]) - with fopen(os.path.join(base, base_shapefile), "r") as src: - meta = deepcopy(src.meta) - for feat in src: - idd = feat['id'] - id_mapping[idd] = feat - poly = shape(feat['geometry']) - centroid = poly.centroid.coords[0] - cent_arr[0] = centroid[0] - cent_arr[1] = centroid[1] - centroid = cent_arr.reshape(1, -1) - dist, ind = tree.query(centroid, k=10) - tiles = features[ind[0]] - prs = get_pr_subset(poly, tiles) - for p in prs: - path_row[p].append(idd) - - non_unique_ids = defaultdict(list) - unique = defaultdict(list) - for key in path_row: - ls = path_row[key] # all features in a given path/row - placeholder = ls.copy() - for key1 in path_row: - if key != key1: - ls1 = path_row[key1] - # find unique keys in ls - placeholder = set(placeholder) - set(ls1) #all - # features present in placeholder that are not - # present in ls1; i.e. 
unique keys - unique[key] = list(placeholder) - if len(ls) != len(placeholder): - nu = set(ls) - set(placeholder) # all features present in ls that are not present in placeholder (non-unique) - for idd in list(nu): - non_unique_ids[idd].append(key) - - match_key = [] - for key in non_unique_ids: # unique ids - pr = None - hi = 0 - for pathrow in non_unique_ids[key]: # path/rows corresponding to non - # unique features - if len(unique[pathrow]) > hi: - pr = pathrow - hi = len(unique[pathrow]) - - if pr is not None: - unique[pr].append(key) - else: - choice = non_unique_ids[key] - choice.sort() - choice = choice[0] - unique[choice].append(key) - - prefix = os.path.splitext(base_shapefile)[0] - for key in unique: - if key is None: - continue - out = prefix + "_" + key + ".shp" - if len(unique[key]): - with fopen(os.path.join(data_directory, out), 'w', **meta) as dst: - print("Saving split shapefile to: {}".format(os.path.join(data_directory, out))) - for feat in unique[key]: - dst.write(id_mapping[feat]) - - -def get_shapefile_path_row(shapefile): - """This function assumes that the original - shapefile has already been split, and relies on - the naming convention to get the path and row. 
""" - # strip extension - # TODO: Find some way to update shapefile metadata - shp = shapefile[-9:-4].split("_") - return int(shp[0]), int(shp[1]) - - -def shapefile_area(shapefile): - summ = 0 - with fopen(shapefile, "r") as src: - for feat in src: - poly = shape(feat['geometry']) - summ += poly.area - return summ - - -def get_total_area(data_directory, filenames): - ''' Gets the total area of the polygons - in the files in filenames - TODO: Get an equal-area projection''' - - tot = 0 - for f in glob.glob(data_directory + "*.shp"): - if "sample" not in f: - for f2 in filenames: - if f2 in f: - tot += shapefile_area(f) - return tot - - -def required_points(shapefile, total_area, total_instances): - area = shapefile_area(shapefile) - frac = area / total_area - return int(total_instances * frac) - + +def clip_rasters(evaluated_tif_dir, include_string): + for f in glob(os.path.join(evaluated_tif_dir, "*.tif")): + if include_string in f: + out = os.path.basename(f) + out = out[out.find("_")+1:] + out = out[out.find("_")+1:] + out = out[out.find("_")+1:] + path = out[:2] + row = out[3:5] + clip_raster(f, int(path), int(row), outfile=f) + + +def clip_raster(evaluated, path, row, outfile=None): + + shp = gpd.read_file(WRS2) + + with rasopen(evaluated, 'r') as src: + shp = shp.to_crs(src.crs) + meta = src.meta.copy() + features = get_features(shp, path, row) + out_image, out_transform = mask(src, shapes=features, nodata=np.nan) + + if outfile: + save_raster(out_image, outfile, meta) + + +def save_raster(arr, outfile, meta, count=4): + meta.update(count=count+1) + with rasopen(outfile, 'w', **meta) as dst: + dst.write(arr) + + +def load_raster(master_raster): + with rasopen(master_raster, 'r') as src: + arr = src.read() + meta = src.meta.copy() + return arr, meta + if __name__ == "__main__": pass diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 9892083..8e203ad 100644 --- a/fully-conv-classification/fully_conv.py +++ 
b/fully-conv-classification/fully_conv.py @@ -3,12 +3,10 @@ import time import keras.backend as K import tensorflow as tf -tf.enable_eager_execution() +#tf.enable_eager_execution() import matplotlib.pyplot as plt import numpy as np -import json -import geopandas as gpd -import sys +from sys import stdout from glob import glob from skimage import transform, util from sklearn.metrics import confusion_matrix @@ -16,14 +14,16 @@ from rasterio import open as rasopen from rasterio.mask import mask from shapely.geometry import shape +from pickle import load as pload from fiona import open as fopen from data_generators import generate_training_data, load_raster, preprocess_data -from data_utils import generate_class_mask -from models import fcnn_functional, fcnn_model, fcnn_functional_small, unet +from data_utils import clip_rasters, save_raster +from shapefile_utils import get_features, generate_class_mask +from models import (fcnn_functional, fcnn_model, fcnn_functional_small, unet, unet_weighted, + weighted_unet_no_transpose_conv) NO_DATA = -1 CHUNK_SIZE = 572 # some value that is divisible by 2^MAX_POOLS. -NUM_CLASSES = 5 WRS2 = '../spatial_data/wrs2_descending_usa.shp' def custom_objective(y_true, y_pred): @@ -37,34 +37,16 @@ def custom_objective(y_true, y_pred): out = tf.boolean_mask(losses, mask) return out -def weighted_loss(weight_map): + +def weighted_loss(target, output): # All I need to do is multiply the output loss # by the weights that I input. # Loss is of shape n_classesxwidthxheight - # what does weight map have to be in this case? - # - def loss(y_true, y_pred): - losses = 0 - pass - pass - - -def custom_objective_v2(y_true, y_pred): - '''I want to mask all values that - are not data, given a y_true - that has NODATA values. The boolean mask - operation is failing. 
It should output - a Tensor of shape (M, N_CLASSES), but instead outputs a (M, ) - tensor.''' - y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) - masked = tf.not_equal(y_true, NO_DATA) - indices = tf.where(masked) - indices = tf.to_int32(indices) - indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) - y_true_masked = tf.gather_nd(params=y_true, indices=indices) - y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) - return tf.keras.losses.categorical_crossentropy(y_true_masked, y_pred_masked) + # Weight map: + # Raster of shape widthxheightx1, with weights + # of zero where there is no data and weights of whatever the + # correct weights are for all the other classes. + return -tf.reduce_sum(target*output, len(output.get_shape())-1) def masked_acc(y_true, y_pred): @@ -81,18 +63,18 @@ def masked_acc(y_true, y_pred): def m_acc(y_true, y_pred): ''' Calculate accuracy from masked data. 
The built-in accuracy metric uses all data (masked & unmasked).''' - y_true = tf.reshape(y_true, (K.shape(y_true)[1]*K.shape(y_true)[2], NUM_CLASSES)) - y_pred = tf.reshape(y_pred, (K.shape(y_pred)[1]*K.shape(y_pred)[2], NUM_CLASSES)) - masked = tf.not_equal(y_true, NO_DATA) - indices = tf.where(masked) - indices = tf.to_int32(indices) - indices = tf.slice(indices, [0, 0], [K.shape(indices)[0], 1]) - y_true_masked = tf.gather_nd(params=y_true, indices=indices) - y_pred_masked = tf.gather_nd(params=y_pred, indices=indices) - return K.cast(K.equal(K.argmax(y_true_masked, axis=-1), K.argmax(y_pred_masked, axis=-1)), K.floatx()) + mask = tf.not_equal(K.mean(y_pred, axis=len(y_pred.get_shape())-1), 0) + y_true = tf.math.argmax(y_true, axis=len(y_true.get_shape())-1) + y_true_msk = tf.boolean_mask(y_true, mask) + y_pred_exp = tf.math.exp(y_pred) + y_pred_arg = tf.math.argmax(y_pred_exp, axis=len(y_pred_exp.get_shape())-1) + # so undo that operation with softmax (could also use tf.exp). + y_pred_msk = tf.boolean_mask(y_pred_arg, mask) # mask the 0 values + return K.mean(tf.math.equal(y_true_msk, y_pred_msk)) # get the accuracy -def evaluate_image_unet(master_raster, model, max_pools, outfile=None, ii=None): +def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_classes=4, + outfile=None, ii=None): if not os.path.isfile(master_raster): print("Master raster not created for {}".format(suffix)) @@ -100,8 +82,9 @@ def evaluate_image_unet(master_raster, model, max_pools, outfile=None, ii=None): else: master, meta = load_raster(master_raster) class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder - out = np.zeros((master.shape[2], master.shape[1], NUM_CLASSES)) + out = np.zeros((master.shape[2], master.shape[1], num_classes+1)) + # All U-Net specific. 
CHUNK_SIZE = 572 diff = 92 stride = 388 @@ -112,8 +95,11 @@ def evaluate_image_unet(master_raster, model, max_pools, outfile=None, ii=None): sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, max_pools, return_cuts=True) + if channels != 'all': + sub_master = sub_master[:, :, :, channels] + sub_msk = np.ones((1, 388, 388, 5)) # a placeholder if sub_master.shape[1] == 572 and sub_master.shape[2] == 572: - preds = model.predict(sub_master) + preds = model.predict([sub_master, sub_msk]) preds_exp = np.exp(preds) preds_softmaxed = preds_exp / np.sum(preds_exp, axis=3, keepdims=True) if np.any(np.isnan(preds)): @@ -121,7 +107,6 @@ def evaluate_image_unet(master_raster, model, max_pools, outfile=None, ii=None): preds = preds_softmaxed[0, :, :, :] else: continue - if cut_cols == 0 and cut_rows == 0: out[j+diff:j+CHUNK_SIZE-diff, i+diff:i+CHUNK_SIZE-diff, :] = preds elif cut_cols == 0 and cut_rows != 0: @@ -137,57 +122,7 @@ def evaluate_image_unet(master_raster, model, max_pools, outfile=None, ii=None): else: print("whatcha got goin on here?") - sys.stdout.write("N eval: {}. Percent done: {:.4f}\r".format(ii, i / master.shape[1])) - - out = np.swapaxes(out, 0, 2) - out = out.astype(np.float32) - if outfile: - save_raster(out, outfile, meta) - return out -def evaluate_image(master_raster, model, max_pools, outfile=None, ii=None): - - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive handling of this case. 
- else: - master, meta = load_raster(master_raster) - class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder - out = np.zeros((master.shape[2], master.shape[1], NUM_CLASSES)) - - CHUNK_SIZE = 572 - - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, - max_pools, return_cuts=True) - if sub_master.shape[1] == 572 and sub_master.shape[2] == 572: - preds = model.predict(sub_master) - preds_exp = np.exp(preds) - preds_softmaxed = preds_exp / np.sum(preds_exp, axis=3, keepdims=True) - if np.any(np.isnan(preds)): - print("Nan prediction.") - preds = preds_softmaxed[0, :, :, :] - else: - continue - oss = 92 - if cut_cols == 0 and cut_rows == 0: - out[j+oss:j+CHUNK_SIZE-oss, i+oss:i+CHUNK_SIZE-oss, :] = preds - elif cut_cols == 0 and cut_rows != 0: - ofs = master.shape[1]-cut_rows - out[j+oss:j+CHUNK_SIZE-oss, i+oss:ofs-oss, :] = preds - elif cut_cols != 0 and cut_rows == 0: - ofs = master.shape[2]-cut_cols - out[j+oss:ofs-oss, i+oss:i+CHUNK_SIZE-oss, :] = preds - elif cut_cols != 0 and cut_rows != 0: - ofs_col = master.shape[2]-cut_cols - ofs_row = master.shape[1]-cut_rows - out[j+oss:ofs_col-oss, i+oss:ofs_row-oss, :] = preds - else: - print("whatcha got goin on here?") - - sys.stdout.write("N eval: {}. Percent done: {:.4f}\r".format(ii, i / master.shape[1])) + stdout.write("N eval: {}. 
Percent done: {:.4f}\r".format(ii, i / master.shape[1])) out = np.swapaxes(out, 0, 2) out = out.astype(np.float32) @@ -195,45 +130,9 @@ def evaluate_image(master_raster, model, max_pools, outfile=None, ii=None): save_raster(out, outfile, meta) return out -def save_raster(arr, outfile, meta, count=NUM_CLASSES): - meta.update(count=count) - with rasopen(outfile, 'w', **meta) as dst: - dst.write(arr) - - -def get_features(gdf, path, row): - tmp = json.loads(gdf.to_json()) - features = [] - for feature in tmp['features']: - if feature['properties']['PATH'] == path and feature['properties']['ROW'] == row: - features.append(feature['geometry']) - return features -def clip_raster(evaluated, path, row, outfile=None): - - shp = gpd.read_file(WRS2) - - with rasopen(evaluated, 'r') as src: - shp = shp.to_crs(src.crs) - meta = src.meta.copy() - features = get_features(shp, path, row) - out_image, out_transform = mask(src, shapes=features, nodata=np.nan) - - if outfile: - save_raster(out_image, outfile, meta) - -def clip_rasters(evaluated_tif_dir, include_string): - for f in glob(os.path.join(evaluated_tif_dir, "*.tif")): - if include_string in f: - out = os.path.basename(f) - out = out[out.find("_")+1:] - out = out[out.find("_")+1:] - out = out[out.find("_")+1:] - path = out[:2] - row = out[3:5] - clip_raster(f, int(path), int(row), outfile=f) - -def evaluate_images(image_directory, model, include_string, max_pools, exclude_string, prefix, save_dir): +def evaluate_images(image_directory, model, include_string, max_pools, exclude_string, prefix, + save_dir, channels): ii = 0 for f in glob(os.path.join(image_directory, "*.tif")): if exclude_string not in f and include_string in f: @@ -245,7 +144,9 @@ def evaluate_images(image_directory, model, include_string, max_pools, exclude_s out = prefix + out + ".tif" out = os.path.join(save_dir, out) ii += 1 - evaluate_image_unet(f, model, max_pools=max_pools, outfile=out, ii=ii) + evaluate_image_unet(f, model, max_pools=max_pools, 
channels=channels, + outfile=out, ii=ii) + def compute_iou(y_pred, y_true): ''' This is slow. ''' @@ -261,6 +162,7 @@ def compute_iou(y_pred, y_true): IoU = intersection / union.astype(np.float32) return np.mean(IoU) + def get_iou(): shpfiles = [ 'shapefile_data/test/MT_Huntley_Main_2013_372837_28.shp', @@ -287,45 +189,43 @@ def get_iou(): y_pred = np.argmax(y_pred, axis=0) print(f, compute_iou(y_pred, y_true)) + def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, - epochs=3, random_sample=False, restore=False, learning_rate=1e-3): + epochs=3, random_sample=False, threshold=0.9, w0=50, sigma=10, channels='all', + restore=False, learning_rate=1e-3, num_classes=4): ''' This function assumes that train/test data are subdirectories of training_directory, with the names train/test.''' - if not restore: - model = model(NUM_CLASSES) - if NUM_CLASSES <= 2: - model.compile(loss=custom_objective_binary, - metrics=[m_acc], - optimizer='adam') + if channels == 'all': + channel_depth = 51 else: - # model.compile( - # loss=custom_objective, - # optimizer='adam', - # metrics=[masked_acc] - # ) - model.compile( - loss=custom_objective, - optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), - metrics=[masked_acc] - ) + channel_depth = channels.shape[0] + shp = (572, 572, channel_depth) + model = model(shp, num_classes+1) # + 1 for border class + model.compile( + loss=weighted_loss, + optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), + metrics=[m_acc] + ) graph_path = os.path.join('graphs/', str(int(time.time()))) os.mkdir(graph_path) - tb = TensorBoard(log_dir=graph_path, write_images=True, batch_size=4) + tb = TensorBoard(log_dir=graph_path) train = os.path.join(training_directory, 'train') test = os.path.join(training_directory, 'test') + class_weight = {0:25.923, 1:1.0, 2:2.79, 3:61.128} train_generator = generate_training_data(train, max_pools, sample_random=random_sample, - box_size=box_size) + 
box_size=box_size, batch_size=4, w0=w0, sigma=sigma, + class_weights=class_weight, channels=channels) test_generator = generate_training_data(test, max_pools, sample_random=random_sample, - box_size=box_size) + train=False, box_size=box_size, batch_size=4, + class_weights=class_weight, channels=channels) model.fit_generator(train_generator, + steps_per_epoch=steps_per_epoch, validation_data=test_generator, validation_steps=valid_steps, - steps_per_epoch=steps_per_epoch, epochs=epochs, - callbacks=[tb, tf.keras.callbacks.TerminateOnNaN()], verbose=1, - class_weight=[25.923, 1.0, 2.79, 61.128, .75], + callbacks=[tb, tf.keras.callbacks.TerminateOnNaN()], use_multiprocessing=True) return model, graph_path @@ -340,62 +240,114 @@ def save_model_info(outfile, args): print("wrote run info to {}".format(outfile)) +def gradient_wrt_inputs(model, data): + layer_output = model.output + loss = -tf.reduce_mean(layer_output) + grads = K.gradients(loss, model.input[0])[0] + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + weights = np.ones((1, 388, 388, 5)) + results = sess.run(grads, feed_dict={model.input[0]:data, model.input[1]:weights}) + return results + + if __name__ == '__main__': + band_dict = {'B1.TIF':np.arange(0, 2+1), + 'B10.TIF':np.arange(3, 5+1), + 'B11.TIF':np.arange(6, 8+1), + 'B2.TIF':np.arange(9, 11+1), + 'B3.TIF':np.arange(12, 14+1), + 'B4.TIF':np.arange(15, 17+1), + 'B5.TIF':np.arange(18, 20+1), + 'B6.TIF':np.arange(21, 23+1), + 'B7.TIF':np.arange(24, 26+1), + 'B8.TIF':np.arange(27, 29+1), + 'B9.TIF':np.arange(30, 32+1), + 'aspect.tif':np.array([33]), + 'elevation_diff.tif':np.array([34]), + 'etr.tif':np.arange(35, 37+1), + 'pet.tif':np.arange(38, 40+1), + 'pr.tif':np.arange(41, 43+1), + 'slope.tif':np.array([44]), + 'tmmn.tif':np.arange(45, 47+1), + 'tmmx.tif':np.arange(48, 50+1)} + training_directory = 'training_data/multiclass/' info_file = 'run_information.txt' - max_pools = 0 - model_name = 'unet_{}.h5'.format(int(time.time())) - 
#model_name = 'unet_random_sample100.h5' + model_name = 'unet_border_weights{}.h5'.format(int(time.time())) + model_name ='unet_gradientwrtinputs.h5' model_dir = 'models/' info_path = os.path.join(model_dir, info_file) model_save_path = os.path.join(model_dir, model_name) - model_func = unet + model_func = weighted_unet_no_transpose_conv - steps_per_epoch = 157 #628 - valid_steps = 1 #233 + steps_per_epoch = 334 # 334 the number in the max class. + valid_steps = 4 # 233 epochs = 1 + w0 = 0 + sigma = 2 + threshold = 0.9 train_more = False - eager = True - class_weights = True - learning_rate = 1e-4 + eager = False + class_weights = False + learning_rate = 1e-3 random_sample = False augment = False - - raster_name = '5class' - pr_to_eval = '39_27' - image_directory = 'master_rasters/train/' - - param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, - 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, - 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, 'graph_path':None} - - evaluating = True - if not os.path.isfile(model_save_path): - model, graph_path = train_model(training_directory, model_func, - steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, - max_pools=max_pools, epochs=epochs, - random_sample=random_sample, learning_rate=learning_rate) - evaluating = False - model.save(model_save_path) - else: - model = tf.keras.models.load_model(model_save_path, - custom_objects={'custom_objective':custom_objective}) - if train_more: - model, graph_path = train_model(training_directory, model, steps_per_epoch=steps_per_epoch, - valid_steps=valid_steps, random_sample=random_sample, - max_pools=max_pools, epochs=epochs, restore=True) - model_name = 'unet_random_sample100.h5' - model.save(os.path.join(model_dir, model_name)) - - if not evaluating or train_more: - param_dict['graph_path'] = graph_path - save_model_info(info_path, param_dict) - - evaluate_images(image_directory, 
model, include_string=pr_to_eval, - exclude_string="class", max_pools=max_pools, prefix=raster_name, - save_dir='compare_model_outputs/blurry/') - #clip_rasters('compare_model_outputs/blurry/', pr_to_eval) + exclude = ['etr.tif', 'pet.tif', 'slope.tif', 'tmmn.tif', 'tmmx.tif', 'pr.tif'] + for exx in exclude: + model_name = 'exclude_{}.h5'.format(exx[:-4]) + model_save_path = os.path.join(model_dir, model_name) + channels = [band_dict[x] for x in band_dict if exx not in x] + channels = np.hstack(channels) + print(model_name) + + raster_name = 'exclude_{}'.format(exx[:-4]) + pr_to_eval = '37_28' + image_directory = 'master_rasters/test/' + + param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, + 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, + 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, + 'graph_path':None, 'bands':channels, 'w0':w0, 'sigma':sigma} + + evaluating = True + if not os.path.isfile(model_save_path): + print("Training new model") + model, graph_path = train_model(training_directory, model_func, + steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, + max_pools=max_pools, epochs=epochs, random_sample=random_sample, + learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, + threshold=threshold) + evaluating = False + model.save(model_save_path) + else: + model = tf.keras.models.load_model(model_save_path, + custom_objects={'weighted_loss':weighted_loss}) + model.compile( + loss=weighted_loss, + optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), + ) + if train_more: + model, graph_path = train_model(training_directory, model, steps_per_epoch=steps_per_epoch, + valid_steps=valid_steps, channel_depth=channel_depth, random_sample=random_sample, + max_pools=max_pools, epochs=epochs, restore=True) + model_name = 'unet_random_sample100.h5' + model.save(os.path.join(model_dir, model_name)) + + if not evaluating or train_more: + 
param_dict['graph_path'] = graph_path + save_model_info(info_path, param_dict) + # s = '1553014193.4813933' + # f = 'training_data/multiclass/train/class_2_data/{}.pkl'.format(s) + # with open(f, 'rb') as f: + # data = pload(f) + # data = np.expand_dims(data['data'], axis=0) + # data = np.swapaxes(data, 1, 3) + # gradient_wrt_inputs(model, data) + evaluate_images(image_directory, model, include_string=pr_to_eval, + exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, save_dir='compare_model_outputs/blurry/') + #clip_rasters('compare_model_outputs/blurry/', pr_to_eval) diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 20ef49e..20c3380 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -3,8 +3,7 @@ import keras.backend as K import tensorflow as tf from tensorflow.keras.models import Model -from tensorflow.keras.layers import (Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, - Dropout, UpSampling2D, BatchNormalization, Cropping2D) +from tensorflow.keras.layers import (multiply, Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, Dropout, UpSampling2D, BatchNormalization, Cropping2D, Lambda) def fcnn_model(n_classes): model = tf.keras.Sequential() @@ -65,10 +64,237 @@ def fcnn_functional_small(n_classes): #model.summary() return model +_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) + +def weighted_unet_no_transpose_conv(input_shape, n_classes): + ''' This model does not use any Conv2DTranspose layers. + Instead a Upsampling2D layer with a Conv layer after + with same padding. 
''' + inp1 = Input(input_shape) + weighted_input = Input(shape=(388, 388, 5)) + base = 2 + exp = 6 + + # 64 filters + c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(inp1) + c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c1) + mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) + + exp += 1 + # 128 filters + c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp1) + c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c3) + mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) + + + exp += 1 + # 256 filters + c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp2) + c6 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c5) + mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c6) + + exp += 1 + # 512 filters + c7 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp3) + c8 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c7) + mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c8) + + exp += 1 + # 1024 filters + c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp4) + c10 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c9) + + exp -= 1 + # 512 filters, making 1024 when concatenated with + # the corresponding layer from the contracting path. 
+ # u1 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + # activation='relu')(c10) + u1 = UpSampling2D(size=(2, 2))(c10) + u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) + + c8_cropped = Cropping2D(cropping=4)(c8) + concat_u1_c8 = Concatenate()([u1, c8_cropped]) + + # 512 filters + c11 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u1_c8) + + exp -= 1 + # 256 filters, making 512 when concatenated with the + # corresponding layer from the contracting path. + c12 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c11) + + # u2 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + # activation='relu')(c12) + u2 = UpSampling2D(size=(2, 2))(c12) + u2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u2) + + c6_cropped = Cropping2D(cropping=16)(c6) + concat_u2_c6 = Concatenate()([u2, c6_cropped]) + + # 256 filters + c13 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u2_c6) + bn1 = BatchNormalization(axis=3)(c13) + + exp -= 1 + # 128 filters, making 256 when concatenated with the + # corresponding layer from the contracting path. + c14 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn1) + + # u3 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + # activation='relu')(c14) + u3 = UpSampling2D(size=(2, 2))(c14) + u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3) + + c4_cropped = Cropping2D(cropping=40)(c4) + concat_u3_c4 = Concatenate()([u3, c4_cropped]) + + # 128 filters + c15 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u3_c4) + bn2 = BatchNormalization(axis=3)(c15) + + exp -= 1 + # 64 filters, making 128 when concatenated with the + # corresponding layer from the contracting path. 
+ c16 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn2) + + #u4 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + # activation='relu')(c16) + u4 = UpSampling2D(size=(2, 2))(c16) + u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4) + + c2_cropped = Cropping2D(cropping=88)(c2) + concat_u4_c2 = Concatenate()([u4, c2_cropped]) + + c17 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u4_c2) + bn3 = BatchNormalization(axis=3)(c17) + + c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(bn3) + + last_conv = Conv2D(filters=n_classes, kernel_size=1, activation='softmax', padding='valid')(c18) + + last = Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(last_conv) + last = Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(last) + last = Lambda(lambda x: K.log(x))(last) + weighted_sum = multiply([last, weighted_input]) + return Model(inputs=[inp1, weighted_input], outputs=[weighted_sum]) + + +def unet_weighted(input_shape, n_classes): + inp1 = Input(input_shape) + weighted_input = Input(shape=(388, 388, 5)) + base = 2 + exp = 6 + + # 64 filters + c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(inp1) + c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c1) + mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) + + exp += 1 + # 128 filters + c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp1) + c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c3) + mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) + + + exp += 1 + # 256 filters + c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp2) + c6 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c5) + mp3 = 
MaxPooling2D(pool_size=2, strides=(2, 2))(c6) + + exp += 1 + # 512 filters + c7 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp3) + c8 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c7) + mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c8) + + exp += 1 + # 1024 filters + c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp4) + c10 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c9) + + exp -= 1 + # 512 filters, making 1024 when concatenated with + # the corresponding layer from the contracting path. + u1 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + activation='relu')(c10) + + c8_cropped = Cropping2D(cropping=4)(c8) + concat_u1_c8 = Concatenate()([u1, c8_cropped]) + + # 512 filters + c11 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u1_c8) + + exp -= 1 + # 256 filters, making 512 when concatenated with the + # corresponding layer from the contracting path. + c12 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c11) + + u2 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + activation='relu')(c12) + + c6_cropped = Cropping2D(cropping=16)(c6) + concat_u2_c6 = Concatenate()([u2, c6_cropped]) + + # 256 filters + c13 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u2_c6) + bn1 = BatchNormalization(axis=3)(c13) + + exp -= 1 + # 128 filters, making 256 when concatenated with the + # corresponding layer from the contracting path. 
+ c14 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn1) + + u3 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + activation='relu')(c14) + + c4_cropped = Cropping2D(cropping=40)(c4) + concat_u3_c4 = Concatenate()([u3, c4_cropped]) + + # 128 filters + c15 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u3_c4) + bn2 = BatchNormalization(axis=3)(c15) + + exp -= 1 + # 64 filters, making 128 when concatenated with the + # corresponding layer from the contracting path. + c16 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn2) + + u4 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), + activation='relu')(c16) + + c2_cropped = Cropping2D(cropping=88)(c2) + concat_u4_c2 = Concatenate()([u4, c2_cropped]) + + c17 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(concat_u4_c2) + bn3 = BatchNormalization(axis=3)(c17) + + c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', + padding='valid')(bn3) + + last_conv = Conv2D(filters=n_classes, kernel_size=1, activation='softmax', padding='valid')(c18) + + last = Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(last_conv) + last = Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. 
- _epsilon))(last) + last = Lambda(lambda x: K.log(x))(last) + weighted_sum = multiply([last, weighted_input]) + return Model(inputs=[inp1, weighted_input], outputs=[weighted_sum]) + + def unet(n_classes, channel_depth=36): x = Input((None, None, channel_depth)) base = 2 - exp = 6 + exp = 5 # 64 filters c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(x) @@ -80,12 +306,11 @@ def unet(n_classes, channel_depth=36): c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp1) c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c3) mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) - d1 = Dropout(0.5)(mp2) exp += 1 # 256 filters - c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(d1) + c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp2) c6 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c5) mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c6) @@ -94,11 +319,10 @@ def unet(n_classes, channel_depth=36): c7 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp3) c8 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c7) mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c8) - d2 = Dropout(0.5)(mp4) exp += 1 # 1024 filters - c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(d2) + c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp4) c10 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c9) exp -= 1 @@ -144,11 +368,12 @@ def unet(n_classes, channel_depth=36): # 128 filters c15 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(concat_u3_c4) + bn2 = BatchNormalization(axis=3)(c15) exp -= 1 # 64 filters, making 128 when concatenated with the # corresponding layer from the 
contracting path. - c16 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c15) + c16 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn2) u4 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), activation='relu')(c16) @@ -158,9 +383,10 @@ def unet(n_classes, channel_depth=36): c17 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(concat_u4_c2) + bn3 = BatchNormalization(axis=3)(c17) c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(c17) + padding='valid')(bn3) last = Conv2D(filters=n_classes, kernel_size=1, activation='linear', padding='valid')(c18) return Model(inputs=x, outputs=last) diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index b57db3f..a434bdf 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -20,9 +20,9 @@ abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(abspath) -from numpy import datetime64 +from numpy import datetime64, arange, zeros from collections import OrderedDict -from datetime import datetime +from datetime import datetime, timedelta from landsat.google_download import GoogleDownload from sat_image.image import Landsat5, Landsat7, Landsat8 from sat_image.fmask import Fmask @@ -84,7 +84,7 @@ def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None self.cdl_mask = None self.climate_targets = climate_targets if not self.climate_targets: - self.climate_targets = ['pr', 'pet', 'rmin', 'rmax', 'tmmn', 'tmmx', 'bi', 'etr'] + self.climate_targets = ['pr', 'pet', 'tmmn', 'tmmx', 'etr'] self.n = n_landsat @@ -98,19 +98,17 @@ def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None def build_training(self): self.get_landsat(fmask=True) self.profile = self.landsat.rasterio_geometry - 
self.get_climate() + self.get_climate_timeseries() self.get_et() self.get_terrain() self.paths_map, self.masks = self._order_images() - def get_climate(self): - self.get_precip() def build_evaluating(self): self.get_landsat(fmask=True) self.profile = self.landsat.rasterio_geometry #self.get_et() This doesn't work reliably. - self.get_climate() + self.get_climate_timeseries() self.get_terrain() self.paths_map, self.masks = self._order_images() # paths map is just path-> location # in filesystem. @@ -179,20 +177,31 @@ def _get_bounds(self): return bounds, geometry - def get_precip(self): + def get_climate_timeseries(self): bounds, geometry = self._get_bounds() dates = self.scenes['DATE_ACQUIRED'].values + all_dates = arange(datetime(self.year, 3, 1), max(dates)+1, + timedelta(days=1)).astype(datetime64) for target in self.climate_targets: - for date in dates: - outfile = os.path.join(self.root, '{}_{}.tif'.format(date, target)) - if not os.path.isfile(outfile): - print("Get {}".format(os.path.basename(outfile))) + out_arr = None + first = True + check = [os.path.isfile(os.path.join(self.root, '{}_{}.tif'.format(q, target))) for q in dates] + if False in check: + for date in all_dates: d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. 
bds = GeoBounds(wsen=bounds) gm = GridMet(variable=target, clip_feature=geometry, bbox=bds, target_profile=self.profile, date=d) - out = gm.get_data_subset() - gm.save_raster(out, self.landsat.rasterio_geometry, outfile) + out = gm.get_data_subset_nonconform() + if first: + out_arr = zeros(out.shape) + first = False + out_arr += out + if date in dates: + outfile = os.path.join(self.root, '{}_{}.tif'.format(dd, target)) + print("Saving {}".format(outfile)) + out_final = gm.conform(out_arr) + gm.save_raster(out_final, self.landsat.rasterio_geometry, outfile) def get_terrain(self): diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index be0198f..201ee57 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -1,23 +1,22 @@ import os -import glob import pickle +from glob import glob from pprint import pprint -from multiprocessing import Pool from numpy import save as nsave -from compose_array_single_shapefile import PTASingleShapefile, ShapefileSamplePoints from fiona import open as fopen from collections import defaultdict, OrderedDict from shapely.geometry import shape -from data_utils import (download_images, get_shapefile_path_row, split_shapefile, - create_master_raster, filter_shapefile, bandwise_mean, bandwise_stddev) +from data_utils import download_images, create_master_raster, bandwise_mean, bandwise_stddev +from shapefile_utils import get_shapefile_path_row, split_shapefile, filter_shapefile from runspec import landsat_rasters, static_rasters, climate_rasters +from data_generators import extract_training_data_unet + def download_images_over_shapefile(shapefile, image_directory, year): '''Downloads p/r corresponding to the location of - the shapefile, and creates master raster. - Image_directory: where to save the raw images. - mr_directory: " " master_rasters.''' + the shapefile. Image_directory: where to save the raw images. 
+ ''' p, r = get_shapefile_path_row(shapefile) suff = str(p) + '_' + str(r) + "_" + str(year) landsat_dir = os.path.join(image_directory, suff) @@ -50,20 +49,14 @@ def download_from_pr(p, r, image_directory, year, master_raster_directory): return ims -def sample_points_from_shapefile(shapefile_path, instances): - ssp = ShapefileSamplePoints(shapefile_path, m_instances=instances) - ssp.create_sample_points(save_points=True) - return ssp.outfile - - def download_all_images(image_directory, shapefile_directory, year=2013): ''' Downloads all images over each shapefile in - shapefile directory ''' + shapefile directory, and places them in image_directory.''' template = "{}_{}_{}" done = set() satellite = 8 all_paths = [] - for f in glob.glob(os.path.join(shapefile_directory, "*.shp")): + for f in glob(os.path.join(shapefile_directory, "*.shp")): p, r = get_shapefile_path_row(f) t = template.format(p, r, year) if t not in done: @@ -92,7 +85,7 @@ def all_rasters(image_directory, satellite=8): band_map[band].append(os.path.join(dirpath, f)) for band in band_map: - band_map[band] = sorted(band_map[band]) # ensures ordering within bands. + band_map[band] = sorted(band_map[band]) # ensures ordering within bands - sort by time. 
return band_map @@ -164,6 +157,16 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi out = os.path.join(image_directory, sub_dir) if os.path.isdir(out): paths_map = all_rasters(out) + i = 0 + # for key in sorted(paths_map.keys()): + # if key in ('aspect.tif', 'elevation_diff.tif', 'slope.tif'): + # print("'{}':np.array([{}]),".format(key, i)) + # i += 1 + # else: + # print("'{}':np.arange({}, {}+1), ".format(key, i, i+2)) + # i += 3 + + # break path = sub_dir[:2] row = sub_dir[3:5] year = sub_dir[-4:] @@ -174,8 +177,6 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi if __name__ == "__main__": # out_shapefile_directory = 'shapefile_data' # shp = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/" - # for f in glob.glob(shp + "*.shp"): - # filter_shapefile(f, out_shapefile_directory) # This project is becoming more complicated. # Needs a test / train organization # 1. Filter shapefiles. @@ -203,3 +204,36 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi mean_map = raster_means(image_train_directory) stddev_map = raster_stds(image_train_directory, mean_map) create_all_master_rasters(im_dir, mas_dir, mean_map, stddev_map) + + master_train = 'master_rasters/train/' + master_test = 'master_rasters/test/' + image_train = 'image_data/train/' # for fmasks. + image_test = 'image_data/test/' # for fmasks. + irr1 = 'Huntley' + irr2 = 'Sun_River' + fallow = 'Fallow' + forest = 'Forrest' + other = 'other' + target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} + train_dir = 'training_data/multiclass/train/' + shp_train = 'shapefile_data/train/' + count = 0 + save = True + count, pixel_dict = extract_training_data_unet(target_dict, shp_train, image_train, + master_train, train_dir, count, save=save) + # Need to parallelize the extraction of training data. + # Or maybe not. It seems like parallelizing the opening/closing + # of rasters can stomp on the data. 
+ print("You have {} instances per training epoch.".format(count)) + print("And {} instances in each class.".format(pixel_dict)) + max_weight = max(pixel_dict.values()) + for key in pixel_dict: + print(key, max_weight / pixel_dict[key]) + tot = 0 + test_dir = 'training_data/multiclass/test/' + shp_test = 'shapefile_data/test/' + count = 0 + count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, + test_dir, count, save=save) + print("You have {} instances per test epoch.".format(count)) + print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py index 680cf69..2fdd81d 100644 --- a/fully-conv-classification/runspec.py +++ b/fully-conv-classification/runspec.py @@ -37,8 +37,7 @@ def landsat_rasters(): def climate_rasters(): - a = ('pr.tif', 'pet.tif', 'tmmn.tif', 'tmmx.tif', 'bi.tif', - 'etr.tif') + a = ('pr.tif', 'pet.tif', 'tmmn.tif', 'tmmx.tif', 'etr.tif') return a diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py new file mode 100644 index 0000000..1354e75 --- /dev/null +++ b/fully-conv-classification/shapefile_utils.py @@ -0,0 +1,244 @@ +import geopandas as gpd +import os +from json import loads +from numpy import zeros, asarray, array, reshape, nan, sqrt, std +from copy import deepcopy +from fiona import open as fopen +from rasterio.mask import mask +from rasterio import open as rasopen +from shapely.geometry import shape +from sklearn.neighbors import KDTree +from collections import defaultdict + + +def get_features(gdf): + tmp = loads(gdf.to_json()) + features = [feature['geometry'] for feature in tmp['features']] + return features + + +def generate_class_mask(shapefile, master_raster, no_data=-1): + ''' Generates a mask with 1 everywhere + shapefile data is present and a no_data value everywhere else. + no_data is -1 in this case, as it is never a valid class label. 
+ Switching coordinate reference systems is important here, or + else the masking won't work. + ''' + shp = gpd.read_file(shapefile) + with rasopen(master_raster, 'r') as src: + shp = shp.to_crs(src.crs) + features = get_features(shp) + out_image, out_transform = mask(src, shapes=features, nodata=no_data) + return out_image + + +def get_shapefile_lat_lon(shapefile): + ''' Center of shapefile''' + with fopen(shapefile, "r") as src: + minx, miny, maxx, maxy = src.bounds + latc = (maxy + miny) / 2 + lonc = (maxx + minx) / 2 + + return latc, lonc + + +def construct_kdtree(wrs2): + centroids = [] + path_rows = [] # a mapping + features = [] + for feature in wrs2: + tile = shape(feature['geometry']) + centroid = tile.centroid.coords[0] + centroids.append([centroid[0], centroid[1]]) + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + path_rows.append(str(p) + "_" + str(r)) + features.append(feature) + + tree = KDTree(asarray(centroids)) + return tree, asarray(path_rows), asarray(features) + + +def get_pr(poly, wrs2): + ls = [] + for feature in wrs2: + tile = shape(feature['geometry']) + if poly.within(tile): + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + ls.append(str(p) + "_" + str(r)) + return ls + + +def get_pr_subset(poly, tiles): + ''' Use when you only want to iterate + over a subset of wrs2 tiles.''' + ls = [] + for feature in tiles: + tile = shape(feature['geometry']) + if poly.within(tile): + z = feature['properties'] + p = z['PATH'] + r = z['ROW'] + ls.append(str(p) + "_" + str(r)) + return ls + + +def filter_shapefile(shapefile, out_directory): + """ Shapefiles may span multiple path/rows. + For training, we want all of the data available. + This function filters the polygons contained in + the shapefile into separate files for each path/row + contained in the shapefile. 
""" + path_row_map = defaultdict(list) + wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') + tree, path_rows, features = construct_kdtree(wrs2) + wrs2.close() + + cent_arr = array([0, 0]) + with fopen(shapefile, "r") as src: + meta = deepcopy(src.meta) + for feat in src: + poly = shape(feat['geometry']) + centroid = poly.centroid.coords[0] + cent_arr[0] = centroid[0] + cent_arr[1] = centroid[1] + centroid = cent_arr.reshape(1, -1) + dist, ind = tree.query(centroid, k=10) + tiles = features[ind[0]] + prs = get_pr_subset(poly, tiles) + for p in prs: + path_row_map[p].append(feat) + + outfile = os.path.basename(shapefile) + outfile = os.path.splitext(outfile)[0] + + for path_row in path_row_map: + out = outfile + path_row + ".shp" + with fopen(os.path.join(out_directory, out), 'w', **meta) as dst: + print("Saving {}".format(out)) + for feat in path_row_map[path_row]: + dst.write(feat) + + +def split_shapefile(base, base_shapefile, data_directory): + """ + Shapefiles may deal with data over multiple path/rows. + This is a method to get the minimum number of + path/rows required to cover all features. + Data directory: where the split shapefiles will be saved. + base: directory containing base_shapefile.""" + path_row = defaultdict(list) + id_mapping = {} + # TODO: un hardcode this directory. 
+ wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') + tree, path_rows, features = construct_kdtree(wrs2) + wrs2.close() + + cent_arr = array([0, 0]) + with fopen(os.path.join(base, base_shapefile), "r") as src: + meta = deepcopy(src.meta) + for feat in src: + idd = feat['id'] + id_mapping[idd] = feat + poly = shape(feat['geometry']) + centroid = poly.centroid.coords[0] + cent_arr[0] = centroid[0] + cent_arr[1] = centroid[1] + centroid = cent_arr.reshape(1, -1) + dist, ind = tree.query(centroid, k=10) + tiles = features[ind[0]] + prs = get_pr_subset(poly, tiles) + for p in prs: + path_row[p].append(idd) + + non_unique_ids = defaultdict(list) + unique = defaultdict(list) + for key in path_row: + ls = path_row[key] # all features in a given path/row + placeholder = ls.copy() + for key1 in path_row: + if key != key1: + ls1 = path_row[key1] + # find unique keys in ls + placeholder = set(placeholder) - set(ls1) #all + # features present in placeholder that are not + # present in ls1; i.e. 
unique keys + unique[key] = list(placeholder) + if len(ls) != len(placeholder): + nu = set(ls) - set(placeholder) # all features present in ls that are not present in placeholder (non-unique) + for idd in list(nu): + non_unique_ids[idd].append(key) + + match_key = [] + for key in non_unique_ids: # unique ids + pr = None + hi = 0 + for pathrow in non_unique_ids[key]: # path/rows corresponding to non + # unique features + if len(unique[pathrow]) > hi: + pr = pathrow + hi = len(unique[pathrow]) + + if pr is not None: + unique[pr].append(key) + else: + choice = non_unique_ids[key] + choice.sort() + choice = choice[0] + unique[choice].append(key) + + prefix = os.path.splitext(base_shapefile)[0] + for key in unique: + if key is None: + continue + out = prefix + "_" + key + ".shp" + if len(unique[key]): + with fopen(os.path.join(data_directory, out), 'w', **meta) as dst: + print("Saving split shapefile to: {}".format(os.path.join(data_directory, out))) + for feat in unique[key]: + dst.write(id_mapping[feat]) + + +def get_shapefile_path_row(shapefile): + """This function assumes that the original + shapefile has already been split, and relies on + the naming convention to get the path and row. 
""" + # strip extension + # TODO: Find some way to update shapefile metadata + shp = shapefile[-9:-4].split("_") + return int(shp[0]), int(shp[1]) + + +def shapefile_area(shapefile): + summ = 0 + with fopen(shapefile, "r") as src: + for feat in src: + poly = shape(feat['geometry']) + summ += poly.area + return summ + + +def get_total_area(data_directory, filenames): + ''' Gets the total area of the polygons + in the files in filenames + TODO: Get an equal-area projection''' + + tot = 0 + for f in glob.glob(data_directory + "*.shp"): + if "sample" not in f: + for f2 in filenames: + if f2 in f: + tot += shapefile_area(f) + return tot + + +def required_points(shapefile, total_area, total_instances): + area = shapefile_area(shapefile) + frac = area / total_area + return int(total_instances * frac) + +if __name__ == '__main__': + pass From fc7d9087ee8f19f802528427b5855b0143816c65 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 22 Mar 2019 13:44:36 -0600 Subject: [PATCH 50/89] Set class weight to 1 --- fully-conv-classification/data_generators.py | 122 ++++++------ fully-conv-classification/data_utils.py | 50 +++-- fully-conv-classification/fully_conv.py | 177 ++++++++---------- fully-conv-classification/models.py | 13 +- fully-conv-classification/prepare_images.py | 3 +- .../runner_from_shapefile.py | 23 ++- 6 files changed, 207 insertions(+), 181 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 02b9fb0..2641375 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -136,8 +136,7 @@ def get_masks(image_directory): out = np.zeros((mask.shape[1], mask.shape[2])) try: out[mask[0] == 1] = 1 # 0 index is for removing the (1, n, m) dimension. 
- except ValueError as e: - print(e) + except (ValueError, IndexError) as e: mask = warp_single_image(mask_file, first_geo) out[mask[0] == 1] = 1 return out @@ -165,15 +164,25 @@ def extract_training_data_unet(target_dict, shapefile_directory, image_directory done.add(match) p, r = get_shapefile_path_row(f) suffix = '{}_{}_{}.tif'.format(p, r, year) + if "37_27" in suffix: + continue fmask = get_masks(os.path.join(image_directory, suffix[:-4])) master_raster = os.path.join(master_raster_directory, train_raster + suffix) mask_file = os.path.join(master_raster_directory, mask_raster + suffix) # for rasterio.mask.mask - masks = [] # these are class masks for the labelling of data. + masks = [] all_matches.append(f) shp = None for match in all_matches: msk = generate_class_mask(match, mask_file) - msk[0][fmask == 1] = NO_DATA + # try: + # msk[0][fmask == 1] = NO_DATA + # except IndexError: + # print(match, msk.shape, fmask.shape) + # # What's going on here? + # # Fmasks and masks have different shapes... + # # Probably need to warp_vrt? + # #msk[:, :][fmask == 1] = NO_DATA + shp = msk.shape cc = assign_class_code(target_dict, match) if cc is not None: @@ -184,6 +193,7 @@ def extract_training_data_unet(target_dict, shapefile_directory, image_directory else: master = np.zeros(shp) + # 92 is unet offset. 
for i in range(92, master.shape[1], unet_output_size): for j in range(92, master.shape[2], unet_output_size): sub_master = master[:, i-92:i+unet_output_size+92, j-92:j+unet_output_size+92] @@ -295,8 +305,7 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo batch_size=8, class_weights={}, threshold=0.9, w0=40, sigma=10, channels='all', train=True): ''' Assumes data is stored in training_directory - in subdirectories labeled class_n_train - and that n_classes is a global variable.''' + in subdirectories labeled class_n_train with n the class code ''' class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] generators = [] border_class = len(class_weights.keys()) @@ -306,58 +315,57 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo masters = [] masks = [] weightings = [] - for _ in range(batch_size // len(class_weights.keys())): - min_samples = np.inf - data = [] - for gen in generators: - out = gen.next().copy() - data.append(out) - if sample_random: - n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) - if n_samples < min_samples: - min_samples = n_samples + min_samples = np.inf + data = [] + for gen in generators: + out = gen.next().copy() + data.append(out) + if sample_random: + n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + if n_samples < min_samples: + min_samples = n_samples + + first = False + one_hot = None + for subset in data: + if sample_random: + samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, + fill_value=subset['class_code']) + else: + samp = subset['class_mask'] + samp[samp != NO_DATA] = subset['class_code'] - first = False - one_hot = None - for subset in data: - if sample_random: - samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, - fill_value=subset['class_code']) - else: - samp = subset['class_mask'] - samp[samp != NO_DATA] = subset['class_code'] - - subset['class_mask'] = samp 
- # The above lines correspond to a sparse encoding. - shape = None - first = True - for subset in data: - master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) - if channels == 'all': - master = np.squeeze(master) - else: - master = master[:, :, :, channels] - master = np.squeeze(master) - mask = mask[0, :, :, 0] - mask[mask != -1] = 1 # make the mask binary. - mask[mask == -1] = 0 - weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map - - labels = weights.copy() - labels[labels >= threshold] = border_class - labels[mask == 1] = subset['class_code'] - weights[weights < threshold] = 0 # threshold the weight values arbitrarily - weights[mask == 1] = class_weights[subset['class_code']] - multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class+1)) - one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class+1)) - one_hot[:, :, border_class][labels == border_class] = 1 - one_hot[:, :, subset['class_code']][labels == subset['class_code']] = 1 - for i in range(border_class + 1): - multidim_weights[:, :, i] = weights - if not train: - multidim_weights[multidim_weights != 0] = 1 - masters.append(master) - masks.append(one_hot) - weightings.append(multidim_weights) + subset['class_mask'] = samp + + for subset in data: + master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) + if channels == 'all': + master = np.squeeze(master) + else: + master = master[:, :, :, channels] + master = np.squeeze(master) + mask = mask[0, :, :, 0] + mask[mask != -1] = 1 # make the mask binary. + mask[mask == -1] = 0 # -1 is NO_DATA. 
+ weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map + labels = weights.copy() + labels[labels >= threshold] = border_class + labels[mask == 1] = subset['class_code'] + weights[weights < threshold] = 0 # threshold the weight values arbitrarily + weights[weights != 0] = 1#class_weights[4] + weights[mask == 1] = class_weights[subset['class_code']] + multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class+1)) # + one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class+1)) + one_hot[:, :, border_class][labels == border_class] = 1 + one_hot[:, :, subset['class_code']][labels == subset['class_code']] = 1 + # above is circular but will allow for changing to a sparse encoding easily + for i in range(border_class + 1): + multidim_weights[:, :, i] = weights + if not train: + multidim_weights[multidim_weights != 0] = 1 + masters.append(master) + masks.append(one_hot) + weightings.append(multidim_weights) yield [np.asarray(masters, dtype=np.float32), np.asarray(weightings)], np.asarray(masks) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 669cb11..8aafbbd 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -1,8 +1,8 @@ -import glob import os import geopandas as gpd import json from fiona import open as fopen +from glob import glob from lxml import html from requests import get from copy import deepcopy @@ -11,6 +11,7 @@ from collections import defaultdict from rasterio import float32, open as rasopen from rasterio.mask import mask +from pickle import load from prepare_images import ImageStack from sat_image.warped_vrt import warp_single_image @@ -56,6 +57,7 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, feature_rasters = paths_map[feat] # maps bands to their location in filesystem. 
for feature_raster in feature_rasters: band_mean = None + band_std = None for band in mean_map: if feature_raster.endswith(band): band_mean = mean_map[band] @@ -65,11 +67,15 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, print("Band mean not found in mean_mapping for {}".format(feature_raster)) return + if band_std is None: + print("Band std not found in std_mapping for {}".format(feature_raster)) + return + with rasopen(feature_raster, mode='r') as src: arr = src.read() raster_geo = src.meta.copy() - arr = (arr - band_mean) / band_std + #arr = (arr - band_mean) / band_std if first: first_geo = raster_geo.copy() @@ -83,10 +89,6 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, stack[j, :, :] = arr j += 1 except ValueError: - # error can be thrown here if source raster doesn't have crs - # OR ! Because rasterio version. - # However, deepcopy becomes an issue with the latest - # version of rasterio. arr = warp_single_image(feature_raster, first_geo) stack[j, :, :] = arr j += 1 @@ -159,12 +161,6 @@ def bandwise_stddev(paths_list, band_name, band_mean): def bandwise_mean(paths_list, band_name): - ''' Need to center the data to have - a zero mean. This means iterating over all images, - and taking the "band-wise" mean, then subtracting - that mean from the band. This mean should - also only be computed for the test set, but applied - to the training set. ''' n_pixels = 0 pixel_value_sum = 0 for filepath in paths_list: @@ -226,5 +222,35 @@ def load_raster(master_raster): return arr, meta +def get_class_weighting(training_directory, w0=15, sigma=2, threshold=0.7*15, n_classes=4): + ''' This function should return the correct number of pixels per class + to be used in weighting the classes. 
''' + pixel_dict = {} + border_class = n_classes + for i in range(n_classes+1): + pixel_dict[i] = 0 + for f in os.listdir(training_directory): + if os.path.isdir(os.path.join(training_directory, f)): + for data in glob(os.path.join(training_directory, f, "*.pkl")): + with open(data, 'rb') as f: + d = load(f) + mask = d['class_mask'][0, :, :] + mask[mask != -1] = 1 # make the mask binary. + mask[mask == -1] = 0 # -1 is NO_DATA. + weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map + labels = weights.copy() + labels[labels >= threshold] = border_class + labels[mask == 1] = d['class_code'] + pixel_dict[d['class_code']] += labels[labels == d['class_code']].size + pixel_dict[border_class] += labels[labels == border_class].size + + pd = count_all_pixels(training_directory) + out = {} + mx = max(pd.values()) + for key in pd: + out[key] = mx / pd[key] + return out + + if __name__ == "__main__": pass diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 8e203ad..9c085fb 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -10,7 +10,7 @@ from glob import glob from skimage import transform, util from sklearn.metrics import confusion_matrix -from tensorflow.keras.callbacks import TensorBoard +from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint from rasterio import open as rasopen from rasterio.mask import mask from shapely.geometry import shape @@ -49,30 +49,6 @@ def weighted_loss(target, output): return -tf.reduce_sum(target*output, len(output.get_shape())-1) -def masked_acc(y_true, y_pred): - y_pred = tf.nn.softmax(y_pred) - y_pred = tf.argmax(y_pred, axis=3) - mask = tf.not_equal(y_true, NO_DATA) - y_true = tf.boolean_mask(y_true, mask) - y_pred = tf.boolean_mask(y_pred, mask) - y_true = tf.cast(y_true, tf.int64) - y_pred = tf.cast(y_pred, tf.int64) - return K.mean(tf.math.equal(y_true, y_pred)) - - -def m_acc(y_true, y_pred): - ''' Calculate 
accuracy from masked data. - The built-in accuracy metric uses all data (masked & unmasked).''' - mask = tf.not_equal(K.mean(y_pred, axis=len(y_pred.get_shape())-1), 0) - y_true = tf.math.argmax(y_true, axis=len(y_true.get_shape())-1) - y_true_msk = tf.boolean_mask(y_true, mask) - y_pred_exp = tf.math.exp(y_pred) - y_pred_arg = tf.math.argmax(y_pred_exp, axis=len(y_pred_exp.get_shape())-1) - # so undo that operation with softmax (could also use tf.exp). - y_pred_msk = tf.boolean_mask(y_pred_arg, mask) # mask the 0 values - return K.mean(tf.math.equal(y_true_msk, y_pred_msk)) # get the accuracy - - def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_classes=4, outfile=None, ii=None): @@ -201,31 +177,36 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo else: channel_depth = channels.shape[0] shp = (572, 572, channel_depth) - model = model(shp, num_classes+1) # + 1 for border class - model.compile( - loss=weighted_loss, - optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), - metrics=[m_acc] - ) + if restore: + model = model(shp, num_classes+1) # + 1 for border class + # model.compile( + # loss=weighted_loss, + # optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), + # metrics=['accuracy'] + # ) graph_path = os.path.join('graphs/', str(int(time.time()))) os.mkdir(graph_path) tb = TensorBoard(log_dir=graph_path) + ckpt_path = os.path.join(graph_path, "chkpt{epoch:02d}-{val_loss:.2f}.hdf5") + mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_acc', save_best_only=True, + mode='max', verbose=1) train = os.path.join(training_directory, 'train') test = os.path.join(training_directory, 'test') - class_weight = {0:25.923, 1:1.0, 2:2.79, 3:61.128} + class_weight = {0:30.089, 1:1.0, 2:2.738, 3:72.958} + class_weight = {0:1.0, 1:1.0, 2:1.0, 3:1.0} train_generator = generate_training_data(train, max_pools, sample_random=random_sample, - box_size=box_size, batch_size=4, w0=w0, sigma=sigma, + 
box_size=box_size, threshold=threshold, batch_size=4, w0=w0, sigma=sigma, class_weights=class_weight, channels=channels) test_generator = generate_training_data(test, max_pools, sample_random=random_sample, train=False, box_size=box_size, batch_size=4, class_weights=class_weight, channels=channels) model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, - validation_data=test_generator, - validation_steps=valid_steps, epochs=epochs, verbose=1, - callbacks=[tb, tf.keras.callbacks.TerminateOnNaN()], + validation_data=test_generator, + validation_steps=valid_steps, + callbacks=[tb, mdlcheck, tf.keras.callbacks.TerminateOnNaN()], use_multiprocessing=True) return model, graph_path @@ -241,6 +222,13 @@ def save_model_info(outfile, args): def gradient_wrt_inputs(model, data): + # s = '1553014193.4813933' + # f = 'training_data/multiclass/train/class_2_data/{}.pkl'.format(s) + # with open(f, 'rb') as f: + # data = pload(f) + # data = np.expand_dims(data['data'], axis=0) + # data = np.swapaxes(data, 1, 3) + # gradient_wrt_inputs(model, data) layer_output = model.output loss = -tf.reduce_mean(layer_output) grads = K.gradients(loss, model.input[0])[0] @@ -273,7 +261,7 @@ def gradient_wrt_inputs(model, data): 'tmmn.tif':np.arange(45, 47+1), 'tmmx.tif':np.arange(48, 50+1)} - training_directory = 'training_data/multiclass/' + training_directory = 'training_data/' info_file = 'run_information.txt' max_pools = 0 model_name = 'unet_border_weights{}.h5'.format(int(time.time())) @@ -284,70 +272,63 @@ def gradient_wrt_inputs(model, data): model_func = weighted_unet_no_transpose_conv - steps_per_epoch = 334 # 334 the number in the max class. - valid_steps = 4 # 233 + steps_per_epoch = 2000 # 334 the number in the max class. 
+ valid_steps = 30 #233 epochs = 1 - w0 = 0 + w0 = 15 sigma = 2 - threshold = 0.9 - - train_more = False + threshold = 0.8*w0 + train_more = True eager = False - class_weights = False - learning_rate = 1e-3 + class_weights = True + learning_rate = 1e-4 random_sample = False augment = False exclude = ['etr.tif', 'pet.tif', 'slope.tif', 'tmmn.tif', 'tmmx.tif', 'pr.tif'] - for exx in exclude: - model_name = 'exclude_{}.h5'.format(exx[:-4]) - model_save_path = os.path.join(model_dir, model_name) - channels = [band_dict[x] for x in band_dict if exx not in x] - channels = np.hstack(channels) - print(model_name) - - raster_name = 'exclude_{}'.format(exx[:-4]) - pr_to_eval = '37_28' - image_directory = 'master_rasters/test/' - - param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, - 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, - 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, - 'graph_path':None, 'bands':channels, 'w0':w0, 'sigma':sigma} - - evaluating = True - if not os.path.isfile(model_save_path): - print("Training new model") - model, graph_path = train_model(training_directory, model_func, - steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, - max_pools=max_pools, epochs=epochs, random_sample=random_sample, - learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, - threshold=threshold) - evaluating = False - model.save(model_save_path) - else: - model = tf.keras.models.load_model(model_save_path, - custom_objects={'weighted_loss':weighted_loss}) - model.compile( - loss=weighted_loss, - optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), - ) - if train_more: - model, graph_path = train_model(training_directory, model, steps_per_epoch=steps_per_epoch, - valid_steps=valid_steps, channel_depth=channel_depth, random_sample=random_sample, - max_pools=max_pools, epochs=epochs, restore=True) - model_name = 'unet_random_sample100.h5' - 
model.save(os.path.join(model_dir, model_name)) - - if not evaluating or train_more: - param_dict['graph_path'] = graph_path - save_model_info(info_path, param_dict) - # s = '1553014193.4813933' - # f = 'training_data/multiclass/train/class_2_data/{}.pkl'.format(s) - # with open(f, 'rb') as f: - # data = pload(f) - # data = np.expand_dims(data['data'], axis=0) - # data = np.swapaxes(data, 1, 3) - # gradient_wrt_inputs(model, data) - evaluate_images(image_directory, model, include_string=pr_to_eval, - exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, save_dir='compare_model_outputs/blurry/') - #clip_rasters('compare_model_outputs/blurry/', pr_to_eval) + model_name = 'sblessfilters1000.h5' + model_save_path = os.path.join(model_dir, model_name) + channels = [band_dict[x] for x in band_dict] #if 'B' in x]# if x not in exclude] + channels = np.hstack(channels) + channels = channels[0:36] + raster_name = 'sblessfilters2000' + pr_to_eval = '41_27' + image_directory = 'master_rasters/train/' + param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, + 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, + 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, + 'graph_path':None, 'bands':channels, 'w0':w0, 'sigma':sigma} + + evaluating = True + if not os.path.isfile(model_save_path): + print("Training new model") + model, graph_path = train_model(training_directory, model_func, + steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, + max_pools=max_pools, epochs=epochs, random_sample=random_sample, + learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, + threshold=threshold) + evaluating = False + model.save(model_save_path) + else: + model = tf.keras.models.load_model(model_save_path, + custom_objects={'weighted_loss':weighted_loss}) + model.compile( + loss=weighted_loss, + metrics=['accuracy'], + 
optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), + ) + if train_more: + train_model(training_directory, model, + steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, + max_pools=max_pools, epochs=epochs, random_sample=random_sample, + learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, + threshold=threshold) + + model_name = 'sblessfilters4000.h5' + model.save(os.path.join(model_dir, model_name)) + + if not evaluating or train_more: + param_dict['graph_path'] = graph_path + save_model_info(info_path, param_dict) + evaluate_images(image_directory, model, include_string=pr_to_eval, + exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, save_dir='compare_model_outputs/blurry/') + #clip_rasters('compare_model_outputs/blurry/', pr_to_eval) diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 20c3380..2078a09 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -73,17 +73,19 @@ def weighted_unet_no_transpose_conv(input_shape, n_classes): inp1 = Input(input_shape) weighted_input = Input(shape=(388, 388, 5)) base = 2 - exp = 6 + exp = 5 # 64 filters c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(inp1) c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c1) + c2 = BatchNormalization()(c2) mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) exp += 1 # 128 filters c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp1) c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c3) + c4 = BatchNormalization()(c4) mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) @@ -91,12 +93,15 @@ def weighted_unet_no_transpose_conv(input_shape, n_classes): # 256 filters c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp2) c6 = Conv2D(filters=base**exp, kernel_size=(3,3), 
activation='relu', padding='valid')(c5) + c6 = BatchNormalization()(c6) mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c6) exp += 1 # 512 filters c7 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp3) c8 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c7) + c8 = BatchNormalization()(c8) + mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c8) exp += 1 @@ -135,7 +140,7 @@ def weighted_unet_no_transpose_conv(input_shape, n_classes): # 256 filters c13 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(concat_u2_c6) - bn1 = BatchNormalization(axis=3)(c13) + bn1 = BatchNormalization()(c13) exp -= 1 # 128 filters, making 256 when concatenated with the @@ -153,7 +158,7 @@ def weighted_unet_no_transpose_conv(input_shape, n_classes): # 128 filters c15 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(concat_u3_c4) - bn2 = BatchNormalization(axis=3)(c15) + bn2 = BatchNormalization()(c15) exp -= 1 # 64 filters, making 128 when concatenated with the @@ -170,7 +175,7 @@ def weighted_unet_no_transpose_conv(input_shape, n_classes): c17 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(concat_u4_c2) - bn3 = BatchNormalization(axis=3)(c17) + bn3 = BatchNormalization()(c17) c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn3) diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index a434bdf..07a2777 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -185,6 +185,7 @@ def get_climate_timeseries(self): for target in self.climate_targets: out_arr = None first = True + last = None check = [os.path.isfile(os.path.join(self.root, '{}_{}.tif'.format(q, target))) for q in dates] if False in check: for date in all_dates: @@ -198,7 +199,7 @@ def get_climate_timeseries(self): first = 
False out_arr += out if date in dates: - outfile = os.path.join(self.root, '{}_{}.tif'.format(dd, target)) + outfile = os.path.join(self.root, '{}_{}.tif'.format(date, target)) print("Saving {}".format(outfile)) out_final = gm.conform(out_arr) gm.save_raster(out_final, self.landsat.rasterio_geometry, outfile) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 201ee57..4be7f70 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -188,6 +188,7 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi image_train_directory = 'image_data/train/' image_test_directory = 'image_data/test' + image_dirs = [image_train_directory, image_test_directory] shp_train = 'shapefile_data/train/' shp_test = 'shapefile_data/test/' @@ -196,10 +197,16 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi master_train = 'master_rasters/train/' master_test = 'master_rasters/test' master_dirs = [master_train, master_test] - year = 2013 - - for s, i in zip(shp_dirs, image_dirs): - download_all_images(i, s, year) + year = 2018 + download_from_pr(41, 27, image_train_directory, year, master_train) + paths_map = all_rasters(image_train_directory + "41_27_2018") + mean_map = raster_means(image_train_directory) + stddev_map = raster_stds(image_train_directory, mean_map) + create_master_raster(paths_map, 41, 27, 2018, master_train, mean_map, stddev_map) + ''' + + # for s, i in zip(shp_dirs, image_dirs): + # download_all_images(i, s, year) for im_dir, mas_dir in zip(image_dirs, master_dirs): mean_map = raster_means(image_train_directory) stddev_map = raster_stds(image_train_directory, mean_map) @@ -215,25 +222,23 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi forest = 'Forrest' other = 'other' target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} - train_dir = 
'training_data/multiclass/train/' + train_dir = 'training_data/train/' shp_train = 'shapefile_data/train/' count = 0 save = True count, pixel_dict = extract_training_data_unet(target_dict, shp_train, image_train, master_train, train_dir, count, save=save) - # Need to parallelize the extraction of training data. - # Or maybe not. It seems like parallelizing the opening/closing - # of rasters can stomp on the data. print("You have {} instances per training epoch.".format(count)) print("And {} instances in each class.".format(pixel_dict)) max_weight = max(pixel_dict.values()) for key in pixel_dict: print(key, max_weight / pixel_dict[key]) tot = 0 - test_dir = 'training_data/multiclass/test/' + test_dir = 'training_data/test/' shp_test = 'shapefile_data/test/' count = 0 count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, test_dir, count, save=save) print("You have {} instances per test epoch.".format(count)) print("And {} instances in each class.".format(pixel_dict)) + ''' From c292d9152af5bb46381b9744c13c5869e727016e Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 22 Mar 2019 13:49:50 -0600 Subject: [PATCH 51/89] Removed mean/std mapping --- fully-conv-classification/data_utils.py | 31 +++++++++++++------------ 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 8aafbbd..4670272 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -16,7 +16,8 @@ from sat_image.warped_vrt import warp_single_image -def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, stddev_map): +def create_master_raster(paths_map, path, row, year, raster_directory, mean_map=None, + stddev_map=None): """ Creates a master raster with depth given by the organization of the paths_map. Paths map is a dictionary of lists, with keys the band names (B1, B2...) 
and values the paths of the images in the filesystem @@ -56,20 +57,20 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map, # each band corresponding to, as that's sorting by date. feature_rasters = paths_map[feat] # maps bands to their location in filesystem. for feature_raster in feature_rasters: - band_mean = None - band_std = None - for band in mean_map: - if feature_raster.endswith(band): - band_mean = mean_map[band] - band_std = stddev_map[band] - - if band_mean is None: - print("Band mean not found in mean_mapping for {}".format(feature_raster)) - return - - if band_std is None: - print("Band std not found in std_mapping for {}".format(feature_raster)) - return + # band_mean = None + # band_std = None + # for band in mean_map: + # if feature_raster.endswith(band): + # band_mean = mean_map[band] + # band_std = stddev_map[band] + + # if band_mean is None: + # print("Band mean not found in mean_mapping for {}".format(feature_raster)) + # return + + # if band_std is None: + # print("Band std not found in std_mapping for {}".format(feature_raster)) + # return with rasopen(feature_raster, mode='r') as src: arr = src.read() From d0868bec92952e6b376f31709c4330c45f8c7694 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 22 Mar 2019 13:50:32 -0600 Subject: [PATCH 52/89] Removed mean/std mapping --- .../runner_from_shapefile.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 4be7f70..426cda5 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -197,19 +197,17 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi master_train = 'master_rasters/train/' master_test = 'master_rasters/test' master_dirs = [master_train, master_test] - year = 2018 - download_from_pr(41, 27, 
image_train_directory, year, master_train) - paths_map = all_rasters(image_train_directory + "41_27_2018") - mean_map = raster_means(image_train_directory) - stddev_map = raster_stds(image_train_directory, mean_map) - create_master_raster(paths_map, 41, 27, 2018, master_train, mean_map, stddev_map) - ''' - - # for s, i in zip(shp_dirs, image_dirs): - # download_all_images(i, s, year) + #year = 2018 + #download_from_pr(41, 27, image_train_directory, year, master_train) + #paths_map = all_rasters(image_train_directory + "41_27_2018") + #mean_map = raster_means(image_train_directory) + #stddev_map = raster_stds(image_train_directory, mean_map) + #create_master_raster(paths_map, 41, 27, 2018, master_train, mean_map, stddev_map) + for s, i in zip(shp_dirs, image_dirs): + download_all_images(i, s, year) for im_dir, mas_dir in zip(image_dirs, master_dirs): - mean_map = raster_means(image_train_directory) - stddev_map = raster_stds(image_train_directory, mean_map) + #mean_map = raster_means(image_train_directory) + #stddev_map = raster_stds(image_train_directory, mean_map) create_all_master_rasters(im_dir, mas_dir, mean_map, stddev_map) master_train = 'master_rasters/train/' @@ -241,4 +239,3 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi test_dir, count, save=save) print("You have {} instances per test epoch.".format(count)) print("And {} instances in each class.".format(pixel_dict)) - ''' From 6348bbc0eea0bbeb1bf7332108da35d11f0bdfe9 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 4 Apr 2019 12:33:52 -0600 Subject: [PATCH 53/89] Updated accuracy function. Investigating low-res predictions. 
--- fully-conv-classification/data_generators.py | 104 +++++++++--------- fully-conv-classification/data_utils.py | 2 +- fully-conv-classification/fully_conv.py | 75 +++++++------ fully-conv-classification/models.py | 2 +- fully-conv-classification/prepare_images.py | 8 +- .../runner_from_shapefile.py | 31 ++---- 6 files changed, 116 insertions(+), 106 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 2641375..53f86c3 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -315,61 +315,65 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo masters = [] masks = [] weightings = [] - min_samples = np.inf - data = [] - for gen in generators: - out = gen.next().copy() - data.append(out) - if sample_random: - n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) - if n_samples < min_samples: - min_samples = n_samples - - first = False - one_hot = None - for subset in data: - if sample_random: - samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, - fill_value=subset['class_code']) - else: - samp = subset['class_mask'] - samp[samp != NO_DATA] = subset['class_code'] - - subset['class_mask'] = samp + for _ in range(2): + min_samples = np.inf + data = [] + for gen in generators: + out = gen.next().copy() + data.append(out) + if sample_random: + n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + if n_samples < min_samples: + min_samples = n_samples - for subset in data: - master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) - if channels == 'all': - master = np.squeeze(master) - else: - master = master[:, :, :, channels] - master = np.squeeze(master) - mask = mask[0, :, :, 0] - mask[mask != -1] = 1 # make the mask binary. - mask[mask == -1] = 0 # -1 is NO_DATA. 
- weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map - labels = weights.copy() - labels[labels >= threshold] = border_class - labels[mask == 1] = subset['class_code'] - weights[weights < threshold] = 0 # threshold the weight values arbitrarily - weights[weights != 0] = 1#class_weights[4] - weights[mask == 1] = class_weights[subset['class_code']] - multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class+1)) # - one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class+1)) - one_hot[:, :, border_class][labels == border_class] = 1 - one_hot[:, :, subset['class_code']][labels == subset['class_code']] = 1 - # above is circular but will allow for changing to a sparse encoding easily - for i in range(border_class + 1): - multidim_weights[:, :, i] = weights - if not train: - multidim_weights[multidim_weights != 0] = 1 - masters.append(master) - masks.append(one_hot) - weightings.append(multidim_weights) + first = False + one_hot = None + for subset in data: + if sample_random: + samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, + fill_value=subset['class_code']) + else: + samp = subset['class_mask'] + samp[samp != NO_DATA] = subset['class_code'] + + subset['class_mask'] = samp + + for subset in data: + master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) + if channels == 'all': + master = np.squeeze(master) + else: + master = master[:, :, :, channels] + master = np.squeeze(master) + mask = mask[0, :, :, 0] + mask[mask != -1] = 1 # make the mask binary. + mask[mask == -1] = 0 # -1 is NO_DATA. 
+ weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map + labels = weights.copy() + labels[labels >= threshold] = border_class + labels[mask == 1] = subset['class_code'] + weights[weights < threshold] = 0 # threshold the weight values arbitrarily + weights[weights != 0] = 0 #remove the border weights + weights[mask == 1] = class_weights[subset['class_code']] + multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class)) # + one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class)) + #one_hot[:, :, border_class][labels == border_class] = 1 + one_hot[:, :, subset['class_code']][labels == subset['class_code']] = 1 + # above is circular but will allow for changing to a sparse encoding easily + for i in range(border_class): + multidim_weights[:, :, i] = weights + if not train: + multidim_weights[multidim_weights != 0] = 1 + masters.append(master) + masks.append(one_hot) + weightings.append(multidim_weights) yield [np.asarray(masters, dtype=np.float32), np.asarray(weightings)], np.asarray(masks) + + + def rotation(image, angle): return transform.rotate(image, angle, mode='constant', cval=NO_DATA) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 4670272..3b985cc 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -211,7 +211,7 @@ def clip_raster(evaluated, path, row, outfile=None): def save_raster(arr, outfile, meta, count=4): - meta.update(count=count+1) + meta.update(count=count) with rasopen(outfile, 'w', **meta) as dst: dst.write(arr) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 9c085fb..7ebe95b 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -46,7 +46,19 @@ def weighted_loss(target, output): # Raster of shape widthxheightx1, with weights # of zero where there is no data and weights of whatever the # correct weights 
are for all the other classes. - return -tf.reduce_sum(target*output, len(output.get_shape())-1) + out = -tf.reduce_sum(target*output, len(output.get_shape())-1) + mask = tf.not_equal(out, 0) + return tf.boolean_mask(out, mask) + + +def c_acc(y_true, y_pred): + y_pred_sum = tf.reduce_sum(y_pred, axis=3) + mask = tf.not_equal(y_pred_sum, 0) + y_arg = tf.argmax(y_pred, axis=-1) + y_t_arg = tf.argmax(y_true, axis=-1) + y_arg_mask = tf.boolean_mask(y_arg, mask) + y_t_arg_mask = tf.boolean_mask(y_t_arg, mask) + return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_classes=4, @@ -58,7 +70,7 @@ def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_cla else: master, meta = load_raster(master_raster) class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder - out = np.zeros((master.shape[2], master.shape[1], num_classes+1)) + out = np.zeros((master.shape[2], master.shape[1], num_classes)) # All U-Net specific. 
CHUNK_SIZE = 572 @@ -73,7 +85,7 @@ def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_cla max_pools, return_cuts=True) if channels != 'all': sub_master = sub_master[:, :, :, channels] - sub_msk = np.ones((1, 388, 388, 5)) # a placeholder + sub_msk = np.ones((1, 388, 388, 4)) # a placeholder if sub_master.shape[1] == 572 and sub_master.shape[2] == 572: preds = model.predict([sub_master, sub_msk]) preds_exp = np.exp(preds) @@ -168,7 +180,7 @@ def get_iou(): def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, epochs=3, random_sample=False, threshold=0.9, w0=50, sigma=10, channels='all', - restore=False, learning_rate=1e-3, num_classes=4): + train_more=False, learning_rate=1e-3, num_classes=4): ''' This function assumes that train/test data are subdirectories of training_directory, with the names train/test.''' @@ -177,28 +189,28 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo else: channel_depth = channels.shape[0] shp = (572, 572, channel_depth) - if restore: - model = model(shp, num_classes+1) # + 1 for border class - # model.compile( - # loss=weighted_loss, - # optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), - # metrics=['accuracy'] - # ) + if not train_more: + model = model(shp, 4) # + 1 for border class + model.compile( + loss=weighted_loss, + optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), + metrics=[c_acc] + ) graph_path = os.path.join('graphs/', str(int(time.time()))) os.mkdir(graph_path) tb = TensorBoard(log_dir=graph_path) ckpt_path = os.path.join(graph_path, "chkpt{epoch:02d}-{val_loss:.2f}.hdf5") - mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_acc', save_best_only=True, + mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_c_acc', save_best_only=True, mode='max', verbose=1) train = os.path.join(training_directory, 'train') test = os.path.join(training_directory, 'test') - class_weight = {0:30.089, 1:1.0, 2:2.738, 
3:72.958} - class_weight = {0:1.0, 1:1.0, 2:1.0, 3:1.0} + class_weight = {0:30.756, 1:1.0, 2:2.1659, 3:67.517} + #class_weight = {0:1.0, 1:1.0, 2:1.0, 3:1.0} train_generator = generate_training_data(train, max_pools, sample_random=random_sample, box_size=box_size, threshold=threshold, batch_size=4, w0=w0, sigma=sigma, class_weights=class_weight, channels=channels) test_generator = generate_training_data(test, max_pools, sample_random=random_sample, - train=False, box_size=box_size, batch_size=4, + train=True, box_size=box_size, batch_size=4, class_weights=class_weight, channels=channels) model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, @@ -265,19 +277,17 @@ def gradient_wrt_inputs(model, data): info_file = 'run_information.txt' max_pools = 0 model_name = 'unet_border_weights{}.h5'.format(int(time.time())) - model_name ='unet_gradientwrtinputs.h5' + model_name = 'no_border_class.h5' model_dir = 'models/' info_path = os.path.join(model_dir, info_file) model_save_path = os.path.join(model_dir, model_name) - model_func = weighted_unet_no_transpose_conv - - steps_per_epoch = 2000 # 334 the number in the max class. 
- valid_steps = 30 #233 - epochs = 1 - w0 = 15 + steps_per_epoch = 10 + valid_steps = 7 + epochs = 1000 + w0 = 5 sigma = 2 - threshold = 0.8*w0 + threshold = 0.9*w0 train_more = True eager = False class_weights = True @@ -285,14 +295,13 @@ def gradient_wrt_inputs(model, data): random_sample = False augment = False exclude = ['etr.tif', 'pet.tif', 'slope.tif', 'tmmn.tif', 'tmmx.tif', 'pr.tif'] - model_name = 'sblessfilters1000.h5' model_save_path = os.path.join(model_dir, model_name) - channels = [band_dict[x] for x in band_dict] #if 'B' in x]# if x not in exclude] + channels = [band_dict[x] for x in band_dict] channels = np.hstack(channels) - channels = channels[0:36] - raster_name = 'sblessfilters2000' - pr_to_eval = '41_27' - image_directory = 'master_rasters/train/' + channels = 'all' #channels[0:39] + raster_name = '20000steps' + pr_to_eval = '37_28' + image_directory = '/home/thomas/share/master_rasters/test/' param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, @@ -310,18 +319,18 @@ def gradient_wrt_inputs(model, data): model.save(model_save_path) else: model = tf.keras.models.load_model(model_save_path, - custom_objects={'weighted_loss':weighted_loss}) + custom_objects={'weighted_loss':weighted_loss, 'c_acc':c_acc}) model.compile( loss=weighted_loss, - metrics=['accuracy'], - optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate), + metrics=[c_acc], + optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate) ) if train_more: train_model(training_directory, model, steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, max_pools=max_pools, epochs=epochs, random_sample=random_sample, learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, - threshold=threshold) + threshold=threshold, train_more=train_more) model_name = 'sblessfilters4000.h5' 
model.save(os.path.join(model_dir, model_name)) diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 2078a09..0bbc85d 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -71,7 +71,7 @@ def weighted_unet_no_transpose_conv(input_shape, n_classes): Instead a Upsampling2D layer with a Conv layer after with same padding. ''' inp1 = Input(input_shape) - weighted_input = Input(shape=(388, 388, 5)) + weighted_input = Input(shape=(388, 388, 4)) base = 2 exp = 5 diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index 07a2777..f38fa4b 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -186,7 +186,7 @@ def get_climate_timeseries(self): out_arr = None first = True last = None - check = [os.path.isfile(os.path.join(self.root, '{}_{}.tif'.format(q, target))) for q in dates] + check = [os.path.isfile(os.path.join(self.root, 'climate_rasters', '{}_{}.tif'.format(q, target))) for q in dates] if False in check: for date in all_dates: d = datetime.utcfromtimestamp(date.tolist()/1e9) # convert to a nicer format. 
@@ -199,7 +199,11 @@ def get_climate_timeseries(self): first = False out_arr += out if date in dates: - outfile = os.path.join(self.root, '{}_{}.tif'.format(date, target)) + out_dir = 'climate_rasters' + out_dir = os.path.join(self.root, out_dir) + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + outfile = os.path.join(out_dir, '{}_{}.tif'.format(date, target)) print("Saving {}".format(outfile)) out_final = gm.conform(out_arr) gm.save_raster(out_final, self.landsat.rasterio_geometry, outfile) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 426cda5..787dafa 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -147,7 +147,8 @@ def raster_stds(image_directory, mean_map, satellite=8): return stddev_mapping -def create_all_master_rasters(image_directory, raster_save_directory, mean_mapping, stddev_mapping): +def create_all_master_rasters(image_directory, raster_save_directory, mean_mapping=None, + stddev_mapping=None): """ Creates a master raster for all images in image_directory. Image directory is assumed to be a top-level directory that contains all the path_row directories for test or train (image_data/test/path_row_year*/) @@ -186,34 +187,26 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi # 5. Extract training data # 6. Train network. 
- image_train_directory = 'image_data/train/' - image_test_directory = 'image_data/test' + image_train_directory = '/home/thomas/share/image_data/train/' + image_test_directory = '/home/thomas/share/image_data/test' image_dirs = [image_train_directory, image_test_directory] shp_train = 'shapefile_data/train/' shp_test = 'shapefile_data/test/' shp_dirs = [shp_train, shp_test] shapefile_directory = 'shapefile_data/all_shapefiles' - master_train = 'master_rasters/train/' - master_test = 'master_rasters/test' + master_train = '/home/thomas/share/master_rasters/train/' + master_test = '/home/thomas/share/master_rasters/test' master_dirs = [master_train, master_test] - #year = 2018 - #download_from_pr(41, 27, image_train_directory, year, master_train) - #paths_map = all_rasters(image_train_directory + "41_27_2018") - #mean_map = raster_means(image_train_directory) - #stddev_map = raster_stds(image_train_directory, mean_map) - #create_master_raster(paths_map, 41, 27, 2018, master_train, mean_map, stddev_map) + year = 2013 for s, i in zip(shp_dirs, image_dirs): download_all_images(i, s, year) for im_dir, mas_dir in zip(image_dirs, master_dirs): - #mean_map = raster_means(image_train_directory) - #stddev_map = raster_stds(image_train_directory, mean_map) - create_all_master_rasters(im_dir, mas_dir, mean_map, stddev_map) - - master_train = 'master_rasters/train/' - master_test = 'master_rasters/test/' - image_train = 'image_data/train/' # for fmasks. - image_test = 'image_data/test/' # for fmasks. 
+ create_all_master_rasters(im_dir, mas_dir) + master_train = '/home/thomas/share/master_rasters/train/' + master_test = '/home/thomas/share/master_rasters/test/' + image_train = '/home/thomas/share/image_data/train/' + image_test = '/home/thomas/share/image_data/test/' irr1 = 'Huntley' irr2 = 'Sun_River' fallow = 'Fallow' From 1d54be1243e78821002b64cbba71391c21398e84 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 5 Apr 2019 15:09:07 -0600 Subject: [PATCH 54/89] Know why loss of resolution occurs at borders --- fully-conv-classification/argmax_rasters.py | 37 +++++++++++ fully-conv-classification/data_generators.py | 61 ++++++++----------- fully-conv-classification/data_utils.py | 18 ------ fully-conv-classification/fully_conv.py | 55 ++++++++--------- .../runner_from_shapefile.py | 16 ++--- fully-conv-classification/shapefile_utils.py | 3 +- 6 files changed, 100 insertions(+), 90 deletions(-) create mode 100755 fully-conv-classification/argmax_rasters.py diff --git a/fully-conv-classification/argmax_rasters.py b/fully-conv-classification/argmax_rasters.py new file mode 100755 index 0000000..0075cd7 --- /dev/null +++ b/fully-conv-classification/argmax_rasters.py @@ -0,0 +1,37 @@ +import numpy as np +from rasterio import open as rasopen +from rasterio import int32 +from glob import glob +from os.path import basename, join +from sys import argv + + +im_path = 'compare_model_outputs/systematic/' +save_path = 'compare_model_outputs/argmax/' + +def get_argmax(f, outfile): + with rasopen(f, 'r') as src: + arr = src.read() + meta = src.meta.copy() + + arg = np.argmax(arr, axis=0) + arg = np.expand_dims(arg, axis=0) + arg = arg.astype(np.int32) + meta.update(count=1, dtype=int32) + with rasopen(outfile, 'w', **meta) as dst: + dst.write(arg) + return None + + +def main(f): + b = basename(f) + suff = b[:-14] + pref = b[-14:] + outfile = join(save_path, suff + 'argmax_' + pref) + print('Saving argmax raster to {}'.format(outfile)) + get_argmax(f, outfile) + + +if 
__name__ == '__main__': + in_f = argv[1] + main(in_f) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 53f86c3..1c75943 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -115,31 +115,31 @@ def set_class_mask(self, class_mask): self.dict['class_mask'] = class_mask -def get_masks(image_directory): - ''' Returns all raster masks in the image directory.''' +def concatenate_fmasks(image_directory, class_mask, class_mask_geo): paths = [] for dirpath, dirnames, filenames in os.walk(image_directory): for f in filenames: for suffix in mask_rasters(): if f.endswith(suffix): paths.append(os.path.join(dirpath, f)) - out = None - first_geo = None - n_masks = len(paths) - first = True - for mask_file in paths: - mask, meta = load_raster(mask_file) - # mask value here is 1. - if first: - first = False - first_geo = meta.copy() - out = np.zeros((mask.shape[1], mask.shape[2])) + init = np.zeros(class_mask.shape) # no fmasks, just data + init[0] = class_mask[0].copy() + j_fmasks = np.zeros(class_mask.shape) + for fmask_file in paths: + fmask, fmeta = load_raster(fmask_file) try: - out[mask[0] == 1] = 1 # 0 index is for removing the (1, n, m) dimension. + class_mask[fmask == 1] = NO_DATA # 0 index is for removing the (1, n, m) dimension. 
+ j_fmasks[fmask == 1] = 1 except (ValueError, IndexError) as e: - mask = warp_single_image(mask_file, first_geo) - out[mask[0] == 1] = 1 - return out + fmask = warp_single_image(fmask_file, class_mask_geo) + class_mask[fmask == 1] = NO_DATA + + # fig, ax = plt.subplots(ncols=3) + # ax[0].imshow(class_mask[0]) + # ax[1].imshow(j_fmasks[0]) + # ax[2].imshow(init[0]) + # plt.show() + return class_mask def extract_training_data_unet(target_dict, shapefile_directory, image_directory, @@ -158,31 +158,24 @@ def extract_training_data_unet(target_dict, shapefile_directory, image_directory mask_raster = 'class_mask_' for f in glob(os.path.join(shapefile_directory, "*.shp")): if f not in done: - all_matches = all_matching_shapefiles(f, shapefile_directory) + all_matches = all_matching_shapefiles(f, shapefile_directory) # get all + # shapefiles in the same path / row + all_matches.append(f) done.add(f) for match in all_matches: done.add(match) p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - if "37_27" in suffix: - continue - fmask = get_masks(os.path.join(image_directory, suffix[:-4])) + suffix = '{}_{}_{}.tif'.format(p, r, year) #image directory master_raster = os.path.join(master_raster_directory, train_raster + suffix) mask_file = os.path.join(master_raster_directory, mask_raster + suffix) # for rasterio.mask.mask masks = [] - all_matches.append(f) shp = None for match in all_matches: - msk = generate_class_mask(match, mask_file) - # try: - # msk[0][fmask == 1] = NO_DATA - # except IndexError: - # print(match, msk.shape, fmask.shape) - # # What's going on here? - # # Fmasks and masks have different shapes... - # # Probably need to warp_vrt? - # #msk[:, :][fmask == 1] = NO_DATA - + msk, mask_meta = generate_class_mask(match, mask_file) # mask file is + # a blank raster - expedites loading and makes troubleshooting easier.. + # This should be removed in a finished product. 
+ print(match) + msk = concatenate_fmasks(os.path.join(image_directory, suffix[:-4]), msk, mask_meta) shp = msk.shape cc = assign_class_code(target_dict, match) if cc is not None: @@ -192,7 +185,7 @@ def extract_training_data_unet(target_dict, shapefile_directory, image_directory master, meta = load_raster(master_raster) else: master = np.zeros(shp) - + # 92 is unet offset. for i in range(92, master.shape[1], unet_output_size): for j in range(92, master.shape[2], unet_output_size): diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 3b985cc..6d3cc95 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -57,27 +57,9 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map= # each band corresponding to, as that's sorting by date. feature_rasters = paths_map[feat] # maps bands to their location in filesystem. for feature_raster in feature_rasters: - # band_mean = None - # band_std = None - # for band in mean_map: - # if feature_raster.endswith(band): - # band_mean = mean_map[band] - # band_std = stddev_map[band] - - # if band_mean is None: - # print("Band mean not found in mean_mapping for {}".format(feature_raster)) - # return - - # if band_std is None: - # print("Band std not found in std_mapping for {}".format(feature_raster)) - # return - with rasopen(feature_raster, mode='r') as src: arr = src.read() raster_geo = src.meta.copy() - - #arr = (arr - band_mean) / band_std - if first: first_geo = raster_geo.copy() empty = zeros((num_rasters, arr.shape[1], arr.shape[2]), float32) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 7ebe95b..dbbd373 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -10,7 +10,7 @@ from glob import glob from skimage import transform, util from sklearn.metrics import confusion_matrix -from tensorflow.keras.callbacks 
import TensorBoard, ModelCheckpoint +from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, LearningRateScheduler from rasterio import open as rasopen from rasterio.mask import mask from shapely.geometry import shape @@ -51,7 +51,7 @@ def weighted_loss(target, output): return tf.boolean_mask(out, mask) -def c_acc(y_true, y_pred): +def acc(y_true, y_pred): y_pred_sum = tf.reduce_sum(y_pred, axis=3) mask = tf.not_equal(y_pred_sum, 0) y_arg = tf.argmax(y_pred, axis=-1) @@ -178,9 +178,17 @@ def get_iou(): print(f, compute_iou(y_pred, y_true)) +def lr_schedule(epoch, lr): + if epoch == 0: + lr = 1e-3 + else: + lr = 1e-3*np.exp(-(epoch / 500)) + return lr + + def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, epochs=3, random_sample=False, threshold=0.9, w0=50, sigma=10, channels='all', - train_more=False, learning_rate=1e-3, num_classes=4): + train_more=False, beta_1=0.9999, beta_2=0.999999, learning_rate=1e-3, num_classes=4): ''' This function assumes that train/test data are subdirectories of training_directory, with the names train/test.''' @@ -193,18 +201,19 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo model = model(shp, 4) # + 1 for border class model.compile( loss=weighted_loss, - optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), - metrics=[c_acc] + optimizer=tf.keras.optimizers.Adam(lr=learning_rate, beta_1=beta_1, beta_2=beta_2), + metrics=[acc] ) graph_path = os.path.join('graphs/', str(int(time.time()))) os.mkdir(graph_path) tb = TensorBoard(log_dir=graph_path) ckpt_path = os.path.join(graph_path, "chkpt{epoch:02d}-{val_loss:.2f}.hdf5") - mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_c_acc', save_best_only=True, + scheduler = LearningRateScheduler(lr_schedule, verbose=1) + mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_acc', save_best_only=True, mode='max', verbose=1) train = os.path.join(training_directory, 'train') test = 
os.path.join(training_directory, 'test') - class_weight = {0:30.756, 1:1.0, 2:2.1659, 3:67.517} + class_weight = {0:28.101, 1:1.0, 2:2.9614, 3:103.8927} #class_weight = {0:1.0, 1:1.0, 2:1.0, 3:1.0} train_generator = generate_training_data(train, max_pools, sample_random=random_sample, box_size=box_size, threshold=threshold, batch_size=4, w0=w0, sigma=sigma, @@ -218,7 +227,7 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo verbose=1, validation_data=test_generator, validation_steps=valid_steps, - callbacks=[tb, mdlcheck, tf.keras.callbacks.TerminateOnNaN()], + callbacks=[tb, scheduler, mdlcheck, tf.keras.callbacks.TerminateOnNaN()], use_multiprocessing=True) return model, graph_path @@ -276,30 +285,25 @@ def gradient_wrt_inputs(model, data): training_directory = 'training_data/' info_file = 'run_information.txt' max_pools = 0 - model_name = 'unet_border_weights{}.h5'.format(int(time.time())) - model_name = 'no_border_class.h5' model_dir = 'models/' info_path = os.path.join(model_dir, info_file) - model_save_path = os.path.join(model_dir, model_name) model_func = weighted_unet_no_transpose_conv steps_per_epoch = 10 valid_steps = 7 - epochs = 1000 + epochs = 30000 w0 = 5 sigma = 2 threshold = 0.9*w0 - train_more = True + train_more = False eager = False class_weights = True - learning_rate = 1e-4 + learning_rate = 1e-3 random_sample = False augment = False - exclude = ['etr.tif', 'pet.tif', 'slope.tif', 'tmmn.tif', 'tmmx.tif', 'pr.tif'] + channels = 'all' + raster_name = 'lrschedule' + model_name = 'lrschedule.h5' model_save_path = os.path.join(model_dir, model_name) - channels = [band_dict[x] for x in band_dict] - channels = np.hstack(channels) - channels = 'all' #channels[0:39] - raster_name = '20000steps' pr_to_eval = '37_28' image_directory = '/home/thomas/share/master_rasters/test/' param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, @@ -320,24 +324,17 @@ def gradient_wrt_inputs(model, 
data): else: model = tf.keras.models.load_model(model_save_path, custom_objects={'weighted_loss':weighted_loss, 'c_acc':c_acc}) - model.compile( - loss=weighted_loss, - metrics=[c_acc], - optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate) - ) if train_more: train_model(training_directory, model, steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, max_pools=max_pools, epochs=epochs, random_sample=random_sample, learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, threshold=threshold, train_more=train_more) - - model_name = 'sblessfilters4000.h5' + model_name = '240eptest.h5' model.save(os.path.join(model_dir, model_name)) if not evaluating or train_more: - param_dict['graph_path'] = graph_path save_model_info(info_path, param_dict) evaluate_images(image_directory, model, include_string=pr_to_eval, - exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, save_dir='compare_model_outputs/blurry/') - #clip_rasters('compare_model_outputs/blurry/', pr_to_eval) + exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, + save_dir='compare_model_outputs/systematic/') diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 787dafa..cee7cee 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -199,10 +199,10 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi master_test = '/home/thomas/share/master_rasters/test' master_dirs = [master_train, master_test] year = 2013 - for s, i in zip(shp_dirs, image_dirs): - download_all_images(i, s, year) - for im_dir, mas_dir in zip(image_dirs, master_dirs): - create_all_master_rasters(im_dir, mas_dir) + # for s, i in zip(shp_dirs, image_dirs): + # download_all_images(i, s, year) + # for im_dir, mas_dir in zip(image_dirs, master_dirs): + # create_all_master_rasters(im_dir, mas_dir) 
master_train = '/home/thomas/share/master_rasters/train/' master_test = '/home/thomas/share/master_rasters/test/' image_train = '/home/thomas/share/image_data/train/' @@ -228,7 +228,7 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi test_dir = 'training_data/test/' shp_test = 'shapefile_data/test/' count = 0 - count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, - test_dir, count, save=save) - print("You have {} instances per test epoch.".format(count)) - print("And {} instances in each class.".format(pixel_dict)) + # count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, + # test_dir, count, save=save) + # print("You have {} instances per test epoch.".format(count)) + # print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 1354e75..5c95d29 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -29,7 +29,8 @@ def generate_class_mask(shapefile, master_raster, no_data=-1): shp = shp.to_crs(src.crs) features = get_features(shp) out_image, out_transform = mask(src, shapes=features, nodata=no_data) - return out_image + meta = src.meta + return out_image, meta def get_shapefile_lat_lon(shapefile): From 14d891a38ca1dcaf55fe372efc4d7ad99e909f83 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 9 Apr 2019 10:01:34 -0600 Subject: [PATCH 55/89] Investigating effect of buffering shapefile data before training on it --- fully-conv-classification/data_generators.py | 12 +-- fully-conv-classification/fully_conv.py | 86 +++++-------------- .../runner_from_shapefile.py | 11 ++- fully-conv-classification/shapefile_utils.py | 22 ++++- 4 files changed, 51 insertions(+), 80 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py 
index 1c75943..e618fac 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -122,23 +122,14 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo): for suffix in mask_rasters(): if f.endswith(suffix): paths.append(os.path.join(dirpath, f)) - init = np.zeros(class_mask.shape) # no fmasks, just data - init[0] = class_mask[0].copy() - j_fmasks = np.zeros(class_mask.shape) for fmask_file in paths: fmask, fmeta = load_raster(fmask_file) try: class_mask[fmask == 1] = NO_DATA # 0 index is for removing the (1, n, m) dimension. - j_fmasks[fmask == 1] = 1 except (ValueError, IndexError) as e: fmask = warp_single_image(fmask_file, class_mask_geo) class_mask[fmask == 1] = NO_DATA - # fig, ax = plt.subplots(ncols=3) - # ax[0].imshow(class_mask[0]) - # ax[1].imshow(j_fmasks[0]) - # ax[2].imshow(init[0]) - # plt.show() return class_mask @@ -276,6 +267,7 @@ def __init__(self, class_filename): def _get_files(self): self.file_list = [x[2] for x in os.walk(self.class_filename)][0] self.file_list = [os.path.join(self.class_filename, x) for x in self.file_list] + self.file_list = self.file_list[:4] def next(self): if self.idx == self.n_files or self.idx == 0: @@ -321,6 +313,7 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo first = False one_hot = None + weighting_dict = {} for subset in data: if sample_random: samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, @@ -330,6 +323,7 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo samp[samp != NO_DATA] = subset['class_code'] subset['class_mask'] = samp + weighting_dict[subset['class_code']] = len(np.where(samp != NO_DATA)[0]) for subset in data: master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index dbbd373..b2078d0 100644 --- 
a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -10,7 +10,8 @@ from glob import glob from skimage import transform, util from sklearn.metrics import confusion_matrix -from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, LearningRateScheduler +from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler, + ReduceLROnPlateau) from rasterio import open as rasopen from rasterio.mask import mask from shapely.geometry import shape @@ -136,59 +137,14 @@ def evaluate_images(image_directory, model, include_string, max_pools, exclude_s outfile=out, ii=ii) -def compute_iou(y_pred, y_true): - ''' This is slow. ''' - y_pred = y_pred.flatten() - y_true = y_true.flatten() - current = confusion_matrix(y_true, y_pred, labels=[0, 1, 2, 3]) - print(current) - # compute mean iou - intersection = np.diag(current) - ground_truth_set = current.sum(axis=1) - predicted_set = current.sum(axis=0) - union = ground_truth_set + predicted_set - intersection - IoU = intersection / union.astype(np.float32) - return np.mean(IoU) - - -def get_iou(): - shpfiles = [ - 'shapefile_data/test/MT_Huntley_Main_2013_372837_28.shp', - 'shapefile_data/test/MT_FLU_2017_Fallow_372837_28.shp', - 'shapefile_data/test/MT_FLU_2017_Forrest_372837_28.shp', - 'shapefile_data/test/MT_other_372837_28.shp'] - - m_dir = 'eval_test/all_ims/' - ls = [] - mask = image_directory + 'class_mask_37_28_2013.tif' - for f in shpfiles: - msk = generate_class_mask(f, mask) - msk[msk != NO_DATA] = 1 - ls.append(msk) - y_true = np.vstack(ls) - indices = np.where(y_true != NO_DATA) - y_true = y_true[:, indices[1], indices[2]] - y_true = np.argmax(y_true, axis=0) - for f in glob(m_dir + "*.tif"): - y_pred, meta = load_raster(f) - y_pred = y_pred[:, indices[1], indices[2]] - y_pred = np.round(y_pred) - y_pred.astype(np.int32) - y_pred = np.argmax(y_pred, axis=0) - print(f, compute_iou(y_pred, y_true)) - - def lr_schedule(epoch, lr): - if epoch == 0: - 
lr = 1e-3 - else: - lr = 1e-3*np.exp(-(epoch / 500)) + lr = lr*np.exp(-(epoch / 1000)) return lr def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, epochs=3, random_sample=False, threshold=0.9, w0=50, sigma=10, channels='all', - train_more=False, beta_1=0.9999, beta_2=0.999999, learning_rate=1e-3, num_classes=4): + train_more=False, raster_name=None,beta_1=0.9999, beta_2=0.999999, learning_rate=1e-3, num_classes=4): ''' This function assumes that train/test data are subdirectories of training_directory, with the names train/test.''' @@ -201,20 +157,21 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo model = model(shp, 4) # + 1 for border class model.compile( loss=weighted_loss, - optimizer=tf.keras.optimizers.Adam(lr=learning_rate, beta_1=beta_1, beta_2=beta_2), + optimizer=tf.keras.optimizers.SGD(lr=learning_rate, momentum=0.99), metrics=[acc] ) graph_path = os.path.join('graphs/', str(int(time.time()))) os.mkdir(graph_path) tb = TensorBoard(log_dir=graph_path) - ckpt_path = os.path.join(graph_path, "chkpt{epoch:02d}-{val_loss:.2f}.hdf5") + ckpt_path = os.path.join('models', raster_name+"_{epoch:02d}-{val_acc:.2f}.hdf5") scheduler = LearningRateScheduler(lr_schedule, verbose=1) mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_acc', save_best_only=True, mode='max', verbose=1) + reduce_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30) train = os.path.join(training_directory, 'train') - test = os.path.join(training_directory, 'test') - class_weight = {0:28.101, 1:1.0, 2:2.9614, 3:103.8927} - #class_weight = {0:1.0, 1:1.0, 2:1.0, 3:1.0} + test = os.path.join('training_data', 'test') + #class_weight = {0:28.101, 1:1.0, 2:2.9614, 3:103.8927} for no buffer + class_weight = {0:36.807, 1:1.0, 2:2.8765, 3:93.984} train_generator = generate_training_data(train, max_pools, sample_random=random_sample, box_size=box_size, threshold=threshold, batch_size=4, w0=w0, 
sigma=sigma, class_weights=class_weight, channels=channels) @@ -227,7 +184,8 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo verbose=1, validation_data=test_generator, validation_steps=valid_steps, - callbacks=[tb, scheduler, mdlcheck, tf.keras.callbacks.TerminateOnNaN()], + callbacks=[tb, scheduler, mdlcheck, tf.keras.callbacks.TerminateOnNaN(), + reduce_on_plateau], use_multiprocessing=True) return model, graph_path @@ -282,7 +240,7 @@ def gradient_wrt_inputs(model, data): 'tmmn.tif':np.arange(45, 47+1), 'tmmx.tif':np.arange(48, 50+1)} - training_directory = 'training_data/' + training_directory = 'training_data/buffered/' info_file = 'run_information.txt' max_pools = 0 model_dir = 'models/' @@ -290,22 +248,24 @@ def gradient_wrt_inputs(model, data): model_func = weighted_unet_no_transpose_conv steps_per_epoch = 10 valid_steps = 7 - epochs = 30000 + epochs = 1250 w0 = 5 sigma = 2 threshold = 0.9*w0 train_more = False eager = False class_weights = True - learning_rate = 1e-3 + learning_rate = 5e-2 random_sample = False augment = False channels = 'all' - raster_name = 'lrschedule' - model_name = 'lrschedule.h5' + raster_name = 'nobuffer' + model_name = 'doesitdiverge.h5' model_save_path = os.path.join(model_dir, model_name) - pr_to_eval = '37_28' - image_directory = '/home/thomas/share/master_rasters/test/' + #model_save_path = 'graphs/1554563634/chkpt1826-7.36.hdf5' #83% acc, no buffering + #model_save_path = 'graphs/1554764803/chkpt640-4.93.hdf5' #83% acc, buffering + pr_to_eval = '39_27' + image_directory = '/home/thomas/share/master_rasters/train/' param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, @@ -318,12 +278,12 @@ def gradient_wrt_inputs(model, data): steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, 
max_pools=max_pools, epochs=epochs, random_sample=random_sample, learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, - threshold=threshold) + threshold=threshold, raster_name=raster_name) evaluating = False model.save(model_save_path) else: model = tf.keras.models.load_model(model_save_path, - custom_objects={'weighted_loss':weighted_loss, 'c_acc':c_acc}) + custom_objects={'weighted_loss':weighted_loss, 'acc':acc}) if train_more: train_model(training_directory, model, steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index cee7cee..dd934db 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -191,10 +191,9 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi image_test_directory = '/home/thomas/share/image_data/test' image_dirs = [image_train_directory, image_test_directory] - shp_train = 'shapefile_data/train/' - shp_test = 'shapefile_data/test/' + shp_train = 'shapefile_data/buffered/train/' + shp_test = 'shapefile_data/buffered/test/' shp_dirs = [shp_train, shp_test] - shapefile_directory = 'shapefile_data/all_shapefiles' master_train = '/home/thomas/share/master_rasters/train/' master_test = '/home/thomas/share/master_rasters/test' master_dirs = [master_train, master_test] @@ -213,8 +212,8 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi forest = 'Forrest' other = 'other' target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} - train_dir = 'training_data/train/' - shp_train = 'shapefile_data/train/' + train_dir = 'training_data/buffered/train/' + shp_train = 'shapefile_data/buffered//train/' count = 0 save = True count, pixel_dict = extract_training_data_unet(target_dict, shp_train, image_train, @@ -225,7 +224,7 @@ def create_all_master_rasters(image_directory, raster_save_directory, 
mean_mappi for key in pixel_dict: print(key, max_weight / pixel_dict[key]) tot = 0 - test_dir = 'training_data/test/' + test_dir = 'training_data/buffered/test/' shp_test = 'shapefile_data/test/' count = 0 # count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 5c95d29..6f0f90b 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -6,7 +6,7 @@ from fiona import open as fopen from rasterio.mask import mask from rasterio import open as rasopen -from shapely.geometry import shape +from shapely.geometry import shape, mapping from sklearn.neighbors import KDTree from collections import defaultdict @@ -25,6 +25,7 @@ def generate_class_mask(shapefile, master_raster, no_data=-1): else the masking won't work. ''' shp = gpd.read_file(shapefile) + shp = shp[shp.geometry.notnull()] with rasopen(master_raster, 'r') as src: shp = shp.to_crs(src.crs) features = get_features(shp) @@ -241,5 +242,22 @@ def required_points(shapefile, total_area, total_instances): frac = area / total_area return int(total_instances * frac) + +def buffer_shapefile(shp): + + buf = -0.00050 + with fopen(shp, 'r') as polys: + out = [] + meta = polys.meta + with fopen(shp, 'w', **meta) as dst: + for feat in polys: + feat['geometry'] = mapping(shape(feat['geometry']).buffer(buf)) + dst.write(feat) + if __name__ == '__main__': - pass + + pth = 'shapefile_data/all_shapefiles/MT_Sun_River_2013_392739_27.shp' + path = 'shapefile_data/buffered/test/' + from glob import glob + for f in glob(path + '*.shp'): + buffer_shapefile(f) From fb21b583977663c5fe3c65499e6fed74c49203c1 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Mon, 15 Apr 2019 09:40:19 -0600 Subject: [PATCH 56/89] Working on focal loss + border weights --- fully-conv-classification/data_generators.py | 71 +++++--- 
fully-conv-classification/fully_conv.py | 165 ++++++++++-------- fully-conv-classification/models.py | 14 +- .../runner_from_shapefile.py | 28 ++- 4 files changed, 164 insertions(+), 114 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index e618fac..2d1ee62 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -20,6 +20,16 @@ NUM_CLASSES = 4 +def distance_map(mask): + mask = mask.copy().astype(bool) + mask = ~mask # make the non-masked areas masked + distances = distance_transform_edt(mask) # ask where the closest masked pixel is + # distances are always positive, so 1-distances can be very negative. + # We're setting the e-folding time with sigma, and the + # border pixel value (y-intercept) with w0. + return distances + + def weight_map(mask, w0=10, sigma=10): mask = mask.copy().astype(bool) mask = ~mask # make the non-masked areas masked @@ -256,21 +266,23 @@ def all_matching_shapefiles(to_match, shapefile_directory): class DataGen: + ''' Infinite data generator. 
Pulls files from + a directory named "class_dir".''' - def __init__(self, class_filename): + def __init__(self, class_dir, augment=False, random_augment=False): self.file_list = None - self.class_filename = class_filename + self.class_dir = class_dir self._get_files() self.n_files = len(self.file_list) self.idx = 0 - + self.shuffled = sample(self.file_list, self.n_files) + def _get_files(self): - self.file_list = [x[2] for x in os.walk(self.class_filename)][0] - self.file_list = [os.path.join(self.class_filename, x) for x in self.file_list] - self.file_list = self.file_list[:4] + self.file_list = [x[2] for x in os.walk(self.class_dir)][0] + self.file_list = [os.path.join(self.class_dir, x) for x in self.file_list] def next(self): - if self.idx == self.n_files or self.idx == 0: + if self.idx == self.n_files: self.idx = 0 self.shuffled = sample(self.file_list, self.n_files) out = self.shuffled[self.idx] @@ -287,15 +299,18 @@ def _from_pickle(self, filename): def generate_training_data(training_directory, max_pools, sample_random=True, box_size=0, - batch_size=8, class_weights={}, threshold=0.9, w0=40, sigma=10, channels='all', - train=True): + batch_size=8, threshold=None, sigma=None, w0=None, class_weights={}, channels='all', train=True): ''' Assumes data is stored in training_directory in subdirectories labeled class_n_train with n the class code ''' class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] + if not len(class_dirs): + class_dirs = [training_directory] generators = [] border_class = len(class_weights.keys()) + i = 0 for d in class_dirs: generators.append(DataGen(d)) + while True: masters = [] masks = [] @@ -303,17 +318,25 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo for _ in range(2): min_samples = np.inf data = [] + weighting_dict = {} + count_dict = {} for gen in generators: out = gen.next().copy() data.append(out) + n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + 
weighting_dict[out['class_code']] = n_samples + count_dict[out['class_code']] = n_samples if sample_random: n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + print(n_samples) if n_samples < min_samples: min_samples = n_samples - first = False - one_hot = None - weighting_dict = {} + maxx = max(weighting_dict.values()) + for key in weighting_dict: + weighting_dict[key] = np.log((1.1*(maxx / weighting_dict[key]))) + # print(weighting_dict) + # print(count_dict) for subset in data: if sample_random: samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, @@ -323,7 +346,6 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo samp[samp != NO_DATA] = subset['class_code'] subset['class_mask'] = samp - weighting_dict[subset['class_code']] = len(np.where(samp != NO_DATA)[0]) for subset in data: master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) @@ -337,28 +359,25 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo mask[mask == -1] = 0 # -1 is NO_DATA. 
weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map labels = weights.copy() + labels[labels < threshold] = 0 labels[labels >= threshold] = border_class labels[mask == 1] = subset['class_code'] weights[weights < threshold] = 0 # threshold the weight values arbitrarily - weights[weights != 0] = 0 #remove the border weights - weights[mask == 1] = class_weights[subset['class_code']] - multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class)) # - one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class)) - #one_hot[:, :, border_class][labels == border_class] = 1 + #weights[mask == 1] = class_weights[subset['class_code']] + weights[weights != 0] = abs(np.log((1.1*(maxx / len(np.where(weights != 0)[0]))))) + #weights[weights != 0] = (1.1*(maxx / len(np.where(weights != 0)[0]))) + weights[mask == 1] = weighting_dict[subset['class_code']] + multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class+1)) # + one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class+1)) + one_hot[:, :, border_class][labels == border_class] = 1 one_hot[:, :, subset['class_code']][labels == subset['class_code']] = 1 - # above is circular but will allow for changing to a sparse encoding easily - for i in range(border_class): + for i in range(border_class+1): multidim_weights[:, :, i] = weights - if not train: - multidim_weights[multidim_weights != 0] = 1 masters.append(master) masks.append(one_hot) weightings.append(multidim_weights) - yield [np.asarray(masters, dtype=np.float32), np.asarray(weightings)], np.asarray(masks) - - - + yield [np.asarray(masters), np.asarray(weightings)], np.asarray(masks) def rotation(image, angle): diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index b2078d0..229a602 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -40,17 +40,20 @@ def custom_objective(y_true, y_pred): def weighted_loss(target, 
output): - # All I need to do is multiply the output loss - # by the weights that I input. - # Loss is of shape n_classesxwidthxheight # Weight map: - # Raster of shape widthxheightx1, with weights - # of zero where there is no data and weights of whatever the - # correct weights are for all the other classes. + out = -tf.reduce_sum(target*output, len(output.get_shape())-1) + #mask = tf.not_equal(out, 0)#tf.boolean_mask(out, mask) + return out + +def weighted_focal_loss(target, output, gamma=1.3): + exp = tf.exp(output) + pt = tf.pow(1-exp, gamma) out = -tf.reduce_sum(target*output, len(output.get_shape())-1) mask = tf.not_equal(out, 0) + pt_ce = tf.multiply([pt, output]) + out = -tf.reduce_sum(pt_ce*target, len(output.get_shape()) -1) return tf.boolean_mask(out, mask) - + def acc(y_true, y_pred): y_pred_sum = tf.reduce_sum(y_pred, axis=3) @@ -71,7 +74,7 @@ def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_cla else: master, meta = load_raster(master_raster) class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder - out = np.zeros((master.shape[2], master.shape[1], num_classes)) + out = np.zeros((master.shape[2], master.shape[1], num_classes+1)) # All U-Net specific. CHUNK_SIZE = 572 @@ -86,7 +89,7 @@ def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_cla max_pools, return_cuts=True) if channels != 'all': sub_master = sub_master[:, :, :, channels] - sub_msk = np.ones((1, 388, 388, 4)) # a placeholder + sub_msk = np.ones((1, 388, 388, 5)) # a placeholder if sub_master.shape[1] == 572 and sub_master.shape[2] == 572: preds = model.predict([sub_master, sub_msk]) preds_exp = np.exp(preds) @@ -111,12 +114,12 @@ def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_cla else: print("whatcha got goin on here?") - stdout.write("N eval: {}. Percent done: {:.4f}\r".format(ii, i / master.shape[1])) + stdout.write("N eval: {}. 
Percent done: {:.2f}\r".format(ii, i / master.shape[1])) out = np.swapaxes(out, 0, 2) out = out.astype(np.float32) if outfile: - save_raster(out, outfile, meta) + save_raster(out, outfile, meta, count=5) return out @@ -138,13 +141,11 @@ def evaluate_images(image_directory, model, include_string, max_pools, exclude_s def lr_schedule(epoch, lr): - lr = lr*np.exp(-(epoch / 1000)) - return lr + return 0.01*np.exp(-epoch/1000) def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, - epochs=3, random_sample=False, threshold=0.9, w0=50, sigma=10, channels='all', - train_more=False, raster_name=None,beta_1=0.9999, beta_2=0.999999, learning_rate=1e-3, num_classes=4): + epochs=3, random_sample=False, threshold=None, sigma=None, w0=None, channels='all', train_more=False, raster_name=None, learning_rate=1e-3, num_classes=5): ''' This function assumes that train/test data are subdirectories of training_directory, with the names train/test.''' @@ -153,39 +154,41 @@ def train_model(training_directory, model, steps_per_epoch, valid_steps, max_poo else: channel_depth = channels.shape[0] shp = (572, 572, channel_depth) + weight_shape = (388, 388, num_classes) if not train_more: - model = model(shp, 4) # + 1 for border class + model = model(shp, weight_shape, num_classes, base_exp=5) model.compile( - loss=weighted_loss, - optimizer=tf.keras.optimizers.SGD(lr=learning_rate, momentum=0.99), + loss=weighted_focal_loss, + optimizer=tf.keras.optimizers.Adam(lr=learning_rate), metrics=[acc] ) + model.summary() graph_path = os.path.join('graphs/', str(int(time.time()))) os.mkdir(graph_path) tb = TensorBoard(log_dir=graph_path) - ckpt_path = os.path.join('models', raster_name+"_{epoch:02d}-{val_acc:.2f}.hdf5") + ckpt_path = os.path.join(graph_path, raster_name+"_{epoch:02d}-{val_acc:.2f}.hdf5") scheduler = LearningRateScheduler(lr_schedule, verbose=1) mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_acc', save_best_only=True, mode='max', verbose=1) 
- reduce_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30) train = os.path.join(training_directory, 'train') - test = os.path.join('training_data', 'test') - #class_weight = {0:28.101, 1:1.0, 2:2.9614, 3:103.8927} for no buffer - class_weight = {0:36.807, 1:1.0, 2:2.8765, 3:93.984} + test = os.path.join(training_directory, 'test') + class_weight = {0:28.101, 1:1.0, 2:2.9614, 3:103.8927} #for no buffer + #class_weight = {0:1, 1:1.0, 2:1, 3:1} #for no buffer + train_generator = generate_training_data(train, max_pools, sample_random=random_sample, - box_size=box_size, threshold=threshold, batch_size=4, w0=w0, sigma=sigma, - class_weights=class_weight, channels=channels) - test_generator = generate_training_data(test, max_pools, sample_random=random_sample, - train=True, box_size=box_size, batch_size=4, - class_weights=class_weight, channels=channels) + box_size=box_size, class_weights=class_weight, channels=channels, threshold=threshold, + sigma=sigma, w0=w0) + test_generator = generate_training_data(test, max_pools, sample_random=False, + box_size=box_size, batch_size=4, w0=w0, threshold=threshold, + sigma=sigma, class_weights=class_weight, channels=channels) + model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=1, validation_data=test_generator, validation_steps=valid_steps, - callbacks=[tb, scheduler, mdlcheck, tf.keras.callbacks.TerminateOnNaN(), - reduce_on_plateau], + callbacks=[tb, scheduler, mdlcheck, tf.keras.callbacks.TerminateOnNaN()], use_multiprocessing=True) return model, graph_path @@ -240,61 +243,87 @@ def gradient_wrt_inputs(model, data): 'tmmn.tif':np.arange(45, 47+1), 'tmmx.tif':np.arange(48, 50+1)} - training_directory = 'training_data/buffered/' + training_directory = 'training_data/' info_file = 'run_information.txt' max_pools = 0 model_dir = 'models/' info_path = os.path.join(model_dir, info_file) model_func = weighted_unet_no_transpose_conv - steps_per_epoch = 10 - valid_steps = 
7 - epochs = 1250 - w0 = 5 - sigma = 2 - threshold = 0.9*w0 + steps_per_epoch = 100 + valid_steps = 20 + epochs = 320 + w0 = 10 + sigma = 5 + threshold = 0.7*w0 + train_iter = 1 train_more = False eager = False class_weights = True - learning_rate = 5e-2 + learning_rate = 1e-3 random_sample = False augment = False channels = 'all' - raster_name = 'nobuffer' - model_name = 'doesitdiverge.h5' + raster_name = 'w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) + model_name = 'w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) model_save_path = os.path.join(model_dir, model_name) - #model_save_path = 'graphs/1554563634/chkpt1826-7.36.hdf5' #83% acc, no buffering - #model_save_path = 'graphs/1554764803/chkpt640-4.93.hdf5' #83% acc, buffering - pr_to_eval = '39_27' - image_directory = '/home/thomas/share/master_rasters/train/' + pr_to_eval = '37_28' + if pr_to_eval == '39_27': + image_directory = '/home/thomas/share/master_rasters/train/' + else: + image_directory = '/home/thomas/share/master_rasters/test/' + param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, 'graph_path':None, 'bands':channels, 'w0':w0, 'sigma':sigma} evaluating = True + num_classes = 5 if not os.path.isfile(model_save_path): print("Training new model") - model, graph_path = train_model(training_directory, model_func, - steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, - max_pools=max_pools, epochs=epochs, random_sample=random_sample, - learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, - threshold=threshold, raster_name=raster_name) - evaluating = False - model.save(model_save_path) - else: - model = tf.keras.models.load_model(model_save_path, - custom_objects={'weighted_loss':weighted_loss, 'acc':acc}) - if train_more: - 
train_model(training_directory, model, - steps_per_epoch=steps_per_epoch, valid_steps=valid_steps, - max_pools=max_pools, epochs=epochs, random_sample=random_sample, - learning_rate=learning_rate, channels=channels, w0=w0, sigma=sigma, - threshold=threshold, train_more=train_more) - model_name = '240eptest.h5' - model.save(os.path.join(model_dir, model_name)) - - if not evaluating or train_more: - save_model_info(info_path, param_dict) - evaluate_images(image_directory, model, include_string=pr_to_eval, - exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, - save_dir='compare_model_outputs/systematic/') + shp = (572, 572, 51) + weight_shape = (388, 388, num_classes) + model = weighted_unet_no_transpose_conv(shp, weight_shape, num_classes, base_exp=5) + model.compile( + loss=weighted_loss, + optimizer=tf.keras.optimizers.Adam(lr=learning_rate), + metrics=[acc] + ) + graph_path = os.path.join('graphs/', str(int(time.time()))) + os.mkdir(graph_path) + tb = TensorBoard(log_dir=graph_path) + ckpt_path = os.path.join(graph_path, raster_name+"_{epoch:02d}-{val_acc:.2f}.hdf5") + scheduler = LearningRateScheduler(lr_schedule, verbose=1) + mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_acc', save_best_only=True, + mode='max', verbose=1) + train = os.path.join(training_directory, 'train') + test = os.path.join(training_directory, 'test') + class_weight = {0:28.101, 1:1.0, 2:2.9614, 3:103.8927} #for no buffer + #class_weight = {0:1, 1:1.0, 2:1, 3:1} #for no buffer + + train_generator = generate_training_data(train, max_pools, sample_random=False, + class_weights=class_weight, channels=channels, threshold=threshold, + sigma=sigma, w0=w0) + test_generator = generate_training_data(test, max_pools, sample_random=False, + batch_size=4, w0=w0, threshold=threshold, + sigma=sigma, class_weights=class_weight, channels=channels) + + i = 0 + k = 0 + train_iter = 150 + for data, labels in train_generator: + out = model.train_on_batch(x=data, + y=labels) 
+ # Loss, accuracy? + print(out) + if i > train_iter: + evaluate_images(image_directory, model, include_string=pr_to_eval, + exclude_string="class", channels=channels, max_pools=max_pools, + prefix=raster_name+'step_{}'.format((k+1)*train_iter), + save_dir='compare_model_outputs/during-the-day/') + k+=1 + i = 0 + i += 1 + + raster_name+='train_iter_final'.format(i) + evaluate_images(image_directory, model, include_string=pr_to_eval, exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, save_dir='compare_model_outputs/systematic/') diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 0bbc85d..74231e5 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -66,14 +66,16 @@ def fcnn_functional_small(n_classes): _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) -def weighted_unet_no_transpose_conv(input_shape, n_classes): - ''' This model does not use any Conv2DTranspose layers. +def weighted_unet_no_transpose_conv(input_shape, weighted_input_shape, n_classes, base_exp=5): + ''' + This model does not use any Conv2DTranspose layers. Instead a Upsampling2D layer with a Conv layer after - with same padding. ''' + with same padding. 
+ ''' inp1 = Input(input_shape) - weighted_input = Input(shape=(388, 388, 4)) + weighted_input = Input(shape=weighted_input_shape) base = 2 - exp = 5 + exp = base_exp # 64 filters c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(inp1) @@ -108,6 +110,7 @@ def weighted_unet_no_transpose_conv(input_shape, n_classes): # 1024 filters c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp4) c10 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c9) + c10 = BatchNormalization()(c10) exp -= 1 # 512 filters, making 1024 when concatenated with @@ -179,6 +182,7 @@ def weighted_unet_no_transpose_conv(input_shape, n_classes): c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn3) + c18 = BatchNormalization()(c18) last_conv = Conv2D(filters=n_classes, kernel_size=1, activation='softmax', padding='valid')(c18) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index dd934db..dffc9f6 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -191,17 +191,17 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi image_test_directory = '/home/thomas/share/image_data/test' image_dirs = [image_train_directory, image_test_directory] - shp_train = 'shapefile_data/buffered/train/' - shp_test = 'shapefile_data/buffered/test/' + shp_train = 'shapefile_data/train/' + shp_test = 'shapefile_data//test/' shp_dirs = [shp_train, shp_test] master_train = '/home/thomas/share/master_rasters/train/' master_test = '/home/thomas/share/master_rasters/test' master_dirs = [master_train, master_test] year = 2013 - # for s, i in zip(shp_dirs, image_dirs): - # download_all_images(i, s, year) - # for im_dir, mas_dir in zip(image_dirs, master_dirs): - # create_all_master_rasters(im_dir, mas_dir) + for s, i in 
zip(shp_dirs, image_dirs): + download_all_images(i, s, year) + for im_dir, mas_dir in zip(image_dirs, master_dirs): + create_all_master_rasters(im_dir, mas_dir) master_train = '/home/thomas/share/master_rasters/train/' master_test = '/home/thomas/share/master_rasters/test/' image_train = '/home/thomas/share/image_data/train/' @@ -212,22 +212,20 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi forest = 'Forrest' other = 'other' target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} - train_dir = 'training_data/buffered/train/' - shp_train = 'shapefile_data/buffered//train/' + train_dir = 'training_data/train/' + shp_train = 'shapefile_data/train/' count = 0 save = True count, pixel_dict = extract_training_data_unet(target_dict, shp_train, image_train, master_train, train_dir, count, save=save) - print("You have {} instances per training epoch.".format(count)) - print("And {} instances in each class.".format(pixel_dict)) + print("{} instances in each class.".format(pixel_dict)) max_weight = max(pixel_dict.values()) for key in pixel_dict: print(key, max_weight / pixel_dict[key]) tot = 0 - test_dir = 'training_data/buffered/test/' + test_dir = 'training_data/test/' shp_test = 'shapefile_data/test/' count = 0 - # count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, - # test_dir, count, save=save) - # print("You have {} instances per test epoch.".format(count)) - # print("And {} instances in each class.".format(pixel_dict)) + count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, + test_dir, count, save=save) + print("And {} instances in each class.".format(pixel_dict)) From d4ddd41569e05780afd063bd77216123e3c7a551 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 16 Apr 2019 16:46:13 -0600 Subject: [PATCH 57/89] Starting refactor --- fully-conv-classification/data_generators.py | 13 ++++++---- fully-conv-classification/data_utils.py | 27 
+++++++++++++++----- fully-conv-classification/fully_conv.py | 21 ++++++++------- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 2d1ee62..b9bcb53 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -334,7 +334,8 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo maxx = max(weighting_dict.values()) for key in weighting_dict: - weighting_dict[key] = np.log((1.1*(maxx / weighting_dict[key]))) + #weighting_dict[key] = np.log((1.1*(maxx / weighting_dict[key]))) + weighting_dict[key] = maxx / weighting_dict[key] # print(weighting_dict) # print(count_dict) for subset in data: @@ -363,10 +364,12 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo labels[labels >= threshold] = border_class labels[mask == 1] = subset['class_code'] weights[weights < threshold] = 0 # threshold the weight values arbitrarily - #weights[mask == 1] = class_weights[subset['class_code']] - weights[weights != 0] = abs(np.log((1.1*(maxx / len(np.where(weights != 0)[0]))))) - #weights[weights != 0] = (1.1*(maxx / len(np.where(weights != 0)[0]))) - weights[mask == 1] = weighting_dict[subset['class_code']] + weights[weights != 0] = 1#maxx / len(np.where(weights != 0)[0]) + #weights[mask == 1] = weighting_dict[subset['class_code']] + weights[mask == 1] = 1#class_weights[subset['class_code']] + if subset['class_code'] != 0: + weights[mask != 1] = 0 + labels[mask != 1] = 0 multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class+1)) # one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class+1)) one_hot[:, :, border_class][labels == border_class] = 1 diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 6d3cc95..92aa6a5 100644 --- a/fully-conv-classification/data_utils.py +++ 
b/fully-conv-classification/data_utils.py @@ -13,8 +13,10 @@ from rasterio.mask import mask from pickle import load from prepare_images import ImageStack +from shapefile_utils import get_features from sat_image.warped_vrt import warp_single_image +WRS2 = '../spatial_data/wrs2_descending_usa.shp' def create_master_raster(paths_map, path, row, year, raster_directory, mean_map=None, stddev_map=None): @@ -177,22 +179,35 @@ def clip_rasters(evaluated_tif_dir, include_string): row = out[3:5] clip_raster(f, int(path), int(row), outfile=f) +def get_wrs2_features(path, row): + + with fopen(WRS2) as src: + for feat in src: + poly = shape(feat['geometry']) + propr = feat['properties'] + if propr['PATH'] == path and propr['ROW'] == row: + return [feat] + return None + def clip_raster(evaluated, path, row, outfile=None): shp = gpd.read_file(WRS2) + out = shp[shp['PATH'] == path] + out = out[out['ROW'] == row] + with rasopen(evaluated, 'r') as src: - shp = shp.to_crs(src.crs) + out = out.to_crs(src.crs) meta = src.meta.copy() - features = get_features(shp, path, row) - out_image, out_transform = mask(src, shapes=features, nodata=np.nan) + features = get_features(out) + out_image, out_transform = mask(src, shapes=features, crop=True, nodata=nan) - if outfile: - save_raster(out_image, outfile, meta) + outfile = evaluated + save_raster(out_image, outfile, meta) -def save_raster(arr, outfile, meta, count=4): +def save_raster(arr, outfile, meta, count=5): meta.update(count=count) with rasopen(outfile, 'w', **meta) as dst: dst.write(arr) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 229a602..bc7e3d7 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -45,12 +45,12 @@ def weighted_loss(target, output): #mask = tf.not_equal(out, 0)#tf.boolean_mask(out, mask) return out -def weighted_focal_loss(target, output, gamma=1.3): +def weighted_focal_loss(target, output, gamma=1): exp = 
tf.exp(output) pt = tf.pow(1-exp, gamma) out = -tf.reduce_sum(target*output, len(output.get_shape())-1) mask = tf.not_equal(out, 0) - pt_ce = tf.multiply([pt, output]) + pt_ce = tf.multiply(pt, output) out = -tf.reduce_sum(pt_ce*target, len(output.get_shape()) -1) return tf.boolean_mask(out, mask) @@ -263,8 +263,11 @@ def gradient_wrt_inputs(model, data): random_sample = False augment = False channels = 'all' - raster_name = 'w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) - model_name = 'w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) + raster_name = '2_w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) + model_name = '2_w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) + raster_name = 'unit_irr_weights_normal_loss_w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) + model_name = 'unit_1border_weights_irr_weights_normal_loss_w0_weight:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) + # Next try raw weights from weight map. model_save_path = os.path.join(model_dir, model_name) pr_to_eval = '37_28' if pr_to_eval == '39_27': @@ -279,6 +282,7 @@ def gradient_wrt_inputs(model, data): evaluating = True num_classes = 5 + model_save_path = 'models/modelsunit_1border_weights_irr_weights_normal_loss_w0_weight:10-th:7.0-sigma:5-lr:0.001step_2850' if not os.path.isfile(model_save_path): print("Training new model") shp = (572, 572, 51) @@ -304,10 +308,6 @@ def gradient_wrt_inputs(model, data): train_generator = generate_training_data(train, max_pools, sample_random=False, class_weights=class_weight, channels=channels, threshold=threshold, sigma=sigma, w0=w0) - test_generator = generate_training_data(test, max_pools, sample_random=False, - batch_size=4, w0=w0, threshold=threshold, - sigma=sigma, class_weights=class_weight, channels=channels) - i = 0 k = 0 train_iter = 150 @@ -317,6 +317,7 @@ def gradient_wrt_inputs(model, data): # Loss, accuracy? 
print(out) if i > train_iter: + model.save('models'+model_name+'step_{}'.format((k+1)*train_iter)) evaluate_images(image_directory, model, include_string=pr_to_eval, exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name+'step_{}'.format((k+1)*train_iter), @@ -325,5 +326,7 @@ def gradient_wrt_inputs(model, data): i = 0 i += 1 - raster_name+='train_iter_final'.format(i) + raster_name='final_eval' + model = tf.keras.models.load_model(model_save_path, + custom_objects={'weighted_loss':weighted_loss}) evaluate_images(image_directory, model, include_string=pr_to_eval, exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, save_dir='compare_model_outputs/systematic/') From 59c879e316868a05df3df77d07e5c3d007787534 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 17 Apr 2019 14:42:21 -0600 Subject: [PATCH 58/89] Refactoring code and adding offline data augmentation --- fully-conv-classification/data_generators.py | 429 +++++++----------- fully-conv-classification/fully_conv.py | 6 +- .../runner_from_shapefile.py | 29 +- fully-conv-classification/shapefile_utils.py | 13 +- 4 files changed, 179 insertions(+), 298 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index b9bcb53..af822de 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -3,7 +3,7 @@ import time import pickle import matplotlib.pyplot as plt -from glob import glob +from glob import glob, iglob from random import sample, shuffle from skimage.segmentation import find_boundaries from skimage.measure import label @@ -16,9 +16,6 @@ from skimage import transform from sat_image.warped_vrt import warp_single_image -NO_DATA = -1 -NUM_CLASSES = 4 - def distance_map(mask): mask = mask.copy().astype(bool) @@ -40,11 +37,11 @@ def weight_map(mask, w0=10, sigma=10): return w0*np.exp((1-distances) / sigma) -def random_sample(class_mask, 
n_instances, box_size=0, fill_value=1): +def random_sample(class_mask, n_instances, box_size=0, fill_value=1, nodata=0): if box_size: n_instances /= box_size - out = np.where(class_mask != NO_DATA) + out = np.where(class_mask != nodata) class_mask = class_mask.copy() try: out_x = out[1] @@ -58,7 +55,7 @@ def random_sample(class_mask, n_instances, box_size=0, fill_value=1): out_y = out_y[indices] try: - class_mask[:, :, :] = NO_DATA + class_mask[:, :, :] = nodata if box_size == 0: class_mask[0, out_x, out_y] = fill_value else: @@ -67,7 +64,7 @@ def random_sample(class_mask, n_instances, box_size=0, fill_value=1): class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = fill_value except IndexError as e: - class_mask[:, :] = NO_DATA + class_mask[:, :] = nodata if box_size == 0: class_mask[out_x, out_y] = fill_value else: @@ -115,17 +112,8 @@ def to_pickle(self, training_directory): else: print("What? Contact administrator.") - def set_data(self, data): - self.dict['data'] = data - - def set_code(self, class_code): - self.dict['class_code'] = class_code - - def set_class_mask(self, class_mask): - self.dict['class_mask'] = class_mask - -def concatenate_fmasks(image_directory, class_mask, class_mask_geo): +def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): paths = [] for dirpath, dirnames, filenames in os.walk(image_directory): for f in filenames: @@ -135,34 +123,46 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo): for fmask_file in paths: fmask, fmeta = load_raster(fmask_file) try: - class_mask[fmask == 1] = NO_DATA # 0 index is for removing the (1, n, m) dimension. + class_mask[fmask == 1] = nodata # 0 index is for removing the (1, n, m) dimension. 
except (ValueError, IndexError) as e: fmask = warp_single_image(fmask_file, class_mask_geo) - class_mask[fmask == 1] = NO_DATA + class_mask[fmask == 1] = nodata return class_mask -def extract_training_data_unet(target_dict, shapefile_directory, image_directory, - master_raster_directory, training_directory, count, save=True, chunk_size=572): - ''' target_dict: {filename or string in filename : class_code} ''' - unet_output_size = 388 # input to unet. - unet_input_size = 572 # input to unet. +def extract_training_data(target_dict, shapefile_directory, image_directory, + master_raster_directory, training_directory, save=True, tile_size=608, + year=2013, fmask=True, nodata=0, augment_dict={}): + ''' + target_dict: {filename or string in filename : class_code} + This function extracts training data from master_rasters in master_raster_directory. Master + rasters are rasters containing all feature bands. + To do this, we iterate over the shapefile (vector) data in shapefile directory + and mask the corresponding raster with the vector data. We also ignore all pixels + in the master_rasters that have any clouds or water in them. + steps: + pull a shapefile at random from shapefile_directory. + get all the other shapefiles that are in the same path and row. + use these shapefiles to create a binary mask: 0 where there's no + data and 1 where there is data. + Assign each binary mask to a class. + Iterate over the master raster in that path / row and + create a new datatile object for each binary mask that contains + data, and save it. + ''' - # this means the class mask must be 388x388 - chop 184 from height/width. done = set() - pixel_dict = {} # counts number of pixels present in each class. + pixel_dict = {} # pixel dict counts number of pixels present in each class. 
for class_code in target_dict.values(): - pixel_dict[class_code] = 0 - year = 2013 - train_raster = 'master_raster_' - mask_raster = 'class_mask_' + pixel_dict[class_code] = 0 + year = year #TODO: incorporate year attr. from shapefile + train_raster = 'master_raster_' # template + mask_raster = 'class_mask_' # template for f in glob(os.path.join(shapefile_directory, "*.shp")): if f not in done: - all_matches = all_matching_shapefiles(f, shapefile_directory) # get all - # shapefiles in the same path / row - all_matches.append(f) done.add(f) + all_matches = all_matching_shapefiles(f, shapefile_directory) # get all shapefiles in the same path / row for match in all_matches: done.add(match) p, r = get_shapefile_path_row(f) @@ -172,102 +172,75 @@ def extract_training_data_unet(target_dict, shapefile_directory, image_directory masks = [] shp = None for match in all_matches: - msk, mask_meta = generate_class_mask(match, mask_file) # mask file is - # a blank raster - expedites loading and makes troubleshooting easier.. - # This should be removed in a finished product. - print(match) - msk = concatenate_fmasks(os.path.join(image_directory, suffix[:-4]), msk, mask_meta) - shp = msk.shape cc = assign_class_code(target_dict, match) - if cc is not None: - dm = DataMask(msk, cc) - masks.append(dm) - if save: - master, meta = load_raster(master_raster) - else: - master = np.zeros(shp) - - # 92 is unet offset. 
- for i in range(92, master.shape[1], unet_output_size): - for j in range(92, master.shape[2], unet_output_size): - sub_master = master[:, i-92:i+unet_output_size+92, j-92:j+unet_output_size+92] - if sub_master.shape[1] == unet_input_size and sub_master.shape[2] == unet_input_size: - for msk in masks: - s = msk.mask[:, i:i+unet_output_size, j:j+unet_output_size] - if not np.all(s == NO_DATA): - pixel_dict[msk.class_code] += len(np.where(s != NO_DATA)[0]) - count += 1 - if save: - dt = DataTile(sub_master, s, msk.class_code) - dt.to_pickle(training_directory) - return count, pixel_dict - - -def extract_training_data(target_dict, shapefile_directory, image_directory, - master_raster_directory, training_directory, count, save=True, chunk_size=572): - ''' target_dict: {filename or string in filename : class_code} ''' - done = set() - pixel_dict = {} # counts number of pixels present in each class. - for class_code in target_dict.values(): - pixel_dict[class_code] = 0 - year = 2013 - train_raster = 'master_raster_' - mask_raster = 'class_mask_' - CHUNK_SIZE = chunk_size - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if f not in done: - all_matches = all_matching_shapefiles(f, shapefile_directory) - done.add(f) - for match in all_matches: - done.add(match) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) - fmask = get_masks(os.path.join(image_directory, suffix[:-4])) - master_raster = os.path.join(master_raster_directory, train_raster + suffix) - mask_file = os.path.join(master_raster_directory, mask_raster + suffix) # for rasterio.mask.mask - masks = [] # these are class masks for the labelling of data. 
- all_matches.append(f) - shp = None - for match in all_matches: - msk = generate_class_mask(match, mask_file) - msk[0][fmask == 1] = NO_DATA + msk, mask_meta = generate_class_mask(match, mask_file, nodata=nodata) + if fmask: + msk = concatenate_fmasks(os.path.join(image_directory, suffix[:-4]), msk, + mask_meta, nodata=nodata) # Need to make sure this is doing what I expect. shp = msk.shape - cc = assign_class_code(target_dict, match) + print(match, cc) if cc is not None: - dm = DataMask(msk, cc) + dm = DataMask(msk, cc) # a binary mask that has a class_code attributed to it. masks.append(dm) + if save: master, meta = load_raster(master_raster) else: master = np.zeros(shp) - - for i in range(0, master.shape[1], CHUNK_SIZE): - for j in range(0, master.shape[2], CHUNK_SIZE): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if sub_master.shape[1] == CHUNK_SIZE and sub_master.shape[2] == CHUNK_SIZE: - for msk in masks: - s = msk.mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if not np.all(s == NO_DATA): - pixel_dict[msk.class_code] += len(np.where(s != NO_DATA)[0]) - count += 1 - if save: - dt = DataTile(sub_master, s, msk.class_code) - dt.to_pickle(training_directory) - return count, pixel_dict + + for datamask in masks: + if augment_dict[datamask.class_code]: + pixel_dict = _iterate_over_raster(master, datamask, pixel_dict, + tile_size, save=save, augment=True) + else: + pixel_dict = _iterate_over_raster(master, datamask, pixel_dict, + tile_size, save=save) + + return pixel_dict + + +def _iterate_over_raster(raster, datamask, pixel_dict, tile_size=608, augment=False, + save=True): + step = tile_size + if augment: + step = np.random.randint(tile_size // 3, tile_size // 2) + for i in range(0, raster.shape[1]-tile_size, step): + for j in range(0, raster.shape[2]-tile_size, step): + sub_raster = raster[:, i:i+tile_size, j:j+tile_size] + sub_mask = datamask.mask[:, i:i+tile_size, j:j+tile_size] + if _check_dimensions_and_content(sub_raster, sub_mask, tile_size): + 
pixel_dict[datamask.class_code] += len(np.where(sub_mask != 0)[0]) + if save: + dt = DataTile(sub_master, sub_mask, datamask.class_code) + dt.to_pickle(training_directory) + return pixel_dict + + +def _check_dimensions_and_content(sub_raster, sub_mask, tile_size): + if np.all(sub_mask == 0): + return False + if sub_mask.shape[1] != tile_size or sub_mask.shape[2] != tile_size: + return False + if sub_raster.shape[1] != tile_size or sub_raster.shape[2] != tile_size: + return False + return True def all_matching_shapefiles(to_match, shapefile_directory): out = [] pr = get_shapefile_path_row(to_match) for f in glob(os.path.join(shapefile_directory, "*.shp")): - if get_shapefile_path_row(f) == pr and to_match not in f: + if get_shapefile_path_row(f) == pr: out.append(f) return out class DataGen: ''' Infinite data generator. Pulls files from - a directory named "class_dir".''' + a directory named "class_dir". + Class dir can have multiple directories full of data files + in it. + ''' def __init__(self, class_dir, augment=False, random_augment=False): self.file_list = None @@ -278,8 +251,10 @@ def __init__(self, class_dir, augment=False, random_augment=False): self.shuffled = sample(self.file_list, self.n_files) def _get_files(self): - self.file_list = [x[2] for x in os.walk(self.class_dir)][0] - self.file_list = [os.path.join(self.class_dir, x) for x in self.file_list] + self.file_list = [x for x in iglob(self.class_dir + "**", recursive=True)] + print(self.file_list) + self.file_list = [os.path.join(self.class_dir, x) for x in self.file_list if + os.path.isfile(os.path.join(self.class_dir, x))] def next(self): if self.idx == self.n_files: @@ -297,9 +272,58 @@ def _from_pickle(self, filename): data = pickle.load(f) return data +def make_border_labels(mask, border_width): + ''' Border width: Pixel width. 
''' + distance_map = distance_map(mask) + distance_map[distance > border_width] = 0 + return distance_map + -def generate_training_data(training_directory, max_pools, sample_random=True, box_size=0, - batch_size=8, threshold=None, sigma=None, w0=None, class_weights={}, channels='all', train=True): +def generate_unbalanced_data(training_directory, max_pools, threshold=None, sigma=None, + w0=None, batch_size=8, class_weights={}, channels='all', nodata=0, n_classes=5): + ''' Assumes data is stored in training_directory ''' + border_class = len(class_weights.keys()) + gen = DataGen(training_directory) + while True: + masters = [] + one_hots = [] + weightings = [] + tile_shape = None + for _ in range(batch_size): + data_tiles = [] + weighting_dict = {} + count_dict = {} + out = gen.next().copy() + if tile_shape is None: + tile_shape = out['class_mask'].shape + data_tiles.append(out) + n_samples = len(np.where(out['class_mask'] != nodata)[0]) + weighting_dict[out['class_code']] = n_samples + count_dict[out['class_code']] = n_samples + + maxx = max(weighting_dict.values()) + for key in weighting_dict: + weighting_dict[key] = maxx / weighting_dict[key] + + for tile in data_tiles: + one_hot = np.zeros((tile_shape[1], tile_shape[2], n_classes)) + weights = np.zeros((tile_shape[1], tile_shape[2])) + labels = tile['class_mask'] + one_hot[:, :, tile['class_code']] = labels + weights[labels == 1] = class_weights[tile['class_code']] + if tile['class_code'] == 0: + border_labels = make_border_labels(tile['class_mask'], border_width=2) + one_hot[:, :, border_class] = border_labels + weights[border_labels == 1] = class_weights[border_class] + masters.append(np.squeeze(tile['data'])) + one_hots.append(one_hot) + weightings.append(weights) + + yield np.asarray(masters), np.asarray(masks), np.asarray(weightings) + + +def generate_training_data(training_directory, max_pools, threshold=None, sigma=None, + w0=None, class_weights={}, channels='all', nodata=0, n_classes=5): ''' Assumes data 
is stored in training_directory in subdirectories labeled class_n_train with n the class code ''' class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] @@ -307,175 +331,46 @@ def generate_training_data(training_directory, max_pools, sample_random=True, bo class_dirs = [training_directory] generators = [] border_class = len(class_weights.keys()) - i = 0 for d in class_dirs: generators.append(DataGen(d)) - while True: masters = [] - masks = [] + one_hots = [] weightings = [] + tile_shape = None for _ in range(2): - min_samples = np.inf - data = [] + data_tiles = [] weighting_dict = {} count_dict = {} for gen in generators: out = gen.next().copy() - data.append(out) - n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) + if tile_shape is None: + tile_shape = out['class_mask'].shape + data_tiles.append(out) + n_samples = len(np.where(out['class_mask'] != nodata)[0]) weighting_dict[out['class_code']] = n_samples count_dict[out['class_code']] = n_samples - if sample_random: - n_samples = len(np.where(out['class_mask'] != NO_DATA)[0]) - print(n_samples) - if n_samples < min_samples: - min_samples = n_samples maxx = max(weighting_dict.values()) for key in weighting_dict: - #weighting_dict[key] = np.log((1.1*(maxx / weighting_dict[key]))) weighting_dict[key] = maxx / weighting_dict[key] - # print(weighting_dict) - # print(count_dict) - for subset in data: - if sample_random: - samp = random_sample(subset['class_mask'], min_samples, box_size=box_size, - fill_value=subset['class_code']) - else: - samp = subset['class_mask'] - samp[samp != NO_DATA] = subset['class_code'] - - subset['class_mask'] = samp - - for subset in data: - master, mask = preprocess_data(subset['data'], subset['class_mask'], max_pools) - if channels == 'all': - master = np.squeeze(master) - else: - master = master[:, :, :, channels] - master = np.squeeze(master) - mask = mask[0, :, :, 0] - mask[mask != -1] = 1 # make the mask binary. 
- mask[mask == -1] = 0 # -1 is NO_DATA. - weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map - labels = weights.copy() - labels[labels < threshold] = 0 - labels[labels >= threshold] = border_class - labels[mask == 1] = subset['class_code'] - weights[weights < threshold] = 0 # threshold the weight values arbitrarily - weights[weights != 0] = 1#maxx / len(np.where(weights != 0)[0]) - #weights[mask == 1] = weighting_dict[subset['class_code']] - weights[mask == 1] = 1#class_weights[subset['class_code']] - if subset['class_code'] != 0: - weights[mask != 1] = 0 - labels[mask != 1] = 0 - multidim_weights = np.zeros((weights.shape[0], weights.shape[1], border_class+1)) # - one_hot = np.zeros((labels.shape[0], labels.shape[1], border_class+1)) - one_hot[:, :, border_class][labels == border_class] = 1 - one_hot[:, :, subset['class_code']][labels == subset['class_code']] = 1 - for i in range(border_class+1): - multidim_weights[:, :, i] = weights - masters.append(master) - masks.append(one_hot) - weightings.append(multidim_weights) - - yield [np.asarray(masters), np.asarray(weightings)], np.asarray(masks) - - -def rotation(image, angle): - return transform.rotate(image, angle, mode='constant', cval=NO_DATA) - - -def h_flip(image): - return image[:, ::-1] - - -def augment_data(image, class_mask): - '''Randomly augments an image.''' - if np.random.randint(2): - image = h_flip(image) - class_mask = h_flip(class_mask) - if np.random.randint(2): - image = np.flipud(image) - class_mask = np.flipud(class_mask) - return image, class_mask - - -def preprocess_data(master, mask, max_pools, return_cuts=False): - ''' This function preprocesses data in such a way - so it will work with a FCNN with an arbitrary number of max pools. 
- Master, mask in this function are tiles from the original image.''' - shp = master.shape - rows = shp[1]; cols = shp[2] - if max_pools != 0: - cut_rows = rows % (2**max_pools) - cut_cols = cols % (2**max_pools) - else: - cut_rows = 0 - cut_cols = 0 - out_m = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - - if cut_cols != 0 and cut_rows != 0: - out_m[0, :, :, :] = master[:, :-cut_rows, :-cut_cols] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :-cut_rows, :-cut_cols] - elif cut_cols == 0 and cut_rows != 0: - out_m[0, :, :, :] = master[:, :-cut_rows, :] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :-cut_rows, :] - elif cut_cols != 0 and cut_rows == 0: - out_m[0, :, :, :] = master[:, :, :-cut_cols] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :, :-cut_cols] - else: - out_m[0, :, :, :] = master[:, :, :] - shp = mask.shape - out_mask = np.zeros((1, shp[0], shp[1] - cut_rows, shp[2] - cut_cols)) - out_mask[0, :, :, :] = mask[:, :, :] - - out_m = np.swapaxes(out_m, 1, 3) - out_mask = np.swapaxes(out_mask, 1, 3) - if return_cuts: - return out_m, out_mask, cut_rows, cut_cols - - return out_m, out_mask + + for tile in data_tiles: + one_hot = np.zeros((tile_shape[1], tile_shape[2], n_classes)) + weights = np.zeros((tile_shape[1], tile_shape[2])) + labels = tile['class_mask'] + one_hot[:, :, tile['class_code']] = labels + weights[labels == 1] = class_weights[tile['class_code']] + if tile['class_code'] == 0: + border_labels = make_border_labels(tile['class_mask'], border_width=2) + one_hot[:, :, border_class] = border_labels + weights[border_labels == 1] = class_weights[border_class] + masters.append(np.squeeze(tile['data'])) + one_hots.append(one_hot) + weightings.append(weights) + + yield np.asarray(masters), 
np.asarray(masks), np.asarray(weightings) if __name__ == '__main__': - - master_train = 'master_rasters/train/' - master_test = 'master_rasters/test/' - image_train = 'image_data/train/' # for fmasks. - image_test = 'image_data/test/' # for fmasks. - irr1 = 'Huntley' - irr2 = 'Sun_River' - fallow = 'Fallow' - forest = 'Forrest' - other = 'other' - target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} - train_dir = 'training_data/multiclass/train/' - shp_train = 'shapefile_data/train/' - count = 0 - save = True - count, pixel_dict = extract_training_data_unet(target_dict, shp_train, image_train, - master_train, train_dir, count, save=save) - # Need to parallelize the extraction of training data. - # Or maybe not. It seems like parallelizing the opening/closing - # of rasters can stomp on the data. - print("You have {} instances per training epoch.".format(count)) - print("And {} instances in each class.".format(pixel_dict)) - max_weight = max(pixel_dict.values()) - for key in pixel_dict: - print(key, max_weight / pixel_dict[key]) - tot = 0 - test_dir = 'training_data/multiclass/test/' - shp_test = 'shapefile_data/test/' - count = 0 - count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, - test_dir, count, save=save) - print("You have {} instances per test epoch.".format(count)) - print("And {} instances in each class.".format(pixel_dict)) + pass diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index bc7e3d7..8663df4 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -17,7 +17,7 @@ from shapely.geometry import shape from pickle import load as pload from fiona import open as fopen -from data_generators import generate_training_data, load_raster, preprocess_data +from data_generators import generate_training_data, load_raster from data_utils import clip_rasters, save_raster from shapefile_utils import get_features, 
generate_class_mask from models import (fcnn_functional, fcnn_model, fcnn_functional_small, unet, unet_weighted, @@ -65,7 +65,7 @@ def acc(y_true, y_pred): return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) -def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_classes=4, +def evaluate_image(master_raster, model, max_pools, channels='all', num_classes=4, outfile=None, ii=None): if not os.path.isfile(master_raster): @@ -85,8 +85,6 @@ def evaluate_image_unet(master_raster, model, max_pools, channels='all', num_cla for j in range(0, master.shape[2]-diff, stride): sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_master, sub_mask, cut_rows, cut_cols = preprocess_data(sub_master, sub_mask, - max_pools, return_cuts=True) if channels != 'all': sub_master = sub_master[:, :, :, channels] sub_msk = np.ones((1, 388, 388, 5)) # a placeholder diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index dffc9f6..33801d5 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -9,8 +9,7 @@ from data_utils import download_images, create_master_raster, bandwise_mean, bandwise_stddev from shapefile_utils import get_shapefile_path_row, split_shapefile, filter_shapefile from runspec import landsat_rasters, static_rasters, climate_rasters -from data_generators import extract_training_data_unet - +from data_generators import extract_training_data def download_images_over_shapefile(shapefile, image_directory, year): @@ -159,15 +158,6 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi if os.path.isdir(out): paths_map = all_rasters(out) i = 0 - # for key in sorted(paths_map.keys()): - # if key in ('aspect.tif', 'elevation_diff.tif', 'slope.tif'): - # print("'{}':np.array([{}]),".format(key, i)) - # i += 1 - # else: - # 
print("'{}':np.arange({}, {}+1), ".format(key, i, i+2)) - # i += 3 - - # break path = sub_dir[:2] row = sub_dir[3:5] year = sub_dir[-4:] @@ -198,10 +188,10 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi master_test = '/home/thomas/share/master_rasters/test' master_dirs = [master_train, master_test] year = 2013 - for s, i in zip(shp_dirs, image_dirs): - download_all_images(i, s, year) - for im_dir, mas_dir in zip(image_dirs, master_dirs): - create_all_master_rasters(im_dir, mas_dir) + # for s, i in zip(shp_dirs, image_dirs): + # download_all_images(i, s, year) + # for im_dir, mas_dir in zip(image_dirs, master_dirs): + # create_all_master_rasters(im_dir, mas_dir) master_train = '/home/thomas/share/master_rasters/train/' master_test = '/home/thomas/share/master_rasters/test/' image_train = '/home/thomas/share/image_data/train/' @@ -212,12 +202,13 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi forest = 'Forrest' other = 'other' target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} + augment_dict = {0:True, 1:False, 2:False, 3:True} train_dir = 'training_data/train/' shp_train = 'shapefile_data/train/' count = 0 save = True - count, pixel_dict = extract_training_data_unet(target_dict, shp_train, image_train, - master_train, train_dir, count, save=save) + count, pixel_dict = extract_training_data(target_dict, shp_train, image_train, + master_train, train_dir, save=save, augment_dict=augment_dict) print("{} instances in each class.".format(pixel_dict)) max_weight = max(pixel_dict.values()) for key in pixel_dict: @@ -226,6 +217,6 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi test_dir = 'training_data/test/' shp_test = 'shapefile_data/test/' count = 0 - count, pixel_dict = extract_training_data_unet(target_dict, shp_test, image_test, master_test, - test_dir, count, save=save) + count, pixel_dict = extract_training_data(target_dict, shp_test, image_test, 
master_test, + test_dir, save=save) print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 6f0f90b..83984a4 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -17,7 +17,7 @@ def get_features(gdf): return features -def generate_class_mask(shapefile, master_raster, no_data=-1): +def generate_class_mask(shapefile, master_raster, nodata=-1): ''' Generates a mask with 1 everywhere shapefile data is present and a no_data value everywhere else. no_data is -1 in this case, as it is never a valid class label. @@ -29,7 +29,9 @@ def generate_class_mask(shapefile, master_raster, no_data=-1): with rasopen(master_raster, 'r') as src: shp = shp.to_crs(src.crs) features = get_features(shp) - out_image, out_transform = mask(src, shapes=features, nodata=no_data) + out_image, out_transform = mask(src, shapes=features, nodata=-1) + out_image[out_image != -1] = 1 + out_image[out_image == -1] = 0 meta = src.meta return out_image, meta @@ -255,9 +257,4 @@ def buffer_shapefile(shp): dst.write(feat) if __name__ == '__main__': - - pth = 'shapefile_data/all_shapefiles/MT_Sun_River_2013_392739_27.shp' - path = 'shapefile_data/buffered/test/' - from glob import glob - for f in glob(path + '*.shp'): - buffer_shapefile(f) + pass From 2abcbfbd190b2c0e8cdbf071bb923b8a547e2680 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 17 Apr 2019 19:44:06 -0600 Subject: [PATCH 59/89] Upgrading to tf2.0 --- fully-conv-classification/data_generators.py | 89 +++++------ .../runner_from_shapefile.py | 8 +- fully-conv-classification/tf_fcn.py | 147 ++++++++++++++++++ 3 files changed, 192 insertions(+), 52 deletions(-) create mode 100644 fully-conv-classification/tf_fcn.py diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index af822de..79eab27 100644 --- 
a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -191,19 +191,21 @@ def extract_training_data(target_dict, shapefile_directory, image_directory, for datamask in masks: if augment_dict[datamask.class_code]: pixel_dict = _iterate_over_raster(master, datamask, pixel_dict, - tile_size, save=save, augment=True) + tile_size, save=save, augment=True, + training_directory=training_directory) else: pixel_dict = _iterate_over_raster(master, datamask, pixel_dict, - tile_size, save=save) + tile_size, save=save, training_directory=training_directory) return pixel_dict def _iterate_over_raster(raster, datamask, pixel_dict, tile_size=608, augment=False, - save=True): + save=True, training_directory=None): step = tile_size if augment: - step = np.random.randint(tile_size // 3, tile_size // 2) + step = np.random.randint(tile_size // 4, tile_size // 2) + print("Augmenting w/ step:", step) for i in range(0, raster.shape[1]-tile_size, step): for j in range(0, raster.shape[2]-tile_size, step): sub_raster = raster[:, i:i+tile_size, j:j+tile_size] @@ -211,7 +213,7 @@ def _iterate_over_raster(raster, datamask, pixel_dict, tile_size=608, augment=Fa if _check_dimensions_and_content(sub_raster, sub_mask, tile_size): pixel_dict[datamask.class_code] += len(np.where(sub_mask != 0)[0]) if save: - dt = DataTile(sub_master, sub_mask, datamask.class_code) + dt = DataTile(sub_raster, sub_mask, datamask.class_code) dt.to_pickle(training_directory) return pixel_dict @@ -252,9 +254,8 @@ def __init__(self, class_dir, augment=False, random_augment=False): def _get_files(self): self.file_list = [x for x in iglob(self.class_dir + "**", recursive=True)] - print(self.file_list) - self.file_list = [os.path.join(self.class_dir, x) for x in self.file_list if - os.path.isfile(os.path.join(self.class_dir, x))] + self.file_list = [x for x in self.file_list if + os.path.isfile(x)] def next(self): if self.idx == self.n_files: @@ -272,17 +273,18 @@ def 
_from_pickle(self, filename): data = pickle.load(f) return data + def make_border_labels(mask, border_width): ''' Border width: Pixel width. ''' - distance_map = distance_map(mask) - distance_map[distance > border_width] = 0 - return distance_map + dm = distance_map(mask) + dm[dm > border_width] = 0 + return dm -def generate_unbalanced_data(training_directory, max_pools, threshold=None, sigma=None, - w0=None, batch_size=8, class_weights={}, channels='all', nodata=0, n_classes=5): +def generate_unbalanced_data(training_directory, border_width=2, + batch_size=2, class_weights={}, channels='all', nodata=0, n_classes=5): ''' Assumes data is stored in training_directory ''' - border_class = len(class_weights.keys()) + border_class = len(class_weights.keys()) - 1 gen = DataGen(training_directory) while True: masters = [] @@ -290,39 +292,29 @@ def generate_unbalanced_data(training_directory, max_pools, threshold=None, sigm weightings = [] tile_shape = None for _ in range(batch_size): - data_tiles = [] - weighting_dict = {} - count_dict = {} - out = gen.next().copy() + tile = gen.next().copy() if tile_shape is None: - tile_shape = out['class_mask'].shape - data_tiles.append(out) - n_samples = len(np.where(out['class_mask'] != nodata)[0]) - weighting_dict[out['class_code']] = n_samples - count_dict[out['class_code']] = n_samples - - maxx = max(weighting_dict.values()) - for key in weighting_dict: - weighting_dict[key] = maxx / weighting_dict[key] - - for tile in data_tiles: - one_hot = np.zeros((tile_shape[1], tile_shape[2], n_classes)) - weights = np.zeros((tile_shape[1], tile_shape[2])) - labels = tile['class_mask'] - one_hot[:, :, tile['class_code']] = labels - weights[labels == 1] = class_weights[tile['class_code']] - if tile['class_code'] == 0: - border_labels = make_border_labels(tile['class_mask'], border_width=2) - one_hot[:, :, border_class] = border_labels - weights[border_labels == 1] = class_weights[border_class] - masters.append(np.squeeze(tile['data'])) - 
one_hots.append(one_hot) - weightings.append(weights) - - yield np.asarray(masters), np.asarray(masks), np.asarray(weightings) - - -def generate_training_data(training_directory, max_pools, threshold=None, sigma=None, + tile_shape = tile['class_mask'].shape + one_hot = np.zeros((tile_shape[1], tile_shape[2], n_classes)) + weights = np.zeros((tile_shape[1], tile_shape[2])) + labels = tile['class_mask'][0] + one_hot[:, :, tile['class_code']] = labels + weights[labels == 1] = class_weights[tile['class_code']] + if tile['class_code'] == 0: + border_labels = make_border_labels(tile['class_mask'], + border_width=border_width) + one_hot[:, :, border_class] = border_labels + weights[border_labels[0] == 1] = class_weights[border_class] + m = np.squeeze(tile['data']) + m = np.swapaxes(m, 0, 2) + masters.append(m) + one_hots.append(one_hot) + weightings.append(weights) + + yield np.asarray(masters), np.asarray(one_hots), np.asarray(weightings) + + +def generate_training_data(training_directory, threshold=None, sigma=None, w0=None, class_weights={}, channels='all', nodata=0, n_classes=5): ''' Assumes data is stored in training_directory in subdirectories labeled class_n_train with n the class code ''' @@ -365,7 +357,10 @@ def generate_training_data(training_directory, max_pools, threshold=None, sigma= border_labels = make_border_labels(tile['class_mask'], border_width=2) one_hot[:, :, border_class] = border_labels weights[border_labels == 1] = class_weights[border_class] - masters.append(np.squeeze(tile['data'])) + + m = np.squeeze(tile['data']) + m = np.swapaxes(m, 0, 2) + masters.append(m) one_hots.append(one_hot) weightings.append(weights) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 33801d5..2a6f181 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -205,9 +205,8 @@ def create_all_master_rasters(image_directory, 
raster_save_directory, mean_mappi augment_dict = {0:True, 1:False, 2:False, 3:True} train_dir = 'training_data/train/' shp_train = 'shapefile_data/train/' - count = 0 save = True - count, pixel_dict = extract_training_data(target_dict, shp_train, image_train, + pixel_dict = extract_training_data(target_dict, shp_train, image_train, master_train, train_dir, save=save, augment_dict=augment_dict) print("{} instances in each class.".format(pixel_dict)) max_weight = max(pixel_dict.values()) @@ -216,7 +215,6 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi tot = 0 test_dir = 'training_data/test/' shp_test = 'shapefile_data/test/' - count = 0 - count, pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, - test_dir, save=save) + pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, + test_dir, save=save, augment_dict=augment_dict) print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/tf_fcn.py b/fully-conv-classification/tf_fcn.py new file mode 100644 index 0000000..f66d2f1 --- /dev/null +++ b/fully-conv-classification/tf_fcn.py @@ -0,0 +1,147 @@ +import tensorflow as tf +import numpy as np +import matplotlib.pyplot as plt +from tensorflow.keras.layers import (Activation, Conv2D, UpSampling2D, BatchNormalization, MaxPooling2D, Input, Concatenate, Lambda) +from tensorflow.keras.models import Model +from tensorflow.keras.activations import relu +from tensorflow.keras.regularizers import l2 +from tensorflow.data import Dataset +from data_generators import generate_unbalanced_data + +import tensorflow.keras.backend as K + +def ConvBlock(x, filters=64, expanding_path=False): + + x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', + kernel_regularizer=l2(0.01))(x) + x = BatchNormalization()(x) + x = Activation(relu)(x) + if expanding_path: + x = Conv2D(filters=filters // 2, kernel_size=3, strides=1, padding='same', + 
kernel_regularizer=l2(0.01))(x) + else: + x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', + kernel_regularizer=l2(0.01))(x) + x = BatchNormalization()(x) + return Activation(relu)(x) + + +_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) + +def model_func(input_shape, initial_exp=6, n_classes=5): + + inp = Input(shape=input_shape) + _power = initial_exp + exp = 2 + + c1 = ConvBlock(inp, exp**_power) + mp1 = MaxPooling2D(pool_size=2, strides=2)(c1) + + _power += 1 + + c2 = ConvBlock(mp1, exp**_power) + mp2 = MaxPooling2D(pool_size=2, strides=2)(c2) + + _power += 1 + + c3 = ConvBlock(mp2, exp**_power) + mp3 = MaxPooling2D(pool_size=2, strides=2)(c3) + + _power += 1 + + c4 = ConvBlock(mp3, exp**_power) + mp4 = MaxPooling2D(pool_size=2, strides=2)(c4) + + _power += 1 + + # 1024 filters + c5 = Conv2D(filters=exp**_power, kernel_size=3, strides=1, padding='same')(mp4) + _power -= 1 + c5 = Conv2D(filters=exp**_power, kernel_size=3, strides=1, padding='same')(c5) + + u1 = UpSampling2D(size=(2, 2))(c5) + + u1_c4 = Concatenate()([u1, c4]) + + c6 = ConvBlock(u1_c4, filters=exp**_power, expanding_path=True) + + u2 = UpSampling2D(size=(2, 2))(c6) + + u2_c3 = Concatenate()([u2, c3]) + + _power -= 1 + c7 = ConvBlock(u2_c3, filters=exp**_power, expanding_path=True) + + u3 = UpSampling2D(size=(2, 2))(c7) + + u3_c2 = Concatenate()([u3, c2]) + + _power -= 1 + c8 = ConvBlock(u3_c2, filters=exp**_power, expanding_path=True) + + u4 = UpSampling2D(size=(2, 2))(c8) + + u4_c1 = Concatenate()([u4, c1]) + + _power -= 1 + c9 = ConvBlock(u4_c1, filters=exp**_power) + last_conv = Conv2D(filters=n_classes, kernel_size=1, padding='same', activation=None)(c9) + return Model(inputs=[inp], outputs=[last_conv]) + + +def weighted_loss(y_true, logits, weights): + ''' y_true: one-hot encoding of labels. + y_pred: tensor of probabilities. + weights: tensor of weights, 0 where there isn't data. 
+ Recall: + L = a0*CE | focal: L = a0*(1-pt)^gamma*CE + ''' + unweighted_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=logits) + weighted_loss = unweighted_loss*weights + mask = tf.not_equal(weights, 0) + weighted_loss = tf.boolean_mask(weighted_loss, mask) + return tf.reduce_mean(weighted_loss) + +def accuracy(y_true, logits): + + mask = tf.not_equal(tf.sum(y_true, axis=len(y_true.get_shape())-1), 0) + y_pred = tf.nn.softmax(logits) + y_pred = tf.math.argmax(y_pred) + y_true = tf.math.argmax(y_true) + y_true = tf.boolean_mask(y_true, mask) + y_pred = tf.boolean_mask(y_pred, mask) + return tf.reduce_mean(tf.equal(y_true, y_pred)) + + +input_shape = (None, None, 51) +learning_rate = 1e-3 +epochs = 1 +model = model_func(input_shape, n_classes=5) +optimizer = tf.keras.optimizers.Adam(lr=learning_rate) +loss_fn = weighted_loss +training_directory = 'training_data/train/' +class_weights = {0:4.5, 1:1.0, 2:2.96, 3:14.972, 4:10} +train_data = generate_unbalanced_data(training_directory, class_weights=class_weights) +#train_data = Dataset(train_data) +loss_metric = tf.keras.metrics.Mean(name='train_loss') +acc_metric = tf.keras.metrics.Mean(name='acc') + +#@tf.function +def train_step(inputs, labels, weights): + with tf.GradientTape() as tape: + logits = model(inputs, training=True) + acc = accuracy(labels, logits) + reg_loss = tf.math.add_n(model.losses) + pred_loss = loss_fn(labels, logits, weights) + total_loss = pred_loss + gradients = tape.gradient(total_loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + loss_metric.update_state(total_loss) + acc_metric.update_state(acc) + +step = 0 +for epoch in range(epochs): + for inputs, labels, weights in train_data: + loss = train_step(inputs, labels, weights) + if step > 100: + break From 29713722a56a743d5dc48eaf414ed8c731080404 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sat, 20 Apr 2019 15:55:43 -0600 Subject: [PATCH 60/89] Simplifying 
input pipeline, adding offline data augmentation --- fully-conv-classification/data_generators.py | 103 ++++- fully-conv-classification/data_utils.py | 8 + .../evaluate_accuracy.py | 95 +++++ fully-conv-classification/evaluate_image.py | 55 +++ fully-conv-classification/fully_conv.py | 343 +++------------- fully-conv-classification/models.py | 370 ++++++------------ .../runner_from_shapefile.py | 2 +- fully-conv-classification/shapefile_utils.py | 27 +- 8 files changed, 431 insertions(+), 572 deletions(-) create mode 100644 fully-conv-classification/evaluate_accuracy.py create mode 100644 fully-conv-classification/evaluate_image.py diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 79eab27..90e7fa1 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -5,8 +5,6 @@ import matplotlib.pyplot as plt from glob import glob, iglob from random import sample, shuffle -from skimage.segmentation import find_boundaries -from skimage.measure import label from scipy.ndimage.morphology import distance_transform_edt from runspec import mask_rasters from data_utils import load_raster @@ -15,6 +13,7 @@ from warnings import warn from skimage import transform from sat_image.warped_vrt import warp_single_image +from tensorflow.keras.utils import Sequence def distance_map(mask): @@ -133,7 +132,7 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): def extract_training_data(target_dict, shapefile_directory, image_directory, master_raster_directory, training_directory, save=True, tile_size=608, - year=2013, fmask=True, nodata=0, augment_dict={}): + year=2013, min_pixels=2000, fmask=True, nodata=0, augment_dict={}): ''' target_dict: {filename or string in filename : class_code} This function extracts training data from master_rasters in master_raster_directory. 
Master @@ -191,26 +190,28 @@ def extract_training_data(target_dict, shapefile_directory, image_directory, for datamask in masks: if augment_dict[datamask.class_code]: pixel_dict = _iterate_over_raster(master, datamask, pixel_dict, - tile_size, save=save, augment=True, + tile_size=tile_size, save=save, augment=True, min_pixels=min_pixels, training_directory=training_directory) else: - pixel_dict = _iterate_over_raster(master, datamask, pixel_dict, - tile_size, save=save, training_directory=training_directory) + pixel_dict = _iterate_over_raster(master, datamask, + pixel_dict, min_pixels=min_pixels, + tile_size=tile_size, save=save, + training_directory=training_directory) return pixel_dict def _iterate_over_raster(raster, datamask, pixel_dict, tile_size=608, augment=False, - save=True, training_directory=None): + save=True, training_directory=None, min_pixels=None): step = tile_size if augment: - step = np.random.randint(tile_size // 4, tile_size // 2) + step = np.random.randint(50, tile_size // 2) print("Augmenting w/ step:", step) for i in range(0, raster.shape[1]-tile_size, step): for j in range(0, raster.shape[2]-tile_size, step): sub_raster = raster[:, i:i+tile_size, j:j+tile_size] sub_mask = datamask.mask[:, i:i+tile_size, j:j+tile_size] - if _check_dimensions_and_content(sub_raster, sub_mask, tile_size): + if _check_dimensions_and_content(sub_raster, sub_mask, tile_size, min_pixels): pixel_dict[datamask.class_code] += len(np.where(sub_mask != 0)[0]) if save: dt = DataTile(sub_raster, sub_mask, datamask.class_code) @@ -218,8 +219,8 @@ def _iterate_over_raster(raster, datamask, pixel_dict, tile_size=608, augment=Fa return pixel_dict -def _check_dimensions_and_content(sub_raster, sub_mask, tile_size): - if np.all(sub_mask == 0): +def _check_dimensions_and_content(sub_raster, sub_mask, tile_size, min_pixels): + if len(np.where(sub_mask != 0)[0]) < min_pixels: return False if sub_mask.shape[1] != tile_size or sub_mask.shape[2] != tile_size: return False @@ -254,8 
+255,7 @@ def __init__(self, class_dir, augment=False, random_augment=False): def _get_files(self): self.file_list = [x for x in iglob(self.class_dir + "**", recursive=True)] - self.file_list = [x for x in self.file_list if - os.path.isfile(x)] + self.file_list = [x for x in self.file_list if os.path.isfile(x)] def next(self): if self.idx == self.n_files: @@ -281,8 +281,80 @@ def make_border_labels(mask, border_width): return dm -def generate_unbalanced_data(training_directory, border_width=2, - batch_size=2, class_weights={}, channels='all', nodata=0, n_classes=5): +class SatDataSequence(Sequence): + + def __init__(self, data_directory, batch_size, class_weights={}, + border_width=1): + self.data_directory = data_directory + self.class_weights = class_weights + self.batch_size = batch_size + self.border_width = border_width + self._get_files() + self.n_files = len(self.file_list) + self.idx = 0 + self.shuffled = sample(self.file_list, self.n_files) + + def _get_files(self): + self.file_list = [x for x in iglob(self.data_directory + "**", recursive=True)] + self.file_list = [x for x in self.file_list if os.path.isfile(x)] + + def __len__(self): + + return int(np.ceil(self.n_files / self.batch_size)) + + def on_epoch_end(self): + + self.shuffled = sample(self.file_list, self.n_files) + + def __getitem__(self, idx): + + batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] + data_tiles = [self._from_pickle(x) for x in batch] + processed = self._make_weights_labels_and_features(data_tiles) + batch_x = processed[0] + batch_y = processed[1] + return batch_x, batch_y + + def _from_pickle(self, filename): + with open(filename, 'rb') as f: + data = pickle.load(f) + return data + + def _make_weights_labels_and_features(self, data_tiles): + return _preprocess_input_data(data_tiles, self.class_weights, self.border_width) + + + +def _preprocess_input_data(data_tiles, class_weights, border_width=1): + features = [] + one_hots = [] + weightings = [] + 
border_class = len(class_weights) - 1 + n_classes = len(class_weights) + for tile in data_tiles: + tile_shape = tile['data'].shape + one_hot = np.zeros((tile_shape[1], tile_shape[2], n_classes)) + weights = np.zeros((tile_shape[1], tile_shape[2], n_classes)) + labels = tile['class_mask'][0] + one_hot[:, :, tile['class_code']] = labels + weights[:][labels == 1] = class_weights[tile['class_code']] + if tile['class_code'] == 0: + border_labels = make_border_labels(tile['class_mask'], + border_width=border_width) + one_hot[:, :, border_class] = border_labels + weights[:][border_labels[0] == 1] = class_weights[border_class] + m = np.squeeze(tile['data']) + m = np.swapaxes(m, 0, 2) + m = np.swapaxes(m, 0, 1) + features.append(m) + one_hots.append(one_hot) + weightings.append(weights) + return [np.asarray(features), np.asarray(weightings)], [np.asarray(one_hots)] + + +def generate_unbalanced_data(training_directory='training_data/train/', border_width=2, + batch_size=2, class_weights = {0:4.5, 1:1.0, 2:2.96, 3:14.972, 4:10}, + channels='all', nodata=0, n_classes=5): ''' Assumes data is stored in training_directory ''' border_class = len(class_weights.keys()) - 1 gen = DataGen(training_directory) @@ -307,6 +379,7 @@ def generate_unbalanced_data(training_directory, border_width=2, weights[border_labels[0] == 1] = class_weights[border_class] m = np.squeeze(tile['data']) m = np.swapaxes(m, 0, 2) + m = np.swapaxes(m, 0, 1) masters.append(m) one_hots.append(one_hot) weightings.append(weights) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 92aa6a5..e5555e9 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -250,5 +250,13 @@ def get_class_weighting(training_directory, w0=15, sigma=2, threshold=0.7*15, n_ return out +def save_model_info(outfile, args): + template = '{}={}|' + with open(outfile, 'a') as f: + for key in args: + f.write(template.format(key, args[key])) + 
f.write("\n-------------------\n") + print("wrote run info to {}".format(outfile)) + if __name__ == "__main__": pass diff --git a/fully-conv-classification/evaluate_accuracy.py b/fully-conv-classification/evaluate_accuracy.py new file mode 100644 index 0000000..d162ff8 --- /dev/null +++ b/fully-conv-classification/evaluate_accuracy.py @@ -0,0 +1,95 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +from glob import glob +from pprint import pprint +from sys import argv +from data_utils import load_raster +from data_generators import assign_class_code, concatenate_fmasks +from sklearn.metrics import confusion_matrix, jaccard_similarity_score +from shapefile_utils import generate_class_mask, get_shapefile_path_row + + +def evaluate_accuracy(argmaxed_raster, shapefile_test_dir, master_raster_dir, target_dict, + show=False): + shp_dict = {} + # TODO: A weighted accuracy metric might be better. + pr = None + for f in glob(shapefile_test_dir + "*.shp"): + pr = get_shapefile_path_row(f) + cc = assign_class_code(target_dict, f) + shp_dict[cc] = f + + class_mask_template = os.path.join(master_raster_dir, "class_mask_{}_{}_2013.tif".format(pr[0], pr[1])) + first = True + out = None + nodata = -1 + for class_code in sorted(shp_dict.keys()): + mask, mask_meta = generate_class_mask(shp_dict[class_code], class_mask_template, + nodata) + if first: + out = np.ones((mask.shape[1], mask.shape[2], len(shp_dict)))*-1 + first = False + out[:, :, class_code][mask[0] != nodata] = 1 + + image_dir = '/home/thomas/share/image_data/test/{}_{}_2013'.format(pr[0], pr[1]) + mask = np.zeros_like(mask) + fmask = concatenate_fmasks(image_dir, mask, mask_meta) + for i in range(out.shape[2]): + out[:, :, i][fmask[0] != 0] = -1 + + bool_mask = np.not_equal(np.sum(out, axis=2), -4) + y_pred, _ = load_raster(argmaxed_raster) + if 'argmax' not in argmaxed_raster: + y_pred = np.argmax(y_pred, axis=0) + y_true = np.argmax(out, axis=2) + + for i in range(5): + y_pred_irr = y_pred[y_true == 
i] + print("Class {} acc: {}".format(i, np.sum(np.not_equal(y_pred_irr, i)) / y_pred_irr.size)) + + y_pred_masked = y_pred[bool_mask] + y_true_masked = y_true[bool_mask] + print("Confusion mat for {} (all classes):".format(argmaxed_raster)) + cmat = confusion_matrix(y_true_masked, y_pred_masked) + pprint(cmat) + final = np.mean(np.equal(y_pred_masked, y_true_masked)) + print("pixel wise acc {}".format(final)) + print("Class precision:") + print(np.diag(cmat) / np.sum(cmat, axis=0)) + print("Class recall:") + print(np.diag(cmat) / np.sum(cmat, axis=1)) + if show: + fig, ax = plt.subplots(ncols=3) + ax[0].imshow(y_pred[0]) + ax[1].imshow(y_true) + ax[2].imshow(bool_mask) + plt.suptitle('F: {} | acc: {}'.format(argmaxed_raster, final)) + plt.show() + return final + +if __name__ == '__main__': + + irr1 = 'Huntley' + irr2 = 'Sun_River' + fallow = 'Fallow' + forest = 'Forrest' + other = 'other' + target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} + shapefile_test_dir = 'shapefile_data/test/' + master_raster_dir = '/home/thomas/share/master_rasters/test/' + if len(argv) > 1: + argmaxed_raster = argv[1] + evaluate_accuracy(argmaxed_raster, shapefile_test_dir, master_raster_dir, target_dict) + else: + rsa = [f for f in glob('compare_model_outputs/during-the-day/' + '*.tif')] + accs = {} + for argmaxed_raster in rsa: + print("-------------------------") + print(argmaxed_raster) + acc = evaluate_accuracy(argmaxed_raster, shapefile_test_dir, master_raster_dir, target_dict) + accs[argmaxed_raster] = acc + + sort = sorted(acc.items(), key=lambda kv: kv[1]) + for key in sort: + print("Raster: {} | acc: {}".format(key, accs[key])) diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py new file mode 100644 index 0000000..cd8ee7b --- /dev/null +++ b/fully-conv-classification/evaluate_image.py @@ -0,0 +1,55 @@ +import os +import numpy as np +import tensorflow as tf +from data_utils import clip_rasters, save_raster +from sys 
import stdout +from tensorflow.keras.models import load_model +from data_generators import load_raster +import matplotlib.pyplot +import keras.backend as K +from fully_conv import weighted_loss + +_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) + + +def evaluate_image(master_raster, model, num_classes=4, outfile=None, ii=None): + + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive handling of this case. + else: + master, meta = load_raster(master_raster) + class_mask = np.ones((master.shape[1], master.shape[2], num_classes)) # Just a placeholder + out = np.zeros((master.shape[2], master.shape[1], num_classes)) + chunk_size = 608 + diff = 608 + stride = 608 + for i in range(0, master.shape[1]-diff, stride): + for j in range(0, master.shape[2]-diff, stride): + sub_master = master[:, i:i+chunk_size, j:j+chunk_size] + sub_mask = class_mask[i:i+chunk_size, j:j+chunk_size, :] + sub_master = np.swapaxes(sub_master, 0, 2) + sub_master = np.swapaxes(sub_master, 0, 1) + sub_master = np.expand_dims(sub_master, 0) + sub_mask = np.expand_dims(sub_mask, 0) + preds = model.predict([sub_master, sub_mask]) + preds = np.exp(preds) + soft = preds / np.sum(preds, axis=-1, keepdims=True) + soft = np.swapaxes(soft, 1, 2) + out[j:j+chunk_size, i:i+chunk_size, :] = soft + + stdout.write("N eval: {}. 
Percent done: {:.2f}\r".format(ii, i / master.shape[1])) + + out = np.swapaxes(out, 0, 2) + out = out.astype(np.float32) + if outfile: + save_raster(out, outfile, meta, count=num_classes) + return out + +if __name__ == '__main__': + master_raster = '/home/thomas/share/master_rasters/test/master_raster_37_28_2013.tif' + model = 'checkpoints_2.h5' + model = load_model(model, custom_objects={'tf':tf, '_epsilon':_epsilon, + 'weighted_loss':weighted_loss}) + outfile = '300ep.tif' + evaluate_image(master_raster, model, outfile=outfile, num_classes=4) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 8663df4..de2935d 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -1,8 +1,9 @@ import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' import time import keras.backend as K import tensorflow as tf +print(tf.__version__) #tf.enable_eager_execution() import matplotlib.pyplot as plt import numpy as np @@ -10,53 +11,34 @@ from glob import glob from skimage import transform, util from sklearn.metrics import confusion_matrix -from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler, - ReduceLROnPlateau) +from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) from rasterio import open as rasopen from rasterio.mask import mask from shapely.geometry import shape from pickle import load as pload from fiona import open as fopen -from data_generators import generate_training_data, load_raster +from data_generators import generate_unbalanced_data, SatDataSequence from data_utils import clip_rasters, save_raster from shapefile_utils import get_features, generate_class_mask -from models import (fcnn_functional, fcnn_model, fcnn_functional_small, unet, unet_weighted, - weighted_unet_no_transpose_conv) - -NO_DATA = -1 -CHUNK_SIZE = 572 # some value that is divisible by 
2^MAX_POOLS. -WRS2 = '../spatial_data/wrs2_descending_usa.shp' - -def custom_objective(y_true, y_pred): - y_true_for_loss = y_true - mask = tf.not_equal(y_true, NO_DATA) - y_true_for_loss = tf.where(mask, y_true, tf.zeros_like(y_true)) - y_true_for_loss = tf.cast(y_true_for_loss, tf.int32) - losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true_for_loss) - # the above line works in eager mode, but not otherwise. - # losses = tf.keras.losses.sparse_categorical_crossentropy(y_true_for_loss, y_pred) - out = tf.boolean_mask(losses, mask) - return out +from models import unet_same_padding def weighted_loss(target, output): # Weight map: out = -tf.reduce_sum(target*output, len(output.get_shape())-1) - #mask = tf.not_equal(out, 0)#tf.boolean_mask(out, mask) return out def weighted_focal_loss(target, output, gamma=1): - exp = tf.exp(output) - pt = tf.pow(1-exp, gamma) - out = -tf.reduce_sum(target*output, len(output.get_shape())-1) - mask = tf.not_equal(out, 0) - pt_ce = tf.multiply(pt, output) - out = -tf.reduce_sum(pt_ce*target, len(output.get_shape()) -1) - return tf.boolean_mask(out, mask) + # L = a0 *(1-pt)^gamma * ce + # Output of model is CE. + # Target is one-hot encoded. + soft = tf.nn.softmax(output, axis=-1) + pt = tf.pow(1-soft, gamma) # probability + return -tf.reduce_sum(target*output*pt, len(output.get_shape())-1) def acc(y_true, y_pred): - y_pred_sum = tf.reduce_sum(y_pred, axis=3) + y_pred_sum = tf.reduce_sum(y_pred, axis=-1) mask = tf.not_equal(y_pred_sum, 0) y_arg = tf.argmax(y_pred, axis=-1) y_t_arg = tf.argmax(y_true, axis=-1) @@ -64,267 +46,46 @@ def acc(y_true, y_pred): y_t_arg_mask = tf.boolean_mask(y_t_arg, mask) return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) - -def evaluate_image(master_raster, model, max_pools, channels='all', num_classes=4, - outfile=None, ii=None): - - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive handling of this case. 
- else: - master, meta = load_raster(master_raster) - class_mask = np.zeros((2, master.shape[1], master.shape[2])) # Just a placeholder - out = np.zeros((master.shape[2], master.shape[1], num_classes+1)) - - # All U-Net specific. - CHUNK_SIZE = 572 - diff = 92 - stride = 388 - - for i in range(0, master.shape[1]-diff, stride): - for j in range(0, master.shape[2]-diff, stride): - sub_master = master[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - sub_mask = class_mask[:, i:i+CHUNK_SIZE, j:j+CHUNK_SIZE] - if channels != 'all': - sub_master = sub_master[:, :, :, channels] - sub_msk = np.ones((1, 388, 388, 5)) # a placeholder - if sub_master.shape[1] == 572 and sub_master.shape[2] == 572: - preds = model.predict([sub_master, sub_msk]) - preds_exp = np.exp(preds) - preds_softmaxed = preds_exp / np.sum(preds_exp, axis=3, keepdims=True) - if np.any(np.isnan(preds)): - print("Nan prediction.") - preds = preds_softmaxed[0, :, :, :] - else: - continue - if cut_cols == 0 and cut_rows == 0: - out[j+diff:j+CHUNK_SIZE-diff, i+diff:i+CHUNK_SIZE-diff, :] = preds - elif cut_cols == 0 and cut_rows != 0: - ofs = master.shape[1]-cut_rows - out[j+diff:j+CHUNK_SIZE-diff, i+diff:ofs-diff, :] = preds - elif cut_cols != 0 and cut_rows == 0: - ofs = master.shape[2]-cut_cols - out[j+diff:ofs-diff, i+diff:i+CHUNK_SIZE-diff, :] = preds - elif cut_cols != 0 and cut_rows != 0: - ofs_col = master.shape[2]-cut_cols - ofs_row = master.shape[1]-cut_rows - out[j+diff:ofs_col-diff, i+diff:ofs_row-diff, :] = preds - else: - print("whatcha got goin on here?") - - stdout.write("N eval: {}. 
Percent done: {:.2f}\r".format(ii, i / master.shape[1])) - - out = np.swapaxes(out, 0, 2) - out = out.astype(np.float32) - if outfile: - save_raster(out, outfile, meta, count=5) - return out - - -def evaluate_images(image_directory, model, include_string, max_pools, exclude_string, prefix, - save_dir, channels): - ii = 0 - for f in glob(os.path.join(image_directory, "*.tif")): - if exclude_string not in f and include_string in f: - print(f) - out = os.path.basename(f) - os.path.split(out)[1] - out = out[out.find("_"):] - out = os.path.splitext(out)[0] - out = prefix + out + ".tif" - out = os.path.join(save_dir, out) - ii += 1 - evaluate_image_unet(f, model, max_pools=max_pools, channels=channels, - outfile=out, ii=ii) - - -def lr_schedule(epoch, lr): - return 0.01*np.exp(-epoch/1000) - - -def train_model(training_directory, model, steps_per_epoch, valid_steps, max_pools, box_size=0, - epochs=3, random_sample=False, threshold=None, sigma=None, w0=None, channels='all', train_more=False, raster_name=None, learning_rate=1e-3, num_classes=5): - ''' This function assumes that train/test data are - subdirectories of training_directory, with - the names train/test.''' - if channels == 'all': - channel_depth = 51 - else: - channel_depth = channels.shape[0] - shp = (572, 572, channel_depth) - weight_shape = (388, 388, num_classes) - if not train_more: - model = model(shp, weight_shape, num_classes, base_exp=5) - model.compile( - loss=weighted_focal_loss, - optimizer=tf.keras.optimizers.Adam(lr=learning_rate), - metrics=[acc] - ) - model.summary() - graph_path = os.path.join('graphs/', str(int(time.time()))) - os.mkdir(graph_path) - tb = TensorBoard(log_dir=graph_path) - ckpt_path = os.path.join(graph_path, raster_name+"_{epoch:02d}-{val_acc:.2f}.hdf5") - scheduler = LearningRateScheduler(lr_schedule, verbose=1) - mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_acc', save_best_only=True, - mode='max', verbose=1) - train = os.path.join(training_directory, 'train') - test = 
os.path.join(training_directory, 'test') - class_weight = {0:28.101, 1:1.0, 2:2.9614, 3:103.8927} #for no buffer - #class_weight = {0:1, 1:1.0, 2:1, 3:1} #for no buffer - - train_generator = generate_training_data(train, max_pools, sample_random=random_sample, - box_size=box_size, class_weights=class_weight, channels=channels, threshold=threshold, - sigma=sigma, w0=w0) - test_generator = generate_training_data(test, max_pools, sample_random=False, - box_size=box_size, batch_size=4, w0=w0, threshold=threshold, - sigma=sigma, class_weights=class_weight, channels=channels) - - model.fit_generator(train_generator, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=1, - validation_data=test_generator, - validation_steps=valid_steps, - callbacks=[tb, scheduler, mdlcheck, tf.keras.callbacks.TerminateOnNaN()], - use_multiprocessing=True) - - return model, graph_path - - -def save_model_info(outfile, args): - template = '{}={}|' - with open(outfile, 'a') as f: - for key in args: - f.write(template.format(key, args[key])) - f.write("\n-------------------\n") - print("wrote run info to {}".format(outfile)) - - -def gradient_wrt_inputs(model, data): - # s = '1553014193.4813933' - # f = 'training_data/multiclass/train/class_2_data/{}.pkl'.format(s) - # with open(f, 'rb') as f: - # data = pload(f) - # data = np.expand_dims(data['data'], axis=0) - # data = np.swapaxes(data, 1, 3) - # gradient_wrt_inputs(model, data) - layer_output = model.output - loss = -tf.reduce_mean(layer_output) - grads = K.gradients(loss, model.input[0])[0] - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - weights = np.ones((1, 388, 388, 5)) - results = sess.run(grads, feed_dict={model.input[0]:data, model.input[1]:weights}) - return results - +def lr_schedule(epoch): + lr = 1e-4 + if epoch > 100: + lr /= 32. + elif epoch > 80: + lr /= 16. + elif epoch > 40: + lr /= 8. + elif epoch > 25: + lr /= 4. + elif epoch > 10: + lr /= 2. 
+ print('Learning rate: ', lr) + return lr if __name__ == '__main__': - band_dict = {'B1.TIF':np.arange(0, 2+1), - 'B10.TIF':np.arange(3, 5+1), - 'B11.TIF':np.arange(6, 8+1), - 'B2.TIF':np.arange(9, 11+1), - 'B3.TIF':np.arange(12, 14+1), - 'B4.TIF':np.arange(15, 17+1), - 'B5.TIF':np.arange(18, 20+1), - 'B6.TIF':np.arange(21, 23+1), - 'B7.TIF':np.arange(24, 26+1), - 'B8.TIF':np.arange(27, 29+1), - 'B9.TIF':np.arange(30, 32+1), - 'aspect.tif':np.array([33]), - 'elevation_diff.tif':np.array([34]), - 'etr.tif':np.arange(35, 37+1), - 'pet.tif':np.arange(38, 40+1), - 'pr.tif':np.arange(41, 43+1), - 'slope.tif':np.array([44]), - 'tmmn.tif':np.arange(45, 47+1), - 'tmmx.tif':np.arange(48, 50+1)} - - training_directory = 'training_data/' - info_file = 'run_information.txt' - max_pools = 0 - model_dir = 'models/' - info_path = os.path.join(model_dir, info_file) - model_func = weighted_unet_no_transpose_conv - steps_per_epoch = 100 - valid_steps = 20 - epochs = 320 - w0 = 10 - sigma = 5 - threshold = 0.7*w0 - train_iter = 1 - train_more = False - eager = False - class_weights = True - learning_rate = 1e-3 - random_sample = False - augment = False - channels = 'all' - raster_name = '2_w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) - model_name = '2_w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) - raster_name = 'unit_irr_weights_normal_loss_w0:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) - model_name = 'unit_1border_weights_irr_weights_normal_loss_w0_weight:{}-th:{}-sigma:{}-lr:{}'.format(w0, threshold, sigma, learning_rate) - # Next try raw weights from weight map. 
- model_save_path = os.path.join(model_dir, model_name) - pr_to_eval = '37_28' - if pr_to_eval == '39_27': - image_directory = '/home/thomas/share/master_rasters/train/' - else: - image_directory = '/home/thomas/share/master_rasters/test/' - - param_dict = {'model_name':model_name, 'epochs':epochs, 'steps_per_epoch':steps_per_epoch, - 'raster_name':raster_name, 'learning_rate':learning_rate, 'eager':eager, - 'class_weights':class_weights, 'augmented':augment, 'random_sample':random_sample, - 'graph_path':None, 'bands':channels, 'w0':w0, 'sigma':sigma} - - evaluating = True - num_classes = 5 - model_save_path = 'models/modelsunit_1border_weights_irr_weights_normal_loss_w0_weight:10-th:7.0-sigma:5-lr:0.001step_2850' - if not os.path.isfile(model_save_path): - print("Training new model") - shp = (572, 572, 51) - weight_shape = (388, 388, num_classes) - model = weighted_unet_no_transpose_conv(shp, weight_shape, num_classes, base_exp=5) - model.compile( - loss=weighted_loss, - optimizer=tf.keras.optimizers.Adam(lr=learning_rate), - metrics=[acc] - ) - graph_path = os.path.join('graphs/', str(int(time.time()))) - os.mkdir(graph_path) - tb = TensorBoard(log_dir=graph_path) - ckpt_path = os.path.join(graph_path, raster_name+"_{epoch:02d}-{val_acc:.2f}.hdf5") - scheduler = LearningRateScheduler(lr_schedule, verbose=1) - mdlcheck = ModelCheckpoint(ckpt_path, monitor='val_acc', save_best_only=True, - mode='max', verbose=1) - train = os.path.join(training_directory, 'train') - test = os.path.join(training_directory, 'test') - class_weight = {0:28.101, 1:1.0, 2:2.9614, 3:103.8927} #for no buffer - #class_weight = {0:1, 1:1.0, 2:1, 3:1} #for no buffer - - train_generator = generate_training_data(train, max_pools, sample_random=False, - class_weights=class_weight, channels=channels, threshold=threshold, - sigma=sigma, w0=w0) - i = 0 - k = 0 - train_iter = 150 - for data, labels in train_generator: - out = model.train_on_batch(x=data, - y=labels) - # Loss, accuracy? 
- print(out) - if i > train_iter: - model.save('models'+model_name+'step_{}'.format((k+1)*train_iter)) - evaluate_images(image_directory, model, include_string=pr_to_eval, - exclude_string="class", channels=channels, max_pools=max_pools, - prefix=raster_name+'step_{}'.format((k+1)*train_iter), - save_dir='compare_model_outputs/during-the-day/') - k+=1 - i = 0 - i += 1 - - raster_name='final_eval' - model = tf.keras.models.load_model(model_save_path, - custom_objects={'weighted_loss':weighted_loss}) - evaluate_images(image_directory, model, include_string=pr_to_eval, exclude_string="class", channels=channels, max_pools=max_pools, prefix=raster_name, save_dir='compare_model_outputs/systematic/') + n_classes = 4 + input_shape = (None, None, 51) + weight_shape = (None, None, n_classes) + filepath = './focal_loss.h5' + + # Prepare callbacks for model saving and for learning rate adjustment. + checkpoint = ModelCheckpoint(filepath=filepath, + monitor='val_acc', + verbose=1, + save_best_only=True) + + tensorboard = TensorBoard(log_dir='graphs/{}'.format(time.time())) + lr_scheduler = LearningRateScheduler(lr_schedule) + model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=6) + opt = tf.keras.optimizers.Adam(1e-4) + model.compile(opt, loss=weighted_focal_loss, metrics=[acc]) + class_weights = {0:4.5, 1:1.0, 2:2.96, 3:14.972} + class_weights_valid = {0:1.0, 1:1.0, 2:1.0, 3:1.0} + generator = SatDataSequence('training_data/train/', batch_size=2, class_weights=class_weights) + valid_generator = SatDataSequence('training_data/test/', batch_size=2, + class_weights=class_weights_valid) + model.fit_generator(generator, + epochs=300, + validation_data=valid_generator, + callbacks=[checkpoint, lr_scheduler, tensorboard], + verbose=1) diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 74231e5..1024361 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -1,72 +1,124 @@ 
import os +import tensorflow as tf os.environ['KERAS_BACKEND'] = 'tensorflow' import keras.backend as K -import tensorflow as tf from tensorflow.keras.models import Model -from tensorflow.keras.layers import (multiply, Conv2D, Input, MaxPooling2D, Conv2DTranspose, Concatenate, Dropout, UpSampling2D, BatchNormalization, Cropping2D, Lambda) - -def fcnn_model(n_classes): - model = tf.keras.Sequential() - # Must define the input shape in the first layer of the neural network - model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=8, padding='same', activation='relu', - input_shape=(None, None, 36))) - model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=4, padding='same', activation='relu')) - model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=4, padding='same', activation='relu')) - model.add(tf.keras.layers.Conv2D(filters=16, kernel_size=2, padding='same', activation='relu')) - model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Conv2D(filters=n_classes, kernel_size=2, padding='same', - activation='softmax')) - #model.summary() - return model - -def fcnn_functional_small(n_classes): - x = Input((None, None, 36)) - - c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x) - c1 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(c1) - mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) - - c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(mp1) - c2 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(c2) - mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - #mp2 = Dropout(0.5)(mp2) - - c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(mp2) - c3 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(c3) - mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) - - last_conv = Conv2D(filters=256, kernel_size=(3,3), activation='relu', padding='same')(mp3) - - u1 = UpSampling2D(size=(2, 2))(last_conv) 
- u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) - u1 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1) - - u1_c3 = Concatenate()([c3, u1]) - - u2 = Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='same')(u1_c3) - u2 = UpSampling2D(size=(2, 2))(u2) - u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(u2) - #u2 = Dropout(0.5)(u2) - - u2_c2 = Concatenate()([u2, c2]) - u2_c2 = Dropout(0.5)(u2_c2) - - c4 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u2_c2) - u3 = UpSampling2D(size=(2, 2))(c4) - u3 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(u3) - - u3_c1 = Concatenate()([u3, c1]) - - c5 = Conv2D(filters=n_classes, kernel_size=(3,3), activation='linear', padding='same')(u3_c1) - - model = Model(inputs=x, outputs=c5) - #model.summary() - return model +from tensorflow.keras.layers import (multiply, Conv2D, Input, MaxPooling2D, Conv2DTranspose, + Concatenate, Dropout, UpSampling2D, BatchNormalization, Cropping2D, Lambda, Activation) +from tensorflow.keras.regularizers import l2 +from tensorflow.keras.activations import relu + + +def gradient_wrt_inputs(model, data): + # s = '1553014193.4813933' + # f = 'training_data/multiclass/train/class_2_data/{}.pkl'.format(s) + # with open(f, 'rb') as f: + # data = pload(f) + # data = np.expand_dims(data['data'], axis=0) + # data = np.swapaxes(data, 1, 3) + # gradient_wrt_inputs(model, data) + layer_output = model.output + loss = -tf.reduce_mean(layer_output) + grads = K.gradients(loss, model.input[0])[0] + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + weights = np.ones((1, 388, 388, 5)) + results = sess.run(grads, feed_dict={model.input[0]:data, model.input[1]:weights}) + return results + _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) -def 
weighted_unet_no_transpose_conv(input_shape, weighted_input_shape, n_classes, base_exp=5): + +def ConvBlock(x, filters=64, expanding_path=False): + + x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', + kernel_regularizer=l2(0.01))(x) + x = BatchNormalization()(x) + x = Activation(relu)(x) + if expanding_path: + x = Conv2D(filters=filters // 2, kernel_size=3, strides=1, padding='same', + kernel_regularizer=l2(0.01))(x) + else: + x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', + kernel_regularizer=l2(0.01))(x) + x = BatchNormalization()(x) + return Activation(relu)(x) + + +_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) + + +def unet_same_padding(input_shape, weight_shape, initial_exp=6, n_classes=5): + + features = Input(shape=input_shape) + weights = Input(shape=weight_shape) + _power = initial_exp + exp = 2 + + c1 = ConvBlock(features, exp**_power) + mp1 = MaxPooling2D(pool_size=2, strides=2)(c1) + + _power += 1 + + c2 = ConvBlock(mp1, exp**_power) + mp2 = MaxPooling2D(pool_size=2, strides=2)(c2) + + _power += 1 + + c3 = ConvBlock(mp2, exp**_power) + mp3 = MaxPooling2D(pool_size=2, strides=2)(c3) + + _power += 1 + + c4 = ConvBlock(mp3, exp**_power) + mp4 = MaxPooling2D(pool_size=2, strides=2)(c4) + + _power += 1 + + # 1024 filters + c5 = Conv2D(filters=exp**_power, kernel_size=3, strides=1, padding='same', + kernel_regularizer=l2(0.01))(mp4) + _power -= 1 + c5 = Conv2D(filters=exp**_power, kernel_size=3, strides=1, padding='same', + kernel_regularizer=l2(0.01))(c5) + + u1 = UpSampling2D(size=(2, 2))(c5) + + u1_c4 = Concatenate()([u1, c4]) + + c6 = ConvBlock(u1_c4, filters=exp**_power, expanding_path=True) + + u2 = UpSampling2D(size=(2, 2))(c6) + + u2_c3 = Concatenate()([u2, c3]) + + _power -= 1 + c7 = ConvBlock(u2_c3, filters=exp**_power, expanding_path=True) + + u3 = UpSampling2D(size=(2, 2))(c7) + + u3_c2 = Concatenate()([u3, c2]) + + _power -= 1 + c8 = ConvBlock(u3_c2, filters=exp**_power, expanding_path=True) 
+ + u4 = UpSampling2D(size=(2, 2))(c8) + + u4_c1 = Concatenate()([u4, c1]) + + _power -= 1 + c9 = ConvBlock(u4_c1, filters=exp**_power) + last_conv = Conv2D(filters=n_classes, kernel_size=1, padding='same', activation='softmax')(c9) + + last = Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(last_conv) + last = Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(last) + last = Lambda(lambda x: K.log(x))(last) + weighted_xen = multiply([last, weights]) + return Model(inputs=[features, weights], outputs=[weighted_xen]) + + +def unet_valid_padding(input_shape, weighted_input_shape, n_classes, base_exp=5): ''' This model does not use any Conv2DTranspose layers. Instead a Upsampling2D layer with a Conv layer after @@ -193,113 +245,6 @@ def weighted_unet_no_transpose_conv(input_shape, weighted_input_shape, n_classes return Model(inputs=[inp1, weighted_input], outputs=[weighted_sum]) -def unet_weighted(input_shape, n_classes): - inp1 = Input(input_shape) - weighted_input = Input(shape=(388, 388, 5)) - base = 2 - exp = 6 - - # 64 filters - c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(inp1) - c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c1) - mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - - exp += 1 - # 128 filters - c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp1) - c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c3) - mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) - - - exp += 1 - # 256 filters - c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp2) - c6 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c5) - mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c6) - - exp += 1 - # 512 filters - c7 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp3) - c8 = 
Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c7) - mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c8) - - exp += 1 - # 1024 filters - c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp4) - c10 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c9) - - exp -= 1 - # 512 filters, making 1024 when concatenated with - # the corresponding layer from the contracting path. - u1 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - activation='relu')(c10) - - c8_cropped = Cropping2D(cropping=4)(c8) - concat_u1_c8 = Concatenate()([u1, c8_cropped]) - - # 512 filters - c11 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u1_c8) - - exp -= 1 - # 256 filters, making 512 when concatenated with the - # corresponding layer from the contracting path. - c12 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c11) - - u2 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - activation='relu')(c12) - - c6_cropped = Cropping2D(cropping=16)(c6) - concat_u2_c6 = Concatenate()([u2, c6_cropped]) - - # 256 filters - c13 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u2_c6) - bn1 = BatchNormalization(axis=3)(c13) - - exp -= 1 - # 128 filters, making 256 when concatenated with the - # corresponding layer from the contracting path. 
- c14 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn1) - - u3 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - activation='relu')(c14) - - c4_cropped = Cropping2D(cropping=40)(c4) - concat_u3_c4 = Concatenate()([u3, c4_cropped]) - - # 128 filters - c15 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u3_c4) - bn2 = BatchNormalization(axis=3)(c15) - - exp -= 1 - # 64 filters, making 128 when concatenated with the - # corresponding layer from the contracting path. - c16 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn2) - - u4 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - activation='relu')(c16) - - c2_cropped = Cropping2D(cropping=88)(c2) - concat_u4_c2 = Concatenate()([u4, c2_cropped]) - - c17 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u4_c2) - bn3 = BatchNormalization(axis=3)(c17) - - c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(bn3) - - last_conv = Conv2D(filters=n_classes, kernel_size=1, activation='softmax', padding='valid')(c18) - - last = Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(last_conv) - last = Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(last) - last = Lambda(lambda x: K.log(x))(last) - weighted_sum = multiply([last, weighted_input]) - return Model(inputs=[inp1, weighted_input], outputs=[weighted_sum]) - - def unet(n_classes, channel_depth=36): x = Input((None, None, channel_depth)) base = 2 @@ -401,90 +346,3 @@ def unet(n_classes, channel_depth=36): return Model(inputs=x, outputs=last) -def fcnn_functional(n_classes): - - x = Input((None, None, 36)) - base = 2 - # exp from 4 to 5. 
- exp = 5 - c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(x) - c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c1) - mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c1) - - exp+=1 - - c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp1) - c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c2) - mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - #mp2 = Dropout(0.5)(mp2) - - exp+=1 - - c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp2) - c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c3) - mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c3) - #mp3 = Dropout(0.5)(mp3) - - exp+=1 - - c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp3) - c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c4) - mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) - - exp+=1 - - c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp4) - c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(c5) - mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c5) - - last_conv = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(mp4) - - u1 = UpSampling2D(size=(2, 2))(last_conv) - u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) - u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) - - exp-=1 - - u1_c5 = Concatenate()([c5, u1]) - - u2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1_c5) - u2 = UpSampling2D(size=(2, 2))(u2) - u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = Conv2D(filters=base**exp, kernel_size=(3, 3), activation='relu', padding='same')(u2) - u2 = 
Dropout(0.5)(u2) - - u2_c4 = Concatenate()([u2, c4]) - - exp-=1 - - u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u2_c4) - u3 = UpSampling2D(size=(2, 2))(u3) - u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3) - u3 = Dropout(0.5)(u3) - - u3_c3 = Concatenate()([u3, c3]) - - exp-=1 - - u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3_c3) - u4 = UpSampling2D(size=(2, 2))(u4) - u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4) - #u4 = BatchNormalization(axis=3)(u4) - - u4_c2 = Concatenate()([u4, c2]) - - exp-=1 - - u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4_c2) - u5 = UpSampling2D(size=(2, 2))(u5) - u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) - u5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u5) - #u5 = BatchNormalization(axis=3)(u5) - - u5_c1 = Concatenate()([u5, c1]) - - u6 = Conv2D(filters=n_classes, kernel_size=(3, 3), activation='softmax', padding='same')(u5_c1) - - model = Model(inputs=x, outputs=u6) - # model.summary() - return model diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 2a6f181..7d35ed2 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -201,7 +201,7 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi fallow = 'Fallow' forest = 'Forrest' other = 'other' - target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:3} + target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:2} augment_dict = {0:True, 1:False, 2:False, 3:True} train_dir = 'training_data/train/' shp_train = 'shapefile_data/train/' diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py 
index 83984a4..7882ec9 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -91,12 +91,13 @@ def get_pr_subset(poly, tiles): def filter_shapefile(shapefile, out_directory): - """ Shapefiles may span multiple path/rows. + """ Shapefiles may span multiple path/rows/years. For training, we want all of the data available. This function filters the polygons contained in - the shapefile into separate files for each path/row + the shapefile into separate files for each path/row/year contained in the shapefile. """ - path_row_map = defaultdict(list) + # Problem: Not every polygon has a year attribute. + path_row_year_map = defaultdict(list) wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') tree, path_rows, features = construct_kdtree(wrs2) wrs2.close() @@ -112,18 +113,19 @@ def filter_shapefile(shapefile, out_directory): centroid = cent_arr.reshape(1, -1) dist, ind = tree.query(centroid, k=10) tiles = features[ind[0]] - prs = get_pr_subset(poly, tiles) + prs = get_pr_subset(poly, tiles) # gets the matching path/rows + for p in prs: - path_row_map[p].append(feat) + path_row_year_map[p].append(feat) outfile = os.path.basename(shapefile) outfile = os.path.splitext(outfile)[0] - for path_row in path_row_map: - out = outfile + path_row + ".shp" + for path_row_year in path_row_year_map: + out = outfile + path_row_year + ".shp" with fopen(os.path.join(out_directory, out), 'w', **meta) as dst: print("Saving {}".format(out)) - for feat in path_row_map[path_row]: + for feat in path_row_year_map[path_row_year]: dst.write(feat) @@ -257,4 +259,11 @@ def buffer_shapefile(shp): dst.write(feat) if __name__ == '__main__': - pass + + from glob import glob + out_dir = 'shapefile_data/all_shapefiles/test' + for f in glob("shapefile_data/all_shapefiles/" + '*.shp'): + print(f) + filter_shapefile(f, out_dir) + break + From d9da3ed39057b9586126243b0372a87424bd6e86 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 23 
Apr 2019 09:12:25 -0600 Subject: [PATCH 61/89] Simplifying data/training pipeline. Starting to work on real-time data augmentation --- fully-conv-classification/data_generators.py | 12 +++-- fully-conv-classification/data_utils.py | 5 +- fully-conv-classification/evaluate_image.py | 13 +++-- fully-conv-classification/fully_conv.py | 50 ++++++++----------- .../runner_from_shapefile.py | 10 ++-- 5 files changed, 44 insertions(+), 46 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 90e7fa1..7c3d8b0 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -303,12 +303,15 @@ def __len__(self): return int(np.ceil(self.n_files / self.batch_size)) def on_epoch_end(self): - self.shuffled = sample(self.file_list, self.n_files) def __getitem__(self, idx): - - batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] + # TODO: + # How to feed examples into the network? + # Balanced in each batch is good, but when + # n classes exceeds batch size, we can't balance + # classes in each batch. 
+ batch = self.shuffled[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] processed = self._make_weights_labels_and_features(data_tiles) batch_x = processed[0] @@ -324,7 +327,6 @@ def _make_weights_labels_and_features(self, data_tiles): return _preprocess_input_data(data_tiles, self.class_weights, self.border_width) - def _preprocess_input_data(data_tiles, class_weights, border_width=1): features = [] one_hots = [] @@ -353,7 +355,7 @@ def _preprocess_input_data(data_tiles, class_weights, border_width=1): def generate_unbalanced_data(training_directory='training_data/train/', border_width=2, - batch_size=2, class_weights = {0:4.5, 1:1.0, 2:2.96, 3:14.972, 4:10}, + batch_size=2, class_weights={}, channels='all', nodata=0, n_classes=5): ''' Assumes data is stored in training_directory ''' border_class = len(class_weights.keys()) - 1 diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index e5555e9..17cfdcf 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -158,10 +158,11 @@ def bandwise_mean(paths_list, band_name): return (pixel_value_sum / n_pixels, band_name) -def download_images(project_directory, path, row, year, satellite=8, n_landsat=3): +def download_images(project_directory, path, row, year, satellite=8, n_landsat=3, + max_cloud_pct=40): image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, - max_cloud_pct=70, n_landsat=n_landsat, year=year) + max_cloud_pct=max_cloud_pct, n_landsat=n_landsat, year=year) image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is # a cloud mask. 
diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index cd8ee7b..787a392 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -7,7 +7,7 @@ from data_generators import load_raster import matplotlib.pyplot import keras.backend as K -from fully_conv import weighted_loss +from fully_conv import weighted_loss, weighted_focal_loss _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) @@ -47,9 +47,12 @@ def evaluate_image(master_raster, model, num_classes=4, outfile=None, ii=None): return out if __name__ == '__main__': - master_raster = '/home/thomas/share/master_rasters/test/master_raster_37_28_2013.tif' - model = 'checkpoints_2.h5' - model = load_model(model, custom_objects={'tf':tf, '_epsilon':_epsilon, + master_raster_t = '/home/thomas/share/master_rasters/test/master_raster_37_28_2013.tif' + master_raster = '/home/thomas/share/master_rasters/train/master_raster_39_27_2013.tif' + model_name = 'normal_loss_50_irr_weight.h5' + model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, 'weighted_loss':weighted_loss}) - outfile = '300ep.tif' + outfile = 'compare_model_outputs/new-feed-method/{}_39_27.tif'.format(model_name[:-3]) evaluate_image(master_raster, model, outfile=outfile, num_classes=4) + outfile = 'compare_model_outputs/new-feed-method/{}_37_28.tif'.format(model_name[:-3]) + evaluate_image(master_raster_t, model, outfile=outfile, num_classes=4) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index de2935d..dfa38f6 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -1,33 +1,19 @@ import os -#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' import time import keras.backend as K import tensorflow as tf -print(tf.__version__) -#tf.enable_eager_execution() -import matplotlib.pyplot as plt import 
numpy as np -from sys import stdout -from glob import glob -from skimage import transform, util -from sklearn.metrics import confusion_matrix from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) -from rasterio import open as rasopen -from rasterio.mask import mask -from shapely.geometry import shape -from pickle import load as pload -from fiona import open as fopen from data_generators import generate_unbalanced_data, SatDataSequence -from data_utils import clip_rasters, save_raster -from shapefile_utils import get_features, generate_class_mask from models import unet_same_padding def weighted_loss(target, output): - # Weight map: out = -tf.reduce_sum(target*output, len(output.get_shape())-1) return out + def weighted_focal_loss(target, output, gamma=1): # L = a0 *(1-pt)^gamma * ce # Output of model is CE. @@ -46,17 +32,20 @@ def acc(y_true, y_pred): y_t_arg_mask = tf.boolean_mask(y_t_arg, mask) return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) + def lr_schedule(epoch): lr = 1e-4 if epoch > 100: + lr /= 64 + if epoch > 45: lr /= 32. - elif epoch > 80: + elif epoch > 30: lr /= 16. - elif epoch > 40: + elif epoch > 15: lr /= 8. - elif epoch > 25: - lr /= 4. elif epoch > 10: + lr /= 4. + elif epoch > 5: lr /= 2. print('Learning rate: ', lr) return lr @@ -66,26 +55,27 @@ def lr_schedule(epoch): n_classes = 4 input_shape = (None, None, 51) weight_shape = (None, None, n_classes) - filepath = './focal_loss.h5' - + filepath = './models/normal_loss_20_irr_weight.h5' # Prepare callbacks for model saving and for learning rate adjustment. 
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', verbose=1, save_best_only=True) - tensorboard = TensorBoard(log_dir='graphs/{}'.format(time.time())) lr_scheduler = LearningRateScheduler(lr_schedule) - model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=6) + model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=5) + #model.summary() opt = tf.keras.optimizers.Adam(1e-4) - model.compile(opt, loss=weighted_focal_loss, metrics=[acc]) - class_weights = {0:4.5, 1:1.0, 2:2.96, 3:14.972} + model.compile(opt, loss=weighted_loss, metrics=[acc]) + class_weights = {0:20, 1:1.0, 2:2.96, 3:50} class_weights_valid = {0:1.0, 1:1.0, 2:1.0, 3:1.0} - generator = SatDataSequence('training_data/train/', batch_size=2, class_weights=class_weights) - valid_generator = SatDataSequence('training_data/test/', batch_size=2, - class_weights=class_weights_valid) + batch_size = 4 + generator = SatDataSequence('training_data/train/', batch_size=batch_size, + class_weights=class_weights) + valid_generator = SatDataSequence('training_data/test/', batch_size=batch_size, + class_weights=class_weights) model.fit_generator(generator, - epochs=300, + epochs=50, validation_data=valid_generator, callbacks=[checkpoint, lr_scheduler, tensorboard], verbose=1) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 7d35ed2..fa86dfa 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -188,10 +188,10 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi master_test = '/home/thomas/share/master_rasters/test' master_dirs = [master_train, master_test] year = 2013 - # for s, i in zip(shp_dirs, image_dirs): - # download_all_images(i, s, year) - # for im_dir, mas_dir in zip(image_dirs, master_dirs): - # create_all_master_rasters(im_dir, mas_dir) + for s, i in 
zip(shp_dirs, image_dirs): + download_all_images(i, s, year) + for im_dir, mas_dir in zip(image_dirs, master_dirs): + create_all_master_rasters(im_dir, mas_dir) master_train = '/home/thomas/share/master_rasters/train/' master_test = '/home/thomas/share/master_rasters/test/' image_train = '/home/thomas/share/image_data/train/' @@ -206,6 +206,7 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi train_dir = 'training_data/train/' shp_train = 'shapefile_data/train/' save = True + ''' pixel_dict = extract_training_data(target_dict, shp_train, image_train, master_train, train_dir, save=save, augment_dict=augment_dict) print("{} instances in each class.".format(pixel_dict)) @@ -218,3 +219,4 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, test_dir, save=save, augment_dict=augment_dict) print("And {} instances in each class.".format(pixel_dict)) + ''' From 98eadf185ebc3e7d4167acd52360034fe30d1146 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 23 Apr 2019 15:07:19 -0600 Subject: [PATCH 62/89] Working on data augmentation + class balance. 
Doesn't seem to be working --- fully-conv-classification/data_generators.py | 98 +++++++++--- fully-conv-classification/evaluate_image.py | 2 +- fully-conv-classification/fully_conv.py | 24 +-- fully-conv-classification/keras_cnn.py | 90 ----------- .../runner_from_shapefile.py | 4 +- fully-conv-classification/tf_fcn.py | 147 ------------------ 6 files changed, 94 insertions(+), 271 deletions(-) delete mode 100644 fully-conv-classification/keras_cnn.py delete mode 100644 fully-conv-classification/tf_fcn.py diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 7c3d8b0..b1e7ece 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -4,7 +4,7 @@ import pickle import matplotlib.pyplot as plt from glob import glob, iglob -from random import sample, shuffle +from random import sample, shuffle, choice from scipy.ndimage.morphology import distance_transform_edt from runspec import mask_rasters from data_utils import load_raster @@ -284,50 +284,76 @@ def make_border_labels(mask, border_width): class SatDataSequence(Sequence): def __init__(self, data_directory, batch_size, class_weights={}, - border_width=1): + border_width=1, classes_to_augment=None): self.data_directory = data_directory self.class_weights = class_weights self.batch_size = batch_size + self.classes_to_augment = classes_to_augment self.border_width = border_width self._get_files() self.n_files = len(self.file_list) self.idx = 0 self.shuffled = sample(self.file_list, self.n_files) + def _get_files(self): - self.file_list = [x for x in iglob(self.data_directory + "**", recursive=True)] - self.file_list = [x for x in self.file_list if os.path.isfile(x)] + # Now, get n lists where n is the number of classes (excluding border class). + # Then, sample from the minority lists until we have + # the same number of data tiles from each class, then concatenate + # all the lists and shuffle. 
on epoch end, do this process again. + self.file_dict = {} + i = 0 + for (dirpath, dirnames, filenames) in os.walk(self.data_directory): + if dirpath != self.data_directory: + self.file_dict[i] = [os.path.join(dirpath, x) for x in filenames] + i += 1 + self.lengths = [len(self.file_dict[k]) for k in self.file_dict] + self._create_file_list() + + + def _create_file_list(self): + max_instances = max(self.lengths) + self.file_list = [] + for class_dir in self.file_dict: + files = self.file_dict[class_dir] + self.file_list.extend(files) + if len(files) != max_instances: + if len(files) < (max_instances - len(files)): + files *= (max_instances // len(files)) + additional_files = sample(files, max_instances - len(files)) + self.file_list.extend(additional_files) - def __len__(self): + def __len__(self): return int(np.ceil(self.n_files / self.batch_size)) + def on_epoch_end(self): + self._create_file_list() self.shuffled = sample(self.file_list, self.n_files) + def __getitem__(self, idx): - # TODO: - # How to feed examples into the network? - # Balanced in each batch is good, but when - # n classes exceeds batch size, we can't balance - # classes in each batch. 
batch = self.shuffled[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] - processed = self._make_weights_labels_and_features(data_tiles) + processed = self._make_weights_labels_and_features(data_tiles, self.classes_to_augment) batch_x = processed[0] batch_y = processed[1] return batch_x, batch_y + def _from_pickle(self, filename): with open(filename, 'rb') as f: data = pickle.load(f) return data - def _make_weights_labels_and_features(self, data_tiles): - return _preprocess_input_data(data_tiles, self.class_weights, self.border_width) + + def _make_weights_labels_and_features(self, data_tiles, classes_to_augment): + return _preprocess_input_data(data_tiles, self.class_weights, + border_width=self.border_width, classes_to_augment=classes_to_augment) -def _preprocess_input_data(data_tiles, class_weights, border_width=1): +def _preprocess_input_data(data_tiles, class_weights, classes_to_augment=None, border_width=1): features = [] one_hots = [] weightings = [] @@ -345,15 +371,51 @@ def _preprocess_input_data(data_tiles, class_weights, border_width=1): border_width=border_width) one_hot[:, :, border_class] = border_labels weights[:][border_labels[0] == 1] = class_weights[border_class] - m = np.squeeze(tile['data']) - m = np.swapaxes(m, 0, 2) - m = np.swapaxes(m, 0, 1) - features.append(m) + feature_tile = np.squeeze(tile['data']) + feature_tile = np.swapaxes(feature_tile, 0, 2) # This is necessary b/c tf expected columns_last (GeoTiffs are columns first). 
+ feature_tile = np.swapaxes(feature_tile, 0, 1) + if classes_to_augment is not None: + if classes_to_augment[tile['class_code']]: + # fig, ax = plt.subplots(ncols=2, nrows=2) + # ax[0, 0].imshow(feature_tile[:, :, 18]) + # ax[0, 1].imshow(weights[:, :, tile['class_code']]) + feature_tile, one_hot, weights = _augment_data(feature_tile, one_hot, weights) + # ax[1, 0].imshow(feature_tile[:, :, 18]) + # ax[1, 1].imshow(weights[:, :, tile['class_code']]) + # plt.show() + + features.append(feature_tile) one_hots.append(one_hot) weightings.append(weights) return [np.asarray(features), np.asarray(weightings)], [np.asarray(one_hots)] +def _yes_or_no(): + return choice([True, False]) + + +def _augment_data(feature_tile, one_hot, weights): + ''' Applies mirroring and flipping, or doesn't. ''' + if _yes_or_no(): + # Flip the data l-r. + for i in range(feature_tile.shape[2]): + feature_tile[:, :, i] = np.fliplr(feature_tile[:, :, i]) + for i in range(one_hot.shape[2]): + one_hot[:, :, i] = np.fliplr(one_hot[:, :, i]) + weights[:, :, i] = np.fliplr(weights[:, :, i]) + return feature_tile, one_hot, weights + if _yes_or_no(): + # Flip the data u-d. 
+ for i in range(feature_tile.shape[2]): + feature_tile[:, :, i] = np.flipud(feature_tile[:, :, i]) + for i in range(one_hot.shape[2]): + one_hot[:, :, i] = np.flipud(one_hot[:, :, i]) + weights[:, :, i] = np.flipud(weights[:, :, i]) + return feature_tile, one_hot, weights + return feature_tile, one_hot, weights + + + def generate_unbalanced_data(training_directory='training_data/train/', border_width=2, batch_size=2, class_weights={}, channels='all', nodata=0, n_classes=5): diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 787a392..30f2235 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -49,7 +49,7 @@ def evaluate_image(master_raster, model, num_classes=4, outfile=None, ii=None): if __name__ == '__main__': master_raster_t = '/home/thomas/share/master_rasters/test/master_raster_37_28_2013.tif' master_raster = '/home/thomas/share/master_rasters/train/master_raster_39_27_2013.tif' - model_name = 'normal_loss_50_irr_weight.h5' + model_name = 'augment_100_irr_weight.h5' model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, 'weighted_loss':weighted_loss}) outfile = 'compare_model_outputs/new-feed-method/{}_39_27.tif'.format(model_name[:-3]) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index dfa38f6..4cbecca 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -34,18 +34,18 @@ def acc(y_true, y_pred): def lr_schedule(epoch): - lr = 1e-4 + lr = 1e-3 if epoch > 100: lr /= 64 if epoch > 45: lr /= 32. elif epoch > 30: lr /= 16. - elif epoch > 15: + elif epoch > 20: lr /= 8. - elif epoch > 10: + elif epoch > 15: lr /= 4. - elif epoch > 5: + elif epoch > 10: lr /= 2. 
print('Learning rate: ', lr) return lr @@ -55,7 +55,7 @@ def lr_schedule(epoch): n_classes = 4 input_shape = (None, None, 51) weight_shape = (None, None, n_classes) - filepath = './models/normal_loss_20_irr_weight.h5' + filepath = './models/augment_20_irr_weight_more_filters.h5' # Prepare callbacks for model saving and for learning rate adjustment. checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', @@ -63,19 +63,19 @@ def lr_schedule(epoch): save_best_only=True) tensorboard = TensorBoard(log_dir='graphs/{}'.format(time.time())) lr_scheduler = LearningRateScheduler(lr_schedule) - model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=5) - #model.summary() - opt = tf.keras.optimizers.Adam(1e-4) + model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=6) + opt = tf.keras.optimizers.Adam() model.compile(opt, loss=weighted_loss, metrics=[acc]) - class_weights = {0:20, 1:1.0, 2:2.96, 3:50} + class_weights = {0:50, 1:1.0, 2:2.5, 3:50} class_weights_valid = {0:1.0, 1:1.0, 2:1.0, 3:1.0} - batch_size = 4 + classes_to_augment = {0:True, 1:False, 2:False, 3:False} + batch_size = 2 generator = SatDataSequence('training_data/train/', batch_size=batch_size, - class_weights=class_weights) + class_weights=class_weights, classes_to_augment=classes_to_augment) valid_generator = SatDataSequence('training_data/test/', batch_size=batch_size, class_weights=class_weights) model.fit_generator(generator, - epochs=50, + epochs=100, validation_data=valid_generator, callbacks=[checkpoint, lr_scheduler, tensorboard], verbose=1) diff --git a/fully-conv-classification/keras_cnn.py b/fully-conv-classification/keras_cnn.py deleted file mode 100644 index cb33b0b..0000000 --- a/fully-conv-classification/keras_cnn.py +++ /dev/null @@ -1,90 +0,0 @@ -import h5py -import os -from glob import glob -import tensorflow as tf -from sklearn.model_selection import train_test_split -from tensorflow.keras.callbacks import TensorBoard 
-import numpy as np -from shuffle_data import one_epoch - -def keras_model(kernel_size, n_classes): - model = tf.keras.Sequential() - # Must define the input shape in the first layer of the neural network - model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', - input_shape=(36, kernel_size, kernel_size))) - model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) - model.add(tf.keras.layers.Dropout(0.3)) - model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')) - model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) - model.add(tf.keras.layers.Dropout(0.3)) - model.add(tf.keras.layers.Flatten()) - model.add(tf.keras.layers.Dense(256, activation='relu')) - model.add(tf.keras.layers.Dropout(0.5)) - model.add(tf.keras.layers.Dense(n_classes, activation='softmax')) - # Take a look at the model summary - model.summary() - model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - return model - -def train_next_batch(model, features, labels, n_classes=4, epochs=5, batch_size=128): - - # shuffle the labels again - - tb = TensorBoard(log_dir='graphs/cnn/') - x_train, x_test, y_train, y_test = train_test_split(features, labels, - test_size=0.01, random_state=42) - model.fit(x_train, - y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - callbacks=[tb]) - return model - - -def evaluate_model(features, labels): - score = model.evaluate(features, labels, verbose=0) - print('\n', 'Test accuracy:', score[1], '\n') - -def make_one_hot(labels, n_classes): - ret = np.zeros((len(labels), n_classes)) - for i, e in enumerate(labels): - ret[i, int(e)] = 1 - return ret - -def get_next_batch(file_map, n_classes=4): - features, labels = next_batch(file_map) - labels = make_one_hot(labels, n_classes) - return features, labels - -def is_it(f, targets): - for e in targets: - if e in f and 'sample' not in f: - return True - return False - -def 
fnames(class_code): - return "training_data/class_{}_train.h5".format(class_code) - -# Yield the concatenated training array? - -if __name__ == '__main__': - train_dir = 'training_data/' - model_dir = 'models/' - n_epochs = 1 - kernel_size = 41 - model_name = 'model_kernel_{}'.format(kernel_size) - total_instances = 100000 - - model_path = os.path.join(model_dir, model_name) - model = keras_model(41, 2) - model = tf.keras.models.load_model(model_path) - features = np.zeros((128, 36, 41, 41)) - labels = np.zeros((128, 4)) - train_next_batch(model, features, labels) - if not os.path.isfile(model_path): - model.save(model_path) - - diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index fa86dfa..77abe9e 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -178,7 +178,7 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi # 6. Train network. 
image_train_directory = '/home/thomas/share/image_data/train/' - image_test_directory = '/home/thomas/share/image_data/test' + image_test_directory = '/home/thomas/share/image_data/test/' image_dirs = [image_train_directory, image_test_directory] shp_train = 'shapefile_data/train/' @@ -206,7 +206,6 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi train_dir = 'training_data/train/' shp_train = 'shapefile_data/train/' save = True - ''' pixel_dict = extract_training_data(target_dict, shp_train, image_train, master_train, train_dir, save=save, augment_dict=augment_dict) print("{} instances in each class.".format(pixel_dict)) @@ -219,4 +218,3 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, test_dir, save=save, augment_dict=augment_dict) print("And {} instances in each class.".format(pixel_dict)) - ''' diff --git a/fully-conv-classification/tf_fcn.py b/fully-conv-classification/tf_fcn.py deleted file mode 100644 index f66d2f1..0000000 --- a/fully-conv-classification/tf_fcn.py +++ /dev/null @@ -1,147 +0,0 @@ -import tensorflow as tf -import numpy as np -import matplotlib.pyplot as plt -from tensorflow.keras.layers import (Activation, Conv2D, UpSampling2D, BatchNormalization, MaxPooling2D, Input, Concatenate, Lambda) -from tensorflow.keras.models import Model -from tensorflow.keras.activations import relu -from tensorflow.keras.regularizers import l2 -from tensorflow.data import Dataset -from data_generators import generate_unbalanced_data - -import tensorflow.keras.backend as K - -def ConvBlock(x, filters=64, expanding_path=False): - - x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', - kernel_regularizer=l2(0.01))(x) - x = BatchNormalization()(x) - x = Activation(relu)(x) - if expanding_path: - x = Conv2D(filters=filters // 2, kernel_size=3, strides=1, padding='same', - 
kernel_regularizer=l2(0.01))(x) - else: - x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', - kernel_regularizer=l2(0.01))(x) - x = BatchNormalization()(x) - return Activation(relu)(x) - - -_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) - -def model_func(input_shape, initial_exp=6, n_classes=5): - - inp = Input(shape=input_shape) - _power = initial_exp - exp = 2 - - c1 = ConvBlock(inp, exp**_power) - mp1 = MaxPooling2D(pool_size=2, strides=2)(c1) - - _power += 1 - - c2 = ConvBlock(mp1, exp**_power) - mp2 = MaxPooling2D(pool_size=2, strides=2)(c2) - - _power += 1 - - c3 = ConvBlock(mp2, exp**_power) - mp3 = MaxPooling2D(pool_size=2, strides=2)(c3) - - _power += 1 - - c4 = ConvBlock(mp3, exp**_power) - mp4 = MaxPooling2D(pool_size=2, strides=2)(c4) - - _power += 1 - - # 1024 filters - c5 = Conv2D(filters=exp**_power, kernel_size=3, strides=1, padding='same')(mp4) - _power -= 1 - c5 = Conv2D(filters=exp**_power, kernel_size=3, strides=1, padding='same')(c5) - - u1 = UpSampling2D(size=(2, 2))(c5) - - u1_c4 = Concatenate()([u1, c4]) - - c6 = ConvBlock(u1_c4, filters=exp**_power, expanding_path=True) - - u2 = UpSampling2D(size=(2, 2))(c6) - - u2_c3 = Concatenate()([u2, c3]) - - _power -= 1 - c7 = ConvBlock(u2_c3, filters=exp**_power, expanding_path=True) - - u3 = UpSampling2D(size=(2, 2))(c7) - - u3_c2 = Concatenate()([u3, c2]) - - _power -= 1 - c8 = ConvBlock(u3_c2, filters=exp**_power, expanding_path=True) - - u4 = UpSampling2D(size=(2, 2))(c8) - - u4_c1 = Concatenate()([u4, c1]) - - _power -= 1 - c9 = ConvBlock(u4_c1, filters=exp**_power) - last_conv = Conv2D(filters=n_classes, kernel_size=1, padding='same', activation=None)(c9) - return Model(inputs=[inp], outputs=[last_conv]) - - -def weighted_loss(y_true, logits, weights): - ''' y_true: one-hot encoding of labels. - y_pred: tensor of probabilities. - weights: tensor of weights, 0 where there isn't data. 
- Recall: - L = a0*CE | focal: L = a0*(1-pt)^gamma*CE - ''' - unweighted_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=logits) - weighted_loss = unweighted_loss*weights - mask = tf.not_equal(weights, 0) - weighted_loss = tf.boolean_mask(weighted_loss, mask) - return tf.reduce_mean(weighted_loss) - -def accuracy(y_true, logits): - - mask = tf.not_equal(tf.sum(y_true, axis=len(y_true.get_shape())-1), 0) - y_pred = tf.nn.softmax(logits) - y_pred = tf.math.argmax(y_pred) - y_true = tf.math.argmax(y_true) - y_true = tf.boolean_mask(y_true, mask) - y_pred = tf.boolean_mask(y_pred, mask) - return tf.reduce_mean(tf.equal(y_true, y_pred)) - - -input_shape = (None, None, 51) -learning_rate = 1e-3 -epochs = 1 -model = model_func(input_shape, n_classes=5) -optimizer = tf.keras.optimizers.Adam(lr=learning_rate) -loss_fn = weighted_loss -training_directory = 'training_data/train/' -class_weights = {0:4.5, 1:1.0, 2:2.96, 3:14.972, 4:10} -train_data = generate_unbalanced_data(training_directory, class_weights=class_weights) -#train_data = Dataset(train_data) -loss_metric = tf.keras.metrics.Mean(name='train_loss') -acc_metric = tf.keras.metrics.Mean(name='acc') - -#@tf.function -def train_step(inputs, labels, weights): - with tf.GradientTape() as tape: - logits = model(inputs, training=True) - acc = accuracy(labels, logits) - reg_loss = tf.math.add_n(model.losses) - pred_loss = loss_fn(labels, logits, weights) - total_loss = pred_loss - gradients = tape.gradient(total_loss, model.trainable_variables) - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - loss_metric.update_state(total_loss) - acc_metric.update_state(acc) - -step = 0 -for epoch in range(epochs): - for inputs, labels, weights in train_data: - loss = train_step(inputs, labels, weights) - if step > 100: - break From 972213ff342a72eed678a36342a8f3a09723544c Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 25 Apr 2019 11:17:58 -0600 Subject: [PATCH 63/89] Added data 
augmentation + overlapping image evaluation + CE and Dice Loss + started downloading images for CO, WY, UT, and MT --- fully-conv-classification/data_generators.py | 11 ++- fully-conv-classification/evaluate_image.py | 48 ++++++++++-- fully-conv-classification/fully_conv.py | 26 +++++-- fully-conv-classification/models.py | 64 ++++++++-------- .../runner_from_shapefile.py | 76 +++++++++++-------- fully-conv-classification/shapefile_utils.py | 7 +- 6 files changed, 148 insertions(+), 84 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index b1e7ece..b055760 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -395,7 +395,16 @@ def _yes_or_no(): def _augment_data(feature_tile, one_hot, weights): - ''' Applies mirroring and flipping, or doesn't. ''' + ''' Applies lr and ud flipping, or doesn't. ''' + if _yes_or_no(): + # Rotate the data. + rot = np.random.randint(-25, 25) + for i in range(feature_tile.shape[2]): + feature_tile[:, :, i] = transform.rotate(feature_tile[:, :, i], rot, cval=0) + for i in range(one_hot.shape[2]): + one_hot[:, :, i] = transform.rotate(one_hot[:, :, i], rot, cval=0) + weights[:, :, i] = transform.rotate(weights[:, :, i], rot, cval=0) + return feature_tile, one_hot, weights if _yes_or_no(): # Flip the data l-r. for i in range(feature_tile.shape[2]): diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 30f2235..dbfd72e 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -11,8 +11,44 @@ _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) +def evaluate_image_many_shot(master_raster, model, num_classes=4, n_overlaps=20, outfile=None, ii=None): + ''' To recover from same padding, slide many different patches over the image. 
''' -def evaluate_image(master_raster, model, num_classes=4, outfile=None, ii=None): + if not os.path.isfile(master_raster): + print("Master raster not created for {}".format(suffix)) + # TODO: More extensive handling of this case. + else: + master, meta = load_raster(master_raster) + class_mask = np.ones((master.shape[1], master.shape[2], num_classes)) # Just a placeholder + out = np.zeros((master.shape[2], master.shape[1], num_classes)) + chunk_size = 608 + diff = 608 + stride = 608 + overlap_step = 10 + for k in range(0, n_overlaps*overlap_step, overlap_step): + for i in range(k, master.shape[1]-diff, stride): + for j in range(k, master.shape[2]-diff, stride): + sub_master = master[:, i:i+chunk_size, j:j+chunk_size] + sub_mask = class_mask[i:i+chunk_size, j:j+chunk_size, :] + sub_master = np.swapaxes(sub_master, 0, 2) + sub_master = np.swapaxes(sub_master, 0, 1) + sub_master = np.expand_dims(sub_master, 0) + sub_mask = np.expand_dims(sub_mask, 0) + preds = model.predict([sub_master, sub_mask]) + preds = np.exp(preds) + soft = preds / np.sum(preds, axis=-1, keepdims=True) + soft = np.swapaxes(soft, 1, 2) + out[j:j+chunk_size, i:i+chunk_size, :] += soft[0] + + stdout.write("K: {} of {}. 
Percent done: {:.2f}\r".format(k / overlap_step, n_overlaps, i / master.shape[1])) + + out = np.swapaxes(out, 0, 2) + out = out.astype(np.float32) + if outfile: + save_raster(out, outfile, meta, count=num_classes) + return out + +def evaluate_image_one_shot(master_raster, model, num_classes=4, outfile=None, ii=None): if not os.path.isfile(master_raster): print("Master raster not created for {}".format(suffix)) @@ -49,10 +85,12 @@ def evaluate_image(master_raster, model, num_classes=4, outfile=None, ii=None): if __name__ == '__main__': master_raster_t = '/home/thomas/share/master_rasters/test/master_raster_37_28_2013.tif' master_raster = '/home/thomas/share/master_rasters/train/master_raster_39_27_2013.tif' - model_name = 'augment_100_irr_weight.h5' + model_name = 'augment_60_irr_weight_more_filters.h5' model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, 'weighted_loss':weighted_loss}) - outfile = 'compare_model_outputs/new-feed-method/{}_39_27.tif'.format(model_name[:-3]) - evaluate_image(master_raster, model, outfile=outfile, num_classes=4) outfile = 'compare_model_outputs/new-feed-method/{}_37_28.tif'.format(model_name[:-3]) - evaluate_image(master_raster_t, model, outfile=outfile, num_classes=4) + #evaluate_image_many_shot(master_raster_t, model, outfile=outfile, num_classes=4) + evaluate_image_one_shot(master_raster_t, model, outfile=outfile, num_classes=4) + outfile = 'compare_model_outputs/new-feed-method/{}_39_27.tif'.format(model_name[:-3]) + #evaluate_image_many_shot(master_raster, model, outfile=outfile, num_classes=4) + evaluate_image_one_shot(master_raster, model, outfile=outfile, num_classes=4) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 4cbecca..7338bcf 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -13,6 +13,20 @@ def weighted_loss(target, output): out = -tf.reduce_sum(target*output, 
len(output.get_shape())-1) return out +def weighted_loss_ce_and_dl(target, output): + # Target: One hot encoding of segmentation mask. + # Output: Output of network. In this case, log(softmax). + soft = tf.nn.softmax(output) + numerator = tf.reduce_sum(soft*target, 1) + numerator = tf.reduce_sum(numerator, 2) + sum_ui_k = tf.reduce_sum(soft, 1) + sum_ui_k = tf.reduce_sum(sum_ui_k, 2) + sum_vi_k = tf.reduce_sum(target, 1) + sum_vi_k = tf.reduce_sum(sum_vi_k, 2) + + final = (-2/4)*tf.reduce_sum(numerator / (sum_ui_k + sum_vi_k), 1) + out = -tf.reduce_sum(target*output, len(output.get_shape())-1) + return final + out def weighted_focal_loss(target, output, gamma=1): # L = a0 *(1-pt)^gamma * ce @@ -34,7 +48,7 @@ def acc(y_true, y_pred): def lr_schedule(epoch): - lr = 1e-3 + lr = 1e-4 if epoch > 100: lr /= 64 if epoch > 45: @@ -55,7 +69,7 @@ def lr_schedule(epoch): n_classes = 4 input_shape = (None, None, 51) weight_shape = (None, None, n_classes) - filepath = './models/augment_20_irr_weight_more_filters.h5' + filepath = './models/augment_w_rotation_140_irr_weight_33m_params.h5' # Prepare callbacks for model saving and for learning rate adjustment. 
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', @@ -66,16 +80,16 @@ def lr_schedule(epoch): model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=6) opt = tf.keras.optimizers.Adam() model.compile(opt, loss=weighted_loss, metrics=[acc]) - class_weights = {0:50, 1:1.0, 2:2.5, 3:50} - class_weights_valid = {0:1.0, 1:1.0, 2:1.0, 3:1.0} + model.summary() #line_length argument + class_weights = {0:140, 1:1.0, 2:1.0, 3:25} classes_to_augment = {0:True, 1:False, 2:False, 3:False} - batch_size = 2 + batch_size = 1 generator = SatDataSequence('training_data/train/', batch_size=batch_size, class_weights=class_weights, classes_to_augment=classes_to_augment) valid_generator = SatDataSequence('training_data/test/', batch_size=batch_size, class_weights=class_weights) model.fit_generator(generator, - epochs=100, + epochs=50, validation_data=valid_generator, callbacks=[checkpoint, lr_scheduler, tensorboard], verbose=1) diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 1024361..54e8f13 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -30,17 +30,18 @@ def gradient_wrt_inputs(model, data): _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) -def ConvBlock(x, filters=64, expanding_path=False): - +def ConvBlock(x, filters=64): x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', kernel_regularizer=l2(0.01))(x) x = BatchNormalization()(x) x = Activation(relu)(x) - if expanding_path: - x = Conv2D(filters=filters // 2, kernel_size=3, strides=1, padding='same', - kernel_regularizer=l2(0.01))(x) - else: - x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', + x = Conv2D(filters=filters, kernel_size=3, strides=1, padding='same', + kernel_regularizer=l2(0.01))(x) + x = BatchNormalization()(x) + return Activation(relu)(x) + +def ConvBNRelu(x, filters=64): + x = Conv2D(filters=filters, kernel_size=3, strides=1, 
padding='same', kernel_regularizer=l2(0.01))(x) x = BatchNormalization()(x) return Activation(relu)(x) @@ -77,40 +78,35 @@ def unet_same_padding(input_shape, weight_shape, initial_exp=6, n_classes=5): _power += 1 # 1024 filters - c5 = Conv2D(filters=exp**_power, kernel_size=3, strides=1, padding='same', - kernel_regularizer=l2(0.01))(mp4) + c5 = ConvBlock(mp4, exp**_power) _power -= 1 - c5 = Conv2D(filters=exp**_power, kernel_size=3, strides=1, padding='same', - kernel_regularizer=l2(0.01))(c5) u1 = UpSampling2D(size=(2, 2))(c5) + c6 = ConvBNRelu(u1, filters=exp**_power) + u1_c4 = Concatenate()([c6, c4]) + c7 = ConvBlock(u1_c4, filters=exp**_power) - u1_c4 = Concatenate()([u1, c4]) - - c6 = ConvBlock(u1_c4, filters=exp**_power, expanding_path=True) - - u2 = UpSampling2D(size=(2, 2))(c6) - - u2_c3 = Concatenate()([u2, c3]) - - _power -= 1 - c7 = ConvBlock(u2_c3, filters=exp**_power, expanding_path=True) - - u3 = UpSampling2D(size=(2, 2))(c7) - - u3_c2 = Concatenate()([u3, c2]) - - _power -= 1 - c8 = ConvBlock(u3_c2, filters=exp**_power, expanding_path=True) - - u4 = UpSampling2D(size=(2, 2))(c8) + _power -= 1 + + u2 = UpSampling2D(size=(2, 2))(c7) + c8 = ConvBNRelu(u2, filters=exp**_power) + u2_c3 = Concatenate()([c8, c3]) + c9 = ConvBlock(u2_c3, filters=exp**_power) - u4_c1 = Concatenate()([u4, c1]) + _power -= 1 + + u3 = UpSampling2D(size=(2, 2))(c9) + c10 = ConvBNRelu(u3, filters=exp**_power) + u3_c2 = Concatenate()([c10, c2]) + c11 = ConvBlock(u3_c2, filters=exp**_power) - _power -= 1 - c9 = ConvBlock(u4_c1, filters=exp**_power) - last_conv = Conv2D(filters=n_classes, kernel_size=1, padding='same', activation='softmax')(c9) + _power -= 1 + u4 = UpSampling2D(size=(2, 2))(c11) + c12 = ConvBNRelu(u4, filters=exp**_power) + u4_c1 = Concatenate()([c12, c1]) + c13 = ConvBlock(u4_c1, filters=exp**_power) + last_conv = Conv2D(filters=n_classes, kernel_size=1, padding='same', activation='softmax')(c13) last = Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, 
True))(last_conv) last = Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(last) last = Lambda(lambda x: K.log(x))(last) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 77abe9e..abae00a 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -179,42 +179,52 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi image_train_directory = '/home/thomas/share/image_data/train/' image_test_directory = '/home/thomas/share/image_data/test/' - image_dirs = [image_train_directory, image_test_directory] shp_train = 'shapefile_data/train/' - shp_test = 'shapefile_data//test/' + shp_test = 'shapefile_data/test/' shp_dirs = [shp_train, shp_test] master_train = '/home/thomas/share/master_rasters/train/' master_test = '/home/thomas/share/master_rasters/test' master_dirs = [master_train, master_test] - year = 2013 - for s, i in zip(shp_dirs, image_dirs): - download_all_images(i, s, year) - for im_dir, mas_dir in zip(image_dirs, master_dirs): - create_all_master_rasters(im_dir, mas_dir) - master_train = '/home/thomas/share/master_rasters/train/' - master_test = '/home/thomas/share/master_rasters/test/' - image_train = '/home/thomas/share/image_data/train/' - image_test = '/home/thomas/share/image_data/test/' - irr1 = 'Huntley' - irr2 = 'Sun_River' - fallow = 'Fallow' - forest = 'Forrest' - other = 'other' - target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:2} - augment_dict = {0:True, 1:False, 2:False, 3:True} - train_dir = 'training_data/train/' - shp_train = 'shapefile_data/train/' - save = True - pixel_dict = extract_training_data(target_dict, shp_train, image_train, - master_train, train_dir, save=save, augment_dict=augment_dict) - print("{} instances in each class.".format(pixel_dict)) - max_weight = max(pixel_dict.values()) - for key in pixel_dict: - print(key, max_weight / 
pixel_dict[key]) - tot = 0 - test_dir = 'training_data/test/' - shp_test = 'shapefile_data/test/' - pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, - test_dir, save=save, augment_dict=augment_dict) - print("And {} instances in each class.".format(pixel_dict)) + shapefile_directory = 'shapefile_data/western_us/split_shapefiles/clipped_to_target_states/' + i = 0 + for f in glob(shapefile_directory + "*.shp"): + if '2013' in f: + year = 2013 + elif '2014' in f: + year = 2014 + elif '2015' in f: + year = 2015 + elif '2016' in f: + year = 2016 + else: + year = 2014 + if 'irrigated' in f: + i += 1 + download_images_over_shapefile(f, image_train_directory, year) + # master_train = '/home/thomas/share/master_rasters/train/' + # master_test = '/home/thomas/share/master_rasters/test/' + # image_train = '/home/thomas/share/image_data/train/' + # image_test = '/home/thomas/share/image_data/test/' + # irr1 = 'Huntley' + # irr2 = 'Sun_River' + # fallow = 'Fallow' + # forest = 'Forrest' + # other = 'other' + # target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:2} + # augment_dict = {0:True, 1:False, 2:False, 3:True} + # train_dir = 'training_data/train/' + # shp_train = 'shapefile_data/train/' + # save = True + # pixel_dict = extract_training_data(target_dict, shp_train, image_train, + # master_train, train_dir, save=save, augment_dict=augment_dict) + # print("{} instances in each class.".format(pixel_dict)) + # max_weight = max(pixel_dict.values()) + # for key in pixel_dict: + # print(key, max_weight / pixel_dict[key]) + # tot = 0 + # test_dir = 'training_data/test/' + # shp_test = 'shapefile_data/test/' + # pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, + # test_dir, save=save, augment_dict=augment_dict) + # print("And {} instances in each class.".format(pixel_dict)) diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 7882ec9..ba13d44 100644 
--- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -261,9 +261,6 @@ def buffer_shapefile(shp): if __name__ == '__main__': from glob import glob - out_dir = 'shapefile_data/all_shapefiles/test' - for f in glob("shapefile_data/all_shapefiles/" + '*.shp'): - print(f) + out_dir = 'shapefile_data/western_us/split_shapefiles/' + for f in glob("/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup/non-irrigated-reprojected/" + '*.shp'): filter_shapefile(f, out_dir) - break - From b215d6859acb1741701a899414b0a91e5b564e51 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 26 Apr 2019 18:52:35 -0600 Subject: [PATCH 64/89] Added more data augmentation + rmtree for gridMet tmp_dir --- fully-conv-classification/data_generators.py | 82 +++++++++++-------- fully-conv-classification/evaluate_image.py | 12 +-- fully-conv-classification/fully_conv.py | 19 +++-- fully-conv-classification/prepare_images.py | 3 +- .../runner_from_shapefile.py | 48 ++++++----- 5 files changed, 94 insertions(+), 70 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index b055760..360ddf6 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -12,6 +12,7 @@ from rasterio import open as rasopen from warnings import warn from skimage import transform +from scipy.ndimage.interpolation import shift from sat_image.warped_vrt import warp_single_image from tensorflow.keras.utils import Sequence @@ -113,6 +114,11 @@ def to_pickle(self, training_directory): def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): + ''' Fmasks are masks of clouds and water. We don't clouds/water in + the training set, so this function gets all the fmasks for a landsat + scene (contained in image_directory), and merges them into one raster. + They may not all be the same size, so warp_vrt is used to make them align. 
+ ''' paths = [] for dirpath, dirnames, filenames in os.walk(image_directory): for f in filenames: @@ -175,7 +181,7 @@ def extract_training_data(target_dict, shapefile_directory, image_directory, msk, mask_meta = generate_class_mask(match, mask_file, nodata=nodata) if fmask: msk = concatenate_fmasks(os.path.join(image_directory, suffix[:-4]), msk, - mask_meta, nodata=nodata) # Need to make sure this is doing what I expect. + mask_meta, nodata=nodata) shp = msk.shape print(match, cc) if cc is not None: @@ -205,6 +211,7 @@ def _iterate_over_raster(raster, datamask, pixel_dict, tile_size=608, augment=Fa save=True, training_directory=None, min_pixels=None): step = tile_size if augment: + # This is offline augmentation. step = np.random.randint(50, tile_size // 2) print("Augmenting w/ step:", step) for i in range(0, raster.shape[1]-tile_size, step): @@ -372,7 +379,7 @@ def _preprocess_input_data(data_tiles, class_weights, classes_to_augment=None, b one_hot[:, :, border_class] = border_labels weights[:][border_labels[0] == 1] = class_weights[border_class] feature_tile = np.squeeze(tile['data']) - feature_tile = np.swapaxes(feature_tile, 0, 2) # This is necessary b/c tf expected columns_last (GeoTiffs are columns first). + feature_tile = np.swapaxes(feature_tile, 0, 2) # This is necessary b/c tf expects columns_last (GeoTiffs are columns first). 
feature_tile = np.swapaxes(feature_tile, 0, 1) if classes_to_augment is not None: if classes_to_augment[tile['class_code']]: @@ -390,39 +397,50 @@ def _preprocess_input_data(data_tiles, class_weights, classes_to_augment=None, b return [np.asarray(features), np.asarray(weightings)], [np.asarray(one_hots)] -def _yes_or_no(): - return choice([True, False]) +def _flip_lr(feature_tile, one_hot, weights): + for i in range(feature_tile.shape[2]): + feature_tile[:, :, i] = np.fliplr(feature_tile[:, :, i]) + for i in range(one_hot.shape[2]): + one_hot[:, :, i] = np.fliplr(one_hot[:, :, i]) + weights[:, :, i] = np.fliplr(weights[:, :, i]) + return feature_tile, one_hot, weights -def _augment_data(feature_tile, one_hot, weights): - ''' Applies lr and ud flipping, or doesn't. ''' - if _yes_or_no(): - # Rotate the data. - rot = np.random.randint(-25, 25) - for i in range(feature_tile.shape[2]): - feature_tile[:, :, i] = transform.rotate(feature_tile[:, :, i], rot, cval=0) - for i in range(one_hot.shape[2]): - one_hot[:, :, i] = transform.rotate(one_hot[:, :, i], rot, cval=0) - weights[:, :, i] = transform.rotate(weights[:, :, i], rot, cval=0) - return feature_tile, one_hot, weights - if _yes_or_no(): - # Flip the data l-r. - for i in range(feature_tile.shape[2]): - feature_tile[:, :, i] = np.fliplr(feature_tile[:, :, i]) - for i in range(one_hot.shape[2]): - one_hot[:, :, i] = np.fliplr(one_hot[:, :, i]) - weights[:, :, i] = np.fliplr(weights[:, :, i]) - return feature_tile, one_hot, weights - if _yes_or_no(): - # Flip the data u-d. 
- for i in range(feature_tile.shape[2]): - feature_tile[:, :, i] = np.flipud(feature_tile[:, :, i]) - for i in range(one_hot.shape[2]): - one_hot[:, :, i] = np.flipud(one_hot[:, :, i]) - weights[:, :, i] = np.flipud(weights[:, :, i]) - return feature_tile, one_hot, weights +def _flip_ud(feature_tile, one_hot, weights): + for i in range(feature_tile.shape[2]): + feature_tile[:, :, i] = np.flipud(feature_tile[:, :, i]) + for i in range(one_hot.shape[2]): + one_hot[:, :, i] = np.flipud(one_hot[:, :, i]) + weights[:, :, i] = np.flipud(weights[:, :, i]) return feature_tile, one_hot, weights - + + +def _rotate(feature_tile, one_hot, weights): + # Rotate data. + rot = np.random.randint(-25, 25) + for i in range(feature_tile.shape[2]): + feature_tile[:, :, i] = transform.rotate(feature_tile[:, :, i], rot, cval=0) + for i in range(one_hot.shape[2]): + one_hot[:, :, i] = transform.rotate(one_hot[:, :, i], rot, cval=0) + weights[:, :, i] = transform.rotate(weights[:, :, i], rot, cval=0) + return feature_tile, one_hot, weights + + +def _flip_lr_ud(feature_tile, one_hot, weights): + feature_tile, one_hot, weights = _flip_lr(feature_tile, one_hot, weights) + feature_tile, one_hot, weights = _flip_ud(feature_tile, one_hot, weights) + return feature_tile, one_hot, weights + + +def _do_nothing(feature_tile, one_hot, weights): + return feature_tile, one_hot, weights + + +def _augment_data(feature_tile, one_hot, weights): + ''' Applies rotation | lr | ud | lr_ud | flipping, or doesn't. 
''' + possible_augments = [_rotate, _flip_ud, _flip_lr, _flip_lr_ud, _do_nothing] + possible_augments = [_do_nothing] + return choice(possible_augments)(feature_tile, one_hot, weights) def generate_unbalanced_data(training_directory='training_data/train/', border_width=2, diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index dbfd72e..8b4b496 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -11,7 +11,7 @@ _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) -def evaluate_image_many_shot(master_raster, model, num_classes=4, n_overlaps=20, outfile=None, ii=None): +def evaluate_image_many_shot(master_raster, model, num_classes=4, n_overlaps=4, outfile=None, ii=None): ''' To recover from same padding, slide many different patches over the image. ''' if not os.path.isfile(master_raster): @@ -85,12 +85,12 @@ def evaluate_image_one_shot(master_raster, model, num_classes=4, outfile=None, i if __name__ == '__main__': master_raster_t = '/home/thomas/share/master_rasters/test/master_raster_37_28_2013.tif' master_raster = '/home/thomas/share/master_rasters/train/master_raster_39_27_2013.tif' - model_name = 'augment_60_irr_weight_more_filters.h5' + model_name = 'all_classes_augmented_flips_rotations_90_irr_weight.h5' model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, 'weighted_loss':weighted_loss}) outfile = 'compare_model_outputs/new-feed-method/{}_37_28.tif'.format(model_name[:-3]) - #evaluate_image_many_shot(master_raster_t, model, outfile=outfile, num_classes=4) - evaluate_image_one_shot(master_raster_t, model, outfile=outfile, num_classes=4) + evaluate_image_many_shot(master_raster_t, model, outfile=outfile, num_classes=4) + #evaluate_image_one_shot(master_raster_t, model, outfile=outfile, num_classes=4) outfile = 'compare_model_outputs/new-feed-method/{}_39_27.tif'.format(model_name[:-3]) - 
#evaluate_image_many_shot(master_raster, model, outfile=outfile, num_classes=4) - evaluate_image_one_shot(master_raster, model, outfile=outfile, num_classes=4) + evaluate_image_many_shot(master_raster, model, outfile=outfile, num_classes=4) + #evaluate_image_one_shot(master_raster, model, outfile=outfile, num_classes=4) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 7338bcf..171e60f 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -1,5 +1,5 @@ import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' import time import keras.backend as K import tensorflow as tf @@ -48,7 +48,7 @@ def acc(y_true, y_pred): def lr_schedule(epoch): - lr = 1e-4 + lr = 1e-3 if epoch > 100: lr /= 64 if epoch > 45: @@ -69,7 +69,7 @@ def lr_schedule(epoch): n_classes = 4 input_shape = (None, None, 51) weight_shape = (None, None, n_classes) - filepath = './models/augment_w_rotation_140_irr_weight_33m_params.h5' + filepath = './models/all_augmented_same_probability_80_irr_weight.h5' # Prepare callbacks for model saving and for learning rate adjustment. 
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', @@ -77,19 +77,20 @@ def lr_schedule(epoch): save_best_only=True) tensorboard = TensorBoard(log_dir='graphs/{}'.format(time.time())) lr_scheduler = LearningRateScheduler(lr_schedule) - model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=6) + model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=5) opt = tf.keras.optimizers.Adam() model.compile(opt, loss=weighted_loss, metrics=[acc]) - model.summary() #line_length argument - class_weights = {0:140, 1:1.0, 2:1.0, 3:25} - classes_to_augment = {0:True, 1:False, 2:False, 3:False} - batch_size = 1 + #model.summary() #line_length argument + class_weights = {0:80, 1:1.0, 2:1.0, 3:50} + class_weights_valid = {0:1.0, 1:1.0, 2:1.0, 3:1.0} + classes_to_augment = {0:True, 1:True, 2:True, 3:False} + batch_size = 3 generator = SatDataSequence('training_data/train/', batch_size=batch_size, class_weights=class_weights, classes_to_augment=classes_to_augment) valid_generator = SatDataSequence('training_data/test/', batch_size=batch_size, class_weights=class_weights) model.fit_generator(generator, - epochs=50, + epochs=500, validation_data=valid_generator, callbacks=[checkpoint, lr_scheduler, tensorboard], verbose=1) diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index f38fa4b..3d5fe7c 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -28,6 +28,7 @@ from sat_image.fmask import Fmask from sat_image.warped_vrt import warp_vrt from met.thredds import GridMet, TopoWX +from shutil import rmtree from bounds import RasterBounds, GeoBounds from dem import AwsDem from ssebop_app.image import get_image @@ -103,7 +104,6 @@ def build_training(self): self.get_terrain() self.paths_map, self.masks = self._order_images() - def build_evaluating(self): self.get_landsat(fmask=True) self.profile = 
self.landsat.rasterio_geometry @@ -207,6 +207,7 @@ def get_climate_timeseries(self): print("Saving {}".format(outfile)) out_final = gm.conform(out_arr) gm.save_raster(out_final, self.landsat.rasterio_geometry, outfile) + rmtree(gm.temp_dir) def get_terrain(self): diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index abae00a..09c43fa 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -188,6 +188,8 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi master_dirs = [master_train, master_test] shapefile_directory = 'shapefile_data/western_us/split_shapefiles/clipped_to_target_states/' i = 0 + fs = [f for f in glob(shapefile_directory + "*.shp")] + print(len(fs)) for f in glob(shapefile_directory + "*.shp"): if '2013' in f: year = 2013 @@ -199,32 +201,34 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi year = 2016 else: year = 2014 - if 'irrigated' in f: - i += 1 download_images_over_shapefile(f, image_train_directory, year) - # master_train = '/home/thomas/share/master_rasters/train/' - # master_test = '/home/thomas/share/master_rasters/test/' - # image_train = '/home/thomas/share/image_data/train/' - # image_test = '/home/thomas/share/image_data/test/' - # irr1 = 'Huntley' - # irr2 = 'Sun_River' - # fallow = 'Fallow' - # forest = 'Forrest' - # other = 'other' - # target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:2} - # augment_dict = {0:True, 1:False, 2:False, 3:True} - # train_dir = 'training_data/train/' - # shp_train = 'shapefile_data/train/' - # save = True - # pixel_dict = extract_training_data(target_dict, shp_train, image_train, - # master_train, train_dir, save=save, augment_dict=augment_dict) - # print("{} instances in each class.".format(pixel_dict)) - # max_weight = max(pixel_dict.values()) - # for key in pixel_dict: - # print(key, max_weight / 
pixel_dict[key]) + print("{} of {} done.".format(i+1, len(fs))) + i += 1 + ''' + master_train = '/home/thomas/share/master_rasters/train/' + master_test = '/home/thomas/share/master_rasters/test/' + image_train = '/home/thomas/share/image_data/train/' + image_test = '/home/thomas/share/image_data/test/' + irr1 = 'irrigated' + irr2 = 'uncultivated' + fallow = 'unirrigated' + forest = 'wetlands' + other = 'other' + target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:2} + augment_dict = {0:False, 1:False, 2:False, 3:True} + train_dir = 'training_data/train/' + shp_train = 'shapefile_data/train/' + save = True + pixel_dict = extract_training_data(target_dict, shp_train, image_train, + master_train, train_dir, save=save, augment_dict=augment_dict) + print("{} instances in each class.".format(pixel_dict)) + max_weight = max(pixel_dict.values()) + for key in pixel_dict: + print(key, max_weight / pixel_dict[key]) # tot = 0 # test_dir = 'training_data/test/' # shp_test = 'shapefile_data/test/' # pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, # test_dir, save=save, augment_dict=augment_dict) # print("And {} instances in each class.".format(pixel_dict)) + ''' From d0558c654f1866694230469b15a7344c53b37832 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 30 Apr 2019 12:44:48 -0600 Subject: [PATCH 65/89] No more saving imagestack to disk. Simplified the way class codes and year attributes are assigned when extracting training data. 
--- fully-conv-classification/data_generators.py | 266 ++++-------------- fully-conv-classification/data_utils.py | 190 +++++++++++-- fully-conv-classification/fully_conv.py | 18 +- .../runner_from_shapefile.py | 154 +++------- 4 files changed, 263 insertions(+), 365 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 360ddf6..189706c 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -7,8 +7,8 @@ from random import sample, shuffle, choice from scipy.ndimage.morphology import distance_transform_edt from runspec import mask_rasters -from data_utils import load_raster -from shapefile_utils import get_shapefile_path_row, generate_class_mask +from data_utils import load_raster, paths_map, stack_rasters +from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile from rasterio import open as rasopen from warnings import warn from skimage import transform @@ -75,14 +75,6 @@ def random_sample(class_mask, n_instances, box_size=0, fill_value=1, nodata=0): return class_mask -def assign_class_code(target_dict, shapefilename): - for key in target_dict: - if key in shapefilename: - return target_dict[key] - print("{} has no known match in target_dict.".format(shapefilename)) - return None - - class DataMask(object): def __init__(self, mask, class_code): @@ -136,93 +128,74 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): return class_mask -def extract_training_data(target_dict, shapefile_directory, image_directory, - master_raster_directory, training_directory, save=True, tile_size=608, - year=2013, min_pixels=2000, fmask=True, nodata=0, augment_dict={}): - ''' - target_dict: {filename or string in filename : class_code} - This function extracts training data from master_rasters in master_raster_directory. Master - rasters are rasters containing all feature bands. 
- To do this, we iterate over the shapefile (vector) data in shapefile directory - and mask the corresponding raster with the vector data. We also ignore all pixels - in the master_rasters that have any clouds or water in them. - steps: - pull a shapefile at random from shapefile_directory. - get all the other shapefiles that are in the same path and row. - use these shapefiles to create a binary mask: 0 where there's no - data and 1 where there is data. - Assign each binary mask to a class. - Iterate over the master raster in that path / row and - create a new datatile object for each binary mask that contains - data, and save it. - ''' - - done = set() +def extract_training_data(shapefile_directory, image_directory, + training_directory, save=True, tile_size=608, + assign_shapefile_year=None, assign_shapefile_class_code=None, + min_pixels=50, fmask=True, n_classes=4, nodata=0, augment_dict={}): + + if isinstance(assign_shapefile_year, type(None)): + raise ValueError("Please provide a function to assign shapefile year.") + if isinstance(assign_shapefile_class_code, type(None)): + raise ValueError("Please provide a function to assign shapefile class code.") + pixel_dict = {} # pixel dict counts number of pixels present in each class. - for class_code in target_dict.values(): + for class_code in range(n_classes): pixel_dict[class_code] = 0 - year = year #TODO: incorporate year attr. 
from shapefile - train_raster = 'master_raster_' # template - mask_raster = 'class_mask_' # template - for f in glob(os.path.join(shapefile_directory, "*.shp")): + done = set() + all_shapefiles = [f for f in glob(os.path.join(shapefile_directory, "*.shp"))] + for f in all_shapefiles: if f not in done: done.add(f) - all_matches = all_matching_shapefiles(f, shapefile_directory) # get all shapefiles in the same path / row + all_matches = all_matching_shapefiles(f, shapefile_directory, assign_shapefile_year) # get all shapefiles + # in the same path / row / year for match in all_matches: done.add(match) - p, r = get_shapefile_path_row(f) - suffix = '{}_{}_{}.tif'.format(p, r, year) #image directory - master_raster = os.path.join(master_raster_directory, train_raster + suffix) - mask_file = os.path.join(master_raster_directory, mask_raster + suffix) # for rasterio.mask.mask - masks = [] - shp = None + p, r = get_shapefile_path_row(f) #TODO: error checking on this function. + year = assign_shapefile_year(f) + suffix = '{}_{}_{}'.format(p, r, year) + paths_mapping = paths_map(os.path.join(image_directory, suffix)) + try: + master = stack_rasters(paths_mapping, p, r, year) + except Exception as e: + print(e) + print("Bad image data in", suffix) + continue + mask_file = paths_mapping['B1.TIF'][0] + masks = [] for match in all_matches: - cc = assign_class_code(target_dict, match) - msk, mask_meta = generate_class_mask(match, mask_file, nodata=nodata) + cc = assign_shapefile_class_code(match) + if cc is None: + raise ValueError("Shapefile {} not provided with a class code.".format(os.path.basename(match))) + msk, mask_meta = mask_raster_to_shapefile(match, mask_file, return_binary=True) if fmask: - msk = concatenate_fmasks(os.path.join(image_directory, suffix[:-4]), msk, + msk = concatenate_fmasks(os.path.join(image_directory, suffix), msk, mask_meta, nodata=nodata) - shp = msk.shape - print(match, cc) - if cc is not None: - dm = DataMask(msk, cc) # a binary mask that has a 
class_code attributed to it. - masks.append(dm) - - if save: - master, meta = load_raster(master_raster) - else: - master = np.zeros(shp) - - for datamask in masks: - if augment_dict[datamask.class_code]: - pixel_dict = _iterate_over_raster(master, datamask, pixel_dict, - tile_size=tile_size, save=save, augment=True, min_pixels=min_pixels, - training_directory=training_directory) - else: - pixel_dict = _iterate_over_raster(master, datamask, - pixel_dict, min_pixels=min_pixels, - tile_size=tile_size, save=save, - training_directory=training_directory) + dm = DataMask(msk, cc) # a binary mask that has a class_code attributed to it. + masks.append(dm) + print("Extracting data for {}. CC: {}. Year: {}".format(os.path.basename(match), cc, + year)) + + pixel_dict = _iterate_over_raster(master, masks, pixel_dict, + tile_size=tile_size, save=save, min_pixels=min_pixels, + training_directory=training_directory) + print("{} of {} shapefiles done. ".format(len(done), len(all_shapefiles))) return pixel_dict -def _iterate_over_raster(raster, datamask, pixel_dict, tile_size=608, augment=False, +def _iterate_over_raster(raster, datamasks, pixel_dict, tile_size=608, save=True, training_directory=None, min_pixels=None): - step = tile_size - if augment: - # This is offline augmentation. 
- step = np.random.randint(50, tile_size // 2) - print("Augmenting w/ step:", step) + step = tile_size for i in range(0, raster.shape[1]-tile_size, step): for j in range(0, raster.shape[2]-tile_size, step): sub_raster = raster[:, i:i+tile_size, j:j+tile_size] - sub_mask = datamask.mask[:, i:i+tile_size, j:j+tile_size] - if _check_dimensions_and_content(sub_raster, sub_mask, tile_size, min_pixels): - pixel_dict[datamask.class_code] += len(np.where(sub_mask != 0)[0]) - if save: - dt = DataTile(sub_raster, sub_mask, datamask.class_code) - dt.to_pickle(training_directory) + for datamask in datamasks: + sub_mask = datamask.mask[:, i:i+tile_size, j:j+tile_size] + if _check_dimensions_and_content(sub_raster, sub_mask, tile_size, min_pixels): + pixel_dict[datamask.class_code] += len(np.where(sub_mask != 0)[0]) + if save: + dt = DataTile(sub_raster, sub_mask, datamask.class_code) + dt.to_pickle(training_directory) return pixel_dict @@ -236,51 +209,16 @@ def _check_dimensions_and_content(sub_raster, sub_mask, tile_size, min_pixels): return True -def all_matching_shapefiles(to_match, shapefile_directory): +def all_matching_shapefiles(to_match, shapefile_directory, assign_shapefile_year): out = [] pr = get_shapefile_path_row(to_match) + year = assign_shapefile_year(to_match) for f in glob(os.path.join(shapefile_directory, "*.shp")): - if get_shapefile_path_row(f) == pr: - out.append(f) + if get_shapefile_path_row(f) == pr and assign_shapefile_year(f) == year: + out.append(f) return out -class DataGen: - ''' Infinite data generator. Pulls files from - a directory named "class_dir". - Class dir can have multiple directories full of data files - in it. 
- ''' - - def __init__(self, class_dir, augment=False, random_augment=False): - self.file_list = None - self.class_dir = class_dir - self._get_files() - self.n_files = len(self.file_list) - self.idx = 0 - self.shuffled = sample(self.file_list, self.n_files) - - def _get_files(self): - self.file_list = [x for x in iglob(self.class_dir + "**", recursive=True)] - self.file_list = [x for x in self.file_list if os.path.isfile(x)] - - def next(self): - if self.idx == self.n_files: - self.idx = 0 - self.shuffled = sample(self.file_list, self.n_files) - out = self.shuffled[self.idx] - self.idx += 1 - else: - out = self.shuffled[self.idx] - self.idx += 1 - return self._from_pickle(out) - - def _from_pickle(self, filename): - with open(filename, 'rb') as f: - data = pickle.load(f) - return data - - def make_border_labels(mask, border_width): ''' Border width: Pixel width. ''' dm = distance_map(mask) @@ -327,6 +265,7 @@ def _create_file_list(self): if len(files) != max_instances: if len(files) < (max_instances - len(files)): files *= (max_instances // len(files)) + shuffle(files) additional_files = sample(files, max_instances - len(files)) self.file_list.extend(additional_files) @@ -439,97 +378,8 @@ def _do_nothing(feature_tile, one_hot, weights): def _augment_data(feature_tile, one_hot, weights): ''' Applies rotation | lr | ud | lr_ud | flipping, or doesn't. 
''' possible_augments = [_rotate, _flip_ud, _flip_lr, _flip_lr_ud, _do_nothing] - possible_augments = [_do_nothing] return choice(possible_augments)(feature_tile, one_hot, weights) -def generate_unbalanced_data(training_directory='training_data/train/', border_width=2, - batch_size=2, class_weights={}, - channels='all', nodata=0, n_classes=5): - ''' Assumes data is stored in training_directory ''' - border_class = len(class_weights.keys()) - 1 - gen = DataGen(training_directory) - while True: - masters = [] - one_hots = [] - weightings = [] - tile_shape = None - for _ in range(batch_size): - tile = gen.next().copy() - if tile_shape is None: - tile_shape = tile['class_mask'].shape - one_hot = np.zeros((tile_shape[1], tile_shape[2], n_classes)) - weights = np.zeros((tile_shape[1], tile_shape[2])) - labels = tile['class_mask'][0] - one_hot[:, :, tile['class_code']] = labels - weights[labels == 1] = class_weights[tile['class_code']] - if tile['class_code'] == 0: - border_labels = make_border_labels(tile['class_mask'], - border_width=border_width) - one_hot[:, :, border_class] = border_labels - weights[border_labels[0] == 1] = class_weights[border_class] - m = np.squeeze(tile['data']) - m = np.swapaxes(m, 0, 2) - m = np.swapaxes(m, 0, 1) - masters.append(m) - one_hots.append(one_hot) - weightings.append(weights) - - yield np.asarray(masters), np.asarray(one_hots), np.asarray(weightings) - - -def generate_training_data(training_directory, threshold=None, sigma=None, - w0=None, class_weights={}, channels='all', nodata=0, n_classes=5): - ''' Assumes data is stored in training_directory - in subdirectories labeled class_n_train with n the class code ''' - class_dirs = [os.path.join(training_directory, x) for x in os.listdir(training_directory)] - if not len(class_dirs): - class_dirs = [training_directory] - generators = [] - border_class = len(class_weights.keys()) - for d in class_dirs: - generators.append(DataGen(d)) - while True: - masters = [] - one_hots = [] - 
weightings = [] - tile_shape = None - for _ in range(2): - data_tiles = [] - weighting_dict = {} - count_dict = {} - for gen in generators: - out = gen.next().copy() - if tile_shape is None: - tile_shape = out['class_mask'].shape - data_tiles.append(out) - n_samples = len(np.where(out['class_mask'] != nodata)[0]) - weighting_dict[out['class_code']] = n_samples - count_dict[out['class_code']] = n_samples - - maxx = max(weighting_dict.values()) - for key in weighting_dict: - weighting_dict[key] = maxx / weighting_dict[key] - - for tile in data_tiles: - one_hot = np.zeros((tile_shape[1], tile_shape[2], n_classes)) - weights = np.zeros((tile_shape[1], tile_shape[2])) - labels = tile['class_mask'] - one_hot[:, :, tile['class_code']] = labels - weights[labels == 1] = class_weights[tile['class_code']] - if tile['class_code'] == 0: - border_labels = make_border_labels(tile['class_mask'], border_width=2) - one_hot[:, :, border_class] = border_labels - weights[border_labels == 1] = class_weights[border_class] - - m = np.squeeze(tile['data']) - m = np.swapaxes(m, 0, 2) - masters.append(m) - one_hots.append(one_hot) - weightings.append(weights) - - yield np.asarray(masters), np.asarray(masks), np.asarray(weightings) - - if __name__ == '__main__': pass diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 17cfdcf..4613ed0 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -15,9 +15,68 @@ from prepare_images import ImageStack from shapefile_utils import get_features from sat_image.warped_vrt import warp_single_image +from runspec import landsat_rasters, static_rasters, climate_rasters WRS2 = '../spatial_data/wrs2_descending_usa.shp' +def paths_map(image_directory, satellite=8): + ''' Recursively get all rasters in image_directory + and its subdirectories, and adds them to band_map. 
''' + band_map = defaultdict(list) + for band in landsat_rasters()[satellite]: + band_map[band] = [] + for band in static_rasters(): + band_map[band] = [] + for band in climate_rasters(): + band_map[band] = [] + + extensions = (".tif", ".TIF") + for dirpath, dirnames, filenames in os.walk(image_directory): + for f in filenames: + if any(ext in f for ext in extensions): + for band in band_map: + if f.endswith(band): + band_map[band].append(os.path.join(dirpath, f)) + + for band in band_map: + band_map[band] = sorted(band_map[band]) # ensures ordering within bands - sort by time. + + return band_map + + +def stack_rasters(paths_map, path, row, year): + first = True + stack = None + num_rasters = 0 + for key in paths_map: + num_rasters += len(paths_map[key]) + j = 0 + for feat in sorted(paths_map.keys()): # ensures the stack is in the same order each time. + # Ordering within bands is assured by sorting the list that + # each band corresponding to, as that's sorting by date. + feature_rasters = paths_map[feat] # maps bands to their location in filesystem. + for feature_raster in feature_rasters: + with rasopen(feature_raster, mode='r') as src: + arr = src.read() + raster_geo = src.meta.copy() + if first: + first_geo = raster_geo.copy() + empty = zeros((num_rasters, arr.shape[1], arr.shape[2]), float32) + stack = empty + stack[j, :, :] = arr + j += 1 + first = False + else: + try: + stack[j, :, :] = arr + j += 1 + except ValueError: + arr = warp_single_image(feature_raster, first_geo) + stack[j, :, :] = arr + j += 1 + return stack + + def create_master_raster(paths_map, path, row, year, raster_directory, mean_map=None, stddev_map=None): """ Creates a master raster with depth given by the organization of the @@ -191,6 +250,107 @@ def get_wrs2_features(path, row): return None +def all_rasters(image_directory, satellite=8): + ''' Recursively get all rasters in image_directory + and its subdirectories, and adds them to band_map. 
''' + band_map = defaultdict(list) + for band in landsat_rasters()[satellite]: + band_map[band] = [] + for band in static_rasters(): + band_map[band] = [] + for band in climate_rasters(): + band_map[band] = [] + + extensions = (".tif", ".TIF") + for dirpath, dirnames, filenames in os.walk(image_directory): + for f in filenames: + if any(ext in f for ext in extensions): + for band in band_map: + if f.endswith(band): + band_map[band].append(os.path.join(dirpath, f)) + + for band in band_map: + band_map[band] = sorted(band_map[band]) # ensures ordering within bands - sort by time. + + return band_map + + +def raster_means(image_directory, satellite=8): + """ Gets all means of all images stored + in image_directory and its subdirectories. + Images end with (.tif, .TIF) + Image_directory in a typical case would + be project_root/image_data/train/. + This returns band_map, which is a dict of lists with + keys band names (B1, B2...) and values lists of + the locations of the rasters in the filesystem.""" + + outfile = os.path.join(image_directory, "mean_mapping.pkl") + if os.path.isfile(outfile): + with open(outfile, 'rb') as f: + mean_mapping = pickle.load(f) + return mean_mapping + + band_map = all_rasters(image_directory, satellite) + mean_mapping = {} + + for band in band_map: + mean, bnd = bandwise_mean(band_map[band], band) + mean_mapping[band] = mean + + with open(outfile, 'wb') as f: + pickle.dump(mean_mapping, f) + + return mean_mapping + + +def raster_stds(image_directory, mean_map, satellite=8): + + outfile = os.path.join(image_directory, "stddev_mapping.pkl") + if os.path.isfile(outfile): + with open(outfile, 'rb') as f: + stddev_mapping = pickle.load(f) + return stddev_mapping + + band_map = all_rasters(image_directory, satellite) # get all rasters + # in the image directory + stddev_mapping = {} + + for band in band_map.keys(): + std, bnd = bandwise_stddev(band_map[band], band, mean_map[band]) + stddev_mapping[band] = std + + with open(outfile, 'wb') as f: + 
pickle.dump(stddev_mapping, f) + + pprint('STDMAP') + pprint(stddev_mapping) + print("-------") + pprint('MEANMAP') + pprint(mean_map) + + return stddev_mapping + + +def create_all_master_rasters(image_directory, raster_save_directory, mean_mapping=None, + stddev_mapping=None): + """ Creates a master raster for all images in image_directory. + Image directory is assumed to be a top-level directory that contains + all the path_row directories for test or train (image_data/test/path_row_year*/) + Image directory is image_data/test/ in this case.""" + dirs = os.listdir(image_directory) + for sub_dir in dirs: + out = os.path.join(image_directory, sub_dir) + if os.path.isdir(out): + paths_map = all_rasters(out) + i = 0 + path = sub_dir[:2] + row = sub_dir[3:5] + year = sub_dir[-4:] + create_master_raster(paths_map, path, row, year, raster_save_directory, mean_mapping, + stddev_mapping) + + def clip_raster(evaluated, path, row, outfile=None): shp = gpd.read_file(WRS2) @@ -221,36 +381,6 @@ def load_raster(master_raster): return arr, meta -def get_class_weighting(training_directory, w0=15, sigma=2, threshold=0.7*15, n_classes=4): - ''' This function should return the correct number of pixels per class - to be used in weighting the classes. ''' - pixel_dict = {} - border_class = n_classes - for i in range(n_classes+1): - pixel_dict[i] = 0 - for f in os.listdir(training_directory): - if os.path.isdir(os.path.join(training_directory, f)): - for data in glob(os.path.join(training_directory, f, "*.pkl")): - with open(data, 'rb') as f: - d = load(f) - mask = d['class_mask'][0, :, :] - mask[mask != -1] = 1 # make the mask binary. - mask[mask == -1] = 0 # -1 is NO_DATA. 
- weights = weight_map(mask, w0=w0, sigma=sigma) # create weight map - labels = weights.copy() - labels[labels >= threshold] = border_class - labels[mask == 1] = d['class_code'] - pixel_dict[d['class_code']] += labels[labels == d['class_code']].size - pixel_dict[border_class] += labels[labels == border_class].size - - pd = count_all_pixels(training_directory) - out = {} - mx = max(pd.values()) - for key in pd: - out[key] = mx / pd[key] - return out - - def save_model_info(outfile, args): template = '{}={}|' with open(outfile, 'a') as f: diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 171e60f..31bf3ad 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -13,6 +13,7 @@ def weighted_loss(target, output): out = -tf.reduce_sum(target*output, len(output.get_shape())-1) return out + def weighted_loss_ce_and_dl(target, output): # Target: One hot encoding of segmentation mask. # Output: Output of network. In this case, log(softmax). @@ -28,6 +29,7 @@ def weighted_loss_ce_and_dl(target, output): out = -tf.reduce_sum(target*output, len(output.get_shape())-1) return final + out + def weighted_focal_loss(target, output, gamma=1): # L = a0 *(1-pt)^gamma * ce # Output of model is CE. @@ -48,7 +50,7 @@ def acc(y_true, y_pred): def lr_schedule(epoch): - lr = 1e-3 + lr = 1e-4 if epoch > 100: lr /= 64 if epoch > 45: @@ -64,12 +66,13 @@ def lr_schedule(epoch): print('Learning rate: ', lr) return lr + if __name__ == '__main__': - n_classes = 4 + n_classes = 5 input_shape = (None, None, 51) weight_shape = (None, None, n_classes) - filepath = './models/all_augmented_same_probability_80_irr_weight.h5' + filepath = './models/all_data.h5' # Prepare callbacks for model saving and for learning rate adjustment. 
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', @@ -81,16 +84,17 @@ def lr_schedule(epoch): opt = tf.keras.optimizers.Adam() model.compile(opt, loss=weighted_loss, metrics=[acc]) #model.summary() #line_length argument - class_weights = {0:80, 1:1.0, 2:1.0, 3:50} - class_weights_valid = {0:1.0, 1:1.0, 2:1.0, 3:1.0} - classes_to_augment = {0:True, 1:True, 2:True, 3:False} + # irrigated, uncultivated, unirrigated, wetlands, border + # need to make a test set for this class set + class_weights = {0:50, 1:1.0, 2:25, 3:50, 4:50} + classes_to_augment = {0:True, 1:False, 2:False, 3:False, 4:False} batch_size = 3 generator = SatDataSequence('training_data/train/', batch_size=batch_size, class_weights=class_weights, classes_to_augment=classes_to_augment) valid_generator = SatDataSequence('training_data/test/', batch_size=batch_size, class_weights=class_weights) model.fit_generator(generator, - epochs=500, + epochs=100, validation_data=valid_generator, callbacks=[checkpoint, lr_scheduler, tensorboard], verbose=1) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 09c43fa..5ce8b76 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -64,105 +64,27 @@ def download_all_images(image_directory, shapefile_directory, year=2013): print("Done downloading images for {}. Make sure there were no 503 codes returned".format(shapefile_directory)) -def all_rasters(image_directory, satellite=8): - ''' Recursively get all rasters in image_directory - and its subdirectories, and adds them to band_map. 
''' - band_map = defaultdict(list) - for band in landsat_rasters()[satellite]: - band_map[band] = [] - for band in static_rasters(): - band_map[band] = [] - for band in climate_rasters(): - band_map[band] = [] - - extensions = (".tif", ".TIF") - for dirpath, dirnames, filenames in os.walk(image_directory): - for f in filenames: - if any(ext in f for ext in extensions): - for band in band_map: - if f.endswith(band): - band_map[band].append(os.path.join(dirpath, f)) - - for band in band_map: - band_map[band] = sorted(band_map[band]) # ensures ordering within bands - sort by time. - - return band_map - - -def raster_means(image_directory, satellite=8): - """ Gets all means of all images stored - in image_directory and its subdirectories. - Images end with (.tif, .TIF) - Image_directory in a typical case would - be project_root/image_data/train/. - This returns band_map, which is a dict of lists with - keys band names (B1, B2...) and values lists of - the locations of the rasters in the filesystem.""" - - outfile = os.path.join(image_directory, "mean_mapping.pkl") - if os.path.isfile(outfile): - with open(outfile, 'rb') as f: - mean_mapping = pickle.load(f) - return mean_mapping - - band_map = all_rasters(image_directory, satellite) - mean_mapping = {} - - for band in band_map: - mean, bnd = bandwise_mean(band_map[band], band) - mean_mapping[band] = mean - - with open(outfile, 'wb') as f: - pickle.dump(mean_mapping, f) - - return mean_mapping - - -def raster_stds(image_directory, mean_map, satellite=8): - - outfile = os.path.join(image_directory, "stddev_mapping.pkl") - if os.path.isfile(outfile): - with open(outfile, 'rb') as f: - stddev_mapping = pickle.load(f) - return stddev_mapping - - band_map = all_rasters(image_directory, satellite) # get all rasters - # in the image directory - stddev_mapping = {} - - for band in band_map.keys(): - std, bnd = bandwise_stddev(band_map[band], band, mean_map[band]) - stddev_mapping[band] = std - - with open(outfile, 'wb') as f: - 
pickle.dump(stddev_mapping, f) - - pprint('STDMAP') - pprint(stddev_mapping) - print("-------") - pprint('MEANMAP') - pprint(mean_map) - - return stddev_mapping - - -def create_all_master_rasters(image_directory, raster_save_directory, mean_mapping=None, - stddev_mapping=None): - """ Creates a master raster for all images in image_directory. - Image directory is assumed to be a top-level directory that contains - all the path_row directories for test or train (image_data/test/path_row_year*/) - Image directory is image_data/test/ in this case.""" - dirs = os.listdir(image_directory) - for sub_dir in dirs: - out = os.path.join(image_directory, sub_dir) - if os.path.isdir(out): - paths_map = all_rasters(out) - i = 0 - path = sub_dir[:2] - row = sub_dir[3:5] - year = sub_dir[-4:] - create_master_raster(paths_map, path, row, year, raster_save_directory, mean_mapping, - stddev_mapping) +def assign_shapefile_class_code(shapefile): + if 'irrigated' in shapefile and 'unirrigated' not in shapefile: + return 0 + if 'unirrigated' in shapefile: + return 1 + if 'uncultivated' in shapefile: + return 2 + if 'wetlands' in shapefile: + return 3 + + +def assign_shapefile_year(shapefile): + if '2013' in shapefile: + return 2013 + if '2014' in shapefile: + return 2014 + if '2015' in shapefile: + return 2015 + if '2016' in shapefile: + return 2016 + return 2014 if __name__ == "__main__": @@ -188,6 +110,7 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi master_dirs = [master_train, master_test] shapefile_directory = 'shapefile_data/western_us/split_shapefiles/clipped_to_target_states/' i = 0 + ''' fs = [f for f in glob(shapefile_directory + "*.shp")] print(len(fs)) for f in glob(shapefile_directory + "*.shp"): @@ -205,30 +128,21 @@ def create_all_master_rasters(image_directory, raster_save_directory, mean_mappi print("{} of {} done.".format(i+1, len(fs))) i += 1 ''' - master_train = '/home/thomas/share/master_rasters/train/' - master_test = 
'/home/thomas/share/master_rasters/test/' image_train = '/home/thomas/share/image_data/train/' - image_test = '/home/thomas/share/image_data/test/' - irr1 = 'irrigated' - irr2 = 'uncultivated' - fallow = 'unirrigated' - forest = 'wetlands' - other = 'other' - target_dict = {irr2:0, irr1:0, fallow:1, forest:2, other:2} - augment_dict = {0:False, 1:False, 2:False, 3:True} - train_dir = 'training_data/train/' + image_test = '/home/thomas/share/image_data/train/' + train_dir = '/home/thomas/share/training_data/train/' shp_train = 'shapefile_data/train/' - save = True - pixel_dict = extract_training_data(target_dict, shp_train, image_train, - master_train, train_dir, save=save, augment_dict=augment_dict) + pixel_dict = extract_training_data(shp_train, image_train, + train_dir, assign_shapefile_year=assign_shapefile_year, + assign_shapefile_class_code=assign_shapefile_class_code) print("{} instances in each class.".format(pixel_dict)) max_weight = max(pixel_dict.values()) for key in pixel_dict: print(key, max_weight / pixel_dict[key]) - # tot = 0 - # test_dir = 'training_data/test/' - # shp_test = 'shapefile_data/test/' - # pixel_dict = extract_training_data(target_dict, shp_test, image_test, master_test, - # test_dir, save=save, augment_dict=augment_dict) - # print("And {} instances in each class.".format(pixel_dict)) - ''' + tot = 0 + test_dir = '/home/thomas/share/training_data/test/' + shp_test = 'shapefile_data/test/' + pixel_dict = extract_training_data(shp_test, image_test, + test_dir, assign_shapefile_year=assign_shapefile_year, + assign_shapefile_class_code=assign_shapefile_class_code) + print("And {} instances in each class.".format(pixel_dict)) From 7fdae3d32df9b64189f55699e98ebe85d324a9d5 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 2 Jun 2019 10:57:39 -0600 Subject: [PATCH 66/89] No more saving split shapefiles to disk, now all path/rows over shapefiles are kept in memory --- fully-conv-classification/data_generators.py | 92 +++++----------- 
fully-conv-classification/evaluate_image.py | 13 ++- fully-conv-classification/fully_conv.py | 32 +++--- fully-conv-classification/prepare_images.py | 5 +- .../runner_from_shapefile.py | 103 ++++++------------ fully-conv-classification/shapefile_utils.py | 53 ++++++--- 6 files changed, 122 insertions(+), 176 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 189706c..f899e57 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -3,78 +3,27 @@ import time import pickle import matplotlib.pyplot as plt -from glob import glob, iglob +import warnings +from glob import glob from random import sample, shuffle, choice from scipy.ndimage.morphology import distance_transform_edt -from runspec import mask_rasters -from data_utils import load_raster, paths_map, stack_rasters -from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile from rasterio import open as rasopen -from warnings import warn from skimage import transform -from scipy.ndimage.interpolation import shift from sat_image.warped_vrt import warp_single_image from tensorflow.keras.utils import Sequence +from runspec import mask_rasters +from data_utils import load_raster, paths_map, stack_rasters +from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile + def distance_map(mask): mask = mask.copy().astype(bool) mask = ~mask # make the non-masked areas masked distances = distance_transform_edt(mask) # ask where the closest masked pixel is - # distances are always positive, so 1-distances can be very negative. - # We're setting the e-folding time with sigma, and the - # border pixel value (y-intercept) with w0. 
return distances -def weight_map(mask, w0=10, sigma=10): - mask = mask.copy().astype(bool) - mask = ~mask # make the non-masked areas masked - distances = distance_transform_edt(mask) # ask where the closest masked pixel is - # distances are always positive, so 1-distances can be very negative. - # We're setting the e-folding time with sigma, and the - # border pixel value (y-intercept) with w0. - return w0*np.exp((1-distances) / sigma) - - -def random_sample(class_mask, n_instances, box_size=0, fill_value=1, nodata=0): - if box_size: - n_instances /= box_size - - out = np.where(class_mask != nodata) - class_mask = class_mask.copy() - try: - out_x = out[1] - out_y = out[2] - except IndexError as e: - out_x = out[0] - out_y = out[1] - - indices = np.random.choice(len(out_x), size=n_instances, replace=False) - out_x = out_x[indices] - out_y = out_y[indices] - - try: - class_mask[:, :, :] = nodata - if box_size == 0: - class_mask[0, out_x, out_y] = fill_value - else: - ofs = box_size // 2 - for x, y in zip(out_x, out_y): - class_mask[0, x-ofs:x+ofs+1, y-ofs:y+ofs+1] = fill_value - - except IndexError as e: - class_mask[:, :] = nodata - if box_size == 0: - class_mask[out_x, out_y] = fill_value - else: - ofs = box_size // 2 - for x, y in zip(out_x, out_y): - class_mask[x-ofs:x+ofs, y-ofs:y+ofs] = fill_value - - return class_mask - - class DataMask(object): def __init__(self, mask, class_code): @@ -106,10 +55,11 @@ def to_pickle(self, training_directory): def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): - ''' Fmasks are masks of clouds and water. We don't clouds/water in + ''' + ``Fmasks'' are masks of clouds and water. We don't want clouds/water in the training set, so this function gets all the fmasks for a landsat scene (contained in image_directory), and merges them into one raster. - They may not all be the same size, so warp_vrt is used to make them align. + They may not be the same size, so warp_vrt is used to make them align. 
''' paths = [] for dirpath, dirnames, filenames in os.walk(image_directory): @@ -118,9 +68,9 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): if f.endswith(suffix): paths.append(os.path.join(dirpath, f)) for fmask_file in paths: - fmask, fmeta = load_raster(fmask_file) + fmask, _ = load_raster(fmask_file) try: - class_mask[fmask == 1] = nodata # 0 index is for removing the (1, n, m) dimension. + class_mask[fmask == 1] = nodata except (ValueError, IndexError) as e: fmask = warp_single_image(fmask_file, class_mask_geo) class_mask[fmask == 1] = nodata @@ -131,7 +81,7 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): def extract_training_data(shapefile_directory, image_directory, training_directory, save=True, tile_size=608, assign_shapefile_year=None, assign_shapefile_class_code=None, - min_pixels=50, fmask=True, n_classes=4, nodata=0, augment_dict={}): + min_pixels=500, fmask=True, n_classes=4, nodata=0, augment_dict={}): if isinstance(assign_shapefile_year, type(None)): raise ValueError("Please provide a function to assign shapefile year.") @@ -153,15 +103,21 @@ def extract_training_data(shapefile_directory, image_directory, p, r = get_shapefile_path_row(f) #TODO: error checking on this function. year = assign_shapefile_year(f) suffix = '{}_{}_{}'.format(p, r, year) + if not os.path.isdir(os.path.join(image_directory, suffix)): + # TODO: Figure out why the warning isn't working. + print("Images for {} not in given image directory ({}). 
Skipping extraction of data for following shapefiles: {}".format(suffix, image_directory, [os.path.basename(x) for x in all_matches])) + continue paths_mapping = paths_map(os.path.join(image_directory, suffix)) try: - master = stack_rasters(paths_mapping, p, r, year) + master = stack_rasters(paths_mapping, p, r, year) #todo; error check empty + # paths_mapping except Exception as e: print(e) print("Bad image data in", suffix) continue - mask_file = paths_mapping['B1.TIF'][0] + mask_file = paths_mapping['B1.TIF'][0] #TODO: this shouldn't be hardcoded. masks = [] + # TODO: Only warp fmasks/load them into memory once. for match in all_matches: cc = assign_shapefile_class_code(match) if cc is None: @@ -178,7 +134,7 @@ def extract_training_data(shapefile_directory, image_directory, pixel_dict = _iterate_over_raster(master, masks, pixel_dict, tile_size=tile_size, save=save, min_pixels=min_pixels, training_directory=training_directory) - print("{} of {} shapefiles done. ".format(len(done), len(all_shapefiles))) + print("{} of {} shapefiles done. ".format(len(done), len(all_shapefiles))) return pixel_dict @@ -276,7 +232,8 @@ def __len__(self): def on_epoch_end(self): self._create_file_list() - self.shuffled = sample(self.file_list, self.n_files) + shuffle(self.file_list) + self.shuffled = self.file_list def __getitem__(self, idx): @@ -317,6 +274,7 @@ def _preprocess_input_data(data_tiles, class_weights, classes_to_augment=None, b border_width=border_width) one_hot[:, :, border_class] = border_labels weights[:][border_labels[0] == 1] = class_weights[border_class] + feature_tile = np.squeeze(tile['data']) feature_tile = np.swapaxes(feature_tile, 0, 2) # This is necessary b/c tf expects columns_last (GeoTiffs are columns first). feature_tile = np.swapaxes(feature_tile, 0, 1) @@ -377,7 +335,7 @@ def _do_nothing(feature_tile, one_hot, weights): def _augment_data(feature_tile, one_hot, weights): ''' Applies rotation | lr | ud | lr_ud | flipping, or doesn't. 
''' - possible_augments = [_rotate, _flip_ud, _flip_lr, _flip_lr_ud, _do_nothing] + possible_augments = [_flip_ud, _flip_lr, _flip_lr_ud, _do_nothing] return choice(possible_augments)(feature_tile, one_hot, weights) diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 8b4b496..0b5698f 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -40,7 +40,7 @@ def evaluate_image_many_shot(master_raster, model, num_classes=4, n_overlaps=4, soft = np.swapaxes(soft, 1, 2) out[j:j+chunk_size, i:i+chunk_size, :] += soft[0] - stdout.write("K: {} of {}. Percent done: {:.2f}\r".format(k / overlap_step, n_overlaps, i / master.shape[1])) + stdout.write("K: {} of {}. Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / master.shape[1])) out = np.swapaxes(out, 0, 2) out = out.astype(np.float32) @@ -85,12 +85,13 @@ def evaluate_image_one_shot(master_raster, model, num_classes=4, outfile=None, i if __name__ == '__main__': master_raster_t = '/home/thomas/share/master_rasters/test/master_raster_37_28_2013.tif' master_raster = '/home/thomas/share/master_rasters/train/master_raster_39_27_2013.tif' - model_name = 'all_classes_augmented_flips_rotations_90_irr_weight.h5' + n_classes = 5 + model_name = 'augmentation_irr_and_wetlands_no_class_weights.h5' model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, 'weighted_loss':weighted_loss}) outfile = 'compare_model_outputs/new-feed-method/{}_37_28.tif'.format(model_name[:-3]) - evaluate_image_many_shot(master_raster_t, model, outfile=outfile, num_classes=4) - #evaluate_image_one_shot(master_raster_t, model, outfile=outfile, num_classes=4) + evaluate_image_many_shot(master_raster_t, model, outfile=outfile, num_classes=n_classes) + #evaluate_image_one_shot(master_raster_t, model, outfile=outfile, num_classes=n_classes) outfile = 
'compare_model_outputs/new-feed-method/{}_39_27.tif'.format(model_name[:-3]) - evaluate_image_many_shot(master_raster, model, outfile=outfile, num_classes=4) - #evaluate_image_one_shot(master_raster, model, outfile=outfile, num_classes=4) + evaluate_image_many_shot(master_raster, model, outfile=outfile, num_classes=n_classes) + #evaluate_image_one_shot(master_raster, model, outfile=outfile, num_classes=n_classes) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 31bf3ad..9f7aebf 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -5,7 +5,7 @@ import tensorflow as tf import numpy as np from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) -from data_generators import generate_unbalanced_data, SatDataSequence +from data_generators import SatDataSequence from models import unet_same_padding @@ -51,11 +51,15 @@ def acc(y_true, y_pred): def lr_schedule(epoch): lr = 1e-4 - if epoch > 100: + if epoch > 150: + lr /= 256 + elif epoch > 100: + lr /= 128 + elif epoch > 50: lr /= 64 - if epoch > 45: - lr /= 32. elif epoch > 30: + lr /= 32. + elif epoch > 25: lr /= 16. elif epoch > 20: lr /= 8. @@ -72,7 +76,7 @@ def lr_schedule(epoch): n_classes = 5 input_shape = (None, None, 51) weight_shape = (None, None, n_classes) - filepath = './models/all_data.h5' + filepath = './models/augmentation_irr_and_wetlands_no_class_weights.h5' # Prepare callbacks for model saving and for learning rate adjustment. 
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', @@ -85,16 +89,18 @@ def lr_schedule(epoch): model.compile(opt, loss=weighted_loss, metrics=[acc]) #model.summary() #line_length argument # irrigated, uncultivated, unirrigated, wetlands, border - # need to make a test set for this class set - class_weights = {0:50, 1:1.0, 2:25, 3:50, 4:50} - classes_to_augment = {0:True, 1:False, 2:False, 3:False, 4:False} + class_weights = {0:1.0, 1:1.0, 2:1.0, 3:1.0, 4:1.0} + classes_to_augment = {0:True, 1:False, 2:False, 3:True, 4:True} batch_size = 3 - generator = SatDataSequence('training_data/train/', batch_size=batch_size, + generator = SatDataSequence('/home/thomas/share/training_data/train/', batch_size=batch_size, class_weights=class_weights, classes_to_augment=classes_to_augment) - valid_generator = SatDataSequence('training_data/test/', batch_size=batch_size, - class_weights=class_weights) + valid_generator = SatDataSequence('/home/thomas/share/training_data/test/', + batch_size=batch_size, class_weights=class_weights) model.fit_generator(generator, - epochs=100, + epochs=1000, + callbacks=[lr_scheduler, checkpoint, tensorboard], + use_multiprocessing=True, validation_data=valid_generator, - callbacks=[checkpoint, lr_scheduler, tensorboard], + workers=12, + max_queue_size=20, verbose=1) diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index 3d5fe7c..d57a7ed 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -105,8 +105,9 @@ def build_training(self): self.paths_map, self.masks = self._order_images() def build_evaluating(self): + # Multiprocessing on this may not be plausible. self.get_landsat(fmask=True) - self.profile = self.landsat.rasterio_geometry + self.profile = self.landsat.rasterio_geometry # fix this? #self.get_et() This doesn't work reliably. 
self.get_climate_timeseries() self.get_terrain() @@ -207,7 +208,7 @@ def get_climate_timeseries(self): print("Saving {}".format(outfile)) out_final = gm.conform(out_arr) gm.save_raster(out_final, self.landsat.rasterio_geometry, outfile) - rmtree(gm.temp_dir) + rmtree(gm.temp_dir) def get_terrain(self): diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 5ce8b76..8db2020 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -2,12 +2,14 @@ import pickle from glob import glob from pprint import pprint +import time from numpy import save as nsave from fiona import open as fopen from collections import defaultdict, OrderedDict +from random import choice from shapely.geometry import shape -from data_utils import download_images, create_master_raster, bandwise_mean, bandwise_stddev -from shapefile_utils import get_shapefile_path_row, split_shapefile, filter_shapefile +from data_utils import download_images +from shapefile_utils import filter_shapefile_overlapping from runspec import landsat_rasters, static_rasters, climate_rasters from data_generators import extract_training_data @@ -31,7 +33,7 @@ def download_images_over_shapefile(shapefile, image_directory, year): return ims -def download_from_pr(p, r, image_directory, year, master_raster_directory): +def download_from_pr(p, r, year, image_directory): '''Downloads p/r corresponding to the location of the shapefile, and creates master raster''' suff = str(p) + '_' + str(r) + "_" + str(year) @@ -44,26 +46,9 @@ def download_from_pr(p, r, image_directory, year, master_raster_directory): ims = download_images(landsat_dir, p, r, year, satellite) else: ims = download_images(landsat_dir, p, r, year, satellite) - return ims -def download_all_images(image_directory, shapefile_directory, year=2013): - ''' Downloads all images over each shapefile in - shapefile directory, and places them in 
image_directory.''' - template = "{}_{}_{}" - done = set() - satellite = 8 - all_paths = [] - for f in glob(os.path.join(shapefile_directory, "*.shp")): - p, r = get_shapefile_path_row(f) - t = template.format(p, r, year) - if t not in done: - done.add(t) - ims = download_images_over_shapefile(f, image_directory, year) - print("Done downloading images for {}. Make sure there were no 503 codes returned".format(shapefile_directory)) - - def assign_shapefile_class_code(shapefile): if 'irrigated' in shapefile and 'unirrigated' not in shapefile: return 0 @@ -76,6 +61,8 @@ def assign_shapefile_class_code(shapefile): def assign_shapefile_year(shapefile): + # get the shapefile name, not the whole path. + shapefile = os.path.basename(shapefile) if '2013' in shapefile: return 2013 if '2014' in shapefile: @@ -84,65 +71,37 @@ def assign_shapefile_year(shapefile): return 2015 if '2016' in shapefile: return 2016 + if '2017' in shapefile: + return 2017 return 2014 + if __name__ == "__main__": # out_shapefile_directory = 'shapefile_data' # shp = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/" # This project is becoming more complicated. # Needs a test / train organization - # 1. Filter shapefiles. + # 1. Filter shapefiles. Can I fix this? Yes. Online splitting. # 2. Download images over shapefiles - # 3. Get all means/stddevs - # 4. Create master rasters - # 5. Extract training data - # 6. Train network. + # 3. Extract training data + # 4. Train network. + + # Need to download images. 
- image_train_directory = '/home/thomas/share/image_data/train/' - image_test_directory = '/home/thomas/share/image_data/test/' - image_dirs = [image_train_directory, image_test_directory] - shp_train = 'shapefile_data/train/' - shp_test = 'shapefile_data/test/' - shp_dirs = [shp_train, shp_test] - master_train = '/home/thomas/share/master_rasters/train/' - master_test = '/home/thomas/share/master_rasters/test' - master_dirs = [master_train, master_test] - shapefile_directory = 'shapefile_data/western_us/split_shapefiles/clipped_to_target_states/' - i = 0 - ''' - fs = [f for f in glob(shapefile_directory + "*.shp")] - print(len(fs)) - for f in glob(shapefile_directory + "*.shp"): - if '2013' in f: - year = 2013 - elif '2014' in f: - year = 2014 - elif '2015' in f: - year = 2015 - elif '2016' in f: - year = 2016 - else: - year = 2014 - download_images_over_shapefile(f, image_train_directory, year) - print("{} of {} done.".format(i+1, len(fs))) - i += 1 - ''' - image_train = '/home/thomas/share/image_data/train/' - image_test = '/home/thomas/share/image_data/train/' - train_dir = '/home/thomas/share/training_data/train/' - shp_train = 'shapefile_data/train/' - pixel_dict = extract_training_data(shp_train, image_train, - train_dir, assign_shapefile_year=assign_shapefile_year, - assign_shapefile_class_code=assign_shapefile_class_code) - print("{} instances in each class.".format(pixel_dict)) - max_weight = max(pixel_dict.values()) - for key in pixel_dict: - print(key, max_weight / pixel_dict[key]) - tot = 0 - test_dir = '/home/thomas/share/training_data/test/' - shp_test = 'shapefile_data/test/' - pixel_dict = extract_training_data(shp_test, image_test, - test_dir, assign_shapefile_year=assign_shapefile_year, - assign_shapefile_class_code=assign_shapefile_class_code) - print("And {} instances in each class.".format(pixel_dict)) + shapefile_directory = '/home/thomas/IrrigationGIS/western_states_irrgis/reprojected_western_gis/post-2013' + image_directory = 
'/home/thomas/share/image_data/train/' + shapefiles = [f for f in glob(os.path.join(shapefile_directory, "*.shp"))] + for f in shapefiles: + print("Downloading images for {}".format(f)) + path_row_map = filter_shapefile_overlapping(f) + year = assign_shapefile_year(f) + for path_row in path_row_map: + path = int(path_row[0:2]) + row = int(path_row[-2:]) + try: + print(path, row, year) + download_from_pr(path, row, year, image_directory) + except Exception as e: + print(e) + time.sleep(3) diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index ba13d44..3ea2841 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -17,6 +17,26 @@ def get_features(gdf): return features +def mask_raster_to_shapefile(shapefile, raster, return_binary=True): + ''' Generates a mask with 1 everywhere + shapefile data is present and a no_data value everywhere else. + no_data is -1 in this case, as it is never a valid class label. + Switching coordinate reference systems is important here, or + else the masking won't work. + ''' + shp = gpd.read_file(shapefile) + shp = shp[shp.geometry.notnull()] + with rasopen(raster, 'r') as src: + shp = shp.to_crs(src.crs) + features = get_features(shp) + arr = src.read() + out_image, out_transform = mask(src, shapes=features) + if return_binary: + out_image[out_image != 0] = 1 + meta = src.meta + return out_image, meta + + def generate_class_mask(shapefile, master_raster, nodata=-1): ''' Generates a mask with 1 everywhere shapefile data is present and a no_data value everywhere else. 
@@ -46,7 +66,7 @@ def get_shapefile_lat_lon(shapefile): return latc, lonc -def construct_kdtree(wrs2): +def _construct_kdtree(wrs2): centroids = [] path_rows = [] # a mapping features = [] @@ -90,16 +110,15 @@ def get_pr_subset(poly, tiles): return ls -def filter_shapefile(shapefile, out_directory): +def filter_shapefile_overlapping(shapefile, save=False, out_directory=None): """ Shapefiles may span multiple path/rows/years. For training, we want all of the data available. This function filters the polygons contained in the shapefile into separate files for each path/row/year contained in the shapefile. """ - # Problem: Not every polygon has a year attribute. - path_row_year_map = defaultdict(list) + path_row_map = defaultdict(list) wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') - tree, path_rows, features = construct_kdtree(wrs2) + tree, path_rows, features = _construct_kdtree(wrs2) wrs2.close() cent_arr = array([0, 0]) @@ -116,20 +135,23 @@ def filter_shapefile(shapefile, out_directory): prs = get_pr_subset(poly, tiles) # gets the matching path/rows for p in prs: - path_row_year_map[p].append(feat) + path_row_map[p].append(feat) + + if not save: + return path_row_map outfile = os.path.basename(shapefile) outfile = os.path.splitext(outfile)[0] - for path_row_year in path_row_year_map: - out = outfile + path_row_year + ".shp" + for path_row in path_row_map: + out = outfile + path_row + ".shp" with fopen(os.path.join(out_directory, out), 'w', **meta) as dst: print("Saving {}".format(out)) - for feat in path_row_year_map[path_row_year]: + for feat in path_row_map[path_row]: dst.write(feat) -def split_shapefile(base, base_shapefile, data_directory): +def filter_shapefile_non_overlapping(base, base_shapefile, data_directory): """ Shapefiles may deal with data over multiple path/rows. 
This is a method to get the minimum number of @@ -140,7 +162,7 @@ def split_shapefile(base, base_shapefile, data_directory): id_mapping = {} # TODO: un hardcode this directory. wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r') - tree, path_rows, features = construct_kdtree(wrs2) + tree, path_rows, features = _construct_kdtree(wrs2) wrs2.close() cent_arr = array([0, 0]) @@ -248,7 +270,6 @@ def required_points(shapefile, total_area, total_instances): def buffer_shapefile(shp): - buf = -0.00050 with fopen(shp, 'r') as polys: out = [] @@ -259,8 +280,8 @@ def buffer_shapefile(shp): dst.write(feat) if __name__ == '__main__': - from glob import glob - out_dir = 'shapefile_data/western_us/split_shapefiles/' - for f in glob("/home/thomas/IrrigationGIS/western_states_irrgis/western_gis_backup/non-irrigated-reprojected/" + '*.shp'): - filter_shapefile(f, out_dir) + out_dir = '/home/thomas/IrrigationGIS/UT_CO_MT_WY_split/' + for f in glob('/home/thomas/IrrigationGIS/UT_CO_MT_WY/' + "*.shp"): + if 'unirrigated' in f: + filter_shapefile(f, out_dir) From 46335806d4e2f154e3551e5fc889d38b536657ec Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sat, 8 Jun 2019 15:54:47 -0600 Subject: [PATCH 67/89] Added a clip vector function --- fully-conv-classification/shapefile_utils.py | 43 ++++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 3ea2841..11ffb4b 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -110,6 +110,35 @@ def get_pr_subset(poly, tiles): return ls +def clip_shapefile_to_geometry(shapefile, clip_shapefile, out_filename, outside_filename, + out_directory): + """ Mask a shapefile with another shapefile.""" + + intersection = [] + outside = [] + with fopen(shapefile, "r") as src: + meta = deepcopy(src.meta) + with fopen(clip_shapefile) as clip_with: + for feat in src: + poly = 
shape(feat['geometry']) + for clip_feat in clip_with: + clip_poly = shape(clip_feat['geometry']) + if poly.within(clip_poly): + intersection.append(feat) + else: + outside.append(feat) + + with fopen(os.path.join(out_directory, out_filename), 'w', **meta) as dst: + print("Saving {}".format(out_filename)) + for feat in intersection: + dst.write(feat) + + with fopen(os.path.join(out_directory, outside_filename), 'w', **meta) as dst: + print("Saving {}".format(outside_filename)) + for feat in outside: + dst.write(feat) + + def filter_shapefile_overlapping(shapefile, save=False, out_directory=None): """ Shapefiles may span multiple path/rows/years. For training, we want all of the data available. @@ -281,7 +310,13 @@ def buffer_shapefile(shp): if __name__ == '__main__': from glob import glob - out_dir = '/home/thomas/IrrigationGIS/UT_CO_MT_WY_split/' - for f in glob('/home/thomas/IrrigationGIS/UT_CO_MT_WY/' + "*.shp"): - if 'unirrigated' in f: - filter_shapefile(f, out_dir) + + pth = '/home/thomas/IrrigationGIS/western_states_irrgis/reprojected_western_gis/post-2013' + wrs2 = '/home/thomas/IrrMapper/spatial_data/wrs2_descending_test_path_rows.shp' + for f in glob(pth + "/*.shp"): + out_directory = 'shapefile_data/' + filename, _ = os.path.splitext(f) + filename = os.path.basename(filename) + test_file = filename + "_test.shp" + train_file = filename + "_train.shp" + clip_shapefile_to_geometry(f, wrs2, test_file, train_file, out_directory) From dc671909d0314651f1d568fc4ccac978dab5271c Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 25 Jun 2019 12:48:07 -0600 Subject: [PATCH 68/89] Evaluate_image no longer requires a master raster'' instead stacks rasters in memory --- fully-conv-classification/evaluate_image.py | 107 +++++++----------- .../runner_from_shapefile.py | 74 +++++++++--- fully-conv-classification/shapefile_utils.py | 2 +- 3 files changed, 105 insertions(+), 78 deletions(-) diff --git a/fully-conv-classification/evaluate_image.py 
b/fully-conv-classification/evaluate_image.py index 0b5698f..745061c 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -1,7 +1,7 @@ import os import numpy as np import tensorflow as tf -from data_utils import clip_rasters, save_raster +from data_utils import clip_rasters, save_raster, stack_rasters, paths_map from sys import stdout from tensorflow.keras.models import load_model from data_generators import load_raster @@ -11,57 +11,29 @@ _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) -def evaluate_image_many_shot(master_raster, model, num_classes=4, n_overlaps=4, outfile=None, ii=None): +def evaluate_image_many_shot(path, row, year, image_directory, model, num_classes=4, n_overlaps=4, outfile=None, ii=None): ''' To recover from same padding, slide many different patches over the image. ''' - - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive handling of this case. - else: - master, meta = load_raster(master_raster) - class_mask = np.ones((master.shape[1], master.shape[2], num_classes)) # Just a placeholder - out = np.zeros((master.shape[2], master.shape[1], num_classes)) - chunk_size = 608 - diff = 608 - stride = 608 - overlap_step = 10 - for k in range(0, n_overlaps*overlap_step, overlap_step): - for i in range(k, master.shape[1]-diff, stride): - for j in range(k, master.shape[2]-diff, stride): - sub_master = master[:, i:i+chunk_size, j:j+chunk_size] - sub_mask = class_mask[i:i+chunk_size, j:j+chunk_size, :] - sub_master = np.swapaxes(sub_master, 0, 2) - sub_master = np.swapaxes(sub_master, 0, 1) - sub_master = np.expand_dims(sub_master, 0) - sub_mask = np.expand_dims(sub_mask, 0) - preds = model.predict([sub_master, sub_mask]) - preds = np.exp(preds) - soft = preds / np.sum(preds, axis=-1, keepdims=True) - soft = np.swapaxes(soft, 1, 2) - out[j:j+chunk_size, i:i+chunk_size, :] += soft[0] - - stdout.write("K: {} of {}. 
Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / master.shape[1])) - - out = np.swapaxes(out, 0, 2) - out = out.astype(np.float32) - if outfile: - save_raster(out, outfile, meta, count=num_classes) - return out - -def evaluate_image_one_shot(master_raster, model, num_classes=4, outfile=None, ii=None): - - if not os.path.isfile(master_raster): - print("Master raster not created for {}".format(suffix)) - # TODO: More extensive handling of this case. - else: - master, meta = load_raster(master_raster) - class_mask = np.ones((master.shape[1], master.shape[2], num_classes)) # Just a placeholder - out = np.zeros((master.shape[2], master.shape[1], num_classes)) - chunk_size = 608 - diff = 608 - stride = 608 - for i in range(0, master.shape[1]-diff, stride): - for j in range(0, master.shape[2]-diff, stride): + suffix = '{}_{}_{}'.format(path, row, year) + image_path = os.path.join(image_directory, suffix) + if not os.path.isdir(image_path): + print('Images not downloaded for {}'.format(image_path)) + return + paths_mapping = paths_map(image_path) + try: + _, meta = load_raster(paths_mapping['B1.TIF'][0]) + master = stack_rasters(paths_mapping, path, row, year) + except Exception as e: + print(e) + return + class_mask = np.ones((master.shape[1], master.shape[2], num_classes)) # Just a placeholder + out = np.zeros((master.shape[2], master.shape[1], num_classes)) + chunk_size = 608 + diff = 608 + stride = 608 + overlap_step = 10 + for k in range(0, n_overlaps*overlap_step, overlap_step): + for i in range(k, master.shape[1]-diff, stride): + for j in range(k, master.shape[2]-diff, stride): sub_master = master[:, i:i+chunk_size, j:j+chunk_size] sub_mask = class_mask[i:i+chunk_size, j:j+chunk_size, :] sub_master = np.swapaxes(sub_master, 0, 2) @@ -72,26 +44,35 @@ def evaluate_image_one_shot(master_raster, model, num_classes=4, outfile=None, i preds = np.exp(preds) soft = preds / np.sum(preds, axis=-1, keepdims=True) soft = np.swapaxes(soft, 1, 2) - 
out[j:j+chunk_size, i:i+chunk_size, :] = soft - - stdout.write("N eval: {}. Percent done: {:.2f}\r".format(ii, i / master.shape[1])) - + out[j:j+chunk_size, i:i+chunk_size, :] += soft[0] + stdout.write("K: {} of {}. Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / master.shape[1])) out = np.swapaxes(out, 0, 2) out = out.astype(np.float32) + meta.update(dtype=np.float32) + out /= n_overlaps if outfile: save_raster(out, outfile, meta, count=num_classes) return out if __name__ == '__main__': - master_raster_t = '/home/thomas/share/master_rasters/test/master_raster_37_28_2013.tif' - master_raster = '/home/thomas/share/master_rasters/train/master_raster_39_27_2013.tif' + mt_path = [42, 41, 40, 39, 38, 37, 36, 35, 42, 41, 40, 39, 38, 37, 36, 35, 41, 40, 39, 38, 37, + 36, 35, 34, 40, 39, 38, 37, 36, 35, 34] + mt_row = [26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, + 28, 28, 28, 29, 29, 29, 29, 29, 29, 29] + years = [2013, 2014, 2015, 2016, 2017, 2018, 2019] n_classes = 5 model_name = 'augmentation_irr_and_wetlands_no_class_weights.h5' + image_directory = '/home/thomas/share/image_data/train/' + save_directory = '/home/thomas/share/evaluated_mt/' model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, 'weighted_loss':weighted_loss}) - outfile = 'compare_model_outputs/new-feed-method/{}_37_28.tif'.format(model_name[:-3]) - evaluate_image_many_shot(master_raster_t, model, outfile=outfile, num_classes=n_classes) - #evaluate_image_one_shot(master_raster_t, model, outfile=outfile, num_classes=n_classes) - outfile = 'compare_model_outputs/new-feed-method/{}_39_27.tif'.format(model_name[:-3]) - evaluate_image_many_shot(master_raster, model, outfile=outfile, num_classes=n_classes) - #evaluate_image_one_shot(master_raster, model, outfile=outfile, num_classes=n_classes) + for year in years: + for path, row in zip(mt_path, mt_row): + print("Evaluating", path, row, year) + suffix = 
'irr_{}_{}_{}.tif'.format(path, row, year) + outfile = os.path.join(save_directory, suffix) + if not os.path.isfile(outfile): + evaluate_image_many_shot(path, row, year, image_directory, + model, outfile=outfile, num_classes=n_classes) + else: + print("Image {} already exists.".format(outfile)) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 8db2020..618fca2 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -5,6 +5,7 @@ import time from numpy import save as nsave from fiona import open as fopen +from rasterio.errors import RasterioIOError from collections import defaultdict, OrderedDict from random import choice from shapely.geometry import shape @@ -86,22 +87,67 @@ def assign_shapefile_year(shapefile): # 2. Download images over shapefiles # 3. Extract training data # 4. Train network. - # Need to download images. + mt_path = [42, 41, 40, 39, 38, 37, 36, 35, 42, 41, 40, 39, 38, 37, 36, 35, 41, 40, 39, 38, 37, + 36, 35, 34, 40, 39, 38, 37, 36, 35, 34] + mt_row = [26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, + 28, 28, 28, 29, 29, 29, 29, 29, 29, 29] + years = [2013, 2014, 2015, 2016, 2017, 2018, 2019] shapefile_directory = '/home/thomas/IrrigationGIS/western_states_irrgis/reprojected_western_gis/post-2013' + print(len(mt_path), len(mt_row)) image_directory = '/home/thomas/share/image_data/train/' + for year in years: + for path, row in zip(mt_path, mt_row): + for attempt in range(60): + try: + print(path, row, year) + download_from_pr(path, row, year, image_directory) + break + except Exception as e: + print("EE_------------------------------------------") + print(e) + if type(e) == RasterioIOError: + print(e) + string = e.args[0] + first = string.find('/') + end = string[first:] + end = end[:end.find(',')] + os.remove(end) + time.sleep(2) + 
print("EE_------------------------------------------") + + shapefiles = [f for f in glob(os.path.join(shapefile_directory, "*.shp"))] - for f in shapefiles: - print("Downloading images for {}".format(f)) - path_row_map = filter_shapefile_overlapping(f) - year = assign_shapefile_year(f) - for path_row in path_row_map: - path = int(path_row[0:2]) - row = int(path_row[-2:]) - try: - print(path, row, year) - download_from_pr(path, row, year, image_directory) - except Exception as e: - print(e) - time.sleep(3) + + + +# for f in shapefiles: +# print("Downloading images for {}".format(f)) +# path_row_map = filter_shapefile_overlapping(f) +# year = assign_shapefile_year(f) +# for i, path_row in enumerate(path_row_map): +# path, row = path_row.split('_') +# for attempt in range(60): +# try: +# print(path, row, year, "{} of {} path/rows done.".format(i+1, len(path_row_map))) +# download_from_pr(path, row, year, image_directory) +# break +# except Exception as e: +# if type(e) == RasterioIOError: +# print(e) +# string = e.args[0] +# first = string.find('/') +# end = string[first:] +# end = end[:end.find(',')] +# os.remove(end) + + + + + + + + + + diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 11ffb4b..83e11d2 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -123,7 +123,7 @@ def clip_shapefile_to_geometry(shapefile, clip_shapefile, out_filename, outside_ poly = shape(feat['geometry']) for clip_feat in clip_with: clip_poly = shape(clip_feat['geometry']) - if poly.within(clip_poly): + if clip_poly.contains(poly): intersection.append(feat) else: outside.append(feat) From e7771ecec0f0c56d056ca13e1fc052bf88391ea8 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 18 Jul 2019 13:40:01 -0600 Subject: [PATCH 69/89] Commit before refactor of extraction of training data --- fully-conv-classification/data_utils.py | 12 --- 
fully-conv-classification/evaluate_image.py | 79 ++++++++++++++----- .../runner_from_shapefile.py | 10 --- 3 files changed, 59 insertions(+), 42 deletions(-) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 4613ed0..98d1e6f 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -228,17 +228,6 @@ def download_images(project_directory, path, row, year, satellite=8, n_landsat=3 return image_stack -def clip_rasters(evaluated_tif_dir, include_string): - for f in glob(os.path.join(evaluated_tif_dir, "*.tif")): - if include_string in f: - out = os.path.basename(f) - out = out[out.find("_")+1:] - out = out[out.find("_")+1:] - out = out[out.find("_")+1:] - path = out[:2] - row = out[3:5] - clip_raster(f, int(path), int(row), outfile=f) - def get_wrs2_features(path, row): with fopen(WRS2) as src: @@ -357,7 +346,6 @@ def clip_raster(evaluated, path, row, outfile=None): out = shp[shp['PATH'] == path] out = out[out['ROW'] == row] - with rasopen(evaluated, 'r') as src: out = out.to_crs(src.crs) meta = src.meta.copy() diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 745061c..633962c 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -1,16 +1,35 @@ import os import numpy as np +import matplotlib.pyplot as plt +import keras.backend as K import tensorflow as tf -from data_utils import clip_rasters, save_raster, stack_rasters, paths_map +import pdb from sys import stdout from tensorflow.keras.models import load_model -from data_generators import load_raster -import matplotlib.pyplot -import keras.backend as K +from glob import glob +from rasterio.errors import RasterioIOError + +from data_utils import save_raster, stack_rasters, paths_map, load_raster, clip_raster from fully_conv import weighted_loss, weighted_focal_loss +from data_generators import concatenate_fmasks + 
_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) +def fmask_evaluated_image(evaluated_image, path, row, year, landsat_directory): + image, meta = load_raster(evaluated_image) + suffix = str(path) + '_' + str(row) + '_' + str(year) + image_subdirectory = os.path.join(landsat_directory, suffix) + temp_mask = np.expand_dims(np.zeros_like(image)[0], 0) + meta.update(count=1) + masked_image = concatenate_fmasks(image_subdirectory, temp_mask, meta, nodata=1) + for i in range(image.shape[0]): + image[i, :, :][masked_image[0]==1] = np.nan + meta.update(count=image.shape[0]) + meta.update(nodata=np.nan) + return image, meta + + def evaluate_image_many_shot(path, row, year, image_directory, model, num_classes=4, n_overlaps=4, outfile=None, ii=None): ''' To recover from same padding, slide many different patches over the image. ''' suffix = '{}_{}_{}'.format(path, row, year) @@ -60,19 +79,39 @@ def evaluate_image_many_shot(path, row, year, image_directory, model, num_classe mt_row = [26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29] years = [2013, 2014, 2015, 2016, 2017, 2018, 2019] - n_classes = 5 - model_name = 'augmentation_irr_and_wetlands_no_class_weights.h5' - image_directory = '/home/thomas/share/image_data/train/' - save_directory = '/home/thomas/share/evaluated_mt/' - model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, - 'weighted_loss':weighted_loss}) - for year in years: - for path, row in zip(mt_path, mt_row): - print("Evaluating", path, row, year) - suffix = 'irr_{}_{}_{}.tif'.format(path, row, year) - outfile = os.path.join(save_directory, suffix) - if not os.path.isfile(outfile): - evaluate_image_many_shot(path, row, year, image_directory, - model, outfile=outfile, num_classes=n_classes) - else: - print("Image {} already exists.".format(outfile)) + + landsat_directory = '/home/thomas/share/image_data/train/' + save_directory = 
'/home/thomas/share/fmask_evaluated_mt/' + for f in glob("/home/thomas/share/evaluated_mt/" + "*.tif"): + _, path, row, year = os.path.basename(f).split('_') + year = year[:-4] + outfile = os.path.join(save_directory, os.path.basename(f)) + if not os.path.isfile(outfile): + print(path, row, year) + try: + out, meta = fmask_evaluated_image(f, path, row, year, landsat_directory) + save_raster(out, outfile, meta) + clip_raster(outfile, int(path), int(row)) + except RasterioIOError as e: + print(e) + + # n_classes = 5 + # model_name = 'augmentation_irr_and_wetlands_no_class_weights.h5' + # image_directory = '/home/thomas/share/image_data/train/' + # save_directory = '/home/thomas/share/evaluated_mt/' + # model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, + # 'weighted_loss':weighted_loss}) + # for year in years: + # for path, row in zip(mt_path, mt_row): + # print("Evaluating", path, row, year) + # suffix = 'irr_{}_{}_{}.tif'.format(path, row, year) + # outfile = os.path.join(save_directory, suffix) + # if not os.path.isfile(outfile): + # try: + # evaluate_image_many_shot(path, row, year, image_directory, + # model, outfile=outfile, num_classes=n_classes) + # except Exception as e: + # print(e) + # continue + # else: + # print("Image {} already exists.".format(outfile)) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 618fca2..0e9a7af 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -141,13 +141,3 @@ def assign_shapefile_year(shapefile): # end = string[first:] # end = end[:end.find(',')] # os.remove(end) - - - - - - - - - - From e5476b6cbaf999a8ecd7d2c3368919f3acf1cdbf Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 26 Jul 2019 12:36:39 -0600 Subject: [PATCH 70/89] Train/test over mt --- fully-conv-classification/data_generators.py | 188 ++++++++- 
fully-conv-classification/data_utils.py | 365 +++++++----------- fully-conv-classification/fully_conv.py | 4 +- .../runner_from_shapefile.py | 134 ++----- fully-conv-classification/shapefile_utils.py | 50 ++- 5 files changed, 379 insertions(+), 362 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index f899e57..fb998a4 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -1,20 +1,26 @@ import numpy as np +import numpy.ma as ma import os import time import pickle -import matplotlib.pyplot as plt +from matplotlib.pyplot import imshow, show, subplots, colorbar import warnings +import pdb + from glob import glob from random import sample, shuffle, choice from scipy.ndimage.morphology import distance_transform_edt from rasterio import open as rasopen +from rasterio.errors import RasterioIOError from skimage import transform from sat_image.warped_vrt import warp_single_image from tensorflow.keras.utils import Sequence +from multiprocessing import Pool +from collections import defaultdict from runspec import mask_rasters -from data_utils import load_raster, paths_map, stack_rasters -from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile +from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess +from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features def distance_map(mask): @@ -24,20 +30,14 @@ def distance_map(mask): return distances -class DataMask(object): - - def __init__(self, mask, class_code): - self.mask = mask - self.class_code = class_code - - class DataTile(object): - def __init__(self, data, class_mask, class_code): + def __init__(self, data, one_hot, class_code): self.dict = {} - self.dict['data'] = data - self.dict['class_mask'] = class_mask + self.dict['data'] = data.astype(np.float32) + 
self.dict['one_hot'] = one_hot self.dict['class_code'] = class_code + # Need to split the data into separate classes to play with class balance. def to_pickle(self, training_directory): if not os.path.isdir(training_directory): @@ -54,6 +54,19 @@ def to_pickle(self, training_directory): print("What? Contact administrator.") +def _pickle_datatile(datatile, training_directory): + template = os.path.join(training_directory, + 'class_{}_data/'.format(datatile.dict['class_code'])) + if not os.path.isdir(template): + os.mkdir(template) + outfile = os.path.join(template, str(time.time()) + ".pkl") + if not os.path.isfile(outfile): + with open(outfile, 'wb') as f: + pickle.dump(datatile.dict, f, protocol=pickle.HIGHEST_PROTOCOL) + else: + pass + + def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): ''' ``Fmasks'' are masks of clouds and water. We don't want clouds/water in @@ -67,17 +80,129 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): for suffix in mask_rasters(): if f.endswith(suffix): paths.append(os.path.join(dirpath, f)) + paths = [p for p in paths if 'water' not in p] for fmask_file in paths: fmask, _ = load_raster(fmask_file) + # why is water being misregistered? + # clouds, water present where fmask == 1. 
try: - class_mask[fmask == 1] = nodata + class_mask = ma.masked_where(fmask == 1, class_mask) except (ValueError, IndexError) as e: fmask = warp_single_image(fmask_file, class_mask_geo) - class_mask[fmask == 1] = nodata - + class_mask = ma.masked_where(fmask == 1, class_mask) + return class_mask +def extract_training_data_v2(split_shapefile_directory, image_directory, + training_data_directory, assign_shapefile_year, assign_shapefile_class_code, n_classes=5): + + split_shapefiles = [f for f in glob(os.path.join(split_shapefile_directory, "*.shp"))] + + done = set() + + total_time = 0 + + for counter, shapefile in enumerate(split_shapefiles): + begin_time = time.time() + if shapefile in done: + continue + _, path, row = os.path.splitext(shapefile)[0][-7:].split('_') + year = assign_shapefile_year(shapefile) + path_row_year = path + '_' + row + '_' + str(year) + print("Extracting data for", path_row_year) + shapefiles_over_same_path_row = all_matching_shapefiles(shapefile, + split_shapefile_directory, assign_shapefile_year) + done.update(shapefiles_over_same_path_row) + image_path = os.path.join(image_directory, path_row_year) + if not os.path.isdir(image_path): + print('hooby dooby') + continue + image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) + mask_file = image_path_map['B1.TIF'][0] + mask, mask_meta = load_raster(mask_file) + try: + image_stack = stack_rasters_multiprocess(image_path_map, target_geo=mask_meta, target_shape=mask.shape) + except RasterioIOError as e: + print("Redownload images for", path_row_year) + # TODO: remove corrupted file and redownload images. 
+ continue + mask = np.zeros_like(mask).astype(np.int) + fmask = concatenate_fmasks(os.path.join(image_directory, path_row_year), mask, + mask_meta) + if fmask.mask.all(): + print("All pixels covered by cloud for {}".format(path_row_year)) + continue + + one_hots = [] + class_codes = [] + + for f in shapefiles_over_same_path_row: + class_code = assign_shapefile_class_code(f) + out = _one_hot_from_shapefile(f, mask_file, class_code, n_classes) + if out is not None: + one_hots.append(out) + class_codes.append(class_code) + # b/c tf expects columns first, we swapaxes here. + one_hot_copy = [] + for one_hot, class_code in zip(one_hots, class_codes): + for i in range(n_classes): + one_hot[i, :, :][fmask.mask[0]] = ma.masked # why can't i vectorize this + if class_code == 0: # apply border class to only irrigated pixels + border_labels = make_border_labels(one_hot[i, :, :], border_width=1) + border_labels.astype(bool) + one_hot[n_classes-1, :, :] = border_labels + one_hot = np.swapaxes(one_hot, 0, 2) + one_hot_copy.append(one_hot) + image_stack = np.swapaxes(image_stack, 0, 2) + _iterate_over_raster_v2(image_stack, one_hot_copy, class_codes, training_data_directory) + end_time = time.time() + diff = end_time - begin_time + total_time += diff + print('single iteration time:', diff, 'avg.', total_time / (counter + 1)) + + +def _iterate_over_raster_v2(raster, one_hots, class_codes, training_directory, tile_size=608): + out = [] + # ... could rewrite it in cython. 
+ for i in range(0, raster.shape[0]-tile_size, tile_size): + for j in range(0, raster.shape[1]-tile_size, tile_size): + for one_hot, class_code in zip(one_hots, class_codes): + sub_one_hot = one_hot[i:i+tile_size, j:j+tile_size, :] + if not _check_dimensions_and_min_pixels(sub_one_hot, tile_size): + continue + sub_raster = raster[i:i+tile_size, j:j+tile_size, :] + dt = DataTile(sub_raster, sub_one_hot, class_code) + out.append(dt) + if len(out): + with Pool() as pool: + td = [training_directory]*len(out) + pool.starmap(_pickle_datatile, zip(out, td)) + + +def _one_hot_from_shapefile(shapefile, mask_file, shapefile_class_code, n_classes): + class_labels, _ = mask_raster_to_shapefile(shapefile, mask_file, return_binary=False) + if class_labels.mask.all(): + return None + one_hot = _one_hot_from_labels(class_labels, shapefile_class_code, n_classes) + return one_hot + + +def _one_hot_from_labels(labels, class_code, n_classes): + out = np.zeros((n_classes, labels.shape[1], labels.shape[2])) + out[class_code, :, :][~labels.mask[0]] = 1 + return out.astype(np.int) + + +def _check_dimensions_and_min_pixels(sub_one_hot, tile_size): + # 200 is the minimum amount of pixels required to save the data. 
+ if sub_one_hot.shape[0] != tile_size or sub_one_hot.shape[1] != tile_size: + return False + if len(np.nonzero(sub_one_hot)[0]) < 200: + return False + return True + + def extract_training_data(shapefile_directory, image_directory, training_directory, save=True, tile_size=608, assign_shapefile_year=None, assign_shapefile_class_code=None, @@ -109,7 +234,7 @@ def extract_training_data(shapefile_directory, image_directory, continue paths_mapping = paths_map(os.path.join(image_directory, suffix)) try: - master = stack_rasters(paths_mapping, p, r, year) #todo; error check empty + master = stack_rasters(paths_mapping, p, r, year) # paths_mapping except Exception as e: print(e) @@ -185,10 +310,12 @@ def make_border_labels(mask, border_width): class SatDataSequence(Sequence): def __init__(self, data_directory, batch_size, class_weights={}, - border_width=1, classes_to_augment=None): + border_width=1, n_classes=5, classes_to_augment=None): self.data_directory = data_directory + self.n_classes = n_classes self.class_weights = class_weights self.batch_size = batch_size + self._no_augment = classes_to_augment is None self.classes_to_augment = classes_to_augment self.border_width = border_width self._get_files() @@ -252,8 +379,29 @@ def _from_pickle(self, filename): def _make_weights_labels_and_features(self, data_tiles, classes_to_augment): - return _preprocess_input_data(data_tiles, self.class_weights, - border_width=self.border_width, classes_to_augment=classes_to_augment) + return self._preprocess_input_data(data_tiles, self.class_weights, + classes_to_augment=classes_to_augment) + + + def _preprocess_input_data(self, data_tiles, class_weights, classes_to_augment=None): + features = [] + one_hots = [] + weight_list = [] + for tile in data_tiles: + data = tile['data'] + one_hot = tile['one_hot'].astype(np.int) + weights = np.zeros_like(one_hot) + class_code = tile['class_code'] + weights[:][one_hot[:, :, class_code]] = class_weights[class_code] + if class_code == 0: + 
weights[:][one_hot[:, :, self.n_classes-1]] = class_weights[self.n_classes-1] + if not self._no_augment: + if classes_to_augment[tile['class_code']]: + data, one_hot, weights = _augment_data(data, one_hot, weights) + features.append(data) + one_hots.append(one_hot) + weight_list.append(weights) + return [np.asarray(features), np.asarray(weight_list)], [np.asarray(one_hots)] def _preprocess_input_data(data_tiles, class_weights, classes_to_augment=None, border_width=1): diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 98d1e6f..a7bbdea 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -1,17 +1,22 @@ import os import geopandas as gpd import json +import pdb +import datetime + from fiona import open as fopen from glob import glob from lxml import html from requests import get from copy import deepcopy -from numpy import zeros, asarray, array, reshape, nan, sqrt, std +from numpy import zeros, asarray, array, reshape, nan, sqrt, std, uint16 from shapely.geometry import shape from collections import defaultdict from rasterio import float32, open as rasopen from rasterio.mask import mask from pickle import load +from multiprocessing import Pool + from prepare_images import ImageStack from shapefile_utils import get_features from sat_image.warped_vrt import warp_single_image @@ -19,7 +24,117 @@ WRS2 = '../spatial_data/wrs2_descending_usa.shp' -def paths_map(image_directory, satellite=8): +def download_images_over_shapefile(shapefile, image_directory, year): + '''Downloads p/r corresponding to the location of + the shapefile. Image_directory: where to save the raw images. 
+ ''' + p, r = get_shapefile_path_row(shapefile) + suff = str(p) + '_' + str(r) + "_" + str(year) + landsat_dir = os.path.join(image_directory, suff) + satellite = 8 + if year < 2013: + satellite = 7 + if not os.path.isdir(landsat_dir): + os.mkdir(landsat_dir) + ims = download_images(landsat_dir, p, r, year, satellite) + else: + ims = download_images(landsat_dir, p, r, year, satellite) + + return ims + + +def download_from_pr(p, r, year, image_directory): + '''Downloads p/r corresponding to the location of + the shapefile, and creates master raster''' + suff = str(p) + '_' + str(r) + "_" + str(year) + landsat_dir = os.path.join(image_directory, suff) + satellite = 8 + if year < 2013: + satellite = 7 + if not os.path.isdir(landsat_dir): + os.mkdir(landsat_dir) + ims = download_images(landsat_dir, p, r, year, satellite) + else: + ims = download_images(landsat_dir, p, r, year, satellite) + return ims + + +def _parse_landsat_capture_date(landsat_scene): + ''' + returns: calendar date of scene capture + landsat_scene is a directory (i.e data/38_27_2013/') + scene ID: + LXSPPPRRRYYYYDDDGSIVV + L = Landsat + X = Sensor + S = Satellite + PPP = WRS Path + RRR = WRS Row + YYYY = Year + DDD = Julian day + GSI = Ground station ident + VV = Archived version number + ''' + julian_year_day = landsat_scene[-10:-5] + return datetime.datetime.strptime(julian_year_day, '%y%j').date() + + +def _landsat_band_map(subdirectory, satellite=8): + band_map = dict() + + for band in landsat_rasters()[satellite]: + band_map[band] = None + for band in static_rasters(): + band_map[band] = None + for band in climate_rasters(): + band_map[band] = None + + extensions = (".tif", ".TIF") + for dirpath, dirnames, filenames in os.walk(subdirectory): + for f in filenames: + if any(ext in f for ext in extensions): + for band in band_map: + if f.endswith(band): + band_map[band] = os.path.join(dirpath, f) + return band_map + + +def _climate_band_map(directory, band_map, date): + + files = 
glob(os.path.join(directory, '*.tif')) + files.extend(glob(os.path.join(directory, '*.TIF'))) + for f in files: + datestring = os.path.basename(f)[:10] + cur = datetime.datetime.strptime(datestring, '%Y-%m-%d').date() + if date == cur: + for band in band_map: + if f.endswith(band): + band_map[band] = os.path.join(directory, f) + return band_map + + +def paths_mappings_single_scene(landsat_directory): + directories = [os.path.join(landsat_directory, f) for f in os.listdir(landsat_directory) if + os.path.isdir(os.path.join(landsat_directory, f))] + climate_directory = os.path.join(landsat_directory, 'climate_rasters') + other_rasters = [os.path.join(landsat_directory, f) for f in os.listdir(landsat_directory) if + not os.path.isdir(os.path.join(landsat_directory, f))] + date_dict = dict() + for d in directories: + if 'climate' in d: + continue + pm = _landsat_band_map(d) + date = _parse_landsat_capture_date(d) + cm = _climate_band_map(climate_directory, pm, date) + for raster in other_rasters: + for band in static_rasters(): + if raster.endswith(band): + pm[band] = raster + date_dict[date] = pm + return date_dict + + +def paths_map_multiple_scenes(image_directory, satellite=8): ''' Recursively get all rasters in image_directory and its subdirectories, and adds them to band_map. 
''' band_map = defaultdict(list) @@ -44,74 +159,52 @@ def paths_map(image_directory, satellite=8): return band_map -def stack_rasters(paths_map, path, row, year): +def _maybe_warp(feature_raster, target_geo, target_shape): + arr, _ = load_raster(feature_raster) + if not arr.shape == target_shape: + arr = warp_single_image(feature_raster, target_geo) + return arr, feature_raster + + +def stack_rasters_multiprocess(paths_map, target_geo, target_shape): first = True stack = None num_rasters = 0 for key in paths_map: num_rasters += len(paths_map[key]) j = 0 + feature_rasters = [feature_raster for feat in paths_map.keys() for feature_raster in + paths_map[feat]] + tg = [target_geo]*len(feature_rasters) + ts = [target_shape]*len(feature_rasters) + with Pool() as pool: + # Multiprocess the loading of rasters into memory. + # Speedup of ~40s. + out = pool.starmap(_maybe_warp, zip(feature_rasters, tg, ts)) + rasters = {feature_raster: array for (array, feature_raster) in out} for feat in sorted(paths_map.keys()): # ensures the stack is in the same order each time. # Ordering within bands is assured by sorting the list that - # each band corresponding to, as that's sorting by date. + # each band corresponding to, as that's sorted by date. feature_rasters = paths_map[feat] # maps bands to their location in filesystem. 
for feature_raster in feature_rasters: - with rasopen(feature_raster, mode='r') as src: - arr = src.read() - raster_geo = src.meta.copy() + arr = rasters[feature_raster] if first: - first_geo = raster_geo.copy() - empty = zeros((num_rasters, arr.shape[1], arr.shape[2]), float32) - stack = empty + stack = zeros((num_rasters, target_shape[1], target_shape[2]), uint16) stack[j, :, :] = arr j += 1 first = False else: - try: - stack[j, :, :] = arr - j += 1 - except ValueError: - arr = warp_single_image(feature_raster, first_geo) - stack[j, :, :] = arr - j += 1 + stack[j, :, :] = arr + j += 1 return stack -def create_master_raster(paths_map, path, row, year, raster_directory, mean_map=None, - stddev_map=None): - """ Creates a master raster with depth given by the organization of the - paths_map. Paths map is a dictionary of lists, with keys the band names - (B1, B2...) and values the paths of the images in the filesystem - corresponding to that band. """ - fname = "master_raster_{}_{}_{}.tif".format(path, row, year) - pth = os.path.join(raster_directory, fname) - mask_fname = "class_mask_{}_{}_{}.tif".format(path, row, year) - mask_path = os.path.join(raster_directory, mask_fname) - if os.path.isfile(pth): - print("Master raster already created for {}_{}_{}.".format(path, row, year)) - if os.path.isfile(mask_path): - print('Class mask template already created for {}_{}_{}'.format(path, row, year)) - return pth - else: - print("Creating class mask template.") - with rasopen(pth, 'r') as src: - meta = src.meta.copy() - h = meta['height'] - w = meta['width'] - - meta.update(count=1, dtype=float32) - - with rasopen(mask_path, 'w', **meta) as msk: - out = zeros((h, w)).astype(float32) - msk.write(out, 1) - return pth - +def stack_rasters(paths_map, target_geo, target_shape): first = True stack = None num_rasters = 0 for key in paths_map: num_rasters += len(paths_map[key]) - j = 0 for feat in sorted(paths_map.keys()): # ensures the stack is in the same order each time. 
# Ordering within bands is assured by sorting the list that @@ -120,11 +213,8 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map= for feature_raster in feature_rasters: with rasopen(feature_raster, mode='r') as src: arr = src.read() - raster_geo = src.meta.copy() if first: - first_geo = raster_geo.copy() - empty = zeros((num_rasters, arr.shape[1], arr.shape[2]), float32) - stack = empty + stack = zeros((num_rasters, target_shape[1], target_shape[2]), uint16) stack[j, :, :] = arr j += 1 first = False @@ -133,99 +223,10 @@ def create_master_raster(paths_map, path, row, year, raster_directory, mean_map= stack[j, :, :] = arr j += 1 except ValueError: - arr = warp_single_image(feature_raster, first_geo) + arr = warp_single_image(feature_raster, target_geo) stack[j, :, :] = arr j += 1 - - msk_out = zeros((1, stack.shape[1], stack.shape[2])) - first_geo.update(count=1, dtype=msk_out.dtype) - with rasopen(mask_path, mode='w', **first_geo) as msk: - msk.write(msk_out) - - first_geo.update(count=num_rasters, dtype=stack.dtype) - - with rasopen(pth, mode='w', **first_geo) as dst: - dst.write(stack) - - print("Master raster saved to {}.".format(pth)) - - return pth - - -def normalize_and_save_image(fname): - norm = True - with rasopen(fname, 'r') as rsrc: - if "normalized" in rsrc.tags(): - return - else: - rass_arr = rsrc.read() - rass_arr = rass_arr.astype(float32) - profile = rsrc.profile.copy() - profile.update(dtype=float32) - rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) - scaler = StandardScaler() # z-normalization - scaler.fit(rass_arr) - rass_arr = scaler.transform(rass_arr) - with rasopen(fname, 'w', **profile) as dst: - dst.write(rass_arr, 1) - print("Normalized", fname) - dst.update_tags(normalized=True) - - -def raster_sum(raster): - with rasopen(raster, 'r') as src: - arr_masked = src.read(1, masked=True) # get rid of nodata values - s = arr_masked.sum() - count = arr_masked.count() - return s, count - - -def 
raster_squared_sum(raster, mean): - with rasopen(raster, 'r') as src: - arr_masked = src.read(1, masked=True) # get rid of nodata values - squared_diff = (arr_masked - mean)**2 - s = squared_diff.sum() - count = squared_diff.count() - return s, count - - -def bandwise_stddev(paths_list, band_name, band_mean): - ''' Calculate the stddev of the pixel - values in a given band through time.''' - n_pixels = 0 - pixel_value_squared_sum = 0 - for filepath in paths_list: - p_sum, num_pix = raster_squared_sum(filepath, band_mean) - pixel_value_squared_sum += p_sum - n_pixels += num_pix - if n_pixels == 0: - print("0 non masked pixels.") - return 1 - return (sqrt(pixel_value_squared_sum / n_pixels), band_name) - - -def bandwise_mean(paths_list, band_name): - n_pixels = 0 - pixel_value_sum = 0 - for filepath in paths_list: - p_sum, num_pix = raster_sum(filepath) - pixel_value_sum += p_sum - n_pixels += num_pix - if n_pixels == 0: - print("0 non masked pixels.") - return 1 - return (pixel_value_sum / n_pixels, band_name) - - -def download_images(project_directory, path, row, year, satellite=8, n_landsat=3, - max_cloud_pct=40): - - image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, - max_cloud_pct=max_cloud_pct, n_landsat=n_landsat, year=year) - - image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is - # a cloud mask. - return image_stack + return stack def get_wrs2_features(path, row): @@ -264,82 +265,6 @@ def all_rasters(image_directory, satellite=8): return band_map -def raster_means(image_directory, satellite=8): - """ Gets all means of all images stored - in image_directory and its subdirectories. - Images end with (.tif, .TIF) - Image_directory in a typical case would - be project_root/image_data/train/. - This returns band_map, which is a dict of lists with - keys band names (B1, B2...) 
and values lists of - the locations of the rasters in the filesystem.""" - - outfile = os.path.join(image_directory, "mean_mapping.pkl") - if os.path.isfile(outfile): - with open(outfile, 'rb') as f: - mean_mapping = pickle.load(f) - return mean_mapping - - band_map = all_rasters(image_directory, satellite) - mean_mapping = {} - - for band in band_map: - mean, bnd = bandwise_mean(band_map[band], band) - mean_mapping[band] = mean - - with open(outfile, 'wb') as f: - pickle.dump(mean_mapping, f) - - return mean_mapping - - -def raster_stds(image_directory, mean_map, satellite=8): - - outfile = os.path.join(image_directory, "stddev_mapping.pkl") - if os.path.isfile(outfile): - with open(outfile, 'rb') as f: - stddev_mapping = pickle.load(f) - return stddev_mapping - - band_map = all_rasters(image_directory, satellite) # get all rasters - # in the image directory - stddev_mapping = {} - - for band in band_map.keys(): - std, bnd = bandwise_stddev(band_map[band], band, mean_map[band]) - stddev_mapping[band] = std - - with open(outfile, 'wb') as f: - pickle.dump(stddev_mapping, f) - - pprint('STDMAP') - pprint(stddev_mapping) - print("-------") - pprint('MEANMAP') - pprint(mean_map) - - return stddev_mapping - - -def create_all_master_rasters(image_directory, raster_save_directory, mean_mapping=None, - stddev_mapping=None): - """ Creates a master raster for all images in image_directory. 
- Image directory is assumed to be a top-level directory that contains - all the path_row directories for test or train (image_data/test/path_row_year*/) - Image directory is image_data/test/ in this case.""" - dirs = os.listdir(image_directory) - for sub_dir in dirs: - out = os.path.join(image_directory, sub_dir) - if os.path.isdir(out): - paths_map = all_rasters(out) - i = 0 - path = sub_dir[:2] - row = sub_dir[3:5] - year = sub_dir[-4:] - create_master_raster(paths_map, path, row, year, raster_save_directory, mean_mapping, - stddev_mapping) - - def clip_raster(evaluated, path, row, outfile=None): shp = gpd.read_file(WRS2) @@ -362,8 +287,8 @@ def save_raster(arr, outfile, meta, count=5): dst.write(arr) -def load_raster(master_raster): - with rasopen(master_raster, 'r') as src: +def load_raster(raster_name): + with rasopen(raster_name, 'r') as src: arr = src.read() meta = src.meta.copy() return arr, meta diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 9f7aebf..89cbaca 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -76,7 +76,7 @@ def lr_schedule(epoch): n_classes = 5 input_shape = (None, None, 51) weight_shape = (None, None, n_classes) - filepath = './models/augmentation_irr_and_wetlands_no_class_weights.h5' + filepath = './models/whoknows.h5' # Prepare callbacks for model saving and for learning rate adjustment. 
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', @@ -97,7 +97,7 @@ def lr_schedule(epoch): valid_generator = SatDataSequence('/home/thomas/share/training_data/test/', batch_size=batch_size, class_weights=class_weights) model.fit_generator(generator, - epochs=1000, + epochs=2, callbacks=[lr_scheduler, checkpoint, tensorboard], use_multiprocessing=True, validation_data=valid_generator, diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 0e9a7af..9bb142b 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -1,53 +1,21 @@ import os import pickle +import time +import pdb from glob import glob from pprint import pprint -import time from numpy import save as nsave from fiona import open as fopen from rasterio.errors import RasterioIOError from collections import defaultdict, OrderedDict from random import choice from shapely.geometry import shape -from data_utils import download_images +from multiprocessing import Pool + from shapefile_utils import filter_shapefile_overlapping +from data_utils import paths_mappings_single_scene, paths_map_multiple_scenes from runspec import landsat_rasters, static_rasters, climate_rasters -from data_generators import extract_training_data - - -def download_images_over_shapefile(shapefile, image_directory, year): - '''Downloads p/r corresponding to the location of - the shapefile. Image_directory: where to save the raw images. 
- ''' - p, r = get_shapefile_path_row(shapefile) - suff = str(p) + '_' + str(r) + "_" + str(year) - landsat_dir = os.path.join(image_directory, suff) - satellite = 8 - if year < 2013: - satellite = 7 - if not os.path.isdir(landsat_dir): - os.mkdir(landsat_dir) - ims = download_images(landsat_dir, p, r, year, satellite) - else: - ims = download_images(landsat_dir, p, r, year, satellite) - - return ims - - -def download_from_pr(p, r, year, image_directory): - '''Downloads p/r corresponding to the location of - the shapefile, and creates master raster''' - suff = str(p) + '_' + str(r) + "_" + str(year) - landsat_dir = os.path.join(image_directory, suff) - satellite = 8 - if year < 2013: - satellite = 7 - if not os.path.isdir(landsat_dir): - os.mkdir(landsat_dir) - ims = download_images(landsat_dir, p, r, year, satellite) - else: - ims = download_images(landsat_dir, p, r, year, satellite) - return ims +from data_generators import extract_training_data_v2 def assign_shapefile_class_code(shapefile): @@ -59,22 +27,13 @@ def assign_shapefile_class_code(shapefile): return 2 if 'wetlands' in shapefile: return 3 + if 'fallow' in shapefile: + return 4 def assign_shapefile_year(shapefile): # get the shapefile name, not the whole path. - shapefile = os.path.basename(shapefile) - if '2013' in shapefile: - return 2013 - if '2014' in shapefile: - return 2014 - if '2015' in shapefile: - return 2015 - if '2016' in shapefile: - return 2016 - if '2017' in shapefile: - return 2017 - return 2014 + return 2013 @@ -88,56 +47,27 @@ def assign_shapefile_year(shapefile): # 3. Extract training data # 4. Train network. # Need to download images. 
- - mt_path = [42, 41, 40, 39, 38, 37, 36, 35, 42, 41, 40, 39, 38, 37, 36, 35, 41, 40, 39, 38, 37, - 36, 35, 34, 40, 39, 38, 37, 36, 35, 34] - mt_row = [26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, - 28, 28, 28, 29, 29, 29, 29, 29, 29, 29] - years = [2013, 2014, 2015, 2016, 2017, 2018, 2019] - shapefile_directory = '/home/thomas/IrrigationGIS/western_states_irrgis/reprojected_western_gis/post-2013' - print(len(mt_path), len(mt_row)) + + in_train_shapefile_directory = '/home/thomas/current_training_montana/train/' + in_test_shapefile_directory = '/home/thomas/current_training_montana/test/' + split_out_train_shapefile_directory = 'shapefile_data/train/' + split_out_test_shapefile_directory = 'shapefile_data/test/' image_directory = '/home/thomas/share/image_data/train/' - for year in years: - for path, row in zip(mt_path, mt_row): - for attempt in range(60): - try: - print(path, row, year) - download_from_pr(path, row, year, image_directory) - break - except Exception as e: - print("EE_------------------------------------------") - print(e) - if type(e) == RasterioIOError: - print(e) - string = e.args[0] - first = string.find('/') - end = string[first:] - end = end[:end.find(',')] - os.remove(end) - time.sleep(2) - print("EE_------------------------------------------") - - - shapefiles = [f for f in glob(os.path.join(shapefile_directory, "*.shp"))] - - - -# for f in shapefiles: -# print("Downloading images for {}".format(f)) -# path_row_map = filter_shapefile_overlapping(f) -# year = assign_shapefile_year(f) -# for i, path_row in enumerate(path_row_map): -# path, row = path_row.split('_') -# for attempt in range(60): -# try: -# print(path, row, year, "{} of {} path/rows done.".format(i+1, len(path_row_map))) -# download_from_pr(path, row, year, image_directory) -# break -# except Exception as e: -# if type(e) == RasterioIOError: -# print(e) -# string = e.args[0] -# first = string.find('/') -# end = string[first:] -# end = 
end[:end.find(',')] -# os.remove(end) + training_data_directory = '/home/thomas/share/training_data/train/' + test_data_directory = '/home/thomas/share/training_data/test/' + fs = [f for f in glob(in_train_shapefile_directory + "*.shp")] + tf = [split_out_train_shapefile_directory] * len(fs) + + # with Pool() as pool: + # pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) + extract_training_data_v2(split_out_train_shapefile_directory, image_directory, + training_data_directory, assign_shapefile_year, assign_shapefile_class_code, n_classes=6) + extract_training_data_v2(split_out_test_shapefile_directory, image_directory, + test_data_directory, assign_shapefile_year, assign_shapefile_class_code, n_classes=6) + # directories = os.listdir(image_directory) + # test = image_directory + directories[0] + # scene_dates_and_mappings = paths_mappings_single_scene(test) + # from pprint import pprint + # for s in scene_dates_and_mappings: + # print(s) + # pprint(scene_dates_and_mappings[s]) diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 83e11d2..8fb126e 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -6,7 +6,7 @@ from fiona import open as fopen from rasterio.mask import mask from rasterio import open as rasopen -from shapely.geometry import shape, mapping +from shapely.geometry import shape, mapping, Polygon from sklearn.neighbors import KDTree from collections import defaultdict @@ -30,13 +30,30 @@ def mask_raster_to_shapefile(shapefile, raster, return_binary=True): shp = shp.to_crs(src.crs) features = get_features(shp) arr = src.read() - out_image, out_transform = mask(src, shapes=features) + out_image, out_transform = mask(src, shapes=features, filled=False) if return_binary: out_image[out_image != 0] = 1 meta = src.meta return out_image, meta +def mask_raster_to_features(raster, features, features_meta): + # This function is useful when you 
don't have access to the + # file from which the features came or if the file doesn't exist. + + gdf = gpd.GeoDataFrame.from_features(features, features_meta) # do I need + # the whole metadata? + gdf = gdf[gdf.geometry.notnull()] + with rasopen(raster, 'r') as src: + shp = gdf.to_crs(src.crs) + features = get_features(shp) + arr = src.read() + out_image, out_transform = mask(src, shapes=features) + out_image[out_image != 0] = 1 + meta = src.meta + return out_image, meta + + def generate_class_mask(shapefile, master_raster, nodata=-1): ''' Generates a mask with 1 everywhere shapefile data is present and a no_data value everywhere else. @@ -139,7 +156,7 @@ def clip_shapefile_to_geometry(shapefile, clip_shapefile, out_filename, outside_ dst.write(feat) -def filter_shapefile_overlapping(shapefile, save=False, out_directory=None): +def filter_shapefile_overlapping(shapefile, out_directory=None): """ Shapefiles may span multiple path/rows/years. For training, we want all of the data available. 
This function filters the polygons contained in @@ -166,14 +183,14 @@ def filter_shapefile_overlapping(shapefile, save=False, out_directory=None): for p in prs: path_row_map[p].append(feat) - if not save: - return path_row_map + if out_directory is None: + return path_row_map, meta outfile = os.path.basename(shapefile) outfile = os.path.splitext(outfile)[0] for path_row in path_row_map: - out = outfile + path_row + ".shp" + out = outfile + "_" + path_row + ".shp" with fopen(os.path.join(out_directory, out), 'w', **meta) as dst: print("Saving {}".format(out)) for feat in path_row_map[path_row]: @@ -266,7 +283,13 @@ def get_shapefile_path_row(shapefile): # strip extension # TODO: Find some way to update shapefile metadata shp = shapefile[-9:-4].split("_") - return int(shp[0]), int(shp[1]) + try: + p = int(shp[0]) + r = int(shp[1]) + except ValueError: + p = int(shp[1]) + r = int(shp[2]) + return p, r def shapefile_area(shapefile): @@ -309,14 +332,5 @@ def buffer_shapefile(shp): dst.write(feat) if __name__ == '__main__': - from glob import glob - - pth = '/home/thomas/IrrigationGIS/western_states_irrgis/reprojected_western_gis/post-2013' - wrs2 = '/home/thomas/IrrMapper/spatial_data/wrs2_descending_test_path_rows.shp' - for f in glob(pth + "/*.shp"): - out_directory = 'shapefile_data/' - filename, _ = os.path.splitext(f) - filename = os.path.basename(filename) - test_file = filename + "_test.shp" - train_file = filename + "_train.shp" - clip_shapefile_to_geometry(f, wrs2, test_file, train_file, out_directory) + pass + From 1c5c4d896403d8b0ca658d5cbc640244a2c47e0e Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 2 Aug 2019 13:02:46 -0600 Subject: [PATCH 71/89] Small changes, working on getting a good model over Montana --- fully-conv-classification/data_generators.py | 204 ++++++------------ fully-conv-classification/data_utils.py | 35 ++- fully-conv-classification/evaluate_image.py | 72 ++----- fully-conv-classification/fully_conv.py | 49 +++-- 
.../runner_from_shapefile.py | 14 +- 5 files changed, 155 insertions(+), 219 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index fb998a4..1726cd0 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -17,9 +17,10 @@ from tensorflow.keras.utils import Sequence from multiprocessing import Pool from collections import defaultdict +from sys import getsizeof from runspec import mask_rasters -from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess +from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features @@ -32,11 +33,12 @@ def distance_map(mask): class DataTile(object): - def __init__(self, data, one_hot, class_code): + def __init__(self, data, one_hot, weights, class_code): self.dict = {} - self.dict['data'] = data.astype(np.float32) + self.dict['data'] = data self.dict['one_hot'] = one_hot self.dict['class_code'] = class_code + self.dict['weights'] = weights # Need to split the data into separate classes to play with class balance. def to_pickle(self, training_directory): @@ -80,7 +82,6 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): for suffix in mask_rasters(): if f.endswith(suffix): paths.append(os.path.join(dirpath, f)) - paths = [p for p in paths if 'water' not in p] for fmask_file in paths: fmask, _ = load_raster(fmask_file) # why is water being misregistered? 
@@ -116,17 +117,18 @@ def extract_training_data_v2(split_shapefile_directory, image_directory, done.update(shapefiles_over_same_path_row) image_path = os.path.join(image_directory, path_row_year) if not os.path.isdir(image_path): - print('hooby dooby') + download_from_pr(path, row, year, image_directory) continue image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) - mask_file = image_path_map['B1.TIF'][0] - mask, mask_meta = load_raster(mask_file) try: - image_stack = stack_rasters_multiprocess(image_path_map, target_geo=mask_meta, target_shape=mask.shape) - except RasterioIOError as e: - print("Redownload images for", path_row_year) - # TODO: remove corrupted file and redownload images. - continue + mask_file = image_path_map['B1.TIF'][0] + except IndexError: + os.rmdir(os.path.join(image_directory, path_row_year)) + download_from_pr(path, row, year, image_directory) + image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) + mask_file = image_path_map['B1.TIF'][0] + + mask, mask_meta = load_raster(mask_file) mask = np.zeros_like(mask).astype(np.int) fmask = concatenate_fmasks(os.path.join(image_directory, path_row_year), mask, mask_meta) @@ -134,52 +136,70 @@ def extract_training_data_v2(split_shapefile_directory, image_directory, print("All pixels covered by cloud for {}".format(path_row_year)) continue - one_hots = [] - class_codes = [] - + first = True + class_labels = None for f in shapefiles_over_same_path_row: class_code = assign_shapefile_class_code(f) - out = _one_hot_from_shapefile(f, mask_file, class_code, n_classes) - if out is not None: - one_hots.append(out) - class_codes.append(class_code) - # b/c tf expects columns first, we swapaxes here. 
- one_hot_copy = [] - for one_hot, class_code in zip(one_hots, class_codes): - for i in range(n_classes): - one_hot[i, :, :][fmask.mask[0]] = ma.masked # why can't i vectorize this - if class_code == 0: # apply border class to only irrigated pixels - border_labels = make_border_labels(one_hot[i, :, :], border_width=1) - border_labels.astype(bool) - one_hot[n_classes-1, :, :] = border_labels - one_hot = np.swapaxes(one_hot, 0, 2) - one_hot_copy.append(one_hot) + print(f, class_code) + out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) + if first: + class_labels = out + class_labels[~class_labels.mask] = class_code + first = False + else: + class_labels[~out.mask] = class_code + try: + image_stack = stack_rasters_multiprocess(image_path_map, target_geo=mask_meta, target_shape=mask.shape) + except RasterioIOError as e: + print("Redownload images for", path_row_year) + # TODO: remove corrupted file and redownload images. + continue + image_stack = np.swapaxes(image_stack, 0, 2) - _iterate_over_raster_v2(image_stack, one_hot_copy, class_codes, training_data_directory) + class_labels = np.swapaxes(class_labels, 0, 2) + class_labels = np.squeeze(class_labels) + _iterate_over_image_stack_and_save_training_data(image_stack, class_labels, + training_data_directory, n_classes=n_classes) end_time = time.time() diff = end_time - begin_time total_time += diff print('single iteration time:', diff, 'avg.', total_time / (counter + 1)) -def _iterate_over_raster_v2(raster, one_hots, class_codes, training_directory, tile_size=608): +def _iterate_over_image_stack_and_save_training_data(image_stack, class_labels, + training_data_directory, n_classes): out = [] # ... could rewrite it in cython. 
- for i in range(0, raster.shape[0]-tile_size, tile_size): - for j in range(0, raster.shape[1]-tile_size, tile_size): - for one_hot, class_code in zip(one_hots, class_codes): - sub_one_hot = one_hot[i:i+tile_size, j:j+tile_size, :] - if not _check_dimensions_and_min_pixels(sub_one_hot, tile_size): - continue - sub_raster = raster[i:i+tile_size, j:j+tile_size, :] - dt = DataTile(sub_raster, sub_one_hot, class_code) + tile_size = 608 + for i in range(0, image_stack.shape[0]-tile_size, tile_size): + for j in range(0, image_stack.shape[1]-tile_size, tile_size): + class_label_tile = class_labels[i:i+tile_size, j:j+tile_size] + if not _check_dimensions_and_min_pixels(class_label_tile, tile_size): + continue + unique = np.unique(class_label_tile) + unique = unique[~unique.mask] + for class_code in unique: + sub_one_hot = _one_hot_from_labels(class_label_tile, class_code, n_classes) + weights = _weights_from_one_hot(sub_one_hot, n_classes) + sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] + dt = DataTile(sub_image_stack, sub_one_hot, weights, class_code) out.append(dt) + # for d in out/ + # _pickle_datatile(d, training_data_directory) if len(out): with Pool() as pool: - td = [training_directory]*len(out) + td = [training_data_directory]*len(out) pool.starmap(_pickle_datatile, zip(out, td)) +def _weights_from_one_hot(one_hot, n_classes): + weights = np.zeros_like(one_hot) + tmp = np.sum(one_hot, 2) + for i in range(n_classes): + weights[:, :, i] = tmp + return weights.astype(bool) + + def _one_hot_from_shapefile(shapefile, mask_file, shapefile_class_code, n_classes): class_labels, _ = mask_raster_to_shapefile(shapefile, mask_file, return_binary=False) if class_labels.mask.all(): @@ -189,103 +209,21 @@ def _one_hot_from_shapefile(shapefile, mask_file, shapefile_class_code, n_classe def _one_hot_from_labels(labels, class_code, n_classes): - out = np.zeros((n_classes, labels.shape[1], labels.shape[2])) - out[class_code, :, :][~labels.mask[0]] = 1 - return 
out.astype(np.int) + one_hot = np.zeros((labels.shape[0], labels.shape[1], n_classes)) + one_hot[:, :, class_code][labels == class_code] = 1 + if class_code == 0: # apply border class to only irrigated pixels + border_labels = make_border_labels(one_hot[:, :, 0], border_width=1) + border_labels.astype(np.int) + one_hot[:, :, n_classes-1] = border_labels + return one_hot.astype(np.int) def _check_dimensions_and_min_pixels(sub_one_hot, tile_size): # 200 is the minimum amount of pixels required to save the data. if sub_one_hot.shape[0] != tile_size or sub_one_hot.shape[1] != tile_size: return False - if len(np.nonzero(sub_one_hot)[0]) < 200: - return False - return True - - -def extract_training_data(shapefile_directory, image_directory, - training_directory, save=True, tile_size=608, - assign_shapefile_year=None, assign_shapefile_class_code=None, - min_pixels=500, fmask=True, n_classes=4, nodata=0, augment_dict={}): - - if isinstance(assign_shapefile_year, type(None)): - raise ValueError("Please provide a function to assign shapefile year.") - if isinstance(assign_shapefile_class_code, type(None)): - raise ValueError("Please provide a function to assign shapefile class code.") - - pixel_dict = {} # pixel dict counts number of pixels present in each class. - for class_code in range(n_classes): - pixel_dict[class_code] = 0 - done = set() - all_shapefiles = [f for f in glob(os.path.join(shapefile_directory, "*.shp"))] - for f in all_shapefiles: - if f not in done: - done.add(f) - all_matches = all_matching_shapefiles(f, shapefile_directory, assign_shapefile_year) # get all shapefiles - # in the same path / row / year - for match in all_matches: - done.add(match) - p, r = get_shapefile_path_row(f) #TODO: error checking on this function. - year = assign_shapefile_year(f) - suffix = '{}_{}_{}'.format(p, r, year) - if not os.path.isdir(os.path.join(image_directory, suffix)): - # TODO: Figure out why the warning isn't working. 
- print("Images for {} not in given image directory ({}). Skipping extraction of data for following shapefiles: {}".format(suffix, image_directory, [os.path.basename(x) for x in all_matches])) - continue - paths_mapping = paths_map(os.path.join(image_directory, suffix)) - try: - master = stack_rasters(paths_mapping, p, r, year) - # paths_mapping - except Exception as e: - print(e) - print("Bad image data in", suffix) - continue - mask_file = paths_mapping['B1.TIF'][0] #TODO: this shouldn't be hardcoded. - masks = [] - # TODO: Only warp fmasks/load them into memory once. - for match in all_matches: - cc = assign_shapefile_class_code(match) - if cc is None: - raise ValueError("Shapefile {} not provided with a class code.".format(os.path.basename(match))) - msk, mask_meta = mask_raster_to_shapefile(match, mask_file, return_binary=True) - if fmask: - msk = concatenate_fmasks(os.path.join(image_directory, suffix), msk, - mask_meta, nodata=nodata) - dm = DataMask(msk, cc) # a binary mask that has a class_code attributed to it. - masks.append(dm) - print("Extracting data for {}. CC: {}. Year: {}".format(os.path.basename(match), cc, - year)) - - pixel_dict = _iterate_over_raster(master, masks, pixel_dict, - tile_size=tile_size, save=save, min_pixels=min_pixels, - training_directory=training_directory) - print("{} of {} shapefiles done. 
".format(len(done), len(all_shapefiles))) - - return pixel_dict - - -def _iterate_over_raster(raster, datamasks, pixel_dict, tile_size=608, - save=True, training_directory=None, min_pixels=None): - step = tile_size - for i in range(0, raster.shape[1]-tile_size, step): - for j in range(0, raster.shape[2]-tile_size, step): - sub_raster = raster[:, i:i+tile_size, j:j+tile_size] - for datamask in datamasks: - sub_mask = datamask.mask[:, i:i+tile_size, j:j+tile_size] - if _check_dimensions_and_content(sub_raster, sub_mask, tile_size, min_pixels): - pixel_dict[datamask.class_code] += len(np.where(sub_mask != 0)[0]) - if save: - dt = DataTile(sub_raster, sub_mask, datamask.class_code) - dt.to_pickle(training_directory) - return pixel_dict - - -def _check_dimensions_and_content(sub_raster, sub_mask, tile_size, min_pixels): - if len(np.where(sub_mask != 0)[0]) < min_pixels: - return False - if sub_mask.shape[1] != tile_size or sub_mask.shape[2] != tile_size: - return False - if sub_raster.shape[1] != tile_size or sub_raster.shape[2] != tile_size: + xx = np.nonzero(sub_one_hot) + if len(xx[0]) == 0: return False return True @@ -390,7 +328,7 @@ def _preprocess_input_data(self, data_tiles, class_weights, classes_to_augment=N for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) - weights = np.zeros_like(one_hot) + weights = tile['weights'].astype(np.int) class_code = tile['class_code'] weights[:][one_hot[:, :, class_code]] = class_weights[class_code] if class_code == 0: diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index a7bbdea..2fc24cf 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -36,9 +36,9 @@ def download_images_over_shapefile(shapefile, image_directory, year): satellite = 7 if not os.path.isdir(landsat_dir): os.mkdir(landsat_dir) - ims = download_images(landsat_dir, p, r, year, satellite) + ims = _download_images(landsat_dir, p, r, year, 
satellite) else: - ims = download_images(landsat_dir, p, r, year, satellite) + ims = _download_images(landsat_dir, p, r, year, satellite) return ims @@ -46,6 +46,8 @@ def download_images_over_shapefile(shapefile, image_directory, year): def download_from_pr(p, r, year, image_directory): '''Downloads p/r corresponding to the location of the shapefile, and creates master raster''' + # TODO: add rasterioIOError error checking + # and resolution here. suff = str(p) + '_' + str(r) + "_" + str(year) landsat_dir = os.path.join(image_directory, suff) satellite = 8 @@ -53,12 +55,23 @@ def download_from_pr(p, r, year, image_directory): satellite = 7 if not os.path.isdir(landsat_dir): os.mkdir(landsat_dir) - ims = download_images(landsat_dir, p, r, year, satellite) + ims = _download_images(landsat_dir, p, r, year, satellite) else: - ims = download_images(landsat_dir, p, r, year, satellite) + ims = _download_images(landsat_dir, p, r, year, satellite) return ims +def _download_images(project_directory, path, row, year, satellite=8, n_landsat=3, + max_cloud_pct=40): + + image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, + max_cloud_pct=max_cloud_pct, n_landsat=n_landsat, year=year) + + image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is + # a cloud mask. + return image_stack + + def _parse_landsat_capture_date(landsat_scene): ''' returns: calendar date of scene capture @@ -273,12 +286,18 @@ def clip_raster(evaluated, path, row, outfile=None): with rasopen(evaluated, 'r') as src: out = out.to_crs(src.crs) - meta = src.meta.copy() features = get_features(out) + # if crop == true for mask, you have to update the metadata. 
out_image, out_transform = mask(src, shapes=features, crop=True, nodata=nan) - - outfile = evaluated - save_raster(out_image, outfile, meta) + meta = src.meta.copy() + count = out_image.shape[0] + + meta.update({"driver": "GTiff", + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform}) + if outfile is not None: + save_raster(out_image, outfile, meta, count) def save_raster(arr, outfile, meta, count=5): diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 633962c..0d65300 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -1,6 +1,5 @@ import os import numpy as np -import matplotlib.pyplot as plt import keras.backend as K import tensorflow as tf import pdb @@ -8,8 +7,10 @@ from tensorflow.keras.models import load_model from glob import glob from rasterio.errors import RasterioIOError +from matplotlib.pyplot import imshow, show, subplots +from multiprocessing import Pool -from data_utils import save_raster, stack_rasters, paths_map, load_raster, clip_raster +from data_utils import save_raster, stack_rasters, paths_map_multiple_scenes, load_raster, clip_raster from fully_conv import weighted_loss, weighted_focal_loss from data_generators import concatenate_fmasks @@ -24,7 +25,7 @@ def fmask_evaluated_image(evaluated_image, path, row, year, landsat_directory): meta.update(count=1) masked_image = concatenate_fmasks(image_subdirectory, temp_mask, meta, nodata=1) for i in range(image.shape[0]): - image[i, :, :][masked_image[0]==1] = np.nan + image[i, :, :][masked_image.mask[0]] = np.nan meta.update(count=image.shape[0]) meta.update(nodata=np.nan) return image, meta @@ -37,15 +38,16 @@ def evaluate_image_many_shot(path, row, year, image_directory, model, num_classe if not os.path.isdir(image_path): print('Images not downloaded for {}'.format(image_path)) return - paths_mapping = paths_map(image_path) + paths_mapping = 
paths_map_multiple_scenes(image_path) try: - _, meta = load_raster(paths_mapping['B1.TIF'][0]) - master = stack_rasters(paths_mapping, path, row, year) + template, meta = load_raster(paths_mapping['B1.TIF'][0]) + image_stack = stack_rasters(paths_mapping, meta, template.shape) except Exception as e: print(e) return class_mask = np.ones((master.shape[1], master.shape[2], num_classes)) # Just a placeholder out = np.zeros((master.shape[2], master.shape[1], num_classes)) + image_stack = np.swapaxes(image_stack, 0, 2) chunk_size = 608 diff = 608 stride = 608 @@ -53,65 +55,31 @@ def evaluate_image_many_shot(path, row, year, image_directory, model, num_classe for k in range(0, n_overlaps*overlap_step, overlap_step): for i in range(k, master.shape[1]-diff, stride): for j in range(k, master.shape[2]-diff, stride): - sub_master = master[:, i:i+chunk_size, j:j+chunk_size] + sub_master = master[i:i+chunk_size, j:j+chunk_size, :] sub_mask = class_mask[i:i+chunk_size, j:j+chunk_size, :] - sub_master = np.swapaxes(sub_master, 0, 2) - sub_master = np.swapaxes(sub_master, 0, 1) - sub_master = np.expand_dims(sub_master, 0) - sub_mask = np.expand_dims(sub_mask, 0) preds = model.predict([sub_master, sub_mask]) preds = np.exp(preds) soft = preds / np.sum(preds, axis=-1, keepdims=True) - soft = np.swapaxes(soft, 1, 2) out[j:j+chunk_size, i:i+chunk_size, :] += soft[0] stdout.write("K: {} of {}. 
Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / master.shape[1])) out = np.swapaxes(out, 0, 2) out = out.astype(np.float32) + temp_mask = np.zeros((1, out.shape[1], out.shape[2])) + masked_image = concatenate_fmasks(image_path, temp_mask, meta, nodata=1) + for i in range(out.shape[0]): + out[i, :, :][masked_image.mask[0]] = np.nan meta.update(dtype=np.float32) out /= n_overlaps if outfile: save_raster(out, outfile, meta, count=num_classes) return out -if __name__ == '__main__': - mt_path = [42, 41, 40, 39, 38, 37, 36, 35, 42, 41, 40, 39, 38, 37, 36, 35, 41, 40, 39, 38, 37, - 36, 35, 34, 40, 39, 38, 37, 36, 35, 34] - mt_row = [26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, - 28, 28, 28, 29, 29, 29, 29, 29, 29, 29] - years = [2013, 2014, 2015, 2016, 2017, 2018, 2019] - landsat_directory = '/home/thomas/share/image_data/train/' - save_directory = '/home/thomas/share/fmask_evaluated_mt/' - for f in glob("/home/thomas/share/evaluated_mt/" + "*.tif"): - _, path, row, year = os.path.basename(f).split('_') - year = year[:-4] - outfile = os.path.join(save_directory, os.path.basename(f)) - if not os.path.isfile(outfile): - print(path, row, year) - try: - out, meta = fmask_evaluated_image(f, path, row, year, landsat_directory) - save_raster(out, outfile, meta) - clip_raster(outfile, int(path), int(row)) - except RasterioIOError as e: - print(e) +if __name__ == '__main__': - # n_classes = 5 - # model_name = 'augmentation_irr_and_wetlands_no_class_weights.h5' - # image_directory = '/home/thomas/share/image_data/train/' - # save_directory = '/home/thomas/share/evaluated_mt/' - # model = load_model("models/" + model_name, custom_objects={'tf':tf, '_epsilon':_epsilon, - # 'weighted_loss':weighted_loss}) - # for year in years: - # for path, row in zip(mt_path, mt_row): - # print("Evaluating", path, row, year) - # suffix = 'irr_{}_{}_{}.tif'.format(path, row, year) - # outfile = os.path.join(save_directory, suffix) - # if 
not os.path.isfile(outfile): - # try: - # evaluate_image_many_shot(path, row, year, image_directory, - # model, outfile=outfile, num_classes=n_classes) - # except Exception as e: - # print(e) - # continue - # else: - # print("Image {} already exists.".format(outfile)) + path = 39 + row = 27 + year = 2013 + image_directory = "/home/thomas/share/image_data/train/" + model = 'models/train_test_montana/2019-08-01-77pacc.h5' + evaluate_image_many_shot(path, row, year, image_directory, model, num_classes=6, n_overlaps=1, outfile='testing_trained_model.tif') diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 89cbaca..26761b9 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -50,57 +50,70 @@ def acc(y_true, y_pred): def lr_schedule(epoch): - lr = 1e-4 - if epoch > 150: + lr = 1e-2 + if epoch > 15: lr /= 256 - elif epoch > 100: + elif epoch > 13: lr /= 128 - elif epoch > 50: + elif epoch > 11: lr /= 64 - elif epoch > 30: + elif epoch > 9: lr /= 32. - elif epoch > 25: + elif epoch > 7: lr /= 16. - elif epoch > 20: + elif epoch > 5: lr /= 8. - elif epoch > 15: + elif epoch > 3: lr /= 4. - elif epoch > 10: + elif epoch > 1: lr /= 2. print('Learning rate: ', lr) return lr +class Model(object): + + def __init__(self, model, weights, augmentation_dict, n_classes): + self.dict = {} + self.model = model + self.dict['weights'] = weights + self.dict['augmentation_dict'] = augmentation_dict + self.dict['n_classes'] = n_classes + + if __name__ == '__main__': - n_classes = 5 + n_classes = 6 input_shape = (None, None, 51) weight_shape = (None, None, n_classes) - filepath = './models/whoknows.h5' + filepath = './models/template_to_fill_in/model.h5' + tb_path = './models/template_to_fill_in/graphs/' + if not os.path.isdir(tb_path): + os.makedirs(tb_path) # Prepare callbacks for model saving and for learning rate adjustment. 
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', verbose=1, save_best_only=True) - tensorboard = TensorBoard(log_dir='graphs/{}'.format(time.time())) + tensorboard = TensorBoard(log_dir=tb_path) lr_scheduler = LearningRateScheduler(lr_schedule) model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=5) opt = tf.keras.optimizers.Adam() model.compile(opt, loss=weighted_loss, metrics=[acc]) - #model.summary() #line_length argument + # model.summary() #line_length argument # irrigated, uncultivated, unirrigated, wetlands, border - class_weights = {0:1.0, 1:1.0, 2:1.0, 3:1.0, 4:1.0} - classes_to_augment = {0:True, 1:False, 2:False, 3:True, 4:True} + class_weights = {0:1, 1:1.0, 2:1.0, 3:1, 4:1.0, 5:1} + classes_to_augment = {0:True, 1:False, 2:False, 3:True, 4:True, 5:False} batch_size = 3 generator = SatDataSequence('/home/thomas/share/training_data/train/', batch_size=batch_size, class_weights=class_weights, classes_to_augment=classes_to_augment) valid_generator = SatDataSequence('/home/thomas/share/training_data/test/', batch_size=batch_size, class_weights=class_weights) model.fit_generator(generator, - epochs=2, + epochs=20, callbacks=[lr_scheduler, checkpoint, tensorboard], - use_multiprocessing=True, + use_multiprocessing=False, validation_data=valid_generator, - workers=12, + workers=1, max_queue_size=20, verbose=1) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 9bb142b..66d0158 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -4,12 +4,7 @@ import pdb from glob import glob from pprint import pprint -from numpy import save as nsave -from fiona import open as fopen -from rasterio.errors import RasterioIOError from collections import defaultdict, OrderedDict -from random import choice -from shapely.geometry import shape from multiprocessing import Pool from 
shapefile_utils import filter_shapefile_overlapping @@ -55,9 +50,12 @@ def assign_shapefile_year(shapefile): image_directory = '/home/thomas/share/image_data/train/' training_data_directory = '/home/thomas/share/training_data/train/' test_data_directory = '/home/thomas/share/training_data/test/' - fs = [f for f in glob(in_train_shapefile_directory + "*.shp")] - tf = [split_out_train_shapefile_directory] * len(fs) - + # fs = [f for f in glob(in_test_shapefile_directory + "*.shp")] + # tf = [split_out_test_shapefile_directory] * len(fs) + # with Pool() as pool: + # pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) + # fs = [f for f in glob(in_train_shapefile_directory + "*.shp")] + # tf = [split_out_train_shapefile_directory] * len(fs) # with Pool() as pool: # pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) extract_training_data_v2(split_out_train_shapefile_directory, image_directory, From e732d5cb832b0b41febf8d6979960547a3fdeda6 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Mon, 5 Aug 2019 08:54:44 -0600 Subject: [PATCH 72/89] commit before installation of internal SSD --- fully-conv-classification/evaluate_image.py | 36 ++++++++++++--------- fully-conv-classification/fully_conv.py | 8 +++-- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 0d65300..1e9d406 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -31,10 +31,12 @@ def fmask_evaluated_image(evaluated_image, path, row, year, landsat_directory): return image, meta -def evaluate_image_many_shot(path, row, year, image_directory, model, num_classes=4, n_overlaps=4, outfile=None, ii=None): +def evaluate_image_many_shot(path, row, year, image_directory, model_path, num_classes=4, n_overlaps=4, outfile=None, ii=None): ''' To recover from same padding, slide many different patches over the image. 
''' - suffix = '{}_{}_{}'.format(path, row, year) + suffix = '{}_{}_{}'.format(path, row, year) image_path = os.path.join(image_directory, suffix) + model = load_model(model_path, custom_objects={'weighted_loss':weighted_loss, 'tf':tf, + '_epsilon':_epsilon}) if not os.path.isdir(image_path): print('Images not downloaded for {}'.format(image_path)) return @@ -45,23 +47,26 @@ def evaluate_image_many_shot(path, row, year, image_directory, model, num_classe except Exception as e: print(e) return - class_mask = np.ones((master.shape[1], master.shape[2], num_classes)) # Just a placeholder - out = np.zeros((master.shape[2], master.shape[1], num_classes)) + class_mask = np.ones((1, image_stack.shape[2], image_stack.shape[1], num_classes)) # Just a placeholder + out = np.zeros((image_stack.shape[2], image_stack.shape[1], num_classes)) image_stack = np.swapaxes(image_stack, 0, 2) + image_stack = np.expand_dims(image_stack, 0) + imshow(image_stack[0, :, :, 9]) + show() chunk_size = 608 diff = 608 stride = 608 overlap_step = 10 for k in range(0, n_overlaps*overlap_step, overlap_step): - for i in range(k, master.shape[1]-diff, stride): - for j in range(k, master.shape[2]-diff, stride): - sub_master = master[i:i+chunk_size, j:j+chunk_size, :] - sub_mask = class_mask[i:i+chunk_size, j:j+chunk_size, :] - preds = model.predict([sub_master, sub_mask]) + for i in range(k, image_stack.shape[1]-diff, stride): + for j in range(k, image_stack.shape[2]-diff, stride): + sub_image_stack = image_stack[:, i:i+chunk_size, j:j+chunk_size, :] + sub_mask = class_mask[:, i:i+chunk_size, j:j+chunk_size, :] + preds = model.predict([sub_image_stack, sub_mask]) preds = np.exp(preds) soft = preds / np.sum(preds, axis=-1, keepdims=True) - out[j:j+chunk_size, i:i+chunk_size, :] += soft[0] - stdout.write("K: {} of {}. Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / master.shape[1])) + out[i:i+chunk_size, j:j+chunk_size, :] += soft[0] + stdout.write("K: {} of {}. 
Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / image_stack.shape[1])) out = np.swapaxes(out, 0, 2) out = out.astype(np.float32) temp_mask = np.zeros((1, out.shape[1], out.shape[2])) @@ -77,9 +82,10 @@ def evaluate_image_many_shot(path, row, year, image_directory, model, num_classe if __name__ == '__main__': - path = 39 - row = 27 + path = 37 + row = 28 year = 2013 image_directory = "/home/thomas/share/image_data/train/" - model = 'models/train_test_montana/2019-08-01-77pacc.h5' - evaluate_image_many_shot(path, row, year, image_directory, model, num_classes=6, n_overlaps=1, outfile='testing_trained_model.tif') + model_path = '/home/thomas/IrrMapper/fully-conv-classification/models/2019-03-08_40pacc_all_unit_weights/model.h5' + evaluate_image_many_shot(path, row, year, image_directory, model_path, num_classes=6, + n_overlaps=1, outfile='bad_accuracy.tif') diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 26761b9..5921f1d 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -7,6 +7,8 @@ from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) from data_generators import SatDataSequence from models import unet_same_padding +config = tf.ConfigProto() +config.gpu_options.allow_growth def weighted_loss(target, output): @@ -102,7 +104,7 @@ def __init__(self, model, weights, augmentation_dict, n_classes): model.compile(opt, loss=weighted_loss, metrics=[acc]) # model.summary() #line_length argument # irrigated, uncultivated, unirrigated, wetlands, border - class_weights = {0:1, 1:1.0, 2:1.0, 3:1, 4:1.0, 5:1} + class_weights = {0:100, 1:1.0, 2:1.0, 3:100, 4:100.0, 5:1.0} classes_to_augment = {0:True, 1:False, 2:False, 3:True, 4:True, 5:False} batch_size = 3 generator = SatDataSequence('/home/thomas/share/training_data/train/', batch_size=batch_size, @@ -112,8 +114,8 @@ def __init__(self, model, weights, 
augmentation_dict, n_classes): model.fit_generator(generator, epochs=20, callbacks=[lr_scheduler, checkpoint, tensorboard], - use_multiprocessing=False, + use_multiprocessing=True, validation_data=valid_generator, - workers=1, + workers=12, max_queue_size=20, verbose=1) From 15742f8f2ac9ef7598122ba4afefc79e81b5933b Mon Sep 17 00:00:00 2001 From: thomas <40218556+tcolligan4@users.noreply.github.com> Date: Wed, 14 Aug 2019 10:23:07 -0400 Subject: [PATCH 73/89] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 1586322..6375cd3 100644 --- a/README.md +++ b/README.md @@ -19,4 +19,3 @@ Then get the latest gdal: Then the latest master branch of rasterio: ```pip install git+https://github.com/mapbox/rasterio.git``` -Don't install latest version of rasterio. Install rasterio version=1.0a12. From 7f1d01e274f29837280c2e0851122546321ebe4e Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sun, 18 Aug 2019 11:01:43 -0600 Subject: [PATCH 74/89] Added multiple classes per tile with better results --- fully-conv-classification/argmax_rasters.py | 40 +- .../compose_array_single_shapefile.py | 413 ------------------ fully-conv-classification/data_generators.py | 315 ++----------- .../evaluate_accuracy.py | 95 ---- fully-conv-classification/evaluate_image.py | 29 +- fully-conv-classification/fully_conv.py | 47 +- fully-conv-classification/run_model_cli.py | 138 ++++++ .../runner_from_shapefile.py | 45 +- fully-conv-classification/shuffle_data.py | 57 --- 9 files changed, 259 insertions(+), 920 deletions(-) delete mode 100644 fully-conv-classification/compose_array_single_shapefile.py delete mode 100644 fully-conv-classification/evaluate_accuracy.py create mode 100644 fully-conv-classification/run_model_cli.py delete mode 100644 fully-conv-classification/shuffle_data.py diff --git a/fully-conv-classification/argmax_rasters.py b/fully-conv-classification/argmax_rasters.py index 0075cd7..b045a15 100755 --- 
a/fully-conv-classification/argmax_rasters.py +++ b/fully-conv-classification/argmax_rasters.py @@ -2,14 +2,11 @@ from rasterio import open as rasopen from rasterio import int32 from glob import glob -from os.path import basename, join -from sys import argv +from os.path import basename, join, dirname, splitext +import argparse +def compute_argmax(f, outfile): -im_path = 'compare_model_outputs/systematic/' -save_path = 'compare_model_outputs/argmax/' - -def get_argmax(f, outfile): with rasopen(f, 'r') as src: arr = src.read() meta = src.meta.copy() @@ -20,18 +17,27 @@ def get_argmax(f, outfile): meta.update(count=1, dtype=int32) with rasopen(outfile, 'w', **meta) as dst: dst.write(arg) - return None + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument("-f", + "--file", + help='geoTIFF to perform argmax on', + required=True) + parser.add_argument('-o', + '--outfile', + help='optional filename for outfile') + + args = parser.parse_args() + if not args.outfile: + outfile = basename(args.file) + outdir = dirname(args.file) + outfile = splitext(outfile)[0] + '_argmax.tif' + outfile = join(outdir, outfile) + compute_argmax(args.file, outfile) + -def main(f): - b = basename(f) - suff = b[:-14] - pref = b[-14:] - outfile = join(save_path, suff + 'argmax_' + pref) - print('Saving argmax raster to {}'.format(outfile)) - get_argmax(f, outfile) -if __name__ == '__main__': - in_f = argv[1] - main(in_f) diff --git a/fully-conv-classification/compose_array_single_shapefile.py b/fully-conv-classification/compose_array_single_shapefile.py deleted file mode 100644 index 81664a4..0000000 --- a/fully-conv-classification/compose_array_single_shapefile.py +++ /dev/null @@ -1,413 +0,0 @@ -# ============================================================================================= -# Copyright 2018 dgketchum -# -# Licensed under the Apache License, Version 2. (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================================= - -import os -import sys - -abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(abspath) -import pickle -from copy import deepcopy -from warnings import warn -from datetime import datetime -from fiona import open as fopen -from numpy import linspace, max, nan, unique, ndarray, swapaxes, zeros, asarray -import h5py -from numpy.random import shuffle -from pandas import DataFrame, Series -import warnings -from pyproj import Proj, transform -from rasterio import open as rasopen -from shapely.geometry import shape, Point, mapping -from shapely.ops import unary_union -from shapefile_utils import get_shapefile_path_row -loc = os.path.dirname(__file__) -WRS_2 = loc.replace('pixel_classification', - os.path.join('spatial_data', 'wrs2_usa_descending.shp')) - -''' -This script contains a class meant to gather data from rasters using a polygon shapefile. -The high-level method `extract_sample` will return an object ready for a -learning algorithm. 
-''' -loc = os.path.dirname(__file__) -WRS_2 = loc.replace('pixel_classification', - os.path.join('spatial_data', 'wrs2_descending.shp')) - -class NoCoordinateReferenceError(Exception): - pass - - -class UnexpectedCoordinateReferenceSystemError(Exception): - pass - - -class ShapefileSamplePoints: - - def __init__(self, shapefile_path=None, sample_point_directory=None, m_instances=None): - self.outfile = os.path.splitext(shapefile_path)[0] - self.outfile += "_sample_points.shp" - if sample_point_directory: - self.outfile = os.path.join(sample_point_directory, self.outfile) - - self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) - self.m_instances = m_instances - self.object_id = 0 - self.shapefile_path = shapefile_path - self.path, self.row = get_shapefile_path_row(shapefile_path) - - def _random_points(self, coords): - min_x, max_x = coords[0], coords[2] - min_y, max_y = coords[1], coords[3] - x_range = linspace(min_x, max_x, num=2 * self.m_instances) - y_range = linspace(min_y, max_y, num=2 * self.m_instances) - shuffle(x_range), shuffle(y_range) - return x_range, y_range - - def _add_entry(self, coord, val=0): - # TODO: Encode class_code in shapefile schema. 
- self.extracted_points = self.extracted_points.append({'FID': int(self.object_id), - 'X': coord[0], - 'Y': coord[1], - 'POINT_TYPE': val}, - ignore_index=True) - self.object_id += 1 - - def save_sample_points(self): - - points_schema = { - 'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]), - 'geometry': 'Point'} - meta = self.tile_geometry.copy() - meta['schema'] = points_schema - - with fopen(self.outfile, 'w', **meta) as output: - for index, row in self.extracted_points.iterrows(): - props = dict([('FID', row['FID']), ('POINT_TYPE', row['POINT_TYPE'])]) - pt = Point(row['X'], row['Y']) - output.write({'properties': props, - 'geometry': mapping(pt)}) - return None - - def _get_polygons(self, vector): - with fopen(vector, 'r') as src: - crs = src.crs - if not crs: - raise NoCoordinateReferenceError( - 'Provided shapefile has no reference data.') - if crs['init'] != 'epsg:4326': - raise UnexpectedCoordinateReferenceSystemError( - 'Provided shapefile should be in unprojected (geographic)' - 'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format( - vector)) - clipped = src.filter(mask=self.tile_bbox) - polys = [] - bad_geo_count = 0 - for feat in clipped: - try: - geo = shape(feat['geometry']) - polys.append(geo) - except AttributeError: - bad_geo_count += 1 - - return polys - - def create_sample_points(self, save_points=True): - """ Create a clipped training set from polygon shapefiles. - - This complicated-looking function finds the wrs_2 descending Landsat tile corresponding - to the path row provided, gets the bounding box and profile (aka meta) from - compose_array.get_tile_geometry, clips the training data to the landsat tile, then perform - s a union to reduce the number of polygon objects. - The dict object this uses has a template in pixel_classification.runspec.py. 
- Approach is to loop through the polygons, create a random grid of points over the - extent of each polygon, random shuffle order of points, loop over points, check if - point is within polygon, and if within, create a sample point. - - If a relatively simple geometry is available, use create_negative_sample_points(), though if - there are > 10**4 polygons, it will probably hang on unary_union(). """ - - polygons = self._get_polygons(self.shapefile_path) - instance_count = 0 - print("Making sample points. You have {} polygons".format(len(polygons))) - print("N_instances:", self.m_instances) - - if len(polygons) > self.m_instances: - areas = zip(polygons, [x.area for x in polygons]) - srt = sorted(areas, key=lambda x: x[1], reverse=True) - polygons = [x for x, y in srt[:self.m_instances]] - - if not isinstance(polygons, list): - polygons = [polygons] # for the case of a single polygon. - - positive_area = sum([x.area for x in polygons]) # the sum of all - # the areas. - class_count = 0 - - for i, poly in enumerate(polygons): - if class_count >= self.m_instances: - break - fractional_area = poly.area / positive_area # percent of - # total area that this polygon occupies - required_points = max([1, fractional_area * self.m_instances]) # how - # many points overall that are required to evenly - # sample from each polygon, based on area. 
- poly_pt_ct = 0 - x_range, y_range = self._random_points(poly.bounds) - for coord in zip(x_range, y_range): - if instance_count >= self.m_instances: - break - if Point(coord[0], coord[1]).within(poly): - self._add_entry(coord) - poly_pt_ct += 1 - instance_count += 1 - # print(instance_count) - if poly_pt_ct >= required_points: - break - class_count += poly_pt_ct - - if save_points: - self.save_sample_points() - - @property - def tile_bbox(self): - with fopen(WRS_2, 'r') as wrs: - for feature in wrs: - fp = feature['properties'] - if fp['PATH'] == self.path and fp['ROW'] == self.row: - bbox = feature['geometry'] - return bbox - - def _get_crs(self): - for key, val in self.paths_map.items(): - with rasopen(val, 'r') as src: - crs = src.crs - break - return crs - - @property - def tile_geometry(self): - with fopen(WRS_2, 'r') as wrs: - wrs_meta = wrs.meta.copy() - return wrs_meta - -class PTASingleShapefile: - # TODO: This class can be easily modified - # for the extraction of pixel-wise data from landsat images - # on a large scale. - def __init__(self, master_raster=None, shapefile_path=None, class_code=None, path=None, - row=None, masked_raster=None, training_directory=None, paths_map=None, masks=None, - instances=None, sz=1000, overwrite_points=None, kernel_size=None, data_filename=None): - self.shapefile_path = shapefile_path - self.path = path - self.object_id = 0 - self.data_filename = data_filename - self.paths_map = paths_map - self.masks = masks - self.row = row - self.training_directory = training_directory - self.overwrite_points=overwrite_points - self.class_code = class_code - self.crs = self._get_crs() - self.m_instances = instances - self.sz = sz - self.master_raster = master_raster - self.data = None - self.kernel_size = kernel_size - self.extracted_points = DataFrame(columns=['FID', 'X', 'Y', 'POINT_TYPE']) - - def extract_sample(self, save_points=True): - # TODO: Pare down this class' methods. 
- # Because of the large data size, pickling output data - # (and therefore using a one-band at a time extraction approach) - # is not feasible. - - out = os.path.splitext(self.shapefile_path)[0] - out += "_sample_points.shp" - if os.path.isfile(out): - print("sample points already created") - self._populate_array_from_points(out) - else: - print("Sample points not detected at {}".format(out)) - if self.master_raster is not None: - self.training_data_from_master_raster() - else: - self.populate_raster_data_array() - - def _populate_array_from_points(self, fname): - - with fopen(fname, 'r') as src: - for feat in src: - coords = feat['geometry']['coordinates'] - val = feat['properties']['POINT_TYPE'] - self._add_entry(coords, val=val) - - def _dump_data(self, data): - n = "class_{}_train.h5".format(self.class_code) - if self.data_filename is None: - to_save = os.path.join(self.training_directory, n) - else: - to_save = self.data_filename - with h5py.File(to_save, 'a') as f: - pref = os.path.basename(self.shapefile_path) - dset = f.create_dataset("{}_{}".format(pref, - str(datetime.now())), data=data) - - def training_data_from_master_raster(self): - - ofs = self.kernel_size // 2 - sz = self.sz # some heuristic that indicates when I run out of memory - tmp_arr = [] - with rasopen(self.master_raster, 'r') as rsrc: - rass_arr = rsrc.read() - affine = rsrc.transform - - for ind, row in self.extracted_points.iterrows(): - # iterate through extracted points. 
- if (ind+1) % sz == 0: - print("Writing to disk...") - qq = asarray(tmp_arr) - del tmp_arr - self._dump_data(qq) - del qq - tmp_arr = [] - - x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) - c, r = ~affine * (x, y) - try: - rr = int(r); cc = int(c) - raster_subgrid = rass_arr[:, rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] - tmp_arr.append(raster_subgrid) - - except IndexError as e: - print(e) - - if len(tmp_arr): - print("Writing to disk...") - qq = asarray(tmp_arr) - self._dump_data(qq) - del qq - del tmp_arr - - def populate_raster_data_array(self, save=True): - - for key, val in self.paths_map.items(): - s = self._grid_raster_extract(val, _name=key) - print('Extracting {}'.format(key)) - self.extracted_points = self.extracted_points.join(s, how='outer') - - for key, val in self.masks.items(): - s = self._grid_raster_extract(val, _name=key) - print('Extracting {}'.format(key)) - self.extracted_points = self.extracted_points.join(s, how='outer') - - data_array, targets = self._purge_raster_array() - data = {'df': data_array, - 'features': data_array.columns.values, - 'data': data_array.values, - 'target_values': targets, - 'paths_map': self.paths_map} - - print('feature dimensions: {}'.format(data_array.shape)) - - for key, val in data.items(): - setattr(self, key, val) - - def _purge_raster_array(self): - data_array = deepcopy(self.extracted_points) - target_vals = Series(data_array.POINT_TYPE.values, name='POINT_TYPE') - data_array.drop(['X', 'Y', 'FID', 'POINT_TYPE'], axis=1, inplace=True) - try: - for msk in self.masks.keys(): - for idx, sub_raster in enumerate(data_array[msk]): - if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 1.: - data_array.loc[idx, :] = nan # make whole row NaN - except TypeError as e: - print(sub_raster, msk, idx) - data_array.loc[idx, :] = nan - - try: - for bnd in self.paths_map.keys(): - for idx, sub_raster in enumerate(data_array[bnd]): - if sub_raster[self.kernel_size // 2][self.kernel_size // 2] == 0.: - 
data_array.loc[idx, :] = nan - except TypeError as e: - data_array.loc[idx, :] = nan - - data_array = data_array.join(target_vals, how='outer') - - data_array.dropna(axis=0, inplace=True) - data_array.drop(self.masks, axis=1, inplace=True) - target_vals = data_array.POINT_TYPE.values - - data_array = data_array.drop(['POINT_TYPE'], - axis=1, inplace=False) - return data_array, target_vals - - def _geo_point_to_projected_coords(self, x, y): - - in_crs = Proj(init='epsg:4326') - out_crs = Proj(init=self.crs['init']) - x, y = transform(in_crs, out_crs, x, y) - return x, y - - def _grid_raster_extract(self, raster, _name): - """ - Open the raster. Store the points in a Series - a labeled - numpy array. Then in _purge array, we iterate over the masks - and the paths_map and drop pixels where masks = 1 and pixels where bound = 0. - """ - - with rasopen(raster, 'r') as rsrc: - rass_arr = rsrc.read() - rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2]) - affine = rsrc.transform - - s = Series(index=range(0, self.extracted_points.shape[0]), name=_name, dtype=object) - for ind, row in self.extracted_points.iterrows(): - x, y = self._geo_point_to_projected_coords(row['X'], row['Y']) - c, r = ~affine * (x, y) - try: - ofs = self.kernel_size // 2 - rr = int(r); cc = int(c) - raster_subgrid = rass_arr[rr-ofs:rr+ofs+1, cc-ofs:cc+ofs+1] # possible issues: edges of image - s[ind] = raster_subgrid - except IndexError: - s[ind] = None - - return s - - @property - def tile_bbox(self): - with fopen(WRS_2, 'r') as wrs: - for feature in wrs: - fp = feature['properties'] - if fp['PATH'] == self.path and fp['ROW'] == self.row: - bbox = feature['geometry'] - return bbox - - def _get_crs(self): - for key, val in self.paths_map.items(): - with rasopen(val, 'r') as src: - crs = src.crs - break - return crs - - @property - def tile_geometry(self): - with fopen(WRS_2, 'r') as wrs: - wrs_meta = wrs.meta.copy() - return wrs_meta diff --git 
a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 1726cd0..0ad687d 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -3,9 +3,9 @@ import os import time import pickle -from matplotlib.pyplot import imshow, show, subplots, colorbar import warnings import pdb +import matplotlib.pyplot as plt from glob import glob from random import sample, shuffle, choice @@ -24,242 +24,21 @@ from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features -def distance_map(mask): - mask = mask.copy().astype(bool) - mask = ~mask # make the non-masked areas masked - distances = distance_transform_edt(mask) # ask where the closest masked pixel is - return distances - - -class DataTile(object): - - def __init__(self, data, one_hot, weights, class_code): - self.dict = {} - self.dict['data'] = data - self.dict['one_hot'] = one_hot - self.dict['class_code'] = class_code - self.dict['weights'] = weights - # Need to split the data into separate classes to play with class balance. - - def to_pickle(self, training_directory): - if not os.path.isdir(training_directory): - os.mkdir(training_directory) - template = os.path.join(training_directory, - 'class_{}_data/'.format(self.dict['class_code'])) - if not os.path.isdir(template): - os.mkdir(template) - outfile = os.path.join(template, str(time.time()) + ".pkl") - if not os.path.isfile(outfile): - with open(outfile, 'wb') as f: - pickle.dump(self.dict, f, protocol=pickle.HIGHEST_PROTOCOL) - else: - print("What? 
Contact administrator.") - - -def _pickle_datatile(datatile, training_directory): - template = os.path.join(training_directory, - 'class_{}_data/'.format(datatile.dict['class_code'])) - if not os.path.isdir(template): - os.mkdir(template) - outfile = os.path.join(template, str(time.time()) + ".pkl") - if not os.path.isfile(outfile): - with open(outfile, 'wb') as f: - pickle.dump(datatile.dict, f, protocol=pickle.HIGHEST_PROTOCOL) - else: - pass - - -def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): - ''' - ``Fmasks'' are masks of clouds and water. We don't want clouds/water in - the training set, so this function gets all the fmasks for a landsat - scene (contained in image_directory), and merges them into one raster. - They may not be the same size, so warp_vrt is used to make them align. - ''' - paths = [] - for dirpath, dirnames, filenames in os.walk(image_directory): - for f in filenames: - for suffix in mask_rasters(): - if f.endswith(suffix): - paths.append(os.path.join(dirpath, f)) - for fmask_file in paths: - fmask, _ = load_raster(fmask_file) - # why is water being misregistered? - # clouds, water present where fmask == 1. 
- try: - class_mask = ma.masked_where(fmask == 1, class_mask) - except (ValueError, IndexError) as e: - fmask = warp_single_image(fmask_file, class_mask_geo) - class_mask = ma.masked_where(fmask == 1, class_mask) - - return class_mask - - -def extract_training_data_v2(split_shapefile_directory, image_directory, - training_data_directory, assign_shapefile_year, assign_shapefile_class_code, n_classes=5): - - split_shapefiles = [f for f in glob(os.path.join(split_shapefile_directory, "*.shp"))] - - done = set() - - total_time = 0 - - for counter, shapefile in enumerate(split_shapefiles): - begin_time = time.time() - if shapefile in done: - continue - _, path, row = os.path.splitext(shapefile)[0][-7:].split('_') - year = assign_shapefile_year(shapefile) - path_row_year = path + '_' + row + '_' + str(year) - print("Extracting data for", path_row_year) - shapefiles_over_same_path_row = all_matching_shapefiles(shapefile, - split_shapefile_directory, assign_shapefile_year) - done.update(shapefiles_over_same_path_row) - image_path = os.path.join(image_directory, path_row_year) - if not os.path.isdir(image_path): - download_from_pr(path, row, year, image_directory) - continue - image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) - try: - mask_file = image_path_map['B1.TIF'][0] - except IndexError: - os.rmdir(os.path.join(image_directory, path_row_year)) - download_from_pr(path, row, year, image_directory) - image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) - mask_file = image_path_map['B1.TIF'][0] - - mask, mask_meta = load_raster(mask_file) - mask = np.zeros_like(mask).astype(np.int) - fmask = concatenate_fmasks(os.path.join(image_directory, path_row_year), mask, - mask_meta) - if fmask.mask.all(): - print("All pixels covered by cloud for {}".format(path_row_year)) - continue - - first = True - class_labels = None - for f in shapefiles_over_same_path_row: - class_code = 
assign_shapefile_class_code(f) - print(f, class_code) - out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) - if first: - class_labels = out - class_labels[~class_labels.mask] = class_code - first = False - else: - class_labels[~out.mask] = class_code - try: - image_stack = stack_rasters_multiprocess(image_path_map, target_geo=mask_meta, target_shape=mask.shape) - except RasterioIOError as e: - print("Redownload images for", path_row_year) - # TODO: remove corrupted file and redownload images. - continue - - image_stack = np.swapaxes(image_stack, 0, 2) - class_labels = np.swapaxes(class_labels, 0, 2) - class_labels = np.squeeze(class_labels) - _iterate_over_image_stack_and_save_training_data(image_stack, class_labels, - training_data_directory, n_classes=n_classes) - end_time = time.time() - diff = end_time - begin_time - total_time += diff - print('single iteration time:', diff, 'avg.', total_time / (counter + 1)) - - -def _iterate_over_image_stack_and_save_training_data(image_stack, class_labels, - training_data_directory, n_classes): - out = [] - # ... could rewrite it in cython. 
- tile_size = 608 - for i in range(0, image_stack.shape[0]-tile_size, tile_size): - for j in range(0, image_stack.shape[1]-tile_size, tile_size): - class_label_tile = class_labels[i:i+tile_size, j:j+tile_size] - if not _check_dimensions_and_min_pixels(class_label_tile, tile_size): - continue - unique = np.unique(class_label_tile) - unique = unique[~unique.mask] - for class_code in unique: - sub_one_hot = _one_hot_from_labels(class_label_tile, class_code, n_classes) - weights = _weights_from_one_hot(sub_one_hot, n_classes) - sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] - dt = DataTile(sub_image_stack, sub_one_hot, weights, class_code) - out.append(dt) - # for d in out/ - # _pickle_datatile(d, training_data_directory) - if len(out): - with Pool() as pool: - td = [training_data_directory]*len(out) - pool.starmap(_pickle_datatile, zip(out, td)) - - -def _weights_from_one_hot(one_hot, n_classes): - weights = np.zeros_like(one_hot) - tmp = np.sum(one_hot, 2) - for i in range(n_classes): - weights[:, :, i] = tmp - return weights.astype(bool) - - -def _one_hot_from_shapefile(shapefile, mask_file, shapefile_class_code, n_classes): - class_labels, _ = mask_raster_to_shapefile(shapefile, mask_file, return_binary=False) - if class_labels.mask.all(): - return None - one_hot = _one_hot_from_labels(class_labels, shapefile_class_code, n_classes) - return one_hot - - -def _one_hot_from_labels(labels, class_code, n_classes): - one_hot = np.zeros((labels.shape[0], labels.shape[1], n_classes)) - one_hot[:, :, class_code][labels == class_code] = 1 - if class_code == 0: # apply border class to only irrigated pixels - border_labels = make_border_labels(one_hot[:, :, 0], border_width=1) - border_labels.astype(np.int) - one_hot[:, :, n_classes-1] = border_labels - return one_hot.astype(np.int) - - -def _check_dimensions_and_min_pixels(sub_one_hot, tile_size): - # 200 is the minimum amount of pixels required to save the data. 
- if sub_one_hot.shape[0] != tile_size or sub_one_hot.shape[1] != tile_size: - return False - xx = np.nonzero(sub_one_hot) - if len(xx[0]) == 0: - return False - return True - - -def all_matching_shapefiles(to_match, shapefile_directory, assign_shapefile_year): - out = [] - pr = get_shapefile_path_row(to_match) - year = assign_shapefile_year(to_match) - for f in glob(os.path.join(shapefile_directory, "*.shp")): - if get_shapefile_path_row(f) == pr and assign_shapefile_year(f) == year: - out.append(f) - return out - - -def make_border_labels(mask, border_width): - ''' Border width: Pixel width. ''' - dm = distance_map(mask) - dm[dm > border_width] = 0 - return dm - - class SatDataSequence(Sequence): def __init__(self, data_directory, batch_size, class_weights={}, - border_width=1, n_classes=5, classes_to_augment=None): + balance=True, single_class_per_tile=True, n_classes=6, classes_to_augment=None): self.data_directory = data_directory - self.n_classes = n_classes self.class_weights = class_weights + self.n_classes = n_classes + self.single_class_per_tile = single_class_per_tile self.batch_size = batch_size self._no_augment = classes_to_augment is None self.classes_to_augment = classes_to_augment - self.border_width = border_width + self.balance = balance self._get_files() self.n_files = len(self.file_list) self.idx = 0 - self.shuffled = sample(self.file_list, self.n_files) def _get_files(self): @@ -275,6 +54,8 @@ def _get_files(self): i += 1 self.lengths = [len(self.file_dict[k]) for k in self.file_dict] self._create_file_list() + shuffle(self.file_list) + def _create_file_list(self): @@ -283,12 +64,15 @@ def _create_file_list(self): for class_dir in self.file_dict: files = self.file_dict[class_dir] self.file_list.extend(files) - if len(files) != max_instances: - if len(files) < (max_instances - len(files)): - files *= (max_instances // len(files)) - shuffle(files) - additional_files = sample(files, max_instances - len(files)) - 
self.file_list.extend(additional_files) + if self.balance: + if len(files) < max_instances: + s = len(files) + if len(files) < (max_instances - len(files)): + files *= (max_instances // len(files)) + shuffle(files) + additional_files = sample(files, max_instances-s) + self.file_list.extend(additional_files) + shuffle(self.file_list) def __len__(self): @@ -297,12 +81,10 @@ def __len__(self): def on_epoch_end(self): self._create_file_list() - shuffle(self.file_list) - self.shuffled = self.file_list def __getitem__(self, idx): - batch = self.shuffled[idx * self.batch_size:(idx + 1)*self.batch_size] + batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] processed = self._make_weights_labels_and_features(data_tiles, self.classes_to_augment) batch_x = processed[0] @@ -317,22 +99,31 @@ def _from_pickle(self, filename): def _make_weights_labels_and_features(self, data_tiles, classes_to_augment): - return self._preprocess_input_data(data_tiles, self.class_weights, - classes_to_augment=classes_to_augment) - - - def _preprocess_input_data(self, data_tiles, class_weights, classes_to_augment=None): + class_weights = self.class_weights features = [] one_hots = [] weight_list = [] for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) + one_hot[0, 0, :] = 0 weights = tile['weights'].astype(np.int) class_code = tile['class_code'] - weights[:][one_hot[:, :, class_code]] = class_weights[class_code] - if class_code == 0: - weights[:][one_hot[:, :, self.n_classes-1]] = class_weights[self.n_classes-1] + if not self.single_class_per_tile: + weights = np.zeros_like(one_hot) + for cc in range(self.n_classes): + for dd in range(self.n_classes): + weights[:, :, cc][one_hot[:, :, dd] == 1] = class_weights[dd] + + assert np.all(weights[:, :, 0] == weights[:, :, 1]) + + if self.single_class_per_tile: + for i in range(self.n_classes): + weights[:, :, i][one_hot[:, :, class_code] == 1] = 
class_weights[class_code] + + if class_code == 0 and self.single_class_per_tile: + for i in range(self.n_classes): + weights[:, :, i][one_hot[:, :, self.n_classes-1] == 1] = class_weights[self.n_classes-1] if not self._no_augment: if classes_to_augment[tile['class_code']]: data, one_hot, weights = _augment_data(data, one_hot, weights) @@ -342,44 +133,6 @@ def _preprocess_input_data(self, data_tiles, class_weights, classes_to_augment=N return [np.asarray(features), np.asarray(weight_list)], [np.asarray(one_hots)] -def _preprocess_input_data(data_tiles, class_weights, classes_to_augment=None, border_width=1): - features = [] - one_hots = [] - weightings = [] - border_class = len(class_weights) - 1 - n_classes = len(class_weights) - for tile in data_tiles: - tile_shape = tile['data'].shape - one_hot = np.zeros((tile_shape[1], tile_shape[2], n_classes)) - weights = np.zeros((tile_shape[1], tile_shape[2], n_classes)) - labels = tile['class_mask'][0] - one_hot[:, :, tile['class_code']] = labels - weights[:][labels == 1] = class_weights[tile['class_code']] - if tile['class_code'] == 0: - border_labels = make_border_labels(tile['class_mask'], - border_width=border_width) - one_hot[:, :, border_class] = border_labels - weights[:][border_labels[0] == 1] = class_weights[border_class] - - feature_tile = np.squeeze(tile['data']) - feature_tile = np.swapaxes(feature_tile, 0, 2) # This is necessary b/c tf expects columns_last (GeoTiffs are columns first). 
- feature_tile = np.swapaxes(feature_tile, 0, 1) - if classes_to_augment is not None: - if classes_to_augment[tile['class_code']]: - # fig, ax = plt.subplots(ncols=2, nrows=2) - # ax[0, 0].imshow(feature_tile[:, :, 18]) - # ax[0, 1].imshow(weights[:, :, tile['class_code']]) - feature_tile, one_hot, weights = _augment_data(feature_tile, one_hot, weights) - # ax[1, 0].imshow(feature_tile[:, :, 18]) - # ax[1, 1].imshow(weights[:, :, tile['class_code']]) - # plt.show() - - features.append(feature_tile) - one_hots.append(one_hot) - weightings.append(weights) - return [np.asarray(features), np.asarray(weightings)], [np.asarray(one_hots)] - - def _flip_lr(feature_tile, one_hot, weights): for i in range(feature_tile.shape[2]): feature_tile[:, :, i] = np.fliplr(feature_tile[:, :, i]) diff --git a/fully-conv-classification/evaluate_accuracy.py b/fully-conv-classification/evaluate_accuracy.py deleted file mode 100644 index d162ff8..0000000 --- a/fully-conv-classification/evaluate_accuracy.py +++ /dev/null @@ -1,95 +0,0 @@ -import os -import numpy as np -import matplotlib.pyplot as plt -from glob import glob -from pprint import pprint -from sys import argv -from data_utils import load_raster -from data_generators import assign_class_code, concatenate_fmasks -from sklearn.metrics import confusion_matrix, jaccard_similarity_score -from shapefile_utils import generate_class_mask, get_shapefile_path_row - - -def evaluate_accuracy(argmaxed_raster, shapefile_test_dir, master_raster_dir, target_dict, - show=False): - shp_dict = {} - # TODO: A weighted accuracy metric might be better. 
- pr = None - for f in glob(shapefile_test_dir + "*.shp"): - pr = get_shapefile_path_row(f) - cc = assign_class_code(target_dict, f) - shp_dict[cc] = f - - class_mask_template = os.path.join(master_raster_dir, "class_mask_{}_{}_2013.tif".format(pr[0], pr[1])) - first = True - out = None - nodata = -1 - for class_code in sorted(shp_dict.keys()): - mask, mask_meta = generate_class_mask(shp_dict[class_code], class_mask_template, - nodata) - if first: - out = np.ones((mask.shape[1], mask.shape[2], len(shp_dict)))*-1 - first = False - out[:, :, class_code][mask[0] != nodata] = 1 - - image_dir = '/home/thomas/share/image_data/test/{}_{}_2013'.format(pr[0], pr[1]) - mask = np.zeros_like(mask) - fmask = concatenate_fmasks(image_dir, mask, mask_meta) - for i in range(out.shape[2]): - out[:, :, i][fmask[0] != 0] = -1 - - bool_mask = np.not_equal(np.sum(out, axis=2), -4) - y_pred, _ = load_raster(argmaxed_raster) - if 'argmax' not in argmaxed_raster: - y_pred = np.argmax(y_pred, axis=0) - y_true = np.argmax(out, axis=2) - - for i in range(5): - y_pred_irr = y_pred[y_true == i] - print("Class {} acc: {}".format(i, np.sum(np.not_equal(y_pred_irr, i)) / y_pred_irr.size)) - - y_pred_masked = y_pred[bool_mask] - y_true_masked = y_true[bool_mask] - print("Confusion mat for {} (all classes):".format(argmaxed_raster)) - cmat = confusion_matrix(y_true_masked, y_pred_masked) - pprint(cmat) - final = np.mean(np.equal(y_pred_masked, y_true_masked)) - print("pixel wise acc {}".format(final)) - print("Class precision:") - print(np.diag(cmat) / np.sum(cmat, axis=0)) - print("Class recall:") - print(np.diag(cmat) / np.sum(cmat, axis=1)) - if show: - fig, ax = plt.subplots(ncols=3) - ax[0].imshow(y_pred[0]) - ax[1].imshow(y_true) - ax[2].imshow(bool_mask) - plt.suptitle('F: {} | acc: {}'.format(argmaxed_raster, final)) - plt.show() - return final - -if __name__ == '__main__': - - irr1 = 'Huntley' - irr2 = 'Sun_River' - fallow = 'Fallow' - forest = 'Forrest' - other = 'other' - target_dict = 
{irr2:0, irr1:0, fallow:1, forest:2, other:3} - shapefile_test_dir = 'shapefile_data/test/' - master_raster_dir = '/home/thomas/share/master_rasters/test/' - if len(argv) > 1: - argmaxed_raster = argv[1] - evaluate_accuracy(argmaxed_raster, shapefile_test_dir, master_raster_dir, target_dict) - else: - rsa = [f for f in glob('compare_model_outputs/during-the-day/' + '*.tif')] - accs = {} - for argmaxed_raster in rsa: - print("-------------------------") - print(argmaxed_raster) - acc = evaluate_accuracy(argmaxed_raster, shapefile_test_dir, master_raster_dir, target_dict) - accs[argmaxed_raster] = acc - - sort = sorted(acc.items(), key=lambda kv: kv[1]) - for key in sort: - print("Raster: {} | acc: {}".format(key, accs[key])) diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 1e9d406..59a7641 100644 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -1,4 +1,5 @@ import os +# os.environ['CUDA_VISIBLE_DEVICES'] = "-1" import numpy as np import keras.backend as K import tensorflow as tf @@ -12,7 +13,7 @@ from data_utils import save_raster, stack_rasters, paths_map_multiple_scenes, load_raster, clip_raster from fully_conv import weighted_loss, weighted_focal_loss -from data_generators import concatenate_fmasks +from extract_training_data import concatenate_fmasks _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) @@ -49,14 +50,13 @@ def evaluate_image_many_shot(path, row, year, image_directory, model_path, num_c return class_mask = np.ones((1, image_stack.shape[2], image_stack.shape[1], num_classes)) # Just a placeholder out = np.zeros((image_stack.shape[2], image_stack.shape[1], num_classes)) - image_stack = np.swapaxes(image_stack, 0, 2) - image_stack = np.expand_dims(image_stack, 0) - imshow(image_stack[0, :, :, 9]) - show() chunk_size = 608 diff = 608 stride = 608 overlap_step = 10 + image_stack = np.swapaxes(image_stack, 0, 2) + image_stack = 
np.expand_dims(image_stack, 0) + print(image_stack.shape) for k in range(0, n_overlaps*overlap_step, overlap_step): for i in range(k, image_stack.shape[1]-diff, stride): for j in range(k, image_stack.shape[2]-diff, stride): @@ -82,10 +82,17 @@ def evaluate_image_many_shot(path, row, year, image_directory, model_path, num_c if __name__ == '__main__': - path = 37 - row = 28 - year = 2013 + paths = [37, 39, 41] + rows = [28, 27, 27] + years = [2013, 2013, 2013] image_directory = "/home/thomas/share/image_data/train/" - model_path = '/home/thomas/IrrMapper/fully-conv-classification/models/2019-03-08_40pacc_all_unit_weights/model.h5' - evaluate_image_many_shot(path, row, year, image_directory, model_path, num_classes=6, - n_overlaps=1, outfile='bad_accuracy.tif') + model_dirs = [d for d in os.listdir('./models/') if 'template_to_fill_in' in d] + model_paths = ['/home/thomas/IrrMapper/fully-conv-classification/models/{}/model.h5'.format(model_dir) for model_dir in model_dirs] + outfile_path = '/home/thomas/IrrMapper/fully-conv-classification/models/{}/' + # outfile_path = outfile_path + "evaluated_{}_{}_{}.tif" + outfile_paths = [outfile_path.format(model_dir) + "evaluated_{}_{}_{}.tif" for model_dir in model_dirs] + for model_path, outfile_path in zip(model_paths, outfile_paths): + print(model_path, outfile_path) + for path, row, year in zip(paths, rows, years): + evaluate_image_many_shot(path, row, year, image_directory, model_path, num_classes=6, + n_overlaps=1, outfile=outfile_path.format(path, row, year)) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py index 5921f1d..51aeb56 100644 --- a/fully-conv-classification/fully_conv.py +++ b/fully-conv-classification/fully_conv.py @@ -6,6 +6,7 @@ import numpy as np from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) from data_generators import SatDataSequence +from functools import partial from models import unet_same_padding config = 
tf.ConfigProto() config.gpu_options.allow_growth @@ -51,8 +52,8 @@ def acc(y_true, y_pred): return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) -def lr_schedule(epoch): - lr = 1e-2 +def lr_schedule(epoch, initial_learning_rate): + lr = 0.0001 if epoch > 15: lr /= 256 elif epoch > 13: @@ -73,23 +74,14 @@ def lr_schedule(epoch): return lr -class Model(object): - - def __init__(self, model, weights, augmentation_dict, n_classes): - self.dict = {} - self.model = model - self.dict['weights'] = weights - self.dict['augmentation_dict'] = augmentation_dict - self.dict['n_classes'] = n_classes - - if __name__ == '__main__': n_classes = 6 input_shape = (None, None, 51) + initial_learning_rate = 1e-4 weight_shape = (None, None, n_classes) filepath = './models/template_to_fill_in/model.h5' - tb_path = './models/template_to_fill_in/graphs/' + tb_path = './models/template_to_fill_in/' + str(time.time()) if not os.path.isdir(tb_path): os.makedirs(tb_path) # Prepare callbacks for model saving and for learning rate adjustment. 
@@ -97,22 +89,33 @@ def __init__(self, model, weights, augmentation_dict, n_classes): monitor='val_acc', verbose=1, save_best_only=True) - tensorboard = TensorBoard(log_dir=tb_path) - lr_scheduler = LearningRateScheduler(lr_schedule) + tensorboard = TensorBoard(log_dir=tb_path, update_freq=30, profile_batch=0) + lr_schedule_func = partial(lr_schedule, initial_learning_rate=initial_learning_rate) + lr_scheduler = LearningRateScheduler(lr_schedule_func) model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=5) opt = tf.keras.optimizers.Adam() model.compile(opt, loss=weighted_loss, metrics=[acc]) # model.summary() #line_length argument # irrigated, uncultivated, unirrigated, wetlands, border - class_weights = {0:100, 1:1.0, 2:1.0, 3:100, 4:100.0, 5:1.0} - classes_to_augment = {0:True, 1:False, 2:False, 3:True, 4:True, 5:False} + class_weights = {0:1, 1:1.0, 2:1.0, 3:1, 4:1.0, 5:1} + classes_to_augment = {0:False, 1:False, 2:False, 3:False, 4:False, 7:False} batch_size = 3 - generator = SatDataSequence('/home/thomas/share/training_data/train/', batch_size=batch_size, - class_weights=class_weights, classes_to_augment=classes_to_augment) - valid_generator = SatDataSequence('/home/thomas/share/training_data/test/', - batch_size=batch_size, class_weights=class_weights) + balance = True + generator = SatDataSequence('/home/thomas/ssd/training_data/train_mc/', batch_size=batch_size, + class_weights=class_weights, single_class_per_tile=False, balance=balance, n_classes=n_classes, classes_to_augment=classes_to_augment) + valid_generator = SatDataSequence('/home/thomas/ssd/training_data/test_mc/', + batch_size=batch_size, balance=False, n_classes=n_classes, single_class_per_tile=False, + class_weights=class_weights) + # m,model.fit_generator(generator, + # m, epochs=20, + # m, callbacks=[lr_scheduler, checkpoint, tensorboard], + # m, use_multiprocessing=False, + # m, validation_data=valid_generator, + # m, workers=1, + # m, max_queue_size=20, + # 
m, verbose=1) model.fit_generator(generator, - epochs=20, + epochs=4, callbacks=[lr_scheduler, checkpoint, tensorboard], use_multiprocessing=True, validation_data=valid_generator, diff --git a/fully-conv-classification/run_model_cli.py b/fully-conv-classification/run_model_cli.py new file mode 100644 index 0000000..4f20e19 --- /dev/null +++ b/fully-conv-classification/run_model_cli.py @@ -0,0 +1,138 @@ +import argparse +import os +import datetime +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +import time +import tensorflow as tf +import tensorflow.keras.backend as K +import numpy as np +import random +from functools import partial +from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) +from tensorflow.keras.models import load_model +from data_generators import SatDataSequence +from models import unet_same_padding + + +_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) + + +def weighted_loss(target, output): + out = -tf.reduce_sum(target*output, len(output.get_shape())-1) + return out + + +def acc(y_true, y_pred): + y_pred_sum = tf.reduce_sum(y_pred, axis=-1) + mask = tf.not_equal(y_pred_sum, 0) + y_arg = tf.argmax(y_pred, axis=-1) + y_t_arg = tf.argmax(y_true, axis=-1) + y_arg_mask = tf.boolean_mask(y_arg, mask) + y_t_arg_mask = tf.boolean_mask(y_t_arg, mask) + return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) + + +def lr_schedule(epoch, initial_learning_rate): + lr = initial_learning_rate + if epoch > 9: + lr /= 32. + elif epoch > 7: + lr /= 16. + elif epoch > 5: + lr /= 8. + elif epoch > 3: + lr /= 4. + elif epoch > 1: + lr /= 3. 
+ print('Learning rate: ', lr) + return lr + + +def _list_to_dict(ls): + dct = {} + for i, e in enumerate(ls): + dct[i] = e + return dct + + +def _save_model_info(root_directory, accuracy, loss, weights, augment, learning_rate, balance, + train_mc): + directory_name = os.path.join("./models", "{:.3f}acc".format(accuracy)) + if os.path.isdir(directory_name): + directory_name = os.path.join("./models", "{:.5f}acc".format(accuracy)) + filename = os.path.join(directory_name, "run_info_{:.3f}acc.txt".format(accuracy)) + os.rename(root_directory, directory_name) + print(filename) + with open(filename, 'w') as f: + print("acc: {:.3f}".format(accuracy), file=f) + print("loss: {}".format(loss), file=f) + print("weights: {}".format(weights), file=f) + print("augment scheme: {}".format(augment), file=f) + print("lr: {}".format(learning_rate), file=f) + print("balance: {}".format(balance), file=f) + print('train w multiple classes per tile: {}'.format(train_mc)) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(fromfile_prefix_chars='@') + parser.add_argument('-f', '--file') + parser.add_argument("-lr", "--learning_rate", type=float) + parser.add_argument("-b", "--balance", type=int) + parser.add_argument("-w", "--weights", nargs=1) + parser.add_argument("-a", "--augment", nargs=1) + args = parser.parse_args() + learning_rate_scheduler = partial(lr_schedule, initial_learning_rate=args.learning_rate) + print("--------------------------------------------------") + args.augment = [bool(int(x)) for x in args.augment[0].split(' ')] + args.weights = [float(x) for x in args.weights[0].split(' ')] + print("---------------HYPERPARAMETERS--------------------") + print(args.learning_rate, args.balance, args.augment, args.weights) + print("--------------------------------------------------") + n_classes = 6 + input_shape = (None, None, 51) + weight_shape = (None, None, n_classes) + unique_path = str(time.time()) + root_directory = './models/{}/'.format(unique_path) + 
model_path = './models/{}/model.h5'.format(unique_path) + tb_path = './models/{}/graphs/'.format(unique_path) + if not os.path.isdir(tb_path): + os.makedirs(tb_path) + # Prepare callbacks for model saving and for learning rate adjustment. + checkpoint = ModelCheckpoint(filepath=model_path, + monitor='val_acc', + verbose=1, + save_best_only=True) + tensorboard = TensorBoard(log_dir=tb_path, update_freq=30, profile_batch=0) + lr_scheduler = LearningRateScheduler(learning_rate_scheduler) + model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=5) + # model.summary() + opt = tf.keras.optimizers.Adam() + model.compile(opt, loss=weighted_loss, metrics=[acc]) + class_weights = _list_to_dict(args.weights) + classes_to_augment = _list_to_dict(args.augment) + batch_size = 3 + balance = args.balance + train_mc = True + generator = SatDataSequence('/home/thomas/ssd/training_data/train_mc/', batch_size=batch_size, + class_weights=class_weights, balance=balance, n_classes=n_classes, + classes_to_augment=classes_to_augment) + class_weights = {0:1, 1:1, 2:1, 3:1, 4:1, 5:1, 6:1} + valid_generator = SatDataSequence('/home/thomas/ssd/training_data/test_mc/', + batch_size=batch_size, balance=False, n_classes=n_classes, + single_class_per_tile=True, class_weights=class_weights) + model.fit_generator(generator, + epochs=7, + callbacks=[lr_scheduler, checkpoint, tensorboard], + use_multiprocessing=True, + validation_data=valid_generator, + workers=12, + shuffle=False, + verbose=1) + model = load_model(model_path, custom_objects={'weighted_loss':weighted_loss, 'tf':tf, + '_epsilon':_epsilon, 'acc':acc}) + accuracy = model.evaluate_generator(valid_generator) + loss = accuracy[0] + accuracy = accuracy[1] + _save_model_info(root_directory, accuracy, loss, class_weights, classes_to_augment, + args.learning_rate, args.balance, train_mc) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index 
66d0158..ba643fc 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -9,8 +9,7 @@ from shapefile_utils import filter_shapefile_overlapping from data_utils import paths_mappings_single_scene, paths_map_multiple_scenes -from runspec import landsat_rasters, static_rasters, climate_rasters -from data_generators import extract_training_data_v2 +from extract_training_data import extract_training_data_multiple_classes_per_instance def assign_shapefile_class_code(shapefile): @@ -31,15 +30,24 @@ def assign_shapefile_year(shapefile): return 2013 +# fs = [f for f in glob(in_test_shapefile_directory + "*.shp")] +# tf = [split_out_test_shapefile_directory] * len(fs) +# with Pool() as pool: +# pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) +# fs = [f for f in glob(in_train_shapefile_directory + "*.shp")] +# tf = [split_out_train_shapefile_directory] * len(fs) +# with Pool() as pool: +# pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) if __name__ == "__main__": # out_shapefile_directory = 'shapefile_data' # shp = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/" # This project is becoming more complicated. # Needs a test / train organization - # 1. Filter shapefiles. Can I fix this? Yes. Online splitting. + # 1. Filter shapefiles. # 2. Download images over shapefiles # 3. Extract training data + # - with offline augmentation. # 4. Train network. # Need to download images. 
@@ -48,24 +56,13 @@ def assign_shapefile_year(shapefile): split_out_train_shapefile_directory = 'shapefile_data/train/' split_out_test_shapefile_directory = 'shapefile_data/test/' image_directory = '/home/thomas/share/image_data/train/' - training_data_directory = '/home/thomas/share/training_data/train/' - test_data_directory = '/home/thomas/share/training_data/test/' - # fs = [f for f in glob(in_test_shapefile_directory + "*.shp")] - # tf = [split_out_test_shapefile_directory] * len(fs) - # with Pool() as pool: - # pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) - # fs = [f for f in glob(in_train_shapefile_directory + "*.shp")] - # tf = [split_out_train_shapefile_directory] * len(fs) - # with Pool() as pool: - # pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) - extract_training_data_v2(split_out_train_shapefile_directory, image_directory, - training_data_directory, assign_shapefile_year, assign_shapefile_class_code, n_classes=6) - extract_training_data_v2(split_out_test_shapefile_directory, image_directory, - test_data_directory, assign_shapefile_year, assign_shapefile_class_code, n_classes=6) - # directories = os.listdir(image_directory) - # test = image_directory + directories[0] - # scene_dates_and_mappings = paths_mappings_single_scene(test) - # from pprint import pprint - # for s in scene_dates_and_mappings: - # print(s) - # pprint(scene_dates_and_mappings[s]) + training_data_directory = '/home/thomas/ssd/training_data/train_mc/' + test_data_directory = '/home/thomas/ssd/training_data/test_mc/' + offline_augmentation_dict = {0:200, 1:608, 2:608, 3:400, 4:200, 5:608} + extract_training_data_multiple_classes_per_instance(split_out_train_shapefile_directory, + image_directory, training_data_directory, assign_shapefile_year, + assign_shapefile_class_code, n_classes=6) + offline_augmentation_dict = {0:608, 1:608, 2:608, 3:608, 4:608, 5:608} + extract_training_data_multiple_classes_per_instance(split_out_test_shapefile_directory, + image_directory, 
test_data_directory, assign_shapefile_year, + assign_shapefile_class_code, n_classes=6) diff --git a/fully-conv-classification/shuffle_data.py b/fully-conv-classification/shuffle_data.py deleted file mode 100644 index ff15bcd..0000000 --- a/fully-conv-classification/shuffle_data.py +++ /dev/null @@ -1,57 +0,0 @@ -import h5py -from collections import defaultdict -import numpy as np - -def one_epoch(filenames, random_indices, class_code, chunk_size=500, n_classes=4): - ''' Filename is the name of the data file, - chunk_size the number of instances that can fit in memory. - ''' - if not isinstance(filenames, list): - filenames = [filenames] - for i in range(0, random_indices.shape[0], chunk_size): - ret = load_sample(filenames, random_indices[i:i+chunk_size]) - yield ret, make_one_hot(np.ones((ret.shape[0]))*class_code, n_classes) - -def make_one_hot(labels, n_classes): - ret = np.zeros((len(labels), n_classes)) - for i, e in enumerate(labels): - ret[i, int(e)] = 1 - return ret - -def load_sample(fnames, random_indices): - ''' Fnames: filenames of all files of class_code class - required_instances: number of instances of training data required ''' - random_indices.sort() - ls = [] - last = 0 - offset = 0 - for f in fnames: - with h5py.File(f, 'r') as hdf5: - for key in hdf5: - if hdf5[key].shape[0]: - last = offset - offset += hdf5[key].shape[0] - indices = random_indices[random_indices < offset] - indices = indices[indices >= last] - try: - ls.append(hdf5[key][indices-last, :, :, :]) - except UnboundLocalError as e: - pass - - flattened = [e for sublist in ls for e in sublist] - return np.asarray(flattened) - - -def get_total_instances(fnames): - total_instances = 0 - num_keys = 0 - for f in fnames: - with h5py.File(f, 'r') as hdf5: - for key in hdf5: - if hdf5[key].shape[0]: - total_instances += hdf5[key].shape[0] - num_keys += 1 - return total_instances, num_keys - -if __name__ == '__main__': - pass From 6c4c71c3e0321c0998bca3af66ac08256ad720ab Mon Sep 17 00:00:00 
2001 From: Thomas Colligan Date: Sun, 18 Aug 2019 11:02:14 -0600 Subject: [PATCH 75/89] Split generators and extraction of data into two separate files --- .../extract_training_data.py | 360 ++++++++++++++++++ 1 file changed, 360 insertions(+) create mode 100644 fully-conv-classification/extract_training_data.py diff --git a/fully-conv-classification/extract_training_data.py b/fully-conv-classification/extract_training_data.py new file mode 100644 index 0000000..82eb7ee --- /dev/null +++ b/fully-conv-classification/extract_training_data.py @@ -0,0 +1,360 @@ +import numpy as np +import numpy.ma as ma +import os +import time +import pickle +import warnings +import pdb +import matplotlib.pyplot as plt + +from glob import glob +from random import sample, shuffle, choice +from scipy.ndimage.morphology import distance_transform_edt +from rasterio import open as rasopen +from rasterio.errors import RasterioIOError +from skimage import transform +from sat_image.warped_vrt import warp_single_image +from tensorflow.keras.utils import Sequence +from multiprocessing import Pool +from collections import defaultdict + +from runspec import mask_rasters +from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr +from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features + + +def distance_map(mask): + mask = mask.copy().astype(bool) + mask = ~mask # make the non-masked areas masked + distances = distance_transform_edt(mask) # ask where the closest masked pixel is + return distances + + +class DataTile(object): + + def __init__(self, data, one_hot, weights, class_code): + self.dict = {} + self.dict['data'] = data + self.dict['one_hot'] = one_hot + self.dict['class_code'] = class_code + self.dict['weights'] = weights + # Need to split the data into separate classes to play with class balance. 
+ + def to_pickle(self, training_directory): + if not os.path.isdir(training_directory): + os.mkdir(training_directory) + template = os.path.join(training_directory, + 'class_{}_data/'.format(self.dict['class_code'])) + if not os.path.isdir(template): + os.mkdir(template) + outfile = os.path.join(template, str(time.time()) + ".pkl") + if not os.path.isfile(outfile): + with open(outfile, 'wb') as f: + pickle.dump(self.dict, f, protocol=pickle.HIGHEST_PROTOCOL) + else: + print("What? Contact administrator.") + + +def _pickle_datatile(datatile, training_directory): + template = os.path.join(training_directory, + 'class_{}_data/'.format(datatile.dict['class_code'])) + if not os.path.isdir(template): + os.mkdir(template) + outfile = os.path.join(template, str(time.time()) + ".pkl") + if not os.path.isfile(outfile): + with open(outfile, 'wb') as f: + pickle.dump(datatile.dict, f, protocol=pickle.HIGHEST_PROTOCOL) + else: + pass + + +def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): + ''' + ``Fmasks'' are masks of clouds and water. We don't want clouds/water in + the training set, so this function gets all the fmasks for a landsat + scene (contained in image_directory), and merges them into one raster. + They may not be the same size, so warp_vrt is used to make them align. + ''' + paths = [] + for dirpath, dirnames, filenames in os.walk(image_directory): + for f in filenames: + for suffix in mask_rasters(): + if f.endswith(suffix): + paths.append(os.path.join(dirpath, f)) + for fmask_file in paths: + fmask, _ = load_raster(fmask_file) + # clouds, water present where fmask == 1. 
+ try: + class_mask = ma.masked_where(fmask == 1, class_mask) + except (ValueError, IndexError) as e: + fmask = warp_single_image(fmask_file, class_mask_geo) + class_mask = ma.masked_where(fmask == 1, class_mask) + + return class_mask + + +def extract_training_data_multiple_classes_per_instance(split_shapefile_directory, image_directory, + training_data_directory, assign_shapefile_year, assign_shapefile_class_code, n_classes=5): + + split_shapefiles = [f for f in glob(os.path.join(split_shapefile_directory, "*.shp"))] + + done = set() + + total_time = 0 + + for counter, shapefile in enumerate(split_shapefiles): + begin_time = time.time() + if shapefile in done: + continue + _, path, row = os.path.splitext(shapefile)[0][-7:].split('_') + year = assign_shapefile_year(shapefile) + path_row_year = path + '_' + row + '_' + str(year) + print("Extracting data for", path_row_year) + shapefiles_over_same_path_row = all_matching_shapefiles(shapefile, + split_shapefile_directory, assign_shapefile_year) + done.update(shapefiles_over_same_path_row) + image_path = os.path.join(image_directory, path_row_year) + if not os.path.isdir(image_path): + download_from_pr(path, row, year, image_directory) + continue + image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) + try: + mask_file = image_path_map['B1.TIF'][0] + except IndexError: + os.rmdir(os.path.join(image_directory, path_row_year)) + download_from_pr(path, row, year, image_directory) + image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) + mask_file = image_path_map['B1.TIF'][0] + + mask, mask_meta = load_raster(mask_file) + mask = np.zeros_like(mask).astype(np.int) + fmask = concatenate_fmasks(os.path.join(image_directory, path_row_year), mask, + mask_meta) + if fmask.mask.all(): + print("All pixels covered by cloud for {}".format(path_row_year)) + continue + + first = True + class_labels = None + for f in shapefiles_over_same_path_row: + class_code = 
assign_shapefile_class_code(f) + print(f, class_code) + out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) + if first: + class_labels = out + class_labels[~class_labels.mask] = class_code + first = False + else: + class_labels[~out.mask] = class_code + try: + image_stack = stack_rasters_multiprocess(image_path_map, target_geo=mask_meta, target_shape=mask.shape) + except RasterioIOError as e: + print("Redownload images for", path_row_year) + # TODO: remove corrupted file and redownload images. + continue + class_labels[fmask.mask] = ma.masked # well, I didn't fmask the data. + image_stack = np.swapaxes(image_stack, 0, 2) + class_labels = np.swapaxes(class_labels, 0, 2) + class_labels = np.squeeze(class_labels) + _save_training_data_multiple_classes(image_stack, class_labels, + training_data_directory, n_classes) + end_time = time.time() + diff = end_time - begin_time + total_time += diff + print('single iteration time:', diff, 'avg.', total_time / (counter + 1)) + + +def _save_training_data_multiple_classes(image_stack, class_labels, training_data_directory, n_classes): + tile_size = 608 + out = [] + class_code = 7 # dummy... 
+ for i in range(0, image_stack.shape[0]-tile_size, tile_size): + for j in range(0, image_stack.shape[1]-tile_size, tile_size): + class_label_tile = class_labels[i:i+tile_size, j:j+tile_size] + if np.all(class_label_tile.mask == True): + continue + sub_one_hot = _one_hot_from_labels_mc(class_label_tile, n_classes) + weights = _weights_from_one_hot(sub_one_hot, n_classes) + sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] + dt = DataTile(sub_image_stack, sub_one_hot, weights, class_code) + out.append(dt) + if len(out) > 50: + with Pool() as pool: + td = [training_data_directory]*len(out) + pool.starmap(_pickle_datatile, zip(out, td)) + out = [] + if len(out): + with Pool() as pool: + td = [training_data_directory]*len(out) + pool.starmap(_pickle_datatile, zip(out, td)) + out = [] + + +def _one_hot_from_labels_mc(labels, n_classes): + one_hot = np.zeros((labels.shape[0], labels.shape[1], n_classes)) + for class_code in range(n_classes): + one_hot[:, :, class_code][labels == class_code] = 1 + if class_code == 0: # apply border class to only irrigated pixels + border_labels = make_border_labels(one_hot[:, :, 0], border_width=1) + border_labels.astype(np.int) + one_hot[:, :, n_classes-1] = border_labels + return one_hot.astype(np.int) + + +def extract_training_data_single_class_per_instance(split_shapefile_directory, image_directory, + training_data_directory, assign_shapefile_year, assign_shapefile_class_code, + offline_augmentation_dict=None, n_classes=5): + + split_shapefiles = [f for f in glob(os.path.join(split_shapefile_directory, "*.shp"))] + + done = set() + + total_time = 0 + + for counter, shapefile in enumerate(split_shapefiles): + begin_time = time.time() + if shapefile in done: + continue + _, path, row = os.path.splitext(shapefile)[0][-7:].split('_') + year = assign_shapefile_year(shapefile) + path_row_year = path + '_' + row + '_' + str(year) + print("Extracting data for", path_row_year) + shapefiles_over_same_path_row = 
all_matching_shapefiles(shapefile, + split_shapefile_directory, assign_shapefile_year) + done.update(shapefiles_over_same_path_row) + image_path = os.path.join(image_directory, path_row_year) + if not os.path.isdir(image_path): + download_from_pr(path, row, year, image_directory) + continue + image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) + try: + mask_file = image_path_map['B1.TIF'][0] + except IndexError: + os.rmdir(os.path.join(image_directory, path_row_year)) + download_from_pr(path, row, year, image_directory) + image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) + mask_file = image_path_map['B1.TIF'][0] + + mask, mask_meta = load_raster(mask_file) + mask = np.zeros_like(mask).astype(np.int) + fmask = concatenate_fmasks(os.path.join(image_directory, path_row_year), mask, + mask_meta) + if fmask.mask.all(): + print("All pixels covered by cloud for {}".format(path_row_year)) + continue + first = True + class_labels = None + for f in shapefiles_over_same_path_row: + class_code = assign_shapefile_class_code(f) + if offline_augmentation_dict[class_code] == 0: + continue + print(f, class_code) + out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) + if first: + class_labels = out + class_labels[~class_labels.mask] = class_code + first = False + else: + class_labels[~out.mask] = class_code + if class_labels is None: + print("no extra augmentation for", path_row_year) + continue + try: + image_stack = stack_rasters_multiprocess(image_path_map, target_geo=mask_meta, target_shape=mask.shape) + except RasterioIOError as e: + print("Redownload images for", path_row_year) + # TODO: remove corrupted file and redownload images. + continue + class_labels[fmask.mask] = ma.masked # well, I didn't fmask the data. 
+ image_stack = np.swapaxes(image_stack, 0, 2) + class_labels = np.swapaxes(class_labels, 0, 2) + class_labels = np.squeeze(class_labels) + _save_training_data_offline_augmentation(image_stack, class_labels, + training_data_directory, n_classes, offline_augmentation_dict) + end_time = time.time() + diff = end_time - begin_time + total_time += diff + print('single iteration time:', diff, 'avg.', total_time / (counter + 1)) + + +def _save_training_data_offline_augmentation(image_stack, class_labels, + training_data_directory, n_classes, offline_augmentation_dict): + unique = np.unique(class_labels) + unique = unique[~unique.mask] + tile_size = 608 + for class_code in unique: + out = [] + augmentation_step = offline_augmentation_dict[class_code] + for i in range(0, image_stack.shape[0]-tile_size, augmentation_step): + for j in range(0, image_stack.shape[1]-tile_size, augmentation_step): + class_label_tile = class_labels[i:i+tile_size, j:j+tile_size] + if not _check_dimensions_and_min_pixels(class_label_tile, class_code, tile_size): + continue + sub_one_hot = _one_hot_from_labels(class_label_tile, class_code, n_classes) + weights = _weights_from_one_hot(sub_one_hot, n_classes) + sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] + dt = DataTile(sub_image_stack, sub_one_hot, weights, class_code) + out.append(dt) + if len(out) > 50: + with Pool() as pool: + td = [training_data_directory]*len(out) + pool.starmap(_pickle_datatile, zip(out, td)) + out = [] + if len(out): + with Pool() as pool: + td = [training_data_directory]*len(out) + pool.starmap(_pickle_datatile, zip(out, td)) + out = [] + + +def _weights_from_one_hot(one_hot, n_classes): + weights = np.zeros_like(one_hot) + tmp = np.sum(one_hot, 2) + for i in range(n_classes): + weights[:, :, i] = tmp + return weights.astype(bool) + + +def _one_hot_from_shapefile(shapefile, mask_file, shapefile_class_code, n_classes): + class_labels, _ = mask_raster_to_shapefile(shapefile, mask_file, return_binary=False) + 
if class_labels.mask.all(): + return None + one_hot = _one_hot_from_labels(class_labels, shapefile_class_code, n_classes) + return one_hot + + +def _one_hot_from_labels(labels, class_code, n_classes): + one_hot = np.zeros((labels.shape[0], labels.shape[1], n_classes)) + one_hot[:, :, class_code][labels == class_code] = 1 + if class_code == 0: # apply border class to only irrigated pixels + border_labels = make_border_labels(one_hot[:, :, 0], border_width=1) + border_labels.astype(np.int) + one_hot[:, :, n_classes-1] = border_labels + return one_hot.astype(np.int) + + +def _check_dimensions_and_min_pixels(sub_one_hot, class_code, tile_size): + # 200 is the minimum amount of pixels required to save the data. + if sub_one_hot.shape[0] != tile_size or sub_one_hot.shape[1] != tile_size: + return False + xx = np.where(sub_one_hot == class_code) + if len(xx[0]) == 0: + return False + return True + + +def all_matching_shapefiles(to_match, shapefile_directory, assign_shapefile_year): + out = [] + pr = get_shapefile_path_row(to_match) + year = assign_shapefile_year(to_match) + for f in glob(os.path.join(shapefile_directory, "*.shp")): + if get_shapefile_path_row(f) == pr and assign_shapefile_year(f) == year: + out.append(f) + return out + + +def make_border_labels(mask, border_width): + ''' Border width: Pixel width. 
''' + dm = distance_map(mask) + dm[dm > border_width] = 0 + return dm From 9c08f3ee91b3f03e6139117509b2b9c782403d68 Mon Sep 17 00:00:00 2001 From: thomas <40218556+tcolligan4@users.noreply.github.com> Date: Mon, 26 Aug 2019 12:05:16 -0600 Subject: [PATCH 76/89] Update README.md --- README.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/README.md b/README.md index 6375cd3..e4f658e 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,44 @@ Then get the latest gdal: Then the latest master branch of rasterio: ```pip install git+https://github.com/mapbox/rasterio.git``` + +Install Metio: + +```pip install git+https://github.com/tcolligan4/Metio.git``` + +Install SatelliteImage: + +```pip install git+https://github.com/dgketchum/satellite_image.git``` + +# usage +# 1 +Edit the file ```runspec.py``` and fill out the two methods ```assign_shapefile_class_code``` and ```assign_shapefile_year```. These functions take as input a path to a shapefile and return integers corresponding to the class code of the vector data in the shapefile and the year the data was recorded, respectively. This means shapefiles should be split up according to class code and year before the next step. + +Also in ``runpsec.py``, select the bands you want by editing ``landsat_rasters()``, ``static_rasters()``, and ``climate_rasters()``. ``mask_rasters()`` specifies water and cloud masks. +# 2 +use split_shapefile.py to split the training data shapefiles into WRS2 descending path/rows. +```python split_shapefile.py --shapefile-dir /home/thomas/training_data/ --output-dir /home/thomas/split_training_data/``` +Default extension is .shp for the input shapefiles. +# 3 +run extract_training_data.py to extract training data. This relies on the methods ``assign_shapefile_class_code`` and ``assign_shapefile_year`` that reside in ``runspec.py``. It also downloads all image data to image-dir. 
Right now, it downloads all 11 Landsat bands for 3 scenes from may-october. I need to figure out how to change this. +```python extract_training_data.py --shapefile-dir /home/thomas/split_training_data --image-dir /home/thomas/landsat/ --training-dir /home/thomas/irrmapper/data/train/ --n-classes 5``` +Before running this, check ```_one_hot_from_labels_mc()``` in ```extract_training_data.py```. This applies a border class to shapefile data of class code 0 for reasons related to mapping irrigation. If this is not what you want, comment out the conditional in this function. + +# 4 +train a model with train_model.py. + +# TODO: +Make training a model easier (i.e. don't require a separate weights matrix and stop computing softmax within the network) +Make the images downloaded pull from runspec.py, not automatically download all possible bands +Implement IoU (multiclass dice loss). +add binary classification possibililty +add raster training data extraction +add paths_map_single_scene flag to extract_training_data + + + + + + + + From 1f299ee230e476c67e752192ce6b72c17a4c5a09 Mon Sep 17 00:00:00 2001 From: thomas <40218556+tcolligan4@users.noreply.github.com> Date: Tue, 27 Aug 2019 10:01:21 -0600 Subject: [PATCH 77/89] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e4f658e..b19858c 100644 --- a/README.md +++ b/README.md @@ -29,20 +29,20 @@ Install SatelliteImage: ```pip install git+https://github.com/dgketchum/satellite_image.git``` # usage -# 1 +## 1 Edit the file ```runspec.py``` and fill out the two methods ```assign_shapefile_class_code``` and ```assign_shapefile_year```. These functions take as input a path to a shapefile and return integers corresponding to the class code of the vector data in the shapefile and the year the data was recorded, respectively. This means shapefiles should be split up according to class code and year before the next step. 
Also in ``runpsec.py``, select the bands you want by editing ``landsat_rasters()``, ``static_rasters()``, and ``climate_rasters()``. ``mask_rasters()`` specifies water and cloud masks. -# 2 +## 2 use split_shapefile.py to split the training data shapefiles into WRS2 descending path/rows. ```python split_shapefile.py --shapefile-dir /home/thomas/training_data/ --output-dir /home/thomas/split_training_data/``` Default extension is .shp for the input shapefiles. -# 3 +## 3 run extract_training_data.py to extract training data. This relies on the methods ``assign_shapefile_class_code`` and ``assign_shapefile_year`` that reside in ``runspec.py``. It also downloads all image data to image-dir. Right now, it downloads all 11 Landsat bands for 3 scenes from may-october. I need to figure out how to change this. ```python extract_training_data.py --shapefile-dir /home/thomas/split_training_data --image-dir /home/thomas/landsat/ --training-dir /home/thomas/irrmapper/data/train/ --n-classes 5``` Before running this, check ```_one_hot_from_labels_mc()``` in ```extract_training_data.py```. This applies a border class to shapefile data of class code 0 for reasons related to mapping irrigation. If this is not what you want, comment out the conditional in this function. -# 4 +## 4 train a model with train_model.py. # TODO: From b1acb540dd950d9c8d909c0822e60d0199b8d60e Mon Sep 17 00:00:00 2001 From: thomas <40218556+tcolligan4@users.noreply.github.com> Date: Tue, 27 Aug 2019 10:02:45 -0600 Subject: [PATCH 78/89] Update README.md --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b19858c..49b48d9 100644 --- a/README.md +++ b/README.md @@ -32,14 +32,18 @@ Install SatelliteImage: ## 1 Edit the file ```runspec.py``` and fill out the two methods ```assign_shapefile_class_code``` and ```assign_shapefile_year```. 
These functions take as input a path to a shapefile and return integers corresponding to the class code of the vector data in the shapefile and the year the data was recorded, respectively. This means shapefiles should be split up according to class code and year before the next step. -Also in ``runpsec.py``, select the bands you want by editing ``landsat_rasters()``, ``static_rasters()``, and ``climate_rasters()``. ``mask_rasters()`` specifies water and cloud masks. +Also in ``runspec.py``, select the bands you want by editing ``landsat_rasters()``, ``static_rasters()``, and ``climate_rasters()``. ``mask_rasters()`` specifies water and cloud masks. ## 2 -use split_shapefile.py to split the training data shapefiles into WRS2 descending path/rows. -```python split_shapefile.py --shapefile-dir /home/thomas/training_data/ --output-dir /home/thomas/split_training_data/``` +use split_shapefile.py to split the training data shapefiles into WRS2 descending path/rows. Ex: +``` +python split_shapefile.py --shapefile-dir /home/thomas/training_data/ --output-dir /home/thomas/split_training_data/ +``` Default extension is .shp for the input shapefiles. ## 3 -run extract_training_data.py to extract training data. This relies on the methods ``assign_shapefile_class_code`` and ``assign_shapefile_year`` that reside in ``runspec.py``. It also downloads all image data to image-dir. Right now, it downloads all 11 Landsat bands for 3 scenes from may-october. I need to figure out how to change this. -```python extract_training_data.py --shapefile-dir /home/thomas/split_training_data --image-dir /home/thomas/landsat/ --training-dir /home/thomas/irrmapper/data/train/ --n-classes 5``` +run extract_training_data.py to extract training data. This relies on the methods ``assign_shapefile_class_code`` and ``assign_shapefile_year`` that reside in ``runspec.py``. It also downloads all image data to image-dir. 
+``` +python extract_training_data.py --shapefile-dir /home/thomas/split_training_data --image-dir /home/thomas/landsat/ --training-dir /home/thomas/irrmapper/data/train/ --n-classes 5 +``` Before running this, check ```_one_hot_from_labels_mc()``` in ```extract_training_data.py```. This applies a border class to shapefile data of class code 0 for reasons related to mapping irrigation. If this is not what you want, comment out the conditional in this function. ## 4 From bf865880a283db1fb193e839116937455deaeb43 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 27 Aug 2019 12:37:22 -0600 Subject: [PATCH 79/89] Update README.md --- README.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 49b48d9..cbfffae 100644 --- a/README.md +++ b/README.md @@ -51,16 +51,9 @@ train a model with train_model.py. # TODO: Make training a model easier (i.e. don't require a separate weights matrix and stop computing softmax within the network) -Make the images downloaded pull from runspec.py, not automatically download all possible bands +Extract new data sets using new model Implement IoU (multiclass dice loss). add binary classification possibililty add raster training data extraction +Make the images downloaded pull from runspec.py, not automatically download all possible bands add paths_map_single_scene flag to extract_training_data - - - - - - - - From 13c92fe77d7ff36fc0d64329895fecb5aedb04a6 Mon Sep 17 00:00:00 2001 From: thomas <40218556+tcolligan4@users.noreply.github.com> Date: Tue, 27 Aug 2019 12:39:03 -0600 Subject: [PATCH 80/89] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index cbfffae..f734483 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,13 @@ train a model with train_model.py. # TODO: Make training a model easier (i.e. 
don't require a separate weights matrix and stop computing softmax within the network) Extract new data sets using new model + Implement IoU (multiclass dice loss). + add binary classification possibililty + add raster training data extraction + Make the images downloaded pull from runspec.py, not automatically download all possible bands + add paths_map_single_scene flag to extract_training_data From fc10aef12f8d7e533373fee24137eb4edb4ec552 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Tue, 27 Aug 2019 13:37:16 -0600 Subject: [PATCH 81/89] simplified input pipeline. Removed explicit weight matrix --- fully-conv-classification/argmax_rasters.py | 9 +- fully-conv-classification/data_generators.py | 32 ++- fully-conv-classification/data_utils.py | 38 +-- .../extract_training_data.py | 36 ++- fully-conv-classification/fully_conv.py | 124 --------- fully-conv-classification/models.py | 250 +----------------- fully-conv-classification/run_model_cli.py | 138 ---------- .../runner_from_shapefile.py | 40 +-- fully-conv-classification/runspec.py | 153 ++--------- fully-conv-classification/split_shapefiles.py | 20 ++ fully-conv-classification/train_model.py | 232 ++++++++++++++++ 11 files changed, 375 insertions(+), 697 deletions(-) delete mode 100644 fully-conv-classification/fully_conv.py delete mode 100644 fully-conv-classification/run_model_cli.py create mode 100644 fully-conv-classification/split_shapefiles.py create mode 100644 fully-conv-classification/train_model.py diff --git a/fully-conv-classification/argmax_rasters.py b/fully-conv-classification/argmax_rasters.py index b045a15..be33f1d 100755 --- a/fully-conv-classification/argmax_rasters.py +++ b/fully-conv-classification/argmax_rasters.py @@ -1,6 +1,6 @@ import numpy as np from rasterio import open as rasopen -from rasterio import int32 +from rasterio import int32, float32 from glob import glob from os.path import basename, join, dirname, splitext import argparse @@ -11,12 +11,15 @@ def compute_argmax(f, 
outfile): arr = src.read() meta = src.meta.copy() + irr = arr[0] + irr[irr < 0.3] = np.nan + irr = np.expand_dims(irr, 0) arg = np.argmax(arr, axis=0) arg = np.expand_dims(arg, axis=0) arg = arg.astype(np.int32) - meta.update(count=1, dtype=int32) + meta.update(count=1, dtype=float32) with rasopen(outfile, 'w', **meta) as dst: - dst.write(arg) + dst.write(irr) if __name__ == '__main__': diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 0ad687d..cf545a9 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -26,10 +26,15 @@ class SatDataSequence(Sequence): - def __init__(self, data_directory, batch_size, class_weights={}, - balance=True, single_class_per_tile=True, n_classes=6, classes_to_augment=None): + def __init__(self, data_directory, batch_size, class_weights=None, + balance=True, single_class_per_tile=True, n_classes=5, classes_to_augment=None): self.data_directory = data_directory self.class_weights = class_weights + if self.class_weights is None: + dct = {} + for i in range(n_classes): + dct[i] = 1 + self.class_weights = dct self.n_classes = n_classes self.single_class_per_tile = single_class_per_tile self.batch_size = batch_size @@ -86,7 +91,7 @@ def on_epoch_end(self): def __getitem__(self, idx): batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] - processed = self._make_weights_labels_and_features(data_tiles, self.classes_to_augment) + processed = self._labels_and_features(data_tiles, self.classes_to_augment) batch_x = processed[0] batch_y = processed[1] return batch_x, batch_y @@ -97,6 +102,27 @@ def _from_pickle(self, filename): data = pickle.load(f) return data + def _apply_weights(self, one_hot): + for i in range(self.n_classes): + one_hot[:, :, i] *= self.class_weights[i] + + + def _labels_and_features(self, data_tiles, classes_to_augment): + features = [] + 
one_hots = [] + for tile in data_tiles: + data = tile['data'] + one_hot = tile['one_hot'].astype(np.int) + one_hot[0, 0, :] = 0 + self._apply_weights(one_hot) + class_code = tile['class_code'] + if not self._no_augment: + if classes_to_augment[tile['class_code']]: + data, one_hot, weights = _augment_data(data, one_hot, weights) + features.append(data) + one_hots.append(one_hot) + return [np.asarray(features)], [np.asarray(one_hots)] + def _make_weights_labels_and_features(self, data_tiles, classes_to_augment): class_weights = self.class_weights diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 2fc24cf..07f0df7 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -43,9 +43,9 @@ def download_images_over_shapefile(shapefile, image_directory, year): return ims -def download_from_pr(p, r, year, image_directory): +def download_from_pr(p, r, year, image_directory, landsat_bands, climate_bands): '''Downloads p/r corresponding to the location of - the shapefile, and creates master raster''' + the shapefile.''' # TODO: add rasterioIOError error checking # and resolution here. 
suff = str(p) + '_' + str(r) + "_" + str(year) @@ -55,16 +55,15 @@ def download_from_pr(p, r, year, image_directory): satellite = 7 if not os.path.isdir(landsat_dir): os.mkdir(landsat_dir) - ims = _download_images(landsat_dir, p, r, year, satellite) - else: - ims = _download_images(landsat_dir, p, r, year, satellite) + ims = _download_images(landsat_dir, p, r, year, satellite, landsat_bands, climate_bands) return ims -def _download_images(project_directory, path, row, year, satellite=8, n_landsat=3, - max_cloud_pct=40): +def _download_images(project_directory, path, row, year, satellite, landsat_bands, climate_bands, + n_landsat=3, max_cloud_pct=40): - image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, + image_stack = ImageStack(satellite=satellite, path=path, landsat_bands=landsat_bands, + climate_bands=climate_bands, row=row, root=project_directory, max_cloud_pct=max_cloud_pct, n_landsat=n_landsat, year=year) image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is @@ -312,14 +311,19 @@ def load_raster(raster_name): meta = src.meta.copy() return arr, meta +if __name__ == "__main__": + + from runspec import landsat_rasters, climate_rasters + download_from_pr(37, 28, 2013, '/home/thomas/landsat_test/', landsat_rasters(), + climate_rasters()) + + + + + + + + + -def save_model_info(outfile, args): - template = '{}={}|' - with open(outfile, 'a') as f: - for key in args: - f.write(template.format(key, args[key])) - f.write("\n-------------------\n") - print("wrote run info to {}".format(outfile)) -if __name__ == "__main__": - pass diff --git a/fully-conv-classification/extract_training_data.py b/fully-conv-classification/extract_training_data.py index 82eb7ee..0a4ca9b 100644 --- a/fully-conv-classification/extract_training_data.py +++ b/fully-conv-classification/extract_training_data.py @@ -4,6 +4,7 @@ import time import pickle import warnings +import argparse import pdb import matplotlib.pyplot as 
plt @@ -18,7 +19,7 @@ from multiprocessing import Pool from collections import defaultdict -from runspec import mask_rasters +from runspec import landsat_rasters, climate_rasters, mask_rasters, assign_shapefile_class_code, assign_shapefile_year from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features @@ -32,13 +33,11 @@ def distance_map(mask): class DataTile(object): - def __init__(self, data, one_hot, weights, class_code): + def __init__(self, data, one_hot, class_code): self.dict = {} self.dict['data'] = data self.dict['one_hot'] = one_hot self.dict['class_code'] = class_code - self.dict['weights'] = weights - # Need to split the data into separate classes to play with class balance. def to_pickle(self, training_directory): if not os.path.isdir(training_directory): @@ -115,8 +114,7 @@ def extract_training_data_multiple_classes_per_instance(split_shapefile_director done.update(shapefiles_over_same_path_row) image_path = os.path.join(image_directory, path_row_year) if not os.path.isdir(image_path): - download_from_pr(path, row, year, image_directory) - continue + download_from_pr(path, row, year, image_directory, landsat_rasters, climate_rasters) image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) try: mask_file = image_path_map['B1.TIF'][0] @@ -174,9 +172,8 @@ def _save_training_data_multiple_classes(image_stack, class_labels, training_dat if np.all(class_label_tile.mask == True): continue sub_one_hot = _one_hot_from_labels_mc(class_label_tile, n_classes) - weights = _weights_from_one_hot(sub_one_hot, n_classes) sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] - dt = DataTile(sub_image_stack, sub_one_hot, weights, class_code) + dt = DataTile(sub_image_stack, sub_one_hot, class_code) out.append(dt) if len(out) > 50: with 
Pool() as pool: @@ -224,14 +221,14 @@ def extract_training_data_single_class_per_instance(split_shapefile_directory, i done.update(shapefiles_over_same_path_row) image_path = os.path.join(image_directory, path_row_year) if not os.path.isdir(image_path): - download_from_pr(path, row, year, image_directory) - continue + download_from_pr(path, row, year, image_directory, landsat_rasters, climate_rasters) image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) try: + # todo : more robust way of getting a random band from the paths map mask_file = image_path_map['B1.TIF'][0] except IndexError: os.rmdir(os.path.join(image_directory, path_row_year)) - download_from_pr(path, row, year, image_directory) + download_from_pr(path, row, year, image_directory, landsat_rasters, climate_rasters) image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) mask_file = image_path_map['B1.TIF'][0] @@ -358,3 +355,20 @@ def make_border_labels(mask, border_width): dm = distance_map(mask) dm[dm > border_width] = 0 return dm + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('-s', '--shapefile-dir', help='shapefile directory containing the split shapefiles', type=str) + parser.add_argument('-i', '--image-dir', help='directory in which to find/save landsat images', type=str) + parser.add_argument('-t', '--training-dir', help='directory in which to save training data', type=str) + parser.add_argument('-n', '--n-classes', help='number of classes present', type=int) + + # todo : add single scene mapping + # more robust selection of random band + # how to download only selected images? 
+ + args = parser.parse_args() + extract_training_data_multiple_classes_per_instance(args.shapefile_dir, args.image_dir, + args.training_dir, assign_shapefile_year, assign_shapefile_class_code, + n_classes=args.n_classes) diff --git a/fully-conv-classification/fully_conv.py b/fully-conv-classification/fully_conv.py deleted file mode 100644 index 51aeb56..0000000 --- a/fully-conv-classification/fully_conv.py +++ /dev/null @@ -1,124 +0,0 @@ -import os -#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' -import time -import keras.backend as K -import tensorflow as tf -import numpy as np -from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) -from data_generators import SatDataSequence -from functools import partial -from models import unet_same_padding -config = tf.ConfigProto() -config.gpu_options.allow_growth - - -def weighted_loss(target, output): - out = -tf.reduce_sum(target*output, len(output.get_shape())-1) - return out - - -def weighted_loss_ce_and_dl(target, output): - # Target: One hot encoding of segmentation mask. - # Output: Output of network. In this case, log(softmax). - soft = tf.nn.softmax(output) - numerator = tf.reduce_sum(soft*target, 1) - numerator = tf.reduce_sum(numerator, 2) - sum_ui_k = tf.reduce_sum(soft, 1) - sum_ui_k = tf.reduce_sum(sum_ui_k, 2) - sum_vi_k = tf.reduce_sum(target, 1) - sum_vi_k = tf.reduce_sum(sum_vi_k, 2) - - final = (-2/4)*tf.reduce_sum(numerator / (sum_ui_k + sum_vi_k), 1) - out = -tf.reduce_sum(target*output, len(output.get_shape())-1) - return final + out - - -def weighted_focal_loss(target, output, gamma=1): - # L = a0 *(1-pt)^gamma * ce - # Output of model is CE. - # Target is one-hot encoded. 
- soft = tf.nn.softmax(output, axis=-1) - pt = tf.pow(1-soft, gamma) # probability - return -tf.reduce_sum(target*output*pt, len(output.get_shape())-1) - - -def acc(y_true, y_pred): - y_pred_sum = tf.reduce_sum(y_pred, axis=-1) - mask = tf.not_equal(y_pred_sum, 0) - y_arg = tf.argmax(y_pred, axis=-1) - y_t_arg = tf.argmax(y_true, axis=-1) - y_arg_mask = tf.boolean_mask(y_arg, mask) - y_t_arg_mask = tf.boolean_mask(y_t_arg, mask) - return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) - - -def lr_schedule(epoch, initial_learning_rate): - lr = 0.0001 - if epoch > 15: - lr /= 256 - elif epoch > 13: - lr /= 128 - elif epoch > 11: - lr /= 64 - elif epoch > 9: - lr /= 32. - elif epoch > 7: - lr /= 16. - elif epoch > 5: - lr /= 8. - elif epoch > 3: - lr /= 4. - elif epoch > 1: - lr /= 2. - print('Learning rate: ', lr) - return lr - - -if __name__ == '__main__': - - n_classes = 6 - input_shape = (None, None, 51) - initial_learning_rate = 1e-4 - weight_shape = (None, None, n_classes) - filepath = './models/template_to_fill_in/model.h5' - tb_path = './models/template_to_fill_in/' + str(time.time()) - if not os.path.isdir(tb_path): - os.makedirs(tb_path) - # Prepare callbacks for model saving and for learning rate adjustment. 
- checkpoint = ModelCheckpoint(filepath=filepath, - monitor='val_acc', - verbose=1, - save_best_only=True) - tensorboard = TensorBoard(log_dir=tb_path, update_freq=30, profile_batch=0) - lr_schedule_func = partial(lr_schedule, initial_learning_rate=initial_learning_rate) - lr_scheduler = LearningRateScheduler(lr_schedule_func) - model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=5) - opt = tf.keras.optimizers.Adam() - model.compile(opt, loss=weighted_loss, metrics=[acc]) - # model.summary() #line_length argument - # irrigated, uncultivated, unirrigated, wetlands, border - class_weights = {0:1, 1:1.0, 2:1.0, 3:1, 4:1.0, 5:1} - classes_to_augment = {0:False, 1:False, 2:False, 3:False, 4:False, 7:False} - batch_size = 3 - balance = True - generator = SatDataSequence('/home/thomas/ssd/training_data/train_mc/', batch_size=batch_size, - class_weights=class_weights, single_class_per_tile=False, balance=balance, n_classes=n_classes, classes_to_augment=classes_to_augment) - valid_generator = SatDataSequence('/home/thomas/ssd/training_data/test_mc/', - batch_size=batch_size, balance=False, n_classes=n_classes, single_class_per_tile=False, - class_weights=class_weights) - # m,model.fit_generator(generator, - # m, epochs=20, - # m, callbacks=[lr_scheduler, checkpoint, tensorboard], - # m, use_multiprocessing=False, - # m, validation_data=valid_generator, - # m, workers=1, - # m, max_queue_size=20, - # m, verbose=1) - model.fit_generator(generator, - epochs=4, - callbacks=[lr_scheduler, checkpoint, tensorboard], - use_multiprocessing=True, - validation_data=valid_generator, - workers=12, - max_queue_size=20, - verbose=1) diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 54e8f13..717b707 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -10,13 +10,6 @@ def gradient_wrt_inputs(model, data): - # s = '1553014193.4813933' - # f = 
'training_data/multiclass/train/class_2_data/{}.pkl'.format(s) - # with open(f, 'rb') as f: - # data = pload(f) - # data = np.expand_dims(data['data'], axis=0) - # data = np.swapaxes(data, 1, 3) - # gradient_wrt_inputs(model, data) layer_output = model.output loss = -tf.reduce_mean(layer_output) grads = K.gradients(loss, model.input[0])[0] @@ -50,10 +43,9 @@ def ConvBNRelu(x, filters=64): _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) -def unet_same_padding(input_shape, weight_shape, initial_exp=6, n_classes=5): +def unet(input_shape, initial_exp=6, n_classes=5): features = Input(shape=input_shape) - weights = Input(shape=weight_shape) _power = initial_exp exp = 2 @@ -106,239 +98,11 @@ def unet_same_padding(input_shape, weight_shape, initial_exp=6, n_classes=5): u4_c1 = Concatenate()([c12, c1]) c13 = ConvBlock(u4_c1, filters=exp**_power) - last_conv = Conv2D(filters=n_classes, kernel_size=1, padding='same', activation='softmax')(c13) - last = Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(last_conv) - last = Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(last) - last = Lambda(lambda x: K.log(x))(last) - weighted_xen = multiply([last, weights]) - return Model(inputs=[features, weights], outputs=[weighted_xen]) - - -def unet_valid_padding(input_shape, weighted_input_shape, n_classes, base_exp=5): - ''' - This model does not use any Conv2DTranspose layers. - Instead a Upsampling2D layer with a Conv layer after - with same padding. 
- ''' - inp1 = Input(input_shape) - weighted_input = Input(shape=weighted_input_shape) - base = 2 - exp = base_exp - - # 64 filters - c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(inp1) - c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c1) - c2 = BatchNormalization()(c2) - mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - - exp += 1 - # 128 filters - c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp1) - c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c3) - c4 = BatchNormalization()(c4) - mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) - - - exp += 1 - # 256 filters - c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp2) - c6 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c5) - c6 = BatchNormalization()(c6) - mp3 = MaxPooling2D(pool_size=2, strides=(2, 2))(c6) - - exp += 1 - # 512 filters - c7 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp3) - c8 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c7) - c8 = BatchNormalization()(c8) - - mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c8) - - exp += 1 - # 1024 filters - c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp4) - c10 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c9) - c10 = BatchNormalization()(c10) - - exp -= 1 - # 512 filters, making 1024 when concatenated with - # the corresponding layer from the contracting path. 
- # u1 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - # activation='relu')(c10) - u1 = UpSampling2D(size=(2, 2))(c10) - u1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u1) - - c8_cropped = Cropping2D(cropping=4)(c8) - concat_u1_c8 = Concatenate()([u1, c8_cropped]) - - # 512 filters - c11 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u1_c8) - - exp -= 1 - # 256 filters, making 512 when concatenated with the - # corresponding layer from the contracting path. - c12 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c11) - - # u2 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - # activation='relu')(c12) - u2 = UpSampling2D(size=(2, 2))(c12) - u2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u2) - - c6_cropped = Cropping2D(cropping=16)(c6) - concat_u2_c6 = Concatenate()([u2, c6_cropped]) - - # 256 filters - c13 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u2_c6) - bn1 = BatchNormalization()(c13) - - exp -= 1 - # 128 filters, making 256 when concatenated with the - # corresponding layer from the contracting path. - c14 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn1) - - # u3 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - # activation='relu')(c14) - u3 = UpSampling2D(size=(2, 2))(c14) - u3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u3) - - c4_cropped = Cropping2D(cropping=40)(c4) - concat_u3_c4 = Concatenate()([u3, c4_cropped]) - - # 128 filters - c15 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u3_c4) - bn2 = BatchNormalization()(c15) - - exp -= 1 - # 64 filters, making 128 when concatenated with the - # corresponding layer from the contracting path. 
- c16 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn2) - - #u4 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - # activation='relu')(c16) - u4 = UpSampling2D(size=(2, 2))(c16) - u4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='same')(u4) - - c2_cropped = Cropping2D(cropping=88)(c2) - concat_u4_c2 = Concatenate()([u4, c2_cropped]) - - c17 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u4_c2) - bn3 = BatchNormalization()(c17) - - c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(bn3) - c18 = BatchNormalization()(c18) - - last_conv = Conv2D(filters=n_classes, kernel_size=1, activation='softmax', padding='valid')(c18) - - last = Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(last_conv) - last = Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(last) - last = Lambda(lambda x: K.log(x))(last) - weighted_sum = multiply([last, weighted_input]) - return Model(inputs=[inp1, weighted_input], outputs=[weighted_sum]) - - -def unet(n_classes, channel_depth=36): - x = Input((None, None, channel_depth)) - base = 2 - exp = 5 - - # 64 filters - c1 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(x) - c2 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c1) - mp1 = MaxPooling2D(pool_size=2, strides=(2, 2))(c2) - - exp += 1 - # 128 filters - c3 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp1) - c4 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c3) - mp2 = MaxPooling2D(pool_size=2, strides=(2, 2))(c4) - - - exp += 1 - # 256 filters - c5 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp2) - c6 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c5) - mp3 = 
MaxPooling2D(pool_size=2, strides=(2, 2))(c6) - - exp += 1 - # 512 filters - c7 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp3) - c8 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c7) - mp4 = MaxPooling2D(pool_size=2, strides=(2, 2))(c8) - - exp += 1 - # 1024 filters - c9 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(mp4) - c10 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c9) - - exp -= 1 - # 512 filters, making 1024 when concatenated with - # the corresponding layer from the contracting path. - u1 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - activation='relu')(c10) - - c8_cropped = Cropping2D(cropping=4)(c8) - concat_u1_c8 = Concatenate()([u1, c8_cropped]) - - # 512 filters - c11 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u1_c8) - - exp -= 1 - # 256 filters, making 512 when concatenated with the - # corresponding layer from the contracting path. - c12 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(c11) - - u2 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - activation='relu')(c12) - - c6_cropped = Cropping2D(cropping=16)(c6) - concat_u2_c6 = Concatenate()([u2, c6_cropped]) - - # 256 filters - c13 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u2_c6) - bn1 = BatchNormalization(axis=3)(c13) - - exp -= 1 - # 128 filters, making 256 when concatenated with the - # corresponding layer from the contracting path. 
- c14 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn1) - - u3 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - activation='relu')(c14) - - c4_cropped = Cropping2D(cropping=40)(c4) - concat_u3_c4 = Concatenate()([u3, c4_cropped]) - - # 128 filters - c15 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u3_c4) - bn2 = BatchNormalization(axis=3)(c15) - - exp -= 1 - # 64 filters, making 128 when concatenated with the - # corresponding layer from the contracting path. - c16 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', padding='valid')(bn2) - - u4 = Conv2DTranspose(filters=base**exp, strides=(2, 2), kernel_size=(2, 2), - activation='relu')(c16) - - c2_cropped = Cropping2D(cropping=88)(c2) - concat_u4_c2 = Concatenate()([u4, c2_cropped]) - - c17 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(concat_u4_c2) - bn3 = BatchNormalization(axis=3)(c17) - - c18 = Conv2D(filters=base**exp, kernel_size=(3,3), activation='relu', - padding='valid')(bn3) - - last = Conv2D(filters=n_classes, kernel_size=1, activation='linear', padding='valid')(c18) - return Model(inputs=x, outputs=last) + logits = Conv2D(filters=n_classes, kernel_size=1, strides=1, + activation=None, name='logits')(c13) + + return Model(inputs=[features], outputs=[logits]) +if __name__ == '__main__': + pass diff --git a/fully-conv-classification/run_model_cli.py b/fully-conv-classification/run_model_cli.py deleted file mode 100644 index 4f20e19..0000000 --- a/fully-conv-classification/run_model_cli.py +++ /dev/null @@ -1,138 +0,0 @@ -import argparse -import os -import datetime -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' -import time -import tensorflow as tf -import tensorflow.keras.backend as K -import numpy as np -import random -from functools import partial -from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) -from 
tensorflow.keras.models import load_model -from data_generators import SatDataSequence -from models import unet_same_padding - - -_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) - - -def weighted_loss(target, output): - out = -tf.reduce_sum(target*output, len(output.get_shape())-1) - return out - - -def acc(y_true, y_pred): - y_pred_sum = tf.reduce_sum(y_pred, axis=-1) - mask = tf.not_equal(y_pred_sum, 0) - y_arg = tf.argmax(y_pred, axis=-1) - y_t_arg = tf.argmax(y_true, axis=-1) - y_arg_mask = tf.boolean_mask(y_arg, mask) - y_t_arg_mask = tf.boolean_mask(y_t_arg, mask) - return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) - - -def lr_schedule(epoch, initial_learning_rate): - lr = initial_learning_rate - if epoch > 9: - lr /= 32. - elif epoch > 7: - lr /= 16. - elif epoch > 5: - lr /= 8. - elif epoch > 3: - lr /= 4. - elif epoch > 1: - lr /= 3. - print('Learning rate: ', lr) - return lr - - -def _list_to_dict(ls): - dct = {} - for i, e in enumerate(ls): - dct[i] = e - return dct - - -def _save_model_info(root_directory, accuracy, loss, weights, augment, learning_rate, balance, - train_mc): - directory_name = os.path.join("./models", "{:.3f}acc".format(accuracy)) - if os.path.isdir(directory_name): - directory_name = os.path.join("./models", "{:.5f}acc".format(accuracy)) - filename = os.path.join(directory_name, "run_info_{:.3f}acc.txt".format(accuracy)) - os.rename(root_directory, directory_name) - print(filename) - with open(filename, 'w') as f: - print("acc: {:.3f}".format(accuracy), file=f) - print("loss: {}".format(loss), file=f) - print("weights: {}".format(weights), file=f) - print("augment scheme: {}".format(augment), file=f) - print("lr: {}".format(learning_rate), file=f) - print("balance: {}".format(balance), file=f) - print('train w multiple classes per tile: {}'.format(train_mc)) - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(fromfile_prefix_chars='@') - parser.add_argument('-f', '--file') - parser.add_argument("-lr", 
"--learning_rate", type=float) - parser.add_argument("-b", "--balance", type=int) - parser.add_argument("-w", "--weights", nargs=1) - parser.add_argument("-a", "--augment", nargs=1) - args = parser.parse_args() - learning_rate_scheduler = partial(lr_schedule, initial_learning_rate=args.learning_rate) - print("--------------------------------------------------") - args.augment = [bool(int(x)) for x in args.augment[0].split(' ')] - args.weights = [float(x) for x in args.weights[0].split(' ')] - print("---------------HYPERPARAMETERS--------------------") - print(args.learning_rate, args.balance, args.augment, args.weights) - print("--------------------------------------------------") - n_classes = 6 - input_shape = (None, None, 51) - weight_shape = (None, None, n_classes) - unique_path = str(time.time()) - root_directory = './models/{}/'.format(unique_path) - model_path = './models/{}/model.h5'.format(unique_path) - tb_path = './models/{}/graphs/'.format(unique_path) - if not os.path.isdir(tb_path): - os.makedirs(tb_path) - # Prepare callbacks for model saving and for learning rate adjustment. 
- checkpoint = ModelCheckpoint(filepath=model_path, - monitor='val_acc', - verbose=1, - save_best_only=True) - tensorboard = TensorBoard(log_dir=tb_path, update_freq=30, profile_batch=0) - lr_scheduler = LearningRateScheduler(learning_rate_scheduler) - model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=5) - # model.summary() - opt = tf.keras.optimizers.Adam() - model.compile(opt, loss=weighted_loss, metrics=[acc]) - class_weights = _list_to_dict(args.weights) - classes_to_augment = _list_to_dict(args.augment) - batch_size = 3 - balance = args.balance - train_mc = True - generator = SatDataSequence('/home/thomas/ssd/training_data/train_mc/', batch_size=batch_size, - class_weights=class_weights, balance=balance, n_classes=n_classes, - classes_to_augment=classes_to_augment) - class_weights = {0:1, 1:1, 2:1, 3:1, 4:1, 5:1, 6:1} - valid_generator = SatDataSequence('/home/thomas/ssd/training_data/test_mc/', - batch_size=batch_size, balance=False, n_classes=n_classes, - single_class_per_tile=True, class_weights=class_weights) - model.fit_generator(generator, - epochs=7, - callbacks=[lr_scheduler, checkpoint, tensorboard], - use_multiprocessing=True, - validation_data=valid_generator, - workers=12, - shuffle=False, - verbose=1) - model = load_model(model_path, custom_objects={'weighted_loss':weighted_loss, 'tf':tf, - '_epsilon':_epsilon, 'acc':acc}) - accuracy = model.evaluate_generator(valid_generator) - loss = accuracy[0] - accuracy = accuracy[1] - _save_model_info(root_directory, accuracy, loss, class_weights, classes_to_augment, - args.learning_rate, args.balance, train_mc) diff --git a/fully-conv-classification/runner_from_shapefile.py b/fully-conv-classification/runner_from_shapefile.py index ba643fc..5c58d6c 100644 --- a/fully-conv-classification/runner_from_shapefile.py +++ b/fully-conv-classification/runner_from_shapefile.py @@ -10,35 +10,9 @@ from shapefile_utils import filter_shapefile_overlapping from data_utils import 
paths_mappings_single_scene, paths_map_multiple_scenes from extract_training_data import extract_training_data_multiple_classes_per_instance +from runspec import assign_shapefile_class_code, assign_shapefile_year -def assign_shapefile_class_code(shapefile): - if 'irrigated' in shapefile and 'unirrigated' not in shapefile: - return 0 - if 'unirrigated' in shapefile: - return 1 - if 'uncultivated' in shapefile: - return 2 - if 'wetlands' in shapefile: - return 3 - if 'fallow' in shapefile: - return 4 - - -def assign_shapefile_year(shapefile): - # get the shapefile name, not the whole path. - return 2013 - - -# fs = [f for f in glob(in_test_shapefile_directory + "*.shp")] -# tf = [split_out_test_shapefile_directory] * len(fs) -# with Pool() as pool: -# pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) -# fs = [f for f in glob(in_train_shapefile_directory + "*.shp")] -# tf = [split_out_train_shapefile_directory] * len(fs) -# with Pool() as pool: -# pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) - if __name__ == "__main__": # out_shapefile_directory = 'shapefile_data' # shp = "/home/thomas/IrrigationGIS/western_states_irrgis/MT/MT_Main/" @@ -56,13 +30,17 @@ def assign_shapefile_year(shapefile): split_out_train_shapefile_directory = 'shapefile_data/train/' split_out_test_shapefile_directory = 'shapefile_data/test/' image_directory = '/home/thomas/share/image_data/train/' - training_data_directory = '/home/thomas/ssd/training_data/train_mc/' - test_data_directory = '/home/thomas/ssd/training_data/test_mc/' + training_data_directory = '/home/thomas/ssd/training_data/four_class/train/' + test_data_directory = '/home/thomas/ssd/training_data/four_class/test/' + # assign_shapefile_class_code = assign_shapefile_class_code_binary + n_classes = 5 offline_augmentation_dict = {0:200, 1:608, 2:608, 3:400, 4:200, 5:608} + # todo: add multiple directories to this function. + # that would probably reduce runtime to half. 
extract_training_data_multiple_classes_per_instance(split_out_train_shapefile_directory, image_directory, training_data_directory, assign_shapefile_year, - assign_shapefile_class_code, n_classes=6) + assign_shapefile_class_code, n_classes=n_classes) offline_augmentation_dict = {0:608, 1:608, 2:608, 3:608, 4:608, 5:608} extract_training_data_multiple_classes_per_instance(split_out_test_shapefile_directory, image_directory, test_data_directory, assign_shapefile_year, - assign_shapefile_class_code, n_classes=6) + assign_shapefile_class_code, n_classes=n_classes) diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py index 2fdd81d..317e8eb 100644 --- a/fully-conv-classification/runspec.py +++ b/fully-conv-classification/runspec.py @@ -21,7 +21,33 @@ sys.path.append(abspath) +def assign_shapefile_class_code(shapefile): + if 'irrigated' in shapefile and 'unirrigated' not in shapefile: + return 0 + if 'unirrigated' in shapefile: + return 1 + if 'uncultivated' in shapefile: + return 2 + if 'wetlands' in shapefile: + return 3 + if 'fallow' in shapefile: + return 4 + + +def assign_shapefile_class_code_binary(shapefile): + if 'irrigated' in shapefile and 'unirrigated' not in shapefile: + return 0 + return 1 + + +def assign_shapefile_year(shapefile): + # get the shapefile name, not the whole path. + return 2013 + + def landsat_rasters(): + # keys: satellite number + # values: bands that are available for that satellite. 
b = {1: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF'), 2: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF'), 3: ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF', 'B5.TIF', 'B6.TIF', 'B7.TIF'), @@ -56,132 +82,5 @@ def mask_rasters(): return m -class TrainingAssignments(object): - def __init__(self, root): - self.attribute_list = ['forest', 'fallow', 'irrigated', 'other'] - - self.root = root - self.shapes = None - - self.attributes = {0: {'ltype': 'irrigated', 'path': None}, - - 1: {'ltype': 'dryland', 'path': None}, - - 2: {'ltype': 'forest', 'path': None}, - - 3: {'ltype': 'other', 'path': None}} - - def add_paths(self): - for key, vector in enumerate(self.shapes): - self.attributes[key]['path'] = os.path.join(self.root, vector) - - -class Idaho(TrainingAssignments): - - def __init__(self, root): - TrainingAssignments.__init__(self, root) - - self.shapes = ['ID_2011_Irrigated_WGS84_4030.shp', 'non_irrigated_ESPA_2011_100_200_ac.shp', - 'ID_Public_forest_4030.shp', 'ID_Public_other_4030.shp'] - self.add_paths() - - self.path = 40 - self.row = 30 - self.year = 2011 - self.sat = 5 - - -class Montana(TrainingAssignments): - - def __init__(self, root): - TrainingAssignments.__init__(self, root) - - self.shapes = ['MT_Huntley_Main_2013_3728.shp', 'MT_FLU_2017_Fallow_3728.shp', - 'MT_FLU_2017_Forrest_3728.shp', 'MT_other_3728.shp'] - self.add_paths() - - self.path = 37 - self.row = 28 - self.year = 2013 - self.sat = 8 - - -class Nevada(TrainingAssignments): - - def __init__(self, root): - TrainingAssignments.__init__(self, root) - - self.shapes = ['2015_IRR_ACRE_WGS84.shp', 'NV_fallow.shp', - 'NV_forest.shp', 'NV_other.shp'] - self.add_paths() - - self.path = 41 - self.row = 32 - self.year = 2015 - self.sat = 8 - - -class Oregon(TrainingAssignments): - - def __init__(self, root): - TrainingAssignments.__init__(self, root) - - self.shapes = ['harney_irrigated_2016.shp', 'harney_fallow_2016.shp', - 'OR_forest.shp', 'OR_other.shp'] - 
self.add_paths() - - self.path = 43 - self.row = 30 - self.year = 2016 - self.sat = 8 - - -class Utah(TrainingAssignments): - - def __init__(self, root): - TrainingAssignments.__init__(self, root) - - self.shapes = ['UT_Irrigated_3732_2014.shp', 'UT_UnirrigatedAg_3732.shp', - 'UT_forest.shp', 'UT_other.shp'] - self.add_paths() - - self.path = 37 - self.row = 32 - self.year = 2014 - self.sat = 8 - - -class Washington(TrainingAssignments): - - def __init__(self, root): - TrainingAssignments.__init__(self, root) - - self.shapes = ['WA_2017_irrigated_4427.shp', 'WA_2017_unirrigated_ag_4427.shp', - 'WA_Forest_Practices_Applications_4427.shp', 'WA_other_4427.shp'] - self.add_paths() - - self.path = 44 - self.row = 27 - self.year = 2017 - self.sat = 8 - - -class MontanaTest(Montana): - def __init__(self): - Montana.__init__(self) - - for code, _dict in self.attributes.items(): - _dict['path'] = _dict['path'].replace(os.path.join('spatial_data', 'MT'), - os.path.join('tests', 'data', 'pixel_extract_test', - )) - _dict['path'] = _dict['path'].replace('.shp', '_clip.shp') - - self.unique_classes = len(self.attributes.keys()) - - self.sample_negative = False - - if __name__ == '__main__': pass - -# ========================= EOF ================================================================ diff --git a/fully-conv-classification/split_shapefiles.py b/fully-conv-classification/split_shapefiles.py new file mode 100644 index 0000000..c25d58f --- /dev/null +++ b/fully-conv-classification/split_shapefiles.py @@ -0,0 +1,20 @@ +import os +import argparse +from multiprocessing import Pool +from glob import glob +from shapefile_utils import filter_shapefile_overlapping + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('-s', '--shapefile-dir', + help='directory where unsplit shapefiles are stored', type=str) + parser.add_argument('-o', '--output-dir', help='where to store the split shapefiles', type=str) + parser.add_argument('-p', 
'--processes', help='n processes to spawn', default=12) + parser.add_argument('-e', '--extension', help='shapefile extension', default=".shp") + args = parser.parse_args() + fs = glob(os.path.join(args.shapefile_dir, "*" + args.extension)) + tf = [args.output_dir] * len(fs) + with Pool() as pool: + pool.starmap(filter_shapefile_overlapping, zip(fs, tf)) diff --git a/fully-conv-classification/train_model.py b/fully-conv-classification/train_model.py new file mode 100644 index 0000000..476d4f2 --- /dev/null +++ b/fully-conv-classification/train_model.py @@ -0,0 +1,232 @@ +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +os.environ['CUDA_VISIBLE_DEVICES'] = '-1' +import time +import keras.backend as K +import tensorflow as tf +import numpy as np +import argparse +from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) +from functools import partial +from sklearn.metrics import confusion_matrix +from tensorflow.keras.models import load_model + +from models import unet +from data_generators import SatDataSequence +config = tf.ConfigProto() +config.gpu_options.allow_growth + +_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) + +def _softmax(logits): + preds = np.exp(logits) + soft = preds / np.sum(preds, axis=-1, keepdims=True) + return soft + + +def _confusion_matrix(valid_generator, model): + first = True + for batch_x, y_true in valid_generator: + y_true = y_true[0][0] + mask = np.sum(y_true, axis=2).astype(bool) + y_pred = model.predict(batch_x) + y_pred = y_pred[0] + y_pred = _softmax(y_pred) + y_pred = np.argmax(y_pred, axis=2) + y_true = np.argmax(y_true, axis=2) + y_pred = y_pred[mask] + y_true = y_true[mask] + if first: + cmat = confusion_matrix(y_true, y_pred, + labels=[0, 1, 2, 3, 4, 5]) + first = False + else: + cmat += confusion_matrix(y_true, y_pred, + labels=[0, 1, 2, 3, 4, 5]) + print(cmat) + precision_dict = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0} + recall_dict = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0} + for i in range(6): + 
recall_dict[i] = cmat[i, i] / np.sum(cmat[i, :]) + precision_dict[i] = cmat[i, i] / np.sum(cmat[:, i]) + return cmat, recall_dict, precision_dict + + +def weighted_loss(target, output): + out = -tf.reduce_sum(target*output, len(output.get_shape())-1) + return out + + +def weighted_loss_ce_and_dl(target, output): + # Target: One hot encoding of segmentation mask. + # Output: Output of network. In this case, log(softmax). + soft = tf.nn.softmax(output) + numerator = tf.reduce_sum(soft*target, 1) + numerator = tf.reduce_sum(numerator, 2) + sum_ui_k = tf.reduce_sum(soft, 1) + sum_ui_k = tf.reduce_sum(sum_ui_k, 2) + sum_vi_k = tf.reduce_sum(target, 1) + sum_vi_k = tf.reduce_sum(sum_vi_k, 2) + + final = (-2/4)*tf.reduce_sum(numerator / (sum_ui_k + sum_vi_k), 1) + out = -tf.reduce_sum(target*output, len(output.get_shape())-1) + return final + out + + +def weighted_focal_loss(target, output, gamma=1): + # L = a0 *(1-pt)^gamma * ce + # Output of model is CE. + # Target is one-hot encoded. + soft = tf.nn.softmax(output, axis=-1) + pt = tf.pow(1-soft, 2) # probability + return -tf.reduce_sum(target*output*pt, len(output.get_shape())-1) + + +def acc(y_true, y_pred): + y_pred_sum = tf.reduce_sum(y_pred, axis=-1) + mask = tf.not_equal(y_pred_sum, 0) + y_arg = tf.argmax(y_pred, axis=-1) + y_t_arg = tf.argmax(y_true, axis=-1) + y_arg_mask = tf.boolean_mask(y_arg, mask) + y_t_arg_mask = tf.boolean_mask(y_t_arg, mask) + return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) + + +def lr_schedule(epoch, initial_learning_rate): + lr = initial_learning_rate + if epoch > 15: + lr /= 256 + elif epoch > 13: + lr /= 128 + elif epoch > 11: + lr /= 64 + elif epoch > 9: + lr /= 32. + elif epoch > 7: + lr /= 16. + elif epoch > 5: + lr /= 8. + elif epoch > 3: + lr /= 4. + elif epoch > 1: + lr /= 2. 
+ print('Learning rate: ', lr) + return lr + + +def _save_model_info(root_directory, train_type, loss_func, accuracy, loss, class_weights, classes_to_augment, initial_learning_rate, train_mc, cmat, precision, recall): + directory_name = os.path.join("./models", "{:.3f}acc_class_weights_multiple_classes".format(accuracy)) + if os.path.isdir(directory_name): + directory_name = os.path.join("./models", "{:.5f}acc".format(accuracy)) + filename = os.path.join(directory_name, "run_info_{:.3f}acc.txt".format(accuracy)) + os.rename(root_directory, directory_name) + print(filename) + with open(filename, 'w') as f: + print("acc: {:.3f}".format(accuracy), file=f) + print("train_type: {}".format(train_type), file=f) + print("loss_func: {}".format(loss_func), file=f) + print("loss: {}".format(loss), file=f) + print("weights: {}".format(class_weights), file=f) + print("augment scheme: {}".format(classes_to_augment), file=f) + print("lr: {}".format(initial_learning_rate), file=f) + print('train w multiple classes per tile: {}'.format(train_mc), file=f) + print('confusion_matrix: {}'.format(cmat), file=f) + print('precision: {}'.format(precision), file=f) + print('recall: {}'.format(recall), file=f) + + +def _return_loss(inp): + if 'focal' in inp: + return weighted_focal_loss + return weighted_loss + + +def _list_to_dict(ls): + dct = {} + for i, e in enumerate(ls[0].split(' ')): + dct[i] = e + return dct + + +def _nclass(i): + if 'multiple' in i: + return 6 + return 5 + + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(fromfile_prefix_chars='@') + parser.add_argument('-f', '--file') + parser.add_argument("-lr", "--learning_rate", type=float) + parser.add_argument("-b", "--balance", type=int) + parser.add_argument("-w", "--weights", nargs=1) + parser.add_argument("-lf", "--loss_func") + parser.add_argument("-tt", '--train_type') + parser.add_argument("-nc", '--n_classes') + parser.add_argument("-td", '--train-data-dir', 
default='/home/thomas/ssd/training_data/train/') + parser.add_argument("-ted", '-test-data-dir', default='/home/thomas/ssd/training_data/test/') + args = parser.parse_args() + learning_rate_scheduler = partial(lr_schedule, initial_learning_rate=args.learning_rate) + # runtime params + n_classes = _nclass(args.train_type) + input_shape = (None, None, 51) + initial_learning_rate = args.learning_rate + class_weights = {0:85, 1:1.0, 2:1.546, 3:784.286, 4:972.84, 5:357.78} + class_weights = _list_to_dict(args.weights) + classes_to_augment = {0:False, 1:False, 2:False, 3:False, 4:False, 7:True} + loss_func = _return_loss(args.loss_func) + print(initial_learning_rate, class_weights, loss_func) + # + weight_shape = (None, None, n_classes) + filepath = './models/template_to_fill_in/model.h5' + tb_path = './models/template_to_fill_in/' + str(time.time()) + root_directory = './models/template_to_fill_in/' + if not os.path.isdir(tb_path): + os.makedirs(tb_path) + # Prepare callbacks for model saving and for learning rate adjustment. 
+ checkpoint = ModelCheckpoint(filepath=filepath, + monitor='val_acc', + verbose=1, + save_best_only=True) + tensorboard = TensorBoard(log_dir=tb_path, profile_batch=0, update_freq=30, batch_size=3) + lr_schedule_func = partial(lr_schedule, initial_learning_rate=initial_learning_rate) + lr_scheduler = LearningRateScheduler(lr_schedule_func) + model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=4) + opt = tf.keras.optimizers.Adam() + model.compile(opt, loss=loss_func, metrics=[acc]) + batch_size = 4 + balance = True + train_mc = True + training_data_directory = args.train_type + generator = SatDataSequence('{}'.format(args.train_data_dir), hatch_size=batch_size, + class_weights=class_weights, single_class_per_tile=False, balance=balance, + n_classes=n_classes, classes_to_augment=classes_to_augment) + class_weights = {0:1, 1:1.0, 2:1.0, 3:1, 4:1, 5:1} + valid_generator = SatDataSequence('{}'.format(args.test_data_dir), + batch_size=batch_size, balance=False, n_classes=n_classes, single_class_per_tile=False, + class_weights=class_weights) + + model.fit_generator(generator, + epochs=20, + callbacks=[lr_scheduler, checkpoint, tensorboard], + use_multiprocessing=True, + validation_data=valid_generator, + workers=12, + max_queue_size=30, + verbose=1) + + if args.loss_func == 'focal': + model = load_model(filepath, custom_objects={'weighted_focal_loss':weighted_focal_loss, + 'tf':tf, '_epsilon':_epsilon, 'acc':acc}) + else: + model = load_model(filepath, custom_objects={'weighted_loss':weighted_loss, + 'tf':tf, '_epsilon':_epsilon, 'acc':acc}) + + accuracy = model.evaluate_generator(valid_generator) + loss = accuracy[0] + accuracy = accuracy[1] + cmat, prec, recall = _confusion_matrix(valid_generator, model) + _save_model_info(root_directory, args.train_type, args.loss_func, accuracy, loss, class_weights, + classes_to_augment, initial_learning_rate, train_mc, cmat, prec, recall) From ed7a824409106394d4d8b7c189e4d3c6bfc63261 Mon Sep 17 
00:00:00 2001 From: Thomas Colligan Date: Wed, 28 Aug 2019 09:09:09 -0600 Subject: [PATCH 82/89] Commit before removal of weights code --- fully-conv-classification/data_generators.py | 46 ++++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index cf545a9..3902f5b 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -27,7 +27,7 @@ class SatDataSequence(Sequence): def __init__(self, data_directory, batch_size, class_weights=None, - balance=True, single_class_per_tile=True, n_classes=5, classes_to_augment=None): + balance=False, single_class_per_tile=True, n_classes=5, classes_to_augment=None): self.data_directory = data_directory self.class_weights = class_weights if self.class_weights is None: @@ -36,10 +36,17 @@ def __init__(self, data_directory, batch_size, class_weights=None, dct[i] = 1 self.class_weights = dct self.n_classes = n_classes + self.binary = False + if n_classes == 1: + self.binary = True self.single_class_per_tile = single_class_per_tile self.batch_size = batch_size - self._no_augment = classes_to_augment is None self.classes_to_augment = classes_to_augment + if self.classes_to_augment is None: + dct = {} + for i in range(n_classes): + dct[i] = False + self.classes_to_augment = dct self.balance = balance self._get_files() self.n_files = len(self.file_list) @@ -62,7 +69,6 @@ def _get_files(self): shuffle(self.file_list) - def _create_file_list(self): max_instances = max(self.lengths) self.file_list = [] @@ -85,13 +91,19 @@ def __len__(self): def on_epoch_end(self): - self._create_file_list() + if not self.balance: + shuffle(self.file_list) + else: + self._create_file_list() def __getitem__(self, idx): batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] - processed = self._labels_and_features(data_tiles, 
self.classes_to_augment) + if self.binary: + processed = self._binary_labels_and_features(data_tiles, self.classes_to_augment) + else: + processed = self._labels_and_features(data_tiles, self.classes_to_augment) batch_x = processed[0] batch_y = processed[1] return batch_x, batch_y @@ -102,11 +114,30 @@ def _from_pickle(self, filename): data = pickle.load(f) return data + def _apply_weights(self, one_hot): for i in range(self.n_classes): one_hot[:, :, i] *= self.class_weights[i] + def _binary_labels_and_features(self, data_tiles, classes_to_augment): + features = [] + one_hots = [] + for tile in data_tiles: + data = tile['data'] + one_hot = tile['one_hot'].astype(np.int) + binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 # -1 will represent nodata in this case. + for i in range(one_hot.shape[2]): + if i == 0: + binary_one_hot[:, :][one_hot[:, :, i] == 1] = 1 + else: + binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 + + features.append(data) + one_hots.append(binary_one_hot) + return [np.asarray(features)], [np.asarray(one_hots)] + + def _labels_and_features(self, data_tiles, classes_to_augment): features = [] one_hots = [] @@ -116,9 +147,8 @@ def _labels_and_features(self, data_tiles, classes_to_augment): one_hot[0, 0, :] = 0 self._apply_weights(one_hot) class_code = tile['class_code'] - if not self._no_augment: - if classes_to_augment[tile['class_code']]: - data, one_hot, weights = _augment_data(data, one_hot, weights) + if classes_to_augment[tile['class_code']]: + data, one_hot, weights = _augment_data(data, one_hot, weights) features.append(data) one_hots.append(one_hot) return [np.asarray(features)], [np.asarray(one_hots)] From 200a0ad60573472485330921d56754e8371c270b Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 28 Aug 2019 09:10:27 -0600 Subject: [PATCH 83/89] Update README.md --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f734483..031b107 
100644 --- a/README.md +++ b/README.md @@ -50,12 +50,15 @@ Before running this, check ```_one_hot_from_labels_mc()``` in ```extract_trainin train a model with train_model.py. # TODO: -Make training a model easier (i.e. don't require a separate weights matrix and stop computing softmax within the network) -Extract new data sets using new model +Make training a model easier [x] (i.e. don't require a separate weights matrix and stop computing softmax within the network) -Implement IoU (multiclass dice loss). +Implement binary dice loss [x] -add binary classification possibililty +Get rid of class_7_data necessity + +Implement multiclass dice loss + +add binary classification possibility [x] add raster training data extraction From 3744717f379473dbb08b8dbcc3c3f10c2d145b39 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Mon, 14 Oct 2019 11:32:46 -0600 Subject: [PATCH 84/89] Reorganization, fixed bug in extract_training_data --- fully-conv-classification/argmax_rasters.py | 46 -- fully-conv-classification/data_generators.py | 264 +++++------ fully-conv-classification/data_utils.py | 163 +++++-- fully-conv-classification/evaluate_image.py | 151 ++++--- .../extract_training_data.py | 416 +++++++++--------- .../precision_and_recall.py | 33 ++ fully-conv-classification/prepare_images.py | 62 ++- fully-conv-classification/runspec.py | 32 +- fully-conv-classification/shapefile_utils.py | 7 +- fully-conv-classification/split_shapefiles.py | 0 fully-conv-classification/train_model.py | 255 +++-------- .../train_model_random_files.py | 78 ++++ fully-conv-classification/train_utils.py | 211 +++++++++ 13 files changed, 979 insertions(+), 739 deletions(-) delete mode 100755 fully-conv-classification/argmax_rasters.py mode change 100644 => 100755 fully-conv-classification/evaluate_image.py create mode 100644 fully-conv-classification/precision_and_recall.py mode change 100644 => 100755 fully-conv-classification/split_shapefiles.py create mode 100644 
fully-conv-classification/train_model_random_files.py create mode 100644 fully-conv-classification/train_utils.py diff --git a/fully-conv-classification/argmax_rasters.py b/fully-conv-classification/argmax_rasters.py deleted file mode 100755 index be33f1d..0000000 --- a/fully-conv-classification/argmax_rasters.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -from rasterio import open as rasopen -from rasterio import int32, float32 -from glob import glob -from os.path import basename, join, dirname, splitext -import argparse - -def compute_argmax(f, outfile): - - with rasopen(f, 'r') as src: - arr = src.read() - meta = src.meta.copy() - - irr = arr[0] - irr[irr < 0.3] = np.nan - irr = np.expand_dims(irr, 0) - arg = np.argmax(arr, axis=0) - arg = np.expand_dims(arg, axis=0) - arg = arg.astype(np.int32) - meta.update(count=1, dtype=float32) - with rasopen(outfile, 'w', **meta) as dst: - dst.write(irr) - -if __name__ == '__main__': - - parser = argparse.ArgumentParser() - parser.add_argument("-f", - "--file", - help='geoTIFF to perform argmax on', - required=True) - parser.add_argument('-o', - '--outfile', - help='optional filename for outfile') - - args = parser.parse_args() - if not args.outfile: - outfile = basename(args.file) - outdir = dirname(args.file) - outfile = splitext(outfile)[0] + '_argmax.tif' - outfile = join(outdir, outfile) - compute_argmax(args.file, outfile) - - - - - diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 3902f5b..68aaefd 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -19,70 +19,37 @@ from collections import defaultdict from sys import getsizeof -from runspec import mask_rasters from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, 
mask_raster_to_features -class SatDataSequence(Sequence): +class RandomMajorityUndersamplingSequence(Sequence): - def __init__(self, data_directory, batch_size, class_weights=None, - balance=False, single_class_per_tile=True, n_classes=5, classes_to_augment=None): + def __init__(self, batch_size, data_directory, training=True): + + self.training = training self.data_directory = data_directory - self.class_weights = class_weights - if self.class_weights is None: - dct = {} - for i in range(n_classes): - dct[i] = 1 - self.class_weights = dct - self.n_classes = n_classes - self.binary = False - if n_classes == 1: - self.binary = True - self.single_class_per_tile = single_class_per_tile self.batch_size = batch_size - self.classes_to_augment = classes_to_augment - if self.classes_to_augment is None: - dct = {} - for i in range(n_classes): - dct[i] = False - self.classes_to_augment = dct - self.balance = balance self._get_files() self.n_files = len(self.file_list) + shuffle(self.file_list) self.idx = 0 def _get_files(self): - # Now, get n lists where n is the number of classes (excluding border class). - # Then, sample from the minority lists until we have - # the same number of data tiles from each class, then concatenate - # all the lists and shuffle. on epoch end, do this process again. 
- self.file_dict = {} - i = 0 - for (dirpath, dirnames, filenames) in os.walk(self.data_directory): - if dirpath != self.data_directory: - self.file_dict[i] = [os.path.join(dirpath, x) for x in filenames] - i += 1 - self.lengths = [len(self.file_dict[k]) for k in self.file_dict] - self._create_file_list() - shuffle(self.file_list) - - def _create_file_list(self): - max_instances = max(self.lengths) + self.class_directories = os.listdir(self.data_directory) + self.n_classes = len(self.class_directories) + self.files = [glob(os.path.join(self.data_directory, d, "*.pkl")) for d in + self.class_directories] + self.n_minority = min([len(f) for f in self.files]) self.file_list = [] - for class_dir in self.file_dict: - files = self.file_dict[class_dir] - self.file_list.extend(files) - if self.balance: - if len(files) < max_instances: - s = len(files) - if len(files) < (max_instances - len(files)): - files *= (max_instances // len(files)) - shuffle(files) - additional_files = sample(files, max_instances-s) - self.file_list.extend(additional_files) + if self.training: + self.file_list.extend(sample(self.files[0], self.n_minority)) + self.file_list.extend(sample(self.files[1], self.n_minority)) + else: + self.file_list.extend(self.files[0]) + self.file_list.extend(self.files[1]) shuffle(self.file_list) @@ -91,21 +58,24 @@ def __len__(self): def on_epoch_end(self): - if not self.balance: - shuffle(self.file_list) - else: - self._create_file_list() + self.file_list = [] + self.file_list.extend(sample(self.files[0], self.n_minority)) + self.file_list.extend(sample(self.files[1], self.n_minority)) + shuffle(self.file_list) + self.n_files = len(self.file_list) def __getitem__(self, idx): batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] - if self.binary: - processed = self._binary_labels_and_features(data_tiles, self.classes_to_augment) + self.batch=batch + if self.n_classes == 2: + processed = 
self._binary_labels_and_features(data_tiles) else: - processed = self._labels_and_features(data_tiles, self.classes_to_augment) - batch_x = processed[0] + processed = self._labels_and_features(data_tiles) + batch_y = processed[1] + batch_x = processed[0] return batch_x, batch_y @@ -115,123 +85,155 @@ def _from_pickle(self, filename): return data - def _apply_weights(self, one_hot): - for i in range(self.n_classes): - one_hot[:, :, i] *= self.class_weights[i] - - - def _binary_labels_and_features(self, data_tiles, classes_to_augment): + def _labels_and_features(self, data_tiles): features = [] one_hots = [] for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) - binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 # -1 will represent nodata in this case. - for i in range(one_hot.shape[2]): - if i == 0: - binary_one_hot[:, :][one_hot[:, :, i] == 1] = 1 - else: - binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 - + one_hot[0, 0, :] = 0 + class_code = tile['class_code'] + data, one_hot = _augment_data(data, one_hot) features.append(data) - one_hots.append(binary_one_hot) + one_hots.append(one_hot) return [np.asarray(features)], [np.asarray(one_hots)] - def _labels_and_features(self, data_tiles, classes_to_augment): + def _binary_labels_and_features(self, data_tiles): features = [] one_hots = [] - for tile in data_tiles: + bad_shape = False + for cnt, tile in enumerate(data_tiles): data = tile['data'] one_hot = tile['one_hot'].astype(np.int) - one_hot[0, 0, :] = 0 - self._apply_weights(one_hot) - class_code = tile['class_code'] - if classes_to_augment[tile['class_code']]: - data, one_hot, weights = _augment_data(data, one_hot, weights) + binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 + for i in range(one_hot.shape[2]): + if i == 1: + binary_one_hot[:, :][one_hot[:, :, i] == 1] = 1 + else: + binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 + neg_examples = np.where(binary_one_hot 
== 0) + n_neg = len(neg_examples[0]) + n_pos = len(np.where(binary_one_hot == 1)[0]) + if n_neg != 0: + xs = np.random.choice(neg_examples[0], n_neg - n_pos, replace=False) + ys = np.random.choice(neg_examples[1], n_neg - n_pos, replace=False) + binary_one_hot[xs, ys] = -1 + if self.training: + data, binary_one_hot = _augment_data(data, binary_one_hot, binary=True) + binary_one_hot = np.expand_dims(binary_one_hot, 2) features.append(data) - one_hots.append(one_hot) + one_hots.append(binary_one_hot) return [np.asarray(features)], [np.asarray(one_hots)] - def _make_weights_labels_and_features(self, data_tiles, classes_to_augment): - class_weights = self.class_weights +class BinaryDataSequence(Sequence): + + def __init__(self, batch_size, file_list, training=True, balance_pixels=False): + # this requires a file list of training data. + self.training = training + self.balance_pixels = balance_pixels + self.batch_size = batch_size + self.file_list = file_list + self.n_files = len(self.file_list) + shuffle(self.file_list) + self.idx = 0 + + + def __len__(self): + return int(np.ceil(self.n_files / self.batch_size)) + + + def on_epoch_end(self): + shuffle(self.file_list) + + + def __getitem__(self, idx): + batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] + data_tiles = [self._from_pickle(x) for x in batch] + processed = self._binary_labels_and_features(data_tiles) + batch_y = processed[1] + batch_x = processed[0] + return batch_x, batch_y + + + def _from_pickle(self, filename): + with open(filename, 'rb') as f: + data = pickle.load(f) + return data + + + def _apply_weights(self, one_hot): + for i in range(self.n_classes): + one_hot[:, :, i] *= self.class_weights[i] + + + def _binary_labels_and_features(self, data_tiles): features = [] one_hots = [] - weight_list = [] for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) - one_hot[0, 0, :] = 0 - weights = tile['weights'].astype(np.int) - class_code = tile['class_code'] - 
if not self.single_class_per_tile: - weights = np.zeros_like(one_hot) - for cc in range(self.n_classes): - for dd in range(self.n_classes): - weights[:, :, cc][one_hot[:, :, dd] == 1] = class_weights[dd] - - assert np.all(weights[:, :, 0] == weights[:, :, 1]) - - if self.single_class_per_tile: - for i in range(self.n_classes): - weights[:, :, i][one_hot[:, :, class_code] == 1] = class_weights[class_code] - - if class_code == 0 and self.single_class_per_tile: - for i in range(self.n_classes): - weights[:, :, i][one_hot[:, :, self.n_classes-1] == 1] = class_weights[self.n_classes-1] - if not self._no_augment: - if classes_to_augment[tile['class_code']]: - data, one_hot, weights = _augment_data(data, one_hot, weights) + binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 + for i in range(one_hot.shape[2]): + if i == 1: + binary_one_hot[:, :][one_hot[:, :, i] == 1] = 1 + else: + binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 + if self.training: + data, binary_one_hot = _augment_data(data, binary_one_hot, binary=True) + if self.training and self.balance_pixels: + neg_examples = np.where(binary_one_hot == 0) + n_neg = len(neg_examples[0]) + n_pos = len(np.where(binary_one_hot == 1)[0]) + if n_neg > n_pos: + idx = np.random.choice(np.arange(n_neg), n_neg - n_pos, replace=False) + xs = neg_examples[0][idx] + ys = neg_examples[1][idx] + binary_one_hot[xs, ys] = -1 + binary_one_hot = np.expand_dims(binary_one_hot, 2) features.append(data) - one_hots.append(one_hot) - weight_list.append(weights) - return [np.asarray(features), np.asarray(weight_list)], [np.asarray(one_hots)] + one_hots.append(binary_one_hot) + return [np.asarray(features)], [np.asarray(one_hots)] -def _flip_lr(feature_tile, one_hot, weights): +def _flip_lr(feature_tile, one_hot, binary=False): for i in range(feature_tile.shape[2]): feature_tile[:, :, i] = np.fliplr(feature_tile[:, :, i]) + if binary: + one_hot = np.fliplr(one_hot) + return feature_tile, one_hot for i in 
range(one_hot.shape[2]): one_hot[:, :, i] = np.fliplr(one_hot[:, :, i]) - weights[:, :, i] = np.fliplr(weights[:, :, i]) - return feature_tile, one_hot, weights + return feature_tile, one_hot -def _flip_ud(feature_tile, one_hot, weights): +def _flip_ud(feature_tile, one_hot, binary=False): + for i in range(feature_tile.shape[2]): feature_tile[:, :, i] = np.flipud(feature_tile[:, :, i]) + if binary: + one_hot = np.flipud(one_hot) + return feature_tile, one_hot for i in range(one_hot.shape[2]): one_hot[:, :, i] = np.flipud(one_hot[:, :, i]) - weights[:, :, i] = np.flipud(weights[:, :, i]) - return feature_tile, one_hot, weights - - -def _rotate(feature_tile, one_hot, weights): - # Rotate data. - rot = np.random.randint(-25, 25) - for i in range(feature_tile.shape[2]): - feature_tile[:, :, i] = transform.rotate(feature_tile[:, :, i], rot, cval=0) - for i in range(one_hot.shape[2]): - one_hot[:, :, i] = transform.rotate(one_hot[:, :, i], rot, cval=0) - weights[:, :, i] = transform.rotate(weights[:, :, i], rot, cval=0) - return feature_tile, one_hot, weights + return feature_tile, one_hot -def _flip_lr_ud(feature_tile, one_hot, weights): - feature_tile, one_hot, weights = _flip_lr(feature_tile, one_hot, weights) - feature_tile, one_hot, weights = _flip_ud(feature_tile, one_hot, weights) - return feature_tile, one_hot, weights +def _flip_lr_ud(feature_tile, one_hot, binary=False): + feature_tile, one_hot = _flip_lr(feature_tile, one_hot, binary) + feature_tile, one_hot = _flip_ud(feature_tile, one_hot, binary) + return feature_tile, one_hot -def _do_nothing(feature_tile, one_hot, weights): - return feature_tile, one_hot, weights +def _do_nothing(feature_tile, one_hot, binary): + return feature_tile, one_hot -def _augment_data(feature_tile, one_hot, weights): +def _augment_data(feature_tile, one_hot, binary=False): ''' Applies rotation | lr | ud | lr_ud | flipping, or doesn't. 
''' possible_augments = [_flip_ud, _flip_lr, _flip_lr_ud, _do_nothing] - return choice(possible_augments)(feature_tile, one_hot, weights) + return choice(possible_augments)(feature_tile, one_hot, binary) if __name__ == '__main__': diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 07f0df7..7f20231 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -6,7 +6,7 @@ from fiona import open as fopen from glob import glob -from lxml import html +from lxml import html from requests import get from copy import deepcopy from numpy import zeros, asarray, array, reshape, nan, sqrt, std, uint16 @@ -43,27 +43,23 @@ def download_images_over_shapefile(shapefile, image_directory, year): return ims -def download_from_pr(p, r, year, image_directory, landsat_bands, climate_bands): +def download_from_pr(p, r, year, image_directory, satellite=8): '''Downloads p/r corresponding to the location of the shapefile.''' # TODO: add rasterioIOError error checking # and resolution here. 
suff = str(p) + '_' + str(r) + "_" + str(year) landsat_dir = os.path.join(image_directory, suff) - satellite = 8 - if year < 2013: - satellite = 7 if not os.path.isdir(landsat_dir): os.mkdir(landsat_dir) - ims = _download_images(landsat_dir, p, r, year, satellite, landsat_bands, climate_bands) + ims = _download_images(landsat_dir, p, r, year, satellite) return ims -def _download_images(project_directory, path, row, year, satellite, landsat_bands, climate_bands, +def _download_images(project_directory, path, row, year, satellite, n_landsat=3, max_cloud_pct=40): - image_stack = ImageStack(satellite=satellite, path=path, landsat_bands=landsat_bands, - climate_bands=climate_bands, row=row, root=project_directory, + image_stack = ImageStack(satellite=satellite, path=path, row=row, root=project_directory, max_cloud_pct=max_cloud_pct, n_landsat=n_landsat, year=year) image_stack.build_evaluating() # the difference b/t build_training() and build_eval() is @@ -125,7 +121,7 @@ def _climate_band_map(directory, band_map, date): return band_map -def paths_mappings_single_scene(landsat_directory): +def paths_mapping_single_scene(landsat_directory): directories = [os.path.join(landsat_directory, f) for f in os.listdir(landsat_directory) if os.path.isdir(os.path.join(landsat_directory, f))] climate_directory = os.path.join(landsat_directory, 'climate_rasters') @@ -170,6 +166,71 @@ def paths_map_multiple_scenes(image_directory, satellite=8): return band_map +def mean_of_scenes(paths_map, target_geo, target_shape): + + rasters = _load_rasters(paths_map, target_geo, target_shape) + n_scenes = len(paths_map['B1.TIF']) + num_rasters = len(rasters) + j = 0 + image_stack = np.zeros((num_rasters, target_shape[1], target_shape[2])) + for band in sorted(paths_map.keys()): + feature_rasters = paths_map[band] + empty = np.zeros(target_shape) + for feature_raster in feature_raster: + empty += rasters[feature_raster] + image_stack[j] = empty/n_scenes + return image_stack + + +def 
median_of_scenes(paths_map, target_geo, target_shape): + + rasters = _load_rasters(paths_map, target_geo, target_shape) + n_scenes = len(paths_map['B1.TIF']) + num_rasters = len(rasters) + j = 0 + image_stack = np.zeros((num_rasters, target_shape[1], target_shape[2])) + for band in sorted(paths_map.keys()): + feature_rasters = paths_map[band] + empty = np.zeros(target_shape) + for feature_raster in feature_raster: + empty += rasters[feature_raster] + image_stack[j] = empty/n_scenes + return image_stack + + +def map_bands_to_indices(target_bands, satellite=8): + + band_map = defaultdict(list) + for band in landsat_rasters()[satellite]: + band_map[band] = [] + for band in static_rasters(): + band_map[band] = [] + for band in climate_rasters(): + band_map[band] = [] + + image_directory = '/home/thomas/share/image_data/train/37_28_2013/' + extensions = (".tif", ".TIF") + for dirpath, dirnames, filenames in os.walk(image_directory): + for f in filenames: + if any(ext in f for ext in extensions): + for band in band_map: + if f.endswith(band): + band_map[band].append(os.path.join(dirpath, f)) + + for band in band_map: + band_map[band] = sorted(band_map[band]) # ensures ordering within bands - sort by time. + + indices = [] + i = 0 + for feat in sorted(band_map.keys()): # ensures the stack is in the same order each time. 
+ feature_rasters = band_map[feat] + for feature_raster in feature_rasters: + for band in target_bands: + if feature_raster.endswith(band): + indices.append(i) + i += 1 + return indices + def _maybe_warp(feature_raster, target_geo, target_shape): arr, _ = load_raster(feature_raster) @@ -178,15 +239,29 @@ def _maybe_warp(feature_raster, target_geo, target_shape): return arr, feature_raster -def stack_rasters_multiprocess(paths_map, target_geo, target_shape): - first = True - stack = None +def _load_rasters(paths_map, target_geo, target_shape): + single_band = False num_rasters = 0 for key in paths_map: - num_rasters += len(paths_map[key]) + if isinstance(paths_map[key], str): + single_band = True + num_rasters += 1 + else: + num_rasters += len(paths_map[key]) j = 0 - feature_rasters = [feature_raster for feat in paths_map.keys() for feature_raster in - paths_map[feat]] + + if not single_band: + feature_rasters = [feature_raster for feat in paths_map.keys() for feature_raster in + paths_map[feat]] + else: + feature_rasters = [paths_map[feat] for feat in paths_map.keys()] + tg = [target_geo]*len(feature_rasters) + ts = [target_shape]*len(feature_rasters) + if not single_band: + feature_rasters = [feature_raster for feat in paths_map.keys() for feature_raster in + paths_map[feat]] + else: + feature_rasters = [paths_map[feat] for feat in paths_map.keys()] tg = [target_geo]*len(feature_rasters) ts = [target_shape]*len(feature_rasters) with Pool() as pool: @@ -194,12 +269,35 @@ def stack_rasters_multiprocess(paths_map, target_geo, target_shape): # Speedup of ~40s. 
out = pool.starmap(_maybe_warp, zip(feature_rasters, tg, ts)) rasters = {feature_raster: array for (array, feature_raster) in out} + return rasters, num_rasters + + +def stack_rasters_multiprocess(paths_map, target_geo, target_shape): + first = True + stack = None + single_band = False + j = 0 + rasters, num_rasters = _load_rasters(paths_map, target_geo, target_shape) for feat in sorted(paths_map.keys()): # ensures the stack is in the same order each time. # Ordering within bands is assured by sorting the list that # each band corresponding to, as that's sorted by date. feature_rasters = paths_map[feat] # maps bands to their location in filesystem. - for feature_raster in feature_rasters: - arr = rasters[feature_raster] + if not single_band: + for feature_raster in feature_rasters: + arr = rasters[feature_raster] + if first: + stack = zeros((num_rasters, target_shape[1], target_shape[2]), uint16) + stack[j, :, :] = arr + j += 1 + first = False + else: + stack[j, :, :] = arr + j += 1 + else: + + arr, _ = _select_correct_band(rasters, feat) + + # somehow select if first: stack = zeros((num_rasters, target_shape[1], target_shape[2]), uint16) stack[j, :, :] = arr @@ -211,12 +309,17 @@ def stack_rasters_multiprocess(paths_map, target_geo, target_shape): return stack +def _select_correct_band(rasters, target_feat): + for path in rasters: + if path.endswith(target_feat): + return rasters[path], path + + def stack_rasters(paths_map, target_geo, target_shape): first = True stack = None num_rasters = 0 - for key in paths_map: - num_rasters += len(paths_map[key]) + for key in paths_map: num_rasters += len(paths_map[key]) j = 0 for feat in sorted(paths_map.keys()): # ensures the stack is in the same order each time. 
# Ordering within bands is assured by sorting the list that @@ -314,16 +417,10 @@ def load_raster(raster_name): if __name__ == "__main__": from runspec import landsat_rasters, climate_rasters - download_from_pr(37, 28, 2013, '/home/thomas/landsat_test/', landsat_rasters(), - climate_rasters()) - - - - - - - - - - - + for path in range(34, 44): + for row in range(26, 30): + for sat in [7, 8]: + for year in [2012, 2013, 2014, 2015]: + if year < 2012 and sat == 8: + continue + download_from_pr(path, row, year, '/home/thomas/share/landsat_test/', 7) diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py old mode 100644 new mode 100755 index 59a7641..2ceee22 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -1,23 +1,51 @@ import os -# os.environ['CUDA_VISIBLE_DEVICES'] = "-1" +#E os.environ['CUDA_VISIBLE_DEVICES'] = "-1" import numpy as np import keras.backend as K import tensorflow as tf import pdb +import argparse + from sys import stdout from tensorflow.keras.models import load_model from glob import glob from rasterio.errors import RasterioIOError from matplotlib.pyplot import imshow, show, subplots from multiprocessing import Pool +from scipy.special import expit +from scipy.stats import mode -from data_utils import save_raster, stack_rasters, paths_map_multiple_scenes, load_raster, clip_raster -from fully_conv import weighted_loss, weighted_focal_loss +from data_utils import (save_raster, stack_rasters, stack_rasters_multiprocess, paths_map_multiple_scenes, load_raster, clip_raster, paths_mapping_single_scene) +from losses import multiclass_acc, masked_binary_xent, dice_loss, binary_acc, binary_focal_loss from extract_training_data import concatenate_fmasks - _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) +masked_binary_xent = masked_binary_xent(pos_weight=1.0) +custom_objects = {'masked_binary_xent':masked_binary_xent, 'binary_acc':binary_acc} + + +def 
_evaluate_image_return_logits(model, raster, n_classes, n_overlaps=4): + chunk_size = 608 + diff = 608 + stride = 608 + overlap_step = 10 + raster = np.swapaxes(raster, 0, 2) + raster = np.expand_dims(raster, 0) + out = np.zeros((raster.shape[1], raster.shape[2], n_classes)) + for k in range(0, n_overlaps*overlap_step, overlap_step): + for i in range(k, raster.shape[1]-diff, stride): + for j in range(k, raster.shape[2]-diff, stride): + sub_raster = raster[:, i:i+chunk_size, j:j+chunk_size, :] + preds = model.predict([sub_raster]) + preds = expit(preds[0]) + out[i:i+chunk_size, j:j+chunk_size, :] += preds + stdout.write("K: {} of {}. Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / raster.shape[1])) + out = np.swapaxes(out, 0, 2) + out = out.astype(np.float32) + return out + + def fmask_evaluated_image(evaluated_image, path, row, year, landsat_directory): image, meta = load_raster(evaluated_image) suffix = str(path) + '_' + str(row) + '_' + str(year) @@ -32,67 +60,70 @@ def fmask_evaluated_image(evaluated_image, path, row, year, landsat_directory): return image, meta -def evaluate_image_many_shot(path, row, year, image_directory, model_path, num_classes=4, n_overlaps=4, outfile=None, ii=None): +def evaluate_image_many_shot(image_directory, model_paths, n_classes=4, + n_overlaps=4, outfile=None, custom_objects=None): ''' To recover from same padding, slide many different patches over the image. 
''' - suffix = '{}_{}_{}'.format(path, row, year) - image_path = os.path.join(image_directory, suffix) - model = load_model(model_path, custom_objects={'weighted_loss':weighted_loss, 'tf':tf, - '_epsilon':_epsilon}) - if not os.path.isdir(image_path): - print('Images not downloaded for {}'.format(image_path)) + print(outfile) + if not isinstance(model_paths, list): + model_paths = [model_paths] + if os.path.isfile(outfile): + print("image {} already exists".format(outfile)) return - paths_mapping = paths_map_multiple_scenes(image_path) - try: - template, meta = load_raster(paths_mapping['B1.TIF'][0]) - image_stack = stack_rasters(paths_mapping, meta, template.shape) - except Exception as e: - print(e) + if not os.path.isdir(image_directory): + print('Images not downloaded for {}'.format(image_directory)) return - class_mask = np.ones((1, image_stack.shape[2], image_stack.shape[1], num_classes)) # Just a placeholder - out = np.zeros((image_stack.shape[2], image_stack.shape[1], num_classes)) - chunk_size = 608 - diff = 608 - stride = 608 - overlap_step = 10 - image_stack = np.swapaxes(image_stack, 0, 2) - image_stack = np.expand_dims(image_stack, 0) - print(image_stack.shape) - for k in range(0, n_overlaps*overlap_step, overlap_step): - for i in range(k, image_stack.shape[1]-diff, stride): - for j in range(k, image_stack.shape[2]-diff, stride): - sub_image_stack = image_stack[:, i:i+chunk_size, j:j+chunk_size, :] - sub_mask = class_mask[:, i:i+chunk_size, j:j+chunk_size, :] - preds = model.predict([sub_image_stack, sub_mask]) - preds = np.exp(preds) - soft = preds / np.sum(preds, axis=-1, keepdims=True) - out[i:i+chunk_size, j:j+chunk_size, :] += soft[0] - stdout.write("K: {} of {}. 
Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / image_stack.shape[1])) - out = np.swapaxes(out, 0, 2) - out = out.astype(np.float32) - temp_mask = np.zeros((1, out.shape[1], out.shape[2])) - masked_image = concatenate_fmasks(image_path, temp_mask, meta, nodata=1) - for i in range(out.shape[0]): - out[i, :, :][masked_image.mask[0]] = np.nan - meta.update(dtype=np.float32) - out /= n_overlaps + paths_mapping = paths_map_multiple_scenes(image_directory) + template, meta = load_raster(paths_mapping['B1.TIF'][0]) + image_stack = stack_rasters_multiprocess(paths_mapping, meta, template.shape) + out_arr = np.zeros((1, image_stack.shape[1], image_stack.shape[2])) + for i, model_path in enumerate(model_paths): + print('loading {}'.format(model_path)) + model = load_model(model_path, custom_objects=custom_objects) + out_arr += _evaluate_image_return_logits(model, image_stack, n_classes=n_classes, + n_overlaps=n_overlaps) + del model + + print(out_arr.shape) + temp_mask = np.zeros((1, out_arr.shape[1], out_arr.shape[2])) + fmasked_image = concatenate_fmasks(image_directory, temp_mask, meta, nodata=1) + # for i in range(out_arr.shape[0]): + # out_arr[i, :, :][fmasked_image.mask[0]] = np.nan + meta.update(dtype=np.float64) + out_arr /= n_overlaps if outfile: - save_raster(out, outfile, meta, count=num_classes) - return out + save_raster(out_arr, outfile, meta, count=n_classes) + return out_arr if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-m', '--model', type=str, required=True) + parser.add_argument('-i', '--image-dir', type=str, required=True) + parser.add_argument('-o', '--out-dir', type=str) + parser.add_argument('-n', '--n-classes', type=int, default=5) + parser.add_argument('-b', '--binary', action='store_true') + args = parser.parse_args() + if args.out_dir is None: + out_dir = os.path.dirname(os.path.splitext(args.model)[0]) + if not os.path.isdir(out_dir): + os.mkdir(out_dir) + else: + out_dir = 
args.out_dir + + os.environ['CUDA_VISIBLE_DEVICES'] = '-1' + custom_objects = {'mb':masked_binary_xent, 'binary_acc':binary_acc} - paths = [37, 39, 41] - rows = [28, 27, 27] - years = [2013, 2013, 2013] - image_directory = "/home/thomas/share/image_data/train/" - model_dirs = [d for d in os.listdir('./models/') if 'template_to_fill_in' in d] - model_paths = ['/home/thomas/IrrMapper/fully-conv-classification/models/{}/model.h5'.format(model_dir) for model_dir in model_dirs] - outfile_path = '/home/thomas/IrrMapper/fully-conv-classification/models/{}/' - # outfile_path = outfile_path + "evaluated_{}_{}_{}.tif" - outfile_paths = [outfile_path.format(model_dir) + "evaluated_{}_{}_{}.tif" for model_dir in model_dirs] - for model_path, outfile_path in zip(model_paths, outfile_paths): - print(model_path, outfile_path) - for path, row, year in zip(paths, rows, years): - evaluate_image_many_shot(path, row, year, image_directory, model_path, num_classes=6, - n_overlaps=1, outfile=outfile_path.format(path, row, year)) + # model_paths = glob('/home/thomas/IrrMapper/fully-conv-classification/ensemble_models/test3/*.h5') + # model_paths = sorted(model_paths) + # model_paths = model_paths[len(model_paths)-1] + model_paths = args.model + image_directory = args.image_dir + outfile = os.path.join(os.path.basename(os.path.normpath(image_directory)) + + '_random_majority_sample.tif') + outfile = os.path.join(out_dir, outfile) + evaluate_image_many_shot(image_directory, + model_paths=model_paths, + n_classes=args.n_classes, + n_overlaps=1, + outfile=outfile, + custom_objects=custom_objects) diff --git a/fully-conv-classification/extract_training_data.py b/fully-conv-classification/extract_training_data.py index 0a4ca9b..5c44ad8 100644 --- a/fully-conv-classification/extract_training_data.py +++ b/fully-conv-classification/extract_training_data.py @@ -15,12 +15,11 @@ from rasterio.errors import RasterioIOError from skimage import transform from sat_image.warped_vrt import 
warp_single_image -from tensorflow.keras.utils import Sequence -from multiprocessing import Pool +from multiprocessing import Pool from collections import defaultdict from runspec import landsat_rasters, climate_rasters, mask_rasters, assign_shapefile_class_code, assign_shapefile_year -from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr +from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr, paths_mapping_single_scene from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features @@ -51,7 +50,7 @@ def to_pickle(self, training_directory): with open(outfile, 'wb') as f: pickle.dump(self.dict, f, protocol=pickle.HIGHEST_PROTOCOL) else: - print("What? Contact administrator.") + raise ValueError() def _pickle_datatile(datatile, training_directory): @@ -67,19 +66,21 @@ def _pickle_datatile(datatile, training_directory): pass -def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): +def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0, target_directory=None): ''' ``Fmasks'' are masks of clouds and water. We don't want clouds/water in the training set, so this function gets all the fmasks for a landsat scene (contained in image_directory), and merges them into one raster. They may not be the same size, so warp_vrt is used to make them align. ''' + class_mask = class_mask.copy() paths = [] for dirpath, dirnames, filenames in os.walk(image_directory): for f in filenames: for suffix in mask_rasters(): if f.endswith(suffix): - paths.append(os.path.join(dirpath, f)) + pth = os.path.join(dirpath, f) + paths.append(pth) for fmask_file in paths: fmask, _ = load_raster(fmask_file) # clouds, water present where fmask == 1. 
@@ -92,86 +93,84 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0): return class_mask -def extract_training_data_multiple_classes_per_instance(split_shapefile_directory, image_directory, - training_data_directory, assign_shapefile_year, assign_shapefile_class_code, n_classes=5): - - split_shapefiles = [f for f in glob(os.path.join(split_shapefile_directory, "*.shp"))] - - done = set() - - total_time = 0 - - for counter, shapefile in enumerate(split_shapefiles): - begin_time = time.time() - if shapefile in done: - continue - _, path, row = os.path.splitext(shapefile)[0][-7:].split('_') - year = assign_shapefile_year(shapefile) - path_row_year = path + '_' + row + '_' + str(year) - print("Extracting data for", path_row_year) - shapefiles_over_same_path_row = all_matching_shapefiles(shapefile, - split_shapefile_directory, assign_shapefile_year) - done.update(shapefiles_over_same_path_row) - image_path = os.path.join(image_directory, path_row_year) - if not os.path.isdir(image_path): - download_from_pr(path, row, year, image_directory, landsat_rasters, climate_rasters) - image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) - try: - mask_file = image_path_map['B1.TIF'][0] - except IndexError: - os.rmdir(os.path.join(image_directory, path_row_year)) - download_from_pr(path, row, year, image_directory) - image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) - mask_file = image_path_map['B1.TIF'][0] - - mask, mask_meta = load_raster(mask_file) - mask = np.zeros_like(mask).astype(np.int) - fmask = concatenate_fmasks(os.path.join(image_directory, path_row_year), mask, - mask_meta) - if fmask.mask.all(): - print("All pixels covered by cloud for {}".format(path_row_year)) - continue - - first = True - class_labels = None - for f in shapefiles_over_same_path_row: - class_code = assign_shapefile_class_code(f) - print(f, class_code) - out, _ = mask_raster_to_shapefile(f, 
mask_file, return_binary=False) - if first: - class_labels = out - class_labels[~class_labels.mask] = class_code - first = False - else: - class_labels[~out.mask] = class_code - try: - image_stack = stack_rasters_multiprocess(image_path_map, target_geo=mask_meta, target_shape=mask.shape) - except RasterioIOError as e: - print("Redownload images for", path_row_year) - # TODO: remove corrupted file and redownload images. - continue - class_labels[fmask.mask] = ma.masked # well, I didn't fmask the data. - image_stack = np.swapaxes(image_stack, 0, 2) - class_labels = np.swapaxes(class_labels, 0, 2) - class_labels = np.squeeze(class_labels) - _save_training_data_multiple_classes(image_stack, class_labels, - training_data_directory, n_classes) - end_time = time.time() - diff = end_time - begin_time - total_time += diff - print('single iteration time:', diff, 'avg.', total_time / (counter + 1)) - - -def _save_training_data_multiple_classes(image_stack, class_labels, training_data_directory, n_classes): - tile_size = 608 +def extract_training_data_over_path_row(shapefiles, path, row, year, image_directory, + training_data_directory, n_classes, assign_shapefile_class_code, path_map_func=None, + preprocessing_func=None, tile_size=608): + + if path_map_func is None: + path_map_func = paths_map_multiple_scenes + + if not isinstance(shapefiles, list): + shapefiles = [shapefiles] + + path_row_year = str(path) + '_' + str(row) + '_' + str(year) + image_path = os.path.join(image_directory, path_row_year) + if not os.path.isdir(image_path): + download_from_pr(path, row, year, image_directory) + image_path_maps = path_map_func(image_path) + mask_file = _random_tif_from_directory(image_path) + mask, mask_meta = load_raster(mask_file) + mask = np.zeros_like(mask).astype(np.int) + first = True + class_labels = None + for f in shapefiles: + class_code = assign_shapefile_class_code(f) + print(f, class_code) + out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) + if 
first: + class_labels = out + class_labels[~class_labels.mask] = class_code + first = False + else: + class_labels[~out.mask] = class_code + try: + image_stack = stack_rasters_multiprocess(image_path_maps, target_geo=mask_meta, target_shape=mask.shape) + except RasterioIOError as e: + print("Redownload images for", path_row_year) + print(e) + return + if preprocessing_func is not None: + image_stack = preprocessing_func(image_path_map, image_stack) + + class_labels = concatenate_fmasks(image_path, class_labels, mask_meta) + image_stack = np.swapaxes(image_stack, 0, 2) + class_labels = np.swapaxes(class_labels, 0, 2) + class_labels = np.squeeze(class_labels) + tiles_y, tiles_x = _target_indices_from_class_labels(class_labels, tile_size) + _save_training_data_from_indices(image_stack, class_labels, training_data_directory, + n_classes, tiles_x, tiles_y, tile_size) + + +def _target_indices_from_class_labels(class_labels, tile_size): + where = np.nonzero(~class_labels.mask) + max_y = np.max(where[0]) + min_y = np.min(where[0]) + max_x = np.max(where[1]) + min_x = np.min(where[1]) + max_y += (tile_size - ((max_y - min_y) % tile_size)) + max_x += (tile_size - ((max_x - min_x) % tile_size)) + tiles_y = range(min_y, max_y, tile_size) + tiles_x = range(min_x, max_x, tile_size) + return tiles_y, tiles_x + + +def _save_training_data_from_indices(image_stack, class_labels, training_data_directory, + n_classes, indices_y, indices_x, tile_size): out = [] - class_code = 7 # dummy... 
- for i in range(0, image_stack.shape[0]-tile_size, tile_size): - for j in range(0, image_stack.shape[1]-tile_size, tile_size): + for i in indices_x: + for j in indices_y: class_label_tile = class_labels[i:i+tile_size, j:j+tile_size] - if np.all(class_label_tile.mask == True): + shape = class_label_tile.shape + if (shape[0], shape[1]) != (tile_size, tile_size): + # Todo: handle this continue - sub_one_hot = _one_hot_from_labels_mc(class_label_tile, n_classes) + if np.all(class_label_tile.mask): + continue + if np.any(class_label_tile == 1): + class_code = 1 + else: + class_code = 0 + sub_one_hot = _one_hot_from_labels(class_label_tile, n_classes) sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] dt = DataTile(sub_image_stack, sub_one_hot, class_code) out.append(dt) @@ -187,121 +186,83 @@ def _save_training_data_multiple_classes(image_stack, class_labels, training_dat out = [] -def _one_hot_from_labels_mc(labels, n_classes): - one_hot = np.zeros((labels.shape[0], labels.shape[1], n_classes)) - for class_code in range(n_classes): - one_hot[:, :, class_code][labels == class_code] = 1 - if class_code == 0: # apply border class to only irrigated pixels - border_labels = make_border_labels(one_hot[:, :, 0], border_width=1) - border_labels.astype(np.int) - one_hot[:, :, n_classes-1] = border_labels - return one_hot.astype(np.int) +def _random_tif_from_directory(image_directory): + + bleh = os.listdir(image_directory) + for d in bleh: + if os.path.isdir(os.path.join(image_directory, d)): + tiffs = glob(os.path.join(os.path.join(image_directory, d), "*.TIF")) + tiffs = [tif for tif in tiffs if 'BQA' not in tif] + break + shuffle(tiffs) + return tiffs[0] + + +def min_data_tiles_to_cover_labels(shapefiles, path, row, year, image_directory, tile_size=608): + path_row_year = "_".join([str(path), str(row), str(year)]) + image_directory = os.path.join(image_directory, path_row_year) + mask_file = _random_tif_from_directory(image_directory) + mask, mask_meta = 
load_raster(mask_file) + mask = np.zeros_like(mask).astype(np.int) + first = True + class_labels = None + if not isinstance(shapefiles, list): + shapefiles = [shapefiles] + for f in shapefiles: + class_code = assign_shapefile_class_code(f) + out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) + if first: + class_labels = out + class_labels[~class_labels.mask] = class_code + first = False + else: + class_labels[~out.mask] = class_code + class_labels = concatenate_fmasks(image_directory, class_labels, mask_meta) + where = np.nonzero(~class_labels.mask[0]) + max_y = np.max(where[0]) + min_y = np.min(where[0]) + max_x = np.max(where[1]) + min_x = np.min(where[1]) + frac = np.count_nonzero(~class_labels.mask)/(class_labels.shape[1]*class_labels.shape[2]) + max_y += (tile_size - ((max_y - min_y) % tile_size)) + max_x += (tile_size - ((max_x - min_x) % tile_size)) -def extract_training_data_single_class_per_instance(split_shapefile_directory, image_directory, - training_data_directory, assign_shapefile_year, assign_shapefile_class_code, - offline_augmentation_dict=None, n_classes=5): + tiles_y = range(min_y, max_y, tile_size) + tiles_x = range(min_x, max_x, tile_size) - split_shapefiles = [f for f in glob(os.path.join(split_shapefile_directory, "*.shp"))] + plt.plot([max_x, max_x], [max_y, min_y], 'b', linewidth=2) + plt.plot([min_x, min_x], [max_y, min_y], 'b', linewidth=2) + plt.plot([min_x, max_x], [max_y, max_y], 'b', linewidth=2) + plt.plot([min_x, max_x], [min_y, min_y], 'b', linewidth=2) - done = set() - total_time = 0 - for counter, shapefile in enumerate(split_shapefiles): - begin_time = time.time() - if shapefile in done: - continue - _, path, row = os.path.splitext(shapefile)[0][-7:].split('_') - year = assign_shapefile_year(shapefile) - path_row_year = path + '_' + row + '_' + str(year) - print("Extracting data for", path_row_year) - shapefiles_over_same_path_row = all_matching_shapefiles(shapefile, - split_shapefile_directory, 
assign_shapefile_year) - done.update(shapefiles_over_same_path_row) - image_path = os.path.join(image_directory, path_row_year) - if not os.path.isdir(image_path): - download_from_pr(path, row, year, image_directory, landsat_rasters, climate_rasters) - image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) - try: - # todo : more robust way of getting a random band from the paths map - mask_file = image_path_map['B1.TIF'][0] - except IndexError: - os.rmdir(os.path.join(image_directory, path_row_year)) - download_from_pr(path, row, year, image_directory, landsat_rasters, climate_rasters) - image_path_map = paths_map_multiple_scenes(os.path.join(image_directory, path_row_year)) - mask_file = image_path_map['B1.TIF'][0] - - mask, mask_meta = load_raster(mask_file) - mask = np.zeros_like(mask).astype(np.int) - fmask = concatenate_fmasks(os.path.join(image_directory, path_row_year), mask, - mask_meta) - if fmask.mask.all(): - print("All pixels covered by cloud for {}".format(path_row_year)) - continue - first = True - class_labels = None - for f in shapefiles_over_same_path_row: - class_code = assign_shapefile_class_code(f) - if offline_augmentation_dict[class_code] == 0: - continue - print(f, class_code) - out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) - if first: - class_labels = out - class_labels[~class_labels.mask] = class_code - first = False - else: - class_labels[~out.mask] = class_code - if class_labels is None: - print("no extra augmentation for", path_row_year) - continue - try: - image_stack = stack_rasters_multiprocess(image_path_map, target_geo=mask_meta, target_shape=mask.shape) - except RasterioIOError as e: - print("Redownload images for", path_row_year) - # TODO: remove corrupted file and redownload images. - continue - class_labels[fmask.mask] = ma.masked # well, I didn't fmask the data. 
- image_stack = np.swapaxes(image_stack, 0, 2) - class_labels = np.swapaxes(class_labels, 0, 2) - class_labels = np.squeeze(class_labels) - _save_training_data_offline_augmentation(image_stack, class_labels, - training_data_directory, n_classes, offline_augmentation_dict) - end_time = time.time() - diff = end_time - begin_time - total_time += diff - print('single iteration time:', diff, 'avg.', total_time / (counter + 1)) - - -def _save_training_data_offline_augmentation(image_stack, class_labels, - training_data_directory, n_classes, offline_augmentation_dict): - unique = np.unique(class_labels) - unique = unique[~unique.mask] - tile_size = 608 - for class_code in unique: - out = [] - augmentation_step = offline_augmentation_dict[class_code] - for i in range(0, image_stack.shape[0]-tile_size, augmentation_step): - for j in range(0, image_stack.shape[1]-tile_size, augmentation_step): - class_label_tile = class_labels[i:i+tile_size, j:j+tile_size] - if not _check_dimensions_and_min_pixels(class_label_tile, class_code, tile_size): - continue - sub_one_hot = _one_hot_from_labels(class_label_tile, class_code, n_classes) - weights = _weights_from_one_hot(sub_one_hot, n_classes) - sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] - dt = DataTile(sub_image_stack, sub_one_hot, weights, class_code) - out.append(dt) - if len(out) > 50: - with Pool() as pool: - td = [training_data_directory]*len(out) - pool.starmap(_pickle_datatile, zip(out, td)) - out = [] - if len(out): - with Pool() as pool: - td = [training_data_directory]*len(out) - pool.starmap(_pickle_datatile, zip(out, td)) - out = [] + y_min = [min_x] * len(tiles_y) + y_max = [max_x] * len(tiles_y) + for t, mn, mx in zip(tiles_y, y_min, y_max): + plt.plot([mn, mx], [t, t], 'r') + + x_min = [min_y] * len(tiles_x) + x_max = [max_y] * len(tiles_x) + for t, mn, mx in zip(tiles_x, x_min, x_max): + plt.plot([t, t], [mn, mx], 'r') + + plt.imshow(class_labels[0]) + plt.title(frac) + plt.colorbar() + plt.show() + 
+ +def _one_hot_from_labels(labels, n_classes): + one_hot = np.zeros((labels.shape[0], labels.shape[1], n_classes)) + for class_code in range(n_classes): + one_hot[:, :, class_code][labels == class_code] = 1 + # if class_code == 1: # apply border class to only irrigated pixels + # border_labels = make_border_labels(one_hot[:, :, 1], border_width=1) + # border_labels = border_labels.astype(np.uint8) + # one_hot[:, :, n_classes-1][border_labels == 1] = 1 + return one_hot.astype(np.int) def _weights_from_one_hot(one_hot, n_classes): @@ -320,16 +281,6 @@ def _one_hot_from_shapefile(shapefile, mask_file, shapefile_class_code, n_classe return one_hot -def _one_hot_from_labels(labels, class_code, n_classes): - one_hot = np.zeros((labels.shape[0], labels.shape[1], n_classes)) - one_hot[:, :, class_code][labels == class_code] = 1 - if class_code == 0: # apply border class to only irrigated pixels - border_labels = make_border_labels(one_hot[:, :, 0], border_width=1) - border_labels.astype(np.int) - one_hot[:, :, n_classes-1] = border_labels - return one_hot.astype(np.int) - - def _check_dimensions_and_min_pixels(sub_one_hot, class_code, tile_size): # 200 is the minimum amount of pixels required to save the data. if sub_one_hot.shape[0] != tile_size or sub_one_hot.shape[1] != tile_size: @@ -346,7 +297,7 @@ def all_matching_shapefiles(to_match, shapefile_directory, assign_shapefile_year year = assign_shapefile_year(to_match) for f in glob(os.path.join(shapefile_directory, "*.shp")): if get_shapefile_path_row(f) == pr and assign_shapefile_year(f) == year: - out.append(f) + out.append(f) return out @@ -356,19 +307,54 @@ def make_border_labels(mask, border_width): dm[dm > border_width] = 0 return dm +def _mean_of_three_images(paths_map, image_stack): + # for each key in image_stack (sorted): + # ...climate...landsat...static... 
+ pass + + if __name__ == '__main__': + + sd = glob('shapefile_data/test/*.shp') + idd = '/home/thomas/share/image_data/' + td = '/home/thomas/ssd/binary_train_no_border_labels/test/' + n_classes = 2 + + done = set() - parser = argparse.ArgumentParser() - parser.add_argument('-s', '--shapefile-dir', help='shapefile directory containing the split shapefiles', type=str) - parser.add_argument('-i', '--image-dir', help='directory in which to find/save landsat images', type=str) - parser.add_argument('-t', '--training-dir', help='directory in which to save training data', type=str) - parser.add_argument('-n', '--n-classes', help='number of classes present', type=int) + for i, f in enumerate(sd): + if f in done: + continue + ffg = all_matching_shapefiles(f, 'shapefile_data/test/', assign_shapefile_year) + for e in ffg: + done.add(e) + bs = os.path.splitext(os.path.basename(f))[0] + _, path, row = bs[-7:].split("_") + year = assign_shapefile_year(f) + print("extracting data for", path, row, year) + paths_map_func = paths_map_multiple_scenes + extract_training_data_over_path_row(ffg, path, row, year, idd, td, n_classes, + assign_shapefile_class_code, path_map_func=paths_map_func) + + + # TODO: rewrite this to take advantage of test train data in same path/row + sd = glob('shapefile_data/train/*.shp') + idd = '/home/thomas/share/image_data/' + td = '/home/thomas/ssd/binary_train_no_border_labels/train/' + n_classes = 2 - # todo : add single scene mapping - # more robust selection of random band - # how to download only selected images? 
+ done = set() - args = parser.parse_args() - extract_training_data_multiple_classes_per_instance(args.shapefile_dir, args.image_dir, - args.training_dir, assign_shapefile_year, assign_shapefile_class_code, - n_classes=args.n_classes) + for i, f in enumerate(sd): + if f in done: + continue + ffg = all_matching_shapefiles(f, 'shapefile_data/train/', assign_shapefile_year) + for e in ffg: + done.add(e) + bs = os.path.splitext(os.path.basename(f))[0] + _, path, row = bs[-7:].split("_") + year = assign_shapefile_year(f) + print("extracting data for", path, row, year) + paths_map_func = paths_map_multiple_scenes + extract_training_data_over_path_row(ffg, path, row, year, idd, td, n_classes, + assign_shapefile_class_code, path_map_func=paths_map_func) diff --git a/fully-conv-classification/precision_and_recall.py b/fully-conv-classification/precision_and_recall.py new file mode 100644 index 0000000..44cdbd6 --- /dev/null +++ b/fully-conv-classification/precision_and_recall.py @@ -0,0 +1,33 @@ +import os +import argparse +from tensorflow.keras.models import load_model +from glob import glob + + +from losses import binary_focal_loss, binary_acc, masked_binary_xent +from data_generators import RandomMajorityUndersamplingSequence, BinaryDataSequence +from train_utils import confusion_matrix_from_generator + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model', type=str, required=True) + parser.add_argument('--test-data-path', type=str, required=True) + parser.add_argument('--use-gpu', action='store_true') + args = parser.parse_args() + if not args.use_gpu: + os.environ['CUDA_VISIBLE_DEVICES'] = '-1' + custom_objects = {'mb':masked_binary_xent(pos_weight=1.0), 'binary_acc':binary_acc} + try: + model = load_model(args.model, custom_objects=custom_objects) + except ValueError as e: + print(e.args) + raise + + batch_size = 1 + files = glob(os.path.join(args.test_data_path, '*.pkl')) + test_generator = BinaryDataSequence(batch_size, 
files, training=False) + cmat, prec, recall = confusion_matrix_from_generator(test_generator, batch_size, model, + n_classes=2) + print('model {} has p:{}, r:{}'.format(args.model, prec, recall)) diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index d57a7ed..7715fb7 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -31,15 +31,14 @@ from shutil import rmtree from bounds import RasterBounds, GeoBounds from dem import AwsDem -from ssebop_app.image import get_image from functools import partial -from pyproj import Proj, transform as pytransform +from pyproj import Proj, transform as pytransform, CRS as pyCRS from shapely.geometry import shape, Polygon, mapping from shapely.ops import transform from rasterio import open as rasopen, float32 from rasterio.crs import CRS from pixel_classification.crop_data_layer import CropDataLayer as Cdl -from pixel_classification.runspec import landsat_rasters, static_rasters, ancillary_rasters, mask_rasters, climate_rasters +from pixel_classification.runspec import (static_rasters, ancillary_rasters, mask_rasters, landsat_rasters) from sklearn.preprocessing import StandardScaler from geopandas.geodataframe import GeoDataFrame @@ -49,8 +48,7 @@ class ImageStack(object): """ def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None, - max_cloud_pct=None, start=None, end=None, year=None, n_landsat=None, - climate_targets=None): + max_cloud_pct=None, start=None, end=None, year=None, n_landsat=None): self.landsat_mapping = {'LT5': Landsat5, 'LE7': Landsat7, 'LC8': Landsat8} self.landsat_mapping_abv = {5: 'LT5', 7: 'LE7', 8: 'LC8'} @@ -65,7 +63,7 @@ def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None self.lon = lon self.year = year - self.max_cloud = max_cloud_pct + self.max_cloud = 100 self.start = start self.end = end self.root = root @@ -83,24 +81,19 @@ def __init__(self, 
satellite, path=None, row=None, lat=None, lon=None, root=None self.cdl_tif = None self.cdl_mask = None - self.climate_targets = climate_targets - if not self.climate_targets: - self.climate_targets = ['pr', 'pet', 'tmmn', 'tmmx', 'etr'] - + self.climate_targets = ['pr', 'pet', 'tmmn', 'tmmx', 'etr'] self.n = n_landsat - self.ancillary_rasters = [] self.exclude_rasters = [] if year and not start and not end: - self.start = '{}-05-01'.format(self.year) - self.end = '{}-10-15'.format(self.year) + self.start = '{}-01-01'.format(self.year) + self.end = '{}-12-30'.format(self.year) def build_training(self): self.get_landsat(fmask=True) self.profile = self.landsat.rasterio_geometry self.get_climate_timeseries() - self.get_et() self.get_terrain() self.paths_map, self.masks = self._order_images() @@ -108,10 +101,10 @@ def build_evaluating(self): # Multiprocessing on this may not be plausible. self.get_landsat(fmask=True) self.profile = self.landsat.rasterio_geometry # fix this? - #self.get_et() This doesn't work reliably. - self.get_climate_timeseries() + # self.get_climate_timeseries() + # The above line stopped working for unknown reasons. + # Maybe due to mismatched CRS? self.get_terrain() - self.paths_map, self.masks = self._order_images() # paths map is just path-> location # in filesystem. def get_cdl(self): @@ -135,16 +128,18 @@ def get_landsat(self, fmask=False): the root directory. 
""" if self.lat is None: - g = GoogleDownload(self.start, self.end, self.sat, path=self.path, row=self.row, - output_path=self.root, max_cloud_percent=self.max_cloud) + g = GoogleDownload(self.start, self.end, self.sat, + path=self.path, row=self.row, output_path=self.root, + max_cloud_percent=self.max_cloud) else: - g = GoogleDownload(self.start, self.end, self.sat, latitude=self.lat, longitude=self.lon, - output_path=self.root, max_cloud_percent=self.max_cloud) - - g.select_scenes(self.n) - self.scenes = g.selected_scenes - g.download(list_type='selected') - + g = GoogleDownload(self.start, self.end, self.sat, + latitude=self.lat, longitude=self.lon, output_path=self.root, + max_cloud_percent=self.max_cloud) + + #l g.select_scenes(100) + # print('this should download after') + # self.scenes = g.selected_scenes + g.download(list_type='all') self.image_dirs = [x[0] for x in os.walk(self.root) if os.path.basename(x[0])[:3] in self.landsat_mapping.keys()] @@ -155,20 +150,16 @@ def get_landsat(self, fmask=False): def _get_bounds(self): poly_in = self.landsat.get_tile_geometry() poly_in = Polygon(poly_in[0]['coordinates'][0]) + crs = pyCRS(self.profile['crs']['init']) project = partial( pytransform, - Proj(self.profile['crs']), - Proj(self.profile['crs'])) + Proj(crs), + Proj(crs)) # The above is not needed. for_bounds = partial( pytransform, - Proj(self.profile['crs']), - Proj(init='epsg:4326')) - # Change the coordinate system - # The issue: the CRSs for the bounding box and for the mask are different. - # In _project, the incorrect CRS was making it throw an error. - # the fix? Inputting bounds in a unprojected CRS and - # a projected shape for masking. 
+ Proj(crs), + Proj('epsg:4326')) poly = transform(project, poly_in) poly_bounds = transform(for_bounds, poly_in) poly = Polygon(poly.exterior.coords) @@ -180,6 +171,7 @@ def _get_bounds(self): def get_climate_timeseries(self): bounds, geometry = self._get_bounds() + print(bounds, geometry) dates = self.scenes['DATE_ACQUIRED'].values all_dates = arange(datetime(self.year, 3, 1), max(dates)+1, timedelta(days=1)).astype(datetime64) diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py index 317e8eb..0c1553b 100644 --- a/fully-conv-classification/runspec.py +++ b/fully-conv-classification/runspec.py @@ -1,9 +1,7 @@ # ============================================================================================= -# Copyright 2018 dgketchum -# +# Copyright 2018 dgketchum # # Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# you may not use this file except in compliance with the License. 
You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # @@ -20,25 +18,21 @@ abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(abspath) +target_bands = ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF') def assign_shapefile_class_code(shapefile): if 'irrigated' in shapefile and 'unirrigated' not in shapefile: - return 0 - if 'unirrigated' in shapefile: return 1 - if 'uncultivated' in shapefile: - return 2 - if 'wetlands' in shapefile: - return 3 - if 'fallow' in shapefile: - return 4 - - -def assign_shapefile_class_code_binary(shapefile): - if 'irrigated' in shapefile and 'unirrigated' not in shapefile: - return 0 - return 1 + return 0 +# if 'unirrigated' in shapefile: +# return 1 +# if 'uncultivated' in shapefile: +# return 2 +# if 'wetlands' in shapefile: +# return 2 +# if 'fallow' in shapefile: +# return 4 def assign_shapefile_year(shapefile): # get the shapefile name, not the whole path. @@ -78,7 +72,7 @@ def static_rasters(): def mask_rasters(): - m = ('cloud_fmask.tif', 'water_fmask.tif') + m = ('cloud_fmask.tif', )# , 'water_fmask.tif') return m diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 8fb126e..44dc508 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -5,6 +5,7 @@ from copy import deepcopy from fiona import open as fopen from rasterio.mask import mask +from pyproj import CRS from rasterio import open as rasopen from shapely.geometry import shape, mapping, Polygon from sklearn.neighbors import KDTree @@ -27,7 +28,9 @@ def mask_raster_to_shapefile(shapefile, raster, return_binary=True): shp = gpd.read_file(shapefile) shp = shp[shp.geometry.notnull()] with rasopen(raster, 'r') as src: - shp = shp.to_crs(src.crs) + # pyproj deprecated the +init syntax. 
+ crs = CRS(src.crs['init']) + shp = shp.to_crs(crs) features = get_features(shp) arr = src.read() out_image, out_transform = mask(src, shapes=features, filled=False) @@ -45,6 +48,8 @@ def mask_raster_to_features(raster, features, features_meta): # the whole metadata? gdf = gdf[gdf.geometry.notnull()] with rasopen(raster, 'r') as src: + crs = CRS(src.crs['init']) + print(crs) shp = gdf.to_crs(src.crs) features = get_features(shp) arr = src.read() diff --git a/fully-conv-classification/split_shapefiles.py b/fully-conv-classification/split_shapefiles.py old mode 100644 new mode 100755 diff --git a/fully-conv-classification/train_model.py b/fully-conv-classification/train_model.py index 476d4f2..7befbb0 100644 --- a/fully-conv-classification/train_model.py +++ b/fully-conv-classification/train_model.py @@ -1,6 +1,6 @@ import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' -os.environ['CUDA_VISIBLE_DEVICES'] = '-1' +# os.environ['CUDA_VISIBLE_DEVICES'] = '-1' import time import keras.backend as K import tensorflow as tf @@ -10,223 +10,80 @@ from functools import partial from sklearn.metrics import confusion_matrix from tensorflow.keras.models import load_model +from scipy.special import expit from models import unet from data_generators import SatDataSequence -config = tf.ConfigProto() -config.gpu_options.allow_growth +from data_utils import map_bands_to_indices +from train_utils import (construct_parser, make_temporary_directory, save_model_info, + lr_schedule, confusion_matrix_from_generator) +from runspec import target_bands +from losses import * -_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) -def _softmax(logits): - preds = np.exp(logits) - soft = preds / np.sum(preds, axis=-1, keepdims=True) - return soft +training_data_dir = '/home/thomas/ssd/single_scene/train/' +test_data_dir = '/home/thomas/ssd/single_scene/test/' -def _confusion_matrix(valid_generator, model): - first = True - for batch_x, y_true in valid_generator: - y_true = y_true[0][0] - mask = 
np.sum(y_true, axis=2).astype(bool) - y_pred = model.predict(batch_x) - y_pred = y_pred[0] - y_pred = _softmax(y_pred) - y_pred = np.argmax(y_pred, axis=2) - y_true = np.argmax(y_true, axis=2) - y_pred = y_pred[mask] - y_true = y_true[mask] - if first: - cmat = confusion_matrix(y_true, y_pred, - labels=[0, 1, 2, 3, 4, 5]) - first = False - else: - cmat += confusion_matrix(y_true, y_pred, - labels=[0, 1, 2, 3, 4, 5]) - print(cmat) - precision_dict = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0} - recall_dict = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0} - for i in range(6): - recall_dict[i] = cmat[i, i] / np.sum(cmat[i, :]) - precision_dict[i] = cmat[i, i] / np.sum(cmat[:, i]) - return cmat, recall_dict, precision_dict - - -def weighted_loss(target, output): - out = -tf.reduce_sum(target*output, len(output.get_shape())-1) - return out - - -def weighted_loss_ce_and_dl(target, output): - # Target: One hot encoding of segmentation mask. - # Output: Output of network. In this case, log(softmax). - soft = tf.nn.softmax(output) - numerator = tf.reduce_sum(soft*target, 1) - numerator = tf.reduce_sum(numerator, 2) - sum_ui_k = tf.reduce_sum(soft, 1) - sum_ui_k = tf.reduce_sum(sum_ui_k, 2) - sum_vi_k = tf.reduce_sum(target, 1) - sum_vi_k = tf.reduce_sum(sum_vi_k, 2) - - final = (-2/4)*tf.reduce_sum(numerator / (sum_ui_k + sum_vi_k), 1) - out = -tf.reduce_sum(target*output, len(output.get_shape())-1) - return final + out - - -def weighted_focal_loss(target, output, gamma=1): - # L = a0 *(1-pt)^gamma * ce - # Output of model is CE. - # Target is one-hot encoded. 
- soft = tf.nn.softmax(output, axis=-1) - pt = tf.pow(1-soft, 2) # probability - return -tf.reduce_sum(target*output*pt, len(output.get_shape())-1) - - -def acc(y_true, y_pred): - y_pred_sum = tf.reduce_sum(y_pred, axis=-1) - mask = tf.not_equal(y_pred_sum, 0) - y_arg = tf.argmax(y_pred, axis=-1) - y_t_arg = tf.argmax(y_true, axis=-1) - y_arg_mask = tf.boolean_mask(y_arg, mask) - y_t_arg_mask = tf.boolean_mask(y_t_arg, mask) - return K.mean(K.equal(y_t_arg_mask, y_arg_mask)) - - -def lr_schedule(epoch, initial_learning_rate): - lr = initial_learning_rate - if epoch > 15: - lr /= 256 - elif epoch > 13: - lr /= 128 - elif epoch > 11: - lr /= 64 - elif epoch > 9: - lr /= 32. - elif epoch > 7: - lr /= 16. - elif epoch > 5: - lr /= 8. - elif epoch > 3: - lr /= 4. - elif epoch > 1: - lr /= 2. - print('Learning rate: ', lr) - return lr - - -def _save_model_info(root_directory, train_type, loss_func, accuracy, loss, class_weights, classes_to_augment, initial_learning_rate, train_mc, cmat, precision, recall): - directory_name = os.path.join("./models", "{:.3f}acc_class_weights_multiple_classes".format(accuracy)) - if os.path.isdir(directory_name): - directory_name = os.path.join("./models", "{:.5f}acc".format(accuracy)) - filename = os.path.join(directory_name, "run_info_{:.3f}acc.txt".format(accuracy)) - os.rename(root_directory, directory_name) - print(filename) - with open(filename, 'w') as f: - print("acc: {:.3f}".format(accuracy), file=f) - print("train_type: {}".format(train_type), file=f) - print("loss_func: {}".format(loss_func), file=f) - print("loss: {}".format(loss), file=f) - print("weights: {}".format(class_weights), file=f) - print("augment scheme: {}".format(classes_to_augment), file=f) - print("lr: {}".format(initial_learning_rate), file=f) - print('train w multiple classes per tile: {}'.format(train_mc), file=f) - print('confusion_matrix: {}'.format(cmat), file=f) - print('precision: {}'.format(precision), file=f) - print('recall: {}'.format(recall), 
file=f) - - -def _return_loss(inp): - if 'focal' in inp: - return weighted_focal_loss - return weighted_loss +if __name__ == '__main__': -def _list_to_dict(ls): - dct = {} - for i, e in enumerate(ls[0].split(' ')): - dct[i] = e - return dct - + parser = construct_parser() + args = parser.parse_args() + initial_learning_rate = args.learning_rate -def _nclass(i): - if 'multiple' in i: - return 6 - return 5 + temp_dir, model_path, tensorboard_path = make_temporary_directory('./models/') + input_shape = (None, None, 19) + n_classes = 1 -if __name__ == '__main__': + model = unet(input_shape, initial_exp=4, n_classes=n_classes) - parser = argparse.ArgumentParser(fromfile_prefix_chars='@') - parser.add_argument('-f', '--file') - parser.add_argument("-lr", "--learning_rate", type=float) - parser.add_argument("-b", "--balance", type=int) - parser.add_argument("-w", "--weights", nargs=1) - parser.add_argument("-lf", "--loss_func") - parser.add_argument("-tt", '--train_type') - parser.add_argument("-nc", '--n_classes') - parser.add_argument("-td", '--train-data-dir', default='/home/thomas/ssd/training_data/train/') - parser.add_argument("-ted", '-test-data-dir', default='/home/thomas/ssd/training_data/test/') - args = parser.parse_args() - learning_rate_scheduler = partial(lr_schedule, initial_learning_rate=args.learning_rate) - # runtime params - n_classes = _nclass(args.train_type) - input_shape = (None, None, 51) - initial_learning_rate = args.learning_rate - class_weights = {0:85, 1:1.0, 2:1.546, 3:784.286, 4:972.84, 5:357.78} - class_weights = _list_to_dict(args.weights) - classes_to_augment = {0:False, 1:False, 2:False, 3:False, 4:False, 7:True} - loss_func = _return_loss(args.loss_func) - print(initial_learning_rate, class_weights, loss_func) - # - weight_shape = (None, None, n_classes) - filepath = './models/template_to_fill_in/model.h5' - tb_path = './models/template_to_fill_in/' + str(time.time()) - root_directory = './models/template_to_fill_in/' - if not 
os.path.isdir(tb_path): - os.makedirs(tb_path) - # Prepare callbacks for model saving and for learning rate adjustment. - checkpoint = ModelCheckpoint(filepath=filepath, - monitor='val_acc', + tensorboard = TensorBoard(log_dir=tensorboard_path, + profile_batch=0, + update_freq=30, + batch_size=3) + checkpoint = ModelCheckpoint(filepath=model_path, + monitor='val_binary_acc', verbose=1, save_best_only=True) - tensorboard = TensorBoard(log_dir=tb_path, profile_batch=0, update_freq=30, batch_size=3) - lr_schedule_func = partial(lr_schedule, initial_learning_rate=initial_learning_rate) - lr_scheduler = LearningRateScheduler(lr_schedule_func) - model = unet_same_padding(input_shape, weight_shape, n_classes=n_classes, initial_exp=4) + lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate) + lr_scheduler = LearningRateScheduler(lr_schedule) + opt = tf.keras.optimizers.Adam() - model.compile(opt, loss=loss_func, metrics=[acc]) + class_weights = {0:1, 1:1, 2:1, 3:1, 4:1, 5:1} + classes_to_augment = True #{0:1, 1:1, 2:1, 3:1, 4:1, 5:1} batch_size = 4 - balance = True - train_mc = True - training_data_directory = args.train_type - generator = SatDataSequence('{}'.format(args.train_data_dir), hatch_size=batch_size, - class_weights=class_weights, single_class_per_tile=False, balance=balance, - n_classes=n_classes, classes_to_augment=classes_to_augment) - class_weights = {0:1, 1:1.0, 2:1.0, 3:1, 4:1, 5:1} - valid_generator = SatDataSequence('{}'.format(args.test_data_dir), - batch_size=batch_size, balance=False, n_classes=n_classes, single_class_per_tile=False, - class_weights=class_weights) - - model.fit_generator(generator, - epochs=20, - callbacks=[lr_scheduler, checkpoint, tensorboard], + pos_weight = args.pos_weight + loss_func = binary_focal_loss(gamma=2, alpha=0.25) + model.compile(opt, loss=loss_func, metrics=[binary_acc]) + train_generator = SatDataSequence(training_data_dir, class_weights=class_weights, + batch_size=batch_size, 
n_classes=n_classes) + test_generator = SatDataSequence(test_data_dir, class_weights=class_weights, + batch_size=batch_size, training=False, n_classes=n_classes) + + model.fit_generator(train_generator, + epochs=5, + validation_data=test_generator, + callbacks=[tensorboard, lr_scheduler, checkpoint], use_multiprocessing=True, - validation_data=valid_generator, workers=12, max_queue_size=30, verbose=1) - - if args.loss_func == 'focal': - model = load_model(filepath, custom_objects={'weighted_focal_loss':weighted_focal_loss, - 'tf':tf, '_epsilon':_epsilon, 'acc':acc}) - else: - model = load_model(filepath, custom_objects={'weighted_loss':weighted_loss, - 'tf':tf, '_epsilon':_epsilon, 'acc':acc}) - - accuracy = model.evaluate_generator(valid_generator) - loss = accuracy[0] - accuracy = accuracy[1] - cmat, prec, recall = _confusion_matrix(valid_generator, model) - _save_model_info(root_directory, args.train_type, args.loss_func, accuracy, loss, class_weights, - classes_to_augment, initial_learning_rate, train_mc, cmat, prec, recall) + model.save('single_scene.h5') + # loss_func = binary_focal_loss(gamma=2, alpha=0.25) + # custom_objects = {'bfl':loss_func, 'binary_acc':binary_acc} + # model = load_model('models/temp/model.h5', custom_objects=custom_objects) + # test_data_dir = '/home/thomas/ssd/binary_train/test/' + # test_generator = SatDataSequence(test_data_dir, class_weights=class_weights, + # batch_size=batch_size, training=False, n_classes=n_classes) + # accuracy = model.evaluate_generator(test_generator) + # loss = accuracy[0] + # accuracy = accuracy[1] + # test_generator = SatDataSequence(test_data_dir, class_weights=class_weights, + # batch_size=1, n_classes=n_classes) + # cmat, prec, recall = confusion_matrix_from_generator(test_generator, model, n_classes=2) + # print(prec, recall) + # save_model_info(temp_dir, loss_func.__name__, accuracy, loss, class_weights, + # classes_to_augment, pos_weight, initial_learning_rate, cmat, prec, recall) diff --git 
a/fully-conv-classification/train_model_random_files.py b/fully-conv-classification/train_model_random_files.py new file mode 100644 index 0000000..696127e --- /dev/null +++ b/fully-conv-classification/train_model_random_files.py @@ -0,0 +1,78 @@ +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +# os.environ['CUDA_VISIBLE_DEVICES'] = '-1' +import keras.backend as K +import tensorflow as tf +import numpy as np +from argparse import ArgumentParser +from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) +from functools import partial +from sklearn.metrics import confusion_matrix +from tensorflow.keras.models import load_model +from scipy.special import expit +from random import sample +from glob import glob + + +from models import unet +from data_generators import RandomMajorityUndersamplingSequence, BinaryDataSequence +from train_utils import lr_schedule +from losses import (binary_focal_loss, binary_acc, masked_binary_xent, masked_categorical_xent, + multiclass_acc) + +# don't monitor binary acc any more, monitor precision and recall. 
+ +if __name__ == '__main__': + + ap = ArgumentParser() + ap.add_argument('--training-dir', type=str) + ap.add_argument('--testing-dir', type=str) + args = ap.parse_args() + + training_dir = args.training_dir + testing_dir = args.testing_dir + + initial_learning_rate = 1e-3 + + input_shape = (None, None, 51) + + n_classes = 1 + + model = unet(input_shape, initial_exp=4, n_classes=n_classes) + model_path = 'random_majority_files/only_irrigated_no_border_labels/' + if not os.path.isdir(model_path): + os.mkdir(model_path) + + model_path += 'model.h5' + + tensorboard = TensorBoard(log_dir='/tmp/', + profile_batch=0, + update_freq=30, + batch_size=3) + checkpoint = ModelCheckpoint(filepath=model_path, + monitor='val_binary_acc', + verbose=1, + save_best_only=True) + lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate) + lr_scheduler = LearningRateScheduler(lr_schedule) + + train_files = glob('/home/thomas/ssd/binary_train_no_border_labels/train/class_1_data/*.pkl') + test_files = glob('/home/thomas/ssd/binary_train_no_border_labels/test/class_1_data/*.pkl') + + opt = tf.keras.optimizers.Adam() + batch_size = 4 + loss_func = masked_binary_xent(pos_weight=1.0) + # loss_func = masked_categorical_xent + model.compile(opt, loss=loss_func, metrics=[binary_acc]) + # train_generator = RandomMajorityUndersamplingSequence(batch_size, training_dir) + # test_generator = RandomMajorityUndersamplingSequence(batch_size, testing_dir) + train_generator = BinaryDataSequence(batch_size, train_files) + test_generator = BinaryDataSequence(batch_size, test_files) + model.fit_generator(train_generator, + epochs=50, + validation_data=test_generator, + callbacks=[tensorboard, lr_scheduler, checkpoint], + use_multiprocessing=True, + workers=12, + max_queue_size=30, + verbose=1) diff --git a/fully-conv-classification/train_utils.py b/fully-conv-classification/train_utils.py new file mode 100644 index 0000000..18c7c13 --- /dev/null +++ 
b/fully-conv-classification/train_utils.py @@ -0,0 +1,211 @@ +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +# os.environ['CUDA_VISIBLE_DEVICES'] = '-1' +import time +import tensorflow as tf +import numpy as np +import matplotlib.pyplot as plt +import argparse +import pickle +from scipy.special import expit +from sklearn.metrics import confusion_matrix +from tensorflow.keras.models import load_model +from collections import defaultdict +from multiprocessing import Pool +from random import sample, shuffle +from glob import glob + + +def make_temporary_directory(model_directory=None): + if model_directory is None: + model_directory = './models/' + temp_dir = os.path.join(model_directory, 'temp') + model_path = os.path.join(temp_dir, 'model.h5') + tb_path = os.path.join(temp_dir, str(time.time())) + if not os.path.isdir(tb_path): + os.makedirs(tb_path) + return temp_dir, model_path, tb_path + + +def _bin_dict(dct, k, alpha, n_minority): + first_edge = min(dct.values()) + last_edge = max(dct.values()) + + bin_edges = np.linspace(first_edge, last_edge, k+1, endpoint=True) + + file_dict = defaultdict(list) + hardness_dict = defaultdict(lambda: 0) + + for data_filename in dct: + hardness = dct[data_filename] + for i in range(len(bin_edges)-1): + if bin_edges[i] <= hardness and hardness < bin_edges[i+1]: + file_dict[bin_edges[i]].append(data_filename) + hardness_dict[bin_edges[i]] += hardness + break # don't need to go on. 
+ + average_hardness_contribution = {} + for bin_edge in file_dict: + if not len(file_dict[bin_edge]): + continue + average_hardness_contribution[bin_edge] = hardness_dict[bin_edge] / len(file_dict[bin_edge]) + + sampling_weights = {} + total_weight = 0 + for bin_edge in average_hardness_contribution: + t = 1/(alpha + average_hardness_contribution[bin_edge]) + sampling_weights[bin_edge] = t + total_weight += t + + outfiles = [] + for bin_edge, weight in sampling_weights.items(): + n_samples = int(np.round(weight*n_minority) / total_weight) + undersample = file_dict[bin_edge] + if len(undersample) < n_samples: + undersample *= int(n_samples // len(undersample)) + 1 + # lazy with +1! alternative: add n_samples % len(undersample) files to undersample + outfiles.extend(sample(undersample, n_samples)) + return outfiles + +def hardbin(negative_example_directory, models, n_minority, alpha, k, custom_objects): + # Steps: + # train first model on randomly selected negative examples + loss_dct = defaultdict(lambda: 0) + if not isinstance(models, list): + models = [models] + print(models) + + files = glob(os.path.join(negative_example_directory, "*.pkl")) + # parallelize? + for model_path in models: + print("Loading model {}".format(model_path)) + model = load_model(model_path, custom_objects=custom_objects) + for i, f in enumerate(files): + with open(f, 'rb') as src: + data = pickle.load(src) + y_pred = model.predict(np.expand_dims(data['data'], 0)) + mask = data['one_hot'][:, :, 0] == 1 # where there is majority class. + y_pred = expit(y_pred) + y_pred = y_pred[0, :, :, 0][mask] + avg_pred_miss = np.mean(y_pred) # + # average hardness of tile. A larger number + # means the network was more sure that the underlying false postive + # was actually positive. 
+ loss_dct[f] += avg_pred_miss + del model + + for f in loss_dct: + loss_dct[f] /= len(models) + + return _bin_dict(loss_dct, k, alpha, n_minority) + + +def _preprocess_masks_and_calculate_cmat(y_true, y_pred, n_classes=2): + labels = range(n_classes) + if n_classes == 2: + mask = np.ones_like(y_true).astype(bool) + mask[y_true == -1] = False + else: + mask = np.sum(y_true, axis=2).astype(bool) + y_pred = y_pred + if n_classes > 2: + y_pred = np.squeeze(y_pred) + y_pred = softmax(y_pred) + y_pred = np.argmax(y_pred, axis=2) + y_true = np.argmax(y_true, axis=2) + y_pred = y_pred[mask] + y_true = y_true[mask] + else: + y_pred = np.round(expit(y_pred)) + y_pred = y_pred[mask] + y_true = y_true[mask] + + cmat = confusion_matrix(y_true, y_pred, + labels=labels) + + return cmat + +def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classes=2): + out_cmat = np.zeros((n_classes, n_classes)) + with Pool(batch_size) as pool: + for batch_x, y_true in valid_generator: + y_true = y_true[0] + preds = model.predict(batch_x) + sz = batch_x[0].shape[0] + try: + y_trues = [np.squeeze(y_true[i]) for i in range(sz)] + y_preds = [np.squeeze(preds[i]) for i in range(sz)] + except IndexError as e: + print(e) + continue + + cmats = pool.starmap(_preprocess_masks_and_calculate_cmat, zip(y_trues, y_preds, + [n_classes]*batch_size)) + for cmat in cmats: + out_cmat += cmat + + print(out_cmat) + precision_dict = {} + recall_dict = {} + for i in range(n_classes): + precision_dict[i] = 0 + recall_dict[i] = 0 + for i in range(n_classes): + recall_dict[i] = out_cmat[i, i] / np.sum(out_cmat[i, :]) + precision_dict[i] = out_cmat[i, i] / np.sum(out_cmat[:, i]) + return cmat, recall_dict, precision_dict + + +def lr_schedule(epoch, initial_learning_rate): + lr = initial_learning_rate + if epoch > 15: + lr /= 256 + elif epoch > 13: + lr /= 128 + elif epoch > 11: + lr /= 64 + elif epoch > 9: + lr /= 32. + elif epoch > 7: + lr /= 16. + elif epoch > 5: + lr /= 8. 
+ elif epoch > 3: + lr /= 4. + elif epoch > 1: + lr /= 2. + print('Learning rate: ', lr) + return float(lr) + + +def save_model_info(root_directory, loss_func, accuracy, loss, class_weights, classes_to_augment, + initial_learning_rate, pos_weight, cmat, precision, recall): + directory_name = os.path.join("./models", "{:.3f}".format(accuracy)) + if os.path.isdir(directory_name): + directory_name = os.path.join("./models", "{:.5f}acc".format(accuracy)) + filename = os.path.join(directory_name, "run_info_{:.3f}acc.txt".format(accuracy)) + os.rename(root_directory, directory_name) + print(filename) + with open(filename, 'w') as f: + print("acc: {:.3f}".format(accuracy), file=f) + print("loss_func: {}".format(loss_func), file=f) + print("loss: {}".format(loss), file=f) + print("weights: {}".format(class_weights), file=f) + print("augment scheme: {}".format(classes_to_augment), file=f) + print("lr: {}".format(initial_learning_rate), file=f) + print('pos_weight: {}'.format(pos_weight), file=f) + print('confusion_matrix: {}'.format(cmat), file=f) + print('precision: {}'.format(precision), file=f) + print('recall: {}'.format(recall), file=f) + + +def construct_parser(): + parser = argparse.ArgumentParser(fromfile_prefix_chars='@') + parser.add_argument("-lr", "--learning_rate", type=float, default=0.001) + parser.add_argument("-nc", '--n_classes', type=int, default=1) + parser.add_argument("-p", '--pos-weight', type=float, default=1.0) + return parser + + +if __name__ == '__main__': + pass From 5d2b961695c40e9c787db47907ddf0840c65bb11 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Wed, 16 Oct 2019 16:42:47 -0600 Subject: [PATCH 85/89] Reorganizing data generators, added finetuning capability --- fully-conv-classification/data_generators.py | 134 ++++++++++++++++-- .../extract_training_data.py | 29 ++-- fully-conv-classification/finetune_model.py | 96 +++++++++++++ .../precision_and_recall.py | 25 +++- fully-conv-classification/runspec.py | 19 +-- 
.../train_model_random_files.py | 34 ++--- fully-conv-classification/train_utils.py | 1 - 7 files changed, 283 insertions(+), 55 deletions(-) create mode 100644 fully-conv-classification/finetune_model.py diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 68aaefd..f736f39 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -13,16 +13,94 @@ from rasterio import open as rasopen from rasterio.errors import RasterioIOError from skimage import transform +from skimage.morphology import erosion from sat_image.warped_vrt import warp_single_image from tensorflow.keras.utils import Sequence from multiprocessing import Pool from collections import defaultdict -from sys import getsizeof +from sys import getsizeof, exit from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features +class SatDataGenerator(Sequence): + + def __init__(self, batch_size, n_classes, training=True): + + self.batch_size = batch_size + self.training = training + self.n_classes = n_classes + + def _get_files(self): + # Required override. 
+ raise NotImplementedError + + + def on_epoch_end(self): + raise NotImplementedError + + + def __len__(self): + raise NotImplementedError + + + def __getitem__(self, idx): + batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] + data_tiles = [self._from_pickle(x) for x in batch] + self.batch=batch + if self.n_classes == 2: + processed = self._binary_labels_and_features(data_tiles) + else: + processed = self._labels_and_features(data_tiles) + batch_y = processed[1] + batch_x = processed[0] + raise NotImplementedError + return batch_x, batch_y + + + def _from_pickle(self, filename): + with open(filename, 'rb') as f: + data = pickle.load(f) + return data + + + def _labels_and_features(self, data_tiles): + features = [] + one_hots = [] + for tile in data_tiles: + data = tile['data'] + one_hot = tile['one_hot'].astype(np.int) + class_code = tile['class_code'] + if self.training: + data, one_hot = _augment_data(data, one_hot) + features.append(data) + one_hots.append(one_hot) + return [np.asarray(features)], [np.asarray(one_hots)] + + + def _binary_labels_and_features(self, data_tiles): + features = [] + one_hots = [] + bad_shape = False + for cnt, tile in enumerate(data_tiles): + data = tile['data'] + one_hot = tile['one_hot'].astype(np.int) + binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 + for i in range(one_hot.shape[2]): + if i == 1: + binary_one_hot[:, :][one_hot[:, :, i] == 1] = 1 + else: + binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 + if self.training: + data, binary_one_hot = _augment_data(data, binary_one_hot, binary=True) + binary_one_hot = np.expand_dims(binary_one_hot, 2) + features.append(data) + one_hots.append(binary_one_hot) + return [np.asarray(features)], [np.asarray(one_hots)] + + + class RandomMajorityUndersamplingSequence(Sequence): def __init__(self, batch_size, data_directory, training=True): @@ -129,27 +207,60 @@ def _binary_labels_and_features(self, data_tiles): class 
BinaryDataSequence(Sequence): - def __init__(self, batch_size, file_list, training=True, balance_pixels=False): + def __init__(self, batch_size, minority_file_list, majority_file_list, total_files=None, + training=True, balance_pixels=False, erode=False, balance_files=False): # this requires a file list of training data. self.training = training self.balance_pixels = balance_pixels self.batch_size = batch_size - self.file_list = file_list - self.n_files = len(self.file_list) - shuffle(self.file_list) + self.erode = erode + self.minority_file_list = minority_file_list + self.majority_file_list = majority_file_list + need_to_resample = True + assert(len(self.majority_file_list) >= len(self.minority_file_list)) + if total_files is not None: + self.total_files = total_files + self.total_files = min(len(self.minority_file_list), total_files) + elif balance_files: + self.total_files = len(self.minority_file_list) + else: + self.file_list = self.minority_file_list + self.majority_file_list + self.total_files = len(self.majority_file_list) + len(self.minority_file_list) + self.file_subset = np.random.choice(self.file_list, self.total_files, replace=False) + need_to_resample = False + + if need_to_resample: + self.file_subset = list(np.random.choice(self.minority_file_list, self.total_files, + replace=False)) + self.file_subset.extend(list(np.random.choice(self.majority_file_list, self.total_files, + replace=False))) + + assert(len(self.minority_file_list) <= len(self.file_subset)) + + + shuffle(self.file_subset) self.idx = 0 def __len__(self): - return int(np.ceil(self.n_files / self.batch_size)) + return int(np.ceil(self.total_files / self.batch_size)) def on_epoch_end(self): - shuffle(self.file_list) + if self.training: + # resample from corpus + self.file_subset = list(np.random.choice(self.minority_file_list, self.total_files, + replace=False)) + self.file_subset.extend(list(np.random.choice(self.majority_file_list, self.total_files, + replace=False))) + 
shuffle(self.file_subset) + else: + # don't resample from corpus + shuffle(self.file_subset) def __getitem__(self, idx): - batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] + batch = self.file_subset[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] processed = self._binary_labels_and_features(data_tiles) batch_y = processed[1] @@ -171,6 +282,8 @@ def _apply_weights(self, one_hot): def _binary_labels_and_features(self, data_tiles): features = [] one_hots = [] + if not self.training: + np.random.seed(0) for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) @@ -191,6 +304,11 @@ def _binary_labels_and_features(self, data_tiles): xs = neg_examples[0][idx] ys = neg_examples[1][idx] binary_one_hot[xs, ys] = -1 + + if self.erode: + binary_one_hot = erosion(binary_one_hot) + binary_one_hot = erosion(binary_one_hot) + binary_one_hot = np.expand_dims(binary_one_hot, 2) features.append(data) one_hots.append(binary_one_hot) diff --git a/fully-conv-classification/extract_training_data.py b/fully-conv-classification/extract_training_data.py index 5c44ad8..095cb41 100644 --- a/fully-conv-classification/extract_training_data.py +++ b/fully-conv-classification/extract_training_data.py @@ -154,6 +154,16 @@ def _target_indices_from_class_labels(class_labels, tile_size): return tiles_y, tiles_x +def _assign_class_code_to_tile(class_label_tile): + if np.all(class_label_tile != 0): + unique, unique_count = np.unique(class_label_tile, return_counts=True) + unique = unique[:-1] # assume np.ma.masked is last. + unique_count = unique_count[:-1] + return unique[np.argmax(unique_count)] + # if a tile has any irrigated pixels, return 0. 
+ return 0 + + def _save_training_data_from_indices(image_stack, class_labels, training_data_directory, n_classes, indices_y, indices_x, tile_size): out = [] @@ -161,15 +171,11 @@ def _save_training_data_from_indices(image_stack, class_labels, training_data_di for j in indices_y: class_label_tile = class_labels[i:i+tile_size, j:j+tile_size] shape = class_label_tile.shape - if (shape[0], shape[1]) != (tile_size, tile_size): - # Todo: handle this - continue if np.all(class_label_tile.mask): continue - if np.any(class_label_tile == 1): - class_code = 1 - else: - class_code = 0 + if (shape[0], shape[1]) != (tile_size, tile_size): + continue + class_code = _assign_class_code_to_tile(class_label_tile) sub_one_hot = _one_hot_from_labels(class_label_tile, n_classes) sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] dt = DataTile(sub_image_stack, sub_one_hot, class_code) @@ -317,8 +323,8 @@ def _mean_of_three_images(paths_map, image_stack): sd = glob('shapefile_data/test/*.shp') idd = '/home/thomas/share/image_data/' - td = '/home/thomas/ssd/binary_train_no_border_labels/test/' - n_classes = 2 + td = '/home/thomas/ssd/multiclass_no_border_labels/test/' + n_classes = 4 done = set() @@ -339,9 +345,8 @@ def _mean_of_three_images(paths_map, image_stack): # TODO: rewrite this to take advantage of test train data in same path/row sd = glob('shapefile_data/train/*.shp') - idd = '/home/thomas/share/image_data/' - td = '/home/thomas/ssd/binary_train_no_border_labels/train/' - n_classes = 2 + td = '/home/thomas/ssd/multiclass_no_border_labels/train/' + n_classes = 4 done = set() diff --git a/fully-conv-classification/finetune_model.py b/fully-conv-classification/finetune_model.py new file mode 100644 index 0000000..8c47d39 --- /dev/null +++ b/fully-conv-classification/finetune_model.py @@ -0,0 +1,96 @@ +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' +import keras.backend as K +import tensorflow as tf 
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL) +import numpy as np +from argparse import ArgumentParser +from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) +from functools import partial +from tensorflow.keras.models import load_model +from glob import glob + + +from models import unet +from data_generators import RandomMajorityUndersamplingSequence, BinaryDataSequence +from train_utils import lr_schedule +from losses import (binary_focal_loss, binary_acc, masked_binary_xent, masked_categorical_xent, + multiclass_acc) + +def make_file_list(directory, ext='*.pkl'): + + is_top_dir = True + for d in os.listdir(directory): + if not os.path.isdir(os.path.join(directory, d)): + is_top_dir = False + break + + files = [] + if is_top_dir: + for d in os.listdir(directory): + files.extend(glob(os.path.join(directory, d, ext))) + else: + files.extend(glob(os.path.join(directory, ext))) + return files + + + +if __name__ == '__main__': + + ap = ArgumentParser() + ap.add_argument("--model-to-finetune", type=str, required=True) + ap.add_argument("--loss-func", type=str, required=True) + args = ap.parse_args() + + mb = masked_binary_xent(1.0) + bfl = binary_focal_loss() + input_shape = (None, None, 51) + + n_classes = 1 + custom_objects = {'mb':masked_binary_xent(), 'bfl':bfl, 'binary_acc':binary_acc} + model_frozen = load_model(args.model_to_finetune, custom_objects=custom_objects) + + if 'finetuned' not in args.model_to_finetune: + model_out_path = os.path.splitext(args.model_to_finetune)[0] + '_finetuned.h5' + else: + model_out_path = args.model_to_finetune + + print(model_out_path) + + for layer in model_frozen.layers[:-1]: + layer.trainable = False + + checkpoint = ModelCheckpoint(filepath=model_out_path, + monitor='val_binary_acc', + verbose=1, + save_best_only=True) + + initial_learning_rate = 1e-3 + lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate) + lr_scheduler = 
LearningRateScheduler(lr_schedule) + + batch_size = 4 + data_directory = '/home/thomas/ssd/binary_train_no_border_labels/' + minority_file_list = glob(os.path.join(data_directory, 'train/class_1_data/*pkl') ) + majority_file_list = glob(os.path.join(data_directory, 'train/class_0_data/*pkl') ) + train_generator = BinaryDataSequence(batch_size, minority_file_list, majority_file_list, balance_files=True, erode=False) + minority_file_list = glob(os.path.join(data_directory, 'test/class_1_data/*pkl') ) + majority_file_list = glob(os.path.join(data_directory, 'test/class_0_data/*pkl') ) + opt = tf.keras.optimizers.Adam() + loss_func = binary_focal_loss(gamma=3, alpha=0.99) + # loss_func = masked_categorical_xent + model_frozen.compile(opt, loss=loss_func, metrics=[binary_acc]) + # train_generator = RandomMajorityUndersamplingSequence(batch_size, training_dir) + # test_generator = RandomMajorityUndersamplingSequence(batch_size, testing_dir) + test_generator = BinaryDataSequence(batch_size, minority_file_list, majority_file_list, + training=False, erode=False, total_files=120) + model_frozen.fit_generator(train_generator, + epochs=15, + validation_data=test_generator, + callbacks=[lr_scheduler, checkpoint], + use_multiprocessing=True, + workers=12, + max_queue_size=30, + verbose=1) + model_frozen.save("fuullytaruined.h5") diff --git a/fully-conv-classification/precision_and_recall.py b/fully-conv-classification/precision_and_recall.py index 44cdbd6..b03feda 100644 --- a/fully-conv-classification/precision_and_recall.py +++ b/fully-conv-classification/precision_and_recall.py @@ -1,5 +1,7 @@ import os import argparse +import tensorflow as tf +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL) from tensorflow.keras.models import load_model from glob import glob @@ -18,16 +20,29 @@ args = parser.parse_args() if not args.use_gpu: os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - custom_objects = {'mb':masked_binary_xent(pos_weight=1.0), 'binary_acc':binary_acc} + 
#custom_objects = {'mb':masked_binary_xent(pos_weight=1.0), 'binary_acc':binary_acc} + custom_objects = {'mb':masked_binary_xent(), 'bfl':binary_focal_loss(), 'binary_acc':binary_acc} try: model = load_model(args.model, custom_objects=custom_objects) except ValueError as e: print(e.args) raise - batch_size = 1 - files = glob(os.path.join(args.test_data_path, '*.pkl')) - test_generator = BinaryDataSequence(batch_size, files, training=False) + batch_size = 12 + dirs = os.listdir(args.test_data_path) + is_top_dir = True + for d in dirs: + if not os.path.isdir(os.path.join(args.test_data_path, d)): + is_top_dir = False + break + files = [] + if is_top_dir: + for d in dirs: + fs = glob(os.path.join(args.test_data_path, d, '*.pkl')) + files.extend(fs) + else: + files = glob(os.path.join(args.test_data_path, '*.pkl')) + test_generator = BinaryDataSequence(batch_size, majority_file_list=files, minority_file_list=[], training=False) cmat, prec, recall = confusion_matrix_from_generator(test_generator, batch_size, model, n_classes=2) - print('model {} has p:{}, r:{}'.format(args.model, prec, recall)) + print('model {} has \n p:{}\n r:{}'.format(args.model, prec, recall)) diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py index 0c1553b..5bc4c4e 100644 --- a/fully-conv-classification/runspec.py +++ b/fully-conv-classification/runspec.py @@ -20,19 +20,20 @@ target_bands = ('B1.TIF', 'B2.TIF', 'B3.TIF', 'B4.TIF') -def assign_shapefile_class_code(shapefile): +def assign_shapefile_class_code_binary(shapefile): if 'irrigated' in shapefile and 'unirrigated' not in shapefile: return 1 return 0 -# if 'unirrigated' in shapefile: -# return 1 -# if 'uncultivated' in shapefile: -# return 2 -# if 'wetlands' in shapefile: -# return 2 -# if 'fallow' in shapefile: -# return 4 +def assign_shapefile_class_code(shapefile): + if 'irrigated' in shapefile and 'unirrigated' not in shapefile: + return 0 + if 'unirrigated' in shapefile or 'wetlands' in shapefile: + 
return 1 + if 'uncultivated' in shapefile: + return 2 + if 'fallow' in shapefile: + return 3 def assign_shapefile_year(shapefile): # get the shapefile name, not the whole path. diff --git a/fully-conv-classification/train_model_random_files.py b/fully-conv-classification/train_model_random_files.py index 696127e..cf6bd0b 100644 --- a/fully-conv-classification/train_model_random_files.py +++ b/fully-conv-classification/train_model_random_files.py @@ -24,50 +24,44 @@ if __name__ == '__main__': - ap = ArgumentParser() - ap.add_argument('--training-dir', type=str) - ap.add_argument('--testing-dir', type=str) - args = ap.parse_args() - - training_dir = args.training_dir - testing_dir = args.testing_dir - initial_learning_rate = 1e-3 input_shape = (None, None, 51) - n_classes = 1 + n_classes = 4 model = unet(input_shape, initial_exp=4, n_classes=n_classes) - model_path = 'random_majority_files/only_irrigated_no_border_labels/' + model_path = 'random_majority_files/multiclass/only_irrigated_no_border_labels/' if not os.path.isdir(model_path): os.mkdir(model_path) - - model_path += 'model.h5' + model_path += 'model_xent_no_balanced_pixels.h5' tensorboard = TensorBoard(log_dir='/tmp/', profile_batch=0, update_freq=30, batch_size=3) checkpoint = ModelCheckpoint(filepath=model_path, - monitor='val_binary_acc', + monitor='val_multiclass_acc', verbose=1, save_best_only=True) + lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate) lr_scheduler = LearningRateScheduler(lr_schedule) train_files = glob('/home/thomas/ssd/binary_train_no_border_labels/train/class_1_data/*.pkl') test_files = glob('/home/thomas/ssd/binary_train_no_border_labels/test/class_1_data/*.pkl') + train_files = '/home/thomas/ssd/binary_train_no_border_labels/train/class_1_data/*.pkl' + test_files = '/home/thomas/ssd/binary_train_no_border_labels/test/class_1_data/*.pkl' opt = tf.keras.optimizers.Adam() batch_size = 4 - loss_func = masked_binary_xent(pos_weight=1.0) - # loss_func = 
masked_categorical_xent - model.compile(opt, loss=loss_func, metrics=[binary_acc]) - # train_generator = RandomMajorityUndersamplingSequence(batch_size, training_dir) - # test_generator = RandomMajorityUndersamplingSequence(batch_size, testing_dir) - train_generator = BinaryDataSequence(batch_size, train_files) - test_generator = BinaryDataSequence(batch_size, test_files) + loss_func = masked_categorical_xent + metric = multiclass_acc + model.compile(opt, loss=loss_func, metrics=[metric]) + train_generator = RandomMajorityUndersamplingSequence(batch_size, training_dir) + test_generator = RandomMajorityUndersamplingSequence(batch_size, testing_dir) + # train_generator = BinaryDataSequence(batch_size, train_files) + # test_generator = BinaryDataSequence(batch_size, test_files, balance_pixels=True, training=False) model.fit_generator(train_generator, epochs=50, validation_data=test_generator, diff --git a/fully-conv-classification/train_utils.py b/fully-conv-classification/train_utils.py index 18c7c13..b0d3b3a 100644 --- a/fully-conv-classification/train_utils.py +++ b/fully-conv-classification/train_utils.py @@ -138,7 +138,6 @@ def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classe except IndexError as e: print(e) continue - cmats = pool.starmap(_preprocess_masks_and_calculate_cmat, zip(y_trues, y_preds, [n_classes]*batch_size)) for cmat in cmats: From 57f3e40244621326ed552543dd2b634b1964c915 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sat, 26 Oct 2019 13:36:38 -0600 Subject: [PATCH 86/89] Changing extraction of data, changed a lot of stuff honestly --- fully-conv-classification/data_generators.py | 386 +++++++++--------- fully-conv-classification/data_utils.py | 65 +-- fully-conv-classification/evaluate_image.py | 91 +++-- .../extract_training_data.py | 103 ++--- .../precision_and_recall.py | 32 +- fully-conv-classification/runspec.py | 7 + fully-conv-classification/shapefile_utils.py | 19 - .../train_model_random_files.py | 29 +- 
fully-conv-classification/train_utils.py | 35 +- 9 files changed, 378 insertions(+), 389 deletions(-) diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index f736f39..105f76d 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -26,11 +26,12 @@ class SatDataGenerator(Sequence): - def __init__(self, batch_size, n_classes, training=True): + def __init__(self, batch_size, n_classes, balance_pixels_per_batch=False, training=True): self.batch_size = batch_size - self.training = training self.n_classes = n_classes + self.training = training + self.balance_pixels_per_batch = balance_pixels_per_batch def _get_files(self): # Required override. @@ -46,17 +47,7 @@ def __len__(self): def __getitem__(self, idx): - batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] - data_tiles = [self._from_pickle(x) for x in batch] - self.batch=batch - if self.n_classes == 2: - processed = self._binary_labels_and_features(data_tiles) - else: - processed = self._labels_and_features(data_tiles) - batch_y = processed[1] - batch_x = processed[0] raise NotImplementedError - return batch_x, batch_y def _from_pickle(self, filename): @@ -68,119 +59,73 @@ def _from_pickle(self, filename): def _labels_and_features(self, data_tiles): features = [] one_hots = [] + if self.balance_pixels_per_batch: + min_count = self._count_pixels(data_tiles) + for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) class_code = tile['class_code'] + if self.balance_pixels_per_batch: + one_hot = self._balance_pixels(one_hot, min_count) if self.training: data, one_hot = _augment_data(data, one_hot) features.append(data) one_hots.append(one_hot) - return [np.asarray(features)], [np.asarray(one_hots)] - - def _binary_labels_and_features(self, data_tiles): - features = [] - one_hots = [] - bad_shape = False - for cnt, tile in enumerate(data_tiles): - data = 
tile['data'] - one_hot = tile['one_hot'].astype(np.int) - binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 - for i in range(one_hot.shape[2]): - if i == 1: - binary_one_hot[:, :][one_hot[:, :, i] == 1] = 1 - else: - binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 - if self.training: - data, binary_one_hot = _augment_data(data, binary_one_hot, binary=True) - binary_one_hot = np.expand_dims(binary_one_hot, 2) - features.append(data) - one_hots.append(binary_one_hot) return [np.asarray(features)], [np.asarray(one_hots)] + def _balance_pixels(self, one_hot, min_count, binary=False): -class RandomMajorityUndersamplingSequence(Sequence): - - def __init__(self, batch_size, data_directory, training=True): - - self.training = training - self.data_directory = data_directory - self.batch_size = batch_size - self._get_files() - self.n_files = len(self.file_list) - shuffle(self.file_list) - self.idx = 0 - - - def _get_files(self): - - self.class_directories = os.listdir(self.data_directory) - self.n_classes = len(self.class_directories) - self.files = [glob(os.path.join(self.data_directory, d, "*.pkl")) for d in - self.class_directories] - self.n_minority = min([len(f) for f in self.files]) - self.file_list = [] - if self.training: - self.file_list.extend(sample(self.files[0], self.n_minority)) - self.file_list.extend(sample(self.files[1], self.n_minority)) - else: - self.file_list.extend(self.files[0]) - self.file_list.extend(self.files[1]) - shuffle(self.file_list) - - - def __len__(self): - return int(np.ceil(self.n_files / self.batch_size)) - - - def on_epoch_end(self): - self.file_list = [] - self.file_list.extend(sample(self.files[0], self.n_minority)) - self.file_list.extend(sample(self.files[1], self.n_minority)) - shuffle(self.file_list) - self.n_files = len(self.file_list) - + if binary: + ys, xs = np.where(one_hot[:, :] == 1) + if len(ys): + ys = np.random.choice(ys, size=int(len(ys)-min_count), replace=False) + xs = 
np.random.choice(xs, size=int(len(xs)-min_count), replace=False) + one_hot[ys, xs, i] = -1 + ys, xs = np.where(one_hot[:, :] == 0) + if len(ys): + ys = np.random.choice(ys, size=int(len(ys)-min_count), replace=False) + xs = np.random.choice(xs, size=int(len(xs)-min_count), replace=False) + one_hot[ys, xs, i] = -1 - def __getitem__(self, idx): - batch = self.file_list[idx * self.batch_size:(idx + 1)*self.batch_size] - data_tiles = [self._from_pickle(x) for x in batch] - self.batch=batch - if self.n_classes == 2: - processed = self._binary_labels_and_features(data_tiles) else: - processed = self._labels_and_features(data_tiles) - - batch_y = processed[1] - batch_x = processed[0] - return batch_x, batch_y - - - def _from_pickle(self, filename): - with open(filename, 'rb') as f: - data = pickle.load(f) - return data + for i in range(one_hot.shape[2]): + ys, xs = np.where(one_hot[:, :, i] == 1) + if len(ys): + ys = np.random.choice(ys, size=int(len(ys)-min_count), replace=False) + xs = np.random.choice(xs, size=int(len(xs)-min_count), replace=False) + one_hot[ys, xs, i] = 0 + + return one_hot - def _labels_and_features(self, data_tiles): - features = [] - one_hots = [] + def _count_pixels(self, data_tiles): + pixel_counts = np.ones((self.n_classes))*np.inf for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) - one_hot[0, 0, :] = 0 - class_code = tile['class_code'] - data, one_hot = _augment_data(data, one_hot) - features.append(data) - one_hots.append(one_hot) - return [np.asarray(features)], [np.asarray(one_hots)] - + binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 + nodata_mask = np.sum(one_hot, axis=2) + argmaxed = np.argmax(one_hot, axis=2) + argmaxed[nodata_mask == 0] = -1 + unique, counts = np.unique(argmaxed, return_counts=True) + unique = unique[1:] + counts = counts[1:] + for val, count in zip(unique, counts): + if count < pixel_counts[val]: + pixel_counts[val] = count + return 
np.min(pixel_counts) + def _binary_labels_and_features(self, data_tiles): features = [] one_hots = [] - bad_shape = False + + if self.balance_pixels_per_batch: + min_count = self._count_pixels(data_tiles) + for cnt, tile in enumerate(data_tiles): data = tile['data'] one_hot = tile['one_hot'].astype(np.int) @@ -190,13 +135,11 @@ def _binary_labels_and_features(self, data_tiles): binary_one_hot[:, :][one_hot[:, :, i] == 1] = 1 else: binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 - neg_examples = np.where(binary_one_hot == 0) - n_neg = len(neg_examples[0]) - n_pos = len(np.where(binary_one_hot == 1)[0]) - if n_neg != 0: - xs = np.random.choice(neg_examples[0], n_neg - n_pos, replace=False) - ys = np.random.choice(neg_examples[1], n_neg - n_pos, replace=False) - binary_one_hot[xs, ys] = -1 + + if self.balance_pixels_per_batch: + + one_hot = self._balance_pixels(one_hot, min_count, binary=True) + if self.training: data, binary_one_hot = _augment_data(data, binary_one_hot, binary=True) binary_one_hot = np.expand_dims(binary_one_hot, 2) @@ -205,114 +148,165 @@ def _binary_labels_and_features(self, data_tiles): return [np.asarray(features)], [np.asarray(one_hots)] -class BinaryDataSequence(Sequence): +class DataGenerator(SatDataGenerator): + ''' + Feeds examples into the network in order + sorted by class_label. This is a form of random majority + undersampling. - def __init__(self, batch_size, minority_file_list, majority_file_list, total_files=None, - training=True, balance_pixels=False, erode=False, balance_files=False): - # this requires a file list of training data. 
- self.training = training - self.balance_pixels = balance_pixels - self.batch_size = batch_size - self.erode = erode - self.minority_file_list = minority_file_list - self.majority_file_list = majority_file_list - need_to_resample = True - assert(len(self.majority_file_list) >= len(self.minority_file_list)) - if total_files is not None: - self.total_files = total_files - self.total_files = min(len(self.minority_file_list), total_files) - elif balance_files: - self.total_files = len(self.minority_file_list) - else: - self.file_list = self.minority_file_list + self.majority_file_list - self.total_files = len(self.majority_file_list) + len(self.minority_file_list) - self.file_subset = np.random.choice(self.file_list, self.total_files, replace=False) - need_to_resample = False + I want the following functionality: + Easily switch between binary/multiclass classification - if need_to_resample: - self.file_subset = list(np.random.choice(self.minority_file_list, self.total_files, - replace=False)) - self.file_subset.extend(list(np.random.choice(self.majority_file_list, self.total_files, - replace=False))) + Can focus on examples from one class (dict of target_classes) + Can apply arbitary morphological operations to the input labels - assert(len(self.minority_file_list) <= len(self.file_subset)) + Able to feed in examples without any preprocessing (unbalanced) + Able to feed in examples that are balanced, but in a random order + Able to feed in examples that are balanced and in a definite order (queue of files) + Able to feed in batches that are balanced on a pixel count level. 
+ ''' + def __init__(self, data_directory, batch_size, n_classes=None, training=True, + target_classes=None, balance=False, balance_examples_per_batch=False, + balance_pixels_per_batch=False): + # Assert that all three can't be true + super().__init__(batch_size, n_classes, balance_pixels_per_batch, training) + self.data_directory = data_directory + self.balance = balance + self.balance_examples_per_batch = balance_examples_per_batch + self.target_classes = target_classes + self._get_files() - shuffle(self.file_subset) - self.idx = 0 + def _check_if_directory_is_in_targets(self, directory): + ''' Assumes directory is at the top of the + directory hierarchy ''' + if self.target_classes is None: + return True + if not isinstance(self.target_classes, list): + if isinstance(self.target_classes, int): + self.target_classes = [self.target_classes] + else: + raise ValueError("target_classes must be one of int, list") + for target in self.target_classes: + if str(target) in directory: + return True + return False + + + def _get_files(self): + dirs = os.listdir(self.data_directory) + if self.n_classes is None: + self.n_classes = len(dirs) + for d in os.listdir(self.data_directory): + if not os.path.isdir(os.path.join(self.data_directory, d)): + raise ValueError("Non-directory object exists in data_directory") + dirs = [os.path.join(self.data_directory, d) for d in dirs \ + if self._check_if_directory_is_in_targets(d)] + self.dirs = dirs + if not self.balance and not self.balance_examples_per_batch: + self.n_files = self._unbalanced_file_list(dirs, first=True) + self._on_epoch_end = self._unbalanced_file_list + # all training examples, randomly selected + # number of files in an epoch is the sum of the files + # for each class + return + elif self.balance: + self.n_files = self._balanced_file_list(dirs, first=True) + self._on_epoch_end = self._balanced_file_list + # balanced file list with random selection + # i.e. 
the number of files in an epoch + # is n classes * min number of training examples for any class + return + if self.balance_examples_per_batch: + self.n_files = self._balanced_queue(dirs, first=True) + self._on_epoch_end = self._balanced_queue + # all training examples, fed to the network in sequential order + # i.e. 1, 2, 3, 4, 1, 2, 3, 4 + # the number of files in an epoch is + # n classes * min number of training examples for any class + return def __len__(self): - return int(np.ceil(self.total_files / self.batch_size)) + + return int(np.ceil(self.n_files / self.batch_size)) def on_epoch_end(self): - if self.training: - # resample from corpus - self.file_subset = list(np.random.choice(self.minority_file_list, self.total_files, - replace=False)) - self.file_subset.extend(list(np.random.choice(self.majority_file_list, self.total_files, - replace=False))) - shuffle(self.file_subset) - else: - # don't resample from corpus - shuffle(self.file_subset) + # Recreates the file list + self._on_epoch_end(self.dirs, first=False) def __getitem__(self, idx): - batch = self.file_subset[idx * self.batch_size:(idx + 1)*self.batch_size] + # print("suspicious:", idx) + # model.fit_generator does not pull batches in order of batch. 
+ batch = self.files[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] - processed = self._binary_labels_and_features(data_tiles) - batch_y = processed[1] - batch_x = processed[0] + self.batch = batch + if self.n_classes == 2: + batch_x, batch_y = self._binary_labels_and_features(data_tiles) + else: + batch_x, batch_y = self._labels_and_features(data_tiles) + return batch_x, batch_y - def _from_pickle(self, filename): - with open(filename, 'rb') as f: - data = pickle.load(f) - return data - - - def _apply_weights(self, one_hot): - for i in range(self.n_classes): - one_hot[:, :, i] *= self.class_weights[i] + def _unbalanced_file_list(self, dirs, first): + if first: + self.files = [] + for d in dirs: + self.files.extend(glob(os.path.join(d, "*pkl"))) + return len(self.files) + else: + shuffle(self.files) + + + def _balanced_file_list(self, dirs, first): + if first: + self.file_dict = {} + self.n_minority = np.inf + for d in dirs: + files = glob(os.path.join(d, "*pkl")) + shuffle(files) + self.file_dict[d] = files + if len(files) < self.n_minority: + self.n_minority = len(files) + self.files = [] + for key in self.file_dict: + self.files.extend(sample(self.file_dict[key], self.n_minority, replace=False)) + return len(self.files) + else: + self.files = [] + for key in self.file_dict: + self.files.extend(sample(self.file_dict[key], self.n_minority, replace=False)) + shuffle(self.files) - def _binary_labels_and_features(self, data_tiles): - features = [] - one_hots = [] - if not self.training: - np.random.seed(0) - for tile in data_tiles: - data = tile['data'] - one_hot = tile['one_hot'].astype(np.int) - binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 - for i in range(one_hot.shape[2]): - if i == 1: - binary_one_hot[:, :][one_hot[:, :, i] == 1] = 1 - else: - binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 - if self.training: - data, binary_one_hot = _augment_data(data, binary_one_hot, 
binary=True) - if self.training and self.balance_pixels: - neg_examples = np.where(binary_one_hot == 0) - n_neg = len(neg_examples[0]) - n_pos = len(np.where(binary_one_hot == 1)[0]) - if n_neg > n_pos: - idx = np.random.choice(np.arange(n_neg), n_neg - n_pos, replace=False) - xs = neg_examples[0][idx] - ys = neg_examples[1][idx] - binary_one_hot[xs, ys] = -1 - - if self.erode: - binary_one_hot = erosion(binary_one_hot) - binary_one_hot = erosion(binary_one_hot) - - binary_one_hot = np.expand_dims(binary_one_hot, 2) - features.append(data) - one_hots.append(binary_one_hot) - return [np.asarray(features)], [np.asarray(one_hots)] + + def _balanced_queue(self, dirs, first): + self.file_dict = {} + self.n_minority = np.inf + for d in dirs: + files = glob(os.path.join(d, "*pkl")) + shuffle(files) + self.file_dict[d] = files + if len(files) < self.n_minority: + self.n_minority = len(files) + if not first: + for key in self.file_dict: + shuffle(self.file_dict[key]) + self.files = [] + to_empty = self.file_dict.copy() + while True: + try: + for key in sorted(to_empty): + # python3 supports ordered iteration over dict keys. 
+ # this may not work perfectly when + # fitting a generator with use_multiprocessing == True + self.files.append(to_empty[key].pop()) + except IndexError as e: + break + return len(self.files) def _flip_lr(feature_tile, one_hot, binary=False): diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 7f20231..990f83d 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -3,13 +3,13 @@ import json import pdb import datetime +import numpy as np from fiona import open as fopen from glob import glob from lxml import html from requests import get from copy import deepcopy -from numpy import zeros, asarray, array, reshape, nan, sqrt, std, uint16 from shapely.geometry import shape from collections import defaultdict from rasterio import float32, open as rasopen @@ -166,36 +166,47 @@ def paths_map_multiple_scenes(image_directory, satellite=8): return band_map -def mean_of_scenes(paths_map, target_geo, target_shape): - rasters = _load_rasters(paths_map, target_geo, target_shape) - n_scenes = len(paths_map['B1.TIF']) - num_rasters = len(rasters) +def mean_of_three(paths_map, image_stack, target_shape, satellite=8): + + # iterate over paths_map + # iterate over each raster in paths_map + j = 0 - image_stack = np.zeros((num_rasters, target_shape[1], target_shape[2])) + out_image_stack = np.zeros((19, target_shape[1], target_shape[2])) + out_idx = 0 for band in sorted(paths_map.keys()): - feature_rasters = paths_map[band] - empty = np.zeros(target_shape) - for feature_raster in feature_raster: - empty += rasters[feature_raster] - image_stack[j] = empty/n_scenes - return image_stack + if band in landsat_rasters()[satellite]: + for sub_band in paths_map[band]: + out_image_stack[out_idx] += image_stack[j] + j += 1 + out_image_stack[out_idx] /= 3 + out_idx += 1 + else: + out_image_stack[out_idx] = image_stack[j] + out_idx += 1 + return out_image_stack -def median_of_scenes(paths_map, 
target_geo, target_shape): - rasters = _load_rasters(paths_map, target_geo, target_shape) - n_scenes = len(paths_map['B1.TIF']) - num_rasters = len(rasters) +def median_of_three(paths_map, image_stack, target_shape, satellite=8): + j = 0 - image_stack = np.zeros((num_rasters, target_shape[1], target_shape[2])) + out_image_stack = np.zeros((19, target_shape[1], target_shape[2])) + out_idx = 0 for band in sorted(paths_map.keys()): - feature_rasters = paths_map[band] - empty = np.zeros(target_shape) - for feature_raster in feature_raster: - empty += rasters[feature_raster] - image_stack[j] = empty/n_scenes - return image_stack + if band in landsat_rasters()[satellite]: + slc = np.zeros((3, target_shape[1], target_shape[2])) + for i, sub_band in enumerate(paths_map[band]): + slc[i] = image_stack[j] + j += 1 + out_image_stack[out_idx] = np.median(slc, axis=0) + out_idx += 1 + else: + out_image_stack[out_idx] = image_stack[j] + out_idx += 1 + + return out_image_stack def map_bands_to_indices(target_bands, satellite=8): @@ -286,7 +297,7 @@ def stack_rasters_multiprocess(paths_map, target_geo, target_shape): for feature_raster in feature_rasters: arr = rasters[feature_raster] if first: - stack = zeros((num_rasters, target_shape[1], target_shape[2]), uint16) + stack = np.zeros((num_rasters, target_shape[1], target_shape[2]), np.uint16) stack[j, :, :] = arr j += 1 first = False @@ -299,7 +310,7 @@ def stack_rasters_multiprocess(paths_map, target_geo, target_shape): # somehow select if first: - stack = zeros((num_rasters, target_shape[1], target_shape[2]), uint16) + stack = np.zeros((num_rasters, target_shape[1], target_shape[2]), np.uint16) stack[j, :, :] = arr j += 1 first = False @@ -329,7 +340,7 @@ def stack_rasters(paths_map, target_geo, target_shape): with rasopen(feature_raster, mode='r') as src: arr = src.read() if first: - stack = zeros((num_rasters, target_shape[1], target_shape[2]), uint16) + stack = np.zeros((num_rasters, target_shape[1], target_shape[2]), 
np.uint16) stack[j, :, :] = arr j += 1 first = False @@ -390,7 +401,7 @@ def clip_raster(evaluated, path, row, outfile=None): out = out.to_crs(src.crs) features = get_features(out) # if crop == true for mask, you have to update the metadata. - out_image, out_transform = mask(src, shapes=features, crop=True, nodata=nan) + out_image, out_transform = mask(src, shapes=features, crop=True, nodata=np.nan) meta = src.meta.copy() count = out_image.shape[0] diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 2ceee22..8a0e4ae 100755 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -1,8 +1,8 @@ import os -#E os.environ['CUDA_VISIBLE_DEVICES'] = "-1" import numpy as np import keras.backend as K import tensorflow as tf +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL) import pdb import argparse @@ -15,8 +15,13 @@ from scipy.special import expit from scipy.stats import mode -from data_utils import (save_raster, stack_rasters, stack_rasters_multiprocess, paths_map_multiple_scenes, load_raster, clip_raster, paths_mapping_single_scene) -from losses import multiclass_acc, masked_binary_xent, dice_loss, binary_acc, binary_focal_loss +from train_utils import softmax +from runspec import irrigated_path_rows_mt +from data_utils import (save_raster, stack_rasters, stack_rasters_multiprocess, + paths_map_multiple_scenes, load_raster, clip_raster, paths_mapping_single_scene, + mean_of_three) +from losses import (multiclass_acc, masked_binary_xent, dice_loss, binary_acc, binary_focal_loss, + masked_categorical_xent) from extract_training_data import concatenate_fmasks _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) @@ -24,7 +29,6 @@ masked_binary_xent = masked_binary_xent(pos_weight=1.0) custom_objects = {'masked_binary_xent':masked_binary_xent, 'binary_acc':binary_acc} - def _evaluate_image_return_logits(model, raster, n_classes, n_overlaps=4): chunk_size = 
608 diff = 608 @@ -38,8 +42,7 @@ def _evaluate_image_return_logits(model, raster, n_classes, n_overlaps=4): for j in range(k, raster.shape[2]-diff, stride): sub_raster = raster[:, i:i+chunk_size, j:j+chunk_size, :] preds = model.predict([sub_raster]) - preds = expit(preds[0]) - out[i:i+chunk_size, j:j+chunk_size, :] += preds + out[i:i+chunk_size, j:j+chunk_size, :] += preds[0] stdout.write("K: {} of {}. Percent done: {:.2f}\r".format(k // overlap_step + 1, n_overlaps, i / raster.shape[1])) out = np.swapaxes(out, 0, 2) out = out.astype(np.float32) @@ -61,7 +64,7 @@ def fmask_evaluated_image(evaluated_image, path, row, year, landsat_directory): def evaluate_image_many_shot(image_directory, model_paths, n_classes=4, - n_overlaps=4, outfile=None, custom_objects=None): + n_overlaps=4, outfile=None, custom_objects=None, preprocessing_func=None): ''' To recover from same padding, slide many different patches over the image. ''' print(outfile) if not isinstance(model_paths, list): @@ -75,7 +78,9 @@ def evaluate_image_many_shot(image_directory, model_paths, n_classes=4, paths_mapping = paths_map_multiple_scenes(image_directory) template, meta = load_raster(paths_mapping['B1.TIF'][0]) image_stack = stack_rasters_multiprocess(paths_mapping, meta, template.shape) - out_arr = np.zeros((1, image_stack.shape[1], image_stack.shape[2])) + if preprocessing_func is not None: + image_stack = mean_of_three(image_stack, paths_mapping) + out_arr = np.zeros((n_classes, image_stack.shape[1], image_stack.shape[2])) for i, model_path in enumerate(model_paths): print('loading {}'.format(model_path)) model = load_model(model_path, custom_objects=custom_objects) @@ -83,12 +88,15 @@ def evaluate_image_many_shot(image_directory, model_paths, n_classes=4, n_overlaps=n_overlaps) del model - print(out_arr.shape) + out_arr = softmax(out_arr) temp_mask = np.zeros((1, out_arr.shape[1], out_arr.shape[2])) fmasked_image = concatenate_fmasks(image_directory, temp_mask, meta, nodata=1) - # for i in 
range(out_arr.shape[0]): - # out_arr[i, :, :][fmasked_image.mask[0]] = np.nan - meta.update(dtype=np.float64) + for i in range(out_arr.shape[0]): + out_arr[i, :, :][fmasked_image.mask[0]] = np.nan + + out_arr = out_arr.astype(np.float32) + meta.update(dtype=np.float32) + out_arr /= n_overlaps if outfile: save_raster(out_arr, outfile, meta, count=n_classes) @@ -98,10 +106,16 @@ def evaluate_image_many_shot(image_directory, model_paths, n_classes=4, if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-m', '--model', type=str, required=True) - parser.add_argument('-i', '--image-dir', type=str, required=True) + parser.add_argument('-im', '--image-dir', type=str, required=True) parser.add_argument('-o', '--out-dir', type=str) parser.add_argument('-n', '--n-classes', type=int, default=5) parser.add_argument('-b', '--binary', action='store_true') + parser.add_argument('--outfile', type=str, required=True) + parser.add_argument('--use-gpu', action='store_true') + parser.add_argument('--include-path-row', action='store_true') + parser.add_argument('--evaluate-all-mt', action='store_true') + parser.add_argument('--preprocessing-func', type=str) + parser.add_argument('--year', type=int, default=2013) args = parser.parse_args() if args.out_dir is None: out_dir = os.path.dirname(os.path.splitext(args.model)[0]) @@ -109,21 +123,40 @@ def evaluate_image_many_shot(image_directory, model_paths, n_classes=4, os.mkdir(out_dir) else: out_dir = args.out_dir + if not os.path.isdir(out_dir): + os.mkdir(out_dir) - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - custom_objects = {'mb':masked_binary_xent, 'binary_acc':binary_acc} - - # model_paths = glob('/home/thomas/IrrMapper/fully-conv-classification/ensemble_models/test3/*.h5') - # model_paths = sorted(model_paths) - # model_paths = model_paths[len(model_paths)-1] + if not args.use_gpu: + os.environ['CUDA_VISIBLE_DEVICES'] = '-1' + custom_objects = {'mb':masked_binary_xent, 'multiclass_acc':multiclass_acc, + 
'binary_acc':binary_acc, 'masked_categorical_xent':masked_categorical_xent} model_paths = args.model - image_directory = args.image_dir - outfile = os.path.join(os.path.basename(os.path.normpath(image_directory)) + - '_random_majority_sample.tif') - outfile = os.path.join(out_dir, outfile) - evaluate_image_many_shot(image_directory, - model_paths=model_paths, - n_classes=args.n_classes, - n_overlaps=1, - outfile=outfile, - custom_objects=custom_objects) + if args.evaluate_all_mt: + for path, row in irrigated_path_rows_mt(): + image_directory = args.image_dir + "_".join([str(path), str(row), str(args.year)]) + outfile = args.outfile + if args.include_path_row: + outfile = os.path.splitext(outfile)[0] + outfile = os.path.basename(os.path.normpath(image_directory)) + outfile + ".tif" + outfile = os.path.join(out_dir, outfile) + evaluate_image_many_shot(image_directory, + model_paths=model_paths, + n_classes=args.n_classes, + n_overlaps=1, + outfile=outfile, + custom_objects=custom_objects) + image_directory = args.image_dir + else: + outfile = args.outfile + if args.include_path_row: + outfile = os.path.splitext(outfile)[0] + outfile = os.path.basename(os.path.normpath(args.image_dir)) + outfile + ".tif" + outfile = os.path.join(out_dir, outfile) + + evaluate_image_many_shot(args.image_dir, + model_paths=model_paths, + n_classes=args.n_classes, + n_overlaps=1, + outfile=outfile, + custom_objects=custom_objects, + preprocessing_func=args.preprocessing_func) diff --git a/fully-conv-classification/extract_training_data.py b/fully-conv-classification/extract_training_data.py index 095cb41..ad29c26 100644 --- a/fully-conv-classification/extract_training_data.py +++ b/fully-conv-classification/extract_training_data.py @@ -19,7 +19,7 @@ from collections import defaultdict from runspec import landsat_rasters, climate_rasters, mask_rasters, assign_shapefile_class_code, assign_shapefile_year -from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, 
stack_rasters_multiprocess, download_from_pr, paths_mapping_single_scene +from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr, paths_mapping_single_scene, mean_of_three, median_of_three from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features @@ -93,15 +93,15 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0, ta return class_mask -def extract_training_data_over_path_row(shapefiles, path, row, year, image_directory, - training_data_directory, n_classes, assign_shapefile_class_code, path_map_func=None, +def extract_training_data_over_path_row(test_train_shapefiles, path, row, year, image_directory, + training_data_root_directory, n_classes, assign_shapefile_class_code, path_map_func=None, preprocessing_func=None, tile_size=608): if path_map_func is None: path_map_func = paths_map_multiple_scenes - if not isinstance(shapefiles, list): - shapefiles = [shapefiles] + if not isinstance(test_train_shapefiles, dict): + raise ValueError("expected dict, got {}".format(type(test_train_shapefiles))) path_row_year = str(path) + '_' + str(row) + '_' + str(year) image_path = os.path.join(image_directory, path_row_year) @@ -111,34 +111,38 @@ def extract_training_data_over_path_row(shapefiles, path, row, year, image_direc mask_file = _random_tif_from_directory(image_path) mask, mask_meta = load_raster(mask_file) mask = np.zeros_like(mask).astype(np.int) - first = True - class_labels = None - for f in shapefiles: - class_code = assign_shapefile_class_code(f) - print(f, class_code) - out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) - if first: - class_labels = out - class_labels[~class_labels.mask] = class_code - first = False - else: - class_labels[~out.mask] = class_code try: image_stack = stack_rasters_multiprocess(image_path_maps, target_geo=mask_meta, target_shape=mask.shape) except 
RasterioIOError as e: print("Redownload images for", path_row_year) print(e) return - if preprocessing_func is not None: - image_stack = preprocessing_func(image_path_map, image_stack) - - class_labels = concatenate_fmasks(image_path, class_labels, mask_meta) - image_stack = np.swapaxes(image_stack, 0, 2) - class_labels = np.swapaxes(class_labels, 0, 2) - class_labels = np.squeeze(class_labels) - tiles_y, tiles_x = _target_indices_from_class_labels(class_labels, tile_size) - _save_training_data_from_indices(image_stack, class_labels, training_data_directory, - n_classes, tiles_x, tiles_y, tile_size) + image_stack = median_of_three(image_path_maps, image_stack, mask.shape) + for key, shapefiles in test_train_shapefiles.items(): + if key.lower() not in ('test', 'train'): + raise ValueError("expected key to be one of case-insenstive {test, train},\ + got {}".format(key)) + + training_data_directory = os.path.join(training_data_root_directory, key) + first = True + class_labels = None + for f in shapefiles: + class_code = assign_shapefile_class_code(f) + print(f, class_code) + out, _ = mask_raster_to_shapefile(f, mask_file, return_binary=False) + if first: + class_labels = out + class_labels[~class_labels.mask] = class_code + first = False + else: + class_labels[~out.mask] = class_code + class_labels = concatenate_fmasks(image_path, class_labels, mask_meta) + image_stack = np.swapaxes(image_stack, 0, 2) + class_labels = np.swapaxes(class_labels, 0, 2) + class_labels = np.squeeze(class_labels) + tiles_y, tiles_x = _target_indices_from_class_labels(class_labels, tile_size) + _save_training_data_from_indices(image_stack, class_labels, training_data_directory, + n_classes, tiles_x, tiles_y, tile_size) def _target_indices_from_class_labels(class_labels, tile_size): @@ -313,53 +317,28 @@ def make_border_labels(mask, border_width): dm[dm > border_width] = 0 return dm -def _mean_of_three_images(paths_map, image_stack): - # for each key in image_stack (sorted): - # 
...climate...landsat...static... - pass - if __name__ == '__main__': - sd = glob('shapefile_data/test/*.shp') - idd = '/home/thomas/share/image_data/' - td = '/home/thomas/ssd/multiclass_no_border_labels/test/' + image_directory = '/home/thomas/share/image_data/' + shapefiles = glob('shapefile_data/test/*.shp') + glob('shapefile_data/train/*.shp') + training_root_directory = '/home/thomas/ssd/test_extract/' n_classes = 4 - done = set() - for i, f in enumerate(sd): + for f in shapefiles: if f in done: continue - ffg = all_matching_shapefiles(f, 'shapefile_data/test/', assign_shapefile_year) - for e in ffg: + test_shapefiles = all_matching_shapefiles(f, 'shapefile_data/test/', assign_shapefile_year) + train_shapefiles = all_matching_shapefiles(f, 'shapefile_data/train/', assign_shapefile_year) + for e in test_shapefiles + train_shapefiles: done.add(e) bs = os.path.splitext(os.path.basename(f))[0] _, path, row = bs[-7:].split("_") year = assign_shapefile_year(f) print("extracting data for", path, row, year) paths_map_func = paths_map_multiple_scenes - extract_training_data_over_path_row(ffg, path, row, year, idd, td, n_classes, - assign_shapefile_class_code, path_map_func=paths_map_func) - - - # TODO: rewrite this to take advantage of test train data in same path/row - sd = glob('shapefile_data/train/*.shp') - td = '/home/thomas/ssd/multiclass_no_border_labels/train/' - n_classes = 4 - - done = set() + test_train_shapefiles = {'test':test_shapefiles, 'train':train_shapefiles} + extract_training_data_over_path_row(test_train_shapefiles, path, row, year, image_directory, + training_root_directory, n_classes, assign_shapefile_class_code, path_map_func=paths_map_func) - for i, f in enumerate(sd): - if f in done: - continue - ffg = all_matching_shapefiles(f, 'shapefile_data/train/', assign_shapefile_year) - for e in ffg: - done.add(e) - bs = os.path.splitext(os.path.basename(f))[0] - _, path, row = bs[-7:].split("_") - year = assign_shapefile_year(f) - print("extracting 
data for", path, row, year) - paths_map_func = paths_map_multiple_scenes - extract_training_data_over_path_row(ffg, path, row, year, idd, td, n_classes, - assign_shapefile_class_code, path_map_func=paths_map_func) diff --git a/fully-conv-classification/precision_and_recall.py b/fully-conv-classification/precision_and_recall.py index b03feda..cd0837c 100644 --- a/fully-conv-classification/precision_and_recall.py +++ b/fully-conv-classification/precision_and_recall.py @@ -6,8 +6,9 @@ from glob import glob -from losses import binary_focal_loss, binary_acc, masked_binary_xent -from data_generators import RandomMajorityUndersamplingSequence, BinaryDataSequence +from losses import (binary_focal_loss, binary_acc, masked_binary_xent, multiclass_acc, + masked_categorical_xent) +from data_generators import DataGenerator from train_utils import confusion_matrix_from_generator @@ -16,33 +17,24 @@ parser = argparse.ArgumentParser() parser.add_argument('--model', type=str, required=True) parser.add_argument('--test-data-path', type=str, required=True) + parser.add_argument('--target-class', type=str) + parser.add_argument('--batch-size', type=int, default=4) + parser.add_argument('--n-classes', type=int, default=4) parser.add_argument('--use-gpu', action='store_true') args = parser.parse_args() if not args.use_gpu: os.environ['CUDA_VISIBLE_DEVICES'] = '-1' #custom_objects = {'mb':masked_binary_xent(pos_weight=1.0), 'binary_acc':binary_acc} - custom_objects = {'mb':masked_binary_xent(), 'bfl':binary_focal_loss(), 'binary_acc':binary_acc} + custom_objects = {'mb':masked_binary_xent, 'multiclass_acc':multiclass_acc, + 'binary_acc':binary_acc, 'masked_categorical_xent':masked_categorical_xent} try: model = load_model(args.model, custom_objects=custom_objects) except ValueError as e: print(e.args) raise - - batch_size = 12 - dirs = os.listdir(args.test_data_path) - is_top_dir = True - for d in dirs: - if not os.path.isdir(os.path.join(args.test_data_path, d)): - is_top_dir = False - 
break - files = [] - if is_top_dir: - for d in dirs: - fs = glob(os.path.join(args.test_data_path, d, '*.pkl')) - files.extend(fs) - else: - files = glob(os.path.join(args.test_data_path, '*.pkl')) - test_generator = BinaryDataSequence(batch_size, majority_file_list=files, minority_file_list=[], training=False) + batch_size = args.batch_size + test_generator = DataGenerator(data_directory=args.test_data_path, batch_size=batch_size, + training=False, target_classes=args.target_class) cmat, prec, recall = confusion_matrix_from_generator(test_generator, batch_size, model, - n_classes=2) + n_classes=args.n_classes) print('model {} has \n p:{}\n r:{}'.format(args.model, prec, recall)) diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py index 5bc4c4e..5588ee8 100644 --- a/fully-conv-classification/runspec.py +++ b/fully-conv-classification/runspec.py @@ -76,6 +76,13 @@ def mask_rasters(): m = ('cloud_fmask.tif', )# , 'water_fmask.tif') return m +def irrigated_path_rows_mt(): + ls = [(37, 28), (39, 27), (42, 26), (42, 27), (41, 28), (41, 27), (41, 26), (40, 27), + (40, 28), (39, 28), (39, 29), (38, 26), (38, 28), (34, 27), (35, 27), (36, 26), + (36, 27), (36, 28), (37, 26)] + return ls + + if __name__ == '__main__': pass diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index 44dc508..a62d0c6 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -59,25 +59,6 @@ def mask_raster_to_features(raster, features, features_meta): return out_image, meta -def generate_class_mask(shapefile, master_raster, nodata=-1): - ''' Generates a mask with 1 everywhere - shapefile data is present and a no_data value everywhere else. - no_data is -1 in this case, as it is never a valid class label. - Switching coordinate reference systems is important here, or - else the masking won't work. 
- ''' - shp = gpd.read_file(shapefile) - shp = shp[shp.geometry.notnull()] - with rasopen(master_raster, 'r') as src: - shp = shp.to_crs(src.crs) - features = get_features(shp) - out_image, out_transform = mask(src, shapes=features, nodata=-1) - out_image[out_image != -1] = 1 - out_image[out_image == -1] = 0 - meta = src.meta - return out_image, meta - - def get_shapefile_lat_lon(shapefile): ''' Center of shapefile''' with fopen(shapefile, "r") as src: diff --git a/fully-conv-classification/train_model_random_files.py b/fully-conv-classification/train_model_random_files.py index cf6bd0b..856d23d 100644 --- a/fully-conv-classification/train_model_random_files.py +++ b/fully-conv-classification/train_model_random_files.py @@ -15,12 +15,14 @@ from models import unet -from data_generators import RandomMajorityUndersamplingSequence, BinaryDataSequence +from data_generators import DataGenerator from train_utils import lr_schedule from losses import (binary_focal_loss, binary_acc, masked_binary_xent, masked_categorical_xent, multiclass_acc) +join = os.path.join # don't monitor binary acc any more, monitor precision and recall. +# or monitor top-k accuracy. 
if __name__ == '__main__': @@ -31,10 +33,11 @@ n_classes = 4 model = unet(input_shape, initial_exp=4, n_classes=n_classes) - model_path = 'random_majority_files/multiclass/only_irrigated_no_border_labels/' + model_path = 'random_majority_files/multiclass/' if not os.path.isdir(model_path): os.mkdir(model_path) - model_path += 'model_xent_no_balanced_pixels.h5' + + model_path += 'three_scenes_concat_only_irrigated_tiles_balanced_pixels_per_batch.h5' tensorboard = TensorBoard(log_dir='/tmp/', profile_batch=0, @@ -46,27 +49,25 @@ save_best_only=True) lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate) - lr_scheduler = LearningRateScheduler(lr_schedule) + lr_scheduler = LearningRateScheduler(lr_schedule, verbose=True) - train_files = glob('/home/thomas/ssd/binary_train_no_border_labels/train/class_1_data/*.pkl') - test_files = glob('/home/thomas/ssd/binary_train_no_border_labels/test/class_1_data/*.pkl') - train_files = '/home/thomas/ssd/binary_train_no_border_labels/train/class_1_data/*.pkl' - test_files = '/home/thomas/ssd/binary_train_no_border_labels/test/class_1_data/*.pkl' + root = '/home/thomas/ssd/multiclass_no_border_labels/' + train_dir = join(root, 'train') + test_dir = join(root, 'test') opt = tf.keras.optimizers.Adam() batch_size = 4 loss_func = masked_categorical_xent metric = multiclass_acc model.compile(opt, loss=loss_func, metrics=[metric]) - train_generator = RandomMajorityUndersamplingSequence(batch_size, training_dir) - test_generator = RandomMajorityUndersamplingSequence(batch_size, testing_dir) - # train_generator = BinaryDataSequence(batch_size, train_files) - # test_generator = BinaryDataSequence(batch_size, test_files, balance_pixels=True, training=False) + train_generator = DataGenerator(train_dir, batch_size, target_classes=0, n_classes=n_classes, + balance_examples_per_batch=False, balance_pixels_per_batch=True) + test_generator = DataGenerator(test_dir, batch_size, target_classes=0, n_classes=n_classes, 
training=False) model.fit_generator(train_generator, - epochs=50, + epochs=25, validation_data=test_generator, callbacks=[tensorboard, lr_scheduler, checkpoint], use_multiprocessing=True, workers=12, - max_queue_size=30, + max_queue_size=12, verbose=1) diff --git a/fully-conv-classification/train_utils.py b/fully-conv-classification/train_utils.py index b0d3b3a..5eaca69 100644 --- a/fully-conv-classification/train_utils.py +++ b/fully-conv-classification/train_utils.py @@ -16,6 +16,12 @@ from glob import glob +def softmax(arr, count_dim=0): + arr = np.exp(arr) + arr /= (np.sum(arr, axis=count_dim, keepdims=True)) + return arr + + def make_temporary_directory(model_directory=None): if model_directory is None: model_directory = './models/' @@ -110,7 +116,7 @@ def _preprocess_masks_and_calculate_cmat(y_true, y_pred, n_classes=2): y_pred = y_pred if n_classes > 2: y_pred = np.squeeze(y_pred) - y_pred = softmax(y_pred) + y_pred = softmax(y_pred, count_dim=2) y_pred = np.argmax(y_pred, axis=2) y_true = np.argmax(y_true, axis=2) y_pred = y_pred[mask] @@ -120,13 +126,15 @@ def _preprocess_masks_and_calculate_cmat(y_true, y_pred, n_classes=2): y_pred = y_pred[mask] y_true = y_true[mask] - cmat = confusion_matrix(y_true, y_pred, - labels=labels) + cmat = confusion_matrix(y_true, y_pred, + labels=labels) return cmat def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classes=2): out_cmat = np.zeros((n_classes, n_classes)) + if not len(valid_generator): + raise ValueError("Length of validation generator is 0") with Pool(batch_size) as pool: for batch_x, y_true in valid_generator: y_true = y_true[0] @@ -155,26 +163,9 @@ def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classe return cmat, recall_dict, precision_dict -def lr_schedule(epoch, initial_learning_rate): +def lr_schedule(epoch, initial_learning_rate, efold=50): lr = initial_learning_rate - if epoch > 15: - lr /= 256 - elif epoch > 13: - lr /= 128 - elif epoch > 11: - lr /= 
64 - elif epoch > 9: - lr /= 32. - elif epoch > 7: - lr /= 16. - elif epoch > 5: - lr /= 8. - elif epoch > 3: - lr /= 4. - elif epoch > 1: - lr /= 2. - print('Learning rate: ', lr) - return float(lr) + return float(lr*np.exp(-epoch/efold)) def save_model_info(root_directory, loss_func, accuracy, loss, class_weights, classes_to_augment, From ba37c0f0a5dd0307d09eb17eec23e1dcbf914891 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Sat, 2 Nov 2019 10:33:04 -0600 Subject: [PATCH 87/89] commit before leaving for the weekend. Added multiclass focal loss. and a bunch of other stuff --- fully-conv-classification/data_generators.py | 31 +++-- fully-conv-classification/data_utils.py | 2 +- .../extract_training_data.py | 11 +- fully-conv-classification/finetune_model.py | 67 +++++----- fully-conv-classification/losses.py | 122 ++++++++++++++++++ fully-conv-classification/train_model.py | 89 ------------- .../train_model_random_files.py | 39 ++++-- fully-conv-classification/train_utils.py | 6 +- 8 files changed, 210 insertions(+), 157 deletions(-) create mode 100644 fully-conv-classification/losses.py delete mode 100644 fully-conv-classification/train_model.py diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 105f76d..82b9933 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -26,12 +26,14 @@ class SatDataGenerator(Sequence): - def __init__(self, batch_size, n_classes, balance_pixels_per_batch=False, training=True): + def __init__(self, batch_size, n_classes, balance_pixels_per_batch=False, training=True, + apply_irrigated_weights=False): self.batch_size = batch_size self.n_classes = n_classes self.training = training self.balance_pixels_per_batch = balance_pixels_per_batch + self.apply_irrigated_weights = apply_irrigated_weights def _get_files(self): # Required override. 
@@ -65,6 +67,8 @@ def _labels_and_features(self, data_tiles): for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) + if self.apply_irrigated_weights: + one_hot[:, :, 0] *= 50 class_code = tile['class_code'] if self.balance_pixels_per_batch: one_hot = self._balance_pixels(one_hot, min_count) @@ -93,9 +97,12 @@ def _balance_pixels(self, one_hot, min_count, binary=False): else: for i in range(one_hot.shape[2]): ys, xs = np.where(one_hot[:, :, i] == 1) + if len(ys) == min_count: + continue if len(ys): - ys = np.random.choice(ys, size=int(len(ys)-min_count), replace=False) - xs = np.random.choice(xs, size=int(len(xs)-min_count), replace=False) + n_to_choose = int(len(ys) - min_count) # 0 out all but min_count pixels + ys = np.random.choice(ys, size=n_to_choose, replace=False) + xs = np.random.choice(xs, size=n_to_choose, replace=False) one_hot[ys, xs, i] = 0 return one_hot @@ -106,7 +113,6 @@ def _count_pixels(self, data_tiles): for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) - binary_one_hot = np.ones((one_hot.shape[0], one_hot.shape[1])).astype(np.int)*-1 nodata_mask = np.sum(one_hot, axis=2) argmaxed = np.argmax(one_hot, axis=2) argmaxed[nodata_mask == 0] = -1 @@ -137,7 +143,6 @@ def _binary_labels_and_features(self, data_tiles): binary_one_hot[:, :][one_hot[:, :, i] == 1] = 0 if self.balance_pixels_per_batch: - one_hot = self._balance_pixels(one_hot, min_count, binary=True) if self.training: @@ -157,7 +162,7 @@ class DataGenerator(SatDataGenerator): I want the following functionality: Easily switch between binary/multiclass classification - Can focus on examples from one class (dict of target_classes) + Can focus on examples from one class (list of target_classes) Can apply arbitary morphological operations to the input labels Able to feed in examples without any preprocessing (unbalanced) @@ -168,13 +173,15 @@ class DataGenerator(SatDataGenerator): ''' def __init__(self, data_directory, 
batch_size, n_classes=None, training=True, target_classes=None, balance=False, balance_examples_per_batch=False, - balance_pixels_per_batch=False): + balance_pixels_per_batch=False, apply_irrigated_weights=False, + steps_per_epoch=None): # Assert that all three can't be true super().__init__(batch_size, n_classes, balance_pixels_per_batch, training) self.data_directory = data_directory self.balance = balance self.balance_examples_per_batch = balance_examples_per_batch self.target_classes = target_classes + self.steps_per_epoch = steps_per_epoch self._get_files() @@ -228,7 +235,8 @@ def _get_files(self): return def __len__(self): - + if self.steps_per_epoch is not None: + return self.steps_per_epoch return int(np.ceil(self.n_files / self.batch_size)) @@ -238,8 +246,7 @@ def on_epoch_end(self): def __getitem__(self, idx): - # print("suspicious:", idx) - # model.fit_generator does not pull batches in order of batch. + # model.fit_generator does not pull batches in order. batch = self.files[idx * self.batch_size:(idx + 1)*self.batch_size] data_tiles = [self._from_pickle(x) for x in batch] self.batch = batch @@ -274,12 +281,12 @@ def _balanced_file_list(self, dirs, first): self.n_minority = len(files) self.files = [] for key in self.file_dict: - self.files.extend(sample(self.file_dict[key], self.n_minority, replace=False)) + self.files.extend(sample(self.file_dict[key], self.n_minority)) return len(self.files) else: self.files = [] for key in self.file_dict: - self.files.extend(sample(self.file_dict[key], self.n_minority, replace=False)) + self.files.extend(sample(self.file_dict[key], self.n_minority)) shuffle(self.files) diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 990f83d..8d72bed 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -398,7 +398,7 @@ def clip_raster(evaluated, path, row, outfile=None): out = out[out['ROW'] == row] with rasopen(evaluated, 'r') as 
src: - out = out.to_crs(src.crs) + out = out.to_crs(src.crs['init']) features = get_features(out) # if crop == true for mask, you have to update the metadata. out_image, out_transform = mask(src, shapes=features, crop=True, nodata=np.nan) diff --git a/fully-conv-classification/extract_training_data.py b/fully-conv-classification/extract_training_data.py index ad29c26..c787f0f 100644 --- a/fully-conv-classification/extract_training_data.py +++ b/fully-conv-classification/extract_training_data.py @@ -246,8 +246,6 @@ def min_data_tiles_to_cover_labels(shapefiles, path, row, year, image_directory, plt.plot([min_x, max_x], [max_y, max_y], 'b', linewidth=2) plt.plot([min_x, max_x], [min_y, min_y], 'b', linewidth=2) - - y_min = [min_x] * len(tiles_y) y_max = [max_x] * len(tiles_y) for t, mn, mx in zip(tiles_y, y_min, y_max): @@ -259,7 +257,7 @@ def min_data_tiles_to_cover_labels(shapefiles, path, row, year, image_directory, plt.plot([t, t], [mn, mx], 'r') plt.imshow(class_labels[0]) - plt.title(frac) + plt.title('path/row: {} {} percent data pixels: {:.3f}'.format(path, row, frac)) plt.colorbar() plt.show() @@ -338,7 +336,8 @@ def make_border_labels(mask, border_width): year = assign_shapefile_year(f) print("extracting data for", path, row, year) paths_map_func = paths_map_multiple_scenes - test_train_shapefiles = {'test':test_shapefiles, 'train':train_shapefiles} - extract_training_data_over_path_row(test_train_shapefiles, path, row, year, image_directory, - training_root_directory, n_classes, assign_shapefile_class_code, path_map_func=paths_map_func) + min_data_tiles_to_cover_labels(train_shapefiles, path, row, 2013, image_directory) + # test_train_shapefiles = {'test':test_shapefiles, 'train':train_shapefiles} + # extract_training_data_over_path_row(test_train_shapefiles, path, row, year, image_directory, + # training_root_directory, n_classes, assign_shapefile_class_code, path_map_func=paths_map_func) diff --git a/fully-conv-classification/finetune_model.py 
b/fully-conv-classification/finetune_model.py index 8c47d39..7ef9015 100644 --- a/fully-conv-classification/finetune_model.py +++ b/fully-conv-classification/finetune_model.py @@ -8,16 +8,17 @@ from argparse import ArgumentParser from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) from functools import partial -from tensorflow.keras.models import load_model +from tensorflow.keras.models import load_model from glob import glob from models import unet -from data_generators import RandomMajorityUndersamplingSequence, BinaryDataSequence +from data_generators import DataGenerator from train_utils import lr_schedule from losses import (binary_focal_loss, binary_acc, masked_binary_xent, masked_categorical_xent, multiclass_acc) +join = os.path.join def make_file_list(directory, ext='*.pkl'): is_top_dir = True @@ -40,57 +41,57 @@ def make_file_list(directory, ext='*.pkl'): ap = ArgumentParser() ap.add_argument("--model-to-finetune", type=str, required=True) - ap.add_argument("--loss-func", type=str, required=True) + ap.add_argument("--loss-func", type=str) args = ap.parse_args() - mb = masked_binary_xent(1.0) - bfl = binary_focal_loss() input_shape = (None, None, 51) n_classes = 1 - custom_objects = {'mb':masked_binary_xent(), 'bfl':bfl, 'binary_acc':binary_acc} + custom_objects = {'mb':masked_binary_xent(), 'binary_acc':binary_acc, + 'masked_categorical_xent':masked_categorical_xent, 'multiclass_acc':multiclass_acc} + model_frozen = load_model(args.model_to_finetune, custom_objects=custom_objects) if 'finetuned' not in args.model_to_finetune: - model_out_path = os.path.splitext(args.model_to_finetune)[0] + '_finetuned.h5' + model_out_path = os.path.splitext(args.model_to_finetune)[0] + '_finetuned' else: model_out_path = args.model_to_finetune + model_out_path = os.path.join("random_majority_files/SGD/", 'finetuned') print(model_out_path) - for layer in model_frozen.layers[:-1]: + for layer in model_frozen.layers[:-2]: 
layer.trainable = False + # model_frozen.summary() + + model_out_path += "-{epoch:02d}-{val_multiclass_acc:.2f}.hdf5" checkpoint = ModelCheckpoint(filepath=model_out_path, - monitor='val_binary_acc', - verbose=1, - save_best_only=True) + monitor='val_multiclass_acc', + verbose=1, save_best_only=False) initial_learning_rate = 1e-3 - lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate) - lr_scheduler = LearningRateScheduler(lr_schedule) - - batch_size = 4 - data_directory = '/home/thomas/ssd/binary_train_no_border_labels/' - minority_file_list = glob(os.path.join(data_directory, 'train/class_1_data/*pkl') ) - majority_file_list = glob(os.path.join(data_directory, 'train/class_0_data/*pkl') ) - train_generator = BinaryDataSequence(batch_size, minority_file_list, majority_file_list, balance_files=True, erode=False) - minority_file_list = glob(os.path.join(data_directory, 'test/class_1_data/*pkl') ) - majority_file_list = glob(os.path.join(data_directory, 'test/class_0_data/*pkl') ) - opt = tf.keras.optimizers.Adam() - loss_func = binary_focal_loss(gamma=3, alpha=0.99) - # loss_func = masked_categorical_xent - model_frozen.compile(opt, loss=loss_func, metrics=[binary_acc]) - # train_generator = RandomMajorityUndersamplingSequence(batch_size, training_dir) - # test_generator = RandomMajorityUndersamplingSequence(batch_size, testing_dir) - test_generator = BinaryDataSequence(batch_size, minority_file_list, majority_file_list, - training=False, erode=False, total_files=120) + lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate, efold=50) + lr_scheduler = LearningRateScheduler(lr_schedule, verbose=True) + opt = tf.keras.optimizers.SGD() + + root = '/home/thomas/ssd/multiclass_no_border_labels/' + train_dir = join(root, 'train') + test_dir = join(root, 'test') + + loss_func = masked_categorical_xent + batch_size = 8 + model_frozen.compile(opt, loss=loss_func, metrics=[multiclass_acc]) + train_generator = 
DataGenerator(train_dir, batch_size, target_classes=None, + n_classes=n_classes, training=True, apply_irrigated_weights=True, + steps_per_epoch=200) + test_generator = DataGenerator(test_dir, batch_size, target_classes=[0, 1], + n_classes=n_classes, training=False) model_frozen.fit_generator(train_generator, - epochs=15, + epochs=40, validation_data=test_generator, callbacks=[lr_scheduler, checkpoint], - use_multiprocessing=True, - workers=12, + use_multiprocessing=False, + workers=1, max_queue_size=30, verbose=1) - model_frozen.save("fuullytaruined.h5") diff --git a/fully-conv-classification/losses.py b/fully-conv-classification/losses.py new file mode 100644 index 0000000..407bc1c --- /dev/null +++ b/fully-conv-classification/losses.py @@ -0,0 +1,122 @@ +import keras.backend as K +import tensorflow as tf + +_epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) + +def binary_focal_loss(gamma=2, alpha=0.25): + ''' + Focal loss: + + FL (pt) = -(1-pt)^gamma * log(pt) + where + pt = p if y==1 + 1-p otherwise + ''' + + def bfl(y_true, y_pred): + mask = tf.not_equal(y_true, -1) # true where the mask isn't==-1 + y_pred = tf.nn.sigmoid(y_pred) + pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred)) + pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred)) + epsilon = K.epsilon() + # clip to prevent NaN's and Inf's + pt_1 = K.clip(pt_1, epsilon, 1. - epsilon) + pt_0 = K.clip(pt_0, epsilon, 1. - epsilon) + pt_1 = tf.boolean_mask(pt_1, mask) + pt_0 = tf.boolean_mask(pt_0, mask) + + return -K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \ + -K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. 
- pt_0)) + return bfl + + +def multiclass_focal_loss(gamma=2, alpha=0.25): + + + def multiclass_FL(y_true, y_pred): + y_true_sum = tf.math.reduce_sum(y_true, axis=-1) + mask = tf.not_equal(y_true_sum, 0) + # y_true = tf.boolean_mask(y_true, mask) + # y_pred = tf.boolean_mask(y_pred, mask) + # probabilities = tf.nn.softmax(y_pred) + # xen = -y_true * tf.math.log(probabilities) # all 0s where y_true is all 0s + # loss = alpha*tf.math.pow(1-probabilities, gamma) * xen + # return tf.math.reduce_mean(loss) + probabilities = tf.nn.softmax(y_pred) + xen = -y_true * tf.math.log(probabilities) # all 0s where y_true is all 0s + complement = tf.dtypes.cast(tf.equal(y_true, 0), tf.float32) + negative_probabilities = -tf.math.pow(complement*probabilities, + gamma)*tf.math.log(complement) + masked_xen = tf.boolean_mask(xen, mask) + masked_complement = tf.boolean_mask(xen, negative_probabilities) + return tf.reduce_mean(masked_xen) + tf.reduce_mean(masked_complement) + + return multiclass_FL + + +def dice_coef(y_true, y_pred, smooth=1): + """ + Dice = (2*|X & Y|)/ (|X|+ |Y|) + = 2*sum(|A*B|)/(sum(A^2)+sum(B^2)) + ref: https://arxiv.org/pdf/1606.04797v1.pdf + """ + intersection = K.sum(K.abs(y_true * y_pred), axis=-1) + return (2. * intersection + smooth) / (K.sum(K.square(y_true), -1) + K.sum(K.square(y_pred), -1) + smooth) + + +def dice_loss(y_true, y_pred): + mask = tf.not_equal(y_true, -1) + # y_pred = tf.nn.softmax(y_pred) + y_true = tf.boolean_mask(y_true, mask) + y_pred = tf.boolean_mask(y_pred, mask) + return 1 - dice_coef(y_true, y_pred) + + +def masked_binary_xent(pos_weight=1.0): + # One_hot matrix is all zeros along depth if there isn't + # a data pixel there. Accordingly, we + # mask out the pixels that do not contain data. 
+ # binary xent requires a y_true of shape nxmx1, with -1 + # indicating nodata + def mb(y_true, y_pred): + mask = tf.not_equal(y_true, -1) + y_true = tf.boolean_mask(y_true, mask) + y_pred = tf.boolean_mask(y_pred, mask) + return tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits( + labels=y_true, + logits=y_pred, + pos_weight=pos_weight)) + return mb + + +def masked_categorical_xent(y_true, y_pred): + # One_hot matrix is all zeros along depth if there isn't + # a data pixel there. Accordingly, we + # mask out the pixels that do not contain data. + # wait what? I don't need to even mask this! + # the one_hot matrix contains depthwise 0s + # where there isn't data... + # y_true_sum = tf.math.reduce_sum(y_true, axis=-1) + # mask = tf.not_equal(y_true_sum, 0) + # y_true = tf.boolean_mask(y_true, mask) + # y_pred = tf.boolean_mask(y_pred, mask) + return tf.nn.softmax_cross_entropy_with_logits_v2(y_true, y_pred) + + +def binary_acc(y_true, y_pred): + y_pred = tf.round(tf.nn.sigmoid(y_pred)) + mask = tf.not_equal(y_true, -1) + y_true = tf.boolean_mask(y_true, mask) + y_pred = tf.boolean_mask(y_pred, mask) + return K.mean(K.equal(y_true, K.round(y_pred))) + + +def multiclass_acc(y_true, y_pred): + y_true_sum = tf.reduce_sum(y_true, axis=-1) + mask = tf.not_equal(y_true_sum, 0) + y_pred = tf.nn.softmax(y_pred) + y_pred = tf.argmax(y_pred, axis=-1) + y_true = tf.argmax(y_true, axis=-1) + y_true_masked = tf.boolean_mask(y_true, mask) + y_pred_masked = tf.boolean_mask(y_pred, mask) + return K.mean(K.equal(y_pred_masked, y_true_masked)) diff --git a/fully-conv-classification/train_model.py b/fully-conv-classification/train_model.py deleted file mode 100644 index 7befbb0..0000000 --- a/fully-conv-classification/train_model.py +++ /dev/null @@ -1,89 +0,0 @@ -import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' -# os.environ['CUDA_VISIBLE_DEVICES'] = '-1' -import time -import keras.backend as K -import tensorflow as tf -import numpy as np -import argparse -from 
tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, LearningRateScheduler) -from functools import partial -from sklearn.metrics import confusion_matrix -from tensorflow.keras.models import load_model -from scipy.special import expit - -from models import unet -from data_generators import SatDataSequence -from data_utils import map_bands_to_indices -from train_utils import (construct_parser, make_temporary_directory, save_model_info, - lr_schedule, confusion_matrix_from_generator) -from runspec import target_bands -from losses import * - - - -training_data_dir = '/home/thomas/ssd/single_scene/train/' -test_data_dir = '/home/thomas/ssd/single_scene/test/' - - -if __name__ == '__main__': - - parser = construct_parser() - args = parser.parse_args() - initial_learning_rate = args.learning_rate - - temp_dir, model_path, tensorboard_path = make_temporary_directory('./models/') - - input_shape = (None, None, 19) - - n_classes = 1 - - model = unet(input_shape, initial_exp=4, n_classes=n_classes) - - tensorboard = TensorBoard(log_dir=tensorboard_path, - profile_batch=0, - update_freq=30, - batch_size=3) - checkpoint = ModelCheckpoint(filepath=model_path, - monitor='val_binary_acc', - verbose=1, - save_best_only=True) - lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate) - lr_scheduler = LearningRateScheduler(lr_schedule) - - opt = tf.keras.optimizers.Adam() - class_weights = {0:1, 1:1, 2:1, 3:1, 4:1, 5:1} - classes_to_augment = True #{0:1, 1:1, 2:1, 3:1, 4:1, 5:1} - batch_size = 4 - pos_weight = args.pos_weight - loss_func = binary_focal_loss(gamma=2, alpha=0.25) - model.compile(opt, loss=loss_func, metrics=[binary_acc]) - train_generator = SatDataSequence(training_data_dir, class_weights=class_weights, - batch_size=batch_size, n_classes=n_classes) - test_generator = SatDataSequence(test_data_dir, class_weights=class_weights, - batch_size=batch_size, training=False, n_classes=n_classes) - - model.fit_generator(train_generator, - 
epochs=5, - validation_data=test_generator, - callbacks=[tensorboard, lr_scheduler, checkpoint], - use_multiprocessing=True, - workers=12, - max_queue_size=30, - verbose=1) - model.save('single_scene.h5') - # loss_func = binary_focal_loss(gamma=2, alpha=0.25) - # custom_objects = {'bfl':loss_func, 'binary_acc':binary_acc} - # model = load_model('models/temp/model.h5', custom_objects=custom_objects) - # test_data_dir = '/home/thomas/ssd/binary_train/test/' - # test_generator = SatDataSequence(test_data_dir, class_weights=class_weights, - # batch_size=batch_size, training=False, n_classes=n_classes) - # accuracy = model.evaluate_generator(test_generator) - # loss = accuracy[0] - # accuracy = accuracy[1] - # test_generator = SatDataSequence(test_data_dir, class_weights=class_weights, - # batch_size=1, n_classes=n_classes) - # cmat, prec, recall = confusion_matrix_from_generator(test_generator, model, n_classes=2) - # print(prec, recall) - # save_model_info(temp_dir, loss_func.__name__, accuracy, loss, class_weights, - # classes_to_augment, pos_weight, initial_learning_rate, cmat, prec, recall) diff --git a/fully-conv-classification/train_model_random_files.py b/fully-conv-classification/train_model_random_files.py index 856d23d..a06e45e 100644 --- a/fully-conv-classification/train_model_random_files.py +++ b/fully-conv-classification/train_model_random_files.py @@ -17,8 +17,7 @@ from models import unet from data_generators import DataGenerator from train_utils import lr_schedule -from losses import (binary_focal_loss, binary_acc, masked_binary_xent, masked_categorical_xent, - multiclass_acc) +from losses import * join = os.path.join # don't monitor binary acc any more, monitor precision and recall. 
@@ -32,12 +31,22 @@ n_classes = 4 + ap = ArgumentParser() + ap.add_argument('--gamma', type=float) + + args = ap.parse_args() + + gamma = float(args.gamma) + if gamma is None: + # default gamma + gamma = 2.0 + model = unet(input_shape, initial_exp=4, n_classes=n_classes) model_path = 'random_majority_files/multiclass/' if not os.path.isdir(model_path): os.mkdir(model_path) - model_path += 'three_scenes_concat_only_irrigated_tiles_balanced_pixels_per_batch.h5' + model_path += 'three_scenes_concat_balance_examples_per_batch_focal_loss_gamma-{}.h5'.format(gamma) tensorboard = TensorBoard(log_dir='/tmp/', profile_batch=0, @@ -48,7 +57,8 @@ verbose=1, save_best_only=True) - lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate) + epochs = 1000 + lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate, efold=epochs/10) lr_scheduler = LearningRateScheduler(lr_schedule, verbose=True) root = '/home/thomas/ssd/multiclass_no_border_labels/' @@ -57,17 +67,20 @@ opt = tf.keras.optimizers.Adam() batch_size = 4 - loss_func = masked_categorical_xent + loss_func = multiclass_focal_loss(gamma=gamma) metric = multiclass_acc model.compile(opt, loss=loss_func, metrics=[metric]) - train_generator = DataGenerator(train_dir, batch_size, target_classes=0, n_classes=n_classes, - balance_examples_per_batch=False, balance_pixels_per_batch=True) - test_generator = DataGenerator(test_dir, batch_size, target_classes=0, n_classes=n_classes, training=False) + train_generator = DataGenerator(train_dir, batch_size, target_classes=None, + n_classes=n_classes, balance=False, balance_pixels_per_batch=False, + balance_examples_per_batch=True, apply_irrigated_weights=False, + training=False) + test_generator = DataGenerator(test_dir, batch_size, target_classes=0, + n_classes=n_classes, training=False) model.fit_generator(train_generator, - epochs=25, + epochs=epochs, validation_data=test_generator, callbacks=[tensorboard, lr_scheduler, checkpoint], - 
use_multiprocessing=True, - workers=12, - max_queue_size=12, - verbose=1) + use_multiprocessing=False, + workers=1, + max_queue_size=1, + verbose=0) diff --git a/fully-conv-classification/train_utils.py b/fully-conv-classification/train_utils.py index 5eaca69..8025138 100644 --- a/fully-conv-classification/train_utils.py +++ b/fully-conv-classification/train_utils.py @@ -158,12 +158,12 @@ def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classe precision_dict[i] = 0 recall_dict[i] = 0 for i in range(n_classes): - recall_dict[i] = out_cmat[i, i] / np.sum(out_cmat[i, :]) - precision_dict[i] = out_cmat[i, i] / np.sum(out_cmat[:, i]) + precision_dict[i] = out_cmat[i, i] / np.sum(out_cmat[i, :]) # row i + recall_dict[i] = out_cmat[i, i] / np.sum(out_cmat[:, i]) # column i return cmat, recall_dict, precision_dict -def lr_schedule(epoch, initial_learning_rate, efold=50): +def lr_schedule(epoch, initial_learning_rate, efold): lr = initial_learning_rate return float(lr*np.exp(-epoch/efold)) From 4e3fe603875d9a495017a9912c1497990e59d4c5 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Thu, 14 Nov 2019 18:05:59 -0700 Subject: [PATCH 88/89] commit before merge --- fully-conv-classification/_clip_rasters.py | 20 + fully-conv-classification/crop_data_layer.py | 408 +++++++++++++++++++ 2 files changed, 428 insertions(+) create mode 100644 fully-conv-classification/_clip_rasters.py create mode 100644 fully-conv-classification/crop_data_layer.py diff --git a/fully-conv-classification/_clip_rasters.py b/fully-conv-classification/_clip_rasters.py new file mode 100644 index 0000000..ecabf65 --- /dev/null +++ b/fully-conv-classification/_clip_rasters.py @@ -0,0 +1,20 @@ +import os +from argparse import ArgumentParser +from data_utils import clip_raster +from glob import glob + +def _parse_path_row(f): + bs = os.path.basename(f).split("_") + return bs[0], bs[1] + +if __name__ == '__main__': + + ap = ArgumentParser() + ap.add_argument('--raster', type=str, 
required=True) + ap.add_argument('--out-dir', type=str, required=True) + ap.add_argument('--outfile', type=str) + args = ap.parse_args() + if args.outfile is None: + outfile = args.raster + path, row = _parse_path_row(args.raster) + clip_raster(args.raster, int(path), int(row), outfile=outfile) diff --git a/fully-conv-classification/crop_data_layer.py b/fully-conv-classification/crop_data_layer.py new file mode 100644 index 0000000..d81d8c5 --- /dev/null +++ b/fully-conv-classification/crop_data_layer.py @@ -0,0 +1,408 @@ +# =============================================================================== +# Copyright 2018 dgketchum +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================== + +import os +import sys + +abspath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(abspath) + +try: + from urllib.request import urlretrieve +except ImportError: + from urllib import urlretrieve +import re +import copy +from xml.etree import ElementTree +from requests import get +from numpy import empty, float32 +from numpy import isin +from rasterio import open as rasopen +from rasterio.dtypes import uint8 +from rasterio.crs import CRS +from rasterio.transform import Affine +from rasterio.mask import mask +from rasterio.warp import reproject, Resampling +from rasterio.warp import calculate_default_transform as cdt +from tempfile import mkdtemp +from bounds import RasterBounds + + +class CropDataLayer(object): + + def __init__(self, target_profile=None, year=None, out_dir=None, from_file=None): + + self.url_base = 'https://nassgeodata.gmu.edu/axis2/services/CDLService/GetCDLFile?year={year}&bbox={wsen}' + + if from_file: + self.from_file = from_file + with rasopen(from_file) as src: + self.cdl = src.read() + self.target_profile = src.profile + self.cdl_empty = False + + else: + self.cdl_empty = True + self.cdl = None + if not out_dir: + self.cdl_location = os.path.join(os.path.dirname(__file__), 'model_data') + else: + self.cdl_location = out_dir + + self.zip_file = os.path.join(self.cdl_location, '{}_30m_cdls.zip'.format(year)) + + self.temp_dir = mkdtemp() + + if target_profile and year: + self.target_profile = target_profile + self.bbox = RasterBounds(profile=self.target_profile, + affine_transform=self.target_profile['transform']) + self.bbox.expand(**{'east': 0.1, 'west': -0.1, 'north': 0.2, 'south': -0.2}) + self.bbox_projected = bb = self.bbox.to_epsg(5070) + bb_str = '{},{},{},{}'.format(bb[0], bb[1], bb[2], bb[3]) + self.request_url = self.url_base.format(year=year, wsen=bb_str) + self.data_url = self._get_data_url() + + self.original_tif = 
None + self.mask = None + self.projection = None + self.reprojection = None + + def get_original_tif(self, out_file=None): + + req = get(self.data_url, verify=False) + + if req.status_code != 200: + raise ValueError('Bad response {} from request.'.format(req.status_code)) + + if not out_file: + self.original_tif = os.path.join(self.temp_dir, os.path.basename(self.data_url)) + else: + self.original_tif = out_file + + with open(self.original_tif, 'wb') as f: + print('Downloading {}'.format(self.data_url)) + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + + def get_conforming_data(self, clip_geometry, keep_original=False, out_file=None): + self.get_original_tif() + self._reproject() + self._mask(clip_geometry) + result = self._resample() + + if not keep_original: + os.remove(self.original_tif) + + if out_file: + self.save(result, self.target_profile, output_filename=os.path.join(self.cdl_location, + 'cdl.tif')) + self.cdl = result + return result + + def get_mask(self, clip_geometry=None, out_file=None): + + arr = None + + if self.cdl_empty: + try: + arr = self.get_conforming_data(clip_geometry=clip_geometry) + except ValueError as e: + print(e.args) + print('Need clip geometry to build cdl') + raise + else: + arr = self.cdl + + crop = list(self.crop.keys()) + msk = isin(arr, crop) + msk = ~msk + msk = msk.astype(uint8) + profile = copy.deepcopy(self.target_profile) + profile['dtype'] = uint8 + if out_file: + with rasopen(out_file, 'w', **profile) as dst: + dst.write(arr.astype(uint8)) + + return msk + + def _reproject(self): + + self.reprojection = os.path.join(self.temp_dir, 'cdl_reprojection.tif') + + with rasopen(self.original_tif, 'r') as src: + src_profile = src.profile + src_bounds = src.bounds + src_array = src.read(1) + + dst_profile = copy.deepcopy(self.target_profile) + dst_profile['dtype'] = float32 + bounds = src_bounds + dst_affine, dst_width, dst_height = cdt(src_profile['crs'], + dst_profile['crs'], + 
src_profile['width'], + src_profile['height'], + *bounds) + + dst_profile.update({'crs': dst_profile['crs'], + 'transform': dst_affine, + 'width': dst_width, + 'height': dst_height}) + + with rasopen(self.reprojection, 'w', **dst_profile) as dst: + dst_array = empty((1, dst_height, dst_width), dtype=float32) + + reproject(src_array, dst_array, src_transform=src_profile['transform'], + src_crs=src_profile['crs'], dst_crs=self.target_profile['crs'], + dst_transform=dst_affine, resampling=Resampling.nearest, + num_threads=2) + + dst.write(dst_array.reshape(1, dst_array.shape[1], dst_array.shape[2])) + + def _mask(self, clip): + + mask_path = os.path.join(self.temp_dir, 'masked.tif') + + with rasopen(self.reprojection) as src: + out_arr, out_trans = mask(src, clip, crop=True, + all_touched=True) + out_meta = src.meta.copy() + out_meta.update({'driver': 'GTiff', + 'height': out_arr.shape[1], + 'width': out_arr.shape[2], + 'transform': out_trans}) + + with rasopen(mask_path, 'w', **out_meta) as dst: + dst.write(out_arr) + + setattr(self, 'mask', mask_path) + delattr(self, 'reprojection') + + def _resample(self): + + resample_path = os.path.join(self.temp_dir, 'resample.tif') + + with rasopen(self.mask, 'r') as src: + array = src.read(1) + profile = src.profile + res = src.res + try: + target_affine = self.target_profile['affine'] + except KeyError: + target_affine = self.target_profile['transform'] + target_res = target_affine.a + res_coeff = res[0] / target_res + + new_array = empty(shape=(1, round(array.shape[0] * res_coeff), + round(array.shape[1] * res_coeff)), dtype=float32) + aff = src.transform + new_affine = Affine(aff.a / res_coeff, aff.b, aff.c, aff.d, aff.e / res_coeff, aff.f) + + profile['transform'] = self.target_profile['transform'] + profile['width'] = self.target_profile['width'] + profile['height'] = self.target_profile['height'] + profile['dtype'] = str(new_array.dtype) + + delattr(self, 'mask') + + with rasopen(resample_path, 'w', **profile) as dst: + 
reproject(array, new_array, src_transform=aff, dst_transform=new_affine, src_crs=src.crs, + dst_crs=src.crs, resampling=Resampling.nearest) + + dst.write(new_array) + + with rasopen(resample_path, 'r') as src: + arr = src.read() + + return arr + + @staticmethod + def save(array, geometry, output_filename, crs=None, return_array=False): + try: + array = array.reshape(1, array.shape[1], array.shape[2]) + except IndexError: + array = array.reshape(1, array.shape[0], array.shape[1]) + geometry['dtype'] = str(array.dtype) + if crs: + geometry['crs'] = CRS({'init': crs}) + with rasopen(output_filename, 'w', **geometry) as dst: + dst.write(array) + if return_array: + return array + return None + + def download_zipped_cdl(self): + if not os.path.isfile(self.zip_file): + req = urlretrieve(self.request_url, self.cdl_location) + if req.status_code != 200: + raise ValueError('Bad response {} from request.'.format(req.status_code)) + + with open(self.zip_file, 'wb') as f: + print('Downloading {}'.format(self.request_url)) + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + + def _get_data_url(self): + r = get(self.request_url, verify=False) + tree = ElementTree.fromstring(r.content) + u = [ElementTree.tostring(e) for e in tree][0].decode("utf-8") + result = re.search('(.*)', u).group(1) + return result + + @property + def crop(self): + return {1: 'Corn', + 2: 'Cotton', + 3: 'Rice', + 4: 'Sorghum', + 5: 'Soybeans', + 6: 'Sunflower', + 10: 'Peanuts', + 11: 'Tobacco', + 12: 'Sweet Corn', + 13: 'Pop or Orn Corn', + 14: 'Mint', + 21: 'Barley', + 22: 'Durum Wheat', + 23: 'Spring Wheat', + 24: 'Winter Wheat', + 25: 'Other Small Grains', + 26: 'Dbl Crop WinWht / Soybeans', + 27: 'Rye', + 28: 'Oats', + 29: 'Millet', + 30: 'Speltz', + 31: 'Canola', + 32: 'Flaxseed', + 33: 'Safflower', + 34: 'Rape Seed', + 35: 'Mustard', + 36: 'Alfalfa', + 37: 'Other Hay / NonAlfalfa', + 38: 'Camelina', + 39: 'Buckwheat', + 41: 'Sugarbeets', + 42: 'Dry Beans', + 43: 
'Potatoes', + 44: 'Other Crops', + 45: 'Sugarcane', + 46: 'Sweet Potatoes', + 47: 'Misc Vegs & Fruits', + 48: 'Watermelons', + 49: 'Onions', + 50: 'Cucumbers', + 51: 'Chick Peas', + 52: 'Lentils', + 53: 'Peas', + 54: 'Tomatoes', + 55: 'Caneberries', + 56: 'Hops', + 57: 'Herbs', + 58: 'Clover/Wildflowers', + 61: 'Fallow/Idle Cropland', + 66: 'Cherries', + 67: 'Peaches', + 68: 'Apples', + 69: 'Grapes', + 70: 'Christmas Trees', + 71: 'Other Tree Crops', + 72: 'Citrus', + 74: 'Pecans', + 75: 'Almonds', + 76: 'Walnuts', + 77: 'Pears', + 204: 'Pistachios', + 205: 'Triticale', + 206: 'Carrots', + 207: 'Asparagus', + 208: 'Garlic', + 209: 'Cantaloupes', + 210: 'Prunes', + 211: 'Olives', + 212: 'Oranges', + 213: 'Honeydew Melons', + 214: 'Broccoli', + 216: 'Peppers', + 217: 'Pomegranates', + 218: 'Nectarines', + 219: 'Greens', + 220: 'Plums', + 221: 'Strawberries', + 222: 'Squash', + 223: 'Apricots', + 224: 'Vetch', + 225: 'Dbl Crop WinWht/Corn', + 226: 'Dbl Crop Oats/Corn', + 227: 'Lettuce', + 229: 'Pumpkins', + 230: 'Dbl Crop Lettuce/Durum Wht', + 231: 'Dbl Crop Lettuce/Cantaloupe', + 232: 'Dbl Crop Lettuce/Cotton', + 233: 'Dbl Crop Lettuce/Barley', + 234: 'Dbl Crop Durum Wht/Sorghum', + 235: 'Dbl Crop Barley/Sorghum', + 236: 'Dbl Crop WinWht/Sorghum', + 237: 'Dbl Crop Barley/Corn', + 238: 'Dbl Crop WinWht/Cotton', + 239: 'Dbl Crop Soybeans/Cotton', + 240: 'Dbl Crop Soybeans/Oats', + 241: 'Dbl Crop Corn/Soybeans', + 242: 'Blueberries', + 243: 'Cabbage', + 244: 'Cauliflower', + 245: 'Celery', + 246: 'Radishes', + 247: 'Turnips', + 248: 'Eggplants', + 249: 'Gourds', + 250: 'Cranberries', + 254: 'Dbl Crop Barley/Soybeans'} + + @property + def non_crop(self): + return {37: 'Other Hay/Non Alfalfa', + 59: 'Sod/Grass Seed', + 60: 'Switchgrass', + 63: 'Forest', + 64: 'Shrubland', + 65: 'Barren', + 81: 'Clouds/No Data', + 82: 'Developed', + 83: 'Water', + 87: 'Wetlands', + 88: 'Nonag/Undefined', + 92: 'Aquaculture', + 111: 'Open Water', + 112: 'Perennial Ice/Snow', + 121: 
'Developed/Open Space', + 122: 'Developed/Low Intensity', + 123: 'Developed/Med Intensity', + 124: 'Developed/High Intensity', + 131: 'Barren', + 141: 'Deciduous Forest', + 142: 'Evergreen Forest', + 143: 'Mixed Forest', + 152: 'Shrubland', + 176: 'Grass/Pasture', + 190: 'Woody Wetlands', + 195: 'Herbaceous Wetlands'} + + +if __name__ == '__main__': + pass +# ========================= EOF ==================================================================== From 3c049811b334dc534b141fc743b5db93c1782016 Mon Sep 17 00:00:00 2001 From: Thomas Colligan Date: Fri, 15 Nov 2019 11:33:47 -0700 Subject: [PATCH 89/89] added cdl training data --- .gitignore | 27 ++++ fully-conv-classification/data_generators.py | 45 ++++-- fully-conv-classification/data_utils.py | 64 ++++++-- fully-conv-classification/evaluate_image.py | 9 +- .../extract_training_data.py | 73 +++++++-- fully-conv-classification/losses.py | 27 +++- fully-conv-classification/models.py | 65 ++++++++ .../precision_and_recall.py | 20 ++- fully-conv-classification/prepare_images.py | 12 +- fully-conv-classification/runspec.py | 140 +++++++++++++++++- fully-conv-classification/shapefile_utils.py | 4 +- .../train_model_random_files.py | 42 +++--- fully-conv-classification/train_utils.py | 61 +++++++- 13 files changed, 505 insertions(+), 84 deletions(-) diff --git a/.gitignore b/.gitignore index 8689bda..cb6d1bc 100644 --- a/.gitignore +++ b/.gitignore @@ -112,3 +112,30 @@ spatial_data/MT tests/data gee_training.py model_data/ +fully-conv-classification/*shp +fully-conv-classification/*dbf +fully-conv-classification/*cpg +fully-conv-classification/*prj +fully-conv-classification/*sh +fully-conv-classification/*tif +fully-conv-classification/*txt +fully-conv-classification/*shx +fully-conv-classification/compare_model_outputs/ +fully-conv-classification/binary/ +fully-conv-classification/all_models/ +fully-conv-classification/models/ +fully-conv-classification/copy_files.py +fully-conv-classification/examine_dates.py 
+fully-conv-classification/evaluated_over_mt/ +fully-conv-classification/evaluated_images/ +fully-conv-classification/no_expit/ +fully-conv-classification/tags +fully-conv-classification/random_tifs/ +fully-conv-classification/random_majority_files/ +fully-conv-classification/model_ensemble.py +fully-conv-classification/hyperparameters/ +fully-conv-classification/mean_of_three/ +fully-conv-classification/ensemble_models/ +fully-conv-classification/precision_and_recall_multiple_files.py +fully-conv-classification/plot_cmat.py +fully-conv-classification/shapefile_data/ diff --git a/fully-conv-classification/data_generators.py b/fully-conv-classification/data_generators.py index 82b9933..d36d865 100644 --- a/fully-conv-classification/data_generators.py +++ b/fully-conv-classification/data_generators.py @@ -22,18 +22,23 @@ from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, filter_shapefile_overlapping, mask_raster_to_features +from runspec import cdl_crop_values, cdl_non_crop_values class SatDataGenerator(Sequence): def __init__(self, batch_size, n_classes, balance_pixels_per_batch=False, training=True, - apply_irrigated_weights=False): + apply_irrigated_weights=False, augment_data=False, use_cdl=False): self.batch_size = batch_size self.n_classes = n_classes + self.use_cdl = use_cdl self.training = training self.balance_pixels_per_batch = balance_pixels_per_batch self.apply_irrigated_weights = apply_irrigated_weights + self.augment_data = augment_data + if not self.training: + self.augment_data = False def _get_files(self): # Required override. 
@@ -60,23 +65,32 @@ def _from_pickle(self, filename): def _labels_and_features(self, data_tiles): features = [] + crop = list(cdl_crop_values().keys()) + if self.use_cdl: + cdls = [] one_hots = [] if self.balance_pixels_per_batch: min_count = self._count_pixels(data_tiles) - for tile in data_tiles: data = tile['data'] one_hot = tile['one_hot'].astype(np.int) + if self.use_cdl: + cdl = tile['cdl'].astype(np.int) + cdl = np.isin(cdl, crop) + cdls.append(cdl) if self.apply_irrigated_weights: one_hot[:, :, 0] *= 50 class_code = tile['class_code'] if self.balance_pixels_per_batch: one_hot = self._balance_pixels(one_hot, min_count) - if self.training: + if self.augment_data: data, one_hot = _augment_data(data, one_hot) features.append(data) one_hots.append(one_hot) + if self.use_cdl: + return [np.asarray(features)], [np.asarray(one_hots), np.asarray(cdls)] + return [np.asarray(features)], [np.asarray(one_hots)] @@ -145,7 +159,7 @@ def _binary_labels_and_features(self, data_tiles): if self.balance_pixels_per_batch: one_hot = self._balance_pixels(one_hot, min_count, binary=True) - if self.training: + if self.augment_data: data, binary_one_hot = _augment_data(data, binary_one_hot, binary=True) binary_one_hot = np.expand_dims(binary_one_hot, 2) features.append(data) @@ -174,14 +188,15 @@ class DataGenerator(SatDataGenerator): def __init__(self, data_directory, batch_size, n_classes=None, training=True, target_classes=None, balance=False, balance_examples_per_batch=False, balance_pixels_per_batch=False, apply_irrigated_weights=False, - steps_per_epoch=None): + steps_per_epoch=None, augment_data=False, use_cdl=False): # Assert that all three can't be true - super().__init__(batch_size, n_classes, balance_pixels_per_batch, training) + super().__init__(batch_size, n_classes, balance_pixels_per_batch, training, augment_data) self.data_directory = data_directory self.balance = balance self.balance_examples_per_batch = balance_examples_per_batch self.target_classes = target_classes 
self.steps_per_epoch = steps_per_epoch + self.use_cdl = use_cdl self._get_files() @@ -241,7 +256,8 @@ def __len__(self): def on_epoch_end(self): - # Recreates the file list + # Recreates the file list if you're training, + # otherwise the validation file list stays the same. self._on_epoch_end(self.dirs, first=False) @@ -264,9 +280,20 @@ def _unbalanced_file_list(self, dirs, first): self.files = [] for d in dirs: self.files.extend(glob(os.path.join(d, "*pkl"))) + shuffle(self.files) + self.entire_corpus = self.files.copy() + if not self.training and self.steps_per_epoch is not None: + self.entire_corpus = self.files.copy() + self.files = self.entire_corpus[:self.steps_per_epoch*self.batch_size] + elif not self.training: + self.files = self.entire_corpus return len(self.files) else: - shuffle(self.files) + shuffle(self.entire_corpus) + if self.steps_per_epoch is None: + self.files = self.entire_corpus + else: + self.files = self.entire_corpus[:self.steps_per_epoch*self.batch_size] def _balanced_file_list(self, dirs, first): @@ -289,8 +316,8 @@ def _balanced_file_list(self, dirs, first): self.files.extend(sample(self.file_dict[key], self.n_minority)) shuffle(self.files) - def _balanced_queue(self, dirs, first): + # do this until the majority class file list is empty self.file_dict = {} self.n_minority = np.inf for d in dirs: diff --git a/fully-conv-classification/data_utils.py b/fully-conv-classification/data_utils.py index 8d72bed..241449a 100644 --- a/fully-conv-classification/data_utils.py +++ b/fully-conv-classification/data_utils.py @@ -10,20 +10,56 @@ from lxml import html from requests import get from copy import deepcopy -from shapely.geometry import shape +from shapely.geometry import shape, mapping from collections import defaultdict from rasterio import float32, open as rasopen +from shapely.geometry import shape, Polygon, mapping from rasterio.mask import mask from pickle import load from multiprocessing import Pool +from sat_image.image import Landsat8 
from prepare_images import ImageStack +from crop_data_layer import CropDataLayer as Cdl from shapefile_utils import get_features from sat_image.warped_vrt import warp_single_image from runspec import landsat_rasters, static_rasters, climate_rasters WRS2 = '../spatial_data/wrs2_descending_usa.shp' +def download_cdl_over_path_row(path, row, year, image_directory): + + out_dir = os.path.join(image_directory, '_'.join([str(path), str(row), str(year)])) + cdl_mask = os.path.join(out_dir, "cdl_mask.tif") + if os.path.isfile(cdl_mask): + print("cdl already downloaded for {} {} {}".format(path, row, year)) + return + + sub_dirs = os.listdir(out_dir) + if not len(sub_dirs): + raise ValueError("images not downloaded for {} {} {}".format(path, row, year)) + + print(path, row, year) + for r in sub_dirs: + if os.path.isdir(os.path.join(out_dir, r)): + if 'climate' not in r: + random_landsat_dir = os.path.join(out_dir, r) + break + + landsat = glob(os.path.join(random_landsat_dir, "*TIF"))[0] + landsat_pic = landsat + landsat = Landsat8(random_landsat_dir) + try: + polygon = landsat.get_tile_geometry() + cdl = Cdl(year=year, target_profile=landsat.profile) + cdl.get_mask(clip_geometry=polygon, out_file=cdl_mask) + except Exception as e: + print(e.args) + print(landsat_pic) + + + + def download_images_over_shapefile(shapefile, image_directory, year): '''Downloads p/r corresponding to the location of the shapefile. Image_directory: where to save the raw images. 
@@ -391,11 +427,16 @@ def all_rasters(image_directory, satellite=8): return band_map +def _get_path_row_geometry(path, row): + shp = gpd.read_file(WRS2) + out = shp[shp['PATH'] == int(path)] + out = out[out['ROW'] == int(row)] + return out + + def clip_raster(evaluated, path, row, outfile=None): - shp = gpd.read_file(WRS2) - out = shp[shp['PATH'] == path] - out = out[out['ROW'] == row] + out = _get_path_row_geometry(path, row) with rasopen(evaluated, 'r') as src: out = out.to_crs(src.crs['init']) @@ -425,13 +466,14 @@ def load_raster(raster_name): meta = src.meta.copy() return arr, meta +prs = [[34, 26], [36, 27], [37,28], [34,27], [39,26], [37,29], [42,27], [41,28], [39,29], [36,28], [40,27], [37,26], [35,26], [38,29], [40,28], [38,27], [35,27], [42,26], [41,26], [40,29], [34,29], [35,29], [38,26], [36,26], [39,28], [41,27], [38,28], [37,27], [36,29], [35,28], [43,26], [39,27], [40,26], [43,27], [34,28]] + if __name__ == "__main__": from runspec import landsat_rasters, climate_rasters - for path in range(34, 44): - for row in range(26, 30): - for sat in [7, 8]: - for year in [2012, 2013, 2014, 2015]: - if year < 2012 and sat == 8: - continue - download_from_pr(path, row, year, '/home/thomas/share/landsat_test/', 7) + + year = 2013 + for path, row in prs: + # download_from_pr(int(path), int(row), int(year), '/home/thomas/share/image_data/') + download_cdl_over_path_row(path, row, year, '/home/thomas/share/image_data/') + diff --git a/fully-conv-classification/evaluate_image.py b/fully-conv-classification/evaluate_image.py index 8a0e4ae..ba3b114 100755 --- a/fully-conv-classification/evaluate_image.py +++ b/fully-conv-classification/evaluate_image.py @@ -20,8 +20,7 @@ from data_utils import (save_raster, stack_rasters, stack_rasters_multiprocess, paths_map_multiple_scenes, load_raster, clip_raster, paths_mapping_single_scene, mean_of_three) -from losses import (multiclass_acc, masked_binary_xent, dice_loss, binary_acc, binary_focal_loss, - masked_categorical_xent) 
+from losses import * from extract_training_data import concatenate_fmasks _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) @@ -128,8 +127,12 @@ def evaluate_image_many_shot(image_directory, model_paths, n_classes=4, if not args.use_gpu: os.environ['CUDA_VISIBLE_DEVICES'] = '-1' + + mfl = multiclass_focal_loss() + custom_objects = {'mb':masked_binary_xent, 'multiclass_acc':multiclass_acc, - 'binary_acc':binary_acc, 'masked_categorical_xent':masked_categorical_xent} + 'binary_acc':binary_acc, 'masked_categorical_xent':masked_categorical_xent, + 'multiclass_FL':mfl} model_paths = args.model if args.evaluate_all_mt: for path, row in irrigated_path_rows_mt(): diff --git a/fully-conv-classification/extract_training_data.py b/fully-conv-classification/extract_training_data.py index c787f0f..5f484d1 100644 --- a/fully-conv-classification/extract_training_data.py +++ b/fully-conv-classification/extract_training_data.py @@ -11,14 +11,16 @@ from glob import glob from random import sample, shuffle, choice from scipy.ndimage.morphology import distance_transform_edt -from rasterio import open as rasopen +from rasterio import open as rasopen, band from rasterio.errors import RasterioIOError +from rasterio.warp import calculate_default_transform, reproject, Resampling from skimage import transform from sat_image.warped_vrt import warp_single_image from multiprocessing import Pool from collections import defaultdict -from runspec import landsat_rasters, climate_rasters, mask_rasters, assign_shapefile_class_code, assign_shapefile_year +from runspec import (landsat_rasters, climate_rasters, mask_rasters, assign_shapefile_class_code, + assign_shapefile_year, cdl_crop_values, cdl_non_crop_values) from data_utils import load_raster, paths_map_multiple_scenes, stack_rasters, stack_rasters_multiprocess, download_from_pr, paths_mapping_single_scene, mean_of_three, median_of_three from shapefile_utils import get_shapefile_path_row, mask_raster_to_shapefile, 
filter_shapefile_overlapping, mask_raster_to_features @@ -32,11 +34,12 @@ def distance_map(mask): class DataTile(object): - def __init__(self, data, one_hot, class_code): + def __init__(self, data, one_hot, class_code, cdl_mask): self.dict = {} self.dict['data'] = data self.dict['one_hot'] = one_hot self.dict['class_code'] = class_code + self.dict['cdl'] = cdl_mask def to_pickle(self, training_directory): if not os.path.isdir(training_directory): @@ -93,6 +96,36 @@ def concatenate_fmasks(image_directory, class_mask, class_mask_geo, nodata=0, ta return class_mask +def reproject_if_needed(source, target): + with rasopen(target, 'r') as dst: + dst_crs = dst.meta['crs'] + + with rasopen(source) as src: + if src.meta['crs'] == dst_crs: + return source + transform, width, height = calculate_default_transform( + src.crs, dst_crs, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': dst_crs, + 'transform': transform, + 'width': width, + 'height': height + }) + + with rasopen(source, 'w', **kwargs) as dst: + for i in range(1, src.count + 1): + reproject( + source=band(src, i), + destination=band(dst, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=dst_crs, + resampling=Resampling.nearest) + return source + + def extract_training_data_over_path_row(test_train_shapefiles, path, row, year, image_directory, training_data_root_directory, n_classes, assign_shapefile_class_code, path_map_func=None, preprocessing_func=None, tile_size=608): @@ -111,13 +144,18 @@ def extract_training_data_over_path_row(test_train_shapefiles, path, row, year, mask_file = _random_tif_from_directory(image_path) mask, mask_meta = load_raster(mask_file) mask = np.zeros_like(mask).astype(np.int) + cdl_path = os.path.join(image_path, 'cdl_mask.tif') + cdl_raster, cdl_meta = load_raster(cdl_path) + if mask.shape != cdl_raster.shape: + cdl_raster = warp_single_image(cdl_path, mask_meta) + cdl_raster = np.swapaxes(cdl_raster, 0, 2) 
try: image_stack = stack_rasters_multiprocess(image_path_maps, target_geo=mask_meta, target_shape=mask.shape) + image_stack = np.swapaxes(image_stack, 0, 2) except RasterioIOError as e: print("Redownload images for", path_row_year) print(e) return - image_stack = median_of_three(image_path_maps, image_stack, mask.shape) for key, shapefiles in test_train_shapefiles.items(): if key.lower() not in ('test', 'train'): raise ValueError("expected key to be one of case-insenstive {test, train},\ @@ -137,12 +175,11 @@ def extract_training_data_over_path_row(test_train_shapefiles, path, row, year, else: class_labels[~out.mask] = class_code class_labels = concatenate_fmasks(image_path, class_labels, mask_meta) - image_stack = np.swapaxes(image_stack, 0, 2) class_labels = np.swapaxes(class_labels, 0, 2) class_labels = np.squeeze(class_labels) tiles_y, tiles_x = _target_indices_from_class_labels(class_labels, tile_size) - _save_training_data_from_indices(image_stack, class_labels, training_data_directory, - n_classes, tiles_x, tiles_y, tile_size) + _save_training_data_from_indices(image_stack, class_labels, cdl_raster, + training_data_directory, n_classes, tiles_x, tiles_y, tile_size) def _target_indices_from_class_labels(class_labels, tile_size): @@ -159,6 +196,8 @@ def _target_indices_from_class_labels(class_labels, tile_size): def _assign_class_code_to_tile(class_label_tile): + if np.any(class_label_tile == 3): + return 3 if np.all(class_label_tile != 0): unique, unique_count = np.unique(class_label_tile, return_counts=True) unique = unique[:-1] # assume np.ma.masked is last. 
@@ -168,8 +207,8 @@ def _assign_class_code_to_tile(class_label_tile): return 0 -def _save_training_data_from_indices(image_stack, class_labels, training_data_directory, - n_classes, indices_y, indices_x, tile_size): +def _save_training_data_from_indices(image_stack, class_labels, cdl_raster, + training_data_directory, n_classes, indices_y, indices_x, tile_size): out = [] for i in indices_x: for j in indices_y: @@ -181,8 +220,10 @@ def _save_training_data_from_indices(image_stack, class_labels, training_data_di continue class_code = _assign_class_code_to_tile(class_label_tile) sub_one_hot = _one_hot_from_labels(class_label_tile, n_classes) + sub_cdl = cdl_raster[i:i+tile_size, j:j+tile_size, :] + sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] sub_image_stack = image_stack[i:i+tile_size, j:j+tile_size, :] - dt = DataTile(sub_image_stack, sub_one_hot, class_code) + dt = DataTile(sub_image_stack, sub_one_hot, class_code, sub_cdl) out.append(dt) if len(out) > 50: with Pool() as pool: @@ -320,7 +361,10 @@ def make_border_labels(mask, border_width): image_directory = '/home/thomas/share/image_data/' shapefiles = glob('shapefile_data/test/*.shp') + glob('shapefile_data/train/*.shp') - training_root_directory = '/home/thomas/ssd/test_extract/' + training_root_directory = '/home/thomas/share/multiclass_with_separate_fallow_directory_and_cdl/' + if not os.path.isdir(training_root_directory): + os.makedirs(training_root_directory) + n_classes = 4 done = set() @@ -336,8 +380,7 @@ def make_border_labels(mask, border_width): year = assign_shapefile_year(f) print("extracting data for", path, row, year) paths_map_func = paths_map_multiple_scenes - min_data_tiles_to_cover_labels(train_shapefiles, path, row, 2013, image_directory) - # test_train_shapefiles = {'test':test_shapefiles, 'train':train_shapefiles} - # extract_training_data_over_path_row(test_train_shapefiles, path, row, year, image_directory, - # training_root_directory, n_classes, 
assign_shapefile_class_code, path_map_func=paths_map_func) + test_train_shapefiles = {'test':test_shapefiles, 'train':train_shapefiles} + extract_training_data_over_path_row(test_train_shapefiles, path, row, year, image_directory, + training_root_directory, n_classes, assign_shapefile_class_code, path_map_func=paths_map_func) diff --git a/fully-conv-classification/losses.py b/fully-conv-classification/losses.py index 407bc1c..8b1ae61 100644 --- a/fully-conv-classification/losses.py +++ b/fully-conv-classification/losses.py @@ -1,5 +1,6 @@ import keras.backend as K import tensorflow as tf +from sklearn.metrics import confusion_matrix _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) @@ -30,6 +31,19 @@ def bfl(y_true, y_pred): return bfl +def precision_and_recall(y_true, y_pred): + y_true_sum = tf.math.reduce_sum(y_true, axis=-1) + mask = tf.not_equal(y_true_sum, 0) + y_pred = tf.nn.softmax(y_pred, axis=-1) + y_pred = tf.argmax(y_pred, axis=-1) + y_true = tf.argmax(y_true, axis=-1) + y_pred = tf.boolean_mask(y_pred, mask) + y_true = tf.boolean_mask(y_true, mask) + # precision: out of everything I predicted irrigated, what was actually irrigated? + return 1 + + + def multiclass_focal_loss(gamma=2, alpha=0.25): @@ -42,7 +56,7 @@ def multiclass_FL(y_true, y_pred): # xen = -y_true * tf.math.log(probabilities) # all 0s where y_true is all 0s # loss = alpha*tf.math.pow(1-probabilities, gamma) * xen # return tf.math.reduce_mean(loss) - probabilities = tf.nn.softmax(y_pred) + probabilities = tf.nn.softmax(y_pred, axis=-1) xen = -y_true * tf.math.log(probabilities) # all 0s where y_true is all 0s complement = tf.dtypes.cast(tf.equal(y_true, 0), tf.float32) negative_probabilities = -tf.math.pow(complement*probabilities, @@ -89,6 +103,7 @@ def mb(y_true, y_pred): return mb + def masked_categorical_xent(y_true, y_pred): # One_hot matrix is all zeros along depth if there isn't # a data pixel there. 
Accordingly, we @@ -96,10 +111,10 @@ def masked_categorical_xent(y_true, y_pred): # wait what? I don't need to even mask this! # the one_hot matrix contains depthwise 0s # where there isn't data... - # y_true_sum = tf.math.reduce_sum(y_true, axis=-1) - # mask = tf.not_equal(y_true_sum, 0) - # y_true = tf.boolean_mask(y_true, mask) - # y_pred = tf.boolean_mask(y_pred, mask) + y_true_sum = tf.math.reduce_sum(y_true, axis=-1) + mask = tf.not_equal(y_true_sum, 0) + y_true = tf.boolean_mask(y_true, mask) + y_pred = tf.boolean_mask(y_pred, mask) return tf.nn.softmax_cross_entropy_with_logits_v2(y_true, y_pred) @@ -111,7 +126,7 @@ def binary_acc(y_true, y_pred): return K.mean(K.equal(y_true, K.round(y_pred))) -def multiclass_acc(y_true, y_pred): +def m_acc(y_true, y_pred): y_true_sum = tf.reduce_sum(y_true, axis=-1) mask = tf.not_equal(y_true_sum, 0) y_pred = tf.nn.softmax(y_pred) diff --git a/fully-conv-classification/models.py b/fully-conv-classification/models.py index 717b707..97252f1 100644 --- a/fully-conv-classification/models.py +++ b/fully-conv-classification/models.py @@ -42,6 +42,71 @@ def ConvBNRelu(x, filters=64): _epsilon = tf.convert_to_tensor(K.epsilon(), tf.float32) +def two_headed_unet(input_shape, initial_exp=6, n_classes=5): + + features = Input(shape=input_shape) + _power = initial_exp + exp = 2 + + c1 = ConvBlock(features, exp**_power) + mp1 = MaxPooling2D(pool_size=2, strides=2)(c1) + + _power += 1 + + c2 = ConvBlock(mp1, exp**_power) + mp2 = MaxPooling2D(pool_size=2, strides=2)(c2) + + _power += 1 + + c3 = ConvBlock(mp2, exp**_power) + mp3 = MaxPooling2D(pool_size=2, strides=2)(c3) + + _power += 1 + + c4 = ConvBlock(mp3, exp**_power) + mp4 = MaxPooling2D(pool_size=2, strides=2)(c4) + + _power += 1 + + # 1024 filters + c5 = ConvBlock(mp4, exp**_power) + _power -= 1 + + u1 = UpSampling2D(size=(2, 2))(c5) + c6 = ConvBNRelu(u1, filters=exp**_power) + u1_c4 = Concatenate()([c6, c4]) + c7 = ConvBlock(u1_c4, filters=exp**_power) + + _power -= 1 + + u2 = 
UpSampling2D(size=(2, 2))(c7) + c8 = ConvBNRelu(u2, filters=exp**_power) + u2_c3 = Concatenate()([c8, c3]) + c9 = ConvBlock(u2_c3, filters=exp**_power) + + _power -= 1 + + u3 = UpSampling2D(size=(2, 2))(c9) + c10 = ConvBNRelu(u3, filters=exp**_power) + u3_c2 = Concatenate()([c10, c2]) + c11 = ConvBlock(u3_c2, filters=exp**_power) + + _power -= 1 + u4 = UpSampling2D(size=(2, 2))(c11) + c12 = ConvBNRelu(u4, filters=exp**_power) + u4_c1 = Concatenate()([c12, c1]) + c13 = ConvBlock(u4_c1, filters=exp**_power) + cdl_logits = Conv2D(filters=1, kernel_size=1, strides=1, + activation='sigmoid', name='cdl')(c13) + + concat_final = Concatenate()([cdl_logits, c13]) + + irr_logits = Conv2D(filters=n_classes, kernel_size=1, strides=1, + activation=None, name='irr')(concat_final) + + return Model(inputs=[features], outputs=[irr_logits, cdl_logits]) + + def unet(input_shape, initial_exp=6, n_classes=5): diff --git a/fully-conv-classification/precision_and_recall.py b/fully-conv-classification/precision_and_recall.py index cd0837c..d5c2f17 100644 --- a/fully-conv-classification/precision_and_recall.py +++ b/fully-conv-classification/precision_and_recall.py @@ -4,10 +4,9 @@ tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL) from tensorflow.keras.models import load_model from glob import glob +from numpy import sum as nsum - -from losses import (binary_focal_loss, binary_acc, masked_binary_xent, multiclass_acc, - masked_categorical_xent) +from losses import * from data_generators import DataGenerator from train_utils import confusion_matrix_from_generator @@ -24,17 +23,26 @@ args = parser.parse_args() if not args.use_gpu: os.environ['CUDA_VISIBLE_DEVICES'] = '-1' - #custom_objects = {'mb':masked_binary_xent(pos_weight=1.0), 'binary_acc':binary_acc} + + mfl = multiclass_focal_loss() + custom_objects = {'mb':masked_binary_xent, 'multiclass_acc':multiclass_acc, - 'binary_acc':binary_acc, 'masked_categorical_xent':masked_categorical_xent} + 'binary_acc':binary_acc, 
'masked_categorical_xent':masked_categorical_xent, + 'multiclass_FL':mfl} try: model = load_model(args.model, custom_objects=custom_objects) except ValueError as e: print(e.args) raise batch_size = args.batch_size + try: + target_class = int(args.target_class) + except TypeError as e: + target_class = None test_generator = DataGenerator(data_directory=args.test_data_path, batch_size=batch_size, - training=False, target_classes=args.target_class) + training=False, target_classes=target_class) cmat, prec, recall = confusion_matrix_from_generator(test_generator, batch_size, model, n_classes=args.n_classes) + print(cmat) + print(nsum(cmat, axis=1)) print('model {} has \n p:{}\n r:{}'.format(args.model, prec, recall)) diff --git a/fully-conv-classification/prepare_images.py b/fully-conv-classification/prepare_images.py index 7715fb7..f9fd1fd 100644 --- a/fully-conv-classification/prepare_images.py +++ b/fully-conv-classification/prepare_images.py @@ -87,8 +87,8 @@ def __init__(self, satellite, path=None, row=None, lat=None, lon=None, root=None self.exclude_rasters = [] if year and not start and not end: - self.start = '{}-01-01'.format(self.year) - self.end = '{}-12-30'.format(self.year) + self.start = '{}-05-01'.format(self.year) + self.end = '{}-10-15'.format(self.year) def build_training(self): self.get_landsat(fmask=True) @@ -136,10 +136,10 @@ def get_landsat(self, fmask=False): latitude=self.lat, longitude=self.lon, output_path=self.root, max_cloud_percent=self.max_cloud) - #l g.select_scenes(100) - # print('this should download after') - # self.scenes = g.selected_scenes - g.download(list_type='all') + g.select_scenes(self.n) + self.scenes = g.selected_scenes + g.download(list_type='selected') + # g.download(list_type='all') self.image_dirs = [x[0] for x in os.walk(self.root) if os.path.basename(x[0])[:3] in self.landsat_mapping.keys()] diff --git a/fully-conv-classification/runspec.py b/fully-conv-classification/runspec.py index 5588ee8..f1eb785 100644 --- 
a/fully-conv-classification/runspec.py +++ b/fully-conv-classification/runspec.py @@ -28,9 +28,9 @@ def assign_shapefile_class_code_binary(shapefile): def assign_shapefile_class_code(shapefile): if 'irrigated' in shapefile and 'unirrigated' not in shapefile: return 0 - if 'unirrigated' in shapefile or 'wetlands' in shapefile: + if 'unirrigated' in shapefile: return 1 - if 'uncultivated' in shapefile: + if 'uncultivated' in shapefile or 'wetlands' in shapefile: return 2 if 'fallow' in shapefile: return 3 @@ -82,6 +82,142 @@ def irrigated_path_rows_mt(): (36, 27), (36, 28), (37, 26)] return ls +def cdl_crop_values(): + return {1: 'Corn', + 2: 'Cotton', + 3: 'Rice', + 4: 'Sorghum', + 5: 'Soybeans', + 6: 'Sunflower', + 10: 'Peanuts', + 11: 'Tobacco', + 12: 'Sweet Corn', + 13: 'Pop or Orn Corn', + 14: 'Mint', + 21: 'Barley', + 22: 'Durum Wheat', + 23: 'Spring Wheat', + 24: 'Winter Wheat', + 25: 'Other Small Grains', + 26: 'Dbl Crop WinWht / Soybeans', + 27: 'Rye', + 28: 'Oats', + 29: 'Millet', + 30: 'Speltz', + 31: 'Canola', + 32: 'Flaxseed', + 33: 'Safflower', + 34: 'Rape Seed', + 35: 'Mustard', + 36: 'Alfalfa', + 37: 'Other Hay / NonAlfalfa', + 38: 'Camelina', + 39: 'Buckwheat', + 41: 'Sugarbeets', + 42: 'Dry Beans', + 43: 'Potatoes', + 44: 'Other Crops', + 45: 'Sugarcane', + 46: 'Sweet Potatoes', + 47: 'Misc Vegs & Fruits', + 48: 'Watermelons', + 49: 'Onions', + 50: 'Cucumbers', + 51: 'Chick Peas', + 52: 'Lentils', + 53: 'Peas', + 54: 'Tomatoes', + 55: 'Caneberries', + 56: 'Hops', + 57: 'Herbs', + 58: 'Clover/Wildflowers', + 61: 'Fallow/Idle Cropland', + 66: 'Cherries', + 67: 'Peaches', + 68: 'Apples', + 69: 'Grapes', + 70: 'Christmas Trees', + 71: 'Other Tree Crops', + 72: 'Citrus', + 74: 'Pecans', + 75: 'Almonds', + 76: 'Walnuts', + 77: 'Pears', + 204: 'Pistachios', + 205: 'Triticale', + 206: 'Carrots', + 207: 'Asparagus', + 208: 'Garlic', + 209: 'Cantaloupes', + 210: 'Prunes', + 211: 'Olives', + 212: 'Oranges', + 213: 'Honeydew Melons', + 214: 'Broccoli', + 216: 
'Peppers', + 217: 'Pomegranates', + 218: 'Nectarines', + 219: 'Greens', + 220: 'Plums', + 221: 'Strawberries', + 222: 'Squash', + 223: 'Apricots', + 224: 'Vetch', + 225: 'Dbl Crop WinWht/Corn', + 226: 'Dbl Crop Oats/Corn', + 227: 'Lettuce', + 229: 'Pumpkins', + 230: 'Dbl Crop Lettuce/Durum Wht', + 231: 'Dbl Crop Lettuce/Cantaloupe', + 232: 'Dbl Crop Lettuce/Cotton', + 233: 'Dbl Crop Lettuce/Barley', + 234: 'Dbl Crop Durum Wht/Sorghum', + 235: 'Dbl Crop Barley/Sorghum', + 236: 'Dbl Crop WinWht/Sorghum', + 237: 'Dbl Crop Barley/Corn', + 238: 'Dbl Crop WinWht/Cotton', + 239: 'Dbl Crop Soybeans/Cotton', + 240: 'Dbl Crop Soybeans/Oats', + 241: 'Dbl Crop Corn/Soybeans', + 242: 'Blueberries', + 243: 'Cabbage', + 244: 'Cauliflower', + 245: 'Celery', + 246: 'Radishes', + 247: 'Turnips', + 248: 'Eggplants', + 249: 'Gourds', + 250: 'Cranberries', + 254: 'Dbl Crop Barley/Soybeans'} + + +def cdl_non_crop_values(): + return {37: 'Other Hay/Non Alfalfa', + 59: 'Sod/Grass Seed', + 60: 'Switchgrass', + 63: 'Forest', + 64: 'Shrubland', + 65: 'Barren', + 81: 'Clouds/No Data', + 82: 'Developed', + 83: 'Water', + 87: 'Wetlands', + 88: 'Nonag/Undefined', + 92: 'Aquaculture', + 111: 'Open Water', + 112: 'Perennial Ice/Snow', + 121: 'Developed/Open Space', + 122: 'Developed/Low Intensity', + 123: 'Developed/Med Intensity', + 124: 'Developed/High Intensity', + 131: 'Barren', + 141: 'Deciduous Forest', + 142: 'Evergreen Forest', + 143: 'Mixed Forest', + 152: 'Shrubland', + 176: 'Grass/Pasture', + 190: 'Woody Wetlands', + 195: 'Herbaceous Wetlands'} if __name__ == '__main__': diff --git a/fully-conv-classification/shapefile_utils.py b/fully-conv-classification/shapefile_utils.py index a62d0c6..79f7644 100644 --- a/fully-conv-classification/shapefile_utils.py +++ b/fully-conv-classification/shapefile_utils.py @@ -19,7 +19,8 @@ def get_features(gdf): def mask_raster_to_shapefile(shapefile, raster, return_binary=True): - ''' Generates a mask with 1 everywhere + ''' + Generates a mask with 1 
everywhere shapefile data is present and a no_data value everywhere else. no_data is -1 in this case, as it is never a valid class label. Switching coordinate reference systems is important here, or @@ -43,7 +44,6 @@ def mask_raster_to_shapefile(shapefile, raster, return_binary=True): def mask_raster_to_features(raster, features, features_meta): # This function is useful when you don't have access to the # file from which the features came or if the file doesn't exist. - gdf = gpd.GeoDataFrame.from_features(features, features_meta) # do I need # the whole metadata? gdf = gdf[gdf.geometry.notnull()] diff --git a/fully-conv-classification/train_model_random_files.py b/fully-conv-classification/train_model_random_files.py index a06e45e..9ba18a3 100644 --- a/fully-conv-classification/train_model_random_files.py +++ b/fully-conv-classification/train_model_random_files.py @@ -1,5 +1,5 @@ import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # os.environ['CUDA_VISIBLE_DEVICES'] = '-1' import keras.backend as K import tensorflow as tf @@ -12,11 +12,12 @@ from scipy.special import expit from random import sample from glob import glob +from time import time -from models import unet +from models import unet, two_headed_unet from data_generators import DataGenerator -from train_utils import lr_schedule +from train_utils import lr_schedule, F1Score from losses import * join = os.path.join @@ -41,19 +42,22 @@ # default gamma gamma = 2.0 - model = unet(input_shape, initial_exp=4, n_classes=n_classes) - model_path = 'random_majority_files/multiclass/' + model = two_headed_unet(input_shape, initial_exp=4, n_classes=n_classes) + model_path = 'random_majority_files/multiclass/normal_xen_with_cdl/' if not os.path.isdir(model_path): os.mkdir(model_path) - model_path += 'three_scenes_concat_balance_examples_per_batch_focal_loss_gamma-{}.h5'.format(gamma) + model_path += 'balance_examples_per_batch_xen.h5' - tensorboard = 
TensorBoard(log_dir='/tmp/', + pth = '/home/thomas/tensorboard/'+str(time()) + if not os.path.isdir(pth): + os.mkdir(pth) + tensorboard = TensorBoard(log_dir=pth, profile_batch=0, update_freq=30, batch_size=3) checkpoint = ModelCheckpoint(filepath=model_path, - monitor='val_multiclass_acc', + monitor='val_irr_m_acc', verbose=1, save_best_only=True) @@ -61,26 +65,30 @@ lr_schedule = partial(lr_schedule, initial_learning_rate=initial_learning_rate, efold=epochs/10) lr_scheduler = LearningRateScheduler(lr_schedule, verbose=True) - root = '/home/thomas/ssd/multiclass_no_border_labels/' + root = '/home/thomas/share/multiclass_with_separate_fallow_directory_and_cdl/' train_dir = join(root, 'train') test_dir = join(root, 'test') opt = tf.keras.optimizers.Adam() batch_size = 4 - loss_func = multiclass_focal_loss(gamma=gamma) - metric = multiclass_acc - model.compile(opt, loss=loss_func, metrics=[metric]) + loss_func = masked_categorical_xent + metric = m_acc + loss_weights = [1.0, 0.25] + model.compile(opt, loss=[masked_categorical_xent, 'binary_crossentropy'], + metrics={'irr':metric, 'cdl':'accuracy'}, loss_weights=loss_weights) train_generator = DataGenerator(train_dir, batch_size, target_classes=None, n_classes=n_classes, balance=False, balance_pixels_per_batch=False, balance_examples_per_batch=True, apply_irrigated_weights=False, - training=False) - test_generator = DataGenerator(test_dir, batch_size, target_classes=0, - n_classes=n_classes, training=False) + training=True, augment_data=False, use_cdl=True) + test_generator = DataGenerator(test_dir, batch_size, target_classes=None, + n_classes=n_classes, training=False, balance=False, steps_per_epoch=30, + augment_data=False, use_cdl=True) + m2 = F1Score(test_generator, n_classes, model_path, batch_size, two_headed_net=True) model.fit_generator(train_generator, epochs=epochs, validation_data=test_generator, - callbacks=[tensorboard, lr_scheduler, checkpoint], + callbacks=[tensorboard, lr_scheduler, checkpoint, m2], 
use_multiprocessing=False, workers=1, max_queue_size=1, - verbose=0) + verbose=1) diff --git a/fully-conv-classification/train_utils.py b/fully-conv-classification/train_utils.py index 8025138..cf5f84d 100644 --- a/fully-conv-classification/train_utils.py +++ b/fully-conv-classification/train_utils.py @@ -2,20 +2,63 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # os.environ['CUDA_VISIBLE_DEVICES'] = '-1' import time +import heapq import tensorflow as tf import numpy as np import matplotlib.pyplot as plt import argparse import pickle +import pdb from scipy.special import expit from sklearn.metrics import confusion_matrix from tensorflow.keras.models import load_model -from collections import defaultdict +from sys import stdout +from tensorflow.keras.callbacks import Callback +from collections import defaultdict, namedtuple from multiprocessing import Pool from random import sample, shuffle from glob import glob +class F1Score(Callback): + + # this is really heavy handed! + # have to evaluate the set twice + def __init__(self, validation_data, n_classes, model_out_path, batch_size=4, two_headed_net=False): + super(F1Score, self).__init__() + self.validation_data = validation_data + self.batch_size = batch_size + self.n_classes = n_classes + self.model_out_path = os.path.splitext(model_out_path)[0] + self.two_headed_net = two_headed_net + if self.two_headed_net: + self.model_out_path += "epoch-{}-f1-{}.h5" + else: + self.model_out_path += "epoch-{}-f1-{}.h5" + self.f1_scores = [] + + def on_train_begin(self, logs={}): + pass + + def on_epoch_end(self, epochs, logs): + # 5.4.1 For each validation batch + cmat, prec, recall = confusion_matrix_from_generator(self.validation_data, + batch_size=self.batch_size, model=self.model, n_classes=self.n_classes, + multi_output=self.two_headed_net) + print('n pixels per class:', np.sum(cmat, axis=1)) + print(prec) + print(recall) + precision_irrigated = prec[0] + recall_irrigated = recall[0] + f1 = 2*(precision_irrigated * 
recall_irrigated) / (precision_irrigated + recall_irrigated) + if np.isnan(f1): + return + outp = self.model_out_path.format(epochs, f1) + print('saving', outp) + if not os.path.isfile(outp): + self.model.save(outp) # maybe get some space savings + return + def softmax(arr, count_dim=0): arr = np.exp(arr) arr /= (np.sum(arr, axis=count_dim, keepdims=True)) @@ -131,14 +174,16 @@ def _preprocess_masks_and_calculate_cmat(y_true, y_pred, n_classes=2): return cmat -def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classes=2): + +def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classes=2, + print_mat=False, multi_output=False): out_cmat = np.zeros((n_classes, n_classes)) if not len(valid_generator): raise ValueError("Length of validation generator is 0") with Pool(batch_size) as pool: - for batch_x, y_true in valid_generator: - y_true = y_true[0] - preds = model.predict(batch_x) + for cnt, (batch_x, y_true) in enumerate(valid_generator): + y_true = y_true[0] # pull irrigated ground truth + preds = model.predict(batch_x)[0] sz = batch_x[0].shape[0] try: y_trues = [np.squeeze(y_true[i]) for i in range(sz)] @@ -150,8 +195,10 @@ def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classe [n_classes]*batch_size)) for cmat in cmats: out_cmat += cmat + stdout.write('{}/{}\r'.format(cnt, len(valid_generator))) - print(out_cmat) + if print_mat: + print(out_cmat) precision_dict = {} recall_dict = {} for i in range(n_classes): @@ -160,7 +207,7 @@ def confusion_matrix_from_generator(valid_generator, batch_size, model, n_classe for i in range(n_classes): precision_dict[i] = out_cmat[i, i] / np.sum(out_cmat[i, :]) # row i recall_dict[i] = out_cmat[i, i] / np.sum(out_cmat[:, i]) # column i - return cmat, recall_dict, precision_dict + return out_cmat, recall_dict, precision_dict def lr_schedule(epoch, initial_learning_rate, efold):