diff --git a/examples/python/beam/BUILD b/examples/python/beam/BUILD deleted file mode 100644 index d4fd7f419..000000000 --- a/examples/python/beam/BUILD +++ /dev/null @@ -1,51 +0,0 @@ -# Placeholder: load py_library -# Placeholder: load py_binary -load("//bazel:pytest.bzl", "tensorstore_pytest_test") -# Beam based tensorstore examples - -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) - -py_binary( - name = "run_pipeline", - srcs = ["run_pipeline.py"], - python_version = "PY3", - tags = ["manual"], - deps = [ - ":pipelines_lib", - "@pypa_absl_py//:absl_py", - "@pypa_apache_beam//:apache_beam", - "@pypa_gin_config//:gin_config", - ], -) - -tensorstore_pytest_test( - name = "pipeline_test", - size = "medium", - srcs = ["pipeline_test.py"], - tags = ["manual"], - deps = [ - ":pipelines_lib", - "//python/tensorstore", - "@pypa_apache_beam//:apache_beam", - "@pypa_numpy//:numpy", - ], -) - -py_library( - name = "pipelines_lib", - srcs = [ - "compute_dfbyf.py", - "compute_percentiles.py", - "reshard_tensor.py", - ], - tags = ["manual"], - deps = [ - "//python/tensorstore", - "@pypa_absl_py//:absl_py", - "@pypa_apache_beam//:apache_beam", - "@pypa_gin_config//:gin_config", - "@pypa_numpy//:numpy", - ], -) diff --git a/examples/python/beam/README.md b/examples/python/beam/README.md deleted file mode 100644 index 1c5f1f1ef..000000000 --- a/examples/python/beam/README.md +++ /dev/null @@ -1,10 +0,0 @@ -Examples in python - -Setup - -pip3 install --upgrade virtualenv --user -python3 -m virtualenv env -source env/bin/activate -pip3 install -r requirements.txt - - diff --git a/examples/python/beam/compute_dfbyf.py b/examples/python/beam/compute_dfbyf.py deleted file mode 100644 index cc1cac5f7..000000000 --- a/examples/python/beam/compute_dfbyf.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2020 The TensorStore Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Compute the df/f given a f and a percentile tensor. - -Computes (f - percentile) / (smooth + percentile). -""" - -import logging -import apache_beam as beam -from apache_beam.options.pipeline_options import PipelineOptions -import gin - - -class ComputeDFByF(beam.core.DoFn): - """Computes the df by f.""" - - def __init__(self, input_spec, percentile_spec, output_spec, - percentile_index, smoothing): - self._input_spec = input_spec - self._percentile_spec = percentile_spec - self._output_spec = output_spec - self._percentile_index = percentile_index - self._smoothing = smoothing - - def setup(self): - """Sets up the beam bundle.""" - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - self._ds_in = ts.open(self._input_spec).result() - self._shape = self._ds_in.domain.shape - self._ds_percentile = ts.open(self._percentile_spec).result() - self._ds_out = ts.open(self._output_spec).result() - self._dtype = self._ds_out.dtype.numpy_dtype - - def process(self, yz): - """Computes the df/f at a given y and z.""" - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import numpy as np - y, z = yz - # Process entire xt tiles at once. - f = self._ds_in[:, y, z, :].read().result() - b = self._ds_percentile[:, y, z, :, self._percentile_index].read().result() - fnp = np.array(f) - fnp = fnp.astype(self._dtype) - bnp = np.array(b) - bnp = bnp.astype(self._dtype) - - d = self._smoothing + bnp - d[d == 0] = 1 - - output = (fnp - bnp) / d - self._ds_out[:, y, z, :] = output.astype(self._dtype) - yield None - - -@gin.configurable("compute_dfbyf") -def compute_dfbyf(pipeline_options=gin.REQUIRED, - input_spec=gin.REQUIRED, - percentile_spec=gin.REQUIRED, - output_spec=gin.REQUIRED, - percentile_index=gin.REQUIRED, - smoothing=gin.REQUIRED): - """Computes the df/f of a base and percentile T major XYZT tensors. - - Args: - pipeline_options: dictionary of pipeline options - input_spec: Tensorstore input spec. - percentile_spec: Tensorstore percentile spec. - output_spec: Tensorstore output spec. - percentile_index: the index of the 5th dimention to use for divisor. - smoothing: amount to add to divisor. - """ - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - pipeline_opt = PipelineOptions.from_dictionary(pipeline_options) - logging.info(pipeline_opt.get_all_options()) - ds = ts.open(input_spec).result() - shape = ds.domain.shape - yz = [] - for y in range(shape[1]): - for z in range(shape[2]): - yz.append((y, z)) - - with beam.Pipeline(options=pipeline_opt) as p: - ys = p | beam.Create(yz) - result = ys | beam.ParDo(ComputeDFByF(input_spec, percentile_spec, - output_spec, percentile_index, - smoothing)) - del result diff --git a/examples/python/beam/compute_percentiles.py b/examples/python/beam/compute_percentiles.py deleted file mode 100644 index 174579b02..000000000 --- a/examples/python/beam/compute_percentiles.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright 2020 The TensorStore Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Compute percentiles in a 4D tensor. - -Given an XYZT scalar tensor, computes the percentile along the T axis. -""" - -import logging -import apache_beam as beam -from apache_beam.options.pipeline_options import PipelineOptions -import gin - - -def get_window(location, radius, length): - """Computes the start and end of a window centered at location. - - Args: - location: location to be centered. - radius: radius of window. - length: length of sequence. - Returns: - start: starting index. - end: ending index (exclusive). - """ - start = None - end = None - if location < radius: - start = 0 - end = min(length, 2 * radius + 1) - return start, end - if location + radius >= length - 1: - start = length - 2 * radius - 1 - end = length - return start, end - start = max(0, location - radius) - end = location + radius + 1 - return start, end - - -class ComputePercentile(beam.core.DoFn): - """Computes the percentiles of a TS.""" - - def __init__(self, input_spec, output_spec, radius, - percentiles): - self._input_spec = input_spec - self._output_spec = output_spec - self._radius = radius - self._percentiles = percentiles - self._num_percentiles = len(percentiles) - - def setup(self): - """Sets up the beam bundle.""" - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - self._ds_in = ts.open(self._input_spec).result() - self._shape = self._ds_in.domain.shape - self._dtype = self._ds_in.dtype.numpy_dtype - self._ds_out = ts.open(self._output_spec).result() - - def process(self, yz): - """Computes percentiles at a given y and z.""" - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import numpy as np - y, z = yz - # Process entire xt tiles at once. - tile = self._ds_in[:, y, z, :].read().result() - staging = np.zeros((self._shape[0], self._shape[3], - self._num_percentiles), - dtype=self._dtype) - for t in range(self._shape[3]): - start, end = get_window(t, self._radius, self._shape[3]) - ptile = np.percentile(tile[:, start:end], self._percentiles, - axis=1, interpolation="nearest").transpose() - staging[:, t, :] = ptile - self._ds_out[:, y, z, :, :] = staging - yield None - - -@gin.configurable("compute_percentiles") -def compute_percentiles(pipeline_options=gin.REQUIRED, - input_spec=gin.REQUIRED, - output_spec=gin.REQUIRED, - radius=gin.REQUIRED, - percentiles=gin.REQUIRED): - """Computes the percentile in a window of T major voxels. - - Args: - pipeline_options: dictionary of pipeline options - input_spec: Tensorstore input spec. - output_spec: Tensorstore output spec. - radius: the radius over which to compute the percentile. - percentiles: Percentiles to compute. - """ - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - pipeline_opt = PipelineOptions.from_dictionary(pipeline_options) - logging.info(pipeline_opt.get_all_options()) - ds = ts.open(input_spec).result() - shape = ds.domain.shape - yz = [] - for y in range(shape[1]): - for z in range(shape[2]): - yz.append((y, z)) - - with beam.Pipeline(options=pipeline_opt) as p: - ys = p | beam.Create(yz) - result = ys | beam.ParDo(ComputePercentile(input_spec, output_spec, radius, - percentiles)) - del result diff --git a/examples/python/beam/example.gin b/examples/python/beam/example.gin deleted file mode 100644 index 4d0219e64..000000000 --- a/examples/python/beam/example.gin +++ /dev/null @@ -1,54 +0,0 @@ -# Example gin config to reshard a tensor from xy contiguous to t. - -# This pipeline just has one step. -run.steps = [ @reshard_tensor_xy2t ] - -reshard_tensor_xy2t.pipeline_options = { - "runner" : "DataFlowRunner", - "autoscaling_algorithm" : "THROUGHPUT_BASED", - "num_workers" : "50", - "max_num_workers" : "100", - # No public IPs means can use more workers but cannot access pip. - "no_use_public_ips" : False, - "experiments" : "shuffle_mode=service", - "project" : # Insert your project here, - "region" : # Your zone here", - "staging_location" : #gs://bucket/staging, - "temp_location" : #gs://bucket/staging/temp, - # Use either setup or requirements. - "setup_file" : "./setup.py", - #"requirements_file" : "requirements.txt", - #"requirements_cache" : "/tmp/requirements_cache", - "job_name" : "transpose", - "service_account_email" : #your service account -} - -reshard_tensor_xy2t.num_frames = 44444 -reshard_tensor_xy2t.dx = 256 - -reshard_tensor_xy2t.input_spec = { - "open" : True, - "driver" : "n5", - "path" : # Path to input, - "kvstore" : { - "driver": "gcs", - "bucket": # Your bucket - } -} - -reshard_tensor_xy2t.output_spec = { - "create" : True, - "open" : True, - "driver" : "n5", - "path" : # Path to output, - "kvstore" : { - "driver": "gcs", - "bucket": # Your bucket - }, - "metadata" : { - "compression": { "type": "gzip" }, - "dataType" : "uint16", - "dimensions": [2048, 1116, 31, 44444], - "blockSize" : [256, 1, 1, 44444] - } -} diff --git a/examples/python/beam/pipeline_test.py b/examples/python/beam/pipeline_test.py deleted file mode 100644 index f11b8a003..000000000 --- a/examples/python/beam/pipeline_test.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2020 The TensorStore Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for the pipeline library. -""" - -import os -import numpy as np -import tensorstore as ts -import compute_dfbyf -import compute_percentiles -import reshard_tensor - - -def make_spec(path, dimensions, blocksize, dtype='uint32'): - spec = { - 'driver': 'n5', - 'kvstore': { - 'driver': 'file', - 'path': path, - }, - 'metadata': { - 'compression': {'type': 'gzip'}, - 'dataType': dtype, - 'dimensions': dimensions, - 'blockSize': blocksize, - }, - 'open': True, - 'create': True, - } - return spec - - -def test_reshard_tensor_xy2xt(tmp_path): - pipeline_options = {'runner': 'DirectRunner'} - input_file = os.path.join(tmp_path, 'a') - output_file = os.path.join(tmp_path, 'b') - dim = [6, 7, 8, 9] - count = np.prod(dim) - data = np.arange(count, dtype=np.uint32) - data = np.reshape(data, dim) - input_spec = make_spec(input_file, dim, [6, 7, 1, 1]) - input_ds = ts.open(input_spec).result() - input_ds[:, :, :, :] = data - - output_spec = make_spec(output_file, dim, [1, 1, 1, 9]) - dx = 3 - reshard_tensor.reshard_tensor_xy2xt(pipeline_options, input_spec, - output_spec, dx) - output_ds = ts.open(output_spec).result() - np.testing.assert_array_equal(data, output_ds[:, :, :, :]) - - # Now go from xt back to xy - inverse_file = os.path.join(tmp_path, 'c') - inverse_spec = make_spec(inverse_file, dim, [6, 7, 1, 1]) - - reshard_tensor.reshard_tensor_xt2xy(pipeline_options, output_spec, - inverse_spec, dx) - inverse_ds = ts.open(inverse_spec).result() - np.testing.assert_array_equal(data, inverse_ds[:, :, :, :]) - - -def test_get_percentile_window(): - # Test when it is near the start. - start, end = compute_percentiles.get_window(1, 3, 100) - assert start == 0 - assert end == 7 - start, end = compute_percentiles.get_window(3, 3, 100) - assert start == 0 - assert end == 7 - start, end = compute_percentiles.get_window(50, 3, 100) - assert start == 47 - assert end == 54 - start, end = compute_percentiles.get_window(98, 3, 100) - assert start == 93 - assert end == 100 - start, end = compute_percentiles.get_window(96, 3, 100) - assert start == 93 - assert end == 100 - - -def test_compute_percentile(tmp_path): - pipeline_options = {'runner': 'DirectRunner'} - input_file = os.path.join(tmp_path, 'a') - output_file = os.path.join(tmp_path, 'b') - dim = [2, 2, 2, 3] - count = np.prod(dim) - data = np.arange(count, dtype=np.uint32) - data = np.reshape(data, dim) - input_spec = make_spec(input_file, dim, [1, 1, 1, 3]) - input_ds = ts.open(input_spec).result() - input_ds[:, :, :, :] = data - - dim2 = [2, 2, 2, 3, 3] - output_spec = make_spec(output_file, dim2, [1, 1, 1, 3, 3]) - compute_percentiles.compute_percentiles(pipeline_options, - input_spec, - output_spec, - radius=1, - percentiles=[0, 50, 100]) - output_ds = ts.open(output_spec).result() - # The minimums, medians and maximums should match. - expected_min = np.min(data, axis=3) - expected_max = np.max(data, axis=3) - expected_median = (expected_min + expected_max) // 2 - for i in range(3): - np.testing.assert_array_equal(expected_min, output_ds[:, :, :, i, 0]) - np.testing.assert_array_equal(expected_median, output_ds[:, :, :, i, 1]) - np.testing.assert_array_equal(expected_max, output_ds[:, :, :, i, 2]) - - -def test_compute_dfbyf(tmp_path): - pipeline_options = {'runner': 'DirectRunner'} - input_file = os.path.join(tmp_path, 'a') - percentile_file = os.path.join(tmp_path, 'b') - output_file = os.path.join(tmp_path, 'c') - - dim = [6, 7, 8, 9] - count = np.prod(dim) - data = np.arange(count, dtype=np.uint32) - data = np.reshape(data, dim) - input_spec = make_spec(input_file, dim, [1, 1, 1, 9]) - input_ds = ts.open(input_spec).result() - input_ds[:, :, :, :] = data - - dim_percentile = [6, 7, 8, 9, 3] - data_percentile = np.arange(np.prod(dim_percentile), dtype=np.uint32) - data_percentile = np.reshape(data_percentile, dim_percentile) - percentile_spec = make_spec(percentile_file, dim_percentile, - [1, 1, 1, 9, 3]) - percentile_ds = ts.open(percentile_spec).result() - percentile_ds[:, :, :, :, :] = data_percentile - - output_spec = make_spec(output_file, dim, [1, 1, 1, 9], 'float32') - smoothing = 3.0 - for percentile_index in range(3): - compute_dfbyf.compute_dfbyf( - pipeline_options, input_spec, percentile_spec, - output_spec, percentile_index, smoothing) - f = data.astype(np.float32) - b = data_percentile[:, :, :, :, percentile_index].astype(np.float32) - expected = (f - b) / (smoothing + b) - output_ds = ts.open(output_spec).result() - computed = output_ds[:, :, :, :] - np.testing.assert_array_almost_equal(expected, computed) - diff --git a/examples/python/beam/requirements.txt b/examples/python/beam/requirements.txt deleted file mode 100644 index 2df778c25..000000000 --- a/examples/python/beam/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -apache-beam[gcp] -absl-py -gin-config -numpy -tensorstore diff --git a/examples/python/beam/reshard_tensor.py b/examples/python/beam/reshard_tensor.py deleted file mode 100644 index 456e9671c..000000000 --- a/examples/python/beam/reshard_tensor.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2020 The TensorStore Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Library to reshard tensors from one block size to another. - -Reshards a tensor to a new subblock size. -""" - -import logging -import apache_beam as beam -from apache_beam.options.pipeline_options import PipelineOptions -import gin - - -class ReadTSXYChunk(beam.core.DoFn): - """Reads from Tensorstore stored with XY major format.""" - - def __init__(self, spec, dx): - self._spec = spec - self._dx = dx - - def setup(self): - """Sets up the beam bundle.""" - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - self._ds = ts.open(self._spec).result() - - def process(self, frame): - """Reads a voxel and emits as ((x, y, z) , (frame, values[dx])) tuples.""" - logging.info("Reading frame %d", frame) - voxel = self._ds[:, :, :, frame].read().result() - shape = voxel.shape - for x in range(0, shape[0], self._dx): - for y in range(shape[1]): - for z in range(shape[2]): - element = ((x, y, z), (frame, voxel[x:x + self._dx, y, z])) - yield element - - -class ReadTSXTChunk(beam.core.DoFn): - """Reads from Tensorstore stored with XT major format.""" - - def __init__(self, spec, dx): - self._spec = spec - self._dx = dx - - def setup(self): - """Sets up the beam bundle.""" - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - self._ds = ts.open(self._spec).result() - - def process(self, yz): - """Reads a voxel and emits as (frame , ((x, y, z), values[dx])) tuples.""" - y, z = yz - voxel = self._ds[:, y, z, :].read().result() - shape = voxel.shape - for x in range(0, shape[0], self._dx): - for frame in range(voxel.shape[1]): - element = (frame, ((x, y, z), voxel[x:x + self._dx, frame])) - yield element - - -class WriteTSXTChunk(beam.core.DoFn): - """Writes to tensorstore in XT major format.""" - - def __init__(self, spec, dx): - self._spec = spec - self._dx = dx - - def setup(self): - """Sets up the beam bundle.""" - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - self._ds = ts.open(self._spec).result() - self._dtype = self._ds.dtype.numpy_dtype - shape = self._ds.domain.shape - self._num_frames = shape[3] - - def process(self, element): - """Writes a voxel.""" - xyz, tv = element - x, y, z = xyz - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import numpy as np - frames = np.zeros(shape=(self._dx, self._num_frames), - dtype=self._dtype) - for t, v in tv: - frames[:, t] = v - self._ds[x: x + self._dx, y, z, :] = frames - yield None - - -class WriteTSXYChunk(beam.core.DoFn): - """Writes to tensorstore in XY major format.""" - - def __init__(self, spec, dx): - self._spec = spec - self._dx = dx - - def setup(self): - """Sets up the beam bundle.""" - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - self._ds = ts.open(self._spec).result() - self._dtype = self._ds.dtype.numpy_dtype - self._shape = self._ds.domain.shape - - def process(self, element): - """Writes a voxel. Expects (frame , ((x, y, z), values[dx])).""" - frame, kv = element - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import numpy as np - voxel = np.zeros(shape=(self._shape[0], self._shape[1], self._shape[2]), - dtype=self._dtype) - for loc, vals in kv: - x, y, z = loc - voxel[x:x + self._dx, y, z] = vals - self._ds[:, :, :, frame] = voxel - yield None - - -@gin.configurable("reshard_tensor_xy2xt") -def reshard_tensor_xy2xt(pipeline_options=gin.REQUIRED, - input_spec=gin.REQUIRED, - output_spec=gin.REQUIRED, - dx=gin.REQUIRED): - """Reshards an XY contiguous tensor to t contiguous. - - Args: - pipeline_options: dictionary of pipeline options - input_spec: Tensorstore input spec. - output_spec: Tensorstore output spec. - dx: The size of a block of x for efficiency. - """ - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - pipeline_opt = PipelineOptions.from_dictionary(pipeline_options) - logging.info(pipeline_opt.get_all_options()) - ds = ts.open(input_spec).result() - shape = ds.domain.shape - num_frames = shape[3] - - with beam.Pipeline(options=pipeline_opt) as p: - frames = p | beam.Create(range(num_frames)) - voxels = frames | beam.ParDo(ReadTSXYChunk(input_spec, dx)) - voxels_grouped = voxels | beam.GroupByKey() - result = voxels_grouped | beam.ParDo(WriteTSXTChunk(output_spec, dx)) - del result - - -@gin.configurable("reshard_tensor_xt2xy") -def reshard_tensor_xt2xy(pipeline_options=gin.REQUIRED, - input_spec=gin.REQUIRED, - output_spec=gin.REQUIRED, - dx=gin.REQUIRED): - """Reshards an T contiguous tensor to XY contiguous. - - Args: - pipeline_options: dictionary of pipeline options - input_spec: Tensorstore input spec. - output_spec: Tensorstore output spec. - dx: The size of a block of x for efficiency. - """ - # pylint: disable=g-import-not-at-top, import-outside-toplevel - import tensorstore as ts - pipeline_opt = PipelineOptions.from_dictionary(pipeline_options) - logging.info(pipeline_opt.get_all_options()) - ds = ts.open(input_spec).result() - shape = ds.domain.shape - yz = [] - for y in range(shape[1]): - for z in range(shape[2]): - yz.append((y, z)) - - with beam.Pipeline(options=pipeline_opt) as p: - frames = p | beam.Create(yz) - voxels = frames | beam.ParDo(ReadTSXTChunk(input_spec, dx)) - voxels_grouped = voxels | beam.GroupByKey() - result = voxels_grouped | beam.ParDo(WriteTSXYChunk(output_spec, dx)) - del result diff --git a/examples/python/beam/run_pipeline.py b/examples/python/beam/run_pipeline.py deleted file mode 100644 index d228ebfb9..000000000 --- a/examples/python/beam/run_pipeline.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 The TensorStore Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Runs the beam pipeline. - -python3 run_pipeline.py --gin_config reshard.gin -""" - -import logging -from absl import app -from absl import flags - -# These modules use the import side effect to register factory methods. -# pylint: disable=unused-import -import compute_dfbyf -import compute_percentiles -import gin -import reshard_tensor -# pylint: enable=unused-import - -FLAGS = flags.FLAGS -flags.DEFINE_multi_string("gin_config", [], - "List of paths to the config files.") -flags.DEFINE_multi_string("gin_bindings", [], - "Newline separated list of Gin parameter bindings.") - - -@gin.configurable("run") -def run(steps=gin.REQUIRED): - for step in steps: - logging.info("Running step %s", str(step)) - step() - - -def main(argv): - # unused - del argv - gin.parse_config_files_and_bindings(FLAGS.gin_config, FLAGS.gin_bindings) - run() - -if __name__ == "__main__": - logging.getLogger().setLevel(logging.INFO) - app.run(main) diff --git a/examples/python/beam/setup.py b/examples/python/beam/setup.py deleted file mode 100644 index 3f4263158..000000000 --- a/examples/python/beam/setup.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2020 The TensorStore Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Setup script for beam pipeline.""" -import setuptools - -REQUIRED_PACKAGES = [ - "absl-py==0.9.0", - "gin-config==0.3.0", - "numpy==1.18.3", - "tensorstore==0.1.1" -] - -PY_MODULES = [ - "compute_dfbyf", - "compute_percentiles", - "reshard_tensor" -] - -setuptools.setup( - name="tensorstore_beam_pipeline", - version="0.0.0", - install_requires=REQUIRED_PACKAGES, - py_modules=PY_MODULES, - packages=setuptools.find_packages() -) diff --git a/third_party/pypa/build_requirements_frozen.txt b/third_party/pypa/build_requirements_frozen.txt index 837c5b205..21428d01d 100644 --- a/third_party/pypa/build_requirements_frozen.txt +++ b/third_party/pypa/build_requirements_frozen.txt @@ -1,3 +1,3 @@ # DO NOT EDIT: Generated from build_requirements.txt by generate_workspace.py -numpy==1.24.4 +numpy==1.26.1 ml_dtypes==0.3.1 diff --git a/third_party/pypa/examples_requirements.txt b/third_party/pypa/examples_requirements.txt index cd057e782..3ba2a6688 100644 --- a/third_party/pypa/examples_requirements.txt +++ b/third_party/pypa/examples_requirements.txt @@ -1,3 +1,2 @@ # Packages required to run the examples. -apache-beam -gin-config +numpy \ No newline at end of file diff --git a/third_party/pypa/examples_requirements_frozen.txt b/third_party/pypa/examples_requirements_frozen.txt index 52eff52dd..1937c5564 100644 --- a/third_party/pypa/examples_requirements_frozen.txt +++ b/third_party/pypa/examples_requirements_frozen.txt @@ -1,3 +1,2 @@ # DO NOT EDIT: Generated from examples_requirements.txt by generate_workspace.py -apache-beam==2.51.0 -gin-config==0.5.0 +numpy==1.26.1 diff --git a/third_party/pypa/generate_workspace.py b/third_party/pypa/generate_workspace.py index 7f3361588..13ce161c0 100755 --- a/third_party/pypa/generate_workspace.py +++ b/third_party/pypa/generate_workspace.py @@ -100,7 +100,7 @@ def _get_session(): # https://warehouse.pypa.io/api-reference/json.html def get_pypa_json(package_selector: str) -> Json: """Fetches a Python package or package/version from PyPI.""" - uri = f"https://pypi.python.org/pypi/{package_selector}/json" + uri = f"https://pypi.org/pypi/{package_selector}/json" print(uri) r = _get_session().get(uri, timeout=5) r.raise_for_status() @@ -155,8 +155,14 @@ def _is_suitable_release(release: Json): requires_python = release_pkg.get("requires_python") if requires_python is None: return True - while requires_python.endswith(".*"): - requires_python = requires_python[:-2] + # Trim some invalid substrings + while True: + if requires_python.endswith(".*"): + requires_python = requires_python[:-2] + elif requires_python.endswith(".*"): + requires_python = requires_python[:-1] + else: + break spec = SpecifierSet(requires_python) for v in list(remaining_python_versions): if v in spec: @@ -185,6 +191,7 @@ def _merge_requirements(dest: Requirement, b: Requirement) -> bool: if merged_specifier != dest.specifier: dest.specifier = merged_specifier result = True + # TODO Merge markers? return result diff --git a/third_party/pypa/test_requirements_frozen.txt b/third_party/pypa/test_requirements_frozen.txt index 0f52052f0..13c5a3cdd 100644 --- a/third_party/pypa/test_requirements_frozen.txt +++ b/third_party/pypa/test_requirements_frozen.txt @@ -1,14 +1,14 @@ # DO NOT EDIT: Generated from test_requirements.txt by generate_workspace.py -numpy==1.24.4 -pytest==7.4.2 +numpy==1.26.1 +pytest==7.4.3 pytest-asyncio==0.21.1 -cloudpickle==2.2.1 +cloudpickle==3.0.0 ml_dtypes==0.3.1 grpcio==1.59.0 flask==3.0.0 requests-toolbelt==1.0.0 scalpl==0.4.2 crc32c==2.3.post0 -werkzeug==3.0.0 +werkzeug==3.0.1 googleapis-common-protos==1.61.0 protobuf==4.24.4 diff --git a/third_party/pypa/workspace.bzl b/third_party/pypa/workspace.bzl index f31fffe30..bc1c5c8bd 100644 --- a/third_party/pypa/workspace.bzl +++ b/third_party/pypa/workspace.bzl @@ -11,7 +11,6 @@ def repo(): repo_pypa_absl_py() repo_pypa_alabaster() repo_pypa_annotated_types() - repo_pypa_apache_beam() repo_pypa_appdirs() repo_pypa_appnope() repo_pypa_asttokens() @@ -25,21 +24,13 @@ def repo(): repo_pypa_cloudpickle() repo_pypa_colorama() repo_pypa_crc32c() - repo_pypa_crcmod() repo_pypa_decorator() - repo_pypa_dill() - repo_pypa_dnspython() repo_pypa_docutils() repo_pypa_exceptiongroup() repo_pypa_executing() - repo_pypa_fastavro() - repo_pypa_fasteners() repo_pypa_flask() - repo_pypa_gin_config() repo_pypa_googleapis_common_protos() repo_pypa_grpcio() - repo_pypa_hdfs() - repo_pypa_httplib2() repo_pypa_idna() repo_pypa_imagesize() repo_pypa_importlib_metadata() @@ -48,15 +39,12 @@ def repo(): repo_pypa_itsdangerous() repo_pypa_jedi() repo_pypa_jinja2() - repo_pypa_js2py() repo_pypa_jsonschema() repo_pypa_jsonschema_specifications() repo_pypa_markupsafe() repo_pypa_matplotlib_inline() repo_pypa_ml_dtypes() repo_pypa_numpy() - repo_pypa_objsize() - repo_pypa_orjson() repo_pypa_packaging() repo_pypa_parso() repo_pypa_pexpect() @@ -64,26 +52,17 @@ def repo(): repo_pypa_platformdirs() repo_pypa_pluggy() repo_pypa_prompt_toolkit() - repo_pypa_proto_plus() repo_pypa_protobuf() repo_pypa_ptyprocess() repo_pypa_pure_eval() - repo_pypa_pyarrow() repo_pypa_pydantic() repo_pypa_pydantic_core() repo_pypa_pydantic_extra_types() - repo_pypa_pydot() repo_pypa_pygments() - repo_pypa_pyjsparser() - repo_pypa_pymongo() - repo_pypa_pyparsing() repo_pypa_pytest() repo_pypa_pytest_asyncio() - repo_pypa_python_dateutil() - repo_pypa_pytz() repo_pypa_pyyaml() repo_pypa_referencing() - repo_pypa_regex() repo_pypa_requests() repo_pypa_requests_toolbelt() repo_pypa_rpds_py() @@ -102,13 +81,11 @@ def repo(): repo_pypa_tomli() repo_pypa_traitlets() repo_pypa_typing_extensions() - repo_pypa_tzlocal() repo_pypa_urllib3() repo_pypa_wcwidth() repo_pypa_werkzeug() repo_pypa_yapf() repo_pypa_zipp() - repo_pypa_zstandard() def repo_pypa_absl_py(): maybe( @@ -134,64 +111,6 @@ def repo_pypa_annotated_types(): requirement = "annotated-types==0.6.0", ) -def repo_pypa_apache_beam(): - repo_pypa_cloudpickle() - repo_pypa_crcmod() - repo_pypa_dill() - repo_pypa_fastavro() - repo_pypa_fasteners() - repo_pypa_grpcio() - repo_pypa_hdfs() - repo_pypa_httplib2() - repo_pypa_js2py() - repo_pypa_numpy() - repo_pypa_objsize() - repo_pypa_orjson() - repo_pypa_packaging() - repo_pypa_proto_plus() - repo_pypa_protobuf() - repo_pypa_pyarrow() - repo_pypa_pydot() - repo_pypa_pymongo() - repo_pypa_python_dateutil() - repo_pypa_pytz() - repo_pypa_regex() - repo_pypa_requests() - repo_pypa_typing_extensions() - repo_pypa_zstandard() - maybe( - third_party_python_package, - name = "pypa_apache_beam", - target = "apache_beam", - requirement = "apache-beam==2.51.0", - deps = [ - "@pypa_cloudpickle//:cloudpickle", - "@pypa_crcmod//:crcmod", - "@pypa_dill//:dill", - "@pypa_fastavro//:fastavro", - "@pypa_fasteners//:fasteners", - "@pypa_grpcio//:grpcio", - "@pypa_hdfs//:hdfs", - "@pypa_httplib2//:httplib2", - "@pypa_js2py//:js2py", - "@pypa_numpy//:numpy", - "@pypa_objsize//:objsize", - "@pypa_orjson//:orjson", - "@pypa_packaging//:packaging", - "@pypa_proto_plus//:proto_plus", - "@pypa_protobuf//:protobuf", - "@pypa_pyarrow//:pyarrow", - "@pypa_pydot//:pydot", - "@pypa_pymongo//:pymongo", - "@pypa_python_dateutil//:python_dateutil", - "@pypa_pytz//:pytz", - "@pypa_regex//:regex", - "@pypa_requests//:requests", - "@pypa_typing_extensions//:typing_extensions", - "@pypa_zstandard//:zstandard", - ], - ) - def repo_pypa_appdirs(): maybe( third_party_python_package, @@ -214,7 +133,7 @@ def repo_pypa_asttokens(): third_party_python_package, name = "pypa_asttokens", target = "asttokens", - requirement = "asttokens==2.4.0", + requirement = "asttokens==2.4.1", deps = [ "@pypa_six//:six", ], @@ -233,7 +152,7 @@ def repo_pypa_babel(): third_party_python_package, name = "pypa_babel", target = "babel", - requirement = "babel==2.13.0", + requirement = "babel==2.13.1", ) def repo_pypa_backcall(): @@ -265,7 +184,7 @@ def repo_pypa_charset_normalizer(): third_party_python_package, name = "pypa_charset_normalizer", target = "charset_normalizer", - requirement = "charset-normalizer==3.3.0", + requirement = "charset-normalizer==3.3.1", ) def repo_pypa_click(): @@ -281,7 +200,7 @@ def repo_pypa_cloudpickle(): third_party_python_package, name = "pypa_cloudpickle", target = "cloudpickle", - requirement = "cloudpickle==2.2.1", + requirement = "cloudpickle==3.0.0", ) def repo_pypa_colorama(): @@ -300,14 +219,6 @@ def repo_pypa_crc32c(): requirement = "crc32c==2.3.post0", ) -def repo_pypa_crcmod(): - maybe( - third_party_python_package, - name = "pypa_crcmod", - target = "crcmod", - requirement = "crcmod==1.7", - ) - def repo_pypa_decorator(): maybe( third_party_python_package, @@ -316,22 +227,6 @@ def repo_pypa_decorator(): requirement = "decorator==5.1.1", ) -def repo_pypa_dill(): - maybe( - third_party_python_package, - name = "pypa_dill", - target = "dill", - requirement = "dill==0.3.1.1", - ) - -def repo_pypa_dnspython(): - maybe( - third_party_python_package, - name = "pypa_dnspython", - target = "dnspython", - requirement = "dnspython==2.4.2", - ) - def repo_pypa_docutils(): maybe( third_party_python_package, @@ -356,22 +251,6 @@ def repo_pypa_executing(): requirement = "executing==2.0.0", ) -def repo_pypa_fastavro(): - maybe( - third_party_python_package, - name = "pypa_fastavro", - target = "fastavro", - requirement = "fastavro==1.8.4", - ) - -def repo_pypa_fasteners(): - maybe( - third_party_python_package, - name = "pypa_fasteners", - target = "fasteners", - requirement = "fasteners==0.19", - ) - def repo_pypa_flask(): repo_pypa_blinker() repo_pypa_click() @@ -394,14 +273,6 @@ def repo_pypa_flask(): ], ) -def repo_pypa_gin_config(): - maybe( - third_party_python_package, - name = "pypa_gin_config", - target = "gin_config", - requirement = "gin-config==0.5.0", - ) - def repo_pypa_googleapis_common_protos(): repo_pypa_protobuf() maybe( @@ -422,26 +293,6 @@ def repo_pypa_grpcio(): requirement = "grpcio==1.59.0", ) -def repo_pypa_hdfs(): - maybe( - third_party_python_package, - name = "pypa_hdfs", - target = "hdfs", - requirement = "hdfs==2.7.3", - ) - -def repo_pypa_httplib2(): - repo_pypa_pyparsing() - maybe( - third_party_python_package, - name = "pypa_httplib2", - target = "httplib2", - requirement = "httplib2==0.22.0", - deps = [ - "@pypa_pyparsing//:pyparsing", - ], - ) - def repo_pypa_idna(): maybe( third_party_python_package, @@ -548,22 +399,6 @@ def repo_pypa_jinja2(): ], ) -def repo_pypa_js2py(): - repo_pypa_pyjsparser() - repo_pypa_six() - repo_pypa_tzlocal() - maybe( - third_party_python_package, - name = "pypa_js2py", - target = "js2py", - requirement = "js2py==0.74", - deps = [ - "@pypa_pyjsparser//:pyjsparser", - "@pypa_six//:six", - "@pypa_tzlocal//:tzlocal", - ], - ) - def repo_pypa_jsonschema(): repo_pypa_attrs() repo_pypa_jsonschema_specifications() @@ -631,23 +466,7 @@ def repo_pypa_numpy(): third_party_python_package, name = "pypa_numpy", target = "numpy", - requirement = "numpy==1.24.4", - ) - -def repo_pypa_objsize(): - maybe( - third_party_python_package, - name = "pypa_objsize", - target = "objsize", - requirement = "objsize==0.6.1", - ) - -def repo_pypa_orjson(): - maybe( - third_party_python_package, - name = "pypa_orjson", - target = "orjson", - requirement = "orjson==3.9.9", + requirement = "numpy==1.26.1", ) def repo_pypa_packaging(): @@ -714,18 +533,6 @@ def repo_pypa_prompt_toolkit(): ], ) -def repo_pypa_proto_plus(): - repo_pypa_protobuf() - maybe( - third_party_python_package, - name = "pypa_proto_plus", - target = "proto_plus", - requirement = "proto-plus==1.22.3", - deps = [ - "@pypa_protobuf//:protobuf", - ], - ) - def repo_pypa_protobuf(): maybe( third_party_python_package, @@ -750,18 +557,6 @@ def repo_pypa_pure_eval(): requirement = "pure-eval==0.2.2", ) -def repo_pypa_pyarrow(): - repo_pypa_numpy() - maybe( - third_party_python_package, - name = "pypa_pyarrow", - target = "pyarrow", - requirement = "pyarrow==11.0.0", - deps = [ - "@pypa_numpy//:numpy", - ], - ) - def repo_pypa_pydantic(): repo_pypa_annotated_types() repo_pypa_pydantic_core() @@ -802,18 +597,6 @@ def repo_pypa_pydantic_extra_types(): ], ) -def repo_pypa_pydot(): - repo_pypa_pyparsing() - maybe( - third_party_python_package, - name = "pypa_pydot", - target = "pydot", - requirement = "pydot==1.4.2", - deps = [ - "@pypa_pyparsing//:pyparsing", - ], - ) - def repo_pypa_pygments(): maybe( third_party_python_package, @@ -822,34 +605,6 @@ def repo_pypa_pygments(): requirement = "pygments==2.16.1", ) -def repo_pypa_pyjsparser(): - maybe( - third_party_python_package, - name = "pypa_pyjsparser", - target = "pyjsparser", - requirement = "pyjsparser==2.7.1", - ) - -def repo_pypa_pymongo(): - repo_pypa_dnspython() - maybe( - third_party_python_package, - name = "pypa_pymongo", - target = "pymongo", - requirement = "pymongo==4.5.0", - deps = [ - "@pypa_dnspython//:dnspython", - ], - ) - -def repo_pypa_pyparsing(): - maybe( - third_party_python_package, - name = "pypa_pyparsing", - target = "pyparsing", - requirement = "pyparsing==3.1.1", - ) - def repo_pypa_pytest(): repo_pypa_colorama() repo_pypa_exceptiongroup() @@ -861,7 +616,7 @@ def repo_pypa_pytest(): third_party_python_package, name = "pypa_pytest", target = "pytest", - requirement = "pytest==7.4.2", + requirement = "pytest==7.4.3", deps = [ "@pypa_colorama//:colorama", "@pypa_exceptiongroup//:exceptiongroup", @@ -884,26 +639,6 @@ def repo_pypa_pytest_asyncio(): ], ) -def repo_pypa_python_dateutil(): - repo_pypa_six() - maybe( - third_party_python_package, - name = "pypa_python_dateutil", - target = "python_dateutil", - requirement = "python-dateutil==2.8.2", - deps = [ - "@pypa_six//:six", - ], - ) - -def repo_pypa_pytz(): - maybe( - third_party_python_package, - name = "pypa_pytz", - target = "pytz", - requirement = "pytz==2023.3.post1", - ) - def repo_pypa_pyyaml(): maybe( third_party_python_package, @@ -926,14 +661,6 @@ def repo_pypa_referencing(): ], ) -def repo_pypa_regex(): - maybe( - third_party_python_package, - name = "pypa_regex", - target = "regex", - requirement = "regex==2023.10.3", - ) - def repo_pypa_requests(): repo_pypa_certifi() repo_pypa_charset_normalizer() @@ -1141,7 +868,7 @@ def repo_pypa_traitlets(): third_party_python_package, name = "pypa_traitlets", target = "traitlets", - requirement = "traitlets==5.11.2", + requirement = "traitlets==5.12.0", ) def repo_pypa_typing_extensions(): @@ -1152,14 +879,6 @@ def repo_pypa_typing_extensions(): requirement = "typing-extensions==4.8.0", ) -def repo_pypa_tzlocal(): - maybe( - third_party_python_package, - name = "pypa_tzlocal", - target = "tzlocal", - requirement = "tzlocal==5.1", - ) - def repo_pypa_urllib3(): maybe( third_party_python_package, @@ -1182,7 +901,7 @@ def repo_pypa_werkzeug(): third_party_python_package, name = "pypa_werkzeug", target = "werkzeug", - requirement = "werkzeug==3.0.0", + requirement = "werkzeug==3.0.1", deps = [ "@pypa_markupsafe//:markupsafe", ], @@ -1211,11 +930,3 @@ def repo_pypa_zipp(): target = "zipp", requirement = "zipp==3.17.0", ) - -def repo_pypa_zstandard(): - maybe( - third_party_python_package, - name = "pypa_zstandard", - target = "zstandard", - requirement = "zstandard==0.21.0", - )