Skip to content

Commit

Permalink
fix: Update metadata to include HLS granule ID and links to Fmask lay…
Browse files Browse the repository at this point in the history
…er (#48)

* Include Input_HLS_GranuleUR additional attribute

* Fix some typos found by typos_lsp

* Include links to Fmask from HLS granule in OnlineAccessURLs

* format & lint

* Bump lxml for indenting and numpy for easier Mac ARM installs

* Explain pin for numpy

* self review

* fix typo

* Revert version changes & figure out how to indent without py3.9 or lxml>=4.5
  • Loading branch information
ceholden authored Jan 8, 2025
1 parent 2856231 commit 56d2d9a
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 9 deletions.
61 changes: 56 additions & 5 deletions hls_vi/generate_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import re
import sys
from xml.dom import minidom

from datetime import datetime, timezone
from pathlib import Path
Expand Down Expand Up @@ -92,7 +93,13 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:
processing_time = tags["HLS_VI_PROCESSING_TIME"]

granule_ur = tree.find("GranuleUR")
input_granule_ur = granule_ur.text
granule_ur.text = granule_ur.text.replace("HLS", "HLS-VI")
set_additional_attribute(
tree.find("AdditionalAttributes"),
"Input_HLS_GranuleUR",
input_granule_ur,
)

time_format = "%Y-%m-%dT%H:%M:%S.%fZ"
formatted_date = datetime.now(timezone.utc).strftime(time_format)
Expand Down Expand Up @@ -125,24 +132,35 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:

tree.find("DataFormat").text = "COG"

append_fmask_online_access_urls(
tree.find("OnlineAccessURLs"),
input_granule_ur,
)

with (
importlib_resources.files("hls_vi")
/ "schema"
/ "Granule.xsd" # pyright: ignore[reportOperatorIssue]
).open() as xsd:
ET.XMLSchema(file=xsd).assertValid(tree)

tree.write(
str(output_dir / metadata_path.name.replace("HLS", "HLS-VI")),
encoding="utf-8",
xml_declaration=True,
# Python 3.9+ and `lxml>=4.5` provide an `indent()` function to nicely format our XML.
# Alas we cannot use those yet, so rely on this approach using `xml.dom.minidom`
dom = minidom.parseString(
ET.tostring(tree, xml_declaration=True, pretty_print=False)
)
pretty_xml = os.linesep.join(
[line for line in dom.toprettyxml(indent=" ").splitlines() if line.strip()]
)

dest = output_dir / metadata_path.name.replace("HLS", "HLS-VI")
dest.write_text(pretty_xml, encoding="utf-8")


def normalize_additional_attributes(container: ElementBase) -> None:
"""Normalize additional attribute values.
On rare occassions, granule data is split and recombined upstream. When this
On rare occasions, granule data is split and recombined upstream. When this
occurs, the associated metadata is also split and recombined, resulting in values
for additional attributes that are created by joining the separate parts with the
string `" + "`.
Expand Down Expand Up @@ -193,6 +211,39 @@ def set_additional_attribute(attrs: ElementBase, name: str, value: str) -> None:
attrs.append(attr)


def append_fmask_online_access_urls(
    access_urls: ElementBase, hls_granule_ur: str
) -> None:
    """Append links to the source HLS granule's Fmask layer.

    Two `OnlineAccessURL` children are appended to `access_urls`, in this
    order: an HTTPS link and then an S3 link, each carrying a `URL` and a
    `URLDescription` child. This is intended to help users find the relevant
    Fmask band without having to duplicate it into the HLS-VI product. See
    https://github.com/NASA-IMPACT/hls-vi/issues/47

    Args:
        access_urls: The `OnlineAccessURLs` container element; modified in place.
        hls_granule_ur: GranuleUR of the input HLS granule. A value starting
            with "HLS.L30" selects the "HLSL30.020" prefix; any other value
            selects "HLSS30.020".
    """
    if hls_granule_ur.startswith("HLS.L30"):
        collection = "HLSL30.020"
    else:
        collection = "HLSS30.020"

    # Both links share the same bucket/key layout and differ only in scheme/host.
    fmask_name = f"{hls_granule_ur}.Fmask.tif"
    fmask_key = f"lp-prod-protected/{collection}/{hls_granule_ur}/{fmask_name}"

    links = (
        (
            f"https://data.lpdaac.earthdatacloud.nasa.gov/{fmask_key}",
            f"Download Fmask quality layer {fmask_name}",
        ),
        (
            f"s3://{fmask_key}",
            "This link provides direct download access via S3 to the Fmask "
            f"quality layer {fmask_name}",
        ),
    )

    for href, description in links:
        entry = Element("OnlineAccessURL", None, None)

        url_node = Element("URL", None, None)
        url_node.text = href
        entry.append(url_node)

        description_node = Element("URLDescription", None, None)
        description_node.text = description
        entry.append(description_node)

        access_urls.append(entry)


def parse_args() -> Tuple[Path, Path]:
short_options = "i:o:"
long_options = ["instrument=", "inputdir=", "outputdir="]
Expand Down
3 changes: 2 additions & 1 deletion hls_vi/schema/Granule.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="GranuleDelete" type="GranuleDelete">
</xs:element>
<xs:element name="GranuleUR" type="GranuleUR"></xs:element>
<xs:element name="Input_GranuleUR" type="GranuleUR"></xs:element>
<xs:complexType name="GranuleMetaDataFile">
<xs:annotation>
<xs:documentation />
Expand Down Expand Up @@ -1298,7 +1299,7 @@ xmlns:xs="http://www.w3.org/2001/XMLSchema">
type="ListOfAdditionalAttributeValues">
<xs:annotation>
<xs:documentation>The ordered list of values of the
additioanl attribute for this granule. The values will be
additional attribute for this granule. The values will be
kept in the order which they appear.</xs:documentation>
</xs:annotation>
</xs:element>
Expand Down
2 changes: 1 addition & 1 deletion hls_vi/schema/MetadataCommon.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@
</xs:sequence>
</xs:choice>
</xs:complexType>
<!-- #mark Emtpy Type -->
<!-- #mark Empty Type -->
<xs:complexType name="EmptyType">
<xs:annotation>
<xs:documentation>The element should contain no children. In
Expand Down
16 changes: 15 additions & 1 deletion tests/fixtures/HLS-VI.L30.T06WVS.2024120T211159.v2.0.cmr.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<DataGranule>
<ProducerGranuleId>HLS-VI.L30.T06WVS.2024120T211159</ProducerGranuleId>
<DayNightFlag>DAY</DayNightFlag>
<ProductionDateTime>UPDATE HLS Prodution DATETIME</ProductionDateTime>
<ProductionDateTime>UPDATE HLS Production DATETIME</ProductionDateTime>
<LocalVersionId>2.0</LocalVersionId>
</DataGranule>
<Temporal>
Expand Down Expand Up @@ -240,8 +240,22 @@
<Value>https://doi.org</Value>
</Values>
</AdditionalAttribute>
<AdditionalAttribute>
<Name>Input_HLS_GranuleUR</Name>
<Values>
<Value>HLS.L30.T06WVS.2024120T211159.v2.0</Value>
</Values>
</AdditionalAttribute>
</AdditionalAttributes>
<OnlineAccessURLs>
<OnlineAccessURL>
<URL>https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T06WVS.2024120T211159.v2.0/HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URL>
<URLDescription>Download Fmask quality layer HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
<OnlineAccessURL>
<URL>s3://lp-prod-protected/HLSL30.020/HLS.L30.T06WVS.2024120T211159.v2.0/HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URL>
<URLDescription>This link provides direct download access via S3 to the Fmask quality layer HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
</OnlineAccessURLs>
<OnlineResources>
</OnlineResources>
Expand Down
16 changes: 15 additions & 1 deletion tests/fixtures/HLS-VI.S30.T13RCN.2024128T173909.v2.0.cmr.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<DataGranule>
<ProducerGranuleId>HLS-VI.S30.T13RCN.2024128T173909</ProducerGranuleId>
<DayNightFlag>DAY</DayNightFlag>
<ProductionDateTime>UPDATE HLS Prodution DATETIME</ProductionDateTime>
<ProductionDateTime>UPDATE HLS Production DATETIME</ProductionDateTime>
<LocalVersionId>2.0</LocalVersionId>
</DataGranule>
<Temporal>
Expand Down Expand Up @@ -302,8 +302,22 @@
<Value>https://doi.org</Value>
</Values>
</AdditionalAttribute>
<AdditionalAttribute>
<Name>Input_HLS_GranuleUR</Name>
<Values>
<Value>HLS.S30.T13RCN.2024128T173909.v2.0</Value>
</Values>
</AdditionalAttribute>
</AdditionalAttributes>
<OnlineAccessURLs>
<OnlineAccessURL>
<URL>https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13RCN.2024128T173909.v2.0/HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URL>
<URLDescription>Download Fmask quality layer HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
<OnlineAccessURL>
<URL>s3://lp-prod-protected/HLSS30.020/HLS.S30.T13RCN.2024128T173909.v2.0/HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URL>
<URLDescription>This link provides direct download access via S3 to the Fmask quality layer HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
</OnlineAccessURLs>
<OnlineResources>
</OnlineResources>
Expand Down

0 comments on commit 56d2d9a

Please sign in to comment.