Skip to content

Commit

Permalink
Fix CorpusReplaceOrthFromReferenceCorpus (#290)
Browse files Browse the repository at this point in the history
* followup fix

* first draft test

* tests

* updates

* don't change search
  • Loading branch information
Atticus1806 committed Jul 26, 2022
1 parent e56dbf7 commit 1e1481d
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 1 deletion.
2 changes: 1 addition & 1 deletion corpus/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def run(self):
c.load(self.bliss_corpus.get_path())

for s in c.segments():
tag in s.fullname()
tag = s.fullname()
assert tag in orths.keys(), "Segment %s not found in reference corpus" % tag
s.orth = orths[tag]

Expand Down
12 changes: 12 additions & 0 deletions tests/job_tests/returnn/files/test_replace.corpus.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<corpus name="job-testing">
<speaker-description name="116">
<gender>male</gender>
</speaker-description>
<recording name="116-288045-0000" audio="sample.wav">
<speaker name="116"/>
<segment name="116-288045-0000" start="0.0000" end="10.6500">
<orth> AS I APPROACHED [non-speech] THE CITY, I HEARD BELLS RINGING [noise] [noise] AND A LITTLE LATER I FOUND THE THIS DOES NOT BELONG HERE! BUT I WANT IT HERE! STREETS ASTIR WITH THRONGS OF WELL DRESSED PEOPLE IN FAMILY GROUPS WENDING THEIR WAY HITHER AND THITHER!!! </orth>
</segment>
</recording>
</corpus>
12 changes: 12 additions & 0 deletions tests/job_tests/returnn/files/test_replace.corrupt.corpus.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<corpus name="job-testing">
<speaker-description name="116">
<gender>male</gender>
</speaker-description>
<recording name="116-288045-0000" audio="sample.wav">
<speaker name="116"/>
<segment name="116-288045-0000" start="0.0000" end="10.6500">
<orth> AS I APPROACHED [non-speech] THE CITY, I HEARD BELLS RINGING [noise] [noise] AND A LITTLE LATER THIS DOES NOT BELONG HERE I FOUND THE THIS DOES NOT BELONG HERE! BUT I WANT IT HERE! STREETS ASTIR WITH THRONGS OF WELL DRESSED PEOPLE IN FAMILY GROUPS WENDING THEIR WAY HITHER AND THITHER!!!</orth>
</segment>
</recording>
</corpus>
23 changes: 23 additions & 0 deletions tests/job_tests/returnn/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
import tempfile
import filecmp
from sisyphus import setup_path

from i6_core.corpus.convert import CorpusReplaceOrthFromReferenceCorpus

# Path factory for this test package, obtained from sisyphus. The test below
# uses it both for the fixture files under "files/" and for the job's output
# location inside a temporary directory.
# NOTE(review): how relative paths are resolved is decided by setup_path,
# which is not shown here — confirm against the sisyphus documentation.
Path = setup_path(__package__)


def test_corpus_replace_orth_from_reference_corpus():
    """Check that the replace job restores the reference orthography.

    Runs CorpusReplaceOrthFromReferenceCorpus on the corrupted fixture corpus
    with the clean fixture as reference, redirects the job output into a
    temporary directory, and asserts that the written corpus file has exactly
    the same content as the reference corpus file.
    """
    with tempfile.TemporaryDirectory() as workdir:
        expected = Path("files/test_replace.corpus.xml")
        corrupted = Path("files/test_replace.corrupt.corpus.xml")

        job = CorpusReplaceOrthFromReferenceCorpus(
            bliss_corpus=corrupted,
            reference_bliss_corpus=expected,
        )

        # Point the job output at the temporary directory so that run()
        # can be called directly and cleans up after itself.
        target = os.path.join(workdir, "replaced.corpus.xml")
        job.out_corpus = Path(target)
        job.run()

        # shallow=False forces a content comparison instead of relying on
        # os.stat() signatures.
        identical = filecmp.cmp(job.out_corpus, expected, shallow=False)
        assert identical

0 comments on commit 1e1481d

Please sign in to comment.