diff --git a/cli/__main__.py b/cli/__main__.py
index 757f158..32b8d7f 100644
--- a/cli/__main__.py
+++ b/cli/__main__.py
@@ -429,19 +429,6 @@ def _generate_password(length: int = 10) -> str:
     "prefix",
     type=str,
 )
-@argument(
-    "topics_path",
-    type=PathType(
-        exists=True,
-        file_okay=True,
-        dir_okay=False,
-        writable=False,
-        readable=True,
-        resolve_path=True,
-        allow_dash=False,
-        path_type=Path,
-    ),
-)
 @argument(
     "pool_path",
     type=PathType(
@@ -462,7 +449,6 @@ def prepare_relevance_judgments(
     guidelines_path: Path | None,
     extra_supervisors: Sequence[str],
     prefix: str,
-    topics_path: Path,
     pool_path: Sequence[Path],
 ) -> None:
     """
@@ -475,6 +461,10 @@ def prepare_relevance_judgments(
         raise ValueError("Empty project prefix.")
     project_prefix = slugify(prefix)
 
+    # Require at least one pool file.
+    if not pool_path:
+        raise ValueError("Empty pool_path.")
+
     guidelines: str
     if guidelines_path is not None:
         with guidelines_path.open("rt") as file:
@@ -488,7 +478,6 @@ def prepare_relevance_judgments(
         password=doccano_password,
     )
     echo("Successfully authenticated with Doccano API.")
-
     pool = concat(
         read_json(
             open(path, "r"),
@@ -511,20 +500,8 @@ def prepare_relevance_judgments(
     )
     echo(f"Found {len(pool)} pooled documents.")
 
-    # Read the topics.
-    topics = read_xml(topics_path, dtype=str)
-    echo(f"Found {len(topics)} topics.")
-
-    # Merge in groups from the topics
-    pool = pool.merge(
-        topics[["number", "group"]],
-        how="left",
-        left_on="query_id",
-        right_on="number",
-    )
-
     groups: set[str] = set(pool["group"].drop_duplicates().to_list())
-    echo(f"Found {len(groups)} groups.")
+    echo(f"Found {len(groups)} groups: {groups}")
 
     # Create mapping of groups to usernames and project names.
     group_user_names: Mapping[str, str] = {