feat: add manifest read/write
This commit is contained in:
parent
09a5c0989f
commit
e6b2a174c5
57
tcga_downloader/manifest.py
Normal file
57
tcga_downloader/manifest.py
Normal file
@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List
|
||||
|
||||
# Manifest column names, in the order used for the TSV header row.
REQUIRED_FIELDS = [
    "file_id",
    "file_name",
    "data_type",
    "data_format",
    "size",
    "md5",
]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ManifestRecord:
    """One manifest entry describing a single file.

    Instances are immutable (``frozen=True``): assigning to a field after
    construction raises. Fields are declared in the same order as the
    manifest columns, so positional construction follows that order.
    """

    # Identity of the entry.
    file_id: str
    file_name: str

    # Free-form descriptors of the file's content.
    data_type: str
    data_format: str

    # Integrity metadata. NOTE(review): presumably a byte count and an MD5
    # hex digest -- confirm against the producer of these records.
    size: int
    md5: str
|
||||
|
||||
|
||||
def _validate_record(rec: ManifestRecord) -> None:
    """Check that *rec* is fit to be written to a manifest.

    Raises:
        ValueError: if ``file_id`` or ``file_name`` is empty/falsy, or if
            ``size`` is negative.
    """
    # Both identifying fields must be truthy; file_name is only inspected
    # when file_id already passed.
    if not (rec.file_id and rec.file_name):
        raise ValueError("file_id and file_name are required")

    if rec.size < 0:
        raise ValueError("size must be non-negative")
|
||||
|
||||
|
||||
def write_manifest(records: Iterable[ManifestRecord], path: Path, fmt: str = "tsv") -> None:
    """Serialise *records* to *path* as TSV (default) or JSON.

    The iterable is fully consumed and every record is validated before
    the file is opened, so a validation failure writes nothing.

    Args:
        records: Manifest entries to write.
        path: Destination file; coerced with ``Path(path)``.
        fmt: ``"tsv"`` for a tab-delimited file with a header row, or
            ``"json"`` for an indented JSON array of objects.

    Raises:
        ValueError: if *fmt* is not ``"tsv"`` or ``"json"``, or if any
            record fails validation.
    """
    target = Path(path)

    supported = {"tsv", "json"}
    if fmt not in supported:
        raise ValueError("fmt must be 'tsv' or 'json'")

    # Materialise first: the records are walked twice (validate, then write).
    entries = list(records)
    for entry in entries:
        _validate_record(entry)

    if fmt == "json":
        payload = json.dumps([entry.__dict__ for entry in entries], indent=2)
        target.write_text(payload)
        return

    # newline="" lets the csv module control line endings itself.
    with target.open("w", newline="") as handle:
        tsv_writer = csv.DictWriter(handle, fieldnames=REQUIRED_FIELDS, delimiter="\t")
        tsv_writer.writeheader()
        tsv_writer.writerows(entry.__dict__ for entry in entries)
|
||||
|
||||
|
||||
def _record_from_row(row: dict) -> ManifestRecord:
    """Build a ``ManifestRecord`` from one parsed row, coercing ``size`` to ``int``."""
    fields = dict(row)
    # csv.DictReader yields every value as a string (and None for a short
    # row), so ``size`` must be converted to honour the ``size: int`` field.
    try:
        fields["size"] = int(fields["size"])
    except KeyError:
        raise ValueError("manifest row is missing the 'size' field") from None
    except (TypeError, ValueError) as exc:
        raise ValueError(f"invalid size value in manifest: {fields['size']!r}") from exc
    return ManifestRecord(**fields)


def load_manifest(path: Path) -> List[ManifestRecord]:
    """Read a manifest file and return its records.

    The format is chosen from the file suffix: ``.json`` (case-insensitive)
    is parsed as a JSON array of objects; anything else is parsed as a
    tab-delimited file whose header row names the fields.

    Args:
        path: Manifest file to read; coerced with ``Path(path)``.

    Returns:
        One ``ManifestRecord`` per row, in file order, with ``size`` as an
        ``int`` regardless of the on-disk format.

    Raises:
        ValueError: if a row has no ``size`` or its ``size`` is not an
            integer value.
        TypeError: if a row has missing or unexpected fields (raised by the
            ``ManifestRecord`` constructor).
    """
    path = Path(path)

    if path.suffix.lower() == ".json":
        rows = json.loads(path.read_text())
    else:
        # newline="" is what the csv module expects for correct handling of
        # embedded line breaks.
        with path.open("r", newline="") as f:
            rows = list(csv.DictReader(f, delimiter="\t"))

    return [_record_from_row(row) for row in rows]
|
||||
18
tests/test_manifest.py
Normal file
18
tests/test_manifest.py
Normal file
@ -0,0 +1,18 @@
|
||||
from tcga_downloader.manifest import ManifestRecord, write_manifest, load_manifest
|
||||
|
||||
|
||||
def test_manifest_roundtrip_tsv(tmp_path):
    """Records written as TSV must load back equal to what was written."""
    expected = [
        ManifestRecord(
            file_id="f1",
            file_name="a.tsv",
            data_type="Gene Expression",
            data_format="TSV",
            size=123,
            md5="abc",
        )
    ]
    manifest_file = tmp_path / "m.tsv"

    write_manifest(expected, manifest_file, fmt="tsv")

    assert load_manifest(manifest_file) == expected
|
||||
Loading…
Reference in New Issue
Block a user