feat: add manifest read/write
This commit is contained in:
parent
09a5c0989f
commit
e6b2a174c5
57
tcga_downloader/manifest.py
Normal file
57
tcga_downloader/manifest.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable, List
|
||||||
|
|
||||||
|
# Column names, in order, passed as `fieldnames` to the csv.DictWriter in
# write_manifest; they form the header row of a TSV manifest.
REQUIRED_FIELDS = ["file_id", "file_name", "data_type", "data_format", "size", "md5"]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ManifestRecord:
    """A single manifest entry.

    The dataclass is frozen, so instances are immutable after construction
    and compare (and hash) by their field values.
    """

    # Identifier and name of the file; _validate_record requires both to be
    # non-empty.
    file_id: str
    file_name: str
    data_type: str
    data_format: str
    # _validate_record rejects negative values (presumably a byte count --
    # confirm against the data source).
    size: int
    # Presumably the file's MD5 checksum -- confirm against the data source.
    md5: str
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_record(rec: ManifestRecord) -> None:
|
||||||
|
if not rec.file_id or not rec.file_name:
|
||||||
|
raise ValueError("file_id and file_name are required")
|
||||||
|
if rec.size < 0:
|
||||||
|
raise ValueError("size must be non-negative")
|
||||||
|
|
||||||
|
|
||||||
|
def write_manifest(records: Iterable[ManifestRecord], path: Path, fmt: str = "tsv") -> None:
    """Validate ``records`` and write them to ``path`` as TSV or JSON.

    Args:
        records: Records to write; consumed once into a list.
        path: Destination file (anything accepted by ``Path``).
        fmt: ``"tsv"`` (default) or ``"json"``.

    Raises:
        ValueError: if ``fmt`` is not ``"tsv"`` or ``"json"``, or if any
            record fails ``_validate_record``.
    """
    target = Path(path)
    if fmt not in {"tsv", "json"}:
        raise ValueError("fmt must be 'tsv' or 'json'")

    # Materialize first: every record is validated before anything is
    # written, and a one-shot iterable can still be written afterwards.
    entries = list(records)
    for entry in entries:
        _validate_record(entry)

    if fmt == "tsv":
        # newline="" lets the csv module control line endings itself.
        with target.open("w", newline="") as handle:
            tsv = csv.DictWriter(handle, fieldnames=REQUIRED_FIELDS, delimiter="\t")
            tsv.writeheader()
            tsv.writerows(entry.__dict__ for entry in entries)
        return

    # JSON: a list of objects, one per record, pretty-printed.
    payload = json.dumps([entry.__dict__ for entry in entries], indent=2)
    target.write_text(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def _record_from_row(row: dict) -> ManifestRecord:
    """Build a ManifestRecord from one parsed row, coercing a textual size to int."""
    fields = dict(row)
    size = fields.get("size")
    # csv.DictReader yields every value as str; without this conversion a
    # TSV-loaded record would carry size="123" and never equal the record
    # that was written (and `size < 0` checks would raise TypeError).
    if isinstance(size, str):
        try:
            fields["size"] = int(size)
        except ValueError as err:
            raise ValueError(f"size must be an integer, got {size!r}") from err
    return ManifestRecord(**fields)


def load_manifest(path: Path) -> List[ManifestRecord]:
    """Load manifest records from a JSON or TSV file.

    The format is chosen from the file suffix: ``.json`` (case-insensitive)
    is parsed as a JSON list of objects; anything else is read as
    tab-separated values with a header row.

    Args:
        path: File to read (anything accepted by ``Path``).

    Returns:
        One ``ManifestRecord`` per row, with ``size`` converted to ``int``
        when it was read as text.

    Raises:
        ValueError: if a textual ``size`` is not a valid integer.
        TypeError: if a row's keys do not match the ``ManifestRecord`` fields.
    """
    path = Path(path)
    if path.suffix.lower() == ".json":
        data = json.loads(path.read_text())
        return [_record_from_row(row) for row in data]

    # newline="" lets the csv module handle line endings itself.
    with path.open("r", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        return [_record_from_row(row) for row in reader]
|
||||||
18
tests/test_manifest.py
Normal file
18
tests/test_manifest.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
from tcga_downloader.manifest import ManifestRecord, write_manifest, load_manifest
|
||||||
|
|
||||||
|
|
||||||
|
def test_manifest_roundtrip_tsv(tmp_path):
    """Records written as TSV and loaded back must equal the originals."""
    expected = [
        ManifestRecord(
            file_id="f1",
            file_name="a.tsv",
            data_type="Gene Expression",
            data_format="TSV",
            size=123,
            md5="abc",
        ),
    ]
    manifest_path = tmp_path / "m.tsv"

    write_manifest(expected, manifest_path, fmt="tsv")

    assert load_manifest(manifest_path) == expected
|
||||||
Loading…
Reference in New Issue
Block a user