62 lines
1.7 KiB
Python
62 lines
1.7 KiB
Python
from __future__ import annotations
|
|
|
|
import csv
|
|
import json
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Iterable, List
|
|
|
|
# Column names of a manifest, in output order. The list is used as the TSV
# header by write_manifest and matches the ManifestRecord fields one-to-one.
REQUIRED_FIELDS = [
    "file_id",
    "file_name",
    "data_type",
    "data_format",
    "size",
    "md5",
]
|
|
|
|
|
|
@dataclass(frozen=True)
class ManifestRecord:
    """One immutable manifest entry describing a single file.

    Instances are frozen, so attributes cannot be reassigned after
    construction. Field order is significant: it is the positional
    constructor order and the key order of the instance ``__dict__``.
    """

    # Identity of the entry.
    file_id: str
    file_name: str

    # Free-form classification strings; this class does not interpret them.
    data_type: str
    data_format: str

    # Presumably the file size in bytes -- TODO confirm the unit with callers.
    size: int

    # Presumably the hex MD5 digest of the file; it is not verified here.
    md5: str
|
|
|
|
|
|
def _validate_record(rec: ManifestRecord) -> None:
    """Check that *rec* is fit to be written to a manifest.

    Only ``file_id``, ``file_name`` and ``size`` are inspected; the other
    fields are not checked.

    Raises:
        ValueError: If ``file_id`` or ``file_name`` is empty (falsy), or if
            ``size`` is negative.
    """
    # Both identity fields must be truthy; file_id is tested first.
    if not (rec.file_id and rec.file_name):
        raise ValueError("file_id and file_name are required")

    if rec.size < 0:
        raise ValueError("size must be non-negative")
|
|
|
|
|
|
def write_manifest(records: Iterable[ManifestRecord], path: Path, fmt: str = "tsv") -> None:
    """Validate *records* and write them to *path* as a TSV or JSON manifest.

    The output format is selected by *fmt*, not by the suffix of *path*.
    ``load_manifest`` chooses its parser from the suffix, so JSON manifests
    should be given a ``.json`` suffix if they are to be read back.

    Args:
        records: Records to write. The iterable is consumed once, and every
            record is validated before the file is opened.
        path: Destination file; anything accepted by ``Path`` works.
        fmt: ``"tsv"`` (default) for a tab-separated file whose header row is
            ``REQUIRED_FIELDS``, or ``"json"`` for a JSON array of objects.

    Raises:
        ValueError: If *fmt* is neither ``"tsv"`` nor ``"json"``, or if any
            record is rejected by ``_validate_record``.
    """
    path = Path(path)
    if fmt not in {"tsv", "json"}:
        raise ValueError("fmt must be 'tsv' or 'json'")

    # Materialize first so a one-shot iterator can be validated and written.
    records = list(records)
    for rec in records:
        _validate_record(rec)

    if fmt == "json":
        data = [rec.__dict__ for rec in records]
        # Explicit UTF-8 keeps the file independent of the locale encoding.
        path.write_text(json.dumps(data, indent=2), encoding="utf-8")
        return

    # newline="" lets the csv module control line endings itself; UTF-8 is
    # pinned so non-ASCII file names round-trip on every platform.
    with path.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=REQUIRED_FIELDS, delimiter="\t")
        writer.writeheader()
        writer.writerows(rec.__dict__ for rec in records)


def load_manifest(path: Path) -> List[ManifestRecord]:
    """Read a manifest file into a list of ``ManifestRecord`` objects.

    The parser is chosen from the file suffix: ``.json`` (case-insensitive)
    is parsed as a JSON array of objects; any other suffix is read as a
    tab-separated file with a header row.

    Args:
        path: Manifest file to read; anything accepted by ``Path`` works.

    Returns:
        The records in file order. For TSV input the ``size`` column is
        converted to ``int``; JSON values are passed to the constructor
        unchanged.

    Raises:
        KeyError: If a TSV row has no ``size`` column.
        ValueError: If a TSV ``size`` value is not an integer, or the JSON
            document is malformed.
        TypeError: If a row's keys do not match the ``ManifestRecord``
            fields.
    """
    path = Path(path)

    # "utf-8-sig" decodes plain UTF-8 and additionally skips a leading BOM,
    # which some editors and spreadsheet tools prepend to exported files.
    if path.suffix.lower() == ".json":
        data = json.loads(path.read_text(encoding="utf-8-sig"))
        return [ManifestRecord(**row) for row in data]

    with path.open("r", newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f, delimiter="\t")
        # csv yields every value as a string, so size is converted here.
        # The list is built eagerly while the file is still open.
        return [
            ManifestRecord(**{**row, "size": int(row["size"])})
            for row in reader
        ]
|