tcga-downloader/examples/validate_downloads.py
yunpeng.zhang a01a59b371
Some checks failed
CI / Lint (push) Failing after 9m32s
CI / Test (3.11) (push) Successful in 6m41s
CI / Test (3.12) (push) Successful in 4m21s
feat: add interactive cli
2026-02-09 13:13:39 +08:00

63 lines
1.7 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Example: Validate manifest and downloaded files.
"""
from pathlib import Path
from tcga_downloader.manifest import (
load_manifest,
validate_files_against_manifest,
validate_manifest,
)
def main():
    """Check the manifest and any downloaded files, reporting to stdout.

    Loads ``manifest.tsv`` from the current working directory and prints
    whatever ``validate_manifest`` reports for it. If a ``data`` directory
    exists, the records and that directory are passed to
    ``validate_files_against_manifest`` and the missing files and checksum
    errors it returns are printed; otherwise that step is skipped with a
    notice.
    """
    manifest_file = Path("manifest.tsv")
    download_dir = Path("data")

    print("Loading manifest...")
    records = load_manifest(manifest_file)
    print(f"Loaded {len(records)} records from manifest")

    print("\nValidating manifest structure...")
    problems = validate_manifest(records)
    if not problems:
        print("Manifest structure is valid!")
    else:
        print("Manifest validation errors found:")
        for problem in problems:
            print(f" - {problem}")

    # Guard clause: without a data directory there is nothing to compare.
    if not download_dir.exists():
        print(f"\nData directory {download_dir} does not exist. Skipping file validation.")
        return

    print("\nValidating downloaded files against manifest...")
    absent, bad_checksums = validate_files_against_manifest(records, download_dir)

    if absent:
        _print_preview("Missing files", absent)
    if bad_checksums:
        _print_preview("Checksum errors", bad_checksums)

    if absent or bad_checksums:
        print("\nValidation failed. Please re-download missing files.")
    else:
        print("\nAll files are present and checksums match!")


def _print_preview(heading, entries, limit=10):
    """Print *heading* with the entry count, then at most *limit* entries."""
    print(f"\n{heading} ({len(entries)}):")
    for entry in entries[:limit]:
        print(f" - {entry}")
    # Summarise whatever did not fit in the preview instead of listing it.
    remainder = len(entries) - limit
    if remainder > 0:
        print(f" ... and {remainder} more")
# Run the validation only when executed as a script, so importing this
# module has no side effects.
if __name__ == "__main__":
    main()