# Source code for fvdb_reality_capture.tools._download_example_data

# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0
#

import logging
import pathlib
import shutil

import requests
import tqdm


def _download_one_dataset(dataset_name: str, dataset_url: str, dataset_download_path: pathlib.Path):
    """
    Download a single dataset archive and extract it into ``dataset_download_path``.

    If ``dataset_download_path`` already exists, the dataset is assumed to be present and nothing is
    downloaded. Otherwise the directory is created, the archive at ``dataset_url`` is streamed into it,
    and the archive is unpacked in place (the archive file is kept next to the extracted contents).

    If the download or the extraction fails, the directory created by this call is removed again so that
    a later call retries instead of skipping a half-finished dataset.

    Args:
        dataset_name (str): Human-readable dataset name, used only in log and progress messages.
        dataset_url (str): URL of the archive to download. The last path component of the URL is used
            as the local archive file name.
        dataset_download_path (pathlib.Path): Directory that receives the archive and its contents.

    Raises:
        RuntimeError: If the server does not respond with status code 200, or if the downloaded
            file is empty.

    Errors raised by ``requests`` or by :func:`shutil.unpack_archive` are propagated unchanged.
    """
    logger = logging.getLogger(f"{__name__}.download_example_data")
    dataset_filename = pathlib.Path(dataset_url).name
    dataset_file_path = dataset_download_path / dataset_filename

    if dataset_download_path.exists():
        logger.warning(f"Dataset directory {dataset_download_path} already exists. Skipping download.")
        return

    # (connect, read) timeouts in seconds. The read timeout bounds the wait between received bytes,
    # not the whole transfer, so large archives still download while a stalled connection fails.
    request_timeout = (10, 60)

    dataset_download_path.mkdir(parents=True, exist_ok=True)
    try:
        # The context manager releases the connection even when streaming is aborted.
        with requests.get(dataset_url, stream=True, timeout=request_timeout) as response:
            if response.status_code != 200:
                raise RuntimeError(
                    f"Failed to download dataset {dataset_name} from {dataset_url}. "
                    f"Status code: {response.status_code}"
                )

            # Content-Length is optional (e.g. chunked transfer encoding); without it the progress
            # bar runs with an unknown total instead of rejecting the download.
            content_length = response.headers.get("content-length", "")
            total_size = int(content_length) if content_length.isdigit() else None
            if total_size == 0:
                raise RuntimeError(f"Downloaded file from {dataset_url} is empty.")

            logger.info(f"Downloading dataset {dataset_name} from {dataset_url} to {dataset_file_path}")
            bytes_written = 0
            with open(dataset_file_path, "wb") as f:
                with tqdm.tqdm(
                    total=total_size,
                    unit="B",
                    unit_scale=True,
                    desc=f"Downloading dataset {dataset_name}",
                ) as progress_bar:
                    for chunk in response.iter_content(chunk_size=8192):
                        if not chunk:
                            continue
                        f.write(chunk)
                        bytes_written += len(chunk)
                        progress_bar.update(len(chunk))

        if bytes_written == 0:
            raise RuntimeError(f"Downloaded file from {dataset_url} is empty.")
        logger.info("Dataset downloaded successfully.")

        logger.info(f"Extracting archive {dataset_filename} to {dataset_download_path}.")
        shutil.unpack_archive(dataset_file_path, extract_dir=dataset_download_path)
    except BaseException:
        # Cleanup only, then re-raise. BaseException is deliberate so that Ctrl-C during a long
        # download also removes the partial directory; otherwise the "already exists" check above
        # would skip this dataset on every later call.
        shutil.rmtree(dataset_download_path, ignore_errors=True)
        raise



# [docs] (documentation link marker left over from the HTML rendering of this file)
def download_example_data(dataset="all", download_path: str | pathlib.Path = pathlib.Path.cwd() / "data"):
    """
    Download example datasets for fvdb-reality-capture.

    The fvdb-reality-capture package provides several example datasets that can be used for testing and experimentation.
    This function allows users to easily download these datasets for use in their own projects.

    The available datasets are:

    - ``"mipnerf360"``: A dataset of 360-degree images originally captured for the
      `Mip-NeRF 360 <https://arxiv.org/abs/2111.12077>`_ paper.
    - ``"gettysburg"``: A dataset featuring an aeriel flyover of the city of Gettysburg, Pennsylvania.
    - ``"safety_park"``: An aerial drone orbit of a safety training park.
    - ``"miris_factory"``: A dataset of synthetically rendered images of a car factory, generated by
      `Miris <https://miris.com/>`_.
    - ``"all"``: Download all available datasets.

    Each dataset is downloaded as a compressed archive and extracted into its own subdirectory in the path specified
    by ``download_path``.

    Args:
        dataset (str): The name of the dataset to download. Use ``"all"`` to download all datasets.
            Default is ``"all"``.
        download_path (str | pathlib.Path): The directory where datasets will be downloaded.
            Default is the ``data`` subdirectory in the current working directory.
    """

    # dataset urls
    dataset_urls = {
        "mipnerf360": "https://fvdb-data.s3.us-east-2.amazonaws.com/fvdb-reality-capture/360_v2.zip",
        "gettysburg": "https://fvdb-data.s3.us-east-2.amazonaws.com/fvdb-reality-capture/gettysburg.zip",
        "safety_park": "https://fvdb-data.s3.us-east-2.amazonaws.com/fvdb-reality-capture/safety_park.zip",
        "miris_factory": "https://fvdb-data.s3.us-east-2.amazonaws.com/fvdb-reality-capture/miris_factory.zip",
    }

    # where each dataset goes
    dataset_directories = {
        "mipnerf360": "360_v2",
        "gettysburg": "gettysburg",
        "safety_park": "safety_park",
        "miris_factory": "miris_factory",
    }

    if isinstance(download_path, str):
        download_path = pathlib.Path(download_path)
    download_path.mkdir(parents=True, exist_ok=True)

    if dataset not in dataset_urls.keys() and dataset != "all":
        raise ValueError(f"Unknown dataset {dataset}. Supported datasets are {list(dataset_urls.keys())} and 'all'.")

    if dataset == "all":
        for dataset_name in dataset_urls:
            dataset_download_path = download_path / dataset_directories[dataset_name]
            _download_one_dataset(dataset_name, dataset_urls[dataset_name], dataset_download_path)
    else:
        dataset_download_path = download_path / dataset_directories[dataset]
        _download_one_dataset(dataset, dataset_urls[dataset], dataset_download_path)