Skip to content

Metadata

datamate.metadata

Module for handling metadata reading, writing and validation for Directory objects.

MetadataError

Bases: ValueError

Base class for metadata-related errors.

Source code in datamate/metadata.py
22
23
24
25
class MetadataError(ValueError):
    """Root of the metadata exception hierarchy.

    Derives from ``ValueError``, so handlers written for ``ValueError`` also
    catch every metadata-related error.
    """

MetadataParseError

Bases: MetadataError

Raised when metadata YAML cannot be parsed.

Source code in datamate/metadata.py
28
29
30
31
class MetadataParseError(MetadataError):
    """Signals that the YAML content of a metadata file could not be parsed."""

MetadataValidationError

Bases: MetadataError

Raised when metadata structure is invalid.

Source code in datamate/metadata.py
34
35
36
37
class MetadataValidationError(MetadataError):
    """Signals that parsed metadata does not have the required structure."""

read_meta

read_meta(path, retries=5)

Read and validate metadata from a directory’s _meta.yaml file.

Parameters:

Name Type Description Default
path Path

Directory path containing _meta.yaml

required
retries int

Number of retry attempts for transient failures

5

Returns:

Type Description
Namespace

Namespace containing validated metadata with config and status

Raises:

Type Description
MetadataParseError

If YAML parsing fails

MetadataValidationError

If metadata structure is invalid

FileNotFoundError

Not propagated to the caller: a missing _meta.yaml is caught internally and the default metadata is returned instead

NotADirectoryError

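Not propagated to the caller: if path is not a directory, the error is caught internally and the default metadata is returned instead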

Note

Returns default metadata (config=None, status="done") when _meta.yaml does not exist or path is not a directory

Source code in datamate/metadata.py
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def read_meta(path: Path, retries: int = 5) -> Namespace:
    """Read and validate metadata from a directory's `_meta.yaml` file.

    Args:
        path: Directory path containing `_meta.yaml`
        retries: Number of additional attempts (0.1 s apart) made when parsing
            or validation fails, to ride out transient failures

    Returns:
        Namespace containing validated metadata with `config` and `status`

    Raises:
        MetadataParseError: If YAML parsing still fails after all retries
        MetadataValidationError: If metadata structure is still invalid after
            all retries

    Note:
        `FileNotFoundError` and `NotADirectoryError` are not propagated: when
        `_meta.yaml` cannot be opened for either reason, default metadata
        (`config=None`, `status="done"`) is returned instead.
    """
    meta_path = path / "_meta.yaml"
    attempts_left = retries

    # Retry in a loop rather than by recursion so that the stack depth, and
    # therefore the `stacklevel` of the deprecation warning below, is the same
    # on every attempt.
    while True:
        try:
            yaml = YAML()
            with open(meta_path, "r") as f:
                try:
                    meta = yaml.load(f)
                except Exception as e:
                    raise MetadataParseError(
                        f"Failed to parse {meta_path}: {e}"
                    ) from e

            meta = namespacify(meta)

            # Validate metadata structure
            if not isinstance(meta, Namespace):
                raise MetadataValidationError(
                    f"Metadata must be a Namespace, got {type(meta)}"
                )

            # Handle legacy 'spec' field. This must run before 'config' is
            # validated: a legacy file may carry only 'spec', and would
            # otherwise be rejected for a missing 'config' before it could be
            # migrated.
            if hasattr(meta, "spec"):
                if not isinstance(meta.spec, Namespace):
                    raise MetadataValidationError(
                        f"Legacy 'spec' must be a Namespace in {meta_path}"
                    )
                warnings.warn(
                    f"Directory {path} uses legacy 'spec' instead of 'meta'. Please update.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                meta["config"] = meta.pop("spec")

            if not hasattr(meta, "config"):
                raise MetadataValidationError(
                    f"Missing required 'config' field in {meta_path}"
                )

            if not isinstance(meta.config, Namespace):
                raise MetadataValidationError(
                    f"'config' must be a Namespace in {meta_path}"
                )

            if not hasattr(meta, "status"):
                raise MetadataValidationError(
                    f"Missing required 'status' field in {meta_path}"
                )

            if not isinstance(meta.status, str):
                raise MetadataValidationError(
                    f"'status' must be a string in {meta_path}"
                )

            return meta

        except (FileNotFoundError, NotADirectoryError):
            # Return default metadata for non-existent or invalid paths
            return Namespace(config=None, status="done")

        except (MetadataError, AssertionError):
            # Out of attempts: propagate the last failure with its traceback.
            if attempts_left <= 0:
                raise
            attempts_left -= 1
            sleep(0.1)

write_meta

write_meta(path, config=None, status=None, **kwargs)

Write metadata to a YAML file.

Parameters:

Name Type Description Default
path Path

Path to write the metadata file

required
config Optional[Dict[str, Any]]

Configuration dictionary to store

None
status Optional[Literal['done', 'error', 'running']]

Status string indicating directory state

None
**kwargs Any

Additional metadata fields to include

{}
Note

Supports serialization of numpy types (arrays, integers, floats)

Source code in datamate/metadata.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def write_meta(
    path: Path,
    config: Optional[Dict[str, Any]] = None,
    status: Optional[Literal["done", "error", "running"]] = None,
    **kwargs: Any,
) -> None:
    """Write metadata to a YAML file.

    The file is opened in write mode, so any existing content at `path` is
    replaced. The dumped mapping has the keys `config` and `status` followed
    by any extra keyword fields.

    Args:
        path: Path to write the metadata file
        config: Configuration dictionary to store
        status: Status string indicating directory state
        **kwargs: Additional metadata fields to include

    Note:
        Supports serialization of numpy types (arrays, integers, floats).
        The numpy representers are registered on the representer class used
        by `YAML()`, not on this one instance.
    """
    yaml = YAML()

    # support dumping numpy objects
    def represent_numpy_float(self, value):
        return self.represent_float(float(value))

    def represent_numpy_int(self, value):
        return self.represent_int(int(value))

    def represent_numpy_array(self, value):
        # `represent_sequence` requires a tag as its first argument, so it
        # cannot be called with the data alone. `tolist()` yields nested
        # Python lists (or a plain scalar for 0-d arrays); `represent_data`
        # dispatches each of those to the matching built-in representer.
        return self.represent_data(value.tolist())

    yaml.Representer.add_multi_representer(np.ndarray, represent_numpy_array)
    yaml.Representer.add_multi_representer(np.floating, represent_numpy_float)
    yaml.Representer.add_multi_representer(np.integer, represent_numpy_int)

    # This allows directory types to be dumped to yaml
    config = _identify_elements(config)
    kwargs = _identify_elements(kwargs)

    # dump config to yaml
    with open(path, "w") as f:
        yaml.dump({"config": config, "status": status, **kwargs}, f)