Input/Output API

This section documents the components responsible for handling file input and output in FlatProt, including parsing structure files and configuration files (styles, annotations).

IO Concept

The IO module acts as the interface between FlatProt's internal data structures and external files. Its primary responsibilities are:

Structure Parsing: Reading 3D coordinates, sequence information, and potentially secondary structure assignments from standard formats like PDB and mmCIF. This often involves leveraging libraries like Gemmi (e.g., via GemmiStructureParser).
Configuration Parsing: Reading and validating configuration files written in TOML format, specifically for custom styles (StyleParser) and annotations (AnnotationParser). These parsers translate the TOML definitions into structured Pydantic models used by the Scene and Rendering systems.
Validation: Performing basic checks on input files (e.g., existence, basic format validation) before attempting full parsing.
Error Handling: Defining specific exception types related to file reading, parsing, and validation errors.

Structure Parser

Handles reading and parsing protein structure files (PDB, mmCIF).

Bases: StructureParser

Source code in src/flatprot/io/structure_gemmi_adapter.py

class GemmiStructureParser(StructureParser):
    """Structure parser and writer backed by the gemmi library.

    Reads a structure file with ``gemmi.read_structure`` and converts the
    chains of its first model into ``Chain`` objects that carry one C-alpha
    coordinate per residue. It can also write a ``Structure`` back to disk as
    a CA-only PDB file.
    """

    def parse_structure(
        self, structure_file: Path, secondary_structure_file: Optional[Path] = None
    ) -> Structure:
        """Main entry point for structure parsing.

        Args:
            structure_file: Path of the structure file handed to gemmi.
            secondary_structure_file: Optional DSSP file. When given, it is
                used instead of the helix/sheet records of the structure.

        Returns:
            A ``Structure`` with one ``Chain`` per chain of the first model.
            Its id is the stem of ``structure_file``.
        """
        # 1. Parse structure
        structure = self._parse_structure_file(structure_file)
        model_chains = list(structure[0])

        # 2. Resolve secondary structure once. The lookup takes the whole
        # structure (or the DSSP file), never the individual chain, so running
        # it per chain only repeats the same work. It is skipped when the
        # model has no chains, exactly as the per-chain call would have been.
        ss_regions = (
            self._get_secondary_structure(structure, secondary_structure_file)
            if model_chains
            else []
        )

        # 3. Process each chain
        chains = []
        for chain in model_chains:
            chain_data = self._parse_chain_data(chain)
            chain_obj = Chain(chain.name, **chain_data)
            # Every region is offered to every chain; residues a chain does
            # not contain are tolerated via allow_missing_residues.
            for region in ss_regions:
                chain_obj.add_secondary_structure(
                    region[0], region[1], region[2], allow_missing_residues=True
                )
            chains.append(chain_obj)

        # Assign structure ID from filename stem
        structure_id = structure_file.stem
        return Structure(chains, id=structure_id)

    def _parse_structure_file(self, structure_file: Path) -> gemmi.Structure:
        """Parse structure from file using gemmi."""
        return gemmi.read_structure(str(structure_file))

    def _parse_chain_data(self, chain: gemmi.Chain) -> dict:
        """Extract basic chain data using gemmi.

        Residues without a ``CA`` atom are skipped entirely.

        Returns:
            A dict with the keys ``index`` (residue sequence numbers),
            ``residues`` (``ResidueType`` values) and ``coordinates`` (float32
            array built from the CA positions), all in chain order.
        """
        residue_indices = []
        residue_names = []
        coordinates = []

        def get_ca_coordinates(residue: gemmi.Residue) -> Optional[np.ndarray]:
            """Return the position of the first atom named CA, or None."""
            for atom in residue:
                if atom.name == "CA":
                    return np.array([atom.pos.x, atom.pos.y, atom.pos.z])
            return None

        for residue in chain:
            coordinate = get_ca_coordinates(residue)
            if coordinate is None:
                continue
            coordinates.append(coordinate)
            residue_indices.append(residue.seqid.num)
            # Use a separate name for the one-letter code so the loop variable
            # keeps referring to the gemmi residue.
            one_letter = gemmi.find_tabulated_residue(residue.name).one_letter_code
            # Any code that is not an upper-case letter is mapped to "X".
            if not one_letter.isupper():
                one_letter = "X"
            residue_names.append(ResidueType(one_letter))
        assert len(residue_indices) == len(coordinates)
        assert len(residue_indices) == len(residue_names)
        return {
            "index": residue_indices,
            "residues": residue_names,
            "coordinates": np.array(coordinates, dtype=np.float32),
        }

    def _get_secondary_structure(
        self,
        structure: gemmi.Structure,
        secondary_structure_file: Optional[Path] = None,
    ) -> list[tuple[SecondaryStructureType, int, int]]:
        """Return secondary-structure regions as (type, start, end) tuples.

        A DSSP file takes precedence when supplied; otherwise the helix and
        sheet records stored in the gemmi structure are used.
        """
        if secondary_structure_file is not None:
            return parse_dssp(secondary_structure_file)
        return self._get_secondary_structure_cif(structure)

    def _get_secondary_structure_cif(
        self, structure: gemmi.Structure
    ) -> list[tuple[SecondaryStructureType, int, int]]:
        """Get secondary structure from gemmi structure.

        Helices are listed first, followed by every strand of every sheet.
        Start and end are the residue sequence numbers of the record.
        """
        ss_regions = []

        # Extract helices and sheets from gemmi structure
        for helix in structure.helices:
            start = helix.start.res_id.seqid.num
            end = helix.end.res_id.seqid.num
            ss_regions.append((SecondaryStructureType.HELIX, start, end))

        for sheet in structure.sheets:
            for strand in sheet.strands:
                start = strand.start.res_id.seqid.num
                end = strand.end.res_id.seqid.num
                ss_regions.append((SecondaryStructureType.SHEET, start, end))

        return ss_regions

    def save_structure(
        self, structure: Structure, output_file: Path, separate_chains=False
    ) -> None:
        """Save structure using gemmi.

        Writes a single-model PDB file that contains one ``CA`` atom per
        residue of every chain.

        Args:
            structure: Structure to write; iterated through ``items()`` as
                (chain id, chain data) pairs.
            output_file: Destination path of the PDB file.
            separate_chains: Accepted for interface compatibility; this
                implementation does not read it.
        """
        gemmi_structure = gemmi.Structure()
        model = gemmi.Model("1")

        for chain_id, chain_data in structure.items():
            chain = gemmi.Chain(chain_id)

            for residue_idx, residue, coord in zip(
                chain_data.index, chain_data.residues, chain_data.coordinates
            ):
                gemmi_res = gemmi.Residue()
                gemmi_res.name = residue.name
                gemmi_res.seqid = gemmi.SeqId(residue_idx)

                ca = gemmi.Atom()
                ca.name = "CA"
                ca.pos = gemmi.Position(*coord)
                ca.element = gemmi.Element("C")
                gemmi_res.add_atom(ca)

                chain.add_residue(gemmi_res)

            model.add_chain(chain)

        gemmi_structure.add_model(model)
        gemmi_structure.write_pdb(str(output_file))

`parse_structure(structure_file, secondary_structure_file=None)`

Main entry point for structure parsing

Source code in src/flatprot/io/structure_gemmi_adapter.py

def parse_structure(
    self, structure_file: Path, secondary_structure_file: Optional[Path] = None
) -> Structure:
    """Main entry point for structure parsing.

    Args:
        structure_file: Path of the structure file to parse.
        secondary_structure_file: Optional file forwarded to the
            secondary-structure lookup.

    Returns:
        A ``Structure`` with one ``Chain`` per chain of the first model. Its
        id is the stem of ``structure_file``.
    """
    # 1. Parse structure
    structure = self._parse_structure_file(structure_file)
    model_chains = list(structure[0])

    # 2. Resolve secondary structure once. The lookup receives the whole
    # structure and the optional file, never the individual chain, so running
    # it per chain only repeats the same work. It is skipped when the model
    # has no chains, exactly as the per-chain call would have been.
    ss_regions = (
        self._get_secondary_structure(structure, secondary_structure_file)
        if model_chains
        else []
    )

    # 3. Process each chain
    chains = []
    for chain in model_chains:
        chain_data = self._parse_chain_data(chain)
        chain_obj = Chain(chain.name, **chain_data)
        # Every region is offered to every chain; residues a chain does not
        # contain are tolerated via allow_missing_residues.
        for region in ss_regions:
            chain_obj.add_secondary_structure(
                region[0], region[1], region[2], allow_missing_residues=True
            )
        chains.append(chain_obj)

    # Assign structure ID from filename stem
    structure_id = structure_file.stem
    return Structure(chains, id=structure_id)

`save_structure(structure, output_file, separate_chains=False)`

Save structure using gemmi

Source code in src/flatprot/io/structure_gemmi_adapter.py

def save_structure(
    self, structure: Structure, output_file: Path, separate_chains=False
) -> None:
    """Save structure using gemmi.

    Writes a single-model PDB file that contains one ``CA`` atom per residue
    of every chain.

    Args:
        structure: Structure to write; iterated through ``items()`` as
            (chain id, chain data) pairs.
        output_file: Destination path of the PDB file.
        separate_chains: Accepted for interface compatibility; this
            implementation does not read it.
    """
    gemmi_structure = gemmi.Structure()
    model = gemmi.Model("1")

    for chain_id, chain_data in structure.items():
        chain = gemmi.Chain(chain_id)

        # The three per-residue sequences are walked in lockstep; no running
        # index is needed.
        for residue_idx, residue, coord in zip(
            chain_data.index, chain_data.residues, chain_data.coordinates
        ):
            gemmi_res = gemmi.Residue()
            gemmi_res.name = residue.name
            gemmi_res.seqid = gemmi.SeqId(residue_idx)

            ca = gemmi.Atom()
            ca.name = "CA"
            ca.pos = gemmi.Position(*coord)
            ca.element = gemmi.Element("C")
            gemmi_res.add_atom(ca)

            chain.add_residue(gemmi_res)

        model.add_chain(chain)

    gemmi_structure.add_model(model)
    gemmi_structure.write_pdb(str(output_file))

options: show_root_heading: true members_order: source

Style Parser

Parses TOML files defining custom styles for structure elements.

Parser for TOML style files focusing on structure elements and connections.

Source code in src/flatprot/io/styles.py

class StyleParser:
    """Parser for TOML style files focusing on structure elements and connections."""

    # Known top-level sections and the Pydantic model that validates each one
    KNOWN_SECTIONS = {
        "helix": HelixStyle,
        "sheet": SheetStyle,
        "coil": CoilStyle,
        "connection": ConnectionStyle,
        "position_annotation": PositionAnnotationStyle,
    }

    def __init__(self, file_path: Union[str, Path]):
        """Initialize the style parser.

        Loads the TOML file eagerly and warns about unknown sections.

        Args:
            file_path: Path to the TOML style file

        Raises:
            StyleFileNotFoundError: If the file doesn't exist
            InvalidTomlError: If the TOML is malformed
        """
        self.file_path = Path(file_path)
        if not self.file_path.exists():
            raise StyleFileNotFoundError(f"Style file not found: {self.file_path}")

        try:
            # TOML documents are UTF-8 by specification, so do not depend on
            # the platform's default text encoding.
            with open(self.file_path, "r", encoding="utf-8") as f:
                self.raw_style_data = toml.load(f)
        except toml.TomlDecodeError as e:
            # Chain the decoder error so its position details stay available.
            raise InvalidTomlError(f"Invalid TOML format: {e}") from e

        self._validate_structure()

    def _validate_structure(self) -> None:
        """Checks for unknown top-level sections in the style file."""
        unknown_sections = [
            section
            for section in self.raw_style_data
            if section not in self.KNOWN_SECTIONS
        ]

        if unknown_sections:
            # This is just a warning, not an error
            print(
                f"Warning: Unknown style sections found and ignored: {', '.join(unknown_sections)}"
            )

    def parse(
        self,
    ) -> Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]:
        """Parses the known sections from the TOML file into Pydantic style objects.

        Returns:
            A dictionary mapping section names ('helix', 'sheet', 'coil', 'connection', 'position_annotation')
            to their corresponding Pydantic style model instances. Sections
            absent from the file are left out.

        Raises:
            StyleValidationError: If any style section has invalid data according to
                                  its Pydantic model.
            StyleParsingError: For other general parsing issues.
        """
        parsed_styles: Dict[
            str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]
        ] = {}

        for section_name, StyleModelClass in self.KNOWN_SECTIONS.items():
            section_data = self.raw_style_data.get(section_name)

            if section_data is None:
                # Section not present in the file, skip it (will use default later)
                continue

            if not isinstance(section_data, dict):
                raise StyleValidationError(
                    f"Invalid format for section '{section_name}'. Expected a table (dictionary), got {type(section_data).__name__}."
                )

            try:
                # Pydantic handles validation and type conversion (including Color)
                style_instance = StyleModelClass(**section_data)
                parsed_styles[section_name] = style_instance
            except ValidationError as e:
                # Provide more context for validation errors. An error that is
                # not tied to a field has an empty 'loc'; fall back to the
                # section name instead of failing with IndexError.
                error_details = e.errors()
                error_msgs = [
                    f"  - {err['loc'][0] if err['loc'] else section_name}: {err['msg']}"
                    for err in error_details
                ]
                # Real newlines, so each error is reported on its own line.
                raise StyleValidationError(
                    f"Invalid style definition in section '{section_name}':\n"
                    + "\n".join(error_msgs)
                ) from e
            except Exception as e:
                # Catch other potential errors during instantiation
                raise StyleParsingError(
                    f"Error processing style section '{section_name}': {e}"
                ) from e

        return parsed_styles

    def get_element_styles(
        self,
    ) -> Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]:
        """Parse and return the element styles defined in the TOML file.

        Returns:
            Dict mapping element type names ('helix', 'sheet', 'coil', 'connection', 'position_annotation')
            to their parsed Pydantic style objects. Sections not found in the TOML
            file will be omitted from the dictionary.

        Raises:
            StyleValidationError: If validation of any section fails.
            StyleParsingError: For general parsing issues.
        """
        try:
            return self.parse()
        except (StyleValidationError, StyleParsingError):
            # Re-raise exceptions from parse
            raise
        except Exception as e:
            # Catch unexpected errors during the overall process
            raise StyleParsingError(f"Failed to get element styles: {e}") from e

    def get_raw_data(self) -> Dict[str, Any]:
        """Return the raw, unprocessed style data loaded from the TOML file.

        Returns:
            Dict containing the raw parsed TOML data.
        """
        return self.raw_style_data

`__init__(file_path)`

Initialize the style parser.

Parameters:	`file_path` (`Union[str, Path]`) – Path to the TOML style file

Raises:	`StyleFileNotFoundError` – If the file doesn't exist `InvalidTomlError` – If the TOML is malformed

Source code in src/flatprot/io/styles.py

def __init__(self, file_path: Union[str, Path]):
    """Initialize the style parser.

    Loads the TOML file eagerly and then checks its top-level sections.

    Args:
        file_path: Path to the TOML style file

    Raises:
        StyleFileNotFoundError: If the file doesn't exist
        InvalidTomlError: If the TOML is malformed
    """
    self.file_path = Path(file_path)
    if not self.file_path.exists():
        raise StyleFileNotFoundError(f"Style file not found: {self.file_path}")

    try:
        # TOML documents are UTF-8 by specification, so do not depend on the
        # platform's default text encoding.
        with open(self.file_path, "r", encoding="utf-8") as f:
            self.raw_style_data = toml.load(f)
    except toml.TomlDecodeError as e:
        # Chain the decoder error so its position details stay available.
        raise InvalidTomlError(f"Invalid TOML format: {e}") from e

    self._validate_structure()

`get_element_styles()`

Parse and return the element styles defined in the TOML file.

Returns:	`Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]` – Dict mapping element type names ('helix', 'sheet', 'coil', 'connection', 'position_annotation') to their parsed Pydantic style objects. Sections not found in the TOML file will be omitted from the dictionary.

Raises:	`StyleValidationError` – If validation of any section fails. `StyleParsingError` – For general parsing issues.

Source code in src/flatprot/io/styles.py

def get_element_styles(
    self,
) -> Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]:
    """Return the style objects for the sections present in the TOML file.

    Delegates to ``parse()`` and normalises unexpected failures into
    ``StyleParsingError``.

    Returns:
        Mapping from element type name ('helix', 'sheet', 'coil', 'connection',
        'position_annotation') to its parsed Pydantic style object. Sections
        missing from the file do not appear in the mapping.

    Raises:
        StyleValidationError: If validation of any section fails.
        StyleParsingError: For general parsing issues.
    """
    try:
        element_styles = self.parse()
    except (StyleValidationError, StyleParsingError):
        # Already one of the documented error types: pass it through as is.
        raise
    except Exception as unexpected:
        # Anything else is reported as a general parsing failure.
        raise StyleParsingError(
            f"Failed to get element styles: {unexpected}"
        ) from unexpected
    return element_styles

`get_raw_data()`

Return the raw, unprocessed style data loaded from the TOML file.

Returns:	`Dict[str, Any]` – Dict containing the raw parsed TOML data.

Source code in src/flatprot/io/styles.py

def get_raw_data(self) -> Dict[str, Any]:
    """Expose the style data exactly as it was loaded from the TOML file.

    Returns:
        The dictionary stored in ``raw_style_data``; it is returned as is,
        not copied.
    """
    raw_data = self.raw_style_data
    return raw_data

`parse()`

Parses the known sections from the TOML file into Pydantic style objects.

Returns:	`Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]` – A dictionary mapping section names ('helix', 'sheet', 'coil', 'connection', 'position_annotation') to their corresponding Pydantic style model instances.

Raises:	`StyleValidationError` – If any style section has invalid data according to its Pydantic model. `StyleParsingError` – For other general parsing issues.

Source code in src/flatprot/io/styles.py

def parse(
    self,
) -> Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]:
    """Parses the known sections from the TOML file into Pydantic style objects.

    Returns:
        A dictionary mapping section names ('helix', 'sheet', 'coil', 'connection', 'position_annotation')
        to their corresponding Pydantic style model instances. Sections absent
        from the file are left out.

    Raises:
        StyleValidationError: If any style section has invalid data according to
                              its Pydantic model.
        StyleParsingError: For other general parsing issues.
    """
    parsed_styles: Dict[
        str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]
    ] = {}

    for section_name, StyleModelClass in self.KNOWN_SECTIONS.items():
        section_data = self.raw_style_data.get(section_name)

        if section_data is None:
            # Section not present in the file, skip it (will use default later)
            continue

        if not isinstance(section_data, dict):
            raise StyleValidationError(
                f"Invalid format for section '{section_name}'. Expected a table (dictionary), got {type(section_data).__name__}."
            )

        try:
            # Pydantic handles validation and type conversion (including Color)
            style_instance = StyleModelClass(**section_data)
            parsed_styles[section_name] = style_instance
        except ValidationError as e:
            # Provide more context for validation errors. An error that is not
            # tied to a field has an empty 'loc'; fall back to the section
            # name instead of failing with IndexError.
            error_details = e.errors()
            error_msgs = [
                f"  - {err['loc'][0] if err['loc'] else section_name}: {err['msg']}"
                for err in error_details
            ]
            # Real newlines, so each error is reported on its own line.
            raise StyleValidationError(
                f"Invalid style definition in section '{section_name}':\n"
                + "\n".join(error_msgs)
            ) from e
        except Exception as e:
            # Catch other potential errors during instantiation
            raise StyleParsingError(
                f"Error processing style section '{section_name}': {e}"
            ) from e

    return parsed_styles

options: show_root_heading: true members_order: source

Annotation Parser

Parses TOML files defining annotations (points, lines, areas) and their inline styles.

Parses annotation files in TOML format with optional inline styles.

Creates fully initialized PointAnnotation, LineAnnotation, or AreaAnnotation objects from the flatprot.scene.annotations module.

Source code in src/flatprot/io/annotations.py

class AnnotationParser:
    """Parses annotation files in TOML format with optional inline styles.

    Creates fully initialized PointAnnotation, LineAnnotation, or AreaAnnotation
    objects from the `flatprot.scene.annotations` module.
    """

    def __init__(self, file_path: Union[str, Path]):
        """Initialize the parser with the path to the annotation file.

        The file is only checked for existence here; it is read in ``parse()``.

        Args:
            file_path: Path to the TOML file containing annotations.

        Raises:
            AnnotationFileNotFoundError: If the file does not exist.
        """
        self.file_path = Path(file_path)
        # Check file existence
        if not self.file_path.exists():
            raise AnnotationFileNotFoundError(str(self.file_path))

        # Map annotation type strings to their respective parsing methods
        self._parsers: Dict[
            str, Callable[[Dict[str, Any], str, str], AnnotationObjectType]
        ] = {
            "point": self._parse_point_annotation,
            "line": self._parse_line_annotation,
            "area": self._parse_area_annotation,
        }

    def _parse_inline_style(
        self,
        style_dict: Optional[Dict[str, Any]],
        StyleModel: Type[BaseAnnotationStyle],
        context: str,
    ) -> Optional[BaseAnnotationStyle]:
        """Parses the inline style dictionary using the provided Pydantic model.

        Args:
            style_dict: Value of the annotation's 'style' entry, or None.
            StyleModel: Pydantic style model used for validation.
            context: Location text used as prefix of error messages.

        Returns:
            The validated style instance, or None when no style was given.

        Raises:
            MalformedAnnotationError: If the entry is not a table or fails
                validation.
            AnnotationError: If creating the style object fails otherwise.
        """
        if style_dict is None:
            return None
        if not isinstance(style_dict, dict):
            raise MalformedAnnotationError(
                context,
                f"'style' entry must be a table (dictionary), got {type(style_dict).__name__}.",
            )
        try:
            style_instance = StyleModel.model_validate(style_dict)
            return style_instance
        except ValidationError as e:
            error_details = e.errors()
            # An error that is not tied to a field has an empty 'loc'; fall
            # back to a generic label so the validation details are kept
            # instead of being replaced by an IndexError.
            error_msgs = [
                f"  - {err['loc'][0] if err['loc'] else 'style'}: {err['msg']}"
                for err in error_details
            ]
            raise MalformedAnnotationError(
                context, "Invalid style definition:\n" + "\n".join(error_msgs)
            ) from e
        except Exception as e:
            raise AnnotationError(f"{context}: Error creating style object: {e}") from e

    def _parse_point_annotation(
        self, anno_data: Dict[str, Any], anno_id: str, context: str
    ) -> PointAnnotation:
        """Parses a point annotation entry.

        Requires an 'index' field; 'label' and 'style' are optional.
        """
        label = anno_data.get("label")
        index_str = anno_data.get("index")
        style_dict = anno_data.get("style")

        if index_str is None:
            raise MalformedAnnotationError(
                context, "Missing 'index' field for 'point' annotation."
            )

        target_coord = _parse_residue_coordinate(index_str, context)
        style_instance = self._parse_inline_style(
            style_dict, PointAnnotationStyle, context
        )

        return PointAnnotation(
            id=anno_id,
            target=target_coord,
            style=cast(Optional[PointAnnotationStyle], style_instance),
            label=label,
        )

    def _parse_line_annotation(
        self, anno_data: Dict[str, Any], anno_id: str, context: str
    ) -> LineAnnotation:
        """Parses a line annotation entry.

        Requires an 'indices' list of exactly two coordinate strings; 'label'
        and 'style' are optional.
        """
        label = anno_data.get("label")
        indices_list = anno_data.get("indices")
        style_dict = anno_data.get("style")

        if not isinstance(indices_list, list) or len(indices_list) != 2:
            raise MalformedAnnotationError(
                context,
                "Field 'indices' for 'line' annotation must be a list of exactly two coordinate strings (e.g., ['A:10', 'A:20']).",
            )

        target_coords = [
            _parse_residue_coordinate(s, f"{context}, index {j + 1}")
            for j, s in enumerate(indices_list)
        ]
        style_instance = self._parse_inline_style(
            style_dict, LineAnnotationStyle, context
        )

        return LineAnnotation(
            id=anno_id,
            start_coordinate=target_coords[0],
            end_coordinate=target_coords[1],
            style=cast(
                Optional[LineAnnotationStyle], style_instance
            ),  # Cast for type checker
            label=label,
        )

    def _parse_area_annotation(
        self, anno_data: Dict[str, Any], anno_id: str, context: str
    ) -> AreaAnnotation:
        """Parses an area annotation entry.

        Requires a 'range' field; 'label' and 'style' are optional.
        """
        label = anno_data.get("label")
        range_str = anno_data.get("range")
        style_dict = anno_data.get("style")
        # AreaAnnotation currently only supports range, not list of indices

        if range_str is None:
            raise MalformedAnnotationError(
                context, "Missing 'range' field for 'area' annotation."
            )

        target_range = _parse_residue_range(range_str, context)
        target_range_set = ResidueRangeSet([target_range])
        style_instance = self._parse_inline_style(
            style_dict, AreaAnnotationStyle, context
        )

        return AreaAnnotation(
            id=anno_id,
            residue_range_set=target_range_set,
            style=cast(
                Optional[AreaAnnotationStyle], style_instance
            ),  # Cast for type checker
            label=label,
        )

    def parse(self) -> List[AnnotationObjectType]:
        """Parse the annotation file and create annotation objects.

        Returns:
            List of parsed annotation objects (PointAnnotation, LineAnnotation, AreaAnnotation),
            in file order.

        Raises:
            MalformedAnnotationError: If the TOML file is malformed, missing required structure,
                                      contains invalid formats, or style validation fails.
            AnnotationError: For other general parsing issues.

        """
        try:
            # Parse TOML content
            raw_data = toml.load(self.file_path)
        except toml.TomlDecodeError as e:
            raise MalformedAnnotationError(
                f"File: {self.file_path}", f"Invalid TOML syntax: {str(e)}"
            ) from e
        except Exception as e:
            raise AnnotationError(
                f"Error loading TOML file {self.file_path}: {e}"
            ) from e

        if not isinstance(raw_data, dict) or "annotations" not in raw_data:
            raise MalformedAnnotationError(
                f"File: {self.file_path}", "Missing top-level 'annotations' list."
            )

        if not isinstance(raw_data["annotations"], list):
            raise MalformedAnnotationError(
                f"File: {self.file_path}", "'annotations' key must contain a list."
            )

        parsed_annotations: List[AnnotationObjectType] = []
        for i, anno_data in enumerate(raw_data["annotations"]):
            # Error messages count annotations from 1.
            context = f"File: {self.file_path}, Annotation #{i + 1}"
            try:
                if not isinstance(anno_data, dict):
                    raise MalformedAnnotationError(
                        context, "Annotation entry must be a table (dictionary)."
                    )

                anno_type = anno_data.get("type")
                if not isinstance(anno_type, str):
                    raise MalformedAnnotationError(
                        context,
                        f"Missing or invalid 'type' field. Expected a string, got {type(anno_type).__name__}.",
                    )

                # Look up the parser function based on the type
                parser_func = self._parsers.get(anno_type)
                if parser_func is None:
                    raise MalformedAnnotationError(
                        context,
                        f"Unknown annotation 'type': '{anno_type}'. Must be one of {list(self._parsers.keys())}.",
                    )

                # Check for optional user-provided ID
                provided_id = anno_data.get("id")
                if provided_id is not None:
                    if isinstance(provided_id, str):
                        anno_id = provided_id
                    else:
                        raise MalformedAnnotationError(
                            context,
                            f"Optional 'id' field must be a string, got {type(provided_id).__name__}.",
                        )
                else:
                    # Generate ID if not provided (uses the 0-based position)
                    anno_id = f"annotation_{self.file_path.stem}_{i}_{anno_type}"

                # Call the specific parser method
                annotation_object = parser_func(anno_data, anno_id, context)
                parsed_annotations.append(annotation_object)

            except (MalformedAnnotationError, AnnotationError):
                # Documented error types pass through with their original
                # traceback.
                raise
            except Exception as e:
                raise AnnotationError(
                    f"Unexpected error processing annotation in {context}: {e}"
                ) from e

        return parsed_annotations

`__init__(file_path)`

Initialize the parser with the path to the annotation file.

Parameters:	`file_path` (`Union[str, Path]`) – Path to the TOML file containing annotations.

Raises:	`AnnotationFileNotFoundError` – If the file does not exist.

Source code in src/flatprot/io/annotations.py

def __init__(self, file_path: Union[str, Path]):
    """Remember the annotation file location and set up type dispatch.

    The file is only checked for existence here; nothing is read yet.

    Args:
        file_path: Path to the TOML file containing annotations.

    Raises:
        AnnotationFileNotFoundError: If the file does not exist.
    """
    annotation_path = Path(file_path)
    self.file_path = annotation_path
    if not annotation_path.exists():
        raise AnnotationFileNotFoundError(str(annotation_path))

    # Dispatch table from the annotation 'type' string to the bound method
    # that builds the corresponding annotation object.
    dispatch: Dict[
        str, Callable[[Dict[str, Any], str, str], AnnotationObjectType]
    ] = {
        "point": self._parse_point_annotation,
        "line": self._parse_line_annotation,
        "area": self._parse_area_annotation,
    }
    self._parsers = dispatch

`parse()`

Parse the annotation file and create annotation objects.

Returns:	`List[AnnotationObjectType]` – List of parsed annotation objects (PointAnnotation, LineAnnotation, AreaAnnotation).

Raises:	`MalformedAnnotationError` – If the TOML file is malformed, missing required structure, contains invalid formats, or style validation fails. `AnnotationError` – For other general parsing issues.

Source code in src/flatprot/io/annotations.py

def parse(self) -> List[AnnotationObjectType]:
    """Parse the annotation file and create annotation objects.

    Each entry of the top-level 'annotations' list is dispatched on its 'type'
    field to the matching parser method. Entries without an 'id' receive a
    generated one built from the file stem, the 0-based position and the type.

    Returns:
        List of parsed annotation objects (PointAnnotation, LineAnnotation, AreaAnnotation),
        in file order.

    Raises:
        MalformedAnnotationError: If the TOML file is malformed, missing required structure,
                                  contains invalid formats, or style validation fails.
        AnnotationError: For other general parsing issues.

    """
    try:
        # Parse TOML content
        raw_data = toml.load(self.file_path)
    except toml.TomlDecodeError as e:
        raise MalformedAnnotationError(
            f"File: {self.file_path}", f"Invalid TOML syntax: {str(e)}"
        ) from e
    except Exception as e:
        raise AnnotationError(
            f"Error loading TOML file {self.file_path}: {e}"
        ) from e

    if not isinstance(raw_data, dict) or "annotations" not in raw_data:
        raise MalformedAnnotationError(
            f"File: {self.file_path}", "Missing top-level 'annotations' list."
        )

    if not isinstance(raw_data["annotations"], list):
        raise MalformedAnnotationError(
            f"File: {self.file_path}", "'annotations' key must contain a list."
        )

    parsed_annotations: List[AnnotationObjectType] = []
    for i, anno_data in enumerate(raw_data["annotations"]):
        # Error messages count annotations from 1.
        context = f"File: {self.file_path}, Annotation #{i + 1}"
        try:
            if not isinstance(anno_data, dict):
                raise MalformedAnnotationError(
                    context, "Annotation entry must be a table (dictionary)."
                )

            anno_type = anno_data.get("type")
            if not isinstance(anno_type, str):
                raise MalformedAnnotationError(
                    context,
                    f"Missing or invalid 'type' field. Expected a string, got {type(anno_type).__name__}.",
                )

            # Look up the parser function based on the type
            parser_func = self._parsers.get(anno_type)
            if parser_func is None:
                raise MalformedAnnotationError(
                    context,
                    f"Unknown annotation 'type': '{anno_type}'. Must be one of {list(self._parsers.keys())}.",
                )

            # Resolve the annotation ID: generated when absent, otherwise it
            # must be a string.
            provided_id = anno_data.get("id")
            if provided_id is None:
                anno_id = f"annotation_{self.file_path.stem}_{i}_{anno_type}"
            elif isinstance(provided_id, str):
                anno_id = provided_id
            else:
                raise MalformedAnnotationError(
                    context,
                    f"Optional 'id' field must be a string, got {type(provided_id).__name__}.",
                )

            # Call the specific parser method
            annotation_object = parser_func(anno_data, anno_id, context)
            parsed_annotations.append(annotation_object)

        except (MalformedAnnotationError, AnnotationError):
            # Documented error types pass through with their original
            # traceback.
            raise
        except Exception as e:
            raise AnnotationError(
                f"Unexpected error processing annotation in {context}: {e}"
            ) from e

    return parsed_annotations

options: show_root_heading: true members_order: source

File Validation

Utility functions for validating input files.

Validate that the file exists and is a valid PDB or CIF format.

Parameters:	`path` (`Path`) – Path to the structure file

Raises:	`StructureFileNotFoundError` – If the file does not exist `InvalidStructureError` – If the file is not a valid PDB or CIF format

Source code in src/flatprot/io/structure.py

def validate_structure_file(path: Path) -> None:
    """Validate that the file exists and looks like a PDB or CIF file.

    This is a cheap pre-flight check, not a full parse: it verifies the file
    extension and scans only the beginning of the file for format markers.

    Args:
        path: Path to the structure file

    Raises:
        StructureFileNotFoundError: If the file does not exist
        InvalidStructureError: If the path has an unrecognized extension, is
            not a regular file, cannot be decoded as text, or lacks the
            expected PDB/CIF markers in its first 1000 characters
    """
    pdb_suffixes = (".pdb", ".ent")
    cif_suffixes = (".cif", ".mmcif")

    # Check file existence
    if not path.exists():
        raise StructureFileNotFoundError(str(path))

    # Check file extension (case-insensitive)
    suffix = path.suffix.lower()
    if suffix not in pdb_suffixes + cif_suffixes:
        raise InvalidStructureError(
            str(path),
            "PDB or CIF",
            "File does not have a recognized structure file extension (.pdb, .cif, .mmcif, .ent)",
        )

    # A directory named e.g. "model.pdb" passes both checks above; reject it
    # here so callers get the documented error type instead of an OSError
    # escaping from open().
    if not path.is_file():
        raise InvalidStructureError(
            str(path),
            "PDB or CIF",
            "Path is not a regular file",
        )

    # Read only the head of the file; the try block covers just the I/O that
    # can raise UnicodeDecodeError.
    try:
        with open(path, "r") as f:
            content = f.read(1000)  # Text mode: up to 1000 characters
    except UnicodeDecodeError as exc:
        # Chain the cause so the underlying decode failure stays visible.
        raise InvalidStructureError(
            str(path),
            "PDB or CIF",
            "File contains invalid characters and is not a valid text file",
        ) from exc

    # Basic check for PDB format
    if suffix in pdb_suffixes and not any(
        record in content for record in ("ATOM", "HETATM", "HEADER")
    ):
        raise InvalidStructureError(
            str(path),
            "PDB",
            "File does not contain required PDB records (ATOM, HETATM, or HEADER)",
        )

    # Basic check for mmCIF format
    if suffix in cif_suffixes and not any(
        token in content for token in ("_atom_site.", "loop_", "data_")
    ):
        raise InvalidStructureError(
            str(path),
            "CIF",
            "File does not contain required CIF categories (_atom_site, loop_, or data_)",
        )

options: show_root_heading: true

Validate that optional files exist if specified.

Parameters:	`file_paths` (`List[Optional[Path]]`) – List of file paths to check (can include None values)

Raises:	`FileNotFoundError` – If any specified file does not exist

Source code in src/flatprot/io/__init__.py

def validate_optional_files(
    file_paths: List[Optional[Path]],
) -> None:
    """Check that every path that was actually supplied points to something on disk.

    Falsy entries (such as None) are skipped. Checking stops at the first
    supplied path that does not exist.

    Args:
        file_paths: List of file paths to check (can include None values)

    Raises:
        FileNotFoundError: If any specified file does not exist; the message
            is the string form of the first missing path
    """
    # Lazily search for the first supplied-but-missing path.
    first_missing = next(
        (
            candidate
            for candidate in file_paths
            if candidate and not candidate.exists()
        ),
        None,
    )
    if first_missing is not None:
        raise FileNotFoundError(str(first_missing))

options: show_root_heading: true

IO Errors

Exceptions specific to file input, output, parsing, and validation.

Error classes for the FlatProt IO module.