Input/Output API

This section documents the components responsible for handling file input and output in FlatProt, including parsing structure files and configuration files (styles, annotations).

IO Concept

The IO module acts as the interface between FlatProt's internal data structures and external files. Its primary responsibilities are:

  1. Structure Parsing: Reading 3D coordinates, sequence information, and potentially secondary structure assignments from standard formats like PDB and mmCIF. This often involves leveraging libraries like Gemmi (e.g., via GemmiStructureParser).
  2. Configuration Parsing: Reading and validating configuration files written in TOML format, specifically for custom styles (StyleParser) and annotations (AnnotationParser). These parsers translate the TOML definitions into structured Pydantic models used by the Scene and Rendering systems.
  3. Validation: Performing basic checks on input files (e.g., existence, basic format validation) before attempting full parsing.
  4. Error Handling: Defining specific exception types related to file reading, parsing, and validation errors.

Structure Parser

Handles reading and parsing protein structure files (PDB, mmCIF).

Bases: StructureParser

Source code in src/flatprot/io/structure_gemmi_adapter.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
class GemmiStructureParser(StructureParser):
    """Structure parser backed by gemmi.

    Only residues that carry a ``CA`` atom are kept; each kept residue
    contributes its sequence number, a one-letter residue type and the CA
    coordinate.
    """

    def parse_structure(
        self, structure_file: Path, secondary_structure_file: Optional[Path] = None
    ) -> Structure:
        """Main entry point for structure parsing.

        Args:
            structure_file: Path of the structure file handed to gemmi.
            secondary_structure_file: Optional DSSP file. When given, its
                regions are used instead of the helix/sheet records stored
                in the structure file.

        Returns:
            A Structure built from the chains of the first model, with its
            id set to the stem of ``structure_file``.
        """
        # 1. Parse structure
        structure = self._parse_structure_file(structure_file)

        # 2. Resolve secondary structure once. The lookup depends only on the
        # structure and the optional DSSP file, never on the chain, so doing
        # it inside the chain loop would re-parse the same data per chain.
        # NOTE(review): the regions are not filtered by chain and are applied
        # to every chain; allow_missing_residues=True presumably tolerates
        # ranges that are absent from a chain -- confirm.
        ss_regions = self._get_secondary_structure(
            structure, secondary_structure_file
        )

        # 3. Process each chain of the first model
        chains = []
        for chain in structure[0]:
            # Extract basic chain data
            chain_data = self._parse_chain_data(chain)

            chain_obj = Chain(chain.name, **chain_data)
            for ss_type, start, end in ss_regions:
                chain_obj.add_secondary_structure(
                    ss_type, start, end, allow_missing_residues=True
                )
            chains.append(chain_obj)

        # Assign structure ID from filename stem
        structure_id = structure_file.stem
        return Structure(chains, id=structure_id)

    def _parse_structure_file(self, structure_file: Path) -> gemmi.Structure:
        """Parse structure from file using gemmi."""
        return gemmi.read_structure(str(structure_file))

    def _parse_chain_data(self, chain: gemmi.Chain) -> dict:
        """Extract basic chain data using gemmi.

        Residues without a ``CA`` atom are skipped entirely.

        Returns:
            A dict with the keys ``index`` (residue sequence numbers),
            ``residues`` (ResidueType values) and ``coordinates`` (float32
            array of CA positions), all in chain order.
        """
        residue_indices = []
        residue_names = []
        coordinates = []

        def get_ca_coordinates(residue: gemmi.Residue) -> Optional[np.ndarray]:
            # First atom named "CA" wins; None signals "no CA in this residue".
            for atom in residue:
                if atom.name == "CA":
                    return np.array([atom.pos.x, atom.pos.y, atom.pos.z])
            return None

        for residue in chain:
            coordinate = get_ca_coordinates(residue)
            if coordinate is None:
                continue
            coordinates.append(coordinate)
            residue_indices.append(residue.seqid.num)
            # Separate name for the one-letter code so the loop variable is
            # not shadowed. Codes that are not upper-case map to "X".
            one_letter = gemmi.find_tabulated_residue(residue.name).one_letter_code
            if not one_letter.isupper():
                one_letter = "X"
            residue_names.append(ResidueType(one_letter))
        # Internal invariant: the three lists are always appended together.
        assert len(residue_indices) == len(coordinates)
        assert len(residue_indices) == len(residue_names)
        return {
            "index": residue_indices,
            "residues": residue_names,
            "coordinates": np.array(coordinates, dtype=np.float32),
        }

    def _get_secondary_structure(
        self,
        structure: gemmi.Structure,
        secondary_structure_file: Optional[Path] = None,
    ) -> list[tuple[SecondaryStructureType, int, int]]:
        """Return secondary structure regions as (type, start, end) tuples.

        A DSSP file, when supplied, takes precedence over the helix/sheet
        records of the gemmi structure.
        """
        if secondary_structure_file is not None:
            return parse_dssp(secondary_structure_file)
        return self._get_secondary_structure_cif(structure)

    def _get_secondary_structure_cif(
        self, structure: gemmi.Structure
    ) -> list[tuple[SecondaryStructureType, int, int]]:
        """Get secondary structure from gemmi structure.

        Helices are listed first, followed by every strand of every sheet.
        """
        ss_regions = []

        # Extract helices and sheets from gemmi structure
        for helix in structure.helices:
            start = helix.start.res_id.seqid.num
            end = helix.end.res_id.seqid.num
            ss_regions.append((SecondaryStructureType.HELIX, start, end))

        for sheet in structure.sheets:
            for strand in sheet.strands:
                start = strand.start.res_id.seqid.num
                end = strand.end.res_id.seqid.num
                ss_regions.append((SecondaryStructureType.SHEET, start, end))

        return ss_regions

    def save_structure(
        self, structure: Structure, output_file: Path, separate_chains=False
    ) -> None:
        """Save structure using gemmi.

        Writes a single model ("1") in which every residue holds only a
        ``CA`` atom. Output is always written with ``write_pdb``.

        Args:
            structure: Structure whose ``items()`` yields (chain id, chain data).
            output_file: Destination path.
            separate_chains: Currently unused; kept for interface compatibility.
        """
        gemmi_structure = gemmi.Structure()
        model = gemmi.Model("1")

        for chain_id, chain_data in structure.items():
            chain = gemmi.Chain(chain_id)

            for residue_idx, residue, coord in zip(
                chain_data.index, chain_data.residues, chain_data.coordinates
            ):
                gemmi_res = gemmi.Residue()
                gemmi_res.name = residue.name
                gemmi_res.seqid = gemmi.SeqId(residue_idx)

                ca = gemmi.Atom()
                ca.name = "CA"
                # Hand plain Python floats to the binding so the call does
                # not depend on how it converts numpy scalar types.
                ca.pos = gemmi.Position(*(float(c) for c in coord))
                ca.element = gemmi.Element("C")
                gemmi_res.add_atom(ca)

                chain.add_residue(gemmi_res)

            model.add_chain(chain)

        gemmi_structure.add_model(model)
        gemmi_structure.write_pdb(str(output_file))

parse_structure(structure_file, secondary_structure_file=None)

Main entry point for structure parsing

Source code in src/flatprot/io/structure_gemmi_adapter.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def parse_structure(
    self, structure_file: Path, secondary_structure_file: Optional[Path] = None
) -> Structure:
    """Main entry point for structure parsing.

    Args:
        structure_file: Path of the structure file handed to gemmi.
        secondary_structure_file: Optional DSSP file. When given, its
            regions are used instead of the helix/sheet records stored in
            the structure file.

    Returns:
        A Structure built from the chains of the first model, with its id
        set to the stem of ``structure_file``.
    """
    # 1. Parse structure
    structure = self._parse_structure_file(structure_file)

    # 2. Resolve secondary structure once. The lookup depends only on the
    # structure and the optional DSSP file, never on the chain, so doing it
    # inside the chain loop would re-parse the same data per chain.
    # NOTE(review): the regions are not filtered by chain and are applied to
    # every chain; allow_missing_residues=True presumably tolerates ranges
    # that are absent from a chain -- confirm.
    ss_regions = self._get_secondary_structure(structure, secondary_structure_file)

    # 3. Process each chain of the first model
    chains = []
    for chain in structure[0]:
        # Extract basic chain data
        chain_data = self._parse_chain_data(chain)

        chain_obj = Chain(chain.name, **chain_data)
        for ss_type, start, end in ss_regions:
            chain_obj.add_secondary_structure(
                ss_type, start, end, allow_missing_residues=True
            )
        chains.append(chain_obj)

    # Assign structure ID from filename stem
    structure_id = structure_file.stem
    return Structure(chains, id=structure_id)

save_structure(structure, output_file, separate_chains=False)

Save structure using gemmi

Source code in src/flatprot/io/structure_gemmi_adapter.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def save_structure(
    self, structure: Structure, output_file: Path, separate_chains=False
) -> None:
    """Save structure using gemmi.

    Writes a single model ("1") in which every residue holds only a ``CA``
    atom. Output is always written with ``write_pdb``.

    Args:
        structure: Structure whose ``items()`` yields (chain id, chain data).
        output_file: Destination path.
        separate_chains: Currently unused; kept for interface compatibility.
    """
    gemmi_structure = gemmi.Structure()
    model = gemmi.Model("1")

    for chain_id, chain_data in structure.items():
        chain = gemmi.Chain(chain_id)

        # The positional index from enumerate() was never used, so iterate
        # the zipped columns directly.
        for residue_idx, residue, coord in zip(
            chain_data.index, chain_data.residues, chain_data.coordinates
        ):
            gemmi_res = gemmi.Residue()
            gemmi_res.name = residue.name
            gemmi_res.seqid = gemmi.SeqId(residue_idx)

            ca = gemmi.Atom()
            ca.name = "CA"
            # Hand plain Python floats to the binding so the call does not
            # depend on how it converts numpy scalar types.
            ca.pos = gemmi.Position(*(float(c) for c in coord))
            ca.element = gemmi.Element("C")
            gemmi_res.add_atom(ca)

            chain.add_residue(gemmi_res)

        model.add_chain(chain)

    gemmi_structure.add_model(model)
    gemmi_structure.write_pdb(str(output_file))

options: show_root_heading: true members_order: source

Style Parser

Parses TOML files defining custom styles for structure elements.

Parser for TOML style files focusing on structure elements and connections.

Source code in src/flatprot/io/styles.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
class StyleParser:
    """Parser for TOML style files focusing on structure elements and connections."""

    # Define known sections and their corresponding Pydantic models
    KNOWN_SECTIONS = {
        "helix": HelixStyle,
        "sheet": SheetStyle,
        "coil": CoilStyle,
        "connection": ConnectionStyle,
        "position_annotation": PositionAnnotationStyle,
    }

    def __init__(self, file_path: Union[str, Path]):
        """Initialize the style parser.

        The file is read and decoded as TOML immediately; unknown top-level
        sections only produce a printed warning.

        Args:
            file_path: Path to the TOML style file

        Raises:
            StyleFileNotFoundError: If the file doesn't exist
            InvalidTomlError: If the TOML is malformed
        """
        self.file_path = Path(file_path)
        if not self.file_path.exists():
            raise StyleFileNotFoundError(f"Style file not found: {self.file_path}")

        try:
            # TOML documents are UTF-8 by specification; do not depend on the
            # platform's default text encoding.
            with open(self.file_path, "r", encoding="utf-8") as f:
                self.raw_style_data = toml.load(f)
        except toml.TomlDecodeError as e:
            # Chain the decoder error so the original traceback is preserved.
            raise InvalidTomlError(f"Invalid TOML format: {e}") from e

        self._validate_structure()

    def _validate_structure(self) -> None:
        """Checks for unknown top-level sections in the style file."""
        unknown_sections = [
            section
            for section in self.raw_style_data
            if section not in self.KNOWN_SECTIONS
        ]

        if unknown_sections:
            # This is just a warning, not an error
            print(
                f"Warning: Unknown style sections found and ignored: {', '.join(unknown_sections)}"
            )

    def parse(
        self,
    ) -> Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]:
        """Parses the known sections from the TOML file into Pydantic style objects.

        Returns:
            A dictionary mapping section names ('helix', 'sheet', 'coil', 'connection', 'position_annotation')
            to their corresponding Pydantic style model instances.

        Raises:
            StyleValidationError: If any style section has invalid data according to
                                  its Pydantic model.
            StyleParsingError: For other general parsing issues.
        """
        parsed_styles: Dict[
            str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]
        ] = {}

        for section_name, StyleModelClass in self.KNOWN_SECTIONS.items():
            section_data = self.raw_style_data.get(section_name)

            if section_data is None:
                # Section not present in the file, skip it (will use default later)
                continue

            if not isinstance(section_data, dict):
                raise StyleValidationError(
                    f"Invalid format for section '{section_name}'. Expected a table (dictionary), got {type(section_data).__name__}."
                )

            try:
                # Pydantic handles validation and type conversion (including Color)
                style_instance = StyleModelClass(**section_data)
                parsed_styles[section_name] = style_instance
            except ValidationError as e:
                # Provide more context for validation errors. Model-level
                # errors can carry an empty `loc`, so fall back to the
                # section name instead of indexing into an empty tuple.
                error_msgs = [
                    f"  - {err['loc'][0] if err['loc'] else section_name}: {err['msg']}"
                    for err in e.errors()
                ]
                # Real newlines: the previous "\\n" literal put a visible
                # backslash-n into the message instead of a line break.
                raise StyleValidationError(
                    f"Invalid style definition in section '{section_name}':\n"
                    + "\n".join(error_msgs)
                ) from e
            except Exception as e:
                # Catch other potential errors during instantiation
                raise StyleParsingError(
                    f"Error processing style section '{section_name}': {e}"
                ) from e

        return parsed_styles

    def get_element_styles(
        self,
    ) -> Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]:
        """Parse and return the element styles defined in the TOML file.

        Returns:
            Dict mapping element type names ('helix', 'sheet', 'coil', 'connection', 'position_annotation')
            to their parsed Pydantic style objects. Sections not found in the TOML
            file will be omitted from the dictionary.

        Raises:
            StyleValidationError: If validation of any section fails.
            StyleParsingError: For general parsing issues.
        """
        try:
            return self.parse()
        except (StyleValidationError, StyleParsingError):
            # Re-raise exceptions from parse
            raise
        except Exception as e:
            # Catch unexpected errors during the overall process
            raise StyleParsingError(f"Failed to get element styles: {e}") from e

    def get_raw_data(self) -> Dict[str, Any]:
        """Return the raw, unprocessed style data loaded from the TOML file.

        Returns:
            Dict containing the raw parsed TOML data.
        """
        return self.raw_style_data

__init__(file_path)

Initialize the style parser.

Parameters:
  • file_path (Union[str, Path]) –

    Path to the TOML style file

Raises:
  • StyleFileNotFoundError –

    If the file doesn't exist

  • InvalidTomlError –

    If the TOML is malformed

Source code in src/flatprot/io/styles.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def __init__(self, file_path: Union[str, Path]):
    """Initialize the style parser.

    The file is read and decoded as TOML immediately; afterwards the
    top-level sections are checked via ``_validate_structure``.

    Args:
        file_path: Path to the TOML style file

    Raises:
        StyleFileNotFoundError: If the file doesn't exist
        InvalidTomlError: If the TOML is malformed
    """
    self.file_path = Path(file_path)
    if not self.file_path.exists():
        raise StyleFileNotFoundError(f"Style file not found: {self.file_path}")

    try:
        # TOML documents are UTF-8 by specification; do not depend on the
        # platform's default text encoding.
        with open(self.file_path, "r", encoding="utf-8") as f:
            self.raw_style_data = toml.load(f)
    except toml.TomlDecodeError as e:
        # Chain the decoder error so the original traceback is preserved.
        raise InvalidTomlError(f"Invalid TOML format: {e}") from e

    self._validate_structure()

get_element_styles()

Parse and return the element styles defined in the TOML file.

Returns:
  • Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]] –

    Dict mapping element type names ('helix', 'sheet', 'coil', 'connection', 'position_annotation') to their parsed Pydantic style objects. Sections not found in the TOML file will be omitted from the dictionary.

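Raises:
  • StyleValidationError –

    If validation of any section fails.

  • StyleParsingError –

    For general parsing issues.
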
Source code in src/flatprot/io/styles.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def get_element_styles(
    self,
) -> Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]:
    """Return the style objects for every section present in the TOML file.

    Thin wrapper around ``parse()`` that guarantees only style-specific
    exception types escape.

    Returns:
        Dict keyed by element type name ('helix', 'sheet', 'coil', 'connection',
        'position_annotation') holding the parsed Pydantic style objects.
        Sections missing from the TOML file are simply absent from the result.

    Raises:
        StyleValidationError: If validation of any section fails.
        StyleParsingError: For general parsing issues.
    """
    try:
        element_styles = self.parse()
    except (StyleValidationError, StyleParsingError):
        # Already one of the documented error types: pass it through as-is.
        raise
    except Exception as e:
        # Anything else is unexpected; wrap it in the generic parsing error.
        raise StyleParsingError(f"Failed to get element styles: {e}") from e
    else:
        return element_styles

get_raw_data()

Return the raw, unprocessed style data loaded from the TOML file.

Returns:
  • Dict[str, Any] –

    Dict containing the raw parsed TOML data.

Source code in src/flatprot/io/styles.py
152
153
154
155
156
157
158
def get_raw_data(self) -> Dict[str, Any]:
    """Expose the TOML data exactly as it was loaded, before any validation.

    Returns:
        The dictionary stored in ``raw_style_data`` (the same object, not a copy).
    """
    raw_toml = self.raw_style_data
    return raw_toml

parse()

Parses the known sections from the TOML file into Pydantic style objects.

Returns:
  • Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]] –

    A dictionary mapping section names ('helix', 'sheet', 'coil', 'connection', 'position_annotation') to their corresponding Pydantic style model instances.

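Raises:
  • StyleValidationError –

    If any style section has invalid data according to its Pydantic model.

  • StyleParsingError –

    For other general parsing issues.
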
Source code in src/flatprot/io/styles.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def parse(
    self,
) -> Dict[str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]]:
    """Parses the known sections from the TOML file into Pydantic style objects.

    Returns:
        A dictionary mapping section names ('helix', 'sheet', 'coil', 'connection', 'position_annotation')
        to their corresponding Pydantic style model instances.

    Raises:
        StyleValidationError: If any style section has invalid data according to
                              its Pydantic model.
        StyleParsingError: For other general parsing issues.
    """
    parsed_styles: Dict[
        str, Union[BaseStructureStyle, ConnectionStyle, PositionAnnotationStyle]
    ] = {}

    for section_name, StyleModelClass in self.KNOWN_SECTIONS.items():
        section_data = self.raw_style_data.get(section_name)

        if section_data is None:
            # Section not present in the file, skip it (will use default later)
            continue

        if not isinstance(section_data, dict):
            raise StyleValidationError(
                f"Invalid format for section '{section_name}'. Expected a table (dictionary), got {type(section_data).__name__}."
            )

        try:
            # Pydantic handles validation and type conversion (including Color)
            style_instance = StyleModelClass(**section_data)
            parsed_styles[section_name] = style_instance
        except ValidationError as e:
            # Provide more context for validation errors. Model-level errors
            # can carry an empty `loc`, so fall back to the section name
            # instead of indexing into an empty tuple.
            error_msgs = [
                f"  - {err['loc'][0] if err['loc'] else section_name}: {err['msg']}"
                for err in e.errors()
            ]
            # Real newlines: the previous "\\n" literal put a visible
            # backslash-n into the message instead of a line break.
            raise StyleValidationError(
                f"Invalid style definition in section '{section_name}':\n"
                + "\n".join(error_msgs)
            ) from e
        except Exception as e:
            # Catch other potential errors during instantiation
            raise StyleParsingError(
                f"Error processing style section '{section_name}': {e}"
            ) from e

    return parsed_styles

options: show_root_heading: true members_order: source

Annotation Parser

Parses TOML files defining annotations (points, lines, areas) and their inline styles.

Parses annotation files in TOML format with optional inline styles.

Creates fully initialized PointAnnotation, LineAnnotation, or AreaAnnotation objects from the flatprot.scene.annotations module.

Source code in src/flatprot/io/annotations.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
class AnnotationParser:
    """Parses annotation files in TOML format with optional inline styles.

    Creates fully initialized PointAnnotation, LineAnnotation, or AreaAnnotation
    objects from the `flatprot.scene.annotations` module.
    """

    def __init__(self, file_path: Union[str, Path]):
        """Initialize the parser with the path to the annotation file.

        Only the file's existence is checked here; the TOML content is read
        by ``parse()``.

        Args:
            file_path: Path to the TOML file containing annotations.

        Raises:
            AnnotationFileNotFoundError: If the file does not exist.
        """
        self.file_path = Path(file_path)
        # Check file existence
        if not self.file_path.exists():
            raise AnnotationFileNotFoundError(str(self.file_path))

        # Map annotation type strings to their respective parsing methods
        self._parsers: Dict[
            str, Callable[[Dict[str, Any], str, str], AnnotationObjectType]
        ] = {
            "point": self._parse_point_annotation,
            "line": self._parse_line_annotation,
            "area": self._parse_area_annotation,
        }

    def _parse_inline_style(
        self,
        style_dict: Optional[Dict[str, Any]],
        StyleModel: Type[BaseAnnotationStyle],
        context: str,
    ) -> Optional[BaseAnnotationStyle]:
        """Parses the inline style dictionary using the provided Pydantic model.

        Args:
            style_dict: Value of the annotation's 'style' entry, or None.
            StyleModel: Pydantic style model used to validate ``style_dict``.
            context: Location string used as the error context.

        Returns:
            The validated style instance, or None when no style was given.

        Raises:
            MalformedAnnotationError: If the entry is not a table or fails
                model validation.
            AnnotationError: For any other error while building the style.
        """
        if style_dict is None:
            return None
        if not isinstance(style_dict, dict):
            raise MalformedAnnotationError(
                context,
                f"'style' entry must be a table (dictionary), got {type(style_dict).__name__}.",
            )
        try:
            return StyleModel.model_validate(style_dict)
        except ValidationError as e:
            # Model-level errors can carry an empty `loc`; indexing it would
            # raise IndexError and hide the real validation message, so fall
            # back to a fixed label.
            error_msgs = [
                f"  - {err['loc'][0] if err['loc'] else 'style'}: {err['msg']}"
                for err in e.errors()
            ]
            raise MalformedAnnotationError(
                context, "Invalid style definition:\n" + "\n".join(error_msgs)
            ) from e
        except Exception as e:
            raise AnnotationError(f"{context}: Error creating style object: {e}") from e

    def _parse_point_annotation(
        self, anno_data: Dict[str, Any], anno_id: str, context: str
    ) -> PointAnnotation:
        """Parses a point annotation entry.

        Requires an 'index' field; 'label' and 'style' are optional.
        """
        label = anno_data.get("label")
        index_str = anno_data.get("index")
        style_dict = anno_data.get("style")

        if index_str is None:
            raise MalformedAnnotationError(
                context, "Missing 'index' field for 'point' annotation."
            )

        target_coord = _parse_residue_coordinate(index_str, context)
        style_instance = self._parse_inline_style(
            style_dict, PointAnnotationStyle, context
        )

        return PointAnnotation(
            id=anno_id,
            target=target_coord,
            style=cast(Optional[PointAnnotationStyle], style_instance),
            label=label,
        )

    def _parse_line_annotation(
        self, anno_data: Dict[str, Any], anno_id: str, context: str
    ) -> LineAnnotation:
        """Parses a line annotation entry.

        Requires an 'indices' list of exactly two coordinate strings;
        'label' and 'style' are optional.
        """
        label = anno_data.get("label")
        indices_list = anno_data.get("indices")
        style_dict = anno_data.get("style")

        if not isinstance(indices_list, list) or len(indices_list) != 2:
            raise MalformedAnnotationError(
                context,
                "Field 'indices' for 'line' annotation must be a list of exactly two coordinate strings (e.g., ['A:10', 'A:20']).",
            )

        target_coords = [
            _parse_residue_coordinate(s, f"{context}, index {j + 1}")
            for j, s in enumerate(indices_list)
        ]
        style_instance = self._parse_inline_style(
            style_dict, LineAnnotationStyle, context
        )

        return LineAnnotation(
            id=anno_id,
            start_coordinate=target_coords[0],
            end_coordinate=target_coords[1],
            style=cast(
                Optional[LineAnnotationStyle], style_instance
            ),  # Cast for type checker
            label=label,
        )

    def _parse_area_annotation(
        self, anno_data: Dict[str, Any], anno_id: str, context: str
    ) -> AreaAnnotation:
        """Parses an area annotation entry.

        Requires a 'range' field; 'label' and 'style' are optional.
        """
        label = anno_data.get("label")
        range_str = anno_data.get("range")
        style_dict = anno_data.get("style")
        # AreaAnnotation currently only supports range, not list of indices

        if range_str is None:
            raise MalformedAnnotationError(
                context, "Missing 'range' field for 'area' annotation."
            )

        target_range = _parse_residue_range(range_str, context)
        target_range_set = ResidueRangeSet([target_range])
        style_instance = self._parse_inline_style(
            style_dict, AreaAnnotationStyle, context
        )

        return AreaAnnotation(
            id=anno_id,
            residue_range_set=target_range_set,
            style=cast(
                Optional[AreaAnnotationStyle], style_instance
            ),  # Cast for type checker
            label=label,
        )

    def parse(self) -> List[AnnotationObjectType]:
        """Parse the annotation file and create annotation objects.

        Returns:
            List of parsed annotation objects (PointAnnotation, LineAnnotation, AreaAnnotation).

        Raises:
            MalformedAnnotationError: If the TOML file is malformed, missing required structure,
                                      contains invalid formats, or style validation fails.
            AnnotationError: For other general parsing issues.

        """
        try:
            # Parse TOML content
            raw_data = toml.load(self.file_path)
        except toml.TomlDecodeError as e:
            raise MalformedAnnotationError(
                f"File: {self.file_path}", f"Invalid TOML syntax: {str(e)}"
            ) from e
        except Exception as e:
            raise AnnotationError(
                f"Error loading TOML file {self.file_path}: {e}"
            ) from e

        if not isinstance(raw_data, dict) or "annotations" not in raw_data:
            raise MalformedAnnotationError(
                f"File: {self.file_path}", "Missing top-level 'annotations' list."
            )

        if not isinstance(raw_data["annotations"], list):
            raise MalformedAnnotationError(
                f"File: {self.file_path}", "'annotations' key must contain a list."
            )

        parsed_annotations: List[AnnotationObjectType] = []
        for i, anno_data in enumerate(raw_data["annotations"]):
            # Error context is 1-based; generated ids below use the 0-based index.
            context = f"File: {self.file_path}, Annotation #{i + 1}"
            try:
                if not isinstance(anno_data, dict):
                    raise MalformedAnnotationError(
                        context, "Annotation entry must be a table (dictionary)."
                    )

                anno_type = anno_data.get("type")
                if not isinstance(anno_type, str):
                    raise MalformedAnnotationError(
                        context,
                        f"Missing or invalid 'type' field. Expected a string, got {type(anno_type).__name__}.",
                    )

                # Look up the parser function based on the type
                parser_func = self._parsers.get(anno_type)
                if parser_func is None:
                    raise MalformedAnnotationError(
                        context,
                        f"Unknown annotation 'type': '{anno_type}'. Must be one of {list(self._parsers.keys())}.",
                    )

                # Check for optional user-provided ID
                provided_id = anno_data.get("id")
                if provided_id is not None:
                    if isinstance(provided_id, str):
                        anno_id = provided_id
                    else:
                        raise MalformedAnnotationError(
                            context,
                            f"Optional 'id' field must be a string, got {type(provided_id).__name__}.",
                        )
                else:
                    # Generate ID if not provided
                    anno_id = f"annotation_{self.file_path.stem}_{i}_{anno_type}"

                # Call the specific parser method
                annotation_object = parser_func(anno_data, anno_id, context)
                parsed_annotations.append(annotation_object)

            except (MalformedAnnotationError, AnnotationError):
                # Already a documented error type: re-raise untouched.
                raise
            except Exception as e:
                raise AnnotationError(
                    f"Unexpected error processing annotation in {context}: {e}"
                ) from e

        return parsed_annotations

__init__(file_path)

Initialize the parser with the path to the annotation file.

Parameters:
  • file_path (Union[str, Path]) –

    Path to the TOML file containing annotations.

Raises:
  • AnnotationFileNotFoundError –

    If the file does not exist.

Source code in src/flatprot/io/annotations.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def __init__(self, file_path: Union[str, Path]):
    """Bind the parser to an annotation file and register the per-type parsers.

    Args:
        file_path: Location of the TOML annotation file (string or ``Path``).

    Raises:
        AnnotationFileNotFoundError: If nothing exists at ``file_path``.
    """
    annotation_path = Path(file_path)
    self.file_path = annotation_path

    # A missing file is reported at construction time.
    if not annotation_path.exists():
        raise AnnotationFileNotFoundError(str(annotation_path))

    # Dispatch table: the 'type' value of an annotation entry selects the
    # bound method that turns that entry into an annotation object.
    dispatch: Dict[
        str, Callable[[Dict[str, Any], str, str], AnnotationObjectType]
    ] = {
        "point": self._parse_point_annotation,
        "line": self._parse_line_annotation,
        "area": self._parse_area_annotation,
    }
    self._parsers = dispatch

parse()

Parse the annotation file and create annotation objects.

Returns:
  • List[AnnotationObjectType] –

    List of parsed annotation objects (PointAnnotation, LineAnnotation, AreaAnnotation).

Raises:
  • MalformedAnnotationError –

    If the TOML file is malformed, missing required structure, contains invalid formats, or style validation fails.

  • AnnotationError –

    For other general parsing issues.

Source code in src/flatprot/io/annotations.py
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
def parse(self) -> List[AnnotationObjectType]:
    """Load the annotation TOML file and build one annotation object per entry.

    Every entry of the top-level ``annotations`` list must be a table with a
    string ``type`` naming a registered parser. An optional string ``id``
    replaces the automatically generated identifier.

    Returns:
        The parsed annotation objects, in the order the entries appear.

    Raises:
        MalformedAnnotationError: If the TOML syntax is invalid, the top-level
            ``annotations`` key is missing or not a list, or an entry is not a
            table, has a missing/non-string/unknown ``type``, or a non-string ``id``.
        AnnotationError: If the file cannot be loaded for another reason, or an
            unexpected exception occurs while an entry is processed. Annotation
            errors raised by the type-specific parsers propagate unchanged.
    """
    file_context = f"File: {self.file_path}"

    try:
        document = toml.load(self.file_path)
    except toml.TomlDecodeError as exc:
        raise MalformedAnnotationError(
            file_context, f"Invalid TOML syntax: {str(exc)}"
        ) from exc
    except Exception as exc:
        raise AnnotationError(
            f"Error loading TOML file {self.file_path}: {exc}"
        ) from exc

    has_annotations = isinstance(document, dict) and "annotations" in document
    if not has_annotations:
        raise MalformedAnnotationError(
            file_context, "Missing top-level 'annotations' list."
        )

    entries = document["annotations"]
    if not isinstance(entries, list):
        raise MalformedAnnotationError(
            file_context, "'annotations' key must contain a list."
        )

    results: List[AnnotationObjectType] = []
    for index, entry in enumerate(entries):
        # Error messages count entries from 1; generated ids use the 0-based index.
        context = f"{file_context}, Annotation #{index + 1}"
        try:
            if not isinstance(entry, dict):
                raise MalformedAnnotationError(
                    context, "Annotation entry must be a table (dictionary)."
                )

            kind = entry.get("type")
            if not isinstance(kind, str):
                raise MalformedAnnotationError(
                    context,
                    f"Missing or invalid 'type' field. Expected a string, got {type(kind).__name__}.",
                )

            # Pick the type-specific parser registered for this entry.
            handler = self._parsers.get(kind)
            if handler is None:
                raise MalformedAnnotationError(
                    context,
                    f"Unknown annotation 'type': '{kind}'. Must be one of {list(self._parsers.keys())}.",
                )

            # A user-supplied id wins; otherwise derive one from file name,
            # position and type.
            custom_id = entry.get("id")
            if custom_id is None:
                anno_id = f"annotation_{self.file_path.stem}_{index}_{kind}"
            elif isinstance(custom_id, str):
                anno_id = custom_id
            else:
                raise MalformedAnnotationError(
                    context,
                    f"Optional 'id' field must be a string, got {type(custom_id).__name__}.",
                )

            results.append(handler(entry, anno_id, context))

        except AnnotationError:
            # Covers MalformedAnnotationError too (it derives from AnnotationError).
            raise
        except Exception as exc:
            raise AnnotationError(
                f"Unexpected error processing annotation in {context}: {exc}"
            ) from exc

    return results

options: show_root_heading: true members_order: source

File Validation

Utility functions for validating input files.

Validate that the file exists and is a valid PDB or CIF format.

Parameters:
  • path (Path) –

    Path to the structure file

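Raises:
  • StructureFileNotFoundError –

    If the file does not exist

  • InvalidStructureError –

    If the file is not a valid PDB or CIF format
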
Source code in src/flatprot/io/structure.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def validate_structure_file(path: Path) -> None:
    """Run cheap sanity checks on a structure file before it is parsed.

    The checks are, in order: the path exists, its extension is one of
    ``.pdb``, ``.cif``, ``.mmcif`` or ``.ent`` (case-insensitive), and the first
    1000 characters contain at least one marker typical for the format.

    Args:
        path: Path to the structure file

    Raises:
        StructureFileNotFoundError: If the file does not exist
        InvalidStructureError: If the extension is not recognized, the file
            cannot be decoded as text, or no format marker is found
    """
    if not path.exists():
        raise StructureFileNotFoundError(str(path))

    extension = path.suffix.lower()
    if extension not in (".pdb", ".cif", ".mmcif", ".ent"):
        raise InvalidStructureError(
            str(path),
            "PDB or CIF",
            "File does not have a recognized structure file extension (.pdb, .cif, .mmcif, .ent)",
        )

    # Only the beginning of the file is inspected; this is a quick plausibility
    # check, not a full parse.
    try:
        with open(path, "r") as handle:
            head = handle.read(1000)
    except UnicodeDecodeError:
        raise InvalidStructureError(
            str(path),
            "PDB or CIF",
            "File contains invalid characters and is not a valid text file",
        )

    if extension in (".pdb", ".ent"):
        pdb_markers = ("ATOM", "HETATM", "HEADER")
        if not any(marker in head for marker in pdb_markers):
            raise InvalidStructureError(
                str(path),
                "PDB",
                "File does not contain required PDB records (ATOM, HETATM, or HEADER)",
            )
    else:
        # Remaining extensions are .cif / .mmcif.
        cif_markers = ("_atom_site.", "loop_", "data_")
        if not any(marker in head for marker in cif_markers):
            raise InvalidStructureError(
                str(path),
                "CIF",
                "File does not contain required CIF categories (_atom_site, loop_, or data_)",
            )

options: show_root_heading: true

Validate that optional files exist if specified.

Parameters:
  • file_paths (List[Optional[Path]]) –

    List of file paths to check (can include None values)

Raises:
  • FileNotFoundError –

    If any specified file does not exist

Source code in src/flatprot/io/__init__.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def validate_optional_files(
    file_paths: List[Optional[Path]],
) -> None:
    """Check that every path actually supplied points at something that exists.

    Falsy entries (such as ``None``) stand for "not specified" and are skipped.

    Args:
        file_paths: List of file paths to check (can include None values)

    Raises:
        FileNotFoundError: For the first specified path that does not exist
    """
    first_missing = next(
        (
            candidate
            for candidate in file_paths
            if candidate and not candidate.exists()
        ),
        None,
    )
    if first_missing is not None:
        raise FileNotFoundError(str(first_missing))

options: show_root_heading: true

IO Errors

Exceptions specific to file input, output, parsing, and validation.

Error classes for the FlatProt IO module.

AnnotationError

Bases: IOError

Base class for annotation-related errors.

Source code in src/flatprot/io/errors.py
90
91
92
93
94
class AnnotationError(IOError):
    """Common parent of the errors raised while handling annotations."""

    def __init__(self, message: str):
        """Prefix ``message`` with "Annotation error: " and hand it to the base class."""
        prefixed = f"Annotation error: {message}"
        super().__init__(prefixed)

AnnotationFileError

Bases: AnnotationError

Exception raised when there's an issue with an annotation file.

Source code in src/flatprot/io/errors.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class AnnotationFileError(AnnotationError):
    """Error for an annotation file that is unusable; the message includes a format example."""

    def __init__(self, file_path: str, details: Optional[str] = None):
        """Store the inputs and assemble the message.

        Args:
            file_path: Path of the offending annotation file.
            details: Optional extra explanation, appended on its own line when non-empty.
        """
        self.file_path = file_path
        self.details = details

        headline = f"Invalid annotation file: {file_path}"
        if details:
            headline = f"{headline}\n{details}"

        # Example shown to the user; kept verbatim, including its indentation.
        example = """
        [[annotations]]
        type = "point"
        label = "Active site"
        chain = "A"
        indices = 123
        color = "#FF0000"
        """
        hint = "".join(
            (
                "\nAnnotation files should be in TOML format with an 'annotations' list.",
                "\nExample annotation format:\n",
                example,
            )
        )

        super().__init__(f"{headline}{hint}")

AnnotationFileNotFoundError

Bases: AnnotationError

Exception raised when an annotation file is not found.

Source code in src/flatprot/io/errors.py
 97
 98
 99
100
101
102
class AnnotationFileNotFoundError(AnnotationError):
    """Exception raised when an annotation file is not found.

    Attributes:
        file_path: The path that could not be found, as passed to the constructor.
    """

    def __init__(self, file_path: str):
        """Record the missing path and build the error message.

        Args:
            file_path: Path of the annotation file that does not exist.
        """
        # Keep the path on the instance so handlers need not parse the message.
        self.file_path = file_path
        message = f"Annotation file not found: {file_path}"
        super().__init__(message)

FileError

Bases: IOError

Base class for file-related errors.

Source code in src/flatprot/io/errors.py
22
23
24
25
26
class FileError(IOError):
    """Common parent of the errors that concern a file on disk."""

    def __init__(self, message: str):
        """Pass ``message`` through to the base class unchanged.

        Args:
            message: Human-readable description of the problem.
        """
        super().__init__(message)

FileNotFoundError

Bases: FileError

Exception raised when a required file is not found.

Source code in src/flatprot/io/errors.py
29
30
31
32
33
34
35
36
class FileNotFoundError(FileError):
    """Error signalling that a file the program needs does not exist.

    Note: this class has the same name as Python's built-in ``FileNotFoundError``.
    """

    def __init__(self, file_path: str):
        """Store the path and build a two-line message (problem, then advice).

        Args:
            file_path: Path of the file that could not be found.
        """
        self.file_path = file_path
        lines = (
            f"File not found: {file_path}",
            "Please check that the file exists and the path is correct.",
        )
        super().__init__("\n".join(lines))

IOError

Bases: FlatProtError

Base class for all IO-related errors in FlatProt.

Source code in src/flatprot/io/errors.py
15
16
17
18
19
class IOError(FlatProtError):
    """Root of FlatProt's IO error hierarchy.

    Note: this class has the same name as Python's built-in ``IOError``.
    """

    def __init__(self, message: str):
        """Prefix ``message`` with "IO error: " and hand it to the base class."""
        prefixed = f"IO error: {message}"
        super().__init__(prefixed)

InvalidColorError

Bases: StyleError

Exception raised when an invalid color is specified.

Source code in src/flatprot/io/errors.py
236
237
238
239
240
241
242
243
244
245
246
class InvalidColorError(StyleError):
    """Error for a color value that is not acceptable for a style element."""

    def __init__(self, color_value: str, element_type: str):
        """Store the inputs and build the message with a hint on accepted formats.

        Args:
            color_value: The rejected color specification.
            element_type: Name of the element the color was meant for.
        """
        self.color_value = color_value
        self.element_type = element_type

        super().__init__(
            f"Invalid color value '{color_value}' for {element_type}."
            "\nColors should be specified as hex (#RRGGBB), RGB (rgb(r,g,b)), or named colors."
        )

InvalidFieldTypeError

Bases: AnnotationError

Exception raised when a field has an invalid type.

Source code in src/flatprot/io/errors.py
121
122
123
124
125
126
127
128
129
130
131
132
class InvalidFieldTypeError(AnnotationError):
    """Exception raised when a field has an invalid type.

    Attributes:
        annotation_type: Kind of annotation the field belongs to.
        field_name: Name of the offending field.
        expected_type: Description of the type that was expected.
        annotation_index: Index of the annotation within the file.
    """

    def __init__(
        self,
        annotation_type: str,
        field_name: str,
        expected_type: str,
        annotation_index: int,
    ):
        """Record the failure details and build the error message.

        Args:
            annotation_type: Kind of annotation the field belongs to.
            field_name: Name of the field with the wrong type.
            expected_type: Description of the expected type.
            annotation_index: Index of the annotation within the file.
        """
        # Keep the details on the instance so handlers can inspect them
        # without parsing the message text.
        self.annotation_type = annotation_type
        self.field_name = field_name
        self.expected_type = expected_type
        self.annotation_index = annotation_index

        message = f"Invalid type for field '{field_name}' in {annotation_type} annotation at index {annotation_index}. Expected {expected_type}."
        super().__init__(message)

InvalidFileFormatError

Bases: FileError

Exception raised when a file has an invalid format.

Source code in src/flatprot/io/errors.py
39
40
41
42
43
44
45
46
class InvalidFileFormatError(FileError):
    """Error for a file whose format is not the one that was expected."""

    def __init__(self, file_path: str, expected_format: str):
        """Store the inputs and build the message.

        Args:
            file_path: Path of the offending file.
            expected_format: Name of the format the file should have had.
        """
        self.file_path, self.expected_format = file_path, expected_format
        super().__init__(
            f"Invalid file format for {file_path}. Expected {expected_format}."
        )

InvalidMatrixDimensionsError

Bases: MatrixError

Error raised when a matrix has invalid dimensions.

Source code in src/flatprot/io/errors.py
265
266
267
268
class InvalidMatrixDimensionsError(MatrixError):
    """Matrix error for a matrix whose dimensions are not acceptable.

    Adds nothing to ``MatrixError`` beyond a distinct type to catch.
    """

InvalidMatrixError

Bases: MatrixError

Exception raised when a matrix file has an invalid format.

Source code in src/flatprot/io/errors.py
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
class InvalidMatrixError(MatrixError):
    """Error for a matrix file that is unusable; the message explains the accepted layouts."""

    def __init__(self, file_path: str, details: Optional[str] = None):
        """Store the inputs and assemble the message.

        Args:
            file_path: Path of the offending matrix file.
            details: Optional extra explanation, appended on its own line when non-empty.
        """
        self.file_path = file_path
        self.details = details

        headline = f"Invalid matrix file: {file_path}"
        if details:
            headline = f"{headline}\n{details}"

        hint = (
            "\nMatrix files should be NumPy .npy files containing a 4x4 transformation matrix."
            "\nAlternatively, you can use separate .npy files for rotation (3x3) and translation (3x1)."
        )

        super().__init__(f"{headline}{hint}")

InvalidMatrixFormatError

Bases: MatrixError

Error raised when a matrix file has an invalid format.

Source code in src/flatprot/io/errors.py
271
272
273
274
class InvalidMatrixFormatError(MatrixError):
    """Matrix error for a matrix file whose format is not acceptable.

    Adds nothing to ``MatrixError`` beyond a distinct type to catch.
    """

InvalidReferenceError

Bases: AnnotationError

Exception raised when an annotation references a nonexistent chain or residue.

Source code in src/flatprot/io/errors.py
135
136
137
138
139
140
141
142
143
144
145
146
class InvalidReferenceError(AnnotationError):
    """Exception raised when an annotation references a nonexistent chain or residue.

    Attributes:
        annotation_type: Kind of annotation containing the reference.
        reference_type: What the reference points at (inserted into the message).
        reference: The reference value that could not be resolved.
        annotation_index: Index of the annotation within the file.
    """

    def __init__(
        self,
        annotation_type: str,
        reference_type: str,
        reference: str,
        annotation_index: int,
    ):
        """Record the failure details and build the error message.

        Args:
            annotation_type: Kind of annotation containing the reference.
            reference_type: What the reference points at.
            reference: The invalid reference value.
            annotation_index: Index of the annotation within the file.
        """
        # Keep the details on the instance so handlers can inspect them
        # without parsing the message text.
        self.annotation_type = annotation_type
        self.reference_type = reference_type
        self.reference = reference
        self.annotation_index = annotation_index

        message = f"Invalid {reference_type} reference '{reference}' in {annotation_type} annotation at index {annotation_index}."
        super().__init__(message)

InvalidStructureError

Bases: StructureError

Exception raised when a structure file has an invalid format.

Source code in src/flatprot/io/errors.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class InvalidStructureError(StructureError):
    """Error for a structure file that does not look like the expected format."""

    def __init__(
        self, file_path: str, expected_format: str, details: Optional[str] = None
    ):
        """Store the inputs and assemble the message with a format-specific hint.

        Args:
            file_path: Path of the offending structure file.
            expected_format: Format name; "PDB" and "CIF" get an extra hint.
            details: Optional extra explanation, appended on its own line when non-empty.
        """
        self.file_path = file_path
        self.expected_format = expected_format
        self.details = details

        headline = f"Invalid {expected_format} file: {file_path}"
        if details:
            headline = f"{headline}\n{details}"

        # Only the two exact format names carry a record/category hint.
        if expected_format == "PDB":
            format_hint = "PDB files should contain ATOM, HETATM, or HEADER records."
        elif expected_format == "CIF":
            format_hint = "CIF files should contain _atom_site categories, loop_, or data_ sections."
        else:
            format_hint = ""

        advice = (
            f"\nPlease ensure the file is a valid {expected_format} format. "
            + format_hint
        )

        super().__init__(f"{headline}\n{advice}")

InvalidTomlError

Bases: StyleParsingError

Error for malformed TOML files.

Source code in src/flatprot/io/errors.py
198
199
200
201
class InvalidTomlError(StyleParsingError):
    """Style parsing error for a TOML file that is malformed.

    Adds nothing to ``StyleParsingError`` beyond a distinct type to catch.
    """

MalformedAnnotationError

Bases: AnnotationError

Exception raised when an annotation file has an invalid format.

Source code in src/flatprot/io/errors.py
105
106
107
108
109
110
class MalformedAnnotationError(AnnotationError):
    """Error for an annotation file whose content does not have the required form."""

    def __init__(self, file_path: str, details: str):
        """Build a two-line message: where the problem is, then what it is.

        Args:
            file_path: Text identifying the file (inserted after "Malformed annotation file: ").
            details: Description of what is wrong.
        """
        super().__init__(f"Malformed annotation file: {file_path}\n{details}")

MatrixError

Bases: IOError

Base class for matrix-related errors.

Source code in src/flatprot/io/errors.py
250
251
252
253
254
class MatrixError(IOError):
    """Common parent of the errors raised while handling matrices."""

    def __init__(self, message: str):
        """Prefix ``message`` with "Matrix error: " and hand it to the base class."""
        prefixed = f"Matrix error: {message}"
        super().__init__(prefixed)

MatrixFileError

Bases: MatrixError

Error raised when a matrix file can't be read.

Source code in src/flatprot/io/errors.py
277
278
279
280
class MatrixFileError(MatrixError):
    """Matrix error for a matrix file that cannot be read.

    Adds nothing to ``MatrixError`` beyond a distinct type to catch.
    """

MatrixFileNotFoundError

Bases: MatrixError

Exception raised when a matrix file is not found.

Source code in src/flatprot/io/errors.py
257
258
259
260
261
262
class MatrixFileNotFoundError(MatrixError):
    """Exception raised when a matrix file is not found.

    Attributes:
        file_path: The path that could not be found, as passed to the constructor.
    """

    def __init__(self, file_path: str):
        """Record the missing path and build the error message.

        Args:
            file_path: Path of the matrix file that does not exist.
        """
        # Keep the path on the instance so handlers need not parse the message.
        self.file_path = file_path
        message = f"Matrix file not found: {file_path}"
        super().__init__(message)

MissingRequiredFieldError

Bases: AnnotationError

Exception raised when a required field is missing from an annotation.

Source code in src/flatprot/io/errors.py
113
114
115
116
117
118
class MissingRequiredFieldError(AnnotationError):
    """Exception raised when a required field is missing from an annotation.

    Attributes:
        annotation_type: Kind of annotation that lacks the field.
        field_name: Name of the missing field.
        annotation_index: Index of the annotation within the file.
    """

    def __init__(self, annotation_type: str, field_name: str, annotation_index: int):
        """Record the failure details and build the error message.

        Args:
            annotation_type: Kind of annotation that lacks the field.
            field_name: Name of the required field that is absent.
            annotation_index: Index of the annotation within the file.
        """
        # Keep the details on the instance so handlers can inspect them
        # without parsing the message text.
        self.annotation_type = annotation_type
        self.field_name = field_name
        self.annotation_index = annotation_index

        message = f"Missing required field '{field_name}' for {annotation_type} annotation at index {annotation_index}"
        super().__init__(message)

OutputError

Bases: IOError

Base class for output-related errors.

Source code in src/flatprot/io/errors.py
301
302
303
304
305
class OutputError(IOError):
    """Common parent of the errors raised while producing output."""

    def __init__(self, message: str):
        """Prefix ``message`` with "Output error: " and hand it to the base class."""
        prefixed = f"Output error: {message}"
        super().__init__(prefixed)

OutputFileError

Bases: OutputError

Exception raised when there's an issue with an output file.

Source code in src/flatprot/io/errors.py
308
309
310
311
312
313
314
315
316
317
318
319
320
321
class OutputFileError(OutputError):
    """Error for a failure to write an output file; the message suggests what to check."""

    def __init__(self, file_path: str, details: Optional[str] = None):
        """Store the inputs and assemble the message.

        Args:
            file_path: Path of the output file that could not be written.
            details: Optional extra explanation, appended on its own line when non-empty.
        """
        self.file_path = file_path
        self.details = details

        headline = f"Error writing to output file: {file_path}"
        if details:
            headline = f"{headline}\n{details}"

        hint = "\nPlease check that you have write permissions to the directory and sufficient disk space."

        super().__init__(f"{headline}{hint}")

StructureError

Bases: IOError

Base class for structure-related errors.

Source code in src/flatprot/io/errors.py
50
51
52
53
54
class StructureError(IOError):
    """Common parent of the errors raised while handling structure files."""

    def __init__(self, message: str):
        """Prefix ``message`` with "Structure error: " and hand it to the base class."""
        prefixed = f"Structure error: {message}"
        super().__init__(prefixed)

StructureFileNotFoundError

Bases: StructureError

Exception raised when a structure file is not found.

Source code in src/flatprot/io/errors.py
57
58
59
60
61
62
63
class StructureFileNotFoundError(StructureError):
    """Exception raised when a structure file is not found.

    Attributes:
        file_path: The path that could not be found, as passed to the constructor.
    """

    def __init__(self, file_path: str):
        """Record the missing path and build a two-line message (problem, then advice).

        Args:
            file_path: Path of the structure file that does not exist.
        """
        # Keep the path on the instance so handlers need not parse the message.
        self.file_path = file_path
        message = f"Structure file not found: {file_path}"
        suggestion = "Please check that the file exists and the path is correct."
        super().__init__(f"{message}\n{suggestion}")

StyleError

Bases: IOError

Base class for style-related errors.

Source code in src/flatprot/io/errors.py
177
178
179
180
181
class StyleError(IOError):
    """Common parent of the errors raised while handling styles."""

    def __init__(self, message: str):
        """Prefix ``message`` with "Style error: " and hand it to the base class."""
        prefixed = f"Style error: {message}"
        super().__init__(prefixed)

StyleFileError

Bases: StyleError

Exception raised when there's an issue with a style file.

Source code in src/flatprot/io/errors.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
class StyleFileError(StyleError):
    """Error for a style file that is unusable; the message includes a format example."""

    def __init__(self, file_path: str, details: Optional[str] = None):
        """Store the inputs and assemble the message.

        Args:
            file_path: Path of the offending style file.
            details: Optional extra explanation, appended on its own line when non-empty.
        """
        self.file_path = file_path
        self.details = details

        headline = f"Invalid style file: {file_path}"
        if details:
            headline = f"{headline}\n{details}"

        # Example shown to the user; kept verbatim, including its indentation.
        example = """
        [helix]
        fill_color = "#FF0000"
        stroke_color = "#800000"

        [sheet]
        fill_color = "#00FF00"
        stroke_color = "#008000"
        """
        hint = "".join(
            (
                "\nStyle files should be in TOML format with sections for different elements.",
                "\nExample style format:\n",
                example,
            )
        )

        super().__init__(f"{headline}{hint}")

StyleFileNotFoundError

Bases: StyleError

Exception raised when a style file is not found.

Source code in src/flatprot/io/errors.py
184
185
186
187
188
189
class StyleFileNotFoundError(StyleError):
    """Exception raised when a style file is not found.

    Attributes:
        file_path: The path that could not be found, as passed to the constructor.
    """

    def __init__(self, file_path: str):
        """Record the missing path and build the error message.

        Args:
            file_path: Path of the style file that does not exist.
        """
        # Keep the path on the instance so handlers need not parse the message.
        self.file_path = file_path
        message = f"Style file not found: {file_path}"
        super().__init__(message)

StyleParsingError

Bases: StyleError

Base error for style parsing issues.

Source code in src/flatprot/io/errors.py
192
193
194
195
class StyleParsingError(StyleError):
    """Parent of the errors that occur while a style definition is parsed.

    Adds nothing to ``StyleError`` beyond a distinct type to catch.
    """

StyleValidationError

Bases: StyleParsingError

Error for invalid style field types or values.

Source code in src/flatprot/io/errors.py
204
205
206
207
class StyleValidationError(StyleParsingError):
    """Style parsing error for a style field with an invalid type or value.

    Adds nothing to ``StyleParsingError`` beyond a distinct type to catch.
    """

options: show_root_heading: true