google · shivamtiwari3 · Mar 11, 2026 · Mar 15, 2026 · gemini-code-assist · Mar 11, 2026
@@ -256,6 +256,30 @@ def get_model_content_types(self) -> List[ContentTypeLabel]:
         model_content_types.update(self._model_config.target_labels_space)
         return sorted(model_content_types)
 
+    def get_content_type_info(self, label: ContentTypeLabel) -> ContentTypeInfo:
+        """Returns metadata for a given content type label.
+
+        This provides access to the content type knowledge base, exposing
+        structured metadata such as mime type, group, human-readable
+        description, common file extensions, and whether the type is
+        text-based.
+
+        Args:
+            label: The ContentTypeLabel to look up.
+
+        Returns:
+            The ContentTypeInfo for the given label.
+
+        Raises:
+            MagikaError: If the label is not present in the content type
+                knowledge base (e.g., ContentTypeLabel.UNDEFINED).
+        """
+        if label not in self._cts_infos:
+            raise MagikaError(
+                f"Content type '{label}' is not in the content type knowledge base."
+            )
+        return self._cts_infos[label]
+
     @staticmethod
     def _get_default_model_name() -> str:
         """Returns the default model name.

@@ -761,6 +761,37 @@ def test_get_model_and_output_content_types() -> None:
     }.issubset(model_content_types_set)
 
 
+def test_get_content_type_info() -> None:
+    """Test Magika.get_content_type_info() — fixes #826.
+
+    Verifies that the public API correctly exposes ContentTypeInfo metadata
+    (mime_type, group, description, extensions, is_text) for every
+    ContentTypeLabel that is part of the content type knowledge base.
+    """
+    m = Magika()
+
+    # Every output content type must be queryable and return valid metadata.
+    for label in m.get_output_content_types():
+        info = m.get_content_type_info(label)
+        assert isinstance(info, ContentTypeInfo)
+        assert info.label == label
+        assert isinstance(info.mime_type, str) and info.mime_type != ""
+        assert isinstance(info.group, str) and info.group != ""
+        assert isinstance(info.description, str) and info.description != ""
+        assert isinstance(info.extensions, list)
+        assert isinstance(info.is_text, bool)
+
+    # Spot-check a well-known content type's metadata.
+    pdf_info = m.get_content_type_info(ContentTypeLabel.PDF)
+    assert pdf_info.mime_type == "application/pdf"
+    assert pdf_info.group == "document"
+    assert pdf_info.is_text is False
+
+    # Text-based type should report is_text=True.
+    py_info = m.get_content_type_info(ContentTypeLabel.PYTHON)
+    assert py_info.is_text is True
+
+
 def test_magika_imports():
     imported_modules = utils.get_imported_objects_after_wildcard()