diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..3e9e779 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Ignore HTML files +*.html linguist-vendored diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4268c05..59576be 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.10", "3.12"] + python-version: ["3.10", "3.12", "3.14"] steps: - uses: actions/checkout@v4 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..db39684 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,49 @@ +# Changelog + +## 1.5.1 (2025-11-01) + +### New Features +- Add `+` operator to combine two ChemFormula instances by summing element counts and charges (see [example6.py](https://github.com/molshape/ChemFormula/tree/main/examples/example6.py)) +- Add `-` operator to subtract one ChemFormula instance from another one by subtracting element counts and charges (see [example6.py](https://github.com/molshape/ChemFormula/tree/main/examples/example6.py)) +- Add `*` operator to multiply element counts and charge with a positive integer from left and right + +### Example +```python +from chemformula import ChemFormula + +water = ChemFormula("H2O") +proton = ChemFormula("H", 1) + +oxonium = water + proton # => ChemFormula("H3O", 1) +hydroxonium = oxonium + 3 * water # => ChemFormula("H9O4", 1) + +print(oxonium.hill_formula.unicode) # => H₃O⁺ +print(oxonium - proton == water) # True +print(hydroxonium.hill_formula.unicode) # => H₉O₄⁺ +``` + +--- + +## 1.5.0 (2025-09-14) + +### New Feature +- Add support for hydrogen isotopes (deuterium "D" and tritium "T") via a global `AllowHydrogenIsotopes` flag in `chemformula.config` (see [example5.py](https://github.com/molshape/ChemFormula/tree/main/examples/example5.py)) +- Implement `.contains_isotopes` attribute to the `ChemFormula` class for detecting specific 
isotopes in formulas + +### Deprecated Feature +- Replace `.radioactive` property with `.is_radioactive` +- `.radioactive` can still be used, but is flagged as deprecated and emits a `DeprecationWarning` + +### Example +```python +import chemformula.config +from chemformula import ChemFormula + +chemformula.config.AllowHydrogenIsotopes = True + +heavy_water = ChemFormula("D2O") +print(f"{heavy_water.formula_weight:.2f} g/mol") # => 20.03 g/mol + +super_heavy_water = ChemFormula("T2O") +print(super_heavy_water.is_radioactive) # => True +``` diff --git a/README.md b/README.md index 8370cfc..04104f7 100644 --- a/README.md +++ b/README.md @@ -7,49 +7,43 @@ ![License](https://img.shields.io/github/license/molshape/ChemFormula) \ ![GitHub stars](https://img.shields.io/github/stars/molshape/ChemFormula) +--- -
-Table of Contents +## Table of Contents 1. [Description](#description) 2. [How to install and uninstall?](#how-to-install-and-uninstall) 3. [Dependencies](#dependencies) 4. [How to use?](#how-to-use) -5. [Examples](#examples) -6. [Comparing and Sorting](#comparing-and-sorting-of-chemical-formulas) -7. [Atomic Weight Data](#atomic-weight-data) - -
+5. [Examples and Formula Formatting](#examples-and-formula-formatting) +6. [Formula Arithmetics (Addition, Subtraction, Multiplication)](#arithmetics-with-chemical-formulas-addition-subtraction-multiplication) +7. [Comparing and Sorting](#comparing-and-sorting-of-chemical-formulas) +8. [Atomic Weight Data](#atomic-weight-data) + +--- ## Description -**ChemFormula** is a Python class for working with chemical formulas. It allows parsing chemical formulas and generating predefined (LaTeX, HTML) or customized formatted output strings, e. g. [Cu(NH3)4]SO4⋅H2O. **ChemFormula** is also calculating the formula weight and thus enabling stoichiometric calculations with chemical formula objects. Atomic weights are based on IUPAC recommendations (see [Atomic Weight Data](#atomic-weight-data)). +**ChemFormula** is a Python library for working with chemical formulas. It allows parsing chemical formulas and generating predefined (LaTeX, HTML) or customized formatted output strings, e.g. [Cu(NH3)4]SO4⋅H2O. **ChemFormula** also calculates formula weights and weight distributions and enables stoichiometric calculations with chemical formula objects. Arithmetic operations (`+`, `-`, `*`) between formula objects are supported for combining and modifying chemical compositions. Atomic weights are based on IUPAC recommendations (see [Atomic Weight Data](#atomic-weight-data)). +--- ## How to install and uninstall -**ChemFormula** can be installed from the [Python Package Index (PyPI)](https://pypi.org/) repository by calling - - pip install chemformula - -or - - uv add chemformula +**ChemFormula** can be installed from the [Python Package Index (PyPI)](https://pypi.org/) repository by calling `pip install chemformula` or `uv add chemformula`. -In order to uninstall **ChemFormula** from your local environment use +In order to uninstall **ChemFormula** from your local environment use `pip uninstall chemformula` or `uv remove chemformula`. 
- pip uninstall chemformula - -or - - uv remove chemformula +--- ## Dependencies -**ChemFormula** uses the [casregnum package](https://pypi.org/project/casregnum/) to manage CAS Registry Numbers®. The corresponding properties of the `CAS` class are therefore inherited to the ```ChemFormula``` class. +**ChemFormula** uses the [casregnum package](https://pypi.org/project/casregnum/) to manage CAS Registry Numbers®. The corresponding properties of the `CAS` class are therefore inherited to the `ChemFormula` class. + +--- ## How to use **ChemFormula** provides the `ChemFormula` class for creating a chemical formula object: -```Python +```python from chemformula import ChemFormula chemical_formula = ChemFormula(formula, @@ -60,7 +54,7 @@ chemical_formula = ChemFormula(formula, *Examples:* -```Python +```python ethylcinnamate = ChemFormula("(C6H5)CHCHCOOC2H5") tetraamminecoppersulfate = ChemFormula("[Cu(NH3)4]SO4.H2O") uranophane = ChemFormula("Ca(UO2)2(SiO3OH)2.(H2O)5") @@ -74,7 +68,7 @@ theine = ChemFormula("(C5N4H)O2(CH3)3", name = "theine", cas = "58-08-2") The `ChemFormula` class offers the following attributes/functions -```Python +```python .formula # original chemical formula used to create the chemical formula object .text_formula # formula including charge as text output @@ -133,11 +127,12 @@ The `ChemFormula` class offers the following attributes/functions .cas.check_digit # CAS number check digit, inherited property from casregnum.CAS ``` +--- -## Examples +## Examples and Formula Formatting The following python sample script -```Python +```python from chemformula import ChemFormula tetraamminecoppersulfate = ChemFormula("[Cu(NH3)4]SO4.H2O") @@ -170,8 +165,8 @@ print(f" {mole:.1f} mol of {ethylcinnamate.name} weighs {mole * ethylcinnamate.f mass = 24 print(f" {mass:.1f} g of {ethylcinnamate.name} corresponds to {mass/ethylcinnamate.formula_weight * 1000:.1f} mmol.") print(f" The elemental composition of {ethylcinnamate.name} is as follows:") -for 
stringElementSymbol, floatElementFraction in ethylcinnamate.mass_fraction.items(): - print(f" {stringElementSymbol:<2}: {floatElementFraction * 100:>5.2f} %") +for stringElementSymbol, floatElementFraction in ethylcinnamate.mass_fractions.items(): + print(f" {stringElementSymbol:<2}: {floatElementFraction * 100:>5.2f} %") print(f"\n--- {uranophane.name} and {muscarine.name} ---") print(f" Yes, {uranophane.name} is radioactive.") if uranophane.is_radioactive else print(f" No, {uranophane.name} is not radioactive.") @@ -205,7 +200,7 @@ generates the following output --- Formula Weights Calculations with Ethyl Cinnamate --- The formula weight of ethyl cinnamate (C₁₁H₁₂O₂) is 176.21 g/mol. - 1.4 mol of ethyl cinnamate weight 246.7 g. + 1.4 mol of ethyl cinnamate weighs 246.7 g. 24.0 g of ethyl cinnamate corresponds to 136.2 mmol. The elemental composition of ethyl cinnamate is as follows: C : 74.98 % @@ -224,8 +219,36 @@ generates the following output --- CAS Registry Number --- Caffeine has the CAS RN 58-08-2 (or as an integer: 58082). -More examples can be found at [/examples/](https://github.com/molshape/ChemFormula/blob/main/examples/). +More examples can be found in the folder [examples/](https://github.com/molshape/ChemFormula/tree/main/examples/). 
+--- + +## Arithmetics with Chemical Formulas (Addition, Subtraction, Multiplication) + +`ChemFormula` instances can be added to and subtracted from each other, and multiplied by a positive integer factor, to create a new `ChemFormula` instance by summing, subtracting or multiplying element counts and charges: + +```python +ATP = ChemFormula("C10H12N5O13P3", -4) # Adenosine triphosphate +water = ChemFormula("H2O") +dihydrogen_phosphate = ChemFormula("H2PO4", -1) + +AMP = ATP + 2 * water - 2 * dihydrogen_phosphate # Adenosine monophosphate + +print("\n--- Arithmetics with ChemFormula Objects ---") +print(f" ATP ({ATP.hill_formula.unicode}) hydrolyzes with two water molecules" + f" to AMP ({AMP.hill_formula.unicode}) and two inorganic phosphates ({dihydrogen_phosphate.unicode})\n" + f" releasing energy for cellular processes.\n") +``` + +creates the following output: + + --- Arithmetics with ChemFormula Objects --- + ATP (C₁₀H₁₂N₅O₁₃P₃⁴⁻) hydrolyzes with two water molecules to AMP (C₁₀H₁₂N₅O₇P²⁻) and two inorganic phosphates (H₂PO₄⁻) + releasing energy for cellular processes. + +[example6.py](https://github.com/molshape/ChemFormula/tree/main/examples/example6.py) shows more examples for formula arithmetics. + +--- ## Comparing and Sorting of Chemical Formulas @@ -271,6 +294,9 @@ generates the following output 6. C₆H₁₂O₆ 7. C₆H₁₂S₆ +[example4.py](https://github.com/molshape/ChemFormula/tree/main/examples/example4.py) provides detailed examples for sorting and comparing `ChemFormula` instances. + +--- ## Using Isotopes like Deuterium or Tritium @@ -304,7 +330,7 @@ creates the following output: No, H₂O contains no specific isotopes. Yes, D₂O contains specific isotopes. - +--- ## Atomic Weight Data @@ -326,4 +352,6 @@ Quoted atomic weights are those suggested for materials where the origin of the Data for hydrogen isotopes are taken from the **AME2020 Atomic Mass Evaluation** by Meng Wang *et al.*: - - [Chinese Phys.
C, 2021, (45), 030003](https://doi.org/10.1088/1674-1137/abddaf) + - [*Chinese Phys. C*, **2021**, *45*(3), 030003](https://doi.org/10.1088/1674-1137/abddaf) + +--- \ No newline at end of file diff --git a/examples/example1.py b/examples/example1.py index 755b42c..06e9062 100644 --- a/examples/example1.py +++ b/examples/example1.py @@ -26,7 +26,7 @@ print(f" {stringElementSymbol:<2}: {floatElementFraction * 100:>5.2f} %") print("\n--- Is Uranophane Radioactive and Charged? ---") -print(f" Yes, {uranophane.name} is radioactive.") if uranophane.is_radioactive else print(f" No, {uranophane.name} is not radioactive.") # noqa: E501 +print(f" Yes, {uranophane.name} is radioactive.") if uranophane.is_radioactive else print(f" No, {uranophane.name} is not radioactive.") print(f" Yes, {uranophane.name} is charged.") if uranophane.charged else print(f" No, {uranophane.name} is not charged.") print("\n--- Accessing Single Elements through FormulaObject.Element['Element_Symbol'] ---") @@ -39,7 +39,7 @@ # Original: [Cu(NH3)4]SO4.H2O # Charged: False # Charge (int): 0 -# LaTeX: \[\textnormal{Cu}\(\textnormal{N}\textnormal{H}_{3}\)_{4}\]\textnormal{S}\textnormal{O}_{4}\cdot\textnormal{H}_{2}\textnormal{O} # noqa: E501 +# LaTeX: \[\textnormal{Cu}\(\textnormal{N}\textnormal{H}_{3}\)_{4}\]\textnormal{S}\textnormal{O}_{4}\cdot\textnormal{H}_{2}\textnormal{O} # HTML: [Cu(NH3)4]SO4⋅H2O # Custom format: --> [Cu(NH_<3>)_<4>]SO_<4> * H_<2>O <-- # Sum formula: CuN4H14SO5 diff --git a/examples/example2.py b/examples/example2.py index 36a6c0a..8a61333 100644 --- a/examples/example2.py +++ b/examples/example2.py @@ -12,7 +12,7 @@ print(f" LaTeX: {muscarine.latex}") print(f" HTML: {muscarine.html}") print(f" Unicode: {muscarine.unicode}") -print(f" Custom format: {muscarine.format_formula('--> ', '', '', '_<', '>', ' <--', '', '', ' * ', '^^', '^^', '(+)', '(-)')}") # noqa: E501 +print(f" Custom format: {muscarine.format_formula('--> ', '', '', '_<', '>', ' <--', '', '', ' * ', '^^', '^^', 
'(+)', '(-)')}") print(f" Sum formula: {muscarine.sum_formula}") print(f" Hill formula: {muscarine.hill_formula}") @@ -25,7 +25,7 @@ print(f" LaTeX: {pyrophosphate.latex}") print(f" HTML: {pyrophosphate.html}") print(f" Unicode: {pyrophosphate.unicode}") -print(f" Custom format: {pyrophosphate.format_formula('--> ', '', '', '_<', '>', ' <--', '', '', ' * ', '^^', '^^', '(+)', '(-)')}") # noqa: E501 +print(f" Custom format: {pyrophosphate.format_formula('--> ', '', '', '_<', '>', ' <--', '', '', ' * ', '^^', '^^', '(+)', '(-)')}") print(f" Sum formula: {pyrophosphate.sum_formula}") print(f" Hill formula: {pyrophosphate.hill_formula}") @@ -40,7 +40,7 @@ print(f" {stringElementSymbol:<2}: {floatElementFraction * 100:>5.2f} %") print("\n--- Is L-(+)-Muscarine Radioactive and Charged? ---") -print(f" Yes, {muscarine.name} is radioactive.") if muscarine.is_radioactive else print(f" No, {muscarine.name} is not radioactive.") # noqa: E501 +print(f" Yes, {muscarine.name} is radioactive.") if muscarine.is_radioactive else print(f" No, {muscarine.name} is not radioactive.") print(f" Yes, {muscarine.name} is charged.") if muscarine.charged else print(f" No, {muscarine.name} is not charged.") print("\n--- Accessing Single Elements through FormulaObject.Element['Element_Symbol'] ---") @@ -54,8 +54,8 @@ # Charged: True # Charge (int): 1 # Charge (str): + -# LaTeX: \(\(\textnormal{C}\textnormal{H}_{3}\)_{3}\textnormal{N}\)\(\textnormal{C}_{6}\textnormal{H}_{11}\textnormal{O}_{2}\)^{+} # noqa: E501 -# HTML: ((CH3)3N)(C6H11O2)+ # noqa: E501 +# LaTeX: \(\(\textnormal{C}\textnormal{H}_{3}\)_{3}\textnormal{N}\)\(\textnormal{C}_{6}\textnormal{H}_{11}\textnormal{O}_{2}\)^{+} +# HTML: ((CH3)3N)(C6H11O2)+ # Unicode: ((CH₃)₃N)(C₆H₁₁O₂)⁺ # Custom format: --> ((CH_<3>)_<3>N)(C_<6>H_<11>O_<2>)^^+^^ <-- # Sum formula: C9H20NO2 diff --git a/examples/example5.py b/examples/example5.py index 2a927e8..598a66a 100644 --- a/examples/example5.py +++ b/examples/example5.py @@ -7,8 +7,8 @@ heavy_water = 
ChemFormula("D2O") print("\n--- Isotopes in ChemFormula Objects ---") -print(f" Yes, {water.unicode} contains specific isotopes.") if water.contains_isotopes else print(f" No, {water.unicode} contains no specific isotopes.") # noqa: E501 -print(f" Yes, {heavy_water.unicode} contains specific isotopes.\n") if heavy_water.contains_isotopes else print(f" No, {heavy_water.unicode} contains no specific isotopes.\n") # noqa: E501 +print(f" Yes, {water.unicode} contains specific isotopes.") if water.contains_isotopes else print(f" No, {water.unicode} contains no specific isotopes.") +print(f" Yes, {heavy_water.unicode} contains specific isotopes.\n") if heavy_water.contains_isotopes else print(f" No, {heavy_water.unicode} contains no specific isotopes.\n") # OUTPUT: # diff --git a/examples/example6.py b/examples/example6.py new file mode 100644 index 0000000..4b43fc1 --- /dev/null +++ b/examples/example6.py @@ -0,0 +1,69 @@ +from chemformula import ChemFormula + +conjugated_diene = ChemFormula("C4H6") +dienophile = ChemFormula("C2H4") + +diels_alder_adduct = conjugated_diene + dienophile + +print("\n--- Adding ChemFormula Objects ---") +print(f" Butadiene {conjugated_diene.unicode} and ethylene {dienophile.unicode}" + f" undergo a Diels-Alder reaction to form {diels_alder_adduct.unicode}.") +print(f" Molecular weight: {diels_alder_adduct.formula_weight:.2f} g/mol.") + +# OUTPUT: +# +# --- Adding ChemFormula Objects --- +# Butadiene C₄H₆ and ethylene C₂H₄ undergo a Diels-Alder reaction to form C₆H₁₀. +# Molecular weight: 82.15 g/mol. 
+# + + +dichloroethane = ChemFormula("ClH2CCH2Cl") +hydrogen_chloride = ChemFormula("HCl") +vinyl_chloride = dichloroethane - hydrogen_chloride + +print("\n--- Subtracting ChemFormula Objects ---") +print(f" Vinyl chloride {vinyl_chloride.hill_formula.unicode} is synthesized from dichloroethane" + f" {dichloroethane.hill_formula.unicode} by elimination of hydrogen chloride" + f" {hydrogen_chloride.hill_formula.unicode}.") +print(f" Molecular weight: {vinyl_chloride.formula_weight:.2f} g/mol.") + +# OUTPUT: +# +# --- Subtracting ChemFormula Objects --- +# Vinyl chloride C₂H₃Cl is synthesized from dichloroethane C₂H₄Cl₂ by elimination of hydrogen chloride ClH. +# Molecular weight: 62.50 g/mol. +# + + +borane = ChemFormula("BH3") +diborane = 2 * borane +print("\n--- Multiplying ChemFormula Objects ---") +print(f" Diborane {diborane.hill_formula.unicode} is formed by the dimerization of two borane" + f" {borane.hill_formula.unicode} molecules.") +print(f" Molecular weight of diborane: {diborane.formula_weight:.2f} g/mol.") + +# OUTPUT: +# +# --- Multiplying ChemFormula Objects --- +# Diborane B₂H₆ is formed by the dimerization of two borane BH₃ molecules. +# Molecular weight of diborane: 27.67 g/mol. +# + + +ATP = ChemFormula("C10H12N5O13P3", -4) +water = ChemFormula("H2O") +dihydrogen_phosphate = ChemFormula("H2PO4", -1) + +AMP = ATP + 2 * water - 2 * dihydrogen_phosphate + +print("\n--- Arithmetics with ChemFormula Objects ---") +print(f" ATP ({ATP.hill_formula.unicode}) hydrolyzes with two water molecules" + f" to AMP ({AMP.hill_formula.unicode}) and two inorganic phosphates ({dihydrogen_phosphate.unicode})" + f" releasing energy for cellular processes.\n") + +# OUTPUT: +# +# --- Arithmetics with ChemFormula Objects --- +# ATP (C₁₀H₁₂N₅O₁₃P₃⁴⁻) hydrolyzes with two water molecules to AMP (C₁₀H₁₂N₅O₇P²⁻) and two inorganic phosphates (H₂PO₄⁻) releasing energy for cellular processes.
+# diff --git a/examples/example_general.py b/examples/example_general.py index dff8098..30f01be 100644 --- a/examples/example_general.py +++ b/examples/example_general.py @@ -24,7 +24,7 @@ print(f" Hill formula: {muscarine.hill_formula.latex}") print(f"\n--- Formula Weights Calculations with {ethylcinnamate.name.title()} ---") -print(f" The formula weight of {ethylcinnamate.name} ({ethylcinnamate.sum_formula.unicode}) is {ethylcinnamate.formula_weight:.2f} g/mol.") # noqa: E501 +print(f" The formula weight of {ethylcinnamate.name} ({ethylcinnamate.sum_formula.unicode}) is {ethylcinnamate.formula_weight:.2f} g/mol.") mole = 1.4 print(f" {mole:.1f} mol of {ethylcinnamate.name} weight {mole * ethylcinnamate.formula_weight:.1f} g.") mass = 24 @@ -34,9 +34,9 @@ print(f" {stringElementSymbol:<2}: {floatElementFraction * 100:>5.2f} %") print(f"\n--- {uranophane.name} and {muscarine.name} ---") -print(f" Yes, {uranophane.name} is radioactive.") if uranophane.is_radioactive else print(f" No, {uranophane.name} is not radioactive.") # noqa: E501 +print(f" Yes, {uranophane.name} is radioactive.") if uranophane.is_radioactive else print(f" No, {uranophane.name} is not radioactive.") print(f" Yes, {uranophane.name} is charged.") if uranophane.charged else print(f" No, {uranophane.name} is not charged.") -print(f" Yes, {muscarine.name} is radioactive.") if muscarine.is_radioactive else print(f" No, {muscarine.name} is not radioactive.") # noqa: E501 +print(f" Yes, {muscarine.name} is radioactive.") if muscarine.is_radioactive else print(f" No, {muscarine.name} is not radioactive.") print(f" Yes, {muscarine.name} is charged.") if muscarine.charged else print(f" No, {muscarine.name} is not charged.") print('\n--- Accessing Single Elements through FormulaObject.Element["Element_Symbol"] ---') @@ -51,8 +51,8 @@ # Print instance: ((CH3)3N)(C6H11O2) # Original: ((CH3)3N)(C6H11O2) # Text formula: ((CH3)3N)(C6H11O2) + -# HTML: ((CH3)3N)(C6H11O2)+ # noqa: E501 -# LaTeX: 
\(\(\textnormal{C}\textnormal{H}_{3}\)_{3}\textnormal{N}\)\(\textnormal{C}_{6}\textnormal{H}_{11}\textnormal{O}_{2}\)^{+} # noqa: E501 +# HTML: ((CH3)3N)(C6H11O2)+ +# LaTeX: \(\(\textnormal{C}\textnormal{H}_{3}\)_{3}\textnormal{N}\)\(\textnormal{C}_{6}\textnormal{H}_{11}\textnormal{O}_{2}\)^{+} # Unicode: ((CH₃)₃N)(C₆H₁₁O₂)⁺ # Charge (int): 1 # Charge (str): + diff --git a/pyproject.toml b/pyproject.toml index f7bb861..fe7e15f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [project] name = "chemformula" -version = "1.5.0" -description = "ChemFormula is a Python class for working with chemical formulas. It allows parsing chemical formulas, generating formatted output strings and calculating formula weights." -readme = "README.md" +version = "1.5.1" +description = "ChemFormula is a Python library for working with chemical formulas. It supports parsing formulas, generating formatted output strings, calculating molecular weights and weight distributions, and performing arithmetic operations on ChemFormula objects." 
+readme = { file = "README.md", content-type = "text/markdown" } authors = [ {name = "Axel Müller", email = "molshape@gmx.net"}, ] @@ -17,12 +17,14 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "License :: OSI Approved :: MIT License", "Topic :: Scientific/Engineering :: Chemistry", ] -license = "MIT" +license = { text = "MIT" } license-files = ["LICENSE"] [project.urls] @@ -34,8 +36,8 @@ Repository = "https://github.com/molshape/ChemFormula.git" requires = ["hatchling"] build-backend = "hatchling.build" -[tool.uv] -dev-dependencies = [ +[dependency-groups] +dev = [ "mypy>=1.18.1", "pytest>=8.4.1", "pytest-cov>=7.0.0", @@ -51,7 +53,6 @@ python_classes = ["Test*"] python_functions = ["test_*"] addopts = [ "--strict-markers", - "--strict-config", "--verbose", "--cov=src", "--cov-branch", @@ -61,7 +62,7 @@ addopts = [ [tool.ruff] line-length = 88 -target-version = "py39" +target-version = "py310" [tool.ruff.lint] select = [ @@ -93,5 +94,5 @@ exclude_lines = [ "raise AssertionError", "raise NotImplementedError", "if 0:", - "if __name__ == .__main__.:", + "if __name__ == '__main__':", ] diff --git a/src/chemformula/chemformula.py b/src/chemformula/chemformula.py index 8cb467f..5818c7a 100644 --- a/src/chemformula/chemformula.py +++ b/src/chemformula/chemformula.py @@ -1,517 +1,632 @@ -""" -ChemFormula: A Python class for working with chemical formulas. It allows parsing chemical formulas, generating formatted output strings and calculating formula weights. -""" - -from __future__ import annotations - -import re -import warnings -from collections import defaultdict - -import casregnum - -from . 
import elements - - -# Class for chemical formula strings -class ChemFormulaString: - """ - ChemFormulaString class for chemical formula strings with charge information - - Attributes: - ----------- - formula : str - Chemical (input) formula as a string - charge : int - Charge of the chemical formula - charged : bool - Boolean property whether the formula object is charged (True) or not (False) - text_charge : str - Text representation of the charge as a text string - text_formula : str - Text representation of the chemical formula including charge information - latex : str - LaTeX representation of the chemical formula including charge information - html : str - HTML representation of the chemical formula including charge information - unicode : str - Unicode representation of the chemical formula including charge information - - Methods: - -------- - format_formula() : str - Formats formula (ChemFormulaString object) as a customized strings - """ - - def __init__(self, formula: str, charge: int = 0) -> None: - self.formula = formula - self.charge = charge - - # formula as standard string output - def __str__(self) -> str: - return self.formula - - # formula as detailed string output - def __repr__(self) -> str: - return f"{self.__class__.__name__}(formula='{self.formula}', charge={self.charge})" - - # Returns original input formula - @property - def formula(self) -> str: - """Returns the original input formula as a string.""" - return self._formula - - @formula.setter - def formula(self, input_formula: str) -> None: - self._formula = str(input_formula) - - # Returns the charge of the formula object - @property - def charge(self) -> int: - """Returns the charge of the formula object as an integer.""" - return self._charge - - # Checks, whether the charge is valid - @charge.setter - def charge(self, charge: int) -> None: - if isinstance(charge, int): - self._charge = charge - else: - raise TypeError( - f"Invalid Charge Value '{charge}' (expected an integer (), but 
found {type(charge)})" - ) - - # Boolean property whether the formula object is charged (True) or not (False) - @property - def charged(self) -> bool: - """Returns whether the formula object is charged (True) or not (False)""" - return False if self.charge == 0 else True - - # Returns the charge of the formula object as a text string - @property - def text_charge(self) -> str: - """Returns the charge of the formula object as a text string, without the number "1" for charges of ±1.""" - # a charge of "1+" or "1-" is printed without the number "1" - charge_output = "" - if self.charge == 0: - return charge_output - if not(abs(self.charge) == 1): - charge_output = str(abs(self.charge)) - charge_output += "+" if self.charge > 0 else "-" - return charge_output - - # Returns formula and charge as a text string - @property - def text_formula(self) -> str: - """Returns the chemical formula including charge information as a text string.""" - if self.charged: - return f"{self.formula} {self.text_charge}" - return self.formula - - # Formats formula (ChemFormulaString object) as a customized strings - def format_formula(self, - formula_prefix: str = "", - element_prefix: str = "", element_suffix: str = "", - freq_prefix: str = "", freq_suffix: str = "", - formula_suffix: str = "", - bracket_prefix: str = "", bracket_suffix: str = "", - multiply_symbol: str = "", - charge_prefix: str = "", charge_suffix: str = "", - charge_positive: str = "+", charge_negative: str = "-" - ) -> str: - """Formats the chemical formula (ChemFormulaString object) as a customized string with user-defined prefixes and suffixes: - Parameters: - - formula_prefix (str): Prefix for the entire formula - - element_prefix (str): Prefix for each element - - element_suffix (str): Suffix for each element - - freq_prefix (str): Prefix for each frequency - - freq_suffix (str): Suffix for each frequency - - formula_suffix (str): Suffix for the entire formula - - bracket_prefix (str): Prefix for each bracket - - 
bracket_suffix (str): Suffix for each bracket - - multiply_symbol (str): Symbol for multiplication - - charge_prefix (str): Prefix for the charge - - charge_suffix (str): Suffix for the charge - - charge_positive (str): Symbol for positive charge - - charge_negative (str): Symbol for negative charge - """ - formatted_formula = re.sub(r"([\{\[\(\)\]\}]){1}", bracket_prefix + r"\g<1>" + bracket_suffix, self.formula) - formatted_formula = re.sub(r"([A-Z]{1}[a-z]{0,1})", element_prefix + r"\g<1>" + element_suffix, formatted_formula) - formatted_formula = re.sub(r"(\d+)", freq_prefix + r"\g<1>" + freq_suffix, formatted_formula) - formatted_formula = re.sub(r"[\.\*]", multiply_symbol, formatted_formula) - # create charge string, by replacing + and - with the respective charge symbols - charge = self.text_charge - charge.replace("+", charge_positive) - charge.replace("-", charge_negative) - if self.charged: - return formula_prefix + formatted_formula + charge_prefix + charge + charge_suffix + formula_suffix - else: - return formula_prefix + formatted_formula + formula_suffix - - # Returns a LaTeX representation of a formula (ChemFormulaString object) - @property - def latex(self) -> str: - """Returns a LaTeX representation of the chemical formula (including charge information) as a string.""" - return self.format_formula("", - r"\\textnormal{", "}", - "_{", "}", - "", - r"\\", - multiply_symbol=r"\\cdot", - charge_prefix="^{", charge_suffix="}" - ) - - # Returns an HTML representation of a formula (ChemFormulaString object) - @property - def html(self) -> str: - """Returns an HTML representation of the chemical formula (including charge information) as a string. 
- Specifies the class 'ChemFormula' for custom CSS.""" - return self.format_formula("", - "", "", - "", "", - "", - multiply_symbol="⋅", - charge_prefix="", charge_suffix="", - charge_negative="–" - ) - - # Returns formula with unicode sub- and superscripts (₀₁₂₃₄₅₆₇₈₉⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻) - @property - def unicode(self) -> str: - """Returns a Unicode representation of the chemical formula (including charge information) as a string.""" - subscript_num = "₀₁₂₃₄₅₆₇₈₉" - superscript_num = "⁰¹²³⁴⁵⁶⁷⁸⁹" - unicode_formula = self.formula # start with original formula - unicode_charge = self.text_charge # start with original text_charge - # replace all numbers (0 - 9) by subscript numbers (for elemental frequencies) - # and superscript numbers (for charge information) - for number in range(0, 10): - unicode_formula = unicode_formula.replace(str(number), subscript_num[number]) - unicode_charge = unicode_charge.replace(str(number), superscript_num[number]) - unicode_charge = unicode_charge.replace("+", "⁺") - unicode_charge = unicode_charge.replace("-", "⁻") - return unicode_formula + unicode_charge - - -# Class for chemical formula dictionaries -class ChemFormulaDict(defaultdict): - """ - ChemFormulaDict class for chemical formula dictionaries with element symbols as keys and element frequencies as values - """ - def __init__(self, *args, **kwargs) -> None: - super().__init__(int, *args, **kwargs) # default value for non-existing elements is 0 - - def __setitem__(self, key_element: str, value_frequency: int | float) -> None: - if key_element not in elements.get_valid_element_symbols(): - raise ValueError( - f"Invalid Element Symbol (unknown element symbol '{key_element}')" - ) - super().__setitem__(key_element, value_frequency) - - -# Class for chemical formula objects -class ChemFormula(ChemFormulaString): - """ - ChemFormula class for chemical formula objects with formula, different representations (LaTeX, HTML, Unicode), - formula_weight, charge, name, information on 
radioactivity and specific isotopes, as well as CAS registry number - information, if provided - - Attributes (inherited from ChemFormulaString): - ---------------------------------------------- - formula : str - Chemical (input) formula as a string - charge : int - Charge of the chemical formula - charged : bool - Boolean property whether the formula object is charged (True) or not (False) - text_charge : str - Text representation of the charge as a text string - text_formula : str - Text representation of the chemical formula including charge information - latex : str - LaTeX representation of the chemical formula including charge information - html : str - HTML representation of the chemical formula including charge information - unicode : str - Unicode representation of the chemical formula including charge information - - Additional Attributes: - ---------------------- - name : str or None - Name of the chemical formula (if provided) - cas : casregnum.CAS or None - CAS registry number as a casregnum.CAS object of the chemical formula (if provided) - element : ChemFormulaDict - Chemical formula as a ChemFormulaDict object with (key : value) = (element symbol : element frequency) - sum_formula : ChemFormulaDict - Chemical formula in Hill notation as a ChemFormulaDict object with (key : value) = (element symbol : element frequency) - hill_formula : ChemFormulaDict - Chemical formula in Hill notation as a ChemFormulaDict object with (key : value) = (element symbol : element frequency) - formula_weight : float - Formula weight of the chemical formula in g/mol - mass_fraction : ChemFormulaDict - Mass fraction of each element as a ChemFormulaDict object with (key : value) = (element symbol : mass fraction) - contains_isotopes : bool - Boolean property whether the formula contains an element symbol that is refering to a specific isotope (e.g. 
D or Tc) - is_radioactive : bool - Boolean property whether the formula contains at least one radioactive element (True) or not (False) - - Methods (inherited from ChemFormulaString): - ------------------------------------------- - format_formula() : str - Formats formula (ChemFormulaString object) as a customized strings - - Additional Methods: - ------------------- - __eq__() : bool - Tests if two chemical formula objects are identical - __lt__() : bool - Compares two formulas with respect to their lexical sorting according to Hill's notation - """ - - def __init__(self, formula: str, charge: int = 0, name: str | None = None, cas: str | int | None = None) -> None: - # Parent information - ChemFormulaString.__init__(self, formula, charge) - # Additional input information - self.name = name - self.cas = cas - # parse chemical formula and test for consistency - self._clean_formula = self._clean_up_formula() - self._check_formula(self._clean_formula) - self._resolved_formula = self._resolve_brackets(self._clean_formula) - _ = self.mass_fraction # trigger mass_fraction parsing to check for valid element symbols and atomic weights - - # Test if two chemical formla objects are identical - def __eq__(self, other: object) -> bool: - """Tests if two chemical formula objects are identical.""" - # two chemical formula objects are considered to be equal if they have - # the same chemical composition (in Hill notation), the same charge, - # and the same CAS registry number (if provided) - if not isinstance(other, ChemFormula): - raise TypeError("Comparisons can only be made between ChemFormula objects.") - return (str(self.hill_formula) == str(other.hill_formula) and self.charge == other.charge and self.cas == other.cas) - - # Compares two formulas with respect to their lexical sorting according to Hill's notation - def __lt__(self, other: object) -> bool: - """Compares two chemical formula objects with respect to their lexical sorting according to Hill's notation.""" - if not 
isinstance(other, ChemFormula): - raise TypeError("Comparisons can only be made between ChemFormula objects.") - elements_self = tuple(self._element_hill_sorted.items()) - elements_other = tuple(other._element_hill_sorted.items()) - # cycle through the elements in Hill notation - for i in range(0, min(len(elements_self), len(elements_other))): - # first check for the alphabetical sorting of the element symbol - if elements_self[i][0].lower() < elements_other[i][0].lower(): - return True - if elements_self[i][0].lower() > elements_other[i][0].lower(): - return False - # if the element symbol is identical, check the frequency of that element - if elements_self[i][0] == elements_other[i][0] and elements_self[i][1] < elements_other[i][1]: - return True - if elements_self[i][0] == elements_other[i][0] and elements_self[i][1] > elements_other[i][1]: - return False - # if everything to this point is identical then: - # the shorter formula (with less elements) is always lesser/smaller than the longer formula (with more elements) - if len(elements_self) - 1 == i and len(elements_other) - 1 > i: - return True - # if everything has failed so far then Self > Other - return False - - # Clean up chemical formula, i. e. harmonize brackets, add quantifier "1" to bracketed units without quantifier - def _clean_up_formula(self) -> str: - """Cleans up the input formula by harmonizing brackets, removing whitespaces, dots and asterisks, - and adding a quantifier `1` to bracketed units without a quantifier.""" - formula = self.formula - # for simplicity reasons: create a (...)1 around the whole formula - formula = "(" + formula + ")1" - # replace all type of brackets ("{", "[") by round brackets "(" - formula = re.sub(r"[\{\[\(]", "(", formula) - formula = re.sub(r"[\)\]\}]", ")", formula) - # replace all whitespaces, dots and asterisks - formula = re.sub(r"[\.\s\*]+", "", formula) - # search for brackets without a frequency information (...) 
and add a frequency of 1 => (...)1 - formula = re.sub(r"\)(\D)", r")1\g<1>", formula) - return formula - - # Checks whether the formula is valid regarding bracketing - def _check_formula(self, formula: str) -> bool: - """Checks whether the formula is valid regarding bracketing and general element symbols conventions. - Raises a ValueError if the formula is invalid. Element validation is done in the `ChemFormulaDict` class.""" - bracket_counter = 0 - for character in formula: - if character == "(": - bracket_counter += 1 - if character == ")": - bracket_counter -= 1 - if bracket_counter < 0: # there are more closing brackets than opening brackets during parsing formula - raise ValueError( - "Invalid Bracket Structure in Formula (expecting an opening bracket, but found a closing bracket)" - ) - if not bracket_counter == 0: # number of opening brackets is not identical to the number of closing brackets - raise ValueError( - "Invalid Bracket Structure in Formula (inconsistent number of opening and closing brackets)" - ) - if re.search("[a-z]{2,}", formula): # at least two lowercase letters found in sequence - raise ValueError( - "Invalid Element Symbol (two lowercase letters found in sequence)" - ) - # no error found - return True - - # Recursively resolve all brackets in the provided formula - def _resolve_brackets(self, formula: str) -> str: - """Recursively resolves all brackets in the provided formula and returns a formula without any brackets as a string.""" - # stop recursion if formula contains no more brackets - if "(" in formula: - # find smallest bracket unit, i. e. 
a bracketed entity that does not contain any other brackets - most_inner_bracket_unit = re.search(r"\(([A-Za-z0-9]*)\)(\d+)", formula) - assert most_inner_bracket_unit is not None # should never be None, as presence of "(" is checked above - # remove smallest bracket unit from original formula string using match.span() and string splicing - pre_match = formula[0:most_inner_bracket_unit.span()[0]:] # string before the bracketed unit - post_match = formula[most_inner_bracket_unit.span()[1]::] # string after the bracketed unit - inner_match = most_inner_bracket_unit.group(1) # string of the bracketed unit - multiplier_match = int(most_inner_bracket_unit.group(2)) # multiplier of the bracketed unit - # find all element symbols + (optional) element frequency occurrences of inner_match - element_freq_list = re.findall(r"[A-Z]{1}[a-z]{0,1}\d*", inner_match) - # separate the element symbol portion from the number portion (if any) for all occurrences - resolved_match = "" - for element_freq_item in element_freq_list: - element_freq = re.match(r"(\D+)(\d*)", element_freq_item) - assert element_freq is not None # should never be None, due to the return value of `re.findall()` - element = element_freq.group(1) - freq = element_freq.group(2) - freq = 1 if not freq else freq # if no frequency is given, set frequency to 1 - # create a resolved version of the bracketed unit and replace the bracketed unit with this resolved string - resolved_match += str(element) + str(int(freq) * multiplier_match) - formula = pre_match + resolved_match + post_match - # recursively resolve brackets - formula = self._resolve_brackets(formula) - return str(formula) - - # Returns the formula as a dictionary with (key : value) = (element symbol : element frequency) - @property - def element(self) -> ChemFormulaDict: - """Returns the chemical formula as a `ChemFormulaDict` object with (key : value) = (element symbol : element frequency).""" - # find all occurrences of one capital letter, possibly one 
lower case letter and some multiplier number - # Note: a multiplier number is always present in resolved formulas - dict_formula = ChemFormulaDict() - element_freq_list = re.findall(r"[A-Z]{1}[a-z]{0,1}\d+", self._resolved_formula) - # separate for each occurrence the letter portion from the number portion (if any) - for element_freq_item in element_freq_list: - # separate element symbol from element frequency - element_freq = re.match(r"(\D+)(\d+)", element_freq_item) - assert element_freq is not None # should never be None, due to the return value of `_resolve_brackets()` - element = element_freq.group(1) - freq = element_freq.group(2) - # create a dictionary with element symbols as keys and element frequencies as values - dict_formula[element] += int(freq) - return ChemFormulaDict(dict_formula) - - # Return the formula as a dictionary with (key : value) = (element symbol : element frequency) in Hill sorting - @property - def _element_hill_sorted(self) -> ChemFormulaDict: - """Returns the chemical formula as a `ChemFormulaDict` object in Hill notation with (key : value) = (element symbol : element frequency).""" - dict_sorted_elements = dict(sorted(self.element.items())) - dict_hill_sorted_elements = {} - # extract "C" and "H" (if "C" is also present) from the original dictionary - if "C" in dict_sorted_elements: - dict_hill_sorted_elements["C"] = dict_sorted_elements["C"] - del dict_sorted_elements["C"] - if "H" in dict_sorted_elements: - dict_hill_sorted_elements["H"] = dict_sorted_elements["H"] - del dict_sorted_elements["H"] - # create new Hill dictionary by placing "C" and "H" (if "C" is also present) in front of all other elements - dict_hill_sorted_elements = dict_hill_sorted_elements | dict_sorted_elements - return ChemFormulaDict(dict_hill_sorted_elements) - - # function to contract formula from a given (element symbol : element frequency) dictionary - @staticmethod - def _contract_formula(dict_element_freq: ChemFormulaDict, charge: int) -> 
ChemFormulaString: - """Contracts the formula to a sum formula by generating a `ChemFormulaString` object from a given (element symbol : element frequency) dictionary.""" - formula_output = "" - for element, freq in dict_element_freq.items(): - formula_output += element # element symbol - if freq > 1: - formula_output += str(freq) # add multipliers when they are greater than 1 - return ChemFormulaString(formula_output, charge) - - # Generate sum formula as a string - @property - def sum_formula(self) -> ChemFormulaString: - """Returns the chemical formula as a `ChemFormulaString` object in sum formula notation.""" - return ChemFormula._contract_formula(self.element, self.charge) - - # Generate sum formula as a string - # Source: Edwin A. Hill, J. Am. Chem. Soc., 1900 (22), 8, 478-494 (https://doi.org/10.1021/ja02046a005) - @property - def hill_formula(self) -> ChemFormulaString: - """Returns the chemical formula as a `ChemFormulaString` object in Hill notation.""" - return ChemFormula._contract_formula(self._element_hill_sorted, self.charge) - - # Returns the formula weight of the formula object, atomic weights are taken from elements.py - @property - def formula_weight(self) -> float: - """Returns the formula weight of the chemical formula in g/mol as a float.""" - float_formula_weight: float = 0.0 - for element, freq in self.element.items(): - float_formula_weight += freq * elements.atomic_weight(element) - return float(float_formula_weight) - - # Calculate mass fractions for each element in the formula as a dictionary, atomic weights are taken from elements.py - @property - def mass_fraction(self) -> ChemFormulaDict: - """Returns the mass fraction of each element as a `ChemFormulaDict` object with (key : value) = (element symbol : mass fraction).""" - dict_mass_fraction: ChemFormulaDict = ChemFormulaDict() - for element, freq in self.element.items(): - dict_mass_fraction[element] = float((freq * elements.atomic_weight(element)) / self.formula_weight) - return 
ChemFormulaDict(dict_mass_fraction) - - # Checks, whether an element is classified as radioactive, radioactivity data is taken from elements.py - @property - def is_radioactive(self) -> bool: - """Returns whether the formula contains at least one radioactive element (True) or not (False) and is therefore classified as radioactive.""" - for element in self.element: - if elements.isradioactive(element): - return True # element and therefore the formula is radioactive - return False # no radioactive elements found and therefore no radioactive formula - # Deprecated: use is_radioactive instead - @property - def radioactive(self) -> bool: - """Deprecated: use `is_radioactive` instead. Returns whether the formula contains at least one radioactive element (True) or not (False) and is therefore classified as radioactive.""" - warnings.warn( - "The 'radioactive' property is deprecated, use 'is_radioactive' instead.", - DeprecationWarning, - stacklevel=2 - ) - return self.is_radioactive - - # Checks, whether a specific isotop of an element is used, isotop data is taken from elements.py - @property - def contains_isotopes(self) -> bool: - """Returns whether the formula contains an element symbol that is refering to a specific isotope (e.g. 
D or Tc).""" - for element in self.element: - if elements.isisotope(element): - return True # element is a specific isotope - return False # no isotopes of elements found - - # Returns the name of the formula - @property - def name(self) -> str | None: - """Returns the name of the chemical formula (if provided) as a string or None.""" - return self._name - - # Makes sure, that the name of the formula is a string - @name.setter - def name(self, name: str | None) -> None: - self._name = None if name is None else str(name) - - # Returns the CAS registry number of the formula object - @property - def cas(self) -> casregnum.CAS | None: - """Returns the CAS registry number of the chemical formula as a `casregnum.CAS` object (if provided) or None.""" - return None if self._cas is None else self._cas - - # Checks, whether the CAS registry number is valid by using the CAS class from CASRegistryNumber.py - @cas.setter - def cas(self, cas_rn: int | str | None) -> None: - self._cas = None if cas_rn is None else casregnum.CAS(cas_rn) +""" +ChemFormula: A Python class for working with chemical formulas. It allows parsing chemical formulas, generating formatted output strings and calculating formula weights. +""" + +from __future__ import annotations + +import re +import warnings + +import casregnum + +from . 
import elements + + +# Class for chemical formula strings +class ChemFormulaString: + """ + ChemFormulaString class for chemical formula strings with charge information + + Attributes: + ----------- + formula : str + Chemical (input) formula as a string + charge : int + Charge of the chemical formula + charged : bool + Boolean property whether the formula object is charged (True) or not (False) + text_charge : str + Text representation of the charge as a text string + text_formula : str + Text representation of the chemical formula including charge information + latex : str + LaTeX representation of the chemical formula including charge information + html : str + HTML representation of the chemical formula including charge information + unicode : str + Unicode representation of the chemical formula including charge information + + Methods: + -------- + format_formula() : str + Formats formula (ChemFormulaString object) as a customized strings + """ + + def __init__(self, formula: str, charge: int = 0) -> None: + self.formula = formula + self.charge = charge + + # formula as standard string output + def __str__(self) -> str: + return self.formula + + # formula as detailed string output + def __repr__(self) -> str: + return f"{self.__class__.__name__}(formula='{self.formula}', charge={self.charge})" + + # Returns original input formula + @property + def formula(self) -> str: + """Returns the original input formula as a string.""" + return self._formula + + @formula.setter + def formula(self, input_formula: str) -> None: + self._formula = str(input_formula) + + # Returns the charge of the formula object + @property + def charge(self) -> int: + """Returns the charge of the formula object as an integer.""" + return self._charge + + # Checks, whether the charge is valid + @charge.setter + def charge(self, charge: int) -> None: + if isinstance(charge, int): + self._charge = charge + else: + raise TypeError( + f"Invalid Charge Value '{charge}' (expected an integer (), but 
found {type(charge)})" + ) + + # Boolean property whether the formula object is charged (True) or not (False) + @property + def charged(self) -> bool: + """Returns whether the formula object is charged (True) or not (False)""" + return False if self.charge == 0 else True + + # Returns the charge of the formula object as a text string + @property + def text_charge(self) -> str: + """Returns the charge of the formula object as a text string, without the number "1" for charges of ±1.""" + # a charge of "1+" or "1-" is printed without the number "1" + charge_output = "" + if self.charge == 0: + return charge_output + if not(abs(self.charge) == 1): + charge_output = str(abs(self.charge)) + charge_output += "+" if self.charge > 0 else "-" + return charge_output + + # Returns formula and charge as a text string + @property + def text_formula(self) -> str: + """Returns the chemical formula including charge information as a text string.""" + if self.charged: + return f"{self.formula} {self.text_charge}" + return self.formula + + # Formats formula (ChemFormulaString object) as a customized strings + def format_formula(self, + formula_prefix: str = "", + element_prefix: str = "", element_suffix: str = "", + freq_prefix: str = "", freq_suffix: str = "", + formula_suffix: str = "", + bracket_prefix: str = "", bracket_suffix: str = "", + multiply_symbol: str = "", + charge_prefix: str = "", charge_suffix: str = "", + charge_positive: str = "+", charge_negative: str = "-" + ) -> str: + """Formats the chemical formula (ChemFormulaString object) as a customized string with user-defined prefixes and suffixes: + Parameters: + - formula_prefix (str): Prefix for the entire formula + - element_prefix (str): Prefix for each element + - element_suffix (str): Suffix for each element + - freq_prefix (str): Prefix for each frequency + - freq_suffix (str): Suffix for each frequency + - formula_suffix (str): Suffix for the entire formula + - bracket_prefix (str): Prefix for each bracket + - 
bracket_suffix (str): Suffix for each bracket + - multiply_symbol (str): Symbol for multiplication + - charge_prefix (str): Prefix for the charge + - charge_suffix (str): Suffix for the charge + - charge_positive (str): Symbol for positive charge + - charge_negative (str): Symbol for negative charge + """ + formatted_formula = re.sub(r"([\{\[\(\)\]\}]){1}", bracket_prefix + r"\g<1>" + bracket_suffix, self.formula) + formatted_formula = re.sub(r"([A-Z]{1}[a-z]{0,1})", element_prefix + r"\g<1>" + element_suffix, formatted_formula) + formatted_formula = re.sub(r"(\d+)", freq_prefix + r"\g<1>" + freq_suffix, formatted_formula) + formatted_formula = re.sub(r"[\.\*]", multiply_symbol, formatted_formula) + # create charge string, by replacing + and - with the respective charge symbols + charge = self.text_charge + charge.replace("+", charge_positive) + charge.replace("-", charge_negative) + if self.charged: + return formula_prefix + formatted_formula + charge_prefix + charge + charge_suffix + formula_suffix + else: + return formula_prefix + formatted_formula + formula_suffix + + # Returns a LaTeX representation of a formula (ChemFormulaString object) + @property + def latex(self) -> str: + """Returns a LaTeX representation of the chemical formula (including charge information) as a string.""" + return self.format_formula("", + r"\\textnormal{", "}", + "_{", "}", + "", + r"\\", + multiply_symbol=r"\\cdot", + charge_prefix="^{", charge_suffix="}" + ) + + # Returns an HTML representation of a formula (ChemFormulaString object) + @property + def html(self) -> str: + """Returns an HTML representation of the chemical formula (including charge information) as a string. 
+ Specifies the class 'ChemFormula' for custom CSS.""" + return self.format_formula("", + "", "", + "", "", + "", + multiply_symbol="⋅", + charge_prefix="", charge_suffix="", + charge_negative="–" + ) + + # Returns formula with unicode sub- and superscripts (₀₁₂₃₄₅₆₇₈₉⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻) + @property + def unicode(self) -> str: + """Returns a Unicode representation of the chemical formula (including charge information) as a string.""" + subscript_num = "₀₁₂₃₄₅₆₇₈₉" + superscript_num = "⁰¹²³⁴⁵⁶⁷⁸⁹" + unicode_formula = self.formula # start with original formula + unicode_charge = self.text_charge # start with original text_charge + # replace all numbers (0 - 9) by subscript numbers (for elemental frequencies) + # and superscript numbers (for charge information) + for number in range(0, 10): + unicode_formula = unicode_formula.replace(str(number), subscript_num[number]) + unicode_charge = unicode_charge.replace(str(number), superscript_num[number]) + unicode_charge = unicode_charge.replace("+", "⁺") + unicode_charge = unicode_charge.replace("-", "⁻") + return unicode_formula + unicode_charge + + +# Class for chemical formula dictionaries (only positive integer frequencies allowed) +class ChemFormulaDict(dict): + """ + ChemFormulaDict class for chemical formula dictionaries with element symbols as keys and + integer element frequencies as values. 
+ """ + def __init__(self, cf_dict: dict[str, int] | None = None) -> None: + super().__init__() + if cf_dict is None: + return + if not isinstance(cf_dict, dict): + raise TypeError(f"Invalid Input Type for `ChemFormulaDict` Initialization (expected a dict, but found {type(cf_dict).__name__})") + for key_element, value_frequency in cf_dict.items(): + self[key_element] = int(value_frequency) + + def __missing__(self, key_element: str) -> int: + return 0 # default value for non-existing elements is 0 + + def __setitem__(self, key_element: str, value_frequency: int) -> None: + if key_element not in elements.get_valid_element_symbols(): + raise ValueError( + f"Invalid Element Symbol (unknown element symbol '{key_element}')" + ) + if not isinstance(value_frequency, int) or value_frequency < 0: + raise ValueError(f"Invalid Element Frequency (must be a non-negative integer, got {value_frequency} for element '{key_element}')") + super().__setitem__(key_element, value_frequency) + + +# Class for chemical formula dictionaries (integer and float fractions allowed) +class ChemFormulaDictFloat(dict): + """ + ChemFormulaDictFloat class for chemical formula dictionaries with element symbols as keys and + float (or int) element fractions as values. Values are stored as floats. 
+ """ + def __init__(self, cf_dict: dict[str, float | int] | None = None) -> None: + super().__init__() + if cf_dict is None: + return + if not isinstance(cf_dict, dict): + raise TypeError(f"Invalid Input Type for `ChemFormulaDictFloat` Initialization (expected a dict, but found {type(cf_dict).__name__})") + for key_element, value_fraction in cf_dict.items(): + self[key_element] = float(value_fraction) # store as float + + def __missing__(self, key_element: str) -> float: + return 0.0 # default value for non-existing elements is 0.0 + + def __setitem__(self, key_element: str, value_fraction: float) -> None: + if key_element not in elements.get_valid_element_symbols(): + raise ValueError( + f"Invalid Element Symbol (unknown element symbol '{key_element}')" + ) + if not (isinstance(value_fraction, (int, float))): + raise ValueError(f"Invalid Element Fraction (must be an integer or float value, got type {type(value_fraction).__name__} ({value_fraction}) for element '{key_element}')") + super().__setitem__(key_element, float(value_fraction)) + + +# Class for chemical formula objects +class ChemFormula(ChemFormulaString): + """ + ChemFormula class for chemical formula objects with formula, different representations (LaTeX, HTML, Unicode), + formula_weight, charge, name, information on radioactivity and specific isotopes, as well as CAS registry number + information, if provided + + Attributes (inherited from ChemFormulaString): + ---------------------------------------------- + formula : str + Chemical (input) formula as a string + charge : int + Charge of the chemical formula + charged : bool + Boolean property whether the formula object is charged (True) or not (False) + text_charge : str + Text representation of the charge as a text string + text_formula : str + Text representation of the chemical formula including charge information + latex : str + LaTeX representation of the chemical formula including charge information + html : str + HTML representation of the 
chemical formula including charge information + unicode : str + Unicode representation of the chemical formula including charge information + + Additional Attributes: + ---------------------- + name : str or None + Name of the chemical formula (if provided) + cas : casregnum.CAS or None + CAS registry number as a casregnum.CAS object of the chemical formula (if provided) + element : ChemFormulaDict + Chemical formula as a ChemFormulaDict object with (key : value) = (element symbol : element frequency) + sum_formula : ChemFormulaDict + Chemical formula in Hill notation as a ChemFormulaDict object with (key : value) = (element symbol : element frequency) + hill_formula : ChemFormulaDict + Chemical formula in Hill notation as a ChemFormulaDict object with (key : value) = (element symbol : element frequency) + formula_weight : float + Formula weight of the chemical formula in g/mol + mass_fraction : ChemFormulaDictFloat + Mass fraction of each element as a ChemFormulaDictFloat object with (key : value) = (element symbol : mass fraction) + contains_isotopes : bool + Boolean property whether the formula contains an element symbol that is refering to a specific isotope (e.g. 
D or Tc) + is_radioactive : bool + Boolean property whether the formula contains at least one radioactive element (True) or not (False) + + Methods (inherited from ChemFormulaString): + ------------------------------------------- + format_formula() : str + Formats formula (ChemFormulaString object) as a customized strings + + Additional Methods: + ------------------- + __eq__() : bool + Tests if two chemical formula objects are identical + __lt__() : bool + Compares two formulas with respect to their lexical sorting according to Hill's notation + __add__() : ChemFormula + Adds two chemical formula objects by summing up their element frequencies and charges + __sub__() : ChemFormula + Subtracts two chemical formula objects by subtracting their element frequencies and charges + __mul__() : ChemFormula + Multiplies a chemical formula object by a positive integer factor by multiplying all element frequencies and the charge + __rmul__() : ChemFormula + Multiplies a chemical formula object by a positive integer factor by multiplying all element frequencies and the charge + """ + + def __init__(self, formula: str, charge: int = 0, name: str | None = None, cas: str | int | None = None) -> None: + # Parent information + ChemFormulaString.__init__(self, formula, charge) + # Additional input information + self.name = name + self.cas = cas + # parse chemical formula and test for consistency + self._clean_formula = self._clean_up_formula() + self._check_formula(self._clean_formula) + self._resolved_formula = self._resolve_brackets(self._clean_formula) + _ = self.mass_fraction # trigger mass_fraction parsing to check for valid element symbols and atomic weights + + # Test if two chemical formla objects are identical + def __eq__(self, other: object) -> bool: + """Tests if two chemical formula objects are identical.""" + # two chemical formula objects are considered to be equal if they have + # the same chemical composition (in Hill notation), the same charge, + # and the same 
CAS registry number (if provided) + if not isinstance(other, ChemFormula): + raise TypeError("Comparisons can only be made between ChemFormula objects.") + return (str(self.hill_formula) == str(other.hill_formula) and self.charge == other.charge and self.cas == other.cas) + + # Compares two formulas with respect to their lexical sorting according to Hill's notation + def __lt__(self, other: object) -> bool: + """Compares two chemical formula objects with respect to their lexical sorting according to Hill's notation.""" + if not isinstance(other, ChemFormula): + raise TypeError("Comparisons can only be made between ChemFormula objects.") + elements_self = tuple(self._element_hill_sorted.items()) + elements_other = tuple(other._element_hill_sorted.items()) + # cycle through the elements in Hill notation + for i in range(0, min(len(elements_self), len(elements_other))): + # first check for the alphabetical sorting of the element symbol + if elements_self[i][0].lower() < elements_other[i][0].lower(): + return True + if elements_self[i][0].lower() > elements_other[i][0].lower(): + return False + # if the element symbol is identical, check the frequency of that element + if elements_self[i][0] == elements_other[i][0] and elements_self[i][1] < elements_other[i][1]: + return True + if elements_self[i][0] == elements_other[i][0] and elements_self[i][1] > elements_other[i][1]: + return False + # if everything to this point is identical then: + # the shorter formula (with less elements) is always lesser/smaller than the longer formula (with more elements) + if len(elements_self) - 1 == i and len(elements_other) - 1 > i: + return True + # if everything has failed so far then Self > Other + return False + + # Add two chemical formula objects + def __add__(self, other: object) -> ChemFormula: + """Adds two chemical formula objects by summing up their element frequencies and charges.""" + if not isinstance(other, ChemFormula): + raise TypeError("Addition can only be performed 
between ChemFormula objects.") + # concatenate string formulas of both formula objects as sum formulas + combined_formula = self.sum_formula.formula + other.sum_formula.formula + # sum up the charges of both formula objects + sum_charge = self.charge + other.charge + combined_sum_formula = ChemFormula(combined_formula, sum_charge).sum_formula.formula + return ChemFormula(combined_sum_formula, sum_charge) + + # Subtract two chemical formula objects + def __sub__(self, other: object) -> ChemFormula: + """Subtracts two chemical formula objects by subtracting their element frequencies and charges. + self = minuend, other = subtrahend""" + if not isinstance(other, ChemFormula): + raise TypeError("Subtraction can only be performed between ChemFormula objects.") + dict_result = self.element.copy() + dict_minuend = self.element + dict_subtrahend = other.element + # subtract element frequencies of all subtrahend elements from minuend + for element_subtrahend, freq_subtrahend in dict_subtrahend.items(): + freq_minuend = dict_minuend.get(element_subtrahend, 0) + freq_result = freq_minuend - freq_subtrahend + # check for negative frequencies (i.e. 
invalid subtraction as more atoms are subtracted than are present) + if freq_result < 0: + raise ValueError(f"Subtraction leads to negative element frequency for element '{element_subtrahend}' ({freq_minuend} - {freq_subtrahend} = {freq_result})") + if freq_result > 0: + # update element frequency in result dictionary + dict_result[element_subtrahend] = freq_result + else: + # remove element if frequency becomes zero + dict_result.pop(element_subtrahend, None) + # subtract charges + sum_charge = self.charge - other.charge + # build formula string from resulting element dict and create new ChemFormula + if len(dict_result) == 0: + raise ValueError("Subtraction leads to an empty chemical formula (no elements left).") + combined_formula = "".join(f"{element}{freq}" for element, freq in dict_result.items()) + return ChemFormula(combined_formula, sum_charge) + + # Multiply chemical formula objects with integer factors + def __mul__(self, factor: int) -> ChemFormula: + """Multiplies a chemical formula object with a positive integer factor by multiplying all element frequencies and the charge. 
+ self = chemical formula, factor = multiplication factor to the right (positive integer)""" + # allowed multiplication with integers + if isinstance(factor, int): + if factor <= 0: + raise ValueError(f"Multiplication factor must be a positive integer, found {factor}.") + dict_result = ChemFormulaDict() + for element, freq in self.element.items(): + dict_result[element] = factor * freq + multiplied_formula = "".join(f"{element}{freq}" for element, freq in dict_result.items()) + multiplied_charge = factor * self.charge + return ChemFormula(multiplied_formula, multiplied_charge) + # not allowed multiplication with another ChemFormula object + if isinstance(factor, ChemFormula): + raise TypeError(f"Multiplication can only be performed with a positive integer factor, found type {type(factor).__name__} ({factor}).") + # for all other types, return `NotImplemented` to allow Python and the other operand to handle the situation + return NotImplemented + + # Enable multiplication with the integer factor on the left side + def __rmul__(self, factor: int) -> ChemFormula: + """Enables multiplication with the positive integer factor on the left side. + self = chemical formula, factor = multiplication factor to the left (positive integer)""" + return self.__mul__(factor) + + # Clean up chemical formula, i. e. 
# Clean up chemical formula, i. e. harmonize brackets, add quantifier "1" to bracketed units without quantifier
def _clean_up_formula(self) -> str:
    """Cleans up the input formula by harmonizing brackets, removing whitespaces, dots and asterisks,
    and adding a quantifier `1` to bracketed units without a quantifier.

    Reads `self.formula` and returns the cleaned-up string; `self.formula` itself is not modified."""
    # for simplicity reasons: create a (...)1 around the whole formula
    formula = "(" + self.formula + ")1"
    # replace all other types of brackets ("{", "[") by round brackets
    formula = re.sub(r"[\{\[]", "(", formula)
    formula = re.sub(r"[\]\}]", ")", formula)
    # remove all whitespaces, dots and asterisks
    formula = re.sub(r"[\.\s\*]+", "", formula)
    # add a frequency of 1 to every closing bracket that is not followed by a digit: (...) => (...)1
    # A lookahead is used so that the following character is not consumed; otherwise the second
    # bracket in "))" would be skipped and stay without a quantifier.
    formula = re.sub(r"\)(?=\D)", ")1", formula)
    return formula


# Checks whether the formula is valid regarding bracketing
def _check_formula(self, formula: str) -> bool:
    """Checks whether the formula is valid regarding bracketing and general element symbols conventions.

    Returns True if no problem is found. Raises a ValueError if a closing bracket appears before its
    opening bracket, if the numbers of opening and closing brackets differ, or if two lowercase letters
    appear in sequence. Element validation is done in the `ChemFormulaDict` class."""
    open_brackets = 0
    for character in formula:
        if character == "(":
            open_brackets += 1
        elif character == ")":
            open_brackets -= 1
            if open_brackets < 0:  # more closing than opening brackets seen so far
                raise ValueError(
                    "Invalid Bracket Structure in Formula (expecting an opening bracket, but found a closing bracket)"
                )
    if open_brackets != 0:  # number of opening brackets is not identical to the number of closing brackets
        raise ValueError(
            "Invalid Bracket Structure in Formula (inconsistent number of opening and closing brackets)"
        )
    if re.search("[a-z]{2,}", formula):  # at least two lowercase letters found in sequence
        raise ValueError(
            "Invalid Element Symbol (two lowercase letters found in sequence)"
        )
    # no error found
    return True
# Resolve all brackets in the provided formula
def _resolve_brackets(self, formula: str) -> str:
    """Resolves all brackets in the provided formula and returns a formula without any brackets as a string.

    The innermost bracketed unit (one that contains no other brackets) is expanded first: every element
    frequency inside it is multiplied with the unit's multiplier (a missing frequency counts as 1).
    This is repeated until no opening bracket is left, so every element in the result carries an
    explicit frequency.

    Raises a ValueError if an opening bracket is present but no unit of the form `(...)<digits>` with
    purely alphanumeric content can be found."""
    while "(" in formula:
        # find smallest bracket unit, i. e. a bracketed entity that does not contain any other brackets
        innermost_unit = re.search(r"\(([A-Za-z0-9]*)\)(\d+)", formula)
        if innermost_unit is None:
            # explicit error instead of an assert, which would be stripped under `python -O`
            raise ValueError(
                "Invalid Formula (bracketed unit could not be resolved, check for invalid characters or missing quantifiers)"
            )
        multiplier = int(innermost_unit.group(2))
        # element symbol + (optional) element frequency occurrences inside the bracketed unit
        resolved_unit = "".join(
            f"{symbol}{int(freq or 1) * multiplier}"
            for symbol, freq in re.findall(r"([A-Z][a-z]?)(\d*)", innermost_unit.group(1))
        )
        # replace the bracketed unit (including its multiplier) with the resolved string
        formula = formula[:innermost_unit.start()] + resolved_unit + formula[innermost_unit.end():]
    return formula


# Returns the formula as a dictionary with (key : value) = (element symbol : element frequency)
@property
def element(self) -> ChemFormulaDict:
    """Returns the chemical formula as a `ChemFormulaDict` object with (key : value) = (element symbol : element frequency).

    Frequencies of symbols that occur more than once in `self._resolved_formula` are summed up."""
    # one capital letter, possibly one lower case letter and a multiplier number
    # Note: a multiplier number is always present in resolved formulas
    dict_formula = ChemFormulaDict()
    for symbol, freq in re.findall(r"([A-Z][a-z]?)(\d+)", self._resolved_formula):
        dict_formula[symbol] += int(freq)
    return ChemFormulaDict(dict_formula)


# Return the formula as a dictionary with (key : value) = (element symbol : element frequency) in Hill sorting
@property
def _element_hill_sorted(self) -> ChemFormulaDict:
    """Returns the chemical formula as a `ChemFormulaDict` object in Hill notation with (key : value) = (element symbol : element frequency).

    Elements are sorted alphabetically; if "C" is present it is placed first, directly followed by "H" (if present)."""
    remaining_elements = dict(sorted(self.element.items()))
    leading_elements = {}
    # extract "C" and "H" (the latter only if "C" is also present) from the sorted dictionary
    if "C" in remaining_elements:
        leading_elements["C"] = remaining_elements.pop("C")
        if "H" in remaining_elements:
            leading_elements["H"] = remaining_elements.pop("H")
    # place "C" and "H" in front of all other elements
    return ChemFormulaDict(leading_elements | remaining_elements)
# function to contract formula from a given (element symbol : element frequency) dictionary
@staticmethod
def _contract_formula(dict_element_freq: ChemFormulaDict, charge: int) -> ChemFormulaString:
    """Contracts the formula to a sum formula by generating a `ChemFormulaString` object from a given (element symbol : element frequency) dictionary.

    Elements are written in the iteration order of `dict_element_freq`."""
    # multipliers are only written when they are greater than 1
    formula_output = "".join(
        symbol + (str(freq) if freq > 1 else "") for symbol, freq in dict_element_freq.items()
    )
    return ChemFormulaString(formula_output, charge)


# Generate sum formula as a string
@property
def sum_formula(self) -> ChemFormulaString:
    """Returns the chemical formula as a `ChemFormulaString` object in sum formula notation."""
    return ChemFormula._contract_formula(self.element, self.charge)


# Generate Hill formula as a string
# Source: Edwin A. Hill, J. Am. Chem. Soc., 1900 (22), 8, 478-494 (https://doi.org/10.1021/ja02046a005)
@property
def hill_formula(self) -> ChemFormulaString:
    """Returns the chemical formula as a `ChemFormulaString` object in Hill notation."""
    return ChemFormula._contract_formula(self._element_hill_sorted, self.charge)


# Returns the formula weight of the formula object, atomic weights are taken from elements.py
@property
def formula_weight(self) -> float:
    """Returns the formula weight of the chemical formula in g/mol as a float."""
    # explicit accumulation loop kept on purpose so the summation order and rounding stay unchanged
    total_weight: float = 0.0
    for symbol, freq in self.element.items():
        total_weight += freq * elements.atomic_weight(symbol)
    return float(total_weight)


# Calculate mass fractions for each element in the formula as a dictionary, atomic weights are taken from elements.py
@property
def mass_fraction(self) -> ChemFormulaDictFloat:
    """Returns the mass fraction of each element as a `ChemFormulaDictFloat` object with (key : value) = (element symbol : mass fraction)."""
    # the formula weight is the same for every element, so it is evaluated once instead of once per element
    total_weight = self.formula_weight
    dict_mass_fraction: ChemFormulaDictFloat = ChemFormulaDictFloat()
    for symbol, freq in self.element.items():
        dict_mass_fraction[symbol] = float((freq * elements.atomic_weight(symbol)) / total_weight)
    return ChemFormulaDictFloat(dict_mass_fraction)


# Checks, whether an element is classified as radioactive, radioactivity data is taken from elements.py
@property
def is_radioactive(self) -> bool:
    """Returns whether the formula contains at least one radioactive element (True) or not (False) and is therefore classified as radioactive."""
    return any(elements.isradioactive(symbol) for symbol in self.element)


# Deprecated: use is_radioactive instead
@property
def radioactive(self) -> bool:
    """Deprecated: use `is_radioactive` instead. Emits a DeprecationWarning and returns the value of `is_radioactive`."""
    warnings.warn(
        "The 'radioactive' property is deprecated, use 'is_radioactive' instead.",
        DeprecationWarning,
        stacklevel=2
    )
    return self.is_radioactive
# Checks, whether a specific isotope of an element is used, isotope data is taken from elements.py
@property
def contains_isotopes(self) -> bool:
    """Returns whether the formula contains an element symbol that refers to a specific isotope (e.g. D for deuterium)."""
    return any(elements.isisotope(symbol) for symbol in self.element)


# Returns the name of the formula
@property
def name(self) -> str | None:
    """Returns the name of the chemical formula (if provided) as a string or None."""
    return self._name


# Makes sure, that the name of the formula is a string
@name.setter
def name(self, name: str | None) -> None:
    if name is None:
        self._name = None
    else:
        self._name = str(name)


# Returns the CAS registry number of the formula object
@property
def cas(self) -> casregnum.CAS | None:
    """Returns the CAS registry number of the chemical formula as a `casregnum.CAS` object (if provided) or None."""
    return self._cas


# Checks, whether the CAS registry number is valid by using the CAS class from CASRegistryNumber.py
@cas.setter
def cas(self, cas_rn: int | str | None) -> None:
    self._cas = casregnum.CAS(cas_rn) if cas_rn is not None else None
+ def __setitem__(self, key_element: str, value_fraction: float) -> None: ... class ChemFormula(ChemFormulaString): def __init__(self, formula: str, charge: int = 0, name: str | None = None, cas: str | int | None = None) -> None: ... def __eq__(self, other: object) -> bool: ... def __lt__(self, other: object) -> bool: ... + def __add__(self, other: object) -> ChemFormula: ... + def __sub__(self, other: object) -> ChemFormula: ... + def __mul__(self, factor: int) -> ChemFormula: ... + def __rmul__(self, factor: int) -> ChemFormula: ... @property def element(self) -> ChemFormulaDict: ... @property @@ -45,7 +53,7 @@ class ChemFormula(ChemFormulaString): @property def formula_weight(self) -> float: ... @property - def mass_fraction(self) -> ChemFormulaDict: ... + def mass_fraction(self) -> ChemFormulaDictFloat: ... @property def is_radioactive(self) -> bool: ... @property diff --git a/tests/test_casregnum.py b/tests/test_casregnum.py index 46e9a0d..a8d1dd9 100644 --- a/tests/test_casregnum.py +++ b/tests/test_casregnum.py @@ -1,6 +1,6 @@ import pytest -from chemformula import ChemFormula +from chemformula.chemformula import ChemFormula # pytest fixtures diff --git a/tests/test_dictionary_classes.py b/tests/test_dictionary_classes.py new file mode 100644 index 0000000..de4ee5e --- /dev/null +++ b/tests/test_dictionary_classes.py @@ -0,0 +1,67 @@ +import pytest + +from chemformula.chemformula import ChemFormulaDict, ChemFormulaDictFloat + + +# Tests for ChemFormulaDict +def test_chemformulardict_valid_set_get(): + cf_dict = ChemFormulaDict() + cf_dict["C"] = 6 + assert cf_dict["C"] == 6 + # missing key returns default int 0 + assert isinstance(cf_dict["N"], int) + assert cf_dict["N"] == 0 + +def test_chemformulardict_invalid_symbol_raises(): + cf_dict = ChemFormulaDict() + with pytest.raises(ValueError): + cf_dict["Xx"] = 1 # unknown element symbol + +@pytest.mark.parametrize("bad_value", [-1, 1.5, "2"]) +def test_chemformulardict_invalid_frequency_types(bad_value): + 
cf_dict = ChemFormulaDict() + with pytest.raises(ValueError): + cf_dict["C"] = bad_value + +def test_chemformulardict_init_with_invalid_mapping_raises(): + with pytest.raises(ValueError): + ChemFormulaDict({"Xx": 1}) + +def test_chemformulardict_init_with_invalid_dict(): + with pytest.raises(TypeError): + ChemFormulaDict("C6H12O6") + + +# Tests for ChemFormulaDictFloat +def test_chemformulardictfloat_valid_set_get_and_default(): + f = ChemFormulaDictFloat() + f["O"] = 0.5 + assert isinstance(f["O"], float) + assert abs(f["O"] - 0.5) < 1e-12 + # missing key returns default float 0.0 + assert isinstance(f["He"], float) + assert f["He"] == 0.0 + +def test_chemformulardictfloat_accept_int_and_convert_to_float(): + cf_dict = ChemFormulaDictFloat() + cf_dict["H"] = 2 + assert isinstance(cf_dict["H"], float) + assert cf_dict["H"] == 2.0 + +def test_chemformulardictfloat_invalid_symbol_raises(): + cf_dict = ChemFormulaDictFloat() + with pytest.raises(ValueError): + cf_dict["Xx"] = 1.0 # unknown element symbol + +def test_chemformulardictfloat_invalid_value_type_raises(): + cf_dict = ChemFormulaDictFloat() + with pytest.raises(ValueError): + cf_dict["O"] = "0.5" # string not allowed + +def test_chemformulardictfloat_init_with_invalid_mapping_raises(): + with pytest.raises(ValueError): + ChemFormulaDictFloat({"Xx": 1.0}) + +def test_chemformulardictfloat_init_with_invalid_dict(): + with pytest.raises(TypeError): + ChemFormulaDictFloat("C6H12O6") diff --git a/tests/test_formula_arithmetic.py b/tests/test_formula_arithmetic.py new file mode 100644 index 0000000..f813798 --- /dev/null +++ b/tests/test_formula_arithmetic.py @@ -0,0 +1,101 @@ +import pytest + +import chemformula.config +from chemformula.chemformula import ChemFormula + +# Tests for functionality + +@pytest.fixture(autouse=True, scope="module") +def enable_hydrogen_isotopes(): + chemformula.config.AllowHydrogenIsotopes = True + + +@pytest.mark.parametrize( + "testinput1, testinput2, expected", + [ + 
(ChemFormula("H2O", 0), ChemFormula("H", 1), ChemFormula("H3O", 1)), + (ChemFormula("Ba", 2), ChemFormula("SO4", -2), ChemFormula("BaSO4", 0)), + (ChemFormula("C4H6", 0), ChemFormula("C2H4", 0), ChemFormula("C6H10", 0)), + ], +) +def test_addition(testinput1, testinput2, expected): + assert testinput1 + testinput2 == expected + + +@pytest.mark.parametrize( + "testinput1, testinput2, charge_testinput2, expected_isotope, expected_radioactive", + [ + ("H2O", "D", 1, True, False), + ("U", "O2", 0, False, True), + ], +) +def test_addition_properties(testinput1, testinput2, charge_testinput2, expected_isotope, expected_radioactive): + adduct = ChemFormula(testinput1) + ChemFormula(testinput2, charge_testinput2) + assert adduct.contains_isotopes == expected_isotope + assert adduct.is_radioactive == expected_radioactive + + +@pytest.mark.parametrize( + "testinput1, testinput2, expected", + [ + (ChemFormula("H3O", 1), ChemFormula("H", 1), ChemFormula("H2O", 0)), + (ChemFormula("CH2ClCH3", 0), ChemFormula("HCl", 0), ChemFormula("CH2CH2", 0)), + ], +) +def test_subtraction(testinput1, testinput2, expected): + assert testinput1 - testinput2 == expected + + +@pytest.mark.parametrize( + "testinput1, testinput2, expected", + [ + (ChemFormula("C2H4", 0), 2, ChemFormula("C4H8", 0)), + (2, ChemFormula("C2H4", 0), ChemFormula("C4H8", 0)), + (ChemFormula("Na", 1), 4, ChemFormula("Na4", 4)), + ], +) +def test_multiplication(testinput1, testinput2, expected): + assert testinput1 * testinput2 == expected + + +# Tests for error handling + + +def test_addition_failed(): + with pytest.raises(TypeError): + ChemFormula("H2O") + "H+" + + +def test_subtraction_failed_type(): + with pytest.raises(TypeError): + ChemFormula("H2O") - "H+" + + +def test_subtraction_failed_negative_frequency(): + with pytest.raises(ValueError): + ChemFormula("H2O") - ChemFormula("H3") + + +def test_subtraction_failed_empty_formula(): + with pytest.raises(ValueError): + ChemFormula("H2O") - ChemFormula("OH2") + + +def 
test_multiplication_failed(): + with pytest.raises(TypeError): + ChemFormula("C2H4") * 0.5 + with pytest.raises(TypeError): + 0.5 * ChemFormula("C2H4") + with pytest.raises(TypeError): + ChemFormula("C2H4") * "2" + with pytest.raises(TypeError): + "2" * ChemFormula("C2H4") + with pytest.raises(TypeError): + ChemFormula("C2H4") * ChemFormula("H2") + + +def test_multiplication_failed_non_positive_integer(): + with pytest.raises(ValueError): + ChemFormula("C2H4") * -2 + with pytest.raises(ValueError): + ChemFormula("C2H4") * 0 diff --git a/tests/test_general.py b/tests/test_general.py index c4129b3..82ad927 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -1,6 +1,7 @@ import pytest -from chemformula import ChemFormula, elements +import chemformula.config +from chemformula.chemformula import ChemFormula, elements # pytest fixtures @@ -16,6 +17,11 @@ def tetraamminecoppersulfate(): return ChemFormula("[Cu(NH3)4]SO4.H2O") +@pytest.fixture(autouse=True, scope="module") +def enable_hydrogen_isotopes(): + chemformula.config.AllowHydrogenIsotopes = False + + # Tests for functionality @@ -66,7 +72,9 @@ def test_charged(testinput, expected): ) def test_is_radioactive(testinput, expected): assert testinput.is_radioactive is expected - assert testinput.radioactive is expected # deprecated property + with pytest.warns(DeprecationWarning): + val = testinput.radioactive + assert val is expected def test_get_valid_element_symbols_without_hydrogen_isotopes(): @@ -152,14 +160,14 @@ def test_hill_formula(testinput, expected): def test_html(muscarine): assert ( muscarine.html - == "((CH3)3N)(C6H11O2)+" # noqa: E501 + == "((CH3)3N)(C6H11O2)+" ) def test_latex(muscarine): assert ( muscarine.latex - == r"\(\(\textnormal{C}\textnormal{H}_{3}\)_{3}\textnormal{N}\)\(\textnormal{C}_{6}\textnormal{H}_{11}\textnormal{O}_{2}\)^{+}" # noqa: E501 + == r"\(\(\textnormal{C}\textnormal{H}_{3}\)_{3}\textnormal{N}\)\(\textnormal{C}_{6}\textnormal{H}_{11}\textnormal{O}_{2}\)^{+}" ) @@ 
-201,31 +209,26 @@ def test_hill_formula_text_formula(testinput, expected): # Tests for error handling -#@pytest.mark.xfail(raises=TypeError) def test_charge_failed(): with pytest.raises(TypeError): ChemFormula("H3O", "+") -#@pytest.mark.xfail(raises=ValueError) def test_brackets_closing(): with pytest.raises(ValueError): ChemFormula("H2)O") -#@pytest.mark.xfail(raises=ValueError) def test_brackets(): with pytest.raises(ValueError): ChemFormula("(H2)(O") -#@pytest.mark.xfail(raises=ValueError) def test_element(): with pytest.raises(ValueError): ChemFormula("caO") -#@pytest.mark.xfail(raises=ValueError) def test_unknown_element(): with pytest.raises(ValueError): ChemFormula("XyO") diff --git a/tests/test_isotopes.py b/tests/test_isotopes.py index 0c770a6..4ac45f6 100644 --- a/tests/test_isotopes.py +++ b/tests/test_isotopes.py @@ -1,7 +1,7 @@ import pytest import chemformula.config -from chemformula import ChemFormula, elements +from chemformula.chemformula import ChemFormula, elements # Tests for functionality @@ -49,4 +49,6 @@ def test_formula_weight_hydrogen_isotopes(testinput, expected): ) def test_is_radioactive_isotopes(testinput, expected): assert ChemFormula(testinput).is_radioactive is expected - assert ChemFormula(testinput).radioactive is expected # `radioactive` is deprecated, use `is_radioactive` instead + with pytest.warns(DeprecationWarning): + val = ChemFormula(testinput).radioactive + assert val is expected diff --git a/tests/test_sorting.py b/tests/test_sorting.py index 8b166d2..91e0400 100644 --- a/tests/test_sorting.py +++ b/tests/test_sorting.py @@ -1,6 +1,6 @@ import pytest -from chemformula import ChemFormula +from chemformula.chemformula import ChemFormula # pytest fixtures diff --git a/uv.lock b/uv.lock index fc27bb5..574352d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" [[package]] @@ -148,7 +148,7 @@ wheels = [ [[package]] name = "chemformula" -version = "1.5.0" 
+version = "1.5.1" source = { editable = "." } dependencies = [ { name = "casregnum" },