Source code for geom.functions.rdkit_module

import io, sys, os, shutil
import webbrowser, tempfile
import py3Dmol
import matplotlib.pyplot as plt

from geom.classes import parameters, molecule
from geom.functions import output, general

from rdkit import Chem
from rdkit.Chem import Draw, rdDepictor, AllChem, rdDetermineBonds, rdAbbreviations
from rdkit.Chem.Draw import rdMolDraw2D
# -------------------------------------------------------------------------------------

[docs]
def select_case(inp):
  """
  Selects the appropriate RDKit-related function based on user input.

  Args:
      inp (input_class): An instance of the input class containing user-defined parameters.

  Returns:
      None: Executes the corresponding geometry generation function.
  """


  if (inp.rdkit_visualize): visualize(inp)
  if (inp.rdkit_file_conversion): file_conversion(inp)
  if (inp.rdkit_opt and not inp.rdkit_conformers): force_field_optimization(inp)
  if (inp.rdkit_conformers): generate_conformers(inp)

  # Eliminate tmp folder containing xyz to pdb structure
  if (inp.rdkit_mol_file_extension==".xyz"): shutil.rmtree(inp.tmp_folder)

# -------------------------------------------------------------------------------------

[docs]
def visualize(inp):
  """
  Visualizes a molecule in 2D or 3D using RDKit.

  Args:
      inp (input_class): Input parameters containing visualization options.

  Returns:
      None: Displays the molecule visualization.
  """

  # Check input
  inp.check_input_case()   
 
  # Read geometry
  mol = load_rdkit_file(inp)

  # Visualize 2D or 3D
  if (inp.rdkit_visualize_2d): plot_2d_molecule(mol, inp)
  if (inp.rdkit_visualize_3d): plot_3d_molecule(mol)

# -------------------------------------------------------------------------------------

[docs]
def file_conversion(inp):
    """
    Converts a molecular file from one format to another using RDKit.

    Args:
        inp (input_class): Input parameters containing file conversion options.

    Returns:
        None: Saves the converted file to the results folder.
    """

    # Check input and create results folder
    inp.check_input_case()   
    general.create_results_geom()

    # Read geometry and ensure we have a 3D conformer
    mol = load_rdkit_file(inp)
    mol = embed_3d(mol)
    
    # Write to output file
    out_file = os.path.join("results_geom",inp.rdkit_output_file)
    save_rdkit_file(mol, out_file)

# -------------------------------------------------------------------------------------

[docs]
def force_field_optimization(inp):
    """
    Performs force field optimization on a molecule using RDKit.

    Args:
        inp (input_class): Input parameters containing optimization options.

    Returns:
        None: Saves the optimized geometry to the results folder.
    """

    param = parameters.parameters()    

    # Check input and create results folder
    inp.check_input_case()   
    general.create_results_geom()

    # Read geometry and ensure we have a 3D conformer
    mol = load_rdkit_file(inp)
    mol = embed_3d(mol)

    # Construct force field
    ff_name = inp.rdkit_force_field
    ff = None

    if ff_name in ("mmff94", "mmff94s"):
        props = AllChem.MMFFGetMoleculeProperties(mol, mmffVariant=ff_name)
        if props is not None:
            ff = AllChem.MMFFGetMoleculeForceField(mol, props)
        else:
            output.error(f'MMFF parameters unavailable for "{inp.rdkit_mol_file}" with "{ff_name} force field".')
    elif ff_name == "uff":
        ff = AllChem.UFFGetMoleculeForceField(mol)
    else:
        msg = ( 
            f"Force field '{ff_name}' not supported.\n\n"
              " Accepted entries:" 
              + "".join(f"\n - {entry}" for entry in param.rdkit_file_extensions_opt) 
        ) 
        output.error(msg)

    # If force field could not be constructed, stop
    if ff is None:
        output.error(f'Could not set up "{ff_name}" force field for "{inp.rdkit_mol_file}".')

    # Optimize
    max_iters = inp.rdkit_max_iters
    ff.Initialize()
    iters = ff.Minimize(maxIts=max_iters)   # runs the convergence loop, save the number of iterations
    energy = float(ff.CalcEnergy())

    # Check convergence
    if iters >= max_iters:
        output.error(
            f'Geometry optimization did not converge after {max_iters} iterations '
            f'for "{inp.rdkit_mol_file}" using "{ff_name} force field".'
        )

    # Store metadata (handy for SDF output)
    try:
        mol.SetProp("_FF", ff_name)
        mol.SetProp("_FF_Energy_kcalmol", f"{energy:.6f}")
        mol.SetProp("_FF_MaxIters", str(max_iters))
        mol.SetProp("_FF_Iters", str(iters))
        mol.SetProp("_FF_Converged", "true" if iters < max_iters else "false")
    except Exception:
        pass

    # Save to requested output
    out_file = os.path.join("results_geom", inp.rdkit_output_file)
    save_rdkit_file(mol, out_file)

# -------------------------------------------------------------------------------------

[docs]
def generate_conformers(inp):
    """
    Generates multiple conformers for a molecule using RDKit's ETKDG method.

    Args:
        inp (input_class): Input parameters containing conformer generation options.

    Returns:
        None: Saves the generated conformers to the results folder.
    """

    # Check input and create results folder
    inp.check_input_case()
    general.create_results_geom()

    # Read geometry and add hydrogens
    base = load_rdkit_file(inp)
    mol  = Chem.AddHs(base, addCoords=True)

    if mol.GetNumConformers() > 0:
        mol.RemoveAllConformers()

    # ETKDGv3 params from CLI
    params = AllChem.ETKDGv3()
    params.enforceChirality = True # Keep stereochemistry
    params.randomSeed = 42         # For reproducibility
    params.numThreads = 0          # Use all available cores

    params.pruneRmsThresh   = inp.rdkit_confs_prune_rms # default = 0.75
    
    # Embed multiple conformers with ETKDGv3
    conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=int(inp.rdkit_confs), params=params)

    # Fallback: embed using random coords if ETKDG failed
    if not conf_ids:
        params.useRandomCoords = True
        conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=int(inp.rdkit_confs), params=params)

    # If still no conformers, raise error
    if not conf_ids:
        return output.error(
            f"Conformer generation failed for '{inp.rdkit_mol_file}'. "
            "Try increasing -confs, or relaxing -pruneRms"
        )

    # Optimize each conformer with the selected force field
    if inp.rdkit_opt:
        conformers_force_field_optimization(mol, inp)

    # Cosmetic alignment
    AllChem.AlignMolConformers(mol)

    # Save each conformer
    save_rdkit_conformers(mol, inp)

# -------------------------------------------------------------------------------------

[docs]
def conformers_force_field_optimization(mol, inp, retry_factor=2):
    """
    Optimizes multiple conformers using force field methods with retry for non-converged structures.

    Args:
        mol (Chem.Mol): RDKit molecule with multiple conformers.
        inp (input_class): Input parameters containing optimization options.
        retry_factor (int): Factor to multiply max iterations for non-converged conformers.

    Returns:
        None: Updates the molecule with optimized conformers and metadata.
    """

    if mol is None or mol.GetNumConformers() == 0:
        output.error("optimize_conformers_batch: no conformers to optimize.")

    ff_name   = inp.rdkit_force_field   
    max_iters = inp.rdkit_max_iters     

    # ---- First pass: batch with the requested FF ----
    if ff_name in ("mmff94", "mmff94s"):
        if not AllChem.MMFFHasAllMoleculeParams(mol):
            output.error(
                f"MMFF parameters unavailable for '{inp.rdkit_mol_file}' with '{ff_name}'."
            )
            return
        variant = "MMFF94s" if ff_name == "mmff94s" else "MMFF94"
        results = AllChem.MMFFOptimizeMoleculeConfs(
            mol, maxIters=int(max_iters), numThreads=0, mmffVariant=variant
        )
    elif ff_name == "uff":
        results = AllChem.UFFOptimizeMoleculeConfs(
            mol, maxIters=int(max_iters), numThreads=0
        )
    else:
        param = parameters.parameters()
        msg = (
            f"Force field '{ff_name}' not supported.\n\n"
            " Accepted entries:"
            + "".join(f"\n - {entry}" for entry in param.rdkit_file_extensions_opt)
        )
        output.error(msg)
        return

    converged = [bool(c) for c, _ in results]
    energies  = [float(e) for _, e in results]

    # ---- Optional: SAME-FF retry for non-converged conformers ----
    hard = [i for i, ok in enumerate(converged) if not ok]
    if hard and retry_factor and retry_factor > 1:
        extra_iters = int(max_iters) * int(retry_factor)

        if ff_name in ("mmff94", "mmff94s"):
            variant = "MMFF94s" if ff_name == "mmff94s" else "MMFF94"
            props = AllChem.MMFFGetMoleculeProperties(mol, mmffVariant=variant)
            if props is not None:
                for cid in hard:
                    ff2 = AllChem.MMFFGetMoleculeForceField(mol, props, confId=cid)
                    if ff2 is None:
                        continue
                    ff2.Initialize()
                    it2 = ff2.Minimize(maxIts=extra_iters)
                    energies[cid]  = float(ff2.CalcEnergy())
                    converged[cid] = (it2 < extra_iters)
        else:  # UFF
            for cid in hard:
                ff2 = AllChem.UFFGetMoleculeForceField(mol, confId=cid)
                if ff2 is None:
                    continue
                ff2.Initialize()
                it2 = ff2.Minimize(maxIts=extra_iters)
                energies[cid]  = float(ff2.CalcEnergy())
                converged[cid] = (it2 < extra_iters)

    # ---- Annotate molecule & conformers ----
    try:
        mol.SetProp("_FF", str(ff_name))
        mol.SetProp("_FF_MaxIters", str(max_iters))
        for i, (E, ok) in enumerate(zip(energies, converged)):
            mol.SetProp(f"_Conf_{i:07d}_FF_Energy_kcalmol", f"{E:.6f}")
            mol.SetProp(f"_Conf_{i:07d}_FF_Converged", "true" if ok else "false")
    except Exception:
        pass

    # Warn if some still didn’t converge
    n_fail = sum(not ok for ok in converged)
    if n_fail:
        output.warn(
            f"Warning: {n_fail}/{len(converged)} conformers did not converge within "
            f"{max_iters} (and {max_iters*retry_factor if retry_factor>1 else max_iters} on retry) using '{ff_name}'."
        )

# -------------------------------------------------------------------------------------

[docs]
def save_rdkit_file(mol, out_file):
    """
    Saves an RDKit molecule to a file with the specified extension.

    Args:
        mol (Chem.Mol): RDKit molecule to save.
        out_file (str): Output file path.

    Returns:
        None: Saves the molecule to the specified file format.
    """

    param = parameters.parameters()  

    ext = out_file[-4:].lower()

    if ext == ".smi":
        with open(out_file, "w") as f:
            smi = Chem.MolToSmiles(mol)
            f.write(smi + "\n")
    
    elif ext in (".sdf", ".mol"):
        writer = Chem.SDWriter(out_file)
        writer.write(mol)
        writer.close()
    
    elif ext == ".pdb":
        Chem.MolToPDBFile(mol, out_file)

    elif ext == ".xyz":
        Chem.MolToXYZFile(mol, out_file)
    
    else:
        msg = (
            f"File extension '{ext}' for RDKit not supported.\n\n"
            "   Accepted file extensions:"
            + "".join(f"\n     - {extension}" for extension in param.rdkit_file_extensions)
        )
        output.error(msg)


# -------------------------------------------------------------------------------------

[docs]
def save_rdkit_conformers(mol, inp, out_dir="results_geom", max_digits=7):
    """
    Saves each conformer of a molecule to separate files with appropriate naming.

    Args:
        mol (Chem.Mol): RDKit molecule with multiple conformers.
        inp (input_class): Input parameters containing output options.
        out_dir (str): Output directory for conformer files.
        max_digits (int): Maximum digits for conformer numbering.

    Returns:
        None: Saves each conformer to separate files.
    """

    param = parameters.parameters()

    if mol is None:
        output.error("save_rdkit_conformers: input molecule is None.")

    nconfs = mol.GetNumConformers()
    if nconfs == 0:
        output.error("save_rdkit_conformers: the molecule has no conformers to save.")

    stem, ext = os.path.splitext(inp.rdkit_output_file)
    ext = ext.lower()

    if ext not in (".sdf", ".pdb", ".xyz"):
        msg = (
            f"File extension '{ext}' for RDKit not supported.\n\n"
            "   Accepted file extensions:"
            + "".join(f"\n     - {extension}" for extension in param.rdkit_file_extensions_confs)
        )
        output.error(msg)


    def _conf_converged(m, idx: int):
        # Try zero-padded then plain index
        for k in (f"_Conf_{idx:07d}_FF_Converged", f"_Conf_{idx}_FF_Converged"):
            if m.HasProp(k):
                v = m.GetProp(k).strip().lower()
                return v in ("true", "1", "yes", "y")
        return None  # unknown

    # padding width (up to max_digits)
    width = max(3, min(max_digits, len(str(max(0, nconfs - 1)))))

    tag_opt = bool(getattr(inp, "rdkit_opt", False))
    ff_tag  = str(getattr(inp, "rdkit_force_field", "")).strip()

    for i in range(nconfs):
        idx_str = f"{i:0{width}d}"  # 1-based index in filenames

        if tag_opt:
            conv = _conf_converged(mol, i)
            status = "CONV" if conv is True else ("NOTCONV" if conv is False else "UNKNOWN")
            stem_i = f"{stem}_opt_{status}_{ff_tag}"
        else:
            stem_i = stem

        out_path = os.path.join(out_dir, f"{stem_i}_conf_{idx_str}{ext}")

        if ext == ".sdf":
            w = Chem.SDWriter(out_path)
            try:
                mol.SetIntProp("_ConfId", int(i))
            except Exception:
                pass
            w.write(mol, confId=int(i))
            w.close()

        elif ext == ".pdb":
            block = Chem.MolToPDBBlock(mol, confId=int(i))
            with open(out_path, "w") as f:
                f.write(block if block.endswith("\n") else block + "\n")

        elif ext == ".xyz":
            block = Chem.MolToXYZBlock(mol, confId=int(i))
            with open(out_path, "w") as f:
                f.write(block if block.endswith("\n") else block + "\n")

# -------------------------------------------------------------------------------------

[docs]
def embed_3d(mol):
    """
    Ensures the given RDKit molecule has hydrogens and a 3D conformer.

    Args:
        mol (Chem.Mol): Input RDKit molecule.

    Returns:
        Chem.Mol: Molecule with hydrogens added and 3D conformer embedded.
    """

    if mol is None:
        raise ValueError("embed_3d: input molecule is None.")

    # Add hydrogens (returns a new molecule)
    mH = Chem.AddHs(mol, addCoords=True)

    def _has_true_3d(mm: Chem.Mol) -> bool:
        if mm.GetNumConformers() == 0:
            return False
        conf = mm.GetConformer()
        try:
            return conf.Is3D()
        except Exception:
            # Fallback: check if any z is meaningfully non-zero
            zs = [conf.GetAtomPosition(i).z for i in range(mm.GetNumAtoms())]
            return any(abs(z) > 1e-6 for z in zs)

    # Re-embed only if no conformer or the conformer is 2D
    if not _has_true_3d(mH):
        if mH.GetNumConformers() > 0:
            mH.RemoveAllConformers()
        rc = AllChem.EmbedMolecule(mH, AllChem.ETKDGv3())
        if rc == -1:
            # Optional minimal fallback; no optimization
            rc = AllChem.EmbedMolecule(mH, useRandomCoords=True)
            if rc == -1:
                raise RuntimeError("embed_3d: failed ETKDGv3 and random embedding.")

    # Copy back into the original object (keep callers' reference)
    mol.__init__(mH)
    return mol

# -------------------------------------------------------------------------------------

[docs]
def load_rdkit_file(inp):
    """
    Load a single RDKit Mol and keep only the first conformer.

    Supports .smi, .sdf, .mol, .pdb, and .xyz (converted to PDB).

    Args:
        inp: Object with:
            - ``rdkit_mol_file`` (str): Path to the input file.
            - ``remove_H`` (bool): Whether to remove explicit hydrogens.

    Returns:
        Chem.Mol | None: Loaded molecule (trimmed to a single conformer if needed),
        or triggers an error if loading fails.
    """

    param = parameters.parameters()
    filename = inp.rdkit_mol_file
    ext = filename[-4:].lower()

    mol = None

    if ext == ".smi":
        # Read the first SMILES from the file.
        with open(filename, "r") as f:
            line = f.readline().strip()
        smiles = line.split()[0] if line else ""
        mol = Chem.MolFromSmiles(smiles)
        if mol is not None and inp.remove_H:
            mol = Chem.RemoveHs(mol)

    elif ext in (".sdf", ".mol"):
        # Return the first valid molecule from the supplier.
        supp = Chem.SDMolSupplier(filename, removeHs=inp.remove_H)
        for m in supp:
            if m is not None:
                mol = m
                break

    elif ext == ".pdb":
        mol = Chem.MolFromPDBFile(filename, removeHs=inp.remove_H, sanitize=True)

    elif ext == ".xyz":
        # Convert to PDB; xyz_to_pdb updates inp.rdkit_mol_file.
        xyz_to_pdb(inp)
        filename = inp.rdkit_mol_file
        mol = Chem.MolFromPDBFile(filename, removeHs=inp.remove_H, sanitize=True)

    else:
        msg = (
            f"File extension '{ext}' for RDKit not supported.\n\n"
            "   Accepted file extensions:"
            + "".join(f"\n     - {extension}" for extension in param.rdkit_file_extensions)
        )
        return output.error(msg)

    if mol is None:
        # Raise error if loading failed.
        return output.error(
            f"Could not load a molecule from '{filename}'. The file may be empty, "
            "malformed, or incompatible with the selected options."
        )

    # Keep only the first conformer if multiple exist.
    return keep_first_conformer(mol)

# -------------------------------------------------------------------------------------

[docs]
def keep_first_conformer(mol):
    """Return a copy of the molecule keeping only conformer 0 (if multiple exist).

    Args:
        mol: RDKit molecule.

    Returns:
        The same molecule if it has ≤1 conformer; otherwise a copy
        containing only conformer 0.
    """

    if mol.GetNumConformers() <= 1:
        return mol
    
    out = Chem.Mol(mol)  # copies atoms/bonds/properties
    out.RemoveAllConformers()
    out.AddConformer(mol.GetConformer(0), assignId=True)

    return out

# -------------------------------------------------------------------------------------

[docs]
def plot_2d_molecule(mol, inp, size=(800, 700)):
    """
    Computes 2D coordinates and renders a molecule as an image.

    Args:
        mol (Chem.Mol): RDKit molecule to visualize.
        inp (input_class): Input parameters containing visualization options.
        size (tuple): Image size in pixels (width, height).

    Returns:
        None: Displays the 2D molecular visualization.
    """

    stereo_annotations = inp.stereo_annotations
    annotate_atom_indices = inp.atom_index
    black_and_white = inp.rdkit_bw

    legend_text = ""

    hl_atoms = hl_bonds = hatom = hbond = None

    m2d = Chem.Mol(mol)  # Avoid overwriting original
    rdDepictor.Compute2DCoords(m2d)
    Chem.AssignStereochemistry(m2d, cleanIt=True, force=True, flagPossibleStereoCenters=True)
    
    # --- Substructure match ---
    if inp.rdkit_match:
        ra, rb, rac, rbc = match_substructure(m2d, inp.match_smiles)  # your function (returns atoms/bonds + color maps)
        hl_atoms, hl_bonds, hatom, hbond = _merge_highlights(hl_atoms, hl_bonds, hatom, hbond, ra, rb, rac, rbc)

    # --- Aromaticity ---
    if inp.check_aromaticity:
        aa, ab, aac, abc = find_aromatic_highlights(m2d)  
        hl_atoms, hl_bonds, hatom, hbond = _merge_highlights(hl_atoms, hl_bonds, hatom, hbond, aa, ab, aac, abc)

    # --- Abbreviations ---
    if inp.rdkit_abbreviations:
        abbrevs = rdAbbreviations.GetDefaultAbbreviations()
        m2d = rdAbbreviations.CondenseMolAbbreviations(m2d, abbrevs, maxCoverage=0.8)

    w, h = size
    drawer = rdMolDraw2D.MolDraw2DCairo(w, h)
    opts = drawer.drawOptions()
    opts.addStereoAnnotation = bool(stereo_annotations)
    opts.addAtomIndices = bool(annotate_atom_indices)

    if black_and_white: opts.useBWAtomPalette()

    opts.padding = 0.05

    # Kekulize for nicer aromatic display (ignore failures)
    try:
        Chem.Kekulize(m2d, clearAromaticFlags=True)
    except Exception:
        pass

    rdMolDraw2D.PrepareAndDrawMolecule(
        drawer, m2d,
        highlightAtoms=hl_atoms,
        highlightBonds=hl_bonds,
        highlightAtomColors=hatom,
        highlightBondColors=hbond,
        legend=legend_text
    )

    drawer.FinishDrawing()

    png = drawer.GetDrawingText()
    import PIL.Image as Image, io
    img = Image.open(io.BytesIO(png))

    # --- Pretty gnuplot-like legend to distinguish aromatic atoms/bond and matched structure ---
    if inp.check_aromaticity and inp.rdkit_match:
        
        pale_green = (153, 230, 153)  # ~ (0.6, 1.0, 0.6)
        pale_blue  = (153, 204, 255)  # ~ (0.6, 0.8, 1.0)

        entries = [
            ("Aromaticity", pale_green),
            (f'SMILES match: {getattr(inp, "match_smiles", "") or ""}'.strip(), pale_blue),
        ]
        img = _draw_gnuplot_legend_pillow(
            img,
            entries,
            corner="top_left",     
            box_alpha=190,            
            pad=14,
            swatch_size=(28, 28),
            row_gap=12,
        )

    plt.imshow(img)
    plt.axis("off")
    plt.tight_layout()
    plt.show()

# -------------------------------------------------------------------------------------

[docs]
def plot_3d_molecule(mol, style="ballstick", width=1600, height=900, background="1xFFFFFF"):
    """
    Creates an interactive 3D visualization of a molecule using py3Dmol.

    Args:
        mol (Chem.Mol): RDKit molecule to visualize.
        style (str): Visualization style for the molecule.
        width (int): Width of the visualization window in pixels.
        height (int): Height of the visualization window in pixels.
        background (str): Background color for the visualization.

    Returns:
        None: Opens an interactive 3D visualization in the web browser.
    """

    # Ensure we have a 3D conformer.
    mol = embed_3d(mol)

    mblock = Chem.MolToMolBlock(mol)
    view = py3Dmol.view(width=width, height=height)
    view.addModel(mblock, 'mol')

    view.setStyle({'sphere': {'scale': 0.3}, 'stick': {}})

    view.setBackgroundColor(background)
    view.zoomTo()

    # Write HTML to a temp file and open in browser
    html = view._make_html()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".html", mode="w") as f:
        f.write(html)
        temp_html_path = f.name

    webbrowser.open("file://" + os.path.abspath(temp_html_path))

# -------------------------------------------------------------------------------------

[docs]
def xyz_to_pdb(inp):
    """
    Converts an XYZ file to PDB format using RDKit's bond determination.

    Args:
        inp (input_class): Input parameters containing XYZ file path.

    Returns:
        input_class: Updated input object with PDB file path.
    """

    # Determine the script's location
    script_path = os.path.abspath(__file__)
    script_dir  = os.path.dirname(script_path)
    base_dir    = os.path.dirname(script_dir)

    # Create tmp folder
    inp.tmp_folder = os.path.join(base_dir, 'tmp')
    if os.path.exists(inp.tmp_folder):
        shutil.rmtree(inp.tmp_folder)
    os.mkdir(inp.tmp_folder)

    # --- 1) Read XYZ ---
    tmp_mol = molecule.molecule()
    tmp_mol.read_geom(inp.rdkit_mol_file,False)

    symbols = tmp_mol.atoms
    coords   = tmp_mol.xyz.T      # (N, 3) in Å

    # --- 2) Build an RDKit molecule with those atoms & coordinates ---
    rw = Chem.RWMol()
    for sym in symbols:
        rw.AddAtom(Chem.Atom(sym))
    mol = rw.GetMol()

    conf = Chem.Conformer(len(symbols))
    for i, (x, y, z) in enumerate(coords):
        conf.SetAtomPosition(i, (float(x), float(y), float(z)))
    mol.AddConformer(conf, assignId=True)

    # --- 3) Infer bonds from 3D geometry (since XYZ has none) ---
    # Adds bonds (and proposes bond orders) by comparing inter-atomic distances to element-specific covalent radii and common valence patterns.
    rdDetermineBonds.DetermineBonds(mol)

    # --- 4) Sanitize & assign stereochemistry from structure ---
    
    # Makes the graph chemically consistent so stereo perception won’t fail:
    #    - checks valences, assigns formal charges, sets implicit H counts,
    #    - perceives aromaticity/kekulization, normalizes bond types, etc.

    Chem.SanitizeMol(mol)

    # Assign tetrahedral chirality from 3D and E/Z on double bonds

    # Tag potential tetrahedral centers as clockwise/counterclockwise
    Chem.rdmolops.AssignAtomChiralTagsFromStructure(mol)

    # Converts those tags into CIP R/S assignments and/or E/Z (cis/trans) to double bonds 
    Chem.AssignStereochemistry(mol, cleanIt=True, force=True) # cleanIt=True: Before assigning new stereo info, RDKit clears any old/stale stereo flags that might already be present in the molecule. 
                                                              # force = True: Recomputes even if something was set

    # --- 5) Write PDB with CONNECT records ---
    pdb_out = os.path.join(inp.tmp_folder, os.path.splitext(os.path.basename(inp.rdkit_mol_file))[0] + ".pdb")
    Chem.MolToPDBFile(mol, pdb_out)

    # Update the input object to point to the new PDB
    inp.rdkit_mol_file = pdb_out
    return inp

# -------------------------------------------------------------------------------------

[docs]
def match_substructure(mol, pattern):
    """
    Finds and highlights substructure matches in a molecule.

    Args:
        mol (Chem.Mol): Target molecule to search in.
        pattern (str): SMARTS or SMILES pattern to match.

    Returns:
        tuple: (highlightAtoms, highlightBonds, highlightAtomColors, highlightBondColors)
    """

    q = Chem.MolFromSmarts(pattern)
    if q is None: q = Chem.MolFromSmiles(pattern)

    if q is None: output.error(f'in substructure match cannot parse query: {pattern}')
    for m in mol.GetSubstructMatches(q, useChirality=True):
        atom_ids.update(m)
        # map query bonds to target bonds
        for qb in q.GetBonds():
            a1 = m[qb.GetBeginAtomIdx()]
            a2 = m[qb.GetEndAtomIdx()]
            b = mol.GetBondBetweenAtoms(a1, a2)
            if b is not None:
                bond_ids.add(b.GetIdx())

    if not atom_ids and not bond_ids:
        return None, None, None, None


    pale_blue = (0.6, 0.8, 1.0)
    hatom = {i: pale_blue for i in atom_ids}
    hbond = {i: pale_blue for i in bond_ids}

    return sorted(atom_ids), sorted(bond_ids), hatom, hbond

# -------------------------------------------------------------------------------------

[docs]
def _merge_highlights(a_atoms, a_bonds, a_atomCols, a_bondCols,
                      b_atoms, b_bonds, b_atomCols, b_bondCols):
    """
    Merges two highlight sets for molecular visualization.

    Args:
        a_*: First set of highlight information.
        b_*: Second set of highlight information.

    Returns:
        tuple: Merged (atoms, bonds, atomColors, bondColors)
    """

    if a_atoms is None: a_atoms, a_bonds, a_atomCols, a_bondCols = [], [], {}, {}
    if b_atoms is None: b_atoms, b_bonds, b_atomCols, b_bondCols = [], [], {}, {}
    atoms = sorted(set(a_atoms) | set(b_atoms))
    bonds = sorted(set(a_bonds) | set(b_bonds))
    atomCols = {**a_atomCols, **b_atomCols}
    bondCols = {**a_bondCols, **b_bondCols}
    return atoms or None, bonds or None, atomCols or None, bondCols or None

# -------------------------------------------------------------------------------------

[docs]
def find_aromatic_highlights(m):
    """
    Finds and highlights aromatic atoms and bonds in a molecule.

    Args:
        m (Chem.Mol): Input molecule.

    Returns:
        tuple: (aromatic_atoms, aromatic_bonds, atom_colors, bond_colors)
    """

    arom_atoms = [a.GetIdx() for a in m.GetAtoms() if a.GetIsAromatic()]
    arom_bonds = [b.GetIdx() for b in m.GetBonds() if b.GetIsAromatic()]
    
    if not arom_atoms and not arom_bonds:
        return None, None, None, None
    
    pale_green = (0.6, 1.0, 0.6)
    hatom = {i: pale_green for i in arom_atoms}
    hbond = {i: pale_green for i in arom_bonds}
    
    return arom_atoms, arom_bonds, hatom, hbond

# -------------------------------------------------------------------------------------

[docs]
def _draw_gnuplot_legend_pillow(
    img,  # PIL.Image
    entries,  # list of (label:str, rgb_tuple)
    corner="bottom_right",
    box_alpha=180,
    pad=14,
    swatch_size=(28, 28),
    row_gap=12,
    font_name_candidates=("Times New Roman", "Times.ttf", "Times", "DejaVuSerif.ttf")
):
    """
    Draws a gnuplot-like legend onto a PIL image.

    Args:
        img (PIL.Image): Input image to draw legend on.
        entries (list): List of (label, rgb_tuple) pairs.
        corner (str): Corner position for the legend.
        box_alpha (int): Alpha value for legend background.
        pad (int): Padding around legend content.
        swatch_size (tuple): Size of color swatches.
        row_gap (int): Gap between rows in the legend.
        font_name_candidates (tuple): Font names to try.

    Returns:
        PIL.Image: Image with legend added.
    """

    from PIL import ImageDraw, ImageFont, Image

    draw = ImageDraw.Draw(img, "RGBA")

    # Pick a font size relative to image height
    H = img.height
    base_pt = max(12, int(H * 0.04)) 
    font = None
    for name in font_name_candidates:
        try:
            font = ImageFont.truetype(name, base_pt)
            break
        except Exception:
            continue
    if font is None:
        font = ImageFont.load_default()

    # Measure legend block
    text_w = 0
    text_h_total = 0
    sw, sh = swatch_size
    for label, _ in entries:
        tw, th = draw.textbbox((0, 0), label, font=font)[2:]
        text_w = max(text_w, tw)
        text_h_total += max(th, sh)

    # Total size with padding, swatch + gap
    label_gap = 10
    row_h = max(sh, int(base_pt * 1.1))
    height = pad*2 + len(entries)*row_h + (len(entries)-1)*row_gap
    width = pad*2 + sw + label_gap + text_w

    # Position box
    inset = 18
    if corner == "bottom_right":
        x0 = img.width - width - inset
        y0 = img.height - height - inset
    elif corner == "bottom_left":
        x0 = inset
        y0 = img.height - height - inset
    elif corner == "top_right":
        x0 = img.width - width - inset
        y0 = inset
    else:  # top_left
        x0 = inset
        y0 = inset

    # Background (semi-transparent) + border
    box_bg = (245, 245, 245, box_alpha)  # light gray, semi-transparent
    box_border = (100, 100, 100, 255)
    draw.rounded_rectangle(
        [x0, y0, x0+width, y0+height],
        radius=10,
        fill=box_bg,
        outline=box_border,
        width=1
    )

    # Draw rows: swatch + label
    y = y0 + pad
    for label, rgb in entries:
        # swatch
        sx0, sy0 = x0 + pad, y + (row_h - sh)//2
        sx1, sy1 = sx0 + sw, sy0 + sh
        draw.rectangle([sx0, sy0, sx1, sy1], fill=rgb+(255,), outline=(60, 60, 60, 255))
        # label
        tx = sx1 + label_gap
        ty = y + (row_h - base_pt)//2 - 2
        draw.text((tx, ty), label, fill=(0, 0, 0, 255), font=font)
        y += row_h + row_gap

    return img

# -------------------------------------------------------------------------------------