Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 38 additions & 5 deletions src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ def __init__(
}

self.to_translators = {
"gnomad": self._to_gnomad,
"hgvs": self._to_hgvs,
"spdi": self._to_spdi,
}
Expand Down Expand Up @@ -424,6 +425,32 @@ def _to_hgvs(
) -> list[str]:
return self.hgvs_tools.from_allele(vo, namespace)

def _to_gnomad(
    self,
    vo: models.Allele,
    namespace: str | None = None,
    **kwargs,  # noqa: ARG002
) -> list[str]:
    """Return gnomAD-style (dash-delimited) expressions for a VRS Allele.

    The work is delegated to ``_to_location_expression`` using the
    template ``{alias}-{start}-{ref_seq}-{alt_seq}``; this method only
    resolves and validates the namespace. Whatever list the helper
    returns is passed straight back to the caller.

    When `namespace` is None (or otherwise falsy), the translator's
    `default_assembly_name` is used in its place.

    Raises ValueError if the resolved namespace does not begin with
    "GRCh". Extra keyword arguments are accepted so the signature stays
    uniform with the other ``_to_*`` translators, but they are ignored.
    """
    # NOTE(review): gnomAD/VCF identifiers are conventionally 1-based while
    # VRS locations are interbase -- confirm the helper's `start` is what
    # consumers of these strings expect.
    assembly = self.default_assembly_name if not namespace else namespace

    # Happy path first: only GRCh-prefixed assemblies are accepted.
    if assembly.startswith("GRCh"):
        return self._to_location_expression(
            "{alias}-{start}-{ref_seq}-{alt_seq}",
            vo,
            assembly,
        )

    error = f"Invalid gnomAD-style namespace '{assembly}'"
    raise ValueError(error)

def _to_spdi(
self, vo: models.Allele, namespace: str | None = "refseq", **kwargs
) -> list[str]:
Expand All @@ -450,12 +477,20 @@ def _to_spdi(
SPDI and VRS use identical normalization. The incoming Allele
is expected to be normalized per VRS spec.
"""
ref_seq_limit = kwargs.get("ref_seq_limit", 0)
return self._to_location_expression(
"{alias}:{start}:{ref_seq}:{alt_seq}", vo, namespace, ref_seq_limit=ref_seq_limit,
)

def _to_location_expression(
self, id_template: str, vo: models.Allele, namespace: str | None , ref_seq_limit: int | None = None,
) -> list[str]:
sequence = f"ga4gh:{vo.location.get_refget_accession()}"
aliases = self.data_proxy.translate_sequence_identifier(sequence, namespace)
aliases = [a.split(":")[1] for a in aliases]
seq_proxies = {a: SequenceProxy(self.data_proxy, a) for a in aliases}
start, end = vo.location.start, vo.location.end
spdi_exprs = []
exprs = []

for alias in aliases:
# Get the reference sequence
Expand All @@ -482,14 +517,12 @@ def _to_spdi(

# Optionally allow using the length of the reference sequence
# instead of the sequence itself.
ref_seq_limit = kwargs.get("ref_seq_limit", 0)
if ref_seq_limit is not None and len(ref_seq) > int(ref_seq_limit):
ref_seq = len(ref_seq)

spdi_expr = f"{alias}:{start}:{ref_seq}:{alt_seq}"
spdi_exprs.append(spdi_expr)
exprs.append(id_template.format(alias=alias, start=start, ref_seq=ref_seq, alt_seq=alt_seq))

return spdi_exprs
return exprs

def _post_process_imported_allele(
self, allele: models.Allele, **kwargs
Expand Down
Loading