diff --git a/src/ga4gh/vrs/extras/translator.py b/src/ga4gh/vrs/extras/translator.py index 6ed4e57f..05d2c205 100644 --- a/src/ga4gh/vrs/extras/translator.py +++ b/src/ga4gh/vrs/extras/translator.py @@ -496,6 +496,10 @@ def _from_hgvs( if not refget_accession: return None + # translate coding coordinates to positional coordinates, if necessary + if sv.type == "c": + sv = self.hgvs_tools.c_to_n(sv) + location = models.SequenceLocation( sequenceReference=models.SequenceReference( refgetAccession=refget_accession diff --git a/tests/extras/cassettes/test_from_hgvs_coding_coordinates.yaml b/tests/extras/cassettes/test_from_hgvs_coding_coordinates.yaml new file mode 100644 index 00000000..7010f6b9 --- /dev/null +++ b/tests/extras/cassettes/test_from_hgvs_coding_coordinates.yaml @@ -0,0 +1,39 @@ +interactions: +- request: + body: null + headers: {} + method: GET + uri: http://localhost:5000/seqrepo/1/metadata/refseq:NM_001331029.1 + response: + body: + string: "{\n \"added\": \"2016-08-24T05:03:40Z\",\n \"aliases\": [\n \"MD5:a8d7243ffeea4ade1dbc33613887693b\",\n + \ \"NCBI:NM_001331029.1\",\n \"refseq:NM_001331029.1\",\n \"NCBI:XM_017001344.1\",\n + \ \"refseq:XM_017001344.1\",\n \"SEGUID:qw9iIJ9BPdGZfypwQH7S3L2f9gI\",\n + \ \"SHA1:ab0f62209f413dd1997f2a70407ed2dcbd9ff602\",\n \"VMC:GS_MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK\",\n + \ \"sha512t24u:MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK\",\n \"ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK\"\n + \ ],\n \"alphabet\": \"ACGT\",\n \"length\": 11291\n}\n" + headers: {} + status: + code: 200 + message: OK +- request: + body: null + headers: {} + method: GET + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NM_001331029.1&rettype=fasta&seq_start=250&seq_stop=350&tool=bioutils&email=biocommons-dev@googlegroups.com + response: + body: + string: '>NM_001331029.1:250-350 Homo sapiens protein phosphatase 1 regulatory + subunit 12B (PPP1R12B), transcript variant 8, mRNA + + GAGCCTGCGGAGCGACGAGGCGCGGGGCGGCAGCCGCTGACCAGGCGCGGGAGCCCCAGGGTCCGCTTCG + + AGGACGGTGCTGTCTTTCTGGCCGCCTGCTC + + + ' + headers: {} + status: + code: 200 + message: OK +version: 1 diff --git a/tests/extras/test_cnv_translator.py b/tests/extras/test_cnv_translator.py index a7a3fa0e..9c71ab55 100644 --- a/tests/extras/test_cnv_translator.py +++ b/tests/extras/test_cnv_translator.py @@ -173,3 +173,29 @@ def test_from_hgvs_cn_copies_zero(tlr): cn = tlr._from_hgvs("NC_000013.11:g.26440969_26443305del", copies=0) assert cn.type == "CopyNumberCount" assert cn.copies == 0 + + +@pytest.mark.vcr +def test_from_hgvs_coding_coordinates(tlr): + """CDS-relative (``c.``) inputs are converted to transcript-relative + coordinates via ``c_to_n`` before the VRS location is built; without the + conversion the resulting ``SequenceLocation`` would point into the 5' UTR. + """ + cx = tlr._from_hgvs("NM_001331029.1:c.100_200del") + assert cx.model_dump(exclude_none=True) == { + "id": "ga4gh:CX.lJiuo6QPsrSEKI7EeX0xpX5Iqx0R_Kal", + "type": "CopyNumberChange", + "digest": "lJiuo6QPsrSEKI7EeX0xpX5Iqx0R_Kal", + "location": { + "id": "ga4gh:SL.K0g9SmC4z2-ayJsTHFHuIAygGNv1UMgi", + "type": "SequenceLocation", + "digest": "K0g9SmC4z2-ayJsTHFHuIAygGNv1UMgi", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK", + }, + "start": 249, + "end": 350, + }, + "copyChange": "loss", + }