From 741463ba5b449fde1376b5d0f9bf18fb66841f90 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Fri, 17 Apr 2026 11:58:55 -0400 Subject: [PATCH 1/2] fix: convert c. to n. in CnvTranslator before building location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CnvTranslator._from_hgvs accepted CDS-relative (c.) HGVS expressions but built SequenceLocation objects directly from sv.posedit.pos.{start,end}.base without first converting to transcript-relative coordinates. That produced locations whose start/end pointed into the 5' UTR of the named transcript and therefore had wrong ga4gh:SL.* digests. Mirror the AlleleTranslator path (HgvsTools.extract_allele_values) and run c_to_n when sv.type == "c", after the existing intronic guard and the refget_accession lookup. Concrete example: NM_001331029.1:c.100_200del previously produced start=99, end=200 (150 bases upstream of the correct span, starting inside the 5' UTR). After this fix it produces start=249, end=350 (the correct transcript-relative positions). Behavior change for any consumer that was passing c. HGVS to CnvTranslator — their digests will change. The pre-fix digests were objectively wrong, so no correct consumer can have been relying on them. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ga4gh/vrs/extras/translator.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ga4gh/vrs/extras/translator.py b/src/ga4gh/vrs/extras/translator.py index 6ed4e57f..05d2c205 100644 --- a/src/ga4gh/vrs/extras/translator.py +++ b/src/ga4gh/vrs/extras/translator.py @@ -496,6 +496,10 @@ def _from_hgvs( if not refget_accession: return None + # translate coding coordinates to positional coordinates, if necessary + if sv.type == "c": + sv = self.hgvs_tools.c_to_n(sv) + location = models.SequenceLocation( sequenceReference=models.SequenceReference( refgetAccession=refget_accession From 27d985f4b5877e5abdda25e4df74d30307b3cb71 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Fri, 17 Apr 2026 11:59:01 -0400 Subject: [PATCH 2/2] test: coding-coordinate CNV regression Pins the c_to_n conversion in CnvTranslator by asserting NM_001331029.1:c.100_200del resolves to transcript positions 249-350 (not CDS-relative 99-200 which would land in the 5' UTR). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../test_from_hgvs_coding_coordinates.yaml | 39 +++++++++++++++++++ tests/extras/test_cnv_translator.py | 26 +++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 tests/extras/cassettes/test_from_hgvs_coding_coordinates.yaml diff --git a/tests/extras/cassettes/test_from_hgvs_coding_coordinates.yaml b/tests/extras/cassettes/test_from_hgvs_coding_coordinates.yaml new file mode 100644 index 00000000..7010f6b9 --- /dev/null +++ b/tests/extras/cassettes/test_from_hgvs_coding_coordinates.yaml @@ -0,0 +1,39 @@ +interactions: +- request: + body: null + headers: {} + method: GET + uri: http://localhost:5000/seqrepo/1/metadata/refseq:NM_001331029.1 + response: + body: + string: "{\n \"added\": \"2016-08-24T05:03:40Z\",\n \"aliases\": [\n \"MD5:a8d7243ffeea4ade1dbc33613887693b\",\n + \ \"NCBI:NM_001331029.1\",\n \"refseq:NM_001331029.1\",\n \"NCBI:XM_017001344.1\",\n + \ \"refseq:XM_017001344.1\",\n \"SEGUID:qw9iIJ9BPdGZfypwQH7S3L2f9gI\",\n + \ \"SHA1:ab0f62209f413dd1997f2a70407ed2dcbd9ff602\",\n \"VMC:GS_MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK\",\n + \ \"sha512t24u:MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK\",\n \"ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK\"\n + \ ],\n \"alphabet\": \"ACGT\",\n \"length\": 11291\n}\n" + headers: {} + status: + code: 200 + message: OK +- request: + body: null + headers: {} + method: GET + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NM_001331029.1&rettype=fasta&seq_start=250&seq_stop=350&tool=bioutils&email=biocommons-dev@googlegroups.com + response: + body: + string: '>NM_001331029.1:250-350 Homo sapiens protein phosphatase 1 regulatory + subunit 12B (PPP1R12B), transcript variant 8, mRNA + + GAGCCTGCGGAGCGACGAGGCGCGGGGCGGCAGCCGCTGACCAGGCGCGGGAGCCCCAGGGTCCGCTTCG + + AGGACGGTGCTGTCTTTCTGGCCGCCTGCTC + + + ' + headers: {} + status: + code: 200 + message: OK +version: 1 diff --git a/tests/extras/test_cnv_translator.py b/tests/extras/test_cnv_translator.py index 
a7a3fa0e..9c71ab55 100644 --- a/tests/extras/test_cnv_translator.py +++ b/tests/extras/test_cnv_translator.py @@ -173,3 +173,29 @@ def test_from_hgvs_cn_copies_zero(tlr): cn = tlr._from_hgvs("NC_000013.11:g.26440969_26443305del", copies=0) assert cn.type == "CopyNumberCount" assert cn.copies == 0 + + +@pytest.mark.vcr +def test_from_hgvs_coding_coordinates(tlr): + """CDS-relative (``c.``) inputs are converted to transcript-relative + coordinates via ``c_to_n`` before the VRS location is built; without the + conversion the resulting ``SequenceLocation`` would point into the 5' UTR. + """ + cx = tlr._from_hgvs("NM_001331029.1:c.100_200del") + assert cx.model_dump(exclude_none=True) == { + "id": "ga4gh:CX.lJiuo6QPsrSEKI7EeX0xpX5Iqx0R_Kal", + "type": "CopyNumberChange", + "digest": "lJiuo6QPsrSEKI7EeX0xpX5Iqx0R_Kal", + "location": { + "id": "ga4gh:SL.K0g9SmC4z2-ayJsTHFHuIAygGNv1UMgi", + "type": "SequenceLocation", + "digest": "K0g9SmC4z2-ayJsTHFHuIAygGNv1UMgi", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK", + }, + "start": 249, + "end": 350, + }, + "copyChange": "loss", + }