con · yarikoptic · May 14, 2025 · Mar 22, 2025 · Mar 22, 2025 · May 10, 2025
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
@@ -17,6 +17,6 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Codespell
         uses: codespell-project/actions-codespell@v2
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -11,7 +11,7 @@ jobs:
     env:
       CONTAINER: quay.io/con/tributors
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Build Docker Image
         run: docker build -t "${CONTAINER}" .
       - name: Log In to Quay.io

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout source
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 

diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml
@@ -13,7 +13,7 @@ jobs:
       run: |
         sudo apt-get update -qq
         sudo apt-get install shellcheck
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Run shellcheck
       run: |
         # I: running only on a subset of scripts which are shellcheck clean ATM

diff --git a/.github/workflows/test-action.yml b/.github/workflows/test-action.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout Repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Generate Updated Zenodo and Contributors
 
         # Important! Update to release https://github.com/con/tributors
@@ -55,21 +55,21 @@ jobs:
           allcontrib_skip_generate: false
 
       - name: Upload zenodo data as artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
 
         # Path is relative to GITHUB_WORKSPACE
         with:
           name: zenodo
           path: .zenodo.json
 
       - name: Upload allcontributors data as artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: allcontrib
           path: .all-contributorsrc
 
       - name: Upload README as artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: readme
           path: README.md
diff --git a/.github/workflows/test-tributors.yml b/.github/workflows/test-tributors.yml
@@ -8,7 +8,7 @@ jobs:
   formatting:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Setup black environment
         run: conda create --quiet --name black pyflakes
 
@@ -29,7 +29,7 @@ jobs:
     needs: formatting
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Setup testing environment
         run: conda create --quiet --name testing pytest
 
@@ -49,7 +49,7 @@ jobs:
     env:
       CONTAINER: quay.io/con/tributors
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Build Docker Image
         run: docker build -t "${CONTAINER}" .
       - name: Tag and Preview Container

diff --git a/.github/workflows/update-contributors.yml b/.github/workflows/update-contributors.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout Repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Tributors Update
 
         # Important! Update to release https://github.com/con/tributors

diff --git a/tributors/main/github.py b/tributors/main/github.py
@@ -15,7 +15,7 @@
 import requests
 import sys
 
-repository_regex = "(?P<owner>[\w,\-,\_]+)/(?P<repo>[\w,\-,\_\.]+)"
+repository_regex = r"(?P<owner>[\w,\-,\_]+)/(?P<repo>[\w,\-,\_\.]+)"
 
 bot = logging.getLogger("github")
 

diff --git a/tributors/main/orcid.py b/tributors/main/orcid.py
@@ -130,12 +130,12 @@ def get_orcid_token():
     return orcid_token
 
 
-def record_search(url, email, interactive=False, search_type=""):
-    """Given a url (with a name or email) do a record search looking for an orcid id.
+def record_search(url, terms, interactive=False, search_type=""):
+    """Given a url (built from the search terms) do a record search looking for an orcid id.
 
     Arguments:
       - url (str) : url to perform request
-      - email (str) : email, used just for logging
+      - terms (tuple) : query template followed by its values, used just for logging
       - interactive (bool) : if True, ask user if there is more than a single response
       - search_type (str) : description on what search is based on, used just for logging
     """
@@ -152,19 +152,20 @@ def record_search(url, email, interactive=False, search_type=""):
     if len(results) == 1:
         return results[0]["orcid-id"]
 
+    term_str = terms[0] % terms[1:]
     # Only stream results to screen in interactive mode
     if not interactive:
         bot.info(
-            f"{email}: found more than 1 ({len(results)}) result for ORCID search {search_type}, "
+            f"{term_str}: found more than one ({len(results)}) result for ORCID search {search_type}, "
             "run with --interactive mode to select."
         )
-        return
+        return Ellipsis
 
     # One or more results
     if len(results) > 10:
         bot.warning("Found more than 10 results, will only show top 10.")
 
-    print("\n\n%s\n======================================================" % email)
+    print("\n\n%s\n======================================================" % term_str)
     for idx, r in enumerate(results):
         # Limit is ten results, count starting at 0
         idx = idx + 1
@@ -191,6 +192,9 @@ def record_search(url, email, interactive=False, search_type=""):
         else:
             print("[%s]\n%s\n" % (idx, record))
 
+    # TODO: remember which options were already presented for a given person
+    # and do not show them again.
+    #
     # If interactive, ask for choice prompt
     if interactive:
         skip_choices = ["s", "S", "skip"]
@@ -216,7 +220,7 @@ def record_search(url, email, interactive=False, search_type=""):
 
         if choice in enter_choices:
             return entry_prompt(
-                f"Please enter the ORCID for {email}.",
+                f"Please enter the ORCID for {term_str}.",
                 regex="[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$",
             )
 
@@ -227,53 +231,88 @@ def record_search(url, email, interactive=False, search_type=""):
         return results[int(choice) - 1]["orcid-id"]
 
 
-def get_orcid(email, name=None, interactive=False):
-    """Get an orcid identifier for a given email or name."""
-    # We must have an email OR name
-    if not email and not name:
-        return
+def extended_search_url(q, *args):
+    """Return the ORCID expanded-search URL for template q filled with URL-quoted args."""
+    # Only the top 10 are shown, so request 11 via `rows` rather than the default 1000
+    url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}&rows=11"
+    if args:
+        url %= tuple(map(urllib.parse.quote, args))
+    return url
+
 
-    def extended_search_url(q, *args):
-        """Helper to properly quote args and avoid duplicating URL etc"""
-        url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}"
-        if args:
-            url %= tuple(map(urllib.parse.quote, args))
-        return url
+strict, loose = True, False
 
-    # First look for records based on email
-    orcid_id = None
+
+def gen_searches(email, name):
     if email:
-        url = extended_search_url("email:%s", email)
-        orcid_id = record_search(url, email, interactive, "by email")
+        yield (("email:%s", email), "by email", strict)
 
-    # Attempt # 2 will use the first and last name
-    if not orcid_id and name is not None:
+    # Next attempts will use name
+    if name is not None:
         delim = "," if "," in name else " "
         cleaner = "," if delim == " " else " "
 
-        parts = name.split(delim)
+        parts = [_.strip(cleaner) for _ in name.split(delim)]
 
         # No go if only a first or last name
         if len(parts) == 1:
             bot.debug(f"Skipping {name}, first and last are required for search.")
-            return orcid_id
+            return
+
+        # Just as is
+        yield (
+            ('credit-name:"%s"+OR+other-names:"%s"', name, name),
+            "by full credit or other names",
+            strict,
+        )
 
-        last, first = parts[0].strip(cleaner), " ".join(parts[1:]).strip(cleaner)
-        url = extended_search_url("%s+AND+%s", first, last)
-        orcid_id = record_search(url, name, interactive, "by name")
+        if delim == ",":
+            # Last, First Middle
+            last, given = parts[0], " ".join(parts[1:])
+        else:
+            # First Middle Last
+            given, last = " ".join(parts[:-1]), parts[-1]
+
+        yield (
+            ('given-names:"%s"+AND+family-name:"%s"', given, last),
+            "by name",
+            strict,
+        )
 
         # Attempt # 3 will try removing the middle name
-        if not orcid_id and " " in first:
-            url = extended_search_url(
-                "%s+AND+%s",
-                first.split(" ")[0].strip(),
-                last,
+        if " " in given:
+            yield (
+                (
+                    'given-names:"%s"+AND+family-name:"%s"',
+                    given.split(" ")[0].strip(),
+                    last,
+                ),
+                "by name",
+                loose,
             )
-            orcid_id = record_search(url, name, interactive, "by name without middle")
 
-        # Last attempt tries full name "as is"
-        if not orcid_id:
-            url = extended_search_url("%s", name)
-            orcid_id = record_search(url, name, interactive, "full name")
+        # Just a combination of all parts of the name
+        yield (
+            ("+AND+".join(["%s"] * len(parts)),) + tuple(parts),
+            "by name parts",
+            loose,
+        )
+
 
-    return orcid_id
+def get_orcid(email: str | None, name: str | None = None, interactive=False):
+    """Return the first ORCID id found by the email/name searches, or None if none is found."""
+    # We must have an email OR name
+    if not email and not name:
+        return
+
+    for search_args, search_desc, strictness in gen_searches(email, name):
+        url = extended_search_url(*search_args)
+        if (
+            orcid_id := record_search(url, search_args, interactive, search_desc)
+        ) is not Ellipsis and orcid_id:
+            return orcid_id
+        # Ellipsis marks "several candidates, none chosen" (non-interactive run)
+        if orcid_id is Ellipsis and strictness:
+            # ambiguous result of a strict search: stop here
+            break
+        # if loose, and still got multiple results, continue