Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/codespell.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Codespell
uses: codespell-project/actions-codespell@v2
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
env:
CONTAINER: quay.io/con/tributors
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Build Docker Image
run: docker build -t "${CONTAINER}" .
- name: Log In to Quay.io
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout source
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/shellcheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
run: |
sudo apt-get update -qq
sudo apt-get install shellcheck
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Run shellcheck
run: |
# I: running only on a subset of scripts which are shellcheck clean ATM
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/test-action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Generate Updated Zenodo and Contributors

# Important! Update to release https://github.com/con/tributors
Expand Down Expand Up @@ -55,21 +55,21 @@ jobs:
allcontrib_skip_generate: false

- name: Upload zenodo data as artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4

# Path is relative to GITHUB_WORKSPACE
with:
name: zenodo
path: .zenodo.json

- name: Upload allcontributors data as artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: allcontrib
path: .all-contributorsrc

- name: Upload README as artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: readme
path: README.md
6 changes: 3 additions & 3 deletions .github/workflows/test-tributors.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
formatting:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup black environment
run: conda create --quiet --name black pyflakes

Expand All @@ -29,7 +29,7 @@ jobs:
needs: formatting
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup testing environment
run: conda create --quiet --name testing pytest

Expand All @@ -49,7 +49,7 @@ jobs:
env:
CONTAINER: quay.io/con/tributors
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Build Docker Image
run: docker build -t "${CONTAINER}" .
- name: Tag and Preview Container
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/update-contributors.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Tributors Update

# Important! Update to release https://github.com/con/tributors
Expand Down
2 changes: 1 addition & 1 deletion tributors/main/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import requests
import sys

repository_regex = "(?P<owner>[\w,\-,\_]+)/(?P<repo>[\w,\-,\_\.]+)"
repository_regex = r"(?P<owner>[\w,\-,\_]+)/(?P<repo>[\w,\-,\_\.]+)"

bot = logging.getLogger("github")

Expand Down
119 changes: 79 additions & 40 deletions tributors/main/orcid.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,12 @@ def get_orcid_token():
return orcid_token


def record_search(url, email, interactive=False, search_type=""):
"""Given a url (with a name or email) do a record search looking for an orcid id.
def record_search(url, terms, interactive=False, search_type=""):
"""Given a url (with a name or terms) do a record search looking for an orcid id.

Arguments:
- url (str) : url to perform request
- email (str) : email, used just for logging
- terms (str) : terms, used just for logging
- interactive (bool) : if True, ask user if there is more than a single response
- search_type (str) : description on what search is based on, used just for logging
"""
Expand All @@ -152,19 +152,20 @@ def record_search(url, email, interactive=False, search_type=""):
if len(results) == 1:
return results[0]["orcid-id"]

term_str = terms[0] % terms[1:]
# Only stream results to screen in interactive mode
if not interactive:
bot.info(
f"{email}: found more than 1 ({len(results)}) result for ORCID search {search_type}, "
f"{term_str}: found more than one ({len(results)}) result for ORCID search {search_type}, "
"run with --interactive mode to select."
)
return
return Ellipsis

# One or more results
if len(results) > 10:
bot.warning("Found more than 10 results, will only show top 10.")

print("\n\n%s\n======================================================" % email)
print("\n\n%s\n======================================================" % term_str)
for idx, r in enumerate(results):
# Limit is ten results, count starting at 0
idx = idx + 1
Expand All @@ -191,6 +192,9 @@ def record_search(url, email, interactive=False, search_type=""):
else:
print("[%s]\n%s\n" % (idx, record))

# TODO: remember which options have already been presented for a person
# and do not show them again.
#
# If interactive, ask for choice prompt
if interactive:
skip_choices = ["s", "S", "skip"]
Expand All @@ -216,7 +220,7 @@ def record_search(url, email, interactive=False, search_type=""):

if choice in enter_choices:
return entry_prompt(
f"Please enter the ORCID for {email}.",
f"Please enter the ORCID for {term_str}.",
regex="[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$",
)

Expand All @@ -227,53 +231,88 @@ def record_search(url, email, interactive=False, search_type=""):
return results[int(choice) - 1]["orcid-id"]


def get_orcid(email, name=None, interactive=False):
"""Get an orcid identifier for a given email or name."""
# We must have an email OR name
if not email and not name:
return
def extended_search_url(q, *args):
    """Build an ORCID expanded-search URL for a query template.

    Arguments:
     - q (str)    : query string, optionally containing ``%s`` placeholders
     - args (str) : one value per placeholder; each is URL-quoted before
                    being substituted into the query

    Returns the complete request URL (str).
    """
    # Interpolate only the query itself, so the rest of the URL can never be
    # misread as a format string.
    if args:
        q %= tuple(map(urllib.parse.quote, args))
    # We will show only up to 10, so request 11 (enough to notice "more than
    # 10") rather than the service's default page size. The ORCID search API
    # takes the page size in the "rows" parameter.
    return f"https://pub.orcid.org/v3.0/expanded-search?q={q}&rows=11"


def extended_search_url(q, *args):
"""Helper to properly quote args and avoid duplicating URL etc"""
url = f"https://pub.orcid.org/v3.0/expanded-search?q={q}"
if args:
url %= tuple(map(urllib.parse.quote, args))
return url
# Strictness flags attached to every search produced by gen_searches().
strict, loose = True, False


def gen_searches(email, name):
    """Generate ORCID searches to try for a person, most specific first.

    Arguments:
     - email (str) : email address to search for, skipped if empty or None
     - name (str)  : full name, either "Last, First Middle" or
                     "First Middle Last"; skipped if None or if it has fewer
                     than two parts

    Yields tuples of ``(search_args, description, strictness)`` where
    ``search_args`` is ``(query_template, *values)`` with one ``%s`` in the
    template per value, ``description`` is a short label for logging, and
    ``strictness`` is either ``strict`` or ``loose``.
    """
    if email:
        yield (("email:%s", email), "by email", strict)

    # Next attempts will use name
    if name is None:
        return

    last_first = "," in name
    if last_first:
        # "Last, First Middle": split on commas, trim surrounding whitespace
        parts = [piece.strip() for piece in name.split(",")]
    else:
        # "First Middle Last": split on any run of whitespace
        parts = name.split()
    # Repeated delimiters or a trailing comma would otherwise leave empty
    # parts behind, producing queries such as "John+AND++AND+Smith".
    parts = [piece for piece in parts if piece]

    # No go if only a first or last name
    if len(parts) < 2:
        bot.debug(f"Skipping {name}, first and last are required for search.")
        return

    # Just as is
    yield (
        ('credit-name:"%s"+OR+other-names:"%s"', name, name),
        "by full credit or other names",
        strict,
    )

    if last_first:
        # Last, First Middle
        last, given = parts[0], " ".join(parts[1:])
    else:
        # First Middle Last
        given, last = " ".join(parts[:-1]), parts[-1]

    yield (
        ('given-names:"%s"+AND+family-name:"%s"', given, last),
        "by name",
        strict,
    )

    # Retry without the middle name(s): only the first given name is kept
    if " " in given:
        yield (
            (
                'given-names:"%s"+AND+family-name:"%s"',
                given.split(" ")[0].strip(),
                last,
            ),
            "by name",
            loose,
        )

    # Just a combination of all parts of the name
    yield (
        ("+AND+".join(["%s"] * len(parts)),) + tuple(parts),
        "by name parts",
        loose,
    )


return orcid_id
def get_orcid(email: str | None, name: str | None = None, interactive=False):
"""Get an orcid identifier for a given email or name."""
# We must have an email OR name
if not email and not name:
return

for search_args, search_desc, strictness in gen_searches(email, name):
url = extended_search_url(*search_args)
if (
orcid_id := record_search(url, search_args, interactive, search_desc)
) is not Ellipsis and orcid_id:
return orcid_id
if orcid_id is Ellipsis:
orcid_id = None
if strict:
break
# if loose, and still got multiple results, continue
Loading