-
Notifications
You must be signed in to change notification settings - Fork 56
Extend url2purl/purl2url coverage for Git-based source hosts #223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
4c168a8
db9d6bc
7e87e09
24fdf1d
7e24835
0020818
5909b64
c47acb5
d02949b
92da2f2
dc169d8
fbff2d3
4929fc0
205d048
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -667,6 +667,265 @@ def build_bitbucket_purl(url): | |
| ) | ||
|
|
||
|
|
||
def build_route_regex(domain_patterns, path_suffix="/.*"):
    """
    Return a route regex string matching an http or https URL whose host part
    matches any of ``domain_patterns``, followed by ``path_suffix``.

    ``domain_patterns`` is an iterable of regex fragments that are OR-ed
    together inside a single capturing group. ``path_suffix`` is a regex
    fragment appended verbatim after that group.
    """
    alternatives = "|".join(domain_patterns)
    return r"https?://({}){}".format(alternatives, path_suffix)
|
|
||
|
|
||
# Host patterns for GitLab instances handled by build_gitlab_sub_purl.
SUB_GITLAB_DOMAINS = [
    r"git\.codelinaro\.org",
    # Any "gitlab.<something>" host; the negative lookahead rejects hosts
    # where "gitlab." is followed by "com" as a whole word (e.g. gitlab.com).
    r"gitlab\.(?!com\b)[^/]+",
]
SUB_GITLAB_ROUTE_REGEX = build_route_regex(SUB_GITLAB_DOMAINS)
|
|
||
|
|
||
@purl_router.route(SUB_GITLAB_ROUTE_REGEX)
def build_gitlab_sub_purl(url):
    """
    Return a generic PackageURL built from a repository or commit ``url`` on
    one of the hosts routed by SUB_GITLAB_ROUTE_REGEX, or None when ``url``
    does not have the expected shape.

    The namespace is everything after the scheme up to the last path segment
    (so it includes the host), the name is that last segment, and the version
    is the commit hash of a trailing "/-/commit/<hash>" part when present.

    For example:
    https://gitlab.gnome.org/GNOME/gimp
    https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm
    https://gitlab.gnome.org/GNOME/gimp/-/commit/112a5e038f0646eae5ae314988ec074433d2b365
    https://git.codelinaro.org/linaro/qcom/project/-/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    """
    parsed = re.match(
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+)"
        # Optional commit part: 7 to 64 hexadecimal digits.
        r"(?:/-/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$",
        url,
    )
    if not parsed:
        return None

    # groupdict() yields exactly the namespace, name and version keys; version
    # is None for a plain repository URL.
    return PackageURL(type="generic", **parsed.groupdict())
|
|
||
|
|
||
# Host patterns handled by build_gitea_purl.
# Raw strings keep "\." a regex escape rather than an invalid Python string
# escape, which raises a DeprecationWarning/SyntaxWarning at compile time.
GITEA_DOMAINS = [r"codeberg\.org", r"gitea\.com"]
GITEA_ROUTE_REGEX = build_route_regex(GITEA_DOMAINS)
|
|
||
|
|
||
@purl_router.route(GITEA_ROUTE_REGEX)
def build_gitea_purl(url):
    """
    Return a generic PackageURL built from a gitea/forgejo repository or commit
    ``url``, or None when ``url`` does not have the expected shape.

    The namespace is everything after the scheme up to the last path segment
    (so it includes the host), the name is that last segment, and the version
    is the commit hash of a trailing "/commit/<hash>" part when present.

    For example:
    https://gitea.com/htc47/entur
    https://codeberg.org/alpinelinux/aports
    https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c
    """
    parsed = re.match(
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+)"
        # Optional commit part: 7 to 64 hexadecimal digits.
        r"(?:/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$",
        url,
    )
    if not parsed:
        return None

    # NOTE(review): a maintainer asked for a dedicated Forgejo purl type;
    # "generic" is what this code emits for now.
    return PackageURL(type="generic", **parsed.groupdict())
|
|
||
|
|
||
# Host patterns handled by build_cgit_purl.
# Every entry is a raw string so that "\." stays a regex escape instead of
# being an invalid Python string escape (DeprecationWarning/SyntaxWarning).
CGIT_DOMAINS = [
    r"git\.kernel\.org",
    r"gitweb\.gentoo\.org",
    r"cgit\.git\.savannah\.gnu\.org",
    r"web\.git\.kernel\.org",
]
CGIT_ROUTE_REGEX = build_route_regex(CGIT_DOMAINS)
|
|
||
|
|
||
@purl_router.route(CGIT_ROUTE_REGEX)
def build_cgit_purl(url):
    """
    Return a generic PackageURL built from a cgit repository or commit ``url``,
    or None when ``url`` matches neither supported shape.

    Two shapes are recognized, in this order:
    - the "https://git.kernel.org/stable/c/<hash>" shorthand, which is mapped
      to a fixed namespace and the name "linux";
    - a regular project URL, where the namespace is everything after the
      scheme up to the last path segment (host included), the name is that
      segment without a trailing ".git", and the version is the ``id`` query
      parameter of a trailing "/commit/..." part when present.

    For example:
    https://git.kernel.org/pub/scm/utils/b4/b4.git
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
    https://cgit.git.savannah.gnu.org/cgit/uddf.git
    https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git
    https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git
    https://gitweb.gentoo.org/dev/darkside.git
    https://gitweb.gentoo.org/repo/gentoo.git
    https://git.kernel.org/pub/scm/bluetooth/bluez.git/commit/?id=74770b1fd2be612f9c2cf807db81fcdcc35e6560
    https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git/commit/?h=for-next&id=bd771cf5c4254511cc4abb88f3dab3bd58bdf8e8
    https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/fs/smb?id=db363b0a1d9e6b9dc556296f1b1007aeb496a8cf
    https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d
    https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825
    https://git.kernel.org/stable/c/9a9a8fe26751334b7739193a94eba741073b8a55
    """
    # https://git.kernel.org/stable/c/<hash>
    shorthand_regex = (
        r"^https?://git\.kernel\.org/stable/c/"
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )
    project_regex = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+?)"
        r"(?:\.git)?"
        # Optional commit part: an arbitrary sub-path, then a query string
        # carrying an "id=<hash>" parameter.
        r"(?:/commit/(?:[^?]+)?\?.*?\bid=(?P<version>[0-9a-fA-F]{7,64})(?:&.*)?)?"
        r"/?$"
    )

    shorthand = re.match(shorthand_regex, url)
    if shorthand:
        # NOTE(review): a maintainer flagged that this mapping needs more
        # thought and that an emerging PURL registry should cover the kernel.
        namespace = "git.kernel.org/pub/scm/linux/kernel/git/stable/"
        name = "linux"
        version = shorthand.group("version")
    else:
        project = re.match(project_regex, url)
        if not project:
            return None
        namespace, name, version = project.group("namespace", "name", "version")

    # NOTE(review): a maintainer pointed to an emerging git purl type as a
    # likely replacement for "generic" here.
    return PackageURL(
        type="generic",
        namespace=namespace,
        name=name,
        version=version,
        qualifiers={},
        subpath="",
    )
|
|
||
|
|
||
# Host patterns handled by build_gitiles_purl (all under googlesource.com).
GITILES_DOMAINS = [
    r"android\.googlesource\.com",
    r"aomedia\.googlesource\.com",
    r"chromium\.googlesource\.com",
    r"gerrit\.googlesource\.com",
]
GITILES_ROUTE_REGEX = build_route_regex(GITILES_DOMAINS)
|
|
||
|
|
||
@purl_router.route(GITILES_ROUTE_REGEX)
def build_gitiles_purl(url):
    """
    Return a generic PackageURL built from a Gitiles repository or commit
    ``url``, or None when ``url`` does not have the expected shape.

    The namespace is everything after the scheme up to the last path segment
    (host included) and may not contain the "/+/" marker; the name is that last
    segment, and the version is the commit hash following "/+/" when present.

    For example:
    https://android.googlesource.com/platform/frameworks/base
    https://android.googlesource.com/device/generic/vulkan-cereal
    https://android.googlesource.com/platform/packages/apps/Settings/+/2968ccc911956fa5813a9a6a5e5c8970e383a60f
    https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3
    """
    parsed = re.match(
        r"^https?://"
        # Any characters, as long as "/+/" never starts inside the namespace.
        r"(?P<namespace>(?:(?!/\+/).)+)/"
        r"(?P<name>[^/]+)"
        r"(?:/\+/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$",
        url,
    )
    if not parsed:
        return None

    # NOTE(review): a maintainer noted this is likely also a candidate for the
    # new git purl type.
    return PackageURL(
        type="generic",
        qualifiers={},
        subpath="",
        **parsed.groupdict(),
    )
|
|
||
|
|
||
# Host patterns handled by build_allura_purl.
ALLURA_DOMAINS = [
    r"sourceforge\.net",
    r"forge-allura\.apache\.org",
]
# Only URLs whose path starts with "/p/" are routed.
ALLURA_ROUTE_REGEX = build_route_regex(ALLURA_DOMAINS, "/p/.*")
|
|
||
|
|
||
@purl_router.route(ALLURA_ROUTE_REGEX)
def build_allura_purl(url):
    """
    Return a generic PackageURL built from an Apache Allura (e.g., SourceForge)
    repository or commit ``url``, or None when ``url`` does not have the
    expected shape.

    The namespace is everything after the scheme up to the last path segment
    (host included), the name is that last segment, and the version is the
    commit hash of a trailing "/ci/<hash>" part when present.

    For example:
    https://sourceforge.net/p/djvu/djvulibre-git
    https://sourceforge.net/p/expat/code_git
    https://forge-allura.apache.org/p/allura/git
    https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d
    https://sourceforge.net/p/infrarecorder/code/ci/9361b6f267e7b1c1576c48f6dac6dec18d8a93e0/
    https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946/
    """
    parsed = re.match(
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+?)"
        # Optional commit part: 7 to 64 hexadecimal digits.
        r"(?:/ci/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$",
        url,
    )
    if not parsed:
        return None

    # NOTE(review): a maintainer asked whether a sourceforge purl type exists
    # or whether this belongs to a git type instead of "generic".
    return PackageURL(
        type="generic",
        qualifiers={},
        subpath="",
        **parsed.groupdict(),
    )
|
|
||
|
|
||
# Host (and, where needed, leading path) patterns handled by build_gitweb_purl.
# Every entry is a raw string so that "\." stays a regex escape instead of
# being an invalid Python string escape (DeprecationWarning/SyntaxWarning).
GITWEB_DOMAINS = [
    r"gcc\.gnu\.org/git",
    r"git\.postgresql\.org/gitweb",
    r"sourceware\.org/git",
    r"git\.openssl\.org/gitweb",
    r"gitbox\.apache\.org",
]
GITWEB_ROUTE_REGEX = build_route_regex(GITWEB_DOMAINS)
|
|
||
|
|
||
@purl_router.route(GITWEB_ROUTE_REGEX)
def build_gitweb_purl(url):
    """
    Return a generic PackageURL built from a Gitweb repository or commit
    ``url``, or None when ``url`` does not have the expected shape.

    The namespace is the part of the URL after the scheme and before the query
    string (host included, without a trailing slash). The name comes from the
    ``p`` query parameter without a trailing ".git", and the version from the
    ``h`` query parameter when it holds 7 to 64 hexadecimal digits.

    For example:
    https://gcc.gnu.org/git/?p=gcc.git
    https://git.postgresql.org/gitweb/?p=hamn.git
    https://sourceware.org/git/?p=glibc.git
    https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339
    https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2
    https://sourceware.org/git/?p=glibc.git;a=commit;h=dedebed24f77762eea7d3c5ed2739a90a4d60461
    https://gitbox.apache.org/repos/asf?p=xalan-java.git;a=commit;h=da3e0d06b467247643ce04e88d3346739d119f21
    """
    found = re.match(
        r"^https?://"
        r"(?P<namespace>[^?]+?)"
        # The namespace must be directly followed by the query string.
        r"/?(?=\?)"
        # Lookaheads pick the parameters out of the query in any order.
        r"(?=.*[?;&]p=(?P<name>[^;&]+?)(?:\.git)?(?:[;&]|$))"
        # The commit hash is optional: the empty alternative always succeeds.
        r"(?:(?=.*[?;&]h=(?P<version>[0-9a-fA-F]{7,64}))|)",
        url,
    )
    if not found:
        return None

    return PackageURL(
        type="generic",
        qualifiers={},
        subpath="",
        **found.groupdict(),
    )
|
|
||
|
|
||
| @purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*") | ||
| def build_gitlab_purl(url): | ||
| """ | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.