Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions src/packageurl/contrib/purl2url.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
# Visit https://github.com/package-url/packageurl-python for support and
# download.

import re

from packageurl import PackageURL
from packageurl.contrib.route import NoRouteAvailable
from packageurl.contrib.route import Router
Expand Down Expand Up @@ -172,6 +174,88 @@ def build_gitlab_repo_url(purl):
return f"https://gitlab.com/{namespace}/{name}"


# Known git hosting front-ends for "generic" purls.
# Each key is a tuple of regex patterns that are tried against the start of
# the purl namespace (host plus leading path); each value holds the
# str.format templates for the commit URL and the repository URL, filled
# with `namespace`, `name` and (for commits) `version`.
# All patterns are raw strings so that `\.` reaches the regex engine as an
# escaped dot instead of being an invalid string escape sequence.
GIT_REPO_GENERIC = {
    # cgit
    (
        r"git\.kernel\.org",
        r"gitweb\.gentoo\.org",
        r"cgit\.git\.savannah\.gnu\.org",
        r"web\.git\.kernel\.org",
    ): {
        "commit_url": "https://{namespace}/{name}.git/commit/?id={version}",
        "repo_url": "https://{namespace}/{name}.git",
    },
    # gitiles
    (
        r"android\.googlesource\.com",
        r"aomedia\.googlesource\.com",
        r"chromium\.googlesource\.com",
        r"gerrit\.googlesource\.com",
    ): {
        "commit_url": "https://{namespace}/{name}/+/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # allura
    (
        r"sourceforge\.net",
        r"forge-allura\.apache\.org",
    ): {
        "commit_url": "https://{namespace}/{name}/ci/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # gitweb
    (
        r"gcc\.gnu\.org/git",
        r"git\.postgresql\.org",
        r"sourceware\.org",
        r"git\.openssl\.org",
        r"gitbox\.apache\.org",
    ): {
        "commit_url": "https://{namespace}/?p={name}.git;a=commit;h={version}",
        "repo_url": "https://{namespace}/?p={name}.git",
    },
    # gitea / forgejo
    (
        r"codeberg\.org",
        r"gitea\.com",
    ): {
        "commit_url": "https://{namespace}/{name}/commit/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # sub gitlab ( excludes gitlab.com )
    (
        r"git\.codelinaro\.org.*",
        r"gitlab\.(?!com\b)[^/]+",
    ): {
        "commit_url": "https://{namespace}/{name}/-/commit/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
}


@repo_router.route("pkg:generic/.*")
def build_generic_repo_url(purl):
    """
    Return a commit URL (when the purl has a version) or a repository URL
    (when it has none) for a generic `purl` string whose namespace starts
    with one of the host patterns listed in GIT_REPO_GENERIC.

    Return None when the purl has no namespace or no name, or when no host
    pattern matches the namespace.
    """
    package_url = PackageURL.from_string(purl)
    namespace = package_url.namespace
    name = package_url.name
    version = package_url.version

    if not namespace or not name:
        return

    for host_patterns, templates in GIT_REPO_GENERIC.items():
        # re.match anchors at the start only, so the namespace may carry a
        # path after the host part.
        if not any(re.match(host_pattern, namespace) for host_pattern in host_patterns):
            continue

        if not version:
            return templates["repo_url"].format(namespace=namespace, name=name)
        return templates["commit_url"].format(
            namespace=namespace, name=name, version=version
        )
    return


@repo_router.route("pkg:(gem|rubygems)/.*")
def build_rubygems_repo_url(purl):
"""
Expand Down
259 changes: 259 additions & 0 deletions src/packageurl/contrib/url2purl.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,265 @@ def build_bitbucket_purl(url):
)


def build_route_regex(domain_patterns, path_suffix="/.*"):
    """
    Return a route regex string matching http(s) URLs on any of the
    `domain_patterns`, followed by `path_suffix`.

    `domain_patterns` is an iterable of regex fragments, one per domain. A
    single pattern string is also accepted and treated as one domain rather
    than being joined character by character.
    `path_suffix` is a regex fragment appended right after the domain group.
    """
    if isinstance(domain_patterns, str):
        # "|".join("abc") would yield "a|b|c"; wrap a bare string instead.
        domain_patterns = [domain_patterns]
    domain_pattern = "|".join(domain_patterns)
    return rf"https?://({domain_pattern}){path_suffix}"


# Host patterns for GitLab instances other than gitlab.com: the negative
# lookahead rejects "gitlab.com" while accepting other "gitlab.<host>" names.
SUB_GITLAB_DOMAINS = [
    r"git\.codelinaro\.org",
    r"gitlab\.(?!com\b)[^/]+",
]
SUB_GITLAB_ROUTE_REGEX = build_route_regex(domain_patterns=SUB_GITLAB_DOMAINS)


@purl_router.route(SUB_GITLAB_ROUTE_REGEX)
def build_gitlab_sub_purl(url):
    """
    Return a "generic" PackageURL object from a repository or commit `url`
    hosted on a GitLab instance other than gitlab.com, or None when the URL
    does not have the expected shape.

    The name is the path segment right before the optional
    "/-/commit/<hash>" suffix, the namespace is the host plus everything
    before that segment, and the version is the commit hash when present.
    For example:
    https://gitlab.gnome.org/GNOME/gimp
    https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm
    https://gitlab.gnome.org/GNOME/gimp/-/commit/112a5e038f0646eae5ae314988ec074433d2b365
    https://git.codelinaro.org/linaro/qcom/project/-/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    """
    repo_or_commit_re = re.compile(
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+)"
        r"(?:/-/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    found = repo_or_commit_re.search(url)
    if found is None:
        return None

    # The named groups are exactly namespace, name and version.
    return PackageURL(type="generic", **found.groupdict())


# Host patterns for Gitea / Forgejo instances. Raw strings keep `\.` as a
# regex-escaped dot rather than an invalid string escape sequence.
GITEA_DOMAINS = [r"codeberg\.org", r"gitea\.com"]
GITEA_ROUTE_REGEX = build_route_regex(GITEA_DOMAINS)


@purl_router.route(GITEA_ROUTE_REGEX)
def build_gitea_purl(url):
    """
    Return a "generic" PackageURL object from a gitea/forgejo repository or
    commit `url`, or None when the URL does not have the expected shape.

    The name is the path segment right before the optional "/commit/<hash>"
    suffix, the namespace is the host plus everything before that segment,
    and the version is the commit hash when present.
    For example:
    https://gitea.com/htc47/entur
    https://codeberg.org/alpinelinux/aports
    https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c
    """

    gitea_commit_pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+)"
        r"(?:/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    commit_match = re.search(gitea_commit_pattern, url)
    if not commit_match:
        return None

    return PackageURL(
        # NOTE(review): a reviewer asked for a dedicated Forgejo PURL type;
        # "generic" is what this code uses for now.
        type="generic",
        namespace=commit_match.group("namespace"),
        name=commit_match.group("name"),
        version=commit_match.group("version"),
    )


# Host patterns for cgit front-ends. All entries are raw strings so `\.`
# is a regex-escaped dot rather than an invalid string escape sequence.
CGIT_DOMAINS = [
    r"git\.kernel\.org",
    r"gitweb\.gentoo\.org",
    r"cgit\.git\.savannah\.gnu\.org",
    r"web\.git\.kernel\.org",
]
CGIT_ROUTE_REGEX = build_route_regex(CGIT_DOMAINS)


@purl_router.route(CGIT_ROUTE_REGEX)
def build_cgit_purl(url):
    """
    Return a "generic" PackageURL object from a cgit repository or commit
    `url`, or None when the URL matches neither supported shape.

    Two shapes are recognized:
    - the kernel.org stable shorthand "git.kernel.org/stable/c/<hash>",
      which is mapped to a fixed namespace and the name "linux";
    - a project URL, optionally ending in ".git" and optionally followed by
      "/commit/...?...id=<hash>", where the version is the `id` value.
    For example:
    https://git.kernel.org/pub/scm/utils/b4/b4.git
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
    https://cgit.git.savannah.gnu.org/cgit/uddf.git
    https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git
    https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git
    https://gitweb.gentoo.org/dev/darkside.git
    https://gitweb.gentoo.org/repo/gentoo.git
    https://git.kernel.org/pub/scm/bluetooth/bluez.git/commit/?id=74770b1fd2be612f9c2cf807db81fcdcc35e6560
    https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git/commit/?h=for-next&id=bd771cf5c4254511cc4abb88f3dab3bd58bdf8e8
    https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/fs/smb?id=db363b0a1d9e6b9dc556296f1b1007aeb496a8cf
    https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d
    https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825
    https://git.kernel.org/stable/c/9a9a8fe26751334b7739193a94eba741073b8a55
    """

    # https://git.kernel.org/stable/c/<hash>
    kernel_shorthand = (
        r"^https?://git\.kernel\.org/stable/c/"
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    cgit_project_pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+?)"
        r"(?:\.git)?"
        r"(?:/commit/(?:[^?]+)?\?.*?\bid=(?P<version>[0-9a-fA-F]{7,64})(?:&.*)?)?"
        r"/?$"
    )

    # The shorthand is tried first: it has no project path to parse.
    if match := re.search(kernel_shorthand, url):
        # NOTE(review): a reviewer flagged that this hard-coded mapping
        # needs thinking, and that an emerging PURL registry is expected to
        # cater to the kernel's needs.
        namespace = "git.kernel.org/pub/scm/linux/kernel/git/stable/"
        name = "linux"
    elif match := re.search(cgit_project_pattern, url):
        namespace = match.group("namespace")
        name = match.group("name")
    else:
        return None

    return PackageURL(
        # NOTE(review): a reviewer pointed to an emerging git PURL type for
        # this; "generic" is what this code uses for now.
        type="generic",
        namespace=namespace,
        name=name,
        # Both patterns define a "version" group; it is None for plain
        # project URLs without a commit id.
        version=match.group("version"),
        qualifiers={},
        subpath="",
    )


# Host patterns for Gitiles front-ends, all under googlesource.com.
GITILES_DOMAINS = [
    rf"{subdomain}\.googlesource\.com"
    for subdomain in ("android", "aomedia", "chromium", "gerrit")
]
GITILES_ROUTE_REGEX = build_route_regex(domain_patterns=GITILES_DOMAINS)


@purl_router.route(GITILES_ROUTE_REGEX)
def build_gitiles_purl(url):
    """
    Return a "generic" PackageURL object from a Gitiles repository or commit
    `url`, or None when the URL does not have the expected shape.

    The name is the path segment right before the optional "/+/<hash>"
    suffix, the namespace is the host plus everything before that segment,
    and the version is the commit hash when present.
    For example:
    https://android.googlesource.com/platform/frameworks/base
    https://android.googlesource.com/device/generic/vulkan-cereal
    https://android.googlesource.com/platform/packages/apps/Settings/+/2968ccc911956fa5813a9a6a5e5c8970e383a60f
    https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3
    """

    gitiles_project_pattern = (
        r"^https?://"
        # The namespace may not run across a "/+/" separator.
        r"(?P<namespace>(?:(?!/\+/).)+)/"
        r"(?P<name>[^/]+)"
        r"(?:/\+/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    match = re.search(gitiles_project_pattern, url)
    if not match:
        return None

    return PackageURL(
        # NOTE(review): a reviewer noted this is likely also a candidate for
        # the new git PURL type; "generic" is what this code uses for now.
        type="generic",
        namespace=match.group("namespace"),
        name=match.group("name"),
        version=match.group("version"),
        qualifiers={},
        subpath="",
    )


# Host patterns for Apache Allura front-ends; only URLs whose path starts
# with "/p/" are routed.
ALLURA_DOMAINS = [
    r"sourceforge\.net",
    r"forge-allura\.apache\.org",
]
ALLURA_ROUTE_REGEX = build_route_regex(ALLURA_DOMAINS, path_suffix="/p/.*")


@purl_router.route(ALLURA_ROUTE_REGEX)
def build_allura_purl(url):
    """
    Return a "generic" PackageURL object from an Apache Allura repository or
    commit `url` (e.g., SourceForge), or None when the URL does not have
    the expected shape.

    The name is the path segment right before the optional "/ci/<hash>"
    suffix, the namespace is the host plus everything before that segment,
    and the version is the commit hash when present.
    For example:
    https://sourceforge.net/p/djvu/djvulibre-git
    https://sourceforge.net/p/expat/code_git
    https://forge-allura.apache.org/p/allura/git
    https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d
    https://sourceforge.net/p/infrarecorder/code/ci/9361b6f267e7b1c1576c48f6dac6dec18d8a93e0/
    https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946/
    """

    allura_pattern = (
        r"^https?://"
        r"(?P<namespace>.+?)/"
        r"(?P<name>[^/]+?)"
        r"(?:/ci/(?P<version>[0-9a-fA-F]{7,64}))?"
        r"/?$"
    )

    commit_match = re.search(allura_pattern, url)
    if not commit_match:
        return None

    return PackageURL(
        # NOTE(review): a reviewer raised whether a sourceforge type or a
        # git type should be used here; "generic" is what this code uses
        # for now.
        type="generic",
        namespace=commit_match.group("namespace"),
        name=commit_match.group("name"),
        version=commit_match.group("version"),
        qualifiers={},
        subpath="",
    )


# Host (and leading path) patterns for Gitweb front-ends. All entries are
# raw strings so `\.` is a regex-escaped dot rather than an invalid string
# escape sequence.
GITWEB_DOMAINS = [
    r"gcc\.gnu\.org/git",
    r"git\.postgresql\.org/gitweb",
    r"sourceware\.org/git",
    r"git\.openssl\.org/gitweb",
    r"gitbox\.apache\.org",
]
GITWEB_ROUTE_REGEX = build_route_regex(GITWEB_DOMAINS)


@purl_router.route(GITWEB_ROUTE_REGEX)
def build_gitweb_purl(url):
    """
    Return a "generic" PackageURL object from a Gitweb repository or commit
    `url`, or None when the URL does not have the expected shape.

    The namespace is the host and path in front of the query string (without
    a trailing slash), the name is the value of the `p` query parameter
    without its ".git" suffix, and the version is the value of the `h`
    query parameter when it is a hexadecimal hash.
    For example:
    https://gcc.gnu.org/git/?p=gcc.git
    https://git.postgresql.org/gitweb/?p=hamn.git
    https://sourceware.org/git/?p=glibc.git
    https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339
    https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2
    https://sourceware.org/git/?p=glibc.git;a=commit;h=dedebed24f77762eea7d3c5ed2739a90a4d60461
    https://gitbox.apache.org/repos/asf?p=xalan-java.git;a=commit;h=da3e0d06b467247643ce04e88d3346739d119f21
    """

    gitweb_pattern = (
        r"^https?://"
        r"(?P<namespace>[^?]+?)"
        r"/?(?=\?)"
        # Lookaheads pick the parameters out of the query in any order.
        r"(?=.*[?;&]p=(?P<name>[^;&]+?)(?:\.git)?(?:[;&]|$))"
        r"(?:(?=.*[?;&]h=(?P<version>[0-9a-fA-F]{7,64}))|)"
    )

    found = re.search(gitweb_pattern, url)
    if not found:
        return None

    parts = found.groupdict()
    return PackageURL(
        type="generic",
        namespace=parts["namespace"],
        name=parts["name"],
        version=parts["version"],
        qualifiers={},
        subpath="",
    )


@purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*")
def build_gitlab_purl(url):
"""
Expand Down
Loading
Loading