Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,17 @@ jobs:
name: Run pytest
command: |
uv run pytest tests -v

markdown-link-checker:
executor: default
resource_class: small
steps:
- checkout-dep-1
- setup-python-env
- run:
name: Run link checker script
command: |
uv run scripts/markdown_link_checker.py

workflows:
version: 2
Expand All @@ -255,11 +266,21 @@ workflows:
or:
# run when manually triggered
- equal: [<< pipeline.parameters.run_job >>, "run-cicd"]
#run for every push
# run for every push
- equal: [<< pipeline.trigger_source >>, "webhook"]
jobs:
- cicd

markdown-link-checker:
when:
or:
# run when manually triggered
- equal: [<< pipeline.parameters.run_job >>, "markdown-link-checker"]
# run for every push
- equal: [<< pipeline.trigger_source >>, "webhook"]
jobs:
- markdown-link-checker

daily-l2-aggregate-later-loads:
when:
or:
Expand Down
62 changes: 62 additions & 0 deletions scripts/markdown_link_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import re
import subprocess
import shlex
from urllib.parse import urlparse


# Matches an inline markdown link of the form ``[text](destination)``.
# The first (unnamed) group is the non-empty link text; the named group
# "destination" captures everything between the parentheses up to the first ")".
MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\((?P<destination>[^)]+)\)")


def main():
    """Find every markdown link under the current directory and dispatch it.

    Shells out to ``find``/``grep`` to collect ``[text](destination)`` links
    from all ``*.md`` files, then routes each destination to the matching
    checker: absolute paths (leading ``/``), relative paths (leading ``.``)
    or http(s) URLs. Any other kind of destination is skipped silently.

    Raises:
        subprocess.CalledProcessError: if the ``find`` command exits non-zero.
    """
    # grep is invoked with -H (prefix each hit with its file name), -o (print
    # only the matched text) and -P (Perl-compatible regex), so every output
    # line looks like "<file>:<markdown link>".
    search_command = r"""find . -name "*.md" -exec grep -HoP '\[([^\]]+)\]\(([^)]+)\)' {} \;"""
    raw_output = subprocess.check_output(shlex.split(search_command), text=True)

    # filter(None, ...) drops the empty strings left by blank/trailing lines.
    for entry in filter(None, raw_output.split("\n")):
        # Only split on the first colon: the link itself may contain colons
        # (e.g. "https://...").
        source_file, link_text = entry.split(":", maxsplit=1)

        parsed_link = MARKDOWN_LINK_RE.match(link_text)
        if parsed_link is None:
            print(f"Invalid link: {link_text}")
            continue
        target = parsed_link.group("destination")

        if target.startswith("/"):
            # Absolute path.
            check_absolute_path(target)
            continue

        if target.startswith("."):
            # Path relative to the file that contains the link.
            check_relative_path(source_file, target)
            continue

        if not target.startswith(("http://", "https://")):
            # Neither a recognised path form nor a web URL: nothing to check.
            continue

        # Web URL: only hand it over when it has both a scheme and a host.
        url_parts = urlparse(target)
        if url_parts.scheme and url_parts.netloc:
            check_url(target)


def check_absolute_path(destination: str) -> None:
    """Report a link whose destination is an absolute path.

    Placeholder implementation: it only prints the destination, tagged with
    ``ABS:``, and performs no validation.

    Args:
        destination: The link target, starting with ``/``.
    """
    print("ABS: ", destination)


def check_relative_path(link_file: str, destination: str) -> None:
    """Report a link whose destination is a relative path.

    Placeholder implementation: it only prints the containing file and the
    destination, tagged with ``REL:``, and performs no validation.

    Args:
        link_file: Path of the markdown file in which the link was found.
        destination: The link target, starting with ``.``.
    """
    print("REL: ", link_file, destination)


def check_url(destination: str) -> None:
    """Report a link whose destination is a web URL.

    Placeholder implementation: it only prints the URL, tagged with ``URL:``,
    and makes no network request.

    Args:
        destination: The link target, an ``http://`` or ``https://`` URL.
    """
    print("URL: ", destination)


# Run the link check only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()