Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 32 additions & 83 deletions blueprints/redirector.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import time
from flask import (
Blueprint,
request,
Expand All @@ -8,15 +7,12 @@
)
from utils.url_utils import (
BOT_USER_AGENTS,
get_country,
get_client_ip,
validate_emoji_alias,
)
from utils.mongo_utils import (
load_url,
update_url,
load_emoji_url,
update_emoji_url,
)
from cache import cache_query as cq
from cache.cache_url import UrlData
Expand All @@ -27,37 +23,32 @@
from datetime import datetime, timezone
from urllib.parse import unquote
import re
import tldextract
from crawlerdetect import CrawlerDetect

from workers.stats_publisher import send_to_queue

crawler_detect = CrawlerDetect()
tld_no_cache_extract = tldextract.TLDExtract(cache_dir=None)

url_redirector = Blueprint("url_redirector", __name__)


@url_redirector.route("/<short_code>", methods=["GET"])
@limiter.exempt
def redirect_url(short_code):
def redirect_url(short_code: str):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): We've found these issues:

  • Low code quality found in redirect_url (low-code-quality)

Explanation

The quality score for this function is below the quality threshold of 25%.
This score is a combination of the method length, cognitive complexity and working memory.

How can you solve this?

It might be worth refactoring this function to make it shorter and more readable.

  • Reduce the function length by extracting pieces of functionality out into
    their own functions. This is the most important thing you can do - ideally a
    function should be less than 10 lines.
  • Reduce nesting, perhaps by introducing guard clauses to return early.
  • Ensure that variables are tightly scoped, so that code using related concepts
    sits together within the function rather than being scattered.

user_ip = get_client_ip()
projection = {
"_id": 1,
"url": 1,
"password": 1,
"max-clicks": 1,
"expiration-time": 1,
"total-clicks": 1,
"ips": {"$elemMatch": {"$eq": user_ip}},
"block-bots": 1,
"average_redirection_time": 1,
}

short_code = unquote(short_code)

is_emoji = False

# Measure redirection time
start_time = time.perf_counter()
is_emoji = validate_emoji_alias(short_code)

cached_url_data = cq.get_url_data(short_code)
if cached_url_data:
Expand All @@ -67,8 +58,7 @@ def redirect_url(short_code):
"block-bots": cached_url_data.block_bots,
}
else:
if validate_emoji_alias(short_code):
is_emoji = True
if is_emoji:
url_data = load_emoji_url(short_code, projection)
else:
url_data = load_url(short_code, projection)
Expand Down Expand Up @@ -134,7 +124,7 @@ def redirect_url(short_code):

try:
ua = parse(user_agent)
if not ua or not ua.user_agent or not ua.os:
if not ua or not ua.string:
return jsonify(
{
"error_code": "400",
Expand All @@ -151,45 +141,17 @@ def redirect_url(short_code):
}
), 400

os_name = ua.os.family
browser = ua.user_agent.family
os_name = ua.os.family if ua.os else "Unknown"
browser = ua.user_agent.family if ua.user_agent else "Unknown"
referrer = request.headers.get("Referer")
country = get_country(user_ip)

is_unique_click = url_data.get("ips", None) is None

if country:
country = country.replace(".", " ")

updates = {"$inc": {}, "$set": {}, "$addToSet": {}}

if "ips" not in url_data:
url_data["ips"] = []

if referrer:
referrer_raw = tld_no_cache_extract(referrer)
referrer = (
f"{referrer_raw.domain}.{referrer_raw.suffix}"
if referrer_raw.suffix
else referrer_raw.domain
)
sanitized_referrer = re.sub(r"[.$\x00-\x1F\x7F-\x9F]", "_", referrer)

updates["$inc"][f"referrer.{sanitized_referrer}.counts"] = 1
updates["$addToSet"][f"referrer.{sanitized_referrer}.ips"] = user_ip

updates["$inc"][f"country.{country}.counts"] = 1
updates["$addToSet"][f"country.{country}.ips"] = user_ip

updates["$inc"][f"browser.{browser}.counts"] = 1
updates["$addToSet"][f"browser.{browser}.ips"] = user_ip

updates["$inc"][f"os_name.{os_name}.counts"] = 1
updates["$addToSet"][f"os_name.{os_name}.ips"] = user_ip
bot_name: str | None = None

for bot in BOT_USER_AGENTS:
bot_re = re.compile(bot, re.IGNORECASE)
if bot_re.search(user_agent):
bot_name = bot
if url_data.get("block-bots", False):
return (
jsonify(
Expand All @@ -201,11 +163,10 @@ def redirect_url(short_code):
),
403,
)
sanitized_bot = re.sub(r"[.$\x00-\x1F\x7F-\x9F]", "_", bot)
updates["$inc"][f"bots.{sanitized_bot}"] = 1
break
else:
if crawler_detect.isCrawler(user_agent):
bot_name = crawler_detect.getMatches()[0]
if url_data.get("block-bots", False):
return (
jsonify(
Expand All @@ -217,42 +178,25 @@ def redirect_url(short_code):
),
403,
)
updates["$inc"][f"bots.{crawler_detect.getMatches()}"] = 1

# increment the counter for the short code
today = str(datetime.now()).split()[0]
updates["$inc"][f"counter.{today}"] = 1

if is_unique_click:
updates["$inc"][f"unique_counter.{today}"] = 1

updates["$addToSet"]["ips"] = user_ip

updates["$inc"]["total-clicks"] = 1

updates["$set"]["last-click"] = str(
datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
)
updates["$set"]["last-click-browser"] = browser
updates["$set"]["last-click-os"] = os_name
updates["$set"]["last-click-country"] = country

# Calculate redirection time
end_time = time.perf_counter()
redirection_time = (end_time - start_time) * 1000

curr_avg = url_data.get("average_redirection_time", 0)

# Update Average Redirection Time
alpha = 0.1 # Smoothing factor, adjust as needed
updates["$set"]["average_redirection_time"] = round(
(1 - alpha) * curr_avg + alpha * redirection_time, 2
)
message = {
"short_code": short_code,
"os_name": os_name,
"browser": browser,
"referrer": referrer,
"ip": user_ip,
"user_agent": user_agent,
"timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
"is_unique_click": is_unique_click,
"bot_name": bot_name,
"is_emoji": is_emoji,
}

if is_emoji:
update_emoji_url(short_code, updates)
# send the stats message to the stats queue to be processed later
if request.method == "HEAD":
pass
Comment on lines +196 to +197
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The route decorator only specifies GET methods (@url_redirector.route("/<short_code>", methods=["GET"])), but there's a conditional check for HEAD requests that does nothing. Either:

  1. Update the route decorator to include HEAD requests:

    @url_redirector.route("/<short_code>", methods=["GET", "HEAD"])
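  2. Or remove the conditional check if the no-op branch is not needed. Note that Flask automatically adds HEAD to any route that accepts GET, so HEAD requests already reach this handler with the current configuration.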

This would ensure the route handling is consistent with the declared HTTP methods.

Suggested change
if request.method == "HEAD":
pass
# HEAD requests are handled automatically by Flask for GET routes
# No special handling needed

Spotted by Diamond

Is this helpful? React 👍 or 👎 to let us know.

else:
update_url(short_code, updates)
send_to_queue(message)

return redirect(url)

Expand Down Expand Up @@ -295,3 +239,8 @@ def check_password(short_code):
),
400,
)


@url_redirector.route("/ok", methods=["GET"])
Comment thread
Zingzy marked this conversation as resolved.
Outdated
def simple_redirect():
    """Reply with the plain-text body ``Ok`` and HTTP status 200.

    Despite the name, no redirect is issued; the view just returns a fixed
    response (presumably used as a liveness probe -- confirm with callers).
    """
    body, status = "Ok", 200
    return body, status
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ description = "Open-Source URL Shortener Written in Flask"
readme = "README.md"
requires-python = ">=3.9"
dependencies = [
"aio-pika>=9.5.5",
"crawlerdetect>=0.3.0",
"dicttoxml>=1.7.16",
"emoji>=2.14.1",
Expand All @@ -14,7 +15,9 @@ dependencies = [
"flask-limiter[mongodb]>=3.11.0",
"geoip2>=5.1.0",
"gunicorn>=23.0.0",
"loguru>=0.7.3",
"openpyxl>=3.1.5",
"pika>=1.3.2",
"pycountry>=24.6.1",
"pymongo>=4.13.0",
"python-dotenv>=1.1.0",
Expand Down
9 changes: 8 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
aio-pika==9.5.5
aiohappyeyeballs==2.6.1
aiohttp==3.12.4
aiormq==6.8.1
aiosignal==1.3.2
attrs==25.3.0
blinker==1.9.0
Expand All @@ -14,6 +16,7 @@ dicttoxml==1.7.16
dnspython==2.7.0
emoji==2.14.1
et-xmlfile==2.0.0
exceptiongroup==1.3.0
filelock==3.18.0
flask==3.1.1
flask-caching==2.3.1
Expand All @@ -27,6 +30,7 @@ iniconfig==2.1.0
itsdangerous==2.2.0
jinja2==3.1.6
limits==4.2
loguru==0.7.3
markdown-it-py==3.0.0
markupsafe==3.0.2
maxminddb==2.7.0
Expand All @@ -36,6 +40,8 @@ multidict==6.4.4
openpyxl==3.1.5
ordered-set==4.1.0
packaging==24.2
pamqp==3.3.0
pika==1.3.2
pluggy==1.6.0
propcache==0.3.1
pycountry==24.6.1
Expand All @@ -47,7 +53,7 @@ pytest-mock==3.14.1
python-dotenv==1.1.0
pytz==2025.2
redis==6.2.0
requests==2.32.4
requests==2.32.3
requests-file==2.1.0
requests-mock==1.12.1
rich==13.9.4
Expand All @@ -62,5 +68,6 @@ urllib3==2.4.0
uv==0.7.8
validators==0.35.0
werkzeug==3.1.3
win32-setctime==1.2.0
wrapt==1.17.2
yarl==1.20.0
40 changes: 40 additions & 0 deletions start_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
"""
Analytics Worker Runner

This script starts the asynchronous analytics worker that processes
click data from RabbitMQ and updates MongoDB with analytics information.
"""

import sys
import os
import asyncio

# Ensure project root is on the path
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, ROOT_DIR)

from workers.stats_worker import StatsWorker # noqa: E402


def main():
    """Print the startup banner and run the stats worker until it stops.

    A ``KeyboardInterrupt`` (Ctrl-C) ends the run with a farewell message.
    Any other exception is reported on stdout and the process exits with
    status 1.
    """
    rule = "=" * 60
    banner = (
        rule,
        "🚀 Starting Spoo.me Shortener Stats Worker",
        rule,
        "This worker will process click analytics asynchronously",
        "and update MongoDB with detailed statistics.",
        "",  # trailing blank line separating the banner from worker output
    )
    for line in banner:
        print(line)

    try:
        # asyncio.run creates a new event loop and drives the worker
        # entrypoint until it completes.
        asyncio.run(StatsWorker())
    except KeyboardInterrupt:
        print("\n👋 Worker stopped by user")
    except Exception as exc:
        print(f"\n❌ Worker failed with error: {exc}")
        sys.exit(1)


if __name__ == "__main__":
main()
Loading