-
-
Notifications
You must be signed in to change notification settings - Fork 50
Implement stats queueing using rabbitMQ #79
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,4 +1,3 @@ | ||||||||||
| import time | ||||||||||
| from flask import ( | ||||||||||
| Blueprint, | ||||||||||
| request, | ||||||||||
|
|
@@ -8,15 +7,12 @@ | |||||||||
| ) | ||||||||||
| from utils.url_utils import ( | ||||||||||
| BOT_USER_AGENTS, | ||||||||||
| get_country, | ||||||||||
| get_client_ip, | ||||||||||
| validate_emoji_alias, | ||||||||||
| ) | ||||||||||
| from utils.mongo_utils import ( | ||||||||||
| load_url, | ||||||||||
| update_url, | ||||||||||
| load_emoji_url, | ||||||||||
| update_emoji_url, | ||||||||||
| ) | ||||||||||
| from cache import cache_query as cq | ||||||||||
| from cache.cache_url import UrlData | ||||||||||
|
|
@@ -27,37 +23,32 @@ | |||||||||
| from datetime import datetime, timezone | ||||||||||
| from urllib.parse import unquote | ||||||||||
| import re | ||||||||||
| import tldextract | ||||||||||
| from crawlerdetect import CrawlerDetect | ||||||||||
|
|
||||||||||
| from workers.stats_publisher import send_to_queue | ||||||||||
|
|
||||||||||
| crawler_detect = CrawlerDetect() | ||||||||||
| tld_no_cache_extract = tldextract.TLDExtract(cache_dir=None) | ||||||||||
|
|
||||||||||
| url_redirector = Blueprint("url_redirector", __name__) | ||||||||||
|
|
||||||||||
|
|
||||||||||
| @url_redirector.route("/<short_code>", methods=["GET"]) | ||||||||||
| @limiter.exempt | ||||||||||
| def redirect_url(short_code): | ||||||||||
| def redirect_url(short_code: str): | ||||||||||
| user_ip = get_client_ip() | ||||||||||
| projection = { | ||||||||||
| "_id": 1, | ||||||||||
| "url": 1, | ||||||||||
| "password": 1, | ||||||||||
| "max-clicks": 1, | ||||||||||
| "expiration-time": 1, | ||||||||||
| "total-clicks": 1, | ||||||||||
| "ips": {"$elemMatch": {"$eq": user_ip}}, | ||||||||||
| "block-bots": 1, | ||||||||||
| "average_redirection_time": 1, | ||||||||||
| } | ||||||||||
|
|
||||||||||
| short_code = unquote(short_code) | ||||||||||
|
|
||||||||||
| is_emoji = False | ||||||||||
|
|
||||||||||
| # Measure redirection time | ||||||||||
| start_time = time.perf_counter() | ||||||||||
| is_emoji = validate_emoji_alias(short_code) | ||||||||||
|
|
||||||||||
| cached_url_data = cq.get_url_data(short_code) | ||||||||||
| if cached_url_data: | ||||||||||
|
|
@@ -67,8 +58,7 @@ def redirect_url(short_code): | |||||||||
| "block-bots": cached_url_data.block_bots, | ||||||||||
| } | ||||||||||
| else: | ||||||||||
| if validate_emoji_alias(short_code): | ||||||||||
| is_emoji = True | ||||||||||
| if is_emoji: | ||||||||||
| url_data = load_emoji_url(short_code, projection) | ||||||||||
| else: | ||||||||||
| url_data = load_url(short_code, projection) | ||||||||||
|
|
@@ -134,7 +124,7 @@ def redirect_url(short_code): | |||||||||
|
|
||||||||||
| try: | ||||||||||
| ua = parse(user_agent) | ||||||||||
| if not ua or not ua.user_agent or not ua.os: | ||||||||||
| if not ua or not ua.string: | ||||||||||
| return jsonify( | ||||||||||
| { | ||||||||||
| "error_code": "400", | ||||||||||
|
|
@@ -151,45 +141,17 @@ def redirect_url(short_code): | |||||||||
| } | ||||||||||
| ), 400 | ||||||||||
|
|
||||||||||
| os_name = ua.os.family | ||||||||||
| browser = ua.user_agent.family | ||||||||||
| os_name = ua.os.family if ua.os else "Unknown" | ||||||||||
| browser = ua.user_agent.family if ua.user_agent else "Unknown" | ||||||||||
| referrer = request.headers.get("Referer") | ||||||||||
| country = get_country(user_ip) | ||||||||||
|
|
||||||||||
| is_unique_click = url_data.get("ips", None) is None | ||||||||||
|
|
||||||||||
| if country: | ||||||||||
| country = country.replace(".", " ") | ||||||||||
|
|
||||||||||
| updates = {"$inc": {}, "$set": {}, "$addToSet": {}} | ||||||||||
|
|
||||||||||
| if "ips" not in url_data: | ||||||||||
| url_data["ips"] = [] | ||||||||||
|
|
||||||||||
| if referrer: | ||||||||||
| referrer_raw = tld_no_cache_extract(referrer) | ||||||||||
| referrer = ( | ||||||||||
| f"{referrer_raw.domain}.{referrer_raw.suffix}" | ||||||||||
| if referrer_raw.suffix | ||||||||||
| else referrer_raw.domain | ||||||||||
| ) | ||||||||||
| sanitized_referrer = re.sub(r"[.$\x00-\x1F\x7F-\x9F]", "_", referrer) | ||||||||||
|
|
||||||||||
| updates["$inc"][f"referrer.{sanitized_referrer}.counts"] = 1 | ||||||||||
| updates["$addToSet"][f"referrer.{sanitized_referrer}.ips"] = user_ip | ||||||||||
|
|
||||||||||
| updates["$inc"][f"country.{country}.counts"] = 1 | ||||||||||
| updates["$addToSet"][f"country.{country}.ips"] = user_ip | ||||||||||
|
|
||||||||||
| updates["$inc"][f"browser.{browser}.counts"] = 1 | ||||||||||
| updates["$addToSet"][f"browser.{browser}.ips"] = user_ip | ||||||||||
|
|
||||||||||
| updates["$inc"][f"os_name.{os_name}.counts"] = 1 | ||||||||||
| updates["$addToSet"][f"os_name.{os_name}.ips"] = user_ip | ||||||||||
| bot_name: str | None = None | ||||||||||
|
|
||||||||||
| for bot in BOT_USER_AGENTS: | ||||||||||
| bot_re = re.compile(bot, re.IGNORECASE) | ||||||||||
| if bot_re.search(user_agent): | ||||||||||
| bot_name = bot | ||||||||||
| if url_data.get("block-bots", False): | ||||||||||
| return ( | ||||||||||
| jsonify( | ||||||||||
|
|
@@ -201,11 +163,10 @@ def redirect_url(short_code): | |||||||||
| ), | ||||||||||
| 403, | ||||||||||
| ) | ||||||||||
| sanitized_bot = re.sub(r"[.$\x00-\x1F\x7F-\x9F]", "_", bot) | ||||||||||
| updates["$inc"][f"bots.{sanitized_bot}"] = 1 | ||||||||||
| break | ||||||||||
| else: | ||||||||||
| if crawler_detect.isCrawler(user_agent): | ||||||||||
| bot_name = crawler_detect.getMatches()[0] | ||||||||||
| if url_data.get("block-bots", False): | ||||||||||
| return ( | ||||||||||
| jsonify( | ||||||||||
|
|
@@ -217,42 +178,25 @@ def redirect_url(short_code): | |||||||||
| ), | ||||||||||
| 403, | ||||||||||
| ) | ||||||||||
| updates["$inc"][f"bots.{crawler_detect.getMatches()}"] = 1 | ||||||||||
|
|
||||||||||
| # increment the counter for the short code | ||||||||||
| today = str(datetime.now()).split()[0] | ||||||||||
| updates["$inc"][f"counter.{today}"] = 1 | ||||||||||
|
|
||||||||||
| if is_unique_click: | ||||||||||
| updates["$inc"][f"unique_counter.{today}"] = 1 | ||||||||||
|
|
||||||||||
| updates["$addToSet"]["ips"] = user_ip | ||||||||||
|
|
||||||||||
| updates["$inc"]["total-clicks"] = 1 | ||||||||||
|
|
||||||||||
| updates["$set"]["last-click"] = str( | ||||||||||
| datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") | ||||||||||
| ) | ||||||||||
| updates["$set"]["last-click-browser"] = browser | ||||||||||
| updates["$set"]["last-click-os"] = os_name | ||||||||||
| updates["$set"]["last-click-country"] = country | ||||||||||
|
|
||||||||||
| # Calculate redirection time | ||||||||||
| end_time = time.perf_counter() | ||||||||||
| redirection_time = (end_time - start_time) * 1000 | ||||||||||
|
|
||||||||||
| curr_avg = url_data.get("average_redirection_time", 0) | ||||||||||
|
|
||||||||||
| # Update Average Redirection Time | ||||||||||
| alpha = 0.1 # Smoothing factor, adjust as needed | ||||||||||
| updates["$set"]["average_redirection_time"] = round( | ||||||||||
| (1 - alpha) * curr_avg + alpha * redirection_time, 2 | ||||||||||
| ) | ||||||||||
| message = { | ||||||||||
| "short_code": short_code, | ||||||||||
| "os_name": os_name, | ||||||||||
| "browser": browser, | ||||||||||
| "referrer": referrer, | ||||||||||
| "ip": user_ip, | ||||||||||
| "user_agent": user_agent, | ||||||||||
| "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), | ||||||||||
| "is_unique_click": is_unique_click, | ||||||||||
| "bot_name": bot_name, | ||||||||||
| "is_emoji": is_emoji, | ||||||||||
| } | ||||||||||
|
|
||||||||||
| if is_emoji: | ||||||||||
| update_emoji_url(short_code, updates) | ||||||||||
| # send the stats message to the stats queue to be processed later | ||||||||||
| if request.method == "HEAD": | ||||||||||
| pass | ||||||||||
|
Comment on lines
+196
to
+197
Contributor
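There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The route decorator only specifies GET methods (`methods=["GET"]`), but the handler also checks `request.method == "HEAD"`. Either declare HEAD in the route's allowed methods or remove the HEAD check.
This would ensure the route handling is consistent with the declared HTTP methods.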
Suggested change
Spotted by Diamond |
||||||||||
| else: | ||||||||||
| update_url(short_code, updates) | ||||||||||
| send_to_queue(message) | ||||||||||
|
|
||||||||||
| return redirect(url) | ||||||||||
|
|
||||||||||
|
|
@@ -295,3 +239,8 @@ def check_password(short_code): | |||||||||
| ), | ||||||||||
| 400, | ||||||||||
| ) | ||||||||||
|
|
||||||||||
|
|
||||||||||
| @url_redirector.route("/ok", methods=["GET"]) | ||||||||||
|
Zingzy marked this conversation as resolved.
Outdated
|
||||||||||
def simple_redirect():
    """Return the fixed body "Ok" together with HTTP status 200."""
    body, status = "Ok", 200
    return body, status
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| Analytics Worker Runner | ||
|
|
||
| This script starts the asynchronous analytics worker that processes | ||
| click data from RabbitMQ and updates MongoDB with analytics information. | ||
| """ | ||
|
|
||
| import sys | ||
| import os | ||
| import asyncio | ||
|
|
||
| # Ensure project root is on the path | ||
| ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) | ||
| sys.path.insert(0, ROOT_DIR) | ||
|
|
||
| from workers.stats_worker import StatsWorker # noqa: E402 | ||
|
|
||
|
|
||
def main():
    """Print the startup banner and run the stats worker.

    A KeyboardInterrupt is reported as a user-requested stop. Any other
    exception is printed and the process exits with status 1.
    """
    rule = "=" * 60
    banner = (
        rule,
        "🚀 Starting Spoo.me Shortener Stats Worker",
        rule,
        "This worker will process click analytics asynchronously",
        "and update MongoDB with detailed statistics.",
    )
    for line in banner:
        print(line)
    print()

    try:
        # NOTE(review): asyncio.run needs a coroutine, so StatsWorker() is
        # presumably an async entrypoint -- confirm in workers.stats_worker.
        asyncio.run(StatsWorker())
    except KeyboardInterrupt:
        print("\n👋 Worker stopped by user")
    except Exception as err:
        print(f"\n❌ Worker failed with error: {err}")
        sys.exit(1)
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
issue (code-quality): We've found these issues:
- (use-named-expression)
- (remove-pass-body)
- (low-code-quality)

Explanation
The quality score for this function is below the quality threshold of 25%.
This score is a combination of the method length, cognitive complexity and working memory.
How can you solve this?
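It might be worth refactoring this function to make it shorter and more readable.
- Reduce the function length by extracting pieces of functionality out into their own functions. This is the most important thing you can do - ideally a function should be less than 10 lines.
- Reduce nesting, perhaps by introducing guard clauses to return early.
- Ensure that variables are tightly scoped, so that code using related concepts sits together within the function rather than being scattered.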