From 394a870900870f4149fd9177ef2790a6aba1eab2 Mon Sep 17 00:00:00 2001 From: Luan Fernandes Date: Thu, 13 Mar 2025 15:38:07 -0300 Subject: [PATCH 1/4] add log level for local debug --- docker-compose.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 1f52249..99f6355 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,9 @@ services: - .:/app environment: - PYTHONPATH=/app + - LOG_LEVEL=DEBUG + - UVICORN_LOG_LEVEL=debug + - PYTHONUNBUFFERED=1 restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] From 238d002c307354e5db48ea014f7fd000da593f2a Mon Sep 17 00:00:00 2001 From: Luan Fernandes Date: Thu, 13 Mar 2025 15:38:29 -0300 Subject: [PATCH 2/4] lots of updates to show distributions --- main.py | 213 ++++++++++++++- static/app.js | 624 +++++++++++++++++++++++++++++++++++++++++++ static/styles.css | 33 +++ templates/index.html | 151 +++++++---- 4 files changed, 969 insertions(+), 52 deletions(-) diff --git a/main.py b/main.py index bd4c31c..46a9c69 100644 --- a/main.py +++ b/main.py @@ -17,14 +17,17 @@ import logging import traceback from datetime import datetime +from scipy import stats # Configure logging +log_level = os.environ.get("LOG_LEVEL", "INFO").upper() logging.basicConfig( - level=logging.INFO, + level=getattr(logging, log_level), format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", handlers=[logging.FileHandler("app.log"), logging.StreamHandler()], ) logger = logging.getLogger(__name__) +logger.info(f"Starting application with log level: {log_level}") # Create directories if they don't exist os.makedirs("static", exist_ok=True) @@ -66,6 +69,20 @@ def run_conversion_experiment(self, sim_count: int = 100_000, show=False): ) self.conversion_results = self.conversion_test.evaluate() + # Create the posterior distributions for conversion rates + # For binary data, the posterior is a Beta distribution with parameters: + # alpha = a_prior + positives, beta = b_prior + (totals - positives) + self.conversion_distributions = {} + for variant_name, variant_data in zip( + self.conversion_test.variant_names, self.conversion_results + ): + a_prior = 0.5 # Default prior in BinaryDataTest + b_prior = 0.5 # Default prior in BinaryDataTest + alpha = a_prior + variant_data["positives"] + beta = b_prior + (variant_data["totals"] - variant_data["positives"]) + # Create a Beta distribution with these parameters + self.conversion_distributions[variant_name] = stats.beta(alpha, beta) + if show: print( pd.DataFrame(self.conversion_results).to_markdown( @@ -76,17 +93,90 @@ def run_conversion_experiment(self, sim_count: int = 100_000, show=False): def run_arpu_experiment(self, sim_count: int = 100_000, show=False): self.arpu_test: DeltaLognormalDataTest = DeltaLognormalDataTest() for v in self.variants: - rev_logs = [np.log(v.revenue / v.conversions)] * v.conversions + rev_logs = ( + [np.log(v.revenue / v.conversions)] * v.conversions + if v.conversions > 0 + else [] + ) self.arpu_test.add_variant_data_agg( v.name, totals=v.impressions, positives=v.conversions, sum_values=v.revenue, - sum_logs=sum(rev_logs), - sum_logs_2=sum([np.square(l) for l in rev_logs]), + sum_logs=sum(rev_logs) if rev_logs else 0, + sum_logs_2=sum([np.square(l) for l in rev_logs]) if rev_logs else 0, ) self.arpu_results = self.arpu_test.evaluate() + + # Create the posterior distributions for ARPU + # For delta-lognormal data, we need to simulate from the model + self.arpu_distributions = {} + + # Generate samples for each variant using the DeltaLognormalDataTest model + # We'll use the eval_simulation method which returns probabilities and expected loss + pbbs, loss = self.arpu_test.eval_simulation(sim_count=1000, seed=42) + + # For each variant, we'll generate samples from the posterior distribution + for variant_name in self.arpu_test.variant_names: + variant_idx = self.arpu_test.variant_names.index(variant_name) + + # Get the parameters for this variant + totals = self.arpu_test.totals[variant_idx] + positives = self.arpu_test.positives[variant_idx] + sum_logs = self.arpu_test.sum_logs[variant_idx] + sum_logs_2 = self.arpu_test.sum_logs_2[variant_idx] + + # Get the priors + a_prior_beta = self.arpu_test.a_priors_beta[variant_idx] + b_prior_beta = self.arpu_test.b_priors_beta[variant_idx] + m_prior = self.arpu_test.m_priors[variant_idx] + a_prior_ig = self.arpu_test.a_priors_ig[variant_idx] + b_prior_ig = self.arpu_test.b_priors_ig[variant_idx] + w_prior = self.arpu_test.w_priors[variant_idx] + + # Generate samples from the posterior distribution + # First, sample from the Beta distribution for conversion rate + np.random.seed(42 + variant_idx) # Different seed for each variant + conversion_rate = stats.beta( + a_prior_beta + positives, b_prior_beta + (totals - positives) + ).rvs(size=1000) + + # For positive values, we need to sample from the log-normal distribution + # The parameters for the log-normal are derived from the data + if positives > 0: + # Calculate posterior parameters for the log-normal distribution + n = positives + w_n = w_prior + n + m_n = (w_prior * m_prior + sum_logs) / w_n + a_n = a_prior_ig + n / 2 + b_n = b_prior_ig + 0.5 * ( + sum_logs_2 - 2 * m_n * sum_logs + w_n * m_n**2 + ) + + # Sample from the inverse gamma for variance + np.random.seed(42 + variant_idx + 100) # Different seed + variance = stats.invgamma(a_n, scale=b_n).rvs(size=1000) + + # Sample from the normal for mean + np.random.seed(42 + variant_idx + 200) # Different seed + mean = stats.norm(m_n, np.sqrt(variance / w_n)).rvs(size=1000) + + # Now sample from the log-normal with these parameters + np.random.seed(42 + variant_idx + 300) # Different seed + log_normal_samples = np.exp( + stats.norm(mean, np.sqrt(variance)).rvs(size=1000) + ) + + # Combine with conversion rate to get ARPU + arpu_samples = conversion_rate * log_normal_samples + else: + # If no conversions, ARPU is 0 + arpu_samples = np.zeros(1000) + + # Store the samples + self.arpu_distributions[variant_name] = arpu_samples.tolist() + if show: print( pd.DataFrame(self.arpu_results).to_markdown( @@ -200,6 +290,16 @@ def compile_full_data( def get_reports(self, probs_precision: int = 4): self.compile_full_data() + # Debug: Print the structure of revenue_per_sale_results + print( + "Revenue per sale results structure:", + ( + self.revenue_per_sale_results[0] + if self.revenue_per_sale_results + else "No results" + ), + ) + summaries = [] conv_stats = [] arpu_stats = [] @@ -216,6 +316,13 @@ def get_reports(self, probs_precision: int = 4): conv = {"variant": variant.get("variant")} conv.update(variant.get("conversion")) + + # Find the posterior_mean from the conversion_results + for result in self.conversion_results: + if result["variant"] == variant.get("variant"): + conv.update({"posterior_mean": result["posterior_mean"]}) + break + conv.update( { "lift": round( @@ -230,6 +337,13 @@ def get_reports(self, probs_precision: int = 4): arpu = {"variant": variant.get("variant")} arpu.update(variant.get("arpu")) + + # Find the posterior_mean for ARPU from the arpu_results + for result in self.arpu_results: + if result["variant"] == variant.get("variant"): + arpu.update({"posterior_mean": result["avg_values"]}) + break + arpu.update( { "lift": round( @@ -242,6 +356,13 @@ def get_reports(self, probs_precision: int = 4): rev_per_sale = {"variant": variant.get("variant")} rev_per_sale.update(variant.get("revenue_per_sale")) + + # Find the posterior_mean for revenue per sale from the revenue_per_sale_results + for result in self.revenue_per_sale_results: + if result["variant"] == variant.get("variant"): + rev_per_sale.update({"posterior_mean": result["posterior_mean"]}) + break + baseline_avg_ticket = baseline_res.get("summary").get("avg_ticket") variant_avg_ticket = summary["avg_ticket"] # Handle division by zero @@ -263,7 +384,62 @@ def get_reports(self, probs_precision: int = 4): _df_arpu = pd.DataFrame(arpu_stats) _df_rev_per_sale = pd.DataFrame(rev_per_sale_stats) - return _df_summary, _df_conv, _df_arpu, _df_rev_per_sale + # Get conversion distributions + conversion_distributions = {} + for variant_name, distribution in self.conversion_distributions.items(): + # Sample 500 points from the distribution for visualization + # Using a fixed random seed for reproducibility + np.random.seed(42) + conversion_distributions[variant_name] = distribution.rvs( + size=1000 + ).tolist() + + # Get ARPU distributions + arpu_distributions = self.arpu_distributions + + # Get revenue per sale distributions + # For exponential data, the posterior is a Gamma distribution with parameters: + # alpha = a_prior + totals, beta = b_prior + sum_values + revenue_per_sale_distributions = {} + for variant_name, variant_data in zip( + self.revenue_per_sale_test.variant_names, self.revenue_per_sale_results + ): + # Get the parameters for the Gamma distribution + # In ExponentialDataTest, the prior is Gamma(1, 0) + a_prior = 1 # Default prior in ExponentialDataTest + b_prior = 0 # Default prior in ExponentialDataTest + + # Get the data for this variant + totals = variant_data["totals"] + sum_values = variant_data["sum_values"] + + # Calculate the parameters for the posterior Gamma distribution + alpha = a_prior + totals + beta = b_prior + sum_values + + # Create samples from the Gamma distribution + np.random.seed( + 42 + self.revenue_per_sale_test.variant_names.index(variant_name) + ) + if totals > 0: # Only generate samples if there are conversions + revenue_per_sale_distributions[variant_name] = ( + stats.gamma(alpha, scale=1 / beta if beta > 0 else 1) + .rvs(size=1000) + .tolist() + ) + else: + # If no conversions, use a default distribution + revenue_per_sale_distributions[variant_name] = np.zeros(1000).tolist() + + return ( + _df_summary, + _df_conv, + _df_arpu, + _df_rev_per_sale, + conversion_distributions, + arpu_distributions, + revenue_per_sale_distributions, + ) # Pydantic models for API @@ -321,7 +497,15 @@ async def analyze_experiment(experiment_input: ExperimentInput): # Get reports logger.info("Generating experiment reports") - df_summary, df_conv, df_arpu, df_rev_per_sale = experiment.get_reports() + ( + df_summary, + df_conv, + df_arpu, + df_rev_per_sale, + conversion_distributions, + arpu_distributions, + revenue_per_sale_distributions, + ) = experiment.get_reports() # Convert DataFrames to dictionaries summary_dict = df_summary.to_dict(orient="records") @@ -335,12 +519,27 @@ async def analyze_experiment(experiment_input: ExperimentInput): "conversion_stats": conv_dict, "arpu_stats": arpu_dict, "revenue_per_sale_stats": rev_per_sale_dict, + "conversion_distributions": conversion_distributions, + "arpu_distributions": arpu_distributions, + "revenue_per_sale_distributions": revenue_per_sale_distributions, } except Exception as e: error_msg = f"Error in experiment analysis: {str(e)}" stack_trace = traceback.format_exc() logger.error(f"{error_msg}\n{stack_trace}") - raise HTTPException(status_code=400, detail=error_msg) + + # Log more details about the input data for debugging + logger.error(f"Input data that caused the error: {experiment_input.dict()}") + + # Return more detailed error information + raise HTTPException( + status_code=400, + detail={ + "error": error_msg, + "traceback": stack_trace, + "input_data": experiment_input.dict(), + }, + ) @app.get("/health") diff --git a/static/app.js b/static/app.js index f672535..4e5f4ae 100644 --- a/static/app.js +++ b/static/app.js @@ -1,4 +1,11 @@ document.addEventListener('DOMContentLoaded', function() { + // Check if Chart.js is available + if (typeof Chart === 'undefined') { + console.error('Chart.js not loaded, charts will not be displayed'); + } else { + console.log('Chart.js is available, version:', Chart.version); + } + // DOM elements const experimentForm = document.getElementById('experimentForm'); const variantsContainer = document.getElementById('variantsContainer'); @@ -10,6 +17,30 @@ document.addEventListener('DOMContentLoaded', function() { const arpuTable = document.getElementById('arpuTable').querySelector('tbody'); const revenuePerSaleTable = document.getElementById('revenuePerSaleTable').querySelector('tbody'); const exportResultsBtn = document.getElementById('exportResultsBtn'); + const conversionDistributionChart = document.getElementById('conversionDistributionChart'); + const arpuDistributionChart = document.getElementById('arpuDistributionChart'); + const revenuePerSaleDistributionChart = document.getElementById('revenuePerSaleDistributionChart'); + + // Chart instances + let distributionChartInstance = null; + let arpuDistributionChartInstance = null; + let revenuePerSaleDistributionChartInstance = null; + + // Color palette for variants (baseline is always red) + const variantColors = { + baseline: 'rgba(220, 53, 69, 0.7)', // Red for baseline + others: [ + 'rgba(0, 123, 255, 0.7)', // Blue + 'rgba(40, 167, 69, 0.7)', // Green + 'rgba(255, 193, 7, 0.7)', // Yellow + 'rgba(165, 42, 42, 0.7)', // Brown + 'rgba(111, 66, 193, 0.7)', // Purple + 'rgba(23, 162, 184, 0.7)', // Cyan + 'rgba(255, 102, 0, 0.7)', // Orange + 'rgba(0, 128, 128, 0.7)', // Teal + 'rgba(128, 0, 128, 0.7)' // Magenta + ] + }; // Template for variant inputs const variantTemplate = document.getElementById('variantTemplate'); @@ -21,6 +52,31 @@ document.addEventListener('DOMContentLoaded', function() { addVariant('A'); addVariant('B'); + // Set default values for variants + setTimeout(() => { + // Get all variant cards + const variantCards = document.querySelectorAll('.variant-card'); + + // Set default values for variant A (baseline) + if (variantCards.length > 0) { + const variantA = variantCards[0]; + variantA.querySelector('.variant-impressions').value = 1000; + variantA.querySelector('.variant-conversions').value = 100; + variantA.querySelector('.variant-revenue').value = 100; + } + + // Set default values for variant B + if (variantCards.length > 1) { + const variantB = variantCards[1]; + variantB.querySelector('.variant-impressions').value = 1000; + variantB.querySelector('.variant-conversions').value = 120; + variantB.querySelector('.variant-revenue').value = 110; + } + + // Set variant A as baseline + baselineVariantInput.value = 'A'; + }, 100); + // Event listeners addVariantBtn.addEventListener('click', () => { const nextLetter = String.fromCharCode(65 + variantCounter); // A, B, C, ... @@ -103,6 +159,9 @@ document.addEventListener('DOMContentLoaded', function() { const data = await response.json(); + // Store the data globally for later use + window.lastAnalysisData = data; + // Display results displayResults(data); @@ -258,8 +317,14 @@ document.addEventListener('DOMContentLoaded', function() { row.classList.add('best-variant'); } + // Add posterior mean column + const posteriorMean = variant.posterior_mean !== undefined ? + formatPercentage(variant.posterior_mean) : + formatPercentage(data.summary.find(v => v.variant === variant.variant)?.conversion || 0); + row.innerHTML = ` ${variant.variant} + ${posteriorMean} ${variant.expected_loss.toLocaleString()} ${formatProbability(variant.prob_being_best)} ${formatLift(variant.lift)} @@ -268,6 +333,9 @@ document.addEventListener('DOMContentLoaded', function() { conversionTable.appendChild(row); }); + // Create conversion distribution chart + createConversionDistributionChart(data.conversion_distributions); + // Populate ARPU stats table data.arpu_stats.forEach(variant => { const row = document.createElement('tr'); @@ -280,8 +348,14 @@ document.addEventListener('DOMContentLoaded', function() { row.classList.add('best-variant'); } + // Add posterior mean column + const posteriorMean = variant.posterior_mean !== undefined ? + variant.posterior_mean.toFixed(4) : + data.summary.find(v => v.variant === variant.variant)?.arpu.toFixed(4) || "0.0000"; + row.innerHTML = ` ${variant.variant} + ${posteriorMean} ${variant.expected_loss} ${formatProbability(variant.prob_being_best)} ${formatLift(variant.lift)} @@ -290,6 +364,9 @@ document.addEventListener('DOMContentLoaded', function() { arpuTable.appendChild(row); }); + // Create ARPU distribution chart + createArpuDistributionChart(data.arpu_distributions); + // Populate revenue per sale stats table data.revenue_per_sale_stats.forEach(variant => { const row = document.createElement('tr'); @@ -302,8 +379,14 @@ document.addEventListener('DOMContentLoaded', function() { row.classList.add('best-variant'); } + // Add posterior mean column + const posteriorMean = variant.posterior_mean !== undefined ? + variant.posterior_mean.toFixed(4) : + data.summary.find(v => v.variant === variant.variant)?.avg_ticket.toFixed(4) || "0.0000"; + row.innerHTML = ` ${variant.variant} + ${posteriorMean} ${variant.expected_loss} ${formatProbability(variant.prob_being_best)} ${formatLift(variant.lift)} @@ -311,6 +394,9 @@ document.addEventListener('DOMContentLoaded', function() { revenuePerSaleTable.appendChild(row); }); + + // Create Revenue Per Sale distribution chart + createRevenuePerSaleDistributionChart(data.revenue_per_sale_distributions); } // Find the best variant based on a metric @@ -399,4 +485,542 @@ document.addEventListener('DOMContentLoaded', function() { }).join(','); }).join('\n'); } + + // Create conversion distribution chart + function createConversionDistributionChart(distributionData) { + // If there's an existing chart, destroy it + if (distributionChartInstance) { + distributionChartInstance.destroy(); + } + + // Prepare data for the chart + const datasets = []; + const baselineVariant = baselineVariantInput.value.trim(); + let colorIndex = 0; + + // Process each variant's distribution + for (const [variantName, distribution] of Object.entries(distributionData)) { + // Calculate kernel density estimation for smoother visualization + const kdePoints = calculateKDE(distribution); + + // Determine color based on whether it's baseline or not + let color; + if (variantName === baselineVariant) { + color = variantColors.baseline; + } else { + color = variantColors.others[colorIndex % variantColors.others.length]; + colorIndex++; + } + + datasets.push({ + label: variantName, + data: kdePoints, + borderColor: color, + backgroundColor: color.replace('0.7', '0.2'), + borderWidth: 2, + pointRadius: 0, + fill: true, + tension: 0.4 + }); + + // Find posterior mean for this variant + const variantData = window.lastAnalysisData?.conversion_stats.find(v => v.variant === variantName); + let posteriorMean = null; + + if (variantData) { + if (variantData.posterior_mean !== undefined) { + posteriorMean = variantData.posterior_mean; + } else { + // If posterior_mean is not available, try to use conversion rate from summary + const summaryData = window.lastAnalysisData?.summary.find(v => v.variant === variantName); + if (summaryData && summaryData.conversion !== undefined) { + posteriorMean = summaryData.conversion; + } + } + } + + if (posteriorMean !== null) { + // Add a vertical line dataset for the posterior mean + datasets.push({ + label: `${variantName} Mean`, + data: [ + { x: posteriorMean, y: 0 }, + { x: posteriorMean, y: 50 } // Use a high value to ensure it spans the chart + ], + borderColor: color, + borderWidth: 2, + borderDash: [6, 4], + pointRadius: 0, + fill: false, + tension: 0, + showLine: true + }); + } + } + + // Create chart options + const chartOptions = { + scales: { + x: { + type: 'linear', + title: { + display: true, + text: 'Conversion Rate', + padding: { + top: 15, + bottom: 10 + } + }, + ticks: { + callback: function(value) { + return (value * 100).toFixed(1) + '%'; + } + } + }, + y: { + title: { + display: true, + text: 'Density' + }, + beginAtZero: true + } + }, + plugins: { + tooltip: { + callbacks: { + title: function(context) { + const value = context[0].parsed.x; + return 'Conversion Rate: ' + (value * 100).toFixed(2) + '%'; + } + }, + filter: function(tooltipItem) { + // Hide tooltips for the mean lines + return !tooltipItem.dataset.label.includes('Mean'); + } + }, + legend: { + position: 'right', + align: 'start', + labels: { + boxWidth: 12, + font: { + size: 11 + }, + filter: function(legendItem, chartData) { + // Hide the mean lines from the legend + return !legendItem.text.includes('Mean'); + } + } + }, + title: { + display: true, + text: 'Conversion Rate Distributions', + font: { + size: 14 + }, + padding: { + top: 10, + bottom: 20 + } + } + }, + layout: { + padding: { + top: 30, // Add padding at the top for variant labels + right: 10, + bottom: 10, + left: 10 + } + }, + interaction: { + mode: 'nearest', + intersect: false + }, + responsive: true, + maintainAspectRatio: false + }; + + // Create the chart + try { + console.log('Creating chart with options:', chartOptions); + distributionChartInstance = new Chart(conversionDistributionChart, { + type: 'line', + data: { + datasets: datasets + }, + options: chartOptions + }); + } catch (error) { + console.error('Error creating chart:', error); + } + } + + // Calculate Kernel Density Estimation for smoother distribution visualization + function calculateKDE(data) { + // Sort the data + const sortedData = [...data].sort((a, b) => a - b); + + // Determine min and max for the range + const min = Math.max(0, sortedData[0] - 0.01); + const max = sortedData[sortedData.length - 1] + 0.01; + + // Generate points for the KDE + const points = []; + const bandwidth = 0.005; // Adjust based on your data + const numPoints = 100; + + for (let i = 0; i < numPoints; i++) { + const x = min + (i / (numPoints - 1)) * (max - min); + let density = 0; + + // Calculate density at point x + for (const value of sortedData) { + const z = (x - value) / bandwidth; + density += Math.exp(-0.5 * z * z) / (bandwidth * Math.sqrt(2 * Math.PI)); + } + + density /= sortedData.length; + points.push({x, y: density}); + } + + return points; + } + + // Create ARPU distribution chart + function createArpuDistributionChart(distributionData) { + // If there's an existing chart, destroy it + if (arpuDistributionChartInstance) { + arpuDistributionChartInstance.destroy(); + } + + // Prepare data for the chart + const datasets = []; + const baselineVariant = baselineVariantInput.value.trim(); + let colorIndex = 0; + + // Process each variant's distribution + for (const [variantName, distribution] of Object.entries(distributionData)) { + // Calculate kernel density estimation for smoother visualization + const kdePoints = calculateKDE(distribution); + + // Determine color based on whether it's baseline or not + let color; + if (variantName === baselineVariant) { + color = variantColors.baseline; + } else { + color = variantColors.others[colorIndex % variantColors.others.length]; + colorIndex++; + } + + datasets.push({ + label: variantName, + data: kdePoints, + borderColor: color, + backgroundColor: color.replace('0.7', '0.2'), + borderWidth: 2, + pointRadius: 0, + fill: true, + tension: 0.4 + }); + + // Find posterior mean for this variant + const variantData = window.lastAnalysisData?.arpu_stats.find(v => v.variant === variantName); + let posteriorMean = null; + + if (variantData) { + if (variantData.posterior_mean !== undefined) { + posteriorMean = variantData.posterior_mean; + } else { + // If posterior_mean is not available, try to use ARPU from summary + const summaryData = window.lastAnalysisData?.summary.find(v => v.variant === variantName); + if (summaryData && summaryData.arpu !== undefined) { + posteriorMean = summaryData.arpu; + } + } + } + + if (posteriorMean !== null) { + // Add a vertical line dataset for the posterior mean + datasets.push({ + label: `${variantName} Mean`, + data: [ + { x: posteriorMean, y: 0 }, + { x: posteriorMean, y: 50 } // Use a high value to ensure it spans the chart + ], + borderColor: color, + borderWidth: 2, + borderDash: [6, 4], + pointRadius: 0, + fill: false, + tension: 0, + showLine: true + }); + } + } + + // Create chart options + const chartOptions = { + scales: { + x: { + type: 'linear', + title: { + display: true, + text: 'ARPU (Average Revenue Per User)', + padding: { + top: 15, + bottom: 10 + } + }, + ticks: { + callback: function(value) { + return value.toFixed(2); + } + } + }, + y: { + title: { + display: true, + text: 'Density' + }, + beginAtZero: true + } + }, + plugins: { + tooltip: { + callbacks: { + title: function(context) { + const value = context[0].parsed.x; + return 'ARPU: ' + value.toFixed(4); + } + }, + filter: function(tooltipItem) { + // Hide tooltips for the mean lines + return !tooltipItem.dataset.label.includes('Mean'); + } + }, + legend: { + position: 'right', + align: 'start', + labels: { + boxWidth: 12, + font: { + size: 11 + }, + filter: function(legendItem, chartData) { + // Hide the mean lines from the legend + return !legendItem.text.includes('Mean'); + } + } + }, + title: { + display: true, + text: 'ARPU Distributions', + font: { + size: 14 + }, + padding: { + top: 10, + bottom: 20 + } + } + }, + layout: { + padding: { + top: 30, // Add padding at the top for variant labels + right: 10, + bottom: 10, + left: 10 + } + }, + interaction: { + mode: 'nearest', + intersect: false + }, + responsive: true, + maintainAspectRatio: false + }; + + // Create the chart + try { + console.log('Creating ARPU chart with options:', chartOptions); + arpuDistributionChartInstance = new Chart(arpuDistributionChart, { + type: 'line', + data: { + datasets: datasets + }, + options: chartOptions + }); + } catch (error) { + console.error('Error creating ARPU chart:', error); + } + } + + // Create Revenue Per Sale distribution chart + function createRevenuePerSaleDistributionChart(distributionData) { + // If there's an existing chart, destroy it + if (revenuePerSaleDistributionChartInstance) { + revenuePerSaleDistributionChartInstance.destroy(); + } + + // Prepare data for the chart + const datasets = []; + const baselineVariant = baselineVariantInput.value.trim(); + let colorIndex = 0; + + // Process each variant's distribution + for (const [variantName, distribution] of Object.entries(distributionData)) { + // Calculate kernel density estimation for smoother visualization + const kdePoints = calculateKDE(distribution); + + // Determine color based on whether it's baseline or not + let color; + if (variantName === baselineVariant) { + color = variantColors.baseline; + } else { + color = variantColors.others[colorIndex % variantColors.others.length]; + colorIndex++; + } + + datasets.push({ + label: variantName, + data: kdePoints, + borderColor: color, + backgroundColor: color.replace('0.7', '0.2'), + borderWidth: 2, + pointRadius: 0, + fill: true, + tension: 0.4 + }); + + // Find posterior mean for this variant + const variantData = window.lastAnalysisData?.revenue_per_sale_stats.find(v => v.variant === variantName); + let posteriorMean = null; + + if (variantData) { + if (variantData.posterior_mean !== undefined) { + posteriorMean = variantData.posterior_mean; + } else { + // If posterior_mean is not available, try to use avg_ticket from summary + const summaryData = window.lastAnalysisData?.summary.find(v => v.variant === variantName); + if (summaryData && summaryData.avg_ticket !== undefined) { + posteriorMean = summaryData.avg_ticket; + } + } + } + + if (posteriorMean !== null) { + // Add a vertical line dataset for the posterior mean + datasets.push({ + label: `${variantName} Mean`, + data: [ + { x: posteriorMean, y: 0 }, + { x: posteriorMean, y: 50 } // Use a high value to ensure it spans the chart + ], + borderColor: color, + borderWidth: 2, + borderDash: [6, 4], + pointRadius: 0, + fill: false, + tension: 0, + showLine: true + }); + } + } + + // Create chart options + const chartOptions = { + scales: { + x: { + type: 'linear', + title: { + display: true, + text: 'Revenue Per Sale', + padding: { + top: 15, + bottom: 10 + } + }, + ticks: { + callback: function(value) { + return value.toFixed(2); + } + } + }, + y: { + title: { + display: true, + text: 'Density' + }, + beginAtZero: true + } + }, + plugins: { + tooltip: { + callbacks: { + title: function(context) { + const value = context[0].parsed.x; + return 'Revenue Per Sale: ' + value.toFixed(4); + } + }, + filter: function(tooltipItem) { + // Hide tooltips for the mean lines + return !tooltipItem.dataset.label.includes('Mean'); + } + }, + legend: { + position: 'right', + align: 'start', + labels: { + boxWidth: 12, + font: { + size: 11 + }, + filter: function(legendItem, chartData) { + // Hide the mean lines from the legend + return !legendItem.text.includes('Mean'); + } + } + }, + title: { + display: true, + text: 'Revenue Per Sale Distributions', + font: { + size: 14 + }, + padding: { + top: 10, + bottom: 20 + } + } + }, + layout: { + padding: { + top: 30, // Add padding at the top for variant labels + right: 10, + bottom: 10, + left: 10 + } + }, + interaction: { + mode: 'nearest', + intersect: false + }, + responsive: true, + maintainAspectRatio: false + }; + + // Create the chart + try { + console.log('Creating Revenue Per Sale chart with options:', chartOptions); + revenuePerSaleDistributionChartInstance = new Chart(revenuePerSaleDistributionChart, { + type: 'line', + data: { + datasets: datasets + }, + options: chartOptions + }); + } catch (error) { + console.error('Error creating Revenue Per Sale chart:', error); + } + } }); \ No newline at end of file diff --git a/static/styles.css b/static/styles.css index 29b9637..b70d162 100644 --- a/static/styles.css +++ b/static/styles.css @@ -150,4 +150,37 @@ main { @keyframes spinner-border { to { transform: rotate(360deg); } +} + +/* Chart styling */ +.chart-container { + margin: 0; + padding: 0.5rem; + padding-bottom: 4rem; /* Increased from 3rem to 4rem */ + background-color: #fff; + border-radius: 0.5rem; + box-shadow: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.05); +} + +/* Distribution chart tooltip */ +#conversionDistributionChart { + max-height: 350px; +} + +/* Table in side-by-side layout */ +.col-md-5 .table-responsive { + max-height: 350px; + overflow-y: auto; +} + +/* Responsive chart adjustments */ +@media (max-width: 767.98px) { + .chart-container { + height: 300px !important; + margin-top: 1.5rem; + } + + .col-md-5 .table-responsive { + max-height: none; + } } \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index a45e14b..d7f553b 100644 --- a/templates/index.html +++ b/templates/index.html @@ -136,50 +136,84 @@
Summary
- +
Conversion Statistics
-
- - - - - - - - - - - - -
VariantExpected LossProbability of Being BestLift vs Baseline
+
+ +
+
+ + + + + + + + + + + + + +
VariantPosterior MeanExpected LossProbability of Being BestLift vs Baseline
+
+
+ + +
+
+ +
+
+ + Dashed lines represent posterior means +
+
- +
ARPU Statistics
-
- - - - - - - - - - - - -
VariantExpected LossProbability of Being BestLift vs Baseline
+
+ +
+
+ + + + + + + + + + + + + +
VariantPosterior MeanExpected LossProbability of Being BestLift vs Baseline
+
+
+ + +
+
+ +
+
+ + Dashed lines represent posterior means +
+
@@ -190,20 +224,37 @@
ARPU Statistics
Revenue Per Sale Statistics
-
- - - - - - - - - - - - -
VariantExpected LossProbability of Being BestLift vs Baseline
+
+ +
+
+ + + + + + + + + + + + + +
VariantPosterior MeanExpected LossProbability of Being BestLift vs Baseline
+
+
+ + +
+
+ +
+
+ + Dashed lines represent posterior means +
+
@@ -243,7 +294,17 @@
Variant
- + + + \ No newline at end of file From c4a688f3de54bdd12e2db87539c282e67614a236 Mon Sep 17 00:00:00 2001 From: Luan Fernandes Date: Mon, 7 Apr 2025 22:41:59 -0300 Subject: [PATCH 3/4] distributions good now --- static/app.js | 192 +++++++++++-------------------------------- templates/index.html | 23 ++---- 2 files changed, 56 insertions(+), 159 deletions(-) diff --git a/static/app.js b/static/app.js index 4e5f4ae..0ded91b 100644 --- a/static/app.js +++ b/static/app.js @@ -498,11 +498,20 @@ document.addEventListener('DOMContentLoaded', function() { const baselineVariant = baselineVariantInput.value.trim(); let colorIndex = 0; - // Process each variant's distribution + // Calculate KDE points and find maximum density + let maxDensity = 0; + const kdePointsMap = {}; + for (const [variantName, distribution] of Object.entries(distributionData)) { - // Calculate kernel density estimation for smoother visualization const kdePoints = calculateKDE(distribution); + kdePointsMap[variantName] = kdePoints; + // Find maximum density across all variants + maxDensity = Math.max(maxDensity, ...kdePoints.map(point => point.y)); + } + + // Create datasets for each variant + for (const [variantName, distribution] of Object.entries(distributionData)) { // Determine color based on whether it's baseline or not let color; if (variantName === baselineVariant) { @@ -512,9 +521,10 @@ document.addEventListener('DOMContentLoaded', function() { colorIndex++; } + // Add the distribution curve dataset datasets.push({ label: variantName, - data: kdePoints, + data: kdePointsMap[variantName], borderColor: color, backgroundColor: color.replace('0.7', '0.2'), borderWidth: 2, @@ -522,40 +532,6 @@ document.addEventListener('DOMContentLoaded', function() { fill: true, tension: 0.4 }); - - // Find posterior mean for this variant - const variantData = window.lastAnalysisData?.conversion_stats.find(v => v.variant === variantName); - let posteriorMean = null; - - if (variantData) { - if (variantData.posterior_mean !== undefined) { - posteriorMean = variantData.posterior_mean; - } else { - // If posterior_mean is not available, try to use conversion rate from summary - const summaryData = window.lastAnalysisData?.summary.find(v => v.variant === variantName); - if (summaryData && summaryData.conversion !== undefined) { - posteriorMean = summaryData.conversion; - } - } - } - - if (posteriorMean !== null) { - // Add a vertical line dataset for the posterior mean - datasets.push({ - label: `${variantName} Mean`, - data: [ - { x: posteriorMean, y: 0 }, - { x: posteriorMean, y: 50 } // Use a high value to ensure it spans the chart - ], - borderColor: color, - borderWidth: 2, - borderDash: [6, 4], - pointRadius: 0, - fill: false, - tension: 0, - showLine: true - }); - } } // Create chart options @@ -582,7 +558,8 @@ document.addEventListener('DOMContentLoaded', function() { display: true, text: 'Density' }, - beginAtZero: true + beginAtZero: true, + suggestedMax: maxDensity * 1.1 } }, plugins: { @@ -592,10 +569,6 @@ document.addEventListener('DOMContentLoaded', function() { const value = context[0].parsed.x; return 'Conversion Rate: ' + (value * 100).toFixed(2) + '%'; } - }, - filter: function(tooltipItem) { - // Hide tooltips for the mean lines - return !tooltipItem.dataset.label.includes('Mean'); } }, legend: { @@ -605,10 +578,6 @@ document.addEventListener('DOMContentLoaded', function() { boxWidth: 12, font: { size: 11 - }, - filter: function(legendItem, chartData) { - // Hide the mean lines from the legend - return !legendItem.text.includes('Mean'); } } }, @@ -626,7 +595,7 @@ document.addEventListener('DOMContentLoaded', function() { }, layout: { padding: { - top: 30, // Add padding at the top for variant labels + top: 30, right: 10, bottom: 10, left: 10 @@ -642,7 +611,6 @@ document.addEventListener('DOMContentLoaded', function() { // Create the chart try { - console.log('Creating chart with options:', chartOptions); distributionChartInstance = new Chart(conversionDistributionChart, { type: 'line', data: { @@ -698,11 +666,20 @@ document.addEventListener('DOMContentLoaded', function() { const baselineVariant = baselineVariantInput.value.trim(); let colorIndex = 0; - // Process each variant's distribution + // Calculate KDE points and find maximum density + let maxDensity = 0; + const kdePointsMap = {}; + for (const [variantName, distribution] of Object.entries(distributionData)) { - // Calculate kernel density estimation for smoother visualization const kdePoints = calculateKDE(distribution); + kdePointsMap[variantName] = kdePoints; + // Find maximum density across all variants + maxDensity = Math.max(maxDensity, ...kdePoints.map(point => point.y)); + } + + // Create datasets for each variant + for (const [variantName, distribution] of Object.entries(distributionData)) { // Determine color based on whether it's baseline or not let color; if (variantName === baselineVariant) { @@ -712,9 +689,10 @@ document.addEventListener('DOMContentLoaded', function() { colorIndex++; } + // Add the distribution curve dataset datasets.push({ label: variantName, - data: kdePoints, + data: kdePointsMap[variantName], borderColor: color, backgroundColor: color.replace('0.7', '0.2'), borderWidth: 2, @@ -722,40 +700,6 @@ document.addEventListener('DOMContentLoaded', function() { fill: true, tension: 0.4 }); - - // Find posterior mean for this variant - const variantData = window.lastAnalysisData?.arpu_stats.find(v => v.variant === variantName); - let posteriorMean = null; - - if (variantData) { - if (variantData.posterior_mean !== undefined) { - posteriorMean = variantData.posterior_mean; - } else { - // If posterior_mean is not available, try to use ARPU from summary - const summaryData = window.lastAnalysisData?.summary.find(v => v.variant === variantName); - if (summaryData && summaryData.arpu !== undefined) { - posteriorMean = summaryData.arpu; - } - } - } - - if (posteriorMean !== null) { - // Add a vertical line dataset for the posterior mean - datasets.push({ - label: `${variantName} Mean`, - data: [ - { x: posteriorMean, y: 0 }, - { x: posteriorMean, y: 50 } // Use a high value to ensure it spans the chart - ], - borderColor: color, - borderWidth: 2, - borderDash: [6, 4], - pointRadius: 0, - fill: false, - tension: 0, - showLine: true - }); - } } // Create chart options @@ -782,7 +726,8 @@ document.addEventListener('DOMContentLoaded', function() { display: true, text: 'Density' }, - beginAtZero: true + beginAtZero: true, + suggestedMax: maxDensity * 1.1 } }, plugins: { @@ -792,10 +737,6 @@ document.addEventListener('DOMContentLoaded', function() { const value = context[0].parsed.x; return 'ARPU: ' + value.toFixed(4); } - }, - filter: function(tooltipItem) { - // Hide tooltips for the mean lines - return !tooltipItem.dataset.label.includes('Mean'); } }, legend: { @@ -805,10 +746,6 @@ document.addEventListener('DOMContentLoaded', function() { boxWidth: 12, font: { size: 11 - }, - filter: function(legendItem, chartData) { - // Hide the mean lines from the legend - return !legendItem.text.includes('Mean'); } } }, @@ -826,7 +763,7 @@ document.addEventListener('DOMContentLoaded', function() { }, layout: { padding: { - top: 30, // Add padding at the top for variant labels + top: 30, right: 10, bottom: 10, left: 10 @@ -842,7 +779,6 @@ document.addEventListener('DOMContentLoaded', function() { // Create the chart try { - console.log('Creating ARPU chart with options:', chartOptions); arpuDistributionChartInstance = new Chart(arpuDistributionChart, { type: 'line', data: { @@ -867,11 +803,20 @@ document.addEventListener('DOMContentLoaded', function() { const baselineVariant = baselineVariantInput.value.trim(); let colorIndex = 0; - // Process each variant's distribution + // Calculate KDE points and find maximum density + let maxDensity = 0; + const kdePointsMap = {}; + for (const [variantName, distribution] of Object.entries(distributionData)) { - // Calculate kernel density estimation for smoother visualization const kdePoints = calculateKDE(distribution); + kdePointsMap[variantName] = kdePoints; + // Find maximum density across all variants + maxDensity = Math.max(maxDensity, ...kdePoints.map(point => point.y)); + } + + // Create datasets for each variant + for (const [variantName, distribution] of Object.entries(distributionData)) { // Determine color based on whether it's baseline or not let color; if (variantName === baselineVariant) { @@ -881,9 +826,10 @@ document.addEventListener('DOMContentLoaded', function() { colorIndex++; } + // Add the distribution curve dataset datasets.push({ label: variantName, - data: kdePoints, + data: kdePointsMap[variantName], borderColor: color, backgroundColor: color.replace('0.7', '0.2'), borderWidth: 2, @@ -891,40 +837,6 @@ document.addEventListener('DOMContentLoaded', function() { fill: true, tension: 0.4 }); - - // Find posterior mean for this variant - const variantData = window.lastAnalysisData?.revenue_per_sale_stats.find(v => v.variant === variantName); - let posteriorMean = null; - - if (variantData) { - if (variantData.posterior_mean !== undefined) { - posteriorMean = variantData.posterior_mean; - } else { - // If posterior_mean is not available, try to use avg_ticket from summary - const summaryData = window.lastAnalysisData?.summary.find(v => v.variant === variantName); - if (summaryData && summaryData.avg_ticket !== undefined) { - posteriorMean = summaryData.avg_ticket; - } - } - } - - if (posteriorMean !== null) { - // Add a vertical line dataset for the posterior mean - datasets.push({ - label: `${variantName} Mean`, - data: [ - { x: posteriorMean, y: 0 }, - { x: posteriorMean, y: 50 } // Use a high value to ensure it spans the chart - ], - borderColor: color, - borderWidth: 2, - borderDash: [6, 4], - pointRadius: 0, - fill: false, - tension: 0, - showLine: true - }); - } } // Create chart options @@ -951,7 +863,8 @@ document.addEventListener('DOMContentLoaded', function() { display: true, text: 'Density' }, - beginAtZero: true + beginAtZero: true, + suggestedMax: maxDensity * 1.1 } }, plugins: { @@ -961,10 +874,6 @@ document.addEventListener('DOMContentLoaded', function() { const value = context[0].parsed.x; return 'Revenue Per Sale: ' + value.toFixed(4); } - }, - filter: function(tooltipItem) { - // Hide tooltips for the mean lines - return !tooltipItem.dataset.label.includes('Mean'); } }, legend: { @@ -974,10 +883,6 @@ document.addEventListener('DOMContentLoaded', function() { boxWidth: 12, font: { size: 11 - }, - filter: function(legendItem, chartData) { - // Hide the mean lines from the legend - return !legendItem.text.includes('Mean'); } } }, @@ -995,7 +900,7 @@ document.addEventListener('DOMContentLoaded', function() { }, layout: { padding: { - top: 30, // Add padding at the top for variant labels + top: 30, right: 10, bottom: 10, left: 10 @@ -1011,7 +916,6 @@ document.addEventListener('DOMContentLoaded', function() { // Create the chart try { - console.log('Creating Revenue Per Sale chart with options:', chartOptions); revenuePerSaleDistributionChartInstance = new Chart(revenuePerSaleDistributionChart, { type: 'line', data: { diff --git a/templates/index.html b/templates/index.html index d7f553b..65426e4 100644 --- a/templates/index.html +++ b/templates/index.html @@ -68,6 +68,11 @@
About

Multivariant Experiment Analysis

+ +
@@ -163,15 +168,11 @@
Conversion Statistics
- +
-
- - Dashed lines represent posterior means -
@@ -204,15 +205,11 @@
ARPU Statistics
- +
-
- - Dashed lines represent posterior means -
@@ -245,15 +242,11 @@
Revenue Per Sale Statistics
- +
-
- - Dashed lines represent posterior means -
From 51139c57203d9dd89296e79e79adb5c264b4bced Mon Sep 17 00:00:00 2001 From: Luan Fernandes Date: Mon, 7 Apr 2025 23:05:57 -0300 Subject: [PATCH 4/4] fix test --- test_main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test_main.py b/test_main.py index 951af9d..c09073d 100644 --- a/test_main.py +++ b/test_main.py @@ -89,8 +89,19 @@ def test_get_reports(): ] experiment = WebsiteExperiment(variants=variants, baseline_variant="A") experiment.run(show=False) - df_summary, df_conv, df_arpu, df_rev_per_sale = experiment.get_reports() + ( + df_summary, + df_conv, + df_arpu, + df_rev_per_sale, + conv_dist, + arpu_dist, + rev_per_sale_dist, + ) = experiment.get_reports() assert not df_summary.empty assert not df_conv.empty assert not df_arpu.empty assert not df_rev_per_sale.empty + assert conv_dist is not None + assert arpu_dist is not None + assert rev_per_sale_dist is not None