diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5e4e1e0..fcdc799 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ on: branches: [ master ] jobs: - build: + test: runs-on: ubuntu-latest strategy: matrix: @@ -29,4 +29,46 @@ jobs: - name: Run tests run: | - python -m unittest discover tests \ No newline at end of file + pytest + + build: + needs: test + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for Docker + id: meta + uses: docker/metadata-action@v3 + with: + images: ghcr.io/${{ github.repository }} + tags: | + type=sha,format=long + type=ref,event=branch + type=ref,event=tag + latest + + - name: Build and push Docker image + uses: docker/build-push-action@v2 + with: + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 7ed2a06..45c57fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,8 +8,6 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . -EXPOSE 8501 +EXPOSE 8000 -ENV PYTHONPATH=/app - -CMD ["streamlit", "run", "Homepage.py"] +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/Makefile b/Makefile deleted file mode 100644 index 53eaf57..0000000 --- a/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -install-lib: - rm -rf dist - pip3 install wheel - pip3 install setuptools - pip3 install twine - python setup.py bdist_wheel - pip3 install --force-reinstall dist/**.whl - -test: - python -m unittest discover - -lint: - black . - flake8 --max-line-length=88 bot --show-source --exit-zero --ignore=NF001 - -types: - pytype --keep-going bot - -run: - docker run -p 8501:8501 janus diff --git a/README.md b/README.md index 32e9233..50dd7e6 100644 --- a/README.md +++ b/README.md @@ -4,53 +4,103 @@ ![Python Version](https://img.shields.io/badge/python-3.7%20%7C%203.8-brightgreen.svg) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -> **Warning** -> We're making a major breaking change in the project to use the [bayesian-testing](https://github.com/Matt52/bayesian-testing) library for better experiment management, and a full stack application will be developed to build a website for Janus. -> Please, consider using the distributed [package in pypi](https://pypi.org/project/janus-web-ab-testing/), which comes from the `evolve-janus-backend` branch. +Janus is a Bayesian A/B Testing application that supports multivariant experiments. It's designed to help you make data-driven decisions by analyzing conversion rates, revenue, and ARPU (Average Revenue Per User) across multiple variants. +## Features -Janus is an A/B Test Engine to be used in a variety use cases, especially to measure conversion, ticket and ARPU difference between variants, i.e, typical metrics for tests in marketplaces. The engine name is an analogy to _Janus_, the god of changes and transitions. - -This library came as an ideia of separate the statistical calculations in A/B Tests from other code that is typically used to manage tests and execute queries over the company's database, and hence usually carry proprietary code and even business logic, which should not be open sourced. There was the bud to build this library and get it open sourced. - -Checkout the [streamlit app](https://lgabs-janus-homepage-31diny.streamlit.app/) from this repo. +- **Multivariant Testing**: Compare multiple variants simultaneously (not just A vs B) +- **Bayesian Statistics**: Get more insightful results faster than traditional frequentist methods +- **Key Metrics Analysis**: + - Conversion rate + - Revenue for conversions + - Average revenue per impression (ARPU) +- **Modern Web Interface**: Clean, responsive UI built with Bootstrap +- **FastAPI Backend**: High-performance API for experiment analysis ## Installation -Open a terminal, clone this repository into your machine and stay into the project directory. - -Using a virtual environment is a good practice, but it is optional. If you enjoy it, go ahead and create a virtual environment by typing: -``` -python3 -m venv venv -r requirements.txt -``` -Once it is created, you must now activate the environment by using: -``` -source venv/bin/activate +1. Clone this repository: +```bash +git clone https://github.com/lgabs/janus.git +cd janus ``` -Now, you can install our lib (if you are not using virtual env, go straight to this command): + +2. Create a virtual environment (optional but recommended): +```bash +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate ``` -make install + +3. Install dependencies: +```bash +pip install -r requirements.txt ``` -And that's it! Now, inside our environment, we can import the `janus` lib inside our scripts with plain `import janus` etc. Try to test using the same code on `experiment_example.ipynb` notebook here or in a plain terminal. +## Running the Application + +### Using Python directly: + +```bash +uvicorn main:app --reload +``` -## Using as an Application +Then open your browser and navigate to `http://localhost:8000` -You can use _janus_ as a streamlit product in two ways: +### Using Docker: -### 1. Using Docker (Recommended) -Build and run the application locally using Docker: ```bash # Build the Docker image docker build -t janus . # Run the container -docker run -p 8501:8501 janus +docker run -p 8000:8000 janus ``` -Or use `make run`. Then open your browser and navigate to `http://localhost:8501` +### Using Docker Compose (Recommended): + +```bash +# Start the application +docker-compose up -d + +# View logs +docker-compose logs -f + +# Stop the application +docker-compose down +``` + +Then open your browser and navigate to `http://localhost:8000` + +## How to Use + +1. Enter your baseline variant name (e.g., "A" or "Control") +2. Add your variants with their respective data: + - Name: A unique identifier for the variant + - Impressions: Total number of users/sessions exposed to this variant + - Conversions: Number of successful conversions + - Revenue: Total revenue generated by this variant + +3. Click "Run Analysis" to see the results: + - Summary statistics for each variant + - Conversion statistics with probability of being the best variant + - ARPU statistics with probability of being the best variant + +4. Export your results as CSV if needed + +## API Documentation + +The API documentation is available at `/docs` when the application is running. + +## Technical Details + +This application uses: +- [FastAPI](https://fastapi.tiangolo.com/) for the backend +- [Bayesian-Testing](https://github.com/Matt52/bayesian-testing) for statistical calculations +- Bootstrap 5 for the frontend UI +- Jinja2 for HTML templating ## References + * [What is A/B Testing](https://en.wikipedia.org/wiki/A/B_testing) * The bayesian calculations were implemented based on [this VWO white paper](https://cdn2.hubspot.net/hubfs/310840/VWO_SmartStats_technical_whitepaper.pdf) * [VWO Website](https://vwo.com/) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1f52249 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,20 @@ +version: '3.8' + +services: + janus: + build: + context: . + dockerfile: Dockerfile + ports: + - "8000:8000" + volumes: + - .:/app + environment: + - PYTHONPATH=/app + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s \ No newline at end of file diff --git a/examples/.ipynb_checkpoints/Testing Examples and VWO cases-checkpoint.ipynb b/examples/.ipynb_checkpoints/Testing Examples and VWO cases-checkpoint.ipynb deleted file mode 100644 index aadc8fa..0000000 --- a/examples/.ipynb_checkpoints/Testing Examples and VWO cases-checkpoint.ipynb +++ /dev/null @@ -1,967 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Imports (make sure to install janus)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:38.828666Z", - "start_time": "2020-12-04T17:16:37.999997Z" - } - }, - "outputs": [], - "source": [ - "import janus\n", - "from janus.stats.experiment import Experiment, Variant\n", - "import pandas as pd\n", - "\n", - "import logging\n", - "logging.basicConfig(level=logging.INFO)\n", - "\n", - "pd.set_option('max_colwidth', -1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## A/A Test from VWO paper\n", - "\n", - "\"To begin I ran an A/A test comparing a sales process with a 5% conversion rate\n", - "and a mean of $\\$25$ revenue/sale. In this example, the standard deviation of the\n", - "data is $\\$5.6$ (compared to a mean revenue/visitor of $1.25).\n", - "According to Evan Miller’s t-test calculator at http://www.evanmiller.\n", - "org/ab-testing/t-test.html, this test will require 4,250 data points (per\n", - "sample) to resolve a 20% lift.\n", - "In the simulation, the Bayesian test finished with an average of about 3,292\n", - "data points (10th/90th percentile 250/7,750). The distribution is plottedin Figure 8\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAP+ElEQVR4nO3cbaxdVZ3H8e9P6sNEHQHpNKRtpow2TvCFSBrAaCYqsRQwUyZRUzMZGtOkb2qiiclYxhfMqCTlxchIZiTpSGMxjkh8CI2QwQ5CzLzg4aLI43R6RQhtgF4posbIDPifF2fVOdZ7uPfS23N6u76f5OTs/d/rnL32yr6/u+++65xUFZKkPrxq0h2QJI2PoS9JHTH0Jakjhr4kdcTQl6SOLJt0B17OGWecUWvWrJl0N47dvn2D57e9bbL9kNSF++6772dVtXy2bSd06K9Zs4apqalJd+PYvfe9g+c775xkLyR1IskTo7Z5e0eSOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpyQn8i91it2X7LRPb7+I5LJ7JfSZqLV/qS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHZlX6Cd5PMmDSe5PMtVqpyfZm2R/ez6t1ZPk2iTTSR5Icu7Q+2xu7fcn2Xx8DkmSNMpCrvTfV1XnVNW6tr4duL2q1gK3t3WAi4G17bEVuA4GvySAK4HzgfOAK4/8opAkjcex3N7ZCOxuy7uBy4bqN9TAXcCpSc4ELgL2VtXhqnoO2AtsOIb9S5IWaL6hX8D3ktyXZGurraiqp9ry08CKtrwSeHLotQdabVT99yTZmmQqydTMzMw8uydJmo9l82z3nqo6mORPgL1J/mt4Y1VVklqMDlXVTmAnwLp16xblPSVJA/O60q+qg+35EPAdBvfkn2m3bWjPh1rzg8DqoZevarVRdUnSmMwZ+klen+SNR5aB9cBDwB7gyAyczcDNbXkPcHmbxXMB8Hy7DXQbsD7Jae0fuOtbTZI0JvO5vbMC+E6SI+3/rar+Pcm9wE1JtgBPAB9p7W8FLgGmgV8DHwOoqsNJPgfc29p9tqoOL9qRSJLmNGfoV9VjwDtmqT8LXDhLvYBtI95rF7Br4d2UJC0GP5ErSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVk3qGf5JQkP0ry3bZ+VpK7k0wn+UaS17T6a9v6dNu+Zug9rmj1fUkuWvSjkSS9rIVc6X8CeHRo/Wrgmqp6K/AcsKXVtwDPtfo1rR1JzgY2AW8HNgBfSnLKsXVfkrQQ8wr9JKuAS4Evt/UA7we+2ZrsBi5ryxvbOm37ha39RuDGqnqhqn4KTAPnLcIxSJLmab5X+v8E/C3w27b+ZuDnVfViWz8ArGzLK4EnAdr251v739Vnec3vJNmaZCrJ1MzMzPyPRJI0pzlDP8kHgUNVdd8Y+kNV7ayqdVW1bvny5ePYpSR1Y9k82rwb+MsklwCvA/4Y+CJwapJl7Wp+FXCwtT8IrAYOJFkGvAl4dqh+xPBrJEljMOeVflVdUVWrqmoNg3/Efr+q/hq4A/hQa7YZuLkt72nrtO3fr6pq9U1tds9ZwFrgnkU7EknSnOZzpT/Kp4Ebk3we+BFwfatfD3w1yTRwmMEvCqrq4SQ3AY8ALwLbquqlY9i/JGmBFhT6VXUncGdbfoxZZt9U1W+AD494/VXAVQvtpCRpcfiJXEnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI3OGfpLXJbknyY+TPJzkH1r9rCR3J5lO8o0kr2n117b16bZ9zdB7XdHq+5JcdNyOSpI0q/lc6b8AvL+q3gGcA2xIcgFwNXBNVb0VeA7Y0tpvAZ5r9WtaO5KcDWwC3g5sAL6U5JRFPBZJ0hzmDP0a+FVbfXV7FPB+4Jutvhu4rC1vbOu07RcmSavfWFUvVNVPgWngvMU4CEnS/Mzrnn6SU5LcDxwC9gI/AX5eVS+2JgeAlW15JfAkQNv+PPDm4fosr5EkjcG8Qr+qXqqqc4BVDK7O//x4dSjJ1iRTSaZmZmaO124kqUsLmr1TVT8H7gDeBZyaZFnbtAo42JYPAqsB2vY3Ac8O12d5zfA+dlbVuqpat3z58oV0T5I0h/nM3lme5NS2/EfAB4BHGYT/h1qzzcDNbXlPW6dt/35VVatvarN7zgLWAvcs0nFIkuZh2dxNOBPY3WbavAq4qaq+m+QR4MYknwd+BFzf2l8PfDXJNHCYwYwdqurhJDcBjwAvAtuq6qXFPRxJ0suZM/Sr6gHgnbPUH2OW2TdV9RvgwyPe6yrgqoV3U5K0GPxEriR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkfm89XKWqA122/5vfUbH3sWgE1H1Rfb4zsuPa7vL2np80pfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVkztBPsjrJHUkeSfJwkk+0+ulJ9ibZ355Pa/UkuTbJdJIHkpw79F6bW/v9STYfv8OSJM1mPlf6LwKfqqqzgQuAbUnOBrYDt1fVWuD2tg5wMbC2PbYC18HglwRwJXA+cB5w5ZFfFJKk8Zgz9Kvqqar6YVv+JfAosBLYCOxuzXYDl7XljcANNXAXcGqSM4GLgL1VdbiqngP2AhsW82AkSS9vQff0k6wB3gncDayoqqfapqeBFW15JfDk0MsOtNqo+tH72JpkKsnUzMzMQronSZrDvEM/yRuAbwGfrKpfDG+rqgJqMTpUVTural1VrVu+fPlivKUkqZlX6Cd5NYPA/1pVfbuVn2m3bWjPh1r9ILB66OWrWm1UXZI0JvOZvRPgeuDRqvrC0KY9wJEZOJuBm4fql7dZPBcAz7fbQLcB65Oc1v6Bu77VJEljsmwebd4N/A3wYJL7W+3vgB3ATUm2AE8AH2nbbgUuAaaBXwMfA6iqw0k+B9zb2n22qg4vxkFIkuZnztCvqv8EMmLzhbO0L2DbiPfaBexaSAclSYvHT+RKUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR2ZM/ST7EpyKMlDQ7XTk+xNsr89n9bqSXJtkukkDyQ5d+g1m1v7/Uk2H5/DkSS9nGXzaPMV4J+BG4Zq24Hbq2pHku1t/dPAxcDa9jgfuA44P8npwJXAOqCA+5LsqarnFutABGu23zKxfT++49KJ7VvS/M15pV9VPwAOH1XeCOxuy7uBy4bqN9TAXcCpSc4ELgL2VtXhFvR7gQ2L0H9J0gK80nv6K6rqqbb8NLCiLa8Enhxqd6DVRtX/QJKtSaaSTM3MzLzC7kmSZnPM/8itqmJwy2ZRVNXOqlpXVeuWL1++WG8rSeKVh/4z7bYN7flQqx8EVg+1W9Vqo+qSpDF6paG/BzgyA2czcPNQ/fI2i+cC4Pl2G+g2YH2S09pMn/WtJkkaozln7yT5OvBe4IwkBxjMwtkB3JRkC/AE8JHW/FbgEmAa+DXwMYCqOpzkc8C9rd1nq+rofw5Lko6zOUO/qj46YtOFs7QtYNuI99kF7FpQ7yRJi8pP5EpSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktSROb9PX5qPNdtvmch+H99x6UT2Ky1VXulLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOuKHs7SkTepDYeAHw7Q0eaUvSR0x9CWpI4a+JHVk7Pf0k2wAvgicAny5qnaMuw/SYvBL5rQUjTX0k5wC/AvwAeAAcG+SPVX1yDj7IS1l/vNax2LcV/rnAdNV9RhAkhuBjYChLy0B/nWz9I079FcCTw6tHwDOH26QZCuwta3+Ksm+Y9jfGcDPjuH1i+JdRxau/uAku3G0E2JsTlCOzWgTGZtcPe49viIn0nnzp6M2nHDz9KtqJ7BzMd4ryVRVrVuM9zrZODajOTajOTajLZWxGffsnYPA6qH1Va0mSRqDcYf+vcDaJGcleQ2wCdgz5j5IUrfGenunql5M8nHgNgZTNndV1cPHcZeLcpvoJOXYjObYjObYjLYkxiZVNek+SJLGxE/kSlJHDH1J6shJGfpJNiTZl2Q6yfZJ92fSkjye5MEk9yeZarXTk+xNsr89nzbpfo5Dkl1JDiV5aKg261hk4Np2Hj2Q5NzJ9fz4GzE2f5/kYDt37k9yydC2K9rY7Ety0WR6PR5JVie5I8kjSR5O8olWX3LnzkkX+kNf9XAxcDbw0SRnT7ZXJ4T3VdU5Q/OItwO3V9Va4Pa23oOvABuOqo0ai4uBte2xFbhuTH2clK/wh2MDcE07d86pqlsB2s/UJuDt7TVfaj97J6sXgU9V1dnABcC2NgZL7tw56UKfoa96qKr/AY581YN+30Zgd1veDVw2ua6MT1X9ADh8VHnUWGwEbqiBu4BTk5w5lo5OwIixGWUjcGNVvVBVPwWmGfzsnZSq6qmq+mFb/iXwKINvGFhy587JGPqzfdXDygn15URRwPeS3Ne+5gJgRVU91ZafBlZMpmsnhFFj4bk08PF2i2LX0G3AbscmyRrgncDdLMFz52QMff2h91TVuQz+5NyW5C+GN9Zg3q5zd3EsZnEd8BbgHOAp4B8n2psJS/IG4FvAJ6vqF8Pblsq5czKGvl/1cJSqOtieDwHfYfBn+DNH/txsz4cm18OJGzUW3Z9LVfVMVb1UVb8F/pX/v4XT3dgkeTWDwP9aVX27lZfcuXMyhr5f9TAkyeuTvPHIMrAeeIjBmGxuzTYDN0+mhyeEUWOxB7i8zcS4AHh+6E/5Lhx1H/qvGJw7MBibTUlem+QsBv+wvGfc/RuXJAGuBx6tqi8MbVp6505VnXQP4BLgv4GfAJ+ZdH8mPBZ/Bvy4PR4+Mh7AmxnMNtgP/Adw+qT7Oqbx+DqD2xT/y+A+65ZRYwGEwUywnwAPAusm3f8JjM1X27E/wCDIzhxq/5k2NvuAiyfd/+M8Nu9hcOvmAeD+9rhkKZ47fg2DJHXkZLy9I0kawdCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHfk/0olAwddTS8sAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "_n = np.random.exponential(25, 10000)\n", - "mean = np.mean(_n)\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "plt.hist(_n)\n", - "plt.axvline(mean, color='red')" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "size = 1000\n", - "\n", - "def get_data(p, size, mu, std):\n", - " lambdas = np.random.binomial(1, p, size)\n", - " revenues = np.random.exponential(mu, size)\n", - " \n", - " return (lambdas, revenues)\n", - "\n", - "p = 0.05\n", - "mu_revenue = 25.0\n", - "std_revenue = 5.6\n", - "\n", - "a1_lambda, a1_revenue = get_data(p, size, mu_revenue, std_revenue)\n", - "a2_lambda, a2_revenue = get_data(p, size, mu_revenue, std_revenue)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
revenuesalesconversion
meansumcount
alternative
A125.3447655110000.051
A224.9116664710000.047
\n", - "
" - ], - "text/plain": [ - " revenue sales conversion\n", - " mean sum count \n", - "alternative \n", - "A1 25.344765 51 1000 0.051 \n", - "A2 24.911666 47 1000 0.047 " - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = pd.DataFrame(data={\n", - " \"alternative\": ['A1']*size,\n", - " \"revenue\": a1_revenue, \n", - " \"sales\": a1_lambda\n", - "})\n", - "_df2 = pd.DataFrame(data={\n", - " \"alternative\": ['A2']*size,\n", - " \"revenue\": a2_revenue, \n", - " \"sales\": a2_lambda\n", - "})\n", - "\n", - "df2 = pd.concat([df2, _df2])\n", - "df2['sum_revenue'] = df2['revenue']\n", - "\n", - "__df = df2.groupby(\"alternative\").agg({\"revenue\": \"mean\", \"sales\": [\"sum\", \"count\"]})\n", - "__df[\"conversion\"] = __df['sales']['sum'] / __df['sales']['count'] \n", - "\n", - "__df" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "experiment = Experiment(name=\"A/A Test\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A1')" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:root:INITIALIZING experiment 'A/A Test' evaluation...\n", - "INFO:root:consolidating metrics over variants...\n", - "INFO:root:calculating bayesian statistics over variants for metric conversion\n", - "INFO:root:sampling data for variant A1...\n", - "INFO:root:sampling data for variant A2...\n", - "INFO:root:calculating probability to beat and expected loss for variant A1...\n", - "INFO:root:calculating probability to beat and expected loss for variant A2...\n", - "INFO:root:calculating bayesian statistics over variants for metric revenue\n", - "INFO:root:sampling data for variant A1...\n", - "INFO:root:sampling data for variant A2...\n", - "INFO:root:calculating probability to beat and expected loss for variant A1...\n", - "INFO:root:calculating probability to beat and expected loss for variant A2...\n", - "INFO:root:calculating bayesian statistics over variants for metric arpu\n", - "INFO:root:sampling data for variant A1...\n", - "INFO:root:sampling data for variant A2...\n", - "INFO:root:calculating probability to beat and expected loss for variant A1...\n", - "INFO:root:calculating probability to beat and expected loss for variant A2...\n", - "INFO:root:consolidating final results...\n", - "INFO:root:consolidating final results for variant A1...\n", - "INFO:root:consolidating final results for variant A2...\n", - "INFO:root:FINISHED experiment evaluation.\n" - ] - }, - { - "data": { - "text/plain": [ - "{'A1': {'users': 1000,\n", - " 'sales': 51,\n", - " 'paids': 51,\n", - " 'revenue': 25344.765165558103,\n", - " 'conversion': 0.051,\n", - " 'ticket': 496.95618,\n", - " 'arpu': 25.34477,\n", - " 'statistics': {'conversion': {'chance_to_beat': 0.65963,\n", - " 'expected_loss': 0.0021973301774269333,\n", - " 'lift': 0.0851063829787233,\n", - " 'diff': 0.003999999999999997},\n", - " 'revenue': {'chance_to_beat': 0.375954,\n", - " 'expected_loss': 59.94820363403642,\n", - " 'lift': 0.01738538880664109,\n", - " 'diff': 433.099002073428},\n", - " 'arpu': {'chance_to_beat': 0.523998,\n", - " 'expected_loss': 2.6673904657629213,\n", - " 'lift': 0.017385426187806807,\n", - " 'diff': 0.4330999999999996}},\n", - " 'ratio': 0.5},\n", - " 'A2': {'users': 1000,\n", - " 'sales': 47,\n", - " 'paids': 47,\n", - " 'revenue': 24911.666163484675,\n", - " 'conversion': 0.047,\n", - " 'ticket': 530.03545,\n", - " 'arpu': 24.91167,\n", - " 'statistics': {'conversion': {'chance_to_beat': 0.34037,\n", - " 'expected_loss': 0.006181498341976208,\n", - " 'lift': -0.07843137254901955,\n", - " 'diff': -0.003999999999999997},\n", - " 'revenue': {'chance_to_beat': 0.624046,\n", - " 'expected_loss': 26.851069422616064,\n", - " 'lift': -0.017088302031773472,\n", - " 'diff': -433.099002073428},\n", - " 'arpu': {'chance_to_beat': 0.476002,\n", - " 'expected_loss': 3.0606801031872637,\n", - " 'lift': -0.01708833814629207,\n", - " 'diff': -0.4330999999999996}},\n", - " 'ratio': 0.5}}" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "experiment.run_experiment(df_results_per_user=df2)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
A1A2
users10001000
sales5147
paids5147
revenue25344.824911.7
conversion0.0510.047
ticket496.956530.035
arpu25.344824.9117
statistics{'conversion': {'chance_to_beat': 0.65963, 'expected_loss': 0.0021973301774269333, 'lift': 0.0851063829787233, 'diff': 0.003999999999999997}, 'revenue': {'chance_to_beat': 0.375954, 'expected_loss': 59.94820363403642, 'lift': 0.01738538880664109, 'diff': 433.099002073428}, 'arpu': {'chance_to_beat': 0.523998, 'expected_loss': 2.6673904657629213, 'lift': 0.017385426187806807, 'diff': 0.4330999999999996}}{'conversion': {'chance_to_beat': 0.34037, 'expected_loss': 0.006181498341976208, 'lift': -0.07843137254901955, 'diff': -0.003999999999999997}, 'revenue': {'chance_to_beat': 0.624046, 'expected_loss': 26.851069422616064, 'lift': -0.017088302031773472, 'diff': -433.099002073428}, 'arpu': {'chance_to_beat': 0.476002, 'expected_loss': 3.0606801031872637, 'lift': -0.01708833814629207, 'diff': -0.4330999999999996}}
ratio0.50.5
\n", - "
" - ], - "text/plain": [ - " A1 \\\n", - "users 1000 \n", - "sales 51 \n", - "paids 51 \n", - "revenue 25344.8 \n", - "conversion 0.051 \n", - "ticket 496.956 \n", - "arpu 25.3448 \n", - "statistics {'conversion': {'chance_to_beat': 0.65963, 'expected_loss': 0.0021973301774269333, 'lift': 0.0851063829787233, 'diff': 0.003999999999999997}, 'revenue': {'chance_to_beat': 0.375954, 'expected_loss': 59.94820363403642, 'lift': 0.01738538880664109, 'diff': 433.099002073428}, 'arpu': {'chance_to_beat': 0.523998, 'expected_loss': 2.6673904657629213, 'lift': 0.017385426187806807, 'diff': 0.4330999999999996}} \n", - "ratio 0.5 \n", - "\n", - " A2 \n", - "users 1000 \n", - "sales 47 \n", - "paids 47 \n", - "revenue 24911.7 \n", - "conversion 0.047 \n", - "ticket 530.035 \n", - "arpu 24.9117 \n", - "statistics {'conversion': {'chance_to_beat': 0.34037, 'expected_loss': 0.006181498341976208, 'lift': -0.07843137254901955, 'diff': -0.003999999999999997}, 'revenue': {'chance_to_beat': 0.624046, 'expected_loss': 26.851069422616064, 'lift': -0.017088302031773472, 'diff': -433.099002073428}, 'arpu': {'chance_to_beat': 0.476002, 'expected_loss': 3.0606801031872637, 'lift': -0.01708833814629207, 'diff': -0.4330999999999996}} \n", - "ratio 0.5 " - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame.from_dict(experiment.results)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Simulations\n", - "\n", - "\n", - "\n", - "We now consider a comparison between a sales process with a conversion rate of\n", - "5% and 6% in the variant. The revenue/sale is the same ($\\%25$) in both variants.\n", - "I ran a set of 400 A/B simulations. The threshold of caring ε was chosen\n", - "to be 2% of the mean visitor value of $1.25, or $0.025. I.e., a $0.025 loss was\n", - "considered acceptable.\n", - "In the A/B simulations I ran, the Bayesian test finished with an average of\n", - "about 3,000 data points. Out of 400 simulations, 95.75% returned the correct\n", - "result. The 10’th percentile of test durations was 250 samples, the 50’th percentile was 1,500 samples and the 95’th sample was 4,750. See Figure 9 for an\n", - "illustration.\n", - "This approximately corresponds to the balanced mode in the tool" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "\n", - "size = 1500\n", - "\n", - "pA = 0.05\n", - "pB = 0.06\n", - "mu_revenue = 25.0\n", - "std_revenue = 5.6\n", - "epsilon = 0.025\n", - "\n", - "\n", - "def generate_df(size=1500):\n", - " a_lambda, a_revenue = get_data(pA, size, mu_revenue, std_revenue)\n", - " b_lambda, b_revenue = get_data(pB, size, mu_revenue, std_revenue)\n", - "\n", - " dfA = pd.DataFrame(data={\n", - " \"alternative\": ['A']*size,\n", - " \"revenue\": a_revenue, \n", - " \"sales\": a_lambda\n", - " })\n", - " dfB = pd.DataFrame(data={\n", - " \"alternative\": ['B']*size,\n", - " \"revenue\": b_revenue, \n", - " \"sales\": b_lambda\n", - " })\n", - "\n", - " df3 = pd.concat([dfA, dfB])\n", - " df3['sum_revenue'] = df3['revenue']\n", - " \n", - " return df3" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
revenuesalesconversion
meansumcount
alternative
A26.2582388015000.053333
B24.1853757915000.052667
\n", - "
" - ], - "text/plain": [ - " revenue sales conversion\n", - " mean sum count \n", - "alternative \n", - "A 26.258238 80 1500 0.053333 \n", - "B 24.185375 79 1500 0.052667 " - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df3_example = generate_df()\n", - "__df = df3_example.groupby(\"alternative\").agg({\"revenue\": \"mean\", \"sales\": [\"sum\", \"count\"]})\n", - "__df[\"conversion\"] = __df['sales']['sum'] / __df['sales']['count'] \n", - "\n", - "__df" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install joblib" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Changing conversion" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [09:07<00:00, 5.48s/it]\n" - ] - } - ], - "source": [ - "from tqdm import tqdm\n", - "import multiprocessing\n", - "from joblib import Parallel, delayed\n", - "\n", - "N_SIMULATIONS = 100\n", - "\n", - "logging.basicConfig(level=logging.WARNING)\n", - "\n", - "num_cores = multiprocessing.cpu_count()\n", - "\n", - "list_of_dataframes = [generate_df() for _ in range(N_SIMULATIONS)]\n", - "inputs = tqdm(list_of_dataframes)\n", - "\n", - "def run_one_experiment(df):\n", - " df = df.copy()\n", - " experiment = Experiment(name=\"A/B Test\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A')\n", - " results = experiment.run_experiment(df_results_per_user=df)\n", - " \n", - " # return the winner if some loss that is lower than epsilon, if any\n", - " winners = []\n", - " for metric in ['conversion', 'revenue', 'arpu']:\n", - " lossA = results['A']['statistics'][metric]['expected_loss']\n", - " lossB = results['B']['statistics'][metric]['expected_loss']\n", - " if lossA < lossB and lossA < epsilon:\n", - " winners.append('A')\n", - " elif lossB < lossA and lossB < epsilon:\n", - " winners.append('B')\n", - " else:\n", - " winners.append(None)\n", - " \n", - " return winners\n", - " \n", - "processed_list = Parallel(n_jobs=num_cores)(delayed(run_one_experiment)(df) for df in inputs)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[['B', None, None],\n", - " ['B', None, None],\n", - " ['B', None, None],\n", - " ['A', None, None],\n", - " ['B', None, None],\n", - " ['B', None, None],\n", - " ['A', None, None],\n", - " ['B', None, None],\n", - " ['A', None, None],\n", - " ['B', None, None]]" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "processed_list[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "B conversion winner: 87.0 %\n", - "\t simulations with no winner: 0.0%\n", - "B revenue winner: 0.0 %\n", - "\t simulations with no winner: 97.0%\n", - "B arpu winner: 0.0 %\n", - "\t simulations with no winner: 100.0%\n" - ] - } - ], - "source": [ - "for i, metric in enumerate(['conversion', 'revenue', 'arpu']):\n", - " metric_list = [m[i] for m in processed_list]\n", - " print(f\"B {metric} winner: {100.0 * np.mean([m =='B' for m in metric_list])} %\")\n", - " print(f\"\\t simulations with no winner: {100.0 * np.mean([m == None for m in metric_list])}%\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Changing Revenue" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [09:04<00:00, 5.44s/it]\n" - ] - } - ], - "source": [ - "from tqdm import tqdm\n", - "import multiprocessing\n", - "from joblib import Parallel, delayed\n", - "import numpy as np\n", - "\n", - "N_SIMULATIONS = 100\n", - "\n", - "size = 1500\n", - "pA = 0.05\n", - "pB = 0.05\n", - "mu_revenueA = 20.0\n", - "mu_revenueB = 30.0\n", - "epsilon = 0.025\n", - "\n", - "\n", - "num_cores = multiprocessing.cpu_count()\n", - "\n", - "\n", - "def get_data(p, size, mu):\n", - " lambdas = np.random.binomial(1, p, size)\n", - " revenues = np.random.exponential(mu, size)\n", - " \n", - " return (lambdas, revenues)\n", - "\n", - "\n", - "def generate_df(pA, pB, mu_revenueA, mu_revenueB, size):\n", - " a_lambda, a_revenue = get_data(pA, size, mu_revenueA)\n", - " b_lambda, b_revenue = get_data(pB, size, mu_revenueB)\n", - "\n", - " dfA = pd.DataFrame(data={\n", - " \"alternative\": ['A']*size,\n", - " \"revenue\": a_revenue, \n", - " \"sales\": a_lambda\n", - " })\n", - " dfB = pd.DataFrame(data={\n", - " \"alternative\": ['B']*size,\n", - " \"revenue\": b_revenue, \n", - " \"sales\": b_lambda\n", - " })\n", - "\n", - " df3 = pd.concat([dfA, dfB])\n", - " df3['sum_revenue'] = df3['revenue']\n", - " \n", - " return df3\n", - "\n", - "\n", - "list_of_dataframes = [generate_df(pA, pB, mu_revenueA, mu_revenueB, size) for _ in range(N_SIMULATIONS)]\n", - "inputs = tqdm(list_of_dataframes)\n", - "\n", - "\n", - "def run_one_experiment(df):\n", - " df = df.copy()\n", - " experiment = Experiment(name=\"A/B Test\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A')\n", - " results = experiment.run_experiment(df_results_per_user=df)\n", - " \n", - " # return the winner if some loss that is lower than epsilon, if any\n", - " winners = []\n", - " for metric in ['conversion', 'revenue', 'arpu']:\n", - " lossA = results['A']['statistics'][metric]['expected_loss']\n", - " lossB = results['B']['statistics'][metric]['expected_loss']\n", - " if lossA < lossB and lossA < epsilon:\n", - " winners.append('A')\n", - " elif lossB < lossA and lossB < epsilon:\n", - " winners.append('B')\n", - " else:\n", - " winners.append(None)\n", - " \n", - " return winners\n", - " \n", - "processed_list_2 = Parallel(n_jobs=num_cores)(delayed(run_one_experiment)(df) for df in inputs)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "conversion: 56.00000000000001 %\n", - "\t simulations with no winner: 0.0\n", - "revenue: 0.0 %\n", - "\t simulations with no winner: 100.0\n", - "arpu: 0.0 %\n", - "\t simulations with no winner: 100.0\n" - ] - } - ], - "source": [ - "for i, metric in enumerate(['conversion', 'revenue', 'arpu']):\n", - " metric_list = [m[i] for m in processed_list_2]\n", - " print(f\"{metric}: {100.0 * np.mean([m =='B' for m in metric_list])} %\")\n", - " print(f\"\\t simulations with no winner: {100.0 * np.mean([m == None for m in metric_list])}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### A/A Test" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [08:54<00:00, 5.34s/it]\n" - ] - } - ], - "source": [ - "from tqdm import tqdm\n", - "import multiprocessing\n", - "from joblib import Parallel, delayed\n", - "import numpy as np\n", - "\n", - "N_SIMULATIONS = 100\n", - "\n", - "size = 1500\n", - "pA = 0.05\n", - "pB = 0.05\n", - "mu_revenueA = 20.0\n", - "mu_revenueB = 20.0\n", - "epsilon = 0.025\n", - "\n", - "num_cores = multiprocessing.cpu_count()\n", - "\n", - "\n", - "def get_data(p, size, mu):\n", - " lambdas = np.random.binomial(1, p, size)\n", - " revenues = np.random.exponential(mu, size)\n", - " \n", - " return (lambdas, revenues)\n", - "\n", - "\n", - "def generate_df(pA, pB, mu_revenueA, mu_revenueB, size):\n", - " a_lambda, a_revenue = get_data(pA, size, mu_revenueA)\n", - " b_lambda, b_revenue = get_data(pB, size, mu_revenueB)\n", - "\n", - " dfA = pd.DataFrame(data={\n", - " \"alternative\": ['A']*size,\n", - " \"revenue\": a_revenue, \n", - " \"sales\": a_lambda\n", - " })\n", - " dfB = pd.DataFrame(data={\n", - " \"alternative\": ['B']*size,\n", - " \"revenue\": b_revenue, \n", - " \"sales\": b_lambda\n", - " })\n", - "\n", - " df3 = pd.concat([dfA, dfB])\n", - " df3['sum_revenue'] = df3['revenue']\n", - " \n", - " return df3\n", - "\n", - "\n", - "list_of_dataframes = [generate_df(pA, pB, mu_revenueA, mu_revenueB, size) for _ in range(N_SIMULATIONS)]\n", - "inputs = tqdm(list_of_dataframes)\n", - "\n", - "\n", - "def run_one_experiment(df):\n", - " df = df.copy()\n", - " experiment = Experiment(name=\"A/B Test\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A')\n", - " results = experiment.run_experiment(df_results_per_user=df)\n", - " \n", - " # return the winner if some loss that is lower than epsilon, if any\n", - " winners = []\n", - " for metric in ['conversion', 'revenue', 'arpu']:\n", - " lossA = results['A']['statistics'][metric]['expected_loss']\n", - " lossB = results['B']['statistics'][metric]['expected_loss']\n", - " if lossA < lossB and lossA < epsilon:\n", - " winners.append('A')\n", - " elif lossB < lossA and lossB < epsilon:\n", - " winners.append('B')\n", - " else:\n", - " winners.append(None)\n", - " \n", - " return winners\n", - " \n", - "processed_list_3 = Parallel(n_jobs=num_cores)(delayed(run_one_experiment)(df) for df in inputs)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "conversion: 55.00000000000001 %\n", - "\t simulations with no winner: 0.0\n", - "revenue: 0.0 %\n", - "\t simulations with no winner: 100.0\n", - "arpu: 0.0 %\n", - "\t simulations with no winner: 100.0\n" - ] - } - ], - "source": [ - "for i, metric in enumerate(['conversion', 'revenue', 'arpu']):\n", - " metric_list = [m[i] for m in processed_list_3]\n", - " print(f\"{metric}: {100.0 * np.mean([m =='B' for m in metric_list])} %\")\n", - " print(f\"\\t simulations with no winner: {100.0 * np.mean([m == None for m in metric_list])}\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - }, - "toc": { - "base_numbering": 1, - "nav_menu": { - "height": "177px", - "width": "290px" - }, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/.ipynb_checkpoints/experiment_example-checkpoint.ipynb b/examples/.ipynb_checkpoints/experiment_example-checkpoint.ipynb deleted file mode 100644 index 70f474b..0000000 --- a/examples/.ipynb_checkpoints/experiment_example-checkpoint.ipynb +++ /dev/null @@ -1,225 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Imports (make sure to install janus)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Obtaining file:///home/luanfernandes/projects/buser/janus/examples\n", - "\u001b[31mERROR: file:///home/luanfernandes/projects/buser/janus/examples does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.\u001b[0m\u001b[31m\n", - "\u001b[0m" - ] - } - ], - "source": [ - "!pip install -e ." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:38.828666Z", - "start_time": "2020-12-04T17:16:37.999997Z" - } - }, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'janus'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/tmp/ipykernel_46086/1308016222.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mjanus\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mjanus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperiment\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mExperiment\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mVariant\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mlogging\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'janus'" - ] - } - ], - "source": [ - "import janus\n", - "from janus.stats.experiment import Experiment, Variant\n", - "import pandas as pd\n", - "\n", - "import logging\n", - "logging.basicConfig(level=logging.INFO)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-11-27T17:29:01.054901Z", - "start_time": "2020-11-27T17:29:01.049977Z" - } - }, - "source": [ - "## Load Test Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:38.970497Z", - "start_time": "2020-12-04T17:16:38.949037Z" - } - }, - "outputs": [], - "source": [ - "df = pd.read_csv(\"../tests/results_per_user.csv\")\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-11-27T23:08:00.040508Z", - "start_time": "2020-11-27T23:08:00.031558Z" - } - }, - "source": [ - "We see that, for the data we have, alternative B looks better: 3 sales out of 4 participants, with average ticket of (230+170+320)/3 = 240 and average ARPU of 180. All metrics are higher that A's." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Initialize experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:38.974932Z", - "start_time": "2020-12-04T17:16:38.972390Z" - } - }, - "outputs": [], - "source": [ - "experiment = Experiment(name=\"my_experiment\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We run the experiment over our data to get results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:51.994576Z", - "start_time": "2020-12-04T17:16:38.976479Z" - } - }, - "outputs": [], - "source": [ - "experiment.run_experiment(df_results_per_user=df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:52.010513Z", - "start_time": "2020-12-04T17:16:51.996566Z" - } - }, - "outputs": [], - "source": [ - "pd.DataFrame.from_dict(experiment.results['A']['statistics'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:52.031314Z", - "start_time": "2020-12-04T17:16:52.012627Z" - } - }, - "outputs": [], - "source": [ - "pd.DataFrame.from_dict(experiment.results)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## References to compare\n", - "\n", - "https://vidogreg.shinyapps.io/bayes-arpu-test/\n", - "\n", - "https://abtestguide.com/bayesian/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/Testing Examples and VWO cases.ipynb b/examples/Testing Examples and VWO cases.ipynb deleted file mode 100644 index 257a306..0000000 --- a/examples/Testing Examples and VWO cases.ipynb +++ /dev/null @@ -1,967 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Imports (make sure to install janus)" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 2, - "source": [ - "import janus\n", - "from janus.stats.experiment import Experiment, Variant\n", - "import pandas as pd\n", - "\n", - "import logging\n", - "logging.basicConfig(level=logging.INFO)\n", - "\n", - "pd.set_option('max_colwidth', -1)" - ], - "outputs": [], - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:38.828666Z", - "start_time": "2020-12-04T17:16:37.999997Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## A/A Test from VWO paper\n", - "\n", - "\"To begin I ran an A/A test comparing a sales process with a 5% conversion rate\n", - "and a mean of $\\$25$ revenue/sale. In this example, the standard deviation of the\n", - "data is $\\$5.6$ (compared to a mean revenue/visitor of $1.25).\n", - "According to Evan Miller’s t-test calculator at http://www.evanmiller.\n", - "org/ab-testing/t-test.html, this test will require 4,250 data points (per\n", - "sample) to resolve a 20% lift.\n", - "In the simulation, the Bayesian test finished with an average of about 3,292\n", - "data points (10th/90th percentile 250/7,750). The distribution is plottedin Figure 8\"\n" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 27, - "source": [ - "_n = np.random.exponential(25, 10000)\n", - "mean = np.mean(_n)\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "plt.hist(_n)\n", - "plt.axvline(mean, color='red')" - ], - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 27 - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAP+ElEQVR4nO3cbaxdVZ3H8e9P6sNEHQHpNKRtpow2TvCFSBrAaCYqsRQwUyZRUzMZGtOkb2qiiclYxhfMqCTlxchIZiTpSGMxjkh8CI2QwQ5CzLzg4aLI43R6RQhtgF4posbIDPifF2fVOdZ7uPfS23N6u76f5OTs/d/rnL32yr6/u+++65xUFZKkPrxq0h2QJI2PoS9JHTH0Jakjhr4kdcTQl6SOLJt0B17OGWecUWvWrJl0N47dvn2D57e9bbL9kNSF++6772dVtXy2bSd06K9Zs4apqalJd+PYvfe9g+c775xkLyR1IskTo7Z5e0eSOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpyQn8i91it2X7LRPb7+I5LJ7JfSZqLV/qS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHZlX6Cd5PMmDSe5PMtVqpyfZm2R/ez6t1ZPk2iTTSR5Icu7Q+2xu7fcn2Xx8DkmSNMpCrvTfV1XnVNW6tr4duL2q1gK3t3WAi4G17bEVuA4GvySAK4HzgfOAK4/8opAkjcex3N7ZCOxuy7uBy4bqN9TAXcCpSc4ELgL2VtXhqnoO2AtsOIb9S5IWaL6hX8D3ktyXZGurraiqp9ry08CKtrwSeHLotQdabVT99yTZmmQqydTMzMw8uydJmo9l82z3nqo6mORPgL1J/mt4Y1VVklqMDlXVTmAnwLp16xblPSVJA/O60q+qg+35EPAdBvfkn2m3bWjPh1rzg8DqoZevarVRdUnSmMwZ+klen+SNR5aB9cBDwB7gyAyczcDNbXkPcHmbxXMB8Hy7DXQbsD7Jae0fuOtbTZI0JvO5vbMC+E6SI+3/rar+Pcm9wE1JtgBPAB9p7W8FLgGmgV8DHwOoqsNJPgfc29p9tqoOL9qRSJLmNGfoV9VjwDtmqT8LXDhLvYBtI95rF7Br4d2UJC0GP5ErSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVk3qGf5JQkP0ry3bZ+VpK7k0wn+UaS17T6a9v6dNu+Zug9rmj1fUkuWvSjkSS9rIVc6X8CeHRo/Wrgmqp6K/AcsKXVtwDPtfo1rR1JzgY2AW8HNgBfSnLKsXVfkrQQ8wr9JKuAS4Evt/UA7we+2ZrsBi5ryxvbOm37ha39RuDGqnqhqn4KTAPnLcIxSJLmab5X+v8E/C3w27b+ZuDnVfViWz8ArGzLK4EnAdr251v739Vnec3vJNmaZCrJ1MzMzPyPRJI0pzlDP8kHgUNVdd8Y+kNV7ayqdVW1bvny5ePYpSR1Y9k82rwb+MsklwCvA/4Y+CJwapJl7Wp+FXCwtT8IrAYOJFkGvAl4dqh+xPBrJEljMOeVflVdUVWrqmoNg3/Efr+q/hq4A/hQa7YZuLkt72nrtO3fr6pq9U1tds9ZwFrgnkU7EknSnOZzpT/Kp4Ebk3we+BFwfatfD3w1yTRwmMEvCqrq4SQ3AY8ALwLbquqlY9i/JGmBFhT6VXUncGdbfoxZZt9U1W+AD494/VXAVQvtpCRpcfiJXEnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI3OGfpLXJbknyY+TPJzkH1r9rCR3J5lO8o0kr2n117b16bZ9zdB7XdHq+5JcdNyOSpI0q/lc6b8AvL+q3gGcA2xIcgFwNXBNVb0VeA7Y0tpvAZ5r9WtaO5KcDWwC3g5sAL6U5JRFPBZJ0hzmDP0a+FVbfXV7FPB+4Jutvhu4rC1vbOu07RcmSavfWFUvVNVPgWngvMU4CEnS/Mzrnn6SU5LcDxwC9gI/AX5eVS+2JgeAlW15JfAkQNv+PPDm4fosr5EkjcG8Qr+qXqqqc4BVDK7O//x4dSjJ1iRTSaZmZmaO124kqUsLmr1TVT8H7gDeBZyaZFnbtAo42JYPAqsB2vY3Ac8O12d5zfA+dlbVuqpat3z58oV0T5I0h/nM3lme5NS2/EfAB4BHGYT/h1qzzcDNbXlPW6dt/35VVatvarN7zgLWAvcs0nFIkuZh2dxNOBPY3WbavAq4qaq+m+QR4MYknwd+BFzf2l8PfDXJNHCYwYwdqurhJDcBjwAvAtuq6qXFPRxJ0suZM/Sr6gHgnbPUH2OW2TdV9RvgwyPe6yrgqoV3U5K0GPxEriR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkfm89XKWqA122/5vfUbH3sWgE1H1Rfb4zsuPa7vL2np80pfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVkztBPsjrJHUkeSfJwkk+0+ulJ9ibZ355Pa/UkuTbJdJIHkpw79F6bW/v9STYfv8OSJM1mPlf6LwKfqqqzgQuAbUnOBrYDt1fVWuD2tg5wMbC2PbYC18HglwRwJXA+cB5w5ZFfFJKk8Zgz9Kvqqar6YVv+JfAosBLYCOxuzXYDl7XljcANNXAXcGqSM4GLgL1VdbiqngP2AhsW82AkSS9vQff0k6wB3gncDayoqqfapqeBFW15JfDk0MsOtNqo+tH72JpkKsnUzMzMQronSZrDvEM/yRuAbwGfrKpfDG+rqgJqMTpUVTural1VrVu+fPlivKUkqZlX6Cd5NYPA/1pVfbuVn2m3bWjPh1r9ILB66OWrWm1UXZI0JvOZvRPgeuDRqvrC0KY9wJEZOJuBm4fql7dZPBcAz7fbQLcB65Oc1v6Bu77VJEljsmwebd4N/A3wYJL7W+3vgB3ATUm2AE8AH2nbbgUuAaaBXwMfA6iqw0k+B9zb2n22qg4vxkFIkuZnztCvqv8EMmLzhbO0L2DbiPfaBexaSAclSYvHT+RKUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR2ZM/ST7EpyKMlDQ7XTk+xNsr89n9bqSXJtkukkDyQ5d+g1m1v7/Uk2H5/DkSS9nGXzaPMV4J+BG4Zq24Hbq2pHku1t/dPAxcDa9jgfuA44P8npwJXAOqCA+5LsqarnFutABGu23zKxfT++49KJ7VvS/M15pV9VPwAOH1XeCOxuy7uBy4bqN9TAXcCpSc4ELgL2VtXhFvR7gQ2L0H9J0gK80nv6K6rqqbb8NLCiLa8Enhxqd6DVRtX/QJKtSaaSTM3MzLzC7kmSZnPM/8itqmJwy2ZRVNXOqlpXVeuWL1++WG8rSeKVh/4z7bYN7flQqx8EVg+1W9Vqo+qSpDF6paG/BzgyA2czcPNQ/fI2i+cC4Pl2G+g2YH2S09pMn/WtJkkaozln7yT5OvBe4IwkBxjMwtkB3JRkC/AE8JHW/FbgEmAa+DXwMYCqOpzkc8C9rd1nq+rofw5Lko6zOUO/qj46YtOFs7QtYNuI99kF7FpQ7yRJi8pP5EpSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktSROb9PX5qPNdtvmch+H99x6UT2Ky1VXulLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOuKHs7SkTepDYeAHw7Q0eaUvSR0x9CWpI4a+JHVk7Pf0k2wAvgicAny5qnaMuw/SYvBL5rQUjTX0k5wC/AvwAeAAcG+SPVX1yDj7IS1l/vNax2LcV/rnAdNV9RhAkhuBjYChLy0B/nWz9I079FcCTw6tHwDOH26QZCuwta3+Ksm+Y9jfGcDPjuH1i+JdRxau/uAku3G0E2JsTlCOzWgTGZtcPe49viIn0nnzp6M2nHDz9KtqJ7BzMd4ryVRVrVuM9zrZODajOTajOTajLZWxGffsnYPA6qH1Va0mSRqDcYf+vcDaJGcleQ2wCdgz5j5IUrfGenunql5M8nHgNgZTNndV1cPHcZeLcpvoJOXYjObYjObYjLYkxiZVNek+SJLGxE/kSlJHDH1J6shJGfpJNiTZl2Q6yfZJ92fSkjye5MEk9yeZarXTk+xNsr89nzbpfo5Dkl1JDiV5aKg261hk4Np2Hj2Q5NzJ9fz4GzE2f5/kYDt37k9yydC2K9rY7Ety0WR6PR5JVie5I8kjSR5O8olWX3LnzkkX+kNf9XAxcDbw0SRnT7ZXJ4T3VdU5Q/OItwO3V9Va4Pa23oOvABuOqo0ai4uBte2xFbhuTH2clK/wh2MDcE07d86pqlsB2s/UJuDt7TVfaj97J6sXgU9V1dnABcC2NgZL7tw56UKfoa96qKr/AY581YN+30Zgd1veDVw2ua6MT1X9ADh8VHnUWGwEbqiBu4BTk5w5lo5OwIixGWUjcGNVvVBVPwWmGfzsnZSq6qmq+mFb/iXwKINvGFhy587JGPqzfdXDygn15URRwPeS3Ne+5gJgRVU91ZafBlZMpmsnhFFj4bk08PF2i2LX0G3AbscmyRrgncDdLMFz52QMff2h91TVuQz+5NyW5C+GN9Zg3q5zd3EsZnEd8BbgHOAp4B8n2psJS/IG4FvAJ6vqF8Pblsq5czKGvl/1cJSqOtieDwHfYfBn+DNH/txsz4cm18OJGzUW3Z9LVfVMVb1UVb8F/pX/v4XT3dgkeTWDwP9aVX27lZfcuXMyhr5f9TAkyeuTvPHIMrAeeIjBmGxuzTYDN0+mhyeEUWOxB7i8zcS4AHh+6E/5Lhx1H/qvGJw7MBibTUlem+QsBv+wvGfc/RuXJAGuBx6tqi8MbVp6505VnXQP4BLgv4GfAJ+ZdH8mPBZ/Bvy4PR4+Mh7AmxnMNtgP/Adw+qT7Oqbx+DqD2xT/y+A+65ZRYwGEwUywnwAPAusm3f8JjM1X27E/wCDIzhxq/5k2NvuAiyfd/+M8Nu9hcOvmAeD+9rhkKZ47fg2DJHXkZLy9I0kawdCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHfk/0olAwddTS8sAAAAASUVORK5CYII=" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 28, - "source": [ - "import numpy as np\n", - "\n", - "size = 1000\n", - "\n", - "def get_data(p, size, mu, std):\n", - " lambdas = np.random.binomial(1, p, size)\n", - " revenues = np.random.exponential(mu, size)\n", - " \n", - " return (lambdas, revenues)\n", - "\n", - "p = 0.05\n", - "mu_revenue = 25.0\n", - "std_revenue = 5.6\n", - "\n", - "a1_lambda, a1_revenue = get_data(p, size, mu_revenue, std_revenue)\n", - "a2_lambda, a2_revenue = get_data(p, size, mu_revenue, std_revenue)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 29, - "source": [ - "df2 = pd.DataFrame(data={\n", - " \"alternative\": ['A1']*size,\n", - " \"revenue\": a1_revenue, \n", - " \"sales\": a1_lambda\n", - "})\n", - "_df2 = pd.DataFrame(data={\n", - " \"alternative\": ['A2']*size,\n", - " \"revenue\": a2_revenue, \n", - " \"sales\": a2_lambda\n", - "})\n", - "\n", - "df2 = pd.concat([df2, _df2])\n", - "df2['sum_revenue'] = df2['revenue']\n", - "\n", - "__df = df2.groupby(\"alternative\").agg({\"revenue\": \"mean\", \"sales\": [\"sum\", \"count\"]})\n", - "__df[\"conversion\"] = __df['sales']['sum'] / __df['sales']['count'] \n", - "\n", - "__df" - ], - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " revenue sales conversion\n", - " mean sum count \n", - "alternative \n", - "A1 25.344765 51 1000 0.051 \n", - "A2 24.911666 47 1000 0.047 " - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
revenuesalesconversion
meansumcount
alternative
A125.3447655110000.051
A224.9116664710000.047
\n", - "
" - ] - }, - "metadata": {}, - "execution_count": 29 - } - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 30, - "source": [ - "experiment = Experiment(name=\"A/A Test\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A1')" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 31, - "source": [ - "experiment.run_experiment(df_results_per_user=df2)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "INFO:root:INITIALIZING experiment 'A/A Test' evaluation...\n", - "INFO:root:consolidating metrics over variants...\n", - "INFO:root:calculating bayesian statistics over variants for metric conversion\n", - "INFO:root:sampling data for variant A1...\n", - "INFO:root:sampling data for variant A2...\n", - "INFO:root:calculating probability to beat and expected loss for variant A1...\n", - "INFO:root:calculating probability to beat and expected loss for variant A2...\n", - "INFO:root:calculating bayesian statistics over variants for metric revenue\n", - "INFO:root:sampling data for variant A1...\n", - "INFO:root:sampling data for variant A2...\n", - "INFO:root:calculating probability to beat and expected loss for variant A1...\n", - "INFO:root:calculating probability to beat and expected loss for variant A2...\n", - "INFO:root:calculating bayesian statistics over variants for metric arpu\n", - "INFO:root:sampling data for variant A1...\n", - "INFO:root:sampling data for variant A2...\n", - "INFO:root:calculating probability to beat and expected loss for variant A1...\n", - "INFO:root:calculating probability to beat and expected loss for variant A2...\n", - "INFO:root:consolidating final results...\n", - "INFO:root:consolidating final results for variant A1...\n", - "INFO:root:consolidating final results for variant A2...\n", - "INFO:root:FINISHED experiment evaluation.\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "{'A1': {'users': 1000,\n", - " 'sales': 51,\n", - " 'paids': 51,\n", - " 'revenue': 25344.765165558103,\n", - " 'conversion': 0.051,\n", - " 'ticket': 496.95618,\n", - " 'arpu': 25.34477,\n", - " 'statistics': {'conversion': {'chance_to_beat': 0.65963,\n", - " 'expected_loss': 0.0021973301774269333,\n", - " 'lift': 0.0851063829787233,\n", - " 'diff': 0.003999999999999997},\n", - " 'revenue': {'chance_to_beat': 0.375954,\n", - " 'expected_loss': 59.94820363403642,\n", - " 'lift': 0.01738538880664109,\n", - " 'diff': 433.099002073428},\n", - " 'arpu': {'chance_to_beat': 0.523998,\n", - " 'expected_loss': 2.6673904657629213,\n", - " 'lift': 0.017385426187806807,\n", - " 'diff': 0.4330999999999996}},\n", - " 'ratio': 0.5},\n", - " 'A2': {'users': 1000,\n", - " 'sales': 47,\n", - " 'paids': 47,\n", - " 'revenue': 24911.666163484675,\n", - " 'conversion': 0.047,\n", - " 'ticket': 530.03545,\n", - " 'arpu': 24.91167,\n", - " 'statistics': {'conversion': {'chance_to_beat': 0.34037,\n", - " 'expected_loss': 0.006181498341976208,\n", - " 'lift': -0.07843137254901955,\n", - " 'diff': -0.003999999999999997},\n", - " 'revenue': {'chance_to_beat': 0.624046,\n", - " 'expected_loss': 26.851069422616064,\n", - " 'lift': -0.017088302031773472,\n", - " 'diff': -433.099002073428},\n", - " 'arpu': {'chance_to_beat': 0.476002,\n", - " 'expected_loss': 3.0606801031872637,\n", - " 'lift': -0.01708833814629207,\n", - " 'diff': -0.4330999999999996}},\n", - " 'ratio': 0.5}}" - ] - }, - "metadata": {}, - "execution_count": 31 - } - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 32, - "source": [ - "pd.DataFrame.from_dict(experiment.results)" - ], - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " A1 \\\n", - "users 1000 \n", - "sales 51 \n", - "paids 51 \n", - "revenue 25344.8 \n", - "conversion 0.051 \n", - "ticket 496.956 \n", - "arpu 25.3448 \n", - "statistics {'conversion': {'chance_to_beat': 0.65963, 'expected_loss': 0.0021973301774269333, 'lift': 0.0851063829787233, 'diff': 0.003999999999999997}, 'revenue': {'chance_to_beat': 0.375954, 'expected_loss': 59.94820363403642, 'lift': 0.01738538880664109, 'diff': 433.099002073428}, 'arpu': {'chance_to_beat': 0.523998, 'expected_loss': 2.6673904657629213, 'lift': 0.017385426187806807, 'diff': 0.4330999999999996}} \n", - "ratio 0.5 \n", - "\n", - " A2 \n", - "users 1000 \n", - "sales 47 \n", - "paids 47 \n", - "revenue 24911.7 \n", - "conversion 0.047 \n", - "ticket 530.035 \n", - "arpu 24.9117 \n", - "statistics {'conversion': {'chance_to_beat': 0.34037, 'expected_loss': 0.006181498341976208, 'lift': -0.07843137254901955, 'diff': -0.003999999999999997}, 'revenue': {'chance_to_beat': 0.624046, 'expected_loss': 26.851069422616064, 'lift': -0.017088302031773472, 'diff': -433.099002073428}, 'arpu': {'chance_to_beat': 0.476002, 'expected_loss': 3.0606801031872637, 'lift': -0.01708833814629207, 'diff': -0.4330999999999996}} \n", - "ratio 0.5 " - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
A1A2
users10001000
sales5147
paids5147
revenue25344.824911.7
conversion0.0510.047
ticket496.956530.035
arpu25.344824.9117
statistics{'conversion': {'chance_to_beat': 0.65963, 'expected_loss': 0.0021973301774269333, 'lift': 0.0851063829787233, 'diff': 0.003999999999999997}, 'revenue': {'chance_to_beat': 0.375954, 'expected_loss': 59.94820363403642, 'lift': 0.01738538880664109, 'diff': 433.099002073428}, 'arpu': {'chance_to_beat': 0.523998, 'expected_loss': 2.6673904657629213, 'lift': 0.017385426187806807, 'diff': 0.4330999999999996}}{'conversion': {'chance_to_beat': 0.34037, 'expected_loss': 0.006181498341976208, 'lift': -0.07843137254901955, 'diff': -0.003999999999999997}, 'revenue': {'chance_to_beat': 0.624046, 'expected_loss': 26.851069422616064, 'lift': -0.017088302031773472, 'diff': -433.099002073428}, 'arpu': {'chance_to_beat': 0.476002, 'expected_loss': 3.0606801031872637, 'lift': -0.01708833814629207, 'diff': -0.4330999999999996}}
ratio0.50.5
\n", - "
" - ] - }, - "metadata": {}, - "execution_count": 32 - } - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "## Simulations\n", - "\n", - "\n", - "\n", - "We now consider a comparison between a sales process with a conversion rate of\n", - "5% and 6% in the variant. The revenue/sale is the same ($\\%25$) in both variants.\n", - "I ran a set of 400 A/B simulations. The threshold of caring ε was chosen\n", - "to be 2% of the mean visitor value of $1.25, or $0.025. I.e., a $0.025 loss was\n", - "considered acceptable.\n", - "In the A/B simulations I ran, the Bayesian test finished with an average of\n", - "about 3,000 data points. Out of 400 simulations, 95.75% returned the correct\n", - "result. The 10’th percentile of test durations was 250 samples, the 50’th percentile was 1,500 samples and the 95’th sample was 4,750. See Figure 9 for an\n", - "illustration.\n", - "This approximately corresponds to the balanced mode in the tool" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 33, - "source": [ - "import numpy as np\n", - "\n", - "\n", - "size = 1500\n", - "\n", - "pA = 0.05\n", - "pB = 0.06\n", - "mu_revenue = 25.0\n", - "std_revenue = 5.6\n", - "epsilon = 0.025\n", - "\n", - "\n", - "def generate_df(size=1500):\n", - " a_lambda, a_revenue = get_data(pA, size, mu_revenue, std_revenue)\n", - " b_lambda, b_revenue = get_data(pB, size, mu_revenue, std_revenue)\n", - "\n", - " dfA = pd.DataFrame(data={\n", - " \"alternative\": ['A']*size,\n", - " \"revenue\": a_revenue, \n", - " \"sales\": a_lambda\n", - " })\n", - " dfB = pd.DataFrame(data={\n", - " \"alternative\": ['B']*size,\n", - " \"revenue\": b_revenue, \n", - " \"sales\": b_lambda\n", - " })\n", - "\n", - " df3 = pd.concat([dfA, dfB])\n", - " df3['sum_revenue'] = df3['revenue']\n", - " \n", - " return df3" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 34, - "source": [ - "df3_example = generate_df()\n", - "__df = df3_example.groupby(\"alternative\").agg({\"revenue\": \"mean\", \"sales\": [\"sum\", \"count\"]})\n", - "__df[\"conversion\"] = __df['sales']['sum'] / __df['sales']['count'] \n", - "\n", - "__df" - ], - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " revenue sales conversion\n", - " mean sum count \n", - "alternative \n", - "A 26.258238 80 1500 0.053333 \n", - "B 24.185375 79 1500 0.052667 " - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
revenuesalesconversion
meansumcount
alternative
A26.2582388015000.053333
B24.1853757915000.052667
\n", - "
" - ] - }, - "metadata": {}, - "execution_count": 34 - } - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 10, - "source": [ - "# !pip install joblib" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### Changing conversion" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 35, - "source": [ - "from tqdm import tqdm\n", - "import multiprocessing\n", - "from joblib import Parallel, delayed\n", - "\n", - "N_SIMULATIONS = 100\n", - "\n", - "logging.basicConfig(level=logging.WARNING)\n", - "\n", - "num_cores = multiprocessing.cpu_count()\n", - "\n", - "list_of_dataframes = [generate_df() for _ in range(N_SIMULATIONS)]\n", - "inputs = tqdm(list_of_dataframes)\n", - "\n", - "def run_one_experiment(df):\n", - " df = df.copy()\n", - " experiment = Experiment(name=\"A/B Test\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A')\n", - " results = experiment.run_experiment(df_results_per_user=df)\n", - " \n", - " # return the winner if some loss that is lower than epsilon, if any\n", - " winners = []\n", - " for metric in ['conversion', 'revenue', 'arpu']:\n", - " lossA = results['A']['statistics'][metric]['expected_loss']\n", - " lossB = results['B']['statistics'][metric]['expected_loss']\n", - " if lossA < lossB and lossA < epsilon:\n", - " winners.append('A')\n", - " elif lossB < lossA and lossB < epsilon:\n", - " winners.append('B')\n", - " else:\n", - " winners.append(None)\n", - " \n", - " return winners\n", - " \n", - "processed_list = Parallel(n_jobs=num_cores)(delayed(run_one_experiment)(df) for df in inputs)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [09:07<00:00, 5.48s/it]\n" - ] - } - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 36, - "source": [ - "processed_list[:10]" - ], - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[['B', None, None],\n", - " ['B', None, None],\n", - " ['B', None, None],\n", - " ['A', None, None],\n", - " ['B', None, None],\n", - " ['B', None, None],\n", - " ['A', None, None],\n", - " ['B', None, None],\n", - " ['A', None, None],\n", - " ['B', None, None]]" - ] - }, - "metadata": {}, - "execution_count": 36 - } - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 41, - "source": [ - "for i, metric in enumerate(['conversion', 'revenue', 'arpu']):\n", - " metric_list = [m[i] for m in processed_list]\n", - " print(f\"B {metric} winner: {100.0 * np.mean([m =='B' for m in metric_list])} %\")\n", - " print(f\"\\t simulations with no winner: {100.0 * np.mean([m == None for m in metric_list])}%\")" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "B conversion winner: 87.0 %\n", - "\t simulations with no winner: 0.0%\n", - "B revenue winner: 0.0 %\n", - "\t simulations with no winner: 97.0%\n", - "B arpu winner: 0.0 %\n", - "\t simulations with no winner: 100.0%\n" - ] - } - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### Changing Revenue" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 44, - "source": [ - "from tqdm import tqdm\n", - "import multiprocessing\n", - "from joblib import Parallel, delayed\n", - "import numpy as np\n", - "\n", - "N_SIMULATIONS = 100\n", - "\n", - "size = 1500\n", - "pA = 0.05\n", - "pB = 0.05\n", - "mu_revenueA = 20.0\n", - "mu_revenueB = 30.0\n", - "epsilon = 0.025\n", - "\n", - "\n", - "num_cores = multiprocessing.cpu_count()\n", - "\n", - "\n", - "def get_data(p, size, mu):\n", - " lambdas = np.random.binomial(1, p, size)\n", - " revenues = np.random.exponential(mu, size)\n", - " \n", - " return (lambdas, revenues)\n", - "\n", - "\n", - "def generate_df(pA, pB, mu_revenueA, mu_revenueB, size):\n", - " a_lambda, a_revenue = get_data(pA, size, mu_revenueA)\n", - " b_lambda, b_revenue = get_data(pB, size, mu_revenueB)\n", - "\n", - " dfA = pd.DataFrame(data={\n", - " \"alternative\": ['A']*size,\n", - " \"revenue\": a_revenue, \n", - " \"sales\": a_lambda\n", - " })\n", - " dfB = pd.DataFrame(data={\n", - " \"alternative\": ['B']*size,\n", - " \"revenue\": b_revenue, \n", - " \"sales\": b_lambda\n", - " })\n", - "\n", - " df3 = pd.concat([dfA, dfB])\n", - " df3['sum_revenue'] = df3['revenue']\n", - " \n", - " return df3\n", - "\n", - "\n", - "list_of_dataframes = [generate_df(pA, pB, mu_revenueA, mu_revenueB, size) for _ in range(N_SIMULATIONS)]\n", - "inputs = tqdm(list_of_dataframes)\n", - "\n", - "\n", - "def run_one_experiment(df):\n", - " df = df.copy()\n", - " experiment = Experiment(name=\"A/B Test\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A')\n", - " results = experiment.run_experiment(df_results_per_user=df)\n", - " \n", - " # return the winner if some loss that is lower than epsilon, if any\n", - " winners = []\n", - " for metric in ['conversion', 'revenue', 'arpu']:\n", - " lossA = results['A']['statistics'][metric]['expected_loss']\n", - " lossB = results['B']['statistics'][metric]['expected_loss']\n", - " if lossA < lossB and lossA < epsilon:\n", - " winners.append('A')\n", - " elif lossB < lossA and lossB < epsilon:\n", - " winners.append('B')\n", - " else:\n", - " winners.append(None)\n", - " \n", - " return winners\n", - " \n", - "processed_list_2 = Parallel(n_jobs=num_cores)(delayed(run_one_experiment)(df) for df in inputs)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [09:04<00:00, 5.44s/it]\n" - ] - } - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 45, - "source": [ - "for i, metric in enumerate(['conversion', 'revenue', 'arpu']):\n", - " metric_list = [m[i] for m in processed_list_2]\n", - " print(f\"{metric}: {100.0 * np.mean([m =='B' for m in metric_list])} %\")\n", - " print(f\"\\t simulations with no winner: {100.0 * np.mean([m == None for m in metric_list])}\")" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "conversion: 56.00000000000001 %\n", - "\t simulations with no winner: 0.0\n", - "revenue: 0.0 %\n", - "\t simulations with no winner: 100.0\n", - "arpu: 0.0 %\n", - "\t simulations with no winner: 100.0\n" - ] - } - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### A/A Test" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 46, - "source": [ - "from tqdm import tqdm\n", - "import multiprocessing\n", - "from joblib import Parallel, delayed\n", - "import numpy as np\n", - "\n", - "N_SIMULATIONS = 100\n", - "\n", - "size = 1500\n", - "pA = 0.05\n", - "pB = 0.05\n", - "mu_revenueA = 20.0\n", - "mu_revenueB = 20.0\n", - "epsilon = 0.025\n", - "\n", - "num_cores = multiprocessing.cpu_count()\n", - "\n", - "\n", - "def get_data(p, size, mu):\n", - " lambdas = np.random.binomial(1, p, size)\n", - " revenues = np.random.exponential(mu, size)\n", - " \n", - " return (lambdas, revenues)\n", - "\n", - "\n", - "def generate_df(pA, pB, mu_revenueA, mu_revenueB, size):\n", - " a_lambda, a_revenue = get_data(pA, size, mu_revenueA)\n", - " b_lambda, b_revenue = get_data(pB, size, mu_revenueB)\n", - "\n", - " dfA = pd.DataFrame(data={\n", - " \"alternative\": ['A']*size,\n", - " \"revenue\": a_revenue, \n", - " \"sales\": a_lambda\n", - " })\n", - " dfB = pd.DataFrame(data={\n", - " \"alternative\": ['B']*size,\n", - " \"revenue\": b_revenue, \n", - " \"sales\": b_lambda\n", - " })\n", - "\n", - " df3 = pd.concat([dfA, dfB])\n", - " df3['sum_revenue'] = df3['revenue']\n", - " \n", - " return df3\n", - "\n", - "\n", - "list_of_dataframes = [generate_df(pA, pB, mu_revenueA, mu_revenueB, size) for _ in range(N_SIMULATIONS)]\n", - "inputs = tqdm(list_of_dataframes)\n", - "\n", - "\n", - "def run_one_experiment(df):\n", - " df = df.copy()\n", - " experiment = Experiment(name=\"A/B Test\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A')\n", - " results = experiment.run_experiment(df_results_per_user=df)\n", - " \n", - " # return the winner if some loss that is lower than epsilon, if any\n", - " winners = []\n", - " for metric in ['conversion', 'revenue', 'arpu']:\n", - " lossA = results['A']['statistics'][metric]['expected_loss']\n", - " lossB = results['B']['statistics'][metric]['expected_loss']\n", - " if lossA < lossB and lossA < epsilon:\n", - " winners.append('A')\n", - " elif lossB < lossA and lossB < epsilon:\n", - " winners.append('B')\n", - " else:\n", - " winners.append(None)\n", - " \n", - " return winners\n", - " \n", - "processed_list_3 = Parallel(n_jobs=num_cores)(delayed(run_one_experiment)(df) for df in inputs)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [08:54<00:00, 5.34s/it]\n" - ] - } - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 47, - "source": [ - "for i, metric in enumerate(['conversion', 'revenue', 'arpu']):\n", - " metric_list = [m[i] for m in processed_list_3]\n", - " print(f\"{metric}: {100.0 * np.mean([m =='B' for m in metric_list])} %\")\n", - " print(f\"\\t simulations with no winner: {100.0 * np.mean([m == None for m in metric_list])}\")" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "conversion: 55.00000000000001 %\n", - "\t simulations with no winner: 0.0\n", - "revenue: 0.0 %\n", - "\t simulations with no winner: 100.0\n", - "arpu: 0.0 %\n", - "\t simulations with no winner: 100.0\n" - ] - } - ], - "metadata": {} - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python (venv-janus)", - "language": "python", - "name": "venv" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": { - "height": "177px", - "width": "290px" - }, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file diff --git a/examples/dataset_summary.csv b/examples/dataset_summary.csv deleted file mode 100644 index 67bb623..0000000 --- a/examples/dataset_summary.csv +++ /dev/null @@ -1,7 +0,0 @@ -alternative,exposure_period,exposures,conversions,revenue,cost -A,'2022-01-01',100,10,100,90 -A,'2022-01-02',200,20,200,170 -A,'2022-01-03',100,12,120,100 -B,'2022-01-01',100,12,120,100 -B,'2022-01-02',110,15,180,160 -B,'2022-01-03',200,30,350,310 \ No newline at end of file diff --git a/examples/results_per_user.csv b/examples/results_per_user.csv deleted file mode 100644 index 7031a96..0000000 --- a/examples/results_per_user.csv +++ /dev/null @@ -1,11 +0,0 @@ -id,alternative,revenue,sales -1,A,0,0 -2,A,0,0 -3,A,0,0 -4,A,100,1 -5,A,200,1 -6,A,300,1 -7,B,0,0 -8,B,230,1 -9,B,170,1 -10,B,320,1 \ No newline at end of file diff --git a/examples/results_per_user_one_variant.csv b/examples/results_per_user_one_variant.csv deleted file mode 100644 index 8dfe7e1..0000000 --- a/examples/results_per_user_one_variant.csv +++ /dev/null @@ -1,7 +0,0 @@ -id,alternative,revenue,sum_revenue,sales -1,A,0,0,0 -2,A,0,0,0 -3,A,0,0,0 -4,A,100,100,1 -5,A,200,200,1 -6,A,300,300,1 diff --git a/experiment_example.ipynb b/experiment_example.ipynb deleted file mode 100644 index e6da946..0000000 --- a/experiment_example.ipynb +++ /dev/null @@ -1,770 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Imports (make sure to install janus)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install -e .." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip show janus" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install pyspark" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:38.828666Z", - "start_time": "2020-12-04T17:16:37.999997Z" - } - }, - "outputs": [], - "source": [ - "import janus\n", - "from janus.stats.experiment import Experiment, Variant\n", - "import pandas as pd\n", - "\n", - "import logging\n", - "logging.basicConfig(level=logging.INFO)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-11-27T17:29:01.054901Z", - "start_time": "2020-11-27T17:29:01.049977Z" - } - }, - "source": [ - "## Load Test Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:38.970497Z", - "start_time": "2020-12-04T17:16:38.949037Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idalternativerevenuesum_revenuesales
01A000
12A000
23A000
34A1001001
45A2002001
56A3003001
67B000
78B2302301
89B1701701
910B3203201
\n", - "
" - ], - "text/plain": [ - " id alternative revenue sum_revenue sales\n", - "0 1 A 0 0 0\n", - "1 2 A 0 0 0\n", - "2 3 A 0 0 0\n", - "3 4 A 100 100 1\n", - "4 5 A 200 200 1\n", - "5 6 A 300 300 1\n", - "6 7 B 0 0 0\n", - "7 8 B 230 230 1\n", - "8 9 B 170 170 1\n", - "9 10 B 320 320 1" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.read_csv(\"examples/results_per_user.csv\")\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-11-27T23:08:00.040508Z", - "start_time": "2020-11-27T23:08:00.031558Z" - } - }, - "source": [ - "We see that, for the data we have, alternative B looks better: 3 sales out of 4 participants, with average ticket of (230+170+320)/3 = 240 and average ARPU of 180. All metrics are higher that A's." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Initialize experiment" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:38.974932Z", - "start_time": "2020-12-04T17:16:38.972390Z" - } - }, - "outputs": [], - "source": [ - "experiment = Experiment(name=\"my_experiment\",\n", - " keymetrics=['conversion', 'revenue', 'arpu'],\n", - " baseline_variant_name='A')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We run the experiment over our data to get results" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:51.994576Z", - "start_time": "2020-12-04T17:16:38.976479Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:root:INITIALIZING experiment 'my_experiment' evaluation...\n", - "INFO:root:consolidating metrics over variants...\n", - "INFO:root:calculating bayesian statistics over variants for metric conversion\n", - "INFO:root:sampling data for variant A...\n", - "INFO:root:sampling data for variant B...\n", - "INFO:root:calculating probability to beat and expected loss for variant A...\n", - "INFO:root:calculating probability to beat and expected loss for variant B...\n", - "INFO:root:calculating bayesian statistics over variants for metric revenue\n", - "INFO:root:sampling data for variant A...\n", - "INFO:root:sampling data for variant B...\n", - "INFO:root:calculating probability to beat and expected loss for variant A...\n", - "INFO:root:calculating probability to beat and expected loss for variant B...\n", - "INFO:root:calculating bayesian statistics over variants for metric arpu\n", - "INFO:root:sampling data for variant A...\n", - "INFO:root:sampling data for variant B...\n", - "INFO:root:calculating probability to beat and expected loss for variant A...\n", - "INFO:root:calculating probability to beat and expected loss for variant B...\n", - "INFO:root:consolidating final results...\n", - "INFO:root:consolidating final results for variant A...\n", - "INFO:root:consolidating final results for variant B...\n", - "INFO:root:FINISHED experiment evaluation.\n" - ] - }, - { - "data": { - "text/plain": [ - "{'A': {'users': 6,\n", - " 'sales': 3,\n", - " 'paids': 3,\n", - " 'revenue': 600,\n", - " 'conversion': 0.5,\n", - " 'ticket': 200.0,\n", - " 'arpu': 100.0,\n", - " 'statistics': {'conversion': {'chance_to_beat': 0.246727,\n", - " 'expected_loss': 0.20485381083632845,\n", - " 'lift': -0.33333333333333337,\n", - " 'diff': -0.25},\n", - " 'revenue': {'chance_to_beat': 0.401356,\n", - " 'expected_loss': 90.26219646796406,\n", - " 'lift': -0.16666666666666663,\n", - " 'diff': -120},\n", - " 'arpu': {'chance_to_beat': 0.289212,\n", - " 'expected_loss': 81.58532656393245,\n", - " 'lift': -0.4444444444444444,\n", - " 'diff': -80.0}},\n", - " 'ratio': 0.6},\n", - " 'B': {'users': 4,\n", - " 'sales': 3,\n", - " 'paids': 3,\n", - " 'revenue': 720,\n", - " 'conversion': 0.75,\n", - " 'ticket': 240.0,\n", - " 'arpu': 180.0,\n", - " 'statistics': {'conversion': {'chance_to_beat': 0.753273,\n", - " 'expected_loss': 0.03789908703261173,\n", - " 'lift': 0.5,\n", - " 'diff': 0.25},\n", - " 'revenue': {'chance_to_beat': 0.598644,\n", - " 'expected_loss': 50.334123257073756,\n", - " 'lift': 0.19999999999999996,\n", - " 'diff': 120},\n", - " 'arpu': {'chance_to_beat': 0.710788,\n", - " 'expected_loss': 21.61296874689926,\n", - " 'lift': 0.8,\n", - " 'diff': 80.0}},\n", - " 'ratio': 0.4}}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "experiment.run_experiment(df_results_per_user=df)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:52.010513Z", - "start_time": "2020-12-04T17:16:51.996566Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
conversionrevenuearpu
chance_to_beat0.2467270.4013560.289212
expected_loss0.20485490.26219681.585327
lift-0.333333-0.166667-0.444444
diff-0.250000-120.000000-80.000000
\n", - "
" - ], - "text/plain": [ - " conversion revenue arpu\n", - "chance_to_beat 0.246727 0.401356 0.289212\n", - "expected_loss 0.204854 90.262196 81.585327\n", - "lift -0.333333 -0.166667 -0.444444\n", - "diff -0.250000 -120.000000 -80.000000" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame.from_dict(experiment.results['A']['statistics'])" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-12-04T17:16:52.031314Z", - "start_time": "2020-12-04T17:16:52.012627Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AB
users64
sales33
paids33
revenue600720
conversion0.50.75
ticket200.0240.0
arpu100.0180.0
statistics{'conversion': {'chance_to_beat': 0.246727, 'e...{'conversion': {'chance_to_beat': 0.753273, 'e...
ratio0.60.4
\n", - "
" - ], - "text/plain": [ - " A \\\n", - "users 6 \n", - "sales 3 \n", - "paids 3 \n", - "revenue 600 \n", - "conversion 0.5 \n", - "ticket 200.0 \n", - "arpu 100.0 \n", - "statistics {'conversion': {'chance_to_beat': 0.246727, 'e... \n", - "ratio 0.6 \n", - "\n", - " B \n", - "users 4 \n", - "sales 3 \n", - "paids 3 \n", - "revenue 720 \n", - "conversion 0.75 \n", - "ticket 240.0 \n", - "arpu 180.0 \n", - "statistics {'conversion': {'chance_to_beat': 0.753273, 'e... \n", - "ratio 0.4 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame.from_dict(experiment.results)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
userssalespaidsrevenueconversionticketarpuratio
A6336000.5200.0100.00.6
B4337200.75240.0180.00.4
\n", - "
" - ], - "text/plain": [ - " users sales paids revenue conversion ticket arpu ratio\n", - "A 6 3 3 600 0.5 200.0 100.0 0.6\n", - "B 4 3 3 720 0.75 240.0 180.0 0.4" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "_df = pd.DataFrame.from_dict(experiment.results).drop('statistics')\n", - "\n", - "_df.T" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
conversionrevenuearpu
chance_to_beat0.2467270.4013560.289212
expected_loss0.20485490.26219681.585327
lift-0.333333-0.166667-0.444444
diff-0.250000-120.000000-80.000000
\n", - "
" - ], - "text/plain": [ - " conversion revenue arpu\n", - "chance_to_beat 0.246727 0.401356 0.289212\n", - "expected_loss 0.204854 90.262196 81.585327\n", - "lift -0.333333 -0.166667 -0.444444\n", - "diff -0.250000 -120.000000 -80.000000" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "statsA = pd.DataFrame.from_dict(experiment.results['A']['statistics'])\n", - "statsB = pd.DataFrame.from_dict(experiment.results['B']['statistics'])\n", - "\n", - "statsA" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## References to compare\n", - "\n", - "https://vidogreg.shinyapps.io/bayes-arpu-test/\n", - "\n", - "https://abtestguide.com/bayesian/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/janus/__init__.py b/janus/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/janus/stats/__init__.py b/janus/stats/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/janus/stats/constants.py b/janus/stats/constants.py deleted file mode 100644 index 9313640..0000000 --- a/janus/stats/constants.py +++ /dev/null @@ -1,3 +0,0 @@ -MAX_ROUND_DIGITS = 5 -SAMPLE_SIZE = 1000000 -BOOSTRAP_SAMPLES = 10000 diff --git a/janus/stats/experiment.py b/janus/stats/experiment.py deleted file mode 100644 index 3f44e8d..0000000 --- a/janus/stats/experiment.py +++ /dev/null @@ -1,247 +0,0 @@ -# Databricks notebook source -# COMMAND ---------- - -from typing import Any, Dict, Iterator, List, Optional, Text -from janus.stats.constants import MAX_ROUND_DIGITS, SAMPLE_SIZE, BOOSTRAP_SAMPLES -from janus.stats.metrics import ( - Distribution, - ConversionDistribution, - RevenueDistribution, - ARPUDistribution, -) -from janus.stats.pyspark_bootstraping import ( - get_bootstraped_mean, - get_parallel_bootstrap, -) -import numpy as np -import pandas as pd -import logging - - -class Variant: - """ - Store variant relevant information and methods to consolidate data. - Variant is synonym of alternative. - """ - - def __init__(self, name: str): - self.name = name - self.distributions = {} - self.statistics = {} - - def consolidate_results(self, df: pd.DataFrame): - """ - Store resume metrics for later statistic calculations over a input dataframe that contains the - situation of each impression made on users - """ - - variant_df = df.query(f"alternative == '{self.name}'") - - self.variant_df = variant_df - self.users = variant_df.shape[0] - self.sales = variant_df["sales"].sum() - self.paids = variant_df["sales"].sum() - self.revenue = variant_df["revenue"].sum() - self.conversion = ( - round(self.paids / self.users, MAX_ROUND_DIGITS) if self.users > 0 else 0 - ) - self.ticket = ( - round(self.revenue / self.paids, MAX_ROUND_DIGITS) if self.paids > 0 else 0 - ) - self.arpu = ( - round(self.revenue / self.users, MAX_ROUND_DIGITS) if self.users > 0 else 0 - ) - - def calculate_conversion(self): - # get prior distribution for conversion and update it with results - conversion_distribution = ConversionDistribution() - not_paids = self.users - self.paids - conversion_distribution.update(self.paids, not_paids) - - # sample values for conversion from the posterior distribution - self.conversion_sampling = conversion_distribution.sample(SAMPLE_SIZE) - self.distributions["conversion"] = conversion_distribution - self.statistics["conversion"] = {} - - def calculate_revenue(self): - # get prior distribution for revenue and update it with results - revenue_distribution = RevenueDistribution() - revenue_distribution.update(self.paids, self.revenue) - - # sample values for revenue from the posterior distribution - self.revenue_sampling = revenue_distribution.sample(SAMPLE_SIZE) - self.distributions["revenue"] = revenue_distribution - self.statistics["revenue"] = {} - - def calculate_arpu(self): - # get prior distribution for ARPU and update it with results - arpu_distribution = ARPUDistribution() - - # sample values for ARPU from the posterior distribution (not necessary since its been calculated already) - self.arpu_sampling = self.conversion_sampling / self.revenue_sampling - self.distributions["arpu"] = arpu_distribution - self.statistics["arpu"] = {} - - def calculate_probabilities(self, other_variant, metric_name: str): - distribution = self.distributions[metric_name] - self_metric_samples = getattr(self, metric_name + "_sampling") - other_variant_metric_samples = getattr(other_variant, metric_name + "_sampling") - - if metric_name == "revenue": - # for revenue, we shall compare 1/theta's - self_metric_samples = 1 / self_metric_samples - other_variant_metric_samples = 1 / other_variant_metric_samples - - # calculate chance to beat the other variant - self.statistics[metric_name]["chance_to_beat"] = distribution.chance_to_beat( - self_metric_samples, other_variant_metric_samples, SAMPLE_SIZE - ) - - # calculate expected loss for the variant compared to the other - self.statistics[metric_name]["expected_loss"] = distribution.expected_loss( - self_metric_samples, other_variant_metric_samples, SAMPLE_SIZE - ) - - def calculate_bootstrap( - self, metric_name: str, eval_function=get_bootstraped_mean, spark_session=None - ): - if metric_name == "revenue": - data = list(self.variant_df.query("sales == 1")["revenue"]) - elif metric_name == "conversion": - data = list(self.variant_df["sales"]) - elif metric_name == "arpu": - data = list(self.variant_df["revenue"]) - - self.statistics[metric_name]["bootstrapped_mean"] = get_parallel_bootstrap( - eval_function, data, BOOSTRAP_SAMPLES, spark_session - ) - - -class Experiment: - """ - Class to handle an experiment containing two variants. - """ - - def __init__( - self, - name: str, - keymetrics: List[str], - baseline_variant_name: str = "baseline", - do_boostrap=False, - spark_session=None, - ): - self.name = name - self.keymetrics = keymetrics - self.baseline_variant_name = baseline_variant_name - self.do_boostrap = do_boostrap - if do_boostrap and not spark_session: - raise Exception( - "A spark session is necessary to run bootstrap calculations." - ) - self.spark_session = spark_session - self.results = {} - - def run_experiment(self, df_results_per_user: pd.DataFrame): - logging.info(f"INITIALIZING experiment '{self.name}' evaluation...") - variant_names = list(df_results_per_user.alternative.unique()) - assert ( - len(variant_names) == 2 - ), "experiment does not have exactly two variants as expected." - assert ( - self.baseline_variant_name in variant_names - ), "baseline variant name informed is not present on experiment data." - treatment_variant_name = [ - c for c in variant_names if c != self.baseline_variant_name - ][0] - - # Build variants and consolidate user data over them - logging.info("consolidating metrics over variants...") - variantA = Variant(name=self.baseline_variant_name) - variantB = Variant(name=treatment_variant_name) - self.variants = [variantA, variantB] - - variantA.consolidate_results(df_results_per_user) - variantB.consolidate_results(df_results_per_user) - - # Extract Bayesian Statistics for each metric and boostrap if necessary - for metric_name in self.keymetrics: - logging.info( - f"calculating bayesian statistics over variants for metric {metric_name}" - ) - self.evaluate_statistics(variantA, variantB, metric_name) - - logging.info("consolidating final results...") - self.consolidate_results(variantA, variantB) - - logging.info("FINISHED experiment evaluation.") - - return self.results - - def evaluate_statistics( - self, variantA: Variant, variantB: Variant, metric_name: str - ): - for variant in [variantA, variantB]: - logging.info(f"sampling data for variant {variant.name}...") - if metric_name == "conversion": - variant.calculate_conversion() - if metric_name == "revenue": - variant.calculate_revenue() - if metric_name == "arpu": - variant.calculate_arpu() - - if self.do_boostrap: - variant.calculate_boostrap(metric_name) - - for variant1, variant2 in zip((variantA, variantB), (variantB, variantA)): - logging.info( - f"calculating probability to beat and expected loss for variant {variant1.name}..." - ) - variant1.calculate_probabilities(variant2, metric_name) - - def consolidate_results(self, variantA: Variant, variantB: Variant): - for variant_interest, variant_other in zip( - (variantA, variantB), (variantB, variantA) - ): - logging.info( - f"consolidating final results for variant {variant_interest.name}..." - ) - results = {} - metrics_list = [ - "users", - "sales", - "paids", - "revenue", - "conversion", - "ticket", - "arpu", - "statistics", - ] - for metric in metrics_list: - results[metric] = getattr(variant_interest, metric) - - results["ratio"] = variant_interest.users / ( - variant_interest.users + variant_other.users - ) - for metric in self.keymetrics: - print("metric: ", metric) - if metric == "revenue": - # for variances, revenue modelling is the ticket - # this is a confusion to be fixed in the future - _metric = "ticket" - else: - _metric = metric - interest_value = getattr(variant_interest, _metric) - other_value = getattr(variant_other, _metric) - results["statistics"][metric]["lift"] = get_lift( - interest_value, other_value - ) - results["statistics"][metric]["diff"] = interest_value - other_value - - self.results[variant_interest.name] = results - - -def get_lift(a: float, b: float) -> float: - if a > 0: - return a / b - 1 - else: - return 0.0 diff --git a/janus/stats/metrics.py b/janus/stats/metrics.py deleted file mode 100644 index 37fbff8..0000000 --- a/janus/stats/metrics.py +++ /dev/null @@ -1,101 +0,0 @@ -from typing import Any, Dict, Iterator, List, Optional, Text -import numpy as np -from scipy.stats import beta - - -class Distribution: - def sample(self) -> np.ndarray: - raise NotImplementedError( - "An Distribution must implement a sampler for its own distribution" - ) - - def update(self): - raise NotImplementedError( - "An Distribution must implement a update method for posterior calculations" - ) - - def chance_to_beat( - self, - sample_metrics_for_variant_of_interest: np.ndarray, - sample_metrics_for_other_variant: np.ndarray, - sample_size: int, - ) -> float: - """ - In this method, it is assumed that one wants to know the probability that the metric on - variant of interest (conversion, revenue, ARPU etc) is higher than other variant's metric, so pay attention to the order of arguments. - This is the Monte Carlo implementation for Definition 6.1 on VWO paper. - """ - return ( - sum( - sample_metrics_for_variant_of_interest - > sample_metrics_for_other_variant - ) - / sample_size - ) - - def expected_loss( - self, - sample_metrics_for_variant_of_interest: np.ndarray, - sample_metrics_for_other_variant: np.ndarray, - sample_size: int, - ) -> float: - """ - In this method, it is assumed that one wants to know the expected loss for the metric on - variant of interest (conversion, revenue, ARPU etc) compared to the other variant's metric, so pay attention to the order of arguments. - This is the Monte Carlo implementation for Definition 6.1 on VWO paper. - """ - - diff = sample_metrics_for_other_variant - sample_metrics_for_variant_of_interest - - return sum(diff * (diff > 0)) / sample_size - - -class ConversionDistribution(Distribution): - """ - Class to handle Converstion Rate (Lambda) posterior calculations and sampling. - """ - - def __init__(self, a=1, b=1): - self.a = a - self.b = b - - def sample(self, sample_size: int) -> np.ndarray: - return np.random.beta(a=self.a, b=self.b, size=sample_size) - - def update(self, paids: int, not_paids: int): - self.a += paids - self.b += not_paids - - -class RevenueDistribution(Distribution): - """ - Class to handle Revenue (theta) posterior calculations and sampling. - """ - - def __init__(self, k=1, theta=1): - self.k = k - self.theta = theta - - def sample(self, sample_size: int) -> np.ndarray: - return np.random.gamma(shape=self.k, scale=self.theta, size=sample_size) - - def update(self, paids: int, revenue: float): - self.k += paids - self.theta = 1 / (1 + revenue) - - -class ARPUDistribution(Distribution): - """ - Class to handle ARPU (theta) posterior calculations and sampling. - We do not need any implementation, because the calculation uses - both Conversion and Revenue's Distributions. - """ - - def __init__(self): - pass - - def sample(self, sample_size: int) -> np.ndarray: - pass - - def update(self, paids: int, revenue: float): - pass diff --git a/janus/stats/pyspark_bootstraping.py b/janus/stats/pyspark_bootstraping.py deleted file mode 100644 index 4ed81f0..0000000 --- a/janus/stats/pyspark_bootstraping.py +++ /dev/null @@ -1,38 +0,0 @@ -# Databricks notebook source - -# código de exemplo -# https://medium.com/udemy-engineering/bootstrapping-with-spark-f7ac338702d6 - -# Porque o spark não pode ser usado para paralelizar completamente o processo -# https://medium.com/udemy-engineering/pyspark-under-the-hood-randomsplit-and-sample-inconsistencies-examined-7c6ec62644bc - -# Alternativa para acelerar: construir a função de média -# do sample em scala e chamar dentro do Python -# https://aseigneurin.github.io/2016/09/01/spark-calling-scala-code-from-pyspark.html -# https://community.cloudera.com/t5/Support-Questions/Is-it-possible-to-call-a-scala-function-in-python-pyspark/td-p/174835 - -from typing import List, Union -import random as rd -from pyspark.sql import SparkSession, DataFrame as SparkDataFrame - - -def get_bootstraped_mean(data: List[Union[float, int]]) -> float: - n_samples = len(data) - samples = [data[rd.randint(0, n_samples - 1)] for _ in range(0, n_samples)] - return sum(samples) / n_samples - - -def get_parallel_bootstrap( - function, - data: List[Union[float, int]], - num_samples: int, - spark_session: SparkSession, -) -> SparkDataFrame: - rdd = spark_session.sparkContext.parallelize(list(range(1, num_samples + 1))) - df = ( - rdd.map(lambda x: (x, function(data))) - .toDF() - .withColumnRenamed("_1", "sample") - .withColumnRenamed("_2", "sample_metric") - ) - return df diff --git a/janus/utils/__init__.py b/janus/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/janus/utils/make_dataframe_multivariate.py b/janus/utils/make_dataframe_multivariate.py deleted file mode 100644 index d87927c..0000000 --- a/janus/utils/make_dataframe_multivariate.py +++ /dev/null @@ -1,112 +0,0 @@ -from typing import List, Union -import numpy as np -import pandas as pd - - -def create_per_user_dataframe_multivariate( - df_summary_daily: pd.DataFrame, - conversion_value_cols: List[str], -): - """ - Produces a per-user dataframe using data from summary of an experiment on - some period basis like daily. The best case is to extract this data from - your sources, but this approximation will work in the absence of this data - and with much more insights. - - The arguments are mostly referenced in list of values per variant. It is - designed to be used in a for-loop for each period summary. - - Args: - """ - # check columns - ALLOWED_COLS = [ - "alternative", - "exposure_period", - "exposures", - "conversions", - ] + conversion_value_cols - - assert set(ALLOWED_COLS).issubset( - set(df_summary_daily.columns) - ), f"Use allowed cols plus 'conversion_value_cols': {ALLOWED_COLS}. Your cols are: {list(df_summary_daily.columns)}" - - df = pd.DataFrame( - columns=[ - "alternative", - "user_id", - "exposure_period", - "converted", - ] - + conversion_value_cols - ) - - # fix datetimes - df_summary_daily["exposure_period"] = pd.to_datetime( - df_summary_daily["exposure_period"] - ) - - # basic variables - variants = list(df_summary_daily.alternative.unique()) - periods = list(df_summary_daily.exposure_period.unique()) - all_users = [] - - # loop per period - for period in periods: - for variant in variants: - _df = df_summary_daily[ - (df_summary_daily.exposure_period == period) - & (df_summary_daily.alternative == variant) - ] - exposures = _df.exposures.values[0] - conversions = _df.conversions.values[0] - not_converted = exposures - conversions - - alternative_converted = np.repeat(variant, conversions) - alternative_not_converted = np.repeat(variant, not_converted) - did_user_converted = np.repeat(1, conversions) - did_users_not_converted = np.repeat(0, not_converted) - - start_id = all_users[-1] + 1 if all_users else 1 - user_id = np.linspace(start_id, start_id + exposures - 1, exposures) - user_id = [int(i) for i in user_id] - user_id_converted = user_id[:conversions] - user_id_not_converted = user_id[conversions:] - all_users.extend(user_id_converted) - all_users.extend(user_id_not_converted) - - # simulates that every user converted the same value - conversion_values_per_user_converted = {} - conversion_values_per_user_not_converted = {} - for conversion_col in conversion_value_cols: - conversion_values_per_user_converted[conversion_col] = np.repeat( - _df[conversion_col].values[0] / conversions, conversions - ) - conversion_values_per_user_not_converted[conversion_col] = np.repeat( - 0, not_converted - ) - - # Generate dataframes - data_converted = { - "alternative": alternative_converted, - "user_id": user_id_converted, - "exposure_period": [period] * len(user_id_converted), - "converted": did_user_converted, - } - data_converted.update(conversion_values_per_user_converted) - # return data_converted - df_converted = pd.DataFrame(data=data_converted) - - data_not_converted = { - "alternative": alternative_not_converted, - "user_id": user_id_not_converted, - "exposure_period": [period] * len(user_id_not_converted), - "converted": did_users_not_converted, - } - data_not_converted.update(conversion_values_per_user_not_converted) - df_not_converted = pd.DataFrame(data=data_not_converted) - - # Gather - df_period = pd.concat([df_converted, df_not_converted], axis=0) - df = pd.concat([df, df_period], axis=0) - - return df diff --git a/janus/utils/make_per_user_dataset_from_summary.ipynb b/janus/utils/make_per_user_dataset_from_summary.ipynb deleted file mode 100644 index 3477a84..0000000 --- a/janus/utils/make_per_user_dataset_from_summary.ipynb +++ /dev/null @@ -1,30 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.13 64-bit ('3.9.13')", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.9.13" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "8b5b582196851f055280fec937521dd2c1ff8a8374656d3cc566739664afc497" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/janus/utils/make_test_dataframe.py b/janus/utils/make_test_dataframe.py deleted file mode 100644 index 2327d3f..0000000 --- a/janus/utils/make_test_dataframe.py +++ /dev/null @@ -1,61 +0,0 @@ -import numpy as np -import pandas as pd -import math - - -def createTestDataFrame( - number_users: int, - ratio_baseline: float, # entre 0 e 1 - conversion_baseline: float, # entre 0 e 1 - conversion_alternative: float, # entre 0 e 1 - average_ticket_baseline: float, - average_ticket_alternative: float, -): - # testes antes de começar função: - # TODO: 1. nenhum parâmetro nulo - # TODO: 2. todos os tipos estão certos - # TODO: 3. parâmetros percentuais estão entre 0 e 1 - - # parâmetros da distribuição gama: - # k (shape) = 1 + pagos - # Θ (scale) = 1 / (1 + revenue total) - - # TODO: create for loop for more alternatives - - # Parâmetros da distribuição gamma do baseline - users_baseline = round(ratio_baseline * number_users) - paids_baseline = math.ceil(users_baseline * conversion_baseline) - revenue_total_baseline = paids_baseline * average_ticket_baseline - shape_baseline = 1 + paids_baseline - scale_baseline = 1 / (1 + revenue_total_baseline) - ## TODO: check on paper if payment estimation are ok - # here is the gamma inverse - payments_baseline = np.random.gamma( - shape_baseline, scale_baseline, size=paids_baseline - ) - payments_baseline = np.array([round(1 / xi, 2) for xi in payments_baseline]) - zeros_baseline = np.repeat(0, users_baseline - paids_baseline) - result_baseline = np.concatenate((payments_baseline, zeros_baseline)) - baseline = pd.DataFrame({"revenue": result_baseline, "alternative": "baseline"}) - - # Parâmetros da distribuição gamma da alternative - users_alternative = number_users - users_baseline - paids_alternative = math.ceil(users_alternative * conversion_alternative) - revenue_total_alternative = paids_alternative * average_ticket_alternative - shape_alternative = 1 + paids_alternative - scale_alternative = 1 / (1 + revenue_total_alternative) - payments_alternative = np.random.gamma( - shape_alternative, scale_alternative, size=paids_alternative - ) - payments_alternative = np.array([round(1 / xi, 2) for xi in payments_alternative]) - zeros_alternative = np.repeat(0, users_alternative - paids_alternative) - result_alternative = np.concatenate((payments_alternative, zeros_alternative)) - alternative = pd.DataFrame({"revenue": result_alternative, "alternative": "test"}) - - # Dataframe final - df = pd.concat([baseline, alternative], ignore_index=True) - ## TODO: check why this sampling is not working well - df = df.sample(frac=1) - df = df.reset_index().rename(columns={"index": "user_id"}).sort_values("user_id") - - return df diff --git a/logo.png b/logo.png deleted file mode 100644 index 7cddc34..0000000 Binary files a/logo.png and /dev/null differ diff --git a/main.py b/main.py new file mode 100644 index 0000000..bd4c31c --- /dev/null +++ b/main.py @@ -0,0 +1,354 @@ +from fastapi import FastAPI, Request, Form, HTTPException +from fastapi.responses import HTMLResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates +from pydantic import BaseModel +from typing import List, Optional +import numpy as np +import pandas as pd +from bayesian_testing.experiments import ( + BinaryDataTest, + DeltaLognormalDataTest, + ExponentialDataTest, +) +from dataclasses import dataclass +import json +import os +import logging +import traceback +from datetime import datetime + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[logging.FileHandler("app.log"), logging.StreamHandler()], +) +logger = logging.getLogger(__name__) + +# Create directories if they don't exist +os.makedirs("static", exist_ok=True) +os.makedirs("templates", exist_ok=True) + +app = FastAPI(title="Janus: Bayesian A/B Testing App") + +# Mount static files directory +app.mount("/static", StaticFiles(directory="static"), name="static") + +# Set up Jinja2 templates +templates = Jinja2Templates(directory="templates") + + +@dataclass +class Variant: + name: str + impressions: int + conversions: int + revenue: float + + +class WebsiteExperiment: + """ + Class to run website experiments from aggregated data. + Focused in conversion, revenue and ARPU metrics. + """ + + def __init__(self, variants: List[Variant], baseline_variant: str): + self.variants: List[Variant] = variants + self.variants_results = [] + self.baseline_variant: str = baseline_variant + + def run_conversion_experiment(self, sim_count: int = 100_000, show=False): + self.conversion_test: BinaryDataTest = BinaryDataTest() + for v in self.variants: + self.conversion_test.add_variant_data_agg( + v.name, totals=v.impressions, positives=v.conversions + ) + + self.conversion_results = self.conversion_test.evaluate() + if show: + print( + pd.DataFrame(self.conversion_results).to_markdown( + tablefmt="grid", index=False + ) + ) + + def run_arpu_experiment(self, sim_count: int = 100_000, show=False): + self.arpu_test: DeltaLognormalDataTest = DeltaLognormalDataTest() + for v in self.variants: + rev_logs = [np.log(v.revenue / v.conversions)] * v.conversions + self.arpu_test.add_variant_data_agg( + v.name, + totals=v.impressions, + positives=v.conversions, + sum_values=v.revenue, + sum_logs=sum(rev_logs), + sum_logs_2=sum([np.square(l) for l in rev_logs]), + ) + + self.arpu_results = self.arpu_test.evaluate() + if show: + print( + pd.DataFrame(self.arpu_results).to_markdown( + tablefmt="grid", index=False + ) + ) + + def run_revenue_per_sale_experiment(self, sim_count: int = 100_000, show=False): + self.revenue_per_sale_test: ExponentialDataTest = ExponentialDataTest() + for v in self.variants: + if v.conversions > 0: + # For revenue per sale, we use the average revenue per conversion + # as the scale parameter for the exponential distribution + avg_revenue_per_sale = v.revenue / v.conversions + self.revenue_per_sale_test.add_variant_data_agg( + v.name, totals=v.conversions, sum_values=v.revenue + ) + else: + # Handle the case where there are no conversions + self.revenue_per_sale_test.add_variant_data_agg( + v.name, totals=0, sum_values=0 + ) + + # Higher revenue per sale is better, so min_is_best=False + self.revenue_per_sale_results = self.revenue_per_sale_test.evaluate( + sim_count=sim_count + ) + if show: + print( + pd.DataFrame(self.revenue_per_sale_results).to_markdown( + tablefmt="grid", index=False + ) + ) + + def run(self, **kargs): + self.run_conversion_experiment(**kargs) + self.run_arpu_experiment(**kargs) + self.run_revenue_per_sale_experiment(**kargs) + + def compile_full_data( + self, + show: bool = False, + revenue_precision: int = 4, + conversion_precision: int = 4, + probs_precision: int = 4, + ): + compiled_res = [] + for v, conv_res, arpu_res, rev_per_sale_res in zip( + self.variants, + self.conversion_results, + self.arpu_results, + self.revenue_per_sale_results, + ): + res = {} + # header info + res.update({"variant": v.name}) + res.update( + { + "summary": { + "impressions": int(v.impressions), + "conversions": int(v.conversions), + "revenue": round(v.revenue, revenue_precision), + "conversion": round(v.conversions / v.impressions, 4), + "avg_ticket": ( + round(v.revenue / v.conversions, 4) + if v.conversions > 0 + else 0 + ), + "arpu": round(v.revenue / v.impressions, 4), + } + } + ) + # conversion results + res.update( + { + "conversion": { + "expected_loss": round(conv_res["expected_loss"], 4), + "prob_being_best": round( + conv_res["prob_being_best"], probs_precision + ), + } + } + ) + # arpu results + res.update( + { + "arpu": { + "expected_loss": round(arpu_res["expected_loss"], 4), + "prob_being_best": round( + arpu_res["prob_being_best"], probs_precision + ), + } + } + ) + # revenue per sale results + res.update( + { + "revenue_per_sale": { + "expected_loss": round(rev_per_sale_res["expected_loss"], 4), + "prob_being_best": round( + rev_per_sale_res["prob_being_best"], probs_precision + ), + } + } + ) + compiled_res.append(res) + + self.compiled_res = compiled_res + return compiled_res + + def get_reports(self, probs_precision: int = 4): + self.compile_full_data() + + summaries = [] + conv_stats = [] + arpu_stats = [] + rev_per_sale_stats = [] + baseline_res = [ + res + for res in self.compiled_res + if res.get("variant") == self.baseline_variant + ][0] + for variant in self.compiled_res: + summary = {"variant": variant.get("variant")} + summary.update(variant.get("summary")) + summaries.append(summary) + + conv = {"variant": variant.get("variant")} + conv.update(variant.get("conversion")) + conv.update( + { + "lift": round( + summary["conversion"] + / baseline_res.get("summary").get("conversion") + - 1, + probs_precision, + ) + } + ) + conv_stats.append(conv) + + arpu = {"variant": variant.get("variant")} + arpu.update(variant.get("arpu")) + arpu.update( + { + "lift": round( + summary["arpu"] / baseline_res.get("summary").get("arpu") - 1, + probs_precision, + ) + } + ) + arpu_stats.append(arpu) + + rev_per_sale = {"variant": variant.get("variant")} + rev_per_sale.update(variant.get("revenue_per_sale")) + baseline_avg_ticket = baseline_res.get("summary").get("avg_ticket") + variant_avg_ticket = summary["avg_ticket"] + # Handle division by zero + if baseline_avg_ticket > 0 and variant_avg_ticket > 0: + rev_per_sale.update( + { + "lift": round( + variant_avg_ticket / baseline_avg_ticket - 1, + probs_precision, + ) + } + ) + else: + rev_per_sale.update({"lift": 0}) + rev_per_sale_stats.append(rev_per_sale) + + _df_summary = pd.DataFrame(summaries) + _df_conv = pd.DataFrame(conv_stats) + _df_arpu = pd.DataFrame(arpu_stats) + _df_rev_per_sale = pd.DataFrame(rev_per_sale_stats) + + return _df_summary, _df_conv, _df_arpu, _df_rev_per_sale + + +# Pydantic models for API +class VariantInput(BaseModel): + name: str + impressions: int + conversions: int + revenue: float + + +class ExperimentInput(BaseModel): + variants: List[VariantInput] + baseline_variant: str + + +class ExperimentResult(BaseModel): + summary: dict + conversion_stats: dict + arpu_stats: dict + + +@app.get("/", response_class=HTMLResponse) +async def home(request: Request): + return templates.TemplateResponse("index.html", {"request": request}) + + +@app.post("/api/analyze") +async def analyze_experiment(experiment_input: ExperimentInput): + logger.info( + f"Received experiment analysis request with {len(experiment_input.variants)} variants" + ) + try: + # Log input data summary + logger.info(f"Baseline variant: {experiment_input.baseline_variant}") + for v in experiment_input.variants: + logger.info( + f"Variant {v.name}: impressions={v.impressions}, conversions={v.conversions}, revenue={v.revenue}" + ) + + # Convert input to Variant objects + variants = [ + Variant( + name=v.name, + impressions=v.impressions, + conversions=v.conversions, + revenue=v.revenue, + ) + for v in experiment_input.variants + ] + + # Create and run experiment + logger.info("Creating experiment and running analysis") + experiment = WebsiteExperiment(variants, experiment_input.baseline_variant) + experiment.run() + + # Get reports + logger.info("Generating experiment reports") + df_summary, df_conv, df_arpu, df_rev_per_sale = experiment.get_reports() + + # Convert DataFrames to dictionaries + summary_dict = df_summary.to_dict(orient="records") + conv_dict = df_conv.to_dict(orient="records") + arpu_dict = df_arpu.to_dict(orient="records") + rev_per_sale_dict = df_rev_per_sale.to_dict(orient="records") + + logger.info("Successfully completed experiment analysis") + return { + "summary": summary_dict, + "conversion_stats": conv_dict, + "arpu_stats": arpu_dict, + "revenue_per_sale_stats": rev_per_sale_dict, + } + except Exception as e: + error_msg = f"Error in experiment analysis: {str(e)}" + stack_trace = traceback.format_exc() + logger.error(f"{error_msg}\n{stack_trace}") + raise HTTPException(status_code=400, detail=error_msg) + + +@app.get("/health") +async def health_check(): + return {"status": "healthy"} + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True) diff --git a/pages/1_Analyze_with_Summary_Information.py b/pages/1_Analyze_with_Summary_Information.py deleted file mode 100644 index ee2679d..0000000 --- a/pages/1_Analyze_with_Summary_Information.py +++ /dev/null @@ -1,121 +0,0 @@ -import pandas as pd -import numpy as np -import scipy.stats -from scipy.stats import norm -import streamlit as st - -import logging - -import janus -from janus.stats.experiment import Experiment, Variant -from janus.utils.make_dataframe_multivariate import ( - create_per_user_dataframe_multivariate, -) - -from utils import save_results_in_session_state, explain_metrics - -st.set_page_config(page_title=" A/B Testing using summary information", page_icon="📊") - -st.markdown( - """ -# 📊 A/B Testing using Summary Information - -This is the most simple approach to analyze your A/B Test. Just input these summary information and wait for calculations: - -- **total impressions in control/treatment**: total of participants in each variant. -- **total conversions in control/treatment**: total of conversions in each variant. -- **total conversion value in control/treatment**: the sum of conversion values in each variant (e.g.: revenue). -""" -) - - -with st.form(key="my_form"): - # Control - control_impressions = st.number_input( - label="Impressions in Control", - value=1000, - ) - control_conversions = st.number_input( - label="Conversions in Control", - value=100, - ) - control_total_value = st.number_input( - label="Total Conversion Value in Control", value=200.0, step=1.0, format="%.4f" - ) - - # Treatment - test_impressions = st.number_input( - label="Impressions in Treatment", - value=1000, - ) - test_conversions = st.number_input( - label="Conversions in Treatment", - value=120, - ) - test_total_value = st.number_input( - label="Total Conversion Value in Treatment", - value=250.00, - step=1.0, - format="%.4f", - ) - - experiment_name = st.text_input( - label="Experiment Name (Optional)", value="My Experiment" - ) - - submit_button = st.form_submit_button(label="Run Experiment") - -if submit_button: - # create dataframe with summary results - df = pd.DataFrame( - data={ - "alternative": ["control", "treatment"], - "exposure_period": ["2022-01-01"] * 2, # hacking, not to be used - "exposures": [control_impressions, test_impressions], - "conversions": [control_conversions, test_conversions], - "total_value": [control_total_value, test_total_value], - } - ) - conversion_bool_col = "conversions" - conversion_value_cols = ["total_value"] - - df_per_user_simulated = create_per_user_dataframe_multivariate( - df, conversion_value_cols=conversion_value_cols - ) - - # st.write("df") - # st.dataframe(df) - # st.write("df_per_user_simulated") - # st.dataframe(df_per_user_simulated) - - # Initialize Experiment - with st.spinner(f"Analyzing Experiment..."): - # fix cols names - # TODO: generalize this code for all pages and generalize - # lib's revenue col to monetary values - df_per_user_simulated = df_per_user_simulated.rename( - columns={"converted": "sales", "total_value": "revenue"} - ) # hacking, sales are generic conversions in janus lib - experiment = Experiment( - name=experiment_name, - keymetrics=["conversion", "revenue", "arpu"], - baseline_variant_name="control", - ) - experiment.run_experiment(df_results_per_user=df_per_user_simulated) - save_results_in_session_state( - experiment, control_label="control", treatment_label="treatment" - ) - - # Show Results in dataframe form v0 - st.write("## Summary Results") - _df = pd.DataFrame.from_dict(experiment.results).drop("statistics").T - st.dataframe(data=_df) - - st.write("## Statistical Results") - explain_metrics() - - st.write("### Control") - st.dataframe(data=pd.DataFrame.from_dict(st.session_state.control_stats)) - - st.write("### Treatment") - st.dataframe(data=pd.DataFrame.from_dict(st.session_state.treatment_stats)) diff --git a/pages/2_Analyze_with_Summary_CSV.py b/pages/2_Analyze_with_Summary_CSV.py deleted file mode 100644 index c5ccdb1..0000000 --- a/pages/2_Analyze_with_Summary_CSV.py +++ /dev/null @@ -1,173 +0,0 @@ -import pandas as pd -import numpy as np -import scipy.stats -from scipy.stats import norm -import streamlit as st - -import logging - -import janus -from janus.stats.experiment import Experiment, Variant -from janus.utils.make_dataframe_multivariate import ( - create_per_user_dataframe_multivariate, -) - -from utils import save_results_in_session_state, explain_metrics - -st.set_page_config(page_title="A/B Testing using summary CSV", page_icon="📊") - -st.markdown( - """ -# 📊 A/B Testing using summary CSV - -This approach uses a CSV with summary results per day, week or any period (see example below in _Use example CSV_). This is suitable for cases -where you only have access to gross results per day/week/periods. If you just have data for the whole experiment, check the page 'Analyze with Summary Information' page. - -The CSV **must be separated only by commas** and must have these at least these columns: -- **alternative (string or integer)**: which alternative the participant got exposured. -- **exposure_period (string):** period (e.g day, week etc) -- **exposures (integer):** how many impressions the alternative had in that period. -- **conversions (integer):** number of conversions -- **value (float):** value from conversions in that period. You can choose it in the form below, so the column name can be different. This can actually be any monetary value from conversions, typically it is _revenue_. But it can be other examples like cost, return etc. -""" -) - -uploaded_file = st.file_uploader("Upload my CSV", type=".csv") - -use_example_file = st.checkbox( - "Use example CSV", False, help="Use in-built example file to demo the app" -) - -ab_default = None - -# If CSV is not uploaded and checkbox is filled, use values from the example file -# and pass them down to the next if block -logging.info(f"Using example file: {use_example_file}...") -if use_example_file: - uploaded_file = "examples/dataset_summary.csv" - ab_default = ["alternative"] - -if uploaded_file: - df = pd.read_csv(uploaded_file) - df["exposure_period"] = pd.to_datetime(df.exposure_period) - - st.markdown("### Data preview") - st.dataframe(df.head()) - - st.markdown("### Select columns for analysis:") - with st.form(key="my_form"): - label_values = st.selectbox( - "Column with alternative labels", - options=df.columns, - help="Select which column refers to your A/B testing labels.", - ) - logging.info(f"label_values: {label_values}") - if label_values: - logging.info(f"alternatives: {df[label_values].unique()}") - control = df[label_values].unique()[0] - treatment = df[label_values].unique()[1] - decide = st.radio( - f"Is *{treatment}* group the treatment one?", - options=["Yes", "No"], - help="Select yes if this is group B (or the treatment group) from your test.", - ) - if decide == "No": - control, treatment = treatment, control - visitors_a = df[label_values].value_counts()[control] - visitors_b = df[label_values].value_counts()[treatment] - else: - st.warning( - "Please select both an **treatment column** and a **Result column**." - ) - st.stop() - - # choose conversion boolean col - conversion_bool_col = st.selectbox( - "Column with boolean indicator of conversions", - options=[ - c - for c in df.columns - if c not in [label_values, "exposure_period", "exposures"] - ], - help="Select which column refers to number of conversions.", - ) - - # choose conversion value col - conversion_value_cols = st.multiselect( - "Column with value from conversions", - options=[ - c - for c in df.columns - if c - not in [ - label_values, - "sales", - "exposure_period", - "exposures", - "conversions", - ] - ], - help="Select which column refers to the value that comes from conversions, e.g.: revenue or cost from conversons.", - ) - - submit_button = st.form_submit_button(label="Continue") - - if submit_button: - # Treat dataframe to use in the same engine - conversion_bool_col = conversion_bool_col[0] - logging.info(f"conversion_bool_col: {conversion_bool_col}") - logging.info(f"conversion_value_col: {conversion_value_cols}") - - df = df.rename( - columns={ - conversion_bool_col: "conversions", - } - ) - df_per_user_simulated = create_per_user_dataframe_multivariate( - df, conversion_value_cols=conversion_value_cols - ) - # st.markdown( - # """ - # ### Data Simulated per user preview - # We use this form as input to our Statistical Engine. - # """ - # ) - # st.dataframe(df_per_user_simulated.head()) - - # type(uploaded_file) == str, means the example file was used - name = ( - "dataset_summary.csv" - if isinstance(uploaded_file, str) - else uploaded_file.name - ) - experiment_name = name.split(".")[0] - - # Initialize Experiment - with st.spinner(f"Analyzing Experiment for CSV '{name}'..."): - # fix cols names - df_per_user_simulated = df_per_user_simulated.rename( - columns={"converted": "sales"} - ) # hacking, sales are generic conversions in janus lib - experiment = Experiment( - name=experiment_name, - keymetrics=["conversion", "revenue", "arpu"], - baseline_variant_name=control, - ) - experiment.run_experiment(df_results_per_user=df_per_user_simulated) - save_results_in_session_state( - experiment, control_label=control, treatment_label=treatment - ) - - # Show Results in dataframe form v0 - st.write("## Summary Results") - _df = pd.DataFrame.from_dict(experiment.results).drop("statistics").T - st.dataframe(data=_df) - - st.write("## Statistical Results") - explain_metrics() - - st.write("### Control") - st.dataframe(data=pd.DataFrame.from_dict(st.session_state.control_stats)) - - st.write("### Treatment") - st.dataframe(data=pd.DataFrame.from_dict(st.session_state.treatment_stats)) diff --git a/pages/3_Analyze_with_Per_Impression_CSV.py b/pages/3_Analyze_with_Per_Impression_CSV.py deleted file mode 100644 index 4712546..0000000 --- a/pages/3_Analyze_with_Per_Impression_CSV.py +++ /dev/null @@ -1,120 +0,0 @@ -import pandas as pd -import numpy as np -import scipy.stats -from scipy.stats import norm -import streamlit as st - -import logging - -import janus -from janus.stats.experiment import Experiment, Variant - -from utils import save_results_in_session_state, explain_metrics - -st.set_page_config(page_title="A/B Testing using per-participant CSV", page_icon="📊") - -st.markdown( - """ -# 📊 A/B Testing using per-participant CSV - -This way of analyzing uses a CSV with one participant per row.You can upload your experiment results in CSV format to see with significance which alternative has more probability of being the best. -The CSV should have one row per exposure, e.g., one row per participant user. - -The CSV **must be separated only by commas** and must have these at least these columns: -- **id (string or integer)**: any unique id or unique label. -- **alternative (string or integer)**: which alternative the participant got exposured. -- **revenue (float)**: total quantity value for conversions (e.g.: money). -- **sales (integer)**: how many conversions the participant had (typically 0 or 1). - - -You can see an example clicking in 'Use example file' below for a demonstration. -""" -) - -uploaded_file = st.file_uploader("Upload my CSV", type=".csv") - -use_example_file = st.checkbox( - "Use example CSV", False, help="Use in-built example file to demo the app" -) - -ab_default = None - - -# If CSV is not uploaded and checkbox is filled, use values from the example file -# and pass them down to the next if block -logging.info(f"Using example file: {use_example_file}...") -if use_example_file: - uploaded_file = "examples/results_per_user.csv" - ab_default = ["alternative"] - -if uploaded_file: - df = pd.read_csv(uploaded_file) - - st.markdown("### Data preview") - st.dataframe(df.head()) - - st.markdown("### Select columns for analysis:") - with st.form(key="my_form"): - label_values = st.multiselect( - "Column with alternative labels", - options=df.columns, - help="Select which column refers to your A/B testing labels.", - default=ab_default, - )[0] - if label_values: - logging.info(f"alternatives: {df[label_values].unique()}") - control = df[label_values].unique()[0] - treatment = df[label_values].unique()[1] - decide = st.radio( - f"Is *{treatment}* group the treatment one?", - options=["Yes", "No"], - help="Select yes if this is group B (or the treatment group) from your test.", - ) - if decide == "No": - control, treatment = treatment, control - visitors_a = df[label_values].value_counts()[control] - visitors_b = df[label_values].value_counts()[treatment] - - submit_button = st.form_submit_button(label="Run Experiment") - - if submit_button: - logging.info("Running Experiment...") - if not label_values: - st.warning( - "Please select both an **treatment column** and a **Result column**." - ) - st.stop() - - # type(uploaded_file) == str, means the example file was used - name = ( - "Website_Results.csv" - if isinstance(uploaded_file, str) - else uploaded_file.name - ) - experiment_name = name.split(".")[0] - - # Initialize Experiment - with st.spinner(f"Analyzing Experiment for CSV '{name}'..."): - experiment = Experiment( - name=experiment_name, - keymetrics=["conversion", "revenue", "arpu"], - baseline_variant_name=control, - ) - experiment.run_experiment(df_results_per_user=df) - save_results_in_session_state( - experiment, control_label=control, treatment_label=treatment - ) - - # Show Results in dataframe form v0 - st.write("## Summary Results") - _df = pd.DataFrame.from_dict(experiment.results).drop("statistics").T - st.dataframe(data=_df) - - st.write("## Statistical Results") - explain_metrics() - - st.write("### Control") - st.dataframe(data=pd.DataFrame.from_dict(st.session_state.control_stats)) - - st.write("### Treatment") - st.dataframe(data=pd.DataFrame.from_dict(st.session_state.treatment_stats)) diff --git a/pages/4_Why_Bayesian.py b/pages/4_Why_Bayesian.py deleted file mode 100644 index af349a2..0000000 --- a/pages/4_Why_Bayesian.py +++ /dev/null @@ -1,48 +0,0 @@ -import streamlit as st - -import logging - -st.set_page_config(page_title="Why Bayesian?", page_icon="📊") - -logging.basicConfig(level=logging.INFO) - -st.markdown( - """ -# 📊 Why Bayesian? - -This engine is more powerful than common online websites becasue it measures statistics for 3 very important variables at once: -- **conversion rate** (e.g., percentage of visits that turn into sales) -- **monetary value for conversions** (e.g., revenue per transaction) -- **average value per impression** (e.g., Average Revenue per User) - -Sometimes, conversion rate is not the best metric for your test: sometimes the most important is if you're bringing more money to the table. That's why ARPU helps you a lot. Revenue also helps you to undestand how your ticket sale is affected between variants. - -These are not usual even in famous tools like [abtestguide](https://abtestguide.com/bayesian/) or their [frequentist approach](https://abtestguide.com/calc/), because they only measures for conversion rate.\n - - -See below the main advantages of this method: -""" -) - -st.image("stats-differences.png") - - -st.markdown( - """ -## References -* [What is A/B Testing](https://en.wikipedia.org/wiki/A/B_testing) -* [Its time to rethink A/B Testing](https://www.gamedeveloper.com/business/it-s-time-to-re-think-a-b-testing) -* [VWO Website](https://vwo.com/). VWO is a reference on this subject. The bayesian calculations here were implemented based on [this VWO white paper](https://cdn2.hubspot.net/hubfs/310840/VWO_SmartStats_technical_whitepaper.pdf). -* [Agile A/B testing with Bayesian Statistics and Python](https://web.archive.org/web/20150419163005/http://www.bayesianwitch.com/blog/2014/bayesian_ab_test.html) -* [Understanding Bayesian A/B testing (using baseball statistics)](http://varianceexplained.org/r/bayesian_ab_baseball/) -* [It’s time to abandon A/B testing](https://mobiledevmemo.com/its-time-to-abandon-a-b-testing/) -* [Conjugate Priors](https://en.wikipedia.org/wiki/Conjugate_prior) -* [Bayesian A/B Testing Course by Lazy Programmer at Udemy](https://www.udemy.com/course/bayesian-machine-learning-in-python-ab-testing) -* [Binomial Distributions](https://www.youtube.com/watch?v=8idr1WZ1A7Q) -* [Bayes theorem](https://www.youtube.com/watch?v=HZGCoVF3YvM&t=9s) -* [The quick proof of Bayes Theorem](https://www.youtube.com/watch?v=U_85TaXbeIo) - -See more more at my [github](https://github.com/lgabs/janus). - -""" -) diff --git a/requirements.txt b/requirements.txt index 735bb36..b9dbaa1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,12 @@ -pandas==1.4.4 -numpy==1.25.1 -scipy==1.11.1 -streamlit==1.13.0 -altair==4.0 -pyspark==3.0.0 \ No newline at end of file +fastapi==0.115.11 +uvicorn==0.24.0 +pydantic==2.4.2 +bayesian-testing==0.6.2 +tabulate==0.9.0 +numpy==1.26.2 +pandas==2.1.3 +scipy==1.11.3 +python-multipart==0.0.6 +jinja2==3.1.2 +pytest==8.3.5 +httpx==0.28.1 \ No newline at end of file diff --git a/static/app.js b/static/app.js new file mode 100644 index 0000000..f672535 --- /dev/null +++ b/static/app.js @@ -0,0 +1,402 @@ +document.addEventListener('DOMContentLoaded', function() { + // DOM elements + const experimentForm = document.getElementById('experimentForm'); + const variantsContainer = document.getElementById('variantsContainer'); + const addVariantBtn = document.getElementById('addVariantBtn'); + const baselineVariantInput = document.getElementById('baselineVariant'); + const resultsSection = document.getElementById('resultsSection'); + const summaryTable = document.getElementById('summaryTable').querySelector('tbody'); + const conversionTable = document.getElementById('conversionTable').querySelector('tbody'); + const arpuTable = document.getElementById('arpuTable').querySelector('tbody'); + const revenuePerSaleTable = document.getElementById('revenuePerSaleTable').querySelector('tbody'); + const exportResultsBtn = document.getElementById('exportResultsBtn'); + + // Template for variant inputs + const variantTemplate = document.getElementById('variantTemplate'); + + // Counter for variant numbering + let variantCounter = 0; + + // Add initial variants (at least 2) + addVariant('A'); + addVariant('B'); + + // Event listeners + addVariantBtn.addEventListener('click', () => { + const nextLetter = String.fromCharCode(65 + variantCounter); // A, B, C, ... + addVariant(nextLetter); + }); + + experimentForm.addEventListener('submit', handleFormSubmit); + exportResultsBtn.addEventListener('click', exportResults); + + // Add a new variant input to the form + function addVariant(suggestedName = '') { + variantCounter++; + + // Clone the template + const variantNode = document.importNode(variantTemplate.content, true); + + // Update variant number + variantNode.querySelector('.variant-number').textContent = variantCounter; + + // Set suggested name if provided + if (suggestedName) { + variantNode.querySelector('.variant-name').value = suggestedName; + } + + // Add remove event listener + variantNode.querySelector('.remove-variant').addEventListener('click', function() { + this.closest('.variant-card').remove(); + updateVariantNumbers(); + }); + + // Add to container + variantsContainer.appendChild(variantNode); + + // If this is the first variant, suggest it as baseline + if (variantCounter === 1 && !baselineVariantInput.value) { + baselineVariantInput.value = suggestedName; + } + } + + // Update variant numbers after removal + function updateVariantNumbers() { + const variants = variantsContainer.querySelectorAll('.variant-card'); + variants.forEach((variant, index) => { + variant.querySelector('.variant-number').textContent = index + 1; + }); + variantCounter = variants.length; + } + + // Handle form submission + async function handleFormSubmit(event) { + event.preventDefault(); + + // Validate form + if (!validateForm()) { + return; + } + + // Show loading state + const submitBtn = experimentForm.querySelector('button[type="submit"]'); + const originalBtnText = submitBtn.innerHTML; + submitBtn.innerHTML = ' Analyzing...'; + submitBtn.disabled = true; + + try { + // Collect form data + const formData = collectFormData(); + + // Send API request + const response = await fetch('/api/analyze', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(formData) + }); + + if (!response.ok) { + throw new Error('API request failed'); + } + + const data = await response.json(); + + // Display results + displayResults(data); + + // Show results section + resultsSection.classList.remove('d-none'); + resultsSection.scrollIntoView({ behavior: 'smooth' }); + } catch (error) { + console.error('Error:', error); + alert('An error occurred while analyzing the experiment. Please try again.'); + } finally { + // Restore button state + submitBtn.innerHTML = originalBtnText; + submitBtn.disabled = false; + } + } + + // Validate form inputs + function validateForm() { + // Check if baseline variant is specified + if (!baselineVariantInput.value.trim()) { + alert('Please specify a baseline variant.'); + baselineVariantInput.focus(); + return false; + } + + // Check if at least 2 variants are added + const variants = variantsContainer.querySelectorAll('.variant-card'); + if (variants.length < 2) { + alert('Please add at least 2 variants for comparison.'); + return false; + } + + // Check if baseline variant exists in the variants + let baselineExists = false; + const baselineName = baselineVariantInput.value.trim(); + + variants.forEach(variant => { + const variantName = variant.querySelector('.variant-name').value.trim(); + if (variantName === baselineName) { + baselineExists = true; + } + }); + + if (!baselineExists) { + alert(`Baseline variant "${baselineName}" does not exist. Please add it as a variant or choose an existing variant as baseline.`); + return false; + } + + // Check for duplicate variant names + const variantNames = new Set(); + let hasDuplicates = false; + + variants.forEach(variant => { + const variantName = variant.querySelector('.variant-name').value.trim(); + if (variantNames.has(variantName)) { + hasDuplicates = true; + } + variantNames.add(variantName); + }); + + if (hasDuplicates) { + alert('Duplicate variant names found. Please ensure all variants have unique names.'); + return false; + } + + // Check if conversions <= impressions for each variant + let invalidConversions = false; + + variants.forEach(variant => { + const impressions = parseInt(variant.querySelector('.variant-impressions').value); + const conversions = parseInt(variant.querySelector('.variant-conversions').value); + + if (conversions > impressions) { + invalidConversions = true; + variant.querySelector('.variant-conversions').classList.add('is-invalid'); + } else { + variant.querySelector('.variant-conversions').classList.remove('is-invalid'); + } + }); + + if (invalidConversions) { + alert('Conversions cannot be greater than impressions. Please check your inputs.'); + return false; + } + + return true; + } + + // Collect form data + function collectFormData() { + const variants = []; + const variantElements = variantsContainer.querySelectorAll('.variant-card'); + + variantElements.forEach(variantElement => { + variants.push({ + name: variantElement.querySelector('.variant-name').value.trim(), + impressions: parseInt(variantElement.querySelector('.variant-impressions').value), + conversions: parseInt(variantElement.querySelector('.variant-conversions').value), + revenue: parseFloat(variantElement.querySelector('.variant-revenue').value) + }); + }); + + return { + variants: variants, + baseline_variant: baselineVariantInput.value.trim() + }; + } + + // Display results in tables + function displayResults(data) { + // Clear previous results + summaryTable.innerHTML = ''; + conversionTable.innerHTML = ''; + arpuTable.innerHTML = ''; + revenuePerSaleTable.innerHTML = ''; + + // Find best variant for each metric + const bestConversionVariant = findBestVariant(data.conversion_stats, 'prob_being_best'); + const bestArpuVariant = findBestVariant(data.arpu_stats, 'prob_being_best'); + const bestRevenuePerSaleVariant = findBestVariant(data.revenue_per_sale_stats, 'prob_being_best'); + + // Populate summary table + data.summary.forEach(variant => { + const row = document.createElement('tr'); + + // Highlight baseline + if (variant.variant === baselineVariantInput.value.trim()) { + row.classList.add('table-secondary'); + } + + row.innerHTML = ` + ${variant.variant} + ${variant.impressions.toLocaleString()} + ${variant.conversions.toLocaleString()} + ${variant.revenue} + ${formatPercentage(variant.conversion)} + ${variant.avg_ticket} + ${variant.arpu} + `; + + summaryTable.appendChild(row); + }); + + // Populate conversion stats table + data.conversion_stats.forEach(variant => { + const row = document.createElement('tr'); + + // Highlight baseline and best variant + if (variant.variant === baselineVariantInput.value.trim()) { + row.classList.add('table-secondary'); + } + if (variant.variant === bestConversionVariant) { + row.classList.add('best-variant'); + } + + row.innerHTML = ` + ${variant.variant} + ${variant.expected_loss.toLocaleString()} + ${formatProbability(variant.prob_being_best)} + ${formatLift(variant.lift)} + `; + + conversionTable.appendChild(row); + }); + + // Populate ARPU stats table + data.arpu_stats.forEach(variant => { + const row = document.createElement('tr'); + + // Highlight baseline and best variant + if (variant.variant === baselineVariantInput.value.trim()) { + row.classList.add('table-secondary'); + } + if (variant.variant === bestArpuVariant) { + row.classList.add('best-variant'); + } + + row.innerHTML = ` + ${variant.variant} + ${variant.expected_loss} + ${formatProbability(variant.prob_being_best)} + ${formatLift(variant.lift)} + `; + + arpuTable.appendChild(row); + }); + + // Populate revenue per sale stats table + data.revenue_per_sale_stats.forEach(variant => { + const row = document.createElement('tr'); + + // Highlight baseline and best variant + if (variant.variant === baselineVariantInput.value.trim()) { + row.classList.add('table-secondary'); + } + if (variant.variant === bestRevenuePerSaleVariant) { + row.classList.add('best-variant'); + } + + row.innerHTML = ` + ${variant.variant} + ${variant.expected_loss} + ${formatProbability(variant.prob_being_best)} + ${formatLift(variant.lift)} + `; + + revenuePerSaleTable.appendChild(row); + }); + } + + // Find the best variant based on a metric + function findBestVariant(variants, metric) { + let bestVariant = null; + let bestValue = -Infinity; + + variants.forEach(variant => { + if (variant[metric] > bestValue) { + bestValue = variant[metric]; + bestVariant = variant.variant; + } + }); + + return bestVariant; + } + + // Format percentage values + function formatPercentage(value) { + return (value * 100).toFixed(4) + '%'; + } + + // Format lift values + function formatLift(value) { + const sign = value >= 0 ? '+' : ''; + const className = value >= 0 ? 'text-success' : 'text-danger'; + return `${sign}${(value * 100).toFixed(4)}%`; + } + + // Format probability values with color coding + function formatProbability(value) { + let className = 'low-prob'; + if (value >= 0.8) { + className = 'high-prob'; + } else if (value >= 0.5) { + className = 'medium-prob'; + } + + return `${(value * 100).toFixed(4)}%`; + } + + // Export results as CSV + function exportResults() { + // Get table data + const summaryData = getTableData(document.getElementById('summaryTable')); + const conversionData = getTableData(document.getElementById('conversionTable')); + const arpuData = getTableData(document.getElementById('arpuTable')); + const revenuePerSaleData = getTableData(document.getElementById('revenuePerSaleTable')); + + // Combine data + const csvContent = [ + '# Summary', + summaryData, + '', + '# Conversion Statistics', + conversionData, + '', + '# ARPU Statistics', + arpuData, + '', + '# Revenue Per Sale Statistics', + revenuePerSaleData + ].join('\n'); + + // Create download link + const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' }); + const url = URL.createObjectURL(blob); + const link = document.createElement('a'); + link.setAttribute('href', url); + link.setAttribute('download', 'experiment_results.csv'); + link.style.visibility = 'hidden'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + } + + // Get table data as CSV + function getTableData(table) { + const rows = Array.from(table.querySelectorAll('tr')); + + return rows.map(row => { + const cells = Array.from(row.querySelectorAll('th, td')); + return cells.map(cell => { + // Get text content without HTML + return `"${cell.textContent.trim().replace(/"/g, '""')}"`; + }).join(','); + }).join('\n'); + } +}); \ No newline at end of file diff --git a/static/styles.css b/static/styles.css new file mode 100644 index 0000000..29b9637 --- /dev/null +++ b/static/styles.css @@ -0,0 +1,153 @@ +/* Main layout */ +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + background-color: #f8f9fa; +} + +.sidebar { + position: fixed; + top: 0; + bottom: 0; + left: 0; + z-index: 100; + padding: 48px 0 0; + box-shadow: inset -1px 0 0 rgba(0, 0, 0, .1); +} + +.sidebar .nav-link { + font-weight: 500; + color: #333; + padding: 0.5rem 1rem; + border-radius: 0.25rem; + margin: 0.2rem 0; +} + +.sidebar .nav-link:hover { + background-color: rgba(0, 123, 255, 0.1); +} + +.sidebar .nav-link.active { + color: #007bff; + background-color: rgba(0, 123, 255, 0.1); +} + +main { + padding-top: 1.5rem; +} + +/* Cards and form elements */ +.card { + border-radius: 0.5rem; + box-shadow: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.075); + margin-bottom: 1.5rem; +} + +.card-header { + background-color: #f8f9fa; + border-bottom: 1px solid rgba(0, 0, 0, 0.125); + padding: 0.75rem 1.25rem; +} + +.form-control:focus { + border-color: #80bdff; + box-shadow: 0 0 0 0.2rem rgba(0, 123, 255, 0.25); +} + +/* Tables */ +.table { + margin-bottom: 0; +} + +.table th { + font-weight: 600; + background-color: #f8f9fa; +} + +.table-hover tbody tr:hover { + background-color: rgba(0, 123, 255, 0.05); +} + +/* Buttons */ +.btn-primary { + background-color: #007bff; + border-color: #007bff; +} + +.btn-primary:hover { + background-color: #0069d9; + border-color: #0062cc; +} + +.btn-outline-primary { + color: #007bff; + border-color: #007bff; +} + +.btn-outline-primary:hover { + background-color: #007bff; + color: white; +} + +/* Variant cards */ +.variant-card { + transition: all 0.3s ease; +} + +.variant-card:hover { + box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15); +} + +/* Responsive adjustments */ +@media (max-width: 767.98px) { + .sidebar { + position: static; + height: auto; + padding-top: 1rem; + } + + main { + margin-top: 1rem; + } +} + +/* Results section styling */ +#resultsSection { + transition: opacity 0.5s ease; +} + +/* Highlight for best variant */ +.best-variant { + background-color: rgba(40, 167, 69, 0.1) !important; +} + +/* Formatting for probability values */ +.prob-value { + font-weight: bold; +} + +.high-prob { + color: #28a745; +} + +.medium-prob { + color: #fd7e14; +} + +.low-prob { + color: #dc3545; +} + +/* Loading indicator */ +.loading-spinner { + display: inline-block; + width: 1rem; + height: 1rem; + border: 0.2em solid currentColor; + border-right-color: transparent; + border-radius: 50%; + animation: spinner-border .75s linear infinite; +} + +@keyframes spinner-border { + to { transform: rotate(360deg); } +} \ No newline at end of file diff --git a/stats-differences.png b/stats-differences.png deleted file mode 100644 index 12be070..0000000 Binary files a/stats-differences.png and /dev/null differ diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..8f85f08 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,240 @@ + + + + + + Janus: Bayesian A/B Testing App + + + + + +
+
+ + + + +
+
+

Multivariant Experiment Analysis

+
+ + +
+
+
Experiment Data
+
+
+
+
+
+ + +
+
+ +
+ +
+ +
+ + +
+
+
+
+ + +
+
+

Results

+
+ +
+
+ + +
+
+
Summary
+
+
+
+ + + + + + + + + + + + + + + +
VariantImpressionsConversionsRevenueConversion RateAvg TicketARPU
+
+
+
+ + +
+
+
Conversion Statistics
+
+
+
+ + + + + + + + + + + + +
VariantExpected LossProbability of Being BestLift vs Baseline
+
+
+
+ + +
+
+
ARPU Statistics
+
+
+
+ + + + + + + + + + + + +
VariantExpected LossProbability of Being BestLift vs Baseline
+
+
+
+ + +
+
+
Revenue Per Sale Statistics
+
+
+
+ + + + + + + + + + + + +
VariantExpected LossProbability of Being BestLift vs Baseline
+
+
+
+
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/test_main.py b/test_main.py new file mode 100644 index 0000000..951af9d --- /dev/null +++ b/test_main.py @@ -0,0 +1,96 @@ +import pytest +from fastapi.testclient import TestClient +from main import app, WebsiteExperiment, Variant + +# Create a test client for the FastAPI app +client = TestClient(app) + + +# Test the home endpoint +@pytest.mark.asyncio +def test_home(): + response = client.get("/") + assert response.status_code == 200 + assert "Janus: Bayesian A/B Testing App" in response.text + + +# Test the health check endpoint +@pytest.mark.asyncio +def test_health_check(): + response = client.get("/health") + assert response.status_code == 200 + assert response.json() == {"status": "healthy"} + + +# Test the WebsiteExperiment class +@pytest.mark.asyncio +def test_website_experiment(): + variants = [ + Variant(name="A", impressions=1000, conversions=100, revenue=1000.0), + Variant(name="B", impressions=1000, conversions=150, revenue=1500.0), + ] + experiment = WebsiteExperiment(variants=variants, baseline_variant="A") + experiment.run(show=False) + assert len(experiment.conversion_results) == 2 + assert len(experiment.arpu_results) == 2 + assert len(experiment.revenue_per_sale_results) == 2 + + +@pytest.mark.asyncio +def test_run_conversion_experiment(): + variants = [ + Variant(name="A", impressions=1000, conversions=100, revenue=1000.0), + Variant(name="B", impressions=1000, conversions=150, revenue=1500.0), + ] + experiment = WebsiteExperiment(variants=variants, baseline_variant="A") + experiment.run_conversion_experiment(show=False) + assert len(experiment.conversion_results) == 2 + + +@pytest.mark.asyncio +def test_run_arpu_experiment(): + variants = [ + Variant(name="A", impressions=1000, conversions=100, revenue=1000.0), + Variant(name="B", impressions=1000, conversions=150, revenue=1500.0), + ] + experiment = WebsiteExperiment(variants=variants, baseline_variant="A") + experiment.run_arpu_experiment(show=False) + assert len(experiment.arpu_results) == 2 + + +@pytest.mark.asyncio +def test_run_revenue_per_sale_experiment(): + variants = [ + Variant(name="A", impressions=1000, conversions=100, revenue=1000.0), + Variant(name="B", impressions=1000, conversions=150, revenue=1500.0), + ] + experiment = WebsiteExperiment(variants=variants, baseline_variant="A") + experiment.run_revenue_per_sale_experiment(show=False) + assert len(experiment.revenue_per_sale_results) == 2 + + +@pytest.mark.asyncio +def test_compile_full_data(): + variants = [ + Variant(name="A", impressions=1000, conversions=100, revenue=1000.0), + Variant(name="B", impressions=1000, conversions=150, revenue=1500.0), + ] + experiment = WebsiteExperiment(variants=variants, baseline_variant="A") + experiment.run(show=False) + compiled_data = experiment.compile_full_data() + assert len(compiled_data) == 2 + + +@pytest.mark.asyncio +def test_get_reports(): + variants = [ + Variant(name="A", impressions=1000, conversions=100, revenue=1000.0), + Variant(name="B", impressions=1000, conversions=150, revenue=1500.0), + ] + experiment = WebsiteExperiment(variants=variants, baseline_variant="A") + experiment.run(show=False) + df_summary, df_conv, df_arpu, df_rev_per_sale = experiment.get_reports() + assert not df_summary.empty + assert not df_conv.empty + assert not df_arpu.empty + assert not df_rev_per_sale.empty diff --git a/testing_make_dataframe_from_summary.ipynb b/testing_make_dataframe_from_summary.ipynb deleted file mode 100644 index 921103a..0000000 --- a/testing_make_dataframe_from_summary.ipynb +++ /dev/null @@ -1,701 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 11, - "id": "1b2e9e65", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
variant_idexposure_periodexposuresconversionsrevenuecost
0A'2022-01-01'10010100.020.0
1A'2022-01-02'20020200.040.0
2A'2022-01-03'10012120.024.0
3B'2022-01-01'10012120.024.0
4B'2022-01-02'11015180.036.0
5B'2022-01-03'20030350.070.0
\n", - "
" - ], - "text/plain": [ - " variant_id exposure_period exposures conversions revenue cost\n", - "0 A '2022-01-01' 100 10 100.0 20.0\n", - "1 A '2022-01-02' 200 20 200.0 40.0\n", - "2 A '2022-01-03' 100 12 120.0 24.0\n", - "3 B '2022-01-01' 100 12 120.0 24.0\n", - "4 B '2022-01-02' 110 15 180.0 36.0\n", - "5 B '2022-01-03' 200 30 350.0 70.0" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "from janus.utils.make_dataframe_multivariate import create_per_user_dataframe_multivariate\n", - "\n", - "df = pd.read_csv('tests/dataset_summary.csv')\n", - "for col in ['revenue', 'cost']:\n", - " df[col] = df[col].astype('float64')\n", - "\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "24a499d2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 6 entries, 0 to 5\n", - "Data columns (total 6 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 variant_id 6 non-null object \n", - " 1 exposure_period 6 non-null object \n", - " 2 exposures 6 non-null int64 \n", - " 3 conversions 6 non-null int64 \n", - " 4 revenue 6 non-null float64\n", - " 5 cost 6 non-null float64\n", - "dtypes: float64(2), int64(2), object(2)\n", - "memory usage: 416.0+ bytes\n" - ] - } - ], - "source": [ - "df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "867fc964", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
variant_iduser_idexposure_periodconvertedrevenuecost
0A12022-01-01110.02.0
1A22022-01-01110.02.0
2A32022-01-01110.02.0
3A42022-01-01110.02.0
4A52022-01-01110.02.0
.....................
165B8062022-01-0300.00.0
166B8072022-01-0300.00.0
167B8082022-01-0300.00.0
168B8092022-01-0300.00.0
169B8102022-01-0300.00.0
\n", - "

810 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " variant_id user_id exposure_period converted revenue cost\n", - "0 A 1 2022-01-01 1 10.0 2.0\n", - "1 A 2 2022-01-01 1 10.0 2.0\n", - "2 A 3 2022-01-01 1 10.0 2.0\n", - "3 A 4 2022-01-01 1 10.0 2.0\n", - "4 A 5 2022-01-01 1 10.0 2.0\n", - ".. ... ... ... ... ... ...\n", - "165 B 806 2022-01-03 0 0.0 0.0\n", - "166 B 807 2022-01-03 0 0.0 0.0\n", - "167 B 808 2022-01-03 0 0.0 0.0\n", - "168 B 809 2022-01-03 0 0.0 0.0\n", - "169 B 810 2022-01-03 0 0.0 0.0\n", - "\n", - "[810 rows x 6 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "conversion_value_cols=['revenue', 'cost']\n", - "df2 = create_per_user_dataframe_multivariate(df, conversion_value_cols)\n", - "\n", - "df2" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "12c81181", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
variant_idexposure_periodexposuresconversionsrevenuecost
0A2022-01-0110010100.020.0
1A2022-01-0220020200.040.0
2A2022-01-0310012120.024.0
3B2022-01-0110012120.024.0
4B2022-01-0211015180.036.0
5B2022-01-0320030350.070.0
\n", - "
" - ], - "text/plain": [ - " variant_id exposure_period exposures conversions revenue cost\n", - "0 A 2022-01-01 100 10 100.0 20.0\n", - "1 A 2022-01-02 200 20 200.0 40.0\n", - "2 A 2022-01-03 100 12 120.0 24.0\n", - "3 B 2022-01-01 100 12 120.0 24.0\n", - "4 B 2022-01-02 110 15 180.0 36.0\n", - "5 B 2022-01-03 200 30 350.0 70.0" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agg_dict = {\n", - " 'user_id': 'count',\n", - " 'converted': 'sum',\n", - " }\n", - "agg_dict.update({c: 'sum' for c in conversion_value_cols})\n", - "\n", - "\n", - "df3 = (\n", - " df2\n", - " .groupby(['variant_id', 'exposure_period'])\n", - " .agg(agg_dict)\n", - " .reset_index()\n", - " .rename(columns={\n", - " 'user_id': 'exposures',\n", - " 'converted': 'conversions',\n", - " })\n", - ")\n", - "for col in conversion_value_cols:\n", - " df3[col] = df3[col].astype('float64')\n", - "df3['conversions'] = df3['conversions'].astype(\"int64\")\n", - "\n", - "df3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc485b04", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "04ccbec5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
variant_idexposure_periodexposuresconversionsrevenuecost
0A2022-01-0110010100.020.0
1A2022-01-0220020200.040.0
2A2022-01-0310012120.024.0
3B2022-01-0110012120.024.0
4B2022-01-0211015180.036.0
5B2022-01-0320030350.070.0
\n", - "
" - ], - "text/plain": [ - " variant_id exposure_period exposures conversions revenue cost\n", - "0 A 2022-01-01 100 10 100.0 20.0\n", - "1 A 2022-01-02 200 20 200.0 40.0\n", - "2 A 2022-01-03 100 12 120.0 24.0\n", - "3 B 2022-01-01 100 12 120.0 24.0\n", - "4 B 2022-01-02 110 15 180.0 36.0\n", - "5 B 2022-01-03 200 30 350.0 70.0" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "de59bfd8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0, 0, 0, 0, 0, 0])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(df['conversions'] - df3['conversions']).values" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "afb75e4b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cols = [\n", - " 'variant_id', \n", - " 'exposure_period', \n", - " 'exposures',\n", - " 'conversions'\n", - "]\n", - "pd.DataFrame.compare(df[cols], df3[cols])" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "6895b578", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.equals(df3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25a9b7d6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/make_data.py b/tests/make_data.py deleted file mode 100644 index ba1dad6..0000000 --- a/tests/make_data.py +++ /dev/null @@ -1,4 +0,0 @@ -import pandas as pd -import numpy as np - -# make toy dataframes to test two variants with several users diff --git a/tests/test_experiment.py b/tests/test_experiment.py deleted file mode 100644 index 684f7bc..0000000 --- a/tests/test_experiment.py +++ /dev/null @@ -1,32 +0,0 @@ -import unittest -import pandas as pd -from janus.stats.experiment import Variant, Experiment, get_lift - - -class ExperimentTestCase(unittest.TestCase): - def test_init(self): - args = { - "name": "my_experiment", - "keymetrics": ["conversion"], - "baseline_variant_name": "baseline", - } - - experiment = Experiment(**args) - for arg, value in args.items(): - self.assertEqual(value, getattr(experiment, arg)) - - def test_run_experiment_one_variant(self): - args = { - "name": "my_experiment", - "keymetrics": ["conversion"], - "baseline_variant_name": "baseline", - } - experiment = Experiment(**args) - - df_results_per_user = pd.read_csv("examples/results_per_user.csv") - with self.assertRaises(AssertionError): - experiment.run_experiment(df_results_per_user=df_results_per_user) - - -if __name__ == "__main__": - unittest.main(verbosity=1) diff --git a/tests/test_metrics.py b/tests/test_metrics.py deleted file mode 100644 index 5e55903..0000000 --- a/tests/test_metrics.py +++ /dev/null @@ -1,61 +0,0 @@ -import unittest -import pandas as pd -import numpy as np -from janus.stats.metrics import ( - Distribution, - ConversionDistribution, - RevenueDistribution, - ARPUDistribution, -) -from janus.stats.constants import SAMPLE_SIZE - - -class DistributionTestCase(unittest.TestCase): - def test_sample(self): - dist = Distribution() - self.assertRaises(NotImplementedError, dist.sample) - - def test_update(self): - dist = Distribution() - self.assertRaises(NotImplementedError, dist.update) - - def test_chance_to_beat_winner(self): - dist = Distribution() - # to test this method, we'll create two sintetic samplings - # from uniform distributions of disjoint ranges, so we will - # know the result for sure, using typical conversion values - - samplingA = np.array([0.1, 0.1, 0.1]) - samplingB = np.array([0.2, 0.2, 0.2]) - self.assertEqual(dist.chance_to_beat(samplingB, samplingA, 3), 1.0) - - def test_chance_to_beat_loser(self): - dist = Distribution() - samplingA = np.array([0.1, 0.1, 0.1]) - samplingB = np.array([0.1, 0.08, 0.09]) - self.assertEqual(dist.chance_to_beat(samplingB, samplingA, 3), 0.0) - - def test_chance_to_beat_almost_winner(self): - dist = Distribution() - samplingA = np.array([0.1, 0.1, 0.1]) - samplingB = np.array([0.1, 0.2, 0.2]) - self.assertEqual(dist.chance_to_beat(samplingB, samplingA, 3), 2 / 3) - - def test_expected_loss_winner(self): - dist = Distribution() - # to test this method, we'll create two sintetic samplings - # from uniform distributions of disjoint ranges, so we will - # know the result for sure, using typical conversion values - samplingA = np.array([0.1, 0.3, 0.1]) - samplingB = np.array([0.2, 0.2, 0.2]) - # diff is relative to other variant - variant of interest - diff = np.array([-0.1, 0.1, -0.1]) - expected_loss = round(sum(diff * (diff > 0)) / 3, 2) - - self.assertEqual( - round(dist.expected_loss(samplingB, samplingA, 3), 2), expected_loss - ) - - -if __name__ == "__main__": - unittest.main(verbosity=1) diff --git a/tests/test_variants.py b/tests/test_variants.py deleted file mode 100644 index 511e259..0000000 --- a/tests/test_variants.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest -import pandas as pd -from janus.stats.experiment import Variant, Experiment, get_lift - - -class VariantTestCase(unittest.TestCase): - def test_init(self): - variant = Variant(name="A") - self.assertEqual(variant.name, "A") - - def test_consolidate_results(self): - results_per_user = pd.read_csv("examples/results_per_user.csv") - variant_results = { - "A": { - "users": 6, - "sales": 3, - "paids": 3, - "revenue": 600, - "conversion": 0.5, - "ticket": 200, - "arpu": 100, - }, - "B": { - "users": 4, - "sales": 3, - "paids": 3, - "revenue": 720, - "conversion": 0.75, - "ticket": 240, - "arpu": 180, - }, - } - - for variant_name, true_results in variant_results.items(): - variant = Variant(name=variant_name) - variant.consolidate_results(df=results_per_user) - for metric_name, true_value in true_results.items(): - self.assertEqual( - getattr(variant, metric_name), - true_value, - msg=f"Error on {metric_name} value, variant {variant_name}.", - ) - - -if __name__ == "__main__": - unittest.main(verbosity=1) diff --git a/utils.py b/utils.py deleted file mode 100644 index b5e1fa1..0000000 --- a/utils.py +++ /dev/null @@ -1,33 +0,0 @@ -import streamlit as st -import pandas as pd - - -def save_results_in_session_state(experiment, control_label, treatment_label): - st.session_state.experiment_results = experiment.results - st.session_state.treatment_stats = pd.DataFrame.from_dict( - experiment.results[treatment_label]["statistics"] - ) - st.session_state.control_stats = pd.DataFrame.from_dict( - experiment.results[control_label]["statistics"] - ) - - -def explain_metrics(): - st.write( - """ - - **chance_to_beat**: chance that the variant is better than the other. - - **expected_loss**: a measure of the risk you're assuming if you stay with this variant. The lower the risk, the best (e.g: 0.10 in conversion means that your risk of staying with the variant compared to the other is to lose 10% p.p. 0.10 for arpu is a risk of loosing $0.10 per user.) - - **lift**: the observed relative difference compared to the other variant for each metric. - - **diff**: the observed absolute difference compared to the other variant for each metric. - """ - ) - - -def print_warning(): - st.warning( - """ - We're making a major breaking change in the project to use the [bayesian-testing](https://github.com/Matt52/bayesian-testing) library for better experiment management, - and a full stack application will be developed to build a website for Janus, so this Streamlit application will be discontinued. - Please, consider using the [distributed package in pypi](https://pypi.org/project/janus-web-ab-testing/), which comes from the `evolve-janus-backend` branch. - """ - )