diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml new file mode 100644 index 000000000..74dd68dd9 --- /dev/null +++ b/.github/workflows/publish-docs.yml @@ -0,0 +1,47 @@ +# This workflow builds and publishes the latest docs to +# the `gh-pages` branch. +# For more details: https://github.com/marketplace/actions/deploy-to-github-pages +name: Publish docs + +on: + release: + types: [created] + workflow_dispatch: + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + permissions: + contents: write + pages: write + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/checkout@v2 + with: + # fetch all tags so `versioneer` can properly determine current version + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.11' + - name: Install pandoc + uses: pandoc/actions/setup@v1 + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-ml.txt + pip install -r requirements-reports.txt + pip install -r requirements-docs.txt + pip install -e . + + - name: Build + run: | + cd _docs/docs + python update_documentation.py + - name: Publish + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: gh-pages + folder: _docs/docs/LATEST/html diff --git a/.github/workflows/publish-package.yml b/.github/workflows/publish-package.yml new file mode 100644 index 000000000..9a230cd27 --- /dev/null +++ b/.github/workflows/publish-package.yml @@ -0,0 +1,52 @@ +# This workflow publishes the package to pypi. 
+# For more details: +# https://docs.github.com/en/actions/guides/building-and-testing-python#publishing-to-package-registries +name: Publish to PyPi + +on: + release: + types: [created] + workflow_dispatch: + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + # fetch all tags so `versioneer` can properly determine current version + with: + fetch-depth: 0 + - name: Check if current commit is tagged + # fails and cancels release if the current commit is not tagged + run: | + git describe --exact-match --tags + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f requirements-ml.txt ]; then pip install -r requirements-ml.txt; fi + if [ -f requirements-reports.txt ]; then pip install -r requirements-reports.txt; fi + pip install setuptools wheel twine + - name: Build + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + TWINE_REPOSITORY: pypi + run: | + python setup.py sdist bdist_wheel + - name: Test build + # fails and cancels release if the built package fails to import + run: | + pip install dist/*.whl + python -c 'import dataprofiler; print(dataprofiler.__version__)' + - name: Publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + TWINE_REPOSITORY: pypi + run: | + twine upload dist/* diff --git a/.github/workflows/publish-python-package.yml b/.github/workflows/publish-python-package.yml deleted file mode 100644 index 4ed9e1bf3..000000000 --- a/.github/workflows/publish-python-package.yml +++ /dev/null @@ -1,38 +0,0 @@ - -# This workflow will upload a Python Package using Twine when a release is created -# For more information see: 
https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -name: Publish Python Package - -on: - release: - types: [created] - branches: - - 'release/*' - -jobs: - deploy: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - if [ -f requirements-ml.txt ]; then pip install -r requirements-ml.txt; fi - if [ -f requirements-reports.txt ]; then pip install -r requirements-reports.txt; fi - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - TWINE_REPOSITORY: pypi - run: | - python setup.py sdist bdist_wheel - twine upload dist/* diff --git a/.github/workflows/test-python-package.yml b/.github/workflows/test-package.yml similarity index 88% rename from .github/workflows/test-python-package.yml rename to .github/workflows/test-package.yml index 3c88e7211..47416a938 100644 --- a/.github/workflows/test-python-package.yml +++ b/.github/workflows/test-package.yml @@ -7,8 +7,6 @@ on: pull_request: branches: - 'main' - - 'feature/**' - - 'dev' jobs: build: @@ -16,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.9, "3.10", "3.11"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v4 @@ -38,4 +36,4 @@ jobs: pre-commit run --all-files - name: Test with pytest run: | - DATAPROFILER_SEED=0 pytest --forked --cov=dataprofiler --cov-fail-under=80 + DATAPROFILER_SEED=0 pytest --cov=dataprofiler --cov-fail-under=80 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 092cc5a48..07b1d865a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,6 +5,7 @@ repos: rev: 24.10.0 
hooks: - id: black + exclude: (versioneer.py|dataprofiler/_version.py|_docs/) types: [file, python] language_version: python3 # Isort: sort import statements @@ -15,6 +16,7 @@ repos: rev: 5.12.0 hooks: - id: isort + exclude: _docs/ language_version: python3 # Flake8: complexity and style checking # https://flake8.pycqa.org/en/latest/user/using-hooks.html @@ -23,7 +25,7 @@ repos: hooks: - id: flake8 additional_dependencies: [flake8-docstrings] - exclude: (^docs/|^dataprofiler/tests/|^.*/__init__.py) + exclude: (^docs/|^dataprofiler/tests/|^.*/__init__.py|_docs/) language_version: python3 # General fixers: format files for white spaces and trailing new lines, warn on debug statements # https://github.com/pre-commit/pre-commit-hooks#hooks-available @@ -31,17 +33,17 @@ repos: rev: v4.0.1 hooks: - id: trailing-whitespace - exclude: (^dataprofiler/tests/data/|^dataprofiler/tests/speed_tests/data/) + exclude: (^dataprofiler/tests/data/|^dataprofiler/tests/speed_tests/data/|_docs/) - id: debug-statements - id: end-of-file-fixer - exclude: (^dataprofiler/tests/data/) + exclude: (^dataprofiler/tests/data/|_docs/) # Mypy: Optional static type checking # https://github.com/pre-commit/mirrors-mypy - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.11.2 hooks: - id: mypy - exclude: (^dataprofiler/tests/|^resources/|^examples|venv*/) + exclude: (^dataprofiler/tests/|^resources/|^examples|venv*/|versioneer.py|dataprofiler/_version.py|_docs/) language_version: python3 additional_dependencies: # Keep up-to-date with the respective requirement files [ @@ -93,7 +95,6 @@ repos: # requirements-test.txt coverage>=5.0.1, - dask>=2.29.0, fsspec>=0.3.3, pytest>=6.0.1, pytest-cov>=2.8.1, @@ -108,24 +109,23 @@ repos: rev: "0.48" hooks: - id: check-manifest - additional_dependencies: - [ - 'matplotlib', 'h5py', 'wheel', 'future', 'numpy<2.0.0', 'pandas', - 'python-dateutil', 'pytz', 'pyarrow', 'chardet', 'fastavro', - 'python-snappy', 'charset-normalizer', 'psutil', 'scipy', 
'requests', - 'networkx','typing-extensions', 'HLL', 'datasketches', 'boto3', - ] + additional_dependencies: ['h5py', 'wheel', 'future', 'numpy<2.0.0', 'pandas', + 'python-dateutil', 'pytz', 'pyarrow', 'chardet', 'fastavro', + 'python-snappy', 'charset-normalizer', 'psutil', 'scipy', 'requests', + 'networkx','typing-extensions', 'HLL', 'datasketches', 'boto3'] # Pyupgrade - standardize and modernize Python syntax for newer versions of the language - repo: https://github.com/asottile/pyupgrade rev: v3.3.0 hooks: - id: pyupgrade - args: ["--py39-plus"] + args: ["--py310-plus"] + exclude: (versioneer.py|dataprofiler/_version.py|_docs/) # Autoflake - cleanup unused variables and imports - repo: https://github.com/PyCQA/autoflake rev: v2.0.0 hooks: - id: autoflake + exclude: _docs/ args: - "--in-place" - "--ignore-pass-statements" diff --git a/.whitesource b/.whitesource new file mode 100644 index 000000000..37dfa8e25 --- /dev/null +++ b/.whitesource @@ -0,0 +1,3 @@ +{ + "settingsInheritedFrom": "capitalone/whitesource-config" +} diff --git a/MANIFEST.in b/MANIFEST.in index 9a62e405e..3f426b7bb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -17,4 +17,17 @@ recursive-include resources *.json recursive-include resources *.pb recursive-include resources *.py -recursive-include dataprofiler/labelers/embeddings/*.txt +recursive-include dataprofiler/labelers/embeddings *.txt +include versioneer.py +include dataprofiler/_version.py +include .whitesource + +recursive-exclude _docs *.html +recursive-exclude _docs *.cfg +exclude _docs/LICENSE +recursive-exclude _docs *.md +recursive-exclude _docs *.nojekyll +recursive-exclude _docs *.png +recursive-exclude _docs *.py +recursive-exclude _docs *.rst +recursive-exclude _docs Makefile diff --git a/_docs/README.md b/_docs/README.md new file mode 100644 index 000000000..0f925ac58 --- /dev/null +++ b/_docs/README.md @@ -0,0 +1,59 @@ +Visit our [documentation page.](https://capitalone.github.io/DataProfiler) + +### How to properly write
documentation: + +#### Packages +In any package directory, overall package comments can be made in the +\_\_init\_\_.py of the directory. At the top of the \_\_init\_\_.py, +include your comments in between triple quotations. + +#### Classes +In any class file, include overall class comments at the top of the file +in between triple quotes and/or in the init function. + +#### Functions +reStructuredText Docstring Format is the standard. Here is an example: + + def format_data(self, predictions, verbose=False): + """ + Formats word level labeling of the Unstructured Data Labeler as you want + + :param predictions: A 2D list of word level predictions/labeling + :type predictions: Dict + :param verbose: A flag to determine verbosity + :type verbose: Bool + :return: JSON structure containing specified formatted output + :rtype: JSON + + :Example: + Look at this test. Don't forget the double colons to make a code block:: + This is a codeblock + Type example code here + """ + +### How to update the documentation: + + +1. Set up your local environment +```bash +# install sphinx requirements +# install the requirements from the feature branch +pip install pandoc && +pip install -r requirements.txt && +pip install -r requirements-ml.txt && +pip install -r requirements-reports.txt && +pip install -r requirements-docs.txt && +pip install -e . + +``` +2. And finally, from the root of `DataProfiler`, run the following commands to generate the sphinx documentation: +```bash +cd _docs/docs +python update_documentation.py + +``` + +3. View new docs +```bash +open index.html +``` diff --git a/_docs/docs/Makefile b/_docs/docs/Makefile new file mode 100644 index 000000000..81ca02cf5 --- /dev/null +++ b/_docs/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. 
+SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = buildcode + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/_docs/docs/make.bat b/_docs/docs/make.bat new file mode 100644 index 000000000..6247f7e23 --- /dev/null +++ b/_docs/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/_docs/docs/source/API.rst b/_docs/docs/source/API.rst new file mode 100644 index 000000000..fdbf2242b --- /dev/null +++ b/_docs/docs/source/API.rst @@ -0,0 +1,16 @@ +.. _API: + +API +*** + +The API is split into 4 main components: Profilers, Labelers, Data Readers, and +Validators. + +.. 
toctree:: + :maxdepth: 1 + :caption: Contents: + + dataprofiler.data_readers + dataprofiler.profilers + dataprofiler.labelers + dataprofiler.validators \ No newline at end of file diff --git a/_docs/docs/source/DL-Flowchart.png b/_docs/docs/source/DL-Flowchart.png new file mode 100644 index 000000000..696eeb5dc Binary files /dev/null and b/_docs/docs/source/DL-Flowchart.png differ diff --git a/_docs/docs/source/_static/custom.css b/_docs/docs/source/_static/custom.css new file mode 100644 index 000000000..8a7c7cb54 --- /dev/null +++ b/_docs/docs/source/_static/custom.css @@ -0,0 +1,50 @@ +/* + the ipython3 code blocks coming from the notebooks + were not getting the dark theme styles applied, so + manually overriding them +*/ +@media (prefers-color-scheme: dark) { + .highlight-ipython3 { + border: none !important; + border-radius: 2px !important; + background: #202020 !important; + color: #d0d0d0 !important; + } +} + +@media (prefers-color-scheme: dark) { + tr:nth-child(odd) { + background-color: #202020 !important; + } +} + +@media (prefers-color-scheme: dark) { + .dataframe { + color: white !important; + } +} + +.hidden { + display: none; +} + +.version { + text-align: right; + font-size: 24px; + margin-top: -47px; + margin-right: 3px; +} + +.sidebar-brand { + margin-bottom: -10px; + margin-top: 10px; +} + +/* unknown warning was showing, manually hiding */ +#Visualizing-Logged-Dataframes .admonition.warning { + display: none; +} + +div.output_area.stderr { + display: none; +} diff --git a/_docs/docs/source/_static/images/DataProfilerDarkLogoLong.png b/_docs/docs/source/_static/images/DataProfilerDarkLogoLong.png new file mode 100644 index 000000000..a339e0f6a Binary files /dev/null and b/_docs/docs/source/_static/images/DataProfilerDarkLogoLong.png differ diff --git a/_docs/docs/source/_static/images/DataProfilerLogoLightTheme.png b/_docs/docs/source/_static/images/DataProfilerLogoLightTheme.png new file mode 100644 index 000000000..35e59c349 Binary files 
/dev/null and b/_docs/docs/source/_static/images/DataProfilerLogoLightTheme.png differ diff --git a/_docs/docs/source/_static/images/DataProfilerLogoLightThemeLong.png b/_docs/docs/source/_static/images/DataProfilerLogoLightThemeLong.png new file mode 100644 index 000000000..ca86fe167 Binary files /dev/null and b/_docs/docs/source/_static/images/DataProfilerLogoLightThemeLong.png differ diff --git a/_docs/docs/source/_static/images/branching_workflow_diagram.png b/_docs/docs/source/_static/images/branching_workflow_diagram.png new file mode 100644 index 000000000..60a9515d0 Binary files /dev/null and b/_docs/docs/source/_static/images/branching_workflow_diagram.png differ diff --git a/_docs/docs/source/_static/images/histogram_example_0.png b/_docs/docs/source/_static/images/histogram_example_0.png new file mode 100644 index 000000000..9b8301363 Binary files /dev/null and b/_docs/docs/source/_static/images/histogram_example_0.png differ diff --git a/_docs/docs/source/_static/images/histogram_example_1.png b/_docs/docs/source/_static/images/histogram_example_1.png new file mode 100644 index 000000000..062dfdbb9 Binary files /dev/null and b/_docs/docs/source/_static/images/histogram_example_1.png differ diff --git a/_docs/docs/source/_static/images/histogram_example_2.png b/_docs/docs/source/_static/images/histogram_example_2.png new file mode 100644 index 000000000..1aedf7549 Binary files /dev/null and b/_docs/docs/source/_static/images/histogram_example_2.png differ diff --git a/_docs/docs/source/_static/images/missing_value_barchart_example_0.png b/_docs/docs/source/_static/images/missing_value_barchart_example_0.png new file mode 100644 index 000000000..33cb7afd2 Binary files /dev/null and b/_docs/docs/source/_static/images/missing_value_barchart_example_0.png differ diff --git a/_docs/docs/source/_static/images/missing_value_matrix_example_0.png b/_docs/docs/source/_static/images/missing_value_matrix_example_0.png new file mode 100644 index 000000000..21799cddf 
Binary files /dev/null and b/_docs/docs/source/_static/images/missing_value_matrix_example_0.png differ diff --git a/_docs/docs/source/add_new_model_to_data_labeler.nblink b/_docs/docs/source/add_new_model_to_data_labeler.nblink new file mode 100644 index 000000000..4c5fe646a --- /dev/null +++ b/_docs/docs/source/add_new_model_to_data_labeler.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/add_new_model_to_data_labeler.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/source/architecture.rst b/_docs/docs/source/architecture.rst new file mode 100644 index 000000000..469308993 --- /dev/null +++ b/_docs/docs/source/architecture.rst @@ -0,0 +1,48 @@ +.. _architecture: + +Architecture & Design Overview +****************************** + +This section describes the design rationale, algorithmic choices, assumptions, testing strategy, and contribution process used in the DataProfiler library. + +Overview +-------- + +DataProfiler computes numeric statistics (e.g., mean, variance, skewness, kurtosis) using **streaming algorithms** that allow efficient, incremental updates without recomputing from raw data. Approximate quantile metrics like the median are calculated using histogram-based estimation, making the system scalable for large or streaming datasets. + +Additionally, DataProfiler uses a **Convolutional Neural Network (CNN)** to detect and label entities (e.g., names, emails, credit cards) in unstructured text. This supports critical tasks such as **PII detection**, **schema inference**, and **data quality analysis** across structured and unstructured data. + +Algorithm Rationale +------------------- + +The algorithms used are designed for **speed, scalability, and flexibility**: + +- **Streaming numeric methods** (e.g., Welford's algorithm, moment-based metrics, histogram binning) efficiently summarize data without full recomputation. 
+- **CNNs for entity detection** are fast, high-throughput, and well-suited for sequence labeling tasks in production environments. + +These choices align with the tool's goal of delivering fast, accurate data profiling with minimal configuration. + +Assumptions & Limitations +------------------------- + +- **Consistent formatting** of sensitive entities is assumed (e.g., standardized credit card or SSN formats). +- **Overlapping entity types** (e.g., phone vs. SSN) may lead to misclassification without context. +- **Synthetic training data** may not fully capture real-world diversity, reducing model accuracy on natural or unstructured text. +- **Quantile estimation** (e.g., median) is approximate and based on binning rather than exact sorting. + +Testing & Validation +-------------------- + +- Comprehensive **unit testing** is performed across Python 3.10 and 3.11. +- Tests are executed on every pull request targeting the `main` branch. +- All pull requests require **two code reviewer approvals** before merging. +- Testing includes correctness, performance, and compatibility checks to ensure production readiness. + +Versioning & Contributions +-------------------------- + +- Versioning and development are managed via **GitHub**. +- Future changes must follow the guidelines in `CONTRIBUTING.md`, including: + - Forking the repo and branching from `dev` or an active feature branch. + - Ensuring **80%+ unit test coverage** for all new functionality. + - Opening a PR and securing **two approvals** prior to merging.
diff --git a/_docs/docs/source/column_name_labeler_example.nblink b/_docs/docs/source/column_name_labeler_example.nblink new file mode 100644 index 000000000..c39e674fb --- /dev/null +++ b/_docs/docs/source/column_name_labeler_example.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/column_name_labeler.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/source/conf.py b/_docs/docs/source/conf.py new file mode 100644 index 000000000..80168effd --- /dev/null +++ b/_docs/docs/source/conf.py @@ -0,0 +1,84 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +import re + +sys.path.insert(0, os.path.abspath(f'../../../')) + +# -- Project information ----------------------------------------------------- + +project = 'Data Profiler' +copyright = '2024, Jeremy Goodsitt, Austin Walters, Anh Truong, Grant Eden, and Chris Wallace' +author = 'Jeremy Goodsitt, Austin Walters, Anh Truong, Grant Eden, and Chris Wallace' + +# The full version, including alpha/beta/rc tags +# release = '21.01.20' +from dataprofiler import __version__ as version # noqa F401 + + +version_clip = re.search(r'\s*([\d.]+)', version).group(1) +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + 'sphinx.ext.intersphinx', + 'nbsphinx', + 'nbsphinx_link', +] + +# Don't execute the notebook cells when generating the documentation +# This can be configured on a per notebook basis as well +# See: https://nbsphinx.readthedocs.io/en/0.2.15/never-execute.html#Explicitly-Dis-/ +nbsphinx_execute = "never" +nbsphinx_prolog = """ +`View this notebook on GitHub `_ +""" + +autoclass_content = 'both' +autodoc_default_options = { + 'members': True, + 'member-order': 'bysource', + 'undoc-members': True, + 'exclude-members': '__weakref__', + 'inherited-members': True, +} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "furo" +html_title = f"
v{version_clip}
" +html_static_path = ["_static"] +html_css_files = ["custom.css"] +html_favicon = "_static/images/DataProfilerLogoLightTheme.png" +html_theme_options = { + "light_logo": "images/DataProfilerLogoLightThemeLong.png", + "dark_logo": "images/DataProfilerDarkLogoLong.png", +} diff --git a/_docs/docs/source/data_labeling.rst b/_docs/docs/source/data_labeling.rst new file mode 100644 index 000000000..db76fe791 --- /dev/null +++ b/_docs/docs/source/data_labeling.rst @@ -0,0 +1,365 @@ +.. _data_labeling: + +Labeler (Sensitive Data) +************************ + +In this library, the term *data labeling* refers to entity recognition. + +Builtin to the data profiler is a classifier which evaluates the complex data types of the dataset. +For structured data, it determines the complex data type of each column. When +running the data profile, it uses the default data labeling model builtin to the +library. However, the data labeler allows users to train their own data labeler +as well. + +*Data Labels* are determined per cell for structured data (column/row when +the *profiler* is used) or at the character level for unstructured data. This +is a list of the default labels. + +* UNKNOWN +* ADDRESS +* BAN (bank account number, 10-18 digits) +* CREDIT_CARD +* EMAIL_ADDRESS +* UUID +* HASH_OR_KEY (md5, sha1, sha256, random hash, etc.) +* IPV4 +* IPV6 +* MAC_ADDRESS +* PERSON +* PHONE_NUMBER +* SSN +* URL +* US_STATE +* DRIVERS_LICENSE +* DATE +* TIME +* DATETIME +* INTEGER +* FLOAT +* QUANTITY +* ORDINAL + + +Identify Entities in Structured Data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Makes predictions and identifying labels: + +.. 
code-block:: python + + import dataprofiler as dp + + # load data and data labeler + data = dp.Data("your_data.csv") + data_labeler = dp.DataLabeler(labeler_type='structured') + + # make predictions and get labels per cell + predictions = data_labeler.predict(data) + +Identify Entities in Unstructured Data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Predict which class characters belong to in unstructured text: + +.. code-block:: python + + import dataprofiler as dp + + data_labeler = dp.DataLabeler(labeler_type='unstructured') + + # Example sample string, must be in an array (multiple arrays can be passed) + sample = ["Help\tJohn Macklemore\tneeds\tfood.\tPlease\tCall\t555-301-1234." + "\tHis\tssn\tis\tnot\t334-97-1234. I'm a BAN: 000043219499392912.\n"] + + # Prediction what class each character belongs to + model_predictions = data_labeler.predict( + sample, predict_options=dict(show_confidences=True)) + + # Predictions / confidences are at the character level + final_results = model_predictions["pred"] + final_confidences = model_predictions["conf"] + +It's also possible to change output formats, output similar to a **SpaCy** format: + +.. code-block:: python + + import dataprofiler as dp + + data_labeler = dp.DataLabeler(labeler_type='unstructured', trainable=True) + + # Example sample string, must be in an array (multiple arrays can be passed) + sample = ["Help\tJohn Macklemore\tneeds\tfood.\tPlease\tCall\t555-301-1234." + "\tHis\tssn\tis\tnot\t334-97-1234. I'm a BAN: 000043219499392912.\n"] + + # Set the output to the NER format (start position, end position, label) + data_labeler.set_params( + { 'postprocessor': { 'output_format':'ner', 'use_word_level_argmax':True } } + ) + + results = data_labeler.predict(sample) + + print(results) + +Train a New Data Labeler +~~~~~~~~~~~~~~~~~~~~~~~~ + +Mechanism for training your own data labeler on their own set of structured data +(tabular): + +.. 
code-block:: python + + import dataprofiler as dp + + # Will need one column with a default label of UNKNOWN + data = dp.Data("your_file.csv") + + data_labeler = dp.train_structured_labeler( + data=data, + save_dirpath="/path/to/save/labeler", + epochs=2 + ) + + data_labeler.save_to_disk("my/save/path") # Saves the data labeler for reuse + +Load an Existing Data Labeler +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Mechanism for loading an existing data_labeler: + +.. code-block:: python + + import dataprofiler as dp + + data_labeler = dp.DataLabeler( + labeler_type='structured', dirpath="/path/to/my/labeler") + + # get information about the parameters/inputs/output formats for the DataLabeler + data_labeler.help() + +Extending a Data Labeler with Transfer Learning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Extending or changing labels of a data labeler w/ transfer learning: +Note: By default, **a labeler loaded will not be trainable**. In order to load a +trainable DataLabeler, the user must set `trainable=True` or load a labeler +using the `TrainableDataLabeler` class. + +The following illustrates how to change the labels: + +.. code-block:: python + + import dataprofiler as dp + + labels = ['label1', 'label2', ...] # new label set can also be an encoding dict + data = dp.Data("your_file.csv") # contains data with new labels + + # load default structured Data Labeler w/ trainable set to True + data_labeler = dp.DataLabeler(labeler_type='structured', trainable=True) + + # this will use transfer learning to retrain the data labeler on your new + # dataset and labels. + # NOTE: data must be in an acceptable format for the preprocessor to interpret. + # please refer to the preprocessor/model for the expected data format. + # Currently, the DataLabeler cannot take in Tabular data, but requires + # data to be ingested with two columns [X, y] where X is the samples and + # y is the labels. 
+ model_results = data_labeler.fit(x=data['samples'], y=data['labels'], + validation_split=0.2, epochs=2, labels=labels) + + # final_results, final_confidences are a list of results for each epoch + epoch_id = 0 + final_results = model_results[epoch_id]["pred"] + final_confidences = model_results[epoch_id]["conf"] + +The following illustrates how to extend the labels: + +.. code-block:: python + + import dataprofiler as dp + + new_labels = ['label1', 'label2', ...] + data = dp.Data("your_file.csv") # contains data with new labels + + # load default structured Data Labeler w/ trainable set to True + data_labeler = dp.DataLabeler(labeler_type='structured', trainable=True) + + # this will maintain current labels and model weights, but extend the model's + # labels + for label in new_labels: + data_labeler.add_label(label) + + # NOTE: a user can also add a label which maps to the same index as an existing + # label + # data_labeler.add_label(label, same_as='') + + # For a trainable model, the user must then train the model to be able to + # continue using the labeler since the model's graph has likely changed + # NOTE: data must be in an acceptable format for the preprocessor to interpret. + # please refer to the preprocessor/model for the expected data format. + # Currently, the DataLabeler cannot take in Tabular data, but requires + # data to be ingested with two columns [X, y] where X is the samples and + # y is the labels. + model_results = data_labeler.fit(x=data['samples'], y=data['labels'], + validation_split=0.2, epochs=2) + + # final_results, final_confidences are a list of results for each epoch + epoch_id = 0 + final_results = model_results[epoch_id]["pred"] + final_confidences = model_results[epoch_id]["conf"] + + +Changing pipeline parameters: + +.. 
code-block:: python + + import dataprofiler as dp + + # load default Data Labeler + data_labeler = dp.DataLabeler(labeler_type='structured') + + # change parameters of specific component + data_labeler.preprocessor.set_params({'param1': 'value1'}) + + # change multiple simultaneously. + data_labeler.set_params({ + 'preprocessor': {'param1': 'value1'}, + 'model': {'param2': 'value2'}, + 'postprocessor': {'param3': 'value3'} + }) + + +Build Your Own Data Labeler +=========================== + +The DataLabeler has 3 main components: preprocessor, model, and postprocessor. +To create your own DataLabeler, each one would have to be created or an +existing component can be reused. + +Given a set of the 3 components, you can construct your own DataLabeler: + +.. code-block:: python + from dataprofiler.labelers.base_data_labeler import BaseDataLabeler, \ + TrainableDataLabeler + from dataprofiler.labelers.character_level_cnn_model import CharacterLevelCnnModel + from dataprofiler.labelers.data_processing import \ + StructCharPreprocessor, StructCharPostprocessor + + # load a non-trainable data labeler + model = CharacterLevelCnnModel(...) + preprocessor = StructCharPreprocessor(...) + postprocessor = StructCharPostprocessor(...) + + data_labeler = BaseDataLabeler.load_with_components( + preprocessor=preprocessor, model=model, postprocessor=postprocessor) + + # check for basic compatibility between the processors and the model + data_labeler.check_pipeline() + + + # load trainable data labeler + data_labeler = TrainableDataLabeler.load_with_components( + preprocessor=preprocessor, model=model, postprocessor=postprocessor) + + # check for basic compatibility between the processors and the model + data_labeler.check_pipeline() + +Option for swapping out specific components of an existing labeler. + +.. 
code-block:: python + + import dataprofiler as dp + from dataprofiler.labelers.character_level_cnn_model import \ + CharacterLevelCnnModel + from dataprofiler.labelers.data_processing import \ + StructCharPreprocessor, StructCharPostprocessor + + model = CharacterLevelCnnModel(...) + preprocessor = StructCharPreprocessor(...) + postprocessor = StructCharPostprocessor(...) + + data_labeler = dp.DataLabeler(labeler_type='structured') + data_labeler.set_preprocessor(preprocessor) + data_labeler.set_model(model) + data_labeler.set_postprocessor(postprocessor) + + # check for basic compatibility between the processors and the model + data_labeler.check_pipeline() + + +Model Component +~~~~~~~~~~~~~~~ + +In order to create your own model component for data labeling, you can utilize +the `BaseModel` class from `dataprofiler.labelers.base_model` and +override the abstract class methods. + +Reviewing `CharacterLevelCnnModel` from +`dataprofiler.labelers.character_level_cnn_model` illustrates the functions +which need an override. + +#. `__init__`: specifying default parameters and calling base `__init__` +#. `_validate_parameters`: validating parameters given by user during setting +#. `_need_to_reconstruct_model`: flag for when to reconstruct a model (i.e. + parameters change or labels change require a model reconstruction) +#. `_construct_model`: initial construction of the model given the parameters +#. `_reconstruct_model`: updates model architecture for new label set while + maintaining current model weights +#. `fit`: mechanism for the model to learn given training data +#. `predict`: mechanism for model to make predictions on data +#. `details`: prints a summary of the model construction +#. `save_to_disk`: saves model and model parameters to disk +#. 
`load_from_disk`: loads model given a path on disk + + +Preprocessor Component +~~~~~~~~~~~~~~~~~~~~~~ + +In order to create your own preprocessor component for data labeling, you can +utilize the `BaseDataPreprocessor` class +from `dataprofiler.labelers.data_processing` and override the abstract class +methods. + +Reviewing `StructCharPreprocessor` from +`dataprofiler.labelers.data_processing` illustrates the functions which +need an override. + +#. `__init__`: passing parameters to the base class and executing any + extraneous calculations to be saved as parameters +#. `_validate_parameters`: validating parameters given by user during + setting +#. `process`: takes in the user data and converts it into a digestible, + iterable format for the model +#. `set_params` (optional): if a parameter requires processing before setting, + a user can override this function to assist with setting the parameter +#. `_save_processor` (optional): if a parameter is not JSON serializable, a + user can override this function to assist in saving the processor and its + parameters +#. `load_from_disk` (optional): if a parameter(s) is not JSON serializable, a + user can override this function to assist in loading the processor + +Postprocessor Component +~~~~~~~~~~~~~~~~~~~~~~~ + +The postprocessor is nearly identical to the preprocessor except it handles +the output of the model for processing. In order to create your own +postprocessor component for data labeling, you can utilize the +`BaseDataPostprocessor` class from `dataprofiler.labelers.data_processing` +and override the abstract class methods. + +Reviewing `StructCharPostprocessor` from +`dataprofiler.labelers.data_processing` illustrates the functions which +need an override. + +#. `__init__`: passing parameters to the base class and executing any + extraneous calculations to be saved as parameters +#. `_validate_parameters`: validating parameters given by user during + setting +#. 
`process`: takes in the output of the model and processes for output to + the user +#. `set_params` (optional): if a parameter requires processing before setting, + a user can override this function to assist with setting the parameter +#. `_save_processor` (optional): if a parameter is not JSON serializable, a + user can override this function to assist in saving the processor and its + parameters +#. `load_from_disk` (optional): if a parameter(s) is not JSON serializable, a + user can override this function to assist in loading the processor diff --git a/_docs/docs/source/data_reader.nblink b/_docs/docs/source/data_reader.nblink new file mode 100644 index 000000000..8d7215f46 --- /dev/null +++ b/_docs/docs/source/data_reader.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/data_readers.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/source/data_readers.rst b/_docs/docs/source/data_readers.rst new file mode 100644 index 000000000..877ea56dd --- /dev/null +++ b/_docs/docs/source/data_readers.rst @@ -0,0 +1,184 @@ +.. _data_readers: + +Data Readers +************ + +The `Data` class itself will identify then output one of the following `Data` class types. +Using the data reader is easy, just pass it through the Data object. + +.. code-block:: python + + import dataprofiler as dp + data = dp.Data("your_file.csv") + +The supported file types are: + +* CSV file (or any delimited file) +* JSON object +* Avro file +* Parquet file +* Graph data file +* Text file +* Pandas DataFrame +* A URL that points to one of the supported file types above + +It's also possible to specifically call one of the data classes such as the following command: + +.. code-block:: python + + from dataprofiler.data_readers.csv_data import CSVData + data = CSVData("your_file.csv", options={"delimiter": ","}) + +Additionally any of the data classes can be loaded using a URL: + +.. 
code-block:: python + + import dataprofiler as dp + data = dp.Data("https://you_website.com/your_file.file", options={"verify_ssl": "True"}) + +Below are descriptions of the various `Data` classes and the available options. + +CSVData +======= + +Data class for loading datasets of type CSV. Can be specified by passing +in memory data or via a file path. Options pertaining the CSV may also +be specified using the options dict parameter. + +`CSVData(input_file_path=None, data=None, options=None)` + +Possible `options`: + +* delimiter - Must be a string, for example `"delimiter": ","` +* data_format - Must be a string, possible choices: "dataframe", "records" +* selected_columns - Columns being selected from the entire dataset, must be a + list `["column 1", "ssn"]` +* sample_nrows - Reservoir sampling to sample `"n"` rows out of a total of `"M"` rows. + Specified for how many rows to sample, default None. +* header - Define the header, for example + + * `"header": 'auto'` for auto detection + * `"header": None` for no header + * `"header": ` to specify the header row (0 based index) + +JSONData +======== + +Data class for loading datasets of type JSON. Can be specified by +passing in memory data or via a file path. Options pertaining the JSON +may also be specified using the options dict parameter. JSON data can be +accessed via the "data" property, the "metadata" property, and the +"data_and_metadata" property. + +`JSONData(input_file_path=None, data=None, options=None)` + +Possible `options`: + +* data_format - must be a string, choices: "dataframe", "records", "json", "flattened_dataframe" + + * "flattened_dataframe" is best used for JSON structure typically found in data streams that contain + nested lists of dictionaries and a payload. 
For example: `{"data": [ columns ], "response": 200}` +* selected_keys - columns being selected from the entire dataset, must be a list `["column 1", "ssn"]` +* payload_keys - The dictionary keys for the payload of the JSON, typically called "data" + or "payload". Defaults to ["data", "payload", "response"]. + + +AVROData +======== + +Data class for loading datasets of type AVRO. Can be specified by +passing in memory data or via a file path. Options pertaining the AVRO +may also be specified using the options dict parameter. + +`AVROData(input_file_path=None, data=None, options=None)` + +Possible `options`: + +* data_format - must be a string, choices: "dataframe", "records", "avro", "json", "flattened_dataframe" + + * "flattened_dataframe" is best used for AVROs with a JSON structure typically found in data streams that contain + nested lists of dictionaries and a payload. For example: `{"data": [ columns ], "response": 200}` +* selected_keys - columns being selected from the entire dataset, must be a list `["column 1", "ssn"]` + +ParquetData +=========== + +Data class for loading datasets of type PARQUET. Can be specified by +passing in memory data or via a file path. Options pertaining the +PARQUET may also be specified using the options dict parameter. + +`ParquetData(input_file_path=None, data=None, options=None)` + +Possible `options`: + +* data_format - must be a string, choices: "dataframe", "records", "json" +* selected_keys - columns being selected from the entire dataset, must be a list `["column 1", "ssn"]` +* sample_nrows - Random sampling to sample `"n"` rows out of a total of `"M"` rows. + Specified for how many rows to sample, default None. + +GraphData +========= + +Data Class for loading datasets of graph data. Currently takes CSV format, +further type formats will be supported. Can be specified by passing +in memory data (NetworkX Graph) or via a file path. Options pertaining the CSV file may also +be specified using the options dict parameter. 
Loads data from CSV into memory +as a NetworkX Graph. + +`GraphData(input_file_path=None, data=None, options=None)` + +Possible `options`: + +* delimiter - must be a string, for example `"delimiter": ","` +* data_format - must be a string, possible choices: "graph", "dataframe", "records" +* header - Define the header, for example + + * `"header": 'auto'` for auto detection + * `"header": None` for no header + * `"header": ` to specify the header row (0 based index) + +TextData +======== + +Data class for loading datasets of type TEXT. Can be specified by +passing in memory data or via a file path. Options pertaining the TEXT +may also be specified using the options dict parameter. + +`TextData(input_file_path=None, data=None, options=None)` + +Possible `options`: + +* data_format: user selected format in which to return data. Currently only supports "text". +* samples_per_line - chunks by which to read in the specified dataset + + +Data Using a URL +================ + +Data class for loading datasets of any type using a URL. Specified by passing in +any valid URL that points to one of the valid data types. Options pertaining the +URL may also be specified using the options dict parameter. + +`Data(input_file_path=None, data=None, options=None)` + +Possible `options`: + +* verify_ssl: must be a boolean string, choices: "True", "False". Set to "True" by default. + +Data Using an AWS S3 URI +======================== + +Data class for loading datasets from AWS S3 URI. Specified by passing in +any valid bucket path that points to one of the valid data types. + +`Data('s3a://my-bucket/file_name.txt')` + +Possible `options`: + +* `storage_options`: must be a dictionary where the keys for boto3 initialization are set + If `storage_options` is provided in `options`, the below variables are retrieved from the dictionary provided. Otherwise, will retrieve from `environment variables `_. 
+ + * `AWS_ACCESS_KEY_ID` + * `AWS_SECRET_ACCESS_KEY` + * `AWS_SESSION_TOKEN` + * `AWS_REGION` (default `us-east-1`) diff --git a/_docs/docs/source/examples.rst b/_docs/docs/source/examples.rst new file mode 100644 index 000000000..3637da6ac --- /dev/null +++ b/_docs/docs/source/examples.rst @@ -0,0 +1,24 @@ +.. _examples: + +Examples +******** + +These examples provide a more in-depth look into the details of the ``Data Profiler`` library. + +Basics +------ + +.. toctree:: + :maxdepth: 0 + + Overview of Data Profiler + Data Reader + Structured Profiler + Unstructured Profiler + Graph Profiler + Labeler + Adding Models to a Labeler Pipeline + Creating a Regex Labeler + Creating a ColumnName Labeler + Merge Profile List + Dataloader with Popmon Reports diff --git a/_docs/docs/source/graph_data_demo.nblink b/_docs/docs/source/graph_data_demo.nblink new file mode 100644 index 000000000..40408c3ae --- /dev/null +++ b/_docs/docs/source/graph_data_demo.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/graph_data_demo.ipynb" +} diff --git a/_docs/docs/source/graphs.rst b/_docs/docs/source/graphs.rst new file mode 100644 index 000000000..23c2d316b --- /dev/null +++ b/_docs/docs/source/graphs.rst @@ -0,0 +1,196 @@ +.. _reports: + +Graphs +****** + +Graph Your Data +=============== + +We can plot some of our data as seaborn histogram plots. Below will demonstrate how to do so and provide examples. + +The following plots are currently available to work directly with your profilers: + + * histogram (numeric columns only) + * missing values matrix + +Below shows how to do so with examples. + +What we need to import +~~~~~~~~~~~~~~~~~~~~~~ +.. code-block:: python + + from dataprofiler.reports import graphs + +The main functions that is used to plot histograms are in graphs. **You will also need the `dataprofiler[reports]` requirement to be installed**: + +.. 
code-block:: console + + pip install 'dataprofiler[reports]' + +Plotting from a StructuredProfiler class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +With a StructuredProfiler class variable, we can specify what columns we want to be plotted, and plot them into histograms. + +.. code-block:: python + + graphs.plot_histograms(profiler, column_names, column_inds) + +These are what the variables mean: + + * **profiler** - StructuredProfiler class variable that contains the data we want + * **columns** - (Optional) The list of IntColumn or FloatColumn *names* we want to specifically plot. If specified, `column_inds` cannot be specified. + * **column_inds** - (Optional) The list of IntColumn or FloatColumn *indexes* we want to specifically plot. If specified, `column_names` cannot be specified. + + +Additionally, we can also plot the missing values matrix for a StructuredProfiler: + +.. code-block:: python + + graphs.plot_missing_values_matrix(profiler, ax, title) + +These are what the variables mean: + + * **profiler** - StructuredProfiler class variable that contains the data we want + * **ax** - (Optional) MatPlotLib Axes to plot the matrix within. + * **title** - (Optional) The title of the axes we want to define. + + +Plotting an individual IntColumn or FloatColumn +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +With a column's Int or Float profile, we can plot their respective histograms. + +.. code-block:: python + + graphs.plot_col_histogram(column, axes, title) + +These are what the variables mean: + + * **column** - The IntColumn or FloatColumn we want to plot + * **axes** - (Optional) The MatPlotLib Axes to plot the histogram within. + * **title** - (Optional) The title of the axes we want to define. + + +Additionally, we can also plot the missing values bargraph for any column profile: + +.. 
code-block:: python + + graphs.plot_col_missing_values(profiler, ax, title) + +These are what the variables mean: + + * **profiler** - The StructuredColProfiler we want to plot + * **ax** - (Optional) MatPlotLib Axes to plot the matrix within. + * **title** - (Optional) The title of the axes we want to define. + +Examples +~~~~~~~~ + +Histograms +---------- + +1. This example demonstrates how we can take a StructuredProfiler class and plot histograms of the specified columns. + +.. code-block:: python + + import dataprofiler as dp + from dataprofiler.reports import graphs + + + data = [[1, 'a', 1.0], + [2, 'b', 2.2], + [3, 'c', 3.5], + [None, 'd', 10.0]] + profiler = dp.StructuredProfiler(data) + + # This will plot all IntColumn and FloatColumn as histograms (The first and last column). + fig = graphs.plot_histograms(profiler) + fig.show() + + # This will only plot the specified column, 0. + columns_names = [0] + fig = graphs.plot_histograms(profiler, columns_names) + fig.show() + +.. image:: _static/images/histogram_example_0.png + :alt: First Histogram Example Image + +.. image:: _static/images/histogram_example_1.png + :alt: Second Histogram Example Image + +2. This example demonstrates how we can plot a low level profiler. + +.. code-block:: python + + import pandas as pd + + from dataprofiler.profilers import IntColumn + from dataprofiler.reports import graphs + + + data = pd.Series([1, 2, 3], dtype=str) + profiler = IntColumn('example') + profiler.update(data) + + # Plot the axes + ax = graphs.plot_col_histogram(profiler) + + # get and show the figure of the plotted histogram + fig = ax.get_figure() + fig.show() + +.. image:: _static/images/histogram_example_2.png + :alt: Histogram Column Only Example Image + + +Missing Values Matrix +--------------------- + +1. This example demonstrates how we can take a StructuredProfiler class and plot a missing values matrix. + +.. 
code-block:: python + + import dataprofiler as dp + from dataprofiler.reports import graphs + + + data = pd.DataFrame( + [[None, '', 1.0, '1/2/2021'], + [3, None, 3.5, ''], + [1, None, 1.0, '2/5/2020'], + [None, 1, 10.0, '3/5/2020']], + columns=['integer', 'str', 'float', 'datetime'], + dtype=object + ) + profiler = dp.StructuredProfiler(data) + + # This will plot the missing values matrix for all columns. + fig = graphs.plot_missing_values_matrix(profiler) + fig.show() + +.. image:: _static/images/missing_value_matrix_example_0.png + :alt: Missing Values Matrix Example Image + +2. This example demonstrates how we can plot barchart of a column's missing values. + +.. code-block:: python + + import pandas as pd + + from dataprofiler.profilers.profile_builder import StructuredColProfiler + from dataprofiler.reports import graphs + + + data = pd.Series([1, 2, 3, None, None, 4], name='example', dtype=str) + profiler = StructuredColProfiler(data) + + # Plot the axes, can be a list of multiple columns + ax = graphs.plot_col_missing_values([profiler]) + + # get and show the figure of the plotted histogram + fig = ax.get_figure() + fig.show() + +.. image:: _static/images/missing_value_barchart_example_0.png + :alt: Missing Values Column Only Example Image \ No newline at end of file diff --git a/_docs/docs/source/index.rst b/_docs/docs/source/index.rst new file mode 100644 index 000000000..8225be28f --- /dev/null +++ b/_docs/docs/source/index.rst @@ -0,0 +1,479 @@ +.. _Data Profiler: + +==================================== +Data Profiler | What's in your data? +==================================== + +Purpose +======= + +The DataProfiler is a Python library designed to make data analysis, monitoring and **sensitive data detection** easy. + +Loading **Data** with a single command, the library automatically formats & loads files into a DataFrame. **Profiling** the Data, the library identifies the schema, statistics, entities and more. 
Data Profiles can then be used in downstream applications or reports. + +The Data Profiler comes with a cutting edge pre-trained deep learning model, used to efficiently identify **sensitive data** (or **PII**). If customization is needed, it's easy to add new entities to the existing pre-trained model or insert a new pipeline for entity recognition. + +The best part? Getting started only takes a few lines of code (`Example CSV`_): + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler + + data = Data("your_file.csv") # Auto-Detect & Load: CSV, AVRO, Parquet, JSON, Text + print(data.data.head(5)) # Access data directly via a compatible Pandas DataFrame + + profile = Profiler(data) # Calculate Statistics, Entity Recognition, etc + readable_report = profile.report(report_options={"output_format":"pretty"}) + print(json.dumps(readable_report, indent=4)) + + +To install the full package from pypi: + +.. code-block:: console + + pip install DataProfiler[ml] + +If the ML requirements are too strict (say, you don't want to install tensorflow), you can install a slimmer package. The slimmer package disables the default sensitive data detection / entity recognition (labler) + +Install from pypi: + +.. code-block:: console + + pip install DataProfiler + +If you have suggestions or find a bug, please open an `issue`_. + +Visit the :ref:`API` to explore Data Profiler's terminology. + + +What is a Data Profile? +======================= + +In the case of this library, a data profile is a dictionary containing statistics and predictions about the underlying dataset. There are "global statistics" or `global_stats`, which contain dataset level data and there are "column/row level statistics" or `data_stats` (each column is a new key-value entry). + +The format for a structured profile is below: + +.. 
code-block:: python + + "global_stats": { + "samples_used": int, + "column_count": int, + "row_count": int, + "row_has_null_ratio": float, + "row_is_null_ratio": float, + "unique_row_ratio": float, + "duplicate_row_count": int, + "file_type": string, + "encoding": string, + "correlation_matrix": list[list[int]], (*) + "chi2_matrix": list[list[float]], + "profile_schema": dict[string, list[int]] + }, + "data_stats": [ + { + "column_name": string, + "data_type": string, + "data_label": string, + "categorical": bool, + "order": string, + "samples": list[str], + "statistics": { + "sample_size": int, + "null_count": int, + "null_types": list[string], + "null_types_index": dict[string, list[int]], + "data_type_representation": dict[string, list[string]], + "min": [null, float], + "max": [null, float], + "sum": float, + "mode": list[float], + "median": float, + "median_absolute_deviation": float, + "mean": float, + "variance": float, + "stddev": float, + "skewness": float, + "kurtosis": float, + "num_zeros": int, + "num_negatives": int, + "histogram": { + "bin_counts": list[int], + "bin_edges": list[float], + }, + "quantiles": { + int: float + }, + "vocab": list[char], + "avg_predictions": dict[string, float], + "data_label_representation": dict[string, float], + "categories": list[str], + "unique_count": int, + "unique_ratio": float, + "categorical_count": dict[string, int], + "gini_impurity": float, + "unalikeability": float, + "precision": { + 'min': int, + 'max': int, + 'mean': float, + 'var': float, + 'std': float, + 'sample_size': int, + 'margin_of_error': float, + 'confidence_level': float + }, + "times": dict[string, float], + "format": string + }, + "null_replication_metrics": { + "class_prior": list[int], + "class_sum": list[list[int]], + "class_mean": list[list[int]] + } + } + ] + +(*) Currently the correlation matrix update is toggled off. It will be reset in a later update. Users can still use it as desired with the is_enable option set to True. 
+ +The format for an unstructured profile is below: + +.. code-block:: python + + "global_stats": { + "samples_used": int, + "empty_line_count": int, + "file_type": string, + "encoding": string, + "memory_size": float, # in MB + }, + "data_stats": { + "data_label": { + "entity_counts": { + "word_level": dict[string, int], + "true_char_level": dict[string, int], + "postprocess_char_level": dict[string, int] + }, + "entity_percentages": { + "word_level": dict[string, float], + "true_char_level": dict[string, float], + "postprocess_char_level": dict[string, float] + }, + "times": dict[string, float] + }, + "statistics": { + "vocab": list[char], + "vocab_count": dict[string, int], + "words": list[string], + "word_count": dict[string, int], + "times": dict[string, float] + } + } + +The format for a graph profile is below: + +.. code-block:: python + + "num_nodes": int, + "num_edges": int, + "categorical_attributes": list[string], + "continuous_attributes": list[string], + "avg_node_degree": float, + "global_max_component_size": int, + "continuous_distribution": { + "": { + "name": string, + "scale": float, + "properties": list[float, np.array] + }, + "": None, + }, + "categorical_distribution": { + "": None, + "": { + "bin_counts": list[int], + "bin_edges": list[float] + }, + }, + "times": dict[string, float] + +Supported Data Formats +~~~~~~~~~~~~~~~~~~~~~~ + +* Any delimited file (CSV, TSV, etc.) +* JSON object +* Avro file +* Parquet file +* Text file +* Pandas DataFrame +* A URL that points to one of the supported file types above + + +Data Labels +~~~~~~~~~~~ + +*Data Labels* are determined per cell for structured data (column/row when the *profiler* is used) or at the character level for unstructured data. + +* UNKNOWN +* ADDRESS +* BAN (bank account number, 10-18 digits) +* CREDIT_CARD +* EMAIL_ADDRESS +* UUID +* HASH_OR_KEY (md5, sha1, sha256, random hash, etc.) 
+* IPV4 +* IPV6 +* MAC_ADDRESS +* PERSON +* PHONE_NUMBER +* SSN +* URL +* US_STATE +* DRIVERS_LICENSE +* DATE +* TIME +* DATETIME +* INTEGER +* FLOAT +* QUANTITY +* ORDINAL + + +Get Started +=========== + +Load a File +~~~~~~~~~~~ + +The profiler should automatically identify the file type and load the data into a `Data Class`. + +Along with other attributes the `Data class` enables structured data to be accessed via a valid Pandas DataFrame. + +.. code-block:: python + + # Load a csv file, return a CSVData object + csv_data = Data('your_file.csv') + + # Print the first 10 rows of the csv file + print(csv_data.data.head(10)) + + # Load a parquet file, return a ParquetData object + parquet_data = Data('your_file.parquet') + + # Sort the data by the name column + parquet_data.data.sort_values(by='name', inplace=True) + + # Print the sorted first 10 rows of the parquet data + print(parquet_data.data.head(10)) + + +If the file type is not automatically identified (rare), you can specify them +specifically, see section Data Readers. + +Profile a File +~~~~~~~~~~~~~~ + +This example uses a CSV file, but CSV, JSON, Avro, Parquet or Text should also work. + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Load file (CSV should be automatically identified) + data = Data("your_file.csv") + + # Profile the dataset + profile = Profiler(data) + + # Generate a report and use json to prettify. + report = profile.report(report_options={"output_format":"pretty"}) + + # Print the report + print(json.dumps(report, indent=4)) + +Updating Profiles +~~~~~~~~~~~~~~~~~ + +Currently, the data profiler is equipped to update its profile in batches. + +.. 
code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Load and profile a CSV file + data = Data("your_file.csv") + profile = Profiler(data) + + # Update the profile with new data: + new_data = Data("new_data.csv") + profile.update_profile(new_data) + + # Print the report using json to prettify. + report = profile.report(report_options={"output_format":"pretty"}) + print(json.dumps(report, indent=4)) + + +Merging Profiles +~~~~~~~~~~~~~~~~ + +If you have two files with the same schema (but different data), it is possible to merge the two profiles together via an addition operator. + +This also enables profiles to be determined in a distributed manner. + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Load a CSV file with a schema + data1 = Data("file_a.csv") + profile1 = Profiler(data1) + + # Load another CSV file with the same schema + data2 = Data("file_b.csv") + profile2 = Profiler(data2) + + profile3 = profile1 + profile2 + + # Print the report using json to prettify. + report = profile3.report(report_options={"output_format":"pretty"}) + print(json.dumps(report, indent=4)) + +Profile a Pandas DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import pandas as pd + import dataprofiler as dp + import json + + my_dataframe = pd.DataFrame([[1, 2.0],[1, 2.2],[-1, 3]]) + profile = dp.Profiler(my_dataframe) + + # print the report using json to prettify. + report = profile.report(report_options={"output_format":"pretty"}) + print(json.dumps(report, indent=4)) + + # read a specified column, in this case it is labeled 0: + print(json.dumps(report["data stats"][0], indent=4)) + + +Unstructured Profiler +~~~~~~~~~~~~~~~~~~~~~ + +In addition to the structured profiler, the Data Profiler provides unstructured +profiling for the TextData object or string. 
Unstructured profiling also works +with list(string), pd.Series(string) or pd.DataFrame(string) given profiler_type +option specified as `unstructured`. Below is an example of unstructured profile +with a text file. + +.. code-block:: python + + import dataprofiler as dp + import json + my_text = dp.Data('text_file.txt') + profile = dp.Profiler(my_text) + + # print the report using json to prettify. + report = profile.report(report_options={"output_format":"pretty"}) + print(json.dumps(report, indent=4)) + +Another example of unstructured profile with pd.Series of string is given as below + +.. code-block:: python + + import dataprofiler as dp + import pandas as pd + import json + + text_data = pd.Series(['first string', 'second string']) + profile = dp.Profiler(text_data, profiler_type="unstructured") + + # print the report using json to prettify. + report = profile.report(report_options={"output_format":"pretty"}) + print(json.dumps(report, indent=4)) + + +Graph Profiler +~~~~~~~~~~~~~~ + +DataProfiler also provides the ability to profile graph data from a csv file. Below is an example of the graph profiler with a graph data csv file: + +.. code-block:: python + + import dataprofiler as dp + import pprint + + my_graph = dp.Data('graph_file.csv') + profile = dp.Profiler(my_graph) + + # print the report using pretty print (json dump does not work on numpy array values inside dict) + report = profile.report() + printer = pprint.PrettyPrinter(sort_dicts=False, compact=True) + printer.pprint(report) + + +Specifying a Filetype or Delimiter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Example of specifying a CSV data type, with a `,` delimiter. +In addition, it utilizes only the first 10,000 rows. + +.. 
code-block:: python + + import json + import os + from dataprofiler import Data, Profiler + from dataprofiler.data_readers.csv_data import CSVData + + # Load a CSV file, with "," as the delimiter + data = CSVData("your_file.csv", options={"delimiter": ","}) + + # Split the data, such that only the first 10,000 rows are used + data = data.data[0:10000] + + # Read in profile and print results + profile = Profiler(data) + print(json.dumps(profile.report(report_options={"output_format":"pretty"}), indent=4)) + + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Getting Started: + + Intro + install.rst + data_readers.rst + profiler.rst + data_labeling.rst + graphs.rst + architecture.rst + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: User Guide: + + examples.rst + API.rst + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Community: + + roadmap.rst + Changelog + Feedback + GitHub + Contributing + +.. _Example CSV: https://raw.githubusercontent.com/capitalone/DataProfiler/main/dataprofiler/tests/data/csv/aws_honeypot_marx_geo.csv +.. _issue: https://github.com/capitalone/DataProfiler/issues/new/choose + + + diff --git a/_docs/docs/source/install.rst b/_docs/docs/source/install.rst new file mode 100644 index 000000000..bdf4c3bb4 --- /dev/null +++ b/_docs/docs/source/install.rst @@ -0,0 +1,145 @@ +.. _install: + +Install +******* + +To install the full package from pypi: + +.. code-block:: console + + pip install DataProfiler[ml] + +If the ML requirements are too strict (say, you don't want to install +tensorflow), you can install a slimmer package. The slimmer package disables +the default sensitive data detection / entity recognition (labler) + +Install from pypi: + +.. code-block:: console + + pip install DataProfiler + +Snappy Installation +=================== + +This is required to profile parquet/avro datasets + +MacOS (intel chip) with homebrew: + +.. 
code-block:: console + + brew install snappy && CPPFLAGS="-I/usr/local/include -L/usr/local/lib" pip install python-snappy + + +MacOS (apple chip) with homebrew: + +.. code-block:: console + + brew install snappy && CPPFLAGS="-I/opt/homebrew/include -L/opt/homebrew/lib" pip install python-snappy + + +Linux install: + +.. code-block:: console + + sudo apt-get -y install libsnappy-dev + + +Build From Scratch +================== + +NOTE: Installation for python3 + +virtualenv install: + +.. code-block:: console + + python3 -m pip install virtualenv + + +Setup virtual env: + +.. code-block:: console + + python3 -m virtualenv --python=python3 venv3 + source venv3/bin/activate + + +Install requirements: + +.. code-block:: console + + pip3 install -r requirements.txt + +Install labeler dependencies: + +.. code-block:: console + + pip3 install -r requirements-ml.txt + + +Install via the repo -- Build setup.py and install locally: + +.. code-block:: console + + python3 setup.py sdist bdist bdist_wheel + pip3 install dist/DataProfiler*-py3-none-any.whl + + +If you see: + +.. code-block:: console + + ERROR: Double requirement given:dataprofiler==X.Y.Z from dataprofiler/dist/DataProfiler-X.Y.Z-py3-none-any.whl (already in dataprofiler==X2.Y2.Z2 from dataprofiler/dist/DataProfiler-X2.Y2.Z2-py3-none-any.whl, name='dataprofiler') + +This means that you have multiple versions of the DataProfiler distribution +in the dist folder. +To resolve, either remove the older one or delete the folder and rerun the steps +above. + +Install via github: + +.. code-block:: console + + pip3 install git+https://github.com/capitalone/dataprofiler.git#egg=dataprofiler + + + +Testing +======= + +For testing, install test requirements: + +.. code-block:: console + + pip3 install -r requirements-test.txt + + +To run all unit tests, use: + +.. code-block:: console + + DATAPROFILER_SEED=0 python3 -m unittest discover -p "test*.py" + + +To run file of unit tests, use form: + +.. 
code-block:: console + + DATAPROFILER_SEED=0 python3 -m unittest discover -p test_profile_builder.py + + +To run a file with Pytest use: + +.. code-block:: console + + DATAPROFILER_SEED=0 pytest dataprofiler/tests/data_readers/test_csv_data.py -v + + +To run individual of unit test, use form: + +.. code-block:: console + + DATAPROFILER_SEED=0 python3 -m unittest dataprofiler.tests.profilers.test_profile_builder.TestProfiler + + diff --git a/_docs/docs/source/labeler.nblink b/_docs/docs/source/labeler.nblink new file mode 100644 index 000000000..f862443fd --- /dev/null +++ b/_docs/docs/source/labeler.nblink @@ -0,0 +1,6 @@ +{ + "path": "../../../examples/labeler.ipynb", + "extra-media": [ + "../../../examples/DL-Flowchart.png" + ] +} \ No newline at end of file diff --git a/_docs/docs/source/merge_profile_list.nblink b/_docs/docs/source/merge_profile_list.nblink new file mode 100644 index 000000000..39102658b --- /dev/null +++ b/_docs/docs/source/merge_profile_list.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/merge_profile_list.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/source/modules.rst b/_docs/docs/source/modules.rst new file mode 100644 index 000000000..0593459df --- /dev/null +++ b/_docs/docs/source/modules.rst @@ -0,0 +1,7 @@ +dataprofiler +============ + +.. 
toctree:: + :maxdepth: 4 + + dataprofiler diff --git a/_docs/docs/source/overview.nblink b/_docs/docs/source/overview.nblink new file mode 100644 index 000000000..4c118878e --- /dev/null +++ b/_docs/docs/source/overview.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/intro_data_profiler.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/source/popmon_dp_loader_example.nblink b/_docs/docs/source/popmon_dp_loader_example.nblink new file mode 100644 index 000000000..1a288a318 --- /dev/null +++ b/_docs/docs/source/popmon_dp_loader_example.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/popmon_dp_loader_example.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/source/profiler.rst b/_docs/docs/source/profiler.rst new file mode 100644 index 000000000..56d16a274 --- /dev/null +++ b/_docs/docs/source/profiler.rst @@ -0,0 +1,965 @@ +.. _profiler: + +Profiler +******** + +Profile Your Data +================= + +Profiling your data is easy. Just use the data reader, send the data to the +profiler, and print out the report. + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler + + data = Data("your_file.csv") # Auto-Detect & Load: CSV, AVRO, Parquet, JSON, Text + + profile = Profiler(data) # Calculate Statistics, Entity Recognition, etc + + readable_report = profile.report(report_options={"output_format": "pretty"}) + print(json.dumps(readable_report, indent=4)) + +If the data is structured, the profile will return global statistics as well as +column by column statistics. The vast amount of statistics are listed on the +intro page. + +Load a File +~~~~~~~~~~~ + +The profiler should automatically identify the file type and load the data into a `Data Class`. + +Along with other attributtes the `Data class` enables structured data to be accessed via a valid Pandas DataFrame. + +.. 
code-block:: python + + # Load a csv file, return a CSVData object + csv_data = Data('your_file.csv') + + # Print the first 10 rows of the csv file + print(csv_data.data.head(10)) + + # Load a parquet file, return a ParquetData object + parquet_data = Data('your_file.parquet') + + # Sort the data by the name column + parquet_data.data.sort_values(by='name', inplace=True) + + # Print the sorted first 10 rows of the parquet data + print(parquet_data.data.head(10)) + + +If the file type is not automatically identified (rare), you can specify them +specifically, see section Data Readers. + +Profile a File +~~~~~~~~~~~~~~ + +Example uses a CSV file for example, but CSV, JSON, Avro or Parquet should also work. + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Load file (CSV should be automatically identified) + data = Data("your_file.csv") + + # Profile the dataset + profile = Profiler(data) + + # Generate a report and use json to prettify. + report = profile.report(report_options={"output_format": "pretty"}) + + # Print the report + print(json.dumps(report, indent=4)) + +Updating Profiles +~~~~~~~~~~~~~~~~~ + +Currently, the data profiler is equipped to update its profile in batches. + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Load and profile a CSV file + data = Data("your_file.csv") + profile = Profiler(data) + + # Update the profile with new data: + new_data = Data("new_data.csv") + profile.update_profile(new_data) + + # Print the report using json to prettify. + report = profile.report(report_options={"output_format": "pretty"}) + print(json.dumps(report, indent=4)) + + +Merging Profiles +~~~~~~~~~~~~~~~~ + +If you have two files with the same schema (but different data), it is possible to merge the two profiles together via an addition operator. + +This also enables profiles to be determined in a distributed manner. + +.. 
code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Load a CSV file with a schema + data1 = Data("file_a.csv") + profile1 = Profiler(data1) + + # Load another CSV file with the same schema + data2 = Data("file_b.csv") + profile2 = Profiler(data2) + + profile3 = profile1 + profile2 + + # Print the report using json to prettify. + report = profile3.report(report_options={"output_format": "pretty"}) + print(json.dumps(report, indent=4)) + + +Profile Differences +~~~~~~~~~~~~~~~~~~~ + +Profile differences take two profiles and find the differences +between them. Create the difference report like this: + +.. code-block:: python + + from dataprofiler import Data, Profiler + + # Load a CSV file + data1 = Data("file_a.csv") + profile1 = Profiler(data1) + + # Load another CSV file + data2 = Data("file_b.csv") + profile2 = Profiler(data2) + + diff_report = profile1.diff(profile2) + print(diff_report) + +The `.diff()` operation is available between two profiles, although there are different +outputs depending on the type of profile being differenced. For example, for numerical +column profiles (e.g. integers and floats), three valuable calculations that +`.diff()` returns are `t-test`, `chi2-test`, and `psi` (Population Stability Index) +for understanding distributional changes. + +The difference report contains a dictionary that mirrors the profile report. +Each data type has its own difference: + +* **Int/Float** - One profile subtracts the value from the other. 
+ +* **String** - The strings will be shown in a list: + + - [profile1 str, profile2 str] +* **List** - A list of 3 will be returned showing the unique values of + each profile and the shared values: + + - [profile 1 unique values, shared values, profile 2 unique values] +* **Dict** - Some dictionaries with varied keys will also return a list + of three in the format: + + - [profile 1 unique key-values, shared key differences, profile 2 unique key-values] + +Otherwise, when no differences occur: + +* **Any Type No Differences** - A string will report: "unchanged". + +Below is the structured difference report: + +.. code-block:: python + + { + 'global_stats': { + 'file_type': [str, str], + 'encoding': [str, str], + 'samples_used': int, + 'column_count': int, + 'row_count': int, + 'row_has_null_ratio': float, + 'row_is_null_ratio': float, + 'unique_row_ratio': float, + 'duplicate_row_count': int, + 'correlation_matrix': list[list[float]], + 'chi2_matrix': list[list[float]], + 'profile_schema': list[dict[str, int]] + }, + 'data_stats': [{ + 'column_name': str, + 'data_type': [str, str], + 'data_label': [list[str], list[str], list[str]], + 'categorical': [str, str], + 'order': [str, str], + 'statistics': { + 'min': float, + 'max': float, + 'sum': float, + 'mean': float, + 'median': float, + 'mode': [list[float], list[float], list[float]], + 'median_absolute_deviation': float, + 'variance': float, + 'stddev': float, + 't-test': { + 't-statistic': float, + 'conservative': {'deg_of_free': int, + 'p-value': float}, + 'welch': {'deg_of_free': float, + 'p-value': float}}, + 'psi': float, + "chi2-test": { + "chi2-statistic": float, + "deg_of_free": int, + "p-value": float + }, + 'unique_count': int, + 'unique_ratio': float, + 'categories': [list[str], list[str], list[str]], + 'gini_impurity': float, + 'unalikeability': float, + 'categorical_count': [dict[str, int], dict[str, int], dict[str, int]], + 'avg_predictions': [dict[str, float]], + 'label_representation': [dict[str, 
float]], + 'sample_size': int, + 'null_count': int, + 'null_types': [list[str], list[str], list[str]], + 'null_types_index': [dict[str, int], dict[str, int], dict[str, int]], + 'data_type_representation': [dict[str, float]] + }, + "null_replication_metrics": { + "class_prior": list[int], + "class_sum": list[list[int]], + "class_mean": list[list[int]] + } + } + +Below is the unstructured difference report: + +.. code-block:: python + + { + 'global_stats': { + 'file_type': [str, str], + 'encoding': [str, str], + 'samples_used': int, + 'empty_line_count': int, + 'memory_size': float + }, + 'data_stats': { + 'data_label': { + 'entity_counts': { + 'word_level': dict[str, int], + 'true_char_level': dict[str, int], + 'postprocess_char_level': dict[str, int] + }, + 'entity_percentages': { + 'word_level': dict[str, float], + 'true_char_level': dict[str, float], + 'postprocess_char_level': dict[str, float] + } + }, + 'statistics': { + 'vocab': [list[str], list[str], list[str]], + 'vocab_count': [dict[str, int], dict[str, int], dict[str, int]], + 'words': [list[str], list[str], list[str]], + 'word_count': [dict[str, int], dict[str, int], dict[str, int]] + } + } + } + + +Saving and Loading a Profile +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The profiles can easily be saved and loaded as shown below: + +**NOTE: Json saving and loading only supports Structured Profiles currently.** + +There are two save/load methods: + +* **Pickle save/load** + + * Save a profile as a `.pkl` file. + * Load a `.pkl` file as a profile object. + +.. 
code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Load a CSV file, with "," as the delimiter + data = Data("your_file.csv") + + # Read data into profile + profile = Profiler(data) + + # save structured profile to pkl file + profile.save(filepath="my_profile.pkl") + + # load pkl file to structured profile + loaded_pkl_profile = Profiler.load(filepath="my_profile.pkl") + + print(json.dumps(loaded_pkl_profile.report(report_options={"output_format": "compact"}), + indent=4)) + +* **Json save/load** + + * Save a profile as a human-readable `.json` file. + * Load a `.json` file as a profile object. + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Load a CSV file, with "," as the delimiter + data = Data("your_file.csv") + + # Read data into profile + profile = Profiler(data) + + # save structured profile to json file + profile.save(filepath="my_profile.json", save_method="json") + + # load json file to structured profile + loaded_json_profile = Profiler.load(filepath="my_profile.json", load_method="json") + + print(json.dumps(loaded_json_profile.report(report_options={"output_format": "compact"}), + indent=4)) + + +Structured vs Unstructured Profiles +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When using the profiler, the data profiler will automatically infer whether to +create the structured profile or the unstructured profile. However, you can be +explicit as shown below: + +.. 
code-block:: python + + import json + from dataprofiler import Data, Profiler + + # Creating a structured profile + data1 = Data("normal_csv_file.csv") + structured_profile = Profiler(data1, profiler_type="structured") + + structured_report = structured_profile.report(report_options={"output_format": "pretty"}) + print(json.dumps(structured_report, indent=4)) + + # Creating an unstructured profile + data2 = Data("normal_text_file.txt") + unstructured_profile = Profiler(data2, profiler_type="unstructured") + + unstructured_report = unstructured_profile.report(report_options={"output_format": "pretty"}) + print(json.dumps(unstructured_report, indent=4)) + + +Setting the Sample Size +~~~~~~~~~~~~~~~~~~~~~~~ + +There are two ways to set sample size in a profile: samples_per_update and +min_true_samples. Samples_per_update takes an integer as the exact amount that +will be sampled. Min_true_samples will set the minimum amount of samples that +are not null. For example: + +.. code-block:: python + + from dataprofiler import Profiler + + sample_array = [1.0, NULL, 2.0] + profile = Profiler(sample_array, samples_per_update=2) + +The first two samples (1.0 and NULL) are used for the statistical analysis. + +In contrast, if we also set min_true_samples to 2 then the Data Reader will +continue to read until the minimum true samples were found for the given column. +For example: + +.. code-block:: python + + from dataprofiler import Profiler + + sample_array = [1.0, NULL, 2.0] + profile = Profiler(sample_array, samples_per_update=2, min_true_samples=2) + +This will use all samples in the statistical analysis until the number of "true" +(non-NULL) values is reached. Both min_true_samples and +samples_per_update conditions must be met. In this case, the profile will grab +the first two samples (1.0 and NULL) to satisfy the samples_per_update, and then +it will grab the first two VALID samples (1.0 and 2.0) to satisfy the +min_true_samples. 
+ +Profile a Pandas DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import pandas as pd + import dataprofiler as dp + import json + + my_dataframe = pd.DataFrame([[1, 2.0],[1, 2.2],[-1, 3]]) + profile = dp.Profiler(my_dataframe) + + # print the report using json to prettify. + report = profile.report(report_options={"output_format": "pretty"}) + print(json.dumps(report, indent=4)) + + # read a specified column, in this case it is labeled 0: + print(json.dumps(report["data stats"][0], indent=4)) + + +Specifying a Filetype or Delimiter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Example of specifying a CSV data type, with a `,` delimiter. +In addition, it utilizes only the first 10,000 rows. + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler + from dataprofiler.data_readers.csv_data import CSVData + + # Load a CSV file, with "," as the delimiter + data = CSVData("your_file.csv", options={"delimiter": ","}) + + # Split the data, such that only the first 10,000 rows are used + data = data.data[0:10000] + + # Read in profile and print results + profile = Profiler(data) + print(json.dumps(profile.report(report_options={"output_format": "pretty"}), indent=4)) + +Setting Profiler Seed +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Example of specifying a seed for reproducibility. + +.. 
code-block:: python + + import dataprofiler as dp + + # Set seed to non-negative integer value or None + dp.set_seed(0) + + +Profile Statistic Descriptions +============================== + +Structured Profile +~~~~~~~~~~~~~~~~~~ + +**global_stats**: + +* samples_used - number of input data samples used to generate this profile +* column_count - the number of columns contained in the input dataset +* row_count - the number of rows contained in the input dataset +* row_has_null_ratio - the proportion of rows that contain at least one null value to the total number of rows +* row_is_null_ratio - the proportion of rows that are fully comprised of null values (null rows) to the total number of rows +* unique_row_ratio - the proportion of distinct rows in the input dataset to the total number of rows +* duplicate_row_count - the number of rows that occur more than once in the input dataset +* file_type - the format of the file containing the input dataset (ex: .csv) +* encoding - the encoding of the file containing the input dataset (ex: UTF-8) +* correlation_matrix - matrix of shape `column_count` x `column_count` containing the correlation coefficients between each column in the dataset +* chi2_matrix - matrix of shape `column_count` x `column_count` containing the chi-square statistics between each column in the dataset +* profile_schema - a description of the format of the input dataset labeling each column and its index in the dataset + * string - the label of the column in question and its index in the profile schema +* times - the duration of time it took to generate the global statistics for this dataset in milliseconds + +**data_stats**: + +* column_name - the label/title of this column in the input dataset +* data_type - the primitive python data type that is contained within this column +* data_label - the label/entity of the data in this column as determined by the Labeler component +* categorical - 'true' if this column contains categorical data +* order - 
the way in which the data in this column is ordered, if any, otherwise “random” +* samples - a small subset of data entries from this column +* statistics - statistical information on the column + * sample_size - number of input data samples used to generate this profile + * null_count - the number of null entries in the sample + * null_types - a list of the different null types present within this sample + * null_types_index - a dict containing each null type and a respective list of the indices that it is present within this sample + * data_type_representation - the percentage of samples used identifying as each data_type + * min - minimum value in the sample + * max - maximum value in the sample + * mode - mode of the entries in the sample + * median - median of the entries in the sample + * median_absolute_deviation - the median absolute deviation of the entries in the sample + * sum - the total of all sampled values from the column + * mean - the average of all entries in the sample + * variance - the variance of all entries in the sample + * stddev - the standard deviation of all entries in the sample + * skewness - the statistical skewness of all entries in the sample + * kurtosis - the statistical kurtosis of all entries in the sample + * num_zeros - the number of entries in this sample that have the value 0 + * num_negatives - the number of entries in this sample that have a value less than 0 + * histogram - contains histogram relevant information + * bin_counts - the number of entries within each bin + * bin_edges - the thresholds of each bin + * quantiles - the value at each percentile in the order they are listed based on the entries in the sample + * vocab - a list of the characters used within the entries in this sample + * avg_predictions - average of the data label prediction confidences across all data points sampled + * categories - a list of each distinct category within the sample if `categorical` = 'true' + * unique_count - the number of 
distinct entries in the sample + * unique_ratio - the proportion of the number of distinct entries in the sample to the total number of entries in the sample + * categorical_count - number of entries sampled for each category if `categorical` = 'true' + * gini_impurity - measure of how often a randomly chosen element from the set would be incorrectly labeled if it was randomly labeled according to the distribution of labels in the subset + * unalikeability - a value denoting how frequently entries differ from one another within the sample + * precision - a dict of statistics with respect to the number of digits in a number for each sample + * times - the duration of time it took to generate this sample's statistics in milliseconds + * format - list of possible datetime formats +* null_replication_metrics - statistics of data partitioned based on whether column value is null (index 1 of lists referenced by dict keys) or not (index 0) + * class_prior - a list containing probability of a column value being null and not null + * class_sum - a list containing sum of all other rows based on whether column value is null or not + * class_mean - a list containing mean of all other rows based on whether column value is null or not + +Unstructured Profile +~~~~~~~~~~~~~~~~~~~~ + +**global_stats**: + +* samples_used - number of input data samples used to generate this profile +* empty_line_count - the number of empty lines in the input data +* file_type - the file type of the input data (ex: .txt) +* encoding - file encoding of the input data file (ex: UTF-8) +* memory_size - size of the input data in MB +* times - duration of time it took to generate this profile in milliseconds + +**data_stats**: + +* data_label - labels and statistics on the labels of the input data + * entity_counts - the number of times a specific label or entity appears inside the input data + * word_level - the number of words counted within each label or entity + * true_char_level - the number of 
characters counted within each label or entity as determined by the model + * postprocess_char_level - the number of characters counted within each label or entity as determined by the postprocessor + * entity_percentages - the percentages of each label or entity within the input data + * word_level - the percentage of words in the input data that are contained within each label or entity + * true_char_level - the percentage of characters in the input data that are contained within each label or entity as determined by the model + * postprocess_char_level - the percentage of characters in the input data that are contained within each label or entity as determined by the postprocessor + * times - the duration of time it took for the data labeler to predict on the data +* statistics - statistics of the input data + * vocab - a list of each character in the input data + * vocab_count - the number of occurrences of each distinct character in the input data + * words - a list of each word in the input data + * word_count - the number of occurrences of each distinct word in the input data + * times - the duration of time it took to generate the vocab and words statistics in milliseconds + +Graph Profile +~~~~~~~~~~~~~~~~~~ + +* num_nodes - number of nodes in the graph +* num_edges - number of edges in the graph +* categorical_attributes - list of categorical edge attributes +* continuous_attributes - list of continuous edge attributes +* avg_node_degree - average degree of nodes in the graph +* global_max_component_size: size of the global max component + +**continuous_distribution**: + +* : name of N-th edge attribute in list of attributes + * name - name of distribution for attribute + * scale - negative log likelihood used to scale and compare distributions + * properties - list of statistical properties describing the distribution + * [shape (optional), loc, scale, mean, variance, skew, kurtosis] + +**categorical_distribution**: + +* : name of N-th edge attribute in 
list of attributes + * bin_counts: counts in each bin of the distribution histogram + * bin_edges: edges of each bin of the distribution histogram + +* times - duration of time it took to generate this profile in milliseconds + +Profile Options +=============== + +The data profiler accepts several options to toggle on and off +features. The 8 columns (int options, float options, datetime options, +text options, order options, category options, data labeler options) can be +enabled or disabled. By default, all options are toggled on. Below is an example +of how to alter these options. Options shared by structured and unstructured options +must be specified as structured or unstructured when setting (ie. datalabeler options). + + +.. code-block:: python + + import json + from dataprofiler import Data, Profiler, ProfilerOptions + + # Load and profile a CSV file + data = Data("your_file.csv") + profile_options = ProfilerOptions() + + #All of these are different examples of adjusting the profile options + + # Options can be toggled directly like this: + profile_options.structured_options.text.is_enabled = False + profile_options.structured_options.text.vocab.is_enabled = True + profile_options.structured_options.int.variance.is_enabled = True + profile_options.structured_options.data_labeler.data_labeler_dirpath = \ + "Wheres/My/Datalabeler" + profile_options.structured_options.data_labeler.is_enabled = False + + # A dictionary can be sent in to set the properties for all the options + profile_options.set({"structured_options.data_labeler.is_enabled": False, "min.is_enabled": False}) + + # Specific columns can be set/disabled/enabled in the same way + profile_options.structured_options.text.set({"max.is_enabled":True, + "variance.is_enabled": True}) + + # numeric stats can be turned off/on entirely + profile_options.set({"is_numeric_stats_enabled": False}) + profile_options.set({"int.is_numeric_stats_enabled": False}) + + profile = Profiler(data, 
options=profile_options) + + # Print the report using json to prettify. + report = profile.report(report_options={"output_format": "pretty"}) + print(json.dumps(report, indent=4)) + + +Below is an breakdown of all the options. + +* **ProfilerOptions** - The top-level options class that contains options for the Profiler class + + * **presets** - A pre-configured mapping of a string name to group of options: + + * **default is None** + + * **"complete"** + + .. code-block:: python + + options = ProfilerOptions(presets="complete") + + * **"data_types"** + + .. code-block:: python + + options = ProfilerOptions(presets="data_types") + + * **"numeric_stats_disabled"** + + .. code-block:: python + + options = ProfilerOptions(presets="numeric_stats_disabled") + + * **"lower_memory_sketching"** + + .. code-block:: python + + options = ProfilerOptions(presets="lower_memory_sketching") + + * **structured_options** - Options responsible for all structured data + + * **multiprocess** - Option to enable multiprocessing. If on, multiprocessing is toggled on if the dataset contains more than 750,000 rows or more than 20 columns. + Automatically selects the optimal number of pooling processes to utilize based on system constraints when toggled on. + + * is_enabled - (Boolean) Enables or disables multiprocessing + + * **sampling_ratio** - A percentage, as a decimal, ranging from greater than 0 to less than or equal to 1 indicating how much input data to sample. Default value set to 0.2. + + * **int** - Options for the integer columns + + * is_enabled - (Boolean) Enables or disables the integer operations + + * min - Finds minimum value in a column + + * is_enabled - (Boolean) Enables or disables min + + * max - Finds maximum value in a column + + * is_enabled - (Boolean) Enables or disables max + + * mode - Finds mode(s) in a column + + * is_enabled - (Boolean) Enables or disables mode + * top_k_modes - (Int) Sets the number of modes to return if multiple exist. 
Default returns max 5 modes. + * median - Finds median value in a column + + * is_enabled - (Boolean) Enables or disables median + * sum - Finds sum of all values in a column + + * is_enabled - (Boolean) Enables or disables sum + + * variance - Finds variance of all values in a column + + * is_enabled - (Boolean) Enables or disables variance + * skewness - Finds skewness of all values in a column + + * is_enabled - (Boolean) Enables or disables skewness + * kurtosis - Finds kurtosis of all values in a column + + * is_enabled - (Boolean) Enables or disables kurtosis + * median_abs_deviation - Finds median absolute deviation of all values in a column + + * is_enabled - (Boolean) Enables or disables median absolute deviation + * num_zeros - Finds the count of zeros in a column + + * is_enabled - (Boolean) Enables or disables num_zeros + * num_negatives - Finds the count of negative numbers in a column + + * is_enabled - (Boolean) Enables or disables num_negatives + * bias_correction - Applies bias correction to variance, skewness, and kurtosis calculations + + * is_enabled - (Boolean) Enables or disables bias correction + * histogram_and_quantiles - Generates a histogram and quantiles + from the column values + + * bin_count_or_method - (String/List[String]) Designates preferred method for calculating histogram bins or the number of bins to use. + If left unspecified (None) the optimal method will be chosen by attempting all methods. + If multiple specified (list) the optimal method will be chosen by attempting the provided ones. + methods: 'auto', 'fd', 'doane', 'scott', 'rice', 'sturges', 'sqrt' + Note: 'auto' is used to choose optimally between 'fd' and 'sturges' + * num_quantiles - (Int) Number of quantiles to bin the data. + Default value is set to 1,000 quantiles. 
+ * is_enabled - (Boolean) Enables or disables histogram and quantiles + * **float** - Options for the float columns + + * is_enabled - (Boolean) Enables or disables the float operations + * precision - Finds the precision (significant figures) within the column + + * is_enabled - (Boolean) Enables or disables precision + * sample_ratio - (Float) The ratio of 0 to 1 how much data (identified as floats) to utilize as samples in determining precision + + * min - Finds minimum value in a column + + * is_enabled - (Boolean) Enables or disables min + * max - Finds maximum value in a column + + * is_enabled - (Boolean) Enables or disables max + * mode - Finds mode(s) in a column + + * is_enabled - (Boolean) Enables or disables mode + * top_k_modes - (Int) Sets the number of modes to return if multiple exist. Default returns max 5 modes. + * median - Finds median value in a column + + * is_enabled - (Boolean) Enables or disables median + * sum - Finds sum of all values in a column + + * is_enabled - (Boolean) Enables or disables sum + * variance - Finds variance of all values in a column + + * is_enabled - (Boolean) Enables or disables variance + * skewness - Finds skewness of all values in a column + + * is_enabled - (Boolean) Enables or disables skewness + * kurtosis - Finds kurtosis of all values in a column + + * is_enabled - (Boolean) Enables or disables kurtosis + * median_abs_deviation - Finds median absolute deviation of all values in a column + + * is_enabled - (Boolean) Enables or disables median absolute deviation + * is_numeric_stats_enabled - (Boolean) enable or disable all numeric stats + * num_zeros - Finds the count of zeros in a column + + * is_enabled - (Boolean) Enables or disables num_zeros + * num_negatives - Finds the count of negative numbers in a column + + * is_enabled - (Boolean) Enables or disables num_negatives + * bias_correction - Applies bias correction to variance, skewness, and kurtosis calculations + + * is_enabled - (Boolean) Enables or 
disables bias correction + * histogram_and_quantiles - Generates a histogram and quantiles + from the column values + + * bin_count_or_method - (String/List[String]) Designates preferred method for calculating histogram bins or the number of bins to use. + If left unspecified (None) the optimal method will be chosen by attempting all methods. + If multiple specified (list) the optimal method will be chosen by attempting the provided ones. + methods: 'auto', 'fd', 'doane', 'scott', 'rice', 'sturges', 'sqrt' + Note: 'auto' is used to choose optimally between 'fd' and 'sturges' + * num_quantiles - (Int) Number of quantiles to bin the data. + Default value is set to 1,000 quantiles. + * is_enabled - (Boolean) Enables or disables histogram and quantiles + * **text** - Options for the text columns + + * is_enabled - (Boolean) Enables or disables the text operations + * vocab - Finds all the unique characters used in a column + + * is_enabled - (Boolean) Enables or disables vocab + * min - Finds minimum value in a column + + * is_enabled - (Boolean) Enables or disables min + * max - Finds maximum value in a column + + * is_enabled - (Boolean) Enables or disables max + * mode - Finds mode(s) in a column + + * is_enabled - (Boolean) Enables or disables mode + * top_k_modes - (Int) Sets the number of modes to return if multiple exist. Default returns max 5 modes. 
+ * median - Finds median value in a column + + * is_enabled - (Boolean) Enables or disables median + * sum - Finds sum of all values in a column + + * is_enabled - (Boolean) Enables or disables sum + * variance - Finds variance of all values in a column + + * is_enabled - (Boolean) Enables or disables variance + * skewness - Finds skewness of all values in a column + + * is_enabled - (Boolean) Enables or disables skewness + * kurtosis - Finds kurtosis of all values in a column + + * is_enabled - (Boolean) Enables or disables kurtosis + * median_abs_deviation - Finds median absolute deviation of all values in a column + + * is_enabled - (Boolean) Enables or disables median absolute deviation + * bias_correction - Applies bias correction to variance, skewness, and kurtosis calculations + + * is_enabled - (Boolean) Enables or disables bias correction + * is_numeric_stats_enabled - (Boolean) enable or disable all numeric stats + * num_zeros - Finds the count of zeros in a column + + * is_enabled - (Boolean) Enables or disables num_zeros + * num_negatives - Finds the count of negative numbers in a column + + * is_enabled - (Boolean) Enables or disables num_negatives + * histogram_and_quantiles - Generates a histogram and quantiles + from the column values + + * bin_count_or_method - (String/List[String]) Designates preferred method for calculating histogram bins or the number of bins to use. + If left unspecified (None) the optimal method will be chosen by attempting all methods. + If multiple specified (list) the optimal method will be chosen by attempting the provided ones. + methods: 'auto', 'fd', 'doane', 'scott', 'rice', 'sturges', 'sqrt' + Note: 'auto' is used to choose optimally between 'fd' and 'sturges' + * num_quantiles - (Int) Number of quantiles to bin the data. + Default value is set to 1,000 quantiles. 
+ * is_enabled - (Boolean) Enables or disables histogram and quantiles + * **datetime** - Options for the datetime columns + + * is_enabled - (Boolean) Enables or disables the datetime operations + * **order** - Options for the order columns + + * is_enabled - (Boolean) Enables or disables the order operations + * **category** - Options for the category columns + + * is_enabled - (Boolean) Enables or disables the category operations + * top_k_categories - (int) Number of categories to be displayed when reporting + * max_sample_size_to_check_stop_condition - (int) The maximum sample size before categorical stop conditions are checked + * stop_condition_unique_value_ratio - (float) The highest ratio of unique values to dataset size that is to be considered a categorical type + * cms - (Boolean) Enables or Disables the use of count min sketch / heavy hitters for approximate frequency counts + * cms_confidence - (float) Defines the number of hashes used in CMS, default 0.95 + * cms_relative_error - (float) Defines the number of buckets used in CMS, default 0.01 + * cms_max_num_heavy_hitters - (int) The value used to define the threshold for minimum frequency required by a category to be counted + * **data_labeler** - Options for the data labeler columns + + * is_enabled - (Boolean) Enables or disables the data labeler operations + * data_labeler_dirpath - (String) Directory path to data labeler + * data_labeler_object - (BaseDataLabeler) Datalabeler to replace + the default labeler + * max_sample_size - (Int) The max number of samples for the data + labeler + * **correlation** - Option set for correlation profiling + * is_enabled - (Boolean) Enables or disables performing correlation profiling + * columns - Columns considered to calculate correlation + * **row_statistics** - (Boolean) Option to enable/disable row statistics calculations + + * unique_count - (UniqueCountOptions) Option to enable/disable unique row count calculations + + * is_enabled - (Bool) Enables or 
disables options for unique row count + * hashing_method - (String) Property to specify row hashing method ("full" | "hll") + * hll - (HyperLogLogOptions) Options for alternative method of estimating unique row count (activated when `hll` is the selected hashing_method) + + * seed - (Int) Used to set HLL hashing function seed + * register_count - (Int) Number of registers is equal to 2^register_count + + * null_count - (Boolean) Option to enable/disable functionalities for row_has_null_ratio and row_is_null_ratio + * **chi2_homogeneity** - Options for the chi-squared test matrix + + * is_enabled - (Boolean) Enables or disables performing chi-squared tests for homogeneity between the categorical columns of the dataset. + * **null_replication_metrics** - Options for calculating null replication metrics + + * is_enabled - (Boolean) Enables or disables calculation of null replication metrics + * **unstructured_options** - Options responsible for all unstructured data + + * **text** - Options for the text profile + + * is_case_sensitive - (Boolean) Specify whether the profile is case sensitive + * stop_words - (List of Strings) List of stop words to be removed when profiling + * top_k_chars - (Int) Number of top characters to be retrieved when profiling + * top_k_words - (Int) Number of top words to be retrieved when profiling + * vocab - Options for vocab count + + * is_enabled - (Boolean) Enables or disables the vocab stats + * words - Options for word count + + * is_enabled - (Boolean) Enables or disables the word stats + * **data_labeler** - Options for the data labeler + + * is_enabled - (Boolean) Enables or disables the data labeler operations + * data_labeler_dirpath - (String) Directory path to data labeler + * data_labeler_object - (BaseDataLabeler) Datalabeler to replace + the default labeler + * max_sample_size - (Int) The max number of samples for the data + labeler + + + +Statistical Dependency on Order of Updates +========================================== 
+ +Some profile features/statistics are dependent on the order in which the profiler +is updated with new data. + +Order Profile +~~~~~~~~~~~~~ + +The order profiler utilizes the last value in the previous data batch to ensure +the subsequent dataset is above/below/equal to that value when predicting +non-random order. + +For instance, a dataset to be predicted as ascending would require the following +batch data update to be ascending and its first value `>=` than that of the +previous batch of data. + +Ex. of ascending: + +.. code-block:: python + + batch_1 = [0, 1, 2] + batch_2 = [3, 4, 5] + +Ex. of random: + +.. code-block:: python + + batch_1 = [0, 1, 2] + batch_2 = [1, 2, 3] # notice how the first value is less than the last value in the previous batch + + +Reporting Structure +=================== + +For every profile, we can provide a report and customize it with a couple optional parameters: + +* output_format (string) + + * This will allow the user to decide the output format for report. + + * Options are one of [pretty, compact, serializable, flat]: + + * Pretty: floats are rounded to four decimal places, and lists are shortened. + * Compact: Similar to pretty, but removes detailed statistics such as runtimes, label probabilities, index locations of null types, etc. + * Serializable: Output is json serializable and not prettified + * Flat: Nested output is returned as a flattened dictionary +* num_quantile_groups (int) + + * You can sample your data as you like! With a minimum of one and a maximum of 1000, you can decide the number of quantile groups! + +.. 
code-block:: python + + report = profile.report(report_options={"output_format": "pretty"}) + report = profile.report(report_options={"output_format": "compact"}) + report = profile.report(report_options={"output_format": "serializable"}) + report = profile.report(report_options={"output_format": "flat"}) diff --git a/_docs/docs/source/profiler_example.nblink b/_docs/docs/source/profiler_example.nblink new file mode 100644 index 000000000..142ebd97f --- /dev/null +++ b/_docs/docs/source/profiler_example.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/structured_profilers.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/source/regex_labeler_from_scratch.nblink b/_docs/docs/source/regex_labeler_from_scratch.nblink new file mode 100644 index 000000000..3d98c5f1e --- /dev/null +++ b/_docs/docs/source/regex_labeler_from_scratch.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/regex_labeler_from_scratch/DataLabeler_from_scratch.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/source/roadmap.rst b/_docs/docs/source/roadmap.rst new file mode 100644 index 000000000..93886690b --- /dev/null +++ b/_docs/docs/source/roadmap.rst @@ -0,0 +1,58 @@ +.. _roadmap: + +Roadmap +******* + +For more detailed tasks, check out the repo's github issues page here: +`Github Issues `_. + + +Data Reader Updates +=================== +- Read data from S3 bucket + - All in the current `dp.Data()` API paradigm, we want to enable passing an S3 bucket file path to read in data from AWS s3. +- Pass list of data file paths to data reader +- Pass in list of data frames to data reader + +New Model +========= +- Transformer model for sensitive data detection + +Historical Profiles +=================== +- Some questions about Historical Profiles / need to step back and rethink design to start: + - Meta profile on top? + - Stored windowed info inside? Etc... 
+- Branch with current state of Historical Profiles +- Two example notebooks of current state: + - Notebook example `one `_. + - Notebook example `two `_. + + +Conditional Report Metric +========================= +- Based on what is populated on other metrics in the report, have "secondary" / "derivatives" of that number (or that number in conjunction with another number) populate in this report as well. +- For example, if null_count is not None, then populate a null_percent key with a value of the quotient of (null_count / sample_count). + +Space / Time Testing +==================== +- Automatic comparison testing for space and time analysis on PRs + - Standardize a report for space time analysis for future comparisons (create baseline numbers) + - Include those in integration tests that will automatically run on code when it is changed in PRs +- Could be an optional test, if the user thinks there is concern around the change driving an issue in the library performance + +Testing Suite Upgrades +====================== +- Add mocking to unit tests where mocking is not utilized +- Integration testing separated out from the unit testing suite. 
Determine how to only run remotely during PRs +- Backward compatibility testing along with informative warnings and errors when a user is utilizing incompatible versions of the library and saved profile object + +Historical Versions +=================== +- Legacy version upgrades to enable patches to prior versions of the Data Profiler + +Miscellaneous +============== +- Refactor Pandas to Polars DataFrames +- Spearman correlation calculation +- Workflow Profiles diff --git a/_docs/docs/source/unstructured_profiler_example.nblink b/_docs/docs/source/unstructured_profiler_example.nblink new file mode 100644 index 000000000..5b6829754 --- /dev/null +++ b/_docs/docs/source/unstructured_profiler_example.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/unstructured_profilers.ipynb" +} \ No newline at end of file diff --git a/_docs/docs/update_documentation.py b/_docs/docs/update_documentation.py new file mode 100644 index 000000000..7be79612d --- /dev/null +++ b/_docs/docs/update_documentation.py @@ -0,0 +1,87 @@ +#!/usr/bin/python +"""Script which auto updates the github pages documentation.""" +import os +import subprocess +import sys + +sys.path.insert(0, os.path.abspath(f'../../')) + +from dataprofiler import __version__ as version # noqa F401 + +# Make the rst files from the current repo +subprocess.run( + [ + "sphinx-apidoc", + "--templatedir=./source/_templates/", + "-f", + "-e", + "-o", + "../docs/source", + f"../../dataprofiler", + f"../../dataprofiler/tests/", + ] +) + +update_index_rst = True + +if not version: + Exception("There must be a valid version argument.") + +# Check if the source index file has already been updated +source_index = open("source/index.rst", "r+") +source_index_lines = source_index.readlines() +source_index.close() +for sentence in source_index_lines: + if sentence.startswith("* `" + version): + update_index_rst = False + +# Update the index file if needed +version_reference = "" +if update_index_rst: + buffer = 0 + source_index 
= open("source/index.rst", "w") + for sentence in source_index_lines: + if sentence.startswith("Documentation for"): + doc_version = "Documentation for " + version + "\n" + source_index.write(doc_version) + elif sentence.startswith("Versions"): + source_index.write("Versions\n") + source_index.write("========\n") + version_tag = "* `" + version + "`_\n" + source_index.write(version_tag) + version_reference = ( + ".. _" + version + ": ../../" + version + "/html/index.html\n\n" + ) + buffer = 1 + else: + if buffer == 0: + source_index.write(sentence) + else: + buffer = buffer - 1 + source_index.write(version_reference) +source_index.close() + +# Make the html files + +build_directory = "BUILDDIR= LATEST" +subprocess.run(["make", "html", build_directory]) + +# update the index file to redirect to the most current version of documentation +index_file = open("../index.html", "w") +redirect_link = ( + '' +) +index_file.write(redirect_link) +index_file.close() + +# update the profiler_options.html file to redirect to detailed options docs +index_file = open("../profiler_options.html", "w") +redirect_link = ( + '' +) +index_file.write(redirect_link) +index_file.close() diff --git a/_docs/index.html b/_docs/index.html new file mode 100644 index 000000000..fb51eaca9 --- /dev/null +++ b/_docs/index.html @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/_docs/profiler_options.html b/_docs/profiler_options.html new file mode 100644 index 000000000..831f653ff --- /dev/null +++ b/_docs/profiler_options.html @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/_docs/setup.cfg b/_docs/setup.cfg new file mode 100644 index 000000000..c9c21e52f --- /dev/null +++ b/_docs/setup.cfg @@ -0,0 +1,7 @@ +[flake8] +max-line-length = 88 +extend-ignore = E203 + +[isort] +multi_line_output=3 +profile=black diff --git a/dataprofiler/__init__.py b/dataprofiler/__init__.py index f23cf6494..0c7919242 100644 --- a/dataprofiler/__init__.py +++ b/dataprofiler/__init__.py @@ -1,6 +1,7 @@ 
"""Package for dataprofiler.""" from . import settings +from ._version import get_versions from .data_readers.data import Data from .dp_logging import get_logger, set_verbosity from .labelers.data_labelers import ( @@ -19,8 +20,8 @@ from .profilers.profiler_options import ProfilerOptions from .reports import graphs from .validators.base_validators import Validator -from .version import __version__ +__version__ = get_versions()["version"] def set_seed(seed=None): # also check it's an integer diff --git a/dataprofiler/_version.py b/dataprofiler/_version.py new file mode 100644 index 000000000..669959883 --- /dev/null +++ b/dataprofiler/_version.py @@ -0,0 +1,524 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "dataprofiler-" + cfg.versionfile_source = "dataprofiler/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} # type: ignore +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print(f"unable to find command, tried {commands}") + return None, None + stdout = p.communicate()[0].strip().decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, 
p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs) + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%s*" % tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], + cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/dataprofiler/tests/profilers/test_histogram_utils.py b/dataprofiler/tests/profilers/test_histogram_utils.py index 3be8cdcae..10c88b344 100644 --- a/dataprofiler/tests/profilers/test_histogram_utils.py +++ b/dataprofiler/tests/profilers/test_histogram_utils.py @@ -32,7 +32,7 @@ def mock_sqrt_return_nan(profile): return float("nan") -class TestColumn(NumericStatsMixin): +class MockColumn(NumericStatsMixin): def __init__(self): NumericStatsMixin.__init__(self) self.times = defaultdict(float) @@ -75,7 +75,7 @@ def test_ptp(self): def test_calc_doane_bin_width_from_profile(self): # Initial setup of profile - profile = TestColumn() + profile = MockColumn() with mock.patch( "dataprofiler.profilers.NumericStatsMixin.stddev", new_callable=mock_stddev @@ -177,7 +177,7 @@ def test_calc_doane_bin_width_from_profile(self): def test_calc_rice_bin_width_from_profile(self): # Initial setup of profile - profile = TestColumn() + profile = MockColumn() # Case 1: min, max, and match_count are set expected_dataset_size = profile.match_count @@ -230,7 +230,7 @@ def test_calc_rice_bin_width_from_profile(self): def test_calc_sturges_bin_width_from_profile(self): # Initial setup of profile - profile = TestColumn() + profile = MockColumn() # Case 1: min, max, and match_count are set expected_dataset_size = profile.match_count @@ -283,7 +283,7 @@ 
def test_calc_sturges_bin_width_from_profile(self): def test_calc_sqrt_bin_width_from_profile(self): # Initial setup of profile - profile = TestColumn() + profile = MockColumn() # Case 1: min, max, and match_count are set expected_dataset_size = profile.match_count @@ -336,7 +336,7 @@ def test_calc_sqrt_bin_width_from_profile(self): def test_calc_fd_bin_width_from_profile(self): # Initial setup of profile - profile = TestColumn() + profile = MockColumn() with mock.patch( "dataprofiler.profilers.NumericStatsMixin._get_percentile", @@ -359,7 +359,7 @@ def test_calc_fd_bin_width_from_profile(self): def test_calc_auto_bin_width_from_profile(self): # Initial setup of profile - profile = TestColumn() + profile = MockColumn() with mock.patch( "dataprofiler.profilers.histogram_utils._calc_fd_bin_width_from_profile" @@ -396,7 +396,7 @@ def test_calc_auto_bin_width_from_profile(self): def test_calc_scott_bin_width_from_profile(self): # Initial setup of profile - profile = TestColumn() + profile = MockColumn() with mock.patch( "dataprofiler.profilers.NumericStatsMixin.stddev", new_callable=mock_stddev @@ -418,7 +418,7 @@ def test_calc_scott_bin_width_from_profile(self): def test_calculate_bins_from_profile(self): # Initial setup of profile - profile = TestColumn() + profile = MockColumn() # Case 1: bin method not in set of valid bin methods with self.assertRaises(ValueError): @@ -457,7 +457,7 @@ def test_calculate_bins_from_profile(self): dataprofiler.profilers.histogram_utils._hist_bin_width_selectors_for_profile, {"sqrt": mock_sqrt_return_none}, ): - profile = TestColumn() + profile = MockColumn() actual = histogram_utils._calculate_bins_from_profile(profile, "sqrt") self.assertEqual(1, actual) @@ -466,6 +466,6 @@ def test_calculate_bins_from_profile(self): dataprofiler.profilers.histogram_utils._hist_bin_width_selectors_for_profile, {"sqrt": mock_sqrt_return_nan}, ): - profile = TestColumn() + profile = MockColumn() actual = 
histogram_utils._calculate_bins_from_profile(profile, "sqrt") self.assertEqual(1, actual) diff --git a/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py b/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py index e112781ab..cac04bfc9 100644 --- a/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py +++ b/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py @@ -18,7 +18,7 @@ test_root_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) -class TestColumn(NumericStatsMixin): +class MockColumn(NumericStatsMixin): def __init__(self): NumericStatsMixin.__init__(self) self.match_count = 0 @@ -31,7 +31,7 @@ def _filter_properties_w_options(self, calculations, options): pass -class TestColumnWProps(TestColumn): +class MockColumnWProps(MockColumn): # overrides the property func median = None mode = None @@ -117,9 +117,9 @@ def test_check_int(self): def test_hist_loss_on_merge(self): # Initial setup of profiles - profile3 = TestColumn() - profile1 = TestColumn() - profile2 = TestColumn() + profile3 = MockColumn() + profile1 = MockColumn() + profile2 = MockColumn() mock_histogram1 = { "bin_counts": np.array([1, 1, 1, 1]), "bin_edges": np.array([2, 4, 6, 8, 10]), @@ -161,7 +161,7 @@ def test_update_variance(self): Checks update variance :return: """ - num_profiler = TestColumn() + num_profiler = MockColumn() # test update variance data1 = [-3.0, 2.0, 11.0] @@ -209,7 +209,7 @@ def test_update_variance_with_varying_data_length(self): data1 = [] mean1, var1, count1 = 0, np.nan, 0 - num_profiler = TestColumn() + num_profiler = MockColumn() num_profiler._biased_variance = num_profiler._update_variance( mean1, var1, count1 ) @@ -221,7 +221,7 @@ def test_update_variance_with_varying_data_length(self): data2 = [5.0] mean2, var2, count2 = 5.0, 0, 1 - num_profiler = TestColumn() + num_profiler = MockColumn() num_profiler._biased_variance = num_profiler._update_variance( mean2, var2, count2 ) @@ -239,7 +239,7 @@ def 
test_update_variance_with_varying_data_length(self): + (-11.0 - mean3) ** 2 ) / 3 - num_profiler = TestColumn() + num_profiler = MockColumn() num_profiler._biased_variance = num_profiler._update_variance( mean3, var3 * 3 / 4, count3 ) @@ -252,7 +252,7 @@ def test_update_variance_with_empty_data(self): Checks update variance :return: """ - num_profiler = TestColumn() + num_profiler = MockColumn() data1 = [-3.0, 2.0, 11.0] mean1 = (-3.0 + 2.0 + 11.0) / 3 @@ -284,7 +284,7 @@ def test_timeit_merge(self): Checks profiles have been merged and timed :return: """ - num_profiler, other1, other2 = TestColumn(), TestColumn(), TestColumn() + num_profiler, other1, other2 = MockColumn(), MockColumn(), MockColumn() mock_histogram = { "bin_counts": np.array([1, 1, 1, 1]), "bin_edges": np.array([2.0, 5.25, 8.5, 11.75, 15.0]), @@ -331,7 +331,7 @@ def test_timeit(self): Checks stat properties have been timed :return: """ - num_profiler = TestColumn() + num_profiler = MockColumn() # Dummy data to make min call prev_dependent_properties = { @@ -402,8 +402,8 @@ def test_from_dict_helper(self): fake_profile_name = "Fake profile name" # Build expected CategoricalColumn - actual_profile = TestColumn() - expected_profile = TestColumn() + actual_profile = MockColumn() + expected_profile = MockColumn() mock_saved_profile = dict( { "quantiles": None, @@ -429,7 +429,7 @@ def test_from_dict_helper(self): test_utils.assert_profiles_equal(expected_profile, actual_profile) def test_histogram_bin_error(self): - num_profiler = TestColumn() + num_profiler = MockColumn() # Dummy data for calculating bin error num_profiler._stored_histogram = { @@ -475,7 +475,7 @@ def test_histogram_bin_error(self): assert sum_error == np.inf def test_get_best_histogram_profile(self): - num_profiler = TestColumn() + num_profiler = MockColumn() num_profiler._histogram_for_profile = mock.MagicMock( side_effect=[("hist_1", 3), ("hist_2", 2), ("hist_3", 1)] @@ -509,7 +509,7 @@ def test_get_best_histogram_profile(self): 
assert best_histogram == "hist_3" def test_get_best_histogram_profile_infinite_loss(self): - num_profiler = TestColumn() + num_profiler = MockColumn() num_profiler._histogram_for_profile = mock.MagicMock(return_value=("hist_1", 3)) @@ -529,7 +529,7 @@ def test_get_best_histogram_profile_infinite_loss(self): assert best_histogram == "hist_1" def test_get_percentile_median(self): - num_profiler = TestColumn() + num_profiler = MockColumn() # Dummy data for calculating bin error num_profiler._stored_histogram = { "histogram": { @@ -541,7 +541,7 @@ def test_get_percentile_median(self): self.assertListEqual([10, 10], median) def test_num_zeros(self): - num_profiler = TestColumn() + num_profiler = MockColumn() # Dummy data to make num_zeros call prev_dependent_properties = {"mean": 0} @@ -568,7 +568,7 @@ def test_num_zeros(self): self.assertEqual(subset_properties["num_zeros"], 4) def test_num_negatives(self): - num_profiler = TestColumn() + num_profiler = MockColumn() # Dummy data to make num_negatives call prev_dependent_properties = {"mean": 0} @@ -595,7 +595,7 @@ def test_num_negatives(self): self.assertEqual(subset_properties["num_negatives"], 4) def test_fold_histogram(self): - num_profiler = TestColumn() + num_profiler = MockColumn() # the break point is at the mid point of a bin bin_counts = np.array([1 / 6, 1 / 6, 1 / 6, 1 / 6, 1 / 6, 1 / 6]) @@ -670,7 +670,7 @@ def test_timeit_num_zeros_and_negatives(self): Checks num_zeros and num_negatives have been timed :return: """ - num_profiler = TestColumn() + num_profiler = MockColumn() # Dummy data to make min call prev_dependent_properties = {"mean": 0} @@ -702,14 +702,14 @@ def test_merge_num_zeros_and_negatives(self): Checks num_zeros and num_negatives can be merged :return: """ - num_profiler, other1, other2 = TestColumn(), TestColumn(), TestColumn() + num_profiler, other1, other2 = MockColumn(), MockColumn(), MockColumn() other1.num_zeros, other1.num_negatives = 3, 1 other2.num_zeros, other2.num_negatives = 7, 1 
num_profiler._add_helper(other1, other2) self.assertEqual(num_profiler.num_zeros, 10) self.assertEqual(num_profiler.num_negatives, 2) - num_profiler, other1, other2 = TestColumn(), TestColumn(), TestColumn() + num_profiler, other1, other2 = MockColumn(), MockColumn(), MockColumn() other1.num_zeros, other1.num_negatives = 0, 0 other2.num_zeros, other2.num_negatives = 0, 0 num_profiler._add_helper(other1, other2) @@ -717,7 +717,7 @@ def test_merge_num_zeros_and_negatives(self): self.assertEqual(num_profiler.num_negatives, 0) def test_profile(self): - num_profiler = TestColumn() + num_profiler = MockColumn() mock_profile = dict( min=1.0, @@ -815,7 +815,7 @@ def test_report(self): self.assertIn(disabled_key, report_keys) def test_report_no_numerical_options(self): - num_profiler = TestColumn() + num_profiler = MockColumn() num_profiler.match_count = 0 num_profiler.times = defaultdict(float) @@ -833,7 +833,7 @@ def test_diff(self): Checks _diff_helper() works appropriately. """ - other1, other2 = TestColumnWProps(), TestColumnWProps() + other1, other2 = MockColumnWProps(), MockColumnWProps() other1.min = 3 other1.max = 4 other1._biased_variance = 1 @@ -881,7 +881,7 @@ def test_diff(self): self.assertDictEqual(expected_diff, difference) # Invalid statistics - other1, other2 = TestColumnWProps(), TestColumnWProps() + other1, other2 = MockColumnWProps(), MockColumnWProps() other1.min = 3 other1.max = 4 other1._biased_variance = np.nan # NaN variance @@ -931,7 +931,7 @@ def test_diff(self): self.assertTrue(np.isnan([expected_var, var, expected_stddev, stddev]).all()) # Insufficient match count - other1, other2 = TestColumnWProps(), TestColumnWProps() + other1, other2 = MockColumnWProps(), MockColumnWProps() other1.min = 3 other1.max = 4 other1._biased_variance = 1 @@ -980,7 +980,7 @@ def test_diff(self): self.assertTrue(np.isnan([expected_var, var, expected_stddev, stddev]).all()) # Constant values - other1, other2 = TestColumnWProps(), TestColumnWProps() + other1, other2 = 
MockColumnWProps(), MockColumnWProps() other1.min = 3 other1.max = 4 other1._biased_variance = 0 # constant value has 0 variance @@ -1028,7 +1028,7 @@ def test_diff(self): self.assertDictEqual(expected_diff, difference) # Small p-value - other1, other2 = TestColumnWProps(), TestColumnWProps() + other1, other2 = MockColumnWProps(), MockColumnWProps() other1.min = 3 other1.max = 4 other1._biased_variance = 1 @@ -1075,11 +1075,11 @@ def test_diff(self): other1.diff("Inproper input") self.assertEqual( str(exc.exception), - "Unsupported operand type(s) for diff: 'TestColumnWProps' and" " 'str'", + "Unsupported operand type(s) for diff: 'MockColumnWProps' and" " 'str'", ) # PSI same distribution test - other1, other2 = TestColumnWProps(), TestColumnWProps() + other1, other2 = MockColumnWProps(), MockColumnWProps() other1.match_count = 55 other1._stored_histogram = { "total_loss": 0, @@ -1112,7 +1112,7 @@ def test_diff(self): self.assertEqual(expected_psi_value, psi_value) # PSI min_min_edge == max_max_edge - other1, other2 = TestColumnWProps(), TestColumnWProps() + other1, other2 = MockColumnWProps(), MockColumnWProps() other1.match_count = 10 other1._stored_histogram = { "total_loss": 0, @@ -1139,7 +1139,7 @@ def test_diff(self): self.assertEqual(expected_psi_value, psi_value) # PSI regen other / not self - other1, other2 = TestColumnWProps(), TestColumnWProps() + other1, other2 = MockColumnWProps(), MockColumnWProps() other1.match_count = 55 other1._stored_histogram = { "total_loss": 0, diff --git a/dataprofiler/tests/test_data_profiler.py b/dataprofiler/tests/test_data_profiler.py index 54a5f2d82..2c8e9583a 100644 --- a/dataprofiler/tests/test_data_profiler.py +++ b/dataprofiler/tests/test_data_profiler.py @@ -31,8 +31,6 @@ def setUpClass(cls): def test_set_seed(self): import dataprofiler as dp - self.assertEqual(dp.settings._seed, None) - dp.set_seed(5) self.assertEqual(dp.settings._seed, 5) diff --git a/dataprofiler/tests/test_dp_logging.py 
b/dataprofiler/tests/test_dp_logging.py index 7f78903ee..99496e314 100644 --- a/dataprofiler/tests/test_dp_logging.py +++ b/dataprofiler/tests/test_dp_logging.py @@ -22,12 +22,6 @@ def tearDownClass(cls): root_logger.removeHandler(dp_logging.get_logger()) dp_logging._dp_logger = None - def test_default_verbosity(self, mock_stdout): - # Ensure that default effective level is INFO - self.assertEqual( - logging.INFO, logging.getLogger("DataProfiler").getEffectiveLevel() - ) - def test_set_verbosity(self, mock_stdout): from dataprofiler import dp_logging diff --git a/dataprofiler/version.py b/dataprofiler/version.py deleted file mode 100644 index b4e4c2b9c..000000000 --- a/dataprofiler/version.py +++ /dev/null @@ -1,13 +0,0 @@ -"""File contains the version number for the package.""" - -MAJOR = 0 -MINOR = 13 -MICRO = 1 -POST = None # otherwise None - -VERSION = "%d.%d.%d" % (MAJOR, MINOR, MICRO) - -_post_str = "" -if POST: - _post_str = f".post{POST}" -__version__ = VERSION + _post_str diff --git a/requirements-docs.txt b/requirements-docs.txt new file mode 100644 index 000000000..36517b45f --- /dev/null +++ b/requirements-docs.txt @@ -0,0 +1,7 @@ +Sphinx>=5.0.0 +sphinx-rtd-theme +nbsphinx +furo +nbsphinx-link +pre-commit +tornado diff --git a/requirements.txt b/requirements.txt index 3ccc4c6f5..e32f32851 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,11 +11,12 @@ python-snappy>=0.7.1 charset-normalizer>=1.3.6 psutil>=4.0.0 scipy>=1.10.0 -requests==2.32.* +requests>=2.32.4 networkx>=2.5.1 typing-extensions>=3.10.0.2 HLL>=2.0.3 datasketches>=4.1.0 packaging>=23.0 -boto3>=1.28.61 -# adding comment to trigger mend check +boto3>=1.37.15 +urllib3>=2.5.0 +versioneer diff --git a/setup.cfg b/setup.cfg index 6c2be03be..5930fdfa3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,10 +1,19 @@ +[versioneer] +vcs = git +style = pep440 +versionfile_source = dataprofiler/_version.py +versionfile_build = dataprofiler/_version.py +tag_prefix = "" +parentdir_prefix = 
dataprofiler- [flake8] max-line-length = 88 extend-ignore = E203 +exclude = versioneer.py, dataprofiler/_version.py [isort] + multi_line_output=3 -skip=dataprofiler/tests/data/,venv/ +skip=dataprofiler/tests/data/,venv/, versioneer.py, dataprofiler/_version.py profile=black include_trailing_comma=True force_grid_wrap=0 diff --git a/setup.py b/setup.py index eeca6629b..f1f799446 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ from setuptools import find_packages, setup # Load package version -from dataprofiler.version import __version__ +import versioneer here = path.abspath(path.dirname(__file__)) @@ -53,8 +53,9 @@ setup( name="DataProfiler", - version=__version__, - python_requires=">=3.9", + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + python_requires=">=3.10", description=DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", diff --git a/tox.ini b/tox.ini index 4ee6081bd..caf70d437 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ deps = -rrequirements-reports.txt -rrequirements-test.txt commands = - python3 -m pytest dataprofiler/tests/ --cov=dataprofiler --cov-fail-under=80 --cov-report=xml:coverage.xml --forked + python3 -m pytest dataprofiler/tests/ --cov=dataprofiler --cov-fail-under=80 --cov-report=xml:coverage.xml # add "docs" to `envlist` to run the docs build #[testenv:docs] diff --git a/versioneer.py b/versioneer.py new file mode 100644 index 000000000..fcbc15bd1 --- /dev/null +++ b/versioneer.py @@ -0,0 +1,1741 @@ +# Version: 0.19 + +"""The Versioneer - like a rocketeer, but for versions. +The Versioneer +============== +* like a rocketeer, but for versions! 
+* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. +## Quick Install +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` +## Version Identifiers +Source trees come from a variety of places: +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. 
For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). +The version identifier is used for multiple purposes: +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball +## Theory of Operation +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. +## Installation +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. +## Version-String Flavors +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. 
From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. +Both functions return a dictionary with different flavors of version +information: +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. 
+The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions +## Styles +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. +## Debugging +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). +## Known Limitations +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/python-versioneer/python-versioneer/issues). +### Subprojects +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
The are +two common reasons why `setup.py` might not be in the root: +* Source trees which contain multiple subprojects, such as + [Buildbot](https://github.com/buildbot/buildbot), which contains both + "master" and "slave" subprojects, each with their own `setup.py`, + `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI + distributions (and upload multiple independently-installable tarballs). +* Source trees whose main purpose is to contain a C library, but which also + provide bindings to Python (and perhaps other languages) in subdirectories. +Versioneer will look for `.git` in parent directories, and most operations +should get the right version string. However `pip` and `setuptools` have bugs +and implementation details which frequently cause `pip install .` from a +subproject directory to fail to find a correct version string (so it usually +defaults to `0+unknown`). +`pip install --editable .` should work correctly. `setup.py install` might +work too. +Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in +some later version. +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking +this issue. The discussion in +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the +issue from the Versioneer side in more detail. +[pip PR#3176](https://github.com/pypa/pip/pull/3176) and +[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve +pip to let Versioneer work correctly. +Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases. +### Editable installs with setuptools <= 18.5 +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. 
+"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising. +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes +this one, but upgrading to a newer version of setuptools should probably +resolve it. +## Updating Versioneer +To upgrade your project to a new release of Versioneer, do the following: +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg`, if necessary, to include any new configuration settings + indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files +## Future Directions +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . 
Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. +## Similar projects +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +## License +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. +Specifically, both are released under the Creative Commons "Public Domain +Dedication" license (CC0-1.0), as described in +https://creativecommons.org/publicdomain/zero/1.0/ . +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer +""" + +import configparser +import errno +import json +import os +import re +import subprocess +import sys + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_root(): + """Get the project root directory. + We require that all commands are run from the project root, i.e. the + directory that contains setup.py, setup.cfg, and versioneer.py . + """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ( + "Versioneer was unable to run the project root directory. 
" + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND')." + ) + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + me = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(me)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(me), versioneer_py) + ) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise EnvironmentError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . 
+ setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg) as f: + parser.read_file(f) + VCS = parser.get("versioneer", "VCS") # mandatory + + def get(parser, name): + if parser.has_option("versioneer", name): + return parser.get("versioneer", name) + return None + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = get(parser, "style") or "" + cfg.versionfile_source = get(parser, "versionfile_source") + cfg.versionfile_build = get(parser, "versionfile_build") + cfg.tag_prefix = get(parser, "tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = get(parser, "parentdir_prefix") + cfg.verbose = get(parser, "verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print(f"unable to find command, tried {commands}") + return None, None + stdout = p.communicate()[0].strip().decode() + if p.returncode 
!= 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +LONG_VERSION_PY[ + "git" +] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. +# This file is released into the public domain. Generated by +# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) +"""Git implementation of _version.py.""" +import errno +import os +import re +import subprocess +import sys +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" +LONG_VERSION_PY = {} +HANDLERS = {} +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = p.communicate()[0].strip().decode() + if p.returncode != 0: + if verbose: + print("unable to run %%s (error)" 
%% dispcmd) + print("stdout was %%s" %% stdout) + return None, p.returncode + return stdout, p.returncode +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%%s*" %% tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + # now we have TAG-NUM-gHEX or HEX + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + # commit: short hex revision ID + pieces["short"] = mo.group(3) + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], + cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + return pieces +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%%d" %% pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + The ".dev0" means dirty. + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + Like 'git describe --tags --dirty --always'. + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + if not style or style == "default": + style = "pep440" # the default + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + cfg = get_config() + verbose = cfg.verbose + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + f = open(versionfile_abs) + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r"\d", r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved 
later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + me = __file__ + if me.endswith(".pyc") or me.endswith(".pyo"): + me = os.path.splitext(me)[0] + ".py" + versioneer_file = os.path.relpath(me) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + f = open(".gitattributes") + for line in f.readlines(): + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + f.close() + except OSError: + pass + if not present: + f = open(".gitattributes", "a+") + f.write("%s export-subst\n" % versionfile_source) + f.close() + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.19) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
+import json +version_json = ''' +%s +''' # END VERSION_JSON +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) + if not mo: + mo = re.search( + r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set {} to '{}'".format(filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post0.devDISTANCE] -- No -dirty. + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post0.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + The ".dev0" means dirty. + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + Like 'git describe --tags --dirty --always'. + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + Returns dict with two keys: 'version' and 'full'. 
+ """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert ( + cfg.versionfile_source is not None + ), "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. + + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print(f"got version from file {versionfile_abs} {ver}") + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + print("unable 
+ to compute version") + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } + + +def get_version(): + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(cmdclass=None): + """Get the custom setuptools/distutils subclasses used by Versioneer. + If the package uses a different cmdclass (e.g. one from numpy), it + should be provided as an argument. + """ + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to its pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. 
+ # Also see https://github.com/python-versioneer/python-versioneer/issues/52 + + cmds = {} if cmdclass is None else cmdclass.copy() + + # we add "version" to both distutils and setuptools + from distutils.core import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + + cmds["version"] = cmd_version + + # we override "build_py" in both distutils and setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? + # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? 
+ + # we override different "build_py" commands for both environments + if "build_py" in cmds: + _build_py = cmds["build_py"] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + cmds["build_py"] = cmd_build_py + + if "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 
+ # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if "py2exe" in sys.modules: # py2exe enabled? + from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if "sdist" in cmds: + _sdist = cmds["sdist"] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command 
will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- +You will also need to edit your setup.py to use the results: + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. 
+[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = +""" + +INIT_PY_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: + if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy) as f: + old = f.read() + except OSError: + old = "" + if INIT_PY_SNIPPET not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(INIT_PY_SNIPPET) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. 
+ manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in) as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except OSError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print( + " appending versionfile_source ('%s') to MANIFEST.in" + % cfg.versionfile_source + ) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. + do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). 
Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1)