htm-community · ctrl-z-9000-times · Jul 29, 2019 · Jul 24, 2019 · Jul 24, 2019 · Jul 24, 2019
diff --git a/.gitignore b/.gitignore
@@ -19,3 +19,4 @@ plot_*/
 pyenv2/
 build/
 dist/
+htm.core/
diff --git a/nab/detectors/htmcore/README.md b/nab/detectors/htmcore/README.md
@@ -0,0 +1,115 @@
+# Numenta and NumentaTM detectors
+
+This directory holds the Python 2 code required to run the `numenta` and
+`numentaTM` detectors against the NAB data. In 2019 the main body of the
+benchmark's code was ported to Python 3; however, these detectors rely on
+NuPIC, which is Python 2 only.
+
+This code can be used to replicate results listed on the scoreboard of
+the main repository for the following detectors:
+
+    numenta
+    numentaTM
+
+## Installation
+
+We assume you have a working version of Python 3 installed as your default Python.
+If your default system Python is still Python 2 you can skip the virtual environment
+creation below.
+
+### Requirements to install
+
+- [Python 2.7](https://www.python.org/download/)
+- [Virtualenv](https://pypi.org/project/virtualenv/)
+
+### Install a virtual environment
+
+Create a new Python 2 virtual environment in this directory.
+
+`virtualenv -p path/to/python2 env`
+
+Activate that virtual environment.
+
+`source env/bin/activate` on Linux/macOS
+
+or
+
+`env\Scripts\activate.bat` on Windows.
+
+Confirm you have a local Python 2:
+
+```
+$ python
+Python 2.7.13 (v2.7.13:a06454b1afa1, Dec 17 2016, 20:53:40) [MSC v.1500 64 bit (AMD64)] on win32
+Type "help", "copyright", "credits" or "license" for more information.
+>>>
+```
+
+### Install detectors
+
+    cd /path/to/NAB/
+    pip install nupic
+    python nab/detectors/numenta/setup.py develop
+
+## Usage
+
+### Detection
+
+This directory contains a modified version of the `run.py` script which exists
+in the main NAB directory. It can be used to run *detection* only using the
+`numenta` and `numentaTM` detectors against NAB data.
+
+By default it will run both `numenta` and `numentaTM` detectors and output
+results to the main NAB/results directory.
+
+`python2 run.py`
+
+Note: By default `run.py` tries to use all the cores on your machine. The above
+command should take about 20-30 minutes on a current powerful laptop with 4-8
+cores.
+
+To run only one of the detectors use the `-d` option:
+
+`python2 run.py -d numenta`
+
+To see all options of this script type:
+
+`python2 run.py --help`
+
+### Optimizing, Scoring and Normalizing
+
+Once you have run either of the detectors herein against the NAB data you will need
+to exit the Python 2 virtual environment and move into the main NAB directory.
+
+```
+(env) /NAB/nab/detectors/numenta
+$ deactivate                                                          
+/NAB/nab/detectors/numenta      
+$ cd ../../../
+/NAB
+$
+```
+
+Then follow the instructions in the main README to run optimization, scoring, and normalization, e.g.:
+
+`python run.py -d numenta,numentaTM --optimize --score --normalize`
+
+### Run a subset of NAB data files
+
+For debugging it is sometimes useful to be able to run your algorithm on a
+subset of the NAB data files or on your own set of data files. You can do that
+by creating a custom `combined_windows.json` file that only contains labels for
+the files you want to run. This new file should be in exactly the same format as
+`combined_windows.json` except it would only contain windows for the files you
+are interested in.
+
+**Example**: an example file containing labels for two data files is in
+`labels/combined_windows_tiny.json` (under the main NAB directory). The
+following command shows you how to run NAB on a subset of labels:
+
+    python2 run.py -d numenta --detect --windowsFile labels/combined_windows_tiny.json
+
+This will run the `detect` phase of NAB on the data files specified in the above
+JSON file. Note that scoring and normalization are not supported with this
+option. Note also that you may see warning messages regarding the lack of labels
+for other files. You can ignore these warnings.
diff --git a/nab/detectors/htmcore/__init__.py b/nab/detectors/htmcore/__init__.py
diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py
@@ -0,0 +1,135 @@
+# ----------------------------------------------------------------------
+# Copyright (C) 2014, Numenta, Inc.  Unless you have an agreement
+# with Numenta, Inc., for a separate license for this software code, the
+# following terms and conditions apply:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero Public License version 3 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero Public License for more details.
+#
+# You should have received a copy of the GNU Affero Public License
+# along with this program.  If not, see http://www.gnu.org/licenses.
+#
+# http://numenta.org/licenses/
+# ----------------------------------------------------------------------
+
+import math
+
+# htm.core imports
+from htm.bindings.algorithms import SpatialPooler   as SP
+from htm.bindings.algorithms import TemporalMemory  as TM
+from htm.bindings.algorithms import Predictor
+
+from htm.algorithms.anomaly_likelihood import AnomalyLikelihood
+from nab.detectors.base import AnomalyDetector
+
+# Fraction outside of the range of values seen so far that will be considered
+# a spatial anomaly regardless of the anomaly likelihood calculation. This
+# accounts for the human labelling bias for spatial values larger than what
+# has been seen so far.
+# It is applied as a margin of SPATIAL_TOLERANCE * (max - min) on both sides
+# of the observed value range.
+SPATIAL_TOLERANCE = 0.05
+
+
+
+class HtmcoreDetector(AnomalyDetector):
+  """
+  Anomaly detector based on HTM.
+
+  For every record a raw anomaly score is produced and, when `useLikelihood`
+  is enabled, converted into a log-scaled anomaly likelihood. Independently of
+  that, a value lying sufficiently far outside the range of values seen so far
+  is reported as a spatial anomaly with the maximum score.
+  """
+
+  def __init__(self, *args, **kwargs):
+    """Forwards all arguments to AnomalyDetector and resets detector state."""
+    super(HtmcoreDetector, self).__init__(*args, **kwargs)
+
+    # Created in initialize() when useLikelihood is enabled
+    self.anomalyLikelihood = None
+
+    # Smallest and largest value seen so far (spatial anomaly detection)
+    self.minVal = self.maxVal = None
+
+    # Set this to False if you want to get results based on raw scores
+    # without using AnomalyLikelihood. This will give worse results, but
+    # useful for checking the efficacy of AnomalyLikelihood. You will need
+    # to re-optimize the thresholds when running with this setting.
+    self.useLikelihood = True
+
+
+  def getAdditionalHeaders(self):
+    """Returns a list of strings naming the extra output columns."""
+    headers = ["raw_score"] #TODO add "prediction"
+    return headers
+
+
+  def handleRecord(self, inputData):
+    """Scores one record and returns a tuple (anomalyScore, rawScore).
+
+    `inputData` is indexed by "value" and, when the anomaly likelihood is in
+    use, by "timestamp".
+
+    Naming note: internally to NuPIC "anomalyScore" corresponds to
+    "likelihood_score" and "rawScore" corresponds to "anomaly_score".
+    """
+    # Placeholders: no model is run yet, so the raw score is a constant
+    result = [] #FIXME self.model.run(inputData)
+    rawScore = 0.5 #FIXME result.inferences["anomalyScore"]
+
+    value = inputData["value"]
+
+    # A spatial anomaly is a value beyond the observed range widened by the
+    # tolerance margin. The check is skipped while min == max (no range yet).
+    spatialAnomaly = False #TODO make this computed in SP (and later improve)
+    if self.minVal != self.maxVal:
+      margin = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
+      upperBound = self.maxVal + margin
+      lowerBound = self.minVal - margin
+      if value > upperBound or value < lowerBound:
+        spatialAnomaly = True
+
+    # Only now fold the current value into the observed range
+    if self.maxVal is None or value > self.maxVal:
+      self.maxVal = value
+    if self.minVal is None or value < self.minVal:
+      self.minVal = value
+
+    finalScore = rawScore
+    if self.useLikelihood:
+      # Compute log(anomaly likelihood)
+      likelihood = self.anomalyLikelihood.anomalyProbability(
+        value, rawScore, inputData["timestamp"])
+      #TODO returls logScore and not probability? Fix that in our Likelihood.cpp; #TODO TM to provide anomaly {none, raw, likelihood}
+      finalScore = self.anomalyLikelihood.computeLogLikelihood(likelihood)
+
+    # A spatial anomaly overrides whatever score was computed above
+    if spatialAnomaly:
+      finalScore = 1.0
+
+    return (finalScore, rawScore)
+
+
+  def initialize(self):
+    """Prepares the detector; creates the AnomalyLikelihood if it is enabled."""
+    # Get config params, setting the RDSE resolution.
+    # These values are computed but not consumed anywhere yet.
+    padding = abs(self.inputMax - self.inputMin) * 0.2
+    paddedMin = self.inputMin - padding
+    paddedMax = self.inputMax + padding
+    minResolution = 0.001 #TODO there params should form default_params for encoder etc
+
+    if not self.useLikelihood:
+      return
+
+    # Half of the probationary period is used for learning, the remainder
+    # for estimation samples.
+    learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
+    #TODO make these default for py anomaly_likelihood? as NAB is likely tuned for best Likelihood!
+    self.anomalyLikelihood = AnomalyLikelihood(
+      learningPeriod=learningPeriod,
+      estimationSamples=self.probationaryPeriod - learningPeriod,
+      reestimationPeriod=100
+    )
+
+
+  def _setupEncoderParams(self, encoderParams):
+    """Renames encoder entries in place to the NAB-specific datafile headers.
+
+    The "c0_*" entries become "timestamp_*", "c1" becomes "value", and the
+    value encoder parameters are stored in `self.sensorParams`.
+    """
+    encoderParams["timestamp_dayOfWeek"] = encoderParams.pop("c0_dayOfWeek")
+
+    timeOfDay = encoderParams.pop("c0_timeOfDay")
+    encoderParams["timestamp_timeOfDay"] = timeOfDay
+    for key in ("fieldname", "name"):
+      timeOfDay[key] = "timestamp"
+
+    encoderParams["timestamp_weekend"] = encoderParams.pop("c0_weekend")
+
+    valueParams = encoderParams.pop("c1")
+    encoderParams["value"] = valueParams
+    for key in ("fieldname", "name"):
+      valueParams[key] = "value"
+
+    self.sensorParams = valueParams
diff --git a/nab/detectors/htmcore/requirements.txt b/nab/detectors/htmcore/requirements.txt
@@ -0,0 +1 @@
+htm.core>=2.0
diff --git a/nab/detectors/htmcore/setup.py b/nab/detectors/htmcore/setup.py
@@ -0,0 +1,115 @@
+# ----------------------------------------------------------------------
+# Copyright (C) 2014-2015, Numenta, Inc.  Unless you have an agreement
+# with Numenta, Inc., for a separate license for this software code, the
+# following terms and conditions apply:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero Public License version 3 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero Public License for more details.
+#
+# You should have received a copy of the GNU Affero Public License
+# along with this program.  If not, see http://www.gnu.org/licenses.
+#
+# http://numenta.org/licenses/
+# ----------------------------------------------------------------------
+
+import os
+import sys
+import pkg_resources
+import warnings
+from setuptools import setup, find_packages
+
+# Absolute, symlink-resolved directory containing this setup.py; the
+# requirements file and the htm.core checkout are located relative to it.
+REPO_DIR = os.path.dirname(os.path.realpath(__file__))
+
+
+def read(fname):
+  """
+  Return the full text of a file; used for setup's long_description so the
+  text can live in a README file instead of a raw string in this script.
+
+  :param fname: path of the file, joined onto the directory of this script
+    (an absolute path is therefore used as is).
+  :return: the file content as a string.
+  """
+  path = os.path.join(os.path.dirname(__file__), fname)
+  with open(path) as handle:
+    return handle.read()
+
+
+
+def nupicInstalled():
+  """
+  Determine whether the "htm.core" distribution is already installed.
+   :return: boolean
+  """
+  try:
+    pkg_resources.get_distribution("htm.core")
+  except pkg_resources.DistributionNotFound:
+    # Silently ignore. htm.core will be installed later.
+    return False
+  return True
+
+
+def parseFile(requirementFile):
+  """
+  Parse requirement file.
+
+  Surrounding whitespace is stripped from every line; blank lines and comment
+  lines (including indented ones) are skipped so that only real requirement
+  specifiers are returned.
+
+  :param requirementFile: path of the requirements file to read.
+  :return: list of requirements; empty if the file cannot be read.
+  """
+  try:
+    # The context manager guarantees the handle is closed again
+    with open(requirementFile) as reqFile:
+      stripped = [line.strip() for line in reqFile]
+  except IOError:
+    return []
+
+  return [line for line in stripped if line and not line.startswith("#")]
+
+
+
+def findRequirements():
+  """
+  Read the requirements.txt file and parse into requirements for setup's
+  install_requirements option.
+
+  If htm.core is not installed yet, it is cloned (or updated) from git next to
+  this file and installed with the interpreter running this script.
+
+  :return: list of requirements.
+  :raises RuntimeError: if htm.core must be installed but git is unavailable.
+  """
+  # Imported locally because only this function needs them
+  import shutil
+  import subprocess
+
+  requirements = parseFile(os.path.join(REPO_DIR, "requirements.txt")) #REPO_DIR is local htmcore/ "repo"
+
+  if not nupicInstalled():
+    # The user has no version of htm.core installed yet, so attempt a custom install from git.
+    #TODO when htm.core is published on PyPI, remove this whole custom setup.py, and just add htm.core to REPO/requirements.txt
+    if shutil.which("git") is None:
+      raise RuntimeError("Git must be installed for htm.core detector installation!")
+
+    # cwd= is passed to each command instead of os.chdir(), so the caller's
+    # working directory is never changed, even if a step fails.
+    htmCoreDir = os.path.join(REPO_DIR, "htm.core")
+    if not os.path.isdir(htmCoreDir):
+      subprocess.call(
+        ["git", "clone", "--depth=5", "https://github.com/htm-community/htm.core"],
+        cwd=REPO_DIR)
+    # If the checkout already existed we want to update it
+    subprocess.call(["git", "pull", "origin", "master"], cwd=htmCoreDir)
+
+    # Install with the running interpreter, not whatever "python" is on PATH
+    status = subprocess.call([sys.executable, "setup.py", "install"], cwd=htmCoreDir)
+    if status != 0:
+      warnings.warn(
+        "Installing htm.core from source failed (exit status %d)." % status)
+
+  return requirements
+
+
+
+if __name__ == "__main__":
+  requirements = findRequirements()
+
+  # The htm.core checkout is only present if it was cloned next to this file
+  # (it is not when htm.core was already installed). Prefer its README, fall
+  # back to the README beside this script, and otherwise use an empty
+  # description instead of crashing on a missing file.
+  longDescription = ""
+  for candidate in (os.path.join(REPO_DIR, "htm.core", "README.md"),
+                    os.path.join(REPO_DIR, "README.md")):
+    if os.path.isfile(candidate):
+      longDescription = read(candidate)
+      break
+
+  setup(
+    name="nab-detector-htmcore",
+    version="1.0",
+    author="@breznak",
+    author_email="nab@numenta.org",
+    description=(
+      "HTM.core detector from HTM community repo for NAB (in Python 3)"),
+    license="AGPL",
+    packages=find_packages(),
+    long_description=longDescription,
+    install_requires=requirements,
+    entry_points={
+      "console_scripts": [
+        "nab-plot = nab.plot:main",
+      ],
+    },
+  )
diff --git a/run.py b/run.py
@@ -159,7 +159,7 @@ def main(args):
   parser.add_argument("-d", "--detectors",
                     nargs="*",
                     type=str,
-                    default=["numenta", "numentaTM", "htmjava", "null", "random",
+                    default=["numenta", "numentaTM", "htmcore", "htmjava", "null", "random",
                              "bayesChangePt", "windowedGaussian", "expose",
                              "relativeEntropy", "earthgeckoSkyline"],
                     help="Comma separated list of detector(s) to use, e.g. "
@@ -227,6 +227,9 @@ def main(args):
     ContextOSEDetector )
   if "earthgeckoSkyline" in args.detectors:
     from nab.detectors.earthgecko_skyline.earthgecko_skyline_detector import EarthgeckoSkylineDetector
+  if "htmcore" in args.detectors:
+    from nab.detectors.htmcore.htmcore_detector import HtmcoreDetector
+
   # Special hacks for detectors requiring Python 2:
   # TODO the imports are failing, remove? Py2 detectors have special treatment in `getDetectorClassConstructors()` above
   #
-Original file line number
+Diff line change
@@ Expand Up / @@ -19,3 +19,4 @@ plot_*/ @@
     pyenv2/
     build/
     dist/
+    htm.core/