diff --git a/.gitignore b/.gitignore index 2b77f6935..50652975f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ deprecated/ .idea/ .project .pydevproject +*.swp htmjava.log nab/detectors/htmjava/.pydevproject scripts/.ipynb_checkpoints/ @@ -19,3 +20,4 @@ plot_*/ pyenv2/ build/ dist/ +htm.core/ diff --git a/README.md b/README.md index 7b4ef0512..1dde4fa9e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -The Numenta Anomaly Benchmark [![Build Status](https://travis-ci.org/numenta/NAB.svg?branch=master)](https://travis-ci.org/numenta/NAB) +# The Numenta Anomaly Benchmark [![Build Status](https://travis-ci.org/numenta/NAB.svg?branch=master)](https://travis-ci.org/numenta/NAB) ----------------------------- Welcome. This repository contains the data and scripts comprising the Numenta @@ -28,26 +28,44 @@ Ahmad, S., Lavin, A., Purdy, S., & Agha, Z. (2017). Unsupervised real-time anomaly detection for streaming data. Neurocomputing, Available online 2 June 2017, ISSN 0925-2312, https://doi.org/10.1016/j.neucom.2017.04.070 -#### Scoreboard +## Community edition + +This repo is [NAB community edition](https://github.com/htm-community/NAB) which is a fork of the original [Numenta's NAB](https://github.com/numenta/NAB). One of the reasons for forking +was a lack of developer activity in the upstream repo. + +### Features: + +- [x] Identical algorithms and datasets as the Numenta's NAB. So the results are `reproducible`. +- [x] `Python 3` codebase (as Python 2 reaches end-of-life at 1/1/2020 and Numenta's not yet ported) +- [x] additional community-provided detectors: + - `htmcore`: currently the only HTM implementation able to run in NAB natively in python 3. (with many improvements in [Community HTM implementation, successor of nupic.core](https://github.com/htm-community/htm.core/).)
+ - `numenta`, `numenta_TM` detectors (original from Numenta) made compatible with the Py3 codebase (only requires Py2 installed) +- [ ] additional datasets + - TBD, none so far + +Statement: We'll try to upstream any changes, new detectors and datasets to upstream Numenta's NAB, when the devs have time to apply the changes. + +## Scoreboard The NAB scores are normalized such that the maximum possible is 100.0 (i.e. the perfect detector), and a baseline of 0.0 is determined by the "null" detector (which makes no detections). -| Detector | Standard Profile | Reward Low FP | Reward Low FN | -|---------------|------------------|---------------|---------------| -| Perfect | 100.0 | 100.0 | 100.0 | -| [Numenta HTM](https://github.com/numenta/nupic)* | 70.5-69.7 | 62.6-61.7 | 75.2-74.2 | -| [CAD OSE](https://github.com/smirmik/CAD)† | 69.9 | 67.0 | 73.2 | -| [earthgecko Skyline](https://github.com/earthgecko/skyline) | 58.2 | 46.2 | 63.9 | -| [KNN CAD](https://github.com/numenta/NAB/tree/master/nab/detectors/knncad)† | 58.0 | 43.4 | 64.8 | -| [Relative Entropy](http://www.hpl.hp.com/techreports/2011/HPL-2011-8.pdf) | 54.6 | 47.6 | 58.8 | -| [Random Cut Forest](http://proceedings.mlr.press/v48/guha16.pdf) **** | 51.7 | 38.4 | 59.7 | -| [Twitter ADVec v1.0.0](https://github.com/twitter/AnomalyDetection)| 47.1 | 33.6 | 53.5 | -| [Windowed Gaussian](https://github.com/numenta/NAB/blob/master/nab/detectors/gaussian/windowedGaussian_detector.py) | 39.6 | 20.9 | 47.4 | -| [Etsy Skyline](https://github.com/etsy/skyline) | 35.7 | 27.1 | 44.5 | -| Bayesian Changepoint** | 17.7 | 3.2 | 32.2 | -| [EXPoSE](https://arxiv.org/abs/1601.06602v3) | 16.4 | 3.2 | 26.9 | -| Random*** | 11.0 | 1.2 | 19.5 | -| Null | 0.0 | 0.0 | 0.0 | +| Detector | Standard Profile | Reward Low FP | Reward Low FN | Detector name | Time (s) | +|---------------|------------------|---------------|---------------|---------------|------------| +| Perfect | 100.0 | 100.0 | 100.0 | | | +| [Numenta 
HTM](https://github.com/numenta/nupic)* | 70.5-69.7 | 62.6-61.7 | 75.2-74.2 | `numenta` | | +| [CAD OSE](https://github.com/smirmik/CAD)† | 69.9 | 67.0 | 73.2 | | | +| [earthgecko Skyline](https://github.com/earthgecko/skyline) | 58.2 | 46.2 | 63.9 | | | +| [KNN CAD](https://github.com/htm-community/NAB/tree/master/nab/detectors/knncad)† | 58.0 | 43.4 | 64.8 | | | +| [Relative Entropy](http://www.hpl.hp.com/techreports/2011/HPL-2011-8.pdf) | 54.6 | 47.6 | 58.8 | | | +| [Random Cut Forest](http://proceedings.mlr.press/v48/guha16.pdf) **** | 51.7 | 38.4 | 59.7 | | | +| [htm.core](https://github.com/htm-community/htm.core/) | 50.83 | 49.95 | 52.64 | `htmcore` | | +| [Twitter ADVec v1.0.0](https://github.com/twitter/AnomalyDetection)| 47.1 | 33.6 | 53.5 | | | +| [Windowed Gaussian](https://github.com/htm-community/NAB/blob/master/nab/detectors/gaussian/windowedGaussian_detector.py) | 39.6 | 20.9 | 47.4 | | | +| [Etsy Skyline](https://github.com/etsy/skyline) | 35.7 | 27.1 | 44.5 | | | +| Bayesian Changepoint** | 17.7 | 3.2 | 32.2 | | | +| [EXPoSE](https://arxiv.org/abs/1601.06602v3) | 16.4 | 3.2 | 26.9 | | | +| Random*** | 11.0 | 1.2 | 19.5 | | | +| Null | 0.0 | 0.0 | 0.0 | | | *As of NAB v1.0* @@ -64,22 +82,6 @@ The NAB scores are normalized such that the maximum possible is 100.0 (i.e. the Please see [the wiki section on contributing algorithms](https://github.com/numenta/NAB/wiki/NAB-Contributions-Criteria#anomaly-detection-algorithms) for discussion on posting algorithms to the scoreboard. -#### Corpus - -The NAB corpus of 58 timeseries data files is designed to provide data for research -in streaming anomaly detection. It is comprised of both -real-world and artifical timeseries data containing labeled anomalous periods of behavior. - -The majority of the data is real-world from a variety of sources such as AWS -server metrics, Twitter volume, advertisement clicking metrics, traffic data, -and more. 
All data is included in the repository, with more details in the [data -readme](https://github.com/numenta/NAB/tree/master/data). We are in the process -of adding more data, and actively searching for more data. Please contact us at -[nab@numenta.org](mailto:nab@numenta.org) if you have similar data (ideally with -known anomalies) that you would like to see incorporated into NAB. - -The NAB version will be updated whenever new data (and corresponding labels) is -added to the corpus; NAB is currently in v1.0. #### Additional Scores @@ -88,13 +90,8 @@ For comparison, here are the NAB V1.0 scores for some additional flavors of HTM. * Numenta HTM using NuPIC v.0.5.6: This version of NuPIC was used to generate the data for the paper mentioned above (Unsupervised real-time anomaly detection for streaming data. Neurocomputing, ISSN 0925-2312, https://doi.org/10.1016/j.neucom.2017.04.070). If you are interested in replicating the results shown in the paper, use this version. * [HTM Java](https://github.com/numenta/htm.java) is a Community-Driven Java port of HTM. * [nab-comportex](https://github.com/floybix/nab-comportex) is a twist on HTM anomaly detection using [Comportex](https://github.com/htm-community/comportex), a community-driven HTM implementation in Clojure. Please see [Felix Andrew's blog post](http://floybix.github.io/2016/07/01/attempting-nab) on experiments with this algorithm. -* NumentaTM HTM detector uses the implementation of temporal memory found -[here](https://github.com/numenta/nupic.core/blob/master/src/nupic/algorithms/TemporalMemory.hpp). -* Numenta HTM detector with no likelihood uses the raw anomaly scores directly. To -run without likelihood, set the variable `self.useLikelihood` in -[numenta_detector.py](https://github.com/numenta/NAB/blob/master/nab/detectors/numenta/numenta_detector.py) -to `False`. 
- +* NumentaTM HTM detector uses the implementation of temporal memory found [here](https://github.com/numenta/nupic.core/blob/master/src/nupic/algorithms/TemporalMemory.hpp). +* Numenta HTM detector with no likelihood uses the raw anomaly scores directly. To run without likelihood, set the variable `self.useLikelihood` in [numenta_detector.py](https://github.com/numenta/NAB/blob/master/nab/detectors/numenta/numenta_detector.py) to `False`. @@ -102,21 +99,40 @@ to `False`. |---------------|---------|------------------|---------------| | Numenta HTMusing NuPIC v0.5.6* | 70.1 | 63.1 | 74.3 | | [nab-comportex](https://github.com/floybix/nab-comportex)† | 64.6 | 58.8 | 69.6 | -| [NumentaTM HTM](https://github.com/numenta/NAB/blob/master/nab/detectors/numenta/numentaTM_detector.py)* | 64.6 | 56.7 | 69.2 | -| [HTM Java](https://github.com/numenta/NAB/blob/master/nab/detectors/htmjava) | 56.8 | 50.7 | 61.4 | +| [NumentaTM HTM](https://github.com/htm-community/NAB/blob/master/nab/detectors/numenta/numentaTM_detector.py)* | 64.6 | 56.7 | 69.2 | +| [HTM Java](https://github.com/htm-community/NAB/blob/master/nab/detectors/htmjava) | 56.8 | 50.7 | 61.4 | | Numenta HTM*, no likelihood | 53.62 | 34.15 | 61.89 | \* From NuPIC version 0.5.6 ([available on PyPI](https://pypi.python.org/pypi/nupic/0.5.6)). - † Algorithm was an entry to the [2016 NAB Competition](http://numenta.com/blog/2016/08/10/numenta-anomaly-benchmark-nab-competition-2016-winners/). -Installing NAB 1.0 + + +## Corpus + +The NAB corpus of 58 timeseries data files is designed to provide data for research +in streaming anomaly detection. It is comprised of both +real-world and artificial timeseries data containing labeled anomalous periods of behavior. + +The majority of the data is real-world from a variety of sources such as AWS +server metrics, Twitter volume, advertisement clicking metrics, traffic data, +and more.
All data is included in the repository, with more details in the [data +readme](https://github.com/numenta/NAB/tree/master/data). We are in the process +of adding more data, and actively searching for more data. Please contact us at +[nab@numenta.org](mailto:nab@numenta.org) if you have similar data (ideally with +known anomalies) that you would like to see incorporated into NAB. + +The NAB version will be updated whenever new data (and corresponding labels) is +added to the corpus; NAB is currently in v1.0. + + +## Installing NAB 1.0 ------------------ ### Supported Platforms - OSX 10.9 and higher -- Amazon Linux (via AMI) +- Linux Other platforms may work but have not been tested. @@ -125,34 +141,22 @@ Other platforms may work but have not been tested. You need to manually install the following: -- [Python 2.7](https://www.python.org/download/) +- [Python 3](https://www.python.org/download/) - [pip](https://pip.pypa.io/en/latest/installing.html) -- [NumPy](http://www.numpy.org/) -- [NuPIC](http://www.github.com/numenta/nupic) (only required if running the Numenta detector) - -##### Download this repository -Use the Github links provided in the right sidebar. +#### Download this repository -##### Install the Python requirements +Use the Github [download links](https://github.com/htm-community/NAB/archive/master.zip) provided in the right sidebar, +or `git clone https://github.com/htm-community/NAB` - cd NAB - (sudo) pip install -r requirements.txt - -This will install the required modules. - -##### Install NAB +#### Install NAB Recommended: + cd NAB pip install . --user -> Note: If NuPIC is not already installed, the version specified in -`NAB/requirements.txt` will be installed. If NuPIC is already installed, it - will not be re-installed. - - If you want to manage dependency versions yourself, you can skip dependencies with: @@ -198,13 +202,11 @@ follow the directions below to "Run a subset of NAB". 
##### Run HTM with NAB -First make sure NuPIC is installed and working properly. Then: - cd /path/to/nab - python run.py -d numenta --detect --optimize --score --normalize + python run.py -d htmcore --detect --optimize --score --normalize -This will run the Numenta detector only and produce normalized scores. Note that -by default it tries to use all the cores on your machine. The above command +This will run the community HTM detector `htmcore` (to run Numenta's detector use `-d numenta`) and produce normalized scores. +Note that by default it tries to use all the cores on your machine. The above command should take about 20-30 minutes on a current powerful laptop with 4-8 cores. For debugging you can run subsets of the data files by modifying and specifying specific label files (see section below). Please type: @@ -229,11 +231,10 @@ the specific version of NuPIC (and associated nupic.core) that is noted in the This will run everything and produce results files for all anomaly detection methods. Several algorithms are included in the repo, such as the Numenta -HTM anomaly detection method, as well as methods from the [Etsy -Skyline](https://github.com/etsy/skyline) anomaly detection library, a sliding -window detector, Bayes Changepoint, and so on. This will also pass those results -files to the scoring script to generate final NAB scores. **Note**: this option -will take many many hours to run. +HTM anomaly detection method, as well as methods from the [Etsy Skyline](https://github.com/etsy/skyline) anomaly detection library, +a sliding window detector, Bayes Changepoint, and so on. +This will also pass those results files to the scoring script to generate final NAB scores. +**Note**: this option will take many many hours to run. 
##### Run subset of NAB data files diff --git a/config/thresholds.json b/config/thresholds.json index 69127a41c..f3099ebc1 100644 --- a/config/thresholds.json +++ b/config/thresholds.json @@ -55,6 +55,20 @@ "threshold": 0.9947875976562506 } }, + "htmcore": { + "reward_low_FN_rate": { + "score": -1.6511067861578468, + "threshold": 0.5014187204194446 + }, + "reward_low_FP_rate": { + "score": 20.39992539458499, + "threshold": 0.5122987896930875 + }, + "standard": { + "score": 30.348893213842153, + "threshold": 0.5014187204194446 + } + }, "htmjava": { "reward_low_FN_rate": { "score": 8.764037437134272, @@ -209,4 +223,4 @@ "threshold": 1.0 } } -} +} \ No newline at end of file diff --git a/nab/detectors/htmcore/README.md b/nab/detectors/htmcore/README.md new file mode 100644 index 000000000..9036ab4c1 --- /dev/null +++ b/nab/detectors/htmcore/README.md @@ -0,0 +1,32 @@ +# HtmcoreDetector HTM implementation from [htm.core](https://github.com/htm-community/htm.core/) + +This detector provides HTM implementation from [htm.core](https://github.com/htm-community/htm.core/), +which is an actively developed, community version of Numenta's [nupic.core](https://github.com/numenta/nupic.core). + +This is a python 3 detector, called `htmcore`. As Numenta is switching NAB to python 3, this is the closest detector you can get to the +`numenta`, `numentaTM` detectors. + +`Htm.core` offers API and features similar and compatible with the official HTM implementations `nupic`, `nupic.core`. There +are also significant speed and feature improvements available! For more details please see [the htm.core project's README](https://github.com/htm-community/htm.core/blob/master/README.md). +Bugs and questions should also be reported there. + +## Installation + +`htmcore` detector is automatically installed with your `NAB` installation (`python setup.py install`), +so you don't have to do anything to have it available.
+
+### Requirements to install
+
+- [Python 3](https://www.python.org/download/)
+- [Git](https://git-scm.com/downloads)
+
+
+## Usage
+
+Usage is the same as for the default detectors, see [NAB README section Usage](https://github.com/htm-community/NAB/blob/master/README.md#usage)
+
+### Example
+Follow the instructions in the main README to run optimization, scoring, and normalization, e.g.:
+
+`python run.py -d htmcore --optimize --score --normalize`
+
diff --git a/nab/detectors/htmcore/__init__.py b/nab/detectors/htmcore/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py
new file mode 100644
index 000000000..9b60412d1
--- /dev/null
+++ b/nab/detectors/htmcore/htmcore_detector.py
@@ -0,0 +1,283 @@
+# ----------------------------------------------------------------------
+# Copyright (C) 2014, Numenta, Inc. Unless you have an agreement
+# with Numenta, Inc., for a separate license for this software code, the
+# following terms and conditions apply:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero Public License version 3 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero Public License for more details.
+#
+# You should have received a copy of the GNU Affero Public License
+# along with this program. If not, see http://www.gnu.org/licenses.
+#
+# Copyright (C) 2019, @breznak
+#
+# http://numenta.org/licenses/
+# ----------------------------------------------------------------------
+
+import math
+import datetime
+
+# htm.core imports
+from htm.bindings.sdr import SDR, Metrics
+from htm.encoders.rdse import RDSE, RDSE_Parameters
+from htm.encoders.date import DateEncoder
+from htm.bindings.algorithms import SpatialPooler
+from htm.bindings.algorithms import TemporalMemory
+from htm.algorithms.anomaly_likelihood import AnomalyLikelihood
+from htm.bindings.algorithms import Predictor
+
+from nab.detectors.base import AnomalyDetector
+
+# Fraction outside of the range of values seen so far that will be considered
+# a spatial anomaly regardless of the anomaly likelihood calculation. This
+# accounts for the human labelling bias for spatial values larger than what
+# has been seen so far.
+SPATIAL_TOLERANCE = 0.05
+
+parameters_numenta_comparable = {
+  # there are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay)
+  'enc': {
+    "value" : # RDSE for value
+      {'resolution': 0.001,
+       'size': 4000,
+       'sparsity': 0.10
+      },
+    "time": { # DateTime for timestamps
+      'timeOfDay': (21, 9.49),
+      'weekend': 0 #21 TODO try impact of weekend
+      }},
+  'predictor': {'sdrc_alpha': 0.1},
+  'sp': {
+    'boostStrength': 0.0,
+    'columnCount': 2048,
+    'localAreaDensity': 40/2048,
+    'potentialPct': 0.4,
+    'synPermActiveInc': 0.003,
+    'synPermConnected': 0.2,
+    'synPermInactiveDec': 0.0005},
+  'tm': {
+    'activationThreshold': 13,
+    'cellsPerColumn': 32,
+    'initialPerm': 0.21,
+    'maxSegmentsPerCell': 128,
+    'maxSynapsesPerSegment': 32,
+    'minThreshold': 10,
+    'newSynapseCount': 20,
+    'permanenceDec': 0.1,
+    'permanenceInc': 0.1},
+  'anomaly': {
+    'likelihood': {
+      # learningPeriod and estimationSamples are not configured here: they are
+      # derived from the detector's probationaryPeriod in initialize().
+      'probationaryPct': 0.1,
+      'reestimationPeriod': 100}}
+}
+
+
+class HtmcoreDetector(AnomalyDetector):
+  """
+  Anomaly detector built on the htm.core pipeline: encoders -> SP -> TM -> likelihood.
+  """
+
+  def __init__(self, *args, **kwargs):
+
+    super(HtmcoreDetector, self).__init__(*args, **kwargs)
+
+    ## API for controlling settings of htm.core HTM detector:
+
+    # Set this to False if you want to get results based on raw scores
+    # without using AnomalyLikelihood. This will give worse results, but
+    # useful for checking the efficacy of AnomalyLikelihood. You will need
+    # to re-optimize the thresholds when running with this setting.
+    self.useLikelihood = True
+    self.useSpatialAnomaly = True
+    self.verbose = True
+
+    ## internal members
+    # (listed here for easier understanding)
+    # initialized in `initialize()`
+    self.encTimestamp = None
+    self.encValue = None
+    self.sp = None
+    self.tm = None
+    self.anomalyLikelihood = None
+    # optional debug info
+    self.enc_info = None
+    self.sp_info = None
+    self.tm_info = None
+    # internal helper variables:
+    self.inputs_ = []
+    self.iteration_ = 0
+
+
+  def getAdditionalHeaders(self):
+    """Returns a list of strings."""
+    return ["raw_score"] #TODO optional: add "prediction"
+
+
+  def handleRecord(self, inputData):
+    """Returns a tuple (anomalyScore, rawScore).
+
+    @param inputData is a dict {"timestamp" : Timestamp(), "value" : float}
+
+    @return tuple (anomalyScore, rawScore); extra fields follow getAdditionalHeaders()
+    """
+    # Send it to the htm.core model and get back the results
+    return self.modelRun(inputData["timestamp"], inputData["value"])
+
+
+
+  def initialize(self):
+    # Parameter set to use; swap in another dict here to experiment.
+    # Only the Numenta-comparable set is defined in this module.
+    parameters = parameters_numenta_comparable
+
+    # setup spatial anomaly
+    if self.useSpatialAnomaly:
+      # Keep track of value range for spatial anomaly detection
+      self.minVal = None
+      self.maxVal = None
+
+    ## setup Enc, SP, TM, Likelihood
+    # Make the Encoders. These will convert input data into binary representations.
+    self.encTimestamp = DateEncoder(timeOfDay= parameters["enc"]["time"]["timeOfDay"],
+                                    weekend = parameters["enc"]["time"]["weekend"])
+
+    scalarEncoderParams = RDSE_Parameters()
+    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
+    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
+    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
+
+    self.encValue = RDSE( scalarEncoderParams )
+    encodingWidth = (self.encTimestamp.size + self.encValue.size)
+    self.enc_info = Metrics( [encodingWidth], 999999999 )
+
+    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
+    # SpatialPooler
+    spParams = parameters["sp"]
+    self.sp = SpatialPooler(
+      inputDimensions = (encodingWidth,),
+      columnDimensions = (spParams["columnCount"],),
+      potentialPct = spParams["potentialPct"],
+      potentialRadius = encodingWidth,
+      globalInhibition = True,
+      localAreaDensity = spParams["localAreaDensity"],
+      synPermInactiveDec = spParams["synPermInactiveDec"],
+      synPermActiveInc = spParams["synPermActiveInc"],
+      synPermConnected = spParams["synPermConnected"],
+      boostStrength = spParams["boostStrength"],
+      wrapAround = True
+    )
+    self.sp_info = Metrics( self.sp.getColumnDimensions(), 999999999 )
+
+    # TemporalMemory
+    tmParams = parameters["tm"]
+    self.tm = TemporalMemory(
+      columnDimensions = (spParams["columnCount"],),
+      cellsPerColumn = tmParams["cellsPerColumn"],
+      activationThreshold = tmParams["activationThreshold"],
+      initialPermanence = tmParams["initialPerm"],
+      connectedPermanence = spParams["synPermConnected"],
+      minThreshold = tmParams["minThreshold"],
+      maxNewSynapseCount = tmParams["newSynapseCount"],
+      permanenceIncrement = tmParams["permanenceInc"],
+      permanenceDecrement = tmParams["permanenceDec"],
+      predictedSegmentDecrement = 0.0,
+      maxSegmentsPerCell = tmParams["maxSegmentsPerCell"],
+      maxSynapsesPerSegment = tmParams["maxSynapsesPerSegment"]
+    )
+    self.tm_info = Metrics( [self.tm.numberOfCells()], 999999999 )
+
+    # setup likelihood, these settings are used in NAB
+    if self.useLikelihood:
+      anParams = parameters["anomaly"]["likelihood"]
+      learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
+      self.anomalyLikelihood = AnomalyLikelihood(
+        learningPeriod= learningPeriod,
+        estimationSamples= self.probationaryPeriod - learningPeriod,
+        reestimationPeriod= anParams["reestimationPeriod"])
+    # Predictor
+    # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
+    # predictor_resolution = 1
+
+
+  def modelRun(self, ts, val):
+    """
+    Run a single pass through HTM model
+
+    @param ts - Timestamp
+    @param val - float input value
+
+    @return tuple (anomalyScore, rawAnomalyScore) computed for the `val` in this step
+    """
+    ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
+    self.inputs_.append( val )
+    self.iteration_ += 1
+
+    # 1. Encoding
+    # Call the encoders to create bit representations for each value. These are SDR objects.
+    dateBits = self.encTimestamp.encode(ts)
+    valueBits = self.encValue.encode(float(val))
+    # Concatenate all these encodings into one large encoding for Spatial Pooling.
+    encoding = SDR( self.encTimestamp.size + self.encValue.size ).concatenate([valueBits, dateBits])
+    self.enc_info.addData( encoding )
+
+    # 2. Spatial Pooler
+    # Create an SDR to represent active columns, This will be populated by the
+    # compute method below. It must have the same dimensions as the Spatial Pooler.
+    activeColumns = SDR( self.sp.getColumnDimensions() )
+    # Execute Spatial Pooling algorithm over input space.
+    self.sp.compute(encoding, True, activeColumns)
+    self.sp_info.addData( activeColumns )
+
+    # 3. Temporal Memory
+    # Execute Temporal Memory algorithm over active mini-columns.
+    self.tm.compute(activeColumns, learn=True)
+    self.tm_info.addData( self.tm.getActiveCells().flatten() )
+
+    # 4.1 (optional) Predictor #TODO optional
+    #TODO optional: also return an error metric on predictions (RMSE, R2,...)
+
+    # 4.2 Anomaly
+    # handle spatial, contextual (raw, likelihood) anomalies
+    # -Spatial
+    spatialAnomaly = 0.0 #TODO optional: make this computed in SP (and later improve)
+    if self.useSpatialAnomaly:
+      # Update min/max values and check if there is a spatial anomaly
+      if self.minVal != self.maxVal:
+        tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
+        maxExpected = self.maxVal + tolerance
+        minExpected = self.minVal - tolerance
+        if val > maxExpected or val < minExpected:
+          spatialAnomaly = 1.0
+      if self.maxVal is None or val > self.maxVal:
+        self.maxVal = val
+      if self.minVal is None or val < self.minVal:
+        self.minVal = val
+
+    # -temporal (raw)
+    raw = self.tm.anomaly
+    temporalAnomaly = raw
+
+    if self.useLikelihood:
+      # Compute log(anomaly likelihood)
+      like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
+      logScore = self.anomalyLikelihood.computeLogLikelihood(like)
+      temporalAnomaly = logScore #TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood
+
+    anomalyScore = max(spatialAnomaly, temporalAnomaly) # this is the "main" anomaly, compared in NAB
+
+    # 5. print stats
+    if self.verbose and self.iteration_ % 1000 == 0:
+      # print(self.enc_info)
+      # print(self.sp_info)
+      # print(self.tm_info)
+      pass
+
+    return (anomalyScore, raw)
diff --git a/nab/detectors/htmcore/requirements.txt b/nab/detectors/htmcore/requirements.txt
new file mode 100644
index 000000000..37a275c41
--- /dev/null
+++ b/nab/detectors/htmcore/requirements.txt
@@ -0,0 +1 @@
+htm.core>=2.0
diff --git a/nab/detectors/htmcore/setup.py b/nab/detectors/htmcore/setup.py
new file mode 100644
index 000000000..d0aa1ead1
--- /dev/null
+++ b/nab/detectors/htmcore/setup.py
@@ -0,0 +1,115 @@
+# ----------------------------------------------------------------------
+# Copyright (C) 2014-2015, Numenta, Inc.
+# Unless you have an agreement
+# with Numenta, Inc., for a separate license for this software code, the
+# following terms and conditions apply:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero Public License version 3 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero Public License for more details.
+#
+# You should have received a copy of the GNU Affero Public License
+# along with this program. If not, see http://www.gnu.org/licenses.
+#
+# http://numenta.org/licenses/
+# ----------------------------------------------------------------------
+
+import os
+import sys
+import pkg_resources
+import warnings
+from setuptools import setup, find_packages
+
+REPO_DIR = os.path.dirname(os.path.realpath(__file__))
+
+
+# Utility function to read the README file.
+# Used for the long_description. It's nice, because now 1) we have a top level
+# README file and 2) it's easier to type in the README file than to put a raw
+# string in below ...
+def read(fname):
+  with open(os.path.join(os.path.dirname(__file__), fname)) as f:
+    result = f.read()
+    return result
+
+
+def nupicInstalled():
+  """
+  Determine whether htm.core (community successor of NuPIC) is already installed.
+  :return: boolean
+  """
+  try:
+    _ = pkg_resources.get_distribution("htm.core")
+    return True
+  except pkg_resources.DistributionNotFound:
+    return False # Not installed yet; findRequirements() installs it from git.
+
+
+def parseFile(requirementFile):
+  """
+  Parse requirement file.
+  :return: list of requirements; empty if the file cannot be read.
+  """
+  try:
+    with open(requirementFile) as f: # close the handle deterministically
+      lines = [line.strip() for line in f]
+  except IOError:
+    return []
+  # Drop blank lines and comments (checked after stripping, so indented
+  # comments are skipped too); neither is a valid requirement specifier.
+  return [line for line in lines if line and not line.startswith("#")]
+
+
+def findRequirements():
+  """
+  Read the requirements.txt file and parse into requirements for setup's
+  install_requires option. Installs htm.core from git if it is missing.
+  """
+  requirements = parseFile(os.path.join(REPO_DIR, "requirements.txt")) #REPO_DIR is local htmcore/ "repo"
+
+  if not nupicInstalled():
+    # htm.core is not installed yet, so attempt a custom install from git.
+    # TODO when htm.core is published on PyPI, remove this whole custom setup.py, and just add htm.core to REPO/requirements.txt
+    # os.system() never raises for a missing program; it returns a non-zero
+    # exit status, so check that (`--version` also works outside a git repo).
+    if os.system("git --version") != 0:
+      raise RuntimeError("Git must be installed for htm.core detector installation!")
+
+    curr_dir = os.getcwd()
+    try:
+      os.chdir(REPO_DIR)
+      os.system("git clone --depth=5 https://github.com/htm-community/htm.core")
+      os.chdir(os.path.join(REPO_DIR, "htm.core"))
+      os.system("git pull origin master") #if the clone fails (repo exists) we want to update it
+      os.system('"{}" setup.py install'.format(sys.executable)) #installing htm.core with the running interpreter
+    finally:
+      os.chdir(curr_dir) # restore the working directory even if a step fails
+
+  return requirements
+
+
+if __name__ == "__main__":
+  requirements = findRequirements()
+
+  setup(
+    name="nab-detector-htmcore",
+    version="1.0",
+    author="@breznak",
+    author_email="nab@numenta.org",
+    description=(
+      "htm.core detector from HTM community repo for NAB (in Python 3)"),
+    license="AGPL",
+    packages=find_packages(),
+    long_description=read(os.path.join(REPO_DIR, "README.md")),
+    install_requires=requirements,
+    entry_points={
+      "console_scripts": [
+        "nab-plot = nab.plot:main",
+      ],
+    },
+  )
diff --git a/results/final_results.json b/results/final_results.json
index 8161ade25..295cef20b 100644
--- a/results/final_results.json
+++ b/results/final_results.json
@@ -14,6 +14,11 @@
         "reward_low_FP_rate": 3.1909311068965485,
         "standard": 16.43666922426724
     },
+    "htmcore": {
+        "reward_low_FN_rate": 66.1922106936328,
+
"reward_low_FP_rate": 58.7930712907694, + "standard": 63.081419488725054 + }, "htmjava": { "reward_low_FN_rate": 70.42407766520115, "reward_low_FP_rate": 53.25694971610345, @@ -35,9 +40,9 @@ "standard": 70.10105611943965 }, "numentaTM": { - "reward_low_FN_rate": 69.185068229060349, + "reward_low_FN_rate": 69.18506822906035, "reward_low_FP_rate": 56.665308225043105, - "standard": 64.553464412543107 + "standard": 64.55346441254311 }, "random": { "reward_low_FN_rate": 25.876351314080456, diff --git a/run.py b/run.py index 196ac4fc2..01a096a35 100755 --- a/run.py +++ b/run.py @@ -159,7 +159,7 @@ def main(args): parser.add_argument("-d", "--detectors", nargs="*", type=str, - default=["numenta", "numentaTM", "htmjava", "null", "random", + default=["numenta", "numentaTM", "htmcore", "htmjava", "null", "random", "bayesChangePt", "windowedGaussian", "expose", "relativeEntropy", "earthgeckoSkyline"], help="Comma separated list of detector(s) to use, e.g. " @@ -227,6 +227,9 @@ def main(args): ContextOSEDetector ) if "earthgeckoSkyline" in args.detectors: from nab.detectors.earthgecko_skyline.earthgecko_skyline_detector import EarthgeckoSkylineDetector + if "htmcore" in args.detectors: + from nab.detectors.htmcore.htmcore_detector import HtmcoreDetector + # Special hacks for detectors requiring Python 2: # TODO the imports are failing, remove? Py2 detectors have special treatment in `getDetectorClassConstructors()` above # diff --git a/setup.py b/setup.py index 41793fbaa..8c6da2ece 100644 --- a/setup.py +++ b/setup.py @@ -122,4 +122,9 @@ def findRequirements(): except: print("Unable to install python2 dependencies: numenta, numentaTM, htmjava detectors not available!") - + # install community HTM htm.core + try: + import os + os.system('python nab/detectors/htmcore/setup.py install') + except: + print("Failed to install htm.core detector!")