From 12d98e58d3df3221ed6fce22f3be31f82f51e436 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Wed, 24 Jul 2019 14:54:14 +0200 Subject: [PATCH 01/13] HTM.core new detector installed install htm.core detector from setup.py if installed, just skip, otherwise build from github --- .gitignore | 1 + nab/detectors/htmcore/README.md | 115 +++++++++++++++ nab/detectors/htmcore/__init__.py | 0 nab/detectors/htmcore/numentaTM_detector.py | 75 ++++++++++ nab/detectors/htmcore/numenta_detector.py | 152 ++++++++++++++++++++ nab/detectors/htmcore/requirements.txt | 1 + nab/detectors/htmcore/setup.py | 115 +++++++++++++++ run.py | 5 +- setup.py | 7 +- 9 files changed, 469 insertions(+), 2 deletions(-) create mode 100644 nab/detectors/htmcore/README.md create mode 100644 nab/detectors/htmcore/__init__.py create mode 100644 nab/detectors/htmcore/numentaTM_detector.py create mode 100644 nab/detectors/htmcore/numenta_detector.py create mode 100644 nab/detectors/htmcore/requirements.txt create mode 100644 nab/detectors/htmcore/setup.py diff --git a/.gitignore b/.gitignore index 46bb4785b..36809ca35 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ plot_* pyenv2/ build/ dist/ +htm.core/ diff --git a/nab/detectors/htmcore/README.md b/nab/detectors/htmcore/README.md new file mode 100644 index 000000000..7308cf11c --- /dev/null +++ b/nab/detectors/htmcore/README.md @@ -0,0 +1,115 @@ +# Numenta and NumentaTM detectors + +This directory holds the Python 2 code required to run the `numenta` and +`numentaTM` detectors against the NAB data. In 2019 the main body of the +benchmark's code was ported to Python 3 however these detectors rely on NuPIC +which is Python 2 only. + +This code can be used to replicate results listed on the scoreboard of +the main repository for the following detectors: + + numenta + numentaTM + +## Installation + +We assume you have a working version of Python 3 installed as your default Python. 
+If your default system Python is still Python 2 you can skip the virtual environment +creation below. + +### Requirements to install + +- [Python 2.7](https://www.python.org/download/) +- [Virtualenv](https://pypi.org/project/virtualenv/) + +### Install a virtual environment + +Create a new Python 2 virtual environment in this directory. + +`virtualenv -p path/to/python2 env` + +Activate that virtual environment. + +`./env/Scripts/activate` + +or + +`env\Scripts\activate.bat` on Windows. + +Confirm you have a local Python 2 + +``` +$ python +Python 2.7.13 (v2.7.13:a06454b1afa1, Dec 17 2016, 20:53:40) [MSC v.1500 64 bit (AMD64)] on win32 +Type "help", "copyright", "credits" or "license" for more information. +>>> +``` + +### Install detectors + +`cd /path/to/NAB/` +`pip install nupic` +`python nab/detectors/numenta/setup.py develop` + +## Usage + +### Detection + +This directory contains a modified version of the `run.py` script which exists +in the main NAB directory. It can be used to run *detection* only using the +`numenta` and `numentaTM` detectors against NAB data. + +By default it will run both `numenta` and `numentaTM` detectors and output +results to the main NAB/results directory. + +`python2 run.py` + +Note: By default `run.py` tries to use all the cores on your machine. The above +command should take about 20-30 minutes on a current powerful laptop with 4-8 +cores. + +To run only one of the detectors use the `-d` option: + +`python2 run.py -d numenta` + +To see all options of this script type: + +`python2 run.py --help` + +### Optimizing, Scoring and Normalizing + +Once you have run either of the detectors herein against the NAB data you will need +to exit the Python 2 virtual environment and move into the main NAB directory. 
+ +``` +(env) /NAB/nab/detectors/numenta +$ deactivate +/NAB/nab/detectors/numenta +$ cd ../../../ +/NAB +$ +``` + +Then follow the instructions in the main README to run optimization, scoring, and normalization, e.g.: + +`python run.py -d numenta,numentaTM --optimize --score --normalize` + +### Run a subset of NAB data files + +For debugging it is sometimes useful to be able to run your algorithm on a +subset of the NAB data files or on your own set of data files. You can do that +by creating a custom `combined_windows.json` file that only contains labels for +the files you want to run. This new file should be in exactly the same format as +`combined_windows.json` except it would only contain windows for the files you +are interested in. + +**Example**: an example file containing two files is in +`labels/combined_windows_tiny.json`. (Under of the main NAB directory) The +following command shows you how to run NAB on a subset of labels: + + python2 run.py -d numenta --detect --windowsFile labels/combined_windows_tiny.json + +This will run the `detect` phase of NAB on the data files specified in the above +JSON file. Note that scoring and normalization are not supported with this +option. Note also that you may see warning messages regarding the lack of labels +for other files. You can ignore these warnings. diff --git a/nab/detectors/htmcore/__init__.py b/nab/detectors/htmcore/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/nab/detectors/htmcore/numentaTM_detector.py b/nab/detectors/htmcore/numentaTM_detector.py new file mode 100644 index 000000000..0ba71b197 --- /dev/null +++ b/nab/detectors/htmcore/numentaTM_detector.py @@ -0,0 +1,75 @@ +# ---------------------------------------------------------------------- +# Copyright (C) 2016, Numenta, Inc. 
Unless you have an agreement +# with Numenta, Inc., for a separate license for this software code, the +# following terms and conditions apply: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero Public License version 3 as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero Public License for more details. +# +# You should have received a copy of the GNU Affero Public License +# along with this program. If not, see http://www.gnu.org/licenses. +# +# http://numenta.org/licenses/ +# ---------------------------------------------------------------------- + +import os +import math +import simplejson as json + +from nupic.algorithms import anomaly_likelihood +from nupic.frameworks.opf.common_models.cluster_params import ( + getScalarMetricWithTimeOfDayAnomalyParams) +try: + from nupic.frameworks.opf.model_factory import ModelFactory +except: + # Try importing it the old way (version < 0.7.0.dev0) + from nupic.frameworks.opf.modelfactory import ModelFactory + +from nab.detectors.numenta.numenta_detector import NumentaDetector + + + +class NumentaTMDetector(NumentaDetector): + """ + This detector uses the implementation of temporal memory in + https://github.com/numenta/nupic.core/blob/master/src/nupic/algorithms/TemporalMemory.hpp. + It differs from its parent detector in temporal memory and its parameters. 
+ """ + + def __init__(self, *args, **kwargs): + + super(NumentaTMDetector, self).__init__(*args, **kwargs) + + + def initialize(self): + # Get config params, setting the RDSE resolution + rangePadding = abs(self.inputMax - self.inputMin) * 0.2 + + modelParams = getScalarMetricWithTimeOfDayAnomalyParams( + metricData=[0], + minVal=self.inputMin-rangePadding, + maxVal=self.inputMax+rangePadding, + minResolution=0.001, + tmImplementation="tm_cpp" + )["modelConfig"] + + self._setupEncoderParams( + modelParams["modelParams"]["sensorParams"]["encoders"]) + + self.model = ModelFactory.create(modelParams) + + self.model.enableInference({"predictedField": "value"}) + + # Initialize the anomaly likelihood object + numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0)) + self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood( + learningPeriod=numentaLearningPeriod, + estimationSamples=self.probationaryPeriod-numentaLearningPeriod, + reestimationPeriod=100 + ) diff --git a/nab/detectors/htmcore/numenta_detector.py b/nab/detectors/htmcore/numenta_detector.py new file mode 100644 index 000000000..782333ac1 --- /dev/null +++ b/nab/detectors/htmcore/numenta_detector.py @@ -0,0 +1,152 @@ +# ---------------------------------------------------------------------- +# Copyright (C) 2014, Numenta, Inc. Unless you have an agreement +# with Numenta, Inc., for a separate license for this software code, the +# following terms and conditions apply: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero Public License version 3 as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero Public License for more details. 
+# +# You should have received a copy of the GNU Affero Public License +# along with this program. If not, see http://www.gnu.org/licenses. +# +# http://numenta.org/licenses/ +# ---------------------------------------------------------------------- + +import math + +from nupic.algorithms import anomaly_likelihood +from nupic.frameworks.opf.common_models.cluster_params import ( + getScalarMetricWithTimeOfDayAnomalyParams) +try: + from nupic.frameworks.opf.model_factory import ModelFactory +except: + # Try importing it the old way (version < 0.7.0.dev0) + from nupic.frameworks.opf.modelfactory import ModelFactory + +from nab.detectors.base import AnomalyDetector + +# Fraction outside of the range of values seen so far that will be considered +# a spatial anomaly regardless of the anomaly likelihood calculation. This +# accounts for the human labelling bias for spatial values larger than what +# has been seen so far. +SPATIAL_TOLERANCE = 0.05 + + + +class NumentaDetector(AnomalyDetector): + """ + This detector uses an HTM based anomaly detection technique. + """ + + def __init__(self, *args, **kwargs): + + super(NumentaDetector, self).__init__(*args, **kwargs) + + self.model = None + self.sensorParams = None + self.anomalyLikelihood = None + # Keep track of value range for spatial anomaly detection + self.minVal = None + self.maxVal = None + + # Set this to False if you want to get results based on raw scores + # without using AnomalyLikelihood. This will give worse results, but + # useful for checking the efficacy of AnomalyLikelihood. You will need + # to re-optimize the thresholds when running with this setting. + self.useLikelihood = True + + + def getAdditionalHeaders(self): + """Returns a list of strings.""" + return ["raw_score"] + + + def handleRecord(self, inputData): + """Returns a tuple (anomalyScore, rawScore). + + Internally to NuPIC "anomalyScore" corresponds to "likelihood_score" + and "rawScore" corresponds to "anomaly_score". Sorry about that. 
+ """ + # Send it to Numenta detector and get back the results + result = self.model.run(inputData) + + # Get the value + value = inputData["value"] + + # Retrieve the anomaly score and write it to a file + rawScore = result.inferences["anomalyScore"] + + # Update min/max values and check if there is a spatial anomaly + spatialAnomaly = False + if self.minVal != self.maxVal: + tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE + maxExpected = self.maxVal + tolerance + minExpected = self.minVal - tolerance + if value > maxExpected or value < minExpected: + spatialAnomaly = True + if self.maxVal is None or value > self.maxVal: + self.maxVal = value + if self.minVal is None or value < self.minVal: + self.minVal = value + + if self.useLikelihood: + # Compute log(anomaly likelihood) + anomalyScore = self.anomalyLikelihood.anomalyProbability( + inputData["value"], rawScore, inputData["timestamp"]) + logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore) + finalScore = logScore + else: + finalScore = rawScore + + if spatialAnomaly: + finalScore = 1.0 + + return (finalScore, rawScore) + + + def initialize(self): + # Get config params, setting the RDSE resolution + rangePadding = abs(self.inputMax - self.inputMin) * 0.2 + modelParams = getScalarMetricWithTimeOfDayAnomalyParams( + metricData=[0], + minVal=self.inputMin-rangePadding, + maxVal=self.inputMax+rangePadding, + minResolution=0.001, + tmImplementation = "cpp" + )["modelConfig"] + + self._setupEncoderParams( + modelParams["modelParams"]["sensorParams"]["encoders"]) + + self.model = ModelFactory.create(modelParams) + + self.model.enableInference({"predictedField": "value"}) + + if self.useLikelihood: + # Initialize the anomaly likelihood object + numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0)) + self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood( + learningPeriod=numentaLearningPeriod, + estimationSamples=self.probationaryPeriod-numentaLearningPeriod, + 
reestimationPeriod=100 + ) + + + def _setupEncoderParams(self, encoderParams): + # The encoder must expect the NAB-specific datafile headers + encoderParams["timestamp_dayOfWeek"] = encoderParams.pop("c0_dayOfWeek") + encoderParams["timestamp_timeOfDay"] = encoderParams.pop("c0_timeOfDay") + encoderParams["timestamp_timeOfDay"]["fieldname"] = "timestamp" + encoderParams["timestamp_timeOfDay"]["name"] = "timestamp" + encoderParams["timestamp_weekend"] = encoderParams.pop("c0_weekend") + encoderParams["value"] = encoderParams.pop("c1") + encoderParams["value"]["fieldname"] = "value" + encoderParams["value"]["name"] = "value" + + self.sensorParams = encoderParams["value"] diff --git a/nab/detectors/htmcore/requirements.txt b/nab/detectors/htmcore/requirements.txt new file mode 100644 index 000000000..37a275c41 --- /dev/null +++ b/nab/detectors/htmcore/requirements.txt @@ -0,0 +1 @@ +htm.core>=2.0 diff --git a/nab/detectors/htmcore/setup.py b/nab/detectors/htmcore/setup.py new file mode 100644 index 000000000..cfebf8ebc --- /dev/null +++ b/nab/detectors/htmcore/setup.py @@ -0,0 +1,115 @@ +# ---------------------------------------------------------------------- +# Copyright (C) 2014-2015, Numenta, Inc. Unless you have an agreement +# with Numenta, Inc., for a separate license for this software code, the +# following terms and conditions apply: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero Public License version 3 as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero Public License for more details. +# +# You should have received a copy of the GNU Affero Public License +# along with this program. If not, see http://www.gnu.org/licenses. 
+# +# http://numenta.org/licenses/ +# ---------------------------------------------------------------------- + +import os +import sys +import pkg_resources +import warnings +from setuptools import setup, find_packages + +REPO_DIR = os.path.dirname(os.path.realpath(__file__)) + + +# Utility function to read the README file. +# Used for the long_description. It"s nice, because now 1) we have a top level +# README file and 2) it"s easier to type in the README file than to put a raw +# string in below ... +def read(fname): + with open(os.path.join(os.path.dirname(__file__), fname)) as f: + result = f.read() + return result + + + +def nupicInstalled(): + """ + Determine whether NuPIC is already installed. + :return: boolean + """ + try: + _ = pkg_resources.get_distribution("htm.core") + return True + except pkg_resources.DistributionNotFound: + return False # Silently ignore. NuPIC will be installed later. + + +def parseFile(requirementFile): + """ + Parse requirement file. + :return: list of requirements. + """ + try: + return [ + line.strip() + for line in open(requirementFile).readlines() + if not line.startswith("#") + ] + except IOError: + return [] + + + +def findRequirements(): + """ + Read the requirements.txt file and parse into requirements for setup's + install_requirements option. + """ + requirements = parseFile(os.path.join(REPO_DIR, "requirements.txt")) #REPO_DIR is local htmcore/ "repo" + + if not nupicInstalled(): + # The user already has a version of htm.core (NuPIC) installed. If not, we'll attempt custom install from git. + # custom install htm.core #TODO when htm.core is published on PyPI, remove this whole custom setup.py, and just add htm.core to REPO/requirements.txt + try: + os.system("git status") + except: + raise "Git must be installed for htm.core detector installation!" 
+ + curr_dir = os.getcwd() + os.chdir(REPO_DIR) + os.system("git clone --depth=5 https://github.com/htm-community/htm.core") + os.chdir(os.path.join(REPO_DIR, "htm.core")) + os.system("git pull origin master") #if the clone fails (repo exists) we want to update it + os.system("python setup.py install") #installing htm.core + os.chdir(curr_dir) + + return requirements + + + +if __name__ == "__main__": + requirements = findRequirements() + + setup( + name="nab-detector-htmcore", + version="1.0", + author="@breznak", + author_email="nab@numenta.org", + description=( + "HTH.core detector from HTM community repo for NAB (in Python 3)"), + license="AGPL", + packages=find_packages(), + long_description=read(os.path.join(REPO_DIR, "htm.core","README.md")), + install_requires=requirements, + entry_points={ + "console_scripts": [ + "nab-plot = nab.plot:main", + ], + }, + ) diff --git a/run.py b/run.py index 196ac4fc2..b7c96de76 100755 --- a/run.py +++ b/run.py @@ -159,7 +159,7 @@ def main(args): parser.add_argument("-d", "--detectors", nargs="*", type=str, - default=["numenta", "numentaTM", "htmjava", "null", "random", + default=["numenta", "numentaTM", "htmcore", "htmjava", "null", "random", "bayesChangePt", "windowedGaussian", "expose", "relativeEntropy", "earthgeckoSkyline"], help="Comma separated list of detector(s) to use, e.g. " @@ -227,6 +227,9 @@ def main(args): ContextOSEDetector ) if "earthgeckoSkyline" in args.detectors: from nab.detectors.earthgecko_skyline.earthgecko_skyline_detector import EarthgeckoSkylineDetector + if "htmcore" in args.detectors: + print("TODO htmcore detector") + # Special hacks for detectors requiring Python 2: # TODO the imports are failing, remove? 
Py2 detectors have special treatment in `getDetectorClassConstructors()` above # diff --git a/setup.py b/setup.py index 8995334cf..a8249ae41 100644 --- a/setup.py +++ b/setup.py @@ -121,4 +121,9 @@ def findRequirements(): except: print("Unable to install python2 dependencies: numenta, numentaTM, htmjava detectors not available!") - + # install community HTM htm.core + try: + import os + os.system('python nab/detectors/htmcore/setup.py install') + except: + print("Failed to install htm.core detector!") From 8de00c6ce3b77261bcd54d2feb7bac0099d2243c Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Wed, 24 Jul 2019 17:27:56 +0200 Subject: [PATCH 02/13] new HtmcoreDetector POC runable POC for community HTM detector from famous htm.core repo! --- ...umenta_detector.py => htmcore_detector.py} | 51 +++++-------- nab/detectors/htmcore/numentaTM_detector.py | 75 ------------------- run.py | 2 +- 3 files changed, 18 insertions(+), 110 deletions(-) rename nab/detectors/htmcore/{numenta_detector.py => htmcore_detector.py} (76%) delete mode 100644 nab/detectors/htmcore/numentaTM_detector.py diff --git a/nab/detectors/htmcore/numenta_detector.py b/nab/detectors/htmcore/htmcore_detector.py similarity index 76% rename from nab/detectors/htmcore/numenta_detector.py rename to nab/detectors/htmcore/htmcore_detector.py index 782333ac1..adc02aa68 100644 --- a/nab/detectors/htmcore/numenta_detector.py +++ b/nab/detectors/htmcore/htmcore_detector.py @@ -20,15 +20,12 @@ import math -from nupic.algorithms import anomaly_likelihood -from nupic.frameworks.opf.common_models.cluster_params import ( - getScalarMetricWithTimeOfDayAnomalyParams) -try: - from nupic.frameworks.opf.model_factory import ModelFactory -except: - # Try importing it the old way (version < 0.7.0.dev0) - from nupic.frameworks.opf.modelfactory import ModelFactory +# htm.core imports +from htm.bindings.algorithms import SpatialPooler as SP +from htm.bindings.algorithms import TemporalMemory as TM +from 
htm.bindings.algorithms import Predictor +from htm.algorithms.anomaly_likelihood import AnomalyLikelihood from nab.detectors.base import AnomalyDetector # Fraction outside of the range of values seen so far that will be considered @@ -39,17 +36,15 @@ -class NumentaDetector(AnomalyDetector): +class HtmcoreDetector(AnomalyDetector): """ This detector uses an HTM based anomaly detection technique. """ def __init__(self, *args, **kwargs): - super(NumentaDetector, self).__init__(*args, **kwargs) + super(HtmcoreDetector, self).__init__(*args, **kwargs) - self.model = None - self.sensorParams = None self.anomalyLikelihood = None # Keep track of value range for spatial anomaly detection self.minVal = None @@ -64,7 +59,7 @@ def __init__(self, *args, **kwargs): def getAdditionalHeaders(self): """Returns a list of strings.""" - return ["raw_score"] + return ["raw_score"] #TODO add "prediction" def handleRecord(self, inputData): @@ -74,16 +69,16 @@ def handleRecord(self, inputData): and "rawScore" corresponds to "anomaly_score". Sorry about that. 
""" # Send it to Numenta detector and get back the results - result = self.model.run(inputData) + result = [] #FIXME self.model.run(inputData) # Get the value value = inputData["value"] # Retrieve the anomaly score and write it to a file - rawScore = result.inferences["anomalyScore"] + rawScore = 0.5 #FIXME result.inferences["anomalyScore"] # Update min/max values and check if there is a spatial anomaly - spatialAnomaly = False + spatialAnomaly = False #TODO make this computed in SP (and later improve) if self.minVal != self.maxVal: tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE maxExpected = self.maxVal + tolerance @@ -97,10 +92,9 @@ def handleRecord(self, inputData): if self.useLikelihood: # Compute log(anomaly likelihood) - anomalyScore = self.anomalyLikelihood.anomalyProbability( - inputData["value"], rawScore, inputData["timestamp"]) + anomalyScore = self.anomalyLikelihood.anomalyProbability(inputData["value"], rawScore, inputData["timestamp"]) logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore) - finalScore = logScore + finalScore = logScore #TODO returls logScore and not probability? 
Fix that in our Likelihood.cpp; #TODO TM to provide anomaly {none, raw, likelihood} else: finalScore = rawScore @@ -113,25 +107,14 @@ def handleRecord(self, inputData): def initialize(self): # Get config params, setting the RDSE resolution rangePadding = abs(self.inputMax - self.inputMin) * 0.2 - modelParams = getScalarMetricWithTimeOfDayAnomalyParams( - metricData=[0], - minVal=self.inputMin-rangePadding, - maxVal=self.inputMax+rangePadding, - minResolution=0.001, - tmImplementation = "cpp" - )["modelConfig"] - - self._setupEncoderParams( - modelParams["modelParams"]["sensorParams"]["encoders"]) - - self.model = ModelFactory.create(modelParams) - - self.model.enableInference({"predictedField": "value"}) + minVal=self.inputMin-rangePadding + maxVal=self.inputMax+rangePadding + minResolution=0.001 #TODO there params should form default_params for encoder etc if self.useLikelihood: # Initialize the anomaly likelihood object numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0)) - self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood( + self.anomalyLikelihood = AnomalyLikelihood( #TODO make these default for py anomaly_likelihood? as NAB is likely tuned for best Likelihood! learningPeriod=numentaLearningPeriod, estimationSamples=self.probationaryPeriod-numentaLearningPeriod, reestimationPeriod=100 diff --git a/nab/detectors/htmcore/numentaTM_detector.py b/nab/detectors/htmcore/numentaTM_detector.py deleted file mode 100644 index 0ba71b197..000000000 --- a/nab/detectors/htmcore/numentaTM_detector.py +++ /dev/null @@ -1,75 +0,0 @@ -# ---------------------------------------------------------------------- -# Copyright (C) 2016, Numenta, Inc. 
Unless you have an agreement -# with Numenta, Inc., for a separate license for this software code, the -# following terms and conditions apply: -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero Public License version 3 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero Public License for more details. -# -# You should have received a copy of the GNU Affero Public License -# along with this program. If not, see http://www.gnu.org/licenses. -# -# http://numenta.org/licenses/ -# ---------------------------------------------------------------------- - -import os -import math -import simplejson as json - -from nupic.algorithms import anomaly_likelihood -from nupic.frameworks.opf.common_models.cluster_params import ( - getScalarMetricWithTimeOfDayAnomalyParams) -try: - from nupic.frameworks.opf.model_factory import ModelFactory -except: - # Try importing it the old way (version < 0.7.0.dev0) - from nupic.frameworks.opf.modelfactory import ModelFactory - -from nab.detectors.numenta.numenta_detector import NumentaDetector - - - -class NumentaTMDetector(NumentaDetector): - """ - This detector uses the implementation of temporal memory in - https://github.com/numenta/nupic.core/blob/master/src/nupic/algorithms/TemporalMemory.hpp. - It differs from its parent detector in temporal memory and its parameters. 
- """ - - def __init__(self, *args, **kwargs): - - super(NumentaTMDetector, self).__init__(*args, **kwargs) - - - def initialize(self): - # Get config params, setting the RDSE resolution - rangePadding = abs(self.inputMax - self.inputMin) * 0.2 - - modelParams = getScalarMetricWithTimeOfDayAnomalyParams( - metricData=[0], - minVal=self.inputMin-rangePadding, - maxVal=self.inputMax+rangePadding, - minResolution=0.001, - tmImplementation="tm_cpp" - )["modelConfig"] - - self._setupEncoderParams( - modelParams["modelParams"]["sensorParams"]["encoders"]) - - self.model = ModelFactory.create(modelParams) - - self.model.enableInference({"predictedField": "value"}) - - # Initialize the anomaly likelihood object - numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0)) - self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood( - learningPeriod=numentaLearningPeriod, - estimationSamples=self.probationaryPeriod-numentaLearningPeriod, - reestimationPeriod=100 - ) diff --git a/run.py b/run.py index b7c96de76..01a096a35 100755 --- a/run.py +++ b/run.py @@ -228,7 +228,7 @@ def main(args): if "earthgeckoSkyline" in args.detectors: from nab.detectors.earthgecko_skyline.earthgecko_skyline_detector import EarthgeckoSkylineDetector if "htmcore" in args.detectors: - print("TODO htmcore detector") + from nab.detectors.htmcore.htmcore_detector import HtmcoreDetector # Special hacks for detectors requiring Python 2: # TODO the imports are failing, remove? 
Py2 detectors have special treatment in `getDetectorClassConstructors()` above From 8e62c87b45ea6f6d710ee6dd28253e2fa3f163e4 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Wed, 24 Jul 2019 18:12:53 +0200 Subject: [PATCH 03/13] Htmcore: updated Readme for this detector --- nab/detectors/htmcore/README.md | 115 +++++--------------------------- nab/detectors/htmcore/setup.py | 2 +- 2 files changed, 17 insertions(+), 100 deletions(-) diff --git a/nab/detectors/htmcore/README.md b/nab/detectors/htmcore/README.md index 7308cf11c..9036ab4c1 100644 --- a/nab/detectors/htmcore/README.md +++ b/nab/detectors/htmcore/README.md @@ -1,115 +1,32 @@ -# Numenta and NumentaTM detectors +# HtmcoreDetector HTM implementation from [htm.core](https://github.com/htm-community/htm.core/) -This directory holds the Python 2 code required to run the `numenta` and -`numentaTM` detectors against the NAB data. In 2019 the main body of the -benchmark's code was ported to Python 3 however these detectors rely on NuPIC -which is Python 2 only. +This detector provides HTM implementation from [htm.core](https://github.com/htm-community/htm.core/), +which is an actively developed, community version of Numenta's [nupic.core](https://github.com/numenta/nupic.core). -This code can be used to replicate results listed on the scoreboard of -the main repository for the following detectors: +This is a python 3 detector, called `htmcore`, as Numenta is switching NAB to python 3, this is the closes detector you can get to +`numenta`, `numentaTM` detectors. - numenta - numentaTM +`Htm.core` offers API and features similar and compatible with the official HTM implementations `nupic`, `nupic.core`. Although there +are significant speed and features improvements available! For more details please see [the htm.core project's README](https://github.com/htm-community/htm.core/blob/master/README.md) +Bugs and questions should also be reported there. 
## Installation -We assume you have a working version of Python 3 installed as your default Python. -If your default system Python is still Python 2 you can skip the virtual environment -creation below. +`htmcore` detector is automatically installed with your `NAB` installation (`python setup.py install`), +so you don't have to do anything to have it available. ### Requirements to install -- [Python 2.7](https://www.python.org/download/) -- [Virtualenv](https://pypi.org/project/virtualenv/) +- [Python 3](https://www.python.org/download/) +- [Git](https://git-scm.com/downloads) -### Install a virtual environment - -Create a new Python 2 virtual environment in this directory. - -`virtualenv -p path/to/python2 env` - -Activate that virtual environment. - -`./env/Scripts/activate` - -or - -`env\Scripts\activate.bat` on Windows. - -Confirm you have a local Python 2 - -``` -$ python -Python 2.7.13 (v2.7.13:a06454b1afa1, Dec 17 2016, 20:53:40) [MSC v.1500 64 bit (AMD64)] on win32 -Type "help", "copyright", "credits" or "license" for more information. ->>> -``` - -### Install detectors - -`cd /path/to/NAB/` -`pip install nupic` -`python nab/detectors/numenta/setup.py develop` ## Usage -### Detection - -This directory contains a modified version of the `run.py` script which exists -in the main NAB directory. It can be used to run *detection* only using the -`numenta` and `numentaTM` detectors against NAB data. - -By default it will run both `numenta` and `numentaTM` detectors and output -results to the main NAB/results directory. - -`python2 run.py` - -Note: By default `run.py` tries to use all the cores on your machine. The above -command should take about 20-30 minutes on a current powerful laptop with 4-8 -cores. 
- -To run only one of the detectors use the `-d` option: - -`python2 run.py -d numenta` - -To see all options of this script type: - -`python2 run.py --help` - -### Optimizing, Scoring and Normalizing - -Once you have run either of the detectors herein against the NAB data you will need -to exit the Python 2 virtual environment and move into the main NAB directory. - -``` -(env) /NAB/nab/detectors/numenta -$ deactivate -/NAB/nab/detectors/numenta -$ cd ../../../ -/NAB -$ -``` - -Then follow the instructions in the main README to run optimization, scoring, and normalization, e.g.: - -`python run.py -d numenta,numentaTM --optimize --score --normalize` - -### Run a subset of NAB data files - -For debugging it is sometimes useful to be able to run your algorithm on a -subset of the NAB data files or on your own set of data files. You can do that -by creating a custom `combined_windows.json` file that only contains labels for -the files you want to run. This new file should be in exactly the same format as -`combined_windows.json` except it would only contain windows for the files you -are interested in. +Is the same as the default detectors, see [NAB README section Usage](https://github.com/htm-community/NAB/blob/master/README.md#usage) -**Example**: an example file containing two files is in -`labels/combined_windows_tiny.json`. (Under of the main NAB directory) The -following command shows you how to run NAB on a subset of labels: +### Example +Follow the instructions in the main README to run optimization, scoring, and normalization, e.g.: - python2 run.py -d numenta --detect --windowsFile labels/combined_windows_tiny.json +`python run.py -d htmcore --optimize --score --normalize` -This will run the `detect` phase of NAB on the data files specified in the above -JSON file. Note that scoring and normalization are not supported with this -option. Note also that you may see warning messages regarding the lack of labels -for other files. You can ignore these warnings. 
diff --git a/nab/detectors/htmcore/setup.py b/nab/detectors/htmcore/setup.py index cfebf8ebc..d0aa1ead1 100644 --- a/nab/detectors/htmcore/setup.py +++ b/nab/detectors/htmcore/setup.py @@ -105,7 +105,7 @@ def findRequirements(): "HTH.core detector from HTM community repo for NAB (in Python 3)"), license="AGPL", packages=find_packages(), - long_description=read(os.path.join(REPO_DIR, "htm.core","README.md")), + long_description=read(os.path.join(REPO_DIR, "README.md")), install_requires=requirements, entry_points={ "console_scripts": [ From 1649ec4e3ad522d1f7da369ee0215a16a7d554ff Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 25 Jul 2019 09:37:16 +0200 Subject: [PATCH 04/13] Htm.core detector: cleanup handleRecord() --- .gitignore | 1 + nab/detectors/htmcore/htmcore_detector.py | 75 +++++++++++++++-------- 2 files changed, 49 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 976011596..50652975f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ deprecated/ .idea/ .project .pydevproject +*.swp htmjava.log nab/detectors/htmjava/.pydevproject scripts/.ipynb_checkpoints/ diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py index adc02aa68..3dfe79996 100644 --- a/nab/detectors/htmcore/htmcore_detector.py +++ b/nab/detectors/htmcore/htmcore_detector.py @@ -15,6 +15,8 @@ # You should have received a copy of the GNU Affero Public License # along with this program. If not, see http://www.gnu.org/licenses. 
# +# Copyright (C) 2019, @breznak +# # http://numenta.org/licenses/ # ---------------------------------------------------------------------- @@ -45,16 +47,14 @@ def __init__(self, *args, **kwargs): super(HtmcoreDetector, self).__init__(*args, **kwargs) - self.anomalyLikelihood = None - # Keep track of value range for spatial anomaly detection - self.minVal = None - self.maxVal = None + ## API for controlling settings of htm.core HTM detector: # Set this to False if you want to get results based on raw scores # without using AnomalyLikelihood. This will give worse results, but # useful for checking the efficacy of AnomalyLikelihood. You will need # to re-optimize the thresholds when running with this setting. self.useLikelihood = True + self.useSpatialAnomaly = True def getAdditionalHeaders(self): @@ -65,43 +65,45 @@ def getAdditionalHeaders(self): def handleRecord(self, inputData): """Returns a tuple (anomalyScore, rawScore). - Internally to NuPIC "anomalyScore" corresponds to "likelihood_score" - and "rawScore" corresponds to "anomaly_score". Sorry about that. + @param inputData is a dict {"timestamp" : Timestamp(), "value" : float} + + @return tuple (anomalyScore, , ...) 
""" # Send it to Numenta detector and get back the results - result = [] #FIXME self.model.run(inputData) + result = self.modelRun(inputData["timestamp"], inputData["value"]) # Get the value value = inputData["value"] # Retrieve the anomaly score and write it to a file - rawScore = 0.5 #FIXME result.inferences["anomalyScore"] - - # Update min/max values and check if there is a spatial anomaly - spatialAnomaly = False #TODO make this computed in SP (and later improve) - if self.minVal != self.maxVal: - tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE - maxExpected = self.maxVal + tolerance - minExpected = self.minVal - tolerance - if value > maxExpected or value < minExpected: - spatialAnomaly = True - if self.maxVal is None or value > self.maxVal: - self.maxVal = value - if self.minVal is None or value < self.minVal: - self.minVal = value + rawScore = result + + ## handle anomalies: raw -> ?spatial or ?likelihood -> finalScore + spatialAnomaly = 0.0 #TODO make this computed in SP (and later improve) + if self.useSpatialAnomaly: + # Update min/max values and check if there is a spatial anomaly + if self.minVal != self.maxVal: + tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE + maxExpected = self.maxVal + tolerance + minExpected = self.minVal - tolerance + if value > maxExpected or value < minExpected: + spatialAnomaly = 1.0 + if self.maxVal is None or value > self.maxVal: + self.maxVal = value + if self.minVal is None or value < self.minVal: + self.minVal = value if self.useLikelihood: # Compute log(anomaly likelihood) anomalyScore = self.anomalyLikelihood.anomalyProbability(inputData["value"], rawScore, inputData["timestamp"]) logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore) - finalScore = logScore #TODO returls logScore and not probability? 
Fix that in our Likelihood.cpp; #TODO TM to provide anomaly {none, raw, likelihood} + temporalAnomaly = logScore #TODO TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood else: - finalScore = rawScore + temporalAnomaly = rawScore - if spatialAnomaly: - finalScore = 1.0 + anomalyScore = max(spatialAnomaly, temporalAnomaly) - return (finalScore, rawScore) + return (anomalyScore, rawScore) def initialize(self): @@ -111,8 +113,8 @@ def initialize(self): maxVal=self.inputMax+rangePadding minResolution=0.001 #TODO there params should form default_params for encoder etc + # setup anomaly likelihood if self.useLikelihood: - # Initialize the anomaly likelihood object numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0)) self.anomalyLikelihood = AnomalyLikelihood( #TODO make these default for py anomaly_likelihood? as NAB is likely tuned for best Likelihood! learningPeriod=numentaLearningPeriod, @@ -120,6 +122,13 @@ def initialize(self): reestimationPeriod=100 ) + # setup spatial anomaly + if self.useSpatialAnomaly: + # Keep track of value range for spatial anomaly detection + self.minVal = None + self.maxVal = None + + def _setupEncoderParams(self, encoderParams): # The encoder must expect the NAB-specific datafile headers @@ -133,3 +142,15 @@ def _setupEncoderParams(self, encoderParams): encoderParams["value"]["name"] = "value" self.sensorParams = encoderParams["value"] + + def modelRun(self, ts, val): + """ + Run a single pass through HTM model + + @params ts - Timestamp + @params val - float input value + + @return rawAnomalyScore computed for the `val` in this step + """ + #FIXME do enc->SP->TM->AN here + return 0.0 From ae84be0bfc084cc65485daa027c02b6503799d79 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 25 Jul 2019 12:09:27 +0200 Subject: [PATCH 05/13] Htm.core detector: params and Enc, SP, TM initialized params and initialization same as in hotgym.py example in htm.core repo. 
With one exception: RDSE resolution changed to match one of numenta_detector, not the former value swarmed in hotgym. --- nab/detectors/htmcore/htmcore_detector.py | 148 ++++++++++++++++++---- 1 file changed, 124 insertions(+), 24 deletions(-) diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py index 3dfe79996..e51cca5a7 100644 --- a/nab/detectors/htmcore/htmcore_detector.py +++ b/nab/detectors/htmcore/htmcore_detector.py @@ -23,11 +23,14 @@ import math # htm.core imports -from htm.bindings.algorithms import SpatialPooler as SP -from htm.bindings.algorithms import TemporalMemory as TM +from htm.bindings.sdr import SDR, Metrics +from htm.encoders.rdse import RDSE, RDSE_Parameters +from htm.encoders.date import DateEncoder +from htm.bindings.algorithms import SpatialPooler +from htm.bindings.algorithms import TemporalMemory +from htm.algorithms.anomaly_likelihood import AnomalyLikelihood from htm.bindings.algorithms import Predictor -from htm.algorithms.anomaly_likelihood import AnomalyLikelihood from nab.detectors.base import AnomalyDetector # Fraction outside of the range of values seen so far that will be considered @@ -36,7 +39,41 @@ # has been seen so far. SPATIAL_TOLERANCE = 0.05 - +## parameters to initialize our HTM model (encoder, SP, TM, AnomalyLikelihood) +# TODO optional: optimize these params, either manually and/or swarming.
But first keep comparable to numenta_detector +default_parameters = { + # there are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay) + 'enc': { + "value" : #RDSE for value + {'resolution': 0.001, 'size': 700, 'sparsity': 0.02}, + "time": #DateTime for timestamps + {'timeOfDay': (30, 1), 'weekend': 21}}, + 'predictor': {'sdrc_alpha': 0.1}, + 'sp': { + 'boostStrength': 3.0, + 'columnCount': 1638, + 'localAreaDensity': 0.04395604395604396, + 'potentialPct': 0.85, + 'synPermActiveInc': 0.04, + 'synPermConnected': 0.14, + 'synPermInactiveDec': 0.006}, + 'tm': { + 'activationThreshold': 17, + 'cellsPerColumn': 13, + 'initialPerm': 0.21, + 'maxSegmentsPerCell': 128, + 'maxSynapsesPerSegment': 64, + 'minThreshold': 10, + 'newSynapseCount': 32, + 'permanenceDec': 0.1, + 'permanenceInc': 0.1}, + 'anomaly': { + 'likelihood': { + #'learningPeriod': int(math.floor(self.probationaryPeriod / 2.0)), + #'probationaryPeriod': self.probationaryPeriod-default_parameters["anomaly"]["likelihood"]["learningPeriod"], + 'probationaryPct': 0.1, + 'reestimationPeriod': 100}} +} class HtmcoreDetector(AnomalyDetector): """ @@ -53,13 +90,28 @@ def __init__(self, *args, **kwargs): # without using AnomalyLikelihood. This will give worse results, but # useful for checking the efficacy of AnomalyLikelihood. You will need # to re-optimize the thresholds when running with this setting. - self.useLikelihood = True - self.useSpatialAnomaly = True + self.useLikelihood = True + self.useSpatialAnomaly = True + self.verbose = False + + ## internal members + # (listed here for easier understanding) + # initialized in `initialize()` + self.encTimestamp = None + self.encValue = None + self.sp = None + self.tm = None + self.anLike = None + # optional debug info + self.enc_info = None + self.sp_info = None + self.tm_info = None + #TODO optional: also return an error metric on predictions (RMSE, R2,...) 
def getAdditionalHeaders(self): """Returns a list of strings.""" - return ["raw_score"] #TODO add "prediction" + return ["raw_score"] #TODO optional: add "prediction" def handleRecord(self, inputData): @@ -79,7 +131,7 @@ def handleRecord(self, inputData): rawScore = result ## handle anomalies: raw -> ?spatial or ?likelihood -> finalScore - spatialAnomaly = 0.0 #TODO make this computed in SP (and later improve) + spatialAnomaly = 0.0 #TODO optional: make this computed in SP (and later improve) if self.useSpatialAnomaly: # Update min/max values and check if there is a spatial anomaly if self.minVal != self.maxVal: @@ -97,30 +149,16 @@ def handleRecord(self, inputData): # Compute log(anomaly likelihood) anomalyScore = self.anomalyLikelihood.anomalyProbability(inputData["value"], rawScore, inputData["timestamp"]) logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore) - temporalAnomaly = logScore #TODO TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood + temporalAnomaly = logScore #TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood else: temporalAnomaly = rawScore anomalyScore = max(spatialAnomaly, temporalAnomaly) - return (anomalyScore, rawScore) def initialize(self): - # Get config params, setting the RDSE resolution - rangePadding = abs(self.inputMax - self.inputMin) * 0.2 - minVal=self.inputMin-rangePadding - maxVal=self.inputMax+rangePadding - minResolution=0.001 #TODO there params should form default_params for encoder etc - - # setup anomaly likelihood - if self.useLikelihood: - numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0)) - self.anomalyLikelihood = AnomalyLikelihood( #TODO make these default for py anomaly_likelihood? as NAB is likely tuned for best Likelihood! 
- learningPeriod=numentaLearningPeriod, - estimationSamples=self.probationaryPeriod-numentaLearningPeriod, - reestimationPeriod=100 - ) + parameters = default_parameters # setup spatial anomaly if self.useSpatialAnomaly: @@ -128,6 +166,68 @@ def initialize(self): self.minVal = None self.maxVal = None + ## setup Enc, SP, TM, Likelihood + # Make the Encoders. These will convert input data into binary representations. + self.encTimestamp = DateEncoder(timeOfDay= parameters["enc"]["time"]["timeOfDay"], + weekend = parameters["enc"]["time"]["weekend"]) + + scalarEncoderParams = RDSE_Parameters() + scalarEncoderParams.size = parameters["enc"]["value"]["size"] + scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"] + scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"] + + self.encValue = RDSE( scalarEncoderParams ) + encodingWidth = (self.encTimestamp.size + self.encValue.size) + self.enc_info = Metrics( [encodingWidth], 999999999 ) + + # Make the HTM. SpatialPooler & TemporalMemory & associated tools. 
+ # SpatialPooler + spParams = parameters["sp"] + self.sp = SpatialPooler( + inputDimensions = (encodingWidth,), + columnDimensions = (spParams["columnCount"],), + potentialPct = spParams["potentialPct"], + potentialRadius = encodingWidth, + globalInhibition = True, + localAreaDensity = spParams["localAreaDensity"], + synPermInactiveDec = spParams["synPermInactiveDec"], + synPermActiveInc = spParams["synPermActiveInc"], + synPermConnected = spParams["synPermConnected"], + boostStrength = spParams["boostStrength"], + wrapAround = True + ) + self.sp_info = Metrics( sp.getColumnDimensions(), 999999999 ) + + # TemporalMemory + tmParams = parameters["tm"] + self.tm = TemporalMemory( + columnDimensions = (spParams["columnCount"],), + cellsPerColumn = tmParams["cellsPerColumn"], + activationThreshold = tmParams["activationThreshold"], + initialPermanence = tmParams["initialPerm"], + connectedPermanence = spParams["synPermConnected"], + minThreshold = tmParams["minThreshold"], + maxNewSynapseCount = tmParams["newSynapseCount"], + permanenceIncrement = tmParams["permanenceInc"], + permanenceDecrement = tmParams["permanenceDec"], + predictedSegmentDecrement = 0.0, + maxSegmentsPerCell = tmParams["maxSegmentsPerCell"], + maxSynapsesPerSegment = tmParams["maxSynapsesPerSegment"] + ) + self.tm_info = Metrics( [tm.numberOfCells()], 999999999 ) + + # setup likelihood, these settings are used in NAB + if self.useLikelihood: + anParams = parameters["anomaly"]["likelihood"] + learningPeriod = int(math.floor(self.probationaryPeriod / 2.0)) + self.anomalyLikelihood = AnomalyLikelihood( + learningPeriod= learningPeriod, + estimationSamples= self.probationaryPeriod - learningPeriod, + reestimationPeriod= anParams["reestimationPeriod"]) + # Predictor + self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] ) + predictor_resolution = 1 + def _setupEncoderParams(self, encoderParams): From 7b49b09e3147de6f3c9a9483082de4fa316e06e6 Mon Sep 17 00:00:00 2001 From: 
Marek Otahal Date: Thu, 25 Jul 2019 12:51:58 +0200 Subject: [PATCH 06/13] Htm.core runable detector! implemented the model and runs nice! (Model same as hotgym.py) --- nab/detectors/htmcore/htmcore_detector.py | 102 ++++++++++++++-------- 1 file changed, 64 insertions(+), 38 deletions(-) diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py index e51cca5a7..1d777a1c0 100644 --- a/nab/detectors/htmcore/htmcore_detector.py +++ b/nab/detectors/htmcore/htmcore_detector.py @@ -21,6 +21,7 @@ # ---------------------------------------------------------------------- import math +import datetime # htm.core imports from htm.bindings.sdr import SDR, Metrics @@ -106,7 +107,8 @@ def __init__(self, *args, **kwargs): self.enc_info = None self.sp_info = None self.tm_info = None - #TODO optional: also return an error metric on predictions (RMSE, R2,...) + # internal helper variables: + self.inputs_ = [] def getAdditionalHeaders(self): @@ -122,39 +124,8 @@ def handleRecord(self, inputData): @return tuple (anomalyScore, , ...) 
""" # Send it to Numenta detector and get back the results - result = self.modelRun(inputData["timestamp"], inputData["value"]) + return self.modelRun(inputData["timestamp"], inputData["value"]) - # Get the value - value = inputData["value"] - - # Retrieve the anomaly score and write it to a file - rawScore = result - - ## handle anomalies: raw -> ?spatial or ?likelihood -> finalScore - spatialAnomaly = 0.0 #TODO optional: make this computed in SP (and later improve) - if self.useSpatialAnomaly: - # Update min/max values and check if there is a spatial anomaly - if self.minVal != self.maxVal: - tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE - maxExpected = self.maxVal + tolerance - minExpected = self.minVal - tolerance - if value > maxExpected or value < minExpected: - spatialAnomaly = 1.0 - if self.maxVal is None or value > self.maxVal: - self.maxVal = value - if self.minVal is None or value < self.minVal: - self.minVal = value - - if self.useLikelihood: - # Compute log(anomaly likelihood) - anomalyScore = self.anomalyLikelihood.anomalyProbability(inputData["value"], rawScore, inputData["timestamp"]) - logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore) - temporalAnomaly = logScore #TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood - else: - temporalAnomaly = rawScore - - anomalyScore = max(spatialAnomaly, temporalAnomaly) - return (anomalyScore, rawScore) def initialize(self): @@ -196,7 +167,7 @@ def initialize(self): boostStrength = spParams["boostStrength"], wrapAround = True ) - self.sp_info = Metrics( sp.getColumnDimensions(), 999999999 ) + self.sp_info = Metrics( self.sp.getColumnDimensions(), 999999999 ) # TemporalMemory tmParams = parameters["tm"] @@ -214,7 +185,7 @@ def initialize(self): maxSegmentsPerCell = tmParams["maxSegmentsPerCell"], maxSynapsesPerSegment = tmParams["maxSynapsesPerSegment"] ) - self.tm_info = Metrics( [tm.numberOfCells()], 999999999 ) + 
self.tm_info = Metrics( [self.tm.numberOfCells()], 999999999 ) # setup likelihood, these settings are used in NAB if self.useLikelihood: @@ -231,7 +202,7 @@ def initialize(self): def _setupEncoderParams(self, encoderParams): - # The encoder must expect the NAB-specific datafile headers + # The encoder must expect the NAB-specific datafile headers #FIXME can be removed? encoderParams["timestamp_dayOfWeek"] = encoderParams.pop("c0_dayOfWeek") encoderParams["timestamp_timeOfDay"] = encoderParams.pop("c0_timeOfDay") encoderParams["timestamp_timeOfDay"]["fieldname"] = "timestamp" @@ -252,5 +223,60 @@ def modelRun(self, ts, val): @return rawAnomalyScore computed for the `val` in this step """ - #FIXME do enc->SP->TM->AN here - return 0.0 + ## run data through our model pipeline: enc -> SP -> TM -> Anomaly + self.inputs_.append( val ) + + # 1. Encoding + # Call the encoders to create bit representations for each value. These are SDR objects. + dateBits = self.encTimestamp.encode(ts) + valueBits = self.encValue.encode(float(val)) + # Concatenate all these encodings into one large encoding for Spatial Pooling. + encoding = SDR( self.encTimestamp.size + self.encValue.size ).concatenate([valueBits, dateBits]) + self.enc_info.addData( encoding ) + + # 2. Spatial Pooler + # Create an SDR to represent active columns, This will be populated by the + # compute method below. It must have the same dimensions as the Spatial Pooler. + activeColumns = SDR( self.sp.getColumnDimensions() ) + # Execute Spatial Pooling algorithm over input space. + self.sp.compute(encoding, True, activeColumns) + self.sp_info.addData( activeColumns ) + + # 3. Temporal Memory + # Execute Temporal Memory algorithm over active mini-columns. + self.tm.compute(activeColumns, learn=True) + self.tm_info.addData( self.tm.getActiveCells().flatten() ) #FIXME for anomaly, should we use active cells, or winner cells? And also convert to columns! 
+ + # 4.1 (optional) Predictor #TODO optional + #TODO optional: also return an error metric on predictions (RMSE, R2,...) + + # 4.2 Anomaly + # handle spatial, contextual (raw, likelihood) anomalies + # -Spatial + spatialAnomaly = 0.0 #TODO optional: make this computed in SP (and later improve) + if self.useSpatialAnomaly: + # Update min/max values and check if there is a spatial anomaly + if self.minVal != self.maxVal: + tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE + maxExpected = self.maxVal + tolerance + minExpected = self.minVal - tolerance + if val > maxExpected or val < minExpected: + spatialAnomaly = 1.0 + if self.maxVal is None or val > self.maxVal: + self.maxVal = val + if self.minVal is None or val < self.minVal: + self.minVal = val + + # -temporal (raw) + raw = self.tm.anomaly + temporalAnomaly = raw + + if self.useLikelihood: + # Compute log(anomaly likelihood) + like = self.anomalyLikelihood.anomalyProbability(val, raw, ts) + logScore = self.anomalyLikelihood.computeLogLikelihood(like) + temporalAnomaly = logScore #TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood + + anomalyScore = max(spatialAnomaly, temporalAnomaly) # this is the "main" anomaly, compared in NAB + + return (anomalyScore, raw) From 02bbf1f1b60060e310e138e12d7a9ecdeaff71f1 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 25 Jul 2019 12:53:49 +0200 Subject: [PATCH 07/13] Htm.core detector remove relic from numenta's Model API as unused in this (cleaner) HTM model. 
--- nab/detectors/htmcore/htmcore_detector.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py index 1d777a1c0..593e82a7e 100644 --- a/nab/detectors/htmcore/htmcore_detector.py +++ b/nab/detectors/htmcore/htmcore_detector.py @@ -200,20 +200,6 @@ def initialize(self): predictor_resolution = 1 - - def _setupEncoderParams(self, encoderParams): - # The encoder must expect the NAB-specific datafile headers #FIXME can be removed? - encoderParams["timestamp_dayOfWeek"] = encoderParams.pop("c0_dayOfWeek") - encoderParams["timestamp_timeOfDay"] = encoderParams.pop("c0_timeOfDay") - encoderParams["timestamp_timeOfDay"]["fieldname"] = "timestamp" - encoderParams["timestamp_timeOfDay"]["name"] = "timestamp" - encoderParams["timestamp_weekend"] = encoderParams.pop("c0_weekend") - encoderParams["value"] = encoderParams.pop("c1") - encoderParams["value"]["fieldname"] = "value" - encoderParams["value"]["name"] = "value" - - self.sensorParams = encoderParams["value"] - def modelRun(self, ts, val): """ Run a single pass through HTM model From 3c11d61edf21157a95e784cf0666e6cfb34f9b85 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 25 Jul 2019 13:18:12 +0200 Subject: [PATCH 08/13] Updated README --- README.md | 112 +++++++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 7b4ef0512..5cdb62e35 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -The Numenta Anomaly Benchmark [![Build Status](https://travis-ci.org/numenta/NAB.svg?branch=master)](https://travis-ci.org/numenta/NAB) +# The Numenta Anomaly Benchmark [![Build Status](https://travis-ci.org/numenta/NAB.svg?branch=master)](https://travis-ci.org/numenta/NAB) ----------------------------- Welcome. This repository contains the data and scripts comprising the Numenta @@ -28,7 +28,24 @@ Ahmad, S., Lavin, A., Purdy, S., & Agha, Z. 
(2017). Unsupervised real-time anomaly detection for streaming data. Neurocomputing, Available online 2 June 2017, ISSN 0925-2312, https://doi.org/10.1016/j.neucom.2017.04.070 -#### Scoreboard +## Community edition + +This repo is [NAB community edition](https://github.com/htm-community/NAB) which is a fork of the original [Numenta's NAB](https://github.com/numenta/NAB). One of the reasons for forking +was a lack of developer activity in the upstream repo. + +### Features: + +- [x] Identical algorithms and datasets as the Numenta's NAB. So the results are `reproducible`. +- [x] `Python 3` codebase (as Python 2 reaches end-of-life at 1/1/2020 and Numenta's not yet ported) +- [x] additional community-provided detectors: + - `htmcore`: currently the only HTM implementation able to run in NAB natively in python 3. (with many improvements in [Community HTM implementation, successor of nupic.core](https://github.com/htm-community/htm.core/). + - `numenta`, `numenta_TM` detectors (original from Numenta) made compatible with the Py3 codebase (only requires Py2 installed) +- [ ] additional datasets + - TBD, none so far + +Statement: We'll try to upstream any changes, new detectors and datasets to upstream Numenta's NAB, when the devs have time to apply the changes. + +## Scoreboard The NAB scores are normalized such that the maximum possible is 100.0 (i.e. the perfect detector), and a baseline of 0.0 is determined by the "null" detector (which makes no detections). @@ -64,22 +81,6 @@ The NAB scores are normalized such that the maximum possible is 100.0 (i.e. the Please see [the wiki section on contributing algorithms](https://github.com/numenta/NAB/wiki/NAB-Contributions-Criteria#anomaly-detection-algorithms) for discussion on posting algorithms to the scoreboard. -#### Corpus - -The NAB corpus of 58 timeseries data files is designed to provide data for research -in streaming anomaly detection.
It is comprised of both -real-world and artifical timeseries data containing labeled anomalous periods of behavior. - -The majority of the data is real-world from a variety of sources such as AWS -server metrics, Twitter volume, advertisement clicking metrics, traffic data, -and more. All data is included in the repository, with more details in the [data -readme](https://github.com/numenta/NAB/tree/master/data). We are in the process -of adding more data, and actively searching for more data. Please contact us at -[nab@numenta.org](mailto:nab@numenta.org) if you have similar data (ideally with -known anomalies) that you would like to see incorporated into NAB. - -The NAB version will be updated whenever new data (and corresponding labels) is -added to the corpus; NAB is currently in v1.0. #### Additional Scores @@ -88,13 +89,8 @@ For comparison, here are the NAB V1.0 scores for some additional flavors of HTM. * Numenta HTM using NuPIC v.0.5.6: This version of NuPIC was used to generate the data for the paper mentioned above (Unsupervised real-time anomaly detection for streaming data. Neurocomputing, ISSN 0925-2312, https://doi.org/10.1016/j.neucom.2017.04.070). If you are interested in replicating the results shown in the paper, use this version. * [HTM Java](https://github.com/numenta/htm.java) is a Community-Driven Java port of HTM. * [nab-comportex](https://github.com/floybix/nab-comportex) is a twist on HTM anomaly detection using [Comportex](https://github.com/htm-community/comportex), a community-driven HTM implementation in Clojure. Please see [Felix Andrew's blog post](http://floybix.github.io/2016/07/01/attempting-nab) on experiments with this algorithm. -* NumentaTM HTM detector uses the implementation of temporal memory found -[here](https://github.com/numenta/nupic.core/blob/master/src/nupic/algorithms/TemporalMemory.hpp). -* Numenta HTM detector with no likelihood uses the raw anomaly scores directly. 
To -run without likelihood, set the variable `self.useLikelihood` in -[numenta_detector.py](https://github.com/numenta/NAB/blob/master/nab/detectors/numenta/numenta_detector.py) -to `False`. - +* NumentaTM HTM detector uses the implementation of temporal memory found [here](https://github.com/numenta/nupic.core/blob/master/src/nupic/algorithms/TemporalMemory.hpp). +* Numenta HTM detector with no likelihood uses the raw anomaly scores directly. To run without likelihood, set the variable `self.useLikelihood` in [numenta_detector.py](https://github.com/numenta/NAB/blob/master/nab/detectors/numenta/numenta_detector.py) to `False`. @@ -107,16 +103,35 @@ to `False`. | Numenta HTM*, no likelihood | 53.62 | 34.15 | 61.89 | \* From NuPIC version 0.5.6 ([available on PyPI](https://pypi.python.org/pypi/nupic/0.5.6)). - † Algorithm was an entry to the [2016 NAB Competition](http://numenta.com/blog/2016/08/10/numenta-anomaly-benchmark-nab-competition-2016-winners/). -Installing NAB 1.0 + + +## Corpus + +The NAB corpus of 58 timeseries data files is designed to provide data for research +in streaming anomaly detection. It is comprised of both +real-world and artifical timeseries data containing labeled anomalous periods of behavior. + +The majority of the data is real-world from a variety of sources such as AWS +server metrics, Twitter volume, advertisement clicking metrics, traffic data, +and more. All data is included in the repository, with more details in the [data +readme](https://github.com/numenta/NAB/tree/master/data). We are in the process +of adding more data, and actively searching for more data. Please contact us at +[nab@numenta.org](mailto:nab@numenta.org) if you have similar data (ideally with +known anomalies) that you would like to see incorporated into NAB. + +The NAB version will be updated whenever new data (and corresponding labels) is +added to the corpus; NAB is currently in v1.0. 
+ + +## Installing NAB 1.0 ------------------ ### Supported Platforms - OSX 10.9 and higher -- Amazon Linux (via AMI) +- Linux Other platforms may work but have not been tested. @@ -125,34 +140,22 @@ Other platforms may work but have not been tested. You need to manually install the following: -- [Python 2.7](https://www.python.org/download/) +- [Python 3](https://www.python.org/download/) - [pip](https://pip.pypa.io/en/latest/installing.html) -- [NumPy](http://www.numpy.org/) -- [NuPIC](http://www.github.com/numenta/nupic) (only required if running the Numenta detector) - -##### Download this repository -Use the Github links provided in the right sidebar. +#### Download this repository -##### Install the Python requirements +Use the Github [download links](https://github.com/htm-community/NAB/archive/master.zip) provided in the right sidebar, +or `git clone https://github.com/htm-community/NAB` - cd NAB - (sudo) pip install -r requirements.txt - -This will install the required modules. - -##### Install NAB +#### Install NAB Recommended: + cd NAB pip install . --user -> Note: If NuPIC is not already installed, the version specified in -`NAB/requirements.txt` will be installed. If NuPIC is already installed, it - will not be re-installed. - - If you want to manage dependency versions yourself, you can skip dependencies with: @@ -198,13 +201,11 @@ follow the directions below to "Run a subset of NAB". ##### Run HTM with NAB -First make sure NuPIC is installed and working properly. Then: - cd /path/to/nab - python run.py -d numenta --detect --optimize --score --normalize + python run.py -d htmcore --detect --optimize --score --normalize -This will run the Numenta detector only and produce normalized scores. Note that -by default it tries to use all the cores on your machine. The above command +This will run the community HTM detector `htmcore` (to run Numenta's detector use `-d numenta`) and produce normalized scores. 
+Note that by default it tries to use all the cores on your machine. The above command should take about 20-30 minutes on a current powerful laptop with 4-8 cores. For debugging you can run subsets of the data files by modifying and specifying specific label files (see section below). Please type: @@ -229,11 +230,10 @@ the specific version of NuPIC (and associated nupic.core) that is noted in the This will run everything and produce results files for all anomaly detection methods. Several algorithms are included in the repo, such as the Numenta -HTM anomaly detection method, as well as methods from the [Etsy -Skyline](https://github.com/etsy/skyline) anomaly detection library, a sliding -window detector, Bayes Changepoint, and so on. This will also pass those results -files to the scoring script to generate final NAB scores. **Note**: this option -will take many many hours to run. +HTM anomaly detection method, as well as methods from the [Etsy Skyline](https://github.com/etsy/skyline) anomaly detection library, +a sliding window detector, Bayes Changepoint, and so on. +This will also pass those results files to the scoring script to generate final NAB scores. +**Note**: this option will take many many hours to run. ##### Run subset of NAB data files From a1c262fa3b555ddd0f2e12df65be802e1a04b896 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 25 Jul 2019 13:41:50 +0200 Subject: [PATCH 09/13] README: add htmcore detector's results not so good, need to optimize parameters. --- README.md | 37 +++++++++++++++++++------------------ config/thresholds.json | 16 +++++++++++++++- results/final_results.json | 9 +++++++-- 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 5cdb62e35..1dde4fa9e 100644 --- a/README.md +++ b/README.md @@ -49,22 +49,23 @@ Statement: We'll try to upstream any changes, new detectors and datasets to upst The NAB scores are normalized such that the maximum possible is 100.0 (i.e. 
the perfect detector), and a baseline of 0.0 is determined by the "null" detector (which makes no detections). -| Detector | Standard Profile | Reward Low FP | Reward Low FN | -|---------------|------------------|---------------|---------------| -| Perfect | 100.0 | 100.0 | 100.0 | -| [Numenta HTM](https://github.com/numenta/nupic)* | 70.5-69.7 | 62.6-61.7 | 75.2-74.2 | -| [CAD OSE](https://github.com/smirmik/CAD)† | 69.9 | 67.0 | 73.2 | -| [earthgecko Skyline](https://github.com/earthgecko/skyline) | 58.2 | 46.2 | 63.9 | -| [KNN CAD](https://github.com/numenta/NAB/tree/master/nab/detectors/knncad)† | 58.0 | 43.4 | 64.8 | -| [Relative Entropy](http://www.hpl.hp.com/techreports/2011/HPL-2011-8.pdf) | 54.6 | 47.6 | 58.8 | -| [Random Cut Forest](http://proceedings.mlr.press/v48/guha16.pdf) **** | 51.7 | 38.4 | 59.7 | -| [Twitter ADVec v1.0.0](https://github.com/twitter/AnomalyDetection)| 47.1 | 33.6 | 53.5 | -| [Windowed Gaussian](https://github.com/numenta/NAB/blob/master/nab/detectors/gaussian/windowedGaussian_detector.py) | 39.6 | 20.9 | 47.4 | -| [Etsy Skyline](https://github.com/etsy/skyline) | 35.7 | 27.1 | 44.5 | -| Bayesian Changepoint** | 17.7 | 3.2 | 32.2 | -| [EXPoSE](https://arxiv.org/abs/1601.06602v3) | 16.4 | 3.2 | 26.9 | -| Random*** | 11.0 | 1.2 | 19.5 | -| Null | 0.0 | 0.0 | 0.0 | +| Detector | Standard Profile | Reward Low FP | Reward Low FN | Detector name | Time (s) | +|---------------|------------------|---------------|---------------|---------------|------------| +| Perfect | 100.0 | 100.0 | 100.0 | | | +| [Numenta HTM](https://github.com/numenta/nupic)* | 70.5-69.7 | 62.6-61.7 | 75.2-74.2 | `numenta` | | +| [CAD OSE](https://github.com/smirmik/CAD)† | 69.9 | 67.0 | 73.2 | | | +| [earthgecko Skyline](https://github.com/earthgecko/skyline) | 58.2 | 46.2 | 63.9 | | | +| [KNN CAD](https://github.com/htm-community/NAB/tree/master/nab/detectors/knncad)† | 58.0 | 43.4 | 64.8 | | | +| [Relative 
Entropy](http://www.hpl.hp.com/techreports/2011/HPL-2011-8.pdf) | 54.6 | 47.6 | 58.8 | | | +| [Random Cut Forest](http://proceedings.mlr.press/v48/guha16.pdf) **** | 51.7 | 38.4 | 59.7 | | | +| [htm.core](https://github.com/htm-community/htm.core/) | 50.83 | 49.95 | 52.64 | `htmcore` | | +| [Twitter ADVec v1.0.0](https://github.com/twitter/AnomalyDetection)| 47.1 | 33.6 | 53.5 | | | +| [Windowed Gaussian](https://github.com/htm-community/NAB/blob/master/nab/detectors/gaussian/windowedGaussian_detector.py) | 39.6 | 20.9 | 47.4 | | | +| [Etsy Skyline](https://github.com/etsy/skyline) | 35.7 | 27.1 | 44.5 | | | +| Bayesian Changepoint** | 17.7 | 3.2 | 32.2 | | | +| [EXPoSE](https://arxiv.org/abs/1601.06602v3) | 16.4 | 3.2 | 26.9 | | | +| Random*** | 11.0 | 1.2 | 19.5 | | | +| Null | 0.0 | 0.0 | 0.0 | | | *As of NAB v1.0* @@ -98,8 +99,8 @@ For comparison, here are the NAB V1.0 scores for some additional flavors of HTM. |---------------|---------|------------------|---------------| | Numenta HTMusing NuPIC v0.5.6* | 70.1 | 63.1 | 74.3 | | [nab-comportex](https://github.com/floybix/nab-comportex)† | 64.6 | 58.8 | 69.6 | -| [NumentaTM HTM](https://github.com/numenta/NAB/blob/master/nab/detectors/numenta/numentaTM_detector.py)* | 64.6 | 56.7 | 69.2 | -| [HTM Java](https://github.com/numenta/NAB/blob/master/nab/detectors/htmjava) | 56.8 | 50.7 | 61.4 | +| [NumentaTM HTM](https://github.com/htm-community/NAB/blob/master/nab/detectors/numenta/numentaTM_detector.py)* | 64.6 | 56.7 | 69.2 | +| [HTM Java](https://github.com/htm-community/NAB/blob/master/nab/detectors/htmjava) | 56.8 | 50.7 | 61.4 | | Numenta HTM*, no likelihood | 53.62 | 34.15 | 61.89 | \* From NuPIC version 0.5.6 ([available on PyPI](https://pypi.python.org/pypi/nupic/0.5.6)). 
diff --git a/config/thresholds.json b/config/thresholds.json index 69127a41c..67b028c7b 100644 --- a/config/thresholds.json +++ b/config/thresholds.json @@ -55,6 +55,20 @@ "threshold": 0.9947875976562506 } }, + "htmcore": { + "reward_low_FN_rate": { + "score": -48.81063346506114, + "threshold": 0.5120920067439757 + }, + "reward_low_FP_rate": { + "score": -0.12559751317561307, + "threshold": 1.0 + }, + "standard": { + "score": 1.9244399025813976, + "threshold": 1.0 + } + }, "htmjava": { "reward_low_FN_rate": { "score": 8.764037437134272, @@ -209,4 +223,4 @@ "threshold": 1.0 } } -} +} \ No newline at end of file diff --git a/results/final_results.json b/results/final_results.json index 8161ade25..9e73752e3 100644 --- a/results/final_results.json +++ b/results/final_results.json @@ -14,6 +14,11 @@ "reward_low_FP_rate": 3.1909311068965485, "standard": 16.43666922426724 }, + "htmcore": { + "reward_low_FN_rate": 52.640622567511166, + "reward_low_FP_rate": 49.94586314087259, + "standard": 50.82949995800923 + }, "htmjava": { "reward_low_FN_rate": 70.42407766520115, "reward_low_FP_rate": 53.25694971610345, @@ -35,9 +40,9 @@ "standard": 70.10105611943965 }, "numentaTM": { - "reward_low_FN_rate": 69.185068229060349, + "reward_low_FN_rate": 69.18506822906035, "reward_low_FP_rate": 56.665308225043105, - "standard": 64.553464412543107 + "standard": 64.55346441254311 }, "random": { "reward_low_FN_rate": 25.876351314080456, From c893cb2c7f899cb99c47899233fed0eb072bcb8e Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 25 Jul 2019 14:05:05 +0200 Subject: [PATCH 10/13] Htmcore detector: add model params from numenta_detector for information only, not used --- nab/detectors/htmcore/htmcore_detector.py | 74 +++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py index 593e82a7e..ba230f837 100644 --- a/nab/detectors/htmcore/htmcore_detector.py +++ 
b/nab/detectors/htmcore/htmcore_detector.py @@ -41,6 +41,80 @@ SPATIAL_TOLERANCE = 0.05 ## parameters to initialize our HTM model (encoder, SP, TM, AnomalyLikelihood) +# These are all the original params from `numenta` detector: +# These values are not directly used, only for information here +numenta_params = { + u'aggregationInfo': {u'seconds': 0, u'fields': [], u'months': 0, u'days': 0, u'years': 0, u'hours': 0, u'microseconds': 0, u'weeks': 0, u'minutes': 0, u'milliseconds': 0}, + u'model': u'HTMPrediction', + u'version': 1, + u'predictAheadTime': None, + u'modelParams': { + u'sensorParams': { + u'sensorAutoReset': None, + u'encoders': { + 'value': { + u'name': 'value', + 'resolution': 0.95846153846153836, + u'seed': 42, + u'fieldname': 'value', + u'type': u'RandomDistributedScalarEncoder'}, + 'timestamp_dayOfWeek': None, + 'timestamp_timeOfDay': { + u'fieldname': 'timestamp', + u'timeOfDay': [21, 9.49], + u'type': u'DateEncoder', + u'name': 'timestamp'}, + 'timestamp_weekend': None}, + u'verbosity': 0}, + u'anomalyParams': { + u'anomalyCacheRecords': None, + u'autoDetectThreshold': None, + u'autoDetectWaitRecords': 5030}, + u'spParams': { + u'columnCount': 2048, + u'synPermInactiveDec': 0.0005, + u'spatialImp': u'cpp', + u'inputWidth': 0, + u'spVerbosity': 0, + u'synPermConnected': 0.2, + u'synPermActiveInc': 0.003, + u'potentialPct': 0.8, + u'numActiveColumnsPerInhArea': 40, + u'boostStrength': 0.0, + u'globalInhibition': 1, + u'seed': 1956}, + u'trainSPNetOnlyIfRequested': False, + u'clParams': { + u'alpha': 0.035828933612158, + u'verbosity': 0, + u'steps': u'1', + u'regionName': + u'SDRClassifierRegion'}, + u'tmParams': { + u'columnCount': 2048, + u'activationThreshold': 13, + u'pamLength': 3, + u'cellsPerColumn': 32, + u'permanenceDec': 0.1, + u'minThreshold': 10, + u'inputWidth': 2048, + u'maxSynapsesPerSegment': 32, + u'outputType': u'normal', + u'initialPerm': 0.21, + u'globalDecay': 0.0, + u'maxAge':0, + u'newSynapseCount': 20, + u'maxSegmentsPerCell': 
128, + u'permanenceInc': 0.1, + u'temporalImp': u'cpp', + u'seed': 1960, + u'verbosity': 0}, + u'tmEnable': True, + u'clEnable': False, + u'spEnable': True, + u'inferenceType': u'TemporalAnomaly'} +} + # TODO optional: optimize these params, either manually and/or swarming. But first keep comparable to numenta_detector default_parameters = { # there are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay) From 4f2e8a511d83887507df37101cfa91c25f5d9c0a Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 25 Jul 2019 14:31:47 +0200 Subject: [PATCH 11/13] Htmcore: add params comparable with numenta detector use the same params as much as possible to have comparison, print statistics --- nab/detectors/htmcore/htmcore_detector.py | 84 ++++++++++++++++++----- 1 file changed, 67 insertions(+), 17 deletions(-) diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py index ba230f837..fa75c04d8 100644 --- a/nab/detectors/htmcore/htmcore_detector.py +++ b/nab/detectors/htmcore/htmcore_detector.py @@ -115,31 +115,34 @@ u'inferenceType': u'TemporalAnomaly'} } +# these params can be used with our model and are equivalent (where possible) to that of numenta_detector # TODO optional: optimize these params, either manually and/or swarming. 
But first keep comparable to numenta_detector -default_parameters = { +parameters_numenta_comparable = { # there are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay) 'enc': { "value" : #RDSE for value - {'resolution': 0.001, 'size': 700, 'sparsity': 0.02}, - "time": #DateTime for timestamps - {'timeOfDay': (30, 1), 'weekend': 21}}, + {'resolution': 0.001, 'size': 4000, 'sparsity': 0.2}, + "time": { #DateTime for timestamps + 'timeOfDay': (21, 9.49), + 'weekend': 0 #21 TODO try impact of weekend + }}, 'predictor': {'sdrc_alpha': 0.1}, 'sp': { - 'boostStrength': 3.0, - 'columnCount': 1638, - 'localAreaDensity': 0.04395604395604396, - 'potentialPct': 0.85, - 'synPermActiveInc': 0.04, - 'synPermConnected': 0.14, - 'synPermInactiveDec': 0.006}, + 'boostStrength': 0.0, + 'columnCount': 2048, + 'localAreaDensity': 0.04395604395604396, # numenta's numActiveColumnsPerInhArea=40 deprecated, compute corresponding value? + 'potentialPct': 0.8, + 'synPermActiveInc': 0.005, + 'synPermConnected': 0.2, + 'synPermInactiveDec': 0.0005}, 'tm': { - 'activationThreshold': 17, - 'cellsPerColumn': 13, + 'activationThreshold': 13, + 'cellsPerColumn': 32, 'initialPerm': 0.21, 'maxSegmentsPerCell': 128, - 'maxSynapsesPerSegment': 64, + 'maxSynapsesPerSegment': 32, 'minThreshold': 10, - 'newSynapseCount': 32, + 'newSynapseCount': 20, 'permanenceDec': 0.1, 'permanenceInc': 0.1}, 'anomaly': { @@ -150,6 +153,43 @@ 'reestimationPeriod': 100}} } +# default parameters used with our model (originaly taken from `hotgym.py`) +# TODO optional: optimize these params, either manually and/or swarming. 
+default_paramenters = { +# there are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay) + 'enc': { + "value" : #RDSE for value + {'resolution': 0.001, 'size': 700, 'sparsity': 0.02}, + "time": #DateTime for timestamps + {'timeOfDay': (30, 1), 'weekend': 21}}, + 'predictor': {'sdrc_alpha': 0.1}, + 'sp': { + 'boostStrength': 3.0, + 'columnCount': 1638, + 'localAreaDensity': 0.04395604395604396, + 'potentialPct': 0.85, + 'synPermActiveInc': 0.04, + 'synPermConnected': 0.14, + 'synPermInactiveDec': 0.006}, + 'tm': { + 'activationThreshold': 17, + 'cellsPerColumn': 13, + 'initialPerm': 0.21, + 'maxSegmentsPerCell': 128, + 'maxSynapsesPerSegment': 64, + 'minThreshold': 10, + 'newSynapseCount': 32, + 'permanenceDec': 0.1, + 'permanenceInc': 0.1}, + 'anomaly': { + 'likelihood': { + #'learningPeriod': int(math.floor(self.probationaryPeriod / 2.0)), + #'probationaryPeriod': self.probationaryPeriod-default_parameters["anomaly"]["likelihood"]["learningPeriod"], + 'probationaryPct': 0.1, + 'reestimationPeriod': 100}} +} + + class HtmcoreDetector(AnomalyDetector): """ This detector uses an HTM based anomaly detection technique. @@ -167,7 +207,7 @@ def __init__(self, *args, **kwargs): # to re-optimize the thresholds when running with this setting. 
self.useLikelihood = True self.useSpatialAnomaly = True - self.verbose = False + self.verbose = True ## internal members # (listed here for easier understanding) @@ -183,6 +223,7 @@ def __init__(self, *args, **kwargs): self.tm_info = None # internal helper variables: self.inputs_ = [] + self.iteration_ = 0 def getAdditionalHeaders(self): @@ -203,7 +244,9 @@ def handleRecord(self, inputData): def initialize(self): - parameters = default_parameters + # toggle parameters here + #parameters = default_parameters + parameters = parameters_numenta_comparable # setup spatial anomaly if self.useSpatialAnomaly: @@ -285,6 +328,7 @@ def modelRun(self, ts, val): """ ## run data through our model pipeline: enc -> SP -> TM -> Anomaly self.inputs_.append( val ) + self.iteration_ += 1 # 1. Encoding # Call the encoders to create bit representations for each value. These are SDR objects. @@ -339,4 +383,10 @@ def modelRun(self, ts, val): anomalyScore = max(spatialAnomaly, temporalAnomaly) # this is the "main" anomaly, compared in NAB + # 5. print stats + if self.verbose and self.iteration_ % 1000 == 0: + print(enc_info) + print(sp_info) + print(tm_info) + return (anomalyScore, raw) From 01f4c41d30935619462be4eb37f41a705f5e0de5 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 25 Jul 2019 14:42:10 +0200 Subject: [PATCH 12/13] remove wrong comment FIXME for anomaly computation is not needed, as TM provides TM.anomaly already! --- nab/detectors/htmcore/htmcore_detector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py index fa75c04d8..8792f9ce1 100644 --- a/nab/detectors/htmcore/htmcore_detector.py +++ b/nab/detectors/htmcore/htmcore_detector.py @@ -349,7 +349,7 @@ def modelRun(self, ts, val): # 3. Temporal Memory # Execute Temporal Memory algorithm over active mini-columns. 
self.tm.compute(activeColumns, learn=True) - self.tm_info.addData( self.tm.getActiveCells().flatten() ) #FIXME for anomaly, should we use active cells, or winner cells? And also convert to columns! + self.tm_info.addData( self.tm.getActiveCells().flatten() ) # 4.1 (optional) Predictor #TODO optional #TODO optional: also return an error metric on predictions (RMSE, R2,...) From 3097a723e04bb1e97cfb4606873f84db98d26ef8 Mon Sep 17 00:00:00 2001 From: ctrl-z-9000-times Date: Mon, 29 Jul 2019 09:12:30 -0400 Subject: [PATCH 13/13] Improved parameters for htm.core detector. --- config/thresholds.json | 12 +- nab/detectors/htmcore/htmcore_detector.py | 139 +++------------------- results/final_results.json | 6 +- 3 files changed, 24 insertions(+), 133 deletions(-) diff --git a/config/thresholds.json b/config/thresholds.json index 67b028c7b..f3099ebc1 100644 --- a/config/thresholds.json +++ b/config/thresholds.json @@ -57,16 +57,16 @@ }, "htmcore": { "reward_low_FN_rate": { - "score": -48.81063346506114, - "threshold": 0.5120920067439757 + "score": -1.6511067861578468, + "threshold": 0.5014187204194446 }, "reward_low_FP_rate": { - "score": -0.12559751317561307, - "threshold": 1.0 + "score": 20.39992539458499, + "threshold": 0.5122987896930875 }, "standard": { - "score": 1.9244399025813976, - "threshold": 1.0 + "score": 30.348893213842153, + "threshold": 0.5014187204194446 } }, "htmjava": { diff --git a/nab/detectors/htmcore/htmcore_detector.py b/nab/detectors/htmcore/htmcore_detector.py index 8792f9ce1..9b60412d1 100644 --- a/nab/detectors/htmcore/htmcore_detector.py +++ b/nab/detectors/htmcore/htmcore_detector.py @@ -40,89 +40,15 @@ # has been seen so far. 
SPATIAL_TOLERANCE = 0.05 -## parameters to initialize our HTM model (encoder, SP, TM, AnomalyLikelihood) -# These are all the original params from `numenta` detector: -# These values are not directly used, only for information here -numenta_params = { - u'aggregationInfo': {u'seconds': 0, u'fields': [], u'months': 0, u'days': 0, u'years': 0, u'hours': 0, u'microseconds': 0, u'weeks': 0, u'minutes': 0, u'milliseconds': 0}, - u'model': u'HTMPrediction', - u'version': 1, - u'predictAheadTime': None, - u'modelParams': { - u'sensorParams': { - u'sensorAutoReset': None, - u'encoders': { - 'value': { - u'name': 'value', - 'resolution': 0.95846153846153836, - u'seed': 42, - u'fieldname': 'value', - u'type': u'RandomDistributedScalarEncoder'}, - 'timestamp_dayOfWeek': None, - 'timestamp_timeOfDay': { - u'fieldname': 'timestamp', - u'timeOfDay': [21, 9.49], - u'type': u'DateEncoder', - u'name': 'timestamp'}, - 'timestamp_weekend': None}, - u'verbosity': 0}, - u'anomalyParams': { - u'anomalyCacheRecords': None, - u'autoDetectThreshold': None, - u'autoDetectWaitRecords': 5030}, - u'spParams': { - u'columnCount': 2048, - u'synPermInactiveDec': 0.0005, - u'spatialImp': u'cpp', - u'inputWidth': 0, - u'spVerbosity': 0, - u'synPermConnected': 0.2, - u'synPermActiveInc': 0.003, - u'potentialPct': 0.8, - u'numActiveColumnsPerInhArea': 40, - u'boostStrength': 0.0, - u'globalInhibition': 1, - u'seed': 1956}, - u'trainSPNetOnlyIfRequested': False, - u'clParams': { - u'alpha': 0.035828933612158, - u'verbosity': 0, - u'steps': u'1', - u'regionName': - u'SDRClassifierRegion'}, - u'tmParams': { - u'columnCount': 2048, - u'activationThreshold': 13, - u'pamLength': 3, - u'cellsPerColumn': 32, - u'permanenceDec': 0.1, - u'minThreshold': 10, - u'inputWidth': 2048, - u'maxSynapsesPerSegment': 32, - u'outputType': u'normal', - u'initialPerm': 0.21, - u'globalDecay': 0.0, - u'maxAge':0, - u'newSynapseCount': 20, - u'maxSegmentsPerCell': 128, - u'permanenceInc': 0.1, - u'temporalImp': u'cpp', - 
u'seed': 1960, - u'verbosity': 0}, - u'tmEnable': True, - u'clEnable': False, - u'spEnable': True, - u'inferenceType': u'TemporalAnomaly'} -} - -# these params can be used with our model and are equivalent (where possible) to that of numenta_detector -# TODO optional: optimize these params, either manually and/or swarming. But first keep comparable to numenta_detector parameters_numenta_comparable = { # there are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay) 'enc': { - "value" : #RDSE for value - {'resolution': 0.001, 'size': 4000, 'sparsity': 0.2}, - "time": { #DateTime for timestamps + "value" : # RDSE for value + {'resolution': 0.001, + 'size': 4000, + 'sparsity': 0.10 + }, + "time": { # DateTime for timestamps 'timeOfDay': (21, 9.49), 'weekend': 0 #21 TODO try impact of weekend }}, @@ -130,9 +56,9 @@ 'sp': { 'boostStrength': 0.0, 'columnCount': 2048, - 'localAreaDensity': 0.04395604395604396, # numenta's numActiveColumnsPerInhArea=40 deprecated, compute corresponding value? - 'potentialPct': 0.8, - 'synPermActiveInc': 0.005, + 'localAreaDensity': 40/2048, + 'potentialPct': 0.4, + 'synPermActiveInc': 0.003, 'synPermConnected': 0.2, 'synPermInactiveDec': 0.0005}, 'tm': { @@ -153,42 +79,6 @@ 'reestimationPeriod': 100}} } -# default parameters used with our model (originaly taken from `hotgym.py`) -# TODO optional: optimize these params, either manually and/or swarming. 
-default_paramenters = { -# there are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay) - 'enc': { - "value" : #RDSE for value - {'resolution': 0.001, 'size': 700, 'sparsity': 0.02}, - "time": #DateTime for timestamps - {'timeOfDay': (30, 1), 'weekend': 21}}, - 'predictor': {'sdrc_alpha': 0.1}, - 'sp': { - 'boostStrength': 3.0, - 'columnCount': 1638, - 'localAreaDensity': 0.04395604395604396, - 'potentialPct': 0.85, - 'synPermActiveInc': 0.04, - 'synPermConnected': 0.14, - 'synPermInactiveDec': 0.006}, - 'tm': { - 'activationThreshold': 17, - 'cellsPerColumn': 13, - 'initialPerm': 0.21, - 'maxSegmentsPerCell': 128, - 'maxSynapsesPerSegment': 64, - 'minThreshold': 10, - 'newSynapseCount': 32, - 'permanenceDec': 0.1, - 'permanenceInc': 0.1}, - 'anomaly': { - 'likelihood': { - #'learningPeriod': int(math.floor(self.probationaryPeriod / 2.0)), - #'probationaryPeriod': self.probationaryPeriod-default_parameters["anomaly"]["likelihood"]["learningPeriod"], - 'probationaryPct': 0.1, - 'reestimationPeriod': 100}} -} - class HtmcoreDetector(AnomalyDetector): """ @@ -313,8 +203,8 @@ def initialize(self): estimationSamples= self.probationaryPeriod - learningPeriod, reestimationPeriod= anParams["reestimationPeriod"]) # Predictor - self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] ) - predictor_resolution = 1 + # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] ) + # predictor_resolution = 1 def modelRun(self, ts, val): @@ -385,8 +275,9 @@ def modelRun(self, ts, val): # 5. 
print stats if self.verbose and self.iteration_ % 1000 == 0: - print(enc_info) - print(sp_info) - print(tm_info) + # print(self.enc_info) + # print(self.sp_info) + # print(self.tm_info) + pass return (anomalyScore, raw) diff --git a/results/final_results.json b/results/final_results.json index 9e73752e3..295cef20b 100644 --- a/results/final_results.json +++ b/results/final_results.json @@ -15,9 +15,9 @@ "standard": 16.43666922426724 }, "htmcore": { - "reward_low_FN_rate": 52.640622567511166, - "reward_low_FP_rate": 49.94586314087259, - "standard": 50.82949995800923 + "reward_low_FN_rate": 66.1922106936328, + "reward_low_FP_rate": 58.7930712907694, + "standard": 63.081419488725054 }, "htmjava": { "reward_low_FN_rate": 70.42407766520115,