diff --git a/DESCRIPTION b/DESCRIPTION index d0c5dbb..02138b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,10 +8,7 @@ Authors@R: c( comment = c(ORCID = "0000-0002-1057-3721")), person("Pascal", "Mettes", role = "ctb", - comment = "Author of the initial C++ implementation of the SLIC Superpixel algorithm for image data"), - person("Charles", "Jekel", - role = "ctb", - comment = "Author of underlying C++ code for dtw")) + comment = "Author of the initial C++ implementation of the SLIC Superpixel algorithm for image data")) Description: Creates superpixels based on input spatial data. This package works on spatial data with one variable (e.g., continuous raster), many variables (e.g., RGB rasters), and spatial patterns (e.g., areas in categorical rasters). It is based on the SLIC algorithm (Achanta et al. (2012) ), and readapts it to work with arbitrary dissimilarity measures. diff --git a/man/supercells-package.Rd b/man/supercells-package.Rd index 8510c83..002de90 100644 --- a/man/supercells-package.Rd +++ b/man/supercells-package.Rd @@ -23,7 +23,6 @@ Useful links: Other contributors: \itemize{ \item Pascal Mettes (Author of the initial C++ implementation of the SLIC Superpixel algorithm for image data) [contributor] - \item Charles Jekel (Author of underlying C++ code for dtw) [contributor] } } diff --git a/src/distances.cpp b/src/distances.cpp index d08fcee..c6fb557 100644 --- a/src/distances.cpp +++ b/src/distances.cpp @@ -1,5 +1,4 @@ #include "distances.h" -#include "dtw/include/DTW.hpp" // using namespace cpp11::literals; // so we can use ""_nm syntax double get_vals_dist(const std::vector& values1, const std::vector& values2, @@ -9,7 +8,7 @@ double get_vals_dist(const std::vector& values1, const std::vector& values1, const std::vector& return dist; } -double dtw3(const std::vector& values1, const std::vector& values2){ - double p = 2; // the p-norm to use; 2.0 = euclidean, 1.0 = manhattan +double dtw1d(const std::vector& values1, const std::vector& values2){ int len1 = values1.size(); + int len2 = values2.size(); + if (len1 == 0 || len2 == 0) { + return NAN; + } - std::vector > a; - std::vector > b; + std::vector prev(len2); + std::vector curr(len2); - a.reserve(len1); - b.reserve(len1); + prev[0] = std::fabs(values1[0] - values2[0]); + for (int j = 1; j < len2; j++) { + prev[j] = prev[j - 1] + std::fabs(values1[0] - values2[j]); + } - for(int i = 0; i < len1; i++){ - std::vector pair1(2); std::vector pair2(2); - pair1[0] = i; - pair1[1] = values1[i]; - pair2[0] = i; - pair2[1] = values2[i]; - a.push_back(pair1); b.push_back(pair2); + for (int i = 1; i < len1; i++) { + curr[0] = prev[0] + std::fabs(values1[i] - values2[0]); + for (int j = 1; j < len2; j++) { + double cost = std::fabs(values1[i] - values2[j]); + double best = std::fmin(std::fmin(prev[j], curr[j - 1]), prev[j - 1]); + curr[j] = cost + best; + } + prev.swap(curr); } - double scost = DTW::dtw_distance_only(a, b, p); - return(scost); + return prev[len2 - 1]; } double dtw2d(const std::vector& values1, const std::vector& values2){ - double p = 2; // the p-norm to use; 2.0 = euclidean, 1.0 = manhattan int len1 = values1.size() / 2; + int len2 = values2.size() / 2; + if (len1 == 0 || len2 == 0) { + return NAN; + } - std::vector > a; - std::vector > b; + std::vector prev(len2); + std::vector curr(len2); - a.reserve(len1); - b.reserve(len1); + auto cost = [&](int i, int j) { + double dx = values1[i + len1] - values2[j + len2]; + double dy = values1[i] - values2[j]; + return std::sqrt(dx * dx + dy * dy); + }; - for(int i = 0; i < len1; i++){ - std::vector pair1(2); std::vector pair2(2); - pair1[0] = values1[i + (len1 - 1)]; - pair1[1] = values1[i]; - pair2[0] = values2[i + (len1 - 1)]; - pair2[1] = values2[i]; - a.push_back(pair1); b.push_back(pair2); + prev[0] = cost(0, 0); + for (int j = 1; j < len2; j++) { + prev[j] = prev[j - 1] + cost(0, j); + } + + for (int i = 1; i < len1; i++) { + curr[0] = prev[0] + cost(i, 0); + for (int j = 1; j < len2; j++) { + double best = std::fmin(std::fmin(prev[j], curr[j - 1]), prev[j - 1]); + curr[j] = cost(i, j) + best; + } + prev.swap(curr); } - double scost = DTW::dtw_distance_only(a, b, p); - return(scost); + return prev[len2 - 1]; } diff --git a/src/distances.h b/src/distances.h index e81b86d..4c6a5bf 100644 --- a/src/distances.h +++ b/src/distances.h @@ -11,7 +11,7 @@ double euclidean(const std::vector& values1, const std::vector& double manhattan(const std::vector& values1, const std::vector& values2); double jensen_shannon(const std::vector& values1, const std::vector& values2); double custom_log2(double x); -double dtw3(const std::vector& values1, const std::vector& values2); +double dtw1d(const std::vector& values1, const std::vector& values2); double dtw2d(const std::vector& values1, const std::vector& values2); double custom_distance(const std::vector& values1, const std::vector& values2, const std::string& dist_name); diff --git a/src/dtw/LICENSE b/src/dtw/LICENSE deleted file mode 100644 index 837df6c..0000000 --- a/src/dtw/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2019 Charles Jekel - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/src/dtw/README.md b/src/dtw/README.md deleted file mode 100644 index d03644c..0000000 --- a/src/dtw/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# DTW_cpp -A small Dynamic Time Warping (DTW) single header library for C++ - -[DTW.hpp](https://github.com/cjekel/DTW_cpp/blob/master/include/DTW.hpp) computes the DTW distance between two c++ vectors ```a``` and ```b```! - -[![Build Status](https://travis-ci.com/cjekel/DTW_cpp.svg?branch=master)](https://travis-ci.com/cjekel/DTW_cpp) [![Coverage Status](https://coveralls.io/repos/github/cjekel/DTW_cpp/badge.svg?branch=master)](https://coveralls.io/github/cjekel/DTW_cpp?branch=master) - -# Features - -- Supports N-Dimensional data -- ```a``` and ```b```can have different number of data points -- Compute the distance using any ```p```-norm -- ```DTW::dtw_distance_only(a, b, p);``` function which returns only the DTW distance -- ```DTW::DTW MyDtw (a, b, p);``` class contains the pairwise distance vector, DTW distance vector, DTW distance, and a function to calculate the DTW alignment path - -# What is Dynamic Time Warping ? - -Dynamic Time Warping (DTW) is an algorithm to measure the similarity between two temporal curves. The wiki page on [DTW](https://en.wikipedia.org/wiki/Dynamic_time_warping) is a great place to learn more. - -![Image of two different curves](https://raw.githubusercontent.com/cjekel/similarity_measures/master/images/TwoCurves.png) - -Consider the above Numerical and Experimental curves in 2D space. DTW can be used to measure the similarity between the two curves. A DTW distance of zero would mean that the warped curves match exactly. - -The order of data points matters. Each curve is a sequence of data points, with a known beginning and ending. - -# Examples - -Check out the two [examples](https://github.com/cjekel/DTW_cpp/tree/master/examples). - -# Tests - -Run [run_tests.sh](https://github.com/cjekel/DTW_cpp/blob/master/run_tests.sh) in a linux environment. -- travisci tests using Ubuntu Xenial and g++ version 5.4.0 -- also tested on openSUSE Leap 15.1 and g++ version 7.4.0 - -# Requirements - -- C++11 standard or later \ No newline at end of file diff --git a/src/dtw/include/DTW.hpp b/src/dtw/include/DTW.hpp deleted file mode 100644 index 6b7926d..0000000 --- a/src/dtw/include/DTW.hpp +++ /dev/null @@ -1,245 +0,0 @@ -// -// DTW.hpp -// -// Copyright (c) 2019 Charles Jekel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -// - - -#include -#include -#include -#include -#include -const std::string DTW_VERSION = "0.0.1"; - - -namespace DTW -{ - /** - * Compute the p_norm between two 1D c++ vectors. - * - * The p_norm is sometimes referred to as the Minkowski norm. Common - * p_norms include p=2.0 for the euclidean norm, or p=1.0 for the - * manhattan distance. See also - * https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm - * - * @a 1D vector of m size, where m is the number of dimensions. - * @b 1D vector of m size (must be the same size as b). - * @p value of norm to use. - */ - double p_norm (std::vector a, std::vector b, double p) { - double d = 0; - for (int i = 0; i < a.size() ; i++) { - d += std::pow(std::abs(a[i] - b[i]), p); - } - return std::pow(d, 1.0/p); - }; - - /** - * Compute the DTW distance between two 2D c++ vectors. - * - * The c++ vectors can have different number of data points, but must - * have the same number of dimensions. This will raise - * std::invalid_argument if the dimmensions of a and b are different. - * Here the vectors should be formatted as - * [number_of_data_points][number_of_dimensions]. The DTW distance can - * be computed for any p_norm. See the wiki for more DTW info. - * https://en.wikipedia.org/wiki/Dynamic_time_warping - * - * @a 2D vector of [number_of_data_points][number_of_dimensions]. - * @b 2D vector of [number_of_data_points][number_of_dimensions]. - * @p value of p_norm to use. - */ - double dtw_distance_only(std::vector> a, - std::vector> b, - double p) - { - int n = a.size(); - int o = b.size(); - int a_m = a[0].size(); - int b_m = b[0].size(); - if (a_m != b_m) - { - throw std::invalid_argument( "a and b must have the same number of dimensions!" ); - } - std::vector > d(n, std::vector (o, 0.0)); - d[0][0] = p_norm(a[0], b[0], p); - for (int i=1; i < n; i++) - { - d[i][0] = d[i-1][0] + p_norm(a[i], b[0], p); - } - for (int i=1; i < o ; i++) - { - d[0][i] = d[0][i-1] + p_norm(a[0], b[i], p); - } - for (int i=1; i < n ; i++) - { - for (int j=1; j < o; j++){ - d[i][j] = p_norm(a[i], b[j], p) + std::fmin(std::fmin(d[i-1][j], d[i][j-1]), d[i-1][j-1]); - } - } - return d[n-1][o-1]; - }; - - /** - * Assembles a 2D c++ DTW distance vector. - * - * The DTW distance vector represents the matrix of DTW distances for - * all possible alignments. The c++ vectors must have the same 2D size. - * d.size() == c.size() == number of a data points, where d[0].size == - * c[0].size() == number of b data points. - * - * @d 2D DTW distance vector of [a data points][b data points]. - * @c 2D pairwise distance vector between every a and b data point. - */ - std::vector > dtw_vector_assemble(std::vector> d, - std::vector> c) - { - int n = d.size(); - int o = d[0].size(); - for (int i=1; i < n; i++) - { - d[i][0] = d[i-1][0] + c[i][0]; - } - for (int i=1; i < o ; i++) - { - d[0][i] = d[0][i-1] + c[0][i]; - } - for (int i=1; i < n ; i++) - { - for (int j=1; j < o; j++){ - d[i][j] = c[i][j] + std::fmin(std::fmin(d[i-1][j], d[i][j-1]), d[i-1][j-1]); - } - } - return d; - }; - - - class DTW { - - public: - std::vector > a_vector, b_vector; - int a_data, n_dim, b_data; - double p, distance; - std::vector > dtw_vector, pairwise_vector; - - /** - * Class for Dynamic Time Warping distance between two 2D c++ vectors. - * - * The c++ vectors can have different number of data points, but must - * have the same number of dimensions. This will raise - * std::invalid_argument if the dimmensions of a and b are different. - * Here the vectors should be formatted as - * [number_of_data_points][number_of_dimensions]. The DTW distance can - * be computed for any p_norm. See the wiki for more DTW info. - * https://en.wikipedia.org/wiki/Dynamic_time_warping - * - * @a 2D vector of [number_of_data_points][number_of_dimensions]. - * @b 2D vector of [number_of_data_points][number_of_dimensions]. - * @p value of p_norm to use. - * - * This class stores the following: - * - * @DTW.distance Computed DTW distance. - * @DTW.pairwise_vector P_norm distance between each a and b data point. - * @DTW.dtw_vector DTW distance matrix. - * - * The class has the following methods: - * - * @DTW.path() Returns a 2D vector of the alignment path between a and b. - */ - DTW (std::vector > a, std::vector > b, double p) : - a_vector(a), b_vector(b), p(p) { - a_data = a.size(); - b_data = b.size(); - int a_m = a_vector[0].size(); - int b_m = b_vector[0].size(); - - if (a_m != b_m) - { - throw std::invalid_argument( "a and b must have the same number of dimensions!" ); - } - else - { - n_dim = a_m; - } - - std::vector > c(a_data, std::vector (b_data, 0.0)); - - for (int i=0; i < a_data; i++){ - for (int j=0; j < b_data; j++) { - c[i][j] = p_norm(a_vector[i], b_vector[j], p); - } - } - pairwise_vector = c; - std::vector > d(a_data, std::vector (b_data, 0.0)); - d[0][0] = pairwise_vector[0][0]; - dtw_vector = dtw_vector_assemble(d, pairwise_vector); - distance = dtw_vector[a_data-1][b_data-1]; - }; - - /** - * Returns a 2D vector of the alignment path between a and b. - * - * The DTW path is a 2D integer vector, where [path_length][i] represents - * the i'th data point on curve a, and [path_length][j] represents the j'th - * data point on curve b. The path_length will depend upon the optimal DTW - * alignment. - */ - std::vector > path () { - int i = a_data - 1; - int j = b_data - 1; - std::vector > my_path = { {i, j} }; - while (i > 0 || j > 0) { - if (i == 0) - { - j -= 1; - } - else if (j == 0) - { - i -= 1; - } - else - { - double temp_step = std::fmin(std::fmin(dtw_vector[i-1][j], dtw_vector[i][j-1]), - dtw_vector[i-1][j-1]); - if (temp_step == dtw_vector[i-1][j]) - { - i -= 1; - } - else if (temp_step == dtw_vector[i][j-1]) - { - j -= 1; - } - else - { - i -= 1; - j -= 1; - } - } - my_path.push_back ({i, j}); - } - std::reverse(my_path.begin(), my_path.end()); - return my_path; - } - }; - -} diff --git a/vignettes/articles/v2-changes-since-v1.Rmd b/vignettes/articles/v2-changes-since-v1.Rmd index 02ea380..6b6900f 100644 --- a/vignettes/articles/v2-changes-since-v1.Rmd +++ b/vignettes/articles/v2-changes-since-v1.Rmd @@ -73,7 +73,7 @@ terra::plot(vol_ids) ``` While, `sc_slic()` is the main function, the other two functions are useful for specific tasks. -For example, `sc_slic_points()` is helpful for visualizing initial supercell centers, while `sc_slic_raster()` is useful for large datasets where polygon outputs may be too memory-intensive. +For example, `sc_slic_points()` is helpful for inspecting supercell center locations, while `sc_slic_raster()` is useful for large datasets where polygon outputs may be too memory-intensive. ## Compactness tuning and iteration diagnostics @@ -155,6 +155,5 @@ plot(sf::st_geometry(vol_sc_slic0), add = TRUE, lwd = 0.6, border = "violet") - New utilities: Added `use_meters()` helper for `step` and `use_adaptive()` for adaptive compactness in `sc_slic()`/`sc_slic_points()`/`sc_slic_raster()`. - Behavior: Since version 1.0, the way coordinates are summarized internally has changed, and results in versions after 1.0 may differ slightly from those prior to 1.0. - Performance: Improved speed and memory efficiency. -- New utilities: Added experimental `sc_merge_supercells()` for adjacency-constrained greedy merging. - Chunking and memory: Chunk sizes align to `step`, deterministic ID offsets are used for file-backed merges, and `options(supercells.chunk_mem_gb)` controls memory. - Parallelization: Removed **future**-based parallel chunking.