Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions diskann-providers/src/index/diskann_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3006,15 +3006,21 @@ pub(crate) mod tests {

let neighbor_accessor = &mut index.provider().neighbors();
// check that we have an unpruned graph
let stats = index.get_degree_stats(neighbor_accessor).await.unwrap();
let stats = index
.get_degree_stats(neighbor_accessor, index.provider().iter())
.await
.unwrap();
assert!(stats.max_degree.into_usize() > max_degree);

// prune graph and check that max_degree is respected
index
.prune_range(&FullPrecision, ctx, 0..256)
.await
.unwrap();
let stats = index.get_degree_stats(neighbor_accessor).await.unwrap();
let stats = index
.get_degree_stats(neighbor_accessor, index.provider().iter())
.await
.unwrap();
assert!(stats.max_degree.into_usize() <= max_degree);
}

Expand Down Expand Up @@ -3293,14 +3299,17 @@ pub(crate) mod tests {
.await
.unwrap();
let mut accessor_sat = inmem::FullAccessor::new(index_sat.provider());
let res_sat = index_sat.get_degree_stats(&mut accessor_sat).await.unwrap();
let res_sat = index_sat
.get_degree_stats(&mut accessor_sat, index_sat.provider().iter())
.await
.unwrap();

let index_unsat = create_retry_saturated_index(NonZeroU32::new(1).unwrap(), false)
.await
.unwrap();
let mut accessor_unsat = inmem::FullAccessor::new(index_unsat.provider());
let res_unsat = index_sat
.get_degree_stats(&mut accessor_unsat)
let res_unsat = index_unsat
.get_degree_stats(&mut accessor_unsat, index_unsat.provider().iter())
.await
.unwrap();
assert!(
Expand All @@ -3315,14 +3324,17 @@ pub(crate) mod tests {
.await
.unwrap();
let mut accessor_sat = inmem::FullAccessor::new(index_sat.provider());
let res_sat = index_sat.get_degree_stats(&mut accessor_sat).await.unwrap();
let res_sat = index_sat
.get_degree_stats(&mut accessor_sat, index_sat.provider().iter())
.await
.unwrap();

let index_unsat = create_retry_saturated_index(NonZeroU32::new(1).unwrap(), false)
.await
.unwrap();
let mut accessor_unsat = inmem::FullAccessor::new(index_unsat.provider());
let res_unsat = index_sat
.get_degree_stats(&mut accessor_unsat)
.get_degree_stats(&mut accessor_unsat, index_unsat.provider().iter())
.await
.unwrap();
assert!(
Expand Down
7 changes: 4 additions & 3 deletions diskann-providers/src/index/wrapped_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,12 +406,13 @@ where
.block_on(self.inner.count_reachable_nodes(start_points, accessor))
}

pub fn get_degree_stats<NA>(&self, accessor: &mut NA) -> ANNResult<DegreeStats>
pub fn get_degree_stats<NA, Itr>(&self, accessor: &mut NA, itr: Itr) -> ANNResult<DegreeStats>
where
for<'a> &'a DP: IntoIterator<Item = DP::InternalId, IntoIter: Send>,
Itr: IntoIterator<Item = DP::InternalId, IntoIter: Send> + Send,
NA: AsNeighbor<Id = DP::InternalId>,
{
self.handle.block_on(self.inner.get_degree_stats(accessor))
self.handle
.block_on(self.inner.get_degree_stats(accessor, itr))
}
}

Expand Down
56 changes: 53 additions & 3 deletions diskann/src/graph/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ pub struct DegreeStats {
pub cnt_less_than_two: usize, // Number of vertices with degree less than 2
}

// Test-only: invokes the crate-local `verbose_eq!` macro for `DegreeStats`, naming its
// `max_degree`, `avg_degree`, `min_degree`, and `cnt_less_than_two` fields.
// NOTE(review): presumably this generates a field-by-field equality check with detailed
// mismatch reporting for tests — confirm against the macro definition in `crate::test::cmp`.
#[cfg(test)]
crate::test::cmp::verbose_eq!(DegreeStats {
max_degree,
avg_degree,
min_degree,
cnt_less_than_two,
});

/// Statistics collected during a search operation.
///
/// This struct provides detailed metrics about the search process, including
Expand Down Expand Up @@ -2491,9 +2499,13 @@ where
}
}

pub fn get_degree_stats<NA>(&self, accessor: &mut NA) -> impl SendFuture<ANNResult<DegreeStats>>
pub fn get_degree_stats<NA, Itr>(
&self,
accessor: &mut NA,
itr: Itr,
) -> impl SendFuture<ANNResult<DegreeStats>>
where
for<'a> &'a DP: IntoIterator<Item = DP::InternalId, IntoIter: Send>,
Itr: IntoIterator<Item = DP::InternalId, IntoIter: Send> + Send,
NA: AsNeighbor<Id = DP::InternalId>,
{
async move {
Comment thread
JordanMaples marked this conversation as resolved.
Expand All @@ -2504,7 +2516,7 @@ where
let mut total_live_points = 0;

let mut neighbors = AdjacencyList::with_capacity(self.max_degree_with_slack());
for id in &self.data_provider {
for id in itr {
total_live_points += 1;
accessor.get_neighbors(id, &mut neighbors).await?;
Comment thread
JordanMaples marked this conversation as resolved.
Comment thread
JordanMaples marked this conversation as resolved.
let pool_size = neighbors.len();
Expand All @@ -2517,6 +2529,17 @@ where
}

let total_f32 = total as f32;

// Guard against the division by zero below when the iterator yielded no ids.
if total_live_points == 0 {
return Ok(DegreeStats {
max_degree: 0,
avg_degree: 0.0,
min_degree: 0,
cnt_less_than_two: 0,
});
}

Ok(DegreeStats {
max_degree: u32::try_from(max_degree_usize)?,
avg_degree: total_f32 / total_live_points as f32,
Expand Down Expand Up @@ -3164,3 +3187,30 @@ struct BatchIdMismatch {
batch_len: usize,
ids_len: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that a provider implementing only `is_match` gets an `on_visit` that yields
    /// `Accept` for a matching id and `Reject` for a non-matching one.
    #[test]
    fn query_label_provider_on_visit_default() {
        /// Label provider that matches even ids only; `on_visit` is intentionally not
        /// overridden so the trait-provided implementation is what gets exercised.
        #[derive(Debug)]
        struct EvenIdsOnly;

        impl QueryLabelProvider<u32> for EvenIdsOnly {
            fn is_match(&self, id: u32) -> bool {
                id.is_multiple_of(2)
            }
        }

        let provider = EvenIdsOnly;

        // Id 0 is even, so it matches and must be accepted.
        let even_decision = provider.on_visit(Neighbor::new(0, 1.0));
        assert!(matches!(even_decision, QueryVisitDecision::Accept(_)));

        // Id 1 is odd, so it does not match and must be rejected.
        let odd_decision = provider.on_visit(Neighbor::new(1, 1.0));
        assert!(matches!(odd_decision, QueryVisitDecision::Reject));
    }
}
12 changes: 5 additions & 7 deletions diskann/src/graph/test/cases/consolidate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ use crate::{
test::tokio::current_thread_runtime,
};

use super::helpers::{
assert_neighbors, create_2d_unit_square, generate_2d_square_adjacency_list, setup_2d_square,
};
use super::helpers::{assert_neighbors, generate_2d_square_adjacency_list, setup_2d_square};

/// Build a small index with explicit vectors and adjacency lists for consolidation testing.
///
Expand Down Expand Up @@ -137,7 +135,7 @@ fn consolidate_deleted_vertex_returns_deleted() {

let adjacency_lists = generate_2d_square_adjacency_list();

let index = setup_2d_square(create_2d_unit_square(), adjacency_lists, 4);
let index = setup_2d_square(adjacency_lists, 4);
let ctx = test_provider::Context::new();
let strategy = Strategy::new();

Expand All @@ -158,7 +156,7 @@ fn consolidate_nothing_to_do_returns_complete() {

let adjacency_lists = generate_2d_square_adjacency_list();

let index = setup_2d_square(create_2d_unit_square(), adjacency_lists, 4);
let index = setup_2d_square(adjacency_lists, 4);
let ctx = test_provider::Context::new();
let strategy = Strategy::new();

Expand All @@ -184,7 +182,7 @@ fn consolidate_repairs_after_deletion() {
AdjacencyList::from_iter_untrusted([0, 1, 2, 3]),
];

let index = setup_2d_square(create_2d_unit_square(), adjacency_lists, 4);
let index = setup_2d_square(adjacency_lists, 4);
let ctx = test_provider::Context::new();
let strategy = Strategy::new();

Expand Down Expand Up @@ -223,7 +221,7 @@ fn consolidate_prune_only_no_deleted_neighbors() {

// pruned_degree=2, but start node (4) has degree 4 → must prune
let adjacency_lists = generate_2d_square_adjacency_list();
let index = setup_2d_square(create_2d_unit_square(), adjacency_lists, 2);
let index = setup_2d_square(adjacency_lists, 2);
let ctx = test_provider::Context::new();
let strategy = Strategy::new();

Expand Down
8 changes: 1 addition & 7 deletions diskann/src/graph/test/cases/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

use std::{iter, sync::Arc};

use diskann_utils::views::Matrix;
use diskann_vector::distance::Metric;

use crate::{
Expand All @@ -21,21 +20,16 @@ use crate::{
provider::NeighborAccessor,
};

/// Generate the 2D unit square vectors using the synthetic grid infrastructure.
pub(super) fn create_2d_unit_square() -> Matrix<f32> {
Grid::Two.data(2)
}

/// Build a 2D square index with a start point at (0.5, 0.5).
///
/// The `pruned_degree` controls the index's target degree. The provider's max degree
/// is set to the largest adjacency list size to allow pre-populating graphs that
/// may exceed the index limit (useful for consolidation tests).
pub(super) fn setup_2d_square(
vectors: Matrix<f32>,
adjacency_lists: Vec<AdjacencyList<u32>>,
pruned_degree: usize,
) -> Arc<DiskANNIndex<Provider>> {
let vectors = Grid::Two.data(2);
let num_points = vectors.nrows();
let dim = vectors.ncols();
assert!(
Expand Down
Loading
Loading