-
Notifications
You must be signed in to change notification settings - Fork 65
Bugfix for 0 4 0 release #208
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 46 commits
74097cf
d59d7b6
ff733f5
1015737
4bd1387
60aa403
067f3b9
17b959f
76621b4
6f1286f
83d6f0a
2659f30
2b8735b
05183d9
dec2e6f
909247d
8807cc5
68d3082
3697f89
087b7a3
43894bd
910a99b
091bd58
7c1c527
400f869
1cf46b4
aab0eee
4e9127a
f6ae405
9c7d37a
6e6972e
ccb96f2
b4d4a40
b52910b
b7fd4a9
0f413fa
996ebf0
84e90b8
f0749e1
4611d84
77d06be
3a69313
70bc27d
d4014cf
c351f16
5300f09
bd0d329
b32e8a2
c602cf7
0bf1904
9382891
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -77,10 +77,12 @@ def add(self, value: T, score: float, is_exact_match: bool = True) -> None: | |
| existing_match.hit_count += 1 | ||
| existing_match.score += score | ||
| else: | ||
| # Related (non-exact) match: only accumulate related counters. | ||
| existing_match.related_hit_count += 1 | ||
| existing_match.related_score += score | ||
| else: | ||
| if is_exact_match: | ||
| # New exact match: starts with hit_count=1 and the given score. | ||
| self.set_match( | ||
| Match( | ||
| value, | ||
|
|
@@ -91,10 +93,14 @@ def add(self, value: T, score: float, is_exact_match: bool = True) -> None: | |
| ) | ||
| ) | ||
| else: | ||
| # New related-only match: hit_count stays 0 because | ||
| # only exact matches count as direct hits. This matters | ||
| # for select_with_hit_count / _matches_with_min_hit_count | ||
| # which filter on hit_count to weed out noise. | ||
| self.set_match( | ||
| Match( | ||
| value, | ||
| hit_count=1, | ||
| hit_count=0, | ||
| score=0.0, | ||
| related_hit_count=1, | ||
| related_score=score, | ||
|
|
@@ -250,9 +256,22 @@ def smooth_match_score[T](match: Match[T]) -> None: | |
|
|
||
|
|
||
| class SemanticRefAccumulator(MatchAccumulator[SemanticRefOrdinal]): | ||
| def __init__(self, search_term_matches: set[str] = set()): | ||
| """Accumulates scored semantic reference matches. | ||
|
||
|
|
||
| ``search_term_matches`` tracks which search terms produced hits (provenance). | ||
| Use ``clone`` to create a derived accumulator that inherits a | ||
| *copy* of the parent's provenance. | ||
| """ | ||
|
|
||
| def __init__(self) -> None: | ||
| super().__init__() | ||
| self.search_term_matches = search_term_matches | ||
| self.search_term_matches: set[str] = set() | ||
|
|
||
| def clone(self) -> "SemanticRefAccumulator": | ||
bmerkle marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| """Create a new empty accumulator inheriting a copy of this one's term-match provenance.""" | ||
| acc = self.__class__() | ||
| acc.search_term_matches = set(self.search_term_matches) | ||
| return acc | ||
|
|
||
| def add_term_matches( | ||
| self, | ||
|
|
@@ -330,8 +349,7 @@ async def group_matches_by_type( | |
| semantic_ref = await semantic_refs.get_item(match.value) | ||
| group = groups.get(semantic_ref.knowledge.knowledge_type) | ||
| if group is None: | ||
| group = SemanticRefAccumulator() | ||
| group.search_term_matches = self.search_term_matches | ||
| group = self.clone() | ||
| groups[semantic_ref.knowledge.knowledge_type] = group | ||
| group.set_match(match) | ||
| return groups | ||
|
|
@@ -341,7 +359,7 @@ async def get_matches_in_scope( | |
| semantic_refs: ISemanticRefCollection, | ||
| ranges_in_scope: "TextRangesInScope", | ||
| ) -> "SemanticRefAccumulator": | ||
| accumulator = SemanticRefAccumulator(self.search_term_matches) | ||
| accumulator = self.clone() | ||
| for match in self: | ||
| if ranges_in_scope.is_range_in_scope( | ||
| (await semantic_refs.get_item(match.value)).range | ||
|
|
@@ -513,15 +531,16 @@ def add_ranges(self, text_ranges: "list[TextRange] | TextRangeCollection") -> No | |
| for text_range in text_ranges._ranges: | ||
| self.add_range(text_range) | ||
|
|
||
| def is_in_range(self, inner_range: TextRange) -> bool: | ||
| def contains_range(self, inner_range: TextRange) -> bool: | ||
| if len(self._ranges) == 0: | ||
| return False | ||
| i = bisect.bisect_left(self._ranges, inner_range) | ||
| for outer_range in self._ranges[i:]: | ||
| if outer_range.start > inner_range.start: | ||
| break | ||
| for outer_range in self._ranges: | ||
| if inner_range in outer_range: | ||
| return True | ||
| # Since ranges are sorted by start, once we pass inner_range's start | ||
| # no further range can contain it. | ||
| if outer_range.start > inner_range.start: | ||
| break | ||
| return False | ||
bmerkle marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
|
|
@@ -544,7 +563,7 @@ def is_range_in_scope(self, inner_range: TextRange) -> bool: | |
| # We have a very simple impl: we don't intersect/union ranges yet. | ||
| # Instead, we ensure that the inner range is not rejected by any outer ranges. | ||
| for outer_ranges in self.text_ranges: | ||
| if not outer_ranges.is_in_range(inner_range): | ||
| if not outer_ranges.contains_range(inner_range): | ||
| return False | ||
| return True | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.