Skip to content

Commit a995b4c

Browse files
vectorstore[minor]: Add support to filter by IS NULL and IS NOT NULL criteria (#40)
Description: Add support for filtering by IS NULL criteria in the metadata. Issue: N/A. Dependencies: N/A. Twitter handle: @martinferenaz. This PR fixes the problem of searching for whether a tag has a value (exists) in the metadata. Checking that requires an "IS NULL"-style comparison, because the "$eq" operator only checks that the tag exists and has a null value assigned. In addition, a test for the $nin condition was added, which exposed a bug; the bug was resolved by applying a NOT condition to the result of in_.
1 parent f6ea1b2 commit a995b4c

File tree

4 files changed

+57
-3
lines changed

4 files changed

+57
-3
lines changed

‎examples/vectorstore.ipynb‎

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,8 @@
235235
"\n",
236236
"The vectorstore supports a set of filters that can be applied against the metadata fields of the documents.\n",
237237
"\n",
238-
"| Operator | Meaning/Category |\n",
239-
"|----------|-------------------------|\n",
238+
"| Operator | Meaning/Category |\n",
239+
"|-----------|-------------------------|\n",
240240
"| \\$eq | Equality (==) |\n",
241241
"| \\$ne | Inequality (!=) |\n",
242242
"| \\$lt | Less than (<) |\n",
@@ -246,6 +246,7 @@
246246
"| \\$in | Special Cased (in) |\n",
247247
"| \\$nin | Special Cased (not in) |\n",
248248
"| \\$between | Special Cased (between) |\n",
249+
"| \\$exists | Special Cased (is null) |\n",
249250
"| \\$like | Text (like) |\n",
250251
"| \\$ilike | Text (case-insensitive like) |\n",
251252
"| \\$and | Logical (and) |\n",

‎langchain_postgres/vectorstores.py‎

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ class DistanceStrategy(str, enum.Enum):
6666
"$in",
6767
"$nin",
6868
"$between",
69+
"$exists",
6970
}
7071

7172
TEXT_OPERATORS = {
@@ -702,13 +703,24 @@ def _handle_field_filter(
702703
if operator in {"$in"}:
703704
return queried_field.in_([str(val) for val in filter_value])
704705
elif operator in {"$nin"}:
705-
return queried_field.nin_([str(val) for val in filter_value])
706+
return ~queried_field.in_([str(val) for val in filter_value])
706707
elif operator in {"$like"}:
707708
return queried_field.like(filter_value)
708709
elif operator in {"$ilike"}:
709710
return queried_field.ilike(filter_value)
710711
else:
711712
raise NotImplementedError()
713+
elif operator == "$exists":
714+
if not isinstance(filter_value, bool):
715+
raise ValueError(
716+
"Expected a boolean value for $exists "
717+
f"operator, but got: {filter_value}"
718+
)
719+
condition = func.jsonb_exists(
720+
self.EmbeddingStore.cmetadata,
721+
field,
722+
)
723+
return ~condition if filter_value else condition
712724
else:
713725
raise NotImplementedError()
714726

‎tests/unit_tests/fixtures/filtering_test_cases.py‎

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,16 @@
198198
{"id": {"$between": (1, 1)}},
199199
[1],
200200
),
201+
# Test in
201202
(
202203
{"name": {"$in": ["adam", "bob"]}},
203204
[1, 2],
204205
),
206+
# Test nin
207+
(
208+
{"name": {"$nin": ["adam", "bob"]}},
209+
[3],
210+
),
205211
]
206212

207213
TYPE_5_FILTERING_TEST_CASES = [
@@ -216,3 +222,23 @@
216222
[1, 3],
217223
),
218224
]
225+
226+
TYPE_6_FILTERING_TEST_CASES = [
227+
# These involve the special operator $exists
228+
(
229+
{"happiness": {"$exists": True}},
230+
[],
231+
),
232+
(
233+
{"happiness": {"$exists": False}},
234+
[1, 2, 3],
235+
),
236+
(
237+
{"sadness": {"$exists": True}},
238+
[3],
239+
),
240+
(
241+
{"sadness": {"$exists": False}},
242+
[1, 2],
243+
),
244+
]

‎tests/unit_tests/test_vectorstore.py‎

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
TYPE_3_FILTERING_TEST_CASES,
1818
TYPE_4_FILTERING_TEST_CASES,
1919
TYPE_5_FILTERING_TEST_CASES,
20+
TYPE_6_FILTERING_TEST_CASES,
2021
)
2122
from tests.utils import VECTORSTORE_CONNECTION_STRING as CONNECTION_STRING
2223

@@ -484,6 +485,17 @@ def test_pgvector_with_with_metadata_filters_5(
484485
assert [doc.metadata["id"] for doc in docs] == expected_ids, test_filter
485486

486487

488+
@pytest.mark.parametrize("test_filter, expected_ids", TYPE_6_FILTERING_TEST_CASES)
489+
def test_pgvector_with_with_metadata_filters_6(
490+
pgvector: PGVector,
491+
test_filter: Dict[str, Any],
492+
expected_ids: List[int],
493+
) -> None:
494+
"""Test end to end construction and search."""
495+
docs = pgvector.similarity_search("meow", k=5, filter=test_filter)
496+
assert [doc.metadata["id"] for doc in docs] == expected_ids, test_filter
497+
498+
487499
@pytest.mark.parametrize(
488500
"invalid_filter",
489501
[
@@ -496,6 +508,8 @@ def test_pgvector_with_with_metadata_filters_5(
496508
{"$and": {}},
497509
{"$between": {}},
498510
{"$eq": {}},
511+
{"$exists": {}},
512+
{"$exists": 1},
499513
],
500514
)
501515
def test_invalid_filters(pgvector: PGVector, invalid_filter: Any) -> None:
@@ -510,6 +524,7 @@ def test_validate_operators() -> None:
510524
"$and",
511525
"$between",
512526
"$eq",
527+
"$exists",
513528
"$gt",
514529
"$gte",
515530
"$ilike",

0 commit comments

Comments
 (0)