Skip to content

Commit e01274d

Browse files
DARREN OBERST
authored and committed
updating sentence transformer test example configs
1 parent 7c8f58a commit e01274d

File tree

5 files changed

+92
-196
lines changed

5 files changed

+92
-196
lines changed

‎examples/Embedding/using_sentence_transformer.py‎

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11

2-
"""This example shows how to use sentence transformers as a vector embedding model with llmware"""
2+
"""This example shows how to use sentence transformers as a vector embedding model with llmware.
33
4-
"""Note: this example illustrates capability from llmware==0.1.13 - please update pip install, or pull from repo"""
4+
To use models from the SentenceTransformer catalog, you may need to install as follows:
5+
6+
pip3 install sentence-transformers
7+
8+
"""
59

610

711
import os
@@ -10,7 +14,12 @@
1014
from llmware.library import Library
1115
from llmware.retrieval import Query
1216
from llmware.models import ModelCatalog
17+
from llmware.configs import LLMWareConfig
1318

19+
from importlib import util
20+
if not util.find_spec("sentence_transformers"):
21+
print("\nto run this example, you should install the SentenceTransformer library with: "
22+
"pip3 install sentence-transformers.")
1423

1524
def build_lib (library_name, folder="Agreements"):
1625

@@ -39,7 +48,9 @@ def build_lib (library_name, folder="Agreements"):
3948

4049
print("update: Step 1- starting here- building library- parsing PDFs into text chunks")
4150

42-
lib = build_lib("st_embedding_0_454")
51+
LLMWareConfig().set_active_db("sqlite")
52+
53+
lib = build_lib("st_embedding_0")
4354

4455
# register a model from the sentence transformers library/repository
4556

‎llmware/requirements.txt‎

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ huggingface-hub==0.19.4
44
numpy>=1.23.2
55
openai>=1.0
66
pymongo>=4.7.0
7-
sentence-transformers==2.2.2
87
tabulate==0.9.0
98
tokenizers>=0.15.0
109
torch>=1.13.1
@@ -18,7 +17,7 @@ colorama==0.4.6
1817
einops==0.7.0
1918
librosa>=0.10.0
2019

21-
requests~=2.31.0
20+
requests>=2.31.0
2221
tqdm~=4.66.1
2322
botocore~=1.27.96
2423
setuptools~=68.2.0

‎setup.py‎

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ def glob_fix(package_name, glob):
5959
'numpy>=1.23.2',
6060
'openai>=1.0.0',
6161
'pymongo>=4.7.0',
62-
'sentence-transformers==2.2.2',
6362
'tabulate==0.9.0',
6463
'tokenizers>=0.15.0',
6564
'torch>=1.13.1',

‎tests/embeddings/test_all_sentence_transformer_models.py‎

Lines changed: 0 additions & 190 deletions
This file was deleted.
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
2+
""" Tests that sentence transformer model is loaded and yielding a structurally correct embedding vector.
3+
4+
To use this test, you may need to install the SentenceTransformer library as follows:
5+
6+
-- pip3 install sentence-transformers
7+
8+
"""
9+
10+
11+
from llmware.models import ModelCatalog
12+
from sentence_transformers import SentenceTransformer
13+
14+
15+
def test_sentence_transformer_model_local_load():
    """Check that each listed sentence-transformer model loads and embeds text.

    For every model name in the list below, the test instantiates a
    ``SentenceTransformer``, wraps it through
    ``ModelCatalog().load_sentence_transformer_model`` and embeds one sample
    sentence, asserting that a result comes back.

    The test returns ``None`` (the pytest convention); a failure is signalled
    by the assertion, whose message names the offending model.
    """

    # This model list was generated from
    # https://www.sbert.net/docs/pretrained_models.html by selecting the
    # "All Models" switch.
    sentence_transformer_models = [
        'all-MiniLM-L12-v1',
        'all-MiniLM-L12-v2',
        'all-MiniLM-L6-v1',
        'all-MiniLM-L6-v2',
        'all-distilroberta-v1',
        'all-mpnet-base-v1',
        'all-mpnet-base-v2',
        'all-roberta-large-v1',
        'average_word_embeddings_glove.6B.300d',
        'average_word_embeddings_komninos',
        'gtr-t5-base',
        'gtr-t5-large',
        'gtr-t5-xl',
        'gtr-t5-xxl',
        'msmarco-bert-base-dot-v5',
        'msmarco-distilbert-base-tas-b',
        'msmarco-distilbert-dot-v5',
        'multi-qa-MiniLM-L6-cos-v1',
        'multi-qa-MiniLM-L6-dot-v1',
        'multi-qa-distilbert-cos-v1',
        'multi-qa-distilbert-dot-v1',
        'multi-qa-mpnet-base-cos-v1',
        'multi-qa-mpnet-base-dot-v1',
        'paraphrase-MiniLM-L12-v2',
        'paraphrase-MiniLM-L3-v2',
        'paraphrase-MiniLM-L6-v2',
        'paraphrase-TinyBERT-L6-v2',
        'paraphrase-albert-small-v2',
        'paraphrase-distilroberta-base-v2',
        'paraphrase-mpnet-base-v2',
        'paraphrase-multilingual-MiniLM-L12-v2',
        'paraphrase-multilingual-mpnet-base-v2',
        'sentence-t5-base',
        'sentence-t5-large',
        'sentence-t5-xl',
        'sentence-t5-xxl',
    ]

    test_text = ("This is just a sample text to confirm that the embedding model is loading and correctly "
                 "converting into a structurally accurate embedding vector.")

    for model_name in sentence_transformer_models:

        print(f"\nloading sentence transformer model: {model_name}")

        st_model = SentenceTransformer(model_name)
        model = ModelCatalog().load_sentence_transformer_model(st_model, model_name=model_name)
        embedding_vector = model.embedding([test_text])

        # Name the model in the failure message so a broken entry in the long
        # list above can be identified without re-running everything.
        assert embedding_vector is not None, f"no embedding returned for model: {model_name}"

        # NOTE(review): assumes the embedding result exposes `.shape`
        # (array-like) - confirm against the llmware model wrapper.
        print("created vector successfully with dimensions: ", embedding_vector.shape)


# Run directly as a script only. Calling the test unconditionally at module
# level makes pytest execute every model download during collection and then
# a second time as the collected test.
if __name__ == "__main__":
    test_sentence_transformer_model_local_load()

0 commit comments

Comments
 (0)