Skip to content

Commit 0d277a0

Browse files
DARREN OBERST
authored and committed
updating hf datasets as optional config examples tests
1 parent e01274d commit 0d277a0

File tree

7 files changed

+82
-18
lines changed

7 files changed

+82
-18
lines changed

‎examples/Models/dragon_gguf_fast_start.py‎

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,27 @@
11

2-
"""This example demonstrates running a 7B RAG-instruct fine-tuned DRAGON model locally on a laptop"""
2+
"""This example demonstrates running a 7B RAG-instruct fine-tuned DRAGON model locally on a laptop.
3+
4+
This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
5+
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
6+
datasets library, which can be installed with:
7+
8+
`pip3 install datasets`
9+
10+
"""
11+
312

413
import time
514
from llmware.prompts import Prompt
6-
7-
from datasets import load_dataset
15+
from llmware.exceptions import LLMWareException
16+
from importlib import util
17+
if not util.find_spec("datasets"):
18+
raise LLMWareException(message="\nto run this example, you need to install HuggingFace datasets: "
19+
"`pip3 install datasets`")
20+
21+
try:
22+
from datasets import load_dataset
23+
except:
24+
raise LLMWareException(message="Exception: datasets not found and required for example.")
825

926

1027
# Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo

‎examples/Models/dragon_rag_benchmark_tests_huggingface.py‎

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11

22
""" This example demonstrates running a benchmarks set of tests against llmware DRAGON models
33
https://huggingface.co/collections/llmware/dragon-models-65552d7648093c3f6e35d1bf
4+
5+
This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
6+
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
7+
datasets library, which can be installed with:
8+
9+
`pip3 install datasets`
10+
411
"""
512

613
import time
@@ -11,7 +18,8 @@
1118
try:
1219
from datasets import load_dataset
1320
except ImportError:
14-
raise ImportError ("This example requires the 'datasets' Python package. You can install it with 'pip install datasets'")
21+
raise ImportError ("This example requires the 'datasets' Python package. "
22+
"You can install it with 'pip3 install datasets'")
1523

1624

1725
# Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo

‎examples/Models/dragon_rag_benchmark_tests_llmware.py‎

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,24 @@
33
https://huggingface.co/collections/llmware/dragon-models-65552d7648093c3f6e35d1bf
44
The model loading and interaction is handled with the llmware Prompt class which provides additional
55
capabilities like evidence checking
6+
7+
This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
8+
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
9+
datasets library, which can be installed with:
10+
11+
`pip3 install datasets`
12+
613
"""
714

815
import time
916
from llmware.prompts import Prompt
17+
1018
# The datasets package is not installed automatically by llmware
1119
try:
1220
from datasets import load_dataset
1321
except ImportError:
14-
raise ImportError ("This example requires the 'datasets' Python package. You can install it with 'pip install datasets'")
22+
raise ImportError ("This example requires the 'datasets' Python package. "
23+
"You can install it with 'pip3 install datasets'")
1524

1625

1726
# Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo

‎examples/Models/llmware_model_fast_start.py‎

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,29 @@
55
Usage: You can pass in a model name:
66
python llmware_model_fast_start.py llmware/bling-1b-0.1
77
If you do not specify a model you will be prompted to pick one
8+
9+
This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
10+
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
11+
datasets library, which can be installed with:
12+
13+
`pip3 install datasets`
14+
815
"""
916

1017
import re
1118
import sys
1219
import time
1320
import torch
14-
from datasets import load_dataset
1521
from huggingface_hub import hf_api, ModelFilter, ModelCard
16-
from tabulate import tabulate
1722
from transformers import AutoModelForCausalLM, AutoTokenizer
1823

24+
# The datasets package is not installed automatically by llmware
25+
try:
26+
from datasets import load_dataset
27+
except ImportError:
28+
raise ImportError ("This example requires the 'datasets' Python package. "
29+
"You can install it with 'pip3 install datasets'")
30+
1931

2032
# Query HuggingFace and get the llmware models. Return the the components of a table: headers and data
2133
def get_llmware_models():
@@ -38,13 +50,21 @@ def get_llmware_models():
3850

3951

4052
def print_llmware_models():
53+
4154
table_headers, table_data = get_llmware_models()
42-
print(tabulate(table_data, headers=table_headers, tablefmt="plain", numalign="right"))
55+
56+
print(table_headers[0], "\t\t", table_headers[1], "\t\t", table_headers[2])
57+
for row in table_data:
58+
print(row[0], "\t\t", row[1], "\t\t", row[2])
4359

4460

4561
def prompt_user_for_model_selection(prompt=None):
62+
4663
table_headers, table_data = get_llmware_models()
47-
print(tabulate(table_data, headers=table_headers, tablefmt="plain", numalign="right"))
64+
65+
print(table_headers[0], "\t\t", table_headers[1], "\t\t", table_headers[2])
66+
for row in table_data:
67+
print(row[0], "\t\t", row[1], "\t\t", row[2])
4868

4969
num_models = len(table_data)
5070

‎llmware/requirements.txt‎

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,20 @@
11
boto3==1.24.53
2-
datasets==2.15.0
3-
huggingface-hub==0.19.4
42
numpy>=1.23.2
53
openai>=1.0
64
pymongo>=4.7.0
7-
tabulate==0.9.0
8-
tokenizers>=0.15.0
95
torch>=1.13.1
106
transformers>=4.36.0
11-
word2number==1.1
127
Wikipedia-API==0.6.0
138
psycopg-binary==3.1.17
149
psycopg==3.1.17
1510
pgvector==0.2.4
1611
colorama==0.4.6
1712
einops==0.7.0
1813
librosa>=0.10.0
14+
word2number==1.1
1915

16+
tokenizers>=0.15.0
17+
huggingface-hub==0.19.4
2018
requests>=2.31.0
2119
tqdm~=4.66.1
2220
botocore~=1.27.96

‎setup.py‎

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,10 @@ def glob_fix(package_name, glob):
5454
zip_safe=True,
5555
install_requires=[
5656
'boto3==1.24.53',
57-
'datasets==2.15.0',
5857
'huggingface-hub==0.19.4',
5958
'numpy>=1.23.2',
6059
'openai>=1.0.0',
6160
'pymongo>=4.7.0',
62-
'tabulate==0.9.0',
6361
'tokenizers>=0.15.0',
6462
'torch>=1.13.1',
6563
'transformers>=4.36.0',

‎tests/models/test_prompt_benchmark_test.py‎

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,27 @@
11

22
"""This runs a benchmark test dataset against a series of prompts. It can be used to test any model type for
3-
longer running series of prompts, as well as the fact-checking capability. """
3+
longer running series of prompts, as well as the fact-checking capability.
4+
5+
This test uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
6+
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
7+
datasets library, which can be installed with:
8+
9+
`pip3 install datasets`
10+
"""
411

512

613
import time
714
import random
815

916
from llmware.prompts import Prompt
10-
from datasets import load_dataset
17+
18+
# The datasets package is not installed automatically by llmware
19+
try:
20+
from datasets import load_dataset
21+
except ImportError:
22+
raise ImportError ("This test requires the 'datasets' Python package. "
23+
"You can install it with 'pip3 install datasets'")
24+
1125

1226

1327
def load_rag_benchmark_tester_dataset():

0 commit comments

Comments
 (0)