Skip to content

Commit bca2297

Browse files
authored
Merge pull request llmware-ai#701 from llmware-ai/remove-werkzeug-utils-dependency
removing werkzeug.utils dependency
2 parents 7d1eb1b + 3e409a5 commit bca2297

File tree

6 files changed

+43
-18
lines changed

6 files changed

+43
-18
lines changed

‎llmware/library.py‎

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
updating, and other tasks pertaining to Libraries via the Library Card.
2121
"""
2222

23-
from werkzeug.utils import secure_filename
2423
import shutil
2524
import os
2625
import json
@@ -141,7 +140,7 @@ def create_new_library(self, library_name, account_name="llmware"):
141140
self.account_name = account_name
142141

143142
# apply safety check to library_name path
144-
library_name = secure_filename(library_name)
143+
library_name = Utilities().secure_filename(library_name)
145144

146145
library_exists = self.check_if_library_exists(library_name,account_name)
147146

@@ -150,6 +149,9 @@ def create_new_library(self, library_name, account_name="llmware"):
150149
logging.info("update: library already exists - returning library - %s - %s ", library_name, account_name)
151150
return self.load_library(library_name, account_name)
152151

152+
# assign self.library_name to the 'safe' library_name
153+
self.library_name = library_name
154+
153155
# allow 'dynamic' creation of a new account path
154156
account_path = os.path.join(LLMWareConfig.get_library_path(), account_name)
155157
if not os.path.exists(account_path):

‎llmware/parsers.py‎

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@
3939

4040
import time
4141
import json
42-
from werkzeug.utils import secure_filename
4342
import os
4443
from zipfile import ZipFile, ZIP_DEFLATED
4544
import shutil
@@ -555,7 +554,7 @@ def zip_extract_handler(self):
555554
if success_code == 1:
556555

557556
# iterate thru all of the files found in the zip archive
558-
# apply secure_filename and prep_filename
557+
# apply secure filename and prep filename
559558
# route to the appropriate work folder, if applicable
560559

561560
for f in z.namelist():
@@ -2163,7 +2162,8 @@ def parse_pdf_by_ocr_images(self, input_fp, write_to_db=True, save_history=True,
21632162

21642163
ext = file.split(".")[-1]
21652164
if ext == "pdf":
2166-
doc_fn = secure_filename(file)
2165+
2166+
doc_fn = Utilities().secure_filename(file)
21672167

21682168
# get new doc_ID number
21692169
if write_to_db_on == 1:
@@ -2417,7 +2417,7 @@ def parse_wiki(self, topic_list, write_to_db=True, save_history=False, target_re
24172417

24182418
for i, topic in enumerate(topic_list):
24192419

2420-
fn = "wiki-topic-" + secure_filename(topic) + ".txt"
2420+
fn = "wiki-topic-" + Utilities().secure_filename(topic) + ".txt"
24212421

24222422
logging.info("update: parse_wiki - %s - %s", topic, fn)
24232423

@@ -2961,8 +2961,8 @@ def parse_website(self, url_base, write_to_db=True, save_history=True, get_links
29612961

29622962
website_name = "my_website.html"
29632963

2964-
# apply secure_filename to remove any extra "/"
2965-
secure_url_name = secure_filename(website.url_main.split(".")[-2])
2964+
# apply secure filename to remove any extra "/"
2965+
secure_url_name = Utilities().secure_filename(website.url_main.split(".")[-2])
29662966

29672967
out_name = str(random.randint(100000, 999999)) + "_" + secure_url_name + ".html"
29682968

@@ -3013,7 +3013,7 @@ def prep_filename(self, fn, secure_name=True, prepend_string=None, postpend_stri
30133013

30143014
# default - apply basic secure name, e.g., remove / and insert _
30153015
if secure_name:
3016-
fn_out= secure_filename(fn)
3016+
fn_out= Utilities().secure_filename(fn)
30173017

30183018
# if requested prepend or postpend
30193019
if prepend_string:
@@ -3094,7 +3094,7 @@ def input_ingestion_comparison_from_parser_state (self, file_list):
30943094
found_file = -1
30953095
for j, ingested_file in enumerate(doc_fn_out):
30963096

3097-
# need to confirm 'symmetrical' transformations, e.g., secure_filename and any prepend/postpend
3097+
# need to confirm 'symmetrical' transformations, e.g., secure filename and any prepend/postpend
30983098
if input_file == ingested_file:
30993099
found_file = 1
31003100
found_list.append(input_file)
@@ -3357,7 +3357,7 @@ def parse_one_pdf_by_ocr_images(self, input_fp, input_fn, save_history=True):
33573357

33583358
if ext == "pdf":
33593359

3360-
doc_fn = secure_filename(input_fn)
3360+
doc_fn = Utilities().secure_filename(input_fn)
33613361

33623362
output_by_page = ImageParser(self).process_pdf_by_ocr(input_fp, input_fn)
33633363

@@ -3403,7 +3403,7 @@ def parse_one_image(self, input_fp, input_fn, save_history=True):
34033403

34043404
if ext in self.ocr_types:
34053405

3406-
doc_fn = secure_filename(input_fn)
3406+
doc_fn = Utilities().secure_filename(input_fn)
34073407
ocr_output = ImageParser(self).process_ocr(input_fp, input_fn)
34083408

34093409
meta = {"author": "", "modified_date": "", "created_date": "", "creator_tool": ""}

‎llmware/requirements.txt‎

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ tabulate==0.9.0
1414
tokenizers>=0.15.0
1515
torch>=1.13.1
1616
transformers>=4.36.0
17-
Werkzeug==3.0.1
1817
word2number==1.1
1918
Wikipedia-API==0.6.0
2019
yfinance>=0.2.28

‎llmware/resources.py‎

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,6 @@
4545
except ImportError:
4646
pass
4747

48-
from werkzeug.utils import secure_filename
49-
5048
from llmware.configs import LLMWareConfig, PostgresConfig, LLMWareTableSchema, SQLiteConfig, AWSS3Config
5149

5250
from llmware.exceptions import LLMWareException, UnsupportedCollectionDatabaseException, InvalidNameException
@@ -4248,7 +4246,15 @@ def connect_to_user_s3_bucket (self, aws_access_key, aws_secret_key,
42484246
files = bucket.objects.all()
42494247

42504248
for file in files:
4251-
f = secure_filename(file.key)
4249+
4250+
# strip os.sep from file name
4251+
safe_file_name = str(file.key)
4252+
if safe_file_name.startswith(os.sep):
4253+
safe_file_name = safe_file_name[1:]
4254+
4255+
f = safe_file_name.replace(os.sep, "_")
4256+
f = f.replace(" ", "_")
4257+
42524258
file_type = f.split(".")[-1].lower()
42534259
if file_type in accepted_file_formats:
42544260
s3.download_file(user_bucket_name, file.key, local_download_path + f)

‎llmware/util.py‎

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -833,6 +833,23 @@ def convert_media_file_to_wav(self, path_to_file_to_convert, save_path=None, fil
833833

834834
return outfile_path
835835

836+
def secure_filename(self, fn):

    """ Utility method to neutralize path separators in a proposed filename.

    The input is converted with str(), one leading path separator (if any) is dropped,
    and every remaining path separator and space is replaced with an underscore.

    Both "/" and "\\" are always treated as separators, in addition to os.sep and
    os.altsep, so the result does not depend on the platform the code runs on
    (e.g., zip member names and S3 keys use "/" even on Windows).

    NOTE: this only removes separators and spaces - it does not rewrite names
    consisting solely of dots (e.g., ".."), so callers should not rely on it as
    a complete path-traversal defense.

    :param fn: proposed file name (any object - converted with str())
    :return: str with no path separators or spaces
    """

    # full set of characters that can act as a path separator on any supported platform
    separators = {"/", "\\", os.sep}
    if os.altsep:
        separators.add(os.altsep)

    safe_file_name = str(fn)

    # strip a single leading separator, so "/a/b" -> "a_b" (not "_a_b")
    # slicing with [:1] keeps this safe for the empty string
    if safe_file_name[:1] in separators:
        safe_file_name = safe_file_name[1:]

    # single pass: every separator and every space becomes an underscore
    replacement_table = str.maketrans({ch: "_" for ch in separators | {" "}})

    return safe_file_name.translate(replacement_table)
852+
836853

837854
class CorpTokenizer:
838855

‎setup.py‎

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,10 @@ def glob_fix(package_name, glob):
4444
"License :: OSI Approved :: Apache Software License",
4545
"Programming Language :: Python :: 3.9",
4646
"Programming Language :: Python :: 3.10",
47+
"Programming Language :: Python :: 3.11",
48+
"Programming Language :: Python :: 3.12"
4749
],
48-
keywords="ai,data,development", # Optional
50+
keywords="ai,llm,rag,data,development", # Optional
4951
packages=['llmware'],
5052
package_data={'llmware': ['*.c', '*.so', '*.dylib', '.dylibs/*', *glob_fix('llmware', 'lib/**/*')], 'llmware.libs': ['*']},
5153
python_requires=">=3.9",
@@ -67,7 +69,6 @@ def glob_fix(package_name, glob):
6769
'tokenizers>=0.15.0',
6870
'torch>=1.13.1',
6971
'transformers>=4.36.0',
70-
'Werkzeug==3.0.1',
7172
'word2number==1.1',
7273
'Wikipedia-API==0.6.0',
7374
'yfinance>=0.2.28',

0 commit comments

Comments
 (0)