@@ -39,7 +39,6 @@

 import time
 import json
-from werkzeug.utils import secure_filename
 import os
 from zipfile import ZipFile, ZIP_DEFLATED
 import shutil
@@ -555,7 +554,7 @@ def zip_extract_handler(self):
         if success_code == 1:

             # iterate thru all of the files found in the zip archive
-            # apply secure_filename and prep_filename
+            # apply secure filename and prep filename
             # route to the appropriate work folder, if applicable

             for f in z.namelist():
@@ -2163,7 +2162,8 @@ def parse_pdf_by_ocr_images(self, input_fp, write_to_db=True, save_history=True,

                 ext = file.split(".")[-1]
                 if ext == "pdf":
-                    doc_fn = secure_filename(file)
+
+                    doc_fn = Utilities().secure_filename(file)

                     # get new doc_ID number
                     if write_to_db_on == 1:
@@ -2417,7 +2417,7 @@ def parse_wiki(self, topic_list, write_to_db=True, save_history=False, target_re

         for i, topic in enumerate(topic_list):

-            fn = "wiki-topic-" + secure_filename(topic) + ".txt"
+            fn = "wiki-topic-" + Utilities().secure_filename(topic) + ".txt"

             logging.info("update: parse_wiki - %s - %s", topic, fn)

@@ -2961,8 +2961,8 @@ def parse_website(self, url_base, write_to_db=True, save_history=True, get_links

         website_name = "my_website.html"

-        # apply secure_filename to remove any extra "/"
-        secure_url_name = secure_filename(website.url_main.split(".")[-2])
+        # apply secure filename to remove any extra "/"
+        secure_url_name = Utilities().secure_filename(website.url_main.split(".")[-2])

         out_name = str(random.randint(100000, 999999)) + "_" + secure_url_name + ".html"

@@ -3013,7 +3013,7 @@ def prep_filename(self, fn, secure_name=True, prepend_string=None, postpend_stri

         # default - apply basic secure name, e.g., remove / and insert _
         if secure_name:
-            fn_out= secure_filename(fn)
+            fn_out= Utilities().secure_filename(fn)

         # if requested prepend or postpend
         if prepend_string:
@@ -3094,7 +3094,7 @@ def input_ingestion_comparison_from_parser_state (self, file_list):
             found_file = -1
             for j, ingested_file in enumerate(doc_fn_out):

-                # need to confirm 'symmetrical' transformations, e.g., secure_filename and any prepend/postpend
+                # need to confirm 'symmetrical' transformations, e.g., secure filename and any prepend/postpend
                 if input_file == ingested_file:
                     found_file = 1
                     found_list.append(input_file)
@@ -3357,7 +3357,7 @@ def parse_one_pdf_by_ocr_images(self, input_fp, input_fn, save_history=True):

         if ext == "pdf":

-            doc_fn = secure_filename(input_fn)
+            doc_fn = Utilities().secure_filename(input_fn)

             output_by_page = ImageParser(self).process_pdf_by_ocr(input_fp, input_fn)

@@ -3403,7 +3403,7 @@ def parse_one_image(self, input_fp, input_fn, save_history=True):

         if ext in self.ocr_types:

-            doc_fn = secure_filename(input_fn)
+            doc_fn = Utilities().secure_filename(input_fn)
             ocr_output = ImageParser(self).process_ocr(input_fp, input_fn)

             meta = {"author": "", "modified_date": "", "created_date": "", "creator_tool": ""}
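
Note on the pattern: all nine hunks make the same mechanical swap. Werkzeug's secure_filename is replaced by an in-house Utilities().secure_filename, which is what lets the commit drop the werkzeug import (and the dependency) at the top of the file. The body of Utilities.secure_filename is not part of this diff; the sketch below is a hypothetical stand-in, assuming only the behavior the in-code comments describe ("remove / and insert _"), and the real helper in the repo may sanitize differently.

import re

class Utilities:

    def secure_filename(self, fn):
        # hypothetical sketch - not the implementation shipped with this commit

        # remove path separators so a name like "../a/b.pdf" cannot escape
        # its target folder - mirrors the comment "remove / and insert _"
        fn = fn.replace("\\", "/").replace("/", "_")

        # keep a conservative character set: letters, digits, dot, dash, underscore
        fn = re.sub(r"[^A-Za-z0-9._-]", "_", fn)

        # guard against names that clean down to nothing but dots and underscores
        if not fn.strip("._"):
            fn = "file_" + fn

        return fn

# example: Utilities().secure_filename("../my report/v1.pdf") -> ".._my_report_v1.pdf"

Because the replacement keeps werkzeug's call shape (one string in, one sanitized string out), each call site changes only the callable, which keeps the hunks above to simple line-for-line substitutions.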