Skip to content

Commit 0987d3c

Browse files
hey-thanks and NicklasTegner authored
Added pathlib support. (JessicaTegner#289)
* Added pathlib support.
* Added pathlib types to convert_file signature.
* Added pathlib support for outputfile, created test, and updated README.md.
* Documented pathlib.Path support for outputfile parameter in convert_file and convert_text docstrings.
* Force update of pip and setuptools when building.

Co-authored-by: NicklasTegner <NicklasMCHD@live.dk>
1 parent 4f720ba commit 0987d3c

File tree

4 files changed

+96
-22
lines changed

4 files changed

+96
-22
lines changed

‎.travis.yml‎

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,10 @@ addons:
4040
- lmodern
4141

4242
before_install:
43+
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then python -m pip install -U pip setuptools; fi
4344
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then python -m pip install poetry; fi
4445
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then sysctl -n machdep.cpu.brand_string; fi
46+
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then python3 -m pip install -U pip setuptools; fi
4547
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then python3 -m pip install poetry; fi
4648
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install basictex; fi
4749
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then eval "$(/usr/libexec/path_helper)"; fi

‎README.md‎

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,25 @@ pypandoc.convert_file(['D:/book_front.md', 'book2/*.md'], 'docx', outputfile="so
163163
```
164164

165165

166+
pathlib is also supported.
167+
168+
```python
169+
import pypandoc
170+
from pathlib import Path
171+
172+
# single file
173+
input = Path('somefile.md')
174+
output = input.with_suffix('.docx')
175+
pypandoc.convert_file(input, 'docx', outputfile=output)
176+
177+
# convert all markdown files in a chapters/ subdirectory.
178+
pypandoc.convert_file(Path('chapters').glob('*.md'), 'docx', outputfile="somefile.docx")
179+
180+
# convert all markdown files in the book1 and book2 directories.
181+
pypandoc.convert_file([*Path('book1').glob('*.md'), *Path('book2').glob('*.md')], 'docx', outputfile="somefile.docx")
182+
# pathlib globs must be unpacked if they are inside lists.
183+
```
184+
166185
In addition to `format`, it is possible to pass `extra_args`.
167186
That makes it possible to access various pandoc options easily.
168187

@@ -286,6 +305,7 @@ Note that for citeproc tests to pass you'll need to have [pandoc-citeproc](https
286305
* [Kolen Cheung](https://github.com/ickc) - Implement `_get_pandoc_urls` for installing arbitrary version as well as the latest version of pandoc. Minor: README, Travis, setup.py.
287306
* [Rebecca Heineman](https://github.com/burgerbecky) - Added scanning code for finding pandoc in Windows
288307
* [Andrew Barraford](https://github.com/abarrafo) - Download destination.
308+
* [Alex Kneisel](https://github.com/hey-thanks/) - Added pathlib.Path support to convert_file.
289309

290310
## License
291311

‎pypandoc/__init__.py‎

Lines changed: 36 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from __future__ import absolute_import, print_function, with_statement
33
from typing import Iterable
44
from typing import Union
5+
from typing import Generator
56

67
import logging
78
import os
@@ -11,6 +12,7 @@
1112
import tempfile
1213
import textwrap
1314
import glob
15+
from pathlib import Path
1416

1517
from .handler import _check_log_handler
1618
from .pandoc_download import DEFAULT_TARGET_FOLDER, download_pandoc
@@ -52,7 +54,7 @@
5254
logger = logging.getLogger(__name__)
5355

5456
def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encoding:str='utf-8',
55-
outputfile:Union[None, str]=None, filters:Union[Iterable, None]=None, verify_format:bool=True,
57+
outputfile:Union[None, str, Path]=None, filters:Union[Iterable, None]=None, verify_format:bool=True,
5658
sandbox:bool=True, cworkdir:Union[str, None]=None) -> str:
5759
"""Converts given `source` from `format` to `to`.
5860
@@ -68,8 +70,9 @@ def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encodin
6870
6971
:param str encoding: the encoding of the input bytes (Default value = 'utf-8')
7072
71-
:param str outputfile: output will be written to outfilename or the converted content
72-
returned if None (Default value = None)
73+
:param str outputfile: output will be written to outputfile or the converted content
74+
returned if None. The output filename can be specified as a string
75+
or pathlib.Path object. (Default value = None)
7376
7477
:param list filters: pandoc filters e.g. filters=['pandoc-citeproc']
7578
@@ -93,12 +96,17 @@ def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encodin
9396
cworkdir=cworkdir)
9497

9598

96-
def convert_file(source_file:Union[list, str], to:str, format:Union[str, None]=None, extra_args:Iterable=(), encoding:str='utf-8',
97-
outputfile:Union[None, str]=None, filters:Union[Iterable, None]=None, verify_format:bool=True,
98-
sandbox:bool=True, cworkdir:Union[str, None]=None) -> str:
99+
def convert_file(source_file:Union[list, str, Path, Generator], to:str, format:Union[str, None]=None,
100+
extra_args:Iterable=(), encoding:str='utf-8', outputfile:Union[None, str, Path]=None,
101+
filters:Union[Iterable, None]=None, verify_format:bool=True, sandbox:bool=True,
102+
cworkdir:Union[str, None]=None) -> str:
99103
"""Converts given `source` from `format` to `to`.
100104
101-
:param (str, list) source_file: Either a full file path, relative file path, a file patterh (like dir/*.md), or a list if file or file patterns.
105+
:param (str, list, pathlib.Path) source_file: If a string, should be either
106+
an absolute file path, relative file path, or a file pattern (like dir/*.md).
107+
If a list, should be a list of file paths, file patterns, or pathlib.Path
108+
objects. In addition, pathlib.Path objects as well as the generators produced by
109+
pathlib.Path.glob may be specified.
102110
103111
:param str to: format into which the input should be converted; can be one of
104112
`pypandoc.get_pandoc_formats()[1]`
@@ -112,8 +120,9 @@ def convert_file(source_file:Union[list, str], to:str, format:Union[str, None]=N
112120
113121
:param str encoding: the encoding of the file or the input bytes (Default value = 'utf-8')
114122
115-
:param str outputfile: output will be written to outfilename or the converted content
116-
returned if None (Default value = None)
123+
:param str outputfile: output will be written to outputfile or the converted content
124+
returned if None. The output filename can be specified as a string
125+
or pathlib.Path object. (Default value = None)
117126
118127
:param list filters: pandoc filters e.g. filters=['pandoc-citeproc']
119128
@@ -130,6 +139,14 @@ def convert_file(source_file:Union[list, str], to:str, format:Union[str, None]=N
130139
:raises OSError: if pandoc is not found; make sure it has been installed and is available at
131140
path.
132141
"""
142+
# This if block effectively adds support for pathlib.Path objects
143+
# and generators produced by pathlib.Path().glob().
144+
if not isinstance(source_file, str):
145+
try:
146+
source_file = list(map(str, source_file))
147+
except TypeError:
148+
source_file = str(source_file)
149+
133150
if not _identify_path(source_file):
134151
raise RuntimeError("source_file is not a valid path")
135152
if _is_network_path(source_file): # if the source_file is an url
@@ -145,18 +162,15 @@ def convert_file(source_file:Union[list, str], to:str, format:Union[str, None]=N
145162
if isinstance(source_file, list): # a list of possibly file or file patterns. Expand all with glob
146163
for filepath in source_file:
147164
discovered_source_files.extend(glob.glob(filepath))
148-
if len(discovered_source_files) == 1: # behavior for a single file or a pattern
149-
format = _identify_format_from_path(discovered_source_files[0], format)
150-
return _convert_input(discovered_source_files[0], format, 'path', to, extra_args=extra_args,
151-
outputfile=outputfile, filters=filters,
152-
verify_format=verify_format, sandbox=sandbox,
153-
cworkdir=cworkdir)
154-
else: # behavior for multiple files or file patterns
155-
format = _identify_format_from_path(discovered_source_files[0], format)
156-
return _convert_input(discovered_source_files, format, 'path', to, extra_args=extra_args,
157-
outputfile=outputfile, filters=filters,
158-
verify_format=verify_format, sandbox=sandbox,
159-
cworkdir=cworkdir)
165+
166+
format = _identify_format_from_path(discovered_source_files[0], format)
167+
if len(discovered_source_files) == 1:
168+
discovered_source_files = discovered_source_files[0]
169+
170+
return _convert_input(discovered_source_files, format, 'path', to, extra_args=extra_args,
171+
outputfile=outputfile, filters=filters,
172+
verify_format=verify_format, sandbox=sandbox,
173+
cworkdir=cworkdir)
160174

161175

162176
def _identify_path(source) -> bool:
@@ -330,7 +344,7 @@ def _convert_input(source, format, input_type, to, extra_args=(),
330344
args += input_file
331345

332346
if outputfile:
333-
args.append("--output=" + outputfile)
347+
args.append("--output=" + str(outputfile))
334348

335349
if sandbox:
336350
if ensure_pandoc_minimal_version(2,15): # sandbox was introduced in pandoc 2.15, so only add if we are using 2.15 or above.

‎tests.py‎

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import tempfile
1313
import unittest
1414
import warnings
15+
from pathlib import Path
1516

1617
import pypandoc
1718
from pypandoc.py3compat import path2url, string_types, unicode_type
@@ -508,6 +509,43 @@ def create_sample_lua(self):
508509
out, err = p.communicate()
509510
return out.decode('utf-8')
510511

512+
def test_basic_conversion_from_file_pathlib(self):
513+
with closed_tempfile('.md', text='# some title\n') as file_name:
514+
expected = u'some title{0}=========={0}{0}'.format(os.linesep)
515+
received_from_str_filename_input = pypandoc.convert_file(file_name, 'rst')
516+
received_from_path_filename_input = pypandoc.convert_file(Path(file_name), 'rst')
517+
self.assertEqualExceptForNewlineEnd(expected, received_from_str_filename_input)
518+
self.assertEqualExceptForNewlineEnd(expected, received_from_path_filename_input)
519+
520+
def test_basic_conversion_from_multiple_files_pathlib(self):
521+
with closed_tempfile('.md', text='some title') as file_name1:
522+
with closed_tempfile('.md', text='some title') as file_name2:
523+
expected = '<p>some title</p>\n<p>some title</p>'
524+
received_from_str_filename_input = pypandoc.convert_file([file_name1, file_name2], 'html')
525+
received_from_path_filename_input = pypandoc.convert_file([Path(file_name1), Path(file_name2)], 'html')
526+
self.assertEqualExceptForNewlineEnd(expected, received_from_str_filename_input)
527+
self.assertEqualExceptForNewlineEnd(expected, received_from_path_filename_input)
528+
529+
def test_basic_conversion_from_file_pattern_pathlib_glob(self):
530+
received_from_str_filename_input = pypandoc.convert_file("./*.md", 'html').lower()
531+
received_from_path_filename_input = pypandoc.convert_file(Path(".").glob("*.md"), 'html').lower()
532+
assert received_from_str_filename_input == received_from_path_filename_input
533+
534+
def test_basic_conversion_from_file_pattern_with_input_list_pathlib_glob(self):
535+
received_from_str_filename_input = pypandoc.convert_file(["./*.md", "./*.md"], 'html').lower()
536+
received_from_path_filename_input = pypandoc.convert_file([*Path(".").glob("*.md"), *Path(".").glob("*.md")],
537+
'html').lower()
538+
assert received_from_str_filename_input == received_from_path_filename_input
539+
540+
def test_basic_conversion_to_pathlib_file(self):
541+
with closed_tempfile('.rst', ) as file_name:
542+
expected = u'some title{0}=========={0}{0}'.format(os.linesep)
543+
received = pypandoc.convert_text('# some title\n', to='rst', format='md', outputfile=Path(file_name))
544+
self.assertEqualExceptForNewlineEnd("", received)
545+
with io.open(file_name) as f:
546+
written = f.read()
547+
self.assertEqualExceptForNewlineEnd(expected, written)
548+
511549
def assertEqualExceptForNewlineEnd(self, expected, received): # noqa
512550
# output written to a file does not seem to have os.linesep
513551
# handle everything here by replacing the os linesep by a simple \n

0 commit comments

Comments
 (0)