1"""Utility for calling pandoc"""
2# Copyright (c) IPython Development Team.
3# Distributed under the terms of the Modified BSD License.
4
5import re
6import shutil
7import subprocess
8import warnings
9from io import BytesIO, TextIOWrapper
10
11from nbconvert.utils.version import check_version
12
13from .exceptions import ConversionException
14
# Supported pandoc version range, as reported to users by check_pandoc_version():
# the installed version must be at least `_minimal_version` and less than
# `_maximal_version`.
_minimal_version = "2.9.2"
_maximal_version = "4.0.0"
17
18
def pandoc(source, fmt, to, extra_args=None, encoding="utf-8"):
    """Run pandoc on a string and return the converted text.

    The text is piped to a ``pandoc`` subprocess on stdin and the result is
    read back from its stdout.

    Parameters
    ----------
    source : string
        Text to convert, expected to be valid ``fmt`` input. It is sent to
        pandoc encoded with ``str.encode()``'s default encoding (UTF-8).
    fmt : string
        Name of the input format (markdown, etc.), passed as ``-f``.
    to : string
        Name of the output format (html, etc.), passed as ``-t``.
    extra_args : list of string, optional
        Additional command-line arguments appended to the pandoc call.
    encoding : string
        Encoding used to decode pandoc's output. Bytes that cannot be
        decoded are replaced instead of raising.

    Returns
    -------
    out : unicode
        Output as returned by pandoc, with trailing newlines removed.

    Raises
    ------
    PandocMissing
        If pandoc is not installed.

    Notes
    -----
    pandoc's stderr is not captured, so any error messages it generates are
    printed to the stderr of the calling process.
    """
    command = ["pandoc", "-f", fmt, "-t", to, *(extra_args or [])]

    # Raises PandocMissing when pandoc is unavailable; an unsupported version
    # only triggers a warning, so execution continues past this call.
    check_pandoc_version()

    process = subprocess.Popen(  # noqa: S603
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    raw_output, _ = process.communicate(source.encode())

    # Decode through TextIOWrapper (rather than bytes.decode) so the text
    # layer's default newline handling is applied to pandoc's output as well.
    reader = TextIOWrapper(BytesIO(raw_output), encoding, "replace")
    converted = reader.read()
    return converted.rstrip("\n")
57
58
def _parse_pandoc_version(output):
    """Extract the version token from the raw bytes printed by ``pandoc -v``.

    Only the first line of ``output`` is inspected. Returns the first
    whitespace-separated token that looks like a dotted version number
    (e.g. ``"3.1.12.1"``), or None if the output is empty or contains no
    such token.
    """
    lines = output.splitlines()
    if not lines:
        # pandoc printed nothing at all: treat the version as undeterminable
        # instead of failing with an IndexError.
        return None

    version_pattern = re.compile(r"^\d+(\.\d+){1,}$")
    # The first line may carry a program name such as "pandoc" or
    # "pandoc.exe" before the version, so scan every token.
    for tok in lines[0].decode("ascii", "replace").split():
        if version_pattern.match(tok):
            return tok
    return None


def get_pandoc_version():
    """Return the version of the installed pandoc as a string.

    On first use pandoc is probed with ``pandoc -v`` and the parsed version
    is cached in module state. Later calls return the cached value without
    running pandoc again, until `clean_cache()` is called.

    Returns
    -------
    version : str or None
        The version string (e.g. ``"2.9.2"``), or None if it could not be
        parsed from pandoc's output. A None result is not cached, so the
        next call probes pandoc again.

    Raises
    ------
    PandocMissing
        If pandoc is unavailable.
    """
    global __version  # noqa: PLW0603

    if __version is None:
        if not shutil.which("pandoc"):
            raise PandocMissing()

        out = subprocess.check_output(["pandoc", "-v"])  # noqa: S607, S603
        __version = _parse_pandoc_version(out)  # type:ignore[assignment]
    return __version
85
86
def check_pandoc_version():
    """Check whether the installed pandoc is a supported version.

    The version reported by `get_pandoc_version()` is compared against
    `_minimal_version` and `_maximal_version` using `check_version`. The
    outcome is cached on the function (``check_pandoc_version._cached``), so
    the comparison and any "unsupported version" warning happen only once
    until that attribute is reset to None.

    Returns
    -------
    ok : bool
        True if pandoc's version is supported, False otherwise. False is
        also returned, without being cached, when the version cannot be
        determined.

    Warns
    -----
    RuntimeWarning
        If the version cannot be determined or is unsupported.

    Raises
    ------
    PandocMissing
        If pandoc is unavailable.
    """
    cached = check_pandoc_version._cached  # type:ignore[attr-defined]
    if cached is not None:
        return cached

    v = get_pandoc_version()
    if v is None:
        # Adjacent literals are concatenated as-is, so the separating space
        # after "include the" must be part of the literal.
        warnings.warn(
            "Sorry, we cannot determine the version of pandoc.\n"
            "Please consider reporting this issue and include the "
            "output of pandoc --version.\nContinuing...",
            RuntimeWarning,
            stacklevel=2,
        )
        # Deliberately not cached: a later call gets another chance to
        # determine the version.
        return False

    ok = check_version(v, _minimal_version, max_v=_maximal_version)
    check_pandoc_version._cached = ok  # type:ignore[attr-defined]
    if not ok:
        warnings.warn(
            f"You are using an unsupported version of pandoc ({v}).\n"
            f"Your version must be at least ({_minimal_version}) "
            f"but less than ({_maximal_version}).\n"
            "Refer to https://pandoc.org/installing.html.\nContinuing with doubts...",
            RuntimeWarning,
            stacklevel=2,
        )
    return ok


# No result has been computed yet; None means "not checked".
check_pandoc_version._cached = None  # type:ignore[attr-defined]
123
124# -----------------------------------------------------------------------------
125# Exception handling
126# -----------------------------------------------------------------------------
127
128
class PandocMissing(ConversionException):
    """Error signalling that the pandoc executable could not be found."""

    def __init__(self, *args, **kwargs):
        """Create the exception with a fixed installation hint.

        Positional and keyword arguments are accepted but ignored; the
        message is always the same.
        """
        message = (
            "Pandoc wasn't found.\n"
            "Please check that pandoc is installed:\n"
            "https://pandoc.org/installing.html"
        )
        super().__init__(message)
139
140
141# -----------------------------------------------------------------------------
142# Internal state management
143# -----------------------------------------------------------------------------
def clean_cache():
    """Clean the internal cache.

    Resets both pieces of cached pandoc state: the version string stored by
    `get_pandoc_version()` and the result stored by
    `check_pandoc_version()`. After this call the next use of either
    function probes pandoc again.
    """
    global __version  # noqa: PLW0603
    __version = None
    # The cached check result was derived from the cached version, so it
    # must not outlive it.
    check_pandoc_version._cached = None  # type:ignore[attr-defined]


# Cached pandoc version string; None until get_pandoc_version() determines it.
__version = None