1""" io on the clipboard """
2from __future__ import annotations
3
4from io import StringIO
5from typing import TYPE_CHECKING
6import warnings
7
8from pandas._libs import lib
9from pandas.util._exceptions import find_stack_level
10from pandas.util._validators import check_dtype_backend
11
12from pandas.core.dtypes.generic import ABCDataFrame
13
14from pandas import (
15 get_option,
16 option_context,
17)
18
19if TYPE_CHECKING:
20 from pandas._typing import DtypeBackend
21
22
def read_clipboard(
    sep: str = r"\s+",
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    **kwargs,
):  # pragma: no cover
    r"""
    Read text from clipboard and pass to :func:`~pandas.read_csv`.

    Parses clipboard contents similar to how CSV files are parsed
    using :func:`~pandas.read_csv`.

    Parameters
    ----------
    sep : str, default '\\s+'
        A string or regex delimiter. The default of ``'\\s+'`` denotes
        one or more whitespace characters. If the first lines of the
        clipboard all contain the same non-zero number of tabs, the
        separator is overridden with ``'\t'``. Passing ``None`` restores
        the default unless ``delim_whitespace`` is given in ``kwargs``.

    dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    **kwargs
        See :func:`~pandas.read_csv` for the full argument list.
        ``encoding`` is consumed by this function and is not forwarded.

    Returns
    -------
    DataFrame
        A parsed :class:`~pandas.DataFrame` object.

    Raises
    ------
    NotImplementedError
        If ``encoding`` is given and is not a spelling of utf-8.

    See Also
    --------
    DataFrame.to_clipboard : Copy object to the system clipboard.
    read_csv : Read a comma-separated values (csv) file into DataFrame.
    read_fwf : Read a table of fixed-width formatted lines into DataFrame.

    Examples
    --------
    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
    >>> df.to_clipboard()  # doctest: +SKIP
    >>> pd.read_clipboard()  # doctest: +SKIP
         A  B  C
    0    1  2  3
    1    4  5  6
    """
    encoding = kwargs.pop("encoding", "utf-8")

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace("-", "") != "utf8":
        raise NotImplementedError("reading from clipboard only supports utf-8 encoding")

    check_dtype_backend(dtype_backend)

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_csv

    text = clipboard_get()

    # Try to decode (if needed, as "text" might already be a string here).
    # "encoding" was popped from kwargs above, so the validated local value
    # must be used; the display encoding is only a fallback for None.
    try:
        text = text.decode(encoding or get_option("display.encoding"))
    except AttributeError:
        pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the 10 first lines, if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split("\n")[:-1][:10]

    # Need to remove leading white space, since read_csv
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip(" ").count("\t") for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = "\t"
        # check the number of leading tabs in the first line
        # to account for index columns
        index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
        if index_length != 0:
            kwargs.setdefault("index_col", list(range(index_length)))

    # Edge case where sep is specified to be None, return to default
    if sep is None and kwargs.get("delim_whitespace") is None:
        sep = r"\s+"

    # Regex separator currently only works with python engine.
    # Default to python if separator is multi-character (regex).
    # sep may still be None here (sep=None combined with an explicit
    # delim_whitespace); guard the length check so that case reaches
    # read_csv instead of failing on len(None).
    is_regex_sep = sep is not None and len(sep) > 1
    engine = kwargs.get("engine")
    if is_regex_sep and engine is None:
        kwargs["engine"] = "python"
    elif is_regex_sep and engine == "c":
        warnings.warn(
            "read_clipboard with regex separator does not work properly with c engine.",
            stacklevel=find_stack_level(),
        )

    return read_csv(StringIO(text), sep=sep, dtype_backend=dtype_backend, **kwargs)
130
131
def to_clipboard(
    obj, excel: bool | None = True, sep: str | None = None, **kwargs
) -> None:  # pragma: no cover
    """
    Write a text representation of an object to the system clipboard.

    In excel mode the object is serialised through ``obj.to_csv`` so the
    result can be pasted into a spreadsheet; otherwise its string form is
    copied.

    Parameters
    ----------
    obj : the object to write to the clipboard
    excel : bool, defaults to True
        If True (or None), write ``obj`` in a csv format using ``sep``.
        If False, write a string representation of ``obj``.
    sep : optional, defaults to tab
        Field separator used in excel mode. Ignored, with a warning, when
        ``excel`` is False.
    **kwargs
        Passed to ``obj.to_csv`` in excel mode, and to ``obj.to_string``
        when a DataFrame is written as plain text. ``encoding`` is consumed
        here and must be None or a spelling of utf-8.

    Raises
    ------
    ValueError
        If ``encoding`` is given and is not a spelling of utf-8.

    Notes
    -----
    If a ``TypeError`` is raised while writing in excel mode, a warning is
    issued and the plain string representation is copied instead.

    Requirements for your platform
      - Linux: xclip, or xsel (with PyQt4 modules)
      - Windows:
      - OS X:
    """
    requested_encoding = kwargs.pop("encoding", "utf-8")

    # reject anything that is not some spelling of utf-8
    if requested_encoding is not None:
        canonical = requested_encoding.lower().replace("-", "")
        if canonical != "utf8":
            raise ValueError("clipboard only supports utf-8 encoding")

    from pandas.io.clipboard import clipboard_set

    # None is treated the same as the default (excel mode on)
    excel_mode = True if excel is None else excel

    if excel_mode:
        delimiter = "\t" if sep is None else sep
        csv_buffer = StringIO()
        try:
            # clipboard_set (pyperclip) expects unicode
            obj.to_csv(csv_buffer, sep=delimiter, encoding="utf-8", **kwargs)
            clipboard_set(csv_buffer.getvalue())
        except TypeError:
            # fall through to the plain-text path below
            warnings.warn(
                "to_clipboard in excel mode requires a single character separator.",
                stacklevel=find_stack_level(),
            )
        else:
            return
    elif sep is not None:
        warnings.warn(
            "to_clipboard with excel=False ignores the sep argument.",
            stacklevel=find_stack_level(),
        )

    if isinstance(obj, ABCDataFrame):
        # str(df) has various unhelpful defaults, like truncation
        with option_context("display.max_colwidth", None):
            rendered = obj.to_string(**kwargs)
    else:
        rendered = str(obj)
    clipboard_set(rendered)