1""" io on the clipboard """
2from __future__ import annotations
3
4from io import StringIO
5from typing import TYPE_CHECKING
6import warnings
7
8from pandas._libs import lib
9from pandas.util._exceptions import find_stack_level
10from pandas.util._validators import check_dtype_backend
11
12from pandas.core.dtypes.generic import ABCDataFrame
13
14from pandas import (
15 get_option,
16 option_context,
17)
18
19if TYPE_CHECKING:
20 from pandas._typing import DtypeBackend
21
22
def read_clipboard(
    sep: str = r"\s+",
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    **kwargs,
):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_csv.

    Parameters
    ----------
    sep : str, default '\s+'
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
        Which dtype_backend to use. Nullable dtypes are used for all dtypes
        that have a nullable implementation when "numpy_nullable" is set,
        pyarrow is used for all dtypes if "pyarrow" is set.

        The dtype_backends are still experimental.

        .. versionadded:: 2.0

    **kwargs
        See read_csv for the full argument list. ``encoding`` is consumed
        here and is not forwarded to read_csv.

    Returns
    -------
    DataFrame
        A parsed DataFrame object.

    Raises
    ------
    NotImplementedError
        If an ``encoding`` other than utf-8 is requested.
    """
    # Pop with a ``None`` default so that "not passed" can be told apart from
    # an explicit value when decoding below. Both ``None`` and "utf-8" pass
    # the validation, so which inputs are accepted is unchanged.
    encoding = kwargs.pop("encoding", None)

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace("-", "") != "utf8":
        raise NotImplementedError("reading from clipboard only supports utf-8 encoding")

    check_dtype_backend(dtype_backend)

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_csv

    text = clipboard_get()

    # Try to decode (if needed, as "text" might already be a string here).
    # Use the encoding the caller passed (it was popped from kwargs above, so
    # it can no longer be looked up there); otherwise fall back to the
    # configured display encoding.
    try:
        text = text.decode(encoding or get_option("display.encoding"))
    except AttributeError:
        # No ``decode`` attribute: text is already a str.
        pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the 10 first lines, if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    # (the element after the last newline is dropped by the [:-1] slice)
    lines = text[:10000].split("\n")[:-1][:10]

    # Need to remove leading white space, since read_csv
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip(" ").count("\t") for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = "\t"
        # check the amount of leading whitespace (spaces/tabs) in the first
        # line to account for index columns
        index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
        if index_length != 0:
            # setdefault: never override an index_col chosen by the caller
            kwargs.setdefault("index_col", list(range(index_length)))

    # Edge case where sep is specified to be None, return to default
    if sep is None and kwargs.get("delim_whitespace") is None:
        sep = r"\s+"

    # Regex separator currently only works with python engine.
    # Default to python if separator is multi-character (regex).
    # ``sep`` can still be None here (sep=None together with
    # delim_whitespace), so guard before calling len() on it.
    if sep is not None and len(sep) > 1:
        engine = kwargs.get("engine")
        if engine is None:
            kwargs["engine"] = "python"
        elif engine == "c":
            warnings.warn(
                "read_clipboard with regex separator does not work properly with c engine.",
                stacklevel=find_stack_level(),
            )

    return read_csv(StringIO(text), sep=sep, dtype_backend=dtype_backend, **kwargs)
111
112
def to_clipboard(
    obj, excel: bool | None = True, sep: str | None = None, **kwargs
) -> None:  # pragma: no cover
    """
    Attempt to write a text representation of an object to the system clipboard.

    In excel mode the object is serialised with ``to_csv`` so the result can
    be pasted into Excel, for example. If that raises ``TypeError``, a warning
    is emitted and the plain string representation is written instead.

    Parameters
    ----------
    obj : the object to write to the clipboard
    excel : bool, defaults to True
        if True (or None), use the provided separator, writing in a csv
        format for allowing easy pasting into excel.
        if False, write a string representation of the object
        to the clipboard
    sep : optional, defaults to tab
        only used in excel mode; a warning is issued if it is given
        together with ``excel=False``.
    **kwargs
        other keywords are passed to to_csv (and to to_string when a
        DataFrame is written as plain text). ``encoding`` is consumed
        here and must be utf-8.

    Raises
    ------
    ValueError
        If an ``encoding`` other than utf-8 is requested.

    Notes
    -----
    Requirements for your platform
      - Linux: xclip, or xsel (with PyQt4 modules)
      - Windows: none listed
      - OS X: none listed
    """
    requested_encoding = kwargs.pop("encoding", "utf-8")

    # Reject any encoding whose name does not normalise to "utf8".
    if requested_encoding is not None:
        if requested_encoding.lower().replace("-", "") != "utf8":
            raise ValueError("clipboard only supports utf-8 encoding")

    from pandas.io.clipboard import clipboard_set

    # ``None`` means "use the default", which is excel mode.
    excel_mode = True if excel is None else excel

    if not excel_mode:
        if sep is not None:
            warnings.warn(
                "to_clipboard with excel=False ignores the sep argument.",
                stacklevel=find_stack_level(),
            )
    else:
        delimiter = "\t" if sep is None else sep
        try:
            csv_buffer = StringIO()

            # clipboard_set (pyperclip) expects unicode
            obj.to_csv(csv_buffer, sep=delimiter, encoding="utf-8", **kwargs)
            clipboard_set(csv_buffer.getvalue())
        except TypeError:
            # Warn, then fall through to the plain-text path below.
            warnings.warn(
                "to_clipboard in excel mode requires a single character separator.",
                stacklevel=find_stack_level(),
            )
        else:
            return

    # Plain-text path: excel mode is off, or the csv attempt raised TypeError.
    if isinstance(obj, ABCDataFrame):
        # str(df) has various unhelpful defaults, like truncation
        with option_context("display.max_colwidth", None):
            plain_text = obj.to_string(**kwargs)
    else:
        plain_text = str(obj)
    clipboard_set(plain_text)