Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pyarrow/__init__.py: 24%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
18# flake8: noqa
20"""
21PyArrow is the python implementation of Apache Arrow.
23Apache Arrow is a cross-language development platform for in-memory data.
24It specifies a standardized language-independent columnar memory format for
25flat and hierarchical data, organized for efficient analytic operations on
26modern hardware. It also provides computational libraries and zero-copy
27streaming messaging and interprocess communication.
29For more information see the official page at https://arrow.apache.org
30"""
32import gc as _gc
33import importlib as _importlib
34import os as _os
35import platform as _platform
36import sys as _sys
37import warnings as _warnings
# Resolve ``__version__``: prefer the version file generated at build time,
# fall back to asking setuptools_scm, and give up with ``None`` otherwise.
try:
    from ._generated_version import version as __version__
except ImportError:
    # Package is not installed, parse git tag at runtime
    try:
        import setuptools_scm

        # Code duplicated from setup.py so that neither file has to import
        # the other.
        def parse_git(root, **kwargs):
            """
            Parse function for setuptools_scm that ignores tags for non-C++
            subprojects, e.g. apache-arrow-js-XXX tags.
            """
            from setuptools_scm.git import parse
            kwargs['describe_command'] = (
                "git describe --dirty --tags --long "
                "--match 'apache-arrow-[0-9]*.*'"
            )
            return parse(root, **kwargs)

        __version__ = setuptools_scm.get_version('../', parse=parse_git)
    except ImportError:
        # setuptools_scm (or its git submodule) is not importable.
        __version__ = None
# ARROW-8684: Disable GC while initializing Cython extension module,
# to workaround Cython bug in https://github.com/cython/cython/issues/3603
_gc_enabled = _gc.isenabled()
_gc.disable()
try:
    import pyarrow.lib as _lib
finally:
    # Restore the collector even when the import raises; otherwise a failed
    # import would leave garbage collection switched off for the whole
    # process. Only re-enable it if it was enabled to begin with.
    if _gc_enabled:
        _gc.enable()
69from pyarrow.lib import (BuildInfo, RuntimeInfo, set_timezone_db_path,
70 MonthDayNano, VersionInfo, cpp_build_info,
71 cpp_version, cpp_version_info, runtime_info,
72 cpu_count, set_cpu_count, enable_signal_handlers,
73 io_thread_count, set_io_thread_count)
def show_versions():
    """
    Print various version information, to help with error reporting.

    One line is printed per entry of the Arrow C++ build information
    exposed by ``cpp_build_info``.
    """
    build = cpp_build_info

    # An empty package kind is reported with a placeholder text instead.
    package_kind = build.package_kind
    if len(package_kind) == 0:
        package_kind = "not indicated"

    rows = (
        ("Package kind", package_kind),
        ("Arrow C++ library version", build.version),
        ("Arrow C++ compiler",
         f"{build.compiler_id} {build.compiler_version}"),
        ("Arrow C++ compiler flags", build.compiler_flags),
        ("Arrow C++ git revision", build.git_id),
        ("Arrow C++ git description", build.git_description),
        ("Arrow C++ build type", build.build_type),
    )

    print("pyarrow version info\n--------------------")
    for label, value in rows:
        # Label padded to 26 columns, value padded to at least 8.
        print(f"{label: <26}: {value: <8}")
96def _module_is_available(module):
97 try:
98 _importlib.import_module(f'pyarrow.{module}')
99 except ImportError:
100 return False
101 else:
102 return True
105def _filesystem_is_available(fs):
106 try:
107 import pyarrow.fs
108 except ImportError:
109 return False
111 try:
112 getattr(pyarrow.fs, fs)
113 except (ImportError, AttributeError):
114 return False
115 else:
116 return True
def show_info():
    """
    Print detailed version and platform information, for error reporting

    The output starts with everything ``show_versions()`` prints, followed
    by sections for the platform, the default memory pool, optional
    modules, filesystems and compression codecs.
    """
    show_versions()

    def print_entry(label, value):
        print(f" {label: <20}: {value: <8}")

    def print_availability(names, is_available):
        # One line per name: "Enabled" when the predicate holds, "-" if not.
        for name in names:
            print_entry(name, "Enabled" if is_available(name) else "-")

    print("\nPlatform:")
    print_entry("OS / Arch", f"{_platform.system()} {_platform.machine()}")
    print_entry("SIMD Level", runtime_info().simd_level)
    print_entry("Detected SIMD Level", runtime_info().detected_simd_level)

    pool = default_memory_pool()
    print("\nMemory:")
    print_entry("Default backend", pool.backend_name)
    print_entry("Bytes allocated", f"{pool.bytes_allocated()} bytes")
    print_entry("Max memory", f"{pool.max_memory()} bytes")
    print_entry("Supported Backends", ', '.join(supported_memory_backends()))

    print("\nOptional modules:")
    print_availability(
        ["csv", "cuda", "dataset", "feather", "flight", "fs", "gandiva",
         "json", "orc", "parquet"],
        _module_is_available)

    print("\nFilesystems:")
    print_availability(
        ["AzureFileSystem", "GcsFileSystem",
         "HadoopFileSystem", "S3FileSystem"],
        _filesystem_is_available)

    print("\nCompression Codecs:")
    print_availability(
        ["brotli", "bz2", "gzip", "lz4_frame", "lz4", "snappy", "zstd"],
        Codec.is_available)
161from pyarrow.lib import (null, bool_,
162 int8, int16, int32, int64,
163 uint8, uint16, uint32, uint64,
164 time32, time64, timestamp, date32, date64, duration,
165 month_day_nano_interval,
166 float16, float32, float64,
167 binary, string, utf8, binary_view, string_view,
168 large_binary, large_string, large_utf8,
169 decimal32, decimal64, decimal128, decimal256,
170 list_, large_list, list_view, large_list_view,
171 map_, struct,
172 union, sparse_union, dense_union,
173 dictionary,
174 run_end_encoded,
175 bool8, fixed_shape_tensor, json_, opaque, uuid,
176 field,
177 type_for_alias,
178 DataType, DictionaryType, StructType,
179 ListType, LargeListType, FixedSizeListType,
180 ListViewType, LargeListViewType,
181 MapType, UnionType, SparseUnionType, DenseUnionType,
182 TimestampType, Time32Type, Time64Type, DurationType,
183 FixedSizeBinaryType,
184 Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type,
185 BaseExtensionType, ExtensionType,
186 RunEndEncodedType, Bool8Type, FixedShapeTensorType,
187 JsonType, OpaqueType, UuidType,
188 PyExtensionType, UnknownExtensionType,
189 register_extension_type, unregister_extension_type,
190 DictionaryMemo,
191 KeyValueMetadata,
192 Field,
193 Schema,
194 schema,
195 unify_schemas,
196 Array, Tensor,
197 array, chunked_array, record_batch, nulls, repeat,
198 SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
199 SparseCSFTensor,
200 infer_type, from_numpy_dtype,
201 NullArray,
202 NumericArray, IntegerArray, FloatingPointArray,
203 BooleanArray,
204 Int8Array, UInt8Array,
205 Int16Array, UInt16Array,
206 Int32Array, UInt32Array,
207 Int64Array, UInt64Array,
208 HalfFloatArray, FloatArray, DoubleArray,
209 ListArray, LargeListArray, FixedSizeListArray,
210 ListViewArray, LargeListViewArray,
211 MapArray, UnionArray,
212 BinaryArray, StringArray,
213 LargeBinaryArray, LargeStringArray,
214 BinaryViewArray, StringViewArray,
215 FixedSizeBinaryArray,
216 DictionaryArray,
217 Date32Array, Date64Array, TimestampArray,
218 Time32Array, Time64Array, DurationArray,
219 MonthDayNanoIntervalArray,
220 Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
221 StructArray, ExtensionArray,
222 RunEndEncodedArray, Bool8Array, FixedShapeTensorArray,
223 JsonArray, OpaqueArray, UuidArray,
224 scalar, NA, _NULL as NULL, Scalar,
225 NullScalar, BooleanScalar,
226 Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
227 UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
228 HalfFloatScalar, FloatScalar, DoubleScalar,
229 Decimal32Scalar, Decimal64Scalar, Decimal128Scalar, Decimal256Scalar,
230 ListScalar, LargeListScalar, FixedSizeListScalar,
231 ListViewScalar, LargeListViewScalar,
232 Date32Scalar, Date64Scalar,
233 Time32Scalar, Time64Scalar,
234 TimestampScalar, DurationScalar,
235 MonthDayNanoIntervalScalar,
236 BinaryScalar, LargeBinaryScalar, BinaryViewScalar,
237 StringScalar, LargeStringScalar, StringViewScalar,
238 FixedSizeBinaryScalar, DictionaryScalar,
239 MapScalar, StructScalar, UnionScalar,
240 RunEndEncodedScalar, Bool8Scalar, ExtensionScalar,
241 FixedShapeTensorScalar, JsonScalar, OpaqueScalar, UuidScalar)
243# Buffers, allocation
244from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
245 default_cpu_memory_manager)
247from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
248 Codec, compress, decompress, allocate_buffer)
250from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
251 total_allocated_bytes, set_memory_pool,
252 default_memory_pool, system_memory_pool,
253 jemalloc_memory_pool, mimalloc_memory_pool,
254 logging_memory_pool, proxy_memory_pool,
255 log_memory_allocations, jemalloc_set_decay_ms,
256 supported_memory_backends)
258# I/O
259from pyarrow.lib import (NativeFile, PythonFile,
260 BufferedInputStream, BufferedOutputStream, CacheOptions,
261 CompressedInputStream, CompressedOutputStream,
262 TransformInputStream, transcoding_input_stream,
263 FixedSizeBufferWriter,
264 BufferReader, BufferOutputStream,
265 OSFile, MemoryMappedFile, memory_map,
266 create_memory_map, MockOutputStream,
267 input_stream, output_stream,
268 have_libhdfs)
270from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
271 concat_arrays, concat_tables, TableGroupBy,
272 RecordBatchReader, concat_batches)
274# Exceptions
275from pyarrow.lib import (ArrowCancelled,
276 ArrowCapacityError,
277 ArrowException,
278 ArrowKeyError,
279 ArrowIndexError,
280 ArrowInvalid,
281 ArrowIOError,
282 ArrowMemoryError,
283 ArrowNotImplementedError,
284 ArrowTypeError,
285 ArrowSerializationError)
287from pyarrow.ipc import serialize_pandas, deserialize_pandas
288import pyarrow.ipc as ipc
290import pyarrow.types as types
293# ----------------------------------------------------------------------
294# Deprecations
296from pyarrow.util import _deprecate_api, _deprecate_class
299# TODO: Deprecate these somehow in the pyarrow namespace
300from pyarrow.ipc import (Message, MessageReader, MetadataVersion,
301 RecordBatchFileReader, RecordBatchFileWriter,
302 RecordBatchStreamReader, RecordBatchStreamWriter)
304# ----------------------------------------------------------------------
305# Returning absolute path to the pyarrow include directory (if bundled, e.g. in
306# wheels)
def get_include():
    """
    Return the path of the ``include`` directory located next to this
    module, which holds the Arrow C++ headers when they are bundled.
    Similar to numpy.get_include
    """
    package_dir = _os.path.dirname(__file__)
    return _os.path.join(package_dir, 'include')
317def _get_pkg_config_executable():
318 return _os.environ.get('PKG_CONFIG', 'pkg-config')
def _has_pkg_config(pkgname):
    """
    Return True if ``pkg-config --exists <pkgname>`` exits with status 0.

    Return False on a non-zero exit status or when the pkg-config
    executable cannot be found.
    """
    import subprocess
    command = [_get_pkg_config_executable(), '--exists', pkgname]
    try:
        exit_status = subprocess.call(command)
    except FileNotFoundError:
        # pkg-config itself is not installed / not on PATH.
        return False
    return exit_status == 0
def _read_pkg_config_variable(pkgname, cli_args):
    """
    Run pkg-config for ``pkgname`` with the extra arguments ``cli_args``
    (a list) and return its standard output, decoded as UTF-8 with
    trailing whitespace removed.

    Raises RuntimeError carrying pkg-config's standard error output if the
    command exits with a non-zero status.
    """
    import subprocess
    command = [_get_pkg_config_executable(), pkgname] + cli_args
    completed = subprocess.run(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    if completed.returncode != 0:
        raise RuntimeError(
            "pkg-config failed: " + completed.stderr.decode('utf8'))
    return completed.stdout.rstrip().decode('utf8')
def get_libraries():
    """
    Return list of library names to include in the `libraries` argument for C
    or Cython extensions using pyarrow

    A new list is built on every call.
    """
    libraries = ['arrow_python']
    libraries.append('arrow')
    return libraries
def create_library_symlinks():
    """
    With Linux and macOS wheels, the bundled shared libraries have an embedded
    ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them
    with -larrow won't work unless we create symlinks at locations like
    site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses
    prior problems we had with shipping two copies of the shared libraries to
    permit third party projects like turbodbc to build their C++ extensions
    against the pyarrow wheels.

    This function must only be invoked once and only when the shared libraries
    are bundled with the Python package, which should only apply to wheel-based
    installs. It requires write access to the site-packages/pyarrow directory
    and so depending on your system may need to be run with root.

    Does nothing on Windows. Symlinks that already exist are left untouched.
    A PermissionError while creating a symlink is not raised; a message
    naming the symlink that could not be created is printed instead.
    """
    import glob
    if _sys.platform == 'win32':
        return
    package_cwd = _os.path.dirname(__file__)

    if _sys.platform == 'linux':
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))

        def get_symlink_path(hard_path):
            # Drop the last dot-separated component (the ABI version).
            return hard_path.rsplit('.', 1)[0]
    else:
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))

        def get_symlink_path(hard_path):
            # Drop the ABI version that sits in front of the ".dylib" suffix.
            return '.'.join((hard_path.rsplit('.', 2)[0], 'dylib'))

    for lib_hard_path in bundled_libs:
        symlink_path = get_symlink_path(lib_hard_path)
        if _os.path.exists(symlink_path):
            continue
        try:
            _os.symlink(lib_hard_path, symlink_path)
        except PermissionError:
            # The "{}" placeholder must actually be filled in, otherwise the
            # user is never told which symlink could not be created.
            print("Tried creating symlink {}. If you need to link to "
                  "bundled shared libraries, run "
                  "pyarrow.create_library_symlinks() as root"
                  .format(symlink_path))
def get_library_dirs():
    """
    Return lists of directories likely to contain Arrow C++ libraries for
    linking C or Cython extensions using pyarrow

    The directory containing this module always comes first. Further
    directories are appended, without duplicates, from:

    * the ``--libs-only-L`` output of pkg-config for the ``arrow`` and
      ``arrow_python`` packages, when pkg-config knows about them;
    * on Windows, ``Library\\lib`` next to the Python executable, when it
      contains ``arrow.lib``;
    * ``$ARROW_HOME/lib`` when ``ARROW_HOME`` is set to a non-empty value,
      otherwise the absolute path of the directory containing this module.

    Raises ValueError if pkg-config returns non-empty output that does not
    start with ``-L``.
    """
    package_cwd = _os.path.dirname(__file__)
    library_dirs = [package_cwd]

    def append_library_dir(library_dir):
        if library_dir not in library_dirs:
            library_dirs.append(library_dir)

    # Search library paths via pkg-config. This is necessary if the user
    # installed libarrow and the other shared libraries manually and they
    # are not shipped inside the pyarrow package (see also ARROW-2976).
    for pkgname in ["arrow", "arrow_python"]:
        if _has_pkg_config(pkgname):
            library_dir = _read_pkg_config_variable(pkgname,
                                                    ["--libs-only-L"])
            # pkg-config output could be empty if Arrow is installed
            # as a system package.
            if library_dir:
                if not library_dir.startswith("-L"):
                    raise ValueError(
                        "pkg-config --libs-only-L returned unexpected "
                        "value {!r}".format(library_dir))
                # NOTE(review): everything after the leading "-L" is taken
                # as a single path; output holding several -L flags would
                # not be split - confirm whether that can occur.
                append_library_dir(library_dir[2:])

    if _sys.platform == 'win32':
        # TODO(wesm): Is this necessary, or does setuptools within a conda
        # installation add Library\lib to the linker path for MSVC?
        python_base_install = _os.path.dirname(_sys.executable)
        library_dir = _os.path.join(python_base_install, 'Library', 'lib')

        if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
            append_library_dir(library_dir)

    # ARROW-4074: Allow for ARROW_HOME to be set to some other directory
    if _os.environ.get('ARROW_HOME'):
        append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
    else:
        # Python wheels bundle the Arrow libraries in the pyarrow directory.
        append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))

    return library_dirs