Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/sql.py: 16%
1"""
2Collection of query wrappers / abstractions to both facilitate data
3retrieval and to reduce dependency on DB-specific API.
4"""
6from __future__ import annotations
8from abc import (
9 ABC,
10 abstractmethod,
11)
12from contextlib import (
13 ExitStack,
14 contextmanager,
15)
16from datetime import (
17 date,
18 datetime,
19 time,
20)
21from functools import partial
22import re
23from typing import (
24 TYPE_CHECKING,
25 Any,
26 Callable,
27 Literal,
28 cast,
29 overload,
30)
31import warnings
33import numpy as np
35from pandas._config import using_pyarrow_string_dtype
37from pandas._libs import lib
38from pandas.compat._optional import import_optional_dependency
39from pandas.errors import (
40 AbstractMethodError,
41 DatabaseError,
42)
43from pandas.util._exceptions import find_stack_level
44from pandas.util._validators import check_dtype_backend
46from pandas.core.dtypes.common import (
47 is_dict_like,
48 is_list_like,
49)
50from pandas.core.dtypes.dtypes import (
51 ArrowDtype,
52 DatetimeTZDtype,
53)
54from pandas.core.dtypes.missing import isna
56from pandas import get_option
57from pandas.core.api import (
58 DataFrame,
59 Series,
60)
61from pandas.core.arrays import ArrowExtensionArray
62from pandas.core.base import PandasObject
63import pandas.core.common as com
64from pandas.core.common import maybe_make_list
65from pandas.core.internals.construction import convert_object_array
66from pandas.core.tools.datetimes import to_datetime
68if TYPE_CHECKING:
69 from collections.abc import (
70 Iterator,
71 Mapping,
72 )
74 from sqlalchemy import Table
75 from sqlalchemy.sql.expression import (
76 Select,
77 TextClause,
78 )
80 from pandas._typing import (
81 DateTimeErrorChoices,
82 DtypeArg,
83 DtypeBackend,
84 IndexLabel,
85 Self,
86 )
88 from pandas import Index
90# -----------------------------------------------------------------------------
91# -- Helper functions
94def _process_parse_dates_argument(parse_dates):
95 """Process parse_dates argument for read_sql functions"""
96 # handle non-list entries for parse_dates gracefully
97 if parse_dates is True or parse_dates is None or parse_dates is False:
98 parse_dates = []
100 elif not hasattr(parse_dates, "__iter__"):
101 parse_dates = [parse_dates]
102 return parse_dates
def _handle_date_column(
    col, utc: bool = False, format: str | dict[str, Any] | None = None
):
    if isinstance(format, dict):
        # GH35185 Allow custom error values in parse_dates argument of
        # read_sql like functions.
        # Format can take on custom to_datetime argument values such as
        # {"errors": "coerce"} or {"dayfirst": True}
        error: DateTimeErrorChoices = format.pop("errors", None) or "ignore"
        if error == "ignore":
            try:
                return to_datetime(col, **format)
            except (TypeError, ValueError):
                # TODO: not reached 2023-10-27; needed?
                return col
        return to_datetime(col, errors=error, **format)
    else:
        # Allow passing of formatting string for integers
        # GH17855
        if format is None and (
            issubclass(col.dtype.type, np.floating)
            or issubclass(col.dtype.type, np.integer)
        ):
            format = "s"
        if format in ["D", "d", "h", "m", "s", "ms", "us", "ns"]:
            return to_datetime(col, errors="coerce", unit=format, utc=utc)
        elif isinstance(col.dtype, DatetimeTZDtype):
            # coerce to UTC timezone
            # GH11216
            return to_datetime(col, utc=True)
        else:
            return to_datetime(col, errors="coerce", format=format, utc=utc)
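# Illustrative sketch (not part of pandas): how a ``parse_dates`` format dict
# flows through ``_handle_date_column``. The dict is forwarded to
# ``to_datetime`` after the "errors" key is popped; the values below are
# hypothetical.
#
# >>> col = Series(["10/11/12", "12/11/10"])
# >>> _handle_date_column(col, format={"format": "%d/%m/%y", "errors": "coerce"})
# 0   2012-11-10
# 1   2010-11-12
# dtype: datetime64[ns]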
def _parse_date_columns(data_frame, parse_dates):
    """
    Force non-datetime columns to be read as such.
    Supports both string formatted and integer timestamp columns.
    """
    parse_dates = _process_parse_dates_argument(parse_dates)

    # we want to coerce datetime64_tz dtypes for now to UTC
    # we could in theory do a 'nice' conversion from a FixedOffset tz
    # GH11216
    for i, (col_name, df_col) in enumerate(data_frame.items()):
        if isinstance(df_col.dtype, DatetimeTZDtype) or col_name in parse_dates:
            try:
                fmt = parse_dates[col_name]
            except (KeyError, TypeError):
                fmt = None
            data_frame.isetitem(i, _handle_date_column(df_col, format=fmt))

    return data_frame
def _convert_arrays_to_dataframe(
    data,
    columns,
    coerce_float: bool = True,
    dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
) -> DataFrame:
    content = lib.to_object_array_tuples(data)
    arrays = convert_object_array(
        list(content.T),
        dtype=None,
        coerce_float=coerce_float,
        dtype_backend=dtype_backend,
    )
    if dtype_backend == "pyarrow":
        pa = import_optional_dependency("pyarrow")

        result_arrays = []
        for arr in arrays:
            pa_array = pa.array(arr, from_pandas=True)
            if arr.dtype == "string":
                # TODO: Arrow still infers string arrays as regular strings instead
                # of large_string, which is what we preserve everywhere else for
                # dtype_backend="pyarrow". We may want to reconsider this
                pa_array = pa_array.cast(pa.string())
            result_arrays.append(ArrowExtensionArray(pa_array))
        arrays = result_arrays  # type: ignore[assignment]
    if arrays:
        df = DataFrame(dict(zip(list(range(len(columns))), arrays)))
        df.columns = columns
        return df
    else:
        return DataFrame(columns=columns)
def _wrap_result(
    data,
    columns,
    index_col=None,
    coerce_float: bool = True,
    parse_dates=None,
    dtype: DtypeArg | None = None,
    dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
):
    """Wrap result set of a SQLAlchemy query in a DataFrame."""
    frame = _convert_arrays_to_dataframe(data, columns, coerce_float, dtype_backend)

    if dtype:
        frame = frame.astype(dtype)

    frame = _parse_date_columns(frame, parse_dates)

    if index_col is not None:
        frame = frame.set_index(index_col)

    return frame
def _wrap_result_adbc(
    df: DataFrame,
    *,
    index_col=None,
    parse_dates=None,
    dtype: DtypeArg | None = None,
    dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
) -> DataFrame:
    """Wrap result set of an ADBC query in a DataFrame."""
    if dtype:
        df = df.astype(dtype)

    df = _parse_date_columns(df, parse_dates)

    if index_col is not None:
        df = df.set_index(index_col)

    return df
def execute(sql, con, params=None):
    """
    Execute the given SQL query using the provided connection object.

    Parameters
    ----------
    sql : string
        SQL query to be executed.
    con : SQLAlchemy connection or sqlite3 connection
        If a DBAPI2 object, only sqlite3 is supported.
    params : list or tuple, optional, default: None
        List of parameters to pass to execute method.

    Returns
    -------
    Results Iterable
    """
    warnings.warn(
        "`pandas.io.sql.execute` is deprecated and "
        "will be removed in a future version.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )  # GH50185
    sqlalchemy = import_optional_dependency("sqlalchemy", errors="ignore")

    if sqlalchemy is not None and isinstance(con, (str, sqlalchemy.engine.Engine)):
        raise TypeError("pandas.io.sql.execute requires a connection")  # GH50185
    with pandasSQL_builder(con, need_transaction=True) as pandas_sql:
        return pandas_sql.execute(sql, params)
# -----------------------------------------------------------------------------
# -- Read and write to DataFrames


@overload
def read_sql_table(
    table_name: str,
    con,
    schema=...,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    parse_dates: list[str] | dict[str, str] | None = ...,
    columns: list[str] | None = ...,
    chunksize: None = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
) -> DataFrame:
    ...


@overload
def read_sql_table(
    table_name: str,
    con,
    schema=...,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    parse_dates: list[str] | dict[str, str] | None = ...,
    columns: list[str] | None = ...,
    chunksize: int = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
) -> Iterator[DataFrame]:
    ...
def read_sql_table(
    table_name: str,
    con,
    schema: str | None = None,
    index_col: str | list[str] | None = None,
    coerce_float: bool = True,
    parse_dates: list[str] | dict[str, str] | None = None,
    columns: list[str] | None = None,
    chunksize: int | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL database table into a DataFrame.

    Given a table name and a SQLAlchemy connectable, returns a DataFrame.
    This function does not support DBAPI connections.

    Parameters
    ----------
    table_name : str
        Name of SQL table in database.
    con : SQLAlchemy connectable or str
        A database URI could be provided as str.
        SQLite DBAPI connection mode not supported.
    schema : str, default None
        Name of SQL schema in database to query (if database flavor
        supports this). Uses default schema if None (default).
    index_col : str or list of str, optional, default: None
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point. Can result in loss of precision.
    parse_dates : list or dict, default None
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`.
          Especially useful with databases without native Datetime support,
          such as SQLite.
    columns : list, default None
        List of column names to select from SQL table.
    chunksize : int, default None
        If specified, returns an iterator where `chunksize` is the number of
        rows to include in each chunk.
    dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    Returns
    -------
    DataFrame or Iterator[DataFrame]
        A SQL table is returned as two-dimensional data structure with labeled
        axes.

    See Also
    --------
    read_sql_query : Read SQL query into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.

    Notes
    -----
    Any datetime values with time zone information will be converted to UTC.

    Examples
    --------
    >>> pd.read_sql_table('table_name', 'postgres:///db_name')  # doctest:+SKIP
    """

    check_dtype_backend(dtype_backend)
    if dtype_backend is lib.no_default:
        dtype_backend = "numpy"  # type: ignore[assignment]
    assert dtype_backend is not lib.no_default

    with pandasSQL_builder(con, schema=schema, need_transaction=True) as pandas_sql:
        if not pandas_sql.has_table(table_name):
            raise ValueError(f"Table {table_name} not found")

        table = pandas_sql.read_table(
            table_name,
            index_col=index_col,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            columns=columns,
            chunksize=chunksize,
            dtype_backend=dtype_backend,
        )

    if table is not None:
        return table
    else:
        raise ValueError(f"Table {table_name} not found", con)
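# Illustrative sketch (not part of pandas): reading a table in chunks with a
# SQLAlchemy engine. The URI, table name, and ``process`` callable are
# hypothetical.
#
# >>> from sqlalchemy import create_engine  # doctest:+SKIP
# >>> engine = create_engine("sqlite:///database.db")  # doctest:+SKIP
# >>> for chunk in pd.read_sql_table("test_data", engine, chunksize=1000):  # doctest:+SKIP
# ...     process(chunk)  # each chunk is a DataFrame of up to 1000 rows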
@overload
def read_sql_query(
    sql,
    con,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    params: list[Any] | Mapping[str, Any] | None = ...,
    parse_dates: list[str] | dict[str, str] | None = ...,
    chunksize: None = ...,
    dtype: DtypeArg | None = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
) -> DataFrame:
    ...


@overload
def read_sql_query(
    sql,
    con,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    params: list[Any] | Mapping[str, Any] | None = ...,
    parse_dates: list[str] | dict[str, str] | None = ...,
    chunksize: int = ...,
    dtype: DtypeArg | None = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
) -> Iterator[DataFrame]:
    ...
def read_sql_query(
    sql,
    con,
    index_col: str | list[str] | None = None,
    coerce_float: bool = True,
    params: list[Any] | Mapping[str, Any] | None = None,
    parse_dates: list[str] | dict[str, str] | None = None,
    chunksize: int | None = None,
    dtype: DtypeArg | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL query into a DataFrame.

    Returns a DataFrame corresponding to the result set of the query
    string. Optionally provide an `index_col` parameter to use one of the
    columns as the index, otherwise default integer index will be used.

    Parameters
    ----------
    sql : str SQL query or SQLAlchemy Selectable (select or text object)
        SQL query to be executed.
    con : SQLAlchemy connectable, str, or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported.
    index_col : str or list of str, optional, default: None
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point. Useful for SQL result sets.
    params : list, tuple or mapping, optional, default: None
        List of parameters to pass to execute method. The syntax used
        to pass parameters is database driver dependent. Check your
        database driver documentation for which of the five syntax styles,
        described in PEP 249's paramstyle, is supported.
        Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
    parse_dates : list or dict, default: None
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`.
          Especially useful with databases without native Datetime support,
          such as SQLite.
    chunksize : int, default None
        If specified, return an iterator where `chunksize` is the number of
        rows to include in each chunk.
    dtype : Type name or dict of columns
        Data type for data or columns. E.g. np.float64 or
        {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.

        .. versionadded:: 1.3.0
    dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    Returns
    -------
    DataFrame or Iterator[DataFrame]

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.

    Notes
    -----
    Any datetime values with time zone information parsed via the `parse_dates`
    parameter will be converted to UTC.

    Examples
    --------
    >>> from sqlalchemy import create_engine  # doctest: +SKIP
    >>> engine = create_engine("sqlite:///database.db")  # doctest: +SKIP
    >>> with engine.connect() as conn, conn.begin():  # doctest: +SKIP
    ...     data = pd.read_sql_query("SELECT * FROM data", conn)  # doctest: +SKIP
    """

    check_dtype_backend(dtype_backend)
    if dtype_backend is lib.no_default:
        dtype_backend = "numpy"  # type: ignore[assignment]
    assert dtype_backend is not lib.no_default

    with pandasSQL_builder(con) as pandas_sql:
        return pandas_sql.read_query(
            sql,
            index_col=index_col,
            params=params,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            chunksize=chunksize,
            dtype=dtype,
            dtype_backend=dtype_backend,
        )
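# Illustrative sketch (not part of pandas): passing bound parameters. The
# "?" paramstyle shown is sqlite3's; other drivers use different styles
# (see PEP 249). The query is hypothetical.
#
# >>> import sqlite3
# >>> conn = sqlite3.connect(":memory:")
# >>> pd.read_sql_query("SELECT 1 AS a WHERE 1 = ?", conn, params=(1,))  # doctest:+SKIP
#    a
# 0  1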
@overload
def read_sql(
    sql,
    con,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    params=...,
    parse_dates=...,
    columns: list[str] = ...,
    chunksize: None = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    dtype: DtypeArg | None = None,
) -> DataFrame:
    ...


@overload
def read_sql(
    sql,
    con,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    params=...,
    parse_dates=...,
    columns: list[str] = ...,
    chunksize: int = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    dtype: DtypeArg | None = None,
) -> Iterator[DataFrame]:
    ...
def read_sql(
    sql,
    con,
    index_col: str | list[str] | None = None,
    coerce_float: bool = True,
    params=None,
    parse_dates=None,
    columns: list[str] | None = None,
    chunksize: int | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    dtype: DtypeArg | None = None,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL query or database table into a DataFrame.

    This function is a convenience wrapper around ``read_sql_table`` and
    ``read_sql_query`` (for backward compatibility). It will delegate
    to the specific function depending on the provided input. A SQL query
    will be routed to ``read_sql_query``, while a database table name will
    be routed to ``read_sql_table``. Note that the delegated function might
    have more specific notes about their functionality not listed here.

    Parameters
    ----------
    sql : str or SQLAlchemy Selectable (select or text object)
        SQL query to be executed or a table name.
    con : ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection
        ADBC provides high performance I/O with native type support, where available.
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible
        for engine disposal and connection closure for the ADBC connection and
        SQLAlchemy connectable; str connections are closed automatically. See
        `here <https://docs.sqlalchemy.org/en/20/core/connections.html>`_.
    index_col : str or list of str, optional, default: None
        Column(s) to set as index(MultiIndex).
    coerce_float : bool, default True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point, useful for SQL result sets.
    params : list, tuple or dict, optional, default: None
        List of parameters to pass to execute method. The syntax used
        to pass parameters is database driver dependent. Check your
        database driver documentation for which of the five syntax styles,
        described in PEP 249's paramstyle, is supported.
        Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
    parse_dates : list or dict, default: None
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`.
          Especially useful with databases without native Datetime support,
          such as SQLite.
    columns : list, default: None
        List of column names to select from SQL table (only used when reading
        a table).
    chunksize : int, default None
        If specified, return an iterator where `chunksize` is the
        number of rows to include in each chunk.
    dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0
    dtype : Type name or dict of columns
        Data type for data or columns. E.g. np.float64 or
        {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
        The argument is ignored if a table is passed instead of a query.

        .. versionadded:: 2.0.0

    Returns
    -------
    DataFrame or Iterator[DataFrame]

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql_query : Read SQL query into a DataFrame.

    Examples
    --------
    Read data from SQL via either a SQL query or a SQL tablename.
    When using a SQLite database only SQL queries are accepted;
    providing only the SQL tablename will result in an error.

    >>> from sqlite3 import connect
    >>> conn = connect(':memory:')
    >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']],
    ...                   columns=['int_column', 'date_column'])
    >>> df.to_sql(name='test_data', con=conn)
    2

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn)
       int_column date_column
    0           0    10/11/12
    1           1    12/11/10

    >>> pd.read_sql('test_data', 'postgres:///db_name')  # doctest:+SKIP

    Apply date parsing to columns through the ``parse_dates`` argument.
    The ``parse_dates`` argument calls ``pd.to_datetime`` on the provided columns.
    Custom argument values for applying ``pd.to_datetime`` on a column are specified
    via a dictionary format:

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
    ...             conn,
    ...             parse_dates={"date_column": {"format": "%d/%m/%y"}})
       int_column date_column
    0           0  2012-11-10
    1           1  2010-11-12

    .. versionadded:: 2.2.0

       pandas now supports reading via ADBC drivers

    >>> from adbc_driver_postgresql import dbapi  # doctest:+SKIP
    >>> with dbapi.connect('postgres:///db_name') as conn:  # doctest:+SKIP
    ...     pd.read_sql('SELECT int_column FROM test_data', conn)
       int_column
    0           0
    1           1
    """

    check_dtype_backend(dtype_backend)
    if dtype_backend is lib.no_default:
        dtype_backend = "numpy"  # type: ignore[assignment]
    assert dtype_backend is not lib.no_default

    with pandasSQL_builder(con) as pandas_sql:
        if isinstance(pandas_sql, SQLiteDatabase):
            return pandas_sql.read_query(
                sql,
                index_col=index_col,
                params=params,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                chunksize=chunksize,
                dtype_backend=dtype_backend,
                dtype=dtype,
            )

        try:
            _is_table_name = pandas_sql.has_table(sql)
        except Exception:
            # using generic exception to catch errors from sql drivers (GH24988)
            _is_table_name = False

        if _is_table_name:
            return pandas_sql.read_table(
                sql,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                columns=columns,
                chunksize=chunksize,
                dtype_backend=dtype_backend,
            )
        else:
            return pandas_sql.read_query(
                sql,
                index_col=index_col,
                params=params,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                chunksize=chunksize,
                dtype_backend=dtype_backend,
                dtype=dtype,
            )
def to_sql(
    frame,
    name: str,
    con,
    schema: str | None = None,
    if_exists: Literal["fail", "replace", "append"] = "fail",
    index: bool = True,
    index_label: IndexLabel | None = None,
    chunksize: int | None = None,
    dtype: DtypeArg | None = None,
    method: Literal["multi"] | Callable | None = None,
    engine: str = "auto",
    **engine_kwargs,
) -> int | None:
    """
    Write records stored in a DataFrame to a SQL database.

    Parameters
    ----------
    frame : DataFrame, Series
    name : str
        Name of SQL table.
    con : ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection
        or sqlite3 DBAPI2 connection
        ADBC provides high performance I/O with native type support, where available.
        Using SQLAlchemy makes it possible to use any DB supported by that
        library.
        If a DBAPI2 object, only sqlite3 is supported.
    schema : str, optional
        Name of SQL schema in database to write to (if database flavor
        supports this). If None, use default schema (default).
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        - fail: If table exists, raise a ValueError.
        - replace: If table exists, drop it, recreate it, and insert data.
        - append: If table exists, insert data. Create if does not exist.
    index : bool, default True
        Write DataFrame index as a column.
    index_label : str or sequence, optional
        Column label for index column(s). If None is given (default) and
        `index` is True, then the index names are used.
        A sequence should be given if the DataFrame uses MultiIndex.
    chunksize : int, optional
        Specify the number of rows in each batch to be written at a time.
        By default, all rows will be written at once.
    dtype : dict or scalar, optional
        Specifying the datatype for columns. If a dictionary is used, the
        keys should be the column names and the values should be the
        SQLAlchemy types or strings for the sqlite3 fallback mode. If a
        scalar is provided, it will be applied to all columns.
    method : {None, 'multi', callable}, optional
        Controls the SQL insertion clause used:

        - None : Uses standard SQL ``INSERT`` clause (one per row).
        - ``'multi'``: Pass multiple values in a single ``INSERT`` clause.
        - callable with signature ``(pd_table, conn, keys, data_iter) -> int | None``.

        Details and a sample callable implementation can be found in the
        section :ref:`insert method <io.sql.method>`.
    engine : {'auto', 'sqlalchemy'}, default 'auto'
        SQL engine library to use. If 'auto', then the option
        ``io.sql.engine`` is used. The default ``io.sql.engine``
        behavior is 'sqlalchemy'.

        .. versionadded:: 1.3.0

    **engine_kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    None or int
        Number of rows affected by to_sql. None is returned if the callable
        passed into ``method`` does not return an integer number of rows.

        .. versionadded:: 1.4.0

    Notes
    -----
    The returned rows affected is the sum of the ``rowcount`` attribute of ``sqlite3.Cursor``
    or SQLAlchemy connectable. If using ADBC the returned rows are the result
    of ``Cursor.adbc_ingest``. The returned value may not reflect the exact number of written
    rows as stipulated in the
    `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
    `SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__
    documentation.
    """  # noqa: E501
    if if_exists not in ("fail", "replace", "append"):
        raise ValueError(f"'{if_exists}' is not valid for if_exists")

    if isinstance(frame, Series):
        frame = frame.to_frame()
    elif not isinstance(frame, DataFrame):
        raise NotImplementedError(
            "'frame' argument should be either a Series or a DataFrame"
        )

    with pandasSQL_builder(con, schema=schema, need_transaction=True) as pandas_sql:
        return pandas_sql.to_sql(
            frame,
            name,
            if_exists=if_exists,
            index=index,
            index_label=index_label,
            schema=schema,
            chunksize=chunksize,
            dtype=dtype,
            method=method,
            engine=engine,
            **engine_kwargs,
        )
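# Illustrative sketch (not part of pandas): a custom ``method`` callable with
# the documented ``(pd_table, conn, keys, data_iter) -> int | None`` signature.
# It mirrors the default row-wise insert while returning the affected row
# count; ``df`` and ``engine`` are hypothetical.
#
# >>> def insert_counting(pd_table, conn, keys, data_iter):  # doctest:+SKIP
# ...     data = [dict(zip(keys, row)) for row in data_iter]
# ...     result = conn.execute(pd_table.table.insert(), data)
# ...     return result.rowcount
# >>> df.to_sql(name="test_data", con=engine, method=insert_counting)  # doctest:+SKIP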
def has_table(table_name: str, con, schema: str | None = None) -> bool:
    """
    Check if the database has a named table.

    Parameters
    ----------
    table_name : str
        Name of SQL table.
    con : ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection
        ADBC provides high performance I/O with native type support, where available.
        Using SQLAlchemy makes it possible to use any DB supported by that
        library.
        If a DBAPI2 object, only sqlite3 is supported.
    schema : str, default None
        Name of SQL schema in database to write to (if database flavor supports
        this). If None, use default schema (default).

    Returns
    -------
    boolean
    """
    with pandasSQL_builder(con, schema=schema) as pandas_sql:
        return pandas_sql.has_table(table_name)
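# Illustrative sketch (not part of pandas): checking for a table before
# writing. The connection and table name are hypothetical.
#
# >>> import sqlite3
# >>> from pandas.io.sql import has_table
# >>> conn = sqlite3.connect(":memory:")
# >>> has_table("test_data", conn)  # doctest:+SKIP
# False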
table_exists = has_table


def pandasSQL_builder(
    con,
    schema: str | None = None,
    need_transaction: bool = False,
) -> PandasSQL:
    """
    Convenience function to return the correct PandasSQL subclass based on the
    provided parameters. Also creates a sqlalchemy connection and transaction
    if necessary.
    """
    import sqlite3

    if isinstance(con, sqlite3.Connection) or con is None:
        return SQLiteDatabase(con)

    sqlalchemy = import_optional_dependency("sqlalchemy", errors="ignore")

    if isinstance(con, str) and sqlalchemy is None:
        raise ImportError("Using URI string without sqlalchemy installed.")

    if sqlalchemy is not None and isinstance(con, (str, sqlalchemy.engine.Connectable)):
        return SQLDatabase(con, schema, need_transaction)

    adbc = import_optional_dependency("adbc_driver_manager.dbapi", errors="ignore")
    if adbc and isinstance(con, adbc.Connection):
        return ADBCDatabase(con)

    warnings.warn(
        "pandas only supports SQLAlchemy connectable (engine/connection) or "
        "database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 "
        "objects are not tested. Please consider using SQLAlchemy.",
        UserWarning,
        stacklevel=find_stack_level(),
    )
    return SQLiteDatabase(con)
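# Illustrative sketch (not part of pandas): how the builder dispatches on the
# connection type, per the branches above. The connection is hypothetical.
#
# >>> import sqlite3
# >>> with pandasSQL_builder(sqlite3.connect(":memory:")) as pandas_sql:  # doctest:+SKIP
# ...     type(pandas_sql).__name__
# 'SQLiteDatabase'
#
# A SQLAlchemy Engine/Connection or a URI string yields SQLDatabase, and an
# adbc_driver_manager.dbapi.Connection yields ADBCDatabase.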
class SQLTable(PandasObject):
    """
    For mapping Pandas tables to SQL tables.
    Uses fact that table is reflected by SQLAlchemy to
    do better type conversions.
    Also holds various flags needed to avoid having to
    pass them between functions all the time.
    """

    # TODO: support for multiIndex

    def __init__(
        self,
        name: str,
        pandas_sql_engine,
        frame=None,
        index: bool | str | list[str] | None = True,
        if_exists: Literal["fail", "replace", "append"] = "fail",
        prefix: str = "pandas",
        index_label=None,
        schema=None,
        keys=None,
        dtype: DtypeArg | None = None,
    ) -> None:
        self.name = name
        self.pd_sql = pandas_sql_engine
        self.prefix = prefix
        self.frame = frame
        self.index = self._index_name(index, index_label)
        self.schema = schema
        self.if_exists = if_exists
        self.keys = keys
        self.dtype = dtype

        if frame is not None:
            # We want to initialize based on a dataframe
            self.table = self._create_table_setup()
        else:
            # no data provided, read-only mode
            self.table = self.pd_sql.get_table(self.name, self.schema)

        if self.table is None:
            raise ValueError(f"Could not init table '{name}'")

        if not len(self.name):
            raise ValueError("Empty table name specified")
    def exists(self):
        return self.pd_sql.has_table(self.name, self.schema)

    def sql_schema(self) -> str:
        from sqlalchemy.schema import CreateTable

        return str(CreateTable(self.table).compile(self.pd_sql.con))

    def _execute_create(self) -> None:
        # Inserting table into database, add to MetaData object
        self.table = self.table.to_metadata(self.pd_sql.meta)
        with self.pd_sql.run_transaction():
            self.table.create(bind=self.pd_sql.con)

    def create(self) -> None:
        if self.exists():
            if self.if_exists == "fail":
                raise ValueError(f"Table '{self.name}' already exists.")
            if self.if_exists == "replace":
                self.pd_sql.drop_table(self.name, self.schema)
                self._execute_create()
            elif self.if_exists == "append":
                pass
            else:
                raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
        else:
            self._execute_create()
    def _execute_insert(self, conn, keys: list[str], data_iter) -> int:
        """
        Execute SQL statement inserting data

        Parameters
        ----------
        conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
        keys : list of str
            Column names
        data_iter : generator of list
            Each item contains a list of values to be inserted
        """
        data = [dict(zip(keys, row)) for row in data_iter]
        result = conn.execute(self.table.insert(), data)
        return result.rowcount

    def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int:
        """
        Alternative to _execute_insert for DBs that support multi-value INSERT.

        Note: multi-value insert is usually faster for analytics DBs
        and tables containing a few columns
        but performance degrades quickly with increase of columns.
        """

        from sqlalchemy import insert

        data = [dict(zip(keys, row)) for row in data_iter]
        stmt = insert(self.table).values(data)
        result = conn.execute(stmt)
        return result.rowcount
    def insert_data(self) -> tuple[list[str], list[np.ndarray]]:
        if self.index is not None:
            temp = self.frame.copy()
            temp.index.names = self.index
            try:
                temp.reset_index(inplace=True)
            except ValueError as err:
                raise ValueError(f"duplicate name in index/columns: {err}") from err
        else:
            temp = self.frame

        column_names = list(map(str, temp.columns))
        ncols = len(column_names)
        # this just pre-allocates the list: None's will be replaced with ndarrays
        # error: List item 0 has incompatible type "None"; expected "ndarray"
        data_list: list[np.ndarray] = [None] * ncols  # type: ignore[list-item]

        for i, (_, ser) in enumerate(temp.items()):
            if ser.dtype.kind == "M":
                if isinstance(ser._values, ArrowExtensionArray):
                    import pyarrow as pa

                    if pa.types.is_date(ser.dtype.pyarrow_dtype):
                        # GH#53854 to_pydatetime not supported for pyarrow date dtypes
                        d = ser._values.to_numpy(dtype=object)
                    else:
                        with warnings.catch_warnings():
                            warnings.filterwarnings("ignore", category=FutureWarning)
                            # GH#52459 to_pydatetime will return Index[object]
                            d = np.asarray(ser.dt.to_pydatetime(), dtype=object)
                else:
                    d = ser._values.to_pydatetime()
            elif ser.dtype.kind == "m":
                vals = ser._values
                if isinstance(vals, ArrowExtensionArray):
                    vals = vals.to_numpy(dtype=np.dtype("m8[ns]"))
                # store as integers, see GH#6921, GH#7076
                d = vals.view("i8").astype(object)
            else:
                d = ser._values.astype(object)

            assert isinstance(d, np.ndarray), type(d)

            if ser._can_hold_na:
                # Note: this will miss timedeltas since they are converted to int
                mask = isna(d)
                d[mask] = None

            data_list[i] = d

        return column_names, data_list
    def insert(
        self,
        chunksize: int | None = None,
        method: Literal["multi"] | Callable | None = None,
    ) -> int | None:
        # set insert method
        if method is None:
            exec_insert = self._execute_insert
        elif method == "multi":
            exec_insert = self._execute_insert_multi
        elif callable(method):
            exec_insert = partial(method, self)
        else:
            raise ValueError(f"Invalid parameter `method`: {method}")

        keys, data_list = self.insert_data()

        nrows = len(self.frame)

        if nrows == 0:
            return 0

        if chunksize is None:
            chunksize = nrows
        elif chunksize == 0:
            raise ValueError("chunksize argument should be non-zero")

        chunks = (nrows // chunksize) + 1
        total_inserted = None
        with self.pd_sql.run_transaction() as conn:
            for i in range(chunks):
                start_i = i * chunksize
                end_i = min((i + 1) * chunksize, nrows)
                if start_i >= end_i:
                    break

                chunk_iter = zip(*(arr[start_i:end_i] for arr in data_list))
                num_inserted = exec_insert(conn, keys, chunk_iter)
                # GH 46891
                if num_inserted is not None:
                    if total_inserted is None:
                        total_inserted = num_inserted
                    else:
                        total_inserted += num_inserted
        return total_inserted
    def _query_iterator(
        self,
        result,
        exit_stack: ExitStack,
        chunksize: int | None,
        columns,
        coerce_float: bool = True,
        parse_dates=None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ):
        """Return generator through chunked result set."""
        has_read_data = False
        with exit_stack:
            while True:
                data = result.fetchmany(chunksize)
                if not data:
                    if not has_read_data:
                        yield DataFrame.from_records(
                            [], columns=columns, coerce_float=coerce_float
                        )
                    break

                has_read_data = True
                self.frame = _convert_arrays_to_dataframe(
                    data, columns, coerce_float, dtype_backend
                )

                self._harmonize_columns(
                    parse_dates=parse_dates, dtype_backend=dtype_backend
                )

                if self.index is not None:
                    self.frame.set_index(self.index, inplace=True)

                yield self.frame
    def read(
        self,
        exit_stack: ExitStack,
        coerce_float: bool = True,
        parse_dates=None,
        columns=None,
        chunksize: int | None = None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ) -> DataFrame | Iterator[DataFrame]:
        from sqlalchemy import select

        if columns is not None and len(columns) > 0:
            cols = [self.table.c[n] for n in columns]
            if self.index is not None:
                for idx in self.index[::-1]:
                    cols.insert(0, self.table.c[idx])
            sql_select = select(*cols)
        else:
            sql_select = select(self.table)
        result = self.pd_sql.execute(sql_select)
        column_names = result.keys()

        if chunksize is not None:
            return self._query_iterator(
                result,
                exit_stack,
                chunksize,
                column_names,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype_backend=dtype_backend,
            )
        else:
            data = result.fetchall()
            self.frame = _convert_arrays_to_dataframe(
                data, column_names, coerce_float, dtype_backend
            )

            self._harmonize_columns(
                parse_dates=parse_dates, dtype_backend=dtype_backend
            )

            if self.index is not None:
                self.frame.set_index(self.index, inplace=True)

            return self.frame
    def _index_name(self, index, index_label):
        # for writing: index=True to include index in sql table
        if index is True:
            nlevels = self.frame.index.nlevels
            # if index_label is specified, set this as index name(s)
            if index_label is not None:
                if not isinstance(index_label, list):
                    index_label = [index_label]
                if len(index_label) != nlevels:
                    raise ValueError(
                        "Length of 'index_label' should match number of "
                        f"levels, which is {nlevels}"
                    )
                return index_label
            # return the used column labels for the index columns
            if (
                nlevels == 1
                and "index" not in self.frame.columns
                and self.frame.index.name is None
            ):
                return ["index"]
            else:
                return com.fill_missing_names(self.frame.index.names)

        # for reading: index=(list of) string to specify column to set as index
        elif isinstance(index, str):
            return [index]
        elif isinstance(index, list):
            return index
        else:
            return None
    def _get_column_names_and_types(self, dtype_mapper):
        column_names_and_types = []
        if self.index is not None:
            for i, idx_label in enumerate(self.index):
                idx_type = dtype_mapper(self.frame.index._get_level_values(i))
                column_names_and_types.append((str(idx_label), idx_type, True))

        column_names_and_types += [
            (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False)
            for i in range(len(self.frame.columns))
        ]

        return column_names_and_types
    def _create_table_setup(self):
        from sqlalchemy import (
            Column,
            PrimaryKeyConstraint,
            Table,
        )
        from sqlalchemy.schema import MetaData

        column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type)

        columns: list[Any] = [
            Column(name, typ, index=is_index)
            for name, typ, is_index in column_names_and_types
        ]

        if self.keys is not None:
            if not is_list_like(self.keys):
                keys = [self.keys]
            else:
                keys = self.keys
            pkc = PrimaryKeyConstraint(*keys, name=self.name + "_pk")
            columns.append(pkc)

        schema = self.schema or self.pd_sql.meta.schema

        # At this point, attach to new metadata, only attach to self.meta
        # once table is created.
        meta = MetaData()
        return Table(self.name, meta, *columns, schema=schema)
    def _harmonize_columns(
        self,
        parse_dates=None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ) -> None:
        """
        Make the DataFrame's column types align with the SQL table
        column types.
        Need to work around limited NA value support. Floats are always
        fine, ints must always be floats if there are Null values.
        Booleans are hard because converting a bool column with None
        replaces all Nones with False. Therefore only convert bool if
        there are no NA values.
        Datetimes should already be converted to np.datetime64 if supported,
        but here we also force conversion if required.
        """
        parse_dates = _process_parse_dates_argument(parse_dates)

        for sql_col in self.table.columns:
            col_name = sql_col.name
            try:
                df_col = self.frame[col_name]

                # Handle date parsing upfront; don't try to convert columns
                # twice
                if col_name in parse_dates:
                    try:
                        fmt = parse_dates[col_name]
                    except TypeError:
                        fmt = None
                    self.frame[col_name] = _handle_date_column(df_col, format=fmt)
                    continue

                # the type the dataframe column should have
                col_type = self._get_dtype(sql_col.type)

                if (
                    col_type is datetime
                    or col_type is date
                    or col_type is DatetimeTZDtype
                ):
                    # Convert tz-aware Datetime SQL columns to UTC
                    utc = col_type is DatetimeTZDtype
                    self.frame[col_name] = _handle_date_column(df_col, utc=utc)
                elif dtype_backend == "numpy" and col_type is float:
                    # floats support NA, can always convert!
                    self.frame[col_name] = df_col.astype(col_type, copy=False)

                elif dtype_backend == "numpy" and len(df_col) == df_col.count():
                    # No NA values, can convert ints and bools
                    if col_type is np.dtype("int64") or col_type is bool:
                        self.frame[col_name] = df_col.astype(col_type, copy=False)
            except KeyError:
                pass  # this column not in results
    def _sqlalchemy_type(self, col: Index | Series):
        dtype: DtypeArg = self.dtype or {}
        if is_dict_like(dtype):
            dtype = cast(dict, dtype)
            if col.name in dtype:
                return dtype[col.name]

        # Infer type of column, while ignoring missing values.
        # Needed for inserting typed data containing NULLs, GH 8778.
        col_type = lib.infer_dtype(col, skipna=True)

        from sqlalchemy.types import (
            TIMESTAMP,
            BigInteger,
            Boolean,
            Date,
            DateTime,
            Float,
            Integer,
            SmallInteger,
            Text,
            Time,
        )

        if col_type in ("datetime64", "datetime"):
            # GH 9086: TIMESTAMP is the suggested type if the column contains
            # timezone information
            try:
                # error: Item "Index" of "Union[Index, Series]" has no attribute "dt"
                if col.dt.tz is not None:  # type: ignore[union-attr]
                    return TIMESTAMP(timezone=True)
            except AttributeError:
                # The column is actually a DatetimeIndex
                # GH 26761 or an Index with date-like data e.g. 9999-01-01
                if getattr(col, "tz", None) is not None:
                    return TIMESTAMP(timezone=True)
            return DateTime
        if col_type == "timedelta64":
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the database.",
                UserWarning,
                stacklevel=find_stack_level(),
            )
            return BigInteger
        elif col_type == "floating":
            if col.dtype == "float32":
                return Float(precision=23)
            else:
                return Float(precision=53)
        elif col_type == "integer":
            # GH35076 Map pandas integer to optimal SQLAlchemy integer type
            if col.dtype.name.lower() in ("int8", "uint8", "int16"):
                return SmallInteger
            elif col.dtype.name.lower() in ("uint16", "int32"):
                return Integer
            elif col.dtype.name.lower() == "uint64":
                raise ValueError("Unsigned 64 bit integer datatype is not supported")
            else:
                return BigInteger
        elif col_type == "boolean":
            return Boolean
        elif col_type == "date":
            return Date
        elif col_type == "time":
            return Time
        elif col_type == "complex":
            raise ValueError("Complex datatypes not supported")

        return Text
    def _get_dtype(self, sqltype):
        from sqlalchemy.types import (
            TIMESTAMP,
            Boolean,
            Date,
            DateTime,
            Float,
            Integer,
        )

        if isinstance(sqltype, Float):
            return float
        elif isinstance(sqltype, Integer):
            # TODO: Refine integer size.
            return np.dtype("int64")
        elif isinstance(sqltype, TIMESTAMP):
            # we have a timezone capable type
            if not sqltype.timezone:
                return datetime
            return DatetimeTZDtype
        elif isinstance(sqltype, DateTime):
            # Caution: np.datetime64 is also a subclass of np.number.
            return datetime
        elif isinstance(sqltype, Date):
            return date
        elif isinstance(sqltype, Boolean):
            return bool
        return object
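# Illustrative sketch (not part of pandas): the integer mapping performed by
# SQLTable._sqlalchemy_type above, stated as input dtype -> SQLAlchemy type.
# The Series values are hypothetical.
#
# >>> Series([1, 2], dtype="int8")    # -> SmallInteger
# >>> Series([1, 2], dtype="int32")   # -> Integer
# >>> Series([1, 2], dtype="int64")   # -> BigInteger
# >>> Series([1, 2], dtype="uint64")  # raises ValueError when written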
class PandasSQL(PandasObject, ABC):
    """
    Subclasses should define read_query and to_sql.
    """

    def __enter__(self) -> Self:
        return self

    def __exit__(self, *args) -> None:
        pass

    def read_table(
        self,
        table_name: str,
        index_col: str | list[str] | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        columns=None,
        schema: str | None = None,
        chunksize: int | None = None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ) -> DataFrame | Iterator[DataFrame]:
        raise NotImplementedError

    @abstractmethod
    def read_query(
        self,
        sql: str,
        index_col: str | list[str] | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        params=None,
        chunksize: int | None = None,
        dtype: DtypeArg | None = None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ) -> DataFrame | Iterator[DataFrame]:
        pass

    @abstractmethod
    def to_sql(
        self,
        frame,
        name: str,
        if_exists: Literal["fail", "replace", "append"] = "fail",
        index: bool = True,
        index_label=None,
        schema=None,
        chunksize: int | None = None,
        dtype: DtypeArg | None = None,
        method: Literal["multi"] | Callable | None = None,
        engine: str = "auto",
        **engine_kwargs,
    ) -> int | None:
        pass

    @abstractmethod
    def execute(self, sql: str | Select | TextClause, params=None):
        pass

    @abstractmethod
    def has_table(self, name: str, schema: str | None = None) -> bool:
        pass

    @abstractmethod
    def _create_sql_schema(
        self,
        frame: DataFrame,
        table_name: str,
        keys: list[str] | None = None,
        dtype: DtypeArg | None = None,
        schema: str | None = None,
    ) -> str:
        pass
class BaseEngine:
    def insert_records(
        self,
        table: SQLTable,
        con,
        frame,
        name: str,
        index: bool | str | list[str] | None = True,
        schema=None,
        chunksize: int | None = None,
        method=None,
        **engine_kwargs,
    ) -> int | None:
        """
        Inserts data into already-prepared table
        """
        raise AbstractMethodError(self)
class SQLAlchemyEngine(BaseEngine):
    def __init__(self) -> None:
        import_optional_dependency(
            "sqlalchemy", extra="sqlalchemy is required for SQL support."
        )

    def insert_records(
        self,
        table: SQLTable,
        con,
        frame,
        name: str,
        index: bool | str | list[str] | None = True,
        schema=None,
        chunksize: int | None = None,
        method=None,
        **engine_kwargs,
    ) -> int | None:
        from sqlalchemy import exc

        try:
            return table.insert(chunksize=chunksize, method=method)
        except exc.StatementError as err:
            # GH34431
            # https://stackoverflow.com/a/67358288/6067848
            msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?#
            )|inf can not be used with MySQL"""
            err_text = str(err.orig)
            if re.search(msg, err_text):
                raise ValueError("inf cannot be used with MySQL") from err
            raise err
def get_engine(engine: str) -> BaseEngine:
    """return our implementation"""
    if engine == "auto":
        engine = get_option("io.sql.engine")

    if engine == "auto":
        # try engines in this order
        engine_classes = [SQLAlchemyEngine]

        error_msgs = ""
        for engine_class in engine_classes:
            try:
                return engine_class()
            except ImportError as err:
                error_msgs += "\n - " + str(err)

        raise ImportError(
            "Unable to find a usable engine; "
            "tried using: 'sqlalchemy'.\n"
            "A suitable version of "
            "sqlalchemy is required for sql I/O "
            "support.\n"
            "Trying to import the above resulted in these errors:"
            f"{error_msgs}"
        )

    if engine == "sqlalchemy":
        return SQLAlchemyEngine()

    raise ValueError("engine must be one of 'auto', 'sqlalchemy'")
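# Illustrative sketch (not part of pandas): resolving the engine. With the
# default option ``io.sql.engine == "auto"``, this returns a SQLAlchemyEngine
# whenever sqlalchemy is importable.
#
# >>> engine = get_engine("auto")  # doctest:+SKIP
# >>> type(engine).__name__
# 'SQLAlchemyEngine'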
class SQLDatabase(PandasSQL):
    """
    This class enables conversion between DataFrame and SQL databases
    using SQLAlchemy to handle database abstraction.

    Parameters
    ----------
    con : SQLAlchemy Connectable or URI string.
        Connectable to connect with the database. Using SQLAlchemy makes it
        possible to use any DB supported by that library.
    schema : string, default None
        Name of SQL schema in database to write to (if database flavor
        supports this). If None, use default schema (default).
    need_transaction : bool, default False
        If True, SQLDatabase will create a transaction.

    """

    def __init__(
        self, con, schema: str | None = None, need_transaction: bool = False
    ) -> None:
        from sqlalchemy import create_engine
        from sqlalchemy.engine import Engine
        from sqlalchemy.schema import MetaData

        # self.exit_stack cleans up the Engine and Connection and commits the
        # transaction if any of those objects was created below.
        # Cleanup happens either in self.__exit__ or at the end of the iterator
        # returned by read_sql when chunksize is not None.
        self.exit_stack = ExitStack()
        if isinstance(con, str):
            con = create_engine(con)
            self.exit_stack.callback(con.dispose)
        if isinstance(con, Engine):
            con = self.exit_stack.enter_context(con.connect())
        if need_transaction and not con.in_transaction():
            self.exit_stack.enter_context(con.begin())
        self.con = con
        self.meta = MetaData(schema=schema)
        self.returns_generator = False

    def __exit__(self, *args) -> None:
        if not self.returns_generator:
            self.exit_stack.close()
    @contextmanager
    def run_transaction(self):
        if not self.con.in_transaction():
            with self.con.begin():
                yield self.con
        else:
            yield self.con

    def execute(self, sql: str | Select | TextClause, params=None):
        """Simple passthrough to SQLAlchemy connectable"""
        args = [] if params is None else [params]
        if isinstance(sql, str):
            return self.con.exec_driver_sql(sql, *args)
        return self.con.execute(sql, *args)
    def read_table(
        self,
        table_name: str,
        index_col: str | list[str] | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        columns=None,
        schema: str | None = None,
        chunksize: int | None = None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ) -> DataFrame | Iterator[DataFrame]:
        """
        Read SQL database table into a DataFrame.

        Parameters
        ----------
        table_name : str
            Name of SQL table in database.
        index_col : string, optional, default: None
            Column to set as index.
        coerce_float : bool, default True
            Attempts to convert values of non-string, non-numeric objects
            (like decimal.Decimal) to floating point. This can result in
            loss of precision.
        parse_dates : list or dict, default: None
            - List of column names to parse as dates.
            - Dict of ``{column_name: format string}`` where format string is
              strftime compatible in case of parsing string times, or is one of
              (D, s, ns, ms, us) in case of parsing integer timestamps.
            - Dict of ``{column_name: arg}``, where the arg corresponds
              to the keyword arguments of :func:`pandas.to_datetime`.
              Especially useful with databases without native Datetime support,
              such as SQLite.
        columns : list, default: None
            List of column names to select from SQL table.
        schema : string, default None
            Name of SQL schema in database to query (if database flavor
            supports this). If specified, this overwrites the default
            schema of the SQL database object.
        chunksize : int, default None
            If specified, return an iterator where `chunksize` is the number
            of rows to include in each chunk.
        dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
            Back-end data type applied to the resultant :class:`DataFrame`
            (still experimental). Behaviour is as follows:

            * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
              (default).
            * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
              DataFrame.

            .. versionadded:: 2.0

        Returns
        -------
        DataFrame

        See Also
        --------
        pandas.read_sql_table
        SQLDatabase.read_query

        """
        self.meta.reflect(bind=self.con, only=[table_name], views=True)
        table = SQLTable(table_name, self, index=index_col, schema=schema)
        if chunksize is not None:
            self.returns_generator = True
        return table.read(
            self.exit_stack,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            columns=columns,
            chunksize=chunksize,
            dtype_backend=dtype_backend,
        )
    @staticmethod
    def _query_iterator(
        result,
        exit_stack: ExitStack,
        chunksize: int,
        columns,
        index_col=None,
        coerce_float: bool = True,
        parse_dates=None,
        dtype: DtypeArg | None = None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ):
        """Return generator through chunked result set"""
        has_read_data = False
        with exit_stack:
            while True:
                data = result.fetchmany(chunksize)
                if not data:
                    if not has_read_data:
                        yield _wrap_result(
                            [],
                            columns,
                            index_col=index_col,
                            coerce_float=coerce_float,
                            parse_dates=parse_dates,
                            dtype=dtype,
                            dtype_backend=dtype_backend,
                        )
                    break

                has_read_data = True
                yield _wrap_result(
                    data,
                    columns,
                    index_col=index_col,
                    coerce_float=coerce_float,
                    parse_dates=parse_dates,
                    dtype=dtype,
                    dtype_backend=dtype_backend,
                )
1779 def read_query(
1780 self,
1781 sql: str,
1782 index_col: str | list[str] | None = None,
1783 coerce_float: bool = True,
1784 parse_dates=None,
1785 params=None,
1786 chunksize: int | None = None,
1787 dtype: DtypeArg | None = None,
1788 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
1789 ) -> DataFrame | Iterator[DataFrame]:
1790 """
1791 Read SQL query into a DataFrame.
1793 Parameters
1794 ----------
1795 sql : str
1796 SQL query to be executed.
1797 index_col : string, optional, default: None
1798 Column name to use as index for the returned DataFrame object.
1799 coerce_float : bool, default True
1800 Attempt to convert values of non-string, non-numeric objects (like
1801 decimal.Decimal) to floating point, useful for SQL result sets.
1802 params : list, tuple or dict, optional, default: None
1803 List of parameters to pass to execute method. The syntax used
1804 to pass parameters is database driver dependent. Check your
1805 database driver documentation for which of the five syntax styles,
1806 described in PEP 249's paramstyle, is supported.
1807 Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
1808 parse_dates : list or dict, default: None
1809 - List of column names to parse as dates.
1810 - Dict of ``{column_name: format string}`` where format string is
1811 strftime compatible in case of parsing string times, or is one of
1812 (D, s, ns, ms, us) in case of parsing integer timestamps.
1813 - Dict of ``{column_name: arg dict}``, where the arg dict
1814 corresponds to the keyword arguments of
1815 :func:`pandas.to_datetime`. Especially useful with databases
1816 without native Datetime support, such as SQLite.
1817 chunksize : int, default None
1818 If specified, return an iterator where `chunksize` is the number
1819 of rows to include in each chunk.
1820 dtype : Type name or dict of columns
1821 Data type for data or columns. E.g. np.float64 or
1822 {'a': np.float64, 'b': np.int32, 'c': 'Int64'}
1824 .. versionadded:: 1.3.0
1826 Returns
1827 -------
1828 DataFrame
1830 See Also
1831 --------
1832 read_sql_table : Read SQL database table into a DataFrame.
1833 read_sql
1835 """
1836 result = self.execute(sql, params)
1837 columns = result.keys()
1839 if chunksize is not None:
1840 self.returns_generator = True
1841 return self._query_iterator(
1842 result,
1843 self.exit_stack,
1844 chunksize,
1845 columns,
1846 index_col=index_col,
1847 coerce_float=coerce_float,
1848 parse_dates=parse_dates,
1849 dtype=dtype,
1850 dtype_backend=dtype_backend,
1851 )
1852 else:
1853 data = result.fetchall()
1854 frame = _wrap_result(
1855 data,
1856 columns,
1857 index_col=index_col,
1858 coerce_float=coerce_float,
1859 parse_dates=parse_dates,
1860 dtype=dtype,
1861 dtype_backend=dtype_backend,
1862 )
1863 return frame
1865 read_sql = read_query
1867 def prep_table(
1868 self,
1869 frame,
1870 name: str,
1871 if_exists: Literal["fail", "replace", "append"] = "fail",
1872 index: bool | str | list[str] | None = True,
1873 index_label=None,
1874 schema=None,
1875 dtype: DtypeArg | None = None,
1876 ) -> SQLTable:
1877 """
1878 Prepares a table in the database for data insertion, creating it if needed.
1879 """
1880 if dtype:
1881 if not is_dict_like(dtype):
1882 # error: Value expression in dictionary comprehension has incompatible
1883 # type "Union[ExtensionDtype, str, dtype[Any], Type[object],
1884 # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]],
1885 # Type[str], Type[float], Type[int], Type[complex], Type[bool],
1886 # Type[object]]]]"; expected type "Union[ExtensionDtype, str,
1887 # dtype[Any], Type[object]]"
1888 dtype = {col_name: dtype for col_name in frame} # type: ignore[misc]
1889 else:
1890 dtype = cast(dict, dtype)
1892 from sqlalchemy.types import TypeEngine
1894 for col, my_type in dtype.items():
1895 if isinstance(my_type, type) and issubclass(my_type, TypeEngine):
1896 pass
1897 elif isinstance(my_type, TypeEngine):
1898 pass
1899 else:
1900 raise ValueError(f"The type of {col} is not a SQLAlchemy type")
1902 table = SQLTable(
1903 name,
1904 self,
1905 frame=frame,
1906 index=index,
1907 if_exists=if_exists,
1908 index_label=index_label,
1909 schema=schema,
1910 dtype=dtype,
1911 )
1912 table.create()
1913 return table
1915 def check_case_sensitive(
1916 self,
1917 name: str,
1918 schema: str | None,
1919 ) -> None:
1920 """
1921 Checks table name for issues with case-sensitivity.
1922 Method is called after data is inserted.
1923 """
1924 if not name.isdigit() and not name.islower():
1925 # check for potential case-sensitivity issues (GH7815)
1926 # Only check when name is not a number and name is not lower case
1927 from sqlalchemy import inspect as sqlalchemy_inspect
1929 insp = sqlalchemy_inspect(self.con)
1930 table_names = insp.get_table_names(schema=schema or self.meta.schema)
1931 if name not in table_names:
1932 msg = (
1933 f"The provided table name '{name}' is not found exactly as "
1934 "such in the database after writing the table, possibly "
1935 "due to case sensitivity issues. Consider using lower "
1936 "case table names."
1937 )
1938 warnings.warn(
1939 msg,
1940 UserWarning,
1941 stacklevel=find_stack_level(),
1942 )
1944 def to_sql(
1945 self,
1946 frame,
1947 name: str,
1948 if_exists: Literal["fail", "replace", "append"] = "fail",
1949 index: bool = True,
1950 index_label=None,
1951 schema: str | None = None,
1952 chunksize: int | None = None,
1953 dtype: DtypeArg | None = None,
1954 method: Literal["multi"] | Callable | None = None,
1955 engine: str = "auto",
1956 **engine_kwargs,
1957 ) -> int | None:
1958 """
1959 Write records stored in a DataFrame to a SQL database.
1961 Parameters
1962 ----------
1963 frame : DataFrame
1964 name : string
1965 Name of SQL table.
1966 if_exists : {'fail', 'replace', 'append'}, default 'fail'
1967 - fail: If table exists, raise a ValueError.
1968 - replace: If table exists, drop it, recreate it, and insert data.
1969 - append: If table exists, insert data. Create it if it does not exist.
1970 index : boolean, default True
1971 Write DataFrame index as a column.
1972 index_label : string or sequence, default None
1973 Column label for index column(s). If None is given (default) and
1974 `index` is True, then the index names are used.
1975 A sequence should be given if the DataFrame uses MultiIndex.
1976 schema : string, default None
1977 Name of SQL schema in database to write to (if database flavor
1978 supports this). If specified, this overwrites the default
1979 schema of the SQLDatabase object.
1980 chunksize : int, default None
1981 If not None, then rows will be written in batches of this size at a
1982 time. If None, all rows will be written at once.
1983 dtype : single type or dict of column name to SQL type, default None
1984 Optional specifying the datatype for columns. The SQL type should
1985 be a SQLAlchemy type. If all columns are of the same type, one
1986 single value can be used.
1987 method : {None, 'multi', callable}, default None
1988 Controls the SQL insertion clause used:
1990 * None : Uses standard SQL ``INSERT`` clause (one per row).
1991 * 'multi': Pass multiple values in a single ``INSERT`` clause.
1992 * callable with signature ``(pd_table, conn, keys, data_iter)``.
1994 Details and a sample callable implementation can be found in the
1995 section :ref:`insert method <io.sql.method>`.
1996 engine : {'auto', 'sqlalchemy'}, default 'auto'
1997 SQL engine library to use. If 'auto', then the option
1998 ``io.sql.engine`` is used. The default ``io.sql.engine``
1999 behavior is 'sqlalchemy'.
2001 .. versionadded:: 1.3.0
2003 **engine_kwargs
2004 Any additional kwargs are passed to the engine.
2005 """
2006 sql_engine = get_engine(engine)
2008 table = self.prep_table(
2009 frame=frame,
2010 name=name,
2011 if_exists=if_exists,
2012 index=index,
2013 index_label=index_label,
2014 schema=schema,
2015 dtype=dtype,
2016 )
2018 total_inserted = sql_engine.insert_records(
2019 table=table,
2020 con=self.con,
2021 frame=frame,
2022 name=name,
2023 index=index,
2024 schema=schema,
2025 chunksize=chunksize,
2026 method=method,
2027 **engine_kwargs,
2028 )
2030 self.check_case_sensitive(name=name, schema=schema)
2031 return total_inserted
2033 @property
2034 def tables(self):
2035 return self.meta.tables
2037 def has_table(self, name: str, schema: str | None = None) -> bool:
2038 from sqlalchemy import inspect as sqlalchemy_inspect
2040 insp = sqlalchemy_inspect(self.con)
2041 return insp.has_table(name, schema or self.meta.schema)
2043 def get_table(self, table_name: str, schema: str | None = None) -> Table:
2044 from sqlalchemy import (
2045 Numeric,
2046 Table,
2047 )
2049 schema = schema or self.meta.schema
2050 tbl = Table(table_name, self.meta, autoload_with=self.con, schema=schema)
2051 for column in tbl.columns:
2052 if isinstance(column.type, Numeric):
2053 column.type.asdecimal = False
2054 return tbl
2056 def drop_table(self, table_name: str, schema: str | None = None) -> None:
2057 schema = schema or self.meta.schema
2058 if self.has_table(table_name, schema):
2059 self.meta.reflect(
2060 bind=self.con, only=[table_name], schema=schema, views=True
2061 )
2062 with self.run_transaction():
2063 self.get_table(table_name, schema).drop(bind=self.con)
2064 self.meta.clear()
2066 def _create_sql_schema(
2067 self,
2068 frame: DataFrame,
2069 table_name: str,
2070 keys: list[str] | None = None,
2071 dtype: DtypeArg | None = None,
2072 schema: str | None = None,
2073 ) -> str:
2074 table = SQLTable(
2075 table_name,
2076 self,
2077 frame=frame,
2078 index=False,
2079 keys=keys,
2080 dtype=dtype,
2081 schema=schema,
2082 )
2083 return str(table.sql_schema())
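# Usage sketch (illustrative only, not part of this module): exercises the
# chunked-read path implemented by SQLDatabase._query_iterator above. The
# in-memory SQLite engine and the table name "demo_chunks" are hypothetical
# choices; assumes SQLAlchemy is installed.
def _demo_chunked_read():  # pragma: no cover
    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine("sqlite://")
    pd.DataFrame({"a": range(10)}).to_sql("demo_chunks", engine, index=False)
    # A non-None chunksize makes read_sql return a generator of DataFrames,
    # each produced by a fetchmany(chunksize) call in _query_iterator.
    chunks = pd.read_sql("SELECT * FROM demo_chunks", engine, chunksize=4)
    return [len(chunk) for chunk in chunks]  # [4, 4, 2]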
2086# ---- SQL without SQLAlchemy ----
2089class ADBCDatabase(PandasSQL):
2090 """
2091 This class enables conversion between DataFrame and SQL databases
2092 using ADBC to handle database abstraction.
2094 Parameters
2095 ----------
2096 con : adbc_driver_manager.dbapi.Connection
2097 """
2099 def __init__(self, con) -> None:
2100 self.con = con
2102 @contextmanager
2103 def run_transaction(self):
2104 with self.con.cursor() as cur:
2105 try:
2106 yield cur
2107 except Exception:
2108 self.con.rollback()
2109 raise
2110 self.con.commit()
2112 def execute(self, sql: str | Select | TextClause, params=None):
2113 if not isinstance(sql, str):
2114 raise TypeError("Query must be a string unless using sqlalchemy.")
2115 args = [] if params is None else [params]
2116 cur = self.con.cursor()
2117 try:
2118 cur.execute(sql, *args)
2119 return cur
2120 except Exception as exc:
2121 try:
2122 self.con.rollback()
2123 except Exception as inner_exc: # pragma: no cover
2124 ex = DatabaseError(
2125 f"Execution failed on sql: {sql}\n{exc}\nunable to rollback"
2126 )
2127 raise ex from inner_exc
2129 ex = DatabaseError(f"Execution failed on sql '{sql}': {exc}")
2130 raise ex from exc
2132 def read_table(
2133 self,
2134 table_name: str,
2135 index_col: str | list[str] | None = None,
2136 coerce_float: bool = True,
2137 parse_dates=None,
2138 columns=None,
2139 schema: str | None = None,
2140 chunksize: int | None = None,
2141 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
2142 ) -> DataFrame | Iterator[DataFrame]:
2143 """
2144 Read SQL database table into a DataFrame.
2146 Parameters
2147 ----------
2148 table_name : str
2149 Name of SQL table in database.
2150 coerce_float : bool, default True
2151 Raises NotImplementedError
2152 parse_dates : list or dict, default: None
2153 - List of column names to parse as dates.
2154 - Dict of ``{column_name: format string}`` where format string is
2155 strftime compatible in case of parsing string times, or is one of
2156 (D, s, ns, ms, us) in case of parsing integer timestamps.
2157 - Dict of ``{column_name: arg}``, where the arg corresponds
2158 to the keyword arguments of :func:`pandas.to_datetime`.
2159 Especially useful with databases without native Datetime support,
2160 such as SQLite.
2161 columns : list, default: None
2162 List of column names to select from SQL table.
2163 schema : string, default None
2164 Name of SQL schema in database to query (if database flavor
2165 supports this). If specified, this overwrites the default
2166 schema of the SQL database object.
2167 chunksize : int, default None
2168 Raises NotImplementedError
2169 dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
2170 Back-end data type applied to the resultant :class:`DataFrame`
2171 (still experimental). Behaviour is as follows:
2173 * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
2174 (default).
2175 * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
2176 DataFrame.
2178 .. versionadded:: 2.0
2180 Returns
2181 -------
2182 DataFrame
2184 See Also
2185 --------
2186 pandas.read_sql_table
2187 SQLDatabase.read_query
2189 """
2190 if coerce_float is not True:
2191 raise NotImplementedError(
2192 "'coerce_float' is not implemented for ADBC drivers"
2193 )
2194 if chunksize:
2195 raise NotImplementedError("'chunksize' is not implemented for ADBC drivers")
2197 if columns:
2198 if index_col:
2199 index_select = maybe_make_list(index_col)
2200 else:
2201 index_select = []
2202 to_select = index_select + columns
2203 select_list = ", ".join(f'"{x}"' for x in to_select)
2204 else:
2205 select_list = "*"
2206 if schema:
2207 stmt = f"SELECT {select_list} FROM {schema}.{table_name}"
2208 else:
2209 stmt = f"SELECT {select_list} FROM {table_name}"
2211 mapping: type[ArrowDtype] | None | Callable
2212 if dtype_backend == "pyarrow":
2213 mapping = ArrowDtype
2214 elif dtype_backend == "numpy_nullable":
2215 from pandas.io._util import _arrow_dtype_mapping
2217 mapping = _arrow_dtype_mapping().get
2218 elif using_pyarrow_string_dtype():
2219 from pandas.io._util import arrow_string_types_mapper
2221 mapping = arrow_string_types_mapper()
2222 else:
2223 mapping = None
2225 with self.con.cursor() as cur:
2226 cur.execute(stmt)
2227 df = cur.fetch_arrow_table().to_pandas(types_mapper=mapping)
2229 return _wrap_result_adbc(
2230 df,
2231 index_col=index_col,
2232 parse_dates=parse_dates,
2233 )
2235 def read_query(
2236 self,
2237 sql: str,
2238 index_col: str | list[str] | None = None,
2239 coerce_float: bool = True,
2240 parse_dates=None,
2241 params=None,
2242 chunksize: int | None = None,
2243 dtype: DtypeArg | None = None,
2244 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
2245 ) -> DataFrame | Iterator[DataFrame]:
2246 """
2247 Read SQL query into a DataFrame.
2249 Parameters
2250 ----------
2251 sql : str
2252 SQL query to be executed.
2253 index_col : string, optional, default: None
2254 Column name to use as index for the returned DataFrame object.
2255 coerce_float : bool, default True
2256 Raises NotImplementedError
2257 params : list, tuple or dict, optional, default: None
2258 Raises NotImplementedError
2259 parse_dates : list or dict, default: None
2260 - List of column names to parse as dates.
2261 - Dict of ``{column_name: format string}`` where format string is
2262 strftime compatible in case of parsing string times, or is one of
2263 (D, s, ns, ms, us) in case of parsing integer timestamps.
2264 - Dict of ``{column_name: arg dict}``, where the arg dict
2265 corresponds to the keyword arguments of
2266 :func:`pandas.to_datetime`. Especially useful with databases
2267 without native Datetime support, such as SQLite.
2268 chunksize : int, default None
2269 Raises NotImplementedError
2270 dtype : Type name or dict of columns
2271 Data type for data or columns. E.g. np.float64 or
2272 {'a': np.float64, 'b': np.int32, 'c': 'Int64'}
2274 .. versionadded:: 1.3.0
2276 Returns
2277 -------
2278 DataFrame
2280 See Also
2281 --------
2282 read_sql_table : Read SQL database table into a DataFrame.
2283 read_sql
2285 """
2286 if coerce_float is not True:
2287 raise NotImplementedError(
2288 "'coerce_float' is not implemented for ADBC drivers"
2289 )
2290 if params:
2291 raise NotImplementedError("'params' is not implemented for ADBC drivers")
2292 if chunksize:
2293 raise NotImplementedError("'chunksize' is not implemented for ADBC drivers")
2295 mapping: type[ArrowDtype] | None | Callable
2296 if dtype_backend == "pyarrow":
2297 mapping = ArrowDtype
2298 elif dtype_backend == "numpy_nullable":
2299 from pandas.io._util import _arrow_dtype_mapping
2301 mapping = _arrow_dtype_mapping().get
2302 else:
2303 mapping = None
2305 with self.con.cursor() as cur:
2306 cur.execute(sql)
2307 df = cur.fetch_arrow_table().to_pandas(types_mapper=mapping)
2309 return _wrap_result_adbc(
2310 df,
2311 index_col=index_col,
2312 parse_dates=parse_dates,
2313 dtype=dtype,
2314 )
2316 read_sql = read_query
2318 def to_sql(
2319 self,
2320 frame,
2321 name: str,
2322 if_exists: Literal["fail", "replace", "append"] = "fail",
2323 index: bool = True,
2324 index_label=None,
2325 schema: str | None = None,
2326 chunksize: int | None = None,
2327 dtype: DtypeArg | None = None,
2328 method: Literal["multi"] | Callable | None = None,
2329 engine: str = "auto",
2330 **engine_kwargs,
2331 ) -> int | None:
2332 """
2333 Write records stored in a DataFrame to a SQL database.
2335 Parameters
2336 ----------
2337 frame : DataFrame
2338 name : string
2339 Name of SQL table.
2340 if_exists : {'fail', 'replace', 'append'}, default 'fail'
2341 - fail: If table exists, raise a ValueError.
2342 - replace: If table exists, drop it, recreate it, and insert data.
2343 - append: If table exists, insert data. Create it if it does not exist.
2344 index : boolean, default True
2345 Write DataFrame index as a column.
2346 index_label : string or sequence, default None
2347 Raises NotImplementedError
2348 schema : string, default None
2349 Name of SQL schema in database to write to (if database flavor
2350 supports this). If specified, this overwrites the default
2351 schema of the SQLDatabase object.
2352 chunksize : int, default None
2353 Raises NotImplementedError
2354 dtype : single type or dict of column name to SQL type, default None
2355 Raises NotImplementedError
2356 method : {None, 'multi', callable}, default None
2357 Raises NotImplementedError
2358 engine : {'auto', 'sqlalchemy'}, default 'auto'
2359 Raises NotImplementedError if not set to 'auto'
2360 """
2361 if index_label:
2362 raise NotImplementedError(
2363 "'index_label' is not implemented for ADBC drivers"
2364 )
2365 if chunksize:
2366 raise NotImplementedError("'chunksize' is not implemented for ADBC drivers")
2367 if dtype:
2368 raise NotImplementedError("'dtype' is not implemented for ADBC drivers")
2369 if method:
2370 raise NotImplementedError("'method' is not implemented for ADBC drivers")
2371 if engine != "auto":
2372 raise NotImplementedError(
2373 "engine != 'auto' not implemented for ADBC drivers"
2374 )
2376 if schema:
2377 table_name = f"{schema}.{name}"
2378 else:
2379 table_name = name
2381 # pandas if_exists="append" will still create the
2382 # table if it does not exist; ADBC treats "append" and "create"
2383 # as distinct, explicit modes, so the semantics differ slightly
2384 # between the two libraries
2385 mode = "create"
2386 if self.has_table(name, schema):
2387 if if_exists == "fail":
2388 raise ValueError(f"Table '{table_name}' already exists.")
2389 elif if_exists == "replace":
2390 with self.con.cursor() as cur:
2391 cur.execute(f"DROP TABLE {table_name}")
2392 elif if_exists == "append":
2393 mode = "append"
2395 import pyarrow as pa
2397 try:
2398 tbl = pa.Table.from_pandas(frame, preserve_index=index)
2399 except pa.ArrowNotImplementedError as exc:
2400 raise ValueError("datatypes not supported") from exc
2402 with self.con.cursor() as cur:
2403 total_inserted = cur.adbc_ingest(
2404 table_name=name, data=tbl, mode=mode, db_schema_name=schema
2405 )
2407 self.con.commit()
2408 return total_inserted
2410 def has_table(self, name: str, schema: str | None = None) -> bool:
2411 meta = self.con.adbc_get_objects(
2412 db_schema_filter=schema, table_name_filter=name
2413 ).read_all()
2415 for catalog_schema in meta["catalog_db_schemas"].to_pylist():
2416 if not catalog_schema:
2417 continue
2418 for schema_record in catalog_schema:
2419 if not schema_record:
2420 continue
2422 for table_record in schema_record["db_schema_tables"]:
2423 if table_record["table_name"] == name:
2424 return True
2426 return False
2428 def _create_sql_schema(
2429 self,
2430 frame: DataFrame,
2431 table_name: str,
2432 keys: list[str] | None = None,
2433 dtype: DtypeArg | None = None,
2434 schema: str | None = None,
2435 ) -> str:
2436 raise NotImplementedError("not implemented for adbc")
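# Usage sketch (illustrative only): the ADBC path above round-trips frames
# through Arrow. Assumes the optional adbc-driver-sqlite package is
# installed; the table name "demo_adbc" is hypothetical.
def _demo_adbc_roundtrip():  # pragma: no cover
    import adbc_driver_sqlite.dbapi
    import pandas as pd

    with adbc_driver_sqlite.dbapi.connect(":memory:") as con:
        pd.DataFrame({"a": [1, 2]}).to_sql("demo_adbc", con, index=False)
        # read_query fetches an Arrow table and converts it via to_pandas
        return pd.read_sql("SELECT * FROM demo_adbc", con)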
2439# sqlite-specific sql strings and handler class
2440# dictionary used for readability purposes
2441_SQL_TYPES = {
2442 "string": "TEXT",
2443 "floating": "REAL",
2444 "integer": "INTEGER",
2445 "datetime": "TIMESTAMP",
2446 "date": "DATE",
2447 "time": "TIME",
2448 "boolean": "INTEGER",
2449}
2452def _get_unicode_name(name: object):
2453 try:
2454 uname = str(name).encode("utf-8", "strict").decode("utf-8")
2455 except UnicodeError as err:
2456 raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") from err
2457 return uname
2460def _get_valid_sqlite_name(name: object):
2461 # See https://stackoverflow.com/questions/6514274/how-do-you-escape-strings\
2462 # -for-sqlite-table-column-names-in-python
2463 # Ensure the string can be encoded as UTF-8.
2464 # Ensure the string does not include any NUL characters.
2465 # Replace all " with "".
2466 # Wrap the entire thing in double quotes.
2468 uname = _get_unicode_name(name)
2469 if not len(uname):
2470 raise ValueError("Empty table or column name specified")
2472 nul_index = uname.find("\x00")
2473 if nul_index >= 0:
2474 raise ValueError("SQLite identifier cannot contain NULs")
2475 return '"' + uname.replace('"', '""') + '"'
2478class SQLiteTable(SQLTable):
2479 """
2480 Patch the SQLTable for fallback support.
2481 Instead of a table variable, just use the CREATE TABLE statement.
2482 """
2484 def __init__(self, *args, **kwargs) -> None:
2485 super().__init__(*args, **kwargs)
2487 self._register_date_adapters()
2489 def _register_date_adapters(self) -> None:
2490 # GH 8341
2491 # register an adapter callable for datetime.time object
2492 import sqlite3
2494 # this will transform time(12,34,56,789) into '12:34:56.000789'
2495 # (this is what sqlalchemy does)
2496 def _adapt_time(t) -> str:
2497 # This is faster than strftime
2498 return f"{t.hour:02d}:{t.minute:02d}:{t.second:02d}.{t.microsecond:06d}"
2500 # Also register adapters for date/datetime and co
2501 # xref https://docs.python.org/3.12/library/sqlite3.html#adapter-and-converter-recipes
2502 # Python 3.12+ doesn't auto-register adapters for us anymore
2504 adapt_date_iso = lambda val: val.isoformat()
2505 adapt_datetime_iso = lambda val: val.isoformat(" ")
2507 sqlite3.register_adapter(time, _adapt_time)
2509 sqlite3.register_adapter(date, adapt_date_iso)
2510 sqlite3.register_adapter(datetime, adapt_datetime_iso)
2512 convert_date = lambda val: date.fromisoformat(val.decode())
2513 convert_timestamp = lambda val: datetime.fromisoformat(val.decode())
2515 sqlite3.register_converter("date", convert_date)
2516 sqlite3.register_converter("timestamp", convert_timestamp)
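    # e.g. date(2021, 1, 2) is stored as '2021-01-02' and read back by the
    # "date" converter; note the converters only fire when the connection is
    # opened with detect_types=sqlite3.PARSE_DECLTYPES.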
2518 def sql_schema(self) -> str:
2519 return str(";\n".join(self.table))
2521 def _execute_create(self) -> None:
2522 with self.pd_sql.run_transaction() as conn:
2523 for stmt in self.table:
2524 conn.execute(stmt)
2526 def insert_statement(self, *, num_rows: int) -> str:
2527 names = list(map(str, self.frame.columns))
2528 wld = "?" # wildcard char
2529 escape = _get_valid_sqlite_name
2531 if self.index is not None:
2532 for idx in self.index[::-1]:
2533 names.insert(0, idx)
2535 bracketed_names = [escape(column) for column in names]
2536 col_names = ",".join(bracketed_names)
2538 row_wildcards = ",".join([wld] * len(names))
2539 wildcards = ",".join([f"({row_wildcards})" for _ in range(num_rows)])
2540 insert_statement = (
2541 f"INSERT INTO {escape(self.name)} ({col_names}) VALUES {wildcards}"
2542 )
2543 return insert_statement
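    # For example (hypothetical table "tbl" with columns "a" and "b", no
    # index): insert_statement(num_rows=2) returns
    #   INSERT INTO "tbl" ("a","b") VALUES (?,?),(?,?)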
2545 def _execute_insert(self, conn, keys, data_iter) -> int:
2546 data_list = list(data_iter)
2547 conn.executemany(self.insert_statement(num_rows=1), data_list)
2548 return conn.rowcount
2550 def _execute_insert_multi(self, conn, keys, data_iter) -> int:
2551 data_list = list(data_iter)
2552 flattened_data = [x for row in data_list for x in row]
2553 conn.execute(self.insert_statement(num_rows=len(data_list)), flattened_data)
2554 return conn.rowcount
2556 def _create_table_setup(self):
2557 """
2558 Return a list of SQL statements that creates a table reflecting the
2559 structure of a DataFrame. The first entry will be a CREATE TABLE
2560 statement while the rest will be CREATE INDEX statements.
2561 """
2562 column_names_and_types = self._get_column_names_and_types(self._sql_type_name)
2563 escape = _get_valid_sqlite_name
2565 create_tbl_stmts = [
2566 escape(cname) + " " + ctype for cname, ctype, _ in column_names_and_types
2567 ]
2569 if self.keys is not None and len(self.keys):
2570 if not is_list_like(self.keys):
2571 keys = [self.keys]
2572 else:
2573 keys = self.keys
2574 cnames_br = ", ".join([escape(c) for c in keys])
2575 create_tbl_stmts.append(
2576 f"CONSTRAINT {self.name}_pk PRIMARY KEY ({cnames_br})"
2577 )
2578 if self.schema:
2579 schema_name = self.schema + "."
2580 else:
2581 schema_name = ""
2582 create_stmts = [
2583 "CREATE TABLE "
2584 + schema_name
2585 + escape(self.name)
2586 + " (\n"
2587 + ",\n ".join(create_tbl_stmts)
2588 + "\n)"
2589 ]
2591 ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index]
2592 if len(ix_cols):
2593 cnames = "_".join(ix_cols)
2594 cnames_br = ",".join([escape(c) for c in ix_cols])
2595 create_stmts.append(
2596 "CREATE INDEX "
2597 + escape("ix_" + self.name + "_" + cnames)
2598 + "ON "
2599 + escape(self.name)
2600 + " ("
2601 + cnames_br
2602 + ")"
2603 )
2605 return create_stmts
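    # For example, a frame with integer column "a" indexed by "idx"
    # (hypothetical names) yields two statements:
    #   CREATE TABLE "tbl" ("idx" INTEGER, "a" INTEGER)
    #   CREATE INDEX "ix_tbl_idx" ON "tbl" ("idx")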
2607 def _sql_type_name(self, col):
2608 dtype: DtypeArg = self.dtype or {}
2609 if is_dict_like(dtype):
2610 dtype = cast(dict, dtype)
2611 if col.name in dtype:
2612 return dtype[col.name]
2614 # Infer type of column, while ignoring missing values.
2615 # Needed for inserting typed data containing NULLs, GH 8778.
2616 col_type = lib.infer_dtype(col, skipna=True)
2618 if col_type == "timedelta64":
2619 warnings.warn(
2620 "the 'timedelta' type is not supported, and will be "
2621 "written as integer values (ns frequency) to the database.",
2622 UserWarning,
2623 stacklevel=find_stack_level(),
2624 )
2625 col_type = "integer"
2627 elif col_type == "datetime64":
2628 col_type = "datetime"
2630 elif col_type == "empty":
2631 col_type = "string"
2633 elif col_type == "complex":
2634 raise ValueError("Complex datatypes not supported")
2636 if col_type not in _SQL_TYPES:
2637 col_type = "string"
2639 return _SQL_TYPES[col_type]
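# Sketch of the inference used by _sql_type_name above (hypothetical data):
# pandas infers the column kind while skipping NULLs, then maps the result
# through _SQL_TYPES, falling back to TEXT for anything unrecognised.
def _demo_sqlite_type_inference():  # pragma: no cover
    import pandas as pd
    from pandas._libs import lib

    col = pd.Series([1.5, None, 2.5])
    inferred = lib.infer_dtype(col, skipna=True)  # "floating"
    return _SQL_TYPES.get(inferred, _SQL_TYPES["string"])  # "REAL"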
2642class SQLiteDatabase(PandasSQL):
2643 """
2644 Version of SQLDatabase to support SQLite connections (fallback without
2645 SQLAlchemy). This should only be used internally.
2647 Parameters
2648 ----------
2649 con : sqlite connection object
2651 """
2653 def __init__(self, con) -> None:
2654 self.con = con
2656 @contextmanager
2657 def run_transaction(self):
2658 cur = self.con.cursor()
2659 try:
2660 yield cur
2661 self.con.commit()
2662 except Exception:
2663 self.con.rollback()
2664 raise
2665 finally:
2666 cur.close()
2668 def execute(self, sql: str | Select | TextClause, params=None):
2669 if not isinstance(sql, str):
2670 raise TypeError("Query must be a string unless using sqlalchemy.")
2671 args = [] if params is None else [params]
2672 cur = self.con.cursor()
2673 try:
2674 cur.execute(sql, *args)
2675 return cur
2676 except Exception as exc:
2677 try:
2678 self.con.rollback()
2679 except Exception as inner_exc: # pragma: no cover
2680 ex = DatabaseError(
2681 f"Execution failed on sql: {sql}\n{exc}\nunable to rollback"
2682 )
2683 raise ex from inner_exc
2685 ex = DatabaseError(f"Execution failed on sql '{sql}': {exc}")
2686 raise ex from exc
2688 @staticmethod
2689 def _query_iterator(
2690 cursor,
2691 chunksize: int,
2692 columns,
2693 index_col=None,
2694 coerce_float: bool = True,
2695 parse_dates=None,
2696 dtype: DtypeArg | None = None,
2697 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
2698 ):
2699 """Return generator through chunked result set"""
2700 has_read_data = False
2701 while True:
2702 data = cursor.fetchmany(chunksize)
2703 if isinstance(data, tuple):
2704 data = list(data)
2705 if not data:
2706 cursor.close()
2707 if not has_read_data:
2708 result = DataFrame.from_records(
2709 [], columns=columns, coerce_float=coerce_float
2710 )
2711 if dtype:
2712 result = result.astype(dtype)
2713 yield result
2714 break
2716 has_read_data = True
2717 yield _wrap_result(
2718 data,
2719 columns,
2720 index_col=index_col,
2721 coerce_float=coerce_float,
2722 parse_dates=parse_dates,
2723 dtype=dtype,
2724 dtype_backend=dtype_backend,
2725 )
2727 def read_query(
2728 self,
2729 sql,
2730 index_col=None,
2731 coerce_float: bool = True,
2732 parse_dates=None,
2733 params=None,
2734 chunksize: int | None = None,
2735 dtype: DtypeArg | None = None,
2736 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
2737 ) -> DataFrame | Iterator[DataFrame]:
2738 cursor = self.execute(sql, params)
2739 columns = [col_desc[0] for col_desc in cursor.description]
2741 if chunksize is not None:
2742 return self._query_iterator(
2743 cursor,
2744 chunksize,
2745 columns,
2746 index_col=index_col,
2747 coerce_float=coerce_float,
2748 parse_dates=parse_dates,
2749 dtype=dtype,
2750 dtype_backend=dtype_backend,
2751 )
2752 else:
2753 data = self._fetchall_as_list(cursor)
2754 cursor.close()
2756 frame = _wrap_result(
2757 data,
2758 columns,
2759 index_col=index_col,
2760 coerce_float=coerce_float,
2761 parse_dates=parse_dates,
2762 dtype=dtype,
2763 dtype_backend=dtype_backend,
2764 )
2765 return frame
2767 def _fetchall_as_list(self, cur):
2768 result = cur.fetchall()
2769 if not isinstance(result, list):
2770 result = list(result)
2771 return result
2773 def to_sql(
2774 self,
2775 frame,
2776 name: str,
2777 if_exists: str = "fail",
2778 index: bool = True,
2779 index_label=None,
2780 schema=None,
2781 chunksize: int | None = None,
2782 dtype: DtypeArg | None = None,
2783 method: Literal["multi"] | Callable | None = None,
2784 engine: str = "auto",
2785 **engine_kwargs,
2786 ) -> int | None:
2787 """
2788 Write records stored in a DataFrame to a SQL database.
2790 Parameters
2791 ----------
2792 frame: DataFrame
2793 name: string
2794 Name of SQL table.
2795 if_exists: {'fail', 'replace', 'append'}, default 'fail'
2796 fail: If table exists, raise a ValueError.
2797 replace: If table exists, drop it, recreate it, and insert data.
2798 append: If table exists, insert data. Create if it does not exist.
2799 index : bool, default True
2800 Write DataFrame index as a column
2801 index_label : string or sequence, default None
2802 Column label for index column(s). If None is given (default) and
2803 `index` is True, then the index names are used.
2804 A sequence should be given if the DataFrame uses MultiIndex.
2805 schema : string, default None
2806 Ignored parameter included for compatibility with SQLAlchemy
2807 version of ``to_sql``.
2808 chunksize : int, default None
2809 If not None, then rows will be written in batches of this
2810 size at a time. If None, all rows will be written at once.
2811 dtype : single type or dict of column name to SQL type, default None
2812 Optional specifying the datatype for columns. The SQL type should
2813 be a string. If all columns are of the same type, one single value
2814 can be used.
2815 method : {None, 'multi', callable}, default None
2816 Controls the SQL insertion clause used:
2818 * None : Uses standard SQL ``INSERT`` clause (one per row).
2819 * 'multi': Pass multiple values in a single ``INSERT`` clause.
2820 * callable with signature ``(pd_table, conn, keys, data_iter)``.
2822 Details and a sample callable implementation can be found in the
2823 section :ref:`insert method <io.sql.method>`.
2824 """
2825 if dtype:
2826 if not is_dict_like(dtype):
2827 # error: Value expression in dictionary comprehension has incompatible
2828 # type "Union[ExtensionDtype, str, dtype[Any], Type[object],
2829 # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]],
2830 # Type[str], Type[float], Type[int], Type[complex], Type[bool],
2831 # Type[object]]]]"; expected type "Union[ExtensionDtype, str,
2832 # dtype[Any], Type[object]]"
2833 dtype = {col_name: dtype for col_name in frame} # type: ignore[misc]
2834 else:
2835 dtype = cast(dict, dtype)
2837 for col, my_type in dtype.items():
2838 if not isinstance(my_type, str):
2839 raise ValueError(f"{col} ({my_type}) not a string")
2841 table = SQLiteTable(
2842 name,
2843 self,
2844 frame=frame,
2845 index=index,
2846 if_exists=if_exists,
2847 index_label=index_label,
2848 dtype=dtype,
2849 )
2850 table.create()
2851 return table.insert(chunksize, method)
2853 def has_table(self, name: str, schema: str | None = None) -> bool:
2854 wld = "?"
2855 query = f"""
2856 SELECT
2857 name
2858 FROM
2859 sqlite_master
2860 WHERE
2861 type IN ('table', 'view')
2862 AND name={wld};
2863 """
2865 return len(self.execute(query, [name]).fetchall()) > 0
2867 def get_table(self, table_name: str, schema: str | None = None) -> None:
2868 return None # not supported in fallback mode
2870 def drop_table(self, name: str, schema: str | None = None) -> None:
2871 drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}"
2872 self.execute(drop_sql)
2874 def _create_sql_schema(
2875 self,
2876 frame,
2877 table_name: str,
2878 keys=None,
2879 dtype: DtypeArg | None = None,
2880 schema: str | None = None,
2881 ) -> str:
2882 table = SQLiteTable(
2883 table_name,
2884 self,
2885 frame=frame,
2886 index=False,
2887 keys=keys,
2888 dtype=dtype,
2889 schema=schema,
2890 )
2891 return str(table.sql_schema())
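# Usage sketch (illustrative only): with a raw sqlite3 connection pandas
# dispatches to SQLiteDatabase/SQLiteTable above instead of SQLAlchemy.
# The table name "demo_fallback" is hypothetical.
def _demo_sqlite_fallback():  # pragma: no cover
    import sqlite3

    import pandas as pd

    con = sqlite3.connect(":memory:")
    pd.DataFrame({"a": [1, 2]}).to_sql("demo_fallback", con, index=False)
    return pd.read_sql("SELECT * FROM demo_fallback", con)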
2894def get_schema(
2895 frame,
2896 name: str,
2897 keys=None,
2898 con=None,
2899 dtype: DtypeArg | None = None,
2900 schema: str | None = None,
2901) -> str:
2902 """
2903 Get the SQL db table schema for the given frame.
2905 Parameters
2906 ----------
2907 frame : DataFrame
2908 name : str
2909 name of SQL table
2910 keys : string or sequence, default: None
2911 columns to use as a primary key
2912 con: ADBC Connection, SQLAlchemy connectable, sqlite3 connection, default: None
2913 ADBC provides high performance I/O with native type support, where available.
2914 Using SQLAlchemy makes it possible to use any DB supported by that
2915 library.
2916 If a DBAPI2 object, only sqlite3 is supported.
2917 dtype : dict of column name to SQL type, default None
2918 Optional specifying the datatype for columns. The SQL type should
2919 be a SQLAlchemy type, or a string for sqlite3 fallback connection.
2920 schema: str, default: None
2921 Optional specifying the schema to be used in creating the table.
2922 """
2923 with pandasSQL_builder(con=con) as pandas_sql:
2924 return pandas_sql._create_sql_schema(
2925 frame, name, keys=keys, dtype=dtype, schema=schema
2926 )
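# Usage sketch (illustrative only): get_schema renders the CREATE TABLE
# statement without touching a database; with con=None the sqlite3 fallback
# dialect is used, so the column types come from _SQL_TYPES above. The
# frame contents and table name "demo_tbl" are hypothetical.
def _demo_get_schema():  # pragma: no cover
    import pandas as pd

    frame = pd.DataFrame({"a": [1], "b": ["x"]})
    return get_schema(frame, "demo_tbl")  # 'CREATE TABLE "demo_tbl" (...)'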