{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pandas\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Pandas](https://pandas.pydata.org/docs/) is a Python-based tool for data analysis and manipulation. It is:\n", "* designed for working with heterogeneous data\n", "* well suited for data importing, aggregation and cleaning\n", "* handy for quick visualizations of data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The best of pandas" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "df = pd.read_csv(\"titanic.csv\", sep=\"\\t\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(pandas.core.frame.DataFrame, (156, 12))" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(df), df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
5603Moran, Mr. JamesmaleNaN003308778.4583NaNQ
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.1333NaNS
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.0708NaNC
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "5 6 0 3 \n", "6 7 0 1 \n", "7 8 0 3 \n", "8 9 1 3 \n", "9 10 1 2 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "5 Moran, Mr. James male NaN 0 \n", "6 McCarthy, Mr. Timothy J male 54.0 0 \n", "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S \n", "5 0 330877 8.4583 NaN Q \n", "6 0 17463 51.8625 E46 S \n", "7 1 349909 21.0750 NaN S \n", "8 2 347742 11.1333 NaN S \n", "9 0 237736 30.0708 NaN C " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.head(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassAgeSibSpParchFare
count156.000000156.000000156.000000126.000000156.000000156.000000156.000000
mean78.5000000.3461542.42307728.1415080.6153850.39743628.109587
std45.1774280.4772750.79545914.6138801.0562350.87014639.401047
min1.0000000.0000001.0000000.8300000.0000000.0000006.750000
25%39.7500000.0000002.00000019.0000000.0000000.0000008.003150
50%78.5000000.0000003.00000026.0000000.0000000.00000014.454200
75%117.2500001.0000003.00000035.0000001.0000000.00000030.371850
max156.0000001.0000003.00000071.0000005.0000005.000000263.000000
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Age SibSp \\\n", "count 156.000000 156.000000 156.000000 126.000000 156.000000 \n", "mean 78.500000 0.346154 2.423077 28.141508 0.615385 \n", "std 45.177428 0.477275 0.795459 14.613880 1.056235 \n", "min 1.000000 0.000000 1.000000 0.830000 0.000000 \n", "25% 39.750000 0.000000 2.000000 19.000000 0.000000 \n", "50% 78.500000 0.000000 3.000000 26.000000 0.000000 \n", "75% 117.250000 1.000000 3.000000 35.000000 1.000000 \n", "max 156.000000 1.000000 3.000000 71.000000 5.000000 \n", "\n", " Parch Fare \n", "count 156.000000 156.000000 \n", "mean 0.397436 28.109587 \n", "std 0.870146 39.401047 \n", "min 0.000000 6.750000 \n", "25% 0.000000 8.003150 \n", "50% 0.000000 14.454200 \n", "75% 0.000000 30.371850 \n", "max 5.000000 263.000000 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 156 entries, 0 to 155\n", "Data columns (total 12 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 PassengerId 156 non-null int64 \n", " 1 Survived 156 non-null int64 \n", " 2 Pclass 156 non-null int64 \n", " 3 Name 156 non-null object \n", " 4 Sex 156 non-null object \n", " 5 Age 126 non-null float64\n", " 6 SibSp 156 non-null int64 \n", " 7 Parch 156 non-null int64 \n", " 8 Ticket 156 non-null object \n", " 9 Fare 156 non-null float64\n", " 10 Cabin 31 non-null object \n", " 11 Embarked 155 non-null object \n", "dtypes: float64(2), int64(5), object(5)\n", "memory usage: 14.8+ KB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Select columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use syntax `df[[col1, ..., colN]]`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 22.0\n", 
"1 38.0\n", "2 26.0\n", "3 35.0\n", "4 35.0\n", " ... \n", "151 22.0\n", "152 55.5\n", "153 40.5\n", "154 NaN\n", "155 51.0\n", "Name: Age, Length: 156, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df['Age']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Age
022.0
138.0
226.0
335.0
435.0
......
15122.0
15255.5
15340.5
154NaN
15551.0
\n", "

156 rows × 1 columns

\n", "
" ], "text/plain": [ " Age\n", "0 22.0\n", "1 38.0\n", "2 26.0\n", "3 35.0\n", "4 35.0\n", ".. ...\n", "151 22.0\n", "152 55.5\n", "153 40.5\n", "154 NaN\n", "155 51.0\n", "\n", "[156 rows x 1 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df[['Age']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(pandas.core.frame.DataFrame,\n", " pandas.core.series.Series,\n", " pandas.core.frame.DataFrame)" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "type(df), type(df['Age']), type(df[['Age']])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Indexing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
787912Caldwell, Master. Alden Gatesmale0.830224873829.0000NaNS
11912003Andersson, Miss. Ellis Anna Mariafemale2.004234708231.2750NaNS
7803Palsson, Master. Gosta Leonardmale2.003134990921.0750NaNS
161703Rice, Master. Eugenemale2.004138265229.1250NaNQ
434412Laroche, Miss. Simonne Marie Anne Andreefemale3.0012SC/Paris 212341.5792NaNC
636403Skoog, Master. Haraldmale4.003234708827.9000NaNS
101113Sandstrom, Miss. Marguerite Rutfemale4.0011PP 954916.7000G6S
585912West, Miss. Constance Miriumfemale5.0012C.A. 3465127.7500NaNS
505103Panula, Master. Juha Niilomale7.0041310129539.6875NaNS
242503Palsson, Miss. Torborg Danirafemale8.003134990921.0750NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Name \\\n", "78 79 1 2 Caldwell, Master. Alden Gates \n", "119 120 0 3 Andersson, Miss. Ellis Anna Maria \n", "7 8 0 3 Palsson, Master. Gosta Leonard \n", "16 17 0 3 Rice, Master. Eugene \n", "43 44 1 2 Laroche, Miss. Simonne Marie Anne Andree \n", "63 64 0 3 Skoog, Master. Harald \n", "10 11 1 3 Sandstrom, Miss. Marguerite Rut \n", "58 59 1 2 West, Miss. Constance Mirium \n", "50 51 0 3 Panula, Master. Juha Niilo \n", "24 25 0 3 Palsson, Miss. Torborg Danira \n", "\n", " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", "78 male 0.83 0 2 248738 29.0000 NaN S \n", "119 female 2.00 4 2 347082 31.2750 NaN S \n", "7 male 2.00 3 1 349909 21.0750 NaN S \n", "16 male 2.00 4 1 382652 29.1250 NaN Q \n", "43 female 3.00 1 2 SC/Paris 2123 41.5792 NaN C \n", "63 male 4.00 3 2 347088 27.9000 NaN S \n", "10 female 4.00 1 1 PP 9549 16.7000 G6 S \n", "58 female 5.00 1 2 C.A. 34651 27.7500 NaN S \n", "50 male 7.00 4 1 3101295 39.6875 NaN S \n", "24 female 8.00 3 1 349909 21.0750 NaN S " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.sort_values(\"Age\", inplace=True)\n", "df.head(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
10110203Petroff, Mr. Pastcho (\"Pentcho\")maleNaN003492157.8958NaNS
10710813Moss, Mr. Albert JohanmaleNaN003129917.7750NaNS
10911013Moran, Miss. BerthafemaleNaN1037111024.1500NaNQ
12112203Moore, Mr. Leonard CharlesmaleNaN00A4. 545108.0500NaNS
12612703McMahon, Mr. MartinmaleNaN003703727.7500NaNQ
12812913Peter, Miss. AnnafemaleNaN11266822.3583F E69C
14014103Boulos, Mrs. Joseph (Sultana)femaleNaN02267815.2458NaNC
15415503Olsen, Mr. Ole MartinmaleNaN00Fa 2653027.3125NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Name Sex \\\n", "101 102 0 3 Petroff, Mr. Pastcho (\"Pentcho\") male \n", "107 108 1 3 Moss, Mr. Albert Johan male \n", "109 110 1 3 Moran, Miss. Bertha female \n", "121 122 0 3 Moore, Mr. Leonard Charles male \n", "126 127 0 3 McMahon, Mr. Martin male \n", "128 129 1 3 Peter, Miss. Anna female \n", "140 141 0 3 Boulos, Mrs. Joseph (Sultana) female \n", "154 155 0 3 Olsen, Mr. Ole Martin male \n", "\n", " Age SibSp Parch Ticket Fare Cabin Embarked \n", "101 NaN 0 0 349215 7.8958 NaN S \n", "107 NaN 0 0 312991 7.7750 NaN S \n", "109 NaN 1 0 371110 24.1500 NaN Q \n", "121 NaN 0 0 A4. 54510 8.0500 NaN S \n", "126 NaN 0 0 370372 7.7500 NaN Q \n", "128 NaN 1 1 2668 22.3583 F E69 C \n", "140 NaN 0 2 2678 15.2458 NaN C \n", "154 NaN 0 0 Fa 265302 7.3125 NaN S " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.tail(8)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 134\n", "Survived 1\n", "Pclass 2\n", "Name Weisz, Mrs. Leopold (Mathilde Francoise Pede)\n", "Sex female\n", "Age 29.0\n", "SibSp 1\n", "Parch 0\n", "Ticket 228414\n", "Fare 26.0\n", "Cabin NaN\n", "Embarked S\n", "Name: 133, dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# access by integer position (row 78 of the frame as currently sorted)\n", "df.iloc[78]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PassengerId 79\n", "Survived 1\n", "Pclass 2\n", "Name Caldwell, Master. Alden Gates\n", "Sex male\n", "Age 0.83\n", "SibSp 0\n", "Parch 2\n", "Ticket 248738\n", "Fare 29.0\n", "Cabin NaN\n", "Embarked S\n", "Name: 78, dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# access by index label (the row whose index value is 78)\n", "df.loc[78]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeCabin
780.83NaN
7930.00NaN
10028.00NaN
\n", "
" ], "text/plain": [ " Age Cabin\n", "78 0.83 NaN\n", "79 30.00 NaN\n", "100 28.00 NaN" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# select several rows and columns by label at once\n", "df.loc[[78, 79, 100], [\"Age\", \"Cabin\"]] " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## `pd.Series`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A one-dimensional slice of a DataFrame (a single column or a single row) has type `pd.Series`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.83, 2. , 2. , 2. , 3. ])" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df[\"Age\"].head(5).values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Access the index (row labels) of a Series with `.index`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([78, 119, 7, 16, 43], dtype='int64')" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df[\"Age\"].head(5).index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Creating `pd.Series`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Red 1\n", "Green 2\n", "Blue 3\n", "dtype: int64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pd.Series([1, 2, 3], index=[\"Red\", \"Green\", \"Blue\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Red 1\n", "Green 1\n", "Blue 1\n", "dtype: int64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pd.Series(1, index=[\"Red\", \"Green\", \"Blue\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Convert a Series to a DataFrame with `.to_frame()`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.frame.DataFrame" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "s = pd.Series([1, 2, 3], index=[\"Red\", 
\"Green\", \"Blue\"])\n", "type(s.to_frame(\"Values\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### NaN's" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "78 NaN\n", "119 NaN\n", "7 NaN\n", "16 NaN\n", "43 NaN\n", "63 NaN\n", "10 G6\n", "58 NaN\n", "50 NaN\n", "24 NaN\n", "Name: Cabin, dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df[\"Cabin\"].head(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10 G6\n", "136 D47\n", "27 C23 C25 C27\n", "102 D26\n", "151 C2\n", "88 C23 C25 C27\n", "97 D10 D12\n", "118 B58 B60\n", "139 B86\n", "75 F G73\n", "Name: Cabin, dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df[\"Cabin\"].dropna().head(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "78 3\n", "119 3\n", "7 3\n", "16 3\n", "43 3\n", "63 3\n", "10 G6\n", "58 3\n", "50 3\n", "24 3\n", "Name: Cabin, dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df[\"Cabin\"].fillna(3).head(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "78 G6\n", "119 G6\n", "7 G6\n", "16 G6\n", "43 G6\n", "63 G6\n", "10 G6\n", "58 D47\n", "50 D47\n", "24 D47\n", "Name: Cabin, dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df[\"Cabin\"].fillna(method=\"bfill\").head(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "78 True\n", "119 True\n", "7 True\n", "16 True\n", "43 True\n", "63 True\n", "10 False\n", "58 True\n", "50 True\n", "24 True\n", "Name: Cabin, dtype: bool" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pd.isna(df[\"Cabin\"]).head(10)" ] }, { "cell_type": "markdown", "metadata": 
{}, "source": [ "### Visualization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.sort_index()[\"Fare\"].plot();" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAANhElEQVR4nO3df6xfdX3H8edLKgNaxw9Zbkhhu2wQDYEx9YpsbMtFyIK6CNucwTWzmGbNMkUzMLPTbCxhbrKNOUb2I40wuqQRFcnKdNORyp2bU2YrYoXiaPhlWaUafoyLZq7xvT/uYbtpLtz2nO+33/rh+Uhu7vec7+ecz6f943lPz/1+v01VIUlq04smvQBJ0vgYeUlqmJGXpIYZeUlqmJGXpIatmPQCAE488cSanp7udewzzzzDypUrR7sgSTpEhjRs+/bt36qqH3q+MYdF5Kenp9m2bVuvY+fm5pidnR3tgiTpEBnSsCQPLzfG2zWS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNWzbySW5MsjfJVxftOyHJ7Unu774f3+1Pkj9PsivJV5K8cpyLlyQ9vwO5kr8JuGi/fRuArVV1OrC12wZ4HXB697Ue+KvRLFOS1Meyka+qzwKP77f7YmBT93gTcMmi/X9bC74AHJfkpBGtVZJ0kPq+43WqqvZ0j78BTHWPVwNfXzRud7dvD/tJsp6Fq32mpqaYm5vrtZC9jz/F9Zu39Dp2qLNWHzuReSW1Y35+vnf/DsTgjzWoqkpy0P+9VFVtBDYCzMzMVN+39V6/eQvX7pjMpzM8tGZ2IvNKase4P5ql76trHnv2Nkz3fW+3/1HglEXjTu72SZImoG/kbwPWdo/XAlsW7X9r9yqbc4GnFt3WkSQdYsve50jyYWAWODHJbuAq4APAR5OsAx4G3twN/wfg9cAu4NvA28awZknSAVo28lX1lud46oIlxhbw9qGLkiSNhu94laSGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJapiRl6SGGXlJatigyCf5zST3JPlqkg8nOSrJqUnuTLIryUeSHDmqxUqSDk7vyCdZDbwTmKmqM4EjgEuBa4APVtVpwBPAulEsVJJ08IberlkBHJ1kBXAMsAd4LXBL9/wm4JKBc0iSelrR98CqejTJnwCPAN8B/gnYDjxZVfu6YbuB1Usdn2Q9sB5gamqKubm5XuuYOhquPGvf8gPHoO+aJelZ8/PzY21J78gnOR64GDgVeBL4GHDRgR5fVRuBjQAzMzM1Ozvbax3Xb97CtTt6/zEGeWjN7ETmldSOubk5+vbvQAy5XXMh8GBVfbOq/ge4FTgPOK67fQNwMvDowDVKknoaEvlHgHOTHJMkwAXAvcAdwJu6MWuBLcOWKEnqq3fkq+pOFn7B+iVgR3eujcB7gCuS7AJeCtwwgnVKknoYdDO7qq4Crtpv9wPAOUPOK0kaDd/xKkkNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/KS1DAjL0kNM/
KS1LBBkU9yXJJbktyXZGeSn0xyQpLbk9zffT9+VIuVJB2coVfy1wGfqqqXA2cDO4ENwNaqOh3Y2m1Lkiagd+STHAv8LHADQFV9t6qeBC4GNnXDNgGXDFuiJKmvIVfypwLfBP4myV1JPpRkJTBVVXu6Md8ApoYuUpLUz4qBx74SuLyq7kxyHfvdmqmqSlJLHZxkPbAeYGpqirm5uV6LmDoarjxrX69jh+q7Zkl61vz8/FhbMiTyu4HdVXVnt30LC5F/LMlJVbUnyUnA3qUOrqqNwEaAmZmZmp2d7bWI6zdv4dodQ/4Y/T20ZnYi80pqx9zcHH37dyB6366pqm8AX0/ysm7XBcC9wG3A2m7fWmDLoBVKknobegl8ObA5yZHAA8DbWPjB8dEk64CHgTcPnEOS1NOgyFfVl4GZJZ66YMh5JUmj4TteJalhRl6SGmbkJalhk3ntoSQdRqY3fHJic9900cqxnt8reUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlqmJGXpIYZeUlq2ODIJzkiyV1JPtFtn5rkziS7knwkyZHDlylJ6mMUV/LvAnYu2r4G+GBVnQY8AawbwRySpB4GRT7JycAbgA912wFeC9zSDdkEXDJkDklSfysGHv9nwG8BL+m2Xwo8WVX7uu3dwOqlDkyyHlgPMDU1xdzcXK8FTB0NV561b/mBY9B3zZIOL5NqCMD8/PxYW9I78kl+HthbVduTzB7s8VW1EdgIMDMzU7OzB30KAK7fvIVrdwz9WdXPQ2tmJzKvpNG6bMMnJzb3TRetpG//DsSQOp4HvDHJ64GjgB8ErgOOS7Kiu5o/GXh0+DIlSX30vidfVb9dVSdX1TRwKfCZqloD3AG8qRu2FtgyeJWSpF7G8Tr59wBXJNnFwj36G8YwhyTpAIzkZnZVzQFz3eMHgHNGcV5J0jC+41WSGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhRl6SGmbkJalhvSOf5JQkdyS5N8k9Sd7V7T8hye1J7u++Hz+65UqSDsaQK/l9wJVVdQZwLvD2JGcAG4CtVXU6sLXbliRNQO/IV9WeqvpS9/hpYCewGrgY2NQN2wRcMnCNkqSeUlXDT5JMA58FzgQeqarjuv0Bnnh2e79j1gPrAaampl51880395p77+NP8dh3eh062Fmrj53MxJJGasejT01s7lOPPYJVq1b1Ovb888/fXlUzzzdmcOSTrAL+GXh/Vd2a5MnFUU/yRFU97335mZmZ2rZtW6/5r9+8hWt3rOh17FAPfeANE5lX0mhNb/jkxOa+6aKVzM7O9jo2ybKRH/TqmiQvBj4ObK6qW7vdjyU5qXv+JGDvkDkkSf0NeXVNgBuAnVX1p4ueug1Y2z1eC2zpvzxJ0hBD7nOcB/wqsCPJl7t97wU+AHw0yTrgYeDNg1YoSeqtd+Sr6l+BPMfTF/Q9ryRpdHzHqyQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1zMhLUsOMvCQ1bCyRT3JRkq8l2ZVkwzjmkCQtb+SRT3IE8BfA64AzgLckOWPU80iSljeOK/lzgF1V9UBVfRe4Gbh4DPNIkp
axYgznXA18fdH2buA1+w9Ksh5Y323OJ/laz/lOBL7V89hBcs0kZpXUkvOvGdSwH1luwDgif0CqaiOwceh5kmyrqpkRLEmSDrlxN2wct2seBU5ZtH1yt0+SdIiNI/JfBE5PcmqSI4FLgdvGMI8kaRkjv11TVfuSvAP4NHAEcGNV3TPqeRYZfMtHkiZorA1LVY3z/JKkCfIdr5LUMCMvSQ1rLvJJZpN8YtLrkPTCkOSdSXYm2Tym8/9eknf3PX5ir5OXpEb8BnBhVe2e9EKWclheySeZTnJfkpuS/EeSzUkuTPK5JPcnOaf7+nySu5L8W5KXLXGelUluTPLv3Tg/XkHSyCT5a+BHgX9M8r6lepPksiR/l+T2JA8leUeSK7oxX0hyQjfu15J8McndST6e5Jgl5vuxJJ9Ksj3JvyR5+XJrPCwj3zkNuBZ4eff1K8BPA+8G3gvcB/xMVb0C+F3gD5Y4x/uAz1TVOcD5wB8nWXkI1i7pBaCqfh34Txb6spLn7s2ZwC8CrwbeD3y7a9fngbd2Y26tqldX1dnATmDdElNuBC6vqlex0MK/XG6Nh/PtmgeragdAknuArVVVSXYA08CxwKYkpwMFvHiJc/wc8MZF97OOAn6Yhb9ASRql5+oNwB1V9TTwdJKngL/v9u8Afrx7fGaS3weOA1ax8F6j/5NkFfBTwMeSPLv7B5Zb1OEc+f9e9Ph7i7a/x8K6r2bhL+4XkkwDc0ucI8AvVVXfDz+TpAO1ZG+SvIblewZwE3BJVd2d5DJgdr/zvwh4sqp+4mAWdTjfrlnOsfz/Z+Jc9hxjPg1cnu7HXpJXHIJ1SXphGtqblwB7krwYWLP/k1X1X8CDSX65O3+SnL3cSb+fI/9HwB8muYvn/hfJ1SzcxvlKd8vn6kO1OEkvOEN78zvAncDnWPid41LWAOuS3A3cwwH8Xx1+rIEkNez7+UpekrQMIy9JDTPyktQwIy9JDTPyktQwIy9JDTPyktSw/wWG0OVbgL9PwAAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df[\"Sex\"].hist();" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "19.493588689617926" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "np.sqrt(0.95) * 20" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "89.78113496070968" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "eps = 0.01\n", "q = 0.95\n", "np.log(eps) / np.log(q)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }