{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "b2c_DuRzIQn9"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import seaborn as sns\n",
        "import matplotlib.pyplot as plt\n",
        "from sklearn.preprocessing import LabelEncoder, StandardScaler\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df = sns.load_dataset('titanic')\n",
        "# OR load from CSV if downloaded\n",
        "# df = pd.read_csv('titanic.csv')\n",
        "df.head()\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "e-hUE5nyIRs6",
        "outputId": "434cda7c-df23-4c09-ddc6-5473ecc9f729"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \\\n",
              "0         0       3    male  22.0      1      0   7.2500        S  Third   \n",
              "1         1       1  female  38.0      1      0  71.2833        C  First   \n",
              "2         1       3  female  26.0      0      0   7.9250        S  Third   \n",
              "3         1       1  female  35.0      1      0  53.1000        S  First   \n",
              "4         0       3    male  35.0      0      0   8.0500        S  Third   \n",
              "\n",
              "     who  adult_male deck  embark_town alive  alone  \n",
              "0    man        True  NaN  Southampton    no  False  \n",
              "1  woman       False    C    Cherbourg   yes  False  \n",
              "2  woman       False  NaN  Southampton   yes   True  \n",
              "3  woman       False    C  Southampton   yes  False  \n",
              "4    man        True  NaN  Southampton    no   True  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-2b6e4032-5367-455e-893a-202e107c6063\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>survived</th>\n",
              "      <th>pclass</th>\n",
              "      <th>sex</th>\n",
              "      <th>age</th>\n",
              "      <th>sibsp</th>\n",
              "      <th>parch</th>\n",
              "      <th>fare</th>\n",
              "      <th>embarked</th>\n",
              "      <th>class</th>\n",
              "      <th>who</th>\n",
              "      <th>adult_male</th>\n",
              "      <th>deck</th>\n",
              "      <th>embark_town</th>\n",
              "      <th>alive</th>\n",
              "      <th>alone</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>male</td>\n",
              "      <td>22.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>7.2500</td>\n",
              "      <td>S</td>\n",
              "      <td>Third</td>\n",
              "      <td>man</td>\n",
              "      <td>True</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>no</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>female</td>\n",
              "      <td>38.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>71.2833</td>\n",
              "      <td>C</td>\n",
              "      <td>First</td>\n",
              "      <td>woman</td>\n",
              "      <td>False</td>\n",
              "      <td>C</td>\n",
              "      <td>Cherbourg</td>\n",
              "      <td>yes</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1</td>\n",
              "      <td>3</td>\n",
              "      <td>female</td>\n",
              "      <td>26.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>7.9250</td>\n",
              "      <td>S</td>\n",
              "      <td>Third</td>\n",
              "      <td>woman</td>\n",
              "      <td>False</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>yes</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>female</td>\n",
              "      <td>35.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>53.1000</td>\n",
              "      <td>S</td>\n",
              "      <td>First</td>\n",
              "      <td>woman</td>\n",
              "      <td>False</td>\n",
              "      <td>C</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>yes</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>male</td>\n",
              "      <td>35.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>8.0500</td>\n",
              "      <td>S</td>\n",
              "      <td>Third</td>\n",
              "      <td>man</td>\n",
              "      <td>True</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>no</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2b6e4032-5367-455e-893a-202e107c6063')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-2b6e4032-5367-455e-893a-202e107c6063 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-2b6e4032-5367-455e-893a-202e107c6063');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    <div id=\"df-3071312e-5ca7-4f90-8e9e-7ea47ccd52b6\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-3071312e-5ca7-4f90-8e9e-7ea47ccd52b6')\"\n",
              "                title=\"Suggest charts\"\n",
              "                style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "      <script>\n",
              "        async function quickchart(key) {\n",
              "          const quickchartButtonEl =\n",
              "            document.querySelector('#' + key + ' button');\n",
              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "          try {\n",
              "            const charts = await google.colab.kernel.invokeFunction(\n",
              "                'suggestCharts', [key], {});\n",
              "          } catch (error) {\n",
              "            console.error('Error during call to suggestCharts:', error);\n",
              "          }\n",
              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "        }\n",
              "        (() => {\n",
              "          let quickchartButtonEl =\n",
              "            document.querySelector('#df-3071312e-5ca7-4f90-8e9e-7ea47ccd52b6 button');\n",
              "          quickchartButtonEl.style.display =\n",
              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "        })();\n",
              "      </script>\n",
              "    </div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "df",
              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 891,\n  \"fields\": [\n    {\n      \"column\": \"survived\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"pclass\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 3,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          3,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sex\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"female\",\n          \"male\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 14.526497332334044,\n        \"min\": 0.42,\n        \"max\": 80.0,\n        \"num_unique_values\": 88,\n        \"samples\": [\n          0.75,\n          22.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sibsp\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1,\n        \"min\": 0,\n        \"max\": 8,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"parch\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 6,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          0,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"fare\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 49.693428597180905,\n        \"min\": 0.0,\n        \"max\": 512.3292,\n        \"num_unique_values\": 248,\n        \"samples\": [\n          11.2417,\n          51.8625\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embarked\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"S\",\n          \"C\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"class\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Third\",\n          \"First\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"who\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"man\",\n          \"woman\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"adult_male\",\n      \"properties\": {\n        \"dtype\": \"boolean\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          false,\n          true\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"deck\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 7,\n        \"samples\": [\n          \"C\",\n          \"E\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embark_town\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Southampton\",\n          \"Cherbourg\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"alive\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"yes\",\n          \"no\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"alone\",\n      \"properties\": {\n        \"dtype\": \"boolean\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          true,\n          false\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 2
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(df.shape)\n",
        "print(df.info())\n",
        "print(df.describe())\n",
        "print(df.isnull().sum())\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "TyUAJtUTITti",
        "outputId": "33b4bc45-90fb-4ab7-cbee-a7d0480e1eab"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(891, 15)\n",
            "<class 'pandas.core.frame.DataFrame'>\n",
            "RangeIndex: 891 entries, 0 to 890\n",
            "Data columns (total 15 columns):\n",
            " #   Column       Non-Null Count  Dtype   \n",
            "---  ------       --------------  -----   \n",
            " 0   survived     891 non-null    int64   \n",
            " 1   pclass       891 non-null    int64   \n",
            " 2   sex          891 non-null    object  \n",
            " 3   age          714 non-null    float64 \n",
            " 4   sibsp        891 non-null    int64   \n",
            " 5   parch        891 non-null    int64   \n",
            " 6   fare         891 non-null    float64 \n",
            " 7   embarked     889 non-null    object  \n",
            " 8   class        891 non-null    category\n",
            " 9   who          891 non-null    object  \n",
            " 10  adult_male   891 non-null    bool    \n",
            " 11  deck         203 non-null    category\n",
            " 12  embark_town  889 non-null    object  \n",
            " 13  alive        891 non-null    object  \n",
            " 14  alone        891 non-null    bool    \n",
            "dtypes: bool(2), category(2), float64(2), int64(4), object(5)\n",
            "memory usage: 80.7+ KB\n",
            "None\n",
            "         survived      pclass         age       sibsp       parch        fare\n",
            "count  891.000000  891.000000  714.000000  891.000000  891.000000  891.000000\n",
            "mean     0.383838    2.308642   29.699118    0.523008    0.381594   32.204208\n",
            "std      0.486592    0.836071   14.526497    1.102743    0.806057   49.693429\n",
            "min      0.000000    1.000000    0.420000    0.000000    0.000000    0.000000\n",
            "25%      0.000000    2.000000   20.125000    0.000000    0.000000    7.910400\n",
            "50%      0.000000    3.000000   28.000000    0.000000    0.000000   14.454200\n",
            "75%      1.000000    3.000000   38.000000    1.000000    0.000000   31.000000\n",
            "max      1.000000    3.000000   80.000000    8.000000    6.000000  512.329200\n",
            "survived         0\n",
            "pclass           0\n",
            "sex              0\n",
            "age            177\n",
            "sibsp            0\n",
            "parch            0\n",
            "fare             0\n",
            "embarked         2\n",
            "class            0\n",
            "who              0\n",
            "adult_male       0\n",
            "deck           688\n",
            "embark_town      2\n",
            "alive            0\n",
            "alone            0\n",
            "dtype: int64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Fill missing age with median\n",
        "df['age'].fillna(df['age'].median(), inplace=True)\n",
        "\n",
        "# Fill embarked with mode\n",
        "df['embarked'].fillna(df['embarked'].mode()[0], inplace=True)\n",
        "\n",
        "# Drop columns with too many missing values (like deck)\n",
        "df.drop(columns=['deck'], inplace=True)\n",
        "\n",
        "# Drop rows with any remaining nulls (optional)\n",
        "df.dropna(inplace=True)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ldFjXF4yIVZK",
        "outputId": "842225f4-6e78-4122-fc1f-14f44775f8d8"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-4-4c57bc72cc12>:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
            "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
            "\n",
            "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
            "\n",
            "\n",
            "  df['age'].fillna(df['age'].median(), inplace=True)\n",
            "<ipython-input-4-4c57bc72cc12>:5: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
            "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
            "\n",
            "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
            "\n",
            "\n",
            "  df['embarked'].fillna(df['embarked'].mode()[0], inplace=True)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "label_encoders = {}\n",
        "categorical_cols = ['sex', 'embarked', 'class', 'who', 'adult_male', 'alone']\n",
        "\n",
        "for col in categorical_cols:\n",
        "    le = LabelEncoder()\n",
        "    df[col] = le.fit_transform(df[col])\n",
        "    label_encoders[col] = le\n"
      ],
      "metadata": {
        "id": "r6Z8NhxoIW85"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "scaler = StandardScaler()\n",
        "df[['age', 'fare']] = scaler.fit_transform(df[['age', 'fare']])\n"
      ],
      "metadata": {
        "id": "ysVUzJFZJpOx"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(df.head())\n",
        "print(df.isnull().sum())\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ORBvQlgsJp4B",
        "outputId": "08522416-1443-479a-a2ff-1a54ee1cb78c"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "   survived  pclass  sex       age  sibsp  parch      fare  embarked  class  \\\n",
            "0         0       3    1 -0.563674      1      0 -0.500240         2      2   \n",
            "1         1       1    0  0.669217      1      0  0.788947         0      0   \n",
            "2         1       3    0 -0.255451      0      0 -0.486650         2      2   \n",
            "3         1       1    0  0.438050      1      0  0.422861         2      0   \n",
            "4         0       3    1  0.438050      0      0 -0.484133         2      2   \n",
            "\n",
            "   who  adult_male  embark_town alive  alone  \n",
            "0    1           1  Southampton    no      0  \n",
            "1    2           0    Cherbourg   yes      0  \n",
            "2    2           0  Southampton   yes      1  \n",
            "3    2           0  Southampton   yes      0  \n",
            "4    1           1  Southampton    no      1  \n",
            "survived       0\n",
            "pclass         0\n",
            "sex            0\n",
            "age            0\n",
            "sibsp          0\n",
            "parch          0\n",
            "fare           0\n",
            "embarked       0\n",
            "class          0\n",
            "who            0\n",
            "adult_male     0\n",
            "embark_town    0\n",
            "alive          0\n",
            "alone          0\n",
            "dtype: int64\n"
          ]
        }
      ]
    }
  ]
}