{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "Assignment 5: We have given a collection of 8 points.\n",
        "\n",
        "P1=[0.1,0.6]\n",
        "P2=[0.15,0.71]\n",
        "P3=[0.08,0.9]\n",
        "P4=[0.16, 0.85]\n",
        "P5=[0.2,0.3]\n",
        "P6=[0.25,0.5]\n",
        "P7=[0.24,0.1]\n",
        "P8=[0.3,0.2]\n",
        "\n",
        "Perform the k-mean clustering with initial centroids as m1=P1 =Cluster#1=C1 and\n",
        "m2=P8=cluster#2=C2.\n",
        "\n",
        "Answer the following:\n",
        "\n",
        "1] Which cluster does P6 belong to?\n",
        "\n",
        "2] What is the population of cluster around m2?\n",
        "\n",
        "3] What is updated value of m1 and m2?"
      ],
      "metadata": {
        "id": "IheMgsigWFNb"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "\n",
        "# Step 1: Define the points as a dictionary\n",
        "point_coords = {\n",
        "    'P1': np.array([0.1, 0.6]),\n",
        "    'P2': np.array([0.15, 0.71]),\n",
        "    'P3': np.array([0.08, 0.9]),\n",
        "    'P4': np.array([0.16, 0.85]),\n",
        "    'P5': np.array([0.2, 0.3]),\n",
        "    'P6': np.array([0.25, 0.5]),\n",
        "    'P7': np.array([0.24, 0.1]),\n",
        "    'P8': np.array([0.3, 0.2])\n",
        "}\n",
        "\n",
        "# Step 2: Initialize centroids\n",
        "m1 = point_coords['P1']  # Cluster 1\n",
        "m2 = point_coords['P8']  # Cluster 2\n",
        "\n",
        "# Step 3: Euclidean distance function\n",
        "def euclidean_distance(p1, p2):\n",
        "    return np.linalg.norm(p1 - p2)\n",
        "\n",
        "# Step 4: Assign points to clusters\n",
        "def assign_clusters(points_dict, m1, m2):\n",
        "    clusters = {1: [], 2: []}\n",
        "    for label, coord in points_dict.items():\n",
        "        dist_to_m1 = euclidean_distance(coord, m1)\n",
        "        dist_to_m2 = euclidean_distance(coord, m2)\n",
        "        if dist_to_m1 < dist_to_m2:\n",
        "            clusters[1].append(label)\n",
        "        else:\n",
        "            clusters[2].append(label)\n",
        "    return clusters\n",
        "\n",
        "# Step 5: Update centroids based on current cluster members\n",
        "def update_centroid(cluster_labels, all_points):\n",
        "    coords = [all_points[label] for label in cluster_labels]\n",
        "    return np.mean(coords, axis=0)\n",
        "\n",
        "# First clustering\n",
        "clusters = assign_clusters(point_coords, m1, m2)\n",
        "print(\"Initial clusters:\")\n",
        "for c in clusters:\n",
        "    print(f\"Cluster {c}: {clusters[c]}\")\n",
        "\n",
        "# Step 6: Compute new centroids\n",
        "m1_updated = update_centroid(clusters[1], point_coords)\n",
        "m2_updated = update_centroid(clusters[2], point_coords)\n",
        "\n",
        "print(\"\\nUpdated centroids:\")\n",
        "print(f\"m1: {m1_updated}\")\n",
        "print(f\"m2: {m2_updated}\")\n",
        "\n",
        "# Step 7: Reassign points (optional: to show second iteration)\n",
        "clusters_updated = assign_clusters(point_coords, m1_updated, m2_updated)\n",
        "print(\"\\nUpdated clusters after recomputing centroids:\")\n",
        "for c in clusters_updated:\n",
        "    print(f\"Cluster {c}: {clusters_updated[c]}\")\n",
        "\n",
        "# Answer the questions\n",
        "# 1. Which cluster does P6 belong to?\n",
        "p6_cluster = 'Cluster 1' if 'P6' in clusters_updated[1] else 'Cluster 2'\n",
        "print(f\"\\n1) P6 belongs to: {p6_cluster}\")\n",
        "\n",
        "# 2. What is the population of cluster around m2?\n",
        "pop_cluster_2 = len(clusters_updated[2])\n",
        "print(f\"2) Population of cluster around m2: {pop_cluster_2}\")\n",
        "\n",
        "# 3. What is the updated value of m1 and m2?\n",
        "print(f\"3) Updated value of m1: {m1_updated}\")\n",
        "print(f\"   Updated value of m2: {m2_updated}\")\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2iNNXAmkVYqT",
        "outputId": "8350b358-e5d6-4d96-ebc4-dd55f6847e1b"
      },
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Initial clusters:\n",
            "Cluster 1: ['P1', 'P2', 'P3', 'P4', 'P6']\n",
            "Cluster 2: ['P5', 'P7', 'P8']\n",
            "\n",
            "Updated centroids:\n",
            "m1: [0.148 0.712]\n",
            "m2: [0.24666667 0.2       ]\n",
            "\n",
            "Updated clusters after recomputing centroids:\n",
            "Cluster 1: ['P1', 'P2', 'P3', 'P4', 'P6']\n",
            "Cluster 2: ['P5', 'P7', 'P8']\n",
            "\n",
            "1) P6 belongs to: Cluster 1\n",
            "2) Population of cluster around m2: 3\n",
            "3) Updated value of m1: [0.148 0.712]\n",
            "   Updated value of m2: [0.24666667 0.2       ]\n"
          ]
        }
      ]
    }
  ]
}