{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "u-gvnqDWrSMu",
        "outputId": "0a44753c-5ba3-4e38-ab45-470557bb1a48"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Fri May  9 06:14:53 2025       \n",
            "+-----------------------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |\n",
            "|-----------------------------------------+------------------------+----------------------+\n",
            "| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |\n",
            "|                                         |                        |               MIG M. |\n",
            "|=========================================+========================+======================|\n",
            "|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |\n",
            "| N/A   62C    P8             13W /   70W |       0MiB /  15360MiB |      0%      Default |\n",
            "|                                         |                        |                  N/A |\n",
            "+-----------------------------------------+------------------------+----------------------+\n",
            "                                                                                         \n",
            "+-----------------------------------------------------------------------------------------+\n",
            "| Processes:                                                                              |\n",
            "|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |\n",
            "|        ID   ID                                                               Usage      |\n",
            "|=========================================================================================|\n",
            "|  No running processes found                                                             |\n",
            "+-----------------------------------------------------------------------------------------+\n"
          ]
        }
      ],
      "source": [
        "!nvidia-smi"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import cupy as cp\n",
        "import time\n"
      ],
      "metadata": {
        "id": "p0_bMmuOrYIv"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "N = 1024"
      ],
      "metadata": {
        "id": "LCjK9v93ruVQ"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Benchmark inputs: two N x N float32 matrices on the host (NumPy).\n",
        "# A seeded generator keeps the inputs identical on every Restart & Run All,\n",
        "# and dtype=np.float32 produces float32 directly instead of casting from float64.\n",
        "rng = np.random.default_rng(0)\n",
        "A_cpu = rng.random((N, N), dtype=np.float32)\n",
        "B_cpu = rng.random((N, N), dtype=np.float32)\n"
      ],
      "metadata": {
        "id": "KnZueJCrrwXA"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Time the NumPy (CPU) matrix product.\n",
        "# perf_counter is a monotonic, high-resolution clock meant for measuring short\n",
        "# intervals; time.time() follows the system clock and can jump if it is adjusted.\n",
        "start_cpu = time.perf_counter()\n",
        "C_cpu = np.matmul(A_cpu, B_cpu)\n",
        "end_cpu = time.perf_counter()\n"
      ],
      "metadata": {
        "id": "Y1WHn2u9ryEs"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(f\"CPU Matrix Multiplication Time: {end_cpu - start_cpu:.4f} seconds\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fu7Cu3zhrz6z",
        "outputId": "f9afe4b5-4201-4865-addd-ee42c60ce118"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "CPU Matrix Multiplication Time: 0.0331 seconds\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Convert both host (NumPy) matrices into CuPy arrays for the GPU benchmark.\n",
        "A_gpu, B_gpu = (cp.asarray(host_matrix) for host_matrix in (A_cpu, B_cpu))\n"
      ],
      "metadata": {
        "id": "5-EcAU0sr1zt"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Warm-up run (not timed): the first CuPy matmul pays one-time start-up costs\n",
        "# (e.g. CUDA context creation and library handle set-up) that would otherwise\n",
        "# dominate the measurement of a single multiplication.\n",
        "cp.matmul(A_gpu, B_gpu)\n",
        "cp.cuda.Device(0).synchronize()"
      ],
      "metadata": {
        "id": "LCkkKZalr48a"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Start and stop the clock inside one cell so the interval covers only the\n",
        "# multiplication, not the delay between executing separate notebook cells.\n",
        "start_gpu = time.perf_counter()\n",
        "C_gpu = cp.matmul(A_gpu, B_gpu)\n",
        "# GPU work is launched asynchronously; wait for it to finish before reading the clock.\n",
        "cp.cuda.Device(0).synchronize()\n",
        "end_gpu = time.perf_counter()\n"
      ],
      "metadata": {
        "id": "iGiFkgAXr7Zo"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(f\"GPU Matrix Multiplication Time: {end_gpu - start_gpu:.4f} seconds\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "LjsGHo0Fr9ev",
        "outputId": "bdecbce4-5b32-4052-e89f-9055615363a4"
      },
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "GPU Matrix Multiplication Time: 9.7265 seconds\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "BGCtHnfgr_y8"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}