{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n",
        "# Linear Regression Example\n",
        "The example below uses only the first feature of the `diabetes` dataset,\n",
        "in order to illustrate the data points within the two-dimensional plot.\n",
        "The straight line can be seen in the plot, showing how linear regression\n",
        "attempts to draw a straight line that will best minimize the\n",
        "residual sum of squares between the observed responses in the dataset,\n",
        "and the responses predicted by the linear approximation.\n",
        "\n",
        "The coefficients, residual sum of squares and the coefficient of\n",
        "determination are also calculated.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "ename": "ModuleNotFoundError",
          "evalue": "No module named 'sklearn'",
          "output_type": "error",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
            "Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m datasets, linear_model\n\u001b[1;32m      8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m mean_squared_error, r2_score\n\u001b[1;32m     10\u001b[0m \u001b[38;5;66;03m# Load the diabetes dataset\u001b[39;00m\n",
            "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sklearn'"
          ]
        }
      ],
      "source": [
        "# Code source: Jaques Grobler\n",
        "# License: BSD 3 clause\n",
        "\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "\n",
        "from sklearn import datasets, linear_model\n",
        "from sklearn.metrics import mean_squared_error, r2_score\n",
        "\n",
        "# Load the diabetes dataset\n",
        "diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)\n",
        "\n",
        "# Use only one feature\n",
        "diabetes_X = diabetes_X[:, np.newaxis, 2]\n",
        "\n",
        "# Split the data into training/testing sets\n",
        "diabetes_X_train = diabetes_X[:-20]\n",
        "diabetes_X_test = diabetes_X[-20:]\n",
        "\n",
        "# Split the targets into training/testing sets\n",
        "diabetes_y_train = diabetes_y[:-20]\n",
        "diabetes_y_test = diabetes_y[-20:]\n",
        "\n",
        "# Create linear regression object\n",
        "regr = linear_model.LinearRegression()\n",
        "\n",
        "# Train the model using the training sets\n",
        "regr.fit(diabetes_X_train, diabetes_y_train)\n",
        "\n",
        "# Make predictions using the testing set\n",
        "diabetes_y_pred = regr.predict(diabetes_X_test)\n",
        "\n",
        "# The coefficients\n",
        "print(\"Coefficients: \\n\", regr.coef_)\n",
        "# The mean squared error\n",
        "print(\"Mean squared error: %.2f\" % mean_squared_error(diabetes_y_test, diabetes_y_pred))\n",
        "# The coefficient of determination: 1 is perfect prediction\n",
        "print(\"Coefficient of determination: %.2f\" % r2_score(diabetes_y_test, diabetes_y_pred))\n",
        "\n",
        "# Plot outputs\n",
        "plt.scatter(diabetes_X_test, diabetes_y_test, color=\"black\")\n",
        "plt.plot(diabetes_X_test, diabetes_y_pred, color=\"blue\", linewidth=3)\n",
        "\n",
        "plt.xticks(())\n",
        "plt.yticks(())\n",
        "\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Calculation of R2 Score\n",
        "from sklearn.model_selection import cross_val_score\n",
        "print(cross_val_score(linear_regression, x, y, cv=10, scoring=\"r2\").mean())"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.16"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
