Solution
Algorithm Steps:
- Step 1: Initialize the values of \(a\) and \(b\).
- Step 2: Compute the predicted value \( \hat{y} \) for the given \(a\) and \(b\):
\begin{align}
\hat{y} = a_1 x_1 + a_2 x_2 + \cdots + a_k x_k + b
\end{align}
- Step 3: Compute the gradients of the cost with respect to each parameter, denoted \(\partial a_k\) and \(\partial b\):
\begin{align}
\partial a_k &= \frac{\sum_{i=1}^{N} (y_i - \hat{y}_i)(-x_{ki})}{N} \\
\partial b &= \frac{\sum_{i=1}^{N} (y_i - \hat{y}_i)(-1)}{N}
\end{align}
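These gradients follow from differentiating the halved mean squared error cost implemented in compute_cost_function below:
\begin{align}
J = \frac{1}{2N}\sum_{i=1}^{N}\left(y_i - \hat{y}_i\right)^2
\quad\Rightarrow\quad
\frac{\partial J}{\partial a_k} = \frac{1}{N}\sum_{i=1}^{N}\left(y_i - \hat{y}_i\right)\left(-\frac{\partial \hat{y}_i}{\partial a_k}\right) = \frac{\sum_{i=1}^{N} (y_i - \hat{y}_i)(-x_{ki})}{N}
\end{align}
since \(\partial \hat{y}_i / \partial a_k = x_{ki}\); differentiating with respect to \(b\) instead replaces \(x_{ki}\) with \(1\).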
- Step 4: Apply gradient descent to update the parameters:
\begin{align}
a_k &= a_k - \alpha \, \partial a_k \\
b &= b - \alpha \, \partial b
\end{align}
where \(\alpha\) is the learning rate. Steps 2-4 are repeated for a fixed number of iterations, as the worked example below illustrates.
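As a quick sanity check of Steps 2-4, take an illustrative one-feature dataset (not part of the original problem) with points \((x, y) = (1, 2)\) and \((2, 4)\), starting from \(a = 0\), \(b = 0\) with \(\alpha = 0.1\). The predictions are \(\hat{y}_1 = \hat{y}_2 = 0\), so the first update is:
\begin{align}
\partial a &= \frac{(2-0)(-1) + (4-0)(-2)}{2} = -5, \qquad
\partial b = \frac{(2-0)(-1) + (4-0)(-1)}{2} = -3 \\
a &= 0 - 0.1 \cdot (-5) = 0.5, \qquad
b = 0 - 0.1 \cdot (-3) = 0.3
\end{align}
Repeating these updates drives \(a\) toward \(2\) and \(b\) toward \(0\), i.e. the line \(y = 2x\) that fits both points exactly.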
Code
from typing import List, Tuple

import numpy as np

np.random.seed(10)  # Seed NumPy's RNG (not the random module) so initialization is reproducible


class Solution:
    def train_linear_regression(self, X: List[List[float]], Y_True: List[float],
                                num_iteration: int, learning_rate: float):
"""
Function to train Linear Regression model
:param X: list of N rows and each row has K features
:param Y_True: list of N elements , which denotes actual values of Y
:param num_iteration: number of time loop will run to train model parameters
:param learning_rate: learning rate step of the model
:return a , b: a is list of float with K elements and b is scalar float value
These values will be final trained values which will be used to make predictions
"""
        if len(X) != len(Y_True) or len(Y_True) == 0:
            return -1
        # Initialize model parameters 'a' and 'b'
        K = len(X[0])
        if K == 0:
            return -1
        a, b = self.initialize_parameter(K)
        # Run gradient descent for num_iteration steps
        for _ in range(num_iteration):
            # Compute predicted values
            Y_pred = self.make_predictions(X, a, b)
            # Compute gradients
            d_b, d_a = self.compute_gradient(X, Y_True, Y_pred)
            # Update parameters using gradient descent
            b -= learning_rate * d_b
            a = [a_i - learning_rate * d_a_i for a_i, d_a_i in zip(a, d_a)]
        return a, b
    def initialize_parameter(self, K: int) -> Tuple[List[float], float]:
        # Randomly initialize parameter 'a' as a list of K floats drawn from [0, 1)
        a = np.random.rand(K).tolist()
        # Randomly initialize parameter 'b' as a single float drawn from [0, 1)
        b = float(np.random.rand())
        return a, b
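    # Note: the halved-MSE cost of linear regression is convex, so zero
    # initialization (a = [0.0] * K, b = 0.0) would reach the same optimum;
    # random initialization is simply one valid choice.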
    def make_predictions(self, X: List[List[float]], a: List[float], b: float) -> List[float]:
        # Collect the predicted value for every row of X
        predictions = []
        for row in X:
            # Linear regression formula: y_hat = a_1*x_1 + ... + a_k*x_k + b
            predicted_value = sum(a_i * x_i for a_i, x_i in zip(a, row)) + b
            predictions.append(predicted_value)
        return predictions
    def compute_gradient(self, X: List[List[float]], Y_True: List[float],
                         Y_pred: List[float]) -> Tuple[float, List[float]]:
        # Initialize gradients
        N = len(Y_True)
        K = len(X[0])
        d_b = 0.0
        d_a = [0.0] * K  # One gradient entry per feature weight
        # Accumulate the gradient of the cost over all N samples (Step 3)
        for i in range(N):
            d_b += -1 * (Y_True[i] - Y_pred[i])  # Gradient w.r.t. 'b'
            for j in range(K):
                d_a[j] += -1 * X[i][j] * (Y_True[i] - Y_pred[i])  # Gradient w.r.t. 'a[j]'
        # Average the gradients over the number of samples
        d_b /= N
        d_a = [d_aj / N for d_aj in d_a]
        return d_b, d_a
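    # Note: since NumPy is already imported, the accumulation loop above could be
    # vectorized over np.array(X); the explicit loop is kept because it mirrors
    # the summation in Step 3 directly.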
    def compute_cost_function(self, y_pred: List[float], y_true: List[float]) -> float:
        # Compute the squared error between predicted and true values
        squared_errors = [(y_p - y_t) ** 2 for y_p, y_t in zip(y_pred, y_true)]
        # Halved mean squared error; the factor of 2 in the denominator
        # cancels when differentiating, giving the gradients in Step 3
        return sum(squared_errors) / (2 * len(y_pred))
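To exercise the class end to end, here is a minimal usage sketch; the toy dataset (drawn from \(y = 2x + 1\)) and the hyperparameter values are illustrative assumptions, not values given in the problem:

sol = Solution()
X = [[1.0], [2.0], [3.0], [4.0]]  # toy dataset sampled from y = 2x + 1
Y_True = [3.0, 5.0, 7.0, 9.0]
a, b = sol.train_linear_regression(X, Y_True, num_iteration=1000, learning_rate=0.05)
Y_pred = sol.make_predictions(X, a, b)
cost = sol.compute_cost_function(Y_pred, Y_True)
print(a, b, cost)  # a should approach [2.0], b should approach 1.0, cost should be near 0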