Solution
Algorithm Steps:
- Step 1: Initialize the values of \(a\) and \(b\).
- Step 2: Compute the predicted probability \( \hat{y} \) for the given \(a\) and \(b\):
\begin{align}
p(y=1\mid X,a,b)=\frac{1}{1+e^{-(a_1 x_1 + a_2 x_2 + \cdots + a_k x_k + b)}}
\end{align}
- Step 3: Compute the gradients of the cross-entropy cost \(J=-\frac{1}{m}\sum_{i=1}^m\left[y_i\log p(X_i)+(1-y_i)\log\left(1-p(X_i)\right)\right]\):
\begin{align}
\frac{\partial J}{\partial b}&=\frac{1}{m}\sum_{i=1}^m\left(p(X_i)-y_i\right) \\
\frac{\partial J}{\partial a_1}&=\frac{1}{m}\sum_{i=1}^m\left(p(X_i)-y_i\right)x_{i,1} \\
&\;\;\vdots \\
\frac{\partial J}{\partial a_k}&=\frac{1}{m}\sum_{i=1}^m\left(p(X_i)-y_i\right)x_{i,k}
\end{align}
- Step 4: Apply gradient descent to update the parameters (see the vectorized sketch after these steps):
\begin{align}
a_k&=a_k-\alpha\frac{\partial J}{\partial a_k} \\
b&=b-\alpha\frac{\partial J}{\partial b}
\end{align}
where \(\alpha\) is the learning rate.
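For intuition, the four steps above can be condensed into a short vectorized NumPy sketch. The function and variable names here are illustrative only; the loop-based reference implementation follows in the Code section below.

import numpy as np

def train_sketch(X: np.ndarray, y: np.ndarray, num_iteration: int, alpha: float):
    # Step 1: randomly initialize parameters (illustrative sketch, not the reference code)
    m, k = X.shape
    a = np.random.rand(k)
    b = np.random.rand()
    for _ in range(num_iteration):
        # Step 2: sigmoid of the linear combination gives predicted probabilities
        p = 1.0 / (1.0 + np.exp(-(X @ a + b)))
        # Step 3: gradients of the cross-entropy cost
        error = p - y
        d_a = X.T @ error / m
        d_b = error.mean()
        # Step 4: gradient descent update with learning rate alpha
        a -= alpha * d_a
        b -= alpha * d_b
    return a, b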
Code
import random
from typing import List, Tuple

import numpy as np

random.seed(10)
np.random.seed(10)
class Solution:
    def train_logistic_regression(self, X: List[List[float]], Y_True: List[int], num_iteration: int, learning_rate: float) -> Tuple[List[float], float, List[float]]:
        """
        Train a logistic regression model with batch gradient descent.

        Args:
            X: list of N rows, each with K features
            Y_True: list of N elements denoting the actual values of Y
            num_iteration: number of gradient descent iterations
            learning_rate: learning rate (step size) of the model

        Returns:
            a: list of K floats (model parameters)
            b: scalar float (model parameter)
            cost_history: list of cost values, one per iteration
        """
        # Initialize model parameters 'a' and 'b'
        K = len(X[0])
        a, b = self.initialize_parameter(K)
        cost_history = []  # List to store the cost value at each iteration

        for _ in range(num_iteration):
            # Compute predicted probabilities
            Y_pred = self.make_predictions(X, a, b)
            # Compute the cost function value
            cost = self.compute_cost_function(Y_pred, Y_True)
            # Compute gradients
            d_a, d_b = self.compute_gradient(X, Y_True, Y_pred)
            # Update parameters using gradient descent
            a = [a_i - learning_rate * d_a_i for a_i, d_a_i in zip(a, d_a)]
            b -= learning_rate * d_b
            # Record the cost for this iteration
            cost_history.append(cost)
        return a, b, cost_history
    def initialize_parameter(self, K: int) -> Tuple[List[float], float]:
        # Randomly initialize parameter 'a' as a list of K random floats
        a = np.random.rand(K).tolist()
        # Randomly initialize parameter 'b' as a single random float
        b = np.random.rand()
        return a, b
    def sigmoid(self, z: float) -> float:
        """
        Compute the sigmoid function.
        """
        return 1 / (1 + np.exp(-z))
    def make_predictions(self, X: List[List[float]], a: List[float], b: float) -> List[float]:
        predicted_probabilities = []
        # Iterate over each data point in X
        for x in X:
            # Compute the linear combination of features and parameters
            z = sum(a_i * x_i for a_i, x_i in zip(a, x)) + b
            # Map the linear combination to a probability with the sigmoid function
            predicted_probability = self.sigmoid(z)
            predicted_probabilities.append(predicted_probability)
        return predicted_probabilities
    def compute_cost_function(self, y_pred: List[float], y_true: List[int]) -> float:
        cross_entropy_cost = 0.0
        epsilon = 0.0001  # Small constant to keep the logs finite
        # Iterate over each pair of predicted and true values
        for y_p, y_t in zip(y_pred, y_true):
            # Clip the prediction away from 0 and 1 to avoid log(0)
            y_p = min(max(y_p, epsilon), 1 - epsilon)
            # Compute the cross-entropy cost for this pair
            cost = -y_t * np.log(y_p) - (1 - y_t) * np.log(1 - y_p)
            cross_entropy_cost += cost
        # Normalize the cost by the number of samples
        cross_entropy_cost /= len(y_true)
        return cross_entropy_cost
    def compute_gradient(self, X: List[List[float]], Y_True: List[int], Y_pred: List[float]) -> Tuple[List[float], float]:
        N = len(Y_True)
        K = len(X[0])
        # Initialize gradients
        d_b = 0.0
        d_a = [0.0] * K  # One gradient entry per feature
        # Accumulate gradients over all samples
        for i in range(N):
            d_b += Y_pred[i] - Y_True[i]  # Gradient w.r.t. 'b'
            for j in range(K):
                d_a[j] += (Y_pred[i] - Y_True[i]) * X[i][j]  # Gradient w.r.t. 'a[j]'
        # Normalize gradients by the number of samples
        d_b /= N
        d_a = [d_aj / N for d_aj in d_a]
        return d_a, d_b
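A quick usage example on a tiny toy dataset; the feature values and labels below are made up purely for illustration.

X = [[0.5, 1.5], [1.0, 1.0], [1.5, 0.5], [3.0, 0.5], [2.0, 2.0], [1.0, 2.5]]
Y_True = [0, 0, 0, 1, 1, 1]

solution = Solution()
a, b, cost_history = solution.train_logistic_regression(X, Y_True, num_iteration=1000, learning_rate=0.1)
print("a:", a)                          # learned feature weights
print("b:", b)                          # learned bias
print("final cost:", cost_history[-1])  # should be lower than cost_history[0]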