Solution
Algorithm Steps:
- Step 1: Initialize the values of \(a\) and \(b\).
- Step 2: Compute the predicted probability \( \hat{y} \) for the given \(a\) and \(b\):
\begin{align}
p(y=1\mid X,a,b)=\frac{1}{1+e^{-(a_1 x_1 + a_2 x_2 + \cdots + a_k x_k + b)}}
\end{align}
- Step 3: Compute the gradients of the cross-entropy cost \(J=-\frac{1}{m}\sum_{i=1}^m\left[y_i\log p(X_i)+(1-y_i)\log\left(1-p(X_i)\right)\right]\):
\begin{align}
\frac{\partial J}{\partial b}&=\frac{1}{m}\sum_{i=1}^m\left(p(X_i)-y_i\right) \\
\frac{\partial J}{\partial a_1}&=\frac{1}{m}\sum_{i=1}^m\left(p(X_i)-y_i\right)x_{i,1} \\
&\;\;\vdots \\
\frac{\partial J}{\partial a_k}&=\frac{1}{m}\sum_{i=1}^m\left(p(X_i)-y_i\right)x_{i,k}
\end{align}
- Step 4: Apply gradient descent to update the parameters (see the vectorized sketch after these steps):
\begin{align}
a_k&=a_k-\alpha\frac{\partial J}{\partial a_k} \\
b&=b-\alpha\frac{\partial J}{\partial b}
\end{align}
where \(\alpha\) is the learning rate.
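For intuition, the four steps above can be condensed into a short vectorized NumPy sketch. The function and variable names here are illustrative only; the loop-based reference implementation follows in the Code section below.

import numpy as np

def train_sketch(X: np.ndarray, y: np.ndarray, num_iteration: int, alpha: float):
    # Step 1: randomly initialize parameters (illustrative sketch, not the reference code)
    m, k = X.shape
    a = np.random.rand(k)
    b = np.random.rand()
    for _ in range(num_iteration):
        # Step 2: sigmoid of the linear combination gives predicted probabilities
        p = 1.0 / (1.0 + np.exp(-(X @ a + b)))
        # Step 3: gradients of the cross-entropy cost
        error = p - y
        d_a = X.T @ error / m
        d_b = error.mean()
        # Step 4: gradient descent update with learning rate alpha
        a -= alpha * d_a
        b -= alpha * d_b
    return a, b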
Code
import random
from typing import List, Tuple

import numpy as np

random.seed(10)
np.random.seed(10)
class Solution:
    def train_logistic_regression(self, X: List[List[float]], Y_True: List[int], num_iteration: int, learning_rate: float) -> Tuple[List[float], float, List[float]]:
        """
        Train a logistic regression model with batch gradient descent.

        Args:
            X: list of N rows, each with K features
            Y_True: list of N elements denoting the actual values of Y
            num_iteration: number of gradient descent iterations
            learning_rate: learning rate (step size) of the model

        Returns:
            a: list of K floats (model parameters)
            b: scalar float (model parameter)
            cost_history: list of cost values, one per iteration
        """
        # Initialize model parameters 'a' and 'b'
        K = len(X[0])
        a, b = self.initialize_parameter(K)
        cost_history = []  # List to store the cost value at each iteration

        for _ in range(num_iteration):
            # Compute predicted probabilities
            Y_pred = self.make_predictions(X, a, b)
            # Compute the cost function value
            cost = self.compute_cost_function(Y_pred, Y_True)
            # Compute gradients
            d_a, d_b = self.compute_gradient(X, Y_True, Y_pred)
            # Update parameters using gradient descent
            a = [a_i - learning_rate * d_a_i for a_i, d_a_i in zip(a, d_a)]
            b -= learning_rate * d_b
            # Record the cost for this iteration
            cost_history.append(cost)
        return a, b, cost_history
    def initialize_parameter(self, K: int) -> Tuple[List[float], float]:
        # Randomly initialize parameter 'a' as a list of K random floats
        a = np.random.rand(K).tolist()
        # Randomly initialize parameter 'b' as a single random float
        b = np.random.rand()
        return a, b
    def sigmoid(self, z: float) -> float:
        """
        Compute the sigmoid function.
        """
        return 1 / (1 + np.exp(-z))
    def make_predictions(self, X: List[List[float]], a: List[float], b: float) -> List[float]:
        predicted_probabilities = []
        # Iterate over each data point in X
        for x in X:
            # Compute the linear combination of features and parameters
            z = sum(a_i * x_i for a_i, x_i in zip(a, x)) + b
            # Map the linear combination to a probability with the sigmoid function
            predicted_probability = self.sigmoid(z)
            predicted_probabilities.append(predicted_probability)
        return predicted_probabilities
    def compute_cost_function(self, y_pred: List[float], y_true: List[int]) -> float:
        cross_entropy_cost = 0.0
        epsilon = 0.0001  # Small constant to keep the logs finite
        # Iterate over each pair of predicted and true values
        for y_p, y_t in zip(y_pred, y_true):
            # Clip the prediction away from 0 and 1 to avoid log(0)
            y_p = min(max(y_p, epsilon), 1 - epsilon)
            # Compute the cross-entropy cost for this pair
            cost = -y_t * np.log(y_p) - (1 - y_t) * np.log(1 - y_p)
            cross_entropy_cost += cost
        # Normalize the cost by the number of samples
        cross_entropy_cost /= len(y_true)
        return cross_entropy_cost
    def compute_gradient(self, X: List[List[float]], Y_True: List[int], Y_pred: List[float]) -> Tuple[List[float], float]:
        N = len(Y_True)
        K = len(X[0])
        # Initialize gradients
        d_b = 0.0
        d_a = [0.0] * K  # One gradient entry per feature
        # Accumulate gradients over all samples
        for i in range(N):
            d_b += Y_pred[i] - Y_True[i]  # Gradient w.r.t. 'b'
            for j in range(K):
                d_a[j] += (Y_pred[i] - Y_True[i]) * X[i][j]  # Gradient w.r.t. 'a[j]'
        # Normalize gradients by the number of samples
        d_b /= N
        d_a = [d_aj / N for d_aj in d_a]
        return d_a, d_b
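A quick usage example on a tiny toy dataset; the feature values and labels below are made up purely for illustration.

X = [[0.5, 1.5], [1.0, 1.0], [1.5, 0.5], [3.0, 0.5], [2.0, 2.0], [1.0, 2.5]]
Y_True = [0, 0, 0, 1, 1, 1]

solution = Solution()
a, b, cost_history = solution.train_logistic_regression(X, Y_True, num_iteration=1000, learning_rate=0.1)
print("a:", a)                          # learned feature weights
print("b:", b)                          # learned bias
print("final cost:", cost_history[-1])  # should be lower than cost_history[0]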