Solution
This problem can be solved by deriving the recurrence relation between the old statistics and the new statistics.
Mean Computation :
Suppose \( N \) data points have been processed so far, and the current mean is \( \mu_N \).
When the \( (N + 1)^{\text{th}} \) data point \( x_{N+1} \) arrives, the updated mean \( \mu_{N+1} \) is:
\begin{align}
\mu_{N+1} &= \frac{\sum_{i=1}^{N+1} x_i}{N+1} \\
\mu_{N+1} &= \frac{\sum_{i=1}^{N} x_i}{N+1} + \frac{x_{N+1}}{N+1} \\
\mu_{N+1} &= \frac{\mu_{N} \cdot N}{N+1} + \frac{x_{N+1}}{N+1} \\
\mu_{N+1} &= \mu_{N} + \frac{x_{N+1} - \mu_{N}}{N+1}
\end{align}
Variance Computation :
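Suppose \( N \) data points have been processed so far, with current mean \( \mu_{old} \) and current variance \( var_{old} \).
When the \( (N + 1)^{\text{th}} \) data point \( x_{N+1} \) arrives, the updated mean is \( \mu_{new} \) and the updated (population) variance \( var_{new} \) is: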
\begin{align}
var_{new} &= \frac{\sum_{i=1}^{N+1}(x_i - \mu_{new})^2}{N+1} \\
var_{new} &= \frac{\sum_{i=1}^{N} x_i^2}{N+1} + \frac{x_{N+1}^2}{N+1} + \frac{\sum_{i=1}^{N+1} \mu_{new}^2}{N+1} - \frac{2 \mu_{new} \sum_{i=1}^{N+1} x_i}{N+1} \tag{1} \\
var_{old} &= \frac{\sum_{i=1}^{N}(x_i - \mu_{old})^2}{N} = \frac{\sum_{i=1}^{N} x_i^2}{N} - \mu_{old}^2 \\
\sum_{i=1}^{N} x_i^2 &= (var_{old}+\mu_{old}^2) \cdot N \tag{2} \\
&\text{Since } \sum_{i=1}^{N+1} x_i = (N+1)\,\mu_{new} \text{, the last two terms of (1) reduce to } \mu_{new}^2 - 2\mu_{new}^2 = -\mu_{new}^2 \text{.} \\
var_{new} &= \frac{(var_{old}+\mu_{old}^2) \cdot N}{N+1} + \frac{x_{N+1}^2}{N+1} - \mu_{new}^2 \quad ( \text{substituting (2) into (1)} )
\end{align}
Code
class Solution:
    """Maintain the running mean and population variance of a data stream.

    Values are fed one at a time through :meth:`update_mean_variance`; only
    the current mean, variance and count are stored, never the data itself.

    Attributes are plain and public:
        mean:     current mean, or ``None`` before the first value.
        variance: current population variance (divides by the count, not
                  count - 1), or ``None`` before the first value.
        n:        number of values processed so far.
    """

    def __init__(self):
        self.mean = None
        self.variance = None
        self.n = 0

    def get_new_mean(self, x: float, n: int) -> float:
        """Return the mean after adding ``x`` as the ``n``-th value.

        Args:
            x: The newly arrived value.
            n: The count of values *including* ``x``.

        Returns:
            ``x`` itself when it is the first value, otherwise the stored
            mean shifted by ``(x - mean) / n``.
        """
        if n == 1:
            return x
        return self.mean + (x - self.mean) / n

    def get_new_variance(self, x: float, n: int, new_mean: float) -> float:
        """Return the population variance after adding ``x`` as the ``n``-th value.

        Must be called while ``self.mean`` and ``self.variance`` still hold
        the statistics of the previous ``n - 1`` values.

        Args:
            x: The newly arrived value.
            n: The count of values *including* ``x``.
            new_mean: The mean of all ``n`` values (see :meth:`get_new_mean`).

        Returns:
            ``0`` for the first value, otherwise the updated variance.
        """
        if n == 1:
            return 0
        # Welford-style update. It is algebraically identical to
        #   (var_old + mean_old**2) * (n - 1) / n + x**2 / n - new_mean**2
        # but works with deviations from the mean instead of raw squares, so
        # it does not subtract two huge, nearly equal numbers when the data
        # sit far from zero (which could even yield a negative "variance").
        prev_sq_dev = self.variance * (n - 1)
        return (prev_sq_dev + (x - self.mean) * (x - new_mean)) / n

    def update_mean_variance(self, x: float) -> None:
        """Consume one value and refresh ``n``, ``mean`` and ``variance``.

        Args:
            x: The next value of the stream.
        """
        self.n += 1
        # Both new statistics are computed from the *old* mean/variance, so
        # the attributes are only overwritten after both are known.
        new_mean = self.get_new_mean(x, self.n)
        new_variance = self.get_new_variance(x, self.n, new_mean)
        self.mean = new_mean
        self.variance = new_variance