Source code for source.discrete.Finite

try:
    import numpy as np
    import scipy.special as ss
    from discrete._base import Base
    from typing import Union, Tuple, Dict, List
    import math as m
except Exception as e:
    print(f'some modules are missin{e}')


class Finite(Base):
    """
    Description:
        Common parent of the distribution classes defined in this module.
        It exists only to be subclassed.

    Raises:
        TypeError: when ``Finite`` itself (not a subclass) is instantiated.
    """

    def __init__(self):
        # Subclasses pass straight through; only the exact base type is refused.
        if type(self) is not Finite:
            return
        raise TypeError('base class cannot be instantiated.')
class Bernoulli(Finite):
    """
    This class contains methods concerning the Bernoulli Distribution.
    Bernoulli Distribution is a special case of Binomial Distribution [#]_ [#]_.

    .. math::
        \\text{Bernoulli} (x;p) = p^x (1-p)^{1-x}

    Args:

        - p (float) : probability of success, constrained to [0, 1].
        - x (int) : possible outcomes. Either 0 or 1.

    References:
        .. [#] Weisstein, Eric W. "Bernoulli Distribution." From MathWorld--A Wolfram Web Resource. https://mathworld.wolfram.com/BernoulliDistribution.html
        .. [#] Wikipedia contributors. (2020, December 26). Bernoulli distribution. https://en.wikipedia.org/w/index.php?title=Bernoulli_distribution&oldid=996380822
    """

    def __init__(self, p: float):
        if p < 0 or p > 1:
            raise ValueError('parameter p is constrained in ∈ [0,1]')
        self.p = p

    @staticmethod
    def _check_support(x):
        """
        Verify that every value of ``x`` is 0 or 1.

        Args:
            x: a scalar, a list or a numpy array.

        Raises:
            ValueError: when some value is neither 0 nor 1.

        Returns:
            ``x`` itself for scalars, ``numpy.asarray(x)`` for lists and arrays.
        """
        if isinstance(x, (list, np.ndarray)):
            x = np.asarray(x)
            if not np.all((x == 0) | (x == 1)):
                raise ValueError('all x must either be 1 or 0')
            return x
        # Both comparisons must fail for x to be outside the support.
        if x != 0 and x != 1:
            raise ValueError('all x must either be 1 or 0')
        return x

    @staticmethod
    def _pmf_at(p, x):
        """Evaluate the pmf for an already validated ``x``."""
        if isinstance(x, np.ndarray):
            # np.where builds a float result even when x has an integer dtype.
            return np.where(x == 1, p, 1 - p)
        return 1 - p if x == 0 else p

    def pmf(self, x: Union[List[int], int, np.ndarray]) -> Union[float, np.ndarray]:
        """
        Args:
            x (Union[List[int], int, numpy.ndarray]): random variable(s)

        Raises:
            ValueError: when there exist a value of x that is not 0 or 1

        Returns:
            Union[float, numpy.ndarray]: evaluation of pmf at x
        """
        return self._pmf_at(self.p, self._check_support(x))

    @staticmethod
    def pmf_s(p: float, x: Union[List[int], int, np.ndarray]) -> Union[float, np.ndarray]:
        """
        Evaluate the pmf without constructing an instance.

        Args:
            p (float): probability of success, constrained to [0, 1]
            x (Union[List[int], int, numpy.ndarray]): random variable(s)

        Raises:
            ValueError: when parameter p does not belong to the domain [0,1]
            ValueError: when there exist a value in a random variable that is not 0 or 1

        Returns:
            Union[float, numpy.ndarray]: evaluation of pmf at x
        """
        if p < 0 or p > 1:
            raise ValueError('parameter p is constrained in ∈ [0,1]')
        return Bernoulli._pmf_at(p, Bernoulli._check_support(x))

    def cdf(self, x: Union[List[int], int, np.ndarray]) -> Union[float, np.ndarray]:
        """
        Args:
            x (Union[List[int], int, numpy.ndarray]): data point(s) of interest

        Raises:
            ValueError: when there exist a value of x not equal to 0 or 1

        Returns:
            Union[float, numpy.ndarray]: evaluation of cdf at x
        """
        p = self.p
        x = self._check_support(x)
        # After validation x is 0 or 1: F(0) = 1 - p and F(1) = 1.
        if isinstance(x, np.ndarray):
            return np.where(x == 0, 1 - p, 1.0)
        return 1 - p if x == 0 else 1.0

    def mean(self) -> float:
        """
        Returns:
            float: mean of Bernoulli distribution
        """
        return self.p

    def median(self) -> Union[List[int], int]:
        """
        Returns:
            Union[List[int], int]: median of Bernoulli distribution. Both
            outcomes are returned as a list when p is exactly 0.5.
        """
        p = self.p
        if p < 0.5:
            return 0
        if p == 0.5:
            return [0, 1]
        return 1

    def mode(self) -> Union[Tuple[int, int], int]:
        """
        Returns:
            Union[Tuple[int, int], int]: mode of Bernoulli distribution. Both
            outcomes are returned as a tuple when p is exactly 0.5.
        """
        p = self.p
        if p < 0.5:
            return 0
        if p == 0.5:
            return (0, 1)
        return 1

    def var(self) -> float:
        """
        Returns:
            float: variance of Bernoulli distribution
        """
        p = self.p
        return p * (1 - p)

    def std(self) -> float:
        """
        Returns:
            float: standard deviation of Bernoulli distribution
        """
        p = self.p
        return m.sqrt(p * (1 - p))

    def skewness(self) -> float:
        """
        Raises:
            ZeroDivisionError: when p is 0 or 1, since the variance is zero

        Returns:
            float: skewness of Bernoulli distribution
        """
        p = self.p
        q = 1 - p
        return (q - p) / m.sqrt(p * q)

    def kurtosis(self) -> float:
        """
        Raises:
            ZeroDivisionError: when p is 0 or 1, since the variance is zero

        Returns:
            float: kurtosis of Bernoulli distribution
        """
        p = self.p
        q = 1 - p
        return (1 - 6 * p * q) / (p * q)

    def summary(self) -> Dict[str, Union[int, float, List[int], Tuple[int, int]]]:
        """
        Returns:
            Dictionary of Bernoulli distribution moments. This includes standard deviation.
        """
        return {
            'mean': self.mean(),
            'median': self.median(),
            'mode': self.mode(),
            'var': self.var(),
            'std': self.std(),
            'skewness': self.skewness(),
            'kurtosis': self.kurtosis(),
        }
class Binomial(Finite):
    """
    This class contains functions for finding the probability mass function and
    cumulative distribution function for binomial distribution [#]_ [#]_ [#]_.

    .. math::
        \\text{Binomial}(x;n,p) = \\binom{n}{x} p^x (1-p)^{n-x}

    Args:

        n (int): number of trials
        p (float): success probability for each trial. Where 0 <= p <= 1.
        x (int): number of successes

    References:
        .. [#] NIST/SEMATECH e-Handbook of Statistical Methods (2012). Binomial Distribution. Retrieved at http://www.itl.nist.gov/div898/handbook/, December 26, 2000.
        .. [#] Wikipedia contributors. (2020, December 19). Binomial distribution. https://en.wikipedia.org/w/index.php?title=Binomial_distribution&oldid=995095096
        .. [#] Weisstein, Eric W. "Binomial Distribution." From MathWorld--A Wolfram Web Resource. https://mathworld.wolfram.com/BinomialDistribution.html
    """

    def __init__(self, n: int, p: float):
        if p < 0 or p > 1:
            raise ValueError('parameter p is constrained to ∈ [0,1]')
        self.n = n
        self.p = p

    def pmf(self, x: Union[List[int], int, np.ndarray]) -> Union[float, np.ndarray]:
        """
        Args:
            x (Union[List[int], int, numpy.ndarray]): random variable or list of random variables

        Returns:
            Union[float, numpy.ndarray]: evaluation of pmf at x
        """
        n, p = self.n, self.p
        if isinstance(x, (list, np.ndarray)):
            x = np.asarray(x)
        return ss.binom(n, x) * p**x * (1 - p)**(n - x)

    def cdf(self, x: Union[int, List[int], np.ndarray]) -> Union[float, np.ndarray]:
        """
        Evaluate the cdf through the regularized incomplete beta function,
        ``F(k) = I_{1-p}(n-k, k+1)`` with ``k = floor(x)``.

        Args:
            x (Union[int, List[int], np.ndarray]): random variable or list of random variables

        Returns:
            Union[float, numpy.ndarray]: evaluation of cdf at x. It is 0 below
            the support and 1 from ``n`` upwards.
        """
        n, p = self.n, self.p
        k = np.floor(np.asarray(x, dtype=float))
        interior = (k >= 0) & (k < n)
        # betainc needs strictly positive shape arguments, which only holds
        # inside the support; elsewhere a harmless placeholder is evaluated
        # and then overwritten by the boundary values below.
        k_safe = np.where(interior, k, 0.0)
        body = ss.betainc(n - k_safe, k_safe + 1, 1 - p)
        result = np.where(k < 0, 0.0, np.where(k >= n, 1.0, body))
        if isinstance(x, (list, np.ndarray)):
            return result
        return float(result)

    def mean(self) -> float:
        """
        Returns:
            the mean of Binomial Distribution.
        """
        return self.n * self.p

    def median(self) -> Tuple[int, int]:
        """
        Returns:
            the median of Binomial Distribution. Either one defined in the tuple of result.
        """
        centre = self.n * self.p
        return m.floor(centre), m.ceil(centre)

    def mode(self) -> Tuple[int, int]:
        """
        Returns:
            the mode of Binomial Distribution. Either one defined in the tuple of result.
        """
        pivot = (self.n + 1) * self.p
        return m.floor(pivot), m.ceil(pivot) - 1

    def var(self) -> float:
        """
        Returns:
            the variance of Binomial Distribution.
        """
        n, p = self.n, self.p
        return n * p * (1 - p)

    def std(self) -> float:
        """
        Defined on the class so that ``keys`` does not rely on a parent class
        supplying it.

        Returns:
            the standard deviation of Binomial Distribution.
        """
        return m.sqrt(self.var())

    def skewness(self) -> float:
        """
        Raises:
            ZeroDivisionError: when the variance ``n*p*(1-p)`` is zero

        Returns:
            the skewness of Binomial Distribution.
        """
        n, p = self.n, self.p
        q = 1 - p
        return (q - p) / m.sqrt(n * p * q)

    def kurtosis(self) -> float:
        """
        Raises:
            ZeroDivisionError: when the variance ``n*p*(1-p)`` is zero

        Returns:
            the kurtosis of Binomial Distribution.
        """
        n, p = self.n, self.p
        q = 1 - p
        return (1 - 6 * p * q) / (n * p * q)

    def keys(self) -> Dict[str, Union[float, int, Tuple[int, int]]]:
        """
        Returns:
            Dictionary of Binomial distribution moments. This includes standard deviation.
        """
        return {
            'mean': self.mean(),
            'median': self.median(),
            'mode': self.mode(),
            'var': self.var(),
            'std': self.std(),
            'skewness': self.skewness(),
            'kurtosis': self.kurtosis(),
        }

    def summary(self) -> Dict[str, Union[float, int, Tuple[int, int]]]:
        """
        Alias of ``keys``, matching the method name used by the other
        distributions in this module.

        Returns:
            Dictionary of Binomial distribution moments. This includes standard deviation.
        """
        return self.keys()
class Geometric(Finite):
    """
    This class contains functions for finding the probability mass function and
    cumulative distribution function for geometric distribution. We consider two definitions
    of the geometric distribution: one concerns itself to the number of X of Bernoulli trials
    needed to get one success, supported on the set {1,2,3,...}. The second one concerns with
    Y=X-1 of failures before the first success, supported on the set {0,1,2,3,...} [#]_ [#]_.

    .. math:: \\text{Geometric}_1(x;p) = (1-p)^{x-1}p
    .. math:: \\text{Geometric}_2(x;p) = (1-p)^{x}p

    Args:

        p (float): success probability for each trial. Where 0 <= p <= 1.
        x (int): number of trials up to the first success ('first') or number
            of failures before it ('second')

    References:
        .. [#] Weisstein, Eric W. "Geometric Distribution." From MathWorld--A Wolfram Web Resource. https://mathworld.wolfram.com/GeometricDistribution.html
        .. [#] Wikipedia contributors. (2020, December 27). Geometric distribution. https://en.wikipedia.org/w/index.php?title=Geometric_distribution&oldid=996517676

    Note: Geometric distribution can be configured based through `_type` parameter in `pmf`, `cdf` and moments of the distribution, including the `std`.
    The default type is `_type='first'`, or :math:`\\text{Geometric}_1(x;p)`.
    """

    def __init__(self, p: float):
        if p < 0 or p > 1:
            raise ValueError('parameter p is constrained to ∈ [0,1]')
        self.p = p

    @staticmethod
    def _as_integers(x):
        """
        Validate that ``x`` holds integers.

        Raises:
            TypeError: when a scalar is not an integer, or a non-empty
                list/array does not have an integer dtype.

        Returns:
            ``numpy.asarray(x)`` for lists and arrays, ``x`` itself for scalars.
        """
        if isinstance(x, (list, np.ndarray)):
            x = np.asarray(x)
            # Inspect the dtype rather than the first element so that empty
            # input does not fail with an IndexError.
            if x.size and not np.issubdtype(x.dtype, np.integer):
                raise TypeError('parameter x must be of type int')
            return x
        # bool is a subclass of int but is not a meaningful count.
        if isinstance(x, bool) or not isinstance(x, (int, np.integer)):
            raise TypeError('parameter x must be of type int')
        return x

    @staticmethod
    def _support_start(_type):
        """
        Return the smallest value of the support: 1 for 'first', 0 for 'second'.

        Raises:
            ValueError: when _type is not 'first' or 'second'
        """
        try:
            return {'first': 1, 'second': 0}[_type]
        except KeyError:
            raise ValueError(
                "Invalid argument. Type is either 'first' or 'second'.") from None

    def pmf(self, x: Union[List[int], int, np.ndarray], _type: str = 'first') -> Union[np.ndarray, float]:
        """
        Args:
            x (Union[List[int], int, numpy.ndarray]): random variable(s)
            _type (str, optional): optional specifier for modifying the type of Geometric distribution. Defaults to 'first'.

        Raises:
            TypeError: when random variable(s) are not of type int
            ValueError: when a _type parameter is not 'first' or second

        Returns:
            Union[numpy.ndarray, float]: evaluation of pmf at x. Values below
            the support evaluate to 0.
        """
        x = self._as_integers(x)
        start = self._support_start(_type)
        p = self.p
        q = 1 - p
        if isinstance(x, np.ndarray):
            # Clamp the exponent so that points outside the support never
            # produce a negative power; np.where then zeroes them out.
            failures = np.maximum(x - start, 0)
            return np.where(x >= start, q**failures * p, 0.0)
        return pow(q, x - start) * p if x >= start else 0.0

    def cdf(self, x: Union[List[int], int], _type: str = 'first') -> Union[np.ndarray, float]:
        """
        Args:
            x (Union[List[int], int, numpy.ndarray]): random variable(s)
            _type (str, optional): optional specifier for modifying the type of Geometric distribution. Defaults to 'first'.

        Raises:
            TypeError: when random variable(s) are not of type int
            ValueError: when a _type parameter is not 'first' or second

        Returns:
            Union[numpy.ndarray, float]: evaluation of cdf at x. Values below
            the support evaluate to 0.
        """
        x = self._as_integers(x)
        start = self._support_start(_type)
        q = 1 - self.p
        # Number of trials covered up to x: x for 'first', x + 1 for 'second'.
        if isinstance(x, np.ndarray):
            trials = np.maximum(x - start + 1, 0)
            return np.where(x >= start, 1 - q**trials, 0.0)
        return 1 - pow(q, x - start + 1) if x >= start else 0.0

    def mean(self, _type='first') -> float:
        """
        Args:
            _type (str, optional): modifies the type of Geometric distribution. Defaults to 'first'.

        Raises:
            ValueError: when _type is not 'first' or 'second'

        Returns:
            float: mean of Geometric distribution
        """
        if _type == "first":
            return 1 / self.p
        if _type == "second":
            return (1 - self.p) / self.p
        raise ValueError(
            "Invalid argument. Type is either 'first' or 'second'.")

    def median(self, _type='first') -> int:
        """
        Args:
            _type (str, optional): modifies the type of Geometric distribution. Defaults to 'first'.

        Raises:
            ValueError: when _type is not 'first' or 'second'

        Returns:
            int: median of Geometric distribution
        """
        if _type == "first":
            return m.ceil(-1 / (m.log2(1 - self.p)))
        if _type == "second":
            return m.ceil(-1 / (m.log2(1 - self.p))) - 1
        raise ValueError(
            "Invalid argument. Type is either 'first' or 'second'.")

    def mode(self, _type: str = 'first') -> int:
        """
        Args:
            _type (str, optional): modifies the type of Geometric distribution. Defaults to 'first'.

        Raises:
            ValueError: when _type is not 'first' or 'second'

        Returns:
            int: mode of Geometric distribution
        """
        if _type == "first":
            return 1
        if _type == "second":
            return 0
        raise ValueError(
            "Invalid argument. Type is either 'first' or 'second'.")

    def var(self) -> float:
        """
        Returns:
            float: variance of Geometric distribution
        """
        return (1 - self.p) / self.p**2

    def std(self) -> float:
        """
        Defined on the class so that ``keys`` does not rely on a parent class
        supplying it.

        Returns:
            float: standard deviation of Geometric distribution
        """
        return m.sqrt(self.var())

    def skewness(self) -> float:
        """
        Returns:
            float: skewness of Geometric distribution
        """
        return (2 - self.p) / m.sqrt(1 - self.p)

    def kurtosis(self) -> float:
        """
        Returns:
            float: kurtosis of Geometric distribution
        """
        return 6 + (self.p**2 / (1 - self.p))

    def keys(self) -> Dict[str, Union[float, int]]:
        """
        Returns:
            Dictionary of Geometric distribution moments. This includes standard deviation.
        """
        return {
            'mean': self.mean(),
            'median': self.median(),
            'mode': self.mode(),
            'var': self.var(),
            'std': self.std(),
            'skewness': self.skewness(),
            'kurtosis': self.kurtosis(),
        }

    def summary(self) -> Dict[str, Union[float, int]]:
        """
        Alias of ``keys``, matching the method name used by the other
        distributions in this module.

        Returns:
            Dictionary of Geometric distribution moments. This includes standard deviation.
        """
        return self.keys()
class Hypergeometric(Finite):
    """
    This class contains methods concerning pmf and cdf evaluation of the hypergeometric distribution.
    Describes the probability of x successes (random draws for which the object drawn has a specified feature)
    in n draws, without replacement, from a finite population size N that contains exactly K objects with that
    feature, wherein each draw is either a success or a failure [#]_ [#]_ [#]_.

    .. math:: \\text{Hypergeometric}(x;N,K,n) = {{{K \\choose x}{{N-K} \\choose {n-x}}} \\over {N \\choose n}}

    Args:

        N(int): population size :math:`N \\geq 0`
        K(int): number of success states in the population :math:`K \\geq 0`
        n(int): number of draws :math:`n \\geq 0`
        x(int): number of observed successes :math:`x \\geq 0`

    References:
        .. [#] Weisstein, Eric W. "Hypergeometric Distribution." From MathWorld--A Wolfram Web Resource. https://mathworld.wolfram.com/HypergeometricDistribution.html
        .. [#] Wikipedia contributors. (2020, December 22). Hypergeometric distribution. https://en.wikipedia.org/w/index.php?title=Hypergeometric_distribution&oldid=995715954
        .. [#] Wolfram Research (2007). HypergeometricDistribution. https://reference.wolfram.com/language/ref/HypergeometricDistribution.html.
    """

    def __init__(self, N: int, K: int, n: int):
        # Every parameter is checked, not only the last one.
        if not all(isinstance(v, (int, np.integer)) for v in (N, K, n)):
            raise TypeError('all parameters must be of type int')
        if any(v < 0 for v in (N, K, n)):
            raise ValueError('parameters must be positive integer')
        self.N = N
        self.K = K
        self.n = n

    def pmf(self, x: Union[List, np.ndarray, float]) -> Union[np.ndarray, float]:
        """
        Args:
            x (Union[List, numpy.ndarray, float]): random variable(s)

        Raises:
            ValueError: when some value of x is negative

        Returns:
            Union[numpy.ndarray, float]: evaluation of pmf at x
        """
        n, N, K = self.n, self.N, self.K
        if isinstance(x, (list, np.ndarray)):
            x = np.asarray(x)
            if np.any(x < 0):
                raise ValueError('random variables are expected to be greater than or equal to 0')
        elif x < 0:
            # Scalar-only check: comparing an array here would be ambiguous.
            raise ValueError('random variables are expected to be greater than or equal to 0')
        # assumes x <= n
        return ss.binom(K, x) * ss.binom(N - K, n - x) / ss.binom(N, n)

    def cdf(self):
        """
        The cdf is not implemented for this distribution.

        Returns:
            the ``NotImplemented`` singleton.
        """
        return NotImplemented

    def mean(self) -> float:
        """
        Returns:
            the mean of Hypergeometric Distribution.
        """
        return self.n * (self.K / self.N)

    def median(self) -> str:
        """
        Returns:
            the median of Hypergeometric Distribution. Currently unsupported or undefined.
        """
        return "undefined"

    def mode(self) -> Tuple[int, int]:
        """
        Returns:
            Tuple[int, int]: mode
        """
        n, N, K = self.n, self.N, self.K
        pivot = ((n + 1) * (K + 1)) / (N + 2)
        return m.ceil(pivot) - 1, m.floor(pivot)

    def var(self) -> float:
        """
        Returns:
            float: variance
        """
        n, N, K = self.n, self.N, self.K
        return n * (K / N) * ((N - K) / N) * ((N - n) / (N - 1))

    def std(self) -> float:
        """
        Defined on the class so that ``summary`` does not rely on a parent
        class supplying it.

        Returns:
            float: standard deviation
        """
        return m.sqrt(self.var())

    def skewness(self) -> float:
        """
        Returns:
            float: skewness
        """
        n, N, K = self.n, self.N, self.K
        numerator = (N - 2 * K) * pow(N - 1, 1 / 2) * (N - 2 * n)
        denominator = m.sqrt(n * K * (N - K) * (N - n)) * (N - 2)
        return numerator / denominator

    def kurtosis(self) -> float:
        """
        Returns:
            float: kurtosis
        """
        n, N, K = self.n, self.N, self.K
        scale = 1 / (n * K * (N - K) * (N - n) * (N - 2) * (N - 3))
        return scale * ((N - 1) * N**2 * (N * (N + 1) - (6 * K * (N - K)) - (6 * n * (N - n)))
                        + (6 * n * K * (N - K) * (N - n) * (5 * N - 6)))

    def summary(self) -> Dict[str, Union[float, str, Tuple[int, int]]]:
        """
        Returns:
            Dictionary of Hypergeometric distribution moments. This includes standard deviation.
        """
        return {
            'mean': self.mean(),
            'median': self.median(),
            'mode': self.mode(),
            'var': self.var(),
            'std': self.std(),
            'skewness': self.skewness(),
            'kurtosis': self.kurtosis(),
        }
class Uniform(Finite):
    """
    This contains methods for finding the probability mass function and
    cumulative distribution function of the discrete Uniform distribution
    over the integers from a to b [#]_.

    .. math::
        \\text{Uniform}(x;a,b) = \\begin{cases}
        \\frac{1}{b-a+1} & \\mathrm{for}\\ a \\leq x \\leq b, \\\\
        0 & \\mathrm{for}\\ x<a\\ \\mathrm{or}\\ x>b
        \\end{cases}

    Args:

        a (int): lower bound of the support
        b (int): upper bound of the support

    Reference:
        .. [#] NIST/SEMATECH e-Handbook of Statistical Methods (2012). Uniform Distribution. Retrieved from http://www.itl.nist.gov/div898/handbook/, December 26, 2020.
    """

    def __init__(self, a: int, b: int):
        # Both bounds are checked, not only the second one.
        if not all(isinstance(v, (int, np.integer)) for v in (a, b)):
            raise TypeError('parameter a and b should be of type integer')
        self.a = a
        self.b = b
        # Number of points in the support.
        self.n = abs(b - a + 1)

    def pmf(self, x: Union[List[int], np.ndarray, int]) -> Union[float, np.ndarray]:
        """
        Args:
            x (Union[List[int], np.ndarray, int]): random variable(s)

        Returns:
            Union[float, np.ndarray]: evaluation of pmf at x. It is ``1/n``
            between a and b inclusive and 0 elsewhere.
        """
        a, b, n = self.a, self.b, self.n
        if isinstance(x, (list, np.ndarray)):
            x = np.asarray(x)
            return np.where((x >= a) & (x <= b), 1 / n, 0.0)
        return 1 / n if a <= x <= b else 0.0

    def cdf(self, x: Union[List[int], np.ndarray, int]) -> Union[float, np.ndarray]:
        """
        Args:
            x (Union[List[int], np.ndarray, int]): data point(s)

        Raises:
            TypeError: when a non-empty list or array does not have an integer dtype

        Returns:
            Union[float, np.ndarray]: evaluation of cdf at x
        """
        a, b, n = self.a, self.b, self.n
        if isinstance(x, (list, np.ndarray)):
            x = np.asarray(x)
            if x.size and not np.issubdtype(x.dtype, np.integer):
                raise TypeError('random variables must be of type integer')
            # np.where yields floats; np.piecewise would inherit the integer
            # dtype of x and truncate every fraction.
            inside = (np.floor(x - a) + 1) / n
            return np.where(x < a, 0.0, np.where(x > b, 1.0, inside))
        if x < a:
            return 0.0
        if x > b:
            return 1.0
        return (m.floor(x - a) + 1) / n

    def mean(self) -> float:
        """
        Returns:
            the mean of Uniform Distribution.
        """
        return (self.a + self.b) / 2

    def median(self) -> float:
        """
        Returns:
            the median of Uniform Distribution.
        """
        return (self.a + self.b) / 2

    def mode(self) -> Tuple[int, int]:
        """
        Returns:
            the bounds ``(a, b)`` of the support.
        """
        return (self.a, self.b)

    def var(self) -> float:
        """
        Returns:
            the variance of Uniform Distribution, ``(n**2 - 1) / 12`` for a
            support of n points.
        """
        return (self.n**2 - 1) / 12

    def std(self) -> float:
        """
        Defined on the class so that ``summary`` does not rely on a parent
        class supplying it.

        Returns:
            the standard deviation of Uniform Distribution.
        """
        return m.sqrt(self.var())

    def skewness(self) -> int:
        """
        Returns:
            the skewness of Uniform Distribution.
        """
        return 0

    def kurtosis(self) -> float:
        """
        Returns:
            the excess kurtosis of Uniform Distribution,
            ``-6 (n**2 + 1) / (5 (n**2 - 1))``; ``nan`` for a single-point
            support, where the variance is zero.
        """
        squared = self.n**2
        if squared == 1:
            return m.nan
        return -6 * (squared + 1) / (5 * (squared - 1))

    def summary(self) -> Dict[str, Union[float, Tuple[int, int]]]:
        """
        Returns:
            Dictionary of Uniform distribution moments. This includes standard deviation.
        """
        return {
            'mean': self.mean(),
            'median': self.median(),
            'mode': self.mode(),
            'var': self.var(),
            'std': self.std(),
            'skewness': self.skewness(),
            'kurtosis': self.kurtosis(),
        }
# class Zipf(Finite): # ...