## This file is part of MLPY.
## Compute metrics for assessing the performance of binary classification
## models.
    
## This code is written by Davide Albanese, <albanese@fbk.eu>.
## (C) 2008 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

## Public names of this module: exactly these are exported by a star import.
__all__ = ['err', 'errp', 'errn', 'acc', 'sens', 'spec',
           'ppv', 'npv', 'mcc', 'single_auc', 'wmw_auc',
           'mse', 'r2', 'mse_vs_n']

from numpy import *

"""
Compute metrics for assessing the performance of binary classification models.

The Confusion Matrix:

Total Samples       (ts) | Actual Positives (ap) | Actual Negatives (an)
------------------------------------------------------------------------
Predicted Positives (pp) | True Positives   (tp) | False Positives  (fp)
------------------------------------------------------------------------
Predicted Negatives (pn) | False Negatives  (fn) | True Negatives   (tn)
"""


def err(y, p):
    """
    Compute the Error, i.e. the fraction of samples whose prediction
    differs from the class label.

    error = (fp + fn) / ts

    Input

      * *y* - classes    (two classes) [1D numpy array integer]
      * *p* - prediction (two classes) [1D numpy array integer]

    Output

      * error

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    n_samples = y.shape[0]

    if n_samples != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).size > 2 or unique(p).size > 2:
        raise ValueError("err() works only for two-classes")

    # Each position where label and prediction disagree is one error.
    n_wrong = count_nonzero(y != p)

    return n_wrong / float(n_samples)


def errp(y, p):
    """
    Compute the Error for positive samples, i.e. the fraction of actual
    positives (+1) that were not predicted as +1.

    errp = fn / ap

    A misclassified actual positive is a false negative, so this is the
    complement of the sensitivity whenever ap > 0.

    Input

      * *y* - classes    (two classes +1 and -1) [1D numpy array integer]
      * *p* - prediction (two classes +1 and -1) [1D numpy array integer]

    Output

      * error for positive samples (0.0 when *y* holds no +1 sample)

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).shape[0] > 2 or unique(p).shape[0] > 2:
        raise ValueError("errp() works only for two-classes")

    actual_pos = (y == 1)
    ap = count_nonzero(actual_pos)

    # No actual positives: the rate is undefined, report 0.0 by convention.
    if ap == 0:
        return 0.0

    # Actual positives predicted as anything other than +1.
    fn = count_nonzero(p[actual_pos] != 1)

    return fn / float(ap)


def errn(y, p):
    """
    Compute the Error for negative samples, i.e. the fraction of actual
    negatives (-1) that were not predicted as -1.

    errn = fp / an

    A misclassified actual negative is a false positive, so this is the
    complement of the specificity whenever an > 0.

    Input

      * *y* - classes    (two classes +1 and -1) [1D numpy array integer]
      * *p* - prediction (two classes +1 and -1) [1D numpy array integer]

    Output

      * error for negative samples (0.0 when *y* holds no -1 sample)

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).shape[0] > 2 or unique(p).shape[0] > 2:
        raise ValueError("errn() works only for two-classes")

    actual_neg = (y == -1)
    an = count_nonzero(actual_neg)

    # No actual negatives: the rate is undefined, report 0.0 by convention.
    if an == 0:
        return 0.0

    # Actual negatives predicted as anything other than -1.
    fp = count_nonzero(p[actual_neg] != -1)

    return fp / float(an)


def acc(y, p):
    """
    Compute the Accuracy, i.e. the fraction of samples whose prediction
    equals the class label.

    accuracy = (tp + tn) / ts

    Input

      * *y* - classes    (two classes) [1D numpy array integer]
      * *p* - prediction (two classes) [1D numpy array integer]

    Output

      * accuracy

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    n_samples = y.shape[0]

    if n_samples != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).size > 2 or unique(p).size > 2:
        raise ValueError("acc() works only for two-classes")

    # Each position where label and prediction agree is one hit.
    n_right = count_nonzero(y == p)

    return n_right / float(n_samples)


def sens(y, p):
    """
    Compute the Sensitivity, i.e. the fraction of actual positives (+1)
    that were predicted as +1.

    sensitivity = tp / ap

    Input

      * *y* - classes    (two classes +1 and -1) [1D numpy array integer]
      * *p* - prediction (two classes +1 and -1) [1D numpy array integer]

    Output

      * sensitivity (0.0 when *y* holds no +1 sample)

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).size > 2 or unique(p).size > 2:
        raise ValueError("sens() works only for two-classes")

    actual_pos = (y == 1)
    ap = count_nonzero(actual_pos)

    # No actual positives: the rate is undefined, report 0.0 by convention.
    if not ap:
        return 0.0

    tp = count_nonzero(p[actual_pos] == 1)

    return tp / float(ap)


def spec(y, p):
    """
    Compute the Specificity, i.e. the fraction of actual negatives (-1)
    that were predicted as -1.

    specificity = tn / an

    Input

      * *y* - classes    (two classes +1 and -1) [1D numpy array integer]
      * *p* - prediction (two classes +1 and -1) [1D numpy array integer]

    Output

      * specificity (0.0 when *y* holds no -1 sample)

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).size > 2 or unique(p).size > 2:
        raise ValueError("spec() works only for two-classes")

    actual_neg = (y == -1)
    an = count_nonzero(actual_neg)

    # No actual negatives: the rate is undefined, report 0.0 by convention.
    if not an:
        return 0.0

    tn = count_nonzero(p[actual_neg] == -1)

    return tn / float(an)


def ppv(y, p):
    """
    Compute the Positive Predictive Value (PPV), i.e. the fraction of
    samples predicted as +1 whose class really is +1.

    PPV = tp / pp

    Input

      * *y* - classes    (two classes +1 and -1) [1D numpy array integer]
      * *p* - prediction (two classes +1 and -1) [1D numpy array integer]

    Output

      * PPV (0.0 when *p* holds no +1 prediction)

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).size > 2 or unique(p).size > 2:
        raise ValueError("ppv() works only for two-classes")

    predicted_pos = (p == 1)
    pp = count_nonzero(predicted_pos)

    # Nothing was predicted positive: report 0.0 by convention.
    if not pp:
        return 0.0

    tp = count_nonzero(y[predicted_pos] == 1)

    return tp / float(pp)


def npv(y, p):
    """
    Compute the Negative Predictive Value (NPV), i.e. the fraction of
    samples predicted as -1 whose class really is -1.

    NPV = tn / pn

    Input

      * *y* - classes    (two classes +1 and -1) [1D numpy array integer]
      * *p* - prediction (two classes +1 and -1) [1D numpy array integer]

    Output

      * NPV (0.0 when *p* holds no -1 prediction)

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).size > 2 or unique(p).size > 2:
        raise ValueError("npv() works only for two-classes")

    predicted_neg = (p == -1)
    pn = count_nonzero(predicted_neg)

    # Nothing was predicted negative: report 0.0 by convention.
    if not pn:
        return 0.0

    tn = count_nonzero(y[predicted_neg] == -1)

    return tn / float(pn)


def mcc(y, p):
    """
    Compute the Matthews Correlation Coefficient (MCC).

    MCC = ((tp*tn)-(fp*fn)) / sqrt((tp+fn)*(tp+fp)*(tn+fn)*(tn+fp))

    Input

      * *y* - classes    (two classes +1 and -1) [1D numpy array integer]
      * *p* - prediction (two classes +1 and -1) [1D numpy array integer]

    Output

      * MCC (0.0 when the denominator is zero)

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).shape[0] > 2 or unique(p).shape[0] > 2:
        raise ValueError("mcc() works only for two-classes")

    actual_pos = (y == 1)
    actual_neg = (y == -1)
    predicted_pos = (p == 1)
    predicted_neg = (p == -1)

    # int() guarantees arbitrary-precision Python ints, so the products
    # below can never wrap around a fixed-width integer.
    tp = int(count_nonzero(actual_pos & predicted_pos))
    tn = int(count_nonzero(actual_neg & predicted_neg))
    fp = int(count_nonzero(predicted_pos & ~actual_pos))
    fn = int(count_nonzero(predicted_neg & ~actual_neg))

    # Multiply exactly as Python ints and convert to float once. Handing
    # the raw int to numpy's sqrt fails with a TypeError as soon as the
    # product needs more than 64 bits (roughly 10**5 samples per class).
    den = sqrt(float((tp + fn) * (tp + fp) * (tn + fn) * (tn + fp)))

    # A marginal count of the confusion matrix is zero: MCC is undefined,
    # report 0.0 by convention.
    if den == 0.0:
        return 0.0

    num = float((tp * tn) - (fp * fn))

    return num / den


def single_auc(y, p):
    """
    Compute the single AUC: the arithmetic mean of the sensitivity and
    the specificity of the predictions.

    singleAUC = (sensitivity + specificity) / 2

    Input

      * *y* - classes    (two classes +1 and -1) [1D numpy array integer]
      * *p* - prediction (two classes +1 and -1) [1D numpy array integer]

    Output

      * singleAUC

    Raises

      * ValueError - *y* and *p* differ in length, or one of them holds
        more than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).size > 2 or unique(p).size > 2:
        raise ValueError("single_auc() works only for two-classes")

    true_positive_rate = sens(y, p)
    true_negative_rate = spec(y, p)

    return (true_positive_rate + true_negative_rate) / 2.0


def wmw_auc(y, r):
    """
    Compute the AUC by using the Wilcoxon-Mann-Whitney formula.

    The result is the fraction of (positive, negative) sample pairs in
    which the positive sample has a strictly greater score than the
    negative one. Tied pairs and pairs involving NaN do not count.

    Input

      * *y* - classes (two classes +1 and -1) [1D numpy array integer]
      * *r* - real-valued prediction          [1D numpy array float]

    Output

      * wmwAUC

    Raises

      * ValueError - *y* and *r* differ in length, or *y* holds more
        than two distinct values
      * ZeroDivisionError - *y* holds no +1 sample or no -1 sample
    """

    if y.shape[0] != r.shape[0]:
        raise ValueError("y and r have different length")

    if unique(y).shape[0] > 2:
        raise ValueError("wmw_auc() works only for two-classes")

    idxp = where(y == 1)[0]
    idxn = where(y == -1)[0]

    # ravel() keeps column-vector inputs usable by sort/searchsorted.
    pos_scores = ravel(r[idxp])
    neg_scores = sort(ravel(r[idxn]))

    # NaN is never greater than anything, so NaN positives win no pair;
    # (x == x) is False only for NaN. NaN negatives sort to the end and
    # are therefore never counted as "below" a positive.
    comparable = pos_scores[pos_scores == pos_scores]

    # With side='left' the insertion index of a positive score equals
    # the number of negative scores strictly below it. Summing over the
    # positives gives the pair count in O(n log n) instead of a
    # Python-level loop over every pair.
    wins = searchsorted(neg_scores, comparable, side='left').sum()

    # Plain Python floats so an empty class raises ZeroDivisionError
    # rather than silently producing nan.
    return float(wins) / float(idxp.shape[0] * idxn.shape[0])

def mse(y, p):
    """Mean Squared Error

    Input

      * *y* - target values    [numpy array]
      * *p* - predicted values [numpy array]

    Output

      * sum of the squared differences between *y* and *p*, divided
        by the length of *y*
    """

    residuals = y - p
    squared_total = sum(residuals**2)

    return squared_total / y.shape[0]


def r2(y, p):
    """Coefficient of determination (R^2)

    R^2 is computed as the square of the correlation coefficient
    between *p* and *y*, taken from the off-diagonal entry of the
    matrix returned by corrcoef().

    Input

      * *y* - target values    [1D numpy array]
      * *p* - predicted values [1D numpy array]

    Output

      * R^2
    """

    correlation = corrcoef(p, y)
    r_value = correlation[0, 1]

    return r_value**2

   
def mse_vs_n(mse, n):
    """
    Combine error values and the matching *n* values into one score
    per element.

    Both inputs are rescaled linearly so that their minimum maps to 0.0
    and their maximum to 1.0; the score is then

    score = 1 - sqrt((mse_norm**2 + n_norm**4) / 2)

    so an element with both the smallest *mse* and the smallest *n*
    scores 1.0, and one with both maxima scores 0.0.

    Input

      * *mse* - error values                         [1D numpy array float]
      * *n*   - one value per entry of *mse*         [1D numpy array]
                (presumably a model size or feature
                count - confirm against the callers)

    Output

      * score for each element [1D numpy array float]
    """

    # NOTE: inside this body the parameter *mse* hides the module-level
    # mse() function.
    unit_range = [0.0, 1.0]
    mse_range = [min(mse), max(mse)]
    n_range = [min(n), max(n)]

    mse_norm = interp(mse, mse_range, unit_range)
    n_norm = interp(n, n_range, unit_range)

    penalty = (mse_norm**2 + n_norm**4) / 2

    return 1.0 - sqrt(penalty)
    
