Computer Science‎ > ‎

Stats, ML, Data: Computing the Mean, Median, Mode, Standard Deviation, Confidence Interval

Mean

The average of all the numbers: add them up and divide the sum by the number of items.
Mean of 1,2,3,4,5 = (1+2+3+4+5)/5 = 3

Median
Sort the numbers.
If the number of numbers is odd, then the middle element is the median. 
Else, the average of the two middle elements is the median.
Median of 1,2,3,4,5 is 3
Median of 1,2,3,4 is (2+3)/2 = 2.5

Mode
The number which occurs most frequently in an array.
So the modal value in 1,2,2,3,3,3 is 3
The modal value in 1,2,3,3,3,20,20,20,20,25 is 20 (occurs most frequently - 4 times)


Standard Deviation

A measure of how dispersed the data is. 
$SD = \sqrt{\dfrac{(x_1-m)^2 + (x_2-m)^2 + (x_3-m)^2 + \cdots + (x_N-m)^2}{N}}$, where $m$ is the mean and $N$ is the number of values.





Python code to compute Mean, Median, Mode, Standard Deviation, Confidence Interval



import sys
import math

# Input: a first line holding n (the number of entries) and a second line
# holding n whitespace-separated integers.
# Output, one per line: mean, median, mode, population standard deviation and
# the bounds of the 95% confidence interval for the mean.
# The code sticks to syntax that is valid on both Python 2 and Python 3.

Z_95 = 1.96  # two-sided z-score for a 95% confidence level


def summarize(data):
    """Return the five report lines for *data* as a list of strings.

    The lines are, in order: the mean, the median, the mode (the smallest
    value when several values tie for the highest frequency), the population
    standard deviation (divisor n, not n - 1) and the lower and upper bounds
    of the 95% confidence interval for the mean, separated by one space.
    Every figure except the mode is formatted with one decimal place.

    Raises ValueError when *data* is empty.
    """
    data = list(data)  # materialise once: map() is a one-shot iterator on Python 3
    n = len(data)
    if n == 0:
        raise ValueError("need at least one value")

    # 1. Mean = sum / n
    mean = sum(data) / float(n)

    # 2. Median: middle element of the SORTED values, or the average of the
    #    two middle elements when n is even.
    ordered = sorted(data)
    mid = n // 2
    if n % 2 == 0:
        median = (ordered[mid - 1] + ordered[mid]) / 2.0
    else:
        median = ordered[mid]

    # 3. Mode: most frequent value; ties go to the smallest value.
    counts = {}
    for val in data:
        counts[val] = counts.get(val, 0) + 1
    highest = max(counts.values())
    mode = min(val for val, count in counts.items() if count == highest)

    # 4. Population standard deviation.
    variance = sum((val - mean) ** 2 for val in data) / float(n)
    dev = math.sqrt(variance)

    # 5. Confidence interval: mean +/- z * sd / sqrt(n).
    delta_x = Z_95 * dev / math.sqrt(n)

    return [
        "%.1f" % mean,
        "%.1f" % median,
        "%i" % mode,
        "%.1f" % dev,
        "%.1f %.1f" % (mean - delta_x, mean + delta_x),
    ]


def main():
    """Read the count and the values from stdin and print the report."""
    n = int(sys.stdin.readline())
    data = [int(token) for token in sys.stdin.readline().split()]
    if len(data) != n:
        # A mismatch would silently skew every statistic, so fail loudly.
        raise ValueError("expected %d values, got %d" % (n, len(data)))
    for line in summarize(data):
        print(line)


if __name__ == "__main__":
    main()


Java code to compute Mean, Median, Mode, Standard Deviation, Confidence Interval


import java.io.*;
import java.util.*;
import java.text.*;
import java.math.*;
import java.util.regex.*;

public class Solution {

    /** Two-sided z-score for a 95% confidence level. */
    private static final double Z_95 = 1.96;

    /**
     * Reads N followed by N integers from STDIN and prints, one per line:
     * mean, median, mode, population standard deviation and the bounds of
     * the 95% confidence interval for the mean.
     */
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        int n = sc.nextInt();            // number of integers
        long[] values = new long[n];     // array to store values
        for (int i = 0; i < n; ++i) {
            values[i] = sc.nextLong();
        }
        for (String line : summarize(values)) {
            System.out.println(line);
        }
    }

    /**
     * Builds the five report lines for the given values.
     *
     * The lines are, in order: mean, median, mode (the smallest value when
     * several values tie for the highest frequency), population standard
     * deviation (divisor N) and "lower upper" for the 95% confidence
     * interval. Everything except the mode is rounded to one decimal place.
     *
     * @param input the sample; it is not modified
     * @return the report lines in output order
     * @throws IllegalArgumentException if {@code input} is empty
     */
    static List<String> summarize(long[] input) {
        if (input.length == 0) {
            throw new IllegalArgumentException("need at least one value");
        }
        long[] sorted = input.clone();
        Arrays.sort(sorted);             // sorting serves both the median and the mode
        int n = sorted.length;

        // Mean = sum / n
        double sum = 0;
        for (long v : sorted) {
            sum += v;
        }
        double mean = sum / n;

        // Median: middle element, or the average of the two middle elements.
        int mid = n / 2;
        double median;
        if ((n & 1) == 1) {
            median = sorted[mid];
        } else {
            // Add as doubles so two large longs cannot overflow.
            median = ((double) sorted[mid - 1] + sorted[mid]) / 2.0;
        }

        // Mode: longest run of equal values in the sorted array. The check
        // runs on every element, so the final run is considered too; the
        // strict '>' keeps the smallest value when run lengths tie.
        long mode = sorted[0];
        int best = 0;
        int run = 0;
        for (int i = 0; i < n; ++i) {
            run = (i > 0 && sorted[i] == sorted[i - 1]) ? run + 1 : 1;
            if (run > best) {
                best = run;
                mode = sorted[i];
            }
        }

        // Population standard deviation.
        double squares = 0;
        for (long v : sorted) {
            double diff = v - mean;
            squares += diff * diff;
        }
        double sd = Math.sqrt(squares / n);

        // Confidence interval: mean +/- z * sd / sqrt(n).
        double half = Z_95 * sd / Math.sqrt(n);

        return Arrays.asList(
            oneDecimal(mean),
            oneDecimal(median),
            Long.toString(mode),
            oneDecimal(sd),
            oneDecimal(mean - half) + " " + oneDecimal(mean + half));
    }

    /** Formats a value with exactly one decimal place and a '.' separator. */
    private static String oneDecimal(double value) {
        // Locale.ROOT keeps the decimal point independent of the host locale.
        return String.format(Locale.ROOT, "%.1f", value);
    }
}




C++ code to compute Mean, Median, Mode, Standard Deviation, Confidence Interval


#include <cmath>
#include <cstdio>
#include <cmath>
#include <cstring>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;


int main() {
    /* Enter your code here. Read input from STDIN. Print output to STDOUT */   
    int n;
    cin>>n;
    double a[n];
    for(int i=0;i<n;i++) cin>>a[i];
    sort(a,a+n);
    double sum=0;
    double sqrsm=0;
    for(int i=0;i<n;i++){
        sum+=a[i];
        sqrsm+=(a[i]*a[i]);
    }
    double mean = sum/n;
    double sqrsum = sqrsm/n;
    double median;
    if(n%2==0){
        median = (a[n/2]+a[(n-1)/2])/2;
    }
    else median = a[n/2];
    int freq[100001];
    memset(freq,0,sizeof(freq));
    for(int i=0;i<n;i++){
        freq[(int)a[i]]++;
    }
    vector<pair<int,int> > frequencies;
    for(int i=0;i<100001;i++){
        if(freq[i]!=0)
        frequencies.push_back(make_pair(freq[i],i));
    }
    sort(frequencies.begin(),frequencies.end(),greater<pair<int,int> >());
    double mode = frequencies[0].second;
    int frequ = frequencies[0].first;
    int i=1;
    while(i<frequencies.size() && frequ == frequencies[i].first) i++;
    mode = frequencies[i-1].second;
    double sd = sqrt(-(mean*mean)+sqrsum);
    double sd_ = sd/(sqrt(n));
    double ci_low = mean-(1.96*sd_);
    double ci_high = mean+(1.96*sd_);
    
    printf("%.1f\n",mean);
    printf("%.1f\n",median);
    printf("%.0f\n",mode);
    printf("%.1f\n",sd);
    printf("%.1f %.1f\n",ci_low,ci_high);
    
    return 0;
}


R Program to compute Mean, Median, Mode, Standard Deviation, Confidence Interval




# Reads n and then n numbers from STDIN and prints, one per line: mean,
# median, mode, population standard deviation and the 95% confidence interval.

# Format a number with exactly one decimal place. cat() would switch to
# 7 significant digits or scientific notation (e.g. 1e+05).
fmt1 <- function(x) sprintf("%.1f", x)

# Build the five report lines for a numeric vector.
# Returns a character vector: mean, median, mode (smallest value on ties),
# population standard deviation (divisor n) and "lower upper" for the CI.
compute_stats <- function(values) {
  n <- length(values)
  if (n == 0) stop("need at least one value")

  mu <- mean(values)
  mdn <- median(values)

  # calculate the mode: the values whose count equals the highest count
  counts <- table(values)
  modal <- min(as.numeric(names(counts)[counts == max(counts)]))

  # calculate the population variance and standard deviation
  sigma <- sqrt(sum((values - mu)^2) / n)

  # calculate the confidence interval
  half_width <- 1.96 * sigma / sqrt(n)

  c(fmt1(mu),
    fmt1(mdn),
    sprintf("%.0f", modal),
    fmt1(sigma),
    paste(fmt1(mu - half_width), fmt1(mu + half_width)))
}

main <- function() {
  con <- file("stdin")
  input <- readLines(con)
  close(con)
  # The first line is the count; the values on the second line are parsed by
  # scan(), which accepts any run of whitespace between numbers.
  values <- scan(text = input[2], what = numeric(), quiet = TRUE)
  writeLines(compute_stats(values))
}

# Run only when executed as a script (e.g. Rscript), not when source()d.
if (sys.nframe() == 0L) {
  main()
}

Ruby Program to compute Mean, Median, Mode, Standard Deviation, Confidence Interval


# Read every whitespace-separated token from STDIN, drop the leading count,
# and keep the remaining values as floats in ascending order.
values = STDIN.read.split.drop(1).map { |token| token.to_f }.sort
count = values.length

mean = values.inject(:+) / count

# Population standard deviation (divisor is the number of values).
squared_deviations = values.inject(0) { |acc, v| acc + (v - mean) * (v - mean) }
std_dev = Math.sqrt(squared_deviations / count)

# For an odd count both indices refer to the same middle element.
median = (values[(count - 1) / 2] + values[count / 2]) / 2

puts "%.1f" % mean
puts "%.1f" % median

# Mode: the values are grouped in ascending order and the first group with
# the greatest size is taken.
groups = values.group_by { |v| v }.values
puts groups.max_by { |group| group.length }[0].to_i

puts "%.1f" % std_dev

# 95% confidence interval: mean +/- 1.96 * sd / sqrt(count).
margin = 1.96 * std_dev / Math.sqrt(count)
puts [mean - margin, mean + margin].map { |bound| "%.1f" % bound }.join(" ")