######################################################################################################################
#This Python Code for Climate Science is written for the book entitled "Climate Mathematics: Theory and Applications"#
#A Cambridge University Press book authored by SSP Shen and RCJ Somerville in July 2019                              #
#The Python codes were based on the R codes written by Samuel Shen Distinguished Professor,                          #
#San Diego State University, USA and were translated from R by Louis Selstad, Stephen Shen,                          #
#Gregori Clarke, and Dakota Newmann and edited by Samuel Shen.                                                       #
######################################################################################################################


#FIRST TIME Python users*****
#These package need to be installed (on the terminal or anaconda interface) before importing them below. 

#Follow this tutorial for package installation before
# https://towardsdatascience.com/importerror-no-module-named-xyz-45e4a5339e1b


#Change your file path to the folder where your downloaded data is stored
#MAC HELP: https://support.apple.com/guide/mac-help/go-directly-to-a-specific-folder-on-mac-mchlp1236/mac
#PC HELP: https://www.sony.com/electronics/support/articles/00015251
import os
# os.chdir("/Users/sshen/climmath/data")
os.chdir('/Users/HP/Documents/sshen/climmath/data')


%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}


#Style Dictionary to standardize plotting scheme between different python scripts 
import matplotlib.pyplot as plt

styledict = {'xtick.labelsize':20,
             'xtick.major.size':9,
             'xtick.major.width':1,
             'ytick.labelsize':20,
             'ytick.major.size':9,
             'ytick.major.width':1,
             'legend.framealpha':0.0,
             'legend.fontsize':15,
             'axes.labelsize':20,
             'axes.titlesize':25,
             'axes.linewidth':2,
             'figure.figsize':(12,8),
             'savefig.format':'jpg'}
plt.rcParams.update(**styledict)


#Function that creates personalized discrete Colormap
import numpy as np 
from matplotlib import cm as cm1
from matplotlib.colors import ListedColormap, to_rgba

def newColMap(colors):
    """
    This function creates a new color map from a list of colors given
    as a parameter. Recommended length of list of colors is at least 6.
    """
    first = np.repeat([to_rgba(colors[0])], 2, axis = 0)
    last = np.repeat([to_rgba(colors[-1])], 2, axis = 0)
    v = cm1.get_cmap('viridis', 16*(len(colors)-2))
    newcolors = v(np.linspace(0, 1, 16*(len(colors)-2)))
    for (i, col) in enumerate(colors[1:-1]):
        newcolors[16*i : 16*(i+1), :] = to_rgba(col)
    return ListedColormap(np.append(np.append(first,newcolors, axis=0), last, axis=0))


import numpy as np 
import matplotlib.pyplot as plt
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
import matplotlib.mlab as mlab
import pandas as pd
from mpl_toolkits.basemap import Basemap
from mpl_toolkits import mplot3d
from netCDF4 import Dataset as ds
from urllib import request
import scipy as sp
from scipy import stats as stt
from sklearn import datasets, linear_model
from sklearn.neighbors import KernelDensity as kd
from sklearn.linear_model import LinearRegression
from random import randint
from scipy.stats import norm
import statsmodels
import sklearn
import math
import statistics
import sympy as sy
from sympy import symbols, diff
import statsmodels.api as sm
from datetime import date
from scipy.stats import norm, kurtosis
from sklearn.linear_model import LinearRegression
import seaborn as sns
import statsmodels.api as sm 
import pylab as py 
from scipy.stats import norm, t
from scipy.integrate import quad
import warnings
warnings.filterwarnings("ignore")


# Download the NCEI spatial average time series of monthly data
# by internet search using the file name.
aveNCEI = pd.read_table('http://shen.sdsu.edu/data/' + 
                        'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt', 
                        header = None, delim_whitespace = True)
aveNCEI = aveNCEI.values
aveNCEI

array([[ 1.88000e+03,  1.00000e+00, -2.54882e-01, ...,  1.25049e-01,
         1.18830e-01,  1.50000e-02],
       [ 1.88000e+03,  2.00000e+00, -3.90030e-01, ...,  1.69170e-01,
         8.25840e-02,  8.65860e-02],
       [ 1.88000e+03,  3.00000e+00, -4.14509e-01, ...,  1.28022e-01,
         8.60760e-02,  4.19460e-02],
       ...,
       [ 2.01700e+03,  1.00000e+00,  6.36712e-01, ...,  1.25049e-01,
         1.28754e-01,  1.50000e-02],
       [ 2.01700e+03,  2.00000e+00,  6.96222e-01, ...,  1.69170e-01,
         1.81365e-01,  1.50000e-02],
       [ 2.01700e+03,  3.00000e+00,  7.69576e-01, ...,  1.28022e-01,
         1.61637e-01,  3.36150e-02]])


# Yields the dimensions of data
aveNCEI.shape

(1647, 10)


# Take only the second column of this data matrix 
tmean15 = aveNCEI[:,2]

# Yields the first 6 values
tmean15[:6]

array([-0.254882, -0.39003 , -0.414509, -0.314104, -0.327618, -0.421961])


# Yields the sample Mean
np.mean(tmean15)

-0.19504468609593198


# Yields the sample Standard Deviation
np.std(tmean15)

0.3289373979136597


# Yields the sample Variance
np.var(tmean15)

0.10819981174620931


# Yields the sample Skewness
# pd.DataFrame(tmean15).skew() # another way
stt.skew(tmean15)

0.6570797310887514


# Yields the sample Kurtosis
kurtosis(tmean15)

-0.09874723968157051


# Yields the sample Median
np.median(tmean15)

-0.263387


# Yields desired quantiles
pd.DataFrame(tmean15).quantile([0, 0.25, 0.5, 0.75, 1])


# Creating a year sequence 
timemo = np.linspace(1880, 2015, 1647)

# Creating Linear Regression 
lm = LinearRegression()

# Training the model
lm.fit(timemo.reshape(-1, 1), tmean15)

# Using the model to make predictions
predictions = lm.predict(timemo.reshape(-1, 1))

print("Coeff. of Linear Model:", lm.coef_)
print("Intercept of Linear Model: ", lm.intercept_)

Coeff. of Linear Model: [0.00698074]
Intercept of Linear Model:  -13.790040882746423


#Fig 3.1
# Linear Model Visualization
plt.close()

plt.plot(timemo, tmean15, '-', color="black")
plt.title('Global Annual Mean Land and Ocean Surface \n Temperature Anomalies: 1880-2015')
plt.xlabel('Year')
plt.ylabel('Temperature Anomaly [$\degree$C]')

plt.plot(timemo, predictions, '-', color="red")
font = {'family': 'serif',
        'color':  'red',
        'weight': 'normal',
        'size': 18,
        }
plt.text(1885, .52, 'Linear trend: 0.70 [$\degree$C] per Century', fontdict = font)
plt.grid()
plt.show()


# Yields the covariance
np.cov(timemo,tmean15)[0][1]

10.621333849840694


# Yields the correlation
np.corrcoef(timemo,tmean15)[0][1]

0.8275518334843865


# Verify the correlation result 
x = timemo
y = tmean15
sdx = np.std(x)
sdy = np.std(y)
cxy = np.cov(x,y)
rxy = cxy / (sdx*sdy)
rxy[0][1]

0.8280545988753246


# Verify the trend result
rxy*sdy/sdx

array([[1.00060753e+00, 6.98498363e-03],
       [6.98498363e-03, 7.11994446e-05]])


# Verify the trend result
cxy/(sdx**2)

array([[1.00060753e+00, 6.98498363e-03],
       [6.98498363e-03, 7.11994446e-05]])


#Fig 3.2
# Plot a histogram of the tmean15 data
h, mids = np.histogram(tmean15)
xfit = np.linspace(np.min(tmean15),np.max(tmean15),30)

# Set histograms bin width 
areat = (np.diff(mids[0:2])*np.size(tmean15))

# Create the fitter y-values for the normal curve
yfit = areat*(norm.pdf(xfit,np.mean(tmean15),np.std(tmean15)))

plt.figure(figsize=(10,8))

plt.hist(tmean15,facecolor='w',edgecolor="k")

# Plot the normal fit on the histogram
plt.plot(xfit,yfit,color = "blue")

plt.title("Histogram of 1880-2015 Temperature Anomalies")
plt.xlabel("Temperature Anomalies [$\degree$C]")
plt.ylabel("Frequency")

plt.show()


#Fig 3.3
# Boxplot of the global annual mean temperature anomalies from 1880-2015
plt.figure(figsize=(7, 9))
plt.boxplot(tmean15)
plt.ylabel('Temperature Anomalies [$\degree$C]')
plt.show()


#Fig 3.4
# Retrieving all necessary data
ust = pd.read_csv('USJantemp1951-2016-nohead.csv'
                  ,header=None)
soi = pd.read_csv('soi-data-nohead.csv'
                  ,header=None)
soid = np.array(soi.iloc[:,1])
soim = np.reshape(soid,(66,12))

# Make the SOI into a matrix with each month as a column 
# Take the first column for January SOI
soij = soim[:,0]

# Take the third column for January US temperature data
ustj = np.array(ust.iloc[:,2])

# Set up figure
plt.figure(figsize=(12,7))

# Create scatter plot
plt.scatter(soij,ustj,color='k')

# Create a linear trendline for the data
lin = np.polyfit(soij, ustj, 1)
linreg = np.poly1d(lin)

plt.plot(soij,linreg(soij),'r')

plt.ylabel('US Temp. Anomalies [$\degree$F]')
plt.xlabel('SOI [dimensionless]')
plt.title("Scatter plot between Jan. SOI and US Temp.", pad = 10)
plt.grid()
plt.show()


#Fig 3.5
# Q-Q plot for the standardized temperature anomalies 
tstand = (tmean15-np.mean(tmean15))/np.std(tmean15)

# Simulate 136 points that follow a norm(0,1) distribution
qn = np.random.normal(0,1,136)

# Sort the points
qns = np.sort(qn)

# Create the Q-Q line
# pp = sm.ProbPlot(qns, fit=True)
qq = sm.qqplot(tstand, line ='45', marker = 'o', markerfacecolor = 'w', markeredgecolor = 'k')

# qq = pp.qqplot(marker='.', markerfacecolor='k', markeredgecolor='k', alpha=0.3)
# sm.qqline(qq.axes[0], line='45', fmt='k--')
# Create the Q-Q plot 
py.scatter(qns, -qns[::-1], color = "purple")

py.title("Q-Q Plot for the Standardized Global Temp. Anomalies vs N(0,1)", size = 20)
py.ylabel("Quantile of Temperature Anomalies", size = 19)
py.xlabel("Quantile of N(0,1)", size = 19)
py.grid()
py.show()


#Fig 3.6
# Display the different cloudiness climates of three cities
labels = ['Las Vegas', 'San Diego', 'Seattle']
clear = [0.58,0.40,0.16]
cloudy = [0.42,0.6,0.84]

# Create the label locations
x = np.arange(len(labels))  
# Set the width of the bars
width = 0.35  

# Prepare figure
fig, ax = plt.subplots(figsize=(12,10))

# Adding data to figure 
rects1 = ax.bar(x - width/2, clear, width, label='Clear')
rects2 = ax.bar(x + width/2, cloudy, width, label='Cloudy')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Probability')
ax.set_title('Probability Distribution of Cloudiness')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


# Creating a function to label each rectange with its respective frequency
def autolabel(rects):
    """Attach a text label above each bar in *rects*, displaying its height."""
    for rect in rects:
        height = rect.get_height()
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=15)

# Labeling each rectangle 
autolabel(rects1)
autolabel(rects2)

plt.show()


#Fig 3.7
# Set mu and sigma 
mu = 0
sigma = 1

# Create normal probability function
def intg(x):
    return 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (x - mu)**2 / (2 * sigma**2))

x = np.linspace(-3, 3)
y = intg(x)

# Set up figure
fig, ax = plt.subplots(figsize=(12,10))
ax.plot(x, y, 'black', linewidth=1)
ax.set_ylim(bottom=0)

# Set desired quantities
a_ , b_ = -3,-1.5
a, b = -1.2, 3

# Make the shaded region
ix_ = np.linspace(a_,b_)
ix = np.linspace(a, b)

# Using Integrating function
iy_ = intg(ix_)
iy = intg(ix)

# Creating desired vertical separations 
verts_ = [(a_, 0), *zip(ix_, iy_), (b_, 0)]
verts = [(a, 0), *zip(ix, iy), (b, 0)]

# Creating desired polygons
poly_ = Polygon(verts_, facecolor='lightblue', edgecolor='black', linewidth=0.5)
poly = Polygon(verts, facecolor='lightblue', edgecolor='black', linewidth=0.5)

# Adding polygon
ax.add_patch(poly_)
ax.add_patch(poly)

ax.text(-0.4,0.25,"Area = 1", fontsize=20)
ax.text(-1.78, .05, "$f(x)$", fontsize=15)
ax.text(-1.62, .01, "$x$", fontsize=15)
ax.text(-1.45, .08, "$dx$", fontsize=15)
ax.text(-1.2, .01, "$x+dx$", fontsize=15)
ax.text(-0.4, .09, '$\int^{\infty}_{-\infty}f(x)dx=1$', fontsize=15)

# Creating desired arrow
plt.arrow(-2,.2,0.65,-0.1)
ax.text(-2.3,0.2,"$dA=f(x)dx$", fontsize=15, fontweight="bold")

# Label figure
plt.title("PDF of Standard Normal Distribution")
plt.xlabel("Random Variable x")
plt.ylabel("Probability Density")

plt.show()


#Fig 3.8
# Creation of five different normal distributions 
bins = np.linspace(-8,8,200)
mu, sigma = 0,1
y1 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 0,2
y2 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 0,1/2
y3 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 3,1
y4 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = -4,1
y5 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )


df = pd.DataFrame({'x': bins, 'y1': y1, 'y2': y2, 'y3': y3, 'y4': y4, 'y5': y5})

plt.figure(figsize=(12,10))

# Multiple line plot
plt.plot( 'x', 'y1', data=df, marker='', markerfacecolor='red', markersize=8, color='red'
         , linewidth=4, label="$\mu = 0$ and $\sigma = 1$")
plt.plot( 'x', 'y2', data=df, marker='', color='blue', linewidth=2, label="$\mu = 0$ and $\sigma = 2$")
plt.plot( 'x', 'y3', data=df, marker='', color='black', linewidth=2, label="$\mu = 0$ and $\sigma = 1/2$")
plt.plot( 'x', 'y4', data=df, marker='', color='purple', linewidth=2, label="$\mu = 3$ and $\sigma = 1$")
plt.plot( 'x', 'y5', data=df, marker='', color='green', linewidth=2,  label="$\mu = -4$ and $\sigma = 1$")
plt.legend(loc = 'upper right')
plt.title("Normal Distribution N$(\mu,\sigma^2)$")
plt.xlabel("Random Variable X")
plt.ylabel("Probability Density")
plt.grid()


# Verifying the normal equation
mu = 0
sigma = 1
def intg(x):
    return 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (x - mu)**2 / (2 * sigma**2) )

i = quad(intg, -2, 2)
print("The integral evaluated to:", round(i[0],6), "and the absolute error is:", i[1])

The integral evaluated to: 0.9545 and the absolute error is: 1.8403560456416157e-11


#Fig 3.9
x = np.linspace(-4,4,200)
# The t function in python does not allow for using infinity as a parameter, so we will use 100 million to 
# simulate the t-distribution with infinity degrees of freedom (dof). 
myinf = 100000000

plt.figure(figsize=(12,10))

# Plot the t-distribution with various degrees of freedom
plt.plot(x, t(3).pdf(x),linewidth=4,color="red", label="$df=3$")
plt.plot(x, t(1).pdf(x),linewidth=2,color="blue", label="$df=1$")
plt.plot(x, t(2).pdf(x),linewidth=2,color="black", label="$df=2$")
plt.plot(x, t(6).pdf(x),linewidth=2,color="purple", label="$df=6$")
plt.plot(x, t(myinf).pdf(x),linewidth=2,color="green", label="$df=\infty$")

# Creating legend
plt.legend()

# Labeling plot 
plt.xlabel("Random Variable t")
plt.ylabel("Probability Density")
plt.title("Student t-distribution T$(t,df)$")
plt.grid()
plt.show()


# Confidence interval simulation 
# True mean
mu = 14 

# True standard deviation
sig = 0.3 

# Sample size
n = 50 

d = 1.96*sig/n**(1/2)
lowerlim = mu-d
upperlim = mu+d

# Number of simulations
ksim = 10000  

# Simulation counter
k = 0 

xbar = []
for i in range(ksim):
    x = np.mean(np.random.normal(mu,sig,n))
    xbar.append(x)
    if (x >= lowerlim) & (x <= upperlim):
        k = k+1
        
print("Simulation Counter:", k,"and Total Number of Simulations:", ksim)    
print("Lower CI:", round(lowerlim,2),"and Upper CI:", round(upperlim,2))

Simulation Counter: 9529 and Total Number of Simulations: 10000
Lower CI: 13.92 and Upper CI: 14.08


#Fig 3.10
# Confidence Interval Simulation 
np.random.seed(19680801)
num_bins = 51

# Create histogram with parameters
fig = plt.subplots(figsize=(12,10))
n, bins,patches = plt.hist(xbar,num_bins, color='blue', alpha=0.5, histtype='bar', ec='black')

# Label histogram
plt.title("Histogram of the Simulated Sample Mean Temperatures")
plt.ylabel("Frequency")
plt.xlabel("Temperature [$\degree$C]")

# Show the plot with extra information
print("95% Confidence Interval (13.92,14.08)")
plt.show()

95% Confidence Interval (13.92,14.08)


#Fig 3.11
# Plot confidence intervals and tail probabilities 

# Set mu and sigma 
mu = 0
sigma = 1

# Create normal probability function
def intg(x):
    return 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (x - mu)**2 / (2 * sigma**2) )

# Set desired quantities
a_ , b_ = -3,3
a, b = -1.96, 1.96
a1, b1 = -1.0, 1.0 # integral limits

x = np.linspace(-3, 3)
y = intg(x)

# Set up figure
fig, ax = plt.subplots(figsize=(12,10))
ax.plot(x, y, 'black', linewidth=1)
ax.set_ylim(bottom=0)

# Make the shaded region
ix_ = np.linspace(a_,b_)
ix = np.linspace(a, b)
ix1 = np.linspace(a1, b1)

# Using Integrating function
iy_ = intg(ix_)
iy = intg(ix)
iy1 = intg(ix1)

# Creating desired vertical seperations 
verts_ = [(a_, 0), *zip(ix_, iy_), (b_, 0)]
verts = [(a, 0), *zip(ix, iy), (b, 0)]
verts1 = [(a1, 0), *zip(ix1, iy1), (b1, 0)]

# Creating desired polygons
poly_ = Polygon(verts_, facecolor='red', edgecolor='black', linewidth=0.5)
poly = Polygon(verts, facecolor='lightblue', edgecolor='black')
poly1 = Polygon(verts1, facecolor='white', edgecolor='black')

# Adding polygon
ax.add_patch(poly_)
ax.add_patch(poly)
ax.add_patch(poly1)

# Adding appropriate text
ax.text(0.5 * (a + b), 0.24, "Probability = 0.68",
        horizontalalignment='center', verticalalignment="center", fontsize=15, fontweight="bold")
ax.text(-1.55, .02, "SE", fontsize=15)
ax.text(1.40, .02, "SE", fontsize=15)
ax.text(-0.9, .02, "SE", fontsize=15)
ax.text(0.65, .02, "SE", fontsize=15)
ax.text(-0.05, .02, r'$\bar{x}$', fontsize=15)

# Creating desired arrows
plt.arrow(-2.2,.02,-0.5,0.05)
ax.text(-3,0.08,"Probability \n = 0.025", fontsize=15, fontweight="bold")

# Label the figure
plt.ylabel("Probability Density")
plt.xlabel("True Mean as a Random Variable")
plt.title("Confidence Intervals and Confidence Levels")

ax.set_xticks((a,a1,0,b1,b))
ax.set_xticklabels(('','','','',''))

plt.show()


# Estimate the mean and error bar for a large sample 
dat1 = pd.read_table('http://shen.sdsu.edu/data/' + 
                        'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt', 
                        header = None, delim_whitespace = True)
dat1 = dat1.values
tmean15 = dat1[:,2]
MeanEst = np.mean(tmean15)
sd1 = np.std(tmean15)
StandErr = sd1/(len(tmean15))**(1/2)
ErrorMar = 1.96*StandErr
print("Mean Estimate:", MeanEst, "\n Lower bound:", MeanEst-ErrorMar, "\n Upper bound:", MeanEst+ErrorMar)

Mean Estimate: -0.19504468609593198 
 Lower bound: -0.21093097749081338 
 Upper bound: -0.17915839470105058


#Fig 3.12
# Setting mu and sigma
mu = 0
sig = 1

# Creating a normal probability density function 
def intg(x):
    return 1/(sig * np.sqrt(2 * np.pi)) *np.exp( - (x - mu)**2 / (2 * sigma**2) )

# Creating desired quantities 
a_ , b_ = -3,-2.4
a, b = -1.96, 3

x = np.linspace(-3, 3)
y = intg(x)

# Creating figure
fig, ax = plt.subplots(figsize=(12,10))
ax.plot(x, y, 'black', linewidth=1)
ax.set_ylim(bottom=0)

# Make the shaded region
ix_ = np.linspace(a_,b_)
ix = np.linspace(a, b)

# Integration between appropriate values
iy_ = intg(ix_)
iy = intg(ix)

# Creating array of vertical separations
verts_ = [(a_, 0), *zip(ix_, iy_), (b_, 0)]
verts = [(a, 0), *zip(ix, iy), (b, 0)]

# Creating shaded polygons
poly_ = Polygon(verts_, facecolor='lightblue', edgecolor='black', linewidth=0.5)
poly = Polygon(verts, facecolor='#A4C9A4', edgecolor='black')

# Adding desired polygons
ax.add_patch(poly_)
ax.add_patch(poly)

# Adding text
ax.text(-0.05, 0.1, "$H_0$ Probability 0.975",
        horizontalalignment='center', verticalalignment="center", fontsize=15)

# Creating appropriately placed arrow
plt.arrow(-2.50,.02,-0.45,0.05)
ax.text(-3,0.08,"p-value", fontsize=15)

# Label plot
plt.ylabel("Probability Density")
plt.xlabel("x: Standard Normal Random Variable")
plt.title("z-score, p-value, and Significance Level")

plt.ylim((0,.5))

# Set various parameters
ax.set_xticks((b_,a))
ax.set_xticklabels(('z','$z_{0.025}$'),fontsize=15)

ax.text(1.15,0.3,"Where z is your z-score \n and $z_{0.025}=-1.96$", fontsize=15)

ax.set_yticks(())
ax.set_yticklabels((""))

ax.axvline(x=-1.96, linewidth=2, color='r')

plt.arrow(-3,0.43,0.9,0, color="lightblue", linewidth=5, head_width=0.002, head_length=0.02)
plt.text(-3,0.46,"$H_1$ region", fontsize=15)
plt.arrow(3,0.43,-4.8,0, color='#A4C9A4', linewidth=5, head_width=0.002, head_length=0.02)
plt.text(1.75,0.46,"$H_0$ region", fontsize=15)

plt.show()


# Hypothesis testing for NOAAGlobalTemp 2006-2015
dat1 = pd.read_table('http://shen.sdsu.edu/data/' + 
                        'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt', 
                        header = None, delim_whitespace = True)
dat1 = dat1.values
tm0615 = dat1[127:136,2]
MeanEst = np.mean(tm0615)
sd1 = np.std(tm0615)
n = 10
t_score = (MeanEst-0)/ (sd1/np.sqrt(n))
print(" Mean:", MeanEst, "\n","SD:",sd1, "\n","t-score:", t_score)

 Mean: -0.5753087777777778 
 SD: 0.08889553942037536 
 t-score: -20.465437383334567


# Yields the p-value
t.pdf(t_score,df=n-1)

1.5984763823270562e-09


# Yields the critical t-score
t.ppf(1-0.025,n-1)

2.2621571627409915


# Hypothesis testing for Global Temp for 1981-1990 and 1991-2000
dat1 = pd.read_table('http://shen.sdsu.edu/data/' + 
                        'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt', 
                        header = None, delim_whitespace = True)
dat1 = dat1.values

# Seperating data into 2 sets
tm8190 = dat1[101:111,2]
tm9100 = dat1[112:121,2]

# Finding mean of data 
barT1 = np.mean(tm8190)
barT2 = np.mean(tm9100)

# Finding standard deviation 
S1sd = np.std(tm8190)
S2sd = np.std(tm9100)

# Setting sample size
n1 = n2 = 10
Spool = np.sqrt((n1-1)*S1sd**2 + (n2-1)*S2sd**2)/(n1+n2-2)

# Yields t-score and boundary conditions for critical value 
T = (barT2-barT1)/(Spool*np.sqrt(1/n1 + 1/n2))
tlow = t.ppf(0.025,n1+n2-2)
tup = t.ppf(1-0.025,n1+n2-2)
print(" t-score:", round(T,5), "\n",
     "tlow =", tlow, "\n",
     "tup=", tup)

 t-score: -15.95129 
 tlow = -2.10092204024096 
 tup= 2.10092204024096


# Yields the p-value
print("p-value:",t.pdf(T,n1+n2-2))

p-value: 2.4257289463517674e-12


# Prints means of the separated data 
print(" 1981-1990 temp =", barT1, "\n", "1991-2000 temp =", barT2)

 1981-1990 temp = -0.2861512 
 1991-2000 temp = -0.42506244444444446


# Yields the difference between 
barT2-barT1

-0.13891124444444447


dat1 = pd.read_table('http://shen.sdsu.edu/data/' + 
                        'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt', 
          header = None, delim_whitespace = True)
dat1 = dat1.values
tm = dat1[:,2]
x = np.linspace(1880,2015,len(tm))
lm = LinearRegression()
lm.fit(x.reshape(-1, 1), tm)
predictions = lm.predict(timemo.reshape(-1, 1))
print("Coeff. of Linear Model:", lm.coef_)
print("Intercept of Linear Model: ", lm.intercept_)
print("The r-squared of Model: ", lm.score(x.reshape(-1, 1),tm))

Coeff. of Linear Model: [0.00698074]
Intercept of Linear Model:  -13.790040882746423
The r-squared of Model:  0.6848420371033697

Python Code Version 3.0 for¶

Climate Mathematics: Theory and Applications - Chapter 3¶

Chapter 3: Basic Statistical Methods for Climate Data Analysis¶

3.1: Statistical Indices from Global Temperature Data from 1880 to 2015¶

Mean, Variance, Standard Deviation, Skewness, Kurtosis, and Quantiles¶

Correlation, Covariance, and Linear Trend¶

3.2: Commonly used Statistical Plots¶

3.3: Probability Distributions¶

3.4: Estimate and Its Error¶

3.5: Statistical Inference of a Linear Trend¶

	0
0.00	-0.934475
0.25	-0.440763
0.50	-0.263387
0.75	0.006320
1.00	0.949248