This Version 3.0 is authored by Briana Ramirez and edited by Samuel Shen. Liu Yang, Sandra Villamar, and Joaquin Stawsky contributed code to this version.
A video tutorial for the Python code can be found at the following URL: https://www.youtube.com/channel/UC7D9i0kBMzPTHyEhU0h6W9g
This version is based upon the previous version described in the following box.
######################################################################################################################
#This Python Code for Climate Science is written for the book entitled "Climate Mathematics: Theory and Applications"#
#A Cambridge University Press book authored by SSP Shen and RCJ Somerville in July 2019 #
#The Python codes were based on the R codes written by Samuel Shen Distinguished Professor, #
#San Diego State University, USA and were translated from R by Louis Selstad, Stephen Shen, #
#Gregori Clarke, and Dakota Newmann and edited by Samuel Shen. #
######################################################################################################################
#FIRST TIME Python users*****
#These packages need to be installed (from the terminal or the Anaconda interface) before they are imported below.
#Follow this tutorial for package installation:
# https://towardsdatascience.com/importerror-no-module-named-xyz-45e4a5339e1b
#Change your file path to the folder where your downloaded data is stored
#MAC HELP: https://support.apple.com/guide/mac-help/go-directly-to-a-specific-folder-on-mac-mchlp1236/mac
#PC HELP: https://www.sony.com/electronics/support/articles/00015251
import os
# os.chdir("/Users/sshen/climmath/data")
os.chdir('/Users/HP/Documents/sshen/climmath/data')
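The working directory above is machine specific. A more portable sketch (the path below is hypothetical) checks that the folder exists before switching to it:
# A minimal sketch for setting the data directory; replace the path
# with the folder on your own machine where the downloaded data lives.
data_dir = os.path.expanduser('~/climmath/data')  # hypothetical location
if os.path.isdir(data_dir):
    os.chdir(data_dir)
else:
    print('Data folder not found:', data_dir)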
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}
#Style Dictionary to standardize plotting scheme between different python scripts
import matplotlib.pyplot as plt
styledict = {'xtick.labelsize':20,
             'xtick.major.size':9,
             'xtick.major.width':1,
             'ytick.labelsize':20,
             'ytick.major.size':9,
             'ytick.major.width':1,
             'legend.framealpha':0.0,
             'legend.fontsize':15,
             'axes.labelsize':20,
             'axes.titlesize':25,
             'axes.linewidth':2,
             'figure.figsize':(12,8),
             'savefig.format':'jpg'}
plt.rcParams.update(**styledict)
#Function that creates personalized discrete Colormap
import numpy as np
from matplotlib import cm as cm1
from matplotlib.colors import ListedColormap, to_rgba
def newColMap(colors):
    """
    This function creates a new color map from a list of colors given
    as a parameter. Recommended length of the list of colors is at least 6.
    """
    first = np.repeat([to_rgba(colors[0])], 2, axis=0)
    last = np.repeat([to_rgba(colors[-1])], 2, axis=0)
    v = cm1.get_cmap('viridis', 16*(len(colors)-2))
    newcolors = v(np.linspace(0, 1, 16*(len(colors)-2)))
    for (i, col) in enumerate(colors[1:-1]):
        newcolors[16*i : 16*(i+1), :] = to_rgba(col)
    return ListedColormap(np.append(np.append(first, newcolors, axis=0), last, axis=0))
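As a quick illustration, here is a minimal usage sketch for newColMap; the six-color list and the random data are hypothetical:
# Hypothetical example: a six-color discrete map applied to random values
myColMap = newColMap(['black', 'blue', 'green', 'yellow', 'red', 'maroon'])
demo = np.random.rand(10, 10)
plt.pcolormesh(demo, cmap=myColMap)
plt.colorbar()
plt.show()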
# All imports used in this chapter
import math
import statistics
import warnings
from datetime import date
from random import randint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import pylab as py
import seaborn as sns
import scipy as sp
import sympy as sy
import sklearn
import statsmodels
import statsmodels.api as sm
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
from mpl_toolkits.basemap import Basemap
from mpl_toolkits import mplot3d
from netCDF4 import Dataset as ds
from urllib import request
from scipy import stats as stt
from scipy.integrate import quad
from scipy.stats import norm, t, kurtosis
from sklearn import datasets, linear_model
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KernelDensity as kd
from sympy import symbols, diff

warnings.filterwarnings("ignore")
# Download the NCEI spatial average time series of monthly data
# by internet search using the file name.
aveNCEI = pd.read_table('http://shen.sdsu.edu/data/' +
'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt',
header = None, delim_whitespace = True)
aveNCEI = aveNCEI.values
aveNCEI
array([[ 1.88000e+03,  1.00000e+00, -2.54882e-01, ...,  1.25049e-01,
         1.18830e-01,  1.50000e-02],
       [ 1.88000e+03,  2.00000e+00, -3.90030e-01, ...,  1.69170e-01,
         8.25840e-02,  8.65860e-02],
       [ 1.88000e+03,  3.00000e+00, -4.14509e-01, ...,  1.28022e-01,
         8.60760e-02,  4.19460e-02],
       ...,
       [ 2.01700e+03,  1.00000e+00,  6.36712e-01, ...,  1.25049e-01,
         1.28754e-01,  1.50000e-02],
       [ 2.01700e+03,  2.00000e+00,  6.96222e-01, ...,  1.69170e-01,
         1.81365e-01,  1.50000e-02],
       [ 2.01700e+03,  3.00000e+00,  7.69576e-01, ...,  1.28022e-01,
         1.61637e-01,  3.36150e-02]])
# Yields the dimensions of data
aveNCEI.shape
(1647, 10)
# Take the third column (index 2) of this data matrix: the temperature anomalies
tmean15 = aveNCEI[:,2]
# Yields the first 6 values
tmean15[:6]
array([-0.254882, -0.39003 , -0.414509, -0.314104, -0.327618, -0.421961])
# Yields the sample Mean
np.mean(tmean15)
-0.19504468609593198
# Yields the sample Standard Deviation
np.std(tmean15)
0.3289373979136597
# Yields the sample Variance
np.var(tmean15)
0.10819981174620931
# Yields the sample Skewness
# pd.DataFrame(tmean15).skew() # another way
stt.skew(tmean15)
0.6570797310887514
# Yields the sample Kurtosis
kurtosis(tmean15)
-0.09874723968157051
# Yields the sample Median
np.median(tmean15)
-0.263387
# Yields desired quantiles
pd.DataFrame(tmean15).quantile([0, 0.25, 0.5, 0.75, 1])
             0
0.00 -0.934475
0.25 -0.440763
0.50 -0.263387
0.75  0.006320
1.00  0.949248
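The same summary statistics can be obtained in one call; a sketch using pandas (note that describe() uses the sample standard deviation with ddof=1, so its std differs slightly from np.std above):
# Count, mean, std, quartiles, and extremes in a single call
pd.Series(tmean15).describe()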
# Create a monthly time sequence in fractional years (1647 months)
timemo = np.linspace(1880, 2015, 1647)
# Creating Linear Regression
lm = LinearRegression()
# Training the model
lm.fit(timemo.reshape(-1, 1), tmean15)
# Using the model to make predictions
predictions = lm.predict(timemo.reshape(-1, 1))
print("Coeff. of Linear Model:", lm.coef_)
print("Intercept of Linear Model: ", lm.intercept_)
Coeff. of Linear Model: [0.00698074]
Intercept of Linear Model:  -13.790040882746423
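As a cross-check, scipy.stats.linregress should reproduce the same slope and intercept; a minimal sketch:
# Verify the sklearn fit with scipy's linear regression
from scipy.stats import linregress
res = linregress(timemo, tmean15)
print("Slope:", res.slope, "Intercept:", res.intercept, "r:", res.rvalue)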
#Fig 3.1
# Linear Model Visualization
plt.close()
plt.plot(timemo, tmean15, '-', color="black")
plt.title('Global Annual Mean Land and Ocean Surface \n Temperature Anomalies: 1880-2015')
plt.xlabel('Year')
plt.ylabel('Temperature Anomaly [$\degree$C]')
plt.plot(timemo, predictions, '-', color="red")
font = {'family': 'serif',
'color': 'red',
'weight': 'normal',
'size': 18,
}
plt.text(1885, .52, 'Linear trend: 0.70 [$\degree$C] per Century', fontdict = font)
plt.grid()
plt.show()
# Yields the covariance
np.cov(timemo,tmean15)[0][1]
10.621333849840694
# Yields the correlation
np.corrcoef(timemo,tmean15)[0][1]
0.8275518334843865
# Verify the correlation result
x = timemo
y = tmean15
sdx = np.std(x)
sdy = np.std(y)
cxy = np.cov(x,y)
rxy = cxy / (sdx*sdy)
rxy[0][1]
0.8280545988753246
# Verify the trend result
rxy*sdy/sdx
array([[1.00060753e+00, 6.98498363e-03],
       [6.98498363e-03, 7.11994446e-05]])
# Verify the trend result
cxy/(sdx**2)
array([[1.00060753e+00, 6.98498363e-03],
       [6.98498363e-03, 7.11994446e-05]])
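The small gap between 0.8276 from np.corrcoef and 0.8281 above comes from mixing conventions: np.cov defaults to ddof=1 while np.std defaults to ddof=0. A sketch of the consistent check:
# Use the sample standard deviation (ddof=1) so the result
# matches np.corrcoef exactly
sdx1 = np.std(x, ddof=1)
sdy1 = np.std(y, ddof=1)
np.cov(x, y)[0][1]/(sdx1*sdy1)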
Histogram
#Fig 3.2
# Plot a histogram of the tmean15 data
h, mids = np.histogram(tmean15)
xfit = np.linspace(np.min(tmean15),np.max(tmean15),30)
# Scale factor: bin width times sample size, to convert the pdf to counts
areat = (np.diff(mids[0:2])*np.size(tmean15))
# Create the fitted y-values for the normal curve
yfit = areat*(norm.pdf(xfit,np.mean(tmean15),np.std(tmean15)))
plt.figure(figsize=(10,8))
plt.hist(tmean15,facecolor='w',edgecolor="k")
# Plot the normal fit on the histogram
plt.plot(xfit,yfit,color = "blue")
plt.title("Histogram of 1880-2015 Temperature Anomalies")
plt.xlabel("Temperature Anomalies [$\degree$C]")
plt.ylabel("Frequency")
plt.show()
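An equivalent sketch uses density normalization: with density=True the histogram integrates to one, so the normal pdf can be overlaid directly without the area scaling above.
# Density-normalized histogram with the fitted normal curve
plt.hist(tmean15, density=True, facecolor='w', edgecolor='k')
plt.plot(xfit, norm.pdf(xfit, np.mean(tmean15), np.std(tmean15)), color='blue')
plt.show()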
Box Plot
#Fig 3.3
# Boxplot of the global annual mean temperature anomalies from 1880-2015
plt.figure(figsize=(7, 9))
plt.boxplot(tmean15)
plt.ylabel('Temperature Anomalies [$\degree$C]')
plt.show()
Scatter Plot
#Fig 3.4
# Retrieving all necessary data
ust = pd.read_csv('USJantemp1951-2016-nohead.csv'
,header=None)
soi = pd.read_csv('soi-data-nohead.csv'
,header=None)
soid = np.array(soi.iloc[:,1])
# Make the SOI into a matrix with each month as a column
soim = np.reshape(soid,(66,12))
# Take the first column for January SOI
soij = soim[:,0]
# Take the third column for January US temperature data
ustj = np.array(ust.iloc[:,2])
# Set up figure
plt.figure(figsize=(12,7))
# Create scatter plot
plt.scatter(soij,ustj,color='k')
# Create a linear trendline for the data
lin = np.polyfit(soij, ustj, 1)
linreg = np.poly1d(lin)
plt.plot(soij,linreg(soij),'r')
plt.ylabel('US Temp. Anomalies [$\degree$F]')
plt.xlabel('SOI [dimensionless]')
plt.title("Scatter plot between Jan. SOI and US Temp.", pad = 10)
plt.grid()
plt.show()
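To quantify what the trendline suggests, a short sketch computes the correlation between the January SOI and the January US temperature anomalies:
# Correlation corresponding to the scatter plot above
r = np.corrcoef(soij, ustj)[0, 1]
print("Correlation between Jan. SOI and US temp.:", round(r, 4))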
Q-Q Plot
#Fig 3.5
# Q-Q plot for the standardized temperature anomalies
tstand = (tmean15-np.mean(tmean15))/np.std(tmean15)
# Simulate 136 points (one per year, 1880-2015) that follow a N(0,1) distribution
qn = np.random.normal(0,1,136)
# Sort the points
qns = np.sort(qn)
# Create the Q-Q line
# pp = sm.ProbPlot(qns, fit=True)
qq = sm.qqplot(tstand, line ='45', marker = 'o', markerfacecolor = 'w', markeredgecolor = 'k')
# qq = pp.qqplot(marker='.', markerfacecolor='k', markeredgecolor='k', alpha=0.3)
# sm.qqline(qq.axes[0], line='45', fmt='k--')
# Create the Q-Q plot
py.scatter(qns, -qns[::-1], color = "purple")
py.title("Q-Q Plot for the Standardized Global Temp. Anomalies vs N(0,1)", size = 20)
py.ylabel("Quantile of Temperature Anomalies", size = 19)
py.xlabel("Quantile of N(0,1)", size = 19)
py.grid()
py.show()
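An alternative sketch uses scipy's probplot (via the stt alias imported above), which draws the same kind of Q-Q plot against N(0,1) and also returns the slope and intercept of the fitted line:
# Q-Q plot of the standardized anomalies via scipy
plt.figure(figsize=(10, 8))
stt.probplot(tstand, dist="norm", plot=plt)
plt.show()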
#Fig 3.6
# Display the different cloudiness climates of three cities
labels = ['Las Vegas', 'San Diego', 'Seattle']
clear = [0.58,0.40,0.16]
cloudy = [0.42,0.6,0.84]
# Create the label locations
x = np.arange(len(labels))
# Set the width of the bars
width = 0.35
# Prepare figure
fig, ax = plt.subplots(figsize=(12,10))
# Adding data to figure
rects1 = ax.bar(x - width/2, clear, width, label='Clear')
rects2 = ax.bar(x + width/2, cloudy, width, label='Cloudy')
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Probability')
ax.set_title('Probability Distribution of Cloudiness')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
# Creating a function to label each rectangle with its respective frequency
def autolabel(rects):
    """Attach a text label above each bar in *rects*, displaying its height."""
    for rect in rects:
        height = rect.get_height()
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=15)
# Labeling each rectangle
autolabel(rects1)
autolabel(rects2)
plt.show()
PDF of the Standard Normal Distribution
#Fig 3.7
# Set mu and sigma
mu = 0
sigma = 1
# Create normal probability function
def intg(x):
    return 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))
x = np.linspace(-3, 3)
y = intg(x)
# Set up figure
fig, ax = plt.subplots(figsize=(12,10))
ax.plot(x, y, 'black', linewidth=1)
ax.set_ylim(bottom=0)
# Set desired quantities
a_ , b_ = -3,-1.5
a, b = -1.2, 3
# Make the shaded region
ix_ = np.linspace(a_,b_)
ix = np.linspace(a, b)
# Using Integrating function
iy_ = intg(ix_)
iy = intg(ix)
# Creating desired vertical separations
verts_ = [(a_, 0), *zip(ix_, iy_), (b_, 0)]
verts = [(a, 0), *zip(ix, iy), (b, 0)]
# Creating desired polygons
poly_ = Polygon(verts_, facecolor='lightblue', edgecolor='black', linewidth=0.5)
poly = Polygon(verts, facecolor='lightblue', edgecolor='black', linewidth=0.5)
# Adding polygon
ax.add_patch(poly_)
ax.add_patch(poly)
ax.text(-0.4,0.25,"Area = 1", fontsize=20)
ax.text(-1.78, .05, "$f(x)$", fontsize=15)
ax.text(-1.62, .01, "$x$", fontsize=15)
ax.text(-1.45, .08, "$dx$", fontsize=15)
ax.text(-1.2, .01, "$x+dx$", fontsize=15)
ax.text(-0.4, .09, '$\int^{\infty}_{-\infty}f(x)dx=1$', fontsize=15)
# Creating desired arrow
plt.arrow(-2,.2,0.65,-0.1)
ax.text(-2.3,0.2,"$dA=f(x)dx$", fontsize=15, fontweight="bold")
# Label figure
plt.title("PDF of Standard Normal Distribution")
plt.xlabel("Random Variable x")
plt.ylabel("Probability Density")
plt.show()
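A quick numerical check of the "Area = 1" annotation, as a sketch: integrate the pdf over the whole real line with quad.
# The total area under the standard normal pdf should be 1
area, err = quad(intg, -np.inf, np.inf)
print("Total area under the pdf:", round(area, 6))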
Normal Distribution
#Fig 3.8
# Creation of five different normal distributions
bins = np.linspace(-8,8,200)
mu, sigma = 0,1
y1 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 0,2
y2 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 0,1/2
y3 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 3,1
y4 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = -4,1
y5 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
df = pd.DataFrame({'x': bins, 'y1': y1, 'y2': y2, 'y3': y3, 'y4': y4, 'y5': y5})
plt.figure(figsize=(12,10))
# Multiple line plot
plt.plot( 'x', 'y1', data=df, marker='', markerfacecolor='red', markersize=8, color='red'
, linewidth=4, label="$\mu = 0$ and $\sigma = 1$")
plt.plot( 'x', 'y2', data=df, marker='', color='blue', linewidth=2, label="$\mu = 0$ and $\sigma = 2$")
plt.plot( 'x', 'y3', data=df, marker='', color='black', linewidth=2, label="$\mu = 0$ and $\sigma = 1/2$")
plt.plot( 'x', 'y4', data=df, marker='', color='purple', linewidth=2, label="$\mu = 3$ and $\sigma = 1$")
plt.plot( 'x', 'y5', data=df, marker='', color='green', linewidth=2, label="$\mu = -4$ and $\sigma = 1$")
plt.legend(loc = 'upper right')
plt.title("Normal Distribution N$(\mu,\sigma^2)$")
plt.xlabel("Random Variable X")
plt.ylabel("Probability Density")
plt.grid()
# Verifying the normal equation
mu = 0
sigma = 1
def intg(x):
    return 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))
i = quad(intg, -2, 2)
print("The integral evaluated to:", round(i[0],6), "and the absolute error is:", i[1])
The integral evaluated to: 0.9545 and the absolute error is: 1.8403560456416157e-11
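Extending the same check to one, two, and three standard deviations reproduces the familiar 68-95-99.7 rule; a minimal sketch:
# Probabilities within 1, 2, and 3 standard deviations of the mean
for k in (1, 2, 3):
    p, _ = quad(intg, -k, k)
    print("P(-{0} < X < {0}) = {1}".format(k, round(p, 4)))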
Student's t-distribution
#Fig 3.9
x = np.linspace(-4,4,200)
# SciPy's t-distribution does not accept infinity as the degrees-of-freedom
# parameter, so we use 100 million to approximate the t-distribution with
# infinite degrees of freedom (dof).
myinf = 100000000
plt.figure(figsize=(12,10))
# Plot the t-distribution with various degrees of freedom
plt.plot(x, t(3).pdf(x),linewidth=4,color="red", label="$df=3$")
plt.plot(x, t(1).pdf(x),linewidth=2,color="blue", label="$df=1$")
plt.plot(x, t(2).pdf(x),linewidth=2,color="black", label="$df=2$")
plt.plot(x, t(6).pdf(x),linewidth=2,color="purple", label="$df=6$")
plt.plot(x, t(myinf).pdf(x),linewidth=2,color="green", label="$df=\infty$")
# Creating legend
plt.legend()
# Labeling plot
plt.xlabel("Random Variable t")
plt.ylabel("Probability Density")
plt.title("Student t-distribution T$(t,df)$")
plt.grid()
plt.show()
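The df = infinity curve is the standard normal; a sketch confirming the convergence numerically:
# Maximum gap between the t density and the standard normal density
for df in (1, 10, 100, myinf):
    gap = np.max(np.abs(t(df).pdf(x) - norm.pdf(x)))
    print("df =", df, ": max |t pdf - normal pdf| =", round(gap, 5))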
# Confidence interval simulation
# True mean
mu = 14
# True standard deviation
sig = 0.3
# Sample size
n = 50
d = 1.96*sig/n**(1/2)
lowerlim = mu-d
upperlim = mu+d
# Number of simulations
ksim = 10000
# Simulation counter
k = 0
xbar = []
for i in range(ksim):
    x = np.mean(np.random.normal(mu,sig,n))
    xbar.append(x)
    if (x >= lowerlim) & (x <= upperlim):
        k = k+1
print("Simulation Counter:", k,"and Total Number of Simulations:", ksim)
print("Lower CI:", round(lowerlim,2),"and Upper CI:", round(upperlim,2))
Simulation Counter: 9529 and Total Number of Simulations: 10000
Lower CI: 13.92 and Upper CI: 14.08
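The same experiment can be written without an explicit loop; a vectorized sketch that simulates all sample means at once and counts the coverage:
# Simulate ksim samples of size n in one array and check CI coverage
means = np.random.normal(mu, sig, (ksim, n)).mean(axis=1)
coverage = np.mean((means >= lowerlim) & (means <= upperlim))
print("Empirical coverage:", coverage)  # should be close to 0.95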
#Fig 3.10
# Confidence Interval Simulation
np.random.seed(19680801)
num_bins = 51
# Create histogram with parameters
fig = plt.subplots(figsize=(12,10))
n, bins,patches = plt.hist(xbar,num_bins, color='blue', alpha=0.5, histtype='bar', ec='black')
# Label histogram
plt.title("Histogram of the Simulated Sample Mean Temperatures")
plt.ylabel("Frequency")
plt.xlabel("Temperature [$\degree$C]")
# Show the plot with extra information
print("95% Confidence Interval (13.92,14.08)")
plt.show()
95% Confidence Interval (13.92,14.08)
#Fig 3.11
# Plot confidence intervals and tail probabilities
# Set mu and sigma
mu = 0
sigma = 1
# Create normal probability function
def intg(x):
    return 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))
# Set desired quantities
a_ , b_ = -3,3
a, b = -1.96, 1.96
a1, b1 = -1.0, 1.0 # integral limits
x = np.linspace(-3, 3)
y = intg(x)
# Set up figure
fig, ax = plt.subplots(figsize=(12,10))
ax.plot(x, y, 'black', linewidth=1)
ax.set_ylim(bottom=0)
# Make the shaded region
ix_ = np.linspace(a_,b_)
ix = np.linspace(a, b)
ix1 = np.linspace(a1, b1)
# Using Integrating function
iy_ = intg(ix_)
iy = intg(ix)
iy1 = intg(ix1)
# Creating desired vertical separations
verts_ = [(a_, 0), *zip(ix_, iy_), (b_, 0)]
verts = [(a, 0), *zip(ix, iy), (b, 0)]
verts1 = [(a1, 0), *zip(ix1, iy1), (b1, 0)]
# Creating desired polygons
poly_ = Polygon(verts_, facecolor='red', edgecolor='black', linewidth=0.5)
poly = Polygon(verts, facecolor='lightblue', edgecolor='black')
poly1 = Polygon(verts1, facecolor='white', edgecolor='black')
# Adding polygon
ax.add_patch(poly_)
ax.add_patch(poly)
ax.add_patch(poly1)
# Adding appropriate text
ax.text(0.5 * (a + b), 0.24, "Probability = 0.68",
horizontalalignment='center', verticalalignment="center", fontsize=15, fontweight="bold")
ax.text(-1.55, .02, "SE", fontsize=15)
ax.text(1.40, .02, "SE", fontsize=15)
ax.text(-0.9, .02, "SE", fontsize=15)
ax.text(0.65, .02, "SE", fontsize=15)
ax.text(-0.05, .02, r'$\bar{x}$', fontsize=15)
# Creating desired arrows
plt.arrow(-2.2,.02,-0.5,0.05)
ax.text(-3,0.08,"Probability \n = 0.025", fontsize=15, fontweight="bold")
# Label the figure
plt.ylabel("Probability Density")
plt.xlabel("True Mean as a Random Variable")
plt.title("Confidence Intervals and Confidence Levels")
ax.set_xticks((a,a1,0,b1,b))
ax.set_xticklabels(('','','','',''))
plt.show()
Confidence interval for NOAAGlobalTemp 1880-2015
# Estimate the mean and error bar for a large sample
dat1 = pd.read_table('http://shen.sdsu.edu/data/' +
'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt',
header = None, delim_whitespace = True)
dat1 = dat1.values
tmean15 = dat1[:,2]
MeanEst = np.mean(tmean15)
sd1 = np.std(tmean15)
StandErr = sd1/(len(tmean15))**(1/2)
ErrorMar = 1.96*StandErr
print("Mean Estimate:", MeanEst, "\n Lower bound:", MeanEst-ErrorMar, "\n Upper bound:", MeanEst+ErrorMar)
Mean Estimate: -0.19504468609593198 
 Lower bound: -0.21093097749081338 
 Upper bound: -0.17915839470105058
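Equivalently, a sketch using scipy: norm.interval returns the same 95% bounds from the mean estimate and its standard error.
# 95% confidence interval from the normal distribution
lower, upper = norm.interval(0.95, loc=MeanEst, scale=StandErr)
print("95% CI:", lower, upper)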
Statistical inference for xbar using a z-score
#Fig 3.12
# Setting mu and sigma
mu = 0
sigma = 1
# Creating a normal probability density function
def intg(x):
    return 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))
# Creating desired quantities
a_ , b_ = -3,-2.4
a, b = -1.96, 3
x = np.linspace(-3, 3)
y = intg(x)
# Creating figure
fig, ax = plt.subplots(figsize=(12,10))
ax.plot(x, y, 'black', linewidth=1)
ax.set_ylim(bottom=0)
# Make the shaded region
ix_ = np.linspace(a_,b_)
ix = np.linspace(a, b)
# Integration between appropriate values
iy_ = intg(ix_)
iy = intg(ix)
# Creating array of vertical separations
verts_ = [(a_, 0), *zip(ix_, iy_), (b_, 0)]
verts = [(a, 0), *zip(ix, iy), (b, 0)]
# Creating shaded polygons
poly_ = Polygon(verts_, facecolor='lightblue', edgecolor='black', linewidth=0.5)
poly = Polygon(verts, facecolor='#A4C9A4', edgecolor='black')
# Adding desired polygons
ax.add_patch(poly_)
ax.add_patch(poly)
# Adding text
ax.text(-0.05, 0.1, "$H_0$ Probability 0.975",
horizontalalignment='center', verticalalignment="center", fontsize=15)
# Creating appropriately placed arrow
plt.arrow(-2.50,.02,-0.45,0.05)
ax.text(-3,0.08,"p-value", fontsize=15)
# Label plot
plt.ylabel("Probability Density")
plt.xlabel("x: Standard Normal Random Variable")
plt.title("z-score, p-value, and Significance Level")
plt.ylim((0,.5))
# Set various parameters
ax.set_xticks((b_,a))
ax.set_xticklabels(('z','$z_{0.025}$'),fontsize=15)
ax.text(1.15,0.3,"Where z is your z-score \n and $z_{0.025}=-1.96$", fontsize=15)
ax.set_yticks(())
ax.set_yticklabels((""))
ax.axvline(x=-1.96, linewidth=2, color='r')
plt.arrow(-3,0.43,0.9,0, color="lightblue", linewidth=5, head_width=0.002, head_length=0.02)
plt.text(-3,0.46,"$H_1$ region", fontsize=15)
plt.arrow(3,0.43,-4.8,0, color='#A4C9A4', linewidth=5, head_width=0.002, head_length=0.02)
plt.text(1.75,0.46,"$H_0$ region", fontsize=15)
plt.show()
t-Test for the Mean of a Small Sample
# Hypothesis testing for NOAAGlobalTemp 2006-2015
dat1 = pd.read_table('http://shen.sdsu.edu/data/' +
'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt',
header = None, delim_whitespace = True)
dat1 = dat1.values
# Python is 0-indexed: rows 126:136 give the ten values for 2006-2015
tm0615 = dat1[126:136,2]
MeanEst = np.mean(tm0615)
sd1 = np.std(tm0615)
n = 10
t_score = (MeanEst-0)/ (sd1/np.sqrt(n))
print(" Mean:", MeanEst, "\n","SD:",sd1, "\n","t-score:", t_score)
 Mean: -0.5753087777777778 
 SD: 0.08889553942037536 
 t-score: -20.465437383334567
# Approximate the p-value by the t density at the t-score
# (an exact one-sided p-value is t.cdf(t_score, df=n-1); both are tiny here)
t.pdf(t_score,df=n-1)
1.5984763823270562e-09
# Yields the critical t-score
t.ppf(1-0.025,n-1)
2.2621571627409915
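As a cross-check, scipy's one-sample t test runs the same hypothesis test (true mean = 0) directly; a sketch (ttest_1samp uses the ddof=1 sample standard deviation, so its t statistic differs slightly from the manual one above):
# One-sample t test of H0: mean = 0
from scipy.stats import ttest_1samp
tstat, pval = ttest_1samp(tm0615, 0)
print("t statistic:", tstat, "two-sided p-value:", pval)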
Comparing Temperatures of Two Short Periods
# Hypothesis testing for Global Temp for 1981-1990 and 1991-2000
dat1 = pd.read_table('http://shen.sdsu.edu/data/' +
'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt',
header = None, delim_whitespace = True)
dat1 = dat1.values
# Separating data into 2 sets of ten values each
tm8190 = dat1[101:111,2]
tm9100 = dat1[111:121,2]
# Finding mean of data
barT1 = np.mean(tm8190)
barT2 = np.mean(tm9100)
# Finding standard deviation
S1sd = np.std(tm8190)
S2sd = np.std(tm9100)
# Setting sample size
n1 = n2 = 10
# Pooled sample standard deviation (note the division inside the square root)
Spool = np.sqrt(((n1-1)*S1sd**2 + (n2-1)*S2sd**2)/(n1+n2-2))
# Yields t-score and boundary conditions for critical value
T = (barT2-barT1)/(Spool*np.sqrt(1/n1 + 1/n2))
tlow = t.ppf(0.025,n1+n2-2)
tup = t.ppf(1-0.025,n1+n2-2)
print(" t-score:", round(T,5), "\n",
"tlow =", tlow, "\n",
"tup=", tup)
 t-score: -15.95129 
 tlow = -2.10092204024096 
 tup= 2.10092204024096
# Approximate the p-value by the t density at the t-score
print("p-value:",t.pdf(T,n1+n2-2))
p-value: 2.4257289463517674e-12
# Prints means of the separated data
print(" 1981-1990 temp =", barT1, "\n", "1991-2000 temp =", barT2)
 1981-1990 temp = -0.2861512 
 1991-2000 temp = -0.42506244444444446
# Yields the difference between the two period means
barT2-barT1
-0.13891124444444447
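As a cross-check, scipy's pooled two-sample t test compares the two periods directly; a sketch (ttest_ind uses ddof=1 standard deviations, so its t statistic can differ slightly from the manual computation above):
# Pooled two-sample t test between 1991-2000 and 1981-1990
from scipy.stats import ttest_ind
tstat, pval = ttest_ind(tm9100, tm8190)
print("t statistic:", tstat, "two-sided p-value:", pval)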
dat1 = pd.read_table('http://shen.sdsu.edu/data/' +
'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt',
header = None, delim_whitespace = True)
dat1 = dat1.values
tm = dat1[:,2]
x = np.linspace(1880,2015,len(tm))
lm = LinearRegression()
lm.fit(x.reshape(-1, 1), tm)
predictions = lm.predict(x.reshape(-1, 1))
print("Coeff. of Linear Model:", lm.coef_)
print("Intercept of Linear Model: ", lm.intercept_)
print("The r-squared of Model: ", lm.score(x.reshape(-1, 1),tm))
Coeff. of Linear Model: [0.00698074]
Intercept of Linear Model:  -13.790040882746423
The r-squared of Model:  0.6848420371033697
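For a fuller regression report, a sketch using statsmodels OLS reproduces the slope, intercept, and R-squared above and adds standard errors and p-values:
# Ordinary least squares with a full summary table
X = sm.add_constant(x)
model = sm.OLS(tm, X).fit()
print(model.summary())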