This Version 3.0 is authored by Briana Ramirez, edited by Samuel Shen. Liu Yang, Sandra Villamar, and Joaquin Stawsky contributed codes to this version.
Video tutorial for the python code can be found at the following URL: https://www.youtube.com/channel/UC7D9i0kBMzPTHyEhU0h6W9g
This version is based upon the previous version described in the following box.
######################################################################################################################
#This Python Code for Climate Science is written for the book entitled "Climate Mathematics: Theory and Applications"#
#A Cambridge University Press book authored by SSP Shen and RCJ Somerville in July 2019 #
#The Python codes were based on the R codes written by Samuel Shen Distinguished Professor, #
#San Diego State University, USA and were translated from R by Louis Selstad, Stephen Shen, #
#Gregori Clarke, and Dakota Newmann and edited by Samuel Shen. #
######################################################################################################################
#FIRST TIME Python users*****
#These package need to be installed (on the terminal or anaconda interface) before importing them below.
#Follow this tutorial for package installation before
# https://towardsdatascience.com/importerror-no-module-named-xyz-45e4a5339e1b
#Change your file path to the folder where your downloaded data is stored
#MAC HELP: https://support.apple.com/guide/mac-help/go-directly-to-a-specific-folder-on-mac-mchlp1236/mac
#PC HELP: https://www.sony.com/electronics/support/articles/00015251
import os
# os.chdir("/Users/sshen/climmath/data")
os.chdir('/Users/HP/Documents/sshen/climmath/data')
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
return false;
}
#Style Dictionary to standardize plotting scheme between different python scripts
import matplotlib.pyplot as plt
styledict = {'xtick.labelsize':20,
'xtick.major.size':9,
'xtick.major.width':1,
'ytick.labelsize':20,
'ytick.major.size':9,
'ytick.major.width':1,
'legend.framealpha':0.0,
'legend.fontsize':15,
'axes.labelsize':20,
'axes.titlesize':25,
'axes.linewidth':2,
'figure.figsize':(12,8),
'savefig.format':'jpg'}
plt.rcParams.update(**styledict)
#Function that creates personalized discrete Colormap
import numpy as np
from matplotlib import cm as cm1
from matplotlib.colors import ListedColormap, to_rgba
def newColMap(colors):
"""
This function creates a new color map from a list of colors given
as a parameter. Recommended length of list of colors is at least 6.
"""
first = np.repeat([to_rgba(colors[0])], 2, axis = 0)
last = np.repeat([to_rgba(colors[-1])], 2, axis = 0)
v = cm1.get_cmap('viridis', 16*(len(colors)-2))
newcolors = v(np.linspace(0, 1, 16*(len(colors)-2)))
for (i, col) in enumerate(colors[1:-1]):
newcolors[16*i : 16*(i+1), :] = to_rgba(col)
return ListedColormap(np.append(np.append(first,newcolors, axis=0), last, axis=0))
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
import matplotlib.mlab as mlab
import pandas as pd
from mpl_toolkits.basemap import Basemap
from mpl_toolkits import mplot3d
from netCDF4 import Dataset as ds
from urllib import request
import scipy as sp
from scipy import stats as stt
from sklearn import datasets, linear_model
from sklearn.neighbors import KernelDensity as kd
from sklearn.linear_model import LinearRegression
from random import randint
from scipy.stats import norm
import statsmodels
import sklearn
import math
import statistics
import sympy as sy
from sympy import symbols, diff
import statsmodels.api as sm
from datetime import date
from scipy.stats import norm, kurtosis
from sklearn.linear_model import LinearRegression
import seaborn as sns
import statsmodels.api as sm
import pylab as py
from scipy.stats import norm, t
from scipy.integrate import quad
import warnings
warnings.filterwarnings("ignore")
# Download the NCEI spatial average time series of monthly data
# by internet search using the file name.
aveNCEI = pd.read_table('http://shen.sdsu.edu/data/' +
'aravg.mon.land_ocean.90S.90N.v4.0.1.201703.asc.txt',
header = None, delim_whitespace = True)
aveNCEI = aveNCEI.values
aveNCEI
array([[ 1.88000e+03, 1.00000e+00, -2.54882e-01, ..., 1.25049e-01, 1.18830e-01, 1.50000e-02], [ 1.88000e+03, 2.00000e+00, -3.90030e-01, ..., 1.69170e-01, 8.25840e-02, 8.65860e-02], [ 1.88000e+03, 3.00000e+00, -4.14509e-01, ..., 1.28022e-01, 8.60760e-02, 4.19460e-02], ..., [ 2.01700e+03, 1.00000e+00, 6.36712e-01, ..., 1.25049e-01, 1.28754e-01, 1.50000e-02], [ 2.01700e+03, 2.00000e+00, 6.96222e-01, ..., 1.69170e-01, 1.81365e-01, 1.50000e-02], [ 2.01700e+03, 3.00000e+00, 7.69576e-01, ..., 1.28022e-01, 1.61637e-01, 3.36150e-02]])
# Yields the dimensions of data
aveNCEI.shape
(1647, 10)
# Take only the second column of this data matrix
tmean15 = aveNCEI[:,2]
# Yields the first 6 values
tmean15[:6]
array([-0.254882, -0.39003 , -0.414509, -0.314104, -0.327618, -0.421961])
# Yields the sample Mean
np.mean(tmean15)
-0.19504468609593198
# Yields the sample Standard Deviation
np.std(tmean15)
0.3289373979136597
# Yields the sample Variance
np.var(tmean15)
0.10819981174620931
# Yields the sample Skewness
# pd.DataFrame(tmean15).skew() # another way
stt.skew(tmean15)
0.6570797310887514
# Yields the sample Kurtosis
kurtosis(tmean15)
-0.09874723968157051
# Yields the sample Median
np.median(tmean15)
-0.263387
# Yields desired quantiles
pd.DataFrame(tmean15).quantile([0, 0.25, 0.5, 0.75, 1])
0 | |
---|---|
0.00 | -0.934475 |
0.25 | -0.440763 |
0.50 | -0.263387 |
0.75 | 0.006320 |
1.00 | 0.949248 |
# Creating a year sequence
timemo = np.linspace(1880, 2015, 1647)
# Creating Linear Regression
lm = LinearRegression()
# Training the model
lm.fit(timemo.reshape(-1, 1), tmean15)
# Using the model to make predictions
predictions = lm.predict(timemo.reshape(-1, 1))
print("Coeff. of Linear Model:", lm.coef_)
print("Intercept of Linear Model: ", lm.intercept_)
Coeff. of Linear Model: [0.00698074] Intercept of Linear Model: -13.790040882746423
#Fig 3.1
# Linear Model Visualization
plt.close()
plt.plot(timemo, tmean15, '-', color="black")
plt.title('Global Annual Mean Land and Ocean Surface \n Temperature Anomalies: 1880-2015')
plt.xlabel('Year')
plt.ylabel('Temperature Anomaly [$\degree$C]')
plt.plot(timemo, predictions, '-', color="red")
font = {'family': 'serif',
'color': 'red',
'weight': 'normal',
'size': 18,
}
plt.text(1885, .52, 'Linear trend: 0.70 [$\degree$C] per Century', fontdict = font)
plt.grid()
plt.show()
# Yields the covariance
np.cov(timemo,tmean15)[0][1]
10.621333849840694
# Yields the correlation
np.corrcoef(timemo,tmean15)[0][1]
0.8275518334843865
# Verify the correlation result
x = timemo
y = tmean15
sdx = np.std(x)
sdy = np.std(y)
cxy = np.cov(x,y)
rxy = cxy / (sdx*sdy)
rxy[0][1]
0.8280545988753246
# Verify the trend result
rxy*sdy/sdx
array([[1.00060753e+00, 6.98498363e-03], [6.98498363e-03, 7.11994446e-05]])
# Verify the trend result
cxy/(sdx**2)
array([[1.00060753e+00, 6.98498363e-03], [6.98498363e-03, 7.11994446e-05]])
Histogram
#Fig 3.2
# Plot a histogram of the tmean15 data
h, mids = np.histogram(tmean15)
xfit = np.linspace(np.min(tmean15),np.max(tmean15),30)
# Set histograms bin width
areat = (np.diff(mids[0:2])*np.size(tmean15))
# Create the fitter y-values for the normal curve
yfit = areat*(norm.pdf(xfit,np.mean(tmean15),np.std(tmean15)))
plt.figure(figsize=(10,8))
plt.hist(tmean15,facecolor='w',edgecolor="k")
# Plot the normal fit on the histogram
plt.plot(xfit,yfit,color = "blue")
plt.title("Histogram of 1880-2015 Temperature Anomalies")
plt.xlabel("Temperature Anomalies [$\degree$C]")
plt.ylabel("Frequency")
plt.show()
Box Plot
#Fig 3.3
# Boxplot of the global annual mean temperature anomalies from 1880-2015
plt.figure(figsize=(7, 9))
plt.boxplot(tmean15)
plt.ylabel('Temperature Anomalies [$\degree$C]')
plt.show()
Scatter Plot
#Fig 3.4
# Retrieving all necessary data
ust = pd.read_csv('USJantemp1951-2016-nohead.csv'
,header=None)
soi = pd.read_csv('soi-data-nohead.csv'
,header=None)
soid = np.array(soi.iloc[:,1])
soim = np.reshape(soid,(66,12))
# Make the SOI into a matrix with each month as a column
# Take the first column for January SOI
soij = soim[:,0]
# Take the third column for January US temperature data
ustj = np.array(ust.iloc[:,2])
# Set up figure
plt.figure(figsize=(12,7))
# Create scatter plot
plt.scatter(soij,ustj,color='k')
# Create a linear trendline for the data
lin = np.polyfit(soij, ustj, 1)
linreg = np.poly1d(lin)
plt.plot(soij,linreg(soij),'r')
plt.ylabel('US Temp. Anomalies [$\degree$F]')
plt.xlabel('SOI [dimensionless]')
plt.title("Scatter plot between Jan. SOI and US Temp.", pad = 10)
plt.grid()
plt.show()
Q-Q Plot
#Fig 3.5
# Q-Q plot for the standardized temperature anomalies
tstand = (tmean15-np.mean(tmean15))/np.std(tmean15)
# Simulate 136 points that follow a norm(0,1) distribution
qn = np.random.normal(0,1,136)
# Sort the points
qns = np.sort(qn)
# Create the Q-Q line
# pp = sm.ProbPlot(qns, fit=True)
qq = sm.qqplot(tstand, line ='45', marker = 'o', markerfacecolor = 'w', markeredgecolor = 'k')
# qq = pp.qqplot(marker='.', markerfacecolor='k', markeredgecolor='k', alpha=0.3)
# sm.qqline(qq.axes[0], line='45', fmt='k--')
# Create the Q-Q plot
py.scatter(qns, -qns[::-1], color = "purple")
py.title("Q-Q Plot for the Standardized Global Temp. Anomalies vs N(0,1)", size = 20)
py.ylabel("Quantile of Temperature Anomalies", size = 19)
py.xlabel("Quantile of N(0,1)", size = 19)
py.grid()
py.show()
#Fig 3.6
# Display the different cloudiness climates of three cities
labels = ['Las Vegas', 'San Diego', 'Seattle']
clear = [0.58,0.40,0.16]
cloudy = [0.42,0.6,0.84]
# Create the label locations
x = np.arange(len(labels))
# Set the width of the bars
width = 0.35
# Prepare figure
fig, ax = plt.subplots(figsize=(12,10))
# Adding data to figure
rects1 = ax.bar(x - width/2, clear, width, label='Clear')
rects2 = ax.bar(x + width/2, cloudy, width, label='Cloudy')
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Probability')
ax.set_title('Probability Distribution of Cloudiness')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
# Creating a function to label each rectange with its respective frequency
def autolabel(rects):
"""Attach a text label above each bar in *rects*, displaying its height."""
for rect in rects:
height = rect.get_height()
ax.annotate('{}'.format(height),
xy=(rect.get_x() + rect.get_width() / 2, height),
xytext=(0, 3), # 3 points vertical offset
textcoords="offset points",
ha='center', va='bottom', fontsize=15)
# Labeling each rectangle
autolabel(rects1)
autolabel(rects2)
plt.show()
PDF of the Standard Normal Distribution
#Fig 3.7
# Set mu and sigma
mu = 0
sigma = 1
# Create normal probability function
def intg(x):
return 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (x - mu)**2 / (2 * sigma**2))
x = np.linspace(-3, 3)
y = intg(x)
# Set up figure
fig, ax = plt.subplots(figsize=(12,10))
ax.plot(x, y, 'black', linewidth=1)
ax.set_ylim(bottom=0)
# Set desired quantities
a_ , b_ = -3,-1.5
a, b = -1.2, 3
# Make the shaded region
ix_ = np.linspace(a_,b_)
ix = np.linspace(a, b)
# Using Integrating function
iy_ = intg(ix_)
iy = intg(ix)
# Creating desired vertical separations
verts_ = [(a_, 0), *zip(ix_, iy_), (b_, 0)]
verts = [(a, 0), *zip(ix, iy), (b, 0)]
# Creating desired polygons
poly_ = Polygon(verts_, facecolor='lightblue', edgecolor='black', linewidth=0.5)
poly = Polygon(verts, facecolor='lightblue', edgecolor='black', linewidth=0.5)
# Adding polygon
ax.add_patch(poly_)
ax.add_patch(poly)
ax.text(-0.4,0.25,"Area = 1", fontsize=20)
ax.text(-1.78, .05, "$f(x)$", fontsize=15)
ax.text(-1.62, .01, "$x$", fontsize=15)
ax.text(-1.45, .08, "$dx$", fontsize=15)
ax.text(-1.2, .01, "$x+dx$", fontsize=15)
ax.text(-0.4, .09, '$\int^{\infty}_{-\infty}f(x)dx=1$', fontsize=15)
# Creating desired arrow
plt.arrow(-2,.2,0.65,-0.1)
ax.text(-2.3,0.2,"$dA=f(x)dx$", fontsize=15, fontweight="bold")
# Label figure
plt.title("PDF of Standard Normal Distribution")
plt.xlabel("Random Variable x")
plt.ylabel("Probability Density")
plt.show()
Normal Distribution
#Fig 3.8
# Creation of five different normal distributions
bins = np.linspace(-8,8,200)
mu, sigma = 0,1
y1 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 0,2
y2 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 0,1/2
y3 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = 3,1
y4 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
mu, sigma = -4,1
y5 = 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) )
df = pd.DataFrame({'x': bins, 'y1': y1, 'y2': y2, 'y3': y3, 'y4': y4, 'y5': y5})
plt.figure(figsize=(12,10))
# Multiple line plot
plt.plot( 'x', 'y1', data=df, marker='', markerfacecolor='red', markersize=8, color='red'
, linewidth=4, label="$\mu = 0$ and $\sigma = 1$")
plt.plot( 'x', 'y2', data=df, marker='', color='blue', linewidth=2, label="$\mu = 0$ and $\sigma = 2$")
plt.plot( 'x', 'y3', data=df, marker='', color='black', linewidth=2, label="$\mu = 0$ and $\sigma = 1/2$")
plt.plot( 'x', 'y4', data=df, marker='', color='purple', linewidth=2, label="$\mu = 3$ and $\sigma = 1$")
plt.plot( 'x', 'y5', data=df, marker='', color='green', linewidth=2, label="$\mu = -4$ and $\sigma = 1$")
plt.legend(loc = 'upper right')
plt.title("Normal Distribution N$(\mu,\sigma^2)$")
plt.xlabel("Random Variable X")
plt.ylabel("Probability Density")
plt.grid()