This Version 3.0 is authored by Briana Ramirez, edited by Samuel Shen. Liu Yang, Sandra Villamar, and Joaquin Stawsky contributed codes to this version.
Video tutorial for the python code can be found at the following URL: https://www.youtube.com/channel/UC7D9i0kBMzPTHyEhU0h6W9g
This version is based upon the previous version described in the following box.
######################################################################################################################
#This Python Code for Climate Science is written for the book entitled "Climate Mathematics: Theory and Applications"#
#A Cambridge University Press book authored by SSP Shen and RCJ Somerville in July 2019 #
#The Python codes were based on the R codes written by Samuel Shen Distinguished Professor, #
#San Diego State University, USA and were translated from R by Louis Selstad, Stephen Shen, #
#Gregori Clarke, and Dakota Newmann and edited by Samuel Shen. #
######################################################################################################################
#FIRST TIME Python users*****
#These package need to be installed (on the terminal or anaconda interface) before importing them below.
#Follow this tutorial for package installation before
# https://towardsdatascience.com/importerror-no-module-named-xyz-45e4a5339e1b
#Change your file path to the folder where your downloaded data is stored
#MAC HELP: https://support.apple.com/guide/mac-help/go-directly-to-a-specific-folder-on-mac-mchlp1236/mac
#PC HELP: https://www.sony.com/electronics/support/articles/00015251
import os
# os.chdir("/Users/sshen/climmath/data")
os.chdir('/Users/HP/Documents/sshen/climmath/data')
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
return false;
}
#Style Dictionary to standardize plotting scheme between different python scripts
import matplotlib.pyplot as plt
styledict = {'xtick.labelsize':20,
'xtick.major.size':9,
'xtick.major.width':1,
'ytick.labelsize':20,
'ytick.major.size':9,
'ytick.major.width':1,
'legend.framealpha':0.0,
'legend.fontsize':15,
'axes.labelsize':20,
'axes.titlesize':25,
'axes.linewidth':2,
'figure.figsize':(12,8),
'savefig.format':'jpg'}
plt.rcParams.update(**styledict)
#Function that creates personalized discrete Colormap
import numpy as np
from matplotlib import cm as cm1
from matplotlib.colors import ListedColormap, to_rgba
def newColMap(colors):
"""
This function creates a new color map from a list of colors given
as a parameter. Recommended length of list of colors is at least 6.
"""
first = np.repeat([to_rgba(colors[0])], 2, axis = 0)
last = np.repeat([to_rgba(colors[-1])], 2, axis = 0)
v = cm1.get_cmap('viridis', 16*(len(colors)-2))
newcolors = v(np.linspace(0, 1, 16*(len(colors)-2)))
for (i, col) in enumerate(colors[1:-1]):
newcolors[16*i : 16*(i+1), :] = to_rgba(col)
return ListedColormap(np.append(np.append(first,newcolors, axis=0), last, axis=0))
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
import matplotlib.mlab as mlab
import pandas as pd
from cartopy import crs, mpl
import matplotlib.ticker as mticker
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from mpl_toolkits.basemap import Basemap
from mpl_toolkits import mplot3d
from netCDF4 import Dataset as ds
from urllib import request
import scipy as sp
from scipy import stats as stt
from sklearn import datasets, linear_model
from sklearn.neighbors import KernelDensity as kd
from sklearn.linear_model import LinearRegression
from random import randint
from scipy.stats import norm
import statsmodels
import sklearn
import math
import statistics
import sympy as sy
from sympy import symbols, diff
import statsmodels.api as sm
from datetime import date
from scipy.stats import norm, kurtosis
from sklearn.linear_model import LinearRegression
import seaborn as sns
import pylab as py
from scipy.stats import norm, t
import warnings
warnings.filterwarnings("ignore")
# Creating a Matrix
np.mat([[1,2], [3,4]])
matrix([[1, 2], [3, 4]])
# Matrix Subtraction
np.mat([[1,1], [1,-1]]) - np.mat([[1,2], [3,0]])
matrix([[ 0, -1], [-2, -1]])
# Matrix Multiplication
np.mat([[1,1], [1,-1]]) * np.mat([[1,3], [2,4]])
matrix([[ 3, 7], [-1, -1]])
# Matrix Multiplication
np.mat([[1,3], [2,4]]) * np.mat([[1,1], [1,-1]])
matrix([[ 4, -2], [ 6, -2]])
# Yields matrix inverse
np.linalg.inv(np.mat([[1,1], [1,-1]]))
matrix([[ 0.5, 0.5], [ 0.5, -0.5]])
# Verify the result of inverse
np.mat([[0.5,0.5], [0.5,-0.5]]) * np.mat([[1,1], [1,-1]])
matrix([[1., 0.], [0., 1.]])
# Yields the solved linear system
np.linalg.solve(np.mat([[1,1], [1,-1]]),[20,4])
array([12., 8.])
# Creating Matrix A
A = np.mat([[1,2,3], [-1,0,1]])
A
matrix([[ 1, 2, 3], [-1, 0, 1]])
# Yields the Covariance Matrix of A
covm = (1/A.shape[1])*A*A.T
covm
matrix([[4.66666667, 0.66666667], [0.66666667, 0.66666667]])
# Initializing arbitrary vector u
u = np.mat([[1,-1]]).T
# Yields new vector v in a different direction
v = covm*u
v
matrix([[4.], [0.]])
# Yields eigenvalues and eigenvectors in one output
ew = np.linalg.eig(covm)
# Yields the eigenvalues of covm
ew[0]
array([4.77485177, 0.55848156])
# Yields the eigenvectors
ew[1]
matrix([[ 0.98708746, -0.16018224], [ 0.16018224, 0.98708746]])
# Verify the eigenvalue and eigenvectors
covm * ew[1] / ew[0]
# The first column corresponds to the first eigenvalue
matrix([[ 0.98708746, -0.16018224], [ 0.16018224, 0.98708746]])
# Develop a 2-by-3 space time data matrix for SVD
A = np.mat([[1,2,3],[-1,0,1]])
A
matrix([[ 1, 2, 3], [-1, 0, 1]])
# Perform SVD calculation
msvd = np.linalg.svd(A)
# Yields the D vector in SVD
d = msvd[1]
d
array([3.78477943, 1.29438969])
# Yields the U matrix in SVD
u = msvd[0].T
u
matrix([[-0.98708746, -0.16018224], [-0.16018224, 0.98708746]])
# Yields the V matrix of SVD
v = msvd[2]
v = np.delete(v,2,0).T
v
matrix([[-0.21848175, -0.88634026], [-0.52160897, -0.24750235], [-0.8247362 , 0.39133557]])
# Verify that A = UDV', where V' is the transpose of V.
# Yields Diagonal of D
diagd = np.diag(d)
# Yields the Transpose of V
vT = v.T
# Verify the SVD result
verim = u*diagd*vT
np.matrix.round(verim)
# This is the original matrix A
matrix([[ 1., 2., 3.], [-1., 0., 1.]])
# Yields the covariance of matrix A
covm = (1/A.shape[1])*A*A.T
# Yields the eigenvectors and eigenvalues of matrix A
eigcov = np.linalg.eig(covm)
# The first element of eigcov are the eigenvalues of matrix A
eigcov[0]
array([4.77485177, 0.55848156])
# Yields the eigenvectors of matrix A
eigcov[1]
matrix([[ 0.98708746, -0.16018224], [ 0.16018224, 0.98708746]])
# Verify eigenvalues with SVD
((d**2)/A.shape[1])
array([4.77485177, 0.55848156])
# Yields true eigenvalues of matrix A
eigcov[0]
array([4.77485177, 0.55848156])
Download pressure data for Tahiti from https://shen.sdsu.edu/data/PSTANDtahiti into your directory
Plot the SOI and the standardized SLP data at Darwin and Tahiti
# Set up File path
file = 'PSTANDtahiti.txt'
Pta = np.loadtxt(file,skiprows=0)
PTA = np.reshape(Pta[:,1:],(780))
# Make an array of years for our x-axis
xtime = np.linspace(1951,2016,(65*12)+1)[0:780]
#Fig 4.2 (a)
# Set up Figure
plt.figure(figsize = (15.,7.))
# Add data to plot
plt.plot(xtime,PTA,color='r')
# Add text to plot
plt.text(1950,2.7,'(a)', size = 20, alpha = 0.7)
# Label plot
plt.xlabel('Year')
plt.ylabel('Pressure')
plt.title('Standardized Tahiti SLP Anomalies')
plt.grid()
plt.show()
Download pressure data for Tahiti from http://shen.sdsu.edu/data/PSTANDdarwin.txt into your directory
# Upload and set up data
Pda = np.loadtxt('PSTANDdarwin.txt',skiprows=0)
PDA = np.reshape(Pda[:,1:],(780))
#Fig 4.2 (b)
plt.figure(figsize = (15.,7.))
plt.plot(xtime,PDA,color='b')
plt.text(1950, 2.7, '(b)', size = 20, alpha = 0.7)
plt.xlabel('Year')
plt.ylabel('Pressure')
plt.title('Standardized Darwin SLP Anomalies')
plt.grid()
plt.show()
Plot the SOI index
#Fig 4.2 (c)
# Set up figure
fig, soi = plt.subplots(figsize=(15,7))
# Ploting data against time
soi.plot(xtime,PTA-PDA, color = 'k')
soi.text(1950, 4.11, '(c)', size = 20, alpha = 0.7)
soi.axhline(y = 0.0, color = 'k', linestyle = '-')
soi.set_xlabel('Year')
soi.set_ylabel('SOI Index')
soi.grid()
# Making left Y axis
ax2 = soi.twiny()
# Plotting data against difference of Darwin and Tahiti standardized SLP data
ax2.plot(xtime,PTA-PDA, color='k')
ax2.set_xlabel('Year')
# Mirroring X axis for difference data
ax3 = soi.twinx()
ax3.plot(xtime,PTA-PDA, color='k')
ax3.set_ylabel('SOI Index')
# Adding some text
ax3.text(1960,4.6,"SOI = Tahiti Standardized SLP - Darwin Standardized SLP", fontsize=18)
plt.show()
CSOI and AMO time series comparison
#Fig 4.2 (d)
# Preparing variables
SOI = PTA-PDA
cnegsoi = -SOI.cumsum()
# Load AMO data
amodat = np.loadtxt('AMO1951-2015.txt',skiprows=0)
amots = amodat[:,1:13].T.flatten("F")
# Set up figures
fig, ax1 = plt.subplots(figsize=(15,7))
fig.text(0.123, 0.22, '(d)', size = 20, alpha = 0.7)
# Plot the data against CSOI Index
ax1.set_xlabel('Year')
ax1.set_ylabel('Negative CSOI Index')
ax1.plot(xtime, cnegsoi, color="purple", label="CSOI")
ax1.grid()
# Create a second, vertical axis that shares the same x-axis
ax2 = ax1.twinx()
# Set labels for new axis
ax2.set_ylabel('AMO Index')
ax2.plot(xtime, amots, color="darkgreen", label="AMO Index")
# Set legends
# ax1.legend(loc = (.169,.89))
# ax2.legend(loc = (.169,.84))
ax1.legend(loc = (.125,.13))
ax2.legend(loc = (.125,.08))
# Yields the full view of the right y-label
fig.tight_layout()
plt.title('CSOI and AMO Index Comparison', pad = 8)
plt.show()
Weighed SOI Computed by the SVD Method
# Yields transpose of the data
ptamonv = PTA.T
pdamonv = PDA.T
# Creates an array of arrays
ptada = [ptamonv,pdamonv]
# Compute SVD of ptada
svdptd_u, svdptd_s, svdptd_vh = np.linalg.svd(ptada,full_matrices=False)
# Verify SVD
recontada = np.dot(np.dot(svdptd_u,np.diag(svdptd_s)),svdptd_vh)
# Compute error of calculations
svd_error = ptada - recontada
# Error maximum
svd_error.max()
4.218847493575595e-15
# Error minimum
svd_error.min()
-7.105427357601002e-15
# Gather appropriate variables
Dd = np.diag(svdptd_s)
Uu = svdptd_u
Vv = svdptd_vh
# Set up time array
xtime = np.linspace(1951,2016,(65*12)+1)[0:780]
# Compute WSOI1 for plotting
wsoi1 = Dd[0,0] * (Vv)[0,:]
Plot WSOI1
#Fig 4.3 (a)
# Prepare figure
fig, pltt = plt.subplots(figsize = (15,7))
# Plot data
pltt.plot(xtime,wsoi1,color='k')
pltt.set_xlabel('Year')
pltt.set_ylabel('Weighted SOI1')
pltt.text(1980.2, -2.9, "Weighted SOI1 Index", fontsize=20)
pltt.text(1950.2, 3.7, "(a)", size = 16, alpha = 0.7)
pltt.grid()
Plot WSOI2
#Fig 4.3 (b)
# Set up data for WSOI2
wsoi2 = Dd[1,1] * Vv[1,:]
# Prepare figure
fig, wsoi = plt.subplots(figsize = (15,7))
# Plot data
wsoi.plot(xtime,wsoi2,color='darkblue')
wsoi.set_xlabel('Year')
wsoi.set_ylabel('WSOI2')
wsoi.text(1950.2, 1.8, "(b)", size = 16, alpha = 0.7)
wsoi.grid()
# Create second, horizontal axis
ax2 = wsoi.twiny()
ax2.plot(xtime,wsoi2,color='darkblue')
ax2.text(1970.2,1.76,"Weighted SOI2 Index", fontsize=20)
plt.show()
Plot comparison between CWSOI1 and the smoothed AMO index
#Fig 4.3(c)
# Create needed variables
cwsoi1 = np.cumsum(wsoi1)
xtime = np.linspace(1951,2016,(65*12)+1)[0:780]
# Prepare figure
fig, plott = plt.subplots(figsize=(15,7))
# Plot cumulative sums
plott.plot(xtime,cwsoi1,color='black',label='CWSOI1')
plott.set_xlabel('Year')
plott.set_ylabel('Cumulative Weighted SOI1', size = 20)
plott.grid()
plott.text(1950.2, -172.5, "(c)", size = 20, alpha = 0.7)
# Superimpose CSOI time series on this CWSOI1
cnegsoi = -np.cumsum(ptamonv-pdamonv)
plt.plot(xtime,cnegsoi,color='purple',label='CSOI')
plt.title('Comparison between CWSOI1 and the Smoothed AMO Index', size = 22)
ax2 = plott.twinx()
# Create 24-month ahead moving average of the monthly AMO index
i = 0
window_size = 24
moving_averages = []
while i < len(amots) - window_size + 1:
this_window = amots[i : i + window_size]
window_average = sum(this_window) / window_size
moving_averages.append(window_average)
i += 1
# Creating time sequence spanning from 1951 to 2016
xtime = np.linspace(1951,2016,(65*12)+1)[0:757]
# Plot the moving averages
ax2.plot(xtime, moving_averages,color='darkgreen',label='AMO Index')
ax2.set_ylabel('Smoothed AMO Index')
# Set legend
plott.legend(loc=(.17,.835))
ax2.legend(loc=(.17,.785))
# Ensures we can see all axis labels
fig.tight_layout()
plt.show()
Plot the Cumulative WSOI2: CWSOI2
# Create appropriate variables from SVD
uu, ss, vvh = np.linalg.svd(ptada,full_matrices=False)
PC = vvh
PC1 = PC[0,:]
PC2 = PC[1,:]
#Fig 4.3 (d)
# Prepare figure
plt.figure(figsize = (15,7))
# Prepare time sequence
xtime = np.linspace(1951,2016,(65*12)+1)[0:780]
# Plot the cumulative sum
plt.plot(xtime,PC2.cumsum(),color='b',label='Principal Component 2')
# Label appropriately
plt.xlabel('Year')
plt.ylabel('CWSOI2')
plt.title('CWSOI2 Index: The Cumulative PC2')
plt.text(1950.1, -5.9, '(d)', size = 16, alpha = 0.7)
plt.grid()
plt.show()
#Fig 4.4 (a)
# Display the two ENSO modes on a world map
# # Prepare the figure
# plt.figure(figsize=(15., 7.))
# # Using the basemap package to put the cylindrical projection map into the image
# wmap2 = Basemap(projection='cyl',llcrnrlat=-90.,urcrnrlat=90.,\
# llcrnrlon=0.,urcrnrlon=360.,resolution='c')
# # Draw coastlines, latitudes, and longitudes on the map
# wmap2.drawcoastlines(color='black', linewidth=1)
# wmap2.drawparallels(np.arange(-90.,91.,30.), labels = [1,0,0,1], fontsize=15)
# wmap2.drawmeridians(np.arange(00.,360.,30.), labels= [1,0,0,1], fontsize=15)
# # Add data to plot
# plt.scatter(131.,-12.,color='b',s=150)
# plt.scatter(220.,-20,color='r',s=150)
# # Add desired text to plot
# plt.text(100,-24, 'Darwin -0.79', color = 'b', fontsize = 22)
# plt.text(200,-32, 'Tahiti 0.61', color = 'r', fontsize = 22)
# plt.text(60,40, 'El Nino Southern Oscillation Model 1', color = 'm', fontsize = 30)
# plt.show()
# Prepare the Figure
plt.figure(figsize = (15., 7.))
# Using the CartoPy package to put the cylindrical projection map into the image
ax = plt.axes(projection = crs.PlateCarree())
ax.set_extent([-180, 180, -90, 90], crs = crs.PlateCarree())
# Draw coastlines, gridlines, longitudes, and latitudes on the map
ax.coastlines(color='black', linewidth=1)
gl = ax.gridlines(crs=crs.PlateCarree(), draw_labels=True,
linewidth=2, color='gray', alpha=0.5, linestyle='--')
gl.xlabels_top = True
gl.ylabels_left = True
gl.xlines = True
gridx = np.linspace(-180, 180, 13)
gl.xlocator = mticker.FixedLocator(gridx)
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
# Add data to plot
plt.scatter(131.,-12.,color='b',s=150)
plt.scatter(-129.,-30,color='r',s=150)
# Add descriptive text to plot
plt.text(100,-24, 'Darwin -0.79', color = 'b', fontsize = 22)
plt.text(-155,-42, 'Tahiti 0.61', color = 'r', fontsize = 22)
plt.text(-130,40, 'El Nino Southern Oscillation Model 1', color = 'm', fontsize = 30)
Text(-130, 40, 'El Nino Southern Oscillation Model 1')
#Fig 4.4 (b)
# Display the two ENSO modes on a world map
# Prepare figure
plt.figure(figsize=(15, 7))
# Using the basemap package to put the cylindrical projection map into the image
wmap2 = Basemap(projection='cyl',llcrnrlat=-90.,urcrnrlat=90.,\
llcrnrlon=0.,urcrnrlon=360.,resolution='c')
# Draw coastlines, latitudes, and longitudes on the map
wmap2.drawcoastlines(color='black', linewidth=1)
wmap2.drawparallels(np.arange(-90.,91.,30.), labels = [1,0,0,1], fontsize=15)
wmap2.drawmeridians(np.arange(-180.,181.,30.), labels= [1,0,0,1], fontsize=15)
# Add data to plot
plt.scatter(131.,-12.,color='r',s=150)
plt.scatter(220.,-20,color='r',s=150)
# Add desired text
plt.text(105,-24, 'Darwin 0.61', color = 'r', fontsize = 22)
plt.text(200,-32, 'Tahiti 0.79', color = 'r', fontsize = 22)
plt.text(60,40, 'El Nino Southern Oscillation Model 2', color = 'm', fontsize = 30)
plt.show()
Example 4.9
# Given the coordinates of 3 points
x1 = [1,2,3]
x2 = [2,1,3]
y = [-1,2,1]
# Put them into a dataframe
df = pd.DataFrame([x1,x2,y]).T
X_df = pd.DataFrame([x1,x2]).T
# Create the linear regression
lm = LinearRegression()
# Fit the linear regression
lm.fit(X_df, df[2])
# Display Coefficients
lm.coef_
array([ 1.66666667, -1.33333333])
# Verify the 3 points determining a plane
b1 = [i*1.667 for i in x1]
b2 = [j*1.333 for j in x2]
difference = []
zip_object = zip(b1, b2)
for b1_i, b2_i in zip_object:
difference.append(b1_i-b2_i)
difference
[-0.9989999999999999, 2.0010000000000003, 1.0020000000000007]
Example 4.10
u = [1,2,3,1]
v = [2,4,3,-1]
w = [1,-2,3,4]
df = pd.DataFrame([u,v,w]).T
X_df = pd.DataFrame([u,v]).T
lm = LinearRegression()
lm.fit(X_df, df[2])
lm.coef_
array([ 2. , -1.5])
lm.intercept_
0.9999999999999982
Example 4.11
# Show a general multivariate linear regression with 3 independent random variables
dat = np.mat([np.random.normal(0, 1, 4),np.random.normal(0, 1, 4),
np.random.normal(0, 1, 4),np.random.normal(0, 1, 4),
np.random.normal(0, 1, 4),np.random.normal(0, 1, 4),
np.random.normal(0, 1, 4),np.random.normal(0, 1, 4),
np.random.normal(0, 1, 4),np.random.normal(0, 1, 4)])
fdat = pd.DataFrame(dat)
lm = LinearRegression()
lm.fit(fdat.iloc[:,0:3], fdat.iloc[:,3])
lm.coef_
array([ 1.80217071, -1.52838385, 0.01316941])
lm.intercept_
1.0778300140269834