######################################################################################################################
#This Python Code for Climate Science is written for the book entitled "Climate Mathematics: Theory and Applications"#
#A Cambridge University Press book authored by SSP Shen and RCJ Somerville in July 2019                              #
#The Python codes were based on the R codes written by Samuel Shen Distinguished Professor,                          #
#San Diego State University, USA and were translated from R by Louis Selstad, Stephen Shen,                          #
#Gregori Clarke, and Dakota Newmann and edited by Samuel Shen.                                                       #
######################################################################################################################


#FIRST TIME Python users*****
#These package need to be installed (on the terminal or anaconda interface) before importing them below. 

#Follow this tutorial for package installation before
# https://towardsdatascience.com/importerror-no-module-named-xyz-45e4a5339e1b


#Change your file path to the folder where your downloaded data is stored
#MAC HELP: https://support.apple.com/guide/mac-help/go-directly-to-a-specific-folder-on-mac-mchlp1236/mac
#PC HELP: https://www.sony.com/electronics/support/articles/00015251
import os
# os.chdir("/Users/sshen/climmath/data")
os.chdir('/Users/HP/Documents/sshen/climmath/data')


%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}


#Style Dictionary to standardize plotting scheme between different python scripts 
import matplotlib.pyplot as plt

styledict = {'xtick.labelsize':20,
             'xtick.major.size':9,
             'xtick.major.width':1,
             'ytick.labelsize':20,
             'ytick.major.size':9,
             'ytick.major.width':1,
             'legend.framealpha':0.0,
             'legend.fontsize':15,
             'axes.labelsize':20,
             'axes.titlesize':25,
             'axes.linewidth':2,
             'figure.figsize':(12,8),
             'savefig.format':'jpg'}
plt.rcParams.update(**styledict)


#Function that creates personalized discrete Colormap
import numpy as np 
from matplotlib import cm as cm1
from matplotlib.colors import ListedColormap, to_rgba

def newColMap(colors):
    """
    This function creates a new color map from a list of colors given
    as a parameter. Recommended length of list of colors is at least 6.
    """
    first = np.repeat([to_rgba(colors[0])], 2, axis = 0)
    last = np.repeat([to_rgba(colors[-1])], 2, axis = 0)
    v = cm1.get_cmap('viridis', 16*(len(colors)-2))
    newcolors = v(np.linspace(0, 1, 16*(len(colors)-2)))
    for (i, col) in enumerate(colors[1:-1]):
        newcolors[16*i : 16*(i+1), :] = to_rgba(col)
    return ListedColormap(np.append(np.append(first,newcolors, axis=0), last, axis=0))


import numpy as np 
import matplotlib.pyplot as plt
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
import matplotlib.mlab as mlab
import pandas as pd

from cartopy import crs, mpl
import matplotlib.ticker as mticker
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

from mpl_toolkits.basemap import Basemap
from mpl_toolkits import mplot3d
from netCDF4 import Dataset as ds
from urllib import request
import scipy as sp
from scipy import stats as stt
from sklearn import datasets, linear_model
from sklearn.neighbors import KernelDensity as kd
from sklearn.linear_model import LinearRegression
from random import randint
from scipy.stats import norm
import statsmodels
import sklearn
import math
import statistics
import sympy as sy
from sympy import symbols, diff
import statsmodels.api as sm
from datetime import date
from scipy.stats import norm, kurtosis
from sklearn.linear_model import LinearRegression
import seaborn as sns
import pylab as py 
from scipy.stats import norm, t
import warnings
warnings.filterwarnings("ignore")


# Creating a Matrix 
np.mat([[1,2], [3,4]])

matrix([[1, 2],
        [3, 4]])


# Matrix Subtraction
np.mat([[1,1], [1,-1]]) - np.mat([[1,2], [3,0]])

matrix([[ 0, -1],
        [-2, -1]])


# Matrix Multiplication 
np.mat([[1,1], [1,-1]]) * np.mat([[1,3], [2,4]])

matrix([[ 3,  7],
        [-1, -1]])


# Matrix Multiplication 
np.mat([[1,3], [2,4]]) * np.mat([[1,1], [1,-1]])

matrix([[ 4, -2],
        [ 6, -2]])


# Yields matrix inverse 
np.linalg.inv(np.mat([[1,1], [1,-1]]))

matrix([[ 0.5,  0.5],
        [ 0.5, -0.5]])


# Verify the result of inverse
np.mat([[0.5,0.5], [0.5,-0.5]]) * np.mat([[1,1], [1,-1]])

matrix([[1., 0.],
        [0., 1.]])


# Yields the solved linear system 
np.linalg.solve(np.mat([[1,1], [1,-1]]),[20,4])

array([12.,  8.])


# Creating Matrix A
A = np.mat([[1,2,3], [-1,0,1]])
A

matrix([[ 1,  2,  3],
        [-1,  0,  1]])


# Yields the Covariance Matrix of A
covm = (1/A.shape[1])*A*A.T
covm

matrix([[4.66666667, 0.66666667],
        [0.66666667, 0.66666667]])


# Initializing arbitrary vector u
u = np.mat([[1,-1]]).T

# Yields new vector v in a different direction
v = covm*u
v

matrix([[4.],
        [0.]])


# Yields eigenvalues and eigenvectors in one output
ew = np.linalg.eig(covm)

# Yields the eigenvalues of covm 
ew[0]

array([4.77485177, 0.55848156])


# Yields the eigenvectors
ew[1]

matrix([[ 0.98708746, -0.16018224],
        [ 0.16018224,  0.98708746]])


# Verify the eigenvalue and eigenvectors 
covm * ew[1] / ew[0]
# The first column corresponds to the first eigenvalue

matrix([[ 0.98708746, -0.16018224],
        [ 0.16018224,  0.98708746]])


# Develop a 2-by-3 space time data matrix for SVD
A = np.mat([[1,2,3],[-1,0,1]])
A

matrix([[ 1,  2,  3],
        [-1,  0,  1]])


# Perform SVD calculation
msvd = np.linalg.svd(A)

# Yields the D vector in SVD 
d = msvd[1]
d

array([3.78477943, 1.29438969])


# Yields the U matrix in SVD 
u = msvd[0].T
u

matrix([[-0.98708746, -0.16018224],
        [-0.16018224,  0.98708746]])


# Yields the V matrix of SVD
v = msvd[2]
v = np.delete(v,2,0).T
v

matrix([[-0.21848175, -0.88634026],
        [-0.52160897, -0.24750235],
        [-0.8247362 ,  0.39133557]])


# Verify that A = UDV', where V' is the transpose of V.
# Yields Diagonal of D
diagd = np.diag(d)

# Yields the Transpose of V
vT = v.T

# Verify the SVD result 
verim = u*diagd*vT
np.matrix.round(verim)
# This is the original matrix A

matrix([[ 1.,  2.,  3.],
        [-1.,  0.,  1.]])


# Yields the covariance of matrix A
covm = (1/A.shape[1])*A*A.T

# Yields the eigenvectors and eigenvalues of matrix A
eigcov = np.linalg.eig(covm)

# The first element of eigcov are the eigenvalues of matrix A
eigcov[0]

array([4.77485177, 0.55848156])


# Yields the eigenvectors of matrix A
eigcov[1]

matrix([[ 0.98708746, -0.16018224],
        [ 0.16018224,  0.98708746]])


# Verify eigenvalues with SVD 
((d**2)/A.shape[1])

array([4.77485177, 0.55848156])


# Yields true eigenvalues of matrix A
eigcov[0]

array([4.77485177, 0.55848156])


# Set up File path
file = 'PSTANDtahiti.txt'
Pta = np.loadtxt(file,skiprows=0)
PTA = np.reshape(Pta[:,1:],(780))

# Make an array of years for our x-axis
xtime = np.linspace(1951,2016,(65*12)+1)[0:780]


#Fig 4.2 (a)
# Set up Figure 
plt.figure(figsize = (15.,7.))

# Add data to plot
plt.plot(xtime,PTA,color='r')

# Add text to plot
plt.text(1950,2.7,'(a)', size = 20, alpha = 0.7)

# Label plot
plt.xlabel('Year')
plt.ylabel('Pressure')
plt.title('Standardized Tahiti SLP Anomalies')
plt.grid()
plt.show()


# Upload and set up data 
Pda = np.loadtxt('PSTANDdarwin.txt',skiprows=0)
PDA = np.reshape(Pda[:,1:],(780))


#Fig 4.2 (b)
plt.figure(figsize = (15.,7.))

plt.plot(xtime,PDA,color='b')
plt.text(1950, 2.7, '(b)', size = 20, alpha = 0.7)

plt.xlabel('Year')
plt.ylabel('Pressure')
plt.title('Standardized Darwin SLP Anomalies')
plt.grid()

plt.show()


#Fig 4.2 (c)
# Set up figure 
fig, soi = plt.subplots(figsize=(15,7))

# Ploting data against time
soi.plot(xtime,PTA-PDA, color = 'k')
soi.text(1950, 4.11, '(c)', size = 20, alpha = 0.7)
soi.axhline(y = 0.0, color = 'k', linestyle = '-')
soi.set_xlabel('Year')
soi.set_ylabel('SOI Index')
soi.grid()

# Making left Y axis 
ax2 = soi.twiny()

# Plotting data against difference of Darwin and Tahiti standardized SLP data
ax2.plot(xtime,PTA-PDA, color='k')
ax2.set_xlabel('Year')

# Mirroring X axis for difference data 
ax3 = soi.twinx()
ax3.plot(xtime,PTA-PDA, color='k')
ax3.set_ylabel('SOI Index')

# Adding some text
ax3.text(1960,4.6,"SOI = Tahiti Standardized SLP - Darwin Standardized SLP", fontsize=18)

plt.show()


#Fig 4.2 (d)
# Preparing variables
SOI = PTA-PDA
cnegsoi = -SOI.cumsum()

# Load AMO data
amodat = np.loadtxt('AMO1951-2015.txt',skiprows=0)
amots = amodat[:,1:13].T.flatten("F")

# Set up figures 
fig, ax1 = plt.subplots(figsize=(15,7))
fig.text(0.123, 0.22, '(d)', size = 20, alpha = 0.7)

# Plot the data against CSOI Index 
ax1.set_xlabel('Year')
ax1.set_ylabel('Negative CSOI Index')
ax1.plot(xtime, cnegsoi, color="purple", label="CSOI")
ax1.grid()

# Create a second, vertical axis that shares the same x-axis
ax2 = ax1.twinx()  

# Set labels for new axis 
ax2.set_ylabel('AMO Index')  
ax2.plot(xtime, amots, color="darkgreen", label="AMO Index")

# Set legends 
# ax1.legend(loc = (.169,.89))
# ax2.legend(loc = (.169,.84))
ax1.legend(loc = (.125,.13))
ax2.legend(loc = (.125,.08))

# Yields the full view of the right y-label
fig.tight_layout()
plt.title('CSOI and AMO Index Comparison', pad = 8)
plt.show()


# Yields transpose of the data
ptamonv = PTA.T
pdamonv = PDA.T

# Creates an array of arrays 
ptada = [ptamonv,pdamonv]


# Compute SVD of ptada
svdptd_u, svdptd_s, svdptd_vh = np.linalg.svd(ptada,full_matrices=False)

# Verify SVD 
recontada = np.dot(np.dot(svdptd_u,np.diag(svdptd_s)),svdptd_vh)

# Compute error of calculations
svd_error = ptada - recontada

# Error maximum
svd_error.max()

4.218847493575595e-15


# Error minimum
svd_error.min()

-7.105427357601002e-15


# Gather appropriate variables
Dd = np.diag(svdptd_s)
Uu = svdptd_u
Vv = svdptd_vh

# Set up time array
xtime = np.linspace(1951,2016,(65*12)+1)[0:780]

# Compute WSOI1 for plotting 
wsoi1 = Dd[0,0] * (Vv)[0,:]


#Fig 4.3 (a)
# Prepare figure
fig, pltt = plt.subplots(figsize = (15,7))

# Plot data 
pltt.plot(xtime,wsoi1,color='k')
pltt.set_xlabel('Year')
pltt.set_ylabel('Weighted SOI1')
pltt.text(1980.2, -2.9, "Weighted SOI1 Index", fontsize=20)
pltt.text(1950.2, 3.7, "(a)", size = 16, alpha = 0.7)
pltt.grid()


#Fig 4.3 (b)
# Set up data for WSOI2
wsoi2 = Dd[1,1] * Vv[1,:]

# Prepare figure
fig, wsoi = plt.subplots(figsize = (15,7))

# Plot data 
wsoi.plot(xtime,wsoi2,color='darkblue')
wsoi.set_xlabel('Year')
wsoi.set_ylabel('WSOI2')
wsoi.text(1950.2, 1.8, "(b)", size = 16, alpha = 0.7)
wsoi.grid()

# Create second, horizontal axis
ax2 = wsoi.twiny()
ax2.plot(xtime,wsoi2,color='darkblue')
ax2.text(1970.2,1.76,"Weighted SOI2 Index", fontsize=20)

plt.show()


#Fig 4.3(c)
# Create needed variables 
cwsoi1 = np.cumsum(wsoi1)
xtime = np.linspace(1951,2016,(65*12)+1)[0:780]

# Prepare figure 
fig, plott = plt.subplots(figsize=(15,7))

# Plot cumulative sums 
plott.plot(xtime,cwsoi1,color='black',label='CWSOI1')
plott.set_xlabel('Year')
plott.set_ylabel('Cumulative Weighted SOI1', size = 20)
plott.grid()
plott.text(1950.2, -172.5, "(c)", size = 20, alpha = 0.7)

# Superimpose CSOI time series on this CWSOI1
cnegsoi = -np.cumsum(ptamonv-pdamonv)
plt.plot(xtime,cnegsoi,color='purple',label='CSOI')
plt.title('Comparison between CWSOI1 and the Smoothed AMO Index', size = 22)

ax2 = plott.twinx()

# Create 24-month ahead moving average of the monthly AMO index
i = 0
window_size = 24
moving_averages = []
while i < len(amots) - window_size + 1:
    this_window = amots[i : i + window_size]
    window_average = sum(this_window) / window_size
    moving_averages.append(window_average)
    i += 1

# Creating time sequence spanning from 1951 to 2016
xtime = np.linspace(1951,2016,(65*12)+1)[0:757]

# Plot the moving averages 
ax2.plot(xtime, moving_averages,color='darkgreen',label='AMO Index')
ax2.set_ylabel('Smoothed AMO Index')

# Set legend 
plott.legend(loc=(.17,.835))
ax2.legend(loc=(.17,.785))

# Ensures we can see all axis labels 
fig.tight_layout() 
plt.show()


# Create appropriate variables from SVD 
uu, ss, vvh = np.linalg.svd(ptada,full_matrices=False)
PC = vvh
PC1 = PC[0,:]
PC2 = PC[1,:]


#Fig 4.3 (d)
# Prepare figure 
plt.figure(figsize = (15,7))

# Prepare time sequence 
xtime = np.linspace(1951,2016,(65*12)+1)[0:780]

# Plot the cumulative sum
plt.plot(xtime,PC2.cumsum(),color='b',label='Principal Component 2')

# Label appropriately 
plt.xlabel('Year')
plt.ylabel('CWSOI2')
plt.title('CWSOI2 Index: The Cumulative PC2')
plt.text(1950.1, -5.9, '(d)', size = 16, alpha = 0.7)
plt.grid()
plt.show()


#Fig 4.4 (a)
# Display the two ENSO modes on a world map


# # Prepare the figure 
# plt.figure(figsize=(15., 7.))

# # Using the basemap package to put the cylindrical projection map into the image
# wmap2 = Basemap(projection='cyl',llcrnrlat=-90.,urcrnrlat=90.,\
#             llcrnrlon=0.,urcrnrlon=360.,resolution='c')

# # Draw coastlines, latitudes, and longitudes on the map
# wmap2.drawcoastlines(color='black', linewidth=1)
# wmap2.drawparallels(np.arange(-90.,91.,30.), labels = [1,0,0,1], fontsize=15)
# wmap2.drawmeridians(np.arange(00.,360.,30.), labels= [1,0,0,1], fontsize=15)

# # Add data to plot
# plt.scatter(131.,-12.,color='b',s=150)
# plt.scatter(220.,-20,color='r',s=150)

# # Add desired text to plot 
# plt.text(100,-24, 'Darwin -0.79', color = 'b', fontsize = 22)
# plt.text(200,-32, 'Tahiti 0.61', color = 'r', fontsize = 22)
# plt.text(60,40, 'El Nino Southern Oscillation Model 1', color = 'm', fontsize = 30)

# plt.show()


# Prepare the Figure
plt.figure(figsize = (15., 7.))

# Using the CartoPy package to put the cylindrical projection map into the image
ax = plt.axes(projection = crs.PlateCarree())
ax.set_extent([-180, 180, -90, 90], crs = crs.PlateCarree())

# Draw coastlines, gridlines, longitudes, and latitudes on the map
ax.coastlines(color='black', linewidth=1)
gl = ax.gridlines(crs=crs.PlateCarree(), draw_labels=True,
                  linewidth=2, color='gray', alpha=0.5, linestyle='--')
gl.xlabels_top = True
gl.ylabels_left = True
gl.xlines = True
gridx = np.linspace(-180, 180, 13)
gl.xlocator = mticker.FixedLocator(gridx)
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER

# Add data to plot
plt.scatter(131.,-12.,color='b',s=150)
plt.scatter(-129.,-30,color='r',s=150)

# Add descriptive text to plot
plt.text(100,-24, 'Darwin -0.79', color = 'b', fontsize = 22)
plt.text(-155,-42, 'Tahiti 0.61', color = 'r', fontsize = 22)
plt.text(-130,40, 'El Nino Southern Oscillation Model 1', color = 'm', fontsize = 30)

Text(-130, 40, 'El Nino Southern Oscillation Model 1')


#Fig 4.4 (b)
# Display the two ENSO modes on a world map
# Prepare figure
plt.figure(figsize=(15, 7))

# Using the basemap package to put the cylindrical projection map into the image
wmap2 = Basemap(projection='cyl',llcrnrlat=-90.,urcrnrlat=90.,\
            llcrnrlon=0.,urcrnrlon=360.,resolution='c')

# Draw coastlines, latitudes, and longitudes on the map
wmap2.drawcoastlines(color='black', linewidth=1)
wmap2.drawparallels(np.arange(-90.,91.,30.), labels = [1,0,0,1], fontsize=15)
wmap2.drawmeridians(np.arange(-180.,181.,30.), labels= [1,0,0,1], fontsize=15)

# Add data to plot 
plt.scatter(131.,-12.,color='r',s=150)
plt.scatter(220.,-20,color='r',s=150)

# Add desired text 
plt.text(105,-24, 'Darwin 0.61', color = 'r', fontsize = 22)
plt.text(200,-32, 'Tahiti 0.79', color = 'r', fontsize = 22)
plt.text(60,40, 'El Nino Southern Oscillation Model 2', color = 'm', fontsize = 30)

plt.show()


# Given the coordinates of 3 points 
x1 = [1,2,3]
x2 = [2,1,3]
y = [-1,2,1]

# Put them into a dataframe 
df = pd.DataFrame([x1,x2,y]).T
X_df = pd.DataFrame([x1,x2]).T

# Create the linear regression 
lm = LinearRegression()

# Fit the linear regression 
lm.fit(X_df, df[2])

# Display Coefficients 
lm.coef_

array([ 1.66666667, -1.33333333])


# Verify the 3 points determining a plane 
b1 = [i*1.667 for i in x1]
b2 = [j*1.333 for j in x2]
difference = []

zip_object = zip(b1, b2)
for b1_i, b2_i in zip_object:
    difference.append(b1_i-b2_i)
difference

[-0.9989999999999999, 2.0010000000000003, 1.0020000000000007]


u = [1,2,3,1]
v = [2,4,3,-1]
w = [1,-2,3,4]

df = pd.DataFrame([u,v,w]).T
X_df = pd.DataFrame([u,v]).T
lm = LinearRegression()
lm.fit(X_df, df[2])
lm.coef_

array([ 2. , -1.5])


lm.intercept_

0.9999999999999982


# Show a general multivariate linear regression with 3 independent random variables
dat = np.mat([np.random.normal(0, 1, 4),np.random.normal(0, 1, 4),
                np.random.normal(0, 1, 4),np.random.normal(0, 1, 4),
                np.random.normal(0, 1, 4),np.random.normal(0, 1, 4),
                np.random.normal(0, 1, 4),np.random.normal(0, 1, 4),
                np.random.normal(0, 1, 4),np.random.normal(0, 1, 4)])
fdat = pd.DataFrame(dat)
lm = LinearRegression()
lm.fit(fdat.iloc[:,0:3], fdat.iloc[:,3])
lm.coef_

array([ 1.80217071, -1.52838385,  0.01316941])


lm.intercept_

1.0778300140269834

Python Code Version 3.0 for¶

Climate Mathematics: Theory and Applications - Chapter 4¶

Chapter 4: Climate Data Matrices and Linear Algebra¶

4.2: Matrix Algebra¶

4.5: SVD Representation Model for Space-Time Data¶

4.6: SVD Analysis of Southern Oscillation Index¶

Visualization of the ENSO Mode Computed from SVD Method¶

4.8: Multivariate Linear Regression Using Matrix Notations¶