Python Code for Climate Science

In [1]:
####################################################################
#
#This Python Code for Climate Science
#is written for the book entitled
#"Climate Mathematics: Theory and Applications"
#A Cambridge University Press book authored by SSP Shen and RCJ Somerville
#July 2019
#The Python codes were based on the R codes written by Samuel Shen
#Distinguished Professor, San Diego State University, USA
#and were translated from R by
#Louis Selstad, Stephen Shen, Gregori Clarke, and Dakota Newmann
#and edited by Samuel Shen, 
#Email: sshen@sdsu.edu
#www.climatemathematics.org
#March 2019, San Diego
#
####################################################################
#

Chapter 2: Basics of R Programming

#
In [2]:
import numpy as np 
import matplotlib.pyplot as plt
from matplotlib.colors import rgb2hex
from matplotlib.patches import Polygon
import matplotlib.mlab as mlab
import pandas as pd
from mpl_toolkits.basemap import Basemap
from mpl_toolkits import mplot3d
from netCDF4 import Dataset as ds
from urllib import request
import scipy as sp
from scipy import stats as stt
from sklearn import datasets, linear_model
from sklearn.neighbors import KernelDensity as kd
from sklearn.linear_model import LinearRegression
from random import randint
from scipy.stats import norm
import statsmodels
import sklearn
import math
import statistics
import sympy as sy
from sympy import symbols, diff
import statsmodels.api as sm
from datetime import date

Python As a Smart Calculator

In [3]:
1+4 #The result 5 is shown as an Out[5] below. 
Out[3]:
5
In [4]:
2 + np.pi/4 - 0.8 #\pi=3.14...... in Python is given by np.pi
Out[4]:
1.9853981633974482
In [5]:
x=1
y=2
z=4
t=2*x**y-z  #x**y means x^y: x to the power of y
t
Out[5]:
-2
In [6]:
u=2
v=3
u+v
Out[6]:
5
In [7]:
np.sin(u*v)  #np.sin is sine function in Python
Out[7]:
-0.27941549819892586
In [8]:
tmax=[77, 72,75,73,66,64,59] #Enter the temperature data
tmax #Show the temperarure data
Out[8]:
[77, 72, 75, 73, 66, 64, 59]

Define a Sequence in Python

In [9]:
np.arange(9) #A Python sequence starts from 0 while R from 1
#A sequence is called an array in Python
Out[9]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8])
In [10]:
# np.arange(x,y) creates an array of numbers incremented by 1 
# between integer x and integer y-1 
np.arange(1, 9) 
Out[10]:
array([1, 2, 3, 4, 5, 6, 7, 8])
In [11]:
np.arange(-5.5, 2) #np.arange(x,y) can take negative and real numbers with increment 1
Out[11]:
array([-5.5, -4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5])
In [12]:
# np.arange(x,y,b) creates an array of numbers incremented by b
# between x and y-1 
#x,y, and b can be real numbers although they are integers in most applications
np.arange(1, 9, 2) 
Out[12]:
array([1, 3, 5, 7])
In [13]:
# np.linspace(x, y, n) creates an array of n numbers evenly distributed between x and y
np.linspace(0, 4.5, 5) #linspace means linearly/uniformly distributed space division
Out[13]:
array([0.   , 1.125, 2.25 , 3.375, 4.5  ])

Define a Function in R

In [14]:
# Define the x^2 function and call it samfctn(x)
def samfctn(x):
    return x * x;
samfctn(4)
Out[14]:
16
In [15]:
# Define a multivaraite function "x+y-z/2" 
def fctn2(x, y, z):
    return x + y - z / 2;
fctn2(1, 2, 3)
Out[15]:
1.5

Plot with Python

In [16]:
import math
import numpy
import scipy
import sympy
#After import these packages, the following three return the same pi=3.141592...
math.pi 
np.pi
scipy.pi
Out[16]:
3.141592653589793

Plot the curve of y=sin(x) from -pi to 2 pi

In [17]:
t = np.arange(-math.pi, 2 *math.pi, 0.1) #math.pi is the same as np.pi and scipy.pi, just using different packages
plt.plot(t, np.sin(t))
plt.show()

Plot x-y data based on a formula

In [18]:
t = np.linspace(-3, 2, 20) #uniform division using 20 points
plt.plot(t, t ** 2) 
plt.show()
In [19]:
np.size(t) #length of the data array
Out[19]:
20
In [20]:
import matplotlib.pyplot as plt
t = np.linspace(-3, 2, 300) #uniform division using 200 points
plt.plot(t, (np.sin(t ** 2))**2, 'ro', color='blue',linewidth=5) #linestyle ='ro'
plt.xlabel('Time')
plt.ylabel('Function Value')
plt.title('Plot a function')
plt.show()

Plot a 3D surface, yields a 3D surface with a perspective angle

In [21]:
x = np.linspace(-1, 1, 100)
y = np.linspace(-1, 1, 100)

# defines a formula for Z
def f(x, y):
    return 1 - x**2 - y**2
 
X, Y = np.meshgrid(x, y)
Z = f(X, Y) # renders z function on the x, y grid

# creates the 3D plot
fig = plt.figure()
ax = plt.axes(projection = '3d')
ax.contour3D(X, Y, Z, 30, cmap = 'binary')
plt.show()
In [22]:
x = np.linspace(-1, 1, 100)
y = np.linspace(-1, 1, 100)

# defines a formula for Z
def f(x, y):
    return 1 - x**2 - y**2
 
X, Y = np.meshgrid(x, y)
Z = f(X, Y) # renders z function on the x, y grid

# creates the 3D plot
fig = plt.figure()
ax = plt.axes(projection = '3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none') #cmap = 'color'

# labels the plot
ax.set_title('Contour surface')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z');
ax.view_init(30)
plt.show()

Contour plot

In [23]:
x = np.linspace(-1, 1, 30)
y = np.linspace(-1, 1, 30)
 
X, Y = np.meshgrid(x, y)
Z = f(X, Y)

# creates and colors the contour plot
plt.contourf(X, Y, Z, 20, cmap = 'bwr') #cmap='color-scheme'

# adds colorbar for color map of contours
plt.colorbar()
plt.show()
In [24]:
## Symbolic calculations by Python
In [25]:
# Makes 'x' and 'y' symbols used for derivatives
x = symbols('x')
y = symbols('y')
# 'diff' command takes the derivative of x^2 w.r.t. x
diff(x ** 2, x)
Out[25]:
$\displaystyle 2 x$
In [26]:
# creates a function of x^2 
fx = x ** 2
# takes the derivative of function w.r.t x
rslt = diff(fx, x)
print('The derivative of the function fx w.r.t x is: ', rslt)
The derivative of the function fx w.r.t x is:  2*x
In [27]:
# creates a function of x^2*sin(x)
fx = (x ** 2) * sy.sin(x)
# Take the derivative of function w.r.t x
rslt = diff(fx, x)
print('The derivative of the function w.r.t x is: ', rslt)
The derivative of the function w.r.t x is:  x**2*cos(x) + 2*x*sin(x)
In [28]:
# Create a multivariate function: x^2+y^2 
fxy = x ** 2 + y ** 2
# Take partial derivative of function w.r.t x
rslt = diff(fxy, x)
print('The partial derivative of the function w.r.t x is: ', rslt)
The partial derivative of the function w.r.t x is:  2*x
In [29]:
# Take partial derivative of function w.r.t y
rslt = diff(fxy, y)
print('The partial derivative of the function w.r.t y is: ', rslt)
The partial derivative of the function w.r.t y is:  2*y
In [30]:
# Integrates x^2 from 0 to 1
sy.integrate(x ** 2, (x, 0, 1))
Out[30]:
$\displaystyle \frac{1}{3}$
In [31]:
# integrates cos(x) from 0 to pi/2
sy.integrate(sy.cos(x), (x, 0, math.pi / 2))
Out[31]:
$\displaystyle 1.0$

Vectors, Matrices and Solve Linear Equations

In [32]:
# creates a 5X1 matrix
np.mat([[1], [6], [3], [np.pi], [-3]])
Out[32]:
matrix([[ 1.        ],
        [ 6.        ],
        [ 3.        ],
        [ 3.14159265],
        [-3.        ]])
In [33]:
# creates a sequence from 2 to 6
np.arange(2, 7)
Out[33]:
array([2, 3, 4, 5, 6])
In [34]:
# creates a sequence incremented by 2 from 1 to 10 
np.arange(1, 10, 2)
Out[34]:
array([1, 3, 5, 7, 9])
In [35]:
# creates a 4X1 matrix
x = np.mat([[1], [-1], [1], [-1]])
print(x)
[[ 1]
 [-1]
 [ 1]
 [-1]]
In [36]:
# adds number 1 to each element of x
print(x + 1)
[[2]
 [0]
 [2]
 [0]]
In [37]:
# multiplies number 2 to each element of x
print(x * 2)
[[ 2]
 [-2]
 [ 2]
 [-2]]
In [38]:
#x/2, divides each element of x by 2
print(x / 2)
[[ 0.5]
 [-0.5]
 [ 0.5]
 [-0.5]]
In [39]:
# creates a 1X4 vector with a sequence incremented by 1 from 1 to 4
y = np.mat(np.arange(1, 5))

# this multiplication multiples each pair of elements and results in 4X1 vector
np.multiply(x, y.T)
Out[39]:
matrix([[ 1],
        [-2],
        [ 3],
        [-4]])
In [40]:
print(x / 2)
print(x)
print(y)
[[ 0.5]
 [-0.5]
 [ 0.5]
 [-0.5]]
[[ 1]
 [-1]
 [ 1]
 [-1]]
[[1 2 3 4]]
In [41]:
# the dot product of two vectors  
np.dot(y, x)
Out[41]:
matrix([[-2]])
In [42]:
x.shape
Out[42]:
(4, 1)
In [43]:
# transforms x into a row 1X4 vector
x.T.shape
Out[43]:
(1, 4)
In [44]:
# dot product and results in 1X1 matrix
np.dot(x.T, y.T)
Out[44]:
matrix([[-2]])
In [45]:
# this column times row yields a 4X4 matrix
np.matmul(x, y)
Out[45]:
matrix([[ 1,  2,  3,  4],
        [-1, -2, -3, -4],
        [ 1,  2,  3,  4],
        [-1, -2, -3, -4]])
In [46]:
# convert a vector into a matrix of the same number of elements
# the matrix elements go by column, first column, second, etc
my = y.reshape(2, 2).transpose()
print(my)
[[1 3]
 [2 4]]
In [47]:
# finds dimensions of a matrix
my.shape
Out[47]:
(2, 2)
In [48]:
# converts a matrix to a vector, again via columns
my.flatten()
Out[48]:
matrix([[1, 3, 2, 4]])
In [49]:
# reshapes x to a 2X2 matrix
mx = x.reshape(2, -1).transpose()
# multiplication between each pair of elements
np.multiply(mx, my)
Out[49]:
matrix([[ 1,  3],
        [-2, -4]])
In [50]:
# division between each pair of elements
np.divide(mx, my)
Out[50]:
matrix([[ 1.        ,  0.33333333],
        [-0.5       , -0.25      ]])
In [51]:
np.subtract(mx, 2 * my)
Out[51]:
matrix([[-1, -5],
        [-5, -9]])
In [52]:
# this is matrix multiplication in linear algebra
np.matmul(mx, my)
Out[52]:
matrix([[ 3,  7],
        [-3, -7]])
In [53]:
# determinant of a matrix
x = np.linalg.det(my)
print(x)
-2.0
In [54]:
# inverse of a matrix
z = np.linalg.inv(my)
print(z)
[[-2.   1.5]
 [ 1.  -0.5]]
In [55]:
# verifies that the inverse of the matrix is correct by multiplying 
# the original matrix and the inverse matrix
a = np.matmul(z, my)
print(a)
[[1. 0.]
 [0. 1.]]
In [56]:
# returns a left to right diagonal vector of a matrix
np.diagonal(my)
Out[56]:
array([1, 4])
In [57]:
# returns the eigenvalues of the "my" matrix
myeigval = np.linalg.eigvals(my)
print(myeigval) 
[-0.37228132  5.37228132]
In [58]:
# returns the eigenvalues and eigenvectors of the "my" matrix
myeigvect = np.linalg.eig(my)
print(myeigvect)
(array([-0.37228132,  5.37228132]), matrix([[-0.90937671, -0.56576746],
        [ 0.41597356, -0.82456484]]))
In [59]:
# returns the singular value decomposition(svd) of the "my" matrix
mysvd = np.linalg.svd(my)
print(mysvd)
(matrix([[-0.57604844, -0.81741556],
        [-0.81741556,  0.57604844]]), array([5.4649857 , 0.36596619]), matrix([[-0.40455358, -0.9145143 ],
        [ 0.9145143 , -0.40455358]]))
In [60]:
# returns a solved linear matrix equation of the "my" matrix and array [1,3]
ysolve = np.linalg.solve(my, [1,3])
print(ysolve)
[ 2.5 -0.5]
In [61]:
# verifies the solution is correct by multiplying the "my" matrix with "ysolve" 
n = np.matmul(my, ysolve)
print(n)
[[1. 3.]]

Simple Statistics by Python

In [62]:
# generates 10 normally distributed numbers
random = np.random.normal(0, 1, 10)
print(random)
[-0.56944224  0.92318125  0.00635913 -0.19910735 -1.05111789  1.22739994
  0.96547908  0.58251679 -0.34498744  0.29586004]
In [63]:
# returns the mean of that random sample
mean = np.mean(random)
print(mean)
0.1836141304857005
In [64]:
# returns the variance of that random sample
var = np.var(random)
print(var)
0.49684553535901477
In [65]:
# returns the standard deviation of that random sample
std = np.std(random)
print(std)
0.7048727086212196
In [66]:
# returns the median of that random sample
median = np.median(random)
print(median)
0.15110958317541215
In [67]:
# returns quantile values in [0,.25,.5,.75,1]
quantile = pd.DataFrame(random).quantile([0, 0.25, 0.5, 0.75, 1])
print(quantile)
             0
0.00 -1.051118
0.25 -0.308517
0.50  0.151110
0.75  0.838015
1.00  1.227400
In [68]:
# returns range of values (max - min) in random sample
rang = np.ptp(random)
print(rang)
2.278517829232926
In [69]:
# returns the min value in random
minimum = np.min(random)
print(minimum)
-1.051117888472511
In [70]:
# returns the max value in random
maximum = np.max(random)
print(maximum)
1.2273999407604146
In [71]:
# yields the box plot of random values
fig = plt.figure(1, figsize=(8, 10))
ax = fig.add_subplot(111)
bp = ax.boxplot(random)
plt.show()
In [72]:
# statistical summary of the data sequence
random = np.random.normal(0, 1, 12)
summ = pd.DataFrame(random).describe()
print(summ)
               0
count  12.000000
mean    0.369812
std     1.388178
min    -2.561957
25%    -0.207109
50%     0.653065
75%     1.316669
max     2.230248
In [73]:
# yields the histogram of 1000 random numbers with a normal distribution
w = np.random.normal(0, 1, 1000)
hist = plt.hist(w, facecolor='black')
plt.show()

Read data from files, process and plot

In [74]:
# read the 5 deg gridded NOAAGlobalTemp .asc data
file = open('NOAAGlobalTemp.gridded.v4.0.1.201701.asc', "r")
# read float numbers from the file
da1 = []
for line in file:
    x = line.strip().split()
    for f in x:
        da1.append(float(f))

len(da1)
Out[74]:
4267130
In [75]:
da1[0 : 3]
Out[75]:
[1.0, 1880.0, -999.9]
In [76]:
tm1 = np.arange(0, 4267129, 2594)
tm2 = np.arange(1, 4267130, 2594)
len(tm1)
Out[76]:
1645
In [77]:
len(tm2)
Out[77]:
1645
In [78]:
# extract months
mm1 = []   
for i in range(len(tm1)):
    mm1.append(da1[tm1[i]])
    
# extract years
yy1 = []
for i in range(len(tm2)):
    yy1.append(da1[tm2[i]])
    
# combine YYYY with MM
rw1 = []
for i in range(len(mm1)):
    rw1.append(str(int(yy1[i])) + "-" + str(int(mm1[i])))    
In [79]:
mm1[0 : 6]
Out[79]:
[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
In [80]:
yy1[0 : 6]
Out[80]:
[1880.0, 1880.0, 1880.0, 1880.0, 1880.0, 1880.0]
In [81]:
len(mm1)
Out[81]:
1645
In [82]:
len(yy1)
Out[82]:
1645
In [83]:
tm1[0 : 6]
Out[83]:
array([    0,  2594,  5188,  7782, 10376, 12970])
In [84]:
tm2[0 : 6]
Out[84]:
array([    1,  2595,  5189,  7783, 10377, 12971])
In [85]:
tm3 = pd.concat([pd.DataFrame(tm1), pd.DataFrame(tm2)], axis = 1)

# remove the months and years data from the scanned data
tm4 = [] 
for r in range(len(tm1)):
    tm4.append(tm1[r])
    tm4.append(tm2[r])

tm4.reverse()
da2 = da1.copy() 
for f in tm4:
    del da2[f]

len(da2)/(36*72) # months, 137 yrs 1 mon: Jan 1880-Jan 2017
Out[85]:
1645.0
In [86]:
# generates the space-time data
# 2592 (=36*72) rows and 1645 months (=137 yrs 1 mon)
var = [[0 for x in range(1645)] for y in range(2592)]
i = 0
for c in range(1645):
    for r in range(2592):
        var[r][c] = da2[i]
        i += 1

da3 = pd.DataFrame(var, columns = rw1)

lat1=np.linspace(-87.5, 87.5, 36).tolist()
lon1=np.linspace(2.5, 357.5, 72).tolist()

Lat = sorted(lat1 * 72)
Lon = lon1 * 36

rw1.insert(0, "LAT")
rw1.insert(1, "LON")
gpcpst = pd.concat([pd.DataFrame(Lat), pd.DataFrame(Lon), 
                    pd.DataFrame(da3)], axis = 1)
gpcpst.columns = rw1
In [87]:
gpcpst['2015-12'].values
Out[87]:
array([-999.9, -999.9, -999.9, ..., -999.9, -999.9, -999.9])
In [88]:
gpcpst.columns
Out[88]:
Index(['LAT', 'LON', '1880-1', '1880-2', '1880-3', '1880-4', '1880-5',
       '1880-6', '1880-7', '1880-8',
       ...
       '2016-4', '2016-5', '2016-6', '2016-7', '2016-8', '2016-9', '2016-10',
       '2016-11', '2016-12', '2017-1'],
      dtype='object', length=1647)
In [89]:
gpcpst.shape
Out[89]:
(2592, 1647)
In [90]:
# the first two columns are Lat and Lon
# -87.5 to 87.5 and then 2.5 to 375.5
# the first row for time is header, not counted as data. 
gpcpst.to_csv("NOAAGlobalT1.csv") #Output the data as a csv file

Plot the climate data map for a given time

In [91]:
# column '2015-12' corresponding to Dec 2015
# convert the vector into a lon-lat matrix for contour map plotting
# compresses numbers to [-6, 6] range
mapmat = np.reshape(gpcpst['2015-12'].values, (-1, 72)) #36 X 72
# this command compresses numbers to the range of -6 to 6
mapmat = np.maximum(np.minimum(mapmat, 6), -6)  

dpi = 100
fig = plt.figure(figsize = (1100/dpi, 1100/dpi), dpi = dpi)
ax  = fig.add_axes([0.1, 0.1, 0.8, 0.9])

# create map
dmap = Basemap(projection = 'cyl', llcrnrlat = -87.5, urcrnrlat = 87.5,
              resolution = 'c',  llcrnrlon = 2.5, urcrnrlon = 357.5)

# draw coastlines, state and country boundaries, edge of map
dmap.drawcoastlines()
dmap.drawstates()
dmap.drawcountries()

# create and draw meridians and parallels grid lines
dmap.drawparallels(np.arange( -50, 100, 50.), labels = [1, 0, 0, 0], 
                  fontsize = 10)
dmap.drawmeridians(np.arange(50, 350, 50.), labels = [0, 0, 0, 1], 
                  fontsize = 10)

# convert latitude/longitude values to plot x/y values
# need to reverse sequence of lat1
#lat1_rev = lat1[::-1] 
lat1_rev = lat1
x, y = dmap(*np.meshgrid(lon1, lat1_rev))

# contour levels
clevs = np.arange(-7, 7, 0.5)
print(x.shape,y.shape, mapmat.shape)
# draw filled contours
cnplot = dmap.contourf(x, y, mapmat, clevs, cmap = plt.cm.jet)

# add colorbar
# pad: distance between map and colorbar
cbar = dmap.colorbar(cnplot, location = 'right', pad = "2%")      
cbar.set_label('[oC]') # add colorbar title string

# add plot title
plt.title('NOAAGlobalTemp Anomalies Dec 2015 [deg C]')

# label x and y
plt.xlabel('Longitude', labelpad = 20)
plt.ylabel('Latitude', labelpad = 40)

# display on screen
plt.show()
(36, 72) (36, 72) (36, 72)

Extract the data for a specific region

In [92]:
# keep only the data for the Pacific region for years from 1951-2000 

# get data for region with -20 < Latitude < 20 and 160 < Longitude < 260
n2 = gpcpst[(gpcpst['LAT'] > -20) & (gpcpst['LAT'] < 20) & 
            (gpcpst['LON'] > 160) & (gpcpst['LON'] < 260)]
print(gpcpst.shape)
print(n2.size)

# from 1951-2000 
# note Jan 1951 data starts from column 854 (note python is zero based)
pacificdat = n2.iloc[:, 854 : 1454]
(2592, 1647)
263520

Plot a climate map for a specific region

In [93]:
Lat = np.arange(-17.5, 22.5, 5)
Lon = np.arange(162.5, 262.5, 5)

plt.close()

# get Dec 1997 data from pacificdat (tropical region)
mapmat = np.reshape(pacificdat['1997-12'].values, (8, -1)) # 8 * 20

fig = plt.figure(figsize=(1100/dpi, 1100/dpi), dpi = dpi)
ax  = fig.add_axes([0.1, 0.1, 0.8, 0.9])

# create map
smap = Basemap(projection = 'cyl', llcrnrlat = -40, urcrnrlat = 40,
              resolution = 'c', llcrnrlon = 120, urcrnrlon = 300)

# draw coastlines, state and country boundaries, edge of map
smap.drawcoastlines()
smap.drawstates()
smap.drawcountries()

# create and draw meridians and parallels grid lines
smap.drawparallels(np.arange( -40, 60, 20.),labels=[1, 0, 0, 0],fontsize = 10)
smap.drawmeridians(np.arange(50, 350, 50.),labels=[0, 0, 0, 1],fontsize = 10)

# convert latitude/longitude values to plot x/y values
x, y = smap(*np.meshgrid(Lon, Lat))

# contour levels
clevs = np.arange(-4, 4, 0.1)

# draw filled contours
cnplot = smap.contourf(x, y, mapmat, clevs, cmap = plt.cm.jet)

# add colorbar
# pad: distance between map and colorbar
cbar = smap.colorbar(cnplot, location = 'right', pad = "2%") 
# add colorbar title string
cbar.set_label('[oC]', verticalalignment = 'top') 

# add plot title
plt.title('Tropic Pacific SAT Anomalies Dec 1997 [deg C]')

# label x and y
plt.xlabel('Longitude', labelpad = 20)
plt.ylabel('Latitude', labelpad = 40)

# display on screen
plt.show()