In this tutorial we create two variables over two different but overlapping time domains. We will grow both variables to span the same (union) domain, filling the added times with missing data
The CDAT software was developed by LLNL. This tutorial was written by Charles Doutriaux. This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344.
Let's create two distinct variables (monthly time series):
variable 1 goes from 1989 through 2010; variable 2 goes from 2000 through 2017.
import cdms2
import numpy
import MV2
import cdtime
import cdutil
def createTimeAxis(start, end):
    """Build a monthly time axis running from ``start`` to ``end``.

    Integer arguments are treated as years: an integer ``start`` is
    converted with ``cdtime.comptime(start)`` and an integer ``end`` with
    ``cdtime.comptime(end, 12, 31)``. Any other value is used as given.

    The returned cdms2 axis has id "time", units "months since <start>",
    is designated as the time axis and has monthly bounds set.
    """
    first = cdtime.comptime(start) if isinstance(start, int) else start
    last = cdtime.comptime(end, 12, 31) if isinstance(end, int) else end

    # The axis counts months relative to its own first date
    month_units = "months since {}".format(first)
    # Offset of the last date from the first, plus one to include both ends
    count = last.torelative(month_units).value + 1

    axis = cdms2.createAxis(numpy.arange(count))
    axis.id = "time"
    axis.units = month_units
    axis.designateTime()
    cdutil.setTimeBoundsMonthly(axis)
    return axis
def createData(start, end, shape=()):
    """Return random monthly data covering ``start`` through ``end``.

    ``shape`` is a tuple with the sizes of any extra dimensions; they are
    placed after the time dimension. The result is an MV2 array with id
    "data" whose first axis is the monthly time axis built by
    ``createTimeAxis(start, end)``.
    """
    time_axis = createTimeAxis(start, end)
    # Time is always the leading dimension
    full_shape = (len(time_axis),) + shape
    values = numpy.random.randn(*full_shape)
    result = MV2.array(values)
    result.setAxis(0, time_axis)
    result.id = "data"
    return result
# Two overlapping monthly series: 1989 through 2010 and 2000 through 2017
data1, data2 = createData(1989, 2010), createData(2000, 2017)
def missingMonths(time, start, end):
    """Count the months missing before and after a time axis.

    ``time`` is an indexable sequence of component times; ``start`` and
    ``end`` are the dates the axis should span. Returns a tuple
    ``(before, after)`` of ints: the number of months between ``start``
    and the first time, and between the last time and ``end``. A side is
    0 when the axis already reaches (or extends past) that date.
    """
    first, last = time[0], time[-1]

    # Months to add in front of the axis
    if first.cmp(start) > 0:
        before = first.torelative("months since {}".format(start)).value
    else:
        before = 0

    # Months to add behind the axis
    if last.cmp(end) < 0:
        after = end.torelative("months since {}".format(last)).value
    else:
        after = 0

    return int(before), int(after)
def grow(data, start, end):
    """Grow a variable along time so that it spans ``start`` to ``end``.

    ``data`` is a cdms2 variable with a monthly time axis; ``start`` and
    ``end`` are the dates the result should cover. The months added before
    and after the original data (as counted by ``missingMonths``) are
    masked. The result keeps the id of ``data``, its non-time axes and its
    original dimension order; its time axis is a new monthly axis with
    units "months since <start>".
    """
    # Remember the incoming dimension order, then work with time first
    order = data.getOrder(ids=True)
    data = data(order=('t...'))
    tc = data.getTime().asComponentTime()
    b, e = missingMonths(tc, start, end)

    # Prepare the new data: fully masked, with room for the padding
    n = data.shape[0]
    sh = list(data.shape)
    sh[0] = n + b + e
    new = MV2.ones(sh)
    new = MV2.masked_greater(new, 0.)  # mask everywhere

    # Copy the original values after the leading padding; an explicit stop
    # index avoids special-casing e == 0 (where [b:-e] would be empty)
    new[b:b + n] = data[:]

    new_time = cdms2.createAxis(numpy.arange(sh[0]))
    new_time.units = "months since {}".format(start)
    new_time.id = "time"
    new_time.designateTime()
    cdutil.setAxisTimeBoundsMonthly(new_time)
    new.setAxis(0, new_time)

    # Carry over the non-time axes. The original passed an undefined name
    # here, which raised NameError for any variable with extra dimensions.
    for i, axis in enumerate(data.getAxisList()[1:], start=1):
        new.setAxis(i, axis)

    new.id = data.id
    # Give the result back in the caller's dimension order
    return new(order=order)
def growDatasets(*arrays):
    """Yield each input variable grown to the common time span.

    Takes any number of cdms2 transient variables, finds the earliest
    first time and the latest last time among them, and yields
    ``grow(variable, earliest, latest)`` for each input in order, so that
    all results start and end at the same time with the added months
    filled with missing values.
    """
    earliest = None
    latest = None
    for variable in arrays:
        # Compare on component times
        times = variable.getTime().asComponentTime()
        head, tail = times[0], times[-1]
        # Keep the earliest start seen so far
        if earliest is None or head.cmp(earliest) <= 0:
            earliest = head
        # Keep the latest end seen so far
        if latest is None or tail.cmp(latest) >= 0:
            latest = tail

    # Grow every input to the common span
    for variable in arrays:
        yield grow(variable, earliest, latest)
# growDatasets yields one grown variable per input, in the same order
data1, data2 = tuple(growDatasets(data1, data2))
# Show the common shape, then the first and last value of each variable
print(
    data1.shape,
    data1[0], data1[-1],
    data2[0], data2[-1]
)