# Basically, this takes a folder of 2D-raster, stacks them, and then returns new rasters
# containig pixel-by-pixel trend analyses along the 3rd axis (r-value, slope, etc.)

import os, gdal
import numpy as np
from gdalconst import *


#####################################################
################## FUNCTIONS ########################
#####################################################

# Function that reads an entire folder of rasters as arrays and stores them in a list,
# default raster input format is GeoTiff
# printOut is only used to debug the code
def tiffToarray(inFol, printOut = False, inFormat = "tif"):

# Use the first raster as a blueprint for the raster size of all rasters
for allRasters in os.listdir(inFol):
if allRasters[-3:] == "tif":
firstRasStr = inFol + allRasters
break
firstRasGDAL = gdal.Open(firstRasStr, GA_ReadOnly)
cols = firstRasGDAL.RasterXSize
rows = firstRasGDAL.RasterYSize

finList = [] # initialise list that will store all arrays to be returned in the end
for files in os.listdir(inFol):
if files[-3:] == inFormat:
if printOut:
print(files)
fileIn = inFol + files
dataset = gdal.Open(fileIn, GA_ReadOnly)
array = dataset.ReadAsArray(0, 0, cols, rows)

finList.append(array)

return finList



# Function that calculates linear regression and Mann-Kendall-pValue coefficients for each raster pixel against continous time steps
# Input must be a list of arrays
# Benchmark: a time series of 15 rasters, with each having 321x161 pix (51681), the entire process takes
# about 19.1 seconds in total, with 5.8s for linReg and 13.2s for MK

def linReg(inList):

#equally spaced time steps by length of inList
timeList = np.asarray(list(range(len(inList))))
stepLen = len(inList)

#stack input arrays to make a 3D array
dstack = np.dstack((inList))
dstack1D = dstack.reshape(-1)

# Break down dstack1D into a list, each element in list contains the single steps
# of one pixel -> List length is equal to number of pixels
# List can be used to use Pythons map() function
dstackList = [dstack1D[i:i+stepLen] for i in range(0, len(dstack1D), stepLen)]

#initialise empty arrays to be filled by output values, arrays are 1D
slopeAr,intcptAr,rvalAr,pvalAr,stderrAr,mkPAr = [np.zeros(inList[0].reshape(-1).shape) for _ in range(6)]

# Use map() to iterate over each pixels timestep values for linear reg and Mann.Kendall
# Method is about 10% faster than using 2 for-loops (one for x- and y-axis)
outListReg = list(map( (lambda x: scipy.stats.linregress(timeList, x)) , dstackList))
outListMK = list(map( (lambda x: mk_test(x)) , dstackList))


for k in range(len(outListReg)):
slopeAr[k] = outListReg[k][0]
intcptAr[k] = outListReg[k][1]
rvalAr[k] = outListReg[k][2]
pvalAr[k] = outListReg[k][3]
stderrAr[k] = outListReg[k][4]

mkPAr[k] = outListMK[k][1]

outShape = inList[0].shape
outTuple = (slopeAr.reshape(outShape),
intcptAr.reshape(outShape),
rvalAr.reshape(outShape),
pvalAr.reshape(outShape),
stderrAr.reshape(outShape),
mkPAr.reshape(outShape))

return outTuple



# Mann-Kendall-Test
# Originally from: http://www.ambhas.com/codes/statlib.py
# I've changed the script though, now its about 35x faster than the original (depending on time series length)
# Input x must be a 1D list/array of numbers

def mk_test(x, alpha = 0.05):

n = len(x)

# calculate S
listMa = np.matrix(x) # convert input List to 1D matrix
subMa = np.sign(listMa.T - listMa) # calculate all possible differences in matrix
# with itself and save only sign of difference (-1,0,1)
s = np.sum( subMa[np.tril_indices(n,-1)] ) # sum lower left triangle of matrix

# calculate the unique data
# return_counts=True returns a second array that is equivalent to tp in old version
unique_x = np.unique(x, return_counts=True)
g = len(unique_x[0])

# calculate the var(s)
if n == g: # there is no tie
var_s = (n*(n-1)*(2*n+5))/18
else: # there are some ties in data
tp = unique_x[1]
var_s = (n*(n-1)*(2*n+5) + np.sum(tp*(tp-1)*(2*tp+5)))/18

if s>0:
z = (s - 1)/np.sqrt(var_s)
elif s == 0:
z = 0
elif s<0:
z = (s + 1)/np.sqrt(var_s)

# calculate the p_value
p = 2*(1-scipy.stats.norm.cdf(abs(z))) # two tail test
h = abs(z) > scipy.stats.norm.ppf(1-alpha/2)

return h, p



# Function that converts a numpy array to a GeoTIFF and saves it to disk, needs an existing GeoTIFF file as a blueprint
# Changed after the original
# http://gis.stackexchange.com/questions/58517/python-gdal-save-array-as-raster-with-projection-from-other-file
# inTiff is an exisiting GeoTiff file, the attributes from this file are used to create the output
# array is the array that will be saved as a tiff
# outFile is the path and name of the desired output GeoTiff

def array_to_raster(inTiff,array,outFile):

inDataset = gdal.Open(inTiff, GA_ReadOnly)

# You need to get those values like you did.
x_pixels = inDataset.RasterXSize # number of pixels in x
y_pixels = inDataset.RasterYSize # number of pixels in y
PIXEL_SIZE = inDataset.GetGeoTransform()[1] # size of the pixel...
x_min = inDataset.GetGeoTransform()[0]
y_max = inDataset.GetGeoTransform()[3] # x_min & y_max are like the "top left" corner.
wkt_projection = inDataset.GetProjectionRef()

driver = gdal.GetDriverByName('GTiff')

outDataset = driver.Create(
outFile,
x_pixels,
y_pixels,
1,
gdal.GDT_Float32, )

outDataset.SetGeoTransform((
x_min, # 0
PIXEL_SIZE, # 1
0, # 2
y_max, # 3
0, # 4
-PIXEL_SIZE))

outDataset.SetProjection(wkt_projection)
outDataset.GetRasterBand(1).WriteArray(array)
outDataset.FlushCache() # Write to disk.
return outDataset, outDataset.GetRasterBand(1) #If you need to return, remember to return also the dataset because the band don`t live without dataset



#####################################################
################## USAGE EXAMPLE ####################
#####################################################

# Example
# Take rasters with annual values and return statistical time series trend indices
inFol = '.../Annual/' # Input folder containing GeoTiffs
outFol = '.../OutStats/' # Output folder, will contain statistical GeoTiffs after script is run

inArrays = tiffToarray(inFol) #convert GeoTiffs to numpy arrays
outTuple = linReg(inArrays) #run linear regression and return arrays of slope, r etc. as a tuple

# get first raster in list and use it as a blueprint for size etc.
for allRasters in os.listdir(inFol):
if allRasters[-3:] == "tif":
firstRasStr = inFol + allRasters
break

# define the names of the statistical output rasters
outNames = ("An_slope.tif", "An_intcp.tif", "An_rval.tif", "An_pval.tif", "An_stderr.tif",
"An_mkP.tif")

# convert the arrays back to rasters and save them to disk
for fname, i in zip(outNames, range(len(outNames))):
array_to_raster(firstRasStr,outTuple[i],outFol+fname)