# Calculates linear regression coefficients (slope, r-value...) between two lists of rasters or arrays
# Input can be a list of rasters (i.e. their paths) or numpy arrays
# Raster input will return rasters and save to disk, array input will return a tuple containing arrays

# The script was specifically written to take different coordinate systems, areas and resolutions of rasters into account
# Each list should however contain only rasters with the same cs, area and resolution
# If the resolution of both lists is different, the coarser one will be upsampled
# If the coordinate system (cs) is different, the one from the second list is used (list1 will be reprojected)
# If the extent is different, only the shared extent is returned
# Non overlapping raster and raster lists without cs will return an error
#
# If the input are arrays, different resolutions will be resampled, however it is assumed they cover the same area in total
#
# outFol: a folder called "scratch" within may be automatically created for intermediate steps
# Note: A text file will be created, containing specific infos to each executed step



import os, gdal, datetime
import numpy as np
from gdalconst import *


# Function doing the actual calculation
def linReg(inList1,inList2,outFol=".../myOutFol/"):

#test if both lists contain the same number of datasets/rasters
if len(inList1) != len(inList2):
print("Aborting! Input lists must contain same amount of datasets/rasters")
return

# Txt-file to log processing steps
infoTxt = open(outFol + "_Info.txt", "a")
#First line is creation time and date
nT = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
infoTxt.write(("Process started [y-m-d]: " + nT + "\n" +
"Output Folder is: " + outFol + "\n\n"))

iType = 0 #input type, 0 is default means inputs are arrays

#If input is tif raster, convert to array before further use
if type(inList1[0]) != numpy.ndarray:
iType = 1
refRas1 = inList1[0]
refRas2 = inList2[0]

firstRasGDAL1 = gdal.Open(refRas1, GA_ReadOnly)
firstRasGDAL2 = gdal.Open(refRas2, GA_ReadOnly)

proj1 = osr.SpatialReference()
proj1.ImportFromWkt(firstRasGDAL1.GetProjectionRef())
proj2 = osr.SpatialReference()
proj2.ImportFromWkt(firstRasGDAL2.GetProjectionRef())


infoTxt.write("Input: Raster Files \n"
"First Raster in First list: " + refRas1 +
"\t Proj4: " + proj1.ExportToProj4() + "\n with cols/rows: " +
str(firstRasGDAL1.RasterXSize) + "/" +
str(firstRasGDAL1.RasterYSize) + "\n" +
"First Raster in Second list: " + refRas2 +
"\t Proj4: " + proj2.ExportToProj4() + "\n with cols/rows: " +
str(firstRasGDAL2.RasterXSize) + "/" +
str(firstRasGDAL2.RasterYSize) + "\n\n")

#Test if either projected or geographic coordinate systems is present
if not ((proj1.IsProjected() or proj1.IsGeographic()) and \
(proj2.IsProjected() or proj2.IsGeographic())):
print("Coordinate System in one of the series is missing!")
infoTxt.write("Aborted! Coordinate System in one of the series is missing!")
infoTxt.close()
return None


# if raster have a different coordinate systems and only partially overlap:
if firstRasGDAL1.GetProjectionRef() != firstRasGDAL2.GetProjectionRef():
intersection = my_intersect(refRas1,refRas2)
target_proj = proj1

infoTxt.write("Different Coordinate Systems -> Coordinate Systems changed to " +
proj1.ExportToProj4() + "\n" +
"Rasters intersect at [xmin ymin xmax ymax]: " +
str(intersection[0]) + " " + str(intersection[1]) + " " +
str(intersection[2]) + " " + str(intersection[3]) + "\n" +
"at " + outFol + "Scratch/ \n")

#reset list1 and fill it with arrays, cropped to intersection
nameList1 = [] #stores new pathnames
list1 = []
for dataset in inList1:
newTif1 = reproject_dataset(dataset, refRas1, te=intersection, outFol = outFol + "Scratch/")
nameList1.append(newTif1)
for x in nameList1:
dataset = gdal.Open(x, GA_ReadOnly)
cols = firstRasGDAL1.RasterXSize
rows = firstRasGDAL1.RasterYSize
array = dataset.ReadAsArray(0, 0, cols, rows)
list1.append(array)
infoTxt.write("First Rasters now have cols/rows: " + str(cols) + "/" +
str(rows) + "\n")

nameList2 = [] #stores new pathnames
list2 = []
for dataset in inList2:
newTif2 = reproject_dataset(dataset, refRas1, te=intersection, outFol = outFol + "Scratch/")
nameList2.append(newTif2)

#Define new first raster, as reprojection might have changed things
nameList2FirstRas = nameList2[0] #specs might have changed
nameList2FirstGDAL = gdal.Open(nameList2FirstRas, GA_ReadOnly)

for x in nameList2:
dataset = gdal.Open(x, GA_ReadOnly)
cols = nameList2FirstGDAL.RasterXSize
rows = nameList2FirstGDAL.RasterYSize
array = dataset.ReadAsArray(0, 0, cols, rows)
list2.append(array)

infoTxt.write("Second Rasters now have cols/rows: " + str(cols) + "/" +
str(rows) + "\n\n"
"New names of List1: " + str(nameList1) + "\n",
"New names of List2: " + str(nameList2) + "\n\n")




# raster share the same Coordinate System but not the same extent
elif (firstRasGDAL1.GetProjectionRef() == firstRasGDAL2.GetProjectionRef()) and \
(GetExtent(firstRasGDAL1) != GetExtent(firstRasGDAL2)):

intersec = my_intersect(firstRasGDAL1,firstRasGDAL2)
gdalTranslate = r'C:\Program Files\GDAL\gdal_translate.exe'

infoTxt.write("ELIF linReg2 invoked: Rasters share the same" +
"Coordinate System but not the same extent \n" +
"Rasters intersect at [xmin ymin xmax ymax]: " +
str(intersec[0]) + " " + str(intersec[1]) + " " +
str(intersec[2]) + " " + str(intersec[3]) + "\n")


newList1 = []
newList2 = []

# create new files (on HDD) with shared extent from FIRST input list
for file in inList1:
chkdir2(outFol + "Cropped/")
outPath = outFol + "Cropped/" + file[-13:]

cmd = "-of GTiff -projwin " + str(intersec[0]) + " " + \
str(intersec[3]) + " " + \
str(intersec[2]) + " " + \
str(intersec[1]) + " "

fullCmd = ' '.join([gdalTranslate, cmd, file, outPath])
child = subprocess.Popen(fullCmd,stdout=subprocess.PIPE)
child.wait() #Wait for subprocess to finish, or pyhton continues and returns error when output is not there yet
newList1.append(outPath)

# create new files (on HDD) with shared extent from SECOND input list
for file in inList2:
outPath = outFol + "Cropped/" + "2_" + file[-13:]

cmd = "-of GTiff -projwin " + str(intersec[0]) + " " + \
str(intersec[3]) + " " + \
str(intersec[2]) + " " + \
str(intersec[1]) + " "

fullCmd = ' '.join([gdalTranslate, cmd, file, outPath])
child = subprocess.Popen(fullCmd,stdout=subprocess.PIPE)
child.wait()
newList2.append(outPath)


# read new created rasters in as arrays
firstNewRas1 = newList1[0]
firstNewRas2 = newList2[0]

firstNewRasGDAL1 = gdal.Open(firstNewRas1, GA_ReadOnly)
firstNewRasGDAL2 = gdal.Open(firstNewRas2, GA_ReadOnly)

cols1 = firstNewRasGDAL1.RasterXSize
rows1 = firstNewRasGDAL1.RasterYSize
cols2 = firstNewRasGDAL2.RasterXSize
rows2 = firstNewRasGDAL2.RasterYSize

infoTxt.write("Raster cropped to shared extent and saved at " +
outFol + "Cropped/ \n" +
"First Rasters now have cols/rows: " +
str(cols1) + "/" + str(rows1) + "\n" +
"Second Rasters now have cols/rows: " +
str(cols2) + "/" + str(rows2) + "\n\n")

list1 = []
for file in newList1:
dataset = gdal.Open(file, GA_ReadOnly)
array = dataset.ReadAsArray(0, 0, cols1, rows1)
list1.append(array)

list2 = []
for file in newList2:
dataset = gdal.Open(file, GA_ReadOnly)
array = dataset.ReadAsArray(0, 0, cols2, rows2)
list2.append(array)



# it is assumes that rasters may vary in cell size, but share the same extent
else:
# open first rasters for meta information, done for both lists indiviually
cols1 = firstRasGDAL1.RasterXSize
rows1 = firstRasGDAL1.RasterYSize

cols2 = firstRasGDAL2.RasterXSize
rows2 = firstRasGDAL2.RasterYSize

list1 = [] # reset list1 and fill it with arrays
for x in inList1:
dataset = gdal.Open(x, GA_ReadOnly)
array = dataset.ReadAsArray(0, 0, cols1, rows1)
list1.append(array)

list2 = [] # reset list2 and fill it with arrays
for x in inList2:
dataset = gdal.Open(x, GA_ReadOnly)
array = dataset.ReadAsArray(0, 0, cols2, rows2)
list2.append(array)


# if input are arrays, use them without any changes
else:
infoTxt.write("Input are arrays with cols/rows: " +
str(inList1[0].RasterXSize) + "/" + str(inList1[1].RasterYSize)
+ "\n")
list1 = inList1
list2 = inList2

list1a = list1
list2a = list2

# Test if dimensions of both input array series are the same. If not, resize the smaller (i.e. coarser one)
# one with the zoom function
# http://stackoverflow.com/questions/13242382/resampling-a-numpy-array-representing-an-image
if list1a[0].shape != list2a[0].shape:

#set order for spline interpolation
splOrder = 3

size1 = list1[0].shape[0] * list1[0].shape[1]
size2 = list2[0].shape[0] * list2[0].shape[1]

infoTxt.write("Zoom function (scipy.ndimage.zoom) invoked because" +
"shape of arrays is list1 is " +
str(list1a[0].shape) + " and in list2 " + str(list2a[0].shape)+"\n")

if size1 > size2:
factorx = list1[0].shape[0] / list2[0].shape[0]
factory = list1[0].shape[1] / list2[0].shape[1]
list2a = []
for x in list2:
zoomArr = scipy.ndimage.zoom(x, (factorx,factory), order=splOrder)
list2a.append(zoomArr)

infoTxt.write("list2 arrays altered by factor for x and y: " +
str(factorx) + " " + str(factory) + "\n" +
"New shape of list2 arrays is " + str(list2a[0].shape) + "\n" +
"Order of spline interpolation was: " + str(splOrder) + "\n\n")

else:
factorx = list2[0].shape[0] / list1[0].shape[0]
factory = list2[0].shape[1] / list1[0].shape[1]
list1a = []
for x in list1:
zoomArr = scipy.ndimage.zoom(x, (factorx,factory), order=splOrder)
list1a.append(zoomArr)

infoTxt.write("list1 arrays altered by factor for x and y: " +
str(factorx) + " " + str(factory) + "\n" +
"New shape of list1 arrays is " + str(list1a[0].shape) + "\n" +
"Order of spline interpolation was: " + str(splOrder) + "\n\n")




# initialise empty arrays to be filled by output values
slopeAr,intcptAr,rvalAr,pvalAr,stderrAr = [np.zeros(list1a[0].shape) for _ in range(5)]

xx = 1
startTime = datetime.datetime.now()
print("Process Starting")
# iterate over every location in the arrays and calculate linreg
for yCo in range(list1a[0].shape[0]):
for xCo in range(list1a[0].shape[1]):

valList1 = []
for arrayX1 in list1a:
valList1.append(arrayX1[yCo,xCo])

valList2 = []
for arrayX2 in list2a:
valList2.append(arrayX2[yCo,xCo])

slope, intcpt, rval, pval, stderr = scipy.stats.linregress(valList1,valList2)

dPoints = list1a[0].shape[0] * list1a[0].shape[1]
if xx%100000 == 0:

nowTime = datetime.datetime.now()

#Overall time since start
timedelta = nowTime - startTime
timedeltaSec = timedelta.seconds
timedeltaMin = timedeltaSec/60
allRate = xx/timedeltaSec


print(xx, "/", dPoints, "done at average: %.2f /s " % allRate,
"Total runtime: %.2f min" % timedeltaMin )


slopeAr[yCo,xCo] = slope
intcptAr[yCo,xCo] = intcpt
rvalAr[yCo,xCo] = rval
pvalAr[yCo,xCo] = pval
stderrAr[yCo,xCo] = stderr

xx = xx+1

if iType == 0: # if input was array, return array... if was raster, return raster
outTuple = (slopeAr, intcptAr, rvalAr, pvalAr, stderrAr)
return outTuple
else:
if size2 > size1: # if one of the tifs was resized, the ouput must use the new dimensions
array_to_raster(inList2[0],slopeAr,outFol + "slope.tif")
array_to_raster(inList2[0],intcptAr,outFol + "intcpt.tif")
array_to_raster(inList2[0],rvalAr,outFol + "rval.tif")
array_to_raster(inList2[0],pvalAr,outFol + "pval.tif")
array_to_raster(inList2[0],stderrAr,outFol + "stderr.tif")

else:
array_to_raster(inList1[0],slopeAr,outFol + "slope.tif")
array_to_raster(inList1[0],intcptAr,outFol + "intcpt.tif")
array_to_raster(inList1[0],rvalAr,outFol + "rval.tif")
array_to_raster(inList1[0],pvalAr,outFol + "pval.tif")
array_to_raster(inList1[0],stderrAr,outFol + "stderr.tif")

infoTxt.write("Process finished [y-m-d]: " + nT + "\n" +
"Total number of locations: " + str(dPoints) +
" with " + str(len(inList1)) + " dimensions")
infoTxt.close()
print("Processing Done! See output File " + outFol + "_Info.txt for Details")




# Function that calculates a new extent from the intersection of two rasters
# Original from http://gis.stackexchange.com/questions/16834/how-to-add-different-sized-rasters-in-gdal-so-the-result-is-only-in-the-intersec
# Input can be gdal datasets or strings to tif files
# Returns 4 coordinate pairs. Coordinate systems must match! If not input files must be a tif
# input1 would be reprojected

def my_intersect(input1,input2):

# test if input is a string (filepath) or already gdal Dataset
if type(input1) == str:
gd1 = gdal.Open(input1, GA_ReadOnly)
else:
gd1 = input1

if type(input2) == str:
gd2 = gdal.Open(input2, GA_ReadOnly)
else:
gd2 = input2

# extract projection information of both datasets
proj1 = osr.SpatialReference()
proj1.ImportFromWkt(gd1.GetProjectionRef())
proj2 = osr.SpatialReference()
proj2.ImportFromWkt(gd2.GetProjectionRef())

if (gd1.GetProjectionRef() != gd2.GetProjectionRef()):
projectedRas = reproject_dataset(input1,input2)
gd1 = gdal.Open(projectedRas, GA_ReadOnly)

gt1 = gd1.GetGeoTransform()
gt2 = gd2.GetGeoTransform()

# calculate extents of both datasets
r1 = [gt1[0], gt1[3], gt1[0] + (gt1[1] * gd1.RasterXSize), gt1[3] + (gt1[5] * gd1.RasterYSize)]
r2 = [gt2[0], gt2[3], gt2[0] + (gt2[1] * gd2.RasterXSize), gt2[3] + (gt2[5] * gd2.RasterYSize)]

# return minimum shared extent
intersection = [max(r1[0], r2[0]), #xmin
max(r1[3], r2[3]), #ymin
min(r1[2], r2[2]), #xmax
min(r1[1], r2[1]), #ymax
]

return intersection