diff --git a/Stations/AllStations.py b/Stations/AllStations.py new file mode 100644 index 0000000000000000000000000000000000000000..e878385a0f8b78dbe66cd82cd04a87eeeed45174 --- /dev/null +++ b/Stations/AllStations.py @@ -0,0 +1,481 @@ +# +import numpy as np +import datetime +import sys +from netCDF4 import Dataset +from netCDF4 import stringtochar, chartostring, stringtoarr +# +import Station as ST +import ConversionCalculations as CC +# +class AllStations : + def __init__(self, Source="") : + self.stations = [] + self.nbstations = 0 + self.hist = None + self.nbmonth = 0 + self.source = Source + self.mergersource = None + return + # + def add(self, stn) : + # + # Only station if length of data larger than 0 + # + if stn.Length > 0 : + self.stations.append(stn) + # Manage source of merged data by building the union + if self.mergersource is not None : + self.mergersource = list(set(self.mergersource).union(stn.mergersource)) + else : + self.mergersource = stn.mergersource + # + self.nbstations += 1 + return + # + # Get some time information from the list of stations + # + def FirstDate(self) : + return min([stn.Time[0] for stn in self.stations]) + def LastDate(self) : + return max([stn.Time[-1] for stn in self.stations]) + # + # Function to dump all stations into a NetCDF file + # + def dumpnetcdf(self, dates, filename, hist) : + # + self.hist = hist + # + FillValue = 1.0e+20 + self.nbmonth = len(dates) + # Maximum options for original data + if self.mergersource is not None : + self.maxalt = len(self.mergersource)-1 + else : + self.maxalt = 0 + # + # Compute total number of alternatives + # + altloc=np.empty((self.maxalt,self.nbstations), int) + altloc[:,:] = -1 + self.altsz=0 + for i,stn in enumerate(self.stations) : + if stn.mergersource is not None : + for src in stn.mergersource[1:] : + ia = self.mergersource.index(src) + altloc[ia-1,i] = self.altsz + self.altsz += 1 + # + # Transfer data to matrices along the new time axis. 
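+        # Annotation, not in the original code : CC.tonewtime places each
+        # station's record onto the common monthly axis "dates" by matching
+        # datetime values ; months outside the station's record stay np.nan.
+        # Expected behaviour, sketched with monthly datetimes :
+        #   tonewtime([Feb,Mar], [2.,3.], [Jan,Feb,Mar,Apr]) -> [nan, 2., 3., nan]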
+ # + original=np.empty((self.nbmonth,self.nbstations), float) + original[:,:] = np.nan + alth=np.empty((self.nbmonth,self.altsz), float) + alth[:,:] = np.nan + for i,stn in enumerate(self.stations) : + if isinstance(stn.Original,np.ndarray) : + original[:,i] = CC.tonewtime(stn.Time, stn.Original, dates) + else : + original[:,i] = CC.tonewtime(stn.Time, stn.Original[0], dates) + icnt = 1 + for ia in range(len(stn.mergersource)-1) : + if altloc[ia,i] >= 0 : + alth[:,altloc[ia,i]] = CC.tonewtime(stn.Time, stn.Original[icnt], dates) + icnt += 1 + # + flags=np.empty((self.nbmonth,self.nbstations), float) + flags[:,:] = np.nan + for i,stn in enumerate(self.stations) : + if stn.Flag is not None : + flags[:,i] = CC.tonewtime(stn.Time, stn.Flag, dates) + # + merged2d=np.empty((self.nbmonth,self.nbstations), float) + merged2d[:,:] = np.nan + for i,stn in enumerate(self.stations) : + if stn.Merged is not None : + merged2d[:,i] = CC.tonewtime(stn.Time, stn.Merged, dates) + # + # + # Open NetCDF file to be filled + # + out=Dataset(filename, 'w') + out.history = hist + out.metadata = "Missing metadata are replaced by XXX or -999" + out.Source = self.source + if self.mergersource is not None : + for i,src in enumerate(self.mergersource) : + att="MergeSource"+str(i+1) + exec('out.%s = "%s"' % (att,src)) + out.ConversionDate = datetime.date.today().isoformat() + # + # Create dimensions + # + strlen=60 + out.createDimension('time', self.nbmonth) + out.createDimension('stations', self.nbstations) + if self.maxalt > 0 : + out.createDimension('maxalt', self.maxalt) + out.createDimension('z', self.altsz) + out.createDimension('str', strlen) + # + time=out.createVariable('time', 'd', ('time',), fill_value=FillValue) + time.units="seconds since "+self.FirstDate().isoformat().replace("T"," ") + time.long_name="Time axis" + for i in range(self.nbmonth): + time[i]=((dates[i]-self.FirstDate()).days)*86400 + # + # Set netCDF variables for station information + # + nb=out.createVariable('number', 'i', ('stations',)) + nb.long_name="GRDC Station number" + nex=out.createVariable('next', 'i', ('stations',)) + nex.long_name="Downstream GRDC Station" + wmoreg=out.createVariable('WMOreg', 'i', ('stations',)) + wmoreg.long_name="WMO region" + wmosubreg=out.createVariable('WMOsubreg', 'i', ('stations',)) + wmosubreg.long_name="WMO sub-region" + stname=out.createVariable('name', 'c', ('stations','str',)) + stname.long_name="Station Name" + riv=out.createVariable('river', 'c', ('stations','str',)) + riv.long_name="River Name" + ctry=out.createVariable('country', 'c', ('stations','str',)) + ctry.long_name="Country of station" + filedate=out.createVariable('FileDate', 'c', ('stations','str',)) + filedate.long_name="File generation date" + lastupdate=out.createVariable('LastUpdate', 'c', ('stations','str',)) + lastupdate.long_name="Last update of data" + lon=out.createVariable('lon', 'f', ('stations',)) + lon.units="degrees_east" + lon.long_name="Longitude" + lat=out.createVariable('lat', 'f', ('stations',)) + lat.units="degrees_north" + lat.long_name="Latitude" + area=out.createVariable('area', 'f', ('stations',)) + area.units="km^2" + area.long_name="Catchment Area" + altitude=out.createVariable('altitude', 'f', ('stations',)) + altitude.units="m" + altitude.long_name="Altitude of Station" + # + # Add the data to the variables + # + for i,stn in enumerate(self.stations) : + nb[i]=stn.Number + wmoreg[i]=stn.WMOregion + wmosubreg[i]=stn.WMOsubregion + nex[i]=stn.NextStation + if isinstance(stn.Name, str) : + 
stname[i,:]=stringtochar(np.array(stn.Name.replace("ö","o"),dtype="S60"))
+            else :
+                stname[i,:]=stringtoarr(stn.Name.tolist(), strlen)
+            if isinstance(stn.River, str) :
+                riv[i,:]=stringtochar(np.array(stn.River,dtype="S60"))
+            else :
+                riv[i,:]=stringtoarr(stn.River.tolist(), strlen)
+            if isinstance(stn.Country, str) :
+                ctry[i,:]=stringtochar(np.array(stn.Country,dtype="S60"))
+            else :
+                ctry[i,:]=stringtoarr(stn.Country.tolist(), strlen)
+            if isinstance(stn.FileDate, str) :
+                filedate[i,:]=stringtochar(np.array(stn.FileDate,dtype="S60"))
+            else :
+                filedate[i,:]=stringtoarr(stn.FileDate.tolist(), strlen)
+            if isinstance(stn.LastUpdate, str) :
+                lastupdate[i,:]=stringtochar(np.array(stn.LastUpdate,dtype="S60"))
+            else :
+                lastupdate[i,:]=stringtoarr(stn.LastUpdate.tolist(), strlen)
+            lon[i]=stn.Location[0]
+            lat[i]=stn.Location[1]
+            area[i]=stn.Area
+            altitude[i]=stn.Altitude
+        #
+        #
+        # Original Data
+        #
+        original[np.isnan(original)]=FillValue
+        hydro=out.createVariable('hydrographs', 'f', ('time','stations',), fill_value=FillValue)
+        hydro.units="m^3/s"
+        hydro.long_name="Original Hydrographs"
+        hydro.coordinates="time stations"
+        hydro[:,:]=original[:,:]
+        #
+        # Hydrographs from other sources
+        #
+        if self.maxalt > 0 :
+            altindex=out.createVariable('alternative_index', 'i', ('maxalt','stations',))
+            altindex.long_name="Index of alternative hydrographs in althydro field"
+            altindex[:]=altloc
+            althydro=out.createVariable('alt_hydrograph', 'f', ('time','z',), fill_value=FillValue)
+            althydro.units="m^3/s"
+            althydro.long_name="Hydrographs from other sources used to build the merged product"
+            althydro.coordinates="time stations"
+            alth[np.isnan(alth)]=FillValue
+            althydro[:,:] = alth[:,:]
+        #
+        # Flags
+        #
+        if np.count_nonzero(np.isnan(flags)) < flags.size :
+            flag=out.createVariable('flags', 'f', ('time','stations',), fill_value=FillValue)
+            flag.units="-"
+            flag.long_name="GRDC Flag : Percentage of valid values used for calculation from daily data"
+            flag.coding1="80 < flag <= 100 : merge = corr (Average of daily values used)"
+            flag.coding2="0 < flag <= 80 : merge = hydro (Monthly values used)"
+            flag.coordinates="time stations"
+            flags[np.isnan(flags)]=FillValue
+            flag[:,:]=flags[:,:]
+        #
+        # Merged Data
+        #
+        if np.count_nonzero(np.isnan(merged2d)) < merged2d.size :
+            merge=out.createVariable('mergedhydro', 'f', ('time','stations',), fill_value=FillValue)
+            merge.units="m^3/s"
+            merge.long_name="Hydrographs - Best estimate"
+            merge.coordinates="time stations"
+            merged2d[np.isnan(merged2d)]=FillValue
+            merge[:,:]=merged2d[:,:]
+        #
+        out.close()
+        #
+        return
+#
+#
+#
+    def fetchnetcdf(self, filename) :
+        ncin=Dataset(filename, 'r')
+        #
+        self.nbstations = ncin.variables["number"].shape[0]
+        self.nbmonth = ncin.variables["time"].shape[0]
+        tmpmerge = []
+        for (a,v) in ncin.__dict__.items():
+            if a == "history" :
+                self.hist = v
+            if a == "Source" :
+                self.source = v
+            if a.find("MergeSource") == 0 :
+                tmpmerge.append(v)
+        if len(tmpmerge) > 0 :
+            self.mergersource = tmpmerge
+        #
+        for i in range(self.nbstations) :
+            self.stations.append(ST.Station("netcdf", ncin, index=i))
+        #
+        ncin.close()
+        #
+        return
+#
+# Functions to get all stations which are close to each other.
+# Assume we have a Location for all stations (else they make little sense !).
+#
+def closeto(A, B, distance=5000.)
: + AclosetoB = [] + BclosetoA = [] + d=[] + for Ai,Astn in enumerate(A.stations) : + d=np.array([CC.distance_haversine(Astn.Location, Bstn.Location) for Bstn in B.stations]) + if np.count_nonzero(np.array(d) < distance) > 0 : + AclosetoB.append(Ai) + BclosetoA.append(list(np.where(np.array(d) < distance)[0])) + return AclosetoB, BclosetoA +# +# Identify pairs in A and B which could be the same station. +# The choice tells which source is to be used for the metadata. +# +def PairAtoB (A, B, AclosetoB, BclosetoA, mindist, maxerr, interactive=False) : + pairs = [] + choice = [] + for i,Ai in enumerate(AclosetoB) : + Allscore = [] + Alldist = [] + Alluperr = [] + for Bi in BclosetoA[i] : + # + score=[] + # + dist = np.abs(CC.distance_haversine(A.stations[Ai].Location, B.stations[Bi].Location)) + if dist > 0 : + score.append(min(mindist/dist,1.0)) + else : + score.append(1.0) + # + if A.stations[Ai].Area > ST.defint and B.stations[Bi].Area > ST.defint : + uperr = np.abs((A.stations[Ai].Area-B.stations[Bi].Area)/A.stations[Ai].Area*100.) + if uperr > 0 : + score.append(min(maxerr/uperr,1.0)) + else : + score.append(1.0) + else : + uperr = np.nan + # Simplify river names. river, rio in other languages need to be added. + Ariver = A.stations[Ai].River.tolist().lower().replace("river","").replace("rio","") + Briver = B.stations[Bi].River.tolist().lower().replace("river","").replace("rio","") + # + score.append(CC.scorestring(A.stations[Ai].Name.tolist(), B.stations[Bi].Name.tolist())) + score.append(CC.scorestring(Ariver, Briver)) + # + if np.mean(np.array(score)) >= 0.9 : + print("Merge +++> ", A.stations[Ai].Name, "---", B.stations[Bi].Name, " Distance [m] ", dist, \ + "UpStr. area [%] ", uperr, " Mean score : ",np.mean(np.array(score))) + pairs.append([Ai,Bi]) + choice.append("A") + else : + Allscore.append(score) + Alldist.append(dist) + Alluperr.append(uperr) + # + # If interactive the user can choose between the options. + # + if len(Allscore) == len(BclosetoA[i]) and interactive : + print("========================================================================") + print("= Find a matching B =") + print("========================================================================") + print("Stations A : ", A.stations[Ai].Name, " on ", A.stations[Ai].River) + print("Stations A : Location = ", A.stations[Ai].Location) + print("Stations A : Upstream area [km^2] = ", A.stations[Ai].Area) + print("==> Options are ! ") + opt="" + for j,Bi in enumerate(BclosetoA[i]) : + print("Stations B"+str(j)+" : Distance to A [m] ", Alldist[j], "UpStr. 
area [%] ", Alluperr[j], \ + " Mean score : ",np.mean(np.array(Allscore[j]))) + print("Stations B"+str(j)+" : ", B.stations[Bi].Name, " on ", B.stations[Bi].River) + print("Stations B"+str(j)+" : Location = ", B.stations[Bi].Location) + print("Stations B"+str(j)+" : Upstream area [km^2] = ", B.stations[Bi].Area) + print(" ") + opt = opt+" B"+str(j) + print("Select best matching station in the B") + Bopt = input(opt+" or No (nothing) : ") + if len(Bopt) > 0 and Bopt[0].capitalize() == "B" : + try : + intsel = int(Bopt[-1]) + except : + intsel = -1 + # + if intsel < len(BclosetoA[i]) : + Bi = BclosetoA[i][intsel] + meta = input("Which metadata should be kept [A]/B : ") + if meta.capitalize() == "A" : + print("==> "+Bopt+" and Metadata of A are chosen") + pairs.append([Ai,Bi]) + choice.append("A") + elif meta.capitalize() == "B" : + print("==> "+Bopt+" and Metadata of B are chosen") + pairs.append([Ai,Bi]) + choice.append("B") + else : + print("==> "+Bopt+" and default metadata (A) chosen") + pairs.append([Ai,Bi]) + choice.append("A") + else : + print("Station does not exist. Choice droped !") + else : + print("Stations do not match") + + return pairs, choice +# +# Concatenating all non paired station in two lists +# +def concatenate(A, B, pairs, Sources="") : + C=AllStations(Source=Sources) + # + for Ai,Astn in enumerate(A.stations) : + if not any(np.array(pairs)[:,0] == Ai) : + C.add(Astn) + # + for Bi,Bstn in enumerate(B.stations) : + if not any(np.array(pairs)[:,1] == Bi) : + C.add(Bstn) + # + return C +# +# Merge the data of the paired stations +# +def mergedata(A, B, C, pairs, choice, verbose=False) : + # + for i,X in enumerate(pairs) : + Ai,Bi = X + if choice[i] == "A" : + NewStn = A.stations[Ai] + AltTime = B.stations[Bi].Time + if isinstance(B.stations[Bi].Original,np.ndarray) : + AltOrig = [B.stations[Bi].Original] + else : + AltOrig = B.stations[Bi].Original + AltSource = B.stations[Bi].mergersource + AltMerged = B.stations[Bi].Merged + else : + NewStn = B.stations[Bi] + AltTime = A.stations[Ai].Time + if isinstance(A.stations[Ai].Original,np.ndarray) : + AltOrig = [A.stations[Ai].Original] + else : + AltOrig = A.stations[Ai].Original + AltSource = A.stations[Ai].mergersource + AltMerged = A.stations[Ai].Merged + # + for ia,alt in enumerate(AltSource[:]) : + NewStn.mergersource.append(alt) + if isinstance(NewStn.Original,np.ndarray) : + NewStn.Original = [NewStn.Original, CC.tonewtime(AltTime, AltOrig[ia], NewStn.Time)] + else : + NewStn.Original.append(CC.tonewtime(AltTime, AltOrig[ia], NewStn.Time)) + # + if verbose : + print("For station ",A.stations[Ai].Name," the following sources exist :", \ + NewStn.mergersource, " Number of Originals :", len(NewStn.Original)) + # + # Generate a new merged product. + # + NewMerged = CC.mergedata(NewStn.Original, NewStn.Merged, verbose) + NewStn.Merged = NewMerged + C.add(NewStn) + # + return C +# +# Check Station number and generate new ones if needed. 
+#
+def stnnbchecker(C) :
+    stnnb=np.array([C.stations[i].Number for i in range(C.nbstations)], int)
+    reg=np.array([C.stations[i].WMOregion for i in range(C.nbstations)], int)
+    subreg=np.array([C.stations[i].WMOsubregion for i in range(C.nbstations)], int)
+    u, c = np.unique(stnnb[stnnb > 0], return_counts=True)
+    #
+    # Solve duplicates first
+    #
+    while np.max(c) > 1 :
+        dup = u[np.argmax(c)]
+        duploc = np.argmin(np.abs(stnnb-dup))
+        dreg = int(stnnb[duploc]/100000)
+        dsubreg = int((stnnb[duploc]-dreg*100000)/1000)
+        regstnnb = stnnb[(stnnb >= dup) & (stnnb < dreg*100000+(dsubreg+1)*1000)]
+        if regstnnb.shape[0] < 1 :
+            t = dreg*100000+(dsubreg+1)*1000-1
+        else :
+            t = np.max(regstnnb)+1
+        if not any(stnnb == t) :
+            stnnb[duploc] = t
+            C.stations[duploc].Number = t
+            print("stnnbchecker : replacing ", u[np.argmax(c)], " by ", t)
+        else :
+            print("stnnbchecker 1 : Generated a number which already exists !")
+            sys.exit()
+        u, c = np.unique(stnnb[stnnb > 0], return_counts=True)
+    #
+    # Generate new numbers if needed
+    #
+    if np.count_nonzero(stnnb < 0) > 999 :
+        subreg[:] = subreg[:]-1
+    mask = np.where(stnnb < 0)
+    for i in np.nditer(mask) :
+        regstnnb=stnnb[(stnnb >= reg[i]*100000+subreg[i]*1000) & (stnnb < (reg[i]+1)*100000)]
+        if regstnnb.shape[0] < 1 :
+            t = reg[i]*100000+subreg[i]*1000
+        else :
+            t = np.max(regstnnb)+1
+        if not any(stnnb == t) :
+            stnnb[i] = t
+            C.stations[i].Number = stnnb[i]
+            C.stations[i].WMOsubregion = subreg[i]
+        else:
+            print("stnnbchecker 2 : Generated a number which already exists !")
+            sys.exit()
+    #
+    return C
diff --git a/Stations/ConversionCalculations.py b/Stations/ConversionCalculations.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc721d012ca16e0d8a047874e0a9aa67c6e7b78c
--- /dev/null
+++ b/Stations/ConversionCalculations.py
@@ -0,0 +1,162 @@
+import numpy as np
+import datetime
+import math
+import sys
+#
+from difflib import SequenceMatcher
+#
+# For the Json format : get dates
+#
+def key2date(obj, key) :
+    dates = []
+    for dd in obj[key] :
+        dates.append(datetime.datetime(int(dd[0:4]), int(dd[4:6]), int(dd[6:8])))
+    return dates
+#
+# For the Json format : get data
+#
+def key2data(obj, key) :
+    nval=len(obj[key].keys())
+    data=np.zeros((nval))
+    for i,dd in enumerate(obj[key]) :
+        data[i] = obj[key][dd]
+    return data
+#
+# Compute monthly means when the original data is daily
+#
+def monthlymean(d,x) :
+    inyrs = np.array([di.year for di in d])
+    inym = np.array([(di.year*100)+di.month for di in d])
+    # Careful : data can have gaps, so we recreate the list of years
+    nbinyrs = inyrs[-1]-inyrs[0]+1
+    nd = np.arange(nbinyrs)+inyrs[0]
+    # New data
+    od = []
+    ox = []
+    #
+    for iy in range(nbinyrs) :
+        for im in range(12) :
+            ind = (inym==nd[iy]*100+(im+1))
+            mday = datetime.datetime(nd[iy], im+1, 15)
+            mlen = (mday.replace(month = mday.month % 12 +1, day = 1)-datetime.timedelta(days=1)).day
+            od.append(mday)
+            if len(x[ind]) < mlen :
+                # See if we can fix that
+                ox.append(np.nan)
+            else :
+                ox.append(np.mean(x[ind]))
+    #
+    return od, np.array(ox)
+#
+# Get the time axis from a NetCDF file
+#
+def gettimeaxis(nf):
+    dims=nf.dimensions.keys()
+    for d in dims:
+        if ( d.find('time') >= 0 ):
+            tname=d
+    t=np.copy(nf.variables[tname][:])
+    tunits=getattr(nf.variables[tname],"units")
+    if ( tunits.find("seconds") >= 0):
+        epoch=datetime.datetime.strptime(tunits[tunits.find("since ")+6:], "%Y-%m-%d %H:%M:%S")
+        date_list = [epoch+datetime.timedelta(seconds=x.astype(np.float64)) for x in t]
+    elif( tunits.find("days") >= 0):
+        epoch=datetime.datetime.strptime(tunits[tunits.find("since ")+6:], "%Y-%m-%d %H:%M:%S")
+        date_list = [epoch+datetime.timedelta(days=x.astype(np.float64)) for x in t]
+    elif( tunits.find("month") >= 0):
+        epoch=datetime.datetime.strptime(tunits[tunits.find("since ")+6:], "%Y-%m-%d %H:%M:%S")
+        date_list=[]
+        for x in t :
+            m=((epoch.month+(x-1))%12)+1
+            y=epoch.year+int((x-m)/12)+1
+            date_list.append(datetime.date(int(y),int(m),epoch.day))
+    else :
+        print("The time axis is not in seconds and I cannot yet deal with it")
+        sys.exit()
+    return date_list
+#
+#
+#
+def distance_haversine(A, B, radius=6371000.):
+    """ Note that the default distance is in meters """
+    dLat = math.radians(B[1]-A[1])
+    dLon = math.radians(B[0]-A[0])
+    lat1 = math.radians(A[1])
+    lat2 = math.radians(B[1])
+    a = math.sin(dLat/2) * math.sin(dLat/2) + math.sin(dLon/2) * math.sin(dLon/2) * math.cos(lat1) * math.cos(lat2)
+    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
+    return c * radius
+#
+# Function to score the similarity of two strings
+#
+def scorestring(stra, strb) :
+    # Check whether one string is contained in the other
+    match = SequenceMatcher(None,stra,strb).find_longest_match(0, len(stra), 0, len(strb))
+    #
+    if match.size == min(len(stra), len(strb)) :
+        # Sub-string case
+        score = 1.0
+    else :
+        # Fuzzy check
+        score = SequenceMatcher(None,stra,strb).ratio()
+    #
+    return score
+#
+# Function to put data onto a new time axis
+#
+def tonewtime(t,x,newt) :
+    newx = np.zeros((len(newt)))
+    newx[:] = np.nan
+    intersect=np.intersect1d(t,newt)
+    if len(intersect) == 0 :
+        print("Input time goes from ", t[0]," to ", t[-1])
+        print("Target time axis goes from ",newt[0], " to ", newt[-1])
+        print("Intersection is empty !")
+        sys.exit()
+    # The +1 includes the last common date, as Python slices exclude the end point
+    newx[newt.index(intersect[0]):newt.index(intersect[-1])+1]=x[t.index(intersect[0]):t.index(intersect[-1])+1]
+    return newx
+#
+# Merge a number of data sets which are already on the same time axis
+#
+def mergedata(d, Merged, verbose=False) :
+    # Options
+    # Merger quality criterion
+    q = 0.1
+    # Method to fill gaps : First or Mean
+    opt = "Mean"
+    #
+    da=np.reshape(np.append(Merged,np.asarray(d)),(-1,Merged.shape[0]))
+    nbdata,tlen=da.shape
+    nbval=np.zeros((nbdata))
+    overlapmean=np.zeros((nbdata))
+    mask=~np.isnan(np.sum(da,axis=0))
+    grandmean=np.nanmean(da)
+    #
+    for i in range(nbdata) :
+        nbval[i] = np.count_nonzero(~np.isnan(da[i,:]))
+        overlapmean[i] = np.mean(da[i,mask])
+        if verbose :
+            print(" mergedata : Mean of obs.
= ", np.nanmean(da[i,:]), " Nb obs = ", nbval[i], \ + " Mean of overlap period = ", np.mean(da[i,mask])) + # + # Default us the Merged product from the selected station + # + c=np.copy(da[0,:]) + # + # If the various observations are close to each other we have two options to fill the gaps : + # Take the first product which is available, or use the main of all products + # + if np.std(overlapmean)/np.mean(overlapmean) < q : + if opt.find("Mean") != -1 : + # A mask without the Merged data + mask=~np.isnan(np.sum(da[1:,:],axis=0)) + m = np.zeros((tlen)) + m[:] = np.nan + m[mask] = np.nanmean(da[:,mask], axis=0) + c[np.isnan(c)]=m[np.isnan(c)] + # Fill with the other data when only option is available + for i in range(nbdata) : + c[np.isnan(c)]=da[i,np.isnan(c)] + # + return c + diff --git a/Stations/ConvertGRDC.py b/Stations/ConvertGRDC.py new file mode 100644 index 0000000000000000000000000000000000000000..de8de41005b4bac7ad5b5699a743689b7b28d422 --- /dev/null +++ b/Stations/ConvertGRDC.py @@ -0,0 +1,42 @@ +import sys +import glob +import numpy as np +import datetime +# +import Station as ST +import AllStations as AS +# +GRDCPath="/home/polcher/WORK/Python/GRDC/Files" +GRDCfile="GRDC_Monthly_Jan20.nc" +# +GRDC = AS.AllStations(Source=GRDCPath) +# +for f in glob.glob(GRDCPath+'/*.txt') : + GRDC.add(ST.Station("GRDCascii",f)) +# +# +# Compute the over all time axis. +# +FirstDate=GRDC.FirstDate() +# +nbmonth = int((GRDC.LastDate()-GRDC.FirstDate()).days/365)*12+12 +nbstations = GRDC.nbstations +print("Number of stations :", nbstations) +print("Data covers the period :", GRDC.FirstDate(), " to ", GRDC.LastDate(), ' or ', nbmonth, ' month.') +# +# Create a global calendar in which all observations fit +# +AllDates=[] +y=GRDC.FirstDate().year +m=1 +d=15 +for i in range(nbmonth): + dm=i%12 + dy=int((i-dm)/12) + AllDates.append(datetime.datetime(y+dy, m+dm, d)) +print("New time axis goes from ", AllDates[0]," to ", AllDates[-1]) +# +# Dump all stations into a netCDF file +# +hist="GRDC Monthly River Discharge Received in January 2020" +GRDC.dumpnetcdf(AllDates, GRDCfile, hist) diff --git a/Stations/ConvertSpainData.py b/Stations/ConvertSpainData.py new file mode 100644 index 0000000000000000000000000000000000000000..a6775cf380a22043a5414d9f09818748357ad440 --- /dev/null +++ b/Stations/ConvertSpainData.py @@ -0,0 +1,44 @@ +import sys +import glob +import numpy as np +import datetime +# +import Station as ST +import AllStations as AS +# +SpainPath="/home/polcher/WORK/Python/GRDC/data-all-1900-2014.json" +Spainfile="Spain_Monthly_May20.nc" +# +# +# +Spain = AS.AllStations(Source=SpainPath) +# +for f in glob.glob(SpainPath+'/*.json') : + Spain.add(ST.Station("SpainJson", f, regioncode=62, countrycode="ES")) +# +# Compute the over all time axis. +# +FirstDate=Spain.FirstDate() +# +nbmonth = int((Spain.LastDate()-Spain.FirstDate()).days/365)*12+12 +nbstations = Spain.nbstations +print("Number of stations :", nbstations) +print("Data covers the period :", Spain.FirstDate(), " to ", Spain.LastDate(), ' or ', nbmonth, ' month.') +# +# +# Create a global calendar in which all observations fit +# +AllDates=[] +y=Spain.FirstDate().year +m=1 +d=15 +for i in range(nbmonth): + dm=i%12 + dy=int((i-dm)/12) + AllDates.append(datetime.datetime(y+dy, m+dm, d)) +print("New time axis goes from ", AllDates[0]," to ", AllDates[-1]) +# +# Dump all stations into a netCDF file +# +hist="Spanish data obtained from Observatorio del Ebro in May 2020. 
Downloaded in 2014 by Pere Quintana"
+Spain.dumpnetcdf(AllDates, Spainfile, hist)
diff --git a/Stations/MergeStations.py b/Stations/MergeStations.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3fdffb9e2d1223fdc1d25ae25557d594246cccb
--- /dev/null
+++ b/Stations/MergeStations.py
@@ -0,0 +1,60 @@
+#
+import datetime
+#
+import Station as ST
+import AllStations as AS
+#
+import configparser
+config=configparser.ConfigParser()
+config.read("MergeStations.def")
+#
+#
+#
+FileA=config.get("OverAll","FileA")
+FileB=config.get("OverAll","FileB")
+#
+OutFile=config.get("OverAll","OutFile")
+#
+MinDistance=config.getfloat("OverAll","MinDistance",fallback=1000.)
+UpAreaError=config.getfloat("OverAll","UpAreaError",fallback=10.)
+interactive=config.getboolean("OverAll","Interactive",fallback=False)
+#
+A=AS.AllStations()
+A.fetchnetcdf(FileA)
+B=AS.AllStations()
+B.fetchnetcdf(FileB)
+#
+AclosetoB, BclosetoA = AS.closeto(A, B, distance=5*MinDistance)
+#
+# Try to pair stations which are close to each other. The various metadata are compared.
+#
+pairs, metachoice = AS.PairAtoB(A, B, AclosetoB, BclosetoA, MinDistance, UpAreaError, interactive=interactive)
+#
+# Concatenate all stations which could not be paired.
+#
+C=AS.concatenate(A, B, pairs, Sources=FileA+"+"+FileB)
+#
+# Merge the data of the stations which were paired.
+#
+C=AS.mergedata(A, B, C, pairs, metachoice, verbose=True)
+#
+# Check the station numbers and complete them if needed
+#
+C=AS.stnnbchecker(C)
+#
+# Create a global calendar in which all observations fit
+#
+print("Number of stations in A = ", A.nbstations," in B = ", B.nbstations, " Merged set has : ", C.nbstations)
+print("Time range of input : A=", A.FirstDate(), "-", A.LastDate()," B=", B.FirstDate(), "-", B.LastDate())
+nbmonth = int((C.LastDate()-C.FirstDate()).days/365)*12+12
+print("Data covered by merged dataset :", C.FirstDate(), " to ", C.LastDate(), ' or ', nbmonth, ' month.')
+AllDates=[]
+y=C.FirstDate().year
+m=1
+d=15
+for i in range(nbmonth):
+    dm=i%12
+    dy=int((i-dm)/12)
+    AllDates.append(datetime.datetime(y+dy, m+dm, d))
+#
+C.dumpnetcdf(AllDates, OutFile, "Merge of "+FileA+" and "+FileB)
diff --git a/Stations/Station.py b/Stations/Station.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bce63e4639f33228cd0ebe4067bc0a87e46bddc
--- /dev/null
+++ b/Stations/Station.py
@@ -0,0 +1,320 @@
+#
+# Class to store individual stations with all the metadata.
+# The observations are put on a continuous time axis from the
+# date of the first observation to the last. Missing observations
+# are noted as np.nan.
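+#
+# Typical construction, as used by the Convert* and Merge scripts
+# (file names below are placeholders) :
+#   Station("GRDCascii", "station.txt")
+#   Station("SpainJson", "station.json", regioncode=62, countrycode="ES")
+#   Station("netcdf", ncin, index=i)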
+#
+#
+import sys
+import numpy as np
+import datetime
+import re
+from netCDF4 import stringtochar, chartostring
+#
+import ConversionCalculations as CC
+#
+defint = -999
+defstr = "XXX"
+#
+class Station :
+    def __init__(self, ftype, filename, index=0, regioncode=-999, countrycode="XX") :
+        #
+        # Set default values for the station
+        #
+        self.sourceproj = None
+        self.lonlatproj = None
+        #
+        # GRDC station number
+        self.Number = defint
+        self.WMOregion = defint
+        self.WMOsubregion = defint
+        self.NextStation = defint
+        self.River = defstr
+        self.Name = defstr
+        self.FileDate = defstr
+        self.LastUpdate = defstr
+        self.Country = defstr
+        self.Area = defint
+        self.Altitude = defint
+        self.Length = defint
+        self.Location = None
+        self.Time = None
+        self.Original = None
+        self.mergersource = None
+        self.Merged = None
+        self.Flag = None
+        #
+        #
+        #
+        if ftype.find("GRDCascii") >= 0 :
+            self.GRDCasciiread(filename)
+        elif ftype.find("SpainJson") >= 0 :
+            self.Spainjsonread(filename, countrycode, regioncode)
+        elif ftype.find("netcdf") >= 0 :
+            self.getfromnetcdf(filename, index)
+        else:
+            print("Unsupported file format :", ftype)
+            sys.exit()
+    #
+    #
+    #
+    def GRDCasciiread(self, filename) :
+        #
+        f = open(filename, 'rt', encoding='iso8859_9')
+        #
+        skip=0
+        nbdata=-1
+        for line in f:
+            #
+            # If "#" is in the first position the line carries metadata
+            #
+            if ( line.find("#") == 0 ) :
+                if ( line.find("GRDC-No.:") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    self.Number=int(tmpread)
+                    self.WMOregion = int(self.Number/100000)
+                    self.WMOsubregion = int((self.Number-self.WMOregion*100000)/1000)
+                elif ( line.find("Next downstream station:") > 0 ) :
+                    start=line.find("Next downstream station:")+len("Next downstream station:")+1
+                    tmpread=line[start:]
+                    if len(tmpread.strip()) <= 1 or tmpread.find('-') >= 0 :
+                        self.NextStation = -999
+                    else :
+                        self.NextStation=int(tmpread)
+                elif (line.find("River:") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    self.River=tmpread.strip().title()
+                elif (line.find("file generation") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    self.FileDate=tmpread.strip()
+                elif (line.find("Last update") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    self.LastUpdate=tmpread.strip()
+                elif (line.find("Station:") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:].lower()
+                    self.Name=tmpread.strip().title()
+                elif (line.find("Country:") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    self.Country=tmpread.strip()
+                elif (line.find("Latitude (") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    lat=float(tmpread)
+                elif (line.find("Longitude (") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    lon=float(tmpread)
+                elif (line.find("Catchment area (") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    self.Area=float(tmpread)
+                elif (line.find("Altitude (m ASL)") > 0 ) :
+                    start=line.find(":")+1
+                    tmpread=line[start:]
+                    self.Altitude=float(tmpread)
+                else :
+                    # Skip line !
+                    skip=skip+1
+            else :
+                if ( skip > 0 and nbdata < 0) :
+                    if (line.find("YYYY-MM-DD;hh:mm; Original; Calculated; Flag") != 0 ) :
+                        print("Unknown format, cannot read !")
+                        print(line)
+                        sys.exit()
+                    else :
+                        nbdata=0
+                        date=[]
+                        orig=[]
+                        calc=[]
+                        flag=[]
+                else :
+                    nbdata=nbdata+1
+                    pos=[0]
+                    for m in re.finditer(';', line):
+                        pos.append(m.start())
+                    if ( len(pos) == 5 ) :
+                        day=line[pos[0]:pos[1]]
+                        time=line[pos[1]+1:pos[2]]
+                        original=float(line[pos[2]+1:pos[3]])
+                        calculated=float(line[pos[3]+1:pos[4]])
+                        dataflag=int(line[pos[4]+1:-1])
+                    else :
+                        print("We have not found the needed 4 ';'", nbdata)
+                        print(pos)
+                        print(line)
+                        sys.exit()
+                    y=int(day[0:4])
+                    m=int(day[5:7])
+                    d=15
+                    date.append(datetime.datetime(y, m, d))
+                    orig.append(original)
+                    calc.append(calculated)
+                    flag.append(dataflag)
+        #
+        # End of loop through lines
+        #
+        f.close()
+        #
+        # Loop through all values to build a merged product between the
+        # original monthly values and the monthly values calculated by GRDC
+        # from daily observations
+        #
+        orig=np.asarray(orig, float)
+        calc=np.asarray(calc, float)
+        merged=np.empty((nbdata), float)
+        for i in range(nbdata):
+            if ( flag[i] > 80 ) and ( flag[i] <= 100 ) :
+                merged[i] = calc[i]
+            elif (flag[i] <= 80) and (flag[i] >= 0):
+                merged[i] = orig[i]
+            else :
+                print("In building the merged product a flag was not recognized")
+                print("Working on ", filename)
+                print("Flag = ", flag[i])
+                print("orig[i] = ", orig[i], "calc[i] = ", calc[i])
+                sys.exit()
+        #
+        # Clean-up !
+        #
+        self.Length=nbdata
+        self.Location=[lon,lat]
+        self.Time=date
+        #
+        orig[orig <= -999] = np.nan
+        self.Original=[np.asarray(orig)]
+        self.mergersource = ["Original monthly Hydrographs"]
+        #
+        calc[calc <= -999] = np.nan
+        if np.count_nonzero(~np.isnan(calc)) > 0 :
+            self.mergersource.append("Computed at GRDC from daily values")
+            self.Original.append(calc)
+        #
+        self.Merged=merged
+        self.Merged[self.Merged <= -999]=np.nan
+        self.Flag=np.asarray(flag, float)
+        #
+        return
+    #
+    #
+    #
+    def Spainjsonread(self, filename, countrycode, regioncode):
+        #
+        import json
+        from pyproj import Proj, transform
+        #
+        if self.sourceproj is None :
+            self.sourceproj = Proj(init="EPSG:25830")
+        if self.lonlatproj is None :
+            self.lonlatproj = Proj(init = 'epsg:4326')
+        #
+        f = open(filename, 'r')
+        data = f.read()
+        fobj = json.loads(data)
+        #
+        if "runoff" in fobj :
+            if len(fobj["runoff"].keys()) > 0 :
+                # Get coordinates
+                x1,y1=float(fobj["metadata"][" UTM X H30 ETRS89"])*1000,float(fobj["metadata"][" UTM Y H30 ETRS89"])*1000
+                lon,lat = transform(self.sourceproj,self.lonlatproj,x1,y1)
+                #
+                if " RIO" in fobj["metadata"] :
+                    self.River=fobj["metadata"][" RIO"].title().strip()
+                else :
+                    self.River=fobj["metadata"][" Confed. Hidrografica"].title()
+                self.Name=fobj["metadata"][" Municipio"].title().strip()
+                self.Country=countrycode
+                self.WMOregion=regioncode
+                self.WMOsubregion=99
+                if " Superficie aguas arriba (km2)" in fobj["metadata"] :
+                    if len(fobj["metadata"][" Superficie aguas arriba (km2)"]) > 1 :
+                        ## Why this 1000 ? Units and numbers don't match !
+ self.Area=float(fobj["metadata"][" Superficie aguas arriba (km2)"])*1000.0 + if " Altitud (m)" in fobj["metadata"] : + if len(fobj["metadata"][" Altitud (m)"]) > 1 : + self.Altitude=float(fobj["metadata"][" Altitud (m)"]) + self.Location=[lon,lat] + # + # Get data and build monthly averages + # + dates = CC.key2date(fobj, "runoff") + runoff = CC.key2data(fobj, "runoff") + month, monthlyrunoff = CC.monthlymean(dates, runoff) + # + self.Length=len(month) + self.Time=month + self.Original=monthlyrunoff + self.Merged=monthlyrunoff + # + # + # + # + else : + print("Station in "+filename+" has zero runoff values") + # + elif "volume" in fobj : + print("Dam named "+fobj["metadata"]["Embalse"]+" at "+fobj["metadata"][" Municipio"]) + else : + print("Unrecognized file ",filename) + print("Unrecognized file : Available keys : ", fobj.keys()) + # + f.close() + # + return + + def getfromnetcdf(self, nc, i): + # + self.Number = nc.variables["number"][i] + self.WMOregion = nc.variables["WMOreg"][i] + self.WMOsubregion = nc.variables["WMOsubreg"][i] + self.NextStation = nc.variables["next"][i] + self.River = chartostring(nc.variables["river"][i,:]) + self.Name = chartostring(nc.variables["name"][i,:]) + self.FileDate = chartostring(nc.variables["FileDate"][i,:]) + self.LastUpdate = chartostring(nc.variables["LastUpdate"][i,:]) + self.Country = chartostring(nc.variables["country"][i,:]) + self.Area = nc.variables["area"][i] + self.Altitude = nc.variables["altitude"][i] + self.Location = np.append(np.copy(nc.variables["lon"][i]),np.copy(nc.variables["lat"][i])) + # Convert time to + self.Time = CC.gettimeaxis(nc) + # + FillOrig = nc.variables["hydrographs"]._FillValue + if "alternative_index" in nc.variables.keys() : + # Get names of alternatives + nbalt,nbstations = nc.variables["alternative_index"].shape + tmpmerge = [defstr]*(nbalt+1) + for (a,v) in nc.__dict__.items(): + if a.find("MergeSource") == 0 : + ind = int(a[-1])-1 + tmpmerge[ind]=v + # Get indices + self.mergersource = [tmpmerge[0]] + self.Original = [nc.variables["hydrographs"][:,i]] + for ia in range(nbalt) : + if nc.variables["alternative_index"][ia,i] >= 0 : + self.mergersource.append(tmpmerge[ia+1]) + self.Original.append(nc.variables["alt_hydrograph"][:,nc.variables["alternative_index"][ia,i]]) + else : + self.mergersource = [nc.history] + self.Original = [nc.variables["hydrographs"][:,i]] + # + # + for d in self.Original : + d[d >= FillOrig] = np.nan + # + if "mergedhydro" in nc.variables.keys() : + self.Merged = nc.variables["mergedhydro"][:,i] + FillMerge = nc.variables["mergedhydro"]._FillValue + self.Merged[self.Merged >= FillMerge] = np.nan + if "flags" in nc.variables.keys() : + self.Flag = nc.variables["flags"][:,i] + # + self.Length=len(self.Time) + # + return diff --git a/Stations/Station_Metadata.nc b/Stations/Station_Metadata.nc new file mode 100644 index 0000000000000000000000000000000000000000..e1089a6348ae70d57d8f9e97bd2f8387b5b4af21 Binary files /dev/null and b/Stations/Station_Metadata.nc differ
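Annotation on the pairing logic (not part of the patch) : AllStations.PairAtoB combines a distance score, an upstream-area score and two string-similarity scores, and accepts a pair automatically when their mean reaches 0.9. A minimal standalone sketch of that rule, assuming plain-string names ; pair_score and its defaults are illustrative, mirroring the MinDistance/UpAreaError settings read from MergeStations.def :

import numpy as np
import ConversionCalculations as CC

def pair_score(stnA, stnB, mindist=1000., maxerr=10.) :
    # Distance score : saturates at 1 once the stations are within mindist metres.
    dist = abs(CC.distance_haversine(stnA.Location, stnB.Location))
    score = [min(mindist/dist, 1.0) if dist > 0 else 1.0]
    # Upstream-area score : saturates at 1 below maxerr percent relative error.
    if stnA.Area > 0 and stnB.Area > 0 :
        uperr = abs((stnA.Area-stnB.Area)/stnA.Area*100.)
        score.append(min(maxerr/uperr, 1.0) if uperr > 0 else 1.0)
    # Fuzzy similarity of station and river names (see CC.scorestring).
    score.append(CC.scorestring(stnA.Name.lower(), stnB.Name.lower()))
    score.append(CC.scorestring(stnA.River.lower(), stnB.River.lower()))
    # PairAtoB merges without asking when this mean is >= 0.9.
    return np.mean(np.array(score))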