The goal of Weatherfunctions is to support download of daily weather data from the German weather data service (DWD), to generate interpolations of weather data and to calculate some derived variables like potential evapotranspiration.
Installation
You can install the development version of Weatherfunctions from GitHub with:
# install.packages("pak")
pak::pak("AgronomyKiel/Weatherfunctions")
Example
This is a basic example which shows you how to download a local copy of the historical and recent daily weather data from DWD and to interpolate them for a certain location defined by latitude, longitude and height above sea level.
# Run this example in a fresh R session. The workspace is deliberately NOT
# cleared with rm(list = ls()): that call would silently delete the objects of
# anyone who pastes the example into a running session.
library(Weatherfunctions)
library(here)
library(fst)
library(dplyr)
library(tidyr)
library(sf)
# Download the meta data for the "synoptic" weather stations from DWD
# (historical and recent repositories).
DWD_content <- getDWDStationList(
  historical = DWD_ftp_historical,
  recent = DWD_ftp_recent
)
# Download the meta data for the additional stations with precipitation data.
DWDRain_content <- getDWDRainStationList(
  recent = DWDRain_ftp_recent,
  historical = DWDRain_ftp_historical
)
# The package defines a standard directory (Local_R_DWD) for storing user
# specific data permanently.
outputDir <- Local_R_DWD
# Create the directory if necessary and save both meta data objects there.
if (!dir.exists(outputDir)) {
  dir.create(outputDir, recursive = TRUE)
}
fn <- paste0(outputDir, fn_DWD_content)
save(DWD_content, file = fn)
fn <- paste0(outputDir, fn_DWDRain_content)
save(DWDRain_content, file = fn)
# Daily DWD weather data can be stored locally in two ways: as zip files or as
# fst files.
# DWD distinguishes historic data (older than approx. 1 year) from recent data
# (up to approx. 1 year); the recent data are updated daily.
# Keeping the historic data locally as zip and/or fst files costs disk space
# and time for the first download, but later access is much faster.
#
# Download the historical data to local zip files (only needed the first time
# or once per year):
#if (!dir.exists(LocalCopy_DWD_ftp_historical)) {dir.create(LocalCopy_DWD_ftp_historical, recursive = TRUE)}
#Copy_DWD_ZipFiles(DWD_ftp_historical, LocalCopy_DWD_ftp_historical)
# Downloading the recent data to local zip files is optional:
#if (!dir.exists(LocalCopy_DWD_ftp_recent)) {dir.create(LocalCopy_DWD_ftp_recent, recursive = TRUE)}
#Copy_DWD_ZipFiles(DWD_ftp_recent, LocalCopy_DWD_ftp_recent)
# Build the local fst file either from the local zip files (isloadnew = FALSE)
# or directly from the DWD ftp server (isloadnew = TRUE).
# The weather data interpolation works on the fst format.
#UpdateDWDData_to_fst(dataperiod = "historical", startdate = "1990-01-01", isloadnew = TRUE)
#
# Once the data are stored locally they are read from the fst file whose name
# is built from the package prefix, the start year 1990 and ".fst".
fn_histDWD_data <- paste0(fn_histDWD_data_str, "1990", ".fst")
fn <- here(Local_R_DWD, fn_histDWD_data)
weather_historic <- read.fst(fn)
# Keep only the station id, the date and the core (essential) columns.
weather_historic_core <- weather_historic[
  ,
  c("Stations_id", "Date", longNames_DWD_core)
]
# The full table is no longer needed.
rm(weather_historic)
# Current calendar year, used to derive the start date of the recent data.
ThisYear <- as.numeric(format(Sys.Date(), "%Y"))
# Keep the meta data (station list) of the recent rain stations in a separate
# variable.
RecentRainStationList <- DWDRain_content$recent$stationlist
# The recent data can be updated to the local fst file from the local zip
# files (isloadnew = FALSE) or directly from the DWD ftp server
# (isloadnew = TRUE).
# Set UpdateRecentDWDdata <- TRUE if you want to update the recent data.
UpdateRecentDWDdata <- FALSE
if (UpdateRecentDWDdata) {
  StartTime <- Sys.time()
  UpdateDWDData_to_fst(
    dataperiod = "recent",
    isloadnew = TRUE,
    DWD_content = DWD_content,
    startdate = paste0(as.character(ThisYear - 1), "-01-01")
  )
  EndTime <- Sys.time()
  # format() keeps the unit of the difftime (secs, mins, ...); pasting the
  # difftime directly would print only a bare number.
  print(paste("Time for UpdateDWDData_to_fst:", format(EndTime - StartTime)))
}
# Build the name of the fst file with the recent data.
# NOTE(review): the suffix 1990 is hard-coded although the recent data start
# at ThisYear - 1 -- confirm that it matches the file name written by
# UpdateDWDData_to_fst().
fn_recentDWD_data <- paste0("recent_weather_dat_", as.character(1990), ".fst")
weather_recent <- read.fst(path = here(Local_R_DWD, fn_recentDWD_data))
# Keep only the station id, the date and the core columns.
weather_recent <- weather_recent[, c("Stations_id", "Date", longNames_DWD_core)]
# The estimation of weather data needs the coordinates and the height above
# sea level of the target location.
geoBreite <- 52.7306 # latitude
geoLaenge <- 10.6298 # longitude
Hoehe_m <- 85 # height above sea level
# Put the coordinates into a one-row data frame ...
Location <- data.frame(
  Latitude = as.numeric(geoBreite),
  Longitude = as.numeric(geoLaenge)
)
# ... convert it to an sf point object and declare the coordinates as WGS84
# longitude/latitude.
Location <- st_as_sf(Location, coords = c("Longitude", "Latitude"))
Location <- st_set_crs(Location, value = "+proj=longlat +datum=WGS84")
### Interpolate the historic weather data for the location
# The interpolation needs the list of stations available for the historic
# data; it is taken from the historical part of the DWD content.
stationlist <- DWD_content$historical$stationlist
# Convert the station list to an sf object with WGS84 longitude/latitude.
stationlist <- st_as_sf(stationlist, coords = c("geoLaenge", "geoBreite"))
stationlist <- st_set_crs(stationlist, value = "+proj=longlat +datum=WGS84")
# Distance of every station to the location, clamped to a minimum of 1.
# NOTE(review): the original comment breaks off after "because"; presumably
# the clamp avoids a division by zero in an inverse-distance weighting -- confirm.
stationlist$Distance_m <- pmax(
  1,
  as.numeric(st_distance(stationlist, Location))
)
# The same distance divided by 1000, formatted with 3 significant digits and
# converted back to numeric.
stationlist$Distance_km <- as.numeric(
  format(stationlist$Distance_m / 1000, digits = 3)
)
# Drop the geometry column and return to a plain data frame.
stationlist$geometry <- NULL
stationlist <- as.data.frame(stationlist)
# First day of the period to be interpolated.
startdate <- "1990-01-01"
# Station list of the additional (historical) stations with rain data.
RainStationList <- DWDRain_content$historical$stationlist
# Select the 3 nearest stations with rain data. For historical data more than
# one station is necessary because the stations with rain data changed over
# time.
RainStations_selected <- SelectStations(
  lat = geoBreite,
  long = geoLaenge,
  height_loc = Hoehe_m,
  stationlist = RainStationList,
  minstations = 3,
  max_stations = 3,
  radius = 80000,
  startdate = startdate,
  max.Height.Distance_m = 200
)
# Fetch the historic rain data of the selected stations.
df_Rain <- GetRainData_selection(
  RainStations_selected,
  DWDRain_content,
  repository = DWDRain_ftp_historical,
  startdate = "1990-01-01"
)
# Attach the distance of each rain station to its data rows.
df_Rain <- left_join(
  df_Rain,
  RainStations_selected[, c("Stations_id", "Distance_km")],
  by = "Stations_id"
)
# Interpolate the historic weather data for the location from the core
# weather table, the station list and the rain data.
IntPolDWDHistorical <- InterpolateFromDWD(
  df_DWD_core = weather_historic_core,
  stationlist = stationlist,
  geoBreite = geoBreite,
  geoLaenge = geoLaenge,
  max.Height.Distance_m = 100,
  Hoehe_m = Hoehe_m,
  df_Rain = df_Rain,
  startdate = startdate
)
# The interpolated daily data are in the DWDdata element of the result.
IntPolDWDdataHistorical <- IntPolDWDHistorical$DWDdata
# Data of the current and the previous year are dropped here because they are
# covered by the recent data. The Time column is compared with the number of
# days between 1899-12-30 and the first day of the previous year.
ThisYear <- as.numeric(format(Sys.Date(), "%Y"))
FirstDayOfYear <- as.Date(paste0(ThisYear - 1, "-01-01"))
ExcelFirstDayofYear <- as.numeric(FirstDayOfYear - as.Date("1899-12-30"))
IntPolDWDdataHistorical <- filter(
  IntPolDWDdataHistorical,
  Time < ExcelFirstDayofYear
)
# Add the longitude of the location to the data frame.
IntPolDWDdataHistorical$Longitude <- geoLaenge
IntPolDWDdataHistorical$Latitude <- geoBreite
The interpolation should deliver daily estimates of the weather data for the location:
summary(IntPolDWDdataHistorical)
#> Time TMPM Rain LF
#> Min. :32874 Min. :-18.300 Min. : 0.000 Min. : 33.70
#> 1st Qu.:35978 1st Qu.: 4.300 1st Qu.: 0.000 1st Qu.: 72.20
#> Median :39083 Median : 9.600 Median : 0.100 Median : 81.30
#> Mean :39083 Mean : 9.536 Mean : 1.912 Mean : 79.68
#> 3rd Qu.:42187 3rd Qu.: 15.200 3rd Qu.: 2.000 3rd Qu.: 88.80
#> Max. :45291 Max. : 28.900 Max. :56.200 Max. :100.00
#> VP Sat_def Rad_Int GlobRad
#> Min. : 1.20 Min. : 0.000 Min. : 10.2 Min. : 0.88
#> 1st Qu.: 6.90 1st Qu.: 1.000 1st Qu.: 37.0 1st Qu.: 3.20
#> Median : 9.50 Median : 2.100 Median :100.5 Median : 8.68
#> Mean :10.05 Mean : 3.028 Mean :119.5 Mean :10.33
#> 3rd Qu.:12.90 3rd Qu.: 4.200 3rd Qu.:189.9 3rd Qu.:16.41
#> Max. :23.30 Max. :25.100 Max. :347.0 Max. :29.98
#> Wind TMPMN TMPMX Longitude
#> Min. : 0.500 Min. :-22.40 Min. :-12.40 Min. :10.63
#> 1st Qu.: 2.000 1st Qu.: 0.70 1st Qu.: 7.30 1st Qu.:10.63
#> Median : 2.700 Median : 5.40 Median : 14.10 Median :10.63
#> Mean : 3.011 Mean : 5.21 Mean : 14.03 Mean :10.63
#> 3rd Qu.: 3.800 3rd Qu.: 10.10 3rd Qu.: 20.70 3rd Qu.:10.63
#> Max. :12.200 Max. : 21.00 Max. : 39.20 Max. :10.63
#> Latitude
#> Min. :52.73
#> 1st Qu.:52.73
#> Median :52.73
#> Mean :52.73
#> 3rd Qu.:52.73
#> Max. :52.73
The average distance for the estimation of the different weather variables can be calculated:
# Take the df.wf element of the interpolation result; it carries an InvDist
# column per row.
usedstations <- IntPolDWDHistorical$df.wf
# Turn the stored inverse distance back into a distance.
usedstations$Distance <- 1 / usedstations$InvDist
Diststations <- usedstations %>% group_by(variable) %>% dplyr::summarise(Distance = mean(Distance, na.rm = T))
The average distance for the estimation of the different weather variables is:
Diststations
#> # A tibble: 8 × 2
#> variable Distance
#> <fct> <dbl>
#> 1 LUFTTEMPERATUR 25.9
#> 2 REL_FEUCHTE 26.0
#> 3 WINDGESCHWINDIGKEIT 41.5
#> 4 LUFTTEMPERATUR_MAXIMUM 26.7
#> 5 LUFTTEMPERATUR_MINIMUM 26.7
#> 6 NIEDERSCHLAGSHOEHE 6.43
#> 7 SONNENSCHEINDAUER 35.1
#> 8 MHoeheWind 25.6
Now the same procedure for the recent data:
# recent data #######
## Retrieve the recent station list from the DWD content.
DWDrecent <- DWD_content$recent
stationlist <- DWDrecent$stationlist
# Convert the station list to an sf object with WGS84 longitude/latitude.
stationlist <- st_as_sf(stationlist, coords = c("geoLaenge", "geoBreite")) %>%
  st_set_crs(value = "+proj=longlat +datum=WGS84")
# Distance of every station to the location, clamped to a minimum of 1.
# NOTE(review): presumably the clamp avoids a division by zero in an
# inverse-distance weighting -- confirm.
stationlist$Distance_m <- pmax(1, as.numeric(st_distance(stationlist, Location)))
# format() returns character strings; convert back to numeric so that
# Distance_km has the same type as in the historic station list.
stationlist$Distance_km <- as.numeric(format(stationlist$Distance_m / 1000, digits = 3))
# Drop the geometry column and return to a plain data frame.
stationlist$geometry <- NULL
stationlist <- as.data.frame(stationlist)
# The recent period starts on the first day of the previous year.
startdate <- paste0(as.character(ThisYear - 1), "-01-01")
# Select the nearest stations with rain data from the recent station list.
# The result must be stored in RainStations_selected, the name read by the
# two calls below. Assigning it to a differently spelled variable would make
# those calls silently reuse the station selection of the historic data.
RainStations_selected <- SelectStations(
  lat = geoBreite,
  long = geoLaenge,
  height_loc = Hoehe_m,
  stationlist = RecentRainStationList,
  minstations = 3,
  max_stations = 3,
  radius = 80000,
  startdate = startdate,
  max.Height.Distance_m = 200
)
# Fetch the recent rain data of the selected stations.
df_Rain <- GetRainData_selection(
  RainStations_selected,
  DWDRain_content,
  repository = DWDRain_ftp_recent,
  startdate = startdate
)
# Attach the distance of each rain station to its data rows.
df_Rain <- left_join(
  df_Rain,
  RainStations_selected[, c("Stations_id", "Distance_km")],
  by = "Stations_id"
)
# Interpolate the recent weather data for the location from the recent core
# weather table, the recent station list and the rain data.
IntpolDWDrecent <- InterpolateFromDWD(
  df_DWD_core = weather_recent,
  stationlist = stationlist,
  geoBreite = geoBreite,
  geoLaenge = geoLaenge,
  max.Height.Distance_m = 100,
  Hoehe_m = Hoehe_m,
  df_Rain = df_Rain,
  startdate = startdate
)
# The interpolated daily data are in the DWDdata element of the result.
IntPolDWDdatarecent <- IntpolDWDrecent$DWDdata
# Add the longitude of the location to the data frame.
IntPolDWDdatarecent$Longitude <- geoLaenge
IntPolDWDdatarecent$Latitude <- geoBreite
Here is the summary of the result of the interpolation procedure for the recent data:
summary(IntPolDWDdatarecent)
#> Time TMPM Rain LF
#> Min. :45292 Min. :-7.100 Min. : 0.0 Min. :41.10
#> 1st Qu.:45404 1st Qu.: 4.200 1st Qu.: 0.0 1st Qu.:73.47
#> Median :45517 Median : 8.900 Median : 0.0 Median :81.55
#> Mean :45517 Mean : 9.526 Mean : 2.2 Mean :80.01
#> 3rd Qu.:45629 3rd Qu.:15.500 3rd Qu.: 2.4 3rd Qu.:88.22
#> Max. :45741 Max. :24.500 Max. :28.0 Max. :99.00
#> NA's :2 NA's :2
#> VP Sat_def Rad_Int GlobRad
#> Min. : 2.80 Min. : 0.100 Min. : 10.20 Min. : 0.880
#> 1st Qu.: 6.80 1st Qu.: 1.100 1st Qu.: 31.75 1st Qu.: 2.745
#> Median : 9.20 Median : 2.000 Median : 96.60 Median : 8.345
#> Mean :10.09 Mean : 2.939 Mean :112.98 Mean : 9.761
#> 3rd Qu.:13.15 3rd Qu.: 4.200 3rd Qu.:181.60 3rd Qu.:15.690
#> Max. :21.90 Max. :14.700 Max. :343.30 Max. :29.660
#> NA's :2 NA's :2 NA's :2 NA's :2
#> Wind TMPMN TMPMX Longitude
#> Min. :0.700 Min. :-10.700 Min. :-2.60 Min. :10.63
#> 1st Qu.:1.800 1st Qu.: 0.350 1st Qu.: 7.60 1st Qu.:10.63
#> Median :2.500 Median : 4.550 Median :13.10 Median :10.63
#> Mean :2.692 Mean : 5.039 Mean :14.00 Mean :10.63
#> 3rd Qu.:3.400 3rd Qu.: 10.100 3rd Qu.:20.82 3rd Qu.:10.63
#> Max. :8.000 Max. : 17.900 Max. :32.50 Max. :10.63
#> NA's :2 NA's :2 NA's :2
#> Latitude
#> Min. :52.73
#> 1st Qu.:52.73
#> Median :52.73
#> Mean :52.73
#> 3rd Qu.:52.73
#> Max. :52.73
#>
DWDdata <- rbind(IntPolDWDdataHistorical, IntPolDWDdatarecent)
Here is the combined data set:
summary(DWDdata)
#> Time TMPM Rain LF
#> Min. :32874 Min. :-18.300 Min. : 0.000 Min. : 33.70
#> 1st Qu.:36091 1st Qu.: 4.300 1st Qu.: 0.000 1st Qu.: 72.22
#> Median :39308 Median : 9.600 Median : 0.100 Median : 81.30
#> Mean :39308 Mean : 9.536 Mean : 1.922 Mean : 79.69
#> 3rd Qu.:42524 3rd Qu.: 15.200 3rd Qu.: 2.000 3rd Qu.: 88.80
#> Max. :45741 Max. : 28.900 Max. :56.200 Max. :100.00
#> NA's :2 NA's :2
#> VP Sat_def Rad_Int GlobRad
#> Min. : 1.20 Min. : 0.000 Min. : 10.20 Min. : 0.88
#> 1st Qu.: 6.90 1st Qu.: 1.000 1st Qu.: 36.83 1st Qu.: 3.18
#> Median : 9.50 Median : 2.100 Median :100.40 Median : 8.68
#> Mean :10.05 Mean : 3.025 Mean :119.28 Mean :10.31
#> 3rd Qu.:12.90 3rd Qu.: 4.200 3rd Qu.:189.60 3rd Qu.:16.38
#> Max. :23.30 Max. :25.100 Max. :347.00 Max. :29.98
#> NA's :2 NA's :2 NA's :2 NA's :2
#> Wind TMPMN TMPMX Longitude
#> Min. : 0.5 Min. :-22.400 Min. :-12.40 Min. :10.63
#> 1st Qu.: 2.0 1st Qu.: 0.700 1st Qu.: 7.30 1st Qu.:10.63
#> Median : 2.7 Median : 5.400 Median : 14.00 Median :10.63
#> Mean : 3.0 Mean : 5.204 Mean : 14.03 Mean :10.63
#> 3rd Qu.: 3.7 3rd Qu.: 10.100 3rd Qu.: 20.70 3rd Qu.:10.63
#> Max. :12.2 Max. : 21.000 Max. : 39.20 Max. :10.63
#> NA's :2 NA's :2 NA's :2
#> Latitude
#> Min. :52.73
#> 1st Qu.:52.73
#> Median :52.73
#> Mean :52.73
#> 3rd Qu.:52.73
#> Max. :52.73
#>