Skip to content

Aggregate & Group Data

For performance reasons, the API does not allow to aggregate data on the fly. Instead, you can post-process the data on your own. This page provides some examples on how to aggregate and group data.

Mean DBH grouped by tree species in NRW

In this example, we will query meassured tree data from the inventory archive in North Rhine-Westphalia (NRW). We will then calculate the mean diameter at breast height (DBH) for each tree species.

R
# Install necessary packages if not already installed
if (!require(httr)) install.packages("httr")
if (!require(jsonlite)) install.packages("jsonlite")
if (!require(dplyr)) install.packages("dplyr")

# Load the libraries
library(httr)
library(jsonlite)
library(dplyr)

# Set headers for the API request
headers = c(
  'apikey' = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoiYW5vbiIsImlzcyI6InN1cGFiYXNlIiwiaWF0IjoxNzQ1NzkxMjAwLCJleHAiOjE5MDM1NTc2MDB9.hXiYlA_168hHZ6fk3zPgABQUpEcqkYRMzu0A5W5PtYU',
  'Accept-Profile' = 'inventory_archive'
)

# Make the API request
res <- VERB("GET", url = "https://ci.thuenen.de/rest/v1/cluster?states_affected=cd.{5}&select=cluster_name,plot!fk_plot_cluster(plot_name,tree(dbh,tree_species))&plot.tree.dbh=not.is.null", add_headers(headers))


# Parse JSON response
data <- content(res, as = "parsed")


# Initialize empty data frame
tree_data <- data.frame(dbh = numeric(), tree_species = numeric())

# Loop through each cluster
for (cluster in data) {
  # Loop through each plot in the cluster
  for (plot in cluster$plot) {
    # Check if the plot has tree data
    if (length(plot$tree) > 0) {
      # Extract tree data safely
      dbh_values <- sapply(plot$tree, function(x) {
        if (!is.null(x$dbh)) as.numeric(x$dbh) else NA
      })
      species_values <- sapply(plot$tree, function(x) {
        if (!is.null(x$tree_species)) as.numeric(x$tree_species) else NA
      })
      
      # Only add data if both vectors have the same length
      if (length(dbh_values) == length(species_values) && length(dbh_values) > 0) {
        plot_trees <- data.frame(
          dbh = dbh_values,
          tree_species = species_values
        )
        # Bind the rows to the main data frame
        tree_data <- rbind(tree_data, plot_trees)
      }
    }
  }
}

# Calculate statistics grouped by tree species (only if we have data)
if (nrow(tree_data) > 0) {
  tree_stats <- tree_data %>%
    group_by(tree_species) %>%
    summarise(
      count = n(),
      mean_dbh = round(mean(dbh, na.rm = TRUE), 2),
      sd_dbh = round(sd(dbh, na.rm = TRUE), 2)
    )

  # Print results for each tree species
  for (i in 1:nrow(tree_stats)) {
    cat("\nTree species:", tree_stats$tree_species[i], "\n")
    cat("Number of trees:", tree_stats$count[i], "\n")
    cat("Mean DBH:", tree_stats$mean_dbh[i], "cm\n")
    cat("Standard deviation DBH:", tree_stats$sd_dbh[i], "cm\n")
  }
} else {
  cat("No tree data found in the response.\n")
}
Python
import pandas as pd
import requests

class BearerAuth(requests.auth.AuthBase):
    def __init__(self, token, profile):
        self.token = token
        self.profile = profile
    def __call__(self, r):
        r.headers["apikey"] = self.token
        r.headers["Accept-Profile"] = self.profile
        return r
    
class RequestHandler():
    def __init__(self, baseUrl, token=None, profile=None, endPoint=None):
        self.baseUrl = baseUrl
        self.token = token
        self.profile = profile
        self.set_endpoint(endPoint)
    
    def set_endpoint(self, endPoint):
        self.endPoint = endPoint

    def get_response(self):
        if(not self.set_endpoint):
            return
        self.response = requests.get(self.baseUrl+self.endPoint, auth=BearerAuth(self.token, self.profile))
    
    def check_status(self):
        if(self.response.status_code in [200]):
            self._isAuthorized = True
        else:
            self._isAuthorized = False

    def isAuthorized(self):
        self.check_status()
        return self._isAuthorized

    def return_response_dataFrame(self) -> pd.DataFrame:
        """
        """
        if(self.isAuthorized()):
            return pd.DataFrame.from_records(self.response.json())
        else:
            print("Failed to retrieve data")

baseUrl = "https://ci.thuenen.de/rest/v1/"
authToken = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoiYW5vbiIsImlzcyI6InN1cGFiYXNlIiwiaWF0IjoxNzQ1NzkxMjAwLCJleHAiOjE5MDM1NTc2MDB9.hXiYlA_168hHZ6fk3zPgABQUpEcqkYRMzu0A5W5PtYU'
acctProfile = 'inventory_archive'
endPoint = "cluster?states_affected=cd.{5}&select=cluster_name,plot!fk_plot_cluster(plot_name,tree(dbh,tree_species))&plot.tree.dbh=not.is.null"

requestHand = RequestHandler(baseUrl=baseUrl,
                             token=authToken,
                             profile=acctProfile,
                             endPoint=endPoint
                             )

requestHand.get_response()

dfData = requestHand.return_response_dataFrame().set_index('cluster_name')
dfPlots = pd.concat([pd.DataFrame.from_records(df).assign(cluster_name=key) for key, df in dfData.loc[:,'plot'].items()], ignore_index=True)
# Set cluster_name as index, to keep information in DataFrame for trees
dfPlots.set_index('cluster_name',inplace=True)
# Get all trees out of dfPlots
dfTrees = pd.concat([pd.DataFrame.from_records(df).assign(cluster_name=key) for key, df in dfPlots.loc[:,'tree'].items()], ignore_index=True)

# print summary
print(dfTrees.groupby('tree_species')['dbh'].describe())

# Part with named tree species
# prepare to get lookup table
endPoint = 'lookup_tree_species'
requestHand = RequestHandler(baseUrl=baseUrl,
                             token=authToken,
                             endPoint=endPoint,
                             profile='lookup'
                             )

# get lookup table
requestHand.get_response()
dfLookupTreeSpec = requestHand.return_response_dataFrame()

# Join with lookup table for tree species and print summary
print(dfTrees.join(dfLookupTreeSpec.set_index('code'), on=['tree_species'], how='left')[['dbh','tree_species','name_de']].groupby('name_de')['dbh'].describe())