
Surface Water Level Monitoring with Sentinel-2

Timeseries of water-covered area at Santa Giustina lake, Italy

Authors: Eurac Research

Introduction

This notebook demonstrates how to estimate and visualize the surface water area of Santa Giustina lake, Italy, using Sentinel-2 data stored in the EOPF-Zarr format.

The lake

Lago di Santa Giustina is an artificial lake located in the Trentino region of northern Italy. It was created in the 1950s by damming the Noce River for hydroelectric power generation. The lake is situated at an elevation of approximately 500 meters above sea level and has a surface area of about 3.5 square kilometers.

Lago di Santa Giustina

Image source: By Mosco - Own work, CC BY-SA 3.0, https://commons.wikimedia.org/w/index.php?curid=5144281

Setup

Start by importing the necessary libraries:

import xarray as xr
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np

import pystac_client

from rasterio.features import geometry_mask

from skimage.filters import threshold_otsu
import pandas as pd
from skimage import measure

from pyproj import Transformer

import dask
from dask_gateway import Gateway

import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
"""
gate = Gateway("https://dask.user.eopf.eodc.eu",
               proxy_address="tls://dask.user.eopf.eodc.eu:10000",
               auth="jupyterhub")
"""

# Dask Gateway automatically loads the correct configuration.
# So the code below is identical to the comment above
gate = Gateway()
cluster = gate.new_cluster()
cluster.scale(8)
client = cluster.get_client()
client
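Instead of a fixed worker count, you can also let the cluster scale with the workload. A minimal sketch using the adapt method of the same cluster object (the worker bounds here are illustrative, not from the original notebook):

# Let Dask Gateway add or remove workers between the given bounds
cluster.adapt(minimum=2, maximum=8)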

Access data

Query the EOPF Sentinel-2 STAC Collection for the area and time of interest.

catalog = pystac_client.Client.open("https://stac.core.eopf.eodc.eu")
bbox = [
    11.003288060439315,
    46.34055268553584,
    11.10369555555596,
    46.40183319621778,
]
items = list(
    catalog.search(
        collections=["sentinel-2-l2a"],
        bbox=bbox,
        datetime=["2020-12-31", "2024-01-01"],
    ).items()
)
print(f"items found: {len(items)}")
items found: 434
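Optionally, inspect one of the returned items to confirm what the search matched; a quick illustrative check (any index works):

item = items[0]
print(item.id, item.datetime, item.properties.get("proj:code"))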

Combine the products into a single datacube:

%%time


def reproject_bbox(bbox, src_crs="EPSG:4326", dst_crs="EPSG:32632"):
    transformer = Transformer.from_crs(src_crs, dst_crs, always_xy=True)
    xmin, ymin = transformer.transform(bbox[0], bbox[1])
    xmax, ymax = transformer.transform(bbox[2], bbox[3])
    return [xmin, ymin, xmax, ymax]


def load_tile(item, bbox_ll):
    # Open the Zarr product lazily as a datatree
    href = item.assets["product"].href
    ds = xr.open_datatree(href, engine="zarr", chunks={}, mask_and_scale=True)

    dst_crs = item.properties.get("proj:code")
    if dst_crs is None:
        raise ValueError(f"No proj:code in item {item.id}")

    # Reproject the lon/lat bbox into the tile's UTM CRS before slicing
    utm_bbox = reproject_bbox(bbox_ll, dst_crs=dst_crs)

    # 20 m reflectance bands: blue, green, red, narrow NIR
    ds_ref = (
        ds["measurements"]["reflectance"]["r20m"]
        .to_dataset()[["b02", "b03", "b04", "b8a"]]
        .sel(x=slice(utm_bbox[0], utm_bbox[2]), y=slice(utm_bbox[3], utm_bbox[1]))
        .expand_dims(time=[item.datetime])
    )

    # 20 m scene classification layer (scl), used later for cloud masking
    ds_scl = (
        ds["conditions"]["mask"]["l2a_classification"]["r20m"]
        .to_dataset()
        .sel(x=slice(utm_bbox[0], utm_bbox[2]), y=slice(utm_bbox[3], utm_bbox[1]))
        .expand_dims(time=[item.datetime])
    )

    return xr.merge([ds_ref, ds_scl])


delayed_results = [dask.delayed(load_tile)(item, bbox) for item in items]
results = dask.compute(*delayed_results)

# Get CRS from STAC item
crs = items[0].properties.get("proj:code")

datacube = xr.concat(results, dim="time").sortby("time")

datacube = datacube.rio.write_crs(crs, inplace=True)
datacube

Open the geoJSON file containing the lake boundaries:

lake_gdf = gpd.read_file("./geojson/lago_santa_giustina.geojson")
lake_gdf = lake_gdf.to_crs(crs)

Visualize a datacube sample

In this section we visualize an RGB composite for a sample date and overlay the lake polygon outline to check that it is aligned correctly.

datacube_sample = datacube.isel(time=50)

rgb = datacube_sample[["b04", "b03", "b02"]].to_dataarray(dim="band")

rgb = rgb.assign_coords(band=["red", "green", "blue"])

vmin, vmax = 0, 0.25
rgb_scaled = (rgb - vmin) / (vmax - vmin)
rgb_scaled = rgb_scaled.clip(0, 1)

rgb_img = rgb_scaled.transpose("y", "x", "band").values

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(
    rgb_img,
    extent=[
        float(rgb.x.min()),
        float(rgb.x.max()),
        float(rgb.y.min()),
        float(rgb.y.max()),
    ],
)

lake_gdf.boundary.plot(ax=ax, edgecolor="yellow", linewidth=2)
ax.set_title("RGB composite with lake boundary overlay")
ax.axis("off")
plt.show()

Cloud masking

In this section we use the Scene Classification Layer (scl) band to mask out clouds and other invalid pixels. We then visualize the same RGB composite as before, but with the mask applied.
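For reference, these are the standard Sentinel-2 L2A scene classification codes; the dictionary below is our own lookup table for readability, not part of the product:

scl_classes = {
    0: "no data",
    1: "saturated or defective",
    2: "dark area pixels",
    3: "cloud shadows",
    4: "vegetation",
    5: "not vegetated",
    6: "water",
    7: "unclassified",
    8: "cloud medium probability",
    9: "cloud high probability",
    10: "thin cirrus",
    11: "snow or ice",
}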

# Keep dark-area pixels (2), vegetation (4), not-vegetated (5), and water (6)
valid_classes = [2, 4, 5, 6]

mask = datacube["scl"].isin(valid_classes)

datacube_masked = datacube.where(mask)

datacube_sample = datacube_masked.isel(time=50)

rgb = datacube_sample[["b04", "b03", "b02"]].to_dataarray(dim="band")

rgb = rgb.assign_coords(band=["red", "green", "blue"])

vmin, vmax = 0, 0.25
rgb_scaled = (rgb - vmin) / (vmax - vmin)
rgb_scaled = rgb_scaled.clip(0, 1)  # keep in [0, 1]
rgb_img = rgb_scaled.transpose("y", "x", "band").values

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(
    rgb_img,
    extent=[
        float(rgb.x.min()),
        float(rgb.x.max()),
        float(rgb.y.min()),
        float(rgb.y.max()),
    ],
)
lake_gdf.boundary.plot(ax=ax, edgecolor="yellow", linewidth=2)
ax.set_title("Cloud-masked RGB composite")
ax.axis("off")
plt.show()

We keep only the dates where at least 5% of the lake pixels are classified as clear (vegetation, not vegetated, or water):

valid_classes = [4, 5, 6]
threshold = 0.05

datacube_lake = datacube.rio.clip(lake_gdf.geometry, lake_gdf.crs)

clear_mask = datacube_lake["scl"].isin(valid_classes)

clear_fraction = clear_mask.mean(dim=("y", "x")).compute()

mask_good = clear_fraction >= threshold

datacube_clear = datacube_lake.isel(time=mask_good)
datacube_clear

NDWI

Water detection is performed using the Normalized Difference Water Index (NDWI), which uses the green and near-infrared (NIR) bands of the Sentinel-2 imagery. Water reflects green light but strongly absorbs NIR, so water pixels have high NDWI values: a pixel with green reflectance 0.06 and NIR reflectance 0.01, for example, yields NDWI ≈ 0.71, while vegetation and bare soil yield values near or below zero. The NDWI is calculated as follows:

$$NDWI = \frac{GREEN - NIR}{GREEN + NIR}$$
ndwi = (datacube_clear["b03"] - datacube_clear["b8a"]) / (
    datacube_clear["b03"] + datacube_clear["b8a"]
)
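Before thresholding, it can help to eyeball the NDWI for a single date; a minimal sketch (the date index is arbitrary):

ndwi.isel(time=0).plot(cmap="RdBu", vmin=-1, vmax=1)
plt.title("NDWI sample")
plt.show()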

Water detection

The water area is selected by applying Otsu thresholding to the NDWI, with the threshold computed independently for each date:

def otsu_mask(arr):
    # Replace NaNs and clip to the valid NDWI range
    arr = np.clip(np.nan_to_num(arr, nan=0.0), -1, 1)

    # Ignore clearly non-water pixels when computing the threshold
    valid = arr > -0.2
    if np.count_nonzero(valid) < 10:
        return np.zeros_like(arr, dtype=bool)
    try:
        thr = threshold_otsu(arr[valid])
    except Exception:
        thr = 0.0
    # Relax the threshold slightly to avoid clipping shoreline pixels
    thr -= 0.05

    return arr > thr


# Otsu needs each full image in memory, so merge the spatial chunks per date
ndwi = ndwi.chunk({"x": -1, "y": -1})
water_masks = xr.apply_ufunc(
    otsu_mask,
    ndwi,
    input_core_dims=[["y", "x"]],
    output_core_dims=[["y", "x"]],
    vectorize=True,
    dask="parallelized",
    output_dtypes=[bool],
)

water_masks = water_masks.compute()

water_masks
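To see how the per-date threshold behaves, you can reproduce the Otsu computation for a single date and plot it against the NDWI histogram; a minimal sketch mirroring the logic of otsu_mask above (the date index is arbitrary):

sample = ndwi.isel(time=0).values
valid = sample[np.isfinite(sample) & (sample > -0.2)]
thr = threshold_otsu(valid)

plt.hist(valid, bins=100, color="steelblue")
plt.axvline(thr, color="red", label=f"Otsu threshold = {thr:.2f}")
plt.axvline(thr - 0.05, color="orange", linestyle="--", label="applied (thr - 0.05)")
plt.xlabel("NDWI")
plt.legend()
plt.show()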

Sample visualization of computed water mask:

target_time = "2021-04"

datacube_full_rgb = datacube.sel(time=target_time, method="nearest")[
    ["b04", "b03", "b02"]
].to_dataarray("band")
datacube_full_rgb = datacube_full_rgb.assign_coords(band=["red", "green", "blue"])

vmin, vmax = 0, 0.25
rgb_scaled_full = (datacube_full_rgb - vmin) / (vmax - vmin)
rgb_scaled_full = rgb_scaled_full.clip(0, 1)
rgb_img_full = rgb_scaled_full.transpose("y", "x", "band").values

water_mask_da = water_masks.sel(time=target_time, method="nearest")

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(
    rgb_img_full,
    extent=[
        float(datacube_full_rgb.x.min()),
        float(datacube_full_rgb.x.max()),
        float(datacube_full_rgb.y.min()),
        float(datacube_full_rgb.y.max()),
    ],
)

water_mask = water_mask_da.values
x_coords = water_mask_da.x.values
y_coords = water_mask_da.y.values

# find_contours works in pixel indices; interpolate back to map coordinates
contours = measure.find_contours(water_mask, 0.5)
for contour in contours:
    x_contour = np.interp(contour[:, 1], np.arange(len(x_coords)), x_coords)
    y_contour = np.interp(contour[:, 0], np.arange(len(y_coords)), y_coords)

    ax.plot(x_contour, y_contour, color="cyan", linewidth=1.5)

ax.set_title("Lake water mask over full-scene RGB (April 2021)")
ax.axis("off")
plt.show()
Next, clip the per-date water masks to the lake polygon so that the water fraction refers only to pixels inside the lake boundary:
lake_masked = []

for t in range(len(datacube_clear.time)):
    wm = xr.DataArray(
        water_masks[t],
        dims=("y", "x"),
        coords={"y": datacube_clear.y, "x": datacube_clear.x},
        name="water_mask",
    ).rio.write_crs(datacube_clear.rio.crs)

    # invert=True marks pixels inside the lake polygon as True
    mask = geometry_mask(
        [lake_gdf.geometry.unary_union],
        transform=wm.rio.transform(),
        invert=True,
        out_shape=wm.shape,
    )
    wm_clipped = wm.where(mask)
    lake_masked.append(wm_clipped)

water_masks_clipped = xr.concat(lake_masked, dim="time")
water_fraction = water_masks_clipped.sum(dim=("y", "x")) / (
    water_masks_clipped.notnull().sum(dim=("y", "x"))
)

Water surface area extraction

# Tabulate the clear-sky water fraction per date
df_water = pd.DataFrame(
    {"time": water_fraction.time.values, "water_fraction": water_fraction.values}
)
df_water.set_index("time", inplace=True)

# Each 20 m x 20 m pixel covers 400 m²
pixel_area_m2 = 20 * 20

water_pixel_count = (water_masks_clipped == 1).sum(dim=("y", "x"))
water_area_m2 = water_pixel_count * pixel_area_m2

water_area_m2 = water_area_m2.assign_coords(time=water_masks_clipped.time)

dates = pd.to_datetime(water_masks.time.values)

water_area_ts = pd.Series(water_area_m2.values, index=dates)
smoothed = water_area_ts.rolling(window=5, center=True, min_periods=1).mean()
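
# Plausibility check (ours, not part of the original workflow): the intro
# quotes a nominal lake area of about 3.5 km², so the detected maximum
# should be of the same order of magnitude.
print(f"max detected area: {water_area_ts.max() / 1e6:.2f} km²")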

plt.figure(figsize=(12, 5))
plt.plot(
    water_area_ts.index,
    water_area_ts.values / 1e6,
    color="lightgray",
    alpha=0.6,
    label="Raw",
)
plt.plot(
    smoothed.index,
    smoothed.values / 1e6,
    color="blue",
    linewidth=2,
    label="rolling mean",
)
plt.ylabel("Lake surface area (km²)")
plt.xlabel("Date")
plt.title("Lake surface area over time")
plt.grid(True, linestyle="--", alpha=0.6)
plt.legend()
plt.show()
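To reuse the timeseries outside the notebook, a minimal sketch that writes the raw and smoothed series to CSV (the filename is illustrative):

pd.DataFrame({"area_m2": water_area_ts, "area_m2_smoothed": smoothed}).to_csv(
    "santa_giustina_water_area.csv", index_label="time"
)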
Finally, shut down the Dask cluster to release its resources:
cluster.shutdown()