"""
This file is part of CLIMADA.
Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.
CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.
CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.
---
Define functions to handle impact_yearsets
"""
import logging
import numpy as np
from numpy.random import default_rng
import climada.util.dates_times as u_dt
from climada.engine import Impact
LOGGER = logging.getLogger(__name__)
[docs]
def impact_yearset(
imp, sampled_years, lam=None, correction_fac=True, with_replacement=False, seed=None
):
"""Create a yearset of impacts (yimp) containing a probabilistic impact for each year
in the sampled_years list by sampling events from the impact received as input with a
Poisson distribution centered around lam per year (lam = sum(imp.frequency)).
In contrast to the expected annual impact (eai) yimp contains impact values that
differ among years. When correction factor is true, the yimp are scaled such
that the average over all years is equal to the eai.
Parameters
-----------
imp : climada.engine.Impact()
impact object containing impacts per event
sampled_years : list
A list of years that shall be covered by the resulting yimp.
lam: float, optional
The parameter (i.e., the mean) of the Poisson distribution to sample the number of
events per year. If no lambda value is given, the default lam = sum(imp.frequency)
is used. Default is None.
correction_fac : boolean, optional
If True, a correction factor is applied to the resulting yimp. It is
scaled in such a way that the expected annual impact (eai) of the yimp
equals the eai of the input impact. Defaults to True.
with_replacement : bool, optional
If True, impact events are sampled with replacement. If False, events are sampled
without replacement. Sampling without replacement can yield distorted samples if
frequencies of different events are unqual. Defaults to False.
seed : Any, optional
seed for the default bit generator
default: None
Returns
-------
yimp : climada.engine.Impact()
yearset of impacts containing annual impacts for all sampled_years
sampling_vect : 2D array
The sampling vector specifies how to sample the yimp, it consists of one
sub-array per sampled_year, which contains the event_ids of the events used to
calculate the annual impacts.
Can be used to re-create the exact same yimp.
"""
n_sampled_years = len(sampled_years)
# create sampling vector
if not lam:
lam = np.sum(imp.frequency)
events_per_year = sample_from_poisson(n_sampled_years, lam, seed=seed)
sampling_vect = sample_events(
events_per_year, imp.frequency, with_replacement=with_replacement, seed=seed
)
# compute impact per sampled_year
yimp = impact_yearset_from_sampling_vect(
imp, sampled_years, sampling_vect, correction_fac=correction_fac
)
return yimp, sampling_vect
[docs]
def impact_yearset_from_sampling_vect(
imp, sampled_years, sampling_vect, correction_fac=True
):
"""Create a yearset of impacts (yimp) containing a probabilistic impact for each year
in the sampled_years list by sampling events from the impact received as input following
the sampling vector provided.
In contrast to the expected annual impact (eai) yimp contains impact values that
differ among years. When correction factor is true, the yimp are scaled such
that the average over all years is equal to the eai.
Parameters
-----------
imp : climada.engine.Impact()
impact object containing impacts per event
sampled_years : list
A list of years that shall be covered by the resulting yimp.
sampling_vect : 2D array
The sampling vector specifies how to sample the yimp, it consists of one
sub-array per sampled_year, which contains the event_ids of the events used to
calculate the annual impacts.
It needs to be obtained in a first call,
i.e. [yimp, sampling_vect] = climada_yearsets.impact_yearset(...)
and can then be provided in this function to obtain the exact same sampling
(also for a different imp object)
correction_fac : boolean, optional
If True a correction factor is applied to the resulting yimp. It is
scaled in such a way that the expected annual impact (eai) of the yimp
equals the eai of the input impact. Defaults to True.
Returns
-------
yimp : climada.engine.Impact()
yearset of impacts containing annual impacts for all sampled_years
"""
# compute impact per sampled_year
imp_per_year = compute_imp_per_year(imp, sampling_vect)
frequency = np.ones(len(sampled_years)) / len(sampled_years)
if correction_fac: # adjust for sampling error
correction_factor = calculate_correction_fac(imp_per_year, imp)
imp_per_year = imp_per_year.astype(float) * correction_factor
LOGGER.info("The correction factor is %s.", correction_factor)
# create yearly impact object
yimp = Impact(
at_event=imp_per_year,
date=u_dt.str_to_date([str(date) + "-01-01" for date in sampled_years]),
event_name=np.arange(1, len(sampled_years) + 1),
event_id=np.arange(1, len(sampled_years) + 1),
unit=imp.unit,
haz_type=imp.haz_type,
frequency=frequency,
frequency_unit="1/year",
aai_agg=np.sum(frequency * imp_per_year),
)
return yimp
[docs]
def sample_from_poisson(n_sampled_years, lam, seed=None):
"""Sample the number of events for n_sampled_years
Parameters
-----------
n_sampled_years : int
The target number of years the impact yearset shall contain.
lam: float
The parameter (i.e., the mean) of the Poisson distribution to sample the number of
events per year.
seed : int, optional
seed for numpy.random, will be set if not None
default: None
Returns
-------
events_per_year : np.ndarray
Number of events per sampled year
"""
if seed is not None:
np.random.seed(seed)
return np.round(np.random.poisson(lam=lam, size=n_sampled_years)).astype("int")
[docs]
def sample_events(events_per_year, freqs_orig, with_replacement=False, seed=None):
"""Sample events uniformely from an array (indices_orig) without replacement
(if sum(events_per_year) > n_input_events the input events are repeated
(tot_n_events/n_input_events) times, by ensuring that the same events doens't
occur more than once per sampled year).
Parameters
-----------
events_per_year : np.ndarray
Number of events per sampled year
freqs_orig : np.ndarray
Frequency of each input event
with_replacement : bool, optional
If True, impact events are sampled with replacement. If False, events are sampled
without replacement. Sampling without replacement can yield distorted samples if
frequencies of different events are unqual. Defaults to False.
seed : Any, optional
seed for the default bit generator.
Default: None
Returns
-------
sampling_vect : 2D array
The sampling vector specifies how to sample the yimp, it consists of one
sub-array per sampled_year, which contains the event_ids of the events used to
calculate the annual impacts.
"""
sampling_vect = []
indices_orig = np.arange(len(freqs_orig))
rng = default_rng(seed)
# sample without replacement, works well if event frequencies are equal
if with_replacement is False:
# warn if frequencies of different events are not equal
if np.unique(freqs_orig).size != 1:
LOGGER.warning(
"The frequencies of the different events are not equal. This can lead to "
"distorted sampling if the frequencies vary significantly. To avoid this, "
"please set `with_replacement=True` to sample with replacement instead."
)
indices = indices_orig
freqs = freqs_orig
# sample events for each sampled year
for amount_events in events_per_year:
# if there are not enough input events, choice with no replace will fail
if amount_events > len(freqs_orig):
raise ValueError(
f"cannot sample {amount_events} distinct events for a single year"
f" when there are only {len(freqs_orig)} input events. Set "
"with_replacement=True if you want to sample with replacmeent."
)
# if not enough events remaining, append original events
if len(np.unique(indices)) < amount_events or len(indices) == 0:
indices = np.append(indices, indices_orig)
freqs = np.append(freqs, freqs_orig)
# ensure that each event only occurs once per sampled year
unique_events = np.unique(indices, return_index=True)[0]
probab_dis = freqs[np.unique(indices, return_index=True)[1]] / (
np.sum(freqs[np.unique(indices, return_index=True)[1]])
)
# sample events
selected_events = rng.choice(
unique_events, size=amount_events, replace=False, p=probab_dis
).astype("int")
# determine used events to remove them from sampling pool
idx_to_remove = [
np.where(indices == event)[0][0] for event in selected_events
]
indices = np.delete(indices, idx_to_remove)
freqs = np.delete(freqs, idx_to_remove)
# save sampled events in sampling vector
sampling_vect.append(selected_events)
else:
# easier method if we allow for replacement sample with replacement
probab_dis = freqs_orig / sum(freqs_orig)
# sample events for each sampled year
selected_events = rng.choice(
indices_orig, size=sum(events_per_year), replace=True, p=probab_dis
).astype("int")
# group to list of lists
index = 0
for amount_events in events_per_year:
sampling_vect.append(selected_events[index : index + amount_events])
index += amount_events
return sampling_vect
[docs]
def compute_imp_per_year(imp, sampling_vect):
"""Sample annual impacts from the given event_impacts according to the sampling dictionary
Parameters
-----------
imp : climada.engine.Impact()
impact object containing impacts per event
sampling_vect : 2D array
The sampling vector specifies how to sample the yimp, it consists of one
sub-array per sampled_year, which contains the event_ids of the events used to
calculate the annual impacts.
Returns
-------
imp_per_year: np.ndarray
Sampled impact per year (length = sampled_years)
"""
imp_per_year = [
np.sum(imp.at_event[list(sampled_events)]) for sampled_events in sampling_vect
]
return np.array(imp_per_year)
[docs]
def calculate_correction_fac(imp_per_year, imp):
"""Calculate a correction factor that can be used to scale the yimp in such
a way that the expected annual impact (eai) of the yimp amounts to the eai
of the input imp
Parameters
-----------
imp_per_year : np.ndarray
sampled yimp
imp : climada.engine.Impact()
impact object containing impacts per event
Returns
-------
correction_factor: int
The correction factor is calculated as imp_eai/yimp_eai
"""
yimp_eai = np.mean(imp_per_year)
imp_eai = np.sum(imp.frequency * imp.at_event)
correction_factor = imp_eai / yimp_eai
# if correction_factor > 0.1:
# tex = raw_input("Do you want to exclude small events?")
return correction_factor