camminapy.data

 1from camminapy.data.resample import (
 2    resample_dataframe_grouped_pandas,
 3    resample_dataframe_grouped_polars,
 4    resample_dataframe_pandas,
 5    resample_dataframe_polars,
 6)
 7
 8__all__ = [
 9    "resample_dataframe_polars",
10    "resample_dataframe_grouped_polars",
11    "resample_dataframe_pandas",
12    "resample_dataframe_grouped_pandas",
13]
def resample_dataframe_polars( df: polars.dataframe.frame.DataFrame, interpolation_column: str, interpolation_step: float, to_log: bool = False) -> polars.dataframe.frame.DataFrame:
def resample_dataframe_polars(
    df: pl.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
    to_log: bool = False,
) -> pl.DataFrame:
    """Resample a dataframe onto an evenly spaced grid of one column.

    Parameters
    ----------
    df : pl.DataFrame
        The dataframe to interpolate.
    interpolation_column : str
        Name of the numeric column that provides the interpolation points.
    interpolation_step : float
        Spacing between two consecutive points of the new grid.
    to_log : bool
        If true, log the row count before and after resampling.

    Returns
    -------
    pl.DataFrame
        A dataframe with one row per grid point. The grid starts at the
        minimum of `interpolation_column` and stops before the maximum plus
        `interpolation_step`. All other columns are interpolated onto the
        grid; null values in string columns are forward filled.
        **Note**: This will **NOT** extrapolate.
    """
    # Build the evenly spaced grid spanning the observed range of the column.
    lowest = df.min()[0, interpolation_column]
    highest = df.max()[0, interpolation_column]
    grid_values = np.arange(
        start=lowest,
        stop=highest + interpolation_step,
        step=interpolation_step,
    )
    grid = pl.DataFrame({interpolation_column: grid_values})

    # Merge the grid with the measured rows, order everything along the
    # interpolation column and let polars fill the gaps at the grid points.
    merged = grid.join(df, on=[interpolation_column], how="outer")
    merged = merged.sort(interpolation_column)
    filled = merged.interpolate()

    # Keep only the rows that sit on the grid; original rows that fall
    # between grid points are discarded by the left join.
    resampled = grid.join(filled, on=[interpolation_column], how="left")
    resampled = resampled.sort(interpolation_column)

    if to_log:
        n_input = len(df)
        n_output = len(resampled)
        logger.info(f"Resampled from {n_input} rows to {n_output} rows.")

    # Interpolation leaves string columns null at the new grid points, so
    # carry the last known string value forward.
    forward_filled_strings = cs.string().fill_null(strategy="forward")
    return resampled.with_columns(forward_filled_strings)

Resamples a dataframe to obtain data at interpolation points.

Parameters
  • df (pl.DataFrame): The dataframe to interpolate.
  • interpolation_column (str): Which numeric column to use for the interpolation points.
  • interpolation_step (float): Step size between the newly created interpolation points.
  • to_log (bool): Whether or not to show additional logging info.
Returns
  • pl.DataFrame: A dataframe with the same columns as the input dataframe and where interpolation_column is spaced as interpolation_step and all other data is interpolated onto that timeline. Note: This will NOT extrapolate.
def resample_dataframe_grouped_polars( df: polars.dataframe.frame.DataFrame, interpolation_column: str, interpolation_step: float, group_column: str, to_log: bool = False) -> polars.dataframe.frame.DataFrame:
 80def resample_dataframe_grouped_polars(
 81    df: pl.DataFrame,
 82    interpolation_column: str,
 83    interpolation_step: float,
 84    group_column: str,
 85    to_log: bool = False,
 86) -> pl.DataFrame:
 87    """Groupwise resamples a dataframe to obtain data at interpolation points.
 88
 89    Parameters
 90    ----------
 91    df : pl.DataFrame
 92        The dataframe to interpolate.
 93    interpolation_column : str
 94        Which numeric column to use for the interpolation points.
 95    interpolation_step : float
 96        Steps for the newly create interpolation points
 97    group_column : str
 98        The column over which to group
 99    to_log : bool
100        Whether or not to show additional logging info.
101
102    Returns
103    -------
104    pl.DataFrame
105        A dataframe with the same columns as the input dataframe and where
106        `interpolation_column` is spaced as `interpolation_step` and all other
107        data is interpolated onto that timeline.
108
109    Info
110    -------
111    This is a wrapper that just calls `resample_dataframe_polars` for each group.
112    """
113    return pl.concat(
114        [
115            resample_dataframe_polars(
116                groupdf,
117                interpolation_column=interpolation_column,
118                interpolation_step=interpolation_step,
119                to_log=to_log,
120            )
121            for _, groupdf in df.groupby(group_column, maintain_order=True)
122        ]
123    )

Groupwise resamples a dataframe to obtain data at interpolation points.

Parameters
  • df (pl.DataFrame): The dataframe to interpolate.
  • interpolation_column (str): Which numeric column to use for the interpolation points.
  • interpolation_step (float): Step size between the newly created interpolation points.
  • group_column (str): The column over which to group
  • to_log (bool): Whether or not to show additional logging info.
Returns
  • pl.DataFrame: A dataframe with the same columns as the input dataframe and where interpolation_column is spaced as interpolation_step and all other data is interpolated onto that timeline.
Info

This is a wrapper that just calls resample_dataframe_polars for each group.

def resample_dataframe_pandas( df: pandas.core.frame.DataFrame, interpolation_column: str, interpolation_step: float, to_log: bool = False) -> pandas.core.frame.DataFrame:
def resample_dataframe_pandas(
    df: pd.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
    to_log: bool = False,
) -> pd.DataFrame:
    """Resample a pandas dataframe onto an evenly spaced grid of one column.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to interpolate.
    interpolation_column : str
        Name of the numeric column that provides the interpolation points.
    interpolation_step : float
        Spacing between two consecutive points of the new grid.
    to_log : bool
        Forwarded to `resample_dataframe_polars`.

    Returns
    -------
    pd.DataFrame
        The result of `resample_dataframe_polars`, converted back to pandas.

    Notes
    -----
    The input is converted to a polars dataframe, resampled with
    `resample_dataframe_polars` and converted back with `to_pandas()`.
    """
    polars_input = pl.DataFrame(df)
    polars_result = resample_dataframe_polars(
        df=polars_input,
        interpolation_column=interpolation_column,
        interpolation_step=interpolation_step,
        to_log=to_log,
    )
    return polars_result.to_pandas()

Resamples a dataframe to obtain data at interpolation points.

Parameters
  • df (pd.DataFrame): The dataframe to interpolate.
  • interpolation_column (str): Which numeric column to use for the interpolation points.
  • interpolation_step (float): Step size between the newly created interpolation points.
  • to_log (bool): Whether or not to show additional logging info.
Returns
  • pd.DataFrame: A dataframe with the same columns as the input dataframe and where interpolation_column is spaced as interpolation_step and all other data is interpolated onto that timeline.
Info

This is a wrapper that just calls resample_dataframe_polars.

def resample_dataframe_grouped_pandas( df: pandas.core.frame.DataFrame, interpolation_column: str, interpolation_step: float, group_column: str, to_log: bool = False) -> pandas.core.frame.DataFrame:
def resample_dataframe_grouped_pandas(
    df: pd.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
    group_column: str,
    to_log: bool = False,
) -> pd.DataFrame:
    """Resample every group of a pandas dataframe separately.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to interpolate.
    interpolation_column : str
        Name of the numeric column that provides the interpolation points.
    interpolation_step : float
        Spacing between two consecutive points of the new grid.
    group_column : str
        Column whose values define the groups.
    to_log : bool
        Forwarded to `resample_dataframe_grouped_polars`.

    Returns
    -------
    pd.DataFrame
        The result of `resample_dataframe_grouped_polars`, converted back to
        pandas.

    Notes
    -----
    The input is converted to a polars dataframe, resampled with
    `resample_dataframe_grouped_polars` and converted back with
    `to_pandas()`.
    """
    polars_input = pl.DataFrame(df)
    polars_result = resample_dataframe_grouped_polars(
        df=polars_input,
        interpolation_column=interpolation_column,
        interpolation_step=interpolation_step,
        group_column=group_column,
        to_log=to_log,
    )
    return polars_result.to_pandas()

Groupwise resamples a dataframe to obtain data at interpolation points.

Parameters
  • df (pd.DataFrame): The dataframe to interpolate.
  • interpolation_column (str): Which numeric column to use for the interpolation points.
  • interpolation_step (float): Step size between the newly created interpolation points.
  • group_column (str): The column over which to group
  • to_log (bool): Whether or not to show additional logging info.
Returns
  • pd.DataFrame: A dataframe with the same columns as the input dataframe and where interpolation_column is spaced as interpolation_step and all other data is interpolated onto that timeline.
Info

This is a wrapper that just calls resample_dataframe_grouped_polars.