camminapy.data
1from camminapy.data.resample import ( 2 resample_dataframe_grouped_pandas, 3 resample_dataframe_grouped_polars, 4 resample_dataframe_pandas, 5 resample_dataframe_polars, 6) 7 8__all__ = [ 9 "resample_dataframe_polars", 10 "resample_dataframe_grouped_polars", 11 "resample_dataframe_pandas", 12 "resample_dataframe_grouped_pandas", 13]
def
resample_dataframe_polars( df: polars.dataframe.frame.DataFrame, interpolation_column: str, interpolation_step: float, to_log: bool = False) -> polars.dataframe.frame.DataFrame:
def resample_dataframe_polars(
    df: pl.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
    to_log: bool = False,
) -> pl.DataFrame:
    """Resample a dataframe onto an evenly stepped grid of interpolation points.

    Parameters
    ----------
    df : pl.DataFrame
        The dataframe to resample.
    interpolation_column : str
        Name of the numeric column that supplies the interpolation points.
    interpolation_step : float
        Spacing between consecutive newly created interpolation points.
    to_log : bool
        If true, log the number of rows before and after resampling.

    Returns
    -------
    pl.DataFrame
        A dataframe with the same columns as the input dataframe in which
        `interpolation_column` advances in steps of `interpolation_step`,
        starting at the column's minimum, and all other data is interpolated
        onto that grid. String columns are forward filled.
        **Note**: This will **NOT** extrapolate.
    """
    # Build the grid of x-values: start at the column minimum and keep stepping
    # until the column maximum is covered.
    lower_bound = df.min()[0, interpolation_column]
    upper_bound = df.max()[0, interpolation_column]
    grid = pl.DataFrame(
        {
            interpolation_column: np.arange(
                start=lower_bound,
                stop=upper_bound + interpolation_step,
                step=interpolation_step,
            )
        }
    )

    # Merge the grid into the original rows so that every grid point gets a row
    # (with nulls where no original data exists), order by x and fill the nulls.
    # NOTE(review): polars' `interpolate()` appears to fill nulls linearly by row
    # position rather than by the value of `interpolation_column` -- confirm the
    # results are as intended for unevenly spaced input.
    merged = grid.join(df, on=[interpolation_column], how="outer")
    merged = merged.sort(interpolation_column).interpolate()

    # Keep only the rows that sit on the grid; the original sample points were
    # only needed as support for the interpolation.
    resampled = grid.join(
        merged,
        on=[interpolation_column],
        how="left",
    ).sort(interpolation_column)

    if to_log:
        logger.info(f"Resampled from {len(df)} rows to {len(resampled)} rows.")

    # String columns cannot be interpolated, so they are null on every grid
    # point that was not part of the input. Carry the last known value forward.
    forward_filled_strings = cs.string().fill_null(strategy="forward")
    return resampled.with_columns(forward_filled_strings)
Resamples a dataframe to obtain data at interpolation points.
Parameters
- df (pl.DataFrame): The dataframe to interpolate.
- interpolation_column (str): Which numeric column to use for the interpolation points.
- interpolation_step (float): Step size between the newly created interpolation points.
- to_log (bool): Whether or not to show additional logging info.
Returns
- pl.DataFrame: A dataframe with the same columns as the input dataframe and where
`interpolation_column` is spaced as `interpolation_step` and all other data is interpolated onto that timeline. Note: This will NOT extrapolate.
def
resample_dataframe_grouped_polars( df: polars.dataframe.frame.DataFrame, interpolation_column: str, interpolation_step: float, group_column: str, to_log: bool = False) -> polars.dataframe.frame.DataFrame:
def resample_dataframe_grouped_polars(
    df: pl.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
    group_column: str,
    to_log: bool = False,
) -> pl.DataFrame:
    """Resample each group of a dataframe onto its own interpolation grid.

    Parameters
    ----------
    df : pl.DataFrame
        The dataframe to resample.
    interpolation_column : str
        Name of the numeric column that supplies the interpolation points.
    interpolation_step : float
        Spacing between consecutive newly created interpolation points.
    group_column : str
        Name of the column whose values define the groups.
    to_log : bool
        If true, log the number of rows before and after resampling
        (forwarded to `resample_dataframe_polars` for every group).

    Returns
    -------
    pl.DataFrame
        A dataframe with the same columns as the input dataframe in which
        `interpolation_column` is spaced as `interpolation_step` and all other
        data is interpolated onto that grid.

    Info
    ----
    This is a wrapper that calls `resample_dataframe_polars` once per group
    and concatenates the results.
    """
    # Settings shared by every per-group call.
    resample_options = {
        "interpolation_column": interpolation_column,
        "interpolation_step": interpolation_step,
        "to_log": to_log,
    }

    # `maintain_order=True` keeps the groups in the order in which they first
    # appear in `df`, so the concatenated output follows the input ordering.
    groups = df.groupby(group_column, maintain_order=True)
    resampled_groups = [
        resample_dataframe_polars(group_frame, **resample_options)
        for _group_key, group_frame in groups
    ]
    return pl.concat(resampled_groups)
Groupwise resamples a dataframe to obtain data at interpolation points.
Parameters
- df (pl.DataFrame): The dataframe to interpolate.
- interpolation_column (str): Which numeric column to use for the interpolation points.
- interpolation_step (float): Step size between the newly created interpolation points.
- group_column (str): The column over which to group
- to_log (bool): Whether or not to show additional logging info.
Returns
- pl.DataFrame: A dataframe with the same columns as the input dataframe and where
`interpolation_column` is spaced as `interpolation_step` and all other data is interpolated onto that timeline.
Info
This is a wrapper that just calls resample_dataframe_polars for each group.
def
resample_dataframe_pandas( df: pandas.core.frame.DataFrame, interpolation_column: str, interpolation_step: float, to_log: bool = False) -> pandas.core.frame.DataFrame:
def resample_dataframe_pandas(
    df: pd.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
    to_log: bool = False,
) -> pd.DataFrame:
    """Resample a pandas dataframe onto an evenly stepped grid of points.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to resample.
    interpolation_column : str
        Name of the numeric column that supplies the interpolation points.
    interpolation_step : float
        Spacing between consecutive newly created interpolation points.
    to_log : bool
        If true, log the number of rows before and after resampling.

    Returns
    -------
    pd.DataFrame
        A dataframe with the same columns as the input dataframe in which
        `interpolation_column` is spaced as `interpolation_step` and all other
        data is interpolated onto that grid.

    Info
    ----
    This is a wrapper that converts the input to polars, calls
    `resample_dataframe_polars` and converts the result back to pandas.
    """
    polars_input = pl.DataFrame(df)
    polars_result = resample_dataframe_polars(
        df=polars_input,
        interpolation_column=interpolation_column,
        interpolation_step=interpolation_step,
        to_log=to_log,
    )
    return polars_result.to_pandas()
Resamples a dataframe to obtain data at interpolation points.
Parameters
- df (pd.DataFrame): The dataframe to interpolate.
- interpolation_column (str): Which numeric column to use for the interpolation points.
- interpolation_step (float): Step size between the newly created interpolation points.
- to_log (bool): Whether or not to show additional logging info.
Returns
- pd.DataFrame: A dataframe with the same columns as the input dataframe and where
`interpolation_column` is spaced as `interpolation_step` and all other data is interpolated onto that timeline.
Info
This is a wrapper that just calls resample_dataframe_polars.
def
resample_dataframe_grouped_pandas( df: pandas.core.frame.DataFrame, interpolation_column: str, interpolation_step: float, group_column: str, to_log: bool = False) -> pandas.core.frame.DataFrame:
def resample_dataframe_grouped_pandas(
    df: pd.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
    group_column: str,
    to_log: bool = False,
) -> pd.DataFrame:
    """Resample each group of a pandas dataframe onto its own interpolation grid.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to resample.
    interpolation_column : str
        Name of the numeric column that supplies the interpolation points.
    interpolation_step : float
        Spacing between consecutive newly created interpolation points.
    group_column : str
        Name of the column whose values define the groups.
    to_log : bool
        If true, log the number of rows before and after resampling.

    Returns
    -------
    pd.DataFrame
        A dataframe with the same columns as the input dataframe in which
        `interpolation_column` is spaced as `interpolation_step` and all other
        data is interpolated onto that grid.

    Info
    ----
    This is a wrapper that converts the input to polars, calls
    `resample_dataframe_grouped_polars` and converts the result back to pandas.
    """
    polars_input = pl.DataFrame(df)
    polars_result = resample_dataframe_grouped_polars(
        df=polars_input,
        interpolation_column=interpolation_column,
        interpolation_step=interpolation_step,
        group_column=group_column,
        to_log=to_log,
    )
    return polars_result.to_pandas()
Groupwise resamples a dataframe to obtain data at interpolation points.
Parameters
- df (pd.DataFrame): The dataframe to interpolate.
- interpolation_column (str): Which numeric column to use for the interpolation points.
- interpolation_step (float): Step size between the newly created interpolation points.
- group_column (str): The column over which to group
- to_log (bool): Whether or not to show additional logging info.
Returns
- pd.DataFrame: A dataframe with the same columns as the input dataframe and where
`interpolation_column` is spaced as `interpolation_step` and all other data is interpolated onto that timeline.
Info
This is a wrapper that just calls resample_dataframe_grouped_polars.