Skip to content

Features

py3r.behaviour.features.features.Features

Features(tracking: Tracking)

generates features from a pre-processed Tracking object

tracking instance-attribute

tracking = tracking

data instance-attribute

data = DataFrame()

meta instance-attribute

meta = dict()

handle instance-attribute

handle = handle

tags instance-attribute

tags = tags

loc property

loc

iloc property

iloc

save

save(
    dirpath: str,
    *,
    data_format: str = "parquet",
    overwrite: bool = False,
) -> None

Save this Features object (and its nested Tracking) to a self-describing directory.

Examples
>>> import tempfile, os
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # add a trivial feature so data is not empty
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={})
>>> with tempfile.TemporaryDirectory() as d:
...     f.save(d, data_format='csv', overwrite=True)
...     os.path.exists(os.path.join(d, 'manifest.json'))
True

load classmethod

load(dirpath: str) -> Features

Load a Features object previously saved with save().

Examples
>>> import tempfile, os
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter', meta={})
>>> with tempfile.TemporaryDirectory() as d:
...     f.save(d, data_format='csv', overwrite=True)
...     f2 = Features.load(d)
>>> isinstance(f2, Features) and 'counter' in f2.data.columns
True

copy

copy() -> Features

Creates an independent copy of this Features object.

The returned object shares no mutable state with the original: Tracking is copied via Tracking.copy(), the features DataFrame via DataFrame.copy(), and meta/tags via deepcopy.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f_copy = f.copy()
>>> f_copy.handle == f.handle
True
>>> f_copy.tracking.data is not f.tracking.data
True

coarse_grain

coarse_grain(
    window: int,
    method: Literal[
        "mean", "median", "min", "max"
    ] = "mean",
    non_numeric: Literal[
        "drop", "nan", "first", "mode", "error"
    ] = "drop",
    keep_assets: bool = True,
) -> Self

Coarse-grain feature data over fixed, non-overlapping windows.

Applies the same aggregation to both Features.data and the backing Tracking object so row counts and index alignment remain consistent. fps is divided by window to reflect the new effective frame rate. A "coarse_grain" entry is appended to meta["transforms"].

Parameters

window : int
    Number of consecutive rows to collapse into one.
method : {"mean", "median", "min", "max"}, default "mean"
    Aggregation applied to numeric feature columns within each window.
non_numeric : {"drop", "nan", "first", "mode", "error"}, default "drop"
    How to handle non-numeric feature columns (e.g. string state labels). Pass "mode" to keep the most-frequent value per window, which is appropriate for categorical columns.
keep_assets : bool, default True
    If True, assets (e.g. boundary objects) are deep-copied to the result. Set to False to avoid copying large assets when they are not needed at the coarser scale.

Returns

Features
    New Features (or subclass) object with ceil(len(data) / window) rows (an incomplete trailing window is retained as its own row) and reduced fps.

Examples
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> vals = pd.Series(range(len(t.data)), index=t.data.index, dtype=float)
>>> f.store(vals, 'counter', meta={})
>>> len(f.data), f.tracking.meta['fps']
(5, 30.0)

Coarse-graining by 2 halves the fps and roughly halves the row count (rounding up, here 5 → 3) for both feature data and the backing Tracking:

>>> f2 = f.coarse_grain(2)
>>> len(f2.data)
3
>>> f2.tracking.meta['fps']
15.0
>>> f2.handle
'ex'

The 5-row input produces 3 windows: two complete (rows 0–1, rows 2–3) and one partial (row 4 alone). Incomplete trailing windows are retained — the single-row window aggregates to the row's own value:

>>> list(f2.data['counter'])
[0.5, 2.5, 4.0]

The backing Tracking is coarse-grained in sync — row counts match:

>>> len(f2.tracking.data) == len(f2.data)
True

Categorical columns are preserved with non_numeric='mode':

>>> labels = pd.Series(['A','A','B','B','A'], index=t.data.index)
>>> f.store(labels, 'state', meta={})
>>> f_mode = f.coarse_grain(2, non_numeric='mode')
>>> list(f_mode.data['state'])
['A', 'B', 'A']

The transform is recorded in meta:

>>> f2.meta['transforms'][-1]
{'type': 'coarse_grain', 'window': 2, 'method': 'mean'}

to_summary

to_summary() -> Summary

Create a Summary object from this Features object.

This is a convenience wrapper around Summary(self).

Returns

Summary A new summary object linked to this features object.

Examples

```pycon
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='demo', fps=30)
>>> f = Features(t)
>>> s = f.to_summary()
>>> from py3r.behaviour.summary.summary import Summary
>>> isinstance(s, Summary)
True
>>> s.handle
'demo'
```

concat classmethod

concat(
    features_list: list[Features],
    *,
    handle: str | None = None,
    reindex: Literal[
        "rezero", "follow_previous", "keep_original"
    ] = "follow_previous",
) -> Features

Concatenate multiple Features objects along the time (frame) axis.

This method concatenates both the underlying Tracking data and the computed features DataFrame. All Features objects must have:

- Matching fps (in underlying Tracking)
- Identical tracking column names
- Identical feature column names

Parameters

features_list : list[Features]
    List of Features objects to concatenate, in temporal order.
handle : str, optional
    Handle for the concatenated object. If None, uses first object's handle.
reindex : {"rezero", "follow_previous", "keep_original"}, default "follow_previous"
    How to handle frame indices:

    - "rezero": Reindex all frames starting from 0 (0, 1, 2, ...).
    - "follow_previous": Each chunk continues from where the previous ended. If chunk 1 ends at frame n, chunk 2 starts at n+1.
    - "keep_original": Leave indices untouched; duplicates are allowed.

Returns

Features A new Features object containing all frames from input objects.

Raises

ValueError If features_list is empty, fps values don't match, or columns differ.

Notes

For context-dependent features (normalization, embeddings with temporal windows, etc.), consider whether you need to recompute features on concatenated Tracking data rather than concatenating pre-computed features.

Examples

Concatenate two features objects:

>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t1 = Tracking.from_dlc(str(p), handle='ex1', fps=30)
...     t2 = Tracking.from_dlc(str(p), handle='ex2', fps=30)
>>> f1, f2 = Features(t1), Features(t2)
>>> # Add a simple feature to both
>>> f1.store(pd.Series([1,2,3,4,5], index=t1.data.index), 'val', meta={})
>>> f2.store(pd.Series([6,7,8,9,10], index=t2.data.index), 'val', meta={})
>>> combined = Features.concat([f1, f2], handle='combined')
>>> len(combined.data) == len(f1.data) + len(f2.data)
True
>>> list(combined.data['val'])
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

Verify tracking is also concatenated:

>>> len(combined.tracking.data) == len(t1.data) + len(t2.data)
True

Concatenation metadata is recorded:

>>> 'concat' in combined.meta
True
>>> combined.meta['concat']['n_chunks']
2

distance_between

distance_between(
    point1: str, point2: str, dims=("x", "y")
) -> FeaturesResult

returns distance from point1 to point2

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> res = f.distance_between('p1','p2')
>>> isinstance(res, pd.Series) and len(res) == len(t.data)
True

within_distance

within_distance(
    point1: str,
    point2: str,
    distance: float,
    dims=("x", "y"),
) -> FeaturesResult

Returns True for frames where point1 is within the specified distance of point2. NA is propagated where inputs are missing (pd.NA).

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> res = f.within_distance('p1','p2', distance=15.0)
>>> bool((isinstance(res, pd.Series) and res.notna().any()))
True

get_point_median

get_point_median(point: str, dims=('x', 'y')) -> tuple

Return the per-dimension median coordinate for a tracked point.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> med = f.get_point_median('p1', dims=('x','y'))
>>> isinstance(med, tuple) and len(med) == 2
True

define_static_boundary

define_static_boundary(
    points: list[str],
    *,
    dims: tuple[str, str] = ("x", "y"),
    anchor: str | list[str] | None = None,
    scale_dim1: float = 1.0,
    scale_dim2: float = 1.0,
    name: str | None = None,
    overwrite: bool = False,
) -> StaticBoundary

Define a static boundary from point medians and optional scaling.

Scaling is applied independently in each selected dimension about anchor.

define_dynamic_boundary

define_dynamic_boundary(
    points: list[str],
    *,
    dims: tuple[str, str] = ("x", "y"),
    anchor: str | list[str] | None = None,
    scale_dim1: float = 1.0,
    scale_dim2: float = 1.0,
    name: str | None = None,
    overwrite: bool = False,
) -> DynamicBoundary

Define a dynamic boundary from ordered point names and optional scaling.

import_static_boundary

import_static_boundary(
    vertices: list[tuple[float, float]],
    *,
    dims: tuple[str, str] = ("x", "y"),
    name: str | None = None,
    overwrite: bool = False,
) -> StaticBoundary

Escape hatch: import a precomputed static polygon in selected dims.

get_boundary

get_boundary(name: str) -> StaticBoundary | DynamicBoundary

Draft accessor for named boundary assets.

list_boundaries

list_boundaries() -> pd.DataFrame

Return a compact table of named boundaries on this Features object.

within_boundary

within_boundary(point: str, boundary) -> FeaturesResult

Main boundary inclusion API.

Accepts a StaticBoundary or DynamicBoundary (or a stored boundary name).

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> b = f.define_dynamic_boundary(['p1','p2','p3'], name='tri')
>>> mask = f.within_boundary('p1', b)
>>> bool(isinstance(mask, pd.Series))
True
>>> mask2 = f.within_boundary('p1', 'tri')
>>> bool(isinstance(mask2, pd.Series))
True

distance_to_boundary

distance_to_boundary(
    point: str,
    boundary: str | DynamicBoundary | StaticBoundary,
) -> FeaturesResult

Main boundary distance API.

Accepts a StaticBoundary or DynamicBoundary (or a stored boundary name).

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> b = f.define_static_boundary(['p1','p2','p3'], name='tri')
>>> d = f.distance_to_boundary('p1', b)
>>> bool(isinstance(d, pd.Series))
True
>>> d2 = f.distance_to_boundary('p1', 'tri')
>>> bool(isinstance(d2, pd.Series))
True

area_of_boundary

area_of_boundary(
    boundary: str | StaticBoundary | DynamicBoundary,
    **kwargs,
) -> FeaturesResult

Return boundary area as a FeaturesResult.

Accepts a StaticBoundary or DynamicBoundary (or a stored boundary name).

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> b = f.define_dynamic_boundary(['p1','p2','p3'], name='tri')
>>> a = f.area_of_boundary(b)
>>> bool(isinstance(a, pd.Series))
True
>>> a2 = f.area_of_boundary('tri')
>>> bool(isinstance(a2, pd.Series))
True

acceleration

acceleration(point: str, dims=('x', 'y')) -> FeaturesResult

returns acceleration of point from previous frame to current frame, for each frame

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> acc = f.acceleration('p1')
>>> isinstance(acc, pd.Series) and len(acc) == len(t.data)
True

azimuth

azimuth(point1: str, point2: str) -> FeaturesResult

returns azimuth in radians from tracked point1 to tracked point2 for each frame in the data, relative to the direction of the x-axis

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> ang = f.azimuth('p1','p2')
>>> isinstance(ang, pd.Series) and len(ang) == len(t.data)
True

azimuth_deviation

azimuth_deviation(
    basepoint: str,
    pointdirection1: str,
    pointdirection2: str,
) -> FeaturesResult

Compute the signed angular deviation (radians) between two directions from a common basepoint for each frame.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> dev = f.azimuth_deviation('p1','p2','p3')
>>> bool((isinstance(dev, pd.Series) and len(dev) == len(t.data)))
True

within_azimuth_deviation

within_azimuth_deviation(
    basepoint: str,
    pointdirection1: str,
    pointdirection2: str,
    deviation: float,
) -> FeaturesResult

Return True for frames where the angular deviation between two rays from basepoint is <= deviation (radians). NA is propagated where inputs are missing (pd.NA).

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> mask = f.within_azimuth_deviation('p1','p2','p3', deviation=1.0)
>>> bool((isinstance(mask, pd.Series) and mask.notna().any()))
True

speed

speed(point: str, dims=('x', 'y')) -> FeaturesResult

returns average speed of point from previous frame to current frame, for each frame

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> sp = f.speed('p1')
>>> isinstance(sp, pd.Series) and len(sp) == len(t.data)
True

above_speed

above_speed(
    point: str, speed: float, dims=("x", "y")
) -> FeaturesResult

Return True for frames where the point's speed is >= threshold. NA is propagated where inputs are missing (pd.NA).

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.above_speed('p1', speed=0.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True

all_above_speed

all_above_speed(
    points: list, speed: float, dims=("x", "y")
) -> FeaturesResult

Return True for frames where all listed points are moving at least at the threshold speed. NA is propagated: if any input is NA at a frame, result is NA.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.all_above_speed(['p1','p2'], speed=0.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True

below_speed

below_speed(
    point: str, speed: float, dims=("x", "y")
) -> FeaturesResult

Return True for frames where the point's speed is < threshold. NA is propagated where inputs are missing (pd.NA).

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.below_speed('p1', speed=9999.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True

all_below_speed

all_below_speed(
    points: list, speed: float, dims=("x", "y")
) -> FeaturesResult

Return True for frames where all listed points are moving slower than the threshold speed. NA is propagated: if any input is NA at a frame, result is NA.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.all_below_speed(['p1','p2'], speed=9999.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True

distance_change

distance_change(
    point: str, dims=("x", "y")
) -> FeaturesResult

Return unsigned distance moved by point from previous to current frame, per frame.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> dc = f.distance_change('p1')
>>> isinstance(dc, pd.Series) and len(dc) == len(t.data)
True

compose_state_from_booleans

compose_state_from_booleans(
    sources: dict[str, str | Series],
    *,
    priority: list[str] | None = None,
    none_label: str = "none",
) -> FeaturesResult

Compose a categorical state series from labeled boolean sources.

Parameters

sources:
    Mapping {state_label: source}, where source is either:

    - a column name in self.data containing a boolean series, or
    - a boolean pandas Series aligned/reindexable to self.data.index (e.g. a FeaturesResult)
priority:
    Optional label precedence when multiple sources are True in the same frame. Labels not listed are appended in insertion order.
none_label:
    Label used when no source is True at a frame.

Examples
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> idx = t.data.index
>>> f.store(pd.Series([True, False, True, False, True], index=idx).reindex(idx,
...         fill_value=False),
...         'in_corner', meta={})
>>> f.store(pd.Series([False, True, True, False, True], index=idx).reindex(idx,
...         fill_value=False),
...         'in_food', meta={})
>>> state = f.compose_state_from_booleans(
...     {"corner": "in_corner", "food": "in_food"},
...     priority=["food", "corner"],
... )
>>> isinstance(state, pd.Series)
True
>>> set(state.dropna().unique()) >= {'corner', 'food', 'none'}
True

store

store(
    feature: Series,
    name: str,
    overwrite: bool = False,
    meta: dict | None = None,
) -> None

Store calculated feature with name and associated freeform metadata.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={'unit':'frames'})
>>> 'counter' in f.data.columns and f.meta['counter']['unit'] == 'frames'
True

classify

classify(classifier: BaseClassifier, **kwargs)

Classify behaviour using a classifier with inputs from this Features object. Returns a FeaturesResult. Classifier output must be a pd.Series with same index.

smooth

smooth(
    name: str,
    method: str,
    window: int,
    inplace: bool = False,
    **method_kwargs,
) -> pd.Series

Smooth feature with method over rolling window. If inplace=True, feature and metadata are updated in place.

method:

- 'median' : median in window (numerical)
- 'mean' : mean in window (numerical)
- 'savgol' : Savitzky–Golay (SciPy). Kwargs e.g. polyorder=3, mode='interp'.
- 'mode' : mode in window (numerical or non-numerical)
- 'block' : applies categorical series_utils.block_filter then series_utils.block_fill using window for both min_block and max_gap.

Legacy smooth_block behavior is removed from this method; use series_utils.smooth_block directly if required.

embedding_df

embedding_df(embedding: dict[str, list[int]])

Generate a time-series embedding DataFrame with per-column time shifts.

Parameters

embedding : dict[str, list[int]] Mapping of feature column name to a list of integer time shifts. Positive shift pulls the value from the future (t+n); negative shift pulls from the past (t-n); zero is the current frame.

Returns

pd.DataFrame One column per (feature, shift) pair, named <col>_t0, <col>_t+n, or <col>_t-n.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # prepare a simple feature to embed
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={})
>>> emb = f.embedding_df({'counter':[0,1,-1]})
>>> list(emb.columns)
['counter_t0', 'counter_t+1', 'counter_t-1']

cluster_embedding

cluster_embedding(
    embedding_dict: dict[str, list[int]],
    n_clusters: int,
    random_state: int = 0,
    *,
    normalize: bool = False,
    feature_weights: dict[str, float] | None = None,
    lowmem: bool = False,
    decimation_factor: int = 10,
    missing_policy: Literal[
        "drop", "impute_weight"
    ] = "drop",
    auto_normalize: bool = False,
    rescale_factors: dict | None = None,
    custom_scaling: dict[str, dict] | None = None,
)

Perform k-means clustering on a single Features object.

Delegates to FeaturesCollection.cluster_embedding. See that method for full parameter documentation.

Returns

(FeaturesResult, centroids DataFrame, scaling_factors or None)

Examples
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter')
>>> result, centroids, norm = f.cluster_embedding({'counter': [0]}, n_clusters=2)
>>> isinstance(centroids, pd.DataFrame)
True
>>> len(result) == len(f.data)
True

cluster_embedding_stream

cluster_embedding_stream(
    embedding_dict: dict[str, list[int]],
    n_clusters: int,
    random_state: int = 0,
    *,
    normalize: bool = False,
    feature_weights: dict[str, float] | None = None,
    missing_policy: Literal[
        "drop", "impute_weight"
    ] = "drop",
    chunk_size: int = 10000,
    n_epochs: int = 3,
    batch_size: int = 1024,
)

Memory-friendly clustering on a single Features object.

Delegates to FeaturesCollection.cluster_embedding_stream. See that method for full parameter documentation.

Returns

(FeaturesResult, centroids DataFrame, scaling_factors or None)

Examples
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter')
>>> result, centroids, norm = f.cluster_embedding_stream(
...     {'counter': [0]}, n_clusters=2)
>>> isinstance(centroids, pd.DataFrame)
True
>>> len(result) == len(f.data)
True

assign_clusters_by_centroids

assign_clusters_by_centroids(
    embedding: dict[str, list[int]],
    centroids_df: DataFrame,
    *,
    scaling_factors: dict[str, float] | None = None,
    impute_medians: Series | None = None,
    rescale_factors: dict | None = None,
    custom_scaling: dict[str, dict] | None = None,
) -> FeaturesResult

Assign cluster labels to this Features object using pre-fitted centroids.

Parameters

embedding : dict[str, list[int]]
    Same embedding dict used during fitting.
centroids_df : pd.DataFrame
    (n_clusters, n_features) DataFrame of cluster centres.
scaling_factors : dict[str, float] | None
    Per-embedding-column multipliers (the "dumb" scalars returned by cluster_embedding_stream). Each raw embedding column is multiplied by the corresponding value before distance computation.
impute_medians : pd.Series | None
    Per-column fill values for NaN imputation (from training).

Returns

FeaturesResult
    Series of cluster IDs (0 .. n_clusters-1).

Examples
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # add a simple feature to embed
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter', meta={})
>>> emb = {'counter':[0, 1]}
>>> df = f.embedding_df(emb)
>>> # make 2 simple centroids matching columns
>>> cents = pd.DataFrame([[0, 0], [1, 1]], columns=df.columns)
>>> labels = f.assign_clusters_by_centroids(emb, cents)
>>> isinstance(labels, pd.Series) and len(labels) == len(t.data)
True

train_knn_regressor

train_knn_regressor(
    *,
    source_embedding: dict[str, list[int]],
    target_embedding: dict[str, list[int]],
    n_neighbors: int = 5,
    normalize_source: bool = False,
    **kwargs,
)

Developer mode: not available in public release yet.

Train a KNN regressor to predict target from source embedding on this object. If normalize_source is True, normalize source and return rescale factors. Returns (model, input_cols, target_cols[, rescale_factors]).

predict_knn

predict_knn(
    model: KNeighborsRegressor,
    source_embedding: dict[str, list[int]],
    target_embedding: dict[str, list[int]],
    rescale_factors: dict = None,
) -> pd.DataFrame

Developer mode: not available in public release yet.

Predict using a trained KNN regressor on this Features object. If rescale_factors is provided, normalize the source embedding before prediction. The prediction will match the shape and columns of self.embedding_df(target_embedding).

rms_error_between_embeddings staticmethod

rms_error_between_embeddings(
    ground_truth: DataFrame,
    prediction: DataFrame,
    rescale: dict | str = None,
) -> pd.Series

Developer mode: not available in public release yet.

Compute RMS for each row between two embedding DataFrames. If rescale is a dict, normalize both with it before computing error. If rescale == 'auto', compute factors from ground_truth and apply to both. Returns Series indexed like inputs; NaN where either input has NaNs.

define_elliptical_boundary_from_params

define_elliptical_boundary_from_params(
    centre: str | list[str],
    major_axis_length: float,
    minor_axis_length: float,
    angle_in_radians: float = 0.0,
    n_points: int = 100,
) -> list[tuple[float, float]]

Generate a polygonal approximation of an ellipse as a list of (x, y) tuples, around centre using explicit parameters. centre can be a point name or list of point names (then centre = mean of medians).

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> poly = f.define_elliptical_boundary_from_params(
...     'p1', major_axis_length=10, minor_axis_length=6,
...     angle_in_radians=0.0, n_points=32)
>>> isinstance(poly, list) and len(poly) == 32
True

define_elliptical_boundary_from_points

define_elliptical_boundary_from_points(
    points: list[str],
    n_points: int = 100,
    scaling: float = 1.0,
    smallness_weight: float = 0.1,
) -> list[tuple[float, float]]

Fit an ellipse to the median coordinates of the given tracked points (at least 4) and return a polygonal approximation. After fitting, the ellipse is scaled by scaling.

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # Use exactly 4 points to avoid requiring skimage in tests
>>> poly = f.define_elliptical_boundary_from_points(
...     ['p1','p3','p2','p3'], n_points=20, scaling=1.0)
>>> isinstance(poly, list) and len(poly) == 20
True

animation_stream

animation_stream(
    *,
    points: list[str],
    lines: list[tuple[str, str]] | None = None,
    boundaries: list[str] | None = None,
    features: list[str | None]
    | dict[str | None, str | None]
    | None = None,
    dims: tuple[str, ...] = ("x", "y"),
    view: dict | None = None,
    canvas_size: tuple[int, int] = (800, 800),
    bg_color: tuple[int, int, int] = (0, 0, 0),
    style: dict | None = None,
    pixel_coords: bool = False,
    undo_meta_scaling: bool = False,
) -> AnimationStream

Build an OpenCV-backed animation stream from Features + boundary assets.

This wraps the same renderer used by `Tracking.animation_stream`, while additionally resolving named boundaries stored in self._assets. Static and dynamic boundaries are resolved to per-boundary arrays and rendered in boundary order.

Parameters

points : list[str]
    Point names to render as circles.
lines : list[tuple[str, str]] | None
    Line segments connecting point pairs.
boundaries : list[str] | None
    Boundary names (or refs resolvable by _resolve_boundary_ref) to draw. Order controls draw stacking.
features : list[str | None] | dict[str | None, str | None] | None
    Per-frame scalar feature columns from self.data to render as text overlays. If a list is provided, each column is shown as name: value. If a dict is provided, keys are display labels and values are source column names. None or "" entries insert a blank spacer line.
dims : tuple[str, ...], default=("x", "y")
    Coordinate dimensions. For 3D, use ("x","y","z"). Boundary definitions are interpreted in their native 2D dims and can be projected in 3D via view.
view : dict | None
    3D view options for projection (azim, elev, proj, camera_distance, focal_length, boundary_z, pad).
canvas_size : tuple[int, int], default=(800, 800)
    Canvas size as (width, height).
bg_color : tuple[int, int, int], default=(0, 0, 0)
    Background color in BGR.
style : dict | None
    Style overrides for points/lines/boundaries.
pixel_coords : bool, default=False
    If True, coordinates are treated as absolute pixel values.
undo_meta_scaling : bool, default=False
    If True, invert tracking meta scaling before rendering.

Returns

AnimationStream
    Stream object with get_frame(), read(), play(), and save().

Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path("py3r.behaviour.tracking._data", "dlc_single.csv") as p:
...     t = Tracking.from_dlc(str(p), handle="ex", fps=30)
>>> f = Features(t)
>>> f.data["speed"] = [0.0, 1.0, 0.0, 1.0, 0.0]
>>> style = {
...     "points": {
...         "default": {"color": (0, 255, 255), "radius": 3},  # default
...         "p1": {"color": (0, 255, 0), "radius": 5},  # static override
...         "p2": {  # dynamic override (source must be in Features.data)
...             "radius": {"from": "speed", "map": {0.0: 2, 1.0: 6}}
...         },
...     }
... }
>>> stream = f.animation_stream(
...     points=["p1", "p2"],
...     lines=[("p1", "p2")],
...     features={"spd": "speed"},
...     pixel_coords=True,
...     canvas_size=(96, 72),
...     style=style,
... )
>>> stream.frame_count
5
>>> stream.get_frame(1).shape
(72, 96, 3)

boundaries_to_arrays

boundaries_to_arrays(
    boundaries: list[str],
    *,
    dims: tuple[str, ...] = ("x", "y"),
    undo_meta_scaling: bool = False,
) -> list[tuple[str, np.ndarray]]

Resolve named boundary assets into per-boundary arrays.

Parameters

boundaries : list[str]
    Stored boundary names (or refs accepted by _resolve_boundary_ref).
dims : tuple[str, ...], default=("x", "y")
    Requested coordinate dimensions. Boundary dims must match (dims[0], dims[1]).
undo_meta_scaling : bool, default=False
    If True, invert tracking scaling metadata before resolving dynamic boundary coordinates.

Returns

list[tuple[str, np.ndarray]]
    Boundary arrays as [(boundary_name, arr), ...] where each arr has shape (n_frames, n_vertices, 2).

Examples
>>> import pandas as pd
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> df = pd.DataFrame(
...     {
...         "a.x": [0.0, 0.0],
...         "a.y": [0.0, 0.0],
...         "b.x": [1.0, 1.0],
...         "b.y": [0.0, 0.0],
...         "c.x": [1.0, 1.0],
...         "c.y": [1.0, 1.0],
...     }
... )
>>> f = Features(Tracking(df, meta={"fps": 30.0}, handle="demo"))
>>> _ = f.define_static_boundary(["a", "b", "c"], name="tri")
>>> arrays = f.boundaries_to_arrays(["tri"])
>>> arrays[0][1].shape
(2, 3, 2)