Features
py3r.behaviour.features.features.Features ¶
Features(tracking: Tracking)
generates features from a pre-processed Tracking object
save ¶
save(
dirpath: str,
*,
data_format: str = "parquet",
overwrite: bool = False,
) -> None
Save this Features object (and its nested Tracking) to a self-describing directory.
Examples:
>>> import tempfile, os
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # add a trivial feature so data is not empty
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={})
>>> with tempfile.TemporaryDirectory() as d:
... f.save(d, data_format='csv', overwrite=True)
... os.path.exists(os.path.join(d, 'manifest.json'))
True
load
classmethod
¶
load(dirpath: str) -> Features
Load a Features object previously saved with save().
Examples:
>>> import tempfile, os
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter', meta={})
>>> with tempfile.TemporaryDirectory() as d:
... f.save(d, data_format='csv', overwrite=True)
... f2 = Features.load(d)
>>> isinstance(f2, Features) and 'counter' in f2.data.columns
True
copy ¶
copy() -> Features
Creates an independent copy of this Features object.
The returned object shares no mutable state with the original: Tracking is copied via Tracking.copy(), the features DataFrame via DataFrame.copy(), and meta/tags via deepcopy.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f_copy = f.copy()
>>> f_copy.handle == f.handle
True
>>> f_copy.tracking.data is not f.tracking.data
True
concat
classmethod
¶
concat(
features_list: list[Features],
*,
handle: str | None = None,
reindex: Literal[
"rezero", "follow_previous", "keep_original"
] = "follow_previous",
) -> Features
Concatenate multiple Features objects along the time (frame) axis.
This method concatenates both the underlying Tracking data and the computed features DataFrame. All Features objects must have:
- Matching fps (in underlying Tracking)
- Identical tracking column names
- Identical feature column names
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `features_list` | `list[Features]` | List of Features objects to concatenate, in temporal order. | required |
| `handle` | `str` | Handle for the concatenated object. If None, uses first object's handle. | `None` |
| `reindex` | `('rezero', 'follow_previous', 'keep_original')` | How to handle frame indices: "rezero": reindex all frames starting from 0 (0, 1, 2, ...); "follow_previous": each chunk continues from where the previous ended (if chunk 1 ends at frame n, chunk 2 starts at n+1); "keep_original": leave indices untouched; duplicates are allowed. | `"follow_previous"` |
Returns:
| Type | Description |
|---|---|
| `Features` | A new Features object containing all frames from input objects. |
Raises:
| Type | Description |
|---|---|
| `ValueError` | If features_list is empty, fps values don't match, or columns differ. |
Notes
For context-dependent features (normalization, embeddings with temporal windows, etc.), consider whether you need to recompute features on concatenated Tracking data rather than concatenating pre-computed features.
Examples:
Concatenate two features objects:
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t1 = Tracking.from_dlc(str(p), handle='ex1', fps=30)
... t2 = Tracking.from_dlc(str(p), handle='ex2', fps=30)
>>> f1, f2 = Features(t1), Features(t2)
>>> # Add a simple feature to both
>>> f1.store(pd.Series([1,2,3,4,5], index=t1.data.index), 'val', meta={})
>>> f2.store(pd.Series([6,7,8,9,10], index=t2.data.index), 'val', meta={})
>>> combined = Features.concat([f1, f2], handle='combined')
>>> len(combined.data) == len(f1.data) + len(f2.data)
True
>>> list(combined.data['val'])
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Verify tracking is also concatenated:
>>> len(combined.tracking.data) == len(t1.data) + len(t2.data)
True
Concatenation metadata is recorded:
>>> 'concat' in combined.meta
True
>>> combined.meta['concat']['n_chunks']
2
distance_between ¶
distance_between(
point1: str, point2: str, dims=("x", "y")
) -> FeaturesResult
returns distance from point1 to point2
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> res = f.distance_between('p1','p2')
>>> isinstance(res, pd.Series) and len(res) == len(t.data)
True
within_distance ¶
within_distance(
point1: str,
point2: str,
distance: float,
dims=("x", "y"),
) -> FeaturesResult
Returns True for frames where point1 is within the specified distance of point2. NA is propagated where inputs are missing (pd.NA).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> res = f.within_distance('p1','p2', distance=15.0)
>>> bool((isinstance(res, pd.Series) and res.notna().any()))
True
get_point_median ¶
get_point_median(point: str, dims=('x', 'y')) -> tuple
Return the per-dimension median coordinate for a tracked point.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> med = f.get_point_median('p1', dims=('x','y'))
>>> isinstance(med, tuple) and len(med) == 2
True
define_static_boundary ¶
define_static_boundary(
points: list[str],
*,
dims: tuple[str, str] = ("x", "y"),
anchor: str | list[str] | None = None,
scale_dim1: float = 1.0,
scale_dim2: float = 1.0,
name: str | None = None,
overwrite: bool = False,
) -> StaticBoundary
Define a static boundary from point medians and optional scaling.
Scaling is applied independently in each selected dimension about anchor.
define_dynamic_boundary ¶
define_dynamic_boundary(
points: list[str],
*,
dims: tuple[str, str] = ("x", "y"),
anchor: str | list[str] | None = None,
scale_dim1: float = 1.0,
scale_dim2: float = 1.0,
name: str | None = None,
overwrite: bool = False,
) -> DynamicBoundary
Define a dynamic boundary from ordered point names and optional scaling.
import_static_boundary ¶
import_static_boundary(
vertices: list[tuple[float, float]],
*,
dims: tuple[str, str] = ("x", "y"),
name: str | None = None,
overwrite: bool = False,
) -> StaticBoundary
Escape hatch: import a precomputed static polygon in selected dims.
get_boundary ¶
get_boundary(name: str) -> StaticBoundary | DynamicBoundary
Draft accessor for named boundary assets.
list_boundaries ¶
list_boundaries() -> pd.DataFrame
Return a compact table of named boundaries on this Features object.
within_boundary ¶
within_boundary(point: str, boundary) -> FeaturesResult
Main boundary inclusion API.
Accepts a StaticBoundary or DynamicBoundary (or a stored boundary name).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> b = f.define_dynamic_boundary(['p1','p2','p3'], name='tri')
>>> mask = f.within_boundary('p1', b)
>>> bool(isinstance(mask, pd.Series))
True
>>> mask2 = f.within_boundary('p1', 'tri')
>>> bool(isinstance(mask2, pd.Series))
True
distance_to_boundary ¶
distance_to_boundary(
point: str,
boundary: str | DynamicBoundary | StaticBoundary,
) -> FeaturesResult
Main boundary distance API.
Accepts a StaticBoundary or DynamicBoundary (or a stored boundary name).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> b = f.define_static_boundary(['p1','p2','p3'], name='tri')
>>> d = f.distance_to_boundary('p1', b)
>>> bool(isinstance(d, pd.Series))
True
>>> d2 = f.distance_to_boundary('p1', 'tri')
>>> bool(isinstance(d2, pd.Series))
True
area_of_boundary ¶
area_of_boundary(
boundary: str | StaticBoundary | DynamicBoundary,
**kwargs,
) -> FeaturesResult
Return boundary area as a FeaturesResult.
Accepts a StaticBoundary or DynamicBoundary (or a stored boundary name).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> b = f.define_dynamic_boundary(['p1','p2','p3'], name='tri')
>>> a = f.area_of_boundary(b)
>>> bool(isinstance(a, pd.Series))
True
>>> a2 = f.area_of_boundary('tri')
>>> bool(isinstance(a2, pd.Series))
True
acceleration ¶
acceleration(point: str, dims=('x', 'y')) -> FeaturesResult
returns acceleration of point from previous frame to current frame, for each frame
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> acc = f.acceleration('p1')
>>> isinstance(acc, pd.Series) and len(acc) == len(t.data)
True
azimuth ¶
azimuth(point1: str, point2: str) -> FeaturesResult
returns azimuth in radians from tracked point1 to tracked point2 for each frame in the data, relative to the direction of the x-axis
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> ang = f.azimuth('p1','p2')
>>> isinstance(ang, pd.Series) and len(ang) == len(t.data)
True
azimuth_deviation ¶
azimuth_deviation(
basepoint: str,
pointdirection1: str,
pointdirection2: str,
) -> FeaturesResult
Compute the signed angular deviation (radians) between two directions from a common basepoint for each frame.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> dev = f.azimuth_deviation('p1','p2','p3')
>>> bool((isinstance(dev, pd.Series) and len(dev) == len(t.data)))
True
within_azimuth_deviation ¶
within_azimuth_deviation(
basepoint: str,
pointdirection1: str,
pointdirection2: str,
deviation: float,
) -> FeaturesResult
Return True for frames where the angular deviation between two rays from basepoint is <= deviation (radians). NA is propagated where inputs are missing (pd.NA).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> mask = f.within_azimuth_deviation('p1','p2','p3', deviation=1.0)
>>> bool((isinstance(mask, pd.Series) and mask.notna().any()))
True
speed ¶
speed(point: str, dims=('x', 'y')) -> FeaturesResult
returns average speed of point from previous frame to current frame, for each frame
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> sp = f.speed('p1')
>>> isinstance(sp, pd.Series) and len(sp) == len(t.data)
True
above_speed ¶
above_speed(
point: str, speed: float, dims=("x", "y")
) -> FeaturesResult
Return True for frames where the point's speed is >= threshold. NA is propagated where inputs are missing (pd.NA).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.above_speed('p1', speed=0.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True
all_above_speed ¶
all_above_speed(
points: list, speed: float, dims=("x", "y")
) -> FeaturesResult
Return True for frames where all listed points are moving at least at the threshold speed. NA is propagated: if any input is NA at a frame, result is NA.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.all_above_speed(['p1','p2'], speed=0.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True
below_speed ¶
below_speed(
point: str, speed: float, dims=("x", "y")
) -> FeaturesResult
Return True for frames where the point's speed is < threshold. NA is propagated where inputs are missing (pd.NA).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.below_speed('p1', speed=9999.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True
all_below_speed ¶
all_below_speed(
points: list, speed: float, dims=("x", "y")
) -> FeaturesResult
Return True for frames where all listed points are moving slower than the threshold speed. NA is propagated: if any input is NA at a frame, result is NA.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.all_below_speed(['p1','p2'], speed=9999.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True
distance_change ¶
distance_change(
point: str, dims=("x", "y")
) -> FeaturesResult
Return unsigned distance moved by point from previous to current frame, per frame.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> dc = f.distance_change('p1')
>>> isinstance(dc, pd.Series) and len(dc) == len(t.data)
True
compose_state_from_booleans ¶
compose_state_from_booleans(
sources: dict[str, str | Series],
*,
priority: list[str] | None = None,
none_label: str = "none",
) -> FeaturesResult
Compose a categorical state series from labeled boolean sources.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `sources` | `dict[str, str \| Series]` | Mapping from state label to its boolean source: either the name of a stored feature or a boolean Series. | required |
| `priority` | `list[str] \| None` | Optional label precedence when multiple sources are True in the same frame. Labels not listed are appended in insertion order. | `None` |
| `none_label` | `str` | Label used when no source is True at a frame. | `'none'` |
Examples:
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> idx = t.data.index
>>> f.store(pd.Series([True, False, True, False, True], index=idx).reindex(idx,
... fill_value=False),
... 'in_corner', meta={})
>>> f.store(pd.Series([False, True, True, False, True], index=idx).reindex(idx,
... fill_value=False),
... 'in_food', meta={})
>>> state = f.compose_state_from_booleans(
... {"corner": "in_corner", "food": "in_food"},
... priority=["food", "corner"],
... )
>>> isinstance(state, pd.Series)
True
>>> set(state.dropna().unique()) >= {'corner', 'food', 'none'}
True
store ¶
store(
feature: Series,
name: str,
overwrite: bool = False,
meta: dict | None = None,
) -> None
Store calculated feature with name and associated freeform metadata.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={'unit':'frames'})
>>> 'counter' in f.data.columns and f.meta['counter']['unit'] == 'frames'
True
classify ¶
classify(classifier: BaseClassifier, **kwargs)
Classify behaviour using a classifier with inputs from this Features object. Returns a FeaturesResult. Classifier output must be a pd.Series with same index.
smooth ¶
smooth(
name: str,
method: str,
window: int,
inplace: bool = False,
**method_kwargs,
) -> pd.Series
Smooth a feature with the given method over a rolling window. If inplace=True, the feature and its metadata are updated in place.
Supported values for method:
- 'median': median in window (numerical)
- 'mean': mean in window (numerical)
- 'savgol': Savitzky–Golay (SciPy). Kwargs e.g. polyorder=3, mode='interp'.
- 'mode': mode in window (numerical or non-numerical)
- 'block': removes labels that occur in blocks shorter than window and replaces them with the value from the previous block; if there is no previous block, they are replaced from the next block after smoothing. Note: all NaN values will be filled using this method (dangerous!)
embedding_df ¶
embedding_df(embedding: dict[str, list[int]])
Generate a time-series embedding DataFrame with the specified time shifts for each column, where embedding is a dict mapping column names to lists of shifts:
- positive shift: value from the future (t+n)
- negative shift: value from the past (t-n)
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # prepare a simple feature to embed
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={})
>>> emb = f.embedding_df({'counter':[0,1,-1]})
>>> list(emb.columns)
['counter_t0', 'counter_t+1', 'counter_t-1']
cluster_embedding ¶
cluster_embedding(
embedding_dict: dict[str, list[int]],
n_clusters: int,
random_state: int = 0,
*,
normalize: bool = False,
feature_weights: dict[str, float] | None = None,
lowmem: bool = False,
decimation_factor: int = 10,
missing_policy: Literal[
"drop", "impute_weight"
] = "drop",
auto_normalize: bool = False,
rescale_factors: dict | None = None,
custom_scaling: dict[str, dict] | None = None,
)
Perform k-means clustering on a single Features object.
Delegates to FeaturesCollection.cluster_embedding.
See that method for full parameter documentation.
Returns:
| Type | Description |
|---|---|
| `tuple` | (FeaturesResult, centroids DataFrame, scaling_factors or None) |
Examples:
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter')
>>> result, centroids, norm = f.cluster_embedding({'counter': [0]}, n_clusters=2)
>>> isinstance(centroids, pd.DataFrame)
True
>>> len(result) == len(f.data)
True
cluster_embedding_stream ¶
cluster_embedding_stream(
embedding_dict: dict[str, list[int]],
n_clusters: int,
random_state: int = 0,
*,
normalize: bool = False,
feature_weights: dict[str, float] | None = None,
missing_policy: Literal[
"drop", "impute_weight"
] = "drop",
chunk_size: int = 10000,
n_epochs: int = 3,
batch_size: int = 1024,
)
Memory-friendly clustering on a single Features object.
Delegates to FeaturesCollection.cluster_embedding_stream.
See that method for full parameter documentation.
Returns:
| Type | Description |
|---|---|
| `tuple` | (FeaturesResult, centroids DataFrame, scaling_factors or None) |
Examples:
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter')
>>> result, centroids, norm = f.cluster_embedding_stream(
... {'counter': [0]}, n_clusters=2)
>>> isinstance(centroids, pd.DataFrame)
True
>>> len(result) == len(f.data)
True
assign_clusters_by_centroids ¶
assign_clusters_by_centroids(
embedding: dict[str, list[int]],
centroids_df: DataFrame,
*,
scaling_factors: dict[str, float] | None = None,
impute_medians: Series | None = None,
rescale_factors: dict | None = None,
custom_scaling: dict[str, dict] | None = None,
) -> FeaturesResult
Assign cluster labels to this Features object using pre-fitted centroids.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `embedding` | `dict[str, list[int]]` | Same embedding dict used during fitting. | required |
| `centroids_df` | `DataFrame` | (n_clusters, n_features) DataFrame of cluster centres. | required |
| `scaling_factors` | `dict[str, float] \| None` | Per-embedding-column multipliers (the "dumb" scalars returned by `cluster_embedding` as `scaling_factors`). | `None` |
| `impute_medians` | `Series \| None` | Per-column fill values for NaN imputation (from training). | `None` |
Returns:
| Type | Description |
|---|---|
| `FeaturesResult` | Series of cluster IDs (0 .. n_clusters-1). |
Examples:
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # add a simple feature to embed
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter', meta={})
>>> emb = {'counter':[0, 1]}
>>> df = f.embedding_df(emb)
>>> # make 2 simple centroids matching columns
>>> cents = pd.DataFrame([[0, 0], [1, 1]], columns=df.columns)
>>> labels = f.assign_clusters_by_centroids(emb, cents)
>>> isinstance(labels, pd.Series) and len(labels) == len(t.data)
True
train_knn_regressor ¶
train_knn_regressor(
*,
source_embedding: dict[str, list[int]],
target_embedding: dict[str, list[int]],
n_neighbors: int = 5,
normalize_source: bool = False,
**kwargs,
)
Developer mode: not available in public release yet.
Train a KNN regressor to predict target from source embedding on this object. If normalize_source is True, normalize source and return rescale factors. Returns (model, input_cols, target_cols[, rescale_factors]).
predict_knn ¶
predict_knn(
model: KNeighborsRegressor,
source_embedding: dict[str, list[int]],
target_embedding: dict[str, list[int]],
rescale_factors: dict = None,
) -> pd.DataFrame
Developer mode: not available in public release yet.
Predict using a trained KNN regressor on this Features object. If rescale_factors is provided, normalize the source embedding before prediction. The prediction will match the shape and columns of self.embedding_df(target_embedding).
rms_error_between_embeddings
staticmethod
¶
rms_error_between_embeddings(
ground_truth: DataFrame,
prediction: DataFrame,
rescale: dict | str = None,
) -> pd.Series
Developer mode: not available in public release yet.
Compute RMS for each row between two embedding DataFrames. If rescale is a dict, normalize both with it before computing error. If rescale == 'auto', compute factors from ground_truth and apply to both. Returns Series indexed like inputs; NaN where either input has NaNs.
define_elliptical_boundary_from_params ¶
define_elliptical_boundary_from_params(
centre: str | list[str],
major_axis_length: float,
minor_axis_length: float,
angle_in_radians: float = 0.0,
n_points: int = 100,
) -> list[tuple[float, float]]
Generate a polygonal approximation of an ellipse as a list of (x, y) tuples,
around centre using explicit parameters.
centre can be a point name or list of point names (then centre = mean of medians).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> poly = f.define_elliptical_boundary_from_params(
... 'p1', major_axis_length=10, minor_axis_length=6,
... angle_in_radians=0.0, n_points=32)
>>> isinstance(poly, list) and len(poly) == 32
True
define_elliptical_boundary_from_points ¶
define_elliptical_boundary_from_points(
points: list[str],
n_points: int = 100,
scaling: float = 1.0,
smallness_weight: float = 0.1,
) -> list[tuple[float, float]]
Fit an ellipse to the median coordinates of the given tracked points (at least 4)
and return a polygonal approximation. After fitting, the ellipse is scaled by scaling.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # Use exactly 4 points to avoid requiring skimage in tests
>>> poly = f.define_elliptical_boundary_from_points(
... ['p1','p3','p2','p3'], n_points=20, scaling=1.0)
>>> isinstance(poly, list) and len(poly) == 20
True