Features
py3r.behaviour.features.features.Features ¶
Features(tracking: Tracking)
generates features from a pre-processed Tracking object
save ¶
save(dirpath: str, *, data_format: str = 'parquet', overwrite: bool = False) -> None
Save this Features object (and its nested Tracking) to a self-describing directory.
Examples:
>>> import tempfile, os
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # add a trivial feature so data is not empty
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={})
>>> with tempfile.TemporaryDirectory() as d:
... f.save(d, data_format='csv', overwrite=True)
... os.path.exists(os.path.join(d, 'manifest.json'))
True
load
classmethod
¶
load(dirpath: str) -> 'Features'
Load a Features object previously saved with save().
Examples:
>>> import tempfile, os
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter', meta={})
>>> with tempfile.TemporaryDirectory() as d:
... f.save(d, data_format='csv', overwrite=True)
... f2 = Features.load(d)
>>> isinstance(f2, Features) and 'counter' in f2.data.columns
True
distance_between ¶
distance_between(point1: str, point2: str, dims=('x', 'y')) -> FeaturesResult
returns distance from point1 to point2
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> res = f.distance_between('p1','p2')
>>> isinstance(res, pd.Series) and len(res) == len(t.data)
True
within_distance ¶
within_distance(point1: str, point2: str, distance: float, dims=('x', 'y')) -> FeaturesResult
Returns True for frames where point1 is within the specified distance of point2. NA is propagated where inputs are missing (pd.NA).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> res = f.within_distance('p1','p2', distance=15.0)
>>> bool((isinstance(res, pd.Series) and res.notna().any()))
True
get_point_median ¶
get_point_median(point: str, dims=('x', 'y')) -> tuple
Return the per-dimension median coordinate for a tracked point.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> med = f.get_point_median('p1', dims=('x','y'))
>>> isinstance(med, tuple) and len(med) == 2
True
define_boundary ¶
define_boundary(points: list[str], scaling: float, scaling_y: float = None, centre: str | list[str] = None) -> list[tuple[float, float]]
Takes a list of defined points and creates a static, rescaled list of point coordinates based on the median location of those points. 'centre' (the point about which to scale) can be a string or a list of strings, in which case the median of the points will be used as the centre. If 'centre' is None, the median of all the boundary points will be used as the centre. 'scaling' is the factor by which to scale the boundary points, and 'scaling_y' is the factor by which to scale the y-axis. If 'scaling_y' is not provided, 'scaling' will be applied to both axes.
within_boundary_static ¶
within_boundary_static(point: str, boundary: list[tuple[float, float]], boundary_name: str = None) -> FeaturesResult
Checks whether point is inside the polygon defined by an ordered list of boundary points. Boundary points must be specified as a list of numerical tuples.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> boundary = f.define_boundary(['p1','p2','p3'], scaling=1.0)
>>> res = f.within_boundary_static('p1', boundary)
>>> bool((isinstance(res, pd.Series) and res.notna().any()))
True
within_boundary_dynamic ¶
within_boundary_dynamic(point: str, boundary: list[str], boundary_name: str = None) -> FeaturesResult
Checks whether point is inside the polygon defined by an ordered list of boundary points. Boundary points must be specified as a list of names of tracked points.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> res = f.within_boundary_dynamic('p1', ['p1','p2','p3'])
>>> bool((isinstance(res, pd.Series) and res.notna().any()))
True
within_boundary ¶
within_boundary(point: str, boundary: list, median: bool = True, boundary_name: str = None) -> FeaturesResult
Deprecated: use within_boundary_static or within_boundary_dynamic instead. Checks whether point is inside the polygon defined by an ordered list of boundary points. Boundary points may either be specified as a list of numerical tuples, or as a list of names of tracked points. Optionally, pass boundary_name for a custom short name in the feature name/meta.
distance_to_boundary ¶
distance_to_boundary(point: str, boundary: list[str], median: bool = True, boundary_name: str = None) -> FeaturesResult
Deprecated: use distance_to_boundary_static or distance_to_boundary_dynamic instead. Returns the distance from point to boundary. Optionally, pass boundary_name for a custom short name in the feature name/meta.
distance_to_boundary_static ¶
distance_to_boundary_static(point: str, boundary: list[tuple[float, float]], boundary_name: str = None) -> FeaturesResult
Returns distance from point to a static boundary defined by a list of (x, y) tuples. If boundary_name is provided, it overrides the automatic id. NaN is returned if the point or any boundary vertex is NaN.
distance_to_boundary_dynamic ¶
distance_to_boundary_dynamic(point: str, boundary: list[str], boundary_name: str | None = None) -> FeaturesResult
Returns distance from point to a dynamic boundary defined by a list of point names. If boundary_name is provided, it overrides the automatic id. NaN is returned if the point or any boundary vertex is NaN.
area_of_boundary ¶
area_of_boundary(boundary: list[str], median: bool = True) -> FeaturesResult
returns area of boundary as a FeaturesResult (constant for static, per-frame for dynamic)
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> res = f.area_of_boundary(['p1','p2','p3'], median=True)
>>> isinstance(res, pd.Series) and res.nunique() == 1
True
acceleration ¶
acceleration(point: str, dims=('x', 'y')) -> FeaturesResult
returns acceleration of point from previous frame to current frame, for each frame
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> acc = f.acceleration('p1')
>>> isinstance(acc, pd.Series) and len(acc) == len(t.data)
True
azimuth ¶
azimuth(point1: str, point2: str) -> FeaturesResult
returns azimuth in radians from tracked point1 to tracked point2 for each frame in the data, relative to the direction of the x-axis
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> ang = f.azimuth('p1','p2')
>>> isinstance(ang, pd.Series) and len(ang) == len(t.data)
True
azimuth_deviation ¶
azimuth_deviation(basepoint: str, pointdirection1: str, pointdirection2: str) -> FeaturesResult
Compute the signed angular deviation (radians) between two directions from a common basepoint for each frame.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> dev = f.azimuth_deviation('p1','p2','p3')
>>> bool((isinstance(dev, pd.Series) and len(dev) == len(t.data)))
True
within_azimuth_deviation ¶
within_azimuth_deviation(basepoint: str, pointdirection1: str, pointdirection2: str, deviation: float) -> FeaturesResult
Return True for frames where the angular deviation between two rays from basepoint is <= deviation (radians). NA is propagated where inputs are missing (pd.NA).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> mask = f.within_azimuth_deviation('p1','p2','p3', deviation=1.0)
>>> bool((isinstance(mask, pd.Series) and mask.notna().any()))
True
speed ¶
speed(point: str, dims=('x', 'y')) -> FeaturesResult
returns average speed of point from previous frame to current frame, for each frame
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> sp = f.speed('p1')
>>> isinstance(sp, pd.Series) and len(sp) == len(t.data)
True
above_speed ¶
above_speed(point: str, speed: float, dims=('x', 'y')) -> FeaturesResult
Return True for frames where the point's speed is >= threshold. NA is propagated where inputs are missing (pd.NA).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.above_speed('p1', speed=0.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True
all_above_speed ¶
all_above_speed(points: list, speed: float, dims=('x', 'y')) -> FeaturesResult
Return True for frames where all listed points are moving at least at the threshold speed. NA is propagated: if any input is NA at a frame, result is NA.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.all_above_speed(['p1','p2'], speed=0.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True
below_speed ¶
below_speed(point: str, speed: float, dims=('x', 'y')) -> FeaturesResult
Return True for frames where the point's speed is < threshold. NA is propagated where inputs are missing (pd.NA).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.below_speed('p1', speed=9999.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True
all_below_speed ¶
all_below_speed(points: list, speed: float, dims=('x', 'y')) -> FeaturesResult
Return True for frames where all listed points are moving slower than the threshold speed. NA is propagated: if any input is NA at a frame, result is NA.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> m = f.all_below_speed(['p1','p2'], speed=9999.0)
>>> isinstance(m, pd.Series) and len(m) == len(t.data)
True
distance_change ¶
distance_change(point: str, dims=('x', 'y')) -> FeaturesResult
returns unsigned distance moved by point from previous frame to current frame, for each frame
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> dc = f.distance_change('p1')
>>> isinstance(dc, pd.Series) and len(dc) == len(t.data)
True
store ¶
store(feature: Series, name: str, overwrite: bool = False, meta: dict = dict()) -> None
stores calculated feature with name and associated freeform metadata object
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={'unit':'frames'})
>>> 'counter' in f.data.columns and f.meta['counter']['unit'] == 'frames'
True
classify ¶
classify(classifier: BaseClassifier, **kwargs)
Classify behaviour using a classifier with inputs from this Features object. Returns a FeaturesResult object with the classification result; this means that the output of the classifier should be a pd.Series with the same index as this Features object.
smooth ¶
smooth(name: str, method: str, window: int, center: bool = True, inplace: bool = False) -> pd.Series
Smooths the specified feature with the specified method over a rolling window. If inplace=True, the feature will be directly edited and its metadata updated. method: 'median': median of values in window, requires numerical series values; 'mean': mean of values in window, requires numerical series values; 'mode': mode value in window, works with numerical or non-numerical types; 'block': removes labels that occur in blocks of less than length window and replaces them with the value from the previous block, unless there is no previous block, in which case they are replaced from the next block after smoothing. Note: all NaN values will be filled using this method (dangerous!).
embedding_df ¶
embedding_df(embedding: dict[str, list[int]])
Generate a time series embedding dataframe with specified time shifts for each column, where embedding is a dict mapping column names to lists of shifts. Positive shift: value from the future (t+n). Negative shift: value from the past (t-n).
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> import pandas as pd
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # prepare a simple feature to embed
>>> s = pd.Series(range(len(t.data)), index=t.data.index)
>>> f.store(s, 'counter', meta={})
>>> emb = f.embedding_df({'counter':[0,1,-1]})
>>> list(emb.columns)
['counter_t0', 'counter_t+1', 'counter_t-1']
assign_clusters_by_centroids ¶
assign_clusters_by_centroids(embedding: dict[str, list[int]], centroids_df: DataFrame, *, rescale_factors: dict | None = None, custom_scaling: dict[str, dict] | None = None) -> 'FeaturesResult'
new_embed_df: (n_samples, n_features) DataFrame of your new time-shifted embedding. centroids_df: (n_clusters, n_features) DataFrame of cluster centers. Returns a Series of cluster IDs (0..n_clusters-1) indexed like new_embed_df.
Examples:
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # add a simple feature to embed
>>> f.store(pd.Series(range(len(t.data)), index=t.data.index), 'counter', meta={})
>>> emb = {'counter':[0, 1]}
>>> df = f.embedding_df(emb)
>>> # make 2 simple centroids matching columns
>>> cents = pd.DataFrame([[0, 0], [1, 1]], columns=df.columns)
>>> labels = f.assign_clusters_by_centroids(emb, cents)
>>> isinstance(labels, pd.Series) and len(labels) == len(t.data)
True
train_knn_regressor ¶
train_knn_regressor(*, source_embedding: dict[str, list[int]], target_embedding: dict[str, list[int]], n_neighbors: int = 5, normalize_source: bool = False, **kwargs)
Developer mode: not available in public release yet.
Train a KNN regressor to predict a target embedding from a feature embedding on this Features object. If normalize_source is True, normalize the source embedding before training and return the rescale factors. Returns the trained model, input columns, target columns, and (optionally) the rescale factors.
predict_knn ¶
predict_knn(model: KNeighborsRegressor, source_embedding: dict[str, list[int]], target_embedding: dict[str, list[int]], rescale_factors: dict = None) -> pd.DataFrame
Developer mode: not available in public release yet.
Predict using a trained KNN regressor on this Features object. If rescale_factors is provided, normalize the source embedding before prediction. The prediction will match the shape and columns of self.embedding_df(target_embedding).
rms_error_between_embeddings
staticmethod
¶
rms_error_between_embeddings(ground_truth: DataFrame, prediction: DataFrame, rescale: dict | str = None) -> pd.Series
Developer mode: not available in public release yet.
Compute the root mean squared error (RMS) for each row between two embedding DataFrames. If rescale is a dict, normalize both DataFrames using this dict before computing the error. If rescale == 'auto', compute normalization factors from ground_truth and apply to both DataFrames. Returns a Series indexed like the input DataFrames, with NaN for rows where either input has NaNs.
define_elliptical_boundary_from_params ¶
define_elliptical_boundary_from_params(centre: str | list[str], major_axis_length: float, minor_axis_length: float, angle_in_radians: float = 0.0, n_points: int = 100) -> list[tuple[float, float]]
Generate a polygonal approximation of an ellipse as a list of (x, y) tuples,
around centre using explicit parameters.
centre can be a single point name or a list of point names.
If centre is a list, the boundary will be centred on the mean of the median coordinates of the points.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> poly = f.define_elliptical_boundary_from_params('p1', major_axis_length=10, minor_axis_length=6, angle_in_radians=0.0, n_points=32)
>>> isinstance(poly, list) and len(poly) == 32
True
define_elliptical_boundary_from_points ¶
define_elliptical_boundary_from_points(points: list[str], n_points: int = 100, scaling: float = 1.0, smallness_weight: float = 0.1) -> list[tuple[float, float]]
Fit an ellipse to the median coordinates of the given tracked points (at least 4)
and return a polygonal approximation. After fitting, the ellipse is scaled by scaling.
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='ex', fps=30)
>>> f = Features(t)
>>> # Use exactly 4 points to avoid requiring skimage in tests
>>> poly = f.define_elliptical_boundary_from_points(['p1','p2','p3','p2'], n_points=20, scaling=1.0)
>>> isinstance(poly, list) and len(poly) == 20
True