Skip to content

SummaryCollection

py3r.behaviour.summary.summary_collection.SummaryCollection

SummaryCollection(summary_dict: dict[str, Summary])

Bases: BaseCollection, SummaryCollectionPlotMixin

A collection of Summary objects (e.g. for grouping individuals). Note: type hints refer to Summary, but the factory methods also accept other classes. These are intended ONLY for subclasses of Summary, and this is enforced.

Examples

>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # add a simple boolean feature to each Features for summaries to consume
>>> for f in fc.values():
...     s = pd.Series([True, False] * (len(f.tracking.data)//2 + 1))[:len(f.tracking.data)]
...     s.index = f.tracking.data.index
...     f.store(s, 'flag', meta={})
>>> sc = SummaryCollection.from_features_collection(fc)
>>> list(sorted(sc.keys()))
['A', 'B']

each instance-attribute

each: Summary

each_forcebatch instance-attribute

each_forcebatch: Summary

group_keys property

group_keys

Keys for the groups in a grouped view. Empty list if not grouped.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
...     coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G2')
>>> g = coll.groupby('group')
>>> sorted(g.group_keys)
[('G1',), ('G2',)]

groupby_tags property

groupby_tags

The tag names used to form this grouped view (or None if flat).

is_grouped property

is_grouped

True if this collection is a grouped view.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> coll.is_grouped
False

summary_dict property

summary_dict

bfa

bfa(
    column: str,
    all_states: list | None = None,
    numshuffles: int = 1000,
    pairs: list[tuple[str, str]] | None = None,
    random_state: int | None = 0,
    scale_by_transitions: bool = False,
)

Behaviour Flow Analysis between groups for a grouped SummaryCollection.

Requires the collection to be grouped (via groupby). Computes transition matrices per Summary within each group, then computes Manhattan distances between group means and surrogate distributions via shuffling.

If pairs is provided, only those group pairs are analyzed; otherwise all unique pairs in self.group_keys are evaluated.

Parameters:

Name Type Description Default

column

str

Name of the column containing discrete state labels.

required

all_states

list | None

Explicit state ordering for the transition matrix. None infers states from the data.

None

numshuffles

int

Number of surrogate shuffles used to build the null distribution.

1000

pairs

list[tuple[str, str]] | None

Group pairs to compare. None evaluates all unique pairs.

None

random_state

int | None

Seed for reproducible surrogate shuffling. None keeps non-deterministic behaviour. Pass the same seed to each bfa() call when combining scales so that surrogate shuffles are synchronised; see combine_bfa_results.

0

scale_by_transitions

bool

If True, each pairwise Manhattan distance (observed and all surrogates) is divided by the total number of transitions across both groups for that pair. This rescales raw-count distances to a per-transition unit, making distances comparable across temporal resolutions. Defaults to False to preserve legacy behaviour.

False
Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # inject simple 2-state labels and tags to build groups
>>> for i, (h, f) in enumerate(fc.items()):
...     pat = ['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1)
...     states = pd.Series(pat[:len(f.tracking.data)], index=f.tracking.data.index)
...     f.store(states, 'state', meta={})
...     f.tracking.add_tag('group', f'G{i+1}')
>>> gfc = fc.groupby('group')
>>> sc = SummaryCollection.from_features_collection(gfc)
>>> # compute all pairs (raw transition counts)
>>> res = sc.bfa('state', all_states=['A','B'], numshuffles=2)
>>> isinstance(res, dict) and 'observed' in next(iter(res.values()))
True
>>> # compute only specific pair(s)
>>> res2 = sc.bfa('state', all_states=['A','B'], numshuffles=2, pairs=[('G1','G2')])
>>> list(res2.keys()) == ['G1_vs_G2']
True
>>> # scale distances by total transition count (comparable across resolutions)
>>> res3 = sc.bfa('state', all_states=['A','B'], numshuffles=2, scale_by_transitions=True)
>>> isinstance(res3, dict) and 'observed' in next(iter(res3.values()))
True

bfa_multiscale staticmethod

bfa_multiscale(
    scs: list[SummaryCollection],
    columns: list[str] | str,
    all_states: list[list | None] | list | None = None,
    numshuffles: int = 1000,
    pairs: list[tuple[str, str]] | None = None,
    random_state: int | None = 0,
    scale_by_transitions: bool = True,
    scale_weights: list[float] | None = None,
) -> dict

Multi-scale Behaviour Flow Analysis across pre-built SummaryCollections.

Each entry in scs is an independently prepared grouped SummaryCollection — typically derived from data at a different temporal resolution (e.g. raw, 4x coarse-grained, 16x coarse-grained). The relevant state column at each scale is specified via columns.

The state column is expected to have been computed directly on the data at that scale (e.g. via cluster labels computed on coarse-grained features), not simply aggregated from a finer scale.

Surrogate shuffles are automatically synchronised: the same random_state is passed to every bfa call, which — given that QC has verified identical group/handle order — guarantees that surrogate i at scale A and surrogate i at scale B use the same animal shuffle. The combined surrogate distribution is therefore the correct null for the combined statistic.

Parameters:

Name Type Description Default

scs

list[SummaryCollection]

Grouped SummaryCollection objects, one per scale, in the order they should be combined.

required

columns

list[str] | str

State column name to use for each scale's transition matrix. Pass a single string to use the same column name for all scales.

required

all_states

list[list | None] | list | None

Explicit state ordering for transition matrices. Three forms are accepted:

  • None — states are inferred from the data at every scale.
  • A flat list (e.g. [0, 1, 2]) — the same state set is used for all scales.
  • A list of lists / None values whose length equals len(scs) — each scale uses its own state set, or None to infer for that scale.

None

numshuffles

int

Number of surrogate shuffles per scale.

1000

pairs

list[tuple[str, str]] | None

Group pairs to compare. None evaluates all unique pairs. Must be the same for all scales.

None

random_state

int | None

Seed for reproducible surrogate shuffling. The same seed is used at every scale to synchronise surrogates.

0

scale_by_transitions

bool

Divide each pairwise Manhattan distance by the total number of transitions across both groups for that pair. Enabled by default because distances across scales must be on a common per-transition unit before they can be meaningfully combined.

True

scale_weights

list[float] | None

Per-scale multipliers for the combined distance, in the same order as scs. Defaults to uniform weighting.

None

Returns:

Type Description
dict

Dict with keys "combined" (bfa-format result with an additional "per_scale_observed" list per comparison) and "scales" (dict mapping scale index to the individual bfa result).

Raises:

Type Description
ValueError

If any SummaryCollection in scs is not grouped, if group keys or handle order differ across the collections, if a requested column is missing, or if the length of columns does not match the length of scs.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> for i, (h, f) in enumerate(fc.items()):
...     pat = ['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1)
...     states = pd.Series(pat[:len(f.tracking.data)], index=f.tracking.data.index)
...     f.store(states, 'state', meta={})
...     f.tracking.add_tag('group', f'G{i+1}')
>>> gfc = fc.groupby('group')
>>> sc1 = SummaryCollection.from_features_collection(gfc)
>>> # sc2 would normally come from independently-clustered coarse-grained data;
>>> # here we reuse sc1 with the same column purely for doctest purposes.
>>> # shared all_states (broadcast form)
>>> ms = SummaryCollection.bfa_multiscale(
...     [sc1, sc1], 'state', all_states=['A', 'B'], numshuffles=2)
>>> # per-scale all_states (list-of-lists form)
>>> ms2 = SummaryCollection.bfa_multiscale(
...     [sc1, sc1], 'state',
...     all_states=[['A', 'B'], ['A', 'B']],
...     numshuffles=2)
>>> bool(ms['combined']['G1_vs_G2']['observed'] == ms2['combined']['G1_vs_G2']['observed'])
True
>>> set(ms.keys()) == {'combined', 'scales'}
True
>>> set(ms['scales'].keys()) == {0, 1}
True
>>> 'observed' in ms['combined']['G1_vs_G2']
True
>>> 'per_scale_observed' in ms['combined']['G1_vs_G2']
True
>>> len(ms['combined']['G1_vs_G2']['per_scale_observed']) == 2
True

bfa_stats staticmethod

bfa_stats(
    bfa_results: dict[str, dict[str, float]],
) -> dict[str, dict[str, float]]

Compute simple statistics (percentile, zscore, right_tail_p) from bfa results.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> for i, (h, f) in enumerate(fc.items()):
...     pat = ['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1)
...     states = pd.Series(pat[:len(f.tracking.data)], index=f.tracking.data.index)
...     f.store(states, 'state', meta={})
...     f.tracking.add_tag('group', f'G{i+1}')
>>> sc = SummaryCollection.from_features_collection(fc.groupby('group'))
>>> bfa_out = sc.bfa('state', all_states=['A','B'], numshuffles=2)
>>> stats = SummaryCollection.bfa_stats(bfa_out)
>>> set(next(iter(stats.values())).keys()) >= {'percentile','zscore','right_tail_p'}
True

collate_bin_dfs staticmethod

collate_bin_dfs(
    dfs: list[DataFrame] | dict[str, DataFrame],
    format: Literal["tall", "wide"] = "tall",
    bin_col: str = "bin",
) -> pd.DataFrame

Collate a sequence of per-bin DataFrames into a single table.

Intended for DataFrames produced by to_df, one per time bin (e.g. from make_bins).

Parameters:

Name Type Description Default

dfs

list[DataFrame] | dict[str, DataFrame]

List of DataFrames (bins labeled 0, 1, 2, ...) or a dict mapping bin labels to DataFrames. Each DataFrame must be indexed by handle.

required

format

Literal['tall', 'wide']

'tall' stacks rows and adds a bin column; 'wide' produces one row per handle with columns named <metric>_<bin_label>.

'tall'

bin_col

str

Name of the bin column inserted in tall format.

'bin'

Returns:

Type Description
DataFrame

Collated DataFrame.

Tall format: the index is handle. Rows are ordered so that all bins for a given handle appear together (handle-major), with bins in the order supplied.

Wide format: the index is handle. Columns are ordered metric-major — all bins for a given metric appear together (metric_bin0, metric_bin1, ...), then the next metric, and so on. Metric order follows the first DataFrame supplied.

Examples
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t1 = Tracking.from_dlc(str(p), handle='A', fps=30)
...     t2 = Tracking.from_dlc(str(p), handle='B', fps=30)
>>> s1, s2 = Summary(Features(t1)), Summary(Features(t2))
>>> s1.store(1.0, 'score'); s2.store(2.0, 'score')
>>> sc = SummaryCollection.from_list([s1, s2])
>>> df0 = sc.to_df(); df0['score'] = [1.0, 2.0]
>>> df1 = sc.to_df(); df1['score'] = [1.5, 2.5]
>>> tall = SummaryCollection.collate_bin_dfs([df0, df1], format='tall')
>>> list(tall.columns[:2])
['bin', 'score']
>>> list(tall.index)
['A', 'A', 'B', 'B']
>>> wide = SummaryCollection.collate_bin_dfs({'early': df0, 'late': df1}, format='wide')
>>> list(wide.columns)
['score_early', 'score_late']

combine_bfa_results staticmethod

combine_bfa_results(
    results_list: list[dict],
    *,
    scale_weights: list[float] | None = None,
    per_scale: bool = True,
) -> dict

Combine BFA results from multiple temporal scales into a single result.

Note

This is an escape hatch for advanced workflows. If you are starting a multi-scale BFA from scratch, use bfa_multiscale instead — it takes the pre-built per-scale SummaryCollections and handles input validation, surrogate synchronisation, and result combination automatically. Only use this helper directly when you have already computed per-scale results through a custom pipeline and know that their surrogate shuffles are synchronised (same random_state, same group/handle order, same pairs, same numshuffles).

Each entry in results_list is a dict returned by bfa. The observed distances and the per-surrogate distances are summed (optionally weighted) across scales, yielding a combined result in the same format as a single bfa call.

For valid multi-scale statistics the surrogate shuffles must be synchronised across scales — pass the same random_state to every bfa call, use the same group structure and the same pairs ordering, and the shuffles will be identical by construction.

Parameters:

Name Type Description Default

results_list

list[dict]

BFA result dicts, one per scale, in the same format returned by bfa. All dicts must contain the same pair keys and the same number of surrogates.

required

scale_weights

list[float] | None

Optional per-scale multiplicative weights (must have the same length as results_list). Defaults to uniform weighting (all 1.0).

None

per_scale

bool

If True, each combined pair entry includes a "per_scale_observed" list containing the individual scale contributions.

True

Returns:

Type Description
dict

Same structure as bfa output, with an optional extra key "per_scale_observed" per comparison when per_scale=True.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> for i, (h, f) in enumerate(fc.items()):
...     pat = ['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1)
...     states = pd.Series(pat[:len(f.tracking.data)], index=f.tracking.data.index)
...     f.store(states, 'state', meta={})
...     f.tracking.add_tag('group', f'G{i+1}')
>>> gfc = fc.groupby('group')
>>> sc1 = SummaryCollection.from_features_collection(gfc)
>>> sc4 = SummaryCollection.from_features_collection(
...     gfc.each.coarse_grain(4, non_numeric='mode'))
>>> res1 = sc1.bfa('state', all_states=['A','B'], numshuffles=2, random_state=0)
>>> res4 = sc4.bfa('state', all_states=['A','B'], numshuffles=2, random_state=0)
>>> combined = SummaryCollection.combine_bfa_results([res1, res4])
>>> 'observed' in combined['G1_vs_G2'] and 'surrogates' in combined['G1_vs_G2']
True
>>> len(combined['G1_vs_G2']['surrogates']) == 2
True
>>> 'per_scale_observed' in combined['G1_vs_G2']
True
>>> len(combined['G1_vs_G2']['per_scale_observed']) == 2
True

copy

copy()

Creates a copy of the BaseCollection. Raises NotImplementedError if any leaf does not implement copy().

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv')
...         _ = shutil.copy(p, d / 'B.csv')
...     coll = TrackingCollection.from_folder(
...         str(d), tracking_loader=Tracking.from_dlc, fps=30
...     )
>>> coll_copy = coll.copy()
>>> sorted(coll_copy.keys())
['A', 'B']

flatten

flatten()

Flatten a MultipleCollection to a flat Collection. If already flat, return self.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
...     coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G1')
...     g = coll.groupby('group')
>>> flat = g.flatten()
>>> flat.is_grouped
False
>>> sorted(flat.keys())
['A', 'B']

from_features_collection classmethod

from_features_collection(
    features_collection: FeaturesCollection,
    summary_cls: type[Summary] = Summary,
)

Create a SummaryCollection from a FeaturesCollection.

Parameters:

Name Type Description Default

features_collection

FeaturesCollection

Source collection. Grouped structure is preserved.

required

summary_cls

type[Summary]

Summary subclass to instantiate for each session.

Summary
Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # add numeric scalar per Features via a quick summary to test to_df later
>>> for f in fc.values():
...     import numpy as np, pandas as pd
...     s = pd.Series(range(len(f.tracking.data)), index=f.tracking.data.index)
...     f.store(s, 'counter', meta={})
>>> sc = SummaryCollection.from_features_collection(fc)
>>> isinstance(sc['A'], Summary) and isinstance(sc['B'], Summary)
True

from_list classmethod

from_list(summary_list: list[Summary])

Create a SummaryCollection from a list of Summary objects, keyed by handle.

Parameters:

Name Type Description Default

summary_list

list[Summary]

Summary objects to collect. All handles must be unique.

required
Examples
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t1 = Tracking.from_dlc(str(p), handle='A', fps=30)
...     t2 = Tracking.from_dlc(str(p), handle='B', fps=30)
>>> f1, f2 = Features(t1), Features(t2)
>>> # store simple scalar summaries
>>> s1, s2 = Summary(f1), Summary(f2)
>>> s1.store(1, 'count'); s2.store(2, 'count')
>>> sc = SummaryCollection.from_list([s1, s2])
>>> list(sorted(sc.keys()))
['A', 'B']

get_group

get_group(key)

Get a sub-collection by group key from a grouped view.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
...     coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G2')
>>> g = coll.groupby('group')
>>> sub = g.get_group(('G1',))
>>> list(sub.keys())
['A']

groupby

groupby(tags)

Group the collection by one or more existing tag names. Returns a grouped view (this same collection type) whose values are sub-collections keyed by a tuple of tag values in the order provided.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
...     coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G2')
>>> g = coll.groupby('group')
>>> g.is_grouped
True
>>> sorted(g.group_keys)
[('G1',), ('G2',)]

items

items()

Items iterator (handle, element).

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> sorted([h for h, _ in coll.items()])
['A', 'B']

keys

keys()

Keys iterator (handles or group keys).

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> list(sorted(coll.keys()))
['A', 'B']

load classmethod

load(dirpath: str)

Load a collection previously saved with save(). Uses the class's _element_type.load to reconstruct leaves.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
...     out = d / 'coll'
...     coll.save(str(out), overwrite=True, data_format='csv')
...     coll2 = TrackingCollection.load(str(out))
>>> list(sorted(coll2.keys()))
['A', 'B']

make_bin

make_bin(startframe: int, endframe: int)

Return a new SummaryCollection restricted to frames in [startframe, endframe).

Parameters:

Name Type Description Default

startframe

int

First frame index of the bin (inclusive).

required

endframe

int

Last frame index of the bin (exclusive).

required
Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='A', fps=30)
>>> s = Summary(Features(t))
>>> sc = SummaryCollection.from_list([s])
>>> b = sc.make_bin(0, 2)
>>> isinstance(b, SummaryCollection)
True

make_bins

make_bins(numbins: int)

Divide the collection into equal time bins and return one SummaryCollection per bin.

Parameters:

Name Type Description Default

numbins

int

Number of equal-length bins to split each session into.

required
Examples
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t = Tracking.from_dlc(str(p), handle='A', fps=30)
>>> sc = SummaryCollection.from_list([Summary(Features(t))])
>>> bins = sc.make_bins(3)
>>> len(bins) == 3 and all(isinstance(b, SummaryCollection) for b in bins)
True

map_leaves

map_leaves(fn: Callable[[Any], Any])

Apply a function to every leaf element and return a new collection of the same type. Preserves grouping shape and groupby metadata when grouped.

Parameters:

Name Type Description Default

fn

Callable[[Any], Any]

Callable applied to each leaf element. Must return an element compatible with this collection type.

required
Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> sub = coll.map_leaves(lambda t: t.loc[0:1])
>>> all(len(t.data) == 2 for t in sub.values())
True

merge classmethod

merge(
    collections: list[Self], *, copy: bool = False
) -> Self

Merge multiple collections into a single flat collection containing all leaf elements from each input.

Each input collection is flattened before merging, so grouped inputs are supported. The result is always a new flat collection. Leaves are shared by reference unless copy=True.

Parameters:

Name Type Description Default

collections

list[Self]

Two or more collections of the same concrete type. Every element across all collections must have a unique handle.

required

copy

bool

If True, each leaf is copied (via its .copy() method) so that the merged collection is fully independent of the originals.

False

Returns:

Type Description
Self

A new flat collection containing all leaves.

Raises:

Type Description
ValueError

If collections is empty, or if any handles are duplicated.

TypeError

If any input is not an instance of the calling class.

Warns:

Type Description
UserWarning

If the tag key sets differ across input collections (the merged collection will have mixed tag coverage).

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...         _ = shutil.copy(p, d / 'C.csv'); _ = shutil.copy(p, d / 'D.csv')
...     c1 = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
...     c2 = TrackingCollection.from_dlc({'C': str(d/'C.csv'), 'D': str(d/'D.csv')}, fps=30)
>>> merged = TrackingCollection.merge([c1, c2])
>>> sorted(merged.keys())
['A', 'B', 'C', 'D']
>>> len(merged)
4

plot_bfa_results staticmethod

plot_bfa_results(
    results: dict[str, dict[str, float]],
    compares: str | list[str] | None = None,
    add_stats: bool = True,
    stats: dict[str, dict[str, float]] | None = None,
    bins: int = 50,
    figsize: tuple[float, float] = (4, 3),
    save_dir: str | None = None,
    show: bool = True,
    compare: str | None = None,
) -> tuple[Figure, Any] | dict[str, tuple[Figure, Any]]

Plot one or more BFA result comparisons as separate single-panel figures.

Parameters:

Name Type Description Default

results

dict[str, dict[str, float]]

BFA result dict as returned by bfa.

required

compares

str | list[str] | None

Which comparisons to plot. None plots all (or the single comparison if only one exists). A string plots that comparison only; a list of strings plots each one separately.

None

add_stats

bool

If True and stats is not provided, statistics are computed via bfa_stats and annotated on each plot.

True

stats

dict[str, dict[str, float]] | None

Precomputed stats dict. If None and add_stats is True, stats are computed automatically.

None

bins

int

Number of histogram bins for the surrogate distribution.

50

figsize

tuple[float, float]

Size of each figure.

(4, 3)

save_dir

str | None

If provided, save each figure as <comparison>.png here.

None

show

bool

If True, call plt.show() after each figure.

True

compare

str | None

Deprecated alias for compares (single string only).

None

Returns:

Type Description
tuple[Figure, Any] | dict[str, tuple[Figure, Any]]

(fig, ax) for a single comparison, or {compare: (fig, ax)} for multiple.

Examples
>>> import tempfile, shutil, os
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # add simple 2-state labels and tags to build two groups
>>> for i, (h, f) in enumerate(fc.items()):
...     pat = ['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1)
...     states = pd.Series(pat[:len(f.tracking.data)], index=f.tracking.data.index)
...     f.store(states, 'state', meta={})
...     f.tracking.add_tag('group', f'G{i+1}')
>>> sc = SummaryCollection.from_features_collection(fc.groupby('group'))
>>> bfa_out = sc.bfa('state', all_states=['A','B'], numshuffles=5)
>>> # plot a single comparison and save it
>>> with tempfile.TemporaryDirectory() as outdir:
...     fig, ax = SummaryCollection.plot_bfa_results(
...         bfa_out, compare='G1_vs_G2', show=False, save_dir=outdir)
...     os.path.exists(os.path.join(outdir, 'G1_vs_G2.png'))
True

plot_chord

plot_chord(
    column: str,
    all_states: list[str | int] | None = None,
    *,
    fromkey: str | None = None,
    plot_individual: bool = False,
    show: bool = True,
    save_dir: str | None = None,
    cmap: str | list | None = None,
    **kwargs,
)

Plot chord diagrams of state transitions using a minimal pattern.

  • If not grouped:
      ◦ plot_individual=False: sum over the collection and plot a single chord.
      ◦ plot_individual=True: plot one chord per recording.
  • If grouped:
      ◦ plot_individual=False: sum within each group and plot one chord per group.
      ◦ plot_individual=True: plot one chord per recording per group.

Parameters

  • column: Name of the categorical column used to compute transitions.
  • all_states: Optional explicit state ordering for transition matrices. Required when fromkey is not provided.
  • fromkey: Optional key in each Summary.data containing a precomputed transition DataFrame. If provided, this key is used directly instead of computing transitions from column.
  • plot_individual: If True, plot per recording; otherwise plot the summed aggregate.
  • show: If True, display figures.
  • save_dir: Optional directory to save figures; created if missing.
  • kwargs: Additional keyword arguments to pass to pycirclize.Circos.chord_diagram.

Returns

object, depending on grouping and plot_individual:

  • flat & plot_individual=False: single fig
  • flat & plot_individual=True: dict {handle: fig}
  • grouped & plot_individual=False: dict {group: fig}
  • grouped & plot_individual=True: dict {group: {handle: fig}}

Examples
>>> # xdoctest: +REQUIRES(module: pycirclize)
>>> import tempfile, os, shutil
>>> import pandas as pd
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     # create two recordings from the sample csv
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
...     # build features and inject a simple 3-state sequence
...     fc = FeaturesCollection.from_tracking_collection(tc)
...     for _, f in fc.items():
...         pat = ['0','1','2','1','0'] * (len(f.tracking.data)//5 + 1)
...         seq = pd.Series(pat[:len(f.tracking.data)], index=f.tracking.data.index)
...         f.store(seq, 'state', meta={})
...     sc = SummaryCollection.from_features_collection(fc)
...     # plot flat aggregate and save it
...     with tempfile.TemporaryDirectory() as outdir:
...         _ = sc.plot_chord(
...             'state', all_states=['0','1','2'], show=False, save_dir=outdir)
...         os.path.exists(os.path.join(outdir, 'chord_state.png'))
True

plot_transition_umap

plot_transition_umap(
    column: str,
    all_states: list | None = None,
    groups: list[str | tuple[str, ...]]
    | list[list[str | tuple[str, ...]]]
    | None = None,
    n_neighbors: int = 15,
    min_dist: float = 0.1,
    random_state: int = 0,
    figsize: tuple[float, float] = (4.5, 4),
    show: bool = True,
    save_dir: str | None = None,
) -> tuple[Figure, Any]

Plot a UMAP embedding of per-subject transition matrices for selected groups.

Transition matrices are computed for each subject within each group, flattened, scaled, and embedded with UMAP. The collection must already be grouped, for example via groupby.

Parameters:

Name Type Description Default

column

str

Name of the categorical column used to compute transition matrices.

required

all_states

list | None

Optional explicit state ordering used when constructing transition matrices.

None

groups

list[str | tuple[str, ...]] | list[list[str | tuple[str, ...]]] | None

Optional group selection. If omitted, all groups are included. Supports three forms:

  • A flat list of single-tag group labels, e.g. ['control', 'treatment'].
  • A flat list of multi-tag group keys (tuples), e.g. [('control', 'time1'), ('control', 'time2')].
  • A list of lists defining ordered sequences, e.g. [[('control', 'time1'), ('control', 'time2')], ...]. Each sequence is plotted with a monochrome gradient.

None

n_neighbors

int

Number of neighbors used by UMAP.

15

min_dist

float

Minimum distance parameter passed to UMAP.

0.1

random_state

int

Seed for reproducible UMAP embeddings.

0

figsize

tuple[float, float]

Figure size passed to Matplotlib.

(4.5, 4)

show

bool

If True, display the figure.

True

save_dir

str | None

Optional directory in which to save the plot as transition_umap.png.

None

Returns:

Type Description
tuple[Figure, Any]

Tuple of (fig, ax).

Raises:

Type Description
ValueError

If the collection is not grouped, or if no data are found for the requested groups.

ImportError

If umap-learn is not installed.

Examples
>>> # xdoctest: +REQUIRES(module: umap)
>>> import os, shutil, tempfile
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection

>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     paths = {}
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         for name in ['A', 'B', 'C', 'D']:
...             dst = d / f'{name}.csv'
...             _ = shutil.copy(p, dst)
...             paths[name] = str(dst)
...     tc = TrackingCollection.from_dlc(paths, fps=30)
...     fc = FeaturesCollection.from_tracking_collection(tc)
...
...     tags = {
...         'A': ('control', 'time1'),
...         'B': ('control', 'time2'),
...         'C': ('treatment', 'time1'),
...         'D': ('treatment', 'time2'),
...     }
...
...     for h, f in fc.items():
...         pat = ['A', 'A', 'B', 'B', 'A'] * (len(f.tracking.data) // 5 + 1)
...         states = pd.Series(pat[:len(f.tracking.data)], index=f.tracking.data.index)
...         f.store(states, 'state', meta={})
...         condition, time = tags[h]
...         f.tracking.add_tag('condition', condition)
...         f.tracking.add_tag('time', time)
...
...     sc = SummaryCollection.from_features_collection(fc.groupby(['condition', 'time']))
...
...     with tempfile.TemporaryDirectory() as outdir:
...         fig, ax = sc.plot_transition_umap(
...             column='state',
...             all_states=['A', 'B'],
...             groups=[('control', 'time1'), ('control', 'time2')],
...             show=False,
...             save_dir=outdir,
...         )
...         os.path.exists(os.path.join(outdir, 'transition_umap.png'))
True

>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     paths = {}
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         for name in ['A', 'B', 'C', 'D']:
...             dst = d / f'{name}.csv'
...             _ = shutil.copy(p, dst)
...             paths[name] = str(dst)
...     tc = TrackingCollection.from_dlc(paths, fps=30)
...     fc = FeaturesCollection.from_tracking_collection(tc)
...
...     tags = {
...         'A': ('control', 'time1'),
...         'B': ('control', 'time2'),
...         'C': ('treatment', 'time1'),
...         'D': ('treatment', 'time2'),
...     }
...
...     for h, f in fc.items():
...         pat = ['A', 'A', 'B', 'B', 'A'] * (len(f.tracking.data) // 5 + 1)
...         states = pd.Series(pat[:len(f.tracking.data)], index=f.tracking.data.index)
...         f.store(states, 'state', meta={})
...         condition, time = tags[h]
...         f.tracking.add_tag('condition', condition)
...         f.tracking.add_tag('time', time)
...
...     sc = SummaryCollection.from_features_collection(fc.groupby(['condition', 'time']))
...
...     fig, ax = sc.plot_transition_umap(
...         column='state',
...         all_states=['A', 'B'],
...         groups=[
...             [('control', 'time1'), ('control', 'time2')],
...             [('treatment', 'time1'), ('treatment', 'time2')],
...         ],
...         show=False,
...     )
...     fig is not None and ax is not None
True

prepare_plot

prepare_plot(
    metric: str | BatchResult | list,
    *,
    group_order: dict | None = None,
    sort_by: list | str | None = None,
    merge_by: str | None = "metric",
    ax: Any | None = None,
    figsize: tuple[float, float] | None = None,
) -> Any

Prepare a tidy DataFrame and seaborn kwargs without drawing anything.

This is the single entry point for all plot data preparation. The convenience sns* methods call this internally; power users can call it directly for full control over the seaborn call.

Parameters:

Name Type Description Default

metric

str | BatchResult | list

Metric to prepare. Lists are merged into a single plot-ready metric.

required

group_order

dict | None

{tag_name: [value, ...]} controlling within-tag value ordering.

None

sort_by

list | str | None

Override spatial sort priority (which tag is the primary x-axis sort dimension). Colours are unaffected — they always follow the groupby(tags=...) order. Accepts a single tag name or a list.

None

merge_by

str | None

Used only when metric is a list with more than one item. Controls whether merged labels are arranged as metric::component ("metric"), component::metric ("component"), or kept as flat merged labels without two-level axis formatting (None).

'metric'

ax

Any | None

Axes to plot on. If None, a new figure is created with auto-calculated size.

None

figsize

tuple[float, float] | None

Override the automatic figure size.

None

Returns:

Type Description
Any

A PlotSpec namespace with attributes fig, ax, df (tidy long-form DataFrame), sns_kwargs (ready to unpack into any seaborn categorical plot), metric_name, ylabel, hide_legend, created_fig, n_components, n_groups, and filename_prefix.

Examples

Basic power-user workflow::

import seaborn as sns

spec = sc_grouped.prepare_plot(
    "total_distance",
    group_order=GROUP_ORDER,
    sort_by="timepoint",
)

# Full seaborn control — override anything you like
sns.boxplot(**spec.sns_kwargs, width=0.6)
spec.ax.set_title("My custom title")
spec.fig.savefig("custom.png", dpi=300)

Composing multiple layers::

spec = sc_grouped.prepare_plot(metric, group_order=ORDER)
sns.barplot(**spec.sns_kwargs, errorbar=None, alpha=0.4)
sns.stripplot(**spec.sns_kwargs, size=4, jitter=True)

regroup

regroup()

Recompute the same grouping using the current tags and the original grouping tag order. If not grouped, returns self.

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
...     coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G1')
...     g = coll.groupby('group')
...     coll['B'].add_tag('group','G2', overwrite=True)  # change tag
>>> g2 = g.regroup()
>>> sorted(g2.group_keys)
[('G1',), ('G2',)]

save

save(
    dirpath: str,
    *,
    overwrite: bool = False,
    data_format: str = "parquet",
) -> None

Save this collection to a directory. Preserves grouping and delegates to leaf objects' save(dirpath, data_format, overwrite=True).

Examples
>>> import tempfile, shutil, os
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
...     out = d / 'coll'
...     coll.save(str(out), overwrite=True, data_format='csv')
...     # collection-level manifest at top-level
...     assert os.path.exists(os.path.join(str(out), 'manifest.json'))
...     # element-level manifests under elements/<handle>/
...     el_manifest = os.path.join(str(out), 'elements', 'A', 'manifest.json')
...     assert os.path.exists(el_manifest)

snsbar

snsbar(
    metric: str | BatchResult | list,
    *,
    group_order: dict | None = None,
    sort_by: list | str | None = None,
    annotate: str | dict | None = None,
    ax: Any | None = None,
    show: bool = True,
    savedir: str | None = None,
    filename: str | None = None,
    title: str | None = None,
    **kwargs: Any,
) -> tuple[Figure, Any, pd.DataFrame]

Bar plot with error bars using seaborn.

Parameters:

Name Type Description Default

metric

str | BatchResult | list

Key from Summary.data or a BatchResult from a batch method.

required

group_order

dict | None

{tag_name: [value, ...]} controlling within-tag display order.

None

sort_by

list | str | None

Override spatial sort priority. See prepare_plot.

None

annotate

str | dict | None

Statistical annotation spec. See prepare_plot.

None

ax

Any | None

Axes to plot on. If None, a new figure is created.

None

show

bool

If True, call plt.show() after rendering.

True

savedir

str | None

Directory to save the figure.

None

filename

str | None

Custom filename.

None

title

str | None

Plot title.

None

**kwargs

Any

Forwarded to seaborn.barplot (e.g., errorbar, palette, saturation).

{}

Returns:

Type Description
tuple[Figure, Any, DataFrame]

Tuple of (fig, ax, df).

snsbox

snsbox(
    metric: str | BatchResult | list,
    *,
    group_order: dict | None = None,
    sort_by: list | str | None = None,
    annotate: str | dict | None = None,
    ax: Any | None = None,
    show: bool = True,
    savedir: str | None = None,
    filename: str | None = None,
    title: str | None = None,
    **kwargs: Any,
) -> tuple[Figure, Any, pd.DataFrame]

Box plot using seaborn.

Parameters:

Name Type Description Default

metric

str | BatchResult | list

Key from Summary.data or a BatchResult from a batch method.

required

group_order

dict | None

{tag_name: [value, ...]} controlling within-tag display order.

None

sort_by

list | str | None

Override spatial sort priority. See prepare_plot.

None

annotate

str | dict | None

Statistical annotation spec. See prepare_plot.

None

ax

Any | None

Axes to plot on. If None, a new figure is created.

None

show

bool

If True, call plt.show() after rendering.

True

savedir

str | None

Directory to save the figure.

None

filename

str | None

Custom filename.

None

title

str | None

Plot title.

None

**kwargs

Any

Forwarded to seaborn.boxplot (e.g., width, palette, fliersize).

{}

Returns:

Type Description
tuple[Figure, Any, DataFrame]

Tuple of (fig, ax, df).

snspoint

snspoint(
    metric: str | BatchResult | list,
    *,
    group_order: dict | None = None,
    sort_by: list | str | None = None,
    annotate: str | dict | None = None,
    ax: Any | None = None,
    show: bool = True,
    savedir: str | None = None,
    filename: str | None = None,
    title: str | None = None,
    **kwargs: Any,
) -> tuple[Figure, Any, pd.DataFrame]

Point plot (mean + CI) using seaborn.

Parameters:

Name Type Description Default

metric

str | BatchResult | list

Key from Summary.data or a BatchResult from a batch method.

required

group_order

dict | None

{tag_name: [value, ...]} controlling within-tag display order.

None

sort_by

list | str | None

Override spatial sort priority. See prepare_plot.

None

annotate

str | dict | None

Statistical annotation spec. See prepare_plot.

None

ax

Any | None

Axes to plot on. If None, a new figure is created.

None

show

bool

If True, call plt.show() after rendering.

True

savedir

str | None

Directory to save the figure.

None

filename

str | None

Custom filename.

None

title

str | None

Plot title.

None

**kwargs

Any

Forwarded to seaborn.pointplot (e.g., errorbar, markers, linestyles).

{}

Returns:

Type Description
tuple[Figure, Any, DataFrame]

Tuple of (fig, ax, df).

snsstrip

snsstrip(
    metric: str | BatchResult | list,
    *,
    group_order: dict | None = None,
    sort_by: list | str | None = None,
    annotate: str | dict | None = None,
    ax: Any | None = None,
    show: bool = True,
    savedir: str | None = None,
    filename: str | None = None,
    title: str | None = None,
    **kwargs: Any,
) -> tuple[Figure, Any, pd.DataFrame]

Strip plot (jittered scatter) using seaborn.

Parameters:

Name Type Description Default

metric

str | BatchResult | list

Key from Summary.data or a BatchResult from a batch method.

required

group_order

dict | None

{tag_name: [value, ...]} controlling within-tag display order.

None

sort_by

list | str | None

Override spatial sort priority. See prepare_plot.

None

annotate

str | dict | None

Statistical annotation spec. See prepare_plot.

None

ax

Any | None

Axes to plot on. If None, a new figure is created.

None

show

bool

If True, call plt.show() after rendering.

True

savedir

str | None

Directory to save the figure.

None

filename

str | None

Custom filename (overrides auto-generated name).

None

title

str | None

Plot title.

None

**kwargs

Any

Forwarded to seaborn.stripplot (e.g., jitter, alpha, size, palette). Also accepts random_state for deterministic jitter placement.

{}

Returns:

Type Description
tuple[Figure, Any, DataFrame]

Tuple of (fig, ax, df).

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> for f in fc.values():
...     idx = f.tracking.data.index[:30]
...     f.store(pd.Series(([True, False] * 15)[:len(idx)], index=idx),
...             'active', meta={})
>>> sc = SummaryCollection.from_features_collection(fc)
>>> fig, ax, df = sc.snsstrip(sc.each.time_in_state('active'), show=False)
>>> isinstance(df, pd.DataFrame)
True

snssuperplot

snssuperplot(
    metric: str | BatchResult | list,
    *,
    group_order: dict | None = None,
    sort_by: list | str | None = None,
    annotate: str | dict | None = None,
    ax: Any | None = None,
    show: bool = True,
    savedir: str | None = None,
    filename: str | None = None,
    title: str | None = None,
    ylabel: str | None = None,
    bar_kwargs: dict | None = None,
    strip_kwargs: dict | None = None,
    **kwargs: Any,
) -> tuple[Figure, Any, pd.DataFrame]

Superplot: bar plot (mean) with strip plot (individual dots) overlay.

This is the "publication-ready" visualization showing mean bars with individual data points scattered on top, commonly used in scientific papers. The dots are constrained within the bar width by default.

Parameters:

Name Type Description Default

metric

str | BatchResult | list

Key from Summary.data or a BatchResult from a batch method.

required

group_order

dict | None

{tag_name: [value, ...]} controlling within-tag display order.

None

sort_by

list | str | None

Override spatial sort priority. See prepare_plot.

None

annotate

str | dict | None

Statistical annotation spec. See prepare_plot.

None

ax

Any | None

Axes to plot on. If None, a new figure is created.

None

show

bool

If True, call plt.show() after rendering.

True

savedir

str | None

Directory to save the figure.

None

filename

str | None

Custom filename.

None

title

str | None

Plot title.

None

ylabel

str | None

Y-axis label. Auto-detected from metric when None.

None

bar_kwargs

dict | None

Extra kwargs for the bar layer (e.g., errorbar, capsize, saturation).

None

strip_kwargs

dict | None

Extra kwargs for the strip layer (e.g., alpha, size, jitter).

None

**kwargs

Any

Common kwargs passed to both layers (e.g., palette, dodge). Also accepts random_state for deterministic jitter placement.

{}

Returns:

Type Description
tuple[Figure, Any, DataFrame]

Tuple of (fig, ax, df).

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> for f in fc.values():
...     n = len(f.tracking.data)
...     states = pd.Series((['A', 'B', 'A'] * (n // 3 + 1))[:n],
...                        index=f.tracking.data.index)
...     f.store(states, 'zone', meta={})
>>> sc = SummaryCollection.from_features_collection(fc)
>>> fig, ax, df = sc.snssuperplot(sc.each.time_in_state('zone'), show=False)
>>> isinstance(df, pd.DataFrame)
True

snsswarm

snsswarm(
    metric: str | BatchResult | list,
    *,
    group_order: dict | None = None,
    sort_by: list | str | None = None,
    annotate: str | dict | None = None,
    ax: Any | None = None,
    show: bool = True,
    savedir: str | None = None,
    filename: str | None = None,
    title: str | None = None,
    **kwargs: Any,
) -> tuple[Figure, Any, pd.DataFrame]

Swarm plot (non-overlapping scatter) using seaborn.

Parameters:

Name Type Description Default

metric

str | BatchResult | list

Key from Summary.data or a BatchResult from a batch method.

required

group_order

dict | None

{tag_name: [value, ...]} controlling within-tag display order.

None

sort_by

list | str | None

Override spatial sort priority. See prepare_plot.

None

annotate

str | dict | None

Statistical annotation spec. See prepare_plot.

None

ax

Any | None

Axes to plot on. If None, a new figure is created.

None

show

bool

If True, call plt.show() after rendering.

True

savedir

str | None

Directory to save the figure.

None

filename

str | None

Custom filename.

None

title

str | None

Plot title.

None

**kwargs

Any

Forwarded to seaborn.swarmplot (e.g., size, palette).

{}

Returns:

Type Description
tuple[Figure, Any, DataFrame]

Tuple of (fig, ax, df).

snsviolin

snsviolin(
    metric: str | BatchResult | list,
    *,
    group_order: dict | None = None,
    sort_by: list | str | None = None,
    annotate: str | dict | None = None,
    ax: Any | None = None,
    show: bool = True,
    savedir: str | None = None,
    filename: str | None = None,
    title: str | None = None,
    **kwargs: Any,
) -> tuple[Figure, Any, pd.DataFrame]

Violin plot using seaborn.

Parameters:

Name Type Description Default

metric

str | BatchResult | list

Key from Summary.data or a BatchResult from a batch method.

required

group_order

dict | None

{tag_name: [value, ...]} controlling within-tag display order.

None

sort_by

list | str | None

Override spatial sort priority. See prepare_plot.

None

annotate

str | dict | None

Statistical annotation spec. See prepare_plot.

None

ax

Any | None

Axes to plot on. If None, a new figure is created.

None

show

bool

If True, call plt.show() after rendering.

True

savedir

str | None

Directory to save the figure.

None

filename

str | None

Custom filename.

None

title

str | None

Plot title.

None

**kwargs

Any

Forwarded to seaborn.violinplot (e.g., inner, split, palette).

{}

Returns:

Type Description
tuple[Figure, Any, DataFrame]

Tuple of (fig, ax, df).

store

store(
    results_dict: BatchResult | dict,
    name: str | None = None,
    meta: dict | None = None,
    overwrite: bool = False,
) -> str

Store SummaryResult objects returned by batch methods.

Parameters:

Name Type Description Default

results_dict

BatchResult | dict

Batch results to store. Flat: {handle: SummaryResult}. Grouped: {group_key: {handle: SummaryResult}}.

required

name

str | None

Metric name to store under. If None, resolved automatically from the result objects (all must agree on a single name).

None

meta

dict | None

Metadata dict to attach alongside the stored metric.

None

overwrite

bool

If True, overwrite an existing metric with the same name.

False

Returns:

Type Description
str

The resolved stored metric name. Raises ValueError if auto-naming resolves to multiple different names across leaves.

Examples
>>> import pandas as pd, tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
...     tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # add a boolean column for summaries
>>> for f in fc.values():
...     m = pd.Series([True, False] * (len(f.tracking.data)//2 + 1))[:len(f.tracking.data)]
...     m.index = f.tracking.data.index
...     f.store(m, 'mask', meta={})
>>> sc = SummaryCollection.from_features_collection(fc)
>>> rd = {h: s.time_true('mask') for h, s in sc.items()}
>>> sc.store(rd, name='t_mask')
>>> all('t_mask' in s.data for s in sc.values())
True

stored_info

stored_info() -> pd.DataFrame

Summarize stored summary metrics across the collection's leaf Summary objects.

Returns a DataFrame indexed by summary key with columns:

  • attached_to: number of recordings containing the summary key
  • missing_from: number of recordings not containing the summary key
  • type: value datatype name when consistent, or a list of datatype names when mixed across recordings

tags_info

tags_info(
    *, include_value_counts: bool = False
) -> pd.DataFrame

Summarize tag presence across the collection's leaf objects. Works for both flat and grouped collections.

If include_value_counts is True, the result also includes a 'value_counts' column holding a dict of value -> count for each tag.

Returns a pandas.DataFrame with columns ['tag', 'attached_to', 'missing_from', 'unique_values'], plus 'value_counts' when requested. As the example below shows, rows are looked up by tag name (e.g. info.loc['genotype', 'attached_to']).

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
...     coll['A'].add_tag('genotype', 'WT')
...     coll['B'].add_tag('timepoint', 'T1')
>>> info = coll.tags_info(include_value_counts=True)
>>> int(info.loc['genotype','attached_to'])
1
>>> int(info.loc['genotype','missing_from'])
1
>>> int(info.loc['genotype','unique_values'])
1
>>> info.loc['genotype','value_counts']
{'WT': 1}
>>> int(info.loc['timepoint','attached_to'])
1

to_df

to_df(
    include_tags: bool = False,
    tag_prefix: str = "tag_",
    series: Literal["ignore", "separate"] = "ignore",
) -> (
    pd.DataFrame
    | tuple[pd.DataFrame, dict[str, pd.DataFrame]]
)

Collate values from each Summary.data into tabular output.

  • Index: handles of the Summary objects
  • Scalar columns: keys from each Summary.data with scalar values
  • If include_tags is True, include tag columns with the given prefix
  • If series='ignore' (default), Series entries are skipped
  • If series='separate', return a tuple (scalars_df, series_tables), where series_tables is a dict {metric_name: DataFrame} and each DataFrame has one row per handle and one column per Series index value.

Examples
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...     t1 = Tracking.from_dlc(str(p), handle='A', fps=30)
...     t2 = Tracking.from_dlc(str(p), handle='B', fps=30)
>>> s1, s2 = Summary(Features(t1)), Summary(Features(t2))
>>> s1.store(1.0, 'score'); s2.store(2.0, 'score')
>>> s1.features.tracking.add_tag('group', 'G1'); s2.features.tracking.add_tag('group', 'G2')
>>> sc = SummaryCollection.from_list([s1, s2])
>>> df = sc.to_df(include_tags=True)
>>> set(df.columns) >= {'score', 'tag_group'}
True
>>> s1.store(pd.Series([1.0, 2.0], index=['A', 'B']), 'speed_by_state')
>>> s2.store(pd.Series([3.0, 4.0], index=['A', 'B']), 'speed_by_state')
>>> scalars, series_tables = sc.to_df(series='separate')
>>> isinstance(scalars, pd.DataFrame) and 'speed_by_state' in series_tables
True

values

values()

Return an iterator over the collection's values (elements or sub-collections).

Examples
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
...     d = Path(d)
...     with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
...         a = d / 'A.csv'; b = d / 'B.csv'
...         _ = shutil.copy(p, a); _ = shutil.copy(p, b)
...     coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> len(list(coll.values())) == 2
True