SummaryCollection
py3r.behaviour.summary.summary_collection.SummaryCollection ¶
SummaryCollection(summary_dict: dict[str, Summary])
Bases: BaseCollection, SummaryCollectionBatchMixin
collection of Summary objects (e.g. for grouping individuals) note: type-hints refer to Summary, but factory methods allow for other classes these are intended ONLY for subclasses of Summary, and this is enforced
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
... tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # add a simple boolean feature to each Features for summaries to consume
>>> for f in fc.values():
... s = pd.Series([True, False] * (len(f.tracking.data)//2 + 1))[:len(f.tracking.data)]
... s.index = f.tracking.data.index
... f.store(s, 'flag', meta={})
>>> sc = SummaryCollection.from_features_collection(fc)
>>> list(sorted(sc.keys()))
['A', 'B']
is_grouped
property
¶
is_grouped
True if this collection is a grouped view.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> coll.is_grouped
False
groupby_tags
property
¶
groupby_tags
The tag names used to form this grouped view (or None if flat).
group_keys
property
¶
group_keys
Keys for the groups in a grouped view. Empty list if not grouped.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
... coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G2')
>>> g = coll.groupby('group')
>>> sorted(g.group_keys)
[('G1',), ('G2',)]
from_features_collection
classmethod
¶
from_features_collection(features_collection: FeaturesCollection, summary_cls=Summary)
creates a SummaryCollection from a FeaturesCollection (flat or grouped)
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
... tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # add numeric scalar per Features via a quick summary to test to_df later
>>> for f in fc.values():
... import numpy as np, pandas as pd
... f.store(pd.Series(range(len(f.tracking.data)), index=f.tracking.data.index), 'counter', meta={})
>>> sc = SummaryCollection.from_features_collection(fc)
>>> isinstance(sc['A'], Summary) and isinstance(sc['B'], Summary)
True
from_list
classmethod
¶
from_list(summary_list: list[Summary])
creates a SummaryCollection from a list of Summary objects, keyed by handle
Examples:
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t1 = Tracking.from_dlc(str(p), handle='A', fps=30)
... t2 = Tracking.from_dlc(str(p), handle='B', fps=30)
>>> f1, f2 = Features(t1), Features(t2)
>>> # store simple scalar summaries
>>> s1, s2 = Summary(f1), Summary(f2)
>>> s1.store(1, 'count'); s2.store(2, 'count')
>>> sc = SummaryCollection.from_list([s1, s2])
>>> list(sorted(sc.keys()))
['A', 'B']
to_df ¶
to_df(include_tags: bool = False, tag_prefix: str = 'tag_')
Collate scalar values (numeric, string, bool) from each Summary.data into a pandas DataFrame.
- Index: handles of the Summary objects
- Columns: keys from each Summary.data (simple scalar values)
- If include_tags is True, include tag columns with the given prefix
Examples:
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t1 = Tracking.from_dlc(str(p), handle='A', fps=30)
... t2 = Tracking.from_dlc(str(p), handle='B', fps=30)
>>> s1, s2 = Summary(Features(t1)), Summary(Features(t2))
>>> s1.store(1.0, 'score'); s2.store(2.0, 'score')
>>> s1.features.tracking.add_tag('group', 'G1'); s2.features.tracking.add_tag('group', 'G2')
>>> sc = SummaryCollection.from_list([s1, s2])
>>> df = sc.to_df(include_tags=True)
>>> set(df.columns) >= {'score', 'tag_group'}
True
make_bin ¶
make_bin(startframe, endframe)
returns a new SummaryCollection with binned summaries
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='A', fps=30)
>>> s = Summary(Features(t))
>>> sc = SummaryCollection.from_list([s])
>>> b = sc.make_bin(0, 2)
>>> isinstance(b, SummaryCollection)
True
make_bins ¶
make_bins(numbins)
returns a list of SummaryCollection, one per bin
Examples:
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking import Tracking
>>> from py3r.behaviour.features.features import Features
>>> from py3r.behaviour.summary.summary import Summary
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... t = Tracking.from_dlc(str(p), handle='A', fps=30)
>>> sc = SummaryCollection.from_list([Summary(Features(t))])
>>> bins = sc.make_bins(3)
>>> len(bins) == 3 and all(isinstance(b, SummaryCollection) for b in bins)
True
store ¶
store(results_dict: dict[str, SummaryResult], name: str = None, meta: dict = None, overwrite: bool = False)
Store all SummaryResult objects in a one-layer dict (as returned by batch methods).
Examples:
>>> import pandas as pd, tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
... tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # add a boolean column for summaries
>>> for f in fc.values():
... m = pd.Series([True, False] * (len(f.tracking.data)//2 + 1))[:len(f.tracking.data)]
... m.index = f.tracking.data.index
... f.store(m, 'mask', meta={})
>>> sc = SummaryCollection.from_features_collection(fc)
>>> rd = {h: s.time_true('mask') for h, s in sc.items()}
>>> sc.store(rd, name='t_mask')
>>> all('t_mask' in s.data for s in sc.values())
True
bfa ¶
bfa(column: str, all_states=None, numshuffles: int = 1000, pairs: list[tuple[str, str]] | None = None)
Behaviour Flow Analysis between groups for a grouped SummaryCollection.
Requires the collection to be grouped (via groupby). Computes transition matrices per Summary within each group, then computes Manhattan distances between group means and surrogate distributions via shuffling.
If pairs is provided, only those group pairs are analyzed; otherwise all
unique pairs in self.group_keys are evaluated.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
... tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # inject simple 2-state labels and tags to build groups
>>> for i, (h, f) in enumerate(fc.items()):
... states = pd.Series(['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1))[:len(f.tracking.data)]
... states.index = f.tracking.data.index
... f.store(states, 'state', meta={})
... f.tracking.add_tag('group', f'G{i+1}')
>>> gfc = fc.groupby('group')
>>> sc = SummaryCollection.from_features_collection(gfc)
>>> # compute all pairs
>>> res = sc.bfa('state', all_states=['A','B'], numshuffles=2)
>>> isinstance(res, dict) and 'observed' in next(iter(res.values()))
True
>>> # compute only specific pair(s)
>>> res2 = sc.bfa('state', all_states=['A','B'], numshuffles=2, pairs=[('G1','G2')])
>>> list(res2.keys()) == ['G1_vs_G2']
True
bfa_stats
staticmethod
¶
bfa_stats(bfa_results: dict[str, dict[str, float]]) -> dict[str, dict[str, float]]
Compute simple statistics (percentile, zscore, right_tail_p) from bfa results.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
... tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> for i, (h, f) in enumerate(fc.items()):
... states = pd.Series(['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1))[:len(f.tracking.data)]
... states.index = f.tracking.data.index
... f.store(states, 'state', meta={})
... f.tracking.add_tag('group', f'G{i+1}')
>>> sc = SummaryCollection.from_features_collection(fc.groupby('group'))
>>> bfa_out = sc.bfa('state', all_states=['A','B'], numshuffles=2)
>>> stats = SummaryCollection.bfa_stats(bfa_out)
>>> set(next(iter(stats.values())).keys()) >= {'percentile','zscore','right_tail_p'}
True
plot_bfa_results
staticmethod
¶
plot_bfa_results(results: dict[str, dict[str, float]], compares: str | list[str] | None = None, add_stats: bool = True, stats: dict[str, dict[str, float]] | None = None, bins: int = 50, figsize: tuple[float, float] = (4, 3), save_dir: str | None = None, show: bool = True, compare: str | None = None)
Plot one or more BFA result comparisons as separate single-panel figures.
- If
comparesis None and results contain a single comparison, that one is plotted. - If
comparesis a string, only that comparison is plotted. - If
comparesis a list of strings, each comparison is plotted separately. - If
add_statsis True andstatsnot provided, statistics will be computed viaSummaryCollection.bfa_stats(results)and annotated on each plot.
Returns (fig, ax) for a single comparison, or a dict {compare: (fig, ax)}
for multiple.
Examples:
>>> import tempfile, shutil, os
>>> from pathlib import Path
>>> import pandas as pd
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
... tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> # add simple 2-state labels and tags to build two groups
>>> for i, (h, f) in enumerate(fc.items()):
... states = pd.Series(['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1))[:len(f.tracking.data)]
... states.index = f.tracking.data.index
... f.store(states, 'state', meta={})
... f.tracking.add_tag('group', f'G{i+1}')
>>> sc = SummaryCollection.from_features_collection(fc.groupby('group'))
>>> bfa_out = sc.bfa('state', all_states=['A','B'], numshuffles=5)
>>> # plot a single comparison and save it
>>> with tempfile.TemporaryDirectory() as outdir:
... fig, ax = SummaryCollection.plot_bfa_results(bfa_out, compare='G1_vs_G2', show=False, save_dir=outdir)
... os.path.exists(os.path.join(outdir, 'G1_vs_G2.png'))
True
plot_transition_umap ¶
plot_transition_umap(column: str, all_states=None, groups: list[str] | list[list[str]] | None = None, n_neighbors: int = 15, min_dist: float = 0.1, random_state: int = 0, figsize: tuple[float, float] = (4.5, 4), show: bool = True, save_dir: str | None = None)
Plot a simple UMAP embedding of per-subject transition matrices for selected groups.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
|
str
|
Name of the categorical column used to compute transition matrices. |
required |
|
Optional explicit state ordering for transition matrices. |
None
|
|
|
list[str] | list[list[str]] | None
|
|
None
|
|
int
|
UMAP hyperparameters. |
15
|
|
int
|
UMAP hyperparameters. |
15
|
|
int
|
UMAP hyperparameters. |
15
|
|
tuple[float, float]
|
Matplotlib options. |
(4.5, 4)
|
|
tuple[float, float]
|
Matplotlib options. |
(4.5, 4)
|
Returns:
| Type | Description |
|---|---|
(fig, ax): Matplotlib figure and axis.
|
|
Examples:
>>> # xdoctest: +REQUIRES(module: umap)
>>> import tempfile, shutil, os, pandas as pd
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> from py3r.behaviour.features.features_collection import FeaturesCollection
>>> from py3r.behaviour.summary.summary_collection import SummaryCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... _ = shutil.copy(p, d / 'A.csv'); _ = shutil.copy(p, d / 'B.csv')
... tc = TrackingCollection.from_dlc({'A': str(d/'A.csv'), 'B': str(d/'B.csv')}, fps=30)
>>> fc = FeaturesCollection.from_tracking_collection(tc)
>>> for i, (h, f) in enumerate(fc.items()):
... states = pd.Series(['A','A','B','B','A'] * (len(f.tracking.data)//5 + 1))[:len(f.tracking.data)]
... states.index = f.tracking.data.index
... f.store(states, 'state', meta={})
... f.tracking.add_tag('group', f'G{i+1}')
>>> sc = SummaryCollection.from_features_collection(fc.groupby('group'))
>>> with tempfile.TemporaryDirectory() as outdir:
... fig, ax = sc.plot_transition_umap(column='state', all_states=['A','B'], groups=['G1','G2'], show=False, save_dir=outdir)
... os.path.exists(os.path.join(outdir, 'transition_umap.png'))
True
save ¶
save(dirpath: str, *, overwrite: bool = False, data_format: str = 'parquet') -> None
Save this collection to a directory. Preserves grouping and delegates to leaf objects' save(dirpath, data_format, overwrite=True).
Examples:
>>> import tempfile, shutil, os
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
... out = d / 'coll'
... coll.save(str(out), overwrite=True, data_format='csv')
... # collection-level manifest at top-level
... assert os.path.exists(os.path.join(str(out), 'manifest.json'))
... # element-level manifests under elements/<handle>/
... assert os.path.exists(os.path.join(str(out), 'elements', 'A', 'manifest.json'))
count_onset ¶
count_onset(column: str) -> BatchResult
Batch-mode wrapper for Summary.count_onset across the collection.
counts number of times boolean series in the given column changes from False to True, ignoring nan values if first non nan value in series is true, this counts as an onset
See Summary.count_onset for examples.
time_true ¶
time_true(column: str) -> BatchResult
Batch-mode wrapper for Summary.time_true across the collection.
See Summary.time_true for examples.
time_false ¶
time_false(column: str) -> BatchResult
Batch-mode wrapper for Summary.time_false across the collection.
See Summary.time_false for examples.
total_distance ¶
total_distance(point: str, startframe: int | None = None, endframe: int | None = None) -> BatchResult
Batch-mode wrapper for Summary.total_distance across the collection.
See Summary.total_distance for examples.
sum_column ¶
sum_column(column: str) -> BatchResult
Batch-mode wrapper for Summary.sum_column across the collection.
Sum all non-NaN values in a features.data column and return as a SummaryResult.
See Summary.sum_column for examples.
mean_column ¶
mean_column(column: str) -> BatchResult
Batch-mode wrapper for Summary.mean_column across the collection.
Mean of all non-NaN values in a features.data column and return as a SummaryResult.
See Summary.mean_column for examples.
median_column ¶
median_column(column: str) -> BatchResult
Batch-mode wrapper for Summary.median_column across the collection.
Median of all non-NaN values in a features.data column and return as a SummaryResult.
See Summary.median_column for examples.
max_column ¶
max_column(column: str) -> BatchResult
Batch-mode wrapper for Summary.max_column across the collection.
Max of all non-NaN values in a features.data column and return as a SummaryResult.
See Summary.max_column for examples.
min_column ¶
min_column(column: str) -> BatchResult
Batch-mode wrapper for Summary.min_column across the collection.
Min of all non-NaN values in a features.data column and return as a SummaryResult.
See Summary.min_column for examples.
transition_matrix ¶
transition_matrix(column: str, all_states=None) -> BatchResult
Batch-mode wrapper for Summary.transition_matrix across the collection.
See Summary.transition_matrix for examples.
count_state_onsets ¶
count_state_onsets(column: str) -> BatchResult
Batch-mode wrapper for Summary.count_state_onsets across the collection.
counts the number of times a state is entered in a given column
See Summary.count_state_onsets for examples.
time_in_state ¶
time_in_state(column: str) -> BatchResult
Batch-mode wrapper for Summary.time_in_state across the collection.
See Summary.time_in_state for examples.
values ¶
values()
Values iterator (elements or sub-collections).
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> len(list(coll.values())) == 2
True
items ¶
items()
Items iterator (handle, element).
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> sorted([h for h, _ in coll.items()])
['A', 'B']
keys ¶
keys()
Keys iterator (handles or group keys).
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> list(sorted(coll.keys()))
['A', 'B']
groupby ¶
groupby(tags)
Group the collection by one or more existing tag names. Returns a grouped view (this same collection type) whose values are sub-collections keyed by a tuple of tag values in the order provided.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
... coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G2')
>>> g = coll.groupby('group')
>>> g.is_grouped
True
>>> sorted(g.group_keys)
[('G1',), ('G2',)]
flatten ¶
flatten()
Flatten a MultipleCollection to a flat Collection. If already flat, return self.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
... coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G1')
... g = coll.groupby('group')
>>> flat = g.flatten()
>>> flat.is_grouped
False
>>> sorted(flat.keys())
['A', 'B']
get_group ¶
get_group(key)
Get a sub-collection by group key from a grouped view.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
... coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G2')
>>> g = coll.groupby('group')
>>> sub = g.get_group(('G1',))
>>> list(sub.keys())
['A']
regroup ¶
regroup()
Recompute the same grouping using the current tags and the original grouping tag order. If not grouped, returns self.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
... coll['A'].add_tag('group','G1'); coll['B'].add_tag('group','G1')
... g = coll.groupby('group')
... coll['B'].add_tag('group','G2', overwrite=True) # change tag
>>> g2 = g.regroup()
>>> sorted(g2.group_keys)
[('G1',), ('G2',)]
tags_info ¶
tags_info(*, include_value_counts: bool = False) -> pd.DataFrame
Summarize tag presence across the collection's leaf objects.
Works for flat and grouped collections. If include_value_counts is True,
include a column 'value_counts' with a dict of value->count for each tag.
Returns a pandas.DataFrame with columns:
['tag', 'attached_to', 'missing_from', 'unique_values', ('value_counts')]
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
... coll['A'].add_tag('genotype', 'WT')
... coll['B'].add_tag('timepoint', 'T1')
>>> info = coll.tags_info(include_value_counts=True)
>>> int(info.loc['genotype','attached_to'])
1
>>> int(info.loc['genotype','missing_from'])
1
>>> int(info.loc['genotype','unique_values'])
1
>>> info.loc['genotype','value_counts']
{'WT': 1}
>>> int(info.loc['timepoint','attached_to'])
1
map_leaves ¶
map_leaves(fn)
Apply a function to every leaf element and return a new collection of the same type. Preserves grouping shape and groupby metadata when grouped.
fn: callable(Element) -> ElementLike
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
>>> sub = coll.map_leaves(lambda t: t.loc[0:1])
>>> all(len(t.data) == 2 for t in sub.values())
True
load
classmethod
¶
load(dirpath: str)
Load a collection previously saved with save(). Uses the class's _element_type.load to reconstruct leaves.
Examples:
>>> import tempfile, shutil
>>> from pathlib import Path
>>> from py3r.behaviour.util.docdata import data_path
>>> from py3r.behaviour.tracking.tracking_collection import TrackingCollection
>>> with tempfile.TemporaryDirectory() as d:
... d = Path(d)
... with data_path('py3r.behaviour.tracking._data', 'dlc_single.csv') as p:
... a = d / 'A.csv'; b = d / 'B.csv'
... _ = shutil.copy(p, a); _ = shutil.copy(p, b)
... coll = TrackingCollection.from_dlc({'A': str(a), 'B': str(b)}, fps=30)
... out = d / 'coll'
... coll.save(str(out), overwrite=True, data_format='csv')
... coll2 = TrackingCollection.load(str(out))
>>> list(sorted(coll2.keys()))
['A', 'B']