a": ["red"] * 2 + ["blue"] * 2 + ["black"] * 2, "b": range(6)} ... ) >>> df a b 0 red 0 1 red 1 2 blue 2 3 blue 3 4 black 4 5 black 5 Select one row at random for each distinct value in column a. The `random_state` argument can be used to guarantee reproducibility: >>> df.groupby("a").sample(n=1, random_state=1) a b 4 black 4 2 blue 2 1 red 1 Set `frac` to sample fixed proportions rather than counts: >>> df.groupby("a")["b"].sample(frac=0.5, random_state=2) 5 5 2 2 0 0 Name: b, dtype: int64 Control sample probabilities within groups by setting weights: >>> df.groupby("a").sample( ... n=1, ... weights=[1, 1, 1, 0, 0, 1], ... random_state=1, ... ) a b 5 black 5 2 blue 2 0 red 0 NrČ