Here is a solution using itertools.groupby (which is notable for clumping together runs of consecutive similar values); it is a bit convoluted and probably not very fast.
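As a quick illustration of that clumping behavior on a plain list (a minimal sketch, separate from the solution itself):

from itertools import groupby
print([(key, list(run)) for key, run in groupby([1, 1, 2, 2, 2, 1])])
# => [(1, [1, 1]), (2, [2, 2, 2]), (1, [1])]

The full solution: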
from itertools import groupby
import io
import pandas as pd
# Create your sample dataframe
data = io.StringIO("""\
id A B C
1 34353 917998 x
2 34973 980340 x
3 87365 498097 x
4 98309 486547 x
5 87699 475132
6 52734 4298894
7 8749267 4918066 x
8 89872 18103
9 589892 4818086 y
10 765 4063 y
11 32369 418165 y
12 206 2918137
13 554 3918072
14 1029 1918051 x
15 2349243 4918064
""")
df = pd.read_csv(data, sep=r"\s+")
# Create a boolean column that encodes which rows you want to keep
df['grouper'] = df['C'].notnull()
# Isolate the indices of the rows you want to keep, grouped by contiguity
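# ("if not k" keeps only the runs where 'grouper' is False, i.e. the rows where C is empty)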
groups = [list(map(lambda x: x[1]['id'], list(l)))
          for k, l in groupby(df.iterrows(), key=lambda x: x[1]['grouper'])
          if not k]
print(groups) # => [[5, 6], [8], [12, 13], [15]]
# Gather the sub-dataframes whose indices match `groups`
dfs = []
for g in groups:
    dfs.append(df[['A', 'B']][df['id'].isin(g)])
# Inspect what you got
for sub_df in dfs:
    print(sub_df)
Output:
A B
4 87699 475132
5 52734 4298894
A B
7 89872 18103
A B
11 206 2918137
12 554 3918072
A B
14 2349243 4918064
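For what it's worth, the same split can also be done without iterating row by row. Here is a hedged sketch of a vectorized alternative (not part of the solution above): label each contiguous run of empty-C rows by counting how many times the boolean mask flips, then group on that label.

# Vectorized alternative (sketch) -- assumes `df` still holds the full table built above
mask = df['C'].isnull()                                    # rows you want to keep
run_id = (mask != mask.shift(fill_value=False)).cumsum()   # new label whenever the mask flips
dfs_alt = [g[['A', 'B']] for _, g in df[mask].groupby(run_id[mask])]
# dfs_alt should contain the same four sub-dataframes as `dfs`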