Here is some data that I want to group by model:
# Sample records to be grouped: each dict has 'model', 'some_value' and 'trim_name' keys.
# The entries are listed in ascending 'some_value' order, so the models are interleaved
# rather than already sitting next to each other.
SOME_DATA = [
{'model': u'Yaris', 'some_value': 11202, 'trim_name': u'3-Door L Manual'},
{'model': u'Yaris', 'some_value': 19269, 'trim_name': u'3-Door LE Automatic'},
{'model': u'Corolla', 'some_value': 27119, 'trim_name': u'L Automatic'},
{'model': u'Corolla', 'some_value': 32262, 'trim_name': u'LE'},
{'model': u'Corolla', 'some_value': 37976, 'trim_name': u'S Premium'},
{'model': u'Camry', 'some_value': 39730, 'trim_name': u'LE 4-Cyl'},
{'model': u'Camry', 'some_value': 45761, 'trim_name': u'XSE 4-Cyl'},
{'model': u'Yaris', 'some_value': 48412, 'trim_name': u'3-Door L Automatic'},
{'model': u'Camry', 'some_value': 55423, 'trim_name': u'XLE 4-Cyl'},
{'model': u'Corolla', 'some_value': 57055, 'trim_name': u'ECO Premium'},
{'model': u'Corolla', 'some_value': 61296, 'trim_name': u'ECO Plus'},
{'model': u'Camry', 'some_value': 63660, 'trim_name': u'XSE V6'},
{'model': u'Yaris', 'some_value': 65570, 'trim_name': u'5-Door LE Automatic'},
{'model': u'Camry', 'some_value': 67461, 'trim_name': u'XLE V6'},
{'model': u'Corolla', 'some_value': 73602, 'trim_name': u'S'},
{'model': u'Yaris', 'some_value': 74158, 'trim_name': u'5-Door SE Manual'},
{'model': u'Corolla', 'some_value': 74249, 'trim_name': u'LE Plus'},
{'model': u'Corolla', 'some_value': 78386, 'trim_name': u'ECO'},
{'model': u'Camry', 'some_value': 82747, 'trim_name': u'SE 4-Cyl'},
{'model': u'Corolla', 'some_value': 83162, 'trim_name': u'LE Premium'},
{'model': u'Corolla', 'some_value': 84863, 'trim_name': u'S Plus Manual'},
{'model': u'Yaris', 'some_value': 90313, 'trim_name': u'5-Door L Automatic'},
{'model': u'Corolla', 'some_value': 90452, 'trim_name': u'L Manual'},
{'model': u'Yaris', 'some_value': 93152, 'trim_name': u'5-Door SE Automatic'},
{'model': u'Corolla', 'some_value': 94973, 'trim_name': u'S Plus CVT'},
]
This can be done using defaultdict from the collections module:
import collections
from pprint import pprint

# Bucket every record under its model name. defaultdict(list) supplies the
# empty list the first time a model is seen, so no membership check is needed.
grouped = collections.defaultdict(list)
for item in SOME_DATA:
    grouped[item['model']].append(item)

# Show each model followed by its records.
for model, group in grouped.items():
    # print('') / print(model) produce the same output on Python 2 and Python 3.
    print('')
    print(model)
    pprint(group, width=150)
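Here are the results. The groups come out in no particular order because they are the keys of a dictionary. (The some_value numbers differ from the listing above; this output appears to have been captured from a separate run with different values.)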
Yaris
[{'model': u'Yaris', 'some_value': 27065, 'trim_name': u'5-Door L Automatic'},
{'model': u'Yaris', 'some_value': 32757, 'trim_name': u'5-Door SE Automatic'},
{'model': u'Yaris', 'some_value': 57344, 'trim_name': u'3-Door L Manual'},
{'model': u'Yaris', 'some_value': 64002, 'trim_name': u'5-Door SE Manual'},
{'model': u'Yaris', 'some_value': 77974, 'trim_name': u'3-Door L Automatic'},
{'model': u'Yaris', 'some_value': 92658, 'trim_name': u'3-Door LE Automatic'},
{'model': u'Yaris', 'some_value': 98769, 'trim_name': u'5-Door LE Automatic'}]
Camry
[{'model': u'Camry', 'some_value': 30247, 'trim_name': u'XSE 4-Cyl'},
{'model': u'Camry', 'some_value': 33809, 'trim_name': u'XSE V6'},
{'model': u'Camry', 'some_value': 65637, 'trim_name': u'LE 4-Cyl'},
{'model': u'Camry', 'some_value': 67329, 'trim_name': u'SE 4-Cyl'},
{'model': u'Camry', 'some_value': 76269, 'trim_name': u'XLE 4-Cyl'},
{'model': u'Camry', 'some_value': 87438, 'trim_name': u'XLE V6'}]
Corolla
[{'model': u'Corolla', 'some_value': 11239, 'trim_name': u'S'},
{'model': u'Corolla', 'some_value': 27356, 'trim_name': u'S Plus Manual'},
{'model': u'Corolla', 'some_value': 44792, 'trim_name': u'L Manual'},
{'model': u'Corolla', 'some_value': 56252, 'trim_name': u'ECO Premium'},
{'model': u'Corolla', 'some_value': 78570, 'trim_name': u'S Plus CVT'},
{'model': u'Corolla', 'some_value': 78964, 'trim_name': u'LE Premium'},
{'model': u'Corolla', 'some_value': 82116, 'trim_name': u'ECO'},
{'model': u'Corolla', 'some_value': 85467, 'trim_name': u'S Premium'},
{'model': u'Corolla', 'some_value': 87099, 'trim_name': u'L Automatic'},
{'model': u'Corolla', 'some_value': 91974, 'trim_name': u'LE Plus'},
{'model': u'Corolla', 'some_value': 94862, 'trim_name': u'LE'},
{'model': u'Corolla', 'some_value': 97625, 'trim_name': u'ECO Plus'}]
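This can also be done using itertools.groupby. Because groupby only groups consecutive items that share a key, the data must first be sorted by the same key function. This method is probably better when working with large datasets because groupby returns each group as an iterator. (This is the reason I convert it to a list before printing.)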
import itertools
def keyfunc(x):
    """Return the 'model' value of record *x*; used as the sort and grouping key."""
    return x['model']
from pprint import pprint

# groupby only merges consecutive items with equal keys, so the records must
# be sorted by the same key function before grouping.
SOME_DATA = sorted(SOME_DATA, key=keyfunc)
for model, group in itertools.groupby(SOME_DATA, keyfunc):
    # print('') / print(model) produce the same output on Python 2 and Python 3.
    print('')
    print(model)
    # group is a one-shot iterator; turn it into a list so pprint can show the records.
    pprint(list(group), width=150)
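Here are the results. The groups now appear in alphabetical order by model because the data was sorted before grouping. (The some_value numbers differ from the listing at the top; this output appears to have been captured from a separate run with different values.)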
Camry
[{'model': u'Camry', 'some_value': 36776, 'trim_name': u'SE 4-Cyl'},
{'model': u'Camry', 'some_value': 56569, 'trim_name': u'LE 4-Cyl'},
{'model': u'Camry', 'some_value': 57052, 'trim_name': u'XSE 4-Cyl'},
{'model': u'Camry', 'some_value': 92360, 'trim_name': u'XLE V6'},
{'model': u'Camry', 'some_value': 92756, 'trim_name': u'XSE V6'},
{'model': u'Camry', 'some_value': 94413, 'trim_name': u'XLE 4-Cyl'}]
Corolla
[{'model': u'Corolla', 'some_value': 13307, 'trim_name': u'L Automatic'},
{'model': u'Corolla', 'some_value': 15726, 'trim_name': u'ECO Plus'},
{'model': u'Corolla', 'some_value': 25579, 'trim_name': u'S'},
{'model': u'Corolla', 'some_value': 31920, 'trim_name': u'ECO Premium'},
{'model': u'Corolla', 'some_value': 34480, 'trim_name': u'LE'},
{'model': u'Corolla', 'some_value': 44958, 'trim_name': u'S Plus Manual'},
{'model': u'Corolla', 'some_value': 49606, 'trim_name': u'LE Premium'},
{'model': u'Corolla', 'some_value': 59629, 'trim_name': u'LE Plus'},
{'model': u'Corolla', 'some_value': 74226, 'trim_name': u'S Plus CVT'},
{'model': u'Corolla', 'some_value': 75725, 'trim_name': u'L Manual'},
{'model': u'Corolla', 'some_value': 82382, 'trim_name': u'ECO'},
{'model': u'Corolla', 'some_value': 95633, 'trim_name': u'S Premium'}]
Yaris
[{'model': u'Yaris', 'some_value': 16789, 'trim_name': u'3-Door L Manual'},
{'model': u'Yaris', 'some_value': 20349, 'trim_name': u'5-Door LE Automatic'},
{'model': u'Yaris', 'some_value': 42897, 'trim_name': u'5-Door L Automatic'},
{'model': u'Yaris', 'some_value': 62045, 'trim_name': u'5-Door SE Automatic'},
{'model': u'Yaris', 'some_value': 91913, 'trim_name': u'3-Door L Automatic'},
{'model': u'Yaris', 'some_value': 94218, 'trim_name': u'5-Door SE Manual'},
{'model': u'Yaris', 'some_value': 97979, 'trim_name': u'3-Door LE Automatic'}]