python分类之group by和defaultdict

下面的数据可以按 'model' 字段进行分组(group by):

# Sample records to be grouped. Each record is a dict with the keys 'model',
# 'some_value' and 'trim_name'. The list is ordered by ascending 'some_value',
# so records of the same model are NOT adjacent to each other.
SOME_DATA = [
    {'model': u'Yaris', 'some_value': 11202, 'trim_name': u'3-Door L Manual'},
    {'model': u'Yaris', 'some_value': 19269, 'trim_name': u'3-Door LE Automatic'},
    {'model': u'Corolla', 'some_value': 27119, 'trim_name': u'L Automatic'},
    {'model': u'Corolla', 'some_value': 32262, 'trim_name': u'LE'},
    {'model': u'Corolla', 'some_value': 37976, 'trim_name': u'S Premium'},
    {'model': u'Camry', 'some_value': 39730, 'trim_name': u'LE 4-Cyl'},
    {'model': u'Camry', 'some_value': 45761, 'trim_name': u'XSE 4-Cyl'},
    {'model': u'Yaris', 'some_value': 48412, 'trim_name': u'3-Door L Automatic'},
    {'model': u'Camry', 'some_value': 55423, 'trim_name': u'XLE 4-Cyl'},
    {'model': u'Corolla', 'some_value': 57055, 'trim_name': u'ECO Premium'},
    {'model': u'Corolla', 'some_value': 61296, 'trim_name': u'ECO Plus'},
    {'model': u'Camry', 'some_value': 63660, 'trim_name': u'XSE V6'},
    {'model': u'Yaris', 'some_value': 65570, 'trim_name': u'5-Door LE Automatic'},
    {'model': u'Camry', 'some_value': 67461, 'trim_name': u'XLE V6'},
    {'model': u'Corolla', 'some_value': 73602, 'trim_name': u'S'},
    {'model': u'Yaris', 'some_value': 74158, 'trim_name': u'5-Door SE Manual'},
    {'model': u'Corolla', 'some_value': 74249, 'trim_name': u'LE Plus'},
    {'model': u'Corolla', 'some_value': 78386, 'trim_name': u'ECO'},
    {'model': u'Camry', 'some_value': 82747, 'trim_name': u'SE 4-Cyl'},
    {'model': u'Corolla', 'some_value': 83162, 'trim_name': u'LE Premium'},
    {'model': u'Corolla', 'some_value': 84863, 'trim_name': u'S Plus Manual'},
    {'model': u'Yaris', 'some_value': 90313, 'trim_name': u'5-Door L Automatic'},
    {'model': u'Corolla', 'some_value': 90452, 'trim_name': u'L Manual'},
    {'model': u'Yaris', 'some_value': 93152, 'trim_name': u'5-Door SE Automatic'},
    {'model': u'Corolla', 'some_value': 94973, 'trim_name': u'S Plus CVT'},
]

可以通过 collections 模块中的 defaultdict 来实现

import collections
from pprint import pprint  # pprint() is called in the printing loop below

# Bucket the records by model. defaultdict(list) creates an empty list the
# first time a model is looked up, so no key-existence check is needed.
grouped = collections.defaultdict(list)
for item in SOME_DATA:
    grouped[item['model']].append(item)

# Print a blank line, the model name, then that model's records.
# print('') and print(model) emit the same output on Python 2 and Python 3,
# unlike the bare `print` statement, which is a syntax error on Python 3.
for model, group in grouped.items():
    print('')
    print(model)
    pprint(group, width=150)

也可以使用 itertools.groupby。对于比较大的数据集,这种方式可能更好,因为 groupby 返回的是一个迭代器,每个分组本身也是惰性的迭代器,这也是为什么我在打印之前先把分组转成了 list。需要注意:groupby 只会把相邻且 key 相同的元素归为一组,所以必须先用同一个 key 函数对数据排序。

import itertools

def keyfunc(x):
    """Return the value stored under the 'model' key of record *x*.

    Used as the shared key function for both sorting and grouping.
    Raises KeyError if *x* has no 'model' key.
    """
    model_name = x['model']
    return model_name

from pprint import pprint  # pprint() is called in the loop below

# groupby() only merges *consecutive* items whose keys are equal, so the data
# must first be sorted with the same key function that is used for grouping.
SOME_DATA = sorted(SOME_DATA, key=keyfunc)
for model, group in itertools.groupby(SOME_DATA, keyfunc):
    # print('') and print(model) emit the same output on Python 2 and Python 3,
    # unlike the bare `print` statement, which is a syntax error on Python 3.
    print('')
    print(model)
    # `group` is a lazy iterator; materialize it so pprint can show the records.
    pprint(list(group), width=150)

下面是运行的结果(注意:输出中的 some_value 数值与上面 SOME_DATA 中的不一致,应该是来自另一次运行所用的数据;各车型的分组以及每组包含的 trim_name 与上面的数据是一致的)

Camry
[{'model': u'Camry', 'some_value': 36776, 'trim_name': u'SE 4-Cyl'},
 {'model': u'Camry', 'some_value': 56569, 'trim_name': u'LE 4-Cyl'},
 {'model': u'Camry', 'some_value': 57052, 'trim_name': u'XSE 4-Cyl'},
 {'model': u'Camry', 'some_value': 92360, 'trim_name': u'XLE V6'},
 {'model': u'Camry', 'some_value': 92756, 'trim_name': u'XSE V6'},
 {'model': u'Camry', 'some_value': 94413, 'trim_name': u'XLE 4-Cyl'}]

Corolla
[{'model': u'Corolla', 'some_value': 13307, 'trim_name': u'L Automatic'},
 {'model': u'Corolla', 'some_value': 15726, 'trim_name': u'ECO Plus'},
 {'model': u'Corolla', 'some_value': 25579, 'trim_name': u'S'},
 {'model': u'Corolla', 'some_value': 31920, 'trim_name': u'ECO Premium'},
 {'model': u'Corolla', 'some_value': 34480, 'trim_name': u'LE'},
 {'model': u'Corolla', 'some_value': 44958, 'trim_name': u'S Plus Manual'},
 {'model': u'Corolla', 'some_value': 49606, 'trim_name': u'LE Premium'},
 {'model': u'Corolla', 'some_value': 59629, 'trim_name': u'LE Plus'},
 {'model': u'Corolla', 'some_value': 74226, 'trim_name': u'S Plus CVT'},
 {'model': u'Corolla', 'some_value': 75725, 'trim_name': u'L Manual'},
 {'model': u'Corolla', 'some_value': 82382, 'trim_name': u'ECO'},
 {'model': u'Corolla', 'some_value': 95633, 'trim_name': u'S Premium'}]

Yaris
[{'model': u'Yaris', 'some_value': 16789, 'trim_name': u'3-Door L Manual'},
 {'model': u'Yaris', 'some_value': 20349, 'trim_name': u'5-Door LE Automatic'},
 {'model': u'Yaris', 'some_value': 42897, 'trim_name': u'5-Door L Automatic'},
 {'model': u'Yaris', 'some_value': 62045, 'trim_name': u'5-Door SE Automatic'},
 {'model': u'Yaris', 'some_value': 91913, 'trim_name': u'3-Door L Automatic'},
 {'model': u'Yaris', 'some_value': 94218, 'trim_name': u'5-Door SE Manual'},
 {'model': u'Yaris', 'some_value': 97979, 'trim_name': u'3-Door LE Automatic'}]

Written by

说点什么

欢迎讨论

avatar

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据

  Subscribe  
提醒