Pandas: Média, Mínimo, Máximo e agrupamento

# Autor: Sidon
import pandas as pd
import random

# Generate 20 sample records: a sequential id (as a string), a random integer
# price in [200, 8500) and a random zipcode in [9700, 9710) stored as a string.
rnds = [
    {
        'id': str(i),
        'price': random.randrange(200, 8500),
        'zipcode': str(random.randrange(9700, 9710)),
    }
    for i in range(20)
]

# Build the DataFrame: one row per record, one column per dictionary key.
prices = pd.DataFrame(rnds)

prices id price zipcode 0 0 4939 9706 1 1 314 9708 2 2 4554 9705 3 3 5930 9700 4 4 8306 9706 5 5 717 9708 6 6 7105 9705 7 7 6136 9705 8 8 6882 9704 9 9 4364 9708 10 10 2384 9704 11 11 2003 9704 12 12 8119 9705 13 13 354 9707 14 14 3747 9701 15 15 1838 9709 16 16 3287 9706 17 17 7586 9709 18 18 3228 9702 19 19 4483 9701
# Obtendo o máximo prices.loc[prices['price'].idxmax()]
id 4 price 8306 zipcode 9706 Name: 4, dtype: object
# Agrupando os máximos prices.groupby('zipcode', sort=False)['price'].max()
zipcode 9706 8306 9708 4364 9705 8119 9700 5930 9704 6882 9707 354 9701 4483 9709 7586 9702 3228 Name: price, dtype: int64
# Agrupando pelas médias prices.groupby('zipcode', sort=False)['price'].mean()
zipcode 9706 5510.666667 9708 1798.333333 9705 6478.500000 9700 5930.000000 9704 3756.333333 9707 354.000000 9701 4115.000000 9709 4712.000000 9702 3228.000000 Name: price, dtype: float64
# Agrupando pelos mínimos prices.groupby('zipcode', sort=False)['price'].min()
zipcode 9706 3287 9708 314 9705 4554 9700 5930 9704 2003 9707 354 9701 3747 9709 1838 9702 3228 Name: price, dtype: int64

Comentários