U
    +if                     @  sf  d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZ ddlZddlZddlmZ dd	lmZmZmZmZ dd
lm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7 ddl8m9Z9 ddl:m;  m<Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZGmHZHmIZImJZJmKZK ddlLmMZM ddlNmOZOmPZPmQZQ ddlRmSZS ddlTmUZU ddlVmWZW eeXede
f f ZYedZZG dd deZ[dd d!d"d#Z\d d$d%d&d'Z]e]eSeEj^G d(d) d)eGeS Z_e]eAeEj`G d*d+ d+eGeA Zad,d,d-d,d.d/d0ZbdS )1z
Define the SeriesGroupBy and DataFrameGroupBy
classes that hold the groupby interfaces (and some implementations).

These are user facing as the result of the ``df.groupby(...)`` operations,
which here returns a DataFrameGroupBy object.
    )annotations)abc)partial)dedent)
AnyCallableHashableIterableMapping
NamedTupleSequenceTypeVarUnioncastN)	reduction)	ArrayLikeManager	Manager2DSingleManager)AppenderSubstitutiondoc)find_stack_level)ensure_int64is_boolis_categorical_dtypeis_dict_likeis_integer_dtypeis_interval_dtype	is_scalar)isnanotna)
algorithmsnanops)GroupByApplymaybe_mangle_lambdasreconstruct_funcvalidate_func_kwargs)SpecificationError)!create_series_with_explicit_dtype)	DataFrame)NDFrame)base)GroupBy_agg_template_apply_docs_transform_template)warn_dropping_nuisance_columns_deprecated)get_grouper)Index
MultiIndexall_indexes_same)Series)maybe_use_numba)boxplot_frame_groupby.ScalarResultc                   @  s   e Zd ZU ded< ded< dS )NamedAggr   column	AggScalarZaggfuncN)__name__
__module____qualname____annotations__ rA   rA   N/home/mars/bis/venv/lib/python3.8/site-packages/pandas/core/groupby/generic.pyr:   c   s   
r:   strztype[DataFrame | Series])nameklassc                   s0    fdd}t | }|jpd|_ |_t|S )z
    Create a property for a GroupBy subclass to dispatch to DataFrame/Series.

    Parameters
    ----------
    name : str
    klass : {DataFrame, Series}

    Returns
    -------
    property
    c                   s
   |   S N)Z_make_wrapperselfrD   rA   rB   propv   s    zgenerate_property.<locals>.prop )getattr__doc__r=   property)rD   rE   rJ   Zparent_methodrA   rI   rB   generate_propertyh   s
    
rO   zfrozenset[str])rE   	allowlistc                   s    fdd}|S )a  
    Create GroupBy member defs for DataFrame/Series names in a allowlist.

    Parameters
    ----------
    klass : DataFrame or Series class
        class where members are defined.
    allowlist : frozenset[str]
        Set of names of klass methods to be constructed

    Returns
    -------
    class decorator

    Notes
    -----
    Since we don't want to override methods explicitly defined in the
    base class, any such name is skipped.
    c                   s0    D ]&}t | |rqt|}t| || q| S rF   )hasattrrO   setattr)clsrD   rJ   rP   rE   rA   rB   pinner   s    

z*pin_allowlisted_properties.<locals>.pinnerrA   )rE   rP   rU   rA   rT   rB   pin_allowlisted_properties   s    rV   c                      s  e Zd ZejZdddddZdddd	Zd
dddZe	dZ
eed jded d fddZeee
dddKdddddZeZddddZdddddZdLdd!d"d#d$d%d&Zd'd( Zedd)eedddd*d+ZdMd.d"d/d0d1d2Zd3dd4d5d6Zd"dd7d8ZdNd"d9d:d;ZdOd"dd<d=d>Zeej  fd?d@Z dPd"d"d"d"dAdBdCZ!eej"dQd/d.dFdGdHZ"eej#dRd/d.dFdIdJZ#  Z$S )SSeriesGroupByr   r6   mgrreturnc                 C  sF   |j dkrtt|}|}ntt|}|d}| jj|| jjd}|S )N   r   rI   )ndimr   r   r   igetobj_constructorrD   )rH   rY   singleserrA   rA   rB   _wrap_agged_manager   s    



z!SeriesGroupBy._wrap_agged_managerr   rZ   c                 C  s   | j }|j}|S rF   )_obj_with_exclusions_mgr)rH   ra   r`   rA   rA   rB   _get_data_to_aggregate   s    z$SeriesGroupBy._get_data_to_aggregateIterable[Series]c                 c  s   | j V  d S rF   )_selected_objrG   rA   rA   rB   _iterate_slices   s    zSeriesGroupBy._iterate_slicesa  
    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4])

    >>> s
    0    1
    1    2
    2    3
    3    4
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).min()
    1    1
    2    3
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).agg('min')
    1    1
    2    3
    dtype: int64

    >>> s.groupby([1, 1, 2, 2]).agg(['min', 'max'])
       min  max
    1    1    2
    2    3    4

    The output column names can be controlled by passing
    the desired column names and aggregations as keyword arguments.

    >>> s.groupby([1, 1, 2, 2]).agg(
    ...     minimum='min',
    ...     maximum='max',
    ... )
       minimum  maximum
    1        1        2
    2        3        4

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the aggregating function.

    >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min())
    1    1.0
    2    3.0
    dtype: float64
    templateZseriesZseries_examples)inputexamplesc                   s   t  j|f||S rF   )superapply)rH   funcargskwargs	__class__rA   rB   rn      s    zSeriesGroupBy.applyrl   rE   Nengineengine_kwargsc             	   O  sb  t |rb|   | j}W 5 Q R X | j| |f|d|i|}| jj}| jj|	 ||j
dS |d k}	d }
|	rt|\}
}i }t|trt| |||S t|tjrt|}| |}|	r|
|_|S t|}|r|s|st| | S | jjdkr
| j|f||S z| j|f||W S  tk
r\   | j|f||}| jj}t||td Y S X d S )Nrw   indexrD   r[   )ry   dtype_if_empty)r7   _group_selection_contextrh   _aggregate_with_numbaZto_framegrouperresult_indexr^   r_   ZravelrD   r'   
isinstancerC   rL   r   r	   r%   _aggregate_multiple_funcscolumnscomZget_cython_funcnkeys_python_agg_generalKeyError_aggregate_namedr)   object)rH   ro   rv   rw   rp   rq   dataresultry   
relabelingr   retZcyfuncrA   rA   rB   	aggregate   sV    
 


  zSeriesGroupBy.aggregater*   c                 C  s.  t |trtdnZtdd |D rBdd |D }tt| }n,g }|D ]}|t|p^| qJt||}i }t	|D ](\}\}}t
j||d}| |||< qztdd | D rdd	lm}	 |	| d
dd | D d}
|
S dd | D }| jj|d d}tdd |D |_| |}|S )Nznested renamer is not supportedc                 s  s   | ]}t |ttfV  qd S rF   r   tuplelist.0xrA   rA   rB   	<genexpr>5  s     z:SeriesGroupBy._aggregate_multiple_funcs.<locals>.<genexpr>c                 S  s&   g | ]}t |ttfs||fn|qS rA   r   r   rA   rA   rB   
<listcomp>6  s     z;SeriesGroupBy._aggregate_multiple_funcs.<locals>.<listcomp>)labelpositionc                 s  s   | ]}t |tV  qd S rF   )r   r*   r   rA   rA   rB   r   H  s     r   concatr[   c                 S  s   g | ]
}|j qS rA   r   r   keyrA   rA   rB   r   L  s     )axiskeysc                 S  s   i | ]\}}|j |qS rA   r   r   r   valrA   rA   rB   
<dictcomp>P  s      z;SeriesGroupBy._aggregate_multiple_funcs.<locals>.<dictcomp>ry   c                 s  s   | ]}|j V  qd S rF   r   r   rA   rA   rB   r   R  s     )r   dictr(   anynextzipappendr   Zget_callable_name	enumerater,   Z	OutputKeyr   valuespandasr   r   itemsr^   _constructor_expanddimr3   r   _reindex_output)rH   argr   fresultsidxrD   ro   r   r   res_dfindexed_outputoutputrA   rA   rB   r   -  s4    


  
z'SeriesGroupBy._aggregate_multiple_funcs"Mapping[base.OutputKey, ArrayLike]r   rZ   c                 C  s:   t |dksttt| }| j|}| jj|_|S )zN
        Wrap the dict result of a GroupBy aggregation into a Series.
        r[   )lenAssertionErrorr   iterr   r^   r_   rD   )rH   r   r   r   rA   rA   rB   _indexed_output_to_ndframeW  s
    
z(SeriesGroupBy._indexed_output_to_ndframeFz	list[Any]boolDataFrame | Series)r   r   not_indexed_samerZ   c                 C  s   t |dkr*| jjg | jj| jj|jdS |dk	s6tt|d t	r| jj}| jj
||d}| |}|j| jd}| jj|_|S t|d ttfr| j||dS | jj|| jj| jjd}| |S dS )a  
        Wrap the output of SeriesGroupBy.apply into the expected result.

        Parameters
        ----------
        data : Series
            Input data for groupby operation.
        values : List[Any]
            Applied output for each group.
        not_indexed_same : bool, default False
            Whether the applied outputs are not indexed the same as the group axes.

        Returns
        -------
        DataFrame or Series
        r   )rD   ry   dtypeNr   dropnar   )r   ry   rD   )r   r^   r_   rD   r}   r~   r   r   r   r   r   r   stackobservedr6   r*   _concat_objects)rH   r   r   r   ry   r   Zres_serr   rA   rA   rB   _wrap_applied_outputc  s.    

  z"SeriesGroupBy._wrap_applied_outputc           	      O  s`   i }d}| D ]N\}}t |d| ||f||}t|}|sRt||j d}|||< q|S )NFrD   T)r   __setattr__libreductionZextract_resultZcheck_result_arrayr   )	rH   ro   rp   rq   r   ZinitializedrD   groupr   rA   rA   rB   r     s    

zSeriesGroupBy._aggregate_namedrE   c                O  s   | j |f|||d|S Nru   Z
_transformrH   ro   rv   rw   rp   rq   rA   rA   rB   	transform  s     zSeriesGroupBy.transformTr   rC   int)hownumeric_onlyr   c              
   K  s   |dkst | j}z| jjd|j||f|}W n: tk
rh } zt| d|j d|W 5 d }~X Y nX |j|| j	j
|jdS )Nr   r   z is not supported for z dtyperx   )r   rh   r}   _cython_operation_valuesNotImplementedError	TypeErrorr   r_   r^   ry   rD   )rH   r   r   r   rq   r^   r   errrA   rA   rB   _cython_transform  s       *zSeriesGroupBy._cython_transformr   )ro   rZ   c                 O  s   t |stt| j}g }| D ]:\}}t|d| ||f||}||||jd q|r~ddlm	}	 |	|}
| 
|
}n| jjtjd}| jj|_|S )z2
        Transform with a callable func`.
        rD   r   r   r   r   )callabler   typer^   r   r   r   ry   pandas.core.reshape.concatr   _set_result_index_orderedr_   npZfloat64rD   )rH   ro   rp   rq   rE   r   rD   r   resr   concatenatedr   rA   rA   rB   _transform_general  s    

z SeriesGroupBy._transform_generalc                 C  s   dS )NTrA   rH   r   rA   rA   rB   _can_use_transform_fast  s    z%SeriesGroupBy._can_use_transform_fastr   c              
     s   t tr fddn fddddfddzfdd	D }W n0 ttfk
r } ztd
|W 5 d}~X Y nX ||}|S )ao  
        Return a copy of a Series excluding elements from groups that
        do not satisfy the boolean criterion specified by func.

        Parameters
        ----------
        func : function
            To apply to each group. Should return True or False.
        dropna : Drop groups that do not pass the filter. True by default;
            if False, groups that evaluate False are filled with NaNs.

        Notes
        -----
        Functions that mutate the passed object can produce unexpected
        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
        ...                           'foo', 'bar'],
        ...                    'B' : [1, 2, 3, 4, 5, 6],
        ...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
        >>> grouped = df.groupby('A')
        >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.)
        1    2
        3    4
        5    6
        Name: B, dtype: int64

        Returns
        -------
        filtered : Series
        c                   s   t |  S rF   rL   r   rp   ro   rq   rA   rB   <lambda>      z&SeriesGroupBy.filter.<locals>.<lambda>c                   s   | f S rF   rA   r   r   rA   rB   r     r   r   rc   c                   s    | }|ot |S rF   )r!   )r   b)wrapperrA   rB   true_and_notna  s    z,SeriesGroupBy.filter.<locals>.true_and_notnac                   s"   g | ]\}}|r  |qS rA   )
_get_index)r   rD   r   )rH   r   rA   rB   r     s     z(SeriesGroupBy.filter.<locals>.<listcomp>z'the filter must return a boolean resultN)r   rC   
ValueErrorr   _apply_filter)rH   ro   r   rp   rq   indicesr   filteredrA   )rp   ro   rq   rH   r   r   rB   filter  s    #

zSeriesGroupBy.filterr   rZ   c              	   C  s  | j j\}}}| jj}tj|dd\}}t||f}|| }|| }tjddt	|dd |dd kd  f }tjd|dd |dd kf }|dk}	|rd||< d||	< n&d||	tjd|	dd f @ < d||< tj
||jddd}
t|r.|d dkr(|
dd }|t| }n|
}n|
dd }| j j}t|t|krztjt||
jd	| }}
|
||| < | jj||| jjd
}| j|ddS )z
        Return number of unique elements in the group.

        Returns
        -------
        Series
            Number of unique values within each group.
        Fsortr   r[   NZint64copyr   rx   )
fill_value)r}   
group_infor^   r   r"   	factorizer   lexsortr_nonzeroaddreduceatastyper   Zflatnonzeror~   zerosr   r_   rD   r   )rH   r   ids_r   codessorterr   incmaskoutr   rir   rA   rA   rB   nunique  s6    	0"

zSeriesGroupBy.nuniquec                   s   t  jf |S rF   )rm   describe)rH   rq   rr   rA   rB   r	  J  s    zSeriesGroupBy.describe)	normalizer   	ascendingr   c           !        s  ddl m} ddlm} jj\}}	}	jj}
 	fdd}d k	r\t	sl| S nt
|
jrl| S |dk| |
  }}
d krtj|
dd\}}d	d
 }n8|t|
dd}|jj}|j|jjd|jd}dd
 }t|jrt|j|j|f}nt||f}|| ||  }}dt|dd  |d d kd  }tjd|f }t|sf|}||tdd ||td dk}tjd|f }t|
s|}d||< tttj|df d }ttjtj ||djj!}fdd|D |||g }dd jj"D |g }jj#jj$g }|rn|d dk% rRd}n| fdd|D  }}r|&d}ttj|t|f }|r||dk }tj'||d | }n|}|| }	r4d kr4|r||  n|| }t r|n| |f}|| |d |  }|d< d k	r~tj(t|dd|d d D ],}tjd|dd  |d d kf O q\) t|d  }tt*|t+t*|g}, d |d g}|||ddd\}	}t-|dk|| d}	r@t r|n| |d f}|| |d |  }|d< dddfddfdd|d d D }|.|d  t/|||dd} t0|jrt1|}jj2|| jj$d S )!Nr   )get_join_indexers)cutc                     s   j tj dS )N)r
  r   r  bins)rn   r6   value_countsrA   )r  r  r
  rH   r   rA   rB   apply_series_value_counts]  s    z=SeriesGroupBy.value_counts.<locals>.apply_series_value_countsr   Tr   c                 S  s   | | S rF   rA   labr  rA   rA   rB   r   u  r   z,SeriesGroupBy.value_counts.<locals>.<lambda>)Zinclude_lowest)Z
allow_fillr   c                 S  s   | | j jd S )Nr   )Z_multiindexr  r  rA   rA   rB   r     r   r[   )Zrepeatsc                   s   g | ]} |qS rA   rA   r   level_codes)reprA   rB   r     s     z.SeriesGroupBy.value_counts.<locals>.<listcomp>c                 S  s   g | ]
}|j qS rA   )Zgroup_index)r   ZpingrA   rA   rB   r     s     Fc                   s   g | ]}|  qS rA   rA   r  )r  rA   rB   r     s     floatr   r   left)r   r   z
np.ndarray)	lev_codesrZ   c                   s   t |   S rF   )r   repeat)r  )diffnbinrA   rB   build_codes  s    z/SeriesGroupBy.value_counts.<locals>.build_codesc                   s   g | ]} |qS rA   rA   )r   r  )r  rA   rB   r     s     )levelsr  namesverify_integrityrx   )3Zpandas.core.reshape.merger  Zpandas.core.reshape.tiler  r}   r   r^   r   r   iterabler   r   r"   r   r6   cat
categoriestaker  Z	_na_valuer   r   r  rightr   r   r   slicer  r   r  r   r   Zreconstructed_codes	groupingsr  rD   allr   atr   sumZarangeZtileZcumsumwherer   r4   r   r   r_   )!rH   r
  r   r  r  r   r  r  r   r  r   r  r  levZllabr  Z	idchangesr   Zlchangesr  r  r  r  r  dmaccr!  r  Zncatr  r$  mirA   )
r  r  r  r  r  r  r
  r  rH   r   rB   r  N  s    		


&
 



*$zSeriesGroupBy.value_counts   firstnkeepc                 C  s*   t tj||d}| j}| j||dd}|S Nr2  Tr   )r   r6   nlargestrd   _python_apply_generalrH   r3  r4  r   r   r   rA   rA   rB   r6    s    zSeriesGroupBy.nlargestc                 C  s*   t tj||d}| j}| j||dd}|S r5  )r   r6   	nsmallestrd   r7  r8  rA   rA   rB   r9    s    zSeriesGroupBy.nsmallest)N)F)Tr   )T)T)FTFNT)r0  r1  )r0  r1  )%r=   r>   r?   r,   series_apply_allowlist_apply_allowlistrb   rf   ri   r   _agg_examples_docr   r/   formatrn   r   r.   r   aggr   r   r   r   r   r0   r   r   r   r   r   r  r6   r	  r  r6  r9  __classcell__rA   rA   rr   rB   rW      sZ   2 4* 3   75      rW   c                      s  e Zd ZejZedZee	edddYdddddZ
e
Zdd	d
dZdd	ddZdd	ddZdZddddddZddddddZd[dddddd d!Zd"d# Zedd$eedddd%d&Zdd	d'd(Zd)d* Zd+d+dd,d-d.Zddd/d0d1Zd\d2d3Zd4d	 fd5d6Zd]dd7d8d9Zd:d	d;d<Zdd=d>d?d@ZdAddBdCdDZ d:ddEdFdGZ!ddHdIdJZ"ddd/dKdLZ#d^dddMdNdOZ$ee%j&j'd_ddPdQdRZ&ee%j(j'd`ddPdSdTZ(e)Z*dadUddddddVdWdXZ+  Z,S )bDataFrameGroupBya  
    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "A": [1, 1, 2, 2],
    ...         "B": [1, 2, 3, 4],
    ...         "C": [0.362838, 0.227877, 1.267767, -0.562860],
    ...     }
    ... )

    >>> df
       A  B         C
    0  1  1  0.362838
    1  1  2  0.227877
    2  2  3  1.267767
    3  2  4 -0.562860

    The aggregation is for each column.

    >>> df.groupby('A').agg('min')
       B         C
    A
    1  1  0.227877
    2  3 -0.562860

    Multiple aggregations

    >>> df.groupby('A').agg(['min', 'max'])
        B             C
      min max       min       max
    A
    1   1   2  0.227877  0.362838
    2   3   4 -0.562860  1.267767

    Select a column for aggregation

    >>> df.groupby('A').B.agg(['min', 'max'])
       min  max
    A
    1    1    2
    2    3    4

    Different aggregations per column

    >>> df.groupby('A').agg({'B': ['min', 'max'], 'C': 'sum'})
        B             C
      min max       sum
    A
    1   1   2  0.590715
    2   3   4  0.704907

    To control the output names with different aggregations per column,
    pandas supports "named aggregation"

    >>> df.groupby("A").agg(
    ...     b_min=pd.NamedAgg(column="B", aggfunc="min"),
    ...     c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
       b_min     c_sum
    A
    1      1  0.590715
    2      3  0.704907

    - The keywords are the *output* column names
    - The values are tuples whose first element is the column to select
      and the second element is the aggregation to apply to that column.
      Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
      ``['column', 'aggfunc']`` to make it clearer what the arguments are.
      As usual, the aggregation can be a callable or a string alias.

    See :ref:`groupby.aggregate.named` for more.

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the aggregating function.

    >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min())
          B
    A
    1   1.0
    2   3.0
    r*   rt   Nru   c             
   O  s  t |rZ|   | j}W 5 Q R X | j||f|d|i|}| jj}| jj|||jdS t	|f|\}	}}
}t
|}t| |||}| }t|s|d k	r|S |	r|d k	r|jd d |f }|
|_|d kr| jjdkr| j|f||S |s|r
| j|f||}n| jdkr$| |}|S t| |gdi d}z| }W n> tk
r } zdt|krf | |}W 5 d }~X Y nRX | j}t|tr| jj |_n.|jj|jjtt|jjd |jd|_| js|  | t!tt"||_#|S )	Nrw   ry   r   r[   rA   )rp   rq   z
no results)levelr   )$r7   r{   rh   r|   r}   r~   r^   r_   r   r&   r%   r$   r>  r   ilocr   r   _aggregate_framer   r   rC   r   r6   rd   r   
_set_namesr  r   rangenlevels	droplevelas_index_insert_inaxis_grouper_inplacer3   r   ry   )rH   ro   rv   rw   rp   rq   r   r   ry   r   r   orderopZgbar   ZsobjrA   rA   rB   r   U  sf    
 


 
zDataFrameGroupBy.aggregaterg   rc   c                 c  s\   | j }| jdkr|j}t|tr4|j| jkr4|V  n$| D ]\}}|| jkrPq<|V  q<d S Nr[   )rh   r   Tr   r6   rD   
exclusionsr   )rH   r^   r   r   rA   rA   rB   ri     s    

z DataFrameGroupBy._iterate_slicesc                 O  s   | j jdkrtd| j}i }| jdkrP| D ] \}}||f||}|||< q,n2| jD ]*}| j||d}	||	f||}|||< qV| j j}
|jd| j  }| j	j
|||
d}| jdkr|j}|S )Nr[   zNumber of keys must be 1r   r^   rA  )r}   r   r   rd   r   r   Z	get_groupr~   axesr^   r_   rN  )rH   ro   rp   rq   r^   r   rD   r   ZfresZgrp_dfr~   Zother_axr  rA   rA   rB   rD    s$    



z!DataFrameGroupBy._aggregate_framec           
      O  sT   | j }i }t| |D ]"\}\}}|j|f||||< q| j|}	|j|	_|	S rF   )rd   r   _iterate_column_groupbysr   r^   r_   r   )
rH   ro   rp   rq   r^   r   iitemsgbr   rA   rA   rB   _aggregate_item_by_item  s    z(DataFrameGroupBy._aggregate_item_by_itemFr   r   )r   r   r   c                 C  s   t |dkr6| jj| jj|jd}|j|jdd}|S tt	j
| d }|d krX| j S t|trp| j||dS | jr~| jjnd }t|tjtfr| jj||| jdS t|ts| jr| jj||dS | jj|| jgd}| | |S n| ||||S d S )	Nr   rA  Fr   r   rx   r   )r   )r   r^   r_   r}   r~   r   r   Zdtypesr   r   Znot_noner   r*   r   rI  r   Zndarrayr3   _constructor_slicedZ
_selectionr6   rJ  _wrap_applied_output_series)rH   r   r   r   r   first_not_none	key_indexrA   rA   rB   r     s>     

  

   z%DataFrameGroupBy._wrap_applied_outputzlist[Series]r   )r   r   rZ   c                   s\  |  }tf dti|  fdd|D }tdd |D }| jr| j| j}t|dkof|j	dk}|rz| j
||dS |rdd	lm}	 |	|S |s| j
|d
dS tdd |D }
| jdkr
|}|j }|jd krdd |D }t|dkrt|d |_n|j}|}|
j}
|
jtkr.|
 }
| jj|
||d}| jsR| | | |S )Nrz   c                   s   g | ]}|d k	r|n qS rF   rA   r   backuprA   rB   r     s     z@DataFrameGroupBy._wrap_applied_output_series.<locals>.<listcomp>c                 s  s   | ]}|j V  qd S rF   r   r   rA   rA   rB   r     s     z?DataFrameGroupBy._wrap_applied_output_series.<locals>.<genexpr>r[   r   r   r   Tc                 S  s   g | ]}t |qS rA   )r   Zasarrayr   vrA   rA   rB   r   8  s     c                 S  s   h | ]
}|j qS rA   rI   r]  rA   rA   rB   	<setcomp>?  s     z?DataFrameGroupBy._wrap_applied_output_series.<locals>.<setcomp>rA  )Z_construct_axes_dictr)   r   r5   squeezerh   	_get_axisr   r   rG  r   r   r   r   Zvstackry   r   rD   r   rN  r   tolistr^   r_   rI  rJ  r   )rH   r   r   rY  rZ  rq   Zall_indexed_sameZapplied_indexZsingular_seriesr   Zstacked_valuesry   r   r  r   rA   r[  rB   rX    s>    	

z,DataFrameGroupBy._wrap_applied_output_seriesTr   rC   r   )r   r   r   rZ   c           	        s   |dkst  }|r$|jdd}ddd fdd}|j|dd	}|d
|jd
  t|t|k rztt  j	
|}jd
kr|j}|S )Nr   Fr   r   )bvaluesrZ   c                   s   j jd|  dfS )Nr   r[   )r}   r   )rc  r   rq   rH   rA   rB   arr_func`  s       z4DataFrameGroupBy._cython_transform.<locals>.arr_funcT)Zignore_failuresr[   )r   rf   Zget_numeric_dataZgrouped_reduceset_axisrQ  r   r1   r   r^   r_   r   rN  )	rH   r   r   r   rq   rY   re  Zres_mgrr   rA   rd  rB   r   Q  s    
z"DataFrameGroupBy._cython_transformc              
   O  s  ddl m} g }| j}| jj|| jd}| j|f||\}}	zt|\}
}W n tk
rb   Y nX t	
|d|
 z| ||	|\}}W nN tk
r   | || Y S  tk
r } zd}t||W 5 d }~X Y nX |jdkrt| j||}|| |D ]H\}
}|jdkr qt	
|d|
 ||}t| j||}||  q| jdkrZ|jn|j}| jdkrpdnd}||| jdd}|j||dd	}| |S )
Nr   r   r   rD   z3transform must return a scalar value for each groupr[   F)r   r  )r   r   )r   r   rd   r}   get_iteratorr   _define_pathsr   StopIterationr   r   _choose_pathr   _transform_item_by_itemr   size_wrap_transform_general_framer^   r   r   ry   Zreindexr   )rH   ro   rp   rq   r   Zappliedr^   gen	fast_path	slow_pathrD   r   pathr   r   msgZconcat_indexZ
other_axisr   rA   rA   rB   r   r  s@    

z#DataFrameGroupBy._transform_generalr   c                O  s   | j |f|||d|S r   r   r   rA   rA   rB   r     s     zDataFrameGroupBy.transformc                 C  s   t |to|j| jjS rF   )r   r*   r   equalsrd   r   rA   rA   rB   r     s    z(DataFrameGroupBy._can_use_transform_fastc                   sX   t tr. fdd} fdd}n" fdd} fdd}||fS )Nc                   s   t |  S rF   r   r   r   rA   rB   r     r   z0DataFrameGroupBy._define_paths.<locals>.<lambda>c                   s   | j  fddjdS )Nc                   s   t |  S rF   r   r   r   rA   rB   r     r   BDataFrameGroupBy._define_paths.<locals>.<lambda>.<locals>.<lambda>rg  rn   r   ru  rp   ro   rq   rH   rA   rB   r     s    c                   s   | f S rF   rA   ru  r   rA   rB   r     r   c                   s   | j  fddjdS )Nc                   s   | f S rF   rA   r   r   rA   rB   r     r   rv  rg  rw  ru  rx  rA   rB   r     s    )r   rC   )rH   ro   rp   rq   rp  rq  rA   rx  rB   ri    s    
zDataFrameGroupBy._define_pathsr   )rp  rq  r   c                 C  s   |}||}z||}W n0 t k
r.    Y n tk
rH   ||f Y S X t|ts\||fS |j|jsr||fS ||r|}||fS rF   )r   	Exceptionr   r*   r   rt  )rH   rp  rq  r   rr  r   Zres_fastrA   rA   rB   rk    s    

zDataFrameGroupBy._choose_path)r^   rZ   c           
   	   C  s   i }g }t | |D ]L\}\}}z||||< W n" tk
rV   tt| d Y qX || q|sptd|j|}| j	j
||jd}	||	_|	S )Nr   z)Transform function invalid for data typesr   )r   rR  r   r   r1   r   r   r   r#  r^   r_   ry   )
rH   r^   r   r   ZindsrS  colnamerU  r   r   rA   rA   rB   rl    s    z(DataFrameGroupBy._transform_item_by_itemc              	   O  s   g }| j }| jj|| jd}|D ]\}}	t|	d| ||	f||}
z|
 }
W n tk
rf   Y nX t|
st	|
rt
|
r|
rt|
r|| | q tdt|
j dq | ||S )a1  
        Return a copy of a DataFrame excluding filtered elements.

        Elements from groups are filtered if they do not satisfy the
        boolean criterion specified by func.

        Parameters
        ----------
        func : function
            Function to apply to each subframe. Should return True or False.
        dropna : Drop groups that do not pass the filter. True by default;
            If False, groups that evaluate False are filled with NaNs.

        Returns
        -------
        filtered : DataFrame

        Notes
        -----
        Each subframe is endowed the attribute 'name' in case you need to know
        which group you are working on.

        Functions that mutate the passed object can produce unexpected
        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
        ...                           'foo', 'bar'],
        ...                    'B' : [1, 2, 3, 4, 5, 6],
        ...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
        >>> grouped = df.groupby('A')
        >>> grouped.filter(lambda x: x['B'].mean() > 3.)
             A  B    C
        1  bar  2  5.0
        3  bar  4  1.0
        5  bar  6  9.0
        rg  rD   zfilter function returned a z, but expected a scalar bool)rh   r}   rh  r   r   r   r`  AttributeErrorr   r   r    r!   r   r   r   r   r=   r   )rH   ro   r   rp   rq   r   r^   ro  rD   r   r   rA   rA   rB   r     s"    (zDataFrameGroupBy.filterz DataFrameGroupBy | SeriesGroupByc                   sF   | j dkrtdt|tr:t|dkr:tjdtt d t	 
|S )Nr[   z'Cannot subset columns when using axis=1zmIndexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.)
stacklevel)r   r   r   r   r   warningswarnFutureWarningr   rm   __getitem__)rH   r   rr   rA   rB   r  ,  s    
zDataFrameGroupBy.__getitem__)r\   c                 C  s   |dkrR|dkr| j }t|| j| j| j| j| j|| j| j| j| j	| j
| j| jdS |dkr|dkrl| j | }t|| j| j|| j| j| j	| j
| jd	S tddS )a  
        sub-classes to define
        return a sliced object

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
           N)r   rB  r}   rO  	selectionrI  r   
group_keysr`  r   mutatedr   r[   )rB  r}   r  r   r  r`  r   r   zinvalid ndim for _gotitem)r^   r@  r}   r   rB  rO  rI  r   r  r`  r   r  r   rW   r   )rH   r   r\   subsetrA   rA   rB   _gotitem<  sD    
zDataFrameGroupBy._gotitemr   c                 C  s"   | j }| jdkr|jjS |jS d S rM  )rd   r   rN  re   )rH   r^   rA   rA   rB   rf   m  s    
z'DataFrameGroupBy._get_data_to_aggregateNone)r   rZ   c                 C  s`   |j }tt| jjt| j tdd | jjD D ]$\}}}|r6||kr6|d|| q6d S )Nc                 S  s   g | ]
}|j qS rA   )in_axis)r   grprA   rA   rB   r   z  s     zCDataFrameGroupBy._insert_inaxis_grouper_inplace.<locals>.<listcomp>r   )r   r   reversedr}   r  Zget_group_levelsr&  insert)rH   r   r   rD   r+  r  rA   rA   rB   rJ  t  s    
z/DataFrameGroupBy._insert_inaxis_grouper_inplacer   r   c                 C  sT   dd |  D }tdd |D }|| jd| j j | j|}||_	|S )zQ
        Wrap the dict result of a GroupBy aggregation into a DataFrame.
        c                 S  s   i | ]\}}|j |qS rA   r   r   rA   rA   rB   r     s      z?DataFrameGroupBy._indexed_output_to_ndframe.<locals>.<dictcomp>c                 S  s   g | ]
}|j qS rA   r   r   rA   rA   rB   r     s     z?DataFrameGroupBy._indexed_output_to_ndframe.<locals>.<listcomp>r[   )
r   r3   rE  rd   ra  r   r  r^   r_   r   )rH   r   r   r   r   rA   rA   rB   r     s    z+DataFrameGroupBy._indexed_output_to_ndframerX   c                 C  s   | j sZ|jd dkr|jd nd}tt|}|d| | j|}| | | }n | j	j
}|d| | j|}| jdkr|j}| |jddS )Nr   r[   T)datetime)rI  shaper3   rF  rf  r^   r_   rJ  Z_consolidater}   r~   r   rN  r   _convert)rH   rY   rowsry   r   rA   rA   rB   rb     s    


z$DataFrameGroupBy._wrap_agged_managerrP  c              	   c  sD   t |jD ]4\}}|t|jd d |f || j| j| jdfV  q
d S )N)r  r}   rO  r   )r   r   rW   rC  r}   rO  r   )rH   r^   rS  rz  rA   rA   rB   rR    s    z)DataFrameGroupBy._iterate_column_groupbysc                   sV   ddl m} |j} fdd| |D }t|sDtg || jjdS |||ddS d S )Nr   r   c                   s   g | ]\}} |qS rA   rA   )r   r  Zcol_groupbyro   rA   rB   r     s    z>DataFrameGroupBy._apply_to_column_groupbys.<locals>.<listcomp>r   ry   r[   )r   r   )r   r   r   rR  r   r*   r}   r~   )rH   ro   r^   r   r   r   rA   r  rB   _apply_to_column_groupbys  s    
z*DataFrameGroupBy._apply_to_column_groupbysr   c                   s^   | j dkr|  fddS | j}| j fdd|d}| jsZttt||_| 	| |S )a  
        Return DataFrame with counts of unique elements in each position.

        Parameters
        ----------
        dropna : bool, default True
            Don't include NaN in the counts.

        Returns
        -------
        nunique: DataFrame

        Examples
        --------
        >>> df = pd.DataFrame({'id': ['spam', 'egg', 'egg', 'spam',
        ...                           'ham', 'ham'],
        ...                    'value1': [1, 5, 5, 2, 5, 5],
        ...                    'value2': list('abbaxy')})
        >>> df
             id  value1 value2
        0  spam       1      a
        1   egg       5      b
        2   egg       5      b
        3  spam       2      a
        4   ham       5      x
        5   ham       5      y

        >>> df.groupby('id').nunique()
              value1  value2
        id
        egg        1       1
        ham        1       2
        spam       2       1

        Check for rows with the same id but conflicting values:

        >>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any())
             id  value1 value2
        0  spam       1      a
        3  spam       2      a
        4   ham       5      x
        5   ham       5      y
        r   c                   s
   |   S rF   r  rU  r   rA   rB   r     r   z*DataFrameGroupBy.nunique.<locals>.<lambda>c                   s
   |   S rF   r  r  r   rA   rB   r     r   rP  )
r   r   rd   r  rI  r3   rF  r   ry   rJ  )rH   r   r^   r   rA   r   rB   r    s    -

 
zDataFrameGroupBy.nunique)skipnac                   s>   t    dkrd nd fdd}d|_| || jS )Nr   Fc                   sH   | j tjdd}|j}|   fdd|D }| j||jdS )NZargmaxr   r  r   c                   s"   g | ]}|d kr | nt jqS r   r   nanr   rS  r   rA   rB   r   	  s     z9DataFrameGroupBy.idxmax.<locals>.func.<locals>.<listcomp>r   )_reducer#   Z	nanargmaxr   ra  rW  ry   dfr   r   r   r   r   r  r   rB   ro     s    
z%DataFrameGroupBy.idxmax.<locals>.funcidxmaxr*   Z_get_axis_numberr=   r7  rd   rH   r   r  ro   rA   r  rB   r    s
    
zDataFrameGroupBy.idxmaxc                   s>   t    dkrd nd fdd}d|_| || jS )Nr   Fc                   sH   | j tjdd}|j}|   fdd|D }| j||jdS )NZargminr  c                   s"   g | ]}|d kr | nt jqS r  r  r  r   rA   rB   r     s     z9DataFrameGroupBy.idxmin.<locals>.func.<locals>.<listcomp>r   )r  r#   Z	nanargminr   ra  rW  ry   r  r  r   rB   ro     s    
z%DataFrameGroupBy.idxmin.<locals>.funcidxminr  r  rA   r  rB   r    s
    
zDataFrameGroupBy.idxminzSequence[Hashable] | None)r  r
  r   r  r   rZ   c              
     s  j dkrtd  j}dd jjD  tjtr`jj	}| krVg njg}n fddt
jjD }|dk	rt|t @ }	|	rtd|	 d	tjj}
|D ].}t||j j|d
\}}}|
t|j7 }
q|j|
jjjd}tt| }|r\tttjj|jj}|j|j|jjjdd}|| }|rttjj}|j|dj|dd}js|j|rdndd}|j jddW  5 Q R  S Q R X dS )u  
        Return a Series or DataFrame containing counts of unique rows.

        .. versionadded:: 1.4.0

        Parameters
        ----------
        subset : list-like, optional
            Columns to use when counting unique combinations.
        normalize : bool, default False
            Return proportions rather than frequencies.
        sort : bool, default True
            Sort by frequencies.
        ascending : bool, default False
            Sort in ascending order.
        dropna : bool, default True
            Don’t include counts of rows that contain NA values.

        Returns
        -------
        Series or DataFrame
            Series if the groupby as_index is True, otherwise DataFrame.

        See Also
        --------
        Series.value_counts: Equivalent method on Series.
        DataFrame.value_counts: Equivalent method on DataFrame.
        SeriesGroupBy.value_counts: Equivalent method on SeriesGroupBy.

        Notes
        -----
        - If the groupby as_index is True then the returned Series will have a
          MultiIndex with one level per input column.
        - If the groupby as_index is False then the returned DataFrame will have an
          additional column with the value_counts. The column is labelled 'count' or
          'proportion', depending on the ``normalize`` parameter.

        By default, rows that contain any NA values are omitted from
        the result.

        By default, the result will be in descending order so that the
        first element of each group is the most frequently-occurring row.

        Examples
        --------
        >>> df = pd.DataFrame({
        ...    'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
        ...    'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
        ...    'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
        ... })

        >>> df
            gender 	education 	country
        0 	male 	low 	    US
        1 	male 	medium 	    FR
        2 	female 	high 	    US
        3 	male 	low 	    FR
        4 	female 	high 	    FR
        5 	male 	low 	    FR

        >>> df.groupby('gender').value_counts()
        gender  education  country
        female  high       FR         1
                           US         1
        male    low        FR         2
                           US         1
                medium     FR         1
        dtype: int64

        >>> df.groupby('gender').value_counts(ascending=True)
        gender  education  country
        female  high       FR         1
                           US         1
        male    low        US         1
                medium     FR         1
                low        FR         2
        dtype: int64

        >>> df.groupby('gender').value_counts(normalize=True)
        gender  education  country
        female  high       FR         0.50
                           US         0.50
        male    low        FR         0.50
                           US         0.25
                medium     FR         0.25
        dtype: float64

        >>> df.groupby('gender', as_index=False).value_counts()
           gender education country  count
        0  female      high      FR      1
        1  female      high      US      1
        2    male       low      FR      2
        3    male       low      US      1
        4    male    medium      FR      1

        >>> df.groupby('gender', as_index=False).value_counts(normalize=True)
           gender education country  proportion
        0  female      high      FR        0.50
        1  female      high      US        0.50
        2    male       low      FR        0.50
        3    male       low      US        0.25
        4    male    medium      FR        0.25
        r[   z1DataFrameGroupBy.value_counts only handles axis=0c                 S  s   h | ]}|j r|jqS rA   )r  rD   )r   groupingrA   rA   rB   r_    s     z0DataFrameGroupBy.value_counts.<locals>.<setcomp>c                   s,   g | ]$\}}| krj jd d |f qS rF   )rh   rC  )r   r   rD   Zin_axis_namesrH   rA   rB   r     s   z1DataFrameGroupBy.value_counts.<locals>.<listcomp>NzKeys z/ in subset cannot be in the groupby column keys)r   r   r   r   )r   r   r   r)  )r  F)rB  Zsort_remainingZ
proportioncountrI   r  )method)!r   r   r{   r^   r}   r&  r   rh   r6   rD   r   r   setr   r   r2   r   groupbyr   r   r   rm  rF  r   ry   rG  rH  r   Zsort_valuesZ
sort_indexrI  Zreset_indexZ__finalize__)rH   r  r
  r   r  r   r  rD   r   Zclashingr&  r   r}   r  gbr   r  Zindexed_group_sizeZindex_levelrA   r  rB   r  '  st    o



 zDataFrameGroupBy.value_counts)N)F)Tr   )T)N)T)r   T)r   T)NFTFT)-r=   r>   r?   r,   dataframe_apply_allowlistr;  r   r<  r   r.   r   r>  ri   rD  rV  r   rX  r   r   r   r   r0   r   r   ri  rk  rl  r   r  r  rf   rJ  r   rb   rR  r  r  r*   r  rM   r  r8   Zboxplotr  r?  rA   rA   rr   rB   r@    sZ   UK /F   !+
D1
<

     r@  r*   r   )r^   r   r   rZ   c                 C  s   ddl m} t|tr|j| jrN||gt|j dd}|j|_|j|_n0| jt	
|jgt|j |j|j|jd}t|tst|S |S d S )Nr   r   r[   rg  r  )r   r   r   r6   ry   is_r   r   r_   r   Zconcatenater   Zreshaper  r*   r   )r^   r   r   r   Z	res_framerA   rA   rB   rn    s    

rn  )crM   
__future__r   collectionsr   	functoolsr   textwrapr   typingr   r   r   r	   r
   r   r   r   r   r   r}  numpyr   Zpandas._libsr   r   Zpandas._typingr   r   r   r   Zpandas.util._decoratorsr   r   r   Zpandas.util._exceptionsr   Zpandas.core.dtypes.commonr   r   r   r   r   r   r   Zpandas.core.dtypes.missingr    r!   Zpandas.corer"   r#   Zpandas.core.applyr$   r%   r&   r'   Zpandas.core.baser(   Zpandas.core.commoncorecommonr   Zpandas.core.constructionr)   Zpandas.core.framer*   Zpandas.core.genericr+   Zpandas.core.groupbyr,   Zpandas.core.groupby.groupbyr-   r.   r/   r0   r1   Zpandas.core.groupby.grouperr2   Zpandas.core.indexes.apir3   r4   r5   Zpandas.core.seriesr6   Zpandas.core.util.numba_r7   Zpandas.plottingr8   rC   r<   r9   r:   rO   rV   r:  rW   r  r@  rn  rA   rA   rA   rB   <module>   s`   0$	&
    Y
       n