U
    +if                  
   @  s  d Z ddlmZ ddlmZmZmZmZ ddlZddl	Z
ddlmZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlm  mZ ddlmZm Z  ddl!m  m"Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z* ddl+m,Z,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2 er*ddl3m4Z4 G dd dZ5eG dd dZ6d)dddddddddddZ7dd d!d"Z8d#d$d%d&Z9d'd( Z:dS )*z]
Provide user facing operators for doing the split part of the
split-apply-combine paradigm.
    )annotations)TYPE_CHECKINGAnyHashablefinalN)	ArrayLikeNDFrameTnpt)InvalidIndexError)cache_readonly)find_stack_level)sanitize_to_nanoseconds)is_categorical_dtypeis_list_like	is_scalar)CategoricalExtensionArray)	DataFrame)ops)recode_for_groupbyrecode_from_groupby)CategoricalIndexIndex
MultiIndex)Series)pprint_thing)NDFramec                      s   e Zd ZU dZded< ded< ded< ded< ded	< d
Zded<  fddZd(ddddddZee	ddddZ
d)ddddddZed*ddd d!d"Zee	d#d$ Zed%dd&d'Z  ZS )+Grouperan  
    A Grouper allows the user to specify a groupby instruction for an object.

    This specification will select a column via the key parameter, or if the
    level and/or axis parameters are given, a level of the index of the target
    object.

    If `axis` and/or `level` are passed as keywords to both `Grouper` and
    `groupby`, the values passed to `Grouper` take precedence.

    Parameters
    ----------
    key : str, defaults to None
        Groupby key, which selects the grouping column of the target.
    level : name/number, defaults to None
        The level for the target index.
    freq : str / frequency object, defaults to None
        This will groupby the specified frequency if the target selection
        (via key or level) is a datetime-like object. For full specification
        of available frequencies, please see `here
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
    axis : str, int, defaults to 0
        Number/name of the axis.
    sort : bool, default to False
        Whether to sort the resulting labels.
    closed : {'left' or 'right'}
        Closed end of interval. Only when `freq` parameter is passed.
    label : {'left' or 'right'}
        Interval boundary to use for labeling.
        Only when `freq` parameter is passed.
    convention : {'start', 'end', 'e', 's'}
        If grouper is PeriodIndex and `freq` parameter is passed.
    base : int, default 0
        Only when `freq` parameter is passed.
        For frequencies that evenly subdivide 1 day, the "origin" of the
        aggregated intervals. For example, for '5min' frequency, base could
        range from 0 through 4. Defaults to 0.

        .. deprecated:: 1.1.0
            The new arguments that you should use are 'offset' or 'origin'.

    loffset : str, DateOffset, timedelta object
        Only when `freq` parameter is passed.

        .. deprecated:: 1.1.0
            loffset is only working for ``.resample(...)`` and not for
            Grouper (:issue:`28302`).
            However, loffset is also deprecated for ``.resample(...)``
            See: :class:`DataFrame.resample`

    origin : Timestamp or str, default 'start_day'
        The timestamp on which to adjust the grouping. The timezone of origin must
        match the timezone of the index.
        If string, must be one of the following:

        - 'epoch': `origin` is 1970-01-01
        - 'start': `origin` is the first value of the timeseries
        - 'start_day': `origin` is the first day at midnight of the timeseries

        .. versionadded:: 1.1.0

        - 'end': `origin` is the last value of the timeseries
        - 'end_day': `origin` is the ceiling midnight of the last day

        .. versionadded:: 1.3.0

    offset : Timedelta or str, default is None
        An offset timedelta added to the origin.

        .. versionadded:: 1.1.0

    dropna : bool, default True
        If True, and if group keys contain NA values, NA values together with
        row/column will be dropped. If False, NA values will also be treated as
        the key in groups.

        .. versionadded:: 1.2.0

    Returns
    -------
    A specification for a groupby instruction

    Examples
    --------
    Syntactic sugar for ``df.groupby('A')``

    >>> df = pd.DataFrame(
    ...     {
    ...         "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"],
    ...         "Speed": [100, 5, 200, 300, 15],
    ...     }
    ... )
    >>> df
       Animal  Speed
    0  Falcon    100
    1  Parrot      5
    2  Falcon    200
    3  Falcon    300
    4  Parrot     15
    >>> df.groupby(pd.Grouper(key="Animal")).mean()
            Speed
    Animal
    Falcon  200.0
    Parrot   10.0

    Specify a resample operation on the column 'Publish date'

    >>> df = pd.DataFrame(
    ...    {
    ...        "Publish date": [
    ...             pd.Timestamp("2000-01-02"),
    ...             pd.Timestamp("2000-01-02"),
    ...             pd.Timestamp("2000-01-09"),
    ...             pd.Timestamp("2000-01-16")
    ...         ],
    ...         "ID": [0, 1, 2, 3],
    ...         "Price": [10, 20, 30, 40]
    ...     }
    ... )
    >>> df
      Publish date  ID  Price
    0   2000-01-02   0     10
    1   2000-01-02   1     20
    2   2000-01-09   2     30
    3   2000-01-16   3     40
    >>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean()
                   ID  Price
    Publish date
    2000-01-02    0.5   15.0
    2000-01-09    2.0   30.0
    2000-01-16    3.0   40.0

    If you want to adjust the start of the bins based on a fixed timestamp:

    >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00'
    >>> rng = pd.date_range(start, end, freq='7min')
    >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng)
    >>> ts
    2000-10-01 23:30:00     0
    2000-10-01 23:37:00     3
    2000-10-01 23:44:00     6
    2000-10-01 23:51:00     9
    2000-10-01 23:58:00    12
    2000-10-02 00:05:00    15
    2000-10-02 00:12:00    18
    2000-10-02 00:19:00    21
    2000-10-02 00:26:00    24
    Freq: 7T, dtype: int64

    >>> ts.groupby(pd.Grouper(freq='17min')).sum()
    2000-10-01 23:14:00     0
    2000-10-01 23:31:00     9
    2000-10-01 23:48:00    21
    2000-10-02 00:05:00    54
    2000-10-02 00:22:00    24
    Freq: 17T, dtype: int64

    >>> ts.groupby(pd.Grouper(freq='17min', origin='epoch')).sum()
    2000-10-01 23:18:00     0
    2000-10-01 23:35:00    18
    2000-10-01 23:52:00    27
    2000-10-02 00:09:00    39
    2000-10-02 00:26:00    24
    Freq: 17T, dtype: int64

    >>> ts.groupby(pd.Grouper(freq='17min', origin='2000-01-01')).sum()
    2000-10-01 23:24:00     3
    2000-10-01 23:41:00    15
    2000-10-01 23:58:00    45
    2000-10-02 00:15:00    45
    Freq: 17T, dtype: int64

    If you want to adjust the start of the bins with an `offset` Timedelta, the two
    following lines are equivalent:

    >>> ts.groupby(pd.Grouper(freq='17min', origin='start')).sum()
    2000-10-01 23:30:00     9
    2000-10-01 23:47:00    21
    2000-10-02 00:04:00    54
    2000-10-02 00:21:00    24
    Freq: 17T, dtype: int64

    >>> ts.groupby(pd.Grouper(freq='17min', offset='23h30min')).sum()
    2000-10-01 23:30:00     9
    2000-10-01 23:47:00    21
    2000-10-02 00:04:00    54
    2000-10-02 00:21:00    24
    Freq: 17T, dtype: int64

    To replace the use of the deprecated `base` argument, you can now use `offset`,
    in this example it is equivalent to have `base=2`:

    >>> ts.groupby(pd.Grouper(freq='17min', offset='2min')).sum()
    2000-10-01 23:16:00     0
    2000-10-01 23:33:00     9
    2000-10-01 23:50:00    36
    2000-10-02 00:07:00    39
    2000-10-02 00:24:00    24
    Freq: 17T, dtype: int64
    intaxisboolsortdropnaIndex | None
_gpr_index_grouper)keylevelfreqr   r!   ztuple[str, ...]_attributesc                   s6   | dd k	r*ddlm} t|| d |} t | S )Nr(   r   )TimeGrouper)origin)getZpandas.core.resampler*   !_check_deprecated_resample_kwargssuper__new__)clsargskwargsr*   	__class__ N/home/mars/bis/venv/lib/python3.8/site-packages/pandas/core/groupby/grouper.pyr/     s
    zGrouper.__new__Nr   FT)r   r!   r"   c                 C  sR   || _ || _|| _|| _|| _d | _d | _d | _d | _d | _	d | _
d | _|| _d S N)r&   r'   r(   r   r!   grouperr$   objindexerbinnerr%   _indexerr"   )selfr&   r'   r(   r   r!   r"   r5   r5   r6   __init__  s    	zGrouper.__init__r   returnc                 C  s   | j }|d krtd|S )Nz1_set_grouper must be called before ax is accessed)r$   
ValueError)r=   indexr5   r5   r6   ax,  s    z
Grouper.axr   z%tuple[Any, ops.BaseGrouper, NDFrameT])r9   validater@   c              	   C  sH   |  | t| j| jg| j| j| j|| jd\| _}| _| j	| j| jfS )z
        Parameters
        ----------
        obj : Series or DataFrame
        validate : bool, default True
            if True, validate the grouper

        Returns
        -------
        a tuple of binner, grouper, obj (possibly sorted)
        )r   r'   r!   rD   r"   )
_set_grouperget_grouperr9   r&   r   r'   r!   r"   r8   r;   )r=   r9   rD   _r5   r5   r6   _get_grouper4  s    
zGrouper._get_grouperr   )r9   r!   c           	      C  s  |dk	st | jdk	r(| jdk	r(td| jdkrB| j| _| j| _| jdk	r| j}t| jdd|krt	|t
r| jdk	s|t | jdk	r| j }| j|}||j}q| j|j}n*||jkrtd| dt|| |d}nn|| j}| jdk	rT| j}t	|tr4||}t|||j| d}n |d|jfkrTtd| d	| jsb|r|js|jjd
dd }| _||}|j|| jd}|| _|| _| jS )a%  
        given an object and the specifications, setup the internal grouper
        for this particular specification

        Parameters
        ----------
        obj : Series or DataFrame
        sort : bool, default False
            whether the resulting grouper should be sorted
        Nz2The Grouper cannot specify both a key and a level!namezThe grouper name z is not foundrI   r   z
The level z is not validZ	mergesortfirst)kindZna_positionr   )AssertionErrorr&   r'   rA   r%   r$   r:   r<   getattr
isinstancer   ZargsortZtakerB   Z
_info_axisKeyErrorr   	_get_axisr   r   Z_get_level_numberZ_get_level_valuesnamesrI   r!   Zis_monotonicarrayr9   )	r=   r9   r!   r&   Zreverse_indexerZunsorted_axrC   r'   r:   r5   r5   r6   rE   U  sN    

 



 
zGrouper._set_grouperc                 C  s   | j jS r7   )r8   groupsr=   r5   r5   r6   rU     s    zGrouper.groupsstrc                   s8    fdd j D }d|}t j}| d| dS )Nc                 3  s4   | ],}t  |d k	r| dtt  | V  qd S )N=)rO   repr).0	attr_namerV   r5   r6   	<genexpr>  s   z#Grouper.__repr__.<locals>.<genexpr>z, ())r)   jointype__name__)r=   Z
attrs_listattrscls_namer5   rV   r6   __repr__  s    


zGrouper.__repr__)NNNr   FT)T)F)ra   
__module____qualname____doc____annotations__r)   r/   r>   r   propertyrC   rH   rE   rU   rd   __classcell__r5   r5   r3   r6   r   :   s8   
 J
       !Lr   c                   @  s&  e Zd ZU dZdZded< dZded< ded< d	ed
< ded< d5dddddddddZddddZdd Z	e
ddddZe
ddddZeddd d!Ze
d"dd#d$Zed%dd&d'Ze
d(dd)d*Ze
ddd+d,Ze
ddd-d.Ze
d/dd0d1Ze
d2dd3d4ZdS )6GroupingaJ  
    Holds the grouping information for a single key

    Parameters
    ----------
    index : Index
    grouper :
    obj : DataFrame or Series
    name : Label
    level :
    observed : bool, default False
        If we are a Categorical, use the observed values
    in_axis : if the Grouping is a column in self.obj and hence among
        Groupby.exclusions list

    Returns
    -------
    **Attributes**:
      * indices : dict of {group -> index_list}
      * codes : ndarray, group codes
      * group_index : unique groups
      * groups : dict of {group -> label_list}
    Nznp.ndarray | None_codesr#   _group_indexr    _passed_categoricalzCategorical | None_all_grouperr   _indexTFzNDFrame | None)rB   r9   r!   observedin_axisr"   c	                 C  s  || _ || _t||| _d | _|| _|| _|| _|| _|| _	|| _
d| _| j}	|	d k	rz| j}
|j|
|	d\| _| _| _n6t| jtr| jd k	st| jj| jdd\}}}|| _| }t|tjr|| _nt||jjd| _nt| jrd| _t| j||\| _| _nt| jttttjfst| jdddkr^| jpLt t!| j}t"d| d	|#| j| _t$| jd
rt%| jt%|kst&| j}d| }d | _t|t| jtjrt'| j| _d S )NF)r'   rD   rJ   Tndim   Grouper for '' not 1-dimensional__len__z9Grouper result violates len(labels) == len(data)
result: )(r'   _orig_grouper_convert_groupergrouping_vectorro   rp   _sortr9   	_observedrr   _dropnarn   _ilevelZ_get_grouper_for_levelrl   rm   rP   r   rN   rH   r   Z
BinGrouperr   result_indexrI   r   r   r   r   npndarrayrO   rW   r`   rA   maphasattrlenr   r   )r=   rB   r8   r9   r'   r!   rq   rr   r"   ilevelZmapperrG   Z
newgrouperZnewobjngtZgrpererrmsgr5   r5   r6   r>     sr        

zGrouping.__init__rW   r?   c                 C  s   d| j  dS )Nz	Grouping(r^   rJ   rV   r5   r5   r6   rd   4  s    zGrouping.__repr__c                 C  s
   t | jS r7   )iterindicesrV   r5   r5   r6   __iter__7  s    zGrouping.__iter__r   c                 C  sb   | j }|d k	r| jj| S t| jttfr2| jjS t| jt	j
rJ| jjjS t| jtr^| jjS d S r7   )r   rp   rS   rP   ry   r   r   rI   r{   r   BaseGrouperr   )r=   r   r5   r5   r6   rI   :  s    
zGrouping.namez
int | Nonec                 C  sL   | j }|dkrdS t|tsH| j}||jkr<td| d|j|S |S )zS
        If necessary, converted index level name to index level position.
        NzLevel z not in index)r'   rP   r   rp   rS   rN   rB   )r=   r'   rB   r5   r5   r6   r   L  s    

zGrouping._ilevelr   c                 C  s
   t | jS r7   )r   group_indexrV   r5   r5   r6   ngroups[  s    zGrouping.ngroupsz$dict[Hashable, npt.NDArray[np.intp]]c                 C  s(   t | jtjr| jjS t| j}| S r7   )rP   r{   r   r   r   r   Z_reverse_indexer)r=   valuesr5   r5   r6   r   _  s    
zGrouping.indicesz
np.ndarrayc                 C  s   | j d k	r| j S | jd S )Nr   )rl   _codes_and_uniquesrV   r5   r5   r6   codesh  s    
zGrouping.codesr   c                 C  s.   | j dk	r| j jS | jdk	r$| jjS | jd S )zv
        Analogous to result_index, but holding an ArrayLike to ensure
        we can retain ExtensionDtypes.
        Nru   )rm   _valuesro   r   r   rV   r5   r5   r6   group_arraylikep  s
    

zGrouping.group_arraylikec                 C  s4   | j d k	r.| j}t|tstt| j | j|S | jS r7   )ro   r   rP   r   rN   r   r|   )r=   Z	group_idxr5   r5   r6   r     s
    
zGrouping.result_indexc                 C  s*   | j d k	r| j S | jd }tj|| jdS )Nru   rJ   )rm   r   r   Z_with_inferrI   )r=   uniquesr5   r5   r6   r     s    

zGrouping.group_indexztuple[np.ndarray, ArrayLike]c                 C  s   | j rr| j}|j}| jrHt|j}||dk }| js<|jrVt	
|}nt	t|}tj|||jd}|j|fS t| jtjr| jj}| jj}n(| jsd }nd}tj| j| j|d\}}||fS )N)r   
categoriesordered)r!   na_sentinel)rn   r{   r   r}   
algorithmsZunique1dr   r|   r   r   r!   Zaranger   r   
from_codesrP   r   r   Z
codes_infoZresult_arrayliker~   Z	factorize)r=   catr   Zucodesr   r   r   r5   r5   r6   r     s6      

  
zGrouping._codes_and_uniqueszdict[Hashable, np.ndarray]c                 C  s   | j t| j| jS r7   )rp   groupbyr   r   r   r   rV   r5   r5   r6   rU     s    zGrouping.groups)NNNTFFT)ra   re   rf   rg   rl   rh   rm   r>   rd   r   r   rI   r   ri   r   r   r   r   r   r   r   rU   r5   r5   r5   r6   rk     sH   
       `	#rk   TFr   r   r    z5tuple[ops.BaseGrouper, frozenset[Hashable], NDFrameT])r9   r   r!   rq   mutatedrD   r"   r@   c	                   s4    |}	|dk	rt|	trXt|r8t|dkr8|d }|dkrt|r|	|}d}nt|rt|}
|
dkrz|d }n|
dkrtdntdt|trʈ  |j	|krtd| d 
| n|dks|dk rtd	d}|	}t|tr6|j d
d\}} |jdkr"|t  fS |t|jh fS nt|tjrP|t  fS t|tsh|g}d
}n|}t|t|	k}tdd |D }tdd |D }tdd |D }|s0|s0|s0|r0|dkr0t trt fdd|D }n&t tstt fdd|D }|s0t|g}t|ttfr^|dkrXdgt| }|}n|gt| }g }t }dd fdd}dd fdd}t||D ]\}}||rd}||j	 n||rZ| kr.|r j||d d| |   }}}|jdkr"td| d|| n* j||drPd
|d  }}}nt |n.t|tr|jdk	r||j d
}nd
}t|t!st!|	| |||||dn|}|"| qt|dkrt rtdn2t|dkr|"t!t#g ddt$j%g t$j&d tj|	||||d}|t| fS ) a  
    Create and return a BaseGrouper, which is an internal
    mapping of how to create the grouper indexers.
    This may be composed of multiple Grouping objects, indicating
    multiple groupers

    Groupers are ultimately index mappings. They can originate as:
    index mappings, keys to columns, functions, or Groupers

    Groupers enable local references to axis,level,sort, while
    the passed in axis, level, and sort are 'global'.

    This routine tries to figure out what the passing in references
    are and then creates a Grouping for each one, combined into
    a BaseGrouper.

    If observed & we have a categorical grouper, only show the observed
    values.

    If validate, then check for key/level overlaps.

    Nru   r   zNo group keys passed!z*multiple levels only valid with MultiIndexzlevel name z is not the name of the r   z2level > 0 or level < -1 only valid with MultiIndexFrs   c                 s  s    | ]}t |pt|tV  qd S r7   )callablerP   dictrZ   gr5   r5   r6   r\   "  s     zget_grouper.<locals>.<genexpr>c                 s  s   | ]}t |ttfV  qd S r7   )rP   r   rk   r   r5   r5   r6   r\   #  s     c                 s  s$   | ]}t |tttttjfV  qd S r7   )rP   listtupler   r   r   r   r   r5   r5   r6   r\   $  s    c                 3  s$   | ]}| j kp| jjkV  qd S r7   )columnsrB   rS   r   r9   r5   r6   r\   1  s    c                 3  s   | ]}| j jkV  qd S r7   )rB   rS   r   r   r5   r6   r\   6  s     r    r?   c              
     sB   t | s> jd }z||  W n tttfk
r<   Y dS X dS )Nr   FT)_is_label_likeZaxesZget_locrQ   	TypeErrorr
   )r&   itemsr   r5   r6   
is_in_axisF  s    
zget_grouper.<locals>.is_in_axisc              
     s@   t | dsdS z|  | j kW S  tttfk
r:   Y dS X d S )NrI   F)r   rI   rQ   
IndexErrorr
   )gprr   r5   r6   	is_in_objS  s    
zget_grouper.<locals>.is_in_objTrM   rv   rw   )r9   r'   r!   rq   rr   r"   r   )Zdtype)r!   r   r"   )'rR   rP   r   r   r   r   Zget_level_valuesrA   rW   rI   Z_get_axis_namer   rH   r&   	frozensetr   r   r   anyr   allr   rN   comasarray_tuplesafer   setzipaddZ_check_label_or_level_ambiguityrt   Z_is_level_referencerQ   rk   appendr   r   rT   Zintp)r9   r&   r   r'   r!   rq   r   rD   r"   Z
group_axisZnlevelsr;   r8   keysZmatch_axis_lengthZany_callableZany_groupersZany_arraylikeZall_in_columns_indexZlevelsZ	groupingsZ
exclusionsr   r   r   rr   rI   Zpingr5   r   r6   rF     s    !
	











$    rF   r?   c                 C  s   t | ttfp| d k	ot| S r7   )rP   rW   r   r   )valr5   r5   r6   r     s    r   r   rM   c                 C  s   t |tr|jS t |tr:|j| r,|jS || jS n^t |trJ|jS t |t	t
tttjfrt|t| krxtdt |t	t
frt|}|S |S d S )Nz$Grouper and axis must be same length)rP   r   r,   r   rB   equalsr   Zreindexr   r   r   r   r   r   r   r   rA   r   r   )r   r8   r5   r5   r6   rz     s    



rz   c                 C  sH   |  dddk	r"tjdtt d |  dddk	rDtjdtt d dS )a  
    Check for use of deprecated parameters in ``resample`` and related functions.

    Raises the appropriate warnings if these parameters are detected.
    Only sets an approximate ``stacklevel`` for the warnings (see #37603, #36629).

    Parameters
    ----------
    kwargs : dict
        Dictionary of keyword arguments to check for deprecated parameters.
    origin : object
        From where this function is being called; either Grouper or TimeGrouper. Used
        to determine an approximate stacklevel.
    baseNz'base' in .resample() and in Grouper() is deprecated.
The new arguments that you should use are 'offset' or 'origin'.

>>> df.resample(freq="3s", base=2)

becomes:

>>> df.resample(freq="3s", offset="2s")
)
stacklevelZloffseta  'loffset' in .resample() and in Grouper() is deprecated.

>>> df.resample(freq="3s", loffset="8H")

becomes:

>>> from pandas.tseries.frequencies import to_offset
>>> df = df.resample(freq="3s").mean()
>>> df.index = df.index.to_timestamp() + to_offset("8H")
)r,   warningswarnFutureWarningr   )r2   r+   r5   r5   r6   r-     s    	r-   )Nr   NTFFTT);rg   
__future__r   typingr   r   r   r   r   numpyr   Zpandas._typingr   r   r	   Zpandas.errorsr
   Zpandas.util._decoratorsr   Zpandas.util._exceptionsr   Zpandas.core.dtypes.castr   Zpandas.core.dtypes.commonr   r   r   Zpandas.core.algorithmscorer   Zpandas.core.arraysr   r   Zpandas.core.commoncommonr   Zpandas.core.framer   Zpandas.core.groupbyr   Zpandas.core.groupby.categoricalr   r   Zpandas.core.indexes.apir   r   r   Zpandas.core.seriesr   Zpandas.io.formats.printingr   Zpandas.core.genericr   r   rk   rF   r   rz   r-   r5   r5   r5   r6   <module>   sR     |           ^