U
    +if                     @  s*  d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZ ddlmZ ddlZddlmZmZmZmZ ddlmZmZmZmZmZmZmZ dd	l m!Z! dd
l"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddl;m<Z< ddl=m>Z>m?Z? ddl@mAZAmBZBmCZCmDZDmEZEmFZFmGZG ddlHmIZImJZJ ddlKmLZL ddlMmNZOmPZPmQZQ ddlRmSZS erddlmTZTmUZU ddlVmWZWmXZXmYZYmZZZ ddl[m\Z\m]Z]m^Z^ dddddZ_ddddddd Z`dd!d"d#Zaejbejcejdejeejfejgejhejiejjejkejlejmejnejod$Zpdd%d&d'Zqdddd(d)Zrdd%d*d+Zsdd,dd-d.Ztd/d0 ZueuZvddd1d2d3d4Zwddd6d7d8d9d:d;d<Zxe!ed=ed>ed?d@ddBd7d7dCdDdEdFZyddBdBdBdBdHdIdJdKZzdBdLdMdNZ{dddPd1dQdRdSZ|dddBddTdUdVZ}ddd6d,d,dBdBdYdZd[d\Z~ddd]d]dd^d_d`ZG dadb dbZG dcdd ddeZG dedf dfeZddgd6dBdhdidjZdddldmdndodpdqdrZdsdtdudvdwdxhZdd6d6dydzd{Zdd6dBdBd|d}d~dZdd!ddZdddddZddddddZdS )zl
Generic data algorithms. This module is experimental at the moment and not
intended for public consumption
    )annotationsN)dedent)TYPE_CHECKINGHashableLiteralSequenceUnioncastfinal)warn)algos	hashtableiNaTlib)AnyArrayLike	ArrayLikeDtypeObj
IndexLabelScalarTakeIndexernpt)doc)find_stack_level)'construct_1d_object_array_from_listlikeinfer_dtype_from_arraysanitize_to_nanoseconds)ensure_float64ensure_objectensure_platform_intis_array_likeis_bool_dtypeis_categorical_dtypeis_complex_dtypeis_datetime64_dtypeis_extension_array_dtypeis_float_dtype
is_integeris_integer_dtypeis_list_likeis_numeric_dtypeis_object_dtype	is_scalaris_timedelta64_dtypeneeds_i8_conversion)concat_compat)ExtensionDtypePandasDtype)ABCDatetimeArrayABCExtensionArrayABCIndexABCMultiIndexABCRangeIndex	ABCSeriesABCTimedeltaArray)isnana_value_for_dtype)take_nd)arrayensure_wrapped_if_datetimelikeextract_array)validate_indices)NumpySorterNumpyValueArrayLike)Categorical	DataFrameIndexSeries)DatetimeArrayExtensionArrayTimedeltaArrayr   z
np.ndarray)valuesreturnc              	   C  sL  t | tst| dd} t| jr.tt| S t| jrt | tj	rTt| 
dS zt| jdddW S  ttfk
r   t|  Y S X nt| jrt| S t| jr| jjdkrt| S t| S t| jr| S t| jrt | tj	 rt| } | 
d}ttj	|}|S t| jr6td| } | j} | S tj| td	} t| S )
a  
    routine to ensure that our data is of the correct
    input dtype for lower-level routines

    This will coerce:
    - ints -> int64
    - uint -> uint64
    - bool -> uint8
    - datetimelike -> i8
    - datetime64tz -> i8 (in local tz)
    - categorical -> codes

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray
    Textract_numpyuint8Fcopy)         i8rA   dtype)
isinstancer4   r=   r*   rT   r   npasarrayr    ndarrayviewastype	TypeError
ValueErrorr'   r%   itemsizer   r"   r-   r   r	   r!   codesobject)rH   Znpvalues r`   I/home/mars/bis/venv/lib/python3.8/site-packages/pandas/core/algorithms.py_ensure_datas   s>    









rb   r   r   )rH   rT   originalrI   c                 C  s   t | tr| j|kr| S t |tjsT| }t | |rD| j|krD| S |j| |d} nlt|r| j|dd} t |tr| jt	dd} n<|dk	rt
|rtd}nt|rtd}| j|dd} | S )z
    reverse of _ensure_data

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    original : AnyArrayLike

    Returns
    -------
    ExtensionArray or np.ndarray
    rS   FrM   Nzdatetime64[ns]timedelta64[ns])rU   r2   rT   rV   Zconstruct_array_typeZ_from_sequencer    rZ   r3   r_   r#   r,   )rH   rT   rc   clsr`   r`   ra   _reconstruct_data   s$    

rf   )rI   c                 C  sH   t | sDtj| dd}|dkr:t| tr0t| } t| } n
t| } | S )z5
    ensure that we are arraylike if not already
    FZskipna)mixedstringmixed-integer)	r   r   infer_dtyperU   tuplelistr   rV   rW   )rH   inferredr`   r`   ra   _ensure_arraylike   s    


ro   )Z
complex128Z	complex64float64float32Zuint64Zuint32Zuint16rL   int64int32int16int8ri   r_   )rH   c                 C  s    t | } t| }t| }|| fS )z
    Parameters
    ----------
    values : np.ndarray

    Returns
    -------
    htable : HashTable subclass
    values : ndarray
    )rb   _check_object_for_strings_hashtablesrH   ndtypehtabler`   r`   ra   _get_hashtable_algo  s    r{   c                 C  s2   t | } | jjdkr.| jjd }| j|dd} | S )N)iuf8FrM   )rb   rT   kindrZ   )rH   rT   r`   r`   ra   _get_values_for_rank!  s
    r   c                 C  s(   t | } t| }t|td }|| fS )Nr_   )r   rv   rw   getrx   r`   r`   ra   _get_data_algo+  s    r   strc                 C  s*   | j j}|dkr&tj| dddkr&d}|S )z
    Check if we can use string hashtable instead of object hashtable.

    Parameters
    ----------
    values : ndarray

    Returns
    -------
    str
    r_   Frg   )ri   ri   )rT   namer   rk   )rH   ry   r`   r`   ra   rv   4  s
    rv   c                 C  sR   t | } t| jr|  S | }t| \}} |t| }|| }t||j|}|S )a
  
    Return unique values based on a hash table.

    Uniques are returned in order of appearance. This does NOT sort.

    Significantly faster than numpy.unique for long enough sequences.
    Includes NA values.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    numpy.ndarray or ExtensionArray

        The return can be:

        * Index : when the input is an Index
        * Categorical : when the input is a Categorical dtype
        * ndarray : when the input is a Series/ndarray

        Return numpy.ndarray or ExtensionArray.

    See Also
    --------
    Index.unique : Return unique values from an Index.
    Series.unique : Return unique values of Series object.

    Examples
    --------
    >>> pd.unique(pd.Series([2, 1, 3, 3]))
    array([2, 1, 3])

    >>> pd.unique(pd.Series([2] + [1] * 5))
    array([2, 1])

    >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.unique(
    ...     pd.Series(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[ns, US/Eastern]

    >>> pd.unique(
    ...     pd.Index(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    DatetimeIndex(['2016-01-01 00:00:00-05:00'],
            dtype='datetime64[ns, US/Eastern]',
            freq=None)

    >>> pd.unique(list("baabc"))
    array(['b', 'a', 'c'], dtype=object)

    An unordered Categorical will return categories in the
    order of appearance.

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    An ordered Categorical preserves the category ordering.

    >>> pd.unique(
    ...     pd.Series(
    ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ...     )
    ... )
    ['b', 'a', 'c']
    Categories (3, object): ['a' < 'b' < 'c']

    An array of tuples

    >>> pd.unique([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")])
    array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
    )ro   r$   rT   uniquer{   lenrf   )rH   rc   rz   tableuniquesr`   r`   ra   r   P  s    ^

r   znpt.NDArray[np.bool_])compsrH   rI   c                 C  s  t | stdt| j dt |s<tdt|j dt|ttttj	fs^t
t|}n$t|trtt|}nt|ddd}t
| } t| dd} t| tj	s| |S t| jrt| |S t|jrt| jstj| jtdS t|jrt| |tS t|jtr(tt| t|S t| dkrnt|dkrnt| snt| rfd	d
 }ntj}n6t|j| jgg }|j|dd}| j|dd} t j!}|| |S )z
    Compute the isin boolean array.

    Parameters
    ----------
    comps : array-like
    values : array-like

    Returns
    -------
    ndarray[bool]
        Same length as `comps`.
    zIonly list-like objects are allowed to be passed to isin(), you passed a []T)rK   Zextract_rangerJ   rS   i@B    c                 S  s   t t | |t | S N)rV   
logical_orin1disnan)cvr`   r`   ra   r~     s    zisin.<locals>.fFrM   )"r(   r[   type__name__rU   r3   r6   r2   rV   rX   ro   rm   r4   r;   r=   isinr-   rT   pd_arrayr*   ZzerosshapeboolrZ   r_   r/   rW   r   r8   anyr   Zfind_common_typerz   Zismember)r   rH   r~   commonr`   r`   ra   r     sD    


&
r   intz
int | Noneznp.ndarray | Nonez'tuple[npt.NDArray[np.intp], np.ndarray])rH   na_sentinel	size_hintmaskrI   c           	      C  sB   t | \}} ||pt| }|j| |||d\}}t|}||fS )a[  
    Factorize a numpy array to codes and uniques.

    This doesn't do any coercion of types or unboxing before factorization.

    Parameters
    ----------
    values : ndarray
    na_sentinel : int, default -1
    size_hint : int, optional
        Passed through to the hashtable's 'get_labels' method
    na_value : object, optional
        A value in `values` to consider missing. Note: only use this
        parameter when you know that you don't have any values pandas would
        consider missing in the array (NaN for float data, iNaT for
        datetimes, etc.).
    mask : ndarray[bool], optional
        If not None, the mask is used as indicator for missing values
        (True = missing, False = valid) instead of `na_value` or
        condition "val != val".

    Returns
    -------
    codes : ndarray[np.intp]
    uniques : ndarray
    )r   na_valuer   )r   r   	factorizer   )	rH   r   r   r   r   
hash_klassr   r   r^   r`   r`   ra   factorize_array  s    !   
r   z    values : sequence
        A 1-D sequence. Sequences that aren't pandas objects are
        coerced to ndarrays before factorization.
    zt    sort : bool, default False
        Sort `uniques` and shuffle `codes` to maintain the
        relationship.
    zG    size_hint : int, optional
        Hint to the hashtable sizer.
    )rH   sortr   Fr   z%tuple[np.ndarray, np.ndarray | Index])r   r   r   rI   c                 C  s  t | tr| j|dS t| } | }t | ts8t| dd} d}|dkrLd}d}t | ttfr| jdk	r| j|d\}}t |t	r|j
|dd}nt |trdd	lm} ||}||fS t | jtjs| j|d
\}}|j}	n8| j}	t| } |jjdkrt}
nd}
t| |||
d\}}|r<t|dkr<t|||ddd\}}||k}|s| rt|jdd}
t||
g}t|t|d |}t||	|}t |t	r|jjdkrt |tjrtd|j|_t|jj||jd}|j
|dd}n t |trdd	lm} ||}||fS )ao  
    Encode the object as an enumerated type or categorical variable.

    This method is useful for obtaining a numeric representation of an
    array when all that matters is identifying distinct values. `factorize`
    is available as both a top-level function :func:`pandas.factorize`,
    and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.

    Parameters
    ----------
    {values}{sort}
    na_sentinel : int or None, default -1
        Value to mark "not found". If None, will not drop the NaN
        from the uniques of the values.

        .. versionchanged:: 1.1.2
    {size_hint}
    Returns
    -------
    codes : ndarray
        An integer ndarray that's an indexer into `uniques`.
        ``uniques.take(codes)`` will have the same values as `values`.
    uniques : ndarray, Index, or Categorical
        The unique valid values. When `values` is Categorical, `uniques`
        is a Categorical. When `values` is some other pandas object, an
        `Index` is returned. Otherwise, a 1-D ndarray is returned.

        .. note::

           Even if there's a missing value in `values`, `uniques` will
           *not* contain an entry for it.

    See Also
    --------
    cut : Discretize continuous-valued array.
    unique : Find the unique value in an array.

    Notes
    -----
    Reference :ref:`the user guide <reshaping.factorize>` for more examples.

    Examples
    --------
    These examples all show factorize as a top-level method like
    ``pd.factorize(values)``. The results are identical for methods like
    :meth:`Series.factorize`.

    >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'])
    >>> codes
    array([0, 0, 1, 2, 0]...)
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    With ``sort=True``, the `uniques` will be sorted, and `codes` will be
    shuffled so that the relationship is the maintained.

    >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True)
    >>> codes
    array([1, 1, 0, 2, 1]...)
    >>> uniques
    array(['a', 'b', 'c'], dtype=object)

    Missing values are indicated in `codes` with `na_sentinel`
    (``-1`` by default). Note that missing values are never
    included in `uniques`.

    >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b'])
    >>> codes
    array([ 0, -1,  1,  2,  0]...)
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    Thus far, we've only factorized lists (which are internally coerced to
    NumPy arrays). When factorizing pandas objects, the type of `uniques`
    will differ. For Categoricals, a `Categorical` is returned.

    >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1]...)
    >>> uniques
    ['a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Notice that ``'b'`` is in ``uniques.categories``, despite not being
    present in ``cat.values``.

    For all other pandas objects, an Index of the appropriate type is
    returned.

    >>> cat = pd.Series(['a', 'a', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1]...)
    >>> uniques
    Index(['a', 'c'], dtype='object')

    If NaN is in the values, and we want to include NaN in the uniques of the
    values, it can be achieved by setting ``na_sentinel=None``.

    >>> values = np.array([1, 2, 1, np.nan])
    >>> codes, uniques = pd.factorize(values)  # default: na_sentinel=-1
    >>> codes
    array([ 0,  1,  0, -1])
    >>> uniques
    array([1., 2.])

    >>> codes, uniques = pd.factorize(values, na_sentinel=None)
    >>> codes
    array([0, 1, 0, 2])
    >>> uniques
    array([ 1.,  2., nan])
    )r   TrJ   Nr   F)r   r   )rC   )r   )mM)r   r   r   )r   assume_uniqueverify)compat   z$Union[DatetimeArray, TimedeltaArray]rS   ) rU   r5   r   ro   r4   r=   r1   r7   freqr3   Z_shallow_copyr6   pandasrC   rT   rV   rb   r   r   r   r   	safe_sortr   r9   appendwhererf   rX   r	   _datar   Z_simple_new)rH   r   r   r   rc   dropnar^   r   rC   rT   r   Z
code_is_nar`   r`   ra   r   8  s|     



   
    
 r   TrD   )r   	ascending	normalizer   rI   c              
   C  sB  ddl m} t| dd}|dk	rddlm} || } z|| |dd}	W n, tk
rr }
 ztd|
W 5 d}
~
X Y nX |	j|d	}||j  }|j	d
|_|
 }|r|jdk r|jdd }tt|	g}nDt| r|| jj|d	}||_|j}nt| |\}}||||d}|r,|j|d}|r>||  }|S )aK  
    Compute a histogram of the counts of non-null values.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : bool, default True
        Sort by values
    ascending : bool, default False
        Sort in ascending order
    normalize: bool, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data
    dropna : bool, default True
        Don't include counts of NaN

    Returns
    -------
    Series
    r   )rD   r   N)cutT)Zinclude_lowestz+bins argument only works with numeric data.r   intervalindexr   r   )Zpandas.core.seriesrD   getattrZpandas.core.reshape.tiler   r[   value_countsr   ZnotnarZ   Z
sort_index_valuesallilocrV   r;   r   r$   r   value_counts_arraylikesort_valuessum)rH   r   r   r   Zbinsr   rD   r   r   iierrresultcountskeysr`   r`   ra   r   #  s6    r   r   c                 C  sb   t | } | }t| } t| |\}}t|jrL|rL|tk}|| ||  }}t||j|}||fS )z
    Parameters
    ----------
    values : arraylike
    dropna : bool

    Returns
    -------
    uniques : np.ndarray or ExtensionArray
    counts : np.ndarray
    )ro   rb   rz   Zvalue_countr-   rT   r   rf   )rH   r   rc   r   r   ZmskZres_keysr`   r`   ra   r   s  s    
r   firstz!Literal[('first', 'last', False)])rH   keeprI   c                 C  s   t | } tj| |dS )a  
    Return boolean ndarray denoting duplicate values.

    Parameters
    ----------
    values : nd.array, ExtensionArray or Series
        Array over which to check for duplicate values.
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.

    Returns
    -------
    duplicated : ndarray[bool]
    r   )rb   rz   
duplicated)rH   r   r`   r`   ra   r     s    r   )rH   r   rI   c              
   C  s   t | } | }t| jr*t| } | j|dS t| } tj| |d}zt	|}W n0 t
k
r~ } ztd|  W 5 d}~X Y nX t||j|}|S )a  
    Returns the mode(s) of an array.

    Parameters
    ----------
    values : array-like
        Array over which to check for duplicate values.
    dropna : bool, default True
        Don't consider counts of NaN/NaT.

    Returns
    -------
    np.ndarray or ExtensionArray
    r   zUnable to sort modes: N)ro   r-   rT   r<   _moderb   rz   moderV   r   r[   r   rf   )rH   r   rc   Znpresultr   r   r`   r`   ra   r     s    
 r   averager   znpt.NDArray[np.float64])rH   axismethod	na_optionr   pctrI   c              	   C  sd   t | j}t| } | jdkr4tj| |||||d}n,| jdkrXtj| ||||||d}ntd|S )a  
    Rank the values along a given axis.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array whose values will be ranked. The number of dimensions in this
        array must not exceed 2.
    axis : int, default 0
        Axis over which to perform rankings.
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        The method by which tiebreaks are broken during the ranking.
    na_option : {'keep', 'top'}, default 'keep'
        The method by which NaNs are placed in the ranking.
        - ``keep``: rank each NaN value with a NaN ranking
        - ``top``: replace each NaN with either +/- inf so that they
                   there are ranked at the top
    ascending : bool, default True
        Whether or not the elements should be ranked in ascending order.
    pct : bool, default False
        Whether or not to the display the returned rankings in integer form
        (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
    r   )is_datetimeliketies_methodr   r   r   rO   )r   r   r   r   r   r   z&Array with ndim > 2 are not supported.)r-   rT   r   ndimr   Zrank_1dZrank_2dr[   )rH   r   r   r   r   r   r   Zranksr`   r`   ra   rank  s.    



r   znpt.NDArray[np.bool_] | None)arrarr_maskb_maskrI   c                 C  s:  t || j}|dk	r&t || j}nd}|dk	rJ|dk	rJt ||B }nB|dk	r^t |}n.|dk	rrt |}nt j| jtd}|d tj}t	}|dk}	|dk }
|	
 s|| | k|@ 
 }n`|

 s|| | k |@ 
 }nB|||	  | |	 k ||	 @ 
 p"|||
  | |
 k||
 @ 
 }|r2td| | S )a  
    Perform array addition that checks for underflow and overflow.

    Performs the addition of an int64 array and an int64 integer (or array)
    but checks that they do not result in overflow first. For elements that
    are indicated to be NaN, whether or not there is overflow for that element
    is automatically ignored.

    Parameters
    ----------
    arr : array addend.
    b : array or scalar addend.
    arr_mask : np.ndarray[bool] or None, default None
        array indicating which elements to exclude from checking
    b_mask : np.ndarray[bool] or None, default None
        array or scalar indicating which element(s) to exclude from checking

    Returns
    -------
    sum : An array for elements x + b for each element x in arr if b is
          a scalar or an array for elements x + y for each element pair
          (x, y) in (arr, b).

    Raises
    ------
    OverflowError if any x + y exceeds the maximum or minimum int64 value.
    NrS   Tr   zOverflow in int64 addition)rV   Zbroadcast_tor   Zlogical_notemptyr   fillr   i8maxr   r   OverflowError)r   br   r   b2Zb2_maskZnot_nanr   Zi8minZmask1Zmask2Zto_raiser`   r`   ra   checked_add_with_arr  s4    #
	"r   c                   @  s\   e Zd ZdddddZddddd	Zed
d Zedd ZeedddddZ	dS )SelectNr   r   nr   c                 C  s(   || _ || _|| _| jdkr$tdd S )N)r   lastr   z,keep must be either "first", "last" or "all")objr   r   r\   )selfr   r   r   r`   r`   ra   __init__d  s
    
zSelectN.__init__zDataFrame | Seriesr   rI   c                 C  s   t d S r   )NotImplementedError)r   r   r`   r`   ra   computel  s    zSelectN.computec                 C  s
   |  dS )Nnlargestr   r   r`   r`   ra   r   o  s    zSelectN.nlargestc                 C  s
   |  dS )N	nsmallestr   r   r`   r`   ra   r   s  s    zSelectN.nsmallestr   r   )rT   rI   c                 C  s   t | rt|  pt| S )zg
        Helper function to determine if dtype is valid for
        nsmallest/nlargest methods
        )r)   r"   r-   rS   r`   r`   ra   is_valid_dtype_n_methodw  s    zSelectN.is_valid_dtype_n_methodN)
r   
__module____qualname__r   r   r
   r   r   staticmethodr   r`   r`   r`   ra   r   c  s   

r   c                   @  s    e Zd ZdZdddddZdS )SelectNSeriesz
    Implement n largest/smallest for Series

    Parameters
    ----------
    obj : Series
    n : int
    keep : {'first', 'last'}, default 'first'

    Returns
    -------
    nordered : Series
    r   rD   r   c                 C  s   ddl m} | j}| jj}| |s8td| d| |dkrJ| jg  S | j }| j|j	}t
|jrddlm} |j}t||rt||j|j	|jd}	t| |	| j| jd|}
|
|jS |t| jkr|dk}| jj|d	|S |j}t|j}|d
kr:| }t|r&|d8 }nt|r:d|  }| jdkrT|d d d }|}t| j}t|}t||}t|jdd|d }t !||k\}||| j"dd }| jdkr|d | }|}| jdkr|d | }||j#| |gj#d | S )Nr   )concatzCannot use method 'z' with dtype )BaseMaskedArrayr   r   r   r   r   r   r   r   C)order	mergesort)r   r   )$Zpandas.core.reshape.concatr   r   r   rT   r   r[   r   dropr   r$   pandas.core.arraysr   r   rU   r   r   r   r   r   rZ   r   r   headrb   rH   r'   r    minr   Zkth_smallestrN   rV   Znonzeroargsortr   )r   r   r   r   rT   ZdroppedZ	nan_indexr   r   Zserr   r   Z	new_dtypeZnbaseZfindexZnarrZkth_valnsZindsr`   r`   ra   r     sT    












zSelectNSeries.computeN)r   r   r   __doc__r   r`   r`   r`   ra   r     s   r   c                      s<   e Zd ZdZddddd fddZddd	d
dZ  ZS )SelectNFramez
    Implement n largest/smallest for DataFrame

    Parameters
    ----------
    obj : DataFrame
    n : int
    keep : {'first', 'last'}, default 'first'
    columns : list or str

    Returns
    -------
    nordered : DataFrame
    rB   r   r   r   )r   r   r   columnsc                   sH   t  ||| t|r"t|tr(|g}ttt |}t|}|| _	d S r   )
superr   r(   rU   rl   r	   r   r   rm   r   )r   r   r   r   r   	__class__r`   ra   r     s    zSelectNFrame.__init__r   c              	     s~  ddl m} | j}| j}| j}|D ]<}|| j}| |s"tdt| d| dt  dq" fdd}|j	}	|j
d	d
 }
}|}|g }t|D ]\}}|
| }t|d |k}t| ||r| jndd}|st||kr|||j	} q>|||j	d  k}|| }||  }|||j	}|
j|j	 }
|t| }q||}|	||_	t|dkrf|S  dk}|j||ddS )Nr   )
Int64IndexzColumn z has dtype z, cannot use method z with this dtypec                   s     dkr|  |S | | S dS )z{
            Helper function to concat `current_indexer` and `other_indexer`
            depending on `method`
            r   N)r   )Zcurrent_indexerZother_indexerr   r`   ra   get_indexer  s    
z)SelectNFrame.compute.<locals>.get_indexerT)r   r   r   r   r   r   r   )r   r   )Zpandas.core.apir  r   r   r   rT   r   r[   reprr   Zreset_index	enumerater   r   r   loctaker   )r   r   r  r   framer   columnrT   r  Zoriginal_indexZ	cur_frameZcur_nindexerr|   ZseriesZis_last_columnrH   Zborder_valueZunsafe_valuesZsafe_valuesr   r`   r  ra   r     sJ    

 

zSelectNFrame.compute)r   r   r   r   r   r   __classcell__r`   r`   r  ra   r     s   	r   r   )indicesr   
allow_fillc                 C  s\   t | st| } tj|tjd}|rJt|| j|  t| ||d|d}n| j||d}|S )a  
    Take elements from an array.

    Parameters
    ----------
    arr : array-like or scalar value
        Non array-likes (sequences/scalars without a dtype) are coerced
        to an ndarray.
    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    axis : int, default 0
        The axis over which to select values.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any other
          negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type (``self.dtype.na_value``) is used.

        For multi-dimensional `arr`, each *element* is filled with
        `fill_value`.

    Returns
    -------
    ndarray or ExtensionArray
        Same type as the input.

    Raises
    ------
    IndexError
        When `indices` is out of bounds for the array.
    ValueError
        When the indexer contains negative values other than ``-1``
        and `allow_fill` is True.

    Notes
    -----
    When `allow_fill` is False, `indices` may be whatever dimensionality
    is accepted by NumPy for `arr`.

    When `allow_fill` is True, `indices` should be 1-D.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.

    Examples
    --------
    >>> from pandas.api.extensions import take

    With the default ``allow_fill=False``, negative numbers indicate
    positional indices from the right.

    >>> take(np.array([10, 20, 30]), [0, 0, -1])
    array([10, 10, 30])

    Setting ``allow_fill=True`` will place `fill_value` in those positions.

    >>> take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True)
    array([10., 10., nan])

    >>> take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True,
    ...      fill_value=-10)
    array([ 10,  10, -10])
    rS   T)r   r  
fill_value)r   )r   rV   rW   Zintpr>   r   r:   r
  )r   r  r   r  r  r   r`   r`   ra   r
  H  s    P
    r
  leftz$NumpyValueArrayLike | ExtensionArrayzLiteral[('left', 'right')]r?   znpt.NDArray[np.intp] | np.intp)r   valuesidesorterrI   c                 C  s   |dk	rt |}t| tjrt| jrt|s6t|rt| jj}t	|rXt
|gnt
|}||jk r||jk r| j}n|j}t	|rtt||}qttt||d}nt| } | j|||dS )a  
    Find indices where elements should be inserted to maintain order.

    .. versionadded:: 0.25.0

    Find the indices into a sorted array `arr` (a) such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `arr` would be preserved.

    Assuming that `arr` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``arr[i-1] < value <= self[i]``
    right   ``arr[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    arr: np.ndarray, ExtensionArray, Series
        Input array. If `sorter` is None, then it must be sorted in
        ascending order, otherwise `sorter` must be an array of indices
        that sort it.
    value : array-like or scalar
        Values to insert into `arr`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort array a into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.
    NrS   )r  r  )r   rU   rV   rX   r'   rT   r&   iinfor   r+   r;   r   r   maxr	   r   r   r   r<   searchsorted)r   r  r  r  r  Z	value_arrrT   r`   r`   ra   r    s(    2
	r  rp   rq   rr   rs   rt   ru   )r   r   c                 C  s`  t |}tj}| j}t|}|r(tj}ntj}t|t	rF| 
 } | j}t|tjst| d|j dr|dkrtdt| j d| || | |S tdtt d t| } | j}d}t| jrtj}| d} t}d	}n2|rtj}n&t|r| jjd
krtj}ntj}| j}|dkr.| dd} t|}tj| j |d}	t!dgd }
|dkrjt!d|nt!|d|
|< ||	t"|
< | jjt#krt$j%| |	|||d nt!dgd }|dkrt!|dnt!d|||< t"|}t!dgd }|dkrt!d| n
t!| d||< t"|}|| | | | |	|< |rB|	d}	|dkr\|	dddf }	|	S )aQ  
    difference of n between self,
    analogous to s-s.shift(n)

    Parameters
    ----------
    arr : ndarray or ExtensionArray
    n : int
        number of periods
    axis : {0, 1}
        axis to shift on
    stacklevel : int, default 3
        The stacklevel for the lost dtype warning.

    Returns
    -------
    shifted
    __r   zcannot diff z	 on axis=zwdtype lost in 'diff()'. In the future this will raise a TypeError. Convert to a suitable dtype prior to calling 'diff'.)
stacklevelFrR   T)ru   rt   r   r   rS   NrO   )Zdatetimelikerd   )&r   rV   nanrT   r    operatorxorsubrU   r0   Zto_numpyhasattrr   r\   r   shiftr   FutureWarningr   rW   r-   rr   rY   r   Zobject_r'   r   rq   rp   r   Zreshaper   r   slicerl   _diff_specialr   Zdiff_2d)r   r   r   narT   Zis_boolopZis_timedeltaZ	orig_ndimZout_arrZ
na_indexerZ_res_indexerZres_indexerZ_lag_indexerZlag_indexerr`   r`   ra   diff  sp    






""&

r&  z*np.ndarray | tuple[np.ndarray, np.ndarray])r   r   r   rI   c                 C  s  t | stdt| tjtfs:t| \}}tj| |d} d}t| sbt	j
| dddkrbt| }nRz|  }| |}W n: tk
r   | jrt| d trt| }nt| }Y nX |dkr|S t |stdtt|}|stt| t| kstd	|dkr8t| \}	} |	t| }
|
|  t|
|}|d
kr| }t||d
d}|rz|t|  k |t| kB }nd}ndtjt|tjd}||tt| |j|dd}||k}|r||t|  k B |t| kB }|dk	rt||| |t|fS )a  
    Sort ``values`` and reorder corresponding ``codes``.

    ``values`` should be unique if ``codes`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``codes`` is not None.
    codes : list_like, optional
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``na_sentinel``.
    na_sentinel : int, default -1
        Value in ``codes`` to mark "not found".
        Ignored when ``codes`` is None.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``codes`` is None.
    verify : bool, default True
        Check if codes are out of bound for the values and put out of bound
        codes equal to na_sentinel. If ``verify=False``, it is assumed there
        are no out of bound codes. Ignored when ``codes`` is None.

        .. versionadded:: 0.25.0

    Returns
    -------
    ordered : ndarray
        Sorted ``values``
    new_codes : ndarray
        Reordered ``codes``; returned when ``codes`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``codes`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``codes`` is not None and ``values`` contain duplicates.
    zFOnly list-like objects are allowed to be passed to safe_sort as valuesrS   NFrg   rj   r   zMOnly list-like objects or None are allowed to be passed to safe_sort as codesz,values should be unique if codes is not Noner   r  wrap)r   )r(   r[   rU   rV   rX   r2   r   rW   r$   r   rk   _sort_mixedr   r
  sizerl   _sort_tuplesr   r   r   r\   r   Zmap_locationslookupr:   r   int_putZarangeZputmask)rH   r^   r   r   r   rT   _r  Zorderedr   tZorder2Z	new_codesr   Zreverse_indexerr`   r`   ra   r     sb    1





r   c                 C  sN   t jdd | D td}t | |  }t | | }t |t j|tdgS )z3order ints before strings in 1d arrays, safe in py3c                 S  s   g | ]}t |tqS r`   )rU   r   ).0xr`   r`   ra   
<listcomp>  s     z_sort_mixed.<locals>.<listcomp>rS   )rV   r;   r   r   ZconcatenaterW   r_   )rH   Zstr_posnumsstrsr`   r`   ra   r)    s    r)  c                 C  s:   ddl m} ddlm} || d\}}||dd}| | S )a  
    Convert array of tuples (1d) to array or array (2d).
    We need to keep the columns separately as they contain different types and
    nans (can't use `np.sort` as it may fail when str and nan are mixed in a
    column as types cannot be compared).
    r   )	to_arrays)lexsort_indexerNT)Zorders)Z"pandas.core.internals.constructionr6  Zpandas.core.sortingr7  )rH   r6  r7  Zarraysr/  r  r`   r`   ra   r+  	  s
    r+  )lvalsrvalsrI   c                 C  s   g }t | dd}t |dd}|j|dd\}}tt| |g}t|}t|D ],\}}||gtt|j| |j|  7 }qN|	|S )a  
    Extracts the union from lvals and rvals with respect to duplicates and nans in
    both arrays.

    Parameters
    ----------
    lvals: np.ndarray or ExtensionArray
        left values which is ordered in front.
    rvals: np.ndarray or ExtensionArray
        right values ordered after lvals.

    Returns
    -------
    np.ndarray or ExtensionArray
        Containing the unsorted union of both arrays.

    Notes
    -----
    Caller is responsible for ensuring lvals.dtype == rvals.dtype.
    Fr   r   r'  )
r   alignr   r.   r<   r  r   r  atr
  )r8  r9  r  Zl_countZr_countZunique_arrayr|   r  r`   r`   ra   union_with_duplicates  s    &r<  )r   NNN)Fr   N)TFFNT)r   )T)r   r   r   TF)NN)r   FN)r  N)r   )Nr   FT)r   
__future__r   r  textwrapr   typingr   r   r   r   r   r	   r
   warningsr   numpyrV   Zpandas._libsr   r   rz   r   r   Zpandas._typingr   r   r   r   r   r   r   Zpandas.util._decoratorsr   Zpandas.util._exceptionsr   Zpandas.core.dtypes.castr   r   r   Zpandas.core.dtypes.commonr   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   Zpandas.core.dtypes.concatr.   Zpandas.core.dtypes.dtypesr/   r0   Zpandas.core.dtypes.genericr1   r2   r3   r4   r5   r6   r7   Zpandas.core.dtypes.missingr8   r9   Zpandas.core.array_algos.taker:   Zpandas.core.constructionr;   r   r<   r=   Zpandas.core.indexersr>   r?   r@   r   rA   rB   rC   rD   r   rE   rF   rG   rb   rf   ro   ZComplex128HashTableZComplex64HashTableZFloat64HashTableZFloat32HashTableZUInt64HashTableZUInt32HashTableZUInt16HashTableZUInt8HashTableZInt64HashTableZInt32HashTableZInt16HashTableZInt8HashTableZStringHashTableZPyObjectHashTablerw   r{   r   r   rv   r   Zunique1dr   r   r   r   r   r   r   r   r   r   r   r   r
  r  r#  r&  r   r)  r+  r<  r`   r`   r`   ra   <module>   s   $	$	P$	
N,
	mN    ,    X     P '     =  [ Us   i  Zv     