U
    +if2                     @  s  d Z ddlmZ ddlmZmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZ ddlmZ ddlm Z  erddl!m"Z"m#Z# dddddddddZ$G dd deZ%G dd de%Z&G dd de%Z'dddddZ(d d!d d"d#d$Z)d d!d d%d&d'Z*d d!d d%d(d)Z+d d!d d*d+d,Z,d dd-d.d/d0Z-dd1d2d3d4Z.dS )5z
Module responsible for execution of NDFrame.describe() method.

Method NDFrame.describe() delegates actual execution to function describe_ndframe().
    )annotations)ABCabstractmethod)TYPE_CHECKINGAnyCallableHashableSequencecastN)	Timestamp)NDFrameT)find_stack_level)validate_percentile)is_bool_dtypeis_datetime64_any_dtypeis_numeric_dtypeis_timedelta64_dtype)concat)format_percentiles)	DataFrameSeriesr   str | Sequence[str] | Noneboolz#Sequence[float] | np.ndarray | None)objincludeexcludedatetime_is_numericpercentilesreturnc                 C  sR   t |}| jdkr&ttd| |d}nttd| |||d}|j|d}tt|S )a  Describe series or dataframe.

    Called from pandas.core.generic.NDFrame.describe()

    Parameters
    ----------
    obj: DataFrame or Series
        Either dataframe or series to be described.
    include : 'all', list-like of dtypes or None (default), optional
        A white list of data types to include in the result. Ignored for ``Series``.
    exclude : list-like of dtypes or None (default), optional,
        A black list of data types to omit from the result. Ignored for ``Series``.
    datetime_is_numeric : bool, default False
        Whether to treat datetime dtypes as numeric.
    percentiles : list-like of numbers, optional
        The percentiles to include in the output. All should fall between 0 and 1.
        The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
        75th percentiles.

    Returns
    -------
    Dataframe or series description.
       r   r   r   r   r   r   r   r   )r   )refine_percentilesndimSeriesDescriberr
   DataFrameDescriberdescriber   )r   r   r   r   r   Z	describerresult r(   G/home/mars/bis/venv/lib/python3.8/site-packages/pandas/core/describe.pydescribe_ndframe/   s    
r*   c                   @  s4   e Zd ZdZdddddZedddd	d
ZdS )NDFrameDescriberAbstractzAbstract class for describing dataframe or series.

    Parameters
    ----------
    obj : Series or DataFrame
        Object to be described.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    zDataFrame | Seriesr   r    c                 C  s   || _ || _d S Nr    )selfr   r   r(   r(   r)   __init__m   s    z!NDFrameDescriberAbstract.__init__Sequence[float] | np.ndarrayr   r   c                 C  s   dS )zDo describe either series or dataframe.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.
        Nr(   )r-   r   r(   r(   r)   r&   q   s    z!NDFrameDescriberAbstract.describeN)__name__
__module____qualname____doc__r.   r   r&   r(   r(   r(   r)   r+   b   s   
r+   c                   @  s*   e Zd ZU dZded< dddddZdS )	r$   z2Class responsible for creating series description.r   r   r/   r0   c                 C  s   t | j| j}|| j|S r,   )select_describe_funcr   r   )r-   r   describe_funcr(   r(   r)   r&      s
    zSeriesDescriber.describeN)r1   r2   r3   r4   __annotations__r&   r(   r(   r(   r)   r$   |   s   
r$   c                      sD   e Zd ZdZddddd fddZddd	d
dZdd Z  ZS )r%   a  Class responsible for creating dataobj description.

    Parameters
    ----------
    obj : DataFrame
        DataFrame to be described.
    include : 'all', list-like of dtypes or None
        A white list of data types to include in the result.
    exclude : list-like of dtypes or None
        A black list of data types to omit from the result.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    r   r   r   r!   c                  s>   || _ || _|jdkr*|jjdkr*tdt j||d d S )N   r   z+Cannot describe a DataFrame without columns)r   )r   r   r#   columnssize
ValueErrorsuperr.   )r-   r   r   r   r   	__class__r(   r)   r.      s
    zDataFrameDescriber.__init__r/   r0   c                   sn   |   }g }| D ]$\}}t|| j}|||| qt| t fdd|D ddd}|j |_|S )Nc                   s   g | ]}|j  d dqS )F)copy)Zreindex.0xZ	col_namesr(   r)   
<listcomp>   s     z/DataFrameDescriber.describe.<locals>.<listcomp>r   F)Zaxissort)	_select_dataitemsr5   r   appendreorder_columnsr   r9   r?   )r-   r   dataldesc_seriesr6   dr(   rC   r)   r&      s    zDataFrameDescriber.describec                 C  s   | j dkrP| jdkrPtjg}| jr,|d | jj|d}t|j	dkr| j}n<| j dkrx| jdk	rpd}t
|| j}n| jj| j | jd}|S )zSelect columns to be described.Ndatetime)r   r   allz*exclude must be None when include is 'all')r   r   )r   r   npnumberr   rH   r   Zselect_dtypeslenr9   r;   )r-   Zdefault_includerJ   msgr(   r(   r)   rF      s"    


zDataFrameDescriber._select_data)r1   r2   r3   r4   r.   r&   rF   __classcell__r(   r(   r=   r)   r%      s   r%   zSequence[Series]zlist[Hashable])rK   r   c                 C  sD   g }t dd | D td}|D ] }|D ]}||kr&|| q&q|S )z,Set a convenient order for rows for display.c                 s  s   | ]}|j V  qd S r,   )indexr@   r(   r(   r)   	<genexpr>   s     z"reorder_columns.<locals>.<genexpr>)key)sortedrS   rH   )rK   namesZldesc_indexesZidxnamesnamer(   r(   r)   rI      s    rI   r   zSequence[float])rM   r   r   c                 C  sn   ddl m} t|}ddddg| dg }|  |  |  |  g| |  | 	 g }|||| j
dS )	zDescribe series containing numerical data.

    Parameters
    ----------
    series : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r   countmeanstdminmaxrV   r[   )pandasr   r   r]   r^   r_   r`   quantiletolistra   r[   )rM   r   r   formatted_percentiles
stat_indexrN   r(   r(   r)   describe_numeric_1d   s    
rh   )rJ   percentiles_ignoredr   c           
      C  s   ddddg}|   }t||dk }|dkrH|jd |jd  }}d}ntjtj }}d}|  |||g}ddlm}	 |	||| j	|d	S )
zDescribe series containing categorical data.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    r]   uniquetopfreqr   Nobjectr\   rV   r[   dtype)
value_countsrS   rV   ilocrQ   nanr]   rc   r   r[   )
rJ   ri   rZ   	objcountscount_uniquerk   rl   ro   r'   r   r(   r(   r)   describe_categorical_1d   s    ru   c                 C  s  ddg}|   }t||dk }|  |g}d}|dkr|jd |jd  }}| jj}	|  j	d}
t
|}|jdk	r|	dk	r||	}n
||	}|dddd	g7 }|||t
|
 |	d
t
|
 |	d
g7 }n |ddg7 }|tjtjg7 }d}ddlm} |||| j|dS )zDescribe series containing timestamp data treated as categorical.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    r]   rj   r   Ni8rk   rl   firstlast)tzrm   r\   rn   )rp   rS   r]   rV   rq   dtry   Zdropnavaluesviewr   tzinfoZ
tz_convertZtz_localizer`   ra   rQ   rr   rc   r   r[   )rJ   ri   rZ   rs   rt   r'   ro   rk   rl   ry   Zasintr   r(   r(   r)   $describe_timestamp_as_categorical_1d  s2    

r~   )rJ   r   r   c                 C  sf   ddl m} t|}dddg| dg }|  |  |  g| |  |  g }|||| j	dS )zDescribe series containing datetime64 dtype.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r\   r]   r^   r`   ra   rb   )
rc   r   r   r]   r^   r`   rd   re   ra   r[   )rJ   r   r   rf   rg   rN   r(   r(   r)   describe_timestamp_1dG  s    r   r   )rJ   r   r   c                 C  sZ   t | jrtS t| rtS t| jrD|r,tS tjdt	t
 d tS nt| jrRtS tS dS )zSelect proper function for describing series based on data type.

    Parameters
    ----------
    data : Series
        Series to be described.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    zTreating datetime data as categorical rather than numeric in `.describe` is deprecated and will be removed in a future version of pandas. Specify `datetime_is_numeric=True` to silence this warning and adopt the future behavior now.)
stacklevelN)r   ro   ru   r   rh   r   r   warningswarnFutureWarningr   r~   r   )rJ   r   r(   r(   r)   r5   b  s     


r5   z%np.ndarray[Any, np.dtype[np.float64]]r0   c                 C  sv   | dkrt dddgS t| } t|  d| kr:| d t | } t | }| dk	sZtt|t| k rrt	d|S )z
    Ensure that percentiles are unique and sorted.

    Parameters
    ----------
    percentiles : list-like of numbers, optional
        The percentiles to include in the output.
    Ng      ?g      ?g      ?z%percentiles cannot contain duplicates)
rQ   arraylistr   rH   Zasarrayrj   AssertionErrorrS   r;   )r   Zunique_pctsr(   r(   r)   r"     s    


r"   )/r4   
__future__r   abcr   r   typingr   r   r   r   r	   r
   r   numpyrQ   Zpandas._libs.tslibsr   Zpandas._typingr   Zpandas.util._exceptionsr   Zpandas.util._validatorsr   Zpandas.core.dtypes.commonr   r   r   r   Zpandas.core.reshape.concatr   Zpandas.io.formats.formatr   rc   r   r   r*   r+   r$   r%   rI   rh   ru   r~   r   r5   r"   r(   r(   r(   r)   <module>   s2    3I 0$