U
    +if'                     @  sN  d Z ddlmZ ddlZddlmZmZmZmZm	Z	 ddl
ZddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZmZ erdd
lmZmZmZmZmZ dZ ddddddZ!dde dfdddddddddZ"de fdddddddZ#d dddd!d"d#Z$de dfd$ddddd%d&d'Z%de dfdddddd%d(d)Z&dS )*z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKINGHashableIterableIteratorcast)lib)hash_object_array)	ArrayLike)is_categorical_dtypeis_list_like)ABCDataFrameABCIndexABCMultiIndex	ABCSeries)Categorical	DataFrameIndex
MultiIndexSeriesZ0123456789123456zIterator[np.ndarray]intz
np.ndarray)arrays	num_itemsreturnc                 C  s   zt | }W n$ tk
r0   tjg tjd Y S X t|g| } td}t|td }t| D ]6\}}|| }||N }||9 }|td| | 7 }qf|d |kst	d|td7 }|S )z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    dtypeiCB ixV4 iXB    zFed in wrong num_itemsi| )
nextStopIterationnparrayuint64	itertoolschainZ
zeros_like	enumerateAssertionError)r   r   firstZmultoutiaZ	inverse_i r*   K/home/mars/bis/venv/lib/python3.8/site-packages/pandas/core/util/hashing.pycombine_hash_arrays.   s    
r,   Tutf8zIndex | DataFrame | Seriesboolstrz
str | Noner   )objindexencodinghash_key
categorizer   c                   s  ddl m} dkrtttr8|tdddS ttrptj j	ddd}||ddd}ntt
rtj j	ddd}|rȇ fd	d
dD }t|g|}	t|	d}||jddd}nttrj fdd
 D }
tj}|rL fdd
dD }|d7 }t|
|}dd
 |D }
t|
|}||jddd}ntdt |S )a~  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object
    r   )r   Nr!   F)r   copyr5   )r1   r   r5   c                 3  s$   | ]}t jd  djV  qdS F)r1   r2   r3   r4   Nhash_pandas_objectr1   _values.0_r4   r2   r3   r0   r*   r+   	<genexpr>|   s   z%hash_pandas_object.<locals>.<genexpr>N   c                 3  s"   | ]\}}t |j V  qd S r@   )
hash_arrayr:   )r<   r=   Zseries)r4   r2   r3   r*   r+   r?      s   c                 3  s$   | ]}t jd  djV  qdS r7   r8   r;   r>   r*   r+   r?      s   r   c                 s  s   | ]
}|V  qd S r@   r*   )r<   xr*   r*   r+   r?      s     zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer   hash_tuplesr   rB   r:   astyper   r"   r#   r,   r1   r   itemslencolumns	TypeErrortype)r0   r1   r2   r3   r4   r   hZserZ
index_iterr   hashesr   Zindex_hash_generator_hashesr*   r>   r+   r9   N   sN    

 
 




r9   z+MultiIndex | Iterable[tuple[Hashable, ...]])valsr2   r3   r   c                   sz   t | stdddlm m} t| ts6|| n|  fddtj	D }fdd|D }t
|t|}|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )r   r   c                   s(   g | ] } j | j| d ddqS )FTZorderedZfastpath)codesZlevels)r<   level)r   mir*   r+   
<listcomp>   s   zhash_tuples.<locals>.<listcomp>c                 3  s   | ]}t | d V  qdS )r2   r3   N)_hash_categorical)r<   catrW   r*   r+   r?      s    zhash_tuples.<locals>.<genexpr>)r   rL   rD   r   r   rF   r   from_tuplesrangeZnlevelsr,   rJ   )rQ   r2   r3   r   Zcat_valsrO   rN   r*   )r   r2   r3   rU   r+   rG      s    
rG   r   )rY   r2   r3   r   c                 C  sd   t | jj}t|||dd}|  }t|r<|| j}nt j	t|dd}|
 r`tj||< |S )a  
    Hash a Categorical by hashing its categories, and then mapping the codes
    to the hashes

    Parameters
    ----------
    cat : Categorical
    encoding : str
    hash_key : str

    Returns
    -------
    ndarray[np.uint64] of hashed values, same size as len(c)
    F)r4   r!   r   )r   Zasarray
categoriesr:   rB   ZisnarJ   ZtakerS   Zzerosanyr   Zu8max)rY   r2   r3   valueshashedmaskresultr*   r*   r+   rX      s    	
rX   r
   )rQ   r2   r3   r4   r   c                 C  s\   t | dstd| j}t|r6td| } t| ||S t| tjsN| 	 \} }t
| |||S )aK  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.
    r   zmust pass a ndarray-liker   )hasattrrL   r   r   r   rX   rF   r   ZndarrayZ_values_for_factorize_hash_ndarray)rQ   r2   r3   r4   r   r=   r*   r*   r+   rB      s    

rB   c                 C  sv  | j }t|tjr4tt| dtt|   S t|trJ| 	d} nt
|jtjtjfrt| dj	ddd} nt
|jtjr|jdkr| d| j j 	d} n|rdd	lm}m}m} || dd
\}}	||||	ddd}
t|
||S zt| ||} W n. tk
r0   t| 	t	t||} Y nX | | d? N } | td9 } | | d? N } | td9 } | | d? N } | S )z!
    See hash_array.__doc__.
       u8i8Fr6      ur   )r   r   	factorize)sortTrR      l   e9z    l   b&&&	    )r   r   Z
issubdtypeZ
complex128rB   realimagrF   r.   rH   
issubclassrM   Z
datetime64Ztimedelta64viewnumberitemsizerD   r   r   ri   Z_with_inferrX   r	   rL   r/   objectr!   )rQ   r2   r3   r4   r   r   r   ri   rS   r\   rY   r*   r*   r+   rc   (  sB    	 
     
rc   )'__doc__
__future__r   r"   typingr   r   r   r   r   numpyr   Zpandas._libsr   Zpandas._libs.hashingr	   Zpandas._typingr
   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.genericr   r   r   r   rD   r   r   r   r   r   rE   r,   r9   rG   rX   rB   rc   r*   r*   r*   r+   <module>   s<   
"^.(,