U
    Mf                     @  sZ  d Z ddlmZ ddlmZ ddlmZmZmZm	Z	m
Z
 ddlZddlZddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z1 ddl2m3Z3m4Z4m5Z5m6Z6 ddl7m8Z8m9Z9m:Z:m;Z; ddl<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC ddlDmEZEmFZF ddlGmHZHmIZImJZJ ddlKmLZLmMZMmNZNmOZO erddlPmQZQ ddddddddddddddZRdddd d!d"d#ZSd$dd%d&d'd(ZTdqd dd)d*d+ZUddd dd,d-d.ZVd/ddd0d1d2d3ZWdd4dd5d6dd ddd7d8d9ZXd:d;d;dd<d=d>d?ZYdd@dAdBZZdrdd/dCdDdEZ[dddFdGdHdIZ\dd@dJdKZ]dFdd;dLdMdNdOdPZ^dd@dQdRZ_dLdLd;d;dSdTdUdVZ`dWdX Zadsd;ddMdYdZd[Zbd\d/d]d^d_Zcd`d;dadbdcddZdded;dadbdfdgZed/d;ddMdhdidjZfd%d;ddkdldmZgd%ddFdndodpZhdS )tz~
Functions for preparing various inputs passed to the DataFrame or Series
constructors before passing them to a BlockManager.
    )annotations)abc)TYPE_CHECKINGAnyHashableSequencecastN)lib)	ArrayLikeDtypeObjManager)find_stack_level)"construct_1d_arraylike_from_scalardict_compatmaybe_cast_to_datetimemaybe_convert_platformmaybe_infer_to_datetimelikemaybe_upcast)	is_1d_only_ea_dtypeis_datetime64tz_dtypeis_datetime_or_timedelta_dtypeis_dtype_equalis_extension_array_dtypeis_integer_dtypeis_list_likeis_named_tupleis_object_dtype)ExtensionDtype)ABCDataFrame	ABCSeries)
algorithmscommon)CategoricalDatetimeArrayExtensionArrayTimedeltaArray)ensure_wrapped_if_datetimelikeextract_arrayrange_to_ndarraysanitize_array)DatetimeIndexIndexTimedeltaIndexdefault_indexensure_indexget_objs_combined_axisunion_indexes)ArrayManagerSingleArrayManager)BlockPlacementensure_block_shapenew_block_2d)BlockManagerSingleBlockManager create_block_manager_from_blocks'create_block_manager_from_column_arrays)MaskedRecordsT)dtypeverify_integritytypconsolidater+   zDtypeObj | Noneboolz
str | Noner   )columnsr;   r<   r=   r>   returnc          	      C  s   |r,|dkrt | }nt|}t| ||} nRt|}dd | D } | D ]6}t|tjtfrt|jdkstt|t|krFt	dqFt|}t|t| krt	d||g}|dkrt
| ||dS |d	krt| ||gS t	d
| ddS )zs
    Segregate Series based on type and coerce into matrices.

    Needs to handle a lot of exceptional cases.
    Nc                 S  s   g | ]}t |d dqS )TZextract_numpy)r'   .0x rF   F/tmp/pip-unpacked-wheel-eb6vo0j3/pandas/core/internals/construction.py
<listcomp>   s     z!arrays_to_mgr.<locals>.<listcomp>   zYArrays must be 1-dimensional np.ndarray or ExtensionArray with length matching len(index)z#len(arrays) must match len(columns)block)r>   array2'typ' needs to be one of {'block', 'array'}, got '')_extract_indexr.   _homogenize
isinstancenpndarrayr$   ndimlen
ValueErrorr9   r1   )	arraysr@   indexr;   r<   r=   r>   arraxesrF   rF   rG   arrays_to_mgrf   s<    
  rZ   z(MaskedRecords | np.recarray | np.ndarraystr)datar;   copyr=   c                 C  s   t | }|dkr tt|}nt|}|dk	r8t|}t||\}}t| tj jrjt	d| } t
| |}	n|}	t|	||t|\}}|dkr|}t|||||d}
|r|
 }
|
S )zA
    Extract from a masked rec array and create the manager.
    Nr:   r;   r=   )maZgetdatar-   rT   r.   	to_arraysrP   rQ   ZMaskedArrayr   fill_masked_arraysreorder_arraysrZ   r]   )r\   rW   r@   r;   r]   r=   fdatarV   arr_columns
new_arraysmgrrF   rF   rG   rec_array_to_mgr   s.    

   rg   r:   zlist[np.ndarray])r\   rd   rA   c                 C  sV   g }|D ]H}| | }|j }t|}| rFt||dd\}}|||< || q|S )zA
    Convert numpy MaskedRecords to ensure mask is softened.
    T)
fill_valuer]   )rh   r_   Zgetmaskarrayanyr   append)r\   rd   re   colrX   ZfvmaskrF   rF   rG   ra      s    
ra   )r=   r]   c                   s  |dkrXt  tr }q jdkrBt j jd  jd dd}qt jd  j}n|dkrt  t	rp }q jdkrȇ fddt
t jd D }|rd	d |D }t	| jd  jd g}q  }|r| }t|g jg}ntd
| d|S )z
    Convert to specific type of Manager. Does not copy if the type is already
    correct. Does not guarantee a copy otherwise. `copy` keyword only controls
    whether conversion from Block->ArrayManager copies the 1D arrays.
    rJ      r   rI   )r=   rK   c                   s   g | ]}  |qS rF   )Ziget_valuesrD   irf   rF   rG   rH      s     zmgr_to_mgr.<locals>.<listcomp>c                 S  s   g | ]}|  qS rF   r]   rD   rX   rF   rF   rG   rH     s     rL   rM   )rP   r6   rS   rZ   rV   rY   r7   Z
from_arrayrW   r1   rangerT   Zinternal_valuesr]   r2   rU   )rf   r=   r]   Znew_mgrrV   rK   rF   rp   rG   
mgr_to_mgr   s2    

   

 rt   )r;   r]   r=   rA   c                   st  t trh|d kr(jd k	r(tjg}|d kr8j}n
|tsh|d k	rht|rhtjdt	d|dkrtdn|}t
dd }t|st|rt tjtfrΈjdkr·fddtjd D ng|d krttt}nt|}t||||d	S t|rPt|sPtd
d|r6 jdkr\ddnt|d|d k	rtj|sj} }	t|ojjdk }
t|	d |||
d|tjd jd ||d\}}t|| |dkrt jj!t"r
tj#t	d|d kr>t$jr>fddtjd D }n0t%jrRt&fddtjd D }|rdd |D }t'|||gddS j(|d kr.t$jr.t)}dd |D }t*dd t+||D r
dd |D   fddtt D }n"t,t-t|}t.|d}|g}n"t,t-t|}t.|d}|g}t|dkrbg }t/|||gddS )N)r   rI   r;   rK   Fr;   rI   c                   s   g | ]} d d |f qS NrF   rD   nvaluesrF   rG   rH   2  s   z"ndarray_to_mgr.<locals>.<listcomp>r^   TrB   rq   fr;   r]   Zraise_cast_failurer   )rW   r@   c                   s$   g | ]}t t d d |f qS rv   )r&   r   rn   ry   rF   rG   rH   g  s   c                   s   g | ]} d d |f qS rv   rF   rn   ry   rF   rG   rH   p  s     c                 S  s   g | ]}|  qS rF   rq   rr   rF   rF   rG   rH   s  s     )r<   c                 S  s   g | ]}t |qS rF   )r   rC   rF   rF   rG   rH   ~  s     c                 s  s   | ]\}}||k	V  qd S rv   rF   )rD   rE   yrF   rF   rG   	<genexpr>  s     z!ndarray_to_mgr.<locals>.<genexpr>c                 S  s   g | ]}t |d qS )rm   )r4   )rD   dvalrF   rF   rG   rH     s     c                   s    g | ]}t  | t|d qS )Z	placement)r5   r3   rw   )
dvals_listrF   rG   rH     s   r   )0rP   r   namer+   rW   reindexrT   rQ   emptyobjectgetattrr   rR   r$   rS   rs   shaper.   rZ   r   r'   r]   reshape_prep_ndarrayr   r;   Zravelr   kindr)   	_get_axes!_check_values_indices_shape_match
issubclasstyper[   rK   r   r   r&   r1   Tlistri   zipr3   slicer5   r8   )rz   rW   r@   r;   r]   r=   Zcopy_on_sanitizeZvdtyper   ZflatZrcfrV   Zobj_columnsZmaybe_datetimeZblock_valuesZbpnbrF   )r   rz   rG   ndarray_to_mgr  s    



    
   




  r   z
np.ndarrayNone)rz   rW   r@   rA   c                 C  sh   | j d t|ks$| j d t|krd| j d dkr:td| j }t|t|f}td| d| dS )z\
    Check that the shape implied by our axes matches the actual shape of the
    data.
    rI   r   z)Empty data passed with indices specified.zShape of passed values is z, indices imply N)r   rT   rU   )rz   rW   r@   ZpassedZimpliedrF   rF   rG   r     s    $r   rJ   )r;   r=   r]   dict)r\   r;   r=   r]   rA   c                  sl  |dk	rddl m} | |td}| }|dkrDt||  }nt|}| rt|s|dk	r|j	 d }	|	D ]"}
t
|j|
 ||d}||j|
< qvnTtd}ttjt||| }|rЈg| }nfddt|D }||j|< t|}t|}nBt  }t|} fd	d|D }d
d |D }dd |D }|rXdd |D }t||||||dS )z
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.

    Used in DataFrame.__init__
    Nr   )Series)rW   r;   ru   r   c                   s   g | ]}   qS rF   rq   )rD   _)valrF   rG   rH     s     zdict_to_mgr.<locals>.<listcomp>c                   s   g | ]}t  | qS rF   )comZmaybe_iterable_to_listrD   kr\   rF   rG   rH     s     c                 S  s    g | ]}t |ts|n|jqS rF   )rP   r+   _datarr   rF   rF   rG   rH     s     c                 S  s    g | ]}t |s|n| qS rF   )r   r]   rr   rF   rF   rG   rH     s    c                 S  s.   g | ]&}t |d rt|jts"|n| qS ru   )hasattrrP   r;   r   r]   rC   rF   rF   rG   rH     s   
)r;   r=   r>   )Zpandas.core.seriesr   r   ZisnarN   r.   ri   r   rz   Znonzeror)   ZiatrQ   r;   r   nanrT   sumrs   locr   keysr+   rZ   )r\   rW   r@   r;   r=   r]   r   rV   missingZmidxsro   rX   Z	nan_dtypeZnmissingrhsr   rF   )r\   r   rG   dict_to_mgr  sD    



r   r   zIndex | Nonez$tuple[list[ArrayLike], Index, Index])r\   r@   rW   r;   rA   c                 C  s   t | d r"|dkr"t| d j}t| ||d\}}t|}|dkrt| d tr\t| }n,t| d tr|tt	| d }ntt	| }|||fS )zA
    Convert a single sequence of arrays to multiple arrays.
    r   Nru   )
r   r.   _fieldsr`   rP   r   _get_names_from_indexr"   r-   rT   )r\   r@   rW   r;   rV   rF   rF   rG   nested_data_to_arrays  s    
r   )rA   c                 C  sB   t | dko@t| d o@t| d dddko@t| to>| jdk S )z7
    Check if we should use nested_data_to_arrays.
    r   rS   rI   rm   )rT   r   r   rP   r$   rS   r   rF   rF   rG   treat_as_nested  s    
r   )r]   rA   c                   s0  t | tst | tr$| jd kr$| j} t | tjttfst	| dkrPtj
dtdS t | trpt| }|dtjf S dd  t| d rt fdd| D } qt | d tjr| d jdkrt fd	d| D } q | } ntj| |d
} | jdkr| | jd df} n| jdkr,td| j | S )Nr   )r   r   ru   .c                 S  s.   t | rt| tr| S t| dd} t| }|S )NTrB   )r   rP   r   r'   r   )vresrF   rF   rG   convert4  s
    z_prep_ndarray.<locals>.convertc                   s   g | ]} |qS rF   rF   rD   r   r   rF   rG   rH   @  s     z!_prep_ndarray.<locals>.<listcomp>c                   s   g | ]} |qS rF   rF   r   r   rF   rG   rH   C  s     rq   rI   rm   zMust pass 2-d input. shape=)rP   r%   r#   tzZ_ndarrayrQ   rR   r   r+   rT   r   r   rs   r(   Znewaxisr   rK   rS   r   r   rU   )rz   r]   rX   rF   r   rG   r   %  s0    


r   zlist[ArrayLike])rW   r;   rA   c                 C  s   d }g }| D ]}t |trP|d k	r0|j|dd}|j|k	rH|j|dd}|j}nnt |tr|d krl|d}t |ttfrt	|}nt|}t
j||jtjd}t|||ddd}t|| || q|S )NFrq   O)defaultr}   )rP   r   ZastyperW   r   Z_valuesr   r*   r,   r   r	   Zfast_multigetrQ   r   r)   r   Zrequire_length_matchrj   )r\   rW   r;   ZoindexZhomogenizedr   rF   rF   rG   rO   T  s4    




    rO   c           
      C  s  d}t | dkrtg }n`g }g }d}d}d}| D ]}t|trTd}||j q4t|trvd}|t|  q4t	|rt
|dddkrd}|t | q4t|tjr4|jdkr4tdq4|s|stdn|rt|}n|rt|dd	}|r|tt|}t |dkrtd
|r(td|rp|dk	s<t|d t |kr|d|d  dt | }	t|	nt|d }t|S )zR
    Try to infer an Index from the passed data, raise ValueError on failure.
    Nr   FTrS   rI   z,Per-column arrays must each be 1-dimensionalz2If using all scalar values, you must pass an indexsortz%All arrays must be of the same lengthz<Mixing dicts with non-Series may lead to ambiguous ordering.zarray length z does not match index length )rT   r+   rP   r   rj   rW   r   r   r   r   r   rQ   rR   rS   rU   r0   setAssertionErrorr-   r.   )
r\   rW   Zraw_lengthsZindexesZhave_raw_arraysZhave_seriesZ
have_dictsr   lengthsmsgrF   rF   rG   rN   {  sT    





rN   intztuple[list[ArrayLike], Index])rV   rd   r@   lengthrA   c           	      C  s   |dk	rx| |sxdgt| }||}t|D ]<\}}|dkr^tj|td}|tj n| | }|||< q2|} |}| |fS )zB
    Pre-emptively (cheaply) reindex arrays with new columns.
    Nr{   ru   )	equalsrT   get_indexer	enumeraterQ   r   r   fillr   )	rV   rd   r@   r   re   indexerro   r   rX   rF   rF   rG   rb     s    


rb   c                 C  s   t dd | D }|s"tt| S ttt| }d}t| D ]<\}}t|dd }|d k	rd|||< q>d| ||< |d7 }q>t|S )Nc                 s  s   | ]}t |d ddk	V  qdS )r   N)r   )rD   srF   rF   rG   r     s     z(_get_names_from_index.<locals>.<genexpr>r   r   zUnnamed rI   )ri   r-   rT   r   rs   r   r   r+   )r\   Zhas_some_namerW   countro   r   rx   rF   rF   rG   r     s    

r   ztuple[Index, Index])NKrW   r@   rA   c                 C  s<   |d krt | }nt|}|d kr,t |}nt|}||fS rv   )r-   r.   )r   r   rW   r@   rF   rF   rG   r     s    

r   c                 C  s   ddl m} tt|| S )a  
    Converts a list of dataclass instances to a list of dictionaries.

    Parameters
    ----------
    data : List[Type[dataclass]]

    Returns
    --------
    list_dict : List[dict]

    Examples
    --------
    >>> from dataclasses import dataclass
    >>> @dataclass
    ... class Point:
    ...     x: int
    ...     y: int

    >>> dataclasses_to_dicts([Point(1, 2), Point(2, 3)])
    [{'x': 1, 'y': 2}, {'x': 2, 'y': 3}]

    r   )asdict)Zdataclassesr   r   map)r\   r   rF   rF   rG   dataclasses_to_dicts  s    r   )r@   r;   rA   c                   s8  t trV dk	r. fddtjD }n j fddtt D }| fS tst tjr؈jj	dk	rt
jj	 fdd D }tdkrt|D ]&\}}|jdkr|dddf ||< q| fS g t
g fS t d trdtjdtt d	  dkrtt n>t tkr:td
n"t tk r\dt   fS t tjrjj	dk	rttjj	 fdd D }| fS t d ttfrt}nXt d tjrt \} n6t d tr
t \} ndd D t}t| |\} | fS )a	  
    Return list of arrays, columns.

    Returns
    -------
    list[ArrayLike]
        These will become columns in a DataFrame.
    Index
        This will become frame.columns.

    Notes
    -----
    Ensures that len(result_arrays) == len(result_index).
    Nc                   s(   g | ] \}}| krj |d djqS rI   )ZaxisZ_ixsrz   )rD   ro   rk   r@   r\   rF   rG   rH   2  s   zto_arrays.<locals>.<listcomp>c                   s   g | ]} j |d djqS r   r   rn   r   rF   rG   rH   9  s     c                   s   g | ]} | qS rF   rF   )rD   r   r   rF   rG   rH   B  s     r   rm   zThe behavior of DataFrame([categorical, ...]) is deprecated and in a future version will be changed to match the behavior of DataFrame([any_listlike, ...]). To retain the old behavior, pass as a dictionary DataFrame({col: categorical, ..}))
stacklevelzlen(columns) > len(data)c                   s   g | ]} | qS rF   rF   r   r   rF   rG   rH   e  s     c                 S  s   g | ]}t |qS rF   )tuplerC   rF   rF   rG   rH   p  s     )rP   r   r   r@   rs   rT   rQ   rR   r;   namesr.   rS   r"   warningswarnFutureWarningr   r-   rU   r+   r   r   _list_to_arraysr   Mapping_list_of_dict_to_arraysr   _list_of_series_to_arrays_finalize_columns_and_data)r\   r@   r;   rV   ro   rX   contentrF   r   rG   r`     s\    

	


r`   zlist[tuple | list])r\   rA   c                 C  s(   t | d trt| }n
t| }|S )Nr   )rP   r   r	   Zto_object_array_tuplesZto_object_array)r\   r   rF   rF   rG   r   w  s    
r   r   ztuple[np.ndarray, Index])r\   r@   rA   c           
      C  s   |d kr"dd | D }t |dd}i }g }| D ]r}t|dd }|d krRtt|}t||krl|t| }n|| }|t|< t|dd}|t	|| q.t
|}	|	|fS )Nc                 S  s   g | ]}t |ttfr|qS rF   )rP   r   r   rC   rF   rF   rG   rH     s      z-_list_of_series_to_arrays.<locals>.<listcomp>Fr   rW   TrB   )r/   r   r-   rT   idr   r'   rj   r    Ztake_ndrQ   Zvstack)
r\   r@   Z	pass_dataZindexer_cacheZaligned_valuesr   rW   r   rz   r   rF   rF   rG   r     s     
r   z
list[dict]c                 C  sf   |dkr@dd | D }t dd | D  }tj||d}t|}dd | D } t| t|}||fS )a  
    Convert list of dicts to numpy arrays

    if `columns` is not passed, column names are inferred from the records
    - for OrderedDict and dicts, the column names match
      the key insertion-order from the first record to the last.
    - For other kinds of dict-likes, the keys are lexically sorted.

    Parameters
    ----------
    data : iterable
        collection of records (OrderedDict, dict)
    columns: iterables or None

    Returns
    -------
    content : np.ndarray[object, ndim=2]
    columns : Index
    Nc                 s  s   | ]}t | V  qd S rv   )r   r   rC   rF   rF   rG   r     s     z*_list_of_dict_to_arrays.<locals>.<genexpr>c                 s  s   | ]}t |tV  qd S rv   )rP   r   rD   drF   rF   rG   r     s     r   c                 S  s$   g | ]}t |tkr|nt|qS rF   )r   r   r   rF   rF   rG   rH     s     z+_list_of_dict_to_arrays.<locals>.<listcomp>)ri   r	   Zfast_unique_multiple_list_genr.   Zdicts_to_arrayr   )r\   r@   genr   Zpre_colsr   rF   rF   rG   r     s    r   )r   r@   r;   rA   c              
   C  sr   t | j}zt||}W n, tk
rD } zt||W 5 d}~X Y nX t|rj|d jtjkrjt	||d}||fS )zG
    Ensure we have valid columns, cast object dtypes if possible.
    Nr   ru   )
r   r   _validate_or_indexify_columnsr   rU   rT   r;   rQ   Zobject__convert_object_array)r   r@   r;   contentserrrF   rF   rG   r     s    
r   )r   r@   rA   c                 C  s   |dkrt t| }nt|to0tdd |D }|sdt|t| krdtt| dt|  dn\|rtdd |D dkrtd	n8|rt|d
 t| krtt|d
  dt|  d|S )a  
    If columns is None, make numbers as column names; Otherwise, validate that
    columns have valid length.

    Parameters
    ----------
    content : list of np.ndarrays
    columns : Index or None

    Returns
    -------
    Index
        If columns is None, assign positional column index value as columns.

    Raises
    ------
    1. AssertionError when content is not composed of list of lists, and if
        length of columns is not equal to length of content.
    2. ValueError when content is list of lists, but length of each sub-list
        is not equal
    3. ValueError when content is list of lists, but length of sub-list is
        not equal to length of content
    Nc                 s  s   | ]}t |tV  qd S rv   )rP   r   rD   rk   rF   rF   rG   r      s    z0_validate_or_indexify_columns.<locals>.<genexpr>z! columns passed, passed data had z columnsc                 S  s   h | ]}t |qS rF   )rT   r   rF   rF   rG   	<setcomp>  s     z0_validate_or_indexify_columns.<locals>.<setcomp>rI   z<Length of columns passed for MultiIndex columns is differentr   )r-   rT   rP   r   allr   rU   )r   r@   Z
is_mi_listrF   rF   rG   r     s&    r   )r   r;   rA   c                   s"   fdd  fdd| D }|S )z
    Internal function to convert object array.

    Parameters
    ----------
    content: List[np.ndarray]
    dtype: np.dtype or ExtensionDtype

    Returns
    -------
    List[ArrayLike]
    c                   s&    t dkr"t| } t|  } | S )Nr   )rQ   r;   r	   Zmaybe_convert_objectsr   )rX   ru   rF   rG   r   +  s    

z&_convert_object_array.<locals>.convertc                   s   g | ]} |qS rF   rF   rr   r   rF   rG   rH   1  s     z)_convert_object_array.<locals>.<listcomp>rF   )r   r;   rV   rF   )r   r;   rG   r     s    r   )T)T)N)i__doc__
__future__r   collectionsr   typingr   r   r   r   r   r   ZnumpyrQ   Znumpy.mar_   Zpandas._libsr	   Zpandas._typingr
   r   r   Zpandas.util._exceptionsr   Zpandas.core.dtypes.castr   r   r   r   r   r   Zpandas.core.dtypes.commonr   r   r   r   r   r   r   r   r   Zpandas.core.dtypes.dtypesr   Zpandas.core.dtypes.genericr   r   Zpandas.corer    r!   r   Zpandas.core.arraysr"   r#   r$   r%   Zpandas.core.constructionr&   r'   r(   r)   Zpandas.core.indexes.apir*   r+   r,   r-   r.   r/   r0   Z#pandas.core.internals.array_managerr1   r2   Zpandas.core.internals.blocksr3   r4   r5   Zpandas.core.internals.managersr6   r7   r8   r9   Znumpy.ma.mrecordsr:   rZ   rg   ra   rt   r   r   r   r   r   r   rO   rN   rb   r   r   r   r`   r   r   r   r   r   r   rF   rF   rF   rG   <module>   sl    ,$	?2) P/'?" Y$%: