U
    +if9                     @  s   d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
  mZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlm Z  d dl!m"Z"m#Z# G dd de"Z$dddddZ%dddddZ&dS )    )annotations)HashableMappingSequenceN)	ArrayLikeDtypeArgDtypeObjReadCsvBuffer)DtypeWarning)find_stack_level)is_categorical_dtypepandas_dtype)union_categoricals)ExtensionDtype)Index
MultiIndex)ensure_index_from_sequences)
ParserBaseis_index_colc                      s   e Zd ZU ded< ded< dd fddZd	d
ddZd	d
ddZd dddddZdddddZdd Z	d!dddddZ
  ZS )"CParserWrapperbool
low_memoryzparsers.TextReader_readerzReadCsvBuffer[str])srcc                   s  t  | | _| }|dd _ jdk	|d<  j|d<  jj	|d< dD ]}||d  qPt
|dd |d< tj|f| _ jj _ jd k} jjd krd  _n"  jj j|\ _ _ _} jd kr jr fdd	t jjD  _ntt jj _ jd d   _ jrȈ  j j jd k	sLt jd
krxt jsx  j t  jt krfdd	t! jD  _t  jt k rȈ  j  " j  #   j _ j$sr jj%dkr<t& jr<d _' ( j j j\} _ _ jd kr<| _ jjd krr|sr jd k	s`td gt  j  _ jj%dk _)d S )Nr   FZallow_leading_colsusecolson_bad_lines)Zstorage_optionsencodingZ
memory_mapcompressionZerror_bad_linesZwarn_bad_linesdtypec                   s   g | ]} j  | qS  )prefix).0iselfr   U/home/mars/bis/venv/lib/python3.8/site-packages/pandas/io/parsers/c_parser_wrapper.py
<listcomp>f   s    z+CParserWrapper.__init__.<locals>.<listcomp>stringc                   s$   g | ]\}}| ks| kr|qS r   r   )r!   r"   nr   r   r%   r&      s    r   T)*super__init__kwdscopypopr   	index_colr   r   valueensure_dtype_objsgetparsersZ
TextReaderr   unnamed_colsnamesheaderZ_extract_multi_indexer_columnsindex_names	col_namesr    rangetable_widthlist
orig_names_evaluate_usecolsAssertionErrorZusecols_dtypesetissubsetZ_validate_usecols_nameslen	enumerateZ_validate_parse_dates_presence_set_noconvert_columns_has_complex_date_colleading_colsr   Z_name_processed_clean_index_namesZ_implicit_index)r$   r   r,   keyZpassed_namesr7   	__class__)r$   r   r%   r+   -   s    


	




	zCParserWrapper.__init__None)returnc                 C  s(   z| j   W n tk
r"   Y nX d S N)r   close
ValueErrorr#   r   r   r%   rM      s    zCParserWrapper.closec                   s^   | j dk	stdd t| j D   fdd| jD }| || j}|D ]}| j| qHdS )z
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
        Nc                 S  s   i | ]\}}||qS r   r   )r!   r"   xr   r   r%   
<dictcomp>   s      z9CParserWrapper._set_noconvert_columns.<locals>.<dictcomp>c                   s   g | ]} | qS r   r   r!   rO   Z
names_dictr   r%   r&      s     z9CParserWrapper._set_noconvert_columns.<locals>.<listcomp>)r<   r>   rB   r5   Z_set_noconvert_dtype_columnsr   Zset_noconvert)r$   Zcol_indicesZnoconvert_columnscolr   rR   r%   rC      s    z%CParserWrapper._set_noconvert_columnsNz
int | Nonez_tuple[Index | MultiIndex | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike]])nrowsrK   c              
     s`  z,| j r| j|}t|}n| j|}W n tk
r   | jrd| _| | j}| j	|| j
| j| jdd\} }|  | j | jd k	r|    fdd| D }| |f Y S |    Y nX d| _| j}| jjr| jrtdg }t| jjD ]H}| j
d kr||}	n|| j
| }	| j|	|dd}	||	 qt|}| jd k	rj| |}| |}t| }
d	d t||
D }| ||\}}nt| }
| jd k	st t!| j}| |}| jd k	r| |}d
d |
D }| jd kr| "|| dd t||
D }| ||\}}| #|||\}}| || j}|||fS )NFr   r   c                   s   i | ]\}}| kr||qS r   r   )r!   kvcolumnsr   r%   rP      s       z'CParserWrapper.read.<locals>.<dictcomp>z file structure not yet supportedT)try_parse_datesc                 S  s   i | ]\}\}}||qS r   r   r!   rV   r"   rW   r   r   r%   rP     s    
  c                 S  s   g | ]}|d  qS )   r   rQ   r   r   r%   r&   0  s     z'CParserWrapper.read.<locals>.<listcomp>c                 S  s   i | ]\}\}}||qS r   r   r[   r   r   r%   rP   4  s    
  )$r   r   Zread_low_memory_concatenate_chunksreadStopIterationZ_first_chunkZ_maybe_dedup_namesr<   Z_get_empty_metar/   r7   r,   r2   Z_maybe_make_multi_index_columnsr8   r   _filter_usecolsitemsrM   r5   rE   rD   NotImplementedErrorr9   r.   _maybe_parse_datesappendr   sortedzipZ_do_date_conversionsr>   r;   Z_check_data_lengthZ_make_index)r$   rT   chunksdatar5   indexZcol_dictZarraysr"   valuesZ	data_tupsZ	date_dataZalldataZ
conv_namesr   rX   r%   r^      sn    









zCParserWrapper.readzSequence[Hashable])r5   rK   c                   s@   |  | j|  d k	r<t|t kr< fddt|D }|S )Nc                   s$   g | ]\}}| ks| kr|qS r   r   )r!   r"   namer)   r   r%   r&   B  s      z2CParserWrapper._filter_usecols.<locals>.<listcomp>)r=   r   rA   rB   )r$   r5   r   r)   r%   r`   >  s    
zCParserWrapper._filter_usecolsc                 C  sL   t | jjd }d }| jjdkrD| jd k	rD| || j| j\}}| _||fS )Nr   )r;   r   r6   rE   r/   rF   r4   )r$   r5   Z	idx_namesr   r   r%   _get_index_namesG  s      zCParserWrapper._get_index_namesTint)ri   rZ   c                 C  s   |r|  |r| |}|S rL   )Z_should_parse_datesZ
_date_conv)r$   rj   ri   rZ   r   r   r%   rc   R  s    
z!CParserWrapper._maybe_parse_dates)N)T)__name__
__module____qualname____annotations__r+   rM   rC   r^   r`   rl   rc   __classcell__r   r   rH   r%   r   )   s   
  g	r   zlist[dict[int, ArrayLike]]dict)rg   rK   c                   s
  t | d  }g }i }|D ]  fdd| D }dd |D }dd |D }t|dkr|t|g }|tkr||t  | }t	|rt
|dd	| < qt|tr| }	|	|| < qt|| < q|rd
|}
dd|
 dg}tj|tt d |S )z
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
    r   c                   s   g | ]}|  qS r   )r.   )r!   chunkrk   r   r%   r&   d  s     z'_concatenate_chunks.<locals>.<listcomp>c                 S  s   h | ]
}|j qS r   rU   )r!   ar   r   r%   	<setcomp>f  s     z&_concatenate_chunks.<locals>.<setcomp>c                 S  s   h | ]}t |s|qS r   )r   rQ   r   r   r%   rw   h  s      r\   F)Zsort_categories, z	Columns (zK) have mixed types. Specify dtype option on import or set low_memory=False.)
stacklevel)r;   keysrA   npZfind_common_typeobjectrd   strr.   r   r   
isinstancer   Zconstruct_array_typeZ_concat_same_typeZconcatenatejoinwarningswarnr
   r   )rg   r5   Zwarning_columnsresultZarrsZdtypesZnumpy_dtypesZcommon_typer   Z
array_typeZwarning_namesZwarning_messager   ru   r%   r]   X  s>    



r]   z*DtypeArg | dict[Hashable, DtypeArg] | Nonez*DtypeObj | dict[Hashable, DtypeObj] | None)r   rK   c                   s0   t  tr fdd D S  dk	r,t S  S )zc
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
    c                   s   i | ]}|t  | qS r   )r   )r!   rV   rU   r   r%   rP     s      z%ensure_dtype_objs.<locals>.<dictcomp>N)r   rs   r   rU   r   rU   r%   r1     s
    
r1   )'
__future__r   typingr   r   r   r   numpyr|   Zpandas._libs.parsersZ_libsr3   Zpandas._typingr   r   r   r	   Zpandas.errorsr
   Zpandas.util._exceptionsr   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.concatr   Zpandas.core.dtypes.dtypesr   Zpandasr   r   Zpandas.core.indexes.apir   Zpandas.io.parsers.base_parserr   r   r   r]   r1   r   r   r   r%   <module>   s$     1@