U
    Mf;                     @   s  d dl m Z  d dlZd dlZd dlZd dlmZmZmZ d dl	m
Z ejdddgddggdd Zd	d
 Zdd Zdd Zdd Zejde eg dedddgdeg dedddgdgdd Zdd Zdd Zdd Zejddd gejd!dd"d"d#gidd"d#gidd d$gfdd"d"d#gidd"d#gid d d"gfd"d"d#gd"d"d%gd&d"d#gd"d%gd&dd d$gfd"d"d#gd"d"d%gd&d"d#gd"d%gd&d d d"gfgd'd( Zd)d* Zejd+d,d-d gd.d/ Zejd0d$gd$d1g d gd2d3 Zd4d5 ZdS )6    )datetimeN)	DataFrameNaTconcatsubsetaBc              	   C   sT   t dddgdddgdddgd}td}tjt|d ||  W 5 Q R X d S )Nr      )Ar   CzIndex(['a'], dtype='object')match)r   reescapepytestraisesKeyErrordrop_duplicates)r   dfmsg r   S/tmp/pip-unpacked-wheel-eb6vo0j3/pandas/tests/frame/methods/test_drop_duplicates.py0test_drop_duplicates_with_misspelled_column_name   s    "
r   c                  C   s  t ddddddddgddddddddgddddddddgtdd} | d	}| d d }t|| | jd	d
d}| jddg }t|| | jd	dd}| jg  }t|| t|dkst| jddddg }| t	d	dg}t|| | d	dg}t|| | jdd
d}| jddddg }t|| | jddd}| jdg }t|| | jd d d	ddgf }| }|d	dg}t|| |jd
d}|jd	dgd
d}t|| |jdd}|jd	dgdd}t|| | d}| j
ddg }t|| | jdd
d}| j
ddg }t|| | d d| d< | d}| j
ddg }t|| | jdd
d}| j
ddg }t|| t dddddddgdddddddgd} | j| jdk }t|  | t ddgddgg} t|  |  t ddgddgg} t|  |  ttjjd d }t | |gd|d gg} t|  |  t | |g||d gg} t|  |  t dd tdD } t| t dgdgd  ggd d!} d"D ] }| j|d dkstqd S )#Nfoobaronetwor	         AAAr   r   Dr    lastkeep      Fr      r   )r    r      r   Zint8E   	   )xyc                 s   s   | ]}|gd  V  qdS )r-   Nr   ).0ir   r   r   	<genexpr>r   s     z'test_drop_duplicates.<locals>.<genexpr>   Tignore_index)firstr"   F)r   ranger   tmassert_frame_equalloclenAssertionErrornparrayilocZastypeindexZiinfoZint64maxr   Z
duplicatedsum)r   resultexpecteddf2r.   r$   r   r   r   test_drop_duplicates   s    	



*"rG   c                  C   sd   t dddgdddgdddggddd	gd
} |  }t||  | d}| d d }t|| d S )Nr	   r   r(   r'   r,   r%   r&   r   bcolumns)r   r   r9   r:   )r   Zresult0Zresult1Z	expected1r   r   r   0test_drop_duplicates_with_duplicate_column_namesy   s    *
rK   c                  C   sZ  t ddddddddgddddddddgddddddddgtd	d
} | d}| jddddg }t|| | jddd}| jddddg }t|| | jddd}| jddg }t|| | ddg}| jddddddg }t|| | jddgdd}| jddddddg }t|| | jddgdd}| jddddg }t|| d S )Nr   r   bazquxr   r   r	   r   r   r   r    r   r%   r"   r#   r(   r&   Fr   r'   r,   )r   r8   r   r@   r9   r:   r   rD   rE   r   r   r   !test_drop_duplicates_for_take_all   s2    	
rO   c                  C   s   t ddddddddgddddddddgddddddddgtdd} | d	}| d d }t|| | jd	d
d}| jddg }t|| | jd	dd}| jg  }t|dkstt|| | jddddg }| d}t|| d S )Nr   r   r   r   r	   r   r   )ZAAZABr   r   r!   rP   r"   r#   r%   r&   Fr   r'   )rP   r   )r   r8   r   r9   r:   r;   r<   r=   rN   r   r   r   test_drop_duplicates_tuple   s(    	


rQ   r   rI   r
   r   rA   c                 C   s8   |   }t||  |  }|j dd t||  d S )NTinplace)r   r9   r:   copy)r   rD   r   r   r   test_drop_duplicates_empty   s
    rV   c                  C   s  t d d ddddddgddddddddgdtjtjtjddddgtdd} | d	}| jd
ddg }t|| | jd	dd}| jdddg }t|| | jd	dd}| jg  }t|| t|d
kst	| d	dg}| jd
dddg }t|| | jd	dgdd}| jddddg }t|| | jd	dgdd}| jdg }t|| t ddddddddgddddddddgdtjtjtjddddgtdd} | d}| d d }t|| | jddd}| jddg }t|| | jddd}| jg  }t|| t|d
ks"t	| ddg}| jd
dddg }t|| | jddgdd}| jddddg }t|| | jddgdd}| jdg }t|| d S )Nr   r   r   r         ?r	   r   r
   r   r   r!   r
   r   r   r'   r"   r#   r%   r&   Fr   r(   r   r,   )
r   r>   nanr8   r   r;   r9   r:   r<   r=   rN   r   r   r   test_drop_duplicates_NA   sh    	

	

rZ   c               
   C   s4  t d d ddddddgdtjtjtjddddgd} | d	}| jd
ddddg }t|| | jd	dd}| jdddddg }t|| | jd	dd}| jddg }t|| | d}| jd
dddg }t|| | jddd}| jddddg }t|| | jddd}| jddg }t|| d S )Nr   r   rL   rM   rW   g       @r'   )r
   r   r
   r   r   r(   r&   r"   r#   r	   r,   r%   Fr   )r   r>   rY   r   r@   r9   r:   rN   r   r   r   $test_drop_duplicates_NA_for_take_all$  s.    

r[   c                  C   s  t ddddddddgddddddddgddddddddgtdd} |  }|jd	d
d}| d d }|}t|| |d kst|  }|jd	dd
d}| jddg }|}t|| |d kst|  }|jd	dd
d}| jg  }|}t|| t|dks
t|d kst|  }|jd	dgd
d}| jddddg }|}t|| |d ksbt|  }|jd	dgdd
d}| jddddg }|}t|| |d kst|  }|jd	dgdd
d}| jdg }|}t|| |d kst| jd d d	ddgf  }| }|jd
d}|d	dg}|}t|| |d ksPt| }|jdd
d}|jd	dgdd}|}t|| |d kst| }|jdd
d}|jd	dgdd}|}t|| |d kstd S )Nr   r   r   r   r	   r   r   rX   r
   TrS   r"   )r$   rT   r%   r&   Fr   r   r'   r(   r   r#   )	r   r8   rU   r   r9   r:   r=   r;   r<   )origr   Zreturn_valuerE   rD   Zorig2rF   r   r   r   test_drop_duplicates_inplaceJ  s~    	
r]   rT   TFz4origin_dict, output_dict, ignore_index, output_indexr   r'   r	   r,   )r
   r   c                 C   s^   t |}t ||d}| r0| }|j|| d n|j|| d}t|| t|t | d S )NrR   )r6   rT   )r   rU   r   r9   r:   )rT   Zorigin_dictZoutput_dictr6   Zoutput_indexr   rE   Z	result_dfr   r   r   !test_drop_duplicates_ignore_index  s    r^   c                 C   s0   t d| gddggtd}| }t|| d S )Nr	   r   r   )Zdtype)r   objectr   r9   r:   )Znulls_fixturer   rD   r   r   r   *test_drop_duplicates_null_in_object_column  s    r`   r$   r7   r"   c                 C   s   t dddddgddtjtjtjgddtjtjdgdddddgtdddtdddtdddttgd}|jD ]6}||g j| d	}|| j| d	}t||	  qnd S )
Nr	   r   r   r'   Zthreer,   i  )r   rH   cder#   )
r   r>   rY   r   r   rJ   r   r9   r:   Zto_frame)r$   r   columnZdropped_frameZdropped_seriesr   r   r   (test_drop_duplicates_series_vs_dataframe  s"    



re   argTruec              	   C   sB   t dddddgi}d}tjt|d |j| d W 5 Q R X d S )Nr   r	   r   r'   zC^For argument "ignore_index" expected type bool, received type .*.$r   r5   )r   r   r   
ValueErrorr   )rf   r   r   r   r   r   -test_drop_duplicates_non_boolean_ignore_index  s    ri   c               	   C   s   t dddgdddgdddgd} d}tjt|d | ddgd	}W 5 Q R X t ddgddgddgdddgd
}t|| d S )Nr	   r   r'   )r   rH   ra   z~In a future version of pandas all arguments of DataFrame.drop_duplicates except for the argument 'subset' will be keyword-onlyr   rH   ra   r"   rR   )r   r9   Zassert_produces_warningFutureWarningr   r:   )r   r   rD   rE   r   r   r   )test_drop_duplicates_pos_args_deprecation  s    "$rk   )r   r   Znumpyr>   r   Zpandasr   r   r   Zpandas._testingZ_testingr9   markZparametrizer   rG   rK   rO   rQ   rV   rZ   r[   r]   r^   r`   re   ri   rk   r   r   r   r   <module>   sP   
	`$


J&P  ..	

