U
    f/e:                     @   s  d dl m Z  d dlZd dlZd dlZd dlmZmZ d dlm	Z
 ejdddgddggdd Zd	d
 Zdd Zdd Zdd Zejde eg dedddgdeg dedddgdgdd Zdd Zdd Zdd Zejddd gejd!dd"d"d#gidd"d#gidd d$gfdd"d"d#gidd"d#gid d d"gfd"d"d#gd"d"d%gd&d"d#gd"d%gd&dd d$gfd"d"d#gd"d"d%gd&d"d#gd"d%gd&d d d"gfgd'd( Zd)d* Zejd+d,d-d gd.d/ Zejd0d$gd$d1g d gd2d3 Zd4d5 ZdS )6    )datetimeN)	DataFrameNaTsubsetaBc              	   C   sT   t dddgdddgdddgd}td}tjt|d ||  W 5 Q R X d S )Nr      )Ar   CzIndex(['a'], dtype='object')match)r   reescapepytestraisesKeyErrordrop_duplicates)r   dfmsg r   S/tmp/pip-unpacked-wheel-tiezk1ph/pandas/tests/frame/methods/test_drop_duplicates.py0test_drop_duplicates_with_misspelled_column_name   s    "
r   c                  C   s  t ddddddddgddddddddgddddddddgtdd} | d	}| d d }t|| | jd	d
d}| jddg }t|| | jd	dd}| jg  }t|| t|dkst| jddddg }| t	d	dg}t|| | d	dg}t|| | jdd
d}| jddddg }t|| | jddd}| jdg }t|| | jd d d	ddgf }| }|d	dg}t|| |jd
d}|jd	dgd
d}t|| |jdd}|jd	dgdd}t|| | d}| j
ddg }t|| | jdd
d}| j
ddg }t|| | d d| d< | d}| j
ddg }t|| | jdd
d}| j
ddg }t|| t dddddddgdddddddgd} | j| jdk }t|  | t ddgddgg} t|  |  t ddgddgg} t|  |  ttjjd d }t | |gd|d gg} t|  |  t | |g||d gg} t|  |  t dd tdD } | jdgdgd  gd d!} d"D ] }| j|d dkstqd S )#Nfoobaronetwor         AAAr   r
   Dr   lastkeep      Fr      r   )r   r      r
   Zint8E   	   )xyc                 s   s   | ]}|gd  V  qdS )r,   Nr   ).0ir   r   r   	<genexpr>q   s     z'test_drop_duplicates.<locals>.<genexpr>   Tignore_index)firstr!   F)r   ranger   tmassert_frame_equalloclenAssertionErrornparrayilocZastypeindexZiinfoZint64maxappendZ
duplicatedsum)r   resultexpecteddf2r-   r#   r   r   r   test_drop_duplicates   s    	



*rG   c                  C   sd   t dddgdddgdddggddd	gd
} |  }t||  | d}| d d }t|| d S )Nr   r   r'   r&   r+   r$   r%   r   bcolumns)r   r   r8   r9   )r   Zresult0Zresult1Z	expected1r   r   r   0test_drop_duplicates_with_duplicate_column_namesx   s    *
rK   c                  C   sZ  t ddddddddgddddddddgddddddddgtd	d
} | d}| jddddg }t|| | jddd}| jddddg }t|| | jddd}| jddg }t|| | ddg}| jddddddg }t|| | jddgdd}| jddddddg }t|| | jddgdd}| jddddg }t|| d S )Nr   r   bazquxr   r   r   r   r   r   r   r   r$   r!   r"   r'   r%   Fr   r&   r+   )r   r7   r   r?   r8   r9   r   rD   rE   r   r   r   !test_drop_duplicates_for_take_all   s2    	
rO   c                  C   s   t ddddddddgddddddddgddddddddgtdd} | d	}| d d }t|| | jd	d
d}| jddg }t|| | jd	dd}| jg  }t|dkstt|| | jddddg }| d}t|| d S )Nr   r   r   r   r   r   r   )ZAAZABr   r
   r    rP   r!   r"   r$   r%   Fr   r&   )rP   r   )r   r7   r   r8   r9   r:   r;   r<   rN   r   r   r   test_drop_duplicates_tuple   s(    	


rQ   r   rI   r	   r
   r@   c                 C   s8   |   }t||  |  }|j dd t||  d S )NTinplace)r   r8   r9   copy)r   rD   r   r   r   test_drop_duplicates_empty   s
    rV   c                  C   s  t d d ddddddgddddddddgdtjtjtjddddgtdd} | d	}| jd
ddg }t|| | jd	dd}| jdddg }t|| | jd	dd}| jg  }t|| t|d
kst	| d	dg}| jd
dddg }t|| | jd	dgdd}| jddddg }t|| | jd	dgdd}| jdg }t|| t ddddddddgddddddddgdtjtjtjddddgtdd} | d}| d d }t|| | jddd}| jddg }t|| | jddd}| jg  }t|| t|d
ks"t	| ddg}| jd
dddg }t|| | jddgdd}| jddddg }t|| | jddgdd}| jdg }t|| d S )Nr   r   r   r         ?r   r   r	   r   r
   r    r	   r   r   r&   r!   r"   r$   r%   Fr   r'   r
   r+   )
r   r=   nanr7   r   r:   r8   r9   r;   r<   rN   r   r   r   test_drop_duplicates_NA   sh    	

	

rZ   c               
   C   s4  t d d ddddddgdtjtjtjddddgd} | d	}| jd
ddddg }t|| | jd	dd}| jdddddg }t|| | jd	dd}| jddg }t|| | d}| jd
dddg }t|| | jddd}| jddddg }t|| | jddd}| jddg }t|| d S )Nr   r   rL   rM   rW   g       @r&   )r	   r
   r	   r   r   r'   r%   r!   r"   r   r+   r$   Fr
   )r   r=   rY   r   r?   r8   r9   rN   r   r   r   $test_drop_duplicates_NA_for_take_all#  s.    

r[   c                  C   s  t ddddddddgddddddddgddddddddgtdd} |  }|jd	d
d}| d d }|}t|| |d kst|  }|jd	dd
d}| jddg }|}t|| |d kst|  }|jd	dd
d}| jg  }|}t|| t|dks
t|d kst|  }|jd	dgd
d}| jddddg }|}t|| |d ksbt|  }|jd	dgdd
d}| jddddg }|}t|| |d kst|  }|jd	dgdd
d}| jdg }|}t|| |d kst| jd d d	ddgf  }| }|jd
d}|d	dg}|}t|| |d ksPt| }|jdd
d}|jd	dgdd}|}t|| |d kst| }|jdd
d}|jd	dgdd}|}t|| |d kstd S )Nr   r   r   r   r   r   r   rX   r	   TrS   r!   )r#   rT   r$   r%   Fr   r   r&   r'   r
   r"   )	r   r7   rU   r   r8   r9   r<   r:   r;   )origr   Zreturn_valuerE   rD   Zorig2rF   r   r   r   test_drop_duplicates_inplaceI  s~    	
r]   rT   TFz4origin_dict, output_dict, ignore_index, output_indexr   r&   r   r+   )r	   r   c                 C   s^   t |}t ||d}| r0| }|j|| d n|j|| d}t|| t|t | d S )NrR   )r5   rT   )r   rU   r   r8   r9   )rT   Zorigin_dictZoutput_dictr5   Zoutput_indexr   rE   Z	result_dfr   r   r   !test_drop_duplicates_ignore_index  s    r^   c                 C   s0   t d| gddggtd}| }t|| d S )Nr   r   r   )Zdtype)r   objectr   r8   r9   )Znulls_fixturer   rD   r   r   r   *test_drop_duplicates_null_in_object_column  s    r`   r#   r6   r!   c                 C   s   t dddddgddtjtjtjgddtjtjdgdddddgtdddtdddtdddttgd}|jD ]6}||g j| d	}|| j| d	}t||	  qnd S )
Nr   r   r   r&   Zthreer+   i  )r   rH   cder"   )
r   r=   rY   r   r   rJ   r   r8   r9   Zto_frame)r#   r   columnZdropped_frameZdropped_seriesr   r   r   (test_drop_duplicates_series_vs_dataframe  s"    



re   argTruec              	   C   sB   t dddddgi}d}tjt|d |j| d W 5 Q R X d S )Nr   r   r   r&   zC^For argument "ignore_index" expected type bool, received type .*.$r   r4   )r   r   r   
ValueErrorr   )rf   r   r   r   r   r   -test_drop_duplicates_non_boolean_ignore_index  s    ri   c               	   C   s   t dddgdddgdddgd} d}tjt|d | ddgd	}W 5 Q R X t ddgddgddgdddgd
}t|| d S )Nr   r   r&   )r   rH   ra   z~In a future version of pandas all arguments of DataFrame.drop_duplicates except for the argument 'subset' will be keyword-onlyr   rH   ra   r!   rR   )r   r8   Zassert_produces_warningFutureWarningr   r9   )r   r   rD   rE   r   r   r   )test_drop_duplicates_pos_args_deprecation  s    "$rk   )r   r   Znumpyr=   r   Zpandasr   r   Zpandas._testingZ_testingr8   markZparametrizer   rG   rK   rO   rQ   rV   rZ   r[   r]   r^   r`   re   ri   rk   r   r   r   r   <module>   sP   
	`$


J&P  ..	

