U
    /ej;                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlm	Z
 d dlmZ d dlmZmZ d dlmZ d dlmZmZmZmZ g ZdD ]8ZeejededZe
jed	d
Zeeef qejedeeddZe
jed	d
Zee  j!ej!f e"edede#dedej#ddddZ$e"edede#ddedej#dddddZ%e"edede#ddedej#dddddZ&e$e%e&gZ'g Z(e'D ]RZ)e)j*+de)_*e)j,+de)_,e(e)j-e)j*j./ede)j,j./edd qd d! e'D Z0d"d! e(D Z1d#d! e'D Z2d$d! e(D Z3d%d& Z4d'd( Z5d)d* Z6d+d, Z7d-d. Z8d/d0 Z9d1d2 Z:ej;<d3d4d5dgej;<d6d4d5dgd7d8 Z=ej;<d6d4d9gd:d; Z>d<d= Z?d>d? Z@d@dA ZAdIdCdDZBdEdF ZCG dGdH dHZDdS )J    N)_compat)check_numeric_only_deprecationtm)_concat)	assert_eqclear_known_categoriesis_categorical_dtype	make_meta)TFZbacbac)ordered   npartitions   indexZabcdeZxxxxx   Zabcbcf8dtypevwxyzZfghijZyyyyy
   ZabbbaZklmnoZzzzzz   ZbcbcccategoryZxyzabc)r   r   c                 C   s   g | ]}| |jqS  	set_indexr   .0ir   r   I/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/tests/test_categorical.py
<listcomp>H   s     r&   c                 C   s   g | ]}| |jqS r   r    r"   r   r   r%   r&   I   s     c                 C   s   g | ]}| |j|jgqS r   r!   r   r   r"   r   r   r%   r&   J   s     c                 C   s   g | ]}| |j|jgqS r   r'   r"   r   r   r%   r&   K   s     c                   C   sF  t tttt t tdd tD tdd tD  t tdd t	D tdd t
D j t tt	tt
 t tdd t	D tdd t
D  t tdd t	D tdd t
D  t tdd t	D tdd t
D  t td	d tD td
d tD j t tttt d S )Nc                 S   s   g | ]
}|j qS r   )r   r"   r   r   r%   r&   T   s     z3test_concat_unions_categoricals.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   r   r"   r   r   r%   r&   Y   s     c                 S   s   g | ]}|qS r   r   r"   r   r   r%   r&   Y   s     c                 S   s   g | ]}|d dg qS r   r   r   r"   r   r   r%   r&   a   s     c                 S   s   g | ]}|d dg qS r(   r   r"   r   r   r%   r&   b   s     c                 S   s   g | ]
}|j qS r   )r   r"   r   r   r%   r&   g   s     c                 S   s   g | ]
}|j qS r   r   r"   r   r   r%   r&   l   s     c                 S   s   g | ]
}|j qS r   r   r"   r   r   r%   r&   q   s     c                 S   s   g | ]}|qS r   r   r"   r   r   r%   r&   q   s     )r   Zassert_frame_equalr   framespdconcatframes2Zassert_series_equalassert_index_equalframes3frames4r   frames5frames6r   r   r   r%   test_concat_unions_categoricalsN   s6         r3   c                 C   s   t dd ttD dtddddddtd	 d
d gd }| }t|j |j  t|j	 |j	  t
  ||j }W 5 Q R X t||j | t||jj	 ||jj	  t|j|j |j|j  d S )Nc                 S   s   i | ]\}}d |f|qS unknownr   r#   r$   dfr   r   r%   
<dictcomp>z   s      z-test_unknown_categoricals.<locals>.<dictcomp>r5   objectr   i8r   r   r   )Zparent_meta   )dd	DataFrame	enumerater*   r	   computer   r   Zvalue_countsZnuniquer   groupbysumr   count)shuffle_methodddfr7   expectedr   r   r%   test_unknown_categoricalsx   s"    
&rF   c                  C   sv   t t ddddgddddgd} t| d s6tt| d rFtt| d}t|d sbtt|d rrtd S )N   r      r;   )catr   rI   r   )r+   r=   Categoricalr   AssertionErrorr<   from_pandas)r7   rD   r   r   r%   test_is_categorical_dtype   s    &rM   c               	   C   s  t td jddid} tdd ttD d| d gd jddid}|j|jj	
d	dd
gd}|jj	jsrt|jj	jrt|jj	jrt| }dD ]`}|dk	}|j|d}|jj	jst|jj	jst|jj	j|kstt||ddidd |j|dd}|jj	jst|jj	js(t|jj	j|ks<tt||ddidd |jd|d}|jj	jrrt|jj	jst|jj	j|kstt||ddidd |jd|d}|jj	jst|jjdkst|jj	j|kstt|| q|jg dd}|jj	jstt|| |jdgdd|ks@t|jg dd|ksXt|dg|ksnt|g |ksttt |jdd W 5 Q R X tt |jdd W 5 Q R X d S )Nr   r   y_)columnsc                 S   s   i | ]\}}d |f|qS r4   r   r6   r   r   r%   r8      s      z#test_categorize.<locals>.<dictcomp>r5   r;   r   r   )r   )NTFFr   r   r   )check_categoricalr   )r   split_everyr9   T)rO   r   r   rG   )rQ   Zfoo)r   r0   renamer<   r=   r>   r/   assignr   rI   set_categoriesknownrK   rN   r   r?   
categorizer   r   astyper   pytestraises
ValueError)metarD   r7   r   Zknown_indexddf2Zddf_known_indexr   r   r%   test_categorize   s^    

r]   c                  C   s   t jjtdddgdd} t| jtdddg t| jd t jjtdddgd} t| jtdddg t| jd t jjtddd	gd
d} t| jtddd	g t| jd
 d S )NabcF)r[   
categoriesr
   )r[   ra   rG   d      T)	r<   ZcategoricalZcategorical_dtyper^   r   ra   r+   Indexr
   )Z	cat_dtyper   r   r%   test_categorical_dtype   s"        re   c                  C   s   t jt dd} |  }|  }|jjjs0t	t
||t|jddd | jdd| ksbt	t j||jddd} |  }| jdd}|jjjst	t
||t|jddd |  | kst	d S )Nr   r   F)check_divisionsrP   r   idxT)r<   rL   r   ZmakeDataFramer?   rV   r   rI   rU   rK   r   r!   r+   CategoricalIndexArR   )rD   r7   r\   r   r   r%   test_categorize_index   s,    rj   c              	   C   s  t ddddgddddgd}t j|d	 dddgd
d|d	< tj|dd}tjjdd2 |jd	|j	d}|
d|
d }}t|j dgksttt|j dddgkst|j|j|j	d}|
d|
d }}t|j dgk sttt|j dddgks t|jd	dddg|j	d}|
d|
d }}t|j dgkshttt|j dddgkstW 5 Q R X d S )NrG   r   rH   r;   r^   r_   r`   r   r   r   T)ra   r
   r   sync)Z	schedulerr   )Z	divisionsr   )r+   r=   rJ   r<   rL   daskconfigsetr!   r   Zget_partitionlistr   r?   rK   sortedr   )rC   r7   r^   r_   d1Zd2r   r   r%   test_categorical_set_index  s       "rs   ncategoriesrG   rH   r   c                 C   sn   d}|| }dd t |D }t|| tj|d}tj|| d}|d dj	 |d< |
d}dS )	z(https://github.com/dask/dask/issues/5343r   c                 S   s   g | ]}d t | qS )ZCAT)strr"   r   r   r%   r&   )  s     zItest_categorical_set_index_npartitions_vs_ncategories.<locals>.<listcomp>)idvaluer   rv   r   N)ranger+   r=   nprandomr<   rL   rW   rI   
as_orderedr!   )r   rt   Zrows_per_categoryZn_rowsra   ZpdfrD   r   r   r%   5test_categorical_set_index_npartitions_vs_ncategories"  s    r|   r;   c                 C   sv   t tdtdd}tj|dd}|d d|d< |j| d}| }|d d|d< t	|| t	|| d S )Nr   Z
abababcbcbrk   r   r   r   r   )
r+   r=   rx   rp   r<   rL   rW   Zrepartitioncopyr   )r   r7   rD   r\   r   r   r%    test_repartition_on_categoricals3  s    
r~   c                  C   s   t tdtdd} | jd| _tj| dd}dt|jksFt	dt|j
ksXt	t|jdsht	t|j
drxt	| | j}tj|ddd	}t|jd
st	t|jd
rt	d S )NZaaaaabbbbbcccccr   rk   r   r   r   rI   F)r   sortra   )r+   r=   rp   rx   r   rW   r<   rL   dirrK   r   hasattrr!   r   )r7   rD   Zdf2r\   r   r   r%   "test_categorical_accessor_presence@  s    r   c               	   C   sT   t jtddddtdgidd} tjdd}|    W 5 Q R X |rPt	d S )	Nri   r^   r_   nanr   r   T)record)
r<   rL   r+   r=   floatwarningscatch_warningsrV   r?   rK   )r7   r   r   r   r%   test_categorize_nanP  s     r   c                 C   s   t | tjr| S | jS )N)
isinstancer+   rh   rI   r)   r   r   r%   get_catY  s    r   Fc                 C   s(   t | t|tjrt|n||d dS )z@left and right are equal, treating index and array as equivalentrf   N)r   r   ry   Zndarrayr+   rd   )leftrightrf   r   r   r%   assert_array_index_eq]  s
    r   c                  C   sT   t ddddgi} | d d| d< t| d}|jj }t|tj	j
sPtd S )Nri   r^   r_   r`   r   r   )r+   r=   rW   r<   rL   ri   rI   as_knownr   coreSeriesrK   )r7   Zdask_dfZret_typer   r   r%   !test_return_type_known_categoriesf  s
    r   c                   @   s   e Zd Zejdeejddefdefdefgdd Z	ejdeejdd	e
d
dgdfdi fdi fdi fde
dgdfde
d
ddgdfde
dddgdfde
dddgdfdi fg	dd Zdd Zejdedd Zdd  Zd!d" Zd#S )$TestCategoricalAccessorserieszprop, comparera   r
   codesc                 C   s6   |\}}t t||}t t||}|||dd d S NFr   )getattrr   )selfr   propcomparesdsrE   resultr   r   r%   test_propertieso  s    
z'TestCategoricalAccessor.test_propertieszmethod, kwargsZadd_categoriesde)Znew_categoriesr{   Zas_unorderedZremove_categoriesr^   )ZremovalsZrename_categoriesfZreorder_categoriesr_   r`   rT   Zremove_unused_categoriesc           	      C   sx   t j|f|}|\}}|t|}|t|}t||dd tt|jjt|jdd tt|jjt|jdd d S r   )operatormethodcallerr   r   Z_metara   r
   )	r   r   methodkwargsopr   r   rE   r   r   r   r%   test_callable~  s    

z%TestCategoricalAccessor.test_callablec                 C   s<   dd }dd }t t| t| g}|jjj d S )Nc                   S   s   t dt tjtjgiS )Nri   )r+   r=   rJ   ry   r   r   r   r   r%   
make_empty  s    zBTestCategoricalAccessor.test_categorical_empty.<locals>.make_emptyc                   S   s   t dt ddgiS )Nri   r^   )r+   r=   rJ   r   r   r   r%   	make_full  s    zATestCategoricalAccessor.test_categorical_empty.<locals>.make_full)r<   Zfrom_delayedrm   Zdelayedri   rI   ra   )r   r   r   r^   r   r   r%   test_categorical_empty  s    z.TestCategoricalAccessor.test_categorical_emptyc              	   C   sH  |\}}|j jst|j  }|j jr*ttjtdd |j j W 5 Q R X tjtdd |j j W 5 Q R X tjt	dd |j j W 5 Q R X tjt	dd |j j W 5 Q R X |j 
dddg}|j jstt|j jt|j t|j jt|j |j  }|j jst| }t|j jt|j t|j jt|j d S )Nzwith unknown categories)matchr^   r_   r`   )rI   rU   rK   Z
as_unknownrX   rY   NotImplementedErrorra   r   AttributeErrorrT   r   r.   r   r   r   r?   )r   r   r^   dadbresr   r   r%   test_unknown_categories  s*    

z/TestCategoricalAccessor.test_unknown_categoriesc                 C   sB   t jdddgdd}t|d}|j }|j }t|| d S )Nr^   r_   r   r   r   )r+   r   r<   rL   ru   upperr   )r   r^   r   r   rE   r   r   r%   test_categorical_string_ops  s
    

z3TestCategoricalAccessor.test_categorical_string_opsc              	   C   sD   t jdddgdd}t|d}tt |j  W 5 Q R X d S )NrG   r   rH   r   r   )	r+   r   r<   rL   rX   rY   r   ru   r   )r   r^   r   r   r   r%   "test_categorical_non_string_raises  s    z:TestCategoricalAccessor.test_categorical_non_string_raisesN)__name__
__module____qualname__rX   markparametrize
cat_seriesr   r   r   dictr   r   r   r   r   r   r   r   r%   r   n  s:   
r   )F)Er   r   Znumpyry   Zpandasr+   rX   rm   Zdask.dataframeZ	dataframer<   r   Zdask.dataframe._compatr   r   Zdask.dataframe.corer   Zdask.dataframe.utilsr   r   r   r	   r   r
   r   rJ   rp   r   rL   r   appendrx   r?   r   r=   Zaranger^   r_   r`   r*   r-   r7   r   rW   r   rS   rI   rT   r/   r0   r1   r2   r3   rF   rM   r]   re   rj   rs   r   r   r|   r~   r   r   r   r   r   r   r   r   r   r%   <module>   s   





*? 
	
	