U
    /e                     @   s"  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlmZ d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZmZmZmZ d dlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d d	l'm(Z(m)Z) d d
l*m+Z+ ej,dddgdddgdd ddgdej,dddgdddgddddgdej,dddgdddgddddgddZ-e)ddde.g de, dZ/e,e-de/d dddgZ0e01 Z2i Z3ej4j5rde3d< e&Z6dd Z7dd Z8d d! Z9d"d# Z:d$d% Z;d&d' Z<d(d) Z=d*d+ Z>e,ej?dddgd d,d-ej?dd.d/gd d0d-e@d1d2d3gd Ad4e@d5d6d7gd e?d8dd8gd e@ejBd9dd:e@ejBd9dd;d<e@ejCd=dd:d>ZDd?d@ ZEdAdB ZFejGHdCdddejIdDejGjJdEgdFdG ZKejGjLe dHdIejGHdJdKdLdMgdNdO ZMdPdQ ZNdRdS ZOdTdU ZPdVdW ZQdXdY ZRejGHdZd[d\gd]d^ ZSd_d` ZTdadb ZUdcdd ZVdedf ZWdgdh ZXdidj ZYdkdl ZZdmdn Z[dodp Z\dqdr Z]ejGjJdsdt Z^dudv Z_dwdx Z`dydz Zad{d| Zbd}d~ Zcdd Zddd ZeejGHddejIdejGjfdEgdd ZgejGHddejIdejGjfdEgdd ZhejGHddejIdejGjfdEgdd ZiejGHddejIdejGjfdEgdd Zjdd Zkdd ZlejGHdddgdd ZmejGHdd8dgdd Zndd Zodd Zpdd Zqdd Zrdd Zsdd Ztdd Zudd Zvdd Zwdd Zxdd ZyejGjJdd ZzejGHdCddgdd Z{dd Z|dd Z}ejGHdddgdd Z~ejGHddgdd Zdd ZejGHddd8dgejGHdddddge@ddggejGHdddgddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zddք ZejGHdCddgddل Zddۄ Zdd݄ Zdd߄ ZejGHdd8dgejGHdd1d2gejGHdddgdd ZejGHdd8ddd8gd8dggejGHdd1d2gd2d1ggejGHdddgdd ZejGHdddgejGHdd8dgejGHdd1d2gejGHdddgejGHdeeddgd  eedd dgd eedd deeddgd  eeeddgdd Zdd Zdd Zdd ZejGHdd1d2gd2d1ggejGHdddgdd Zdd ZejGHdCddgdd ZdS (       N)ProcessPoolExecutor)copy)partial)mock)compute_as_if_collection)PANDAS_GT_120PANDAS_GT_140assert_categorical_equaltm)_noopmaybe_buffered_partdpartitioning_indexrearrange_by_columnrearrange_by_divisionsremove_nansshuffle)	assert_eq	make_meta)cull               abindex         	   )xr   )r$   r   )r$   r   i8)r   Zparent_metar$   FZ
check_freqc                 C   s   t ttj| d}t|tjs t|jtjks0tt	|j|j
df}t	|j|j
df}t|jt|j@ rptt|jtjstt ttjj
t ttjj
kstd S )Nr   r   r   )shuffle_funcdr   
isinstancedd	DataFrameAssertionErrornpartitionsdaskget_nameset
issuperset)shuffle_methodsr$   y r6   E/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/tests/test_shuffle.pytest_shuffle8   s    r8   c                   C   s   t ttjjtjkstd S N)r   r(   r   r-   r,   r6   r6   r6   r7   test_default_partitionsF   s    r:   c                 C   s   t dtjdi}tj|dd}t||j| ddd}| }|j	dksNt
t|jt|jsht
t|t|ks|t
t|jt|jkst
ttt|j ttt|j kst
d S )Nr$   d   
   r-      r   r   r-   
max_branchpdr+   nprandomr*   from_pandasr   r$   computer-   r,   r1   r.   r2   lenlistcolumnsmaptuplevaluestolistr3   dfddfr4   Zscr6   r6   r7   test_shuffle_npartitionsJ   s    rQ   c                 C   s   t dtjdi}tj|dd}t||j| ddd}| }|j	dksNt
t|jt|jsht
t|t|ks|t
t|jt|jkst
ttt|j ttt|j kst
d S )Nr$   r;      r=   r   r   r?   rA   rN   r6   r6   r7   ,test_shuffle_npartitions_lt_input_partitionsW   s    rS   c                 C   s0   ddl m} |tttj| dttd| d d S )Nr   list_eqr&   r   )dask.dataframe.tests.test_multirU   r   r(   r   )r3   rU   r6   r6   r7   test_index_with_non_seriesd   s
     rW   c                 C   s   t ttdg | d }t tdg| d }t td| d }t|j t|j ks^tt|j t|j ks~td S )Nr   r&   )r   r(   rF   sortedrL   rM   r,   )r3   Zres1res2Zres3r6   r6   r7   test_index_with_dataframel   s
     rZ   c                 C   sb   t ddddgi}t|d}dD ]8}t|d|| d}t|jddt|jddks$tq$d S )	Nr$   r   r   r   )r   r   r-   r   sync	scheduler)rB   r+   r*   rE   r   rG   rF   r,   )r3   rO   r   ir   r6   r6   r7   ,test_shuffle_from_one_partition_to_one_otheru   s
    r`   c                 C   sj   t ddddgd i}tj|dd}t||jd| d}ttj|j| }|D ]}|j	|j	ksPt
qPd S )	Nr$   r   r   r   r<   r=   r    r[   )rB   r+   r*   rE   r   r$   r   r.   __dask_keys__rI   r,   )r3   rO   rP   r4   partspr6   r6   r7   test_shuffle_empty_partitions~   s    rd   Zint32dtypeg      @g      @Zfloat32r   r   ccategoryr(   efT20130101periods
US/Easternrm   tz2000)i32f32catobjbooldtZdt_tztdc                  C   s  t tjd} | dk | dk@  s$ttt| dks:tt tjdt tjdk sZtt tdg d} | dk | dk@  sttt| dkstt tdddg d} d| k| dk @  stt tjd	} | d	k | dk@  sttt| dkstd S )
Nr   r   r   rr   rt   rv   rs   r   r   )	r   df2rr   allr,   rG   rC   uniquer   )resr6   r6   r7   test_partitioning_index   s     r}   c                  C   s   t ttjddddgd d} | jd| _|  }|jj	tt
|jjj|_t| jd}t|jd}||k s~tt| d}t|d}||k std S )	Nr   r   r   r      r   rh   r   )rB   r+   rH   stringascii_lettersr   astyper   rt   Zset_categoriesreversed
categoriesr   rz   r,   )rO   ry   r|   rY   r6   r6   r7   -test_partitioning_index_categorical_on_values   s    "

r   r-      )Zmarksc                 C   s(  dddg}t jtjdtjdd tj|ddtjdd}trZ|dd	d
}tj|| d}t	|
d|j
d|d t	|
d|j
d|d t	|
d|j
d|d t	|
|j|j
|j|d t	|
|j|j |j
|j|j |d t	|
|jd |j
|jd |d d S )Nalicebobrickyr;   皙?r$   r5   zr   ZFloat64r   )r$   r   r=   r$   r&   r5   r   r   )rB   r+   rC   rD   choicer   r   r*   rE   r   	set_indexr$   r5   )r-   r3   namesrO   rP   r6   r6   r7   test_set_index_general   s(    


 r   z8Only test `string[pyarrow]` on recent versions of pandas)reasonstring_dtypezstring[python]string[pyarrow]objectc                 C   s   |dkrt d dddg}tjtjdtj|ddtjdd}|d	|i}tj	|d
d}t
|d	|jd	| d d S )Nr   Zpyarrowr   r   r   r;   r$   r5   r   r5   r<   r=   r&   )pytestimportorskiprB   r+   rC   rD   r   r   r*   rE   r   r   )r3   r   r   rO   rP   r6   r6   r7   test_set_index_string   s    



r   c              	   C   s   t jtjdtjdd dtjdd}tj|dd}tjtdd |j	|j
| d	}W 5 Q R X ||ksttt||	|j
 d S )
Nr;   r   r   r   r   r=   zthis is a no-opmatchr&   )rB   r+   rC   rD   r*   rE   r   warnsUserWarningr   r   r,   r   )r3   rO   r   r   r6   r6   r7   test_set_index_self_index   s    
r   c                 C   s  | dkrt d tjtjdtjdd dtjdd}tj|dd}t|j	d	| d
j
t|j	d	| d
j
ks|tt|j	d	| d
j
t|j	d| d
j
kstt|j	d	d| dj
t|j	d	d| dj
kstt|j	d	d| dj
t|j	d	d| dj
ks
td S )Ndiskz/dsk names in disk shuffle are not deterministicr;   r   r   r   r   r=   r$   r&   r5   r@   r   r   T)dropr   F)r   ZxfailrB   r+   rC   rD   r*   rE   r1   r   r.   r,   )r3   rO   rP   r6   r6   r7   test_set_index_names   s&    




r   c                 C   sD   t jjddtttddddd}|jd| d	}|j j	d
d d S )Nrq   Z2004valuenameid2H1Mr   )freqpartition_freqseedr   r&   r\   r]   )
r*   ZdemoZmake_timeseriesfloatstrintr   r   sumrF   )r3   rO   ry   r6   r6   r7   test_set_index_2  s    
	r   c                 C   sd   t jtjdddgd}tj|dd}|jd| d|jd}|d}t|| |j|jks`t	d S )	N)r<   r   r$   r5   rI   r   r=   r   )r   r@   r-   )
rB   r+   rC   rD   r*   rE   r   r-   r   r,   )r3   rO   rP   ddf2ry   r6   r6   r7   test_set_index_3!  s       

r   c                 C   sp   t dddddgdddddgd}tj|dd	}|d
 }|jd
| d}t|jdd |jdd  d S )Nr   r   r   r"   r!   r   r   r   r=   r$   r&   )rB   r+   r*   rE   r   Z
sort_indexr   loc)r3   rO   rP   ry   r   r6   r6   r7   test_shuffle_sort-  s
    $r   r^   threads	processesc           	         s   t dtjdi}tj|dd}|j|jd d}t|dd| d}|j	|j	ksVt
t|j|jslt
|j|d	}tjj|d	}||j| }|j D ]" t fd
d|D dkst
qd S )Nr$   r<   r   r=   _partitionsr       r   r]   c                 3   s   | ]} t |jkV  qd S r9   )r1   r   ).0partr_   r6   r7   	<genexpr>I  s     z!test_rearrange.<locals>.<genexpr>r   )rB   r+   rC   rD   r*   rE   assignr$   r   r-   r,   r1   r.   issubsetrF   baseZget_schedulerra   r   Zdrop_duplicatesr   )	r3   r^   rO   rP   r   resultr   r/   rb   r6   r   r7   test_rearrange7  s        r   c               	   C   s   t dtjdi} tj| dd}|j|jd d}t	 }t
jjt|d" t|ddd	d
}|jdd W 5 Q R X tt|dkstd S )Nr$   r<   r   r=   r   Ztemporay_directoryr   r   r   r   r   r]   r   )rB   r+   rC   rD   r*   rE   r   r$   tempfilemkdtempr.   configr1   r   r   rF   rG   oslistdirr,   rO   rP   r   Ztmpdirr   r6   r6   r7   test_rearrange_cleanupL  s    r   c                 C   s   t dd S )NMock exception!)
ValueError)rO   colr-   rc   r6   r6   r7   mock_shuffle_group_3Z  s    r   c                  C   s   t jdtd tdtjdi} tj| dd}|j	|j
d d}t }tjjt|d< tjtd	d
" t|dddd}|jdd W 5 Q R X W 5 Q R X W 5 Q R X tt|dkstd S )Nz&dask.dataframe.shuffle.shuffle_group_3)newr$   r<   r   r=   r   r   r   r   r   r   r   r   r   r]   r   )r   patchr   rB   r+   rC   rD   r*   rE   r   r$   r   r   r.   r   r1   r   r   raisesr   r   rF   rG   r   r   r,   r   r6   r6   r7   *test_rearrange_disk_cleanup_with_exception^  s       *r   c               	   C   s`   ddl m}  tddddddgddddddgd	}t|dddg}t|d
d}| || d S )Nr   rT   r   r   r   r   r   r    r   r$   )r   r   r   )rV   rU   rB   r+   r*   repartitionr   )rU   Ar   rO   r6   r6   r7   .test_rearrange_by_column_with_narrow_divisionsr  s
    (r   c           
      C   s   dd l }t }| }t|j |js&ttt|}|jr@t| }t|j |j	sXtt| d}| }t|j |jsztt
|  }t|dkst|d jdkst|d j| ksttt|}	|	jrt|	j| kstd S )Nr   )tempdirr   z.partd)partdr   r)   ZBufferr,   pickleloadsdumpsbufferZFilerH   iterdirrG   suffixparentr   )
Ztmp_pathr   rj   p1f2p2Zf3p3contentsZf4r6   r6   r7   test_maybe_buffered_partd|  s$    


r   c               	   C   s   t jdddddgidddd	gd
} tj| dd}dd }tjj|d |jddddgd}W 5 Q R X |jdkstt	| d}t
|| tt |jddddgd W 5 Q R X d S )Nr$   r   r   r   r   r<   rR      (   r   r=   c                  _   s
   t  d S r9   	Exceptionargskwargsr6   r6   r7   throw  s    z5test_set_index_with_explicit_divisions.<locals>.throwr]   r   	divisions)r   r   r   )rB   r+   r*   rE   r.   r   r1   r   r   r,   r   r   r   r   )rO   rP   r   r   ry   r6   r6   r7   &test_set_index_with_explicit_divisions  s    "

r   c               	   C   sL   t dddddgi} tj| dd}tt |jdg d W 5 Q R X d S )Nr$   r   r   r   r   r=   r   )rB   r+   r*   rE   r   r   r   r   rO   rP   r6   r6   r7   #test_set_index_with_empty_divisions  s    r   c                  C   sx   t ddddddgtdd} t| d}|jd	d
ddgd}|jdksNtt|jddj	dd  ddgksttd S )Nr   r   r   r   r   r    Zabdabdr   r5   r   rg   r(   r   )r   rg   r(   r\   r]   )
rB   r+   rH   r*   rE   r   r   r,   rF   r   )rO   rP   r   r6   r6   r7   test_set_index_divisions_2  s
     r   c                  C   s   t jddddgdd} t jddddgdd}t| | t| td t|td t| jt|jksntt jt jdddgdd}t jt jdddgdd}t }|j|_	t|| t|| t|| t|jt|jkstd S )Nr   r   r   r"   F)r   rF   T)
r(   r   r   fullrG   r.   r,   r   r   r   )d2d3d4d5expr6   r6   r7    test_set_index_divisions_compute  s    



r   c               	   C   sj  t dddgdddgd} t dddgd	d	d
gd}t dddgdddgd}t| ||dd| d d d d g}| }dd }tjj|d |jdddddgdd}W 5 Q R X t||d tjj|d |jddd	ddgdd}W 5 Q R X t||d t	
t  |jddd	d
ddgdd W 5 Q R X t	
t |jddd	dd
gdd W 5 Q R X d S )Nr<         r   r   r~         r   rg      r>      r(   ri   r#   r$   c                  _   s   t dd S )NzShouldn't have computedr   r   r6   r6   r7   r     s    z.test_set_index_divisions_sorted.<locals>.throwr]   T)r   rX   r5   )rB   r+   r*   rF   r.   r   r1   r   r   r   r   r   )r   r   r   rP   rO   r   r|   r6   r6   r7   test_set_index_divisions_sorted  s*    
   
""$r   c               	   C   s   t jtjdtjdd dtjdd} tj| dd}| }td}t	d|(}t
t|d	d
}t||td}W 5 Q R X t|dkstd S )Nr;   r   r   r   r   r=   spawnr!   r$   )rO   idxr   )rB   r+   rC   rD   r*   rE   clear_divisionsmpZget_contextr   r   
_set_indexr1   rJ   rangerG   r,   )rO   rP   ctxpoolfuncZdivisions_setr6   r6   r7   #test_set_index_consistent_divisions  s    

r
  c                 C   s   | |jS r9   )r   r   )r_   rO   r  r6   r6   r7   r    s    r  c                 C   sF   t dtjdi}tj|dd}|jd| dd}|jdk sBtd S )Nr$   r;   2   r=   auto)r   r-   r<   )	rB   r+   rC   rD   r*   rE   r   r-   r,   r3   rO   rP   r   r6   r6   r7   'test_set_index_reduces_partitions_small  s    r  c                 C   s    t tj| tj| dS Nr   rB   r+   rC   rD   nr6   r6   r7   	make_part	  s    r  c                    sz   d}d}t ||d   t fddt|D dtdd g|d  }|jd| d|d	}d|j  k rpd
k svn td S )N    .Ar  r!   c                    s   i | ]}d |ft  fqS r$   r  r   r_   r  r6   r7   
<dictcomp>  s      z;test_set_index_reduces_partitions_large.<locals>.<dictcomp>r$   r   r  r   r-   Zpartition_sizerR   r   r*   r+   r  r  r   r-   r,   )r3   nbytesnpartsrP   r   r6   r  r7   'test_set_index_reduces_partitions_large  s        r  c                    sn   d}d}t ||d   t fddt|D dtdd g|d  }|jd| d|d	}|j|jksjtd S )
Nr   r  r!   c                    s   i | ]}d |ft  fqS r  r  r  r  r6   r7   r  "  s      z=test_set_index_doesnt_increase_partitions.<locals>.<dictcomp>r$   r   r  r  r  )r3   r  r  rP   r   r6   r  r7   )test_set_index_doesnt_increase_partitions  s        r  c                 C   sT   t tdtdd}tj|dddd}|jd| d}t|j|jd k sPt	d S )	Nr;   r   r<   r$   F)r-   r   sortr&   r   )
rB   r+   r  r*   rE   r   rG   r.   r-   r,   r  r6   r6   r7   "test_set_index_detects_sorted_data-  s    r   c               <   C   s$  t ddddddddddddddd	ddd	d
ddddd
ddddddddddddddddddddddddddddddddddg:} tj| dd} ddd g}g }tt|D ]N}t|d | }t|||d!  }|tjd"| || it||d# qt	
| }|d"j jd$ks td S )%Nl    ra l    li) l    PG l    /D l    $;{lH l    r]:x l    . l    L7% l    ` l    XTwK l    Vv;
M l     2M l    4Sj l    g? l    &q l    8^WK l    Ceo l    JMzl l    F2y| l    zr l    ud l    ^J;p l    j{t l    R~% l    4 l    dw' l    @z}Z nsunitr<   $   :   r   	timestampr   T)rC   arrayrB   to_datetimer  rG   r   appendr+   r*   concatr  r   r   rF   Zis_monotonic_increasingr,   )valsZbreaksdfsr_   lohirP   r6   r6   r7   test_set_index_sorts5  s    >
(r/  enginepandascudfc           	   	   C   s  | dkrt d}tjdddgdddgdd	ddgd
tjdddgdddgddddgd
tjdddgdddgddddgd
d}t|dtd	dddg}| dkr||}| }|jddd}|j	dkst
|jjdkst
t||d |j|jdd}|j	dkst
|jjdks$t
t|||j |d}|jjdksRt
t||d |dg}|jjdkst
t||dg d S )Nr2  	dask_cudfr   r   r   r   r    r   r   r   r   r!   r   r"   r#   r$   r   r=   )r   r   rB   r+   r*   metaZfrom_dask_dataframerF   r   r-   r,   r   r   r   r   )	r0  r3  dskr(   r   r   r   r   r   r6   r6   r7   test_set_index  s0    
$$$

r6  c                 C   s
  | dkrt d}t d}tdddddgdddddgd}| dkrb||}|j|dd	}nt|d}|jd
dd	}|jdkst	t
|jdddhkst	|jddd	}|jd dkst	d|jd   k r|jd   k rdk sn t	|jd dkst	d S )Nr2  r3  r   r   r         ?r   r   r=   r$   r5   r   g       @)r   r   rB   r+   rE   	from_cudfr*   r   r-   r,   r1   r   )r0  r2  r3  rO   gdfr(   d1r   r6   r6   r7   test_set_index_interpolate  s    

$
0r;  c                 C   s   | dkrt d}t d}tttdddd }tdd| i}| dkrh||}|j|dd}nt	|d}|j
ddd}td	d
 |jD std S )Nr2  r3  r      r<   r   r$   r=   c                 s   s    | ]}t t|t jV  qd S r9   )rC   Z
issubdtypetypeintegerr   r$   r6   r6   r7   r     s     z1test_set_index_interpolate_int.<locals>.<genexpr>)r   r   rX   rH   r  rB   r+   rE   r8  r*   r   rz   r   r,   )r0  r2  r3  LrO   r9  r(   r:  r6   r6   r7   test_set_index_interpolate_int  s    


rA  c                 C   s   | dkrt d}t d}tdtjddgtjdi}| dkr\||}|j|dd}nt	|d}|j
ddd}|jdkstt|jddhkstd S )	Nr2  r3  r$   l   TH Dl   +7yDre   r   r=   )r   r   rB   r+   rC   r'  Zuint64rE   r8  r*   r   r-   r,   r1   r   )r0  r2  r3  rO   r9  r(   r:  r6   r6   r7   %test_set_index_interpolate_large_uint  s    


rB  c            	   	   C   s  t t jddd} t t jdddd}t || d}tj|dd}|jd	dd}t j| j| j	d
}|j
d | d   kr|d ksn t|j
d | d   kr|d ksn t|jddd}t j||j	d
}|j
d |d kst|j
d |d kst|j
d j|d jks t|j
d jd k	s6tt j|j| j	d
}trh|j
d |d krtn.tt |j
d |d kstW 5 Q R X d S )Nrk   r   rl   rn   ro   )rp   notzr   r=   rC  re   r   r   rp   )rB   Series
date_ranger+   r*   rE   r   ZDatetimeIndexrL   rf   r   r,   rp   r   r   r   	TypeError)	Zs_naiveZs_awarerO   r(   r:  s1r   s2Z	s2badtyper6   r6   r7   test_set_index_timezone  s&    ((rJ  c                  C   s^   t jt dddddddddddddgd} tj| dd} |  jddd}|jdksZtd S )	Nr   BCr   r   r=   r   r   )	rB   r+   Indexr*   rE   reset_indexr   r-   r,   )dataoutputr6   r6   r7   test_set_index_npartitions  s    rQ  r#  r!  usc                 C   sh   t jddgddgddgddggd	d
gd}t j|j| d|_tj|dd}|d	}t||d	 d S )Nl   9+wEG, r   l   !|IwEG, r   l   wwEG, r   l   a>hwEG, r   tsZrankr   r"  r=   )rB   r+   r(  rS  r*   rE   r   r   )r#  rO   rP   r6   r6   r7   !test_set_index_datetime_precision%  s    	
rT  r   c                 C   s  t tddddddddd	d
dg
ddddddddddg
d}t|d}t|jd| d|jd| d t|jd| d|jd| d t|jd| d|jd| d t|j|j| d|j|j| d t|j|j| d|j|j| d t|j|j	| d|j|j	| d t tddddddddd	d
dg
ddddddddddg
d}t|d}t|jd| d|jd| d t|jd| d|jd| d d S )NZ
ABAABBABAAr   r   r   r   r   r    r   r!   r"   r<   )r   rK  rL  r   r   rK  rL  )r   r   r   r   )
rB   r+   rH   r*   rE   r   r   r   rK  rL  )r   ZpdfrP   r6   r6   r7   test_set_index_drop9  s,    """rV  c               
   C   s   t dddddddgdddddddgd} t| d}d	}tt}|d
dg W 5 Q R X |t|j	ksrt
tt}|d
dgg W 5 Q R X |t|j	kst
tt}|d
gg W 5 Q R X |t|j	kst
d S )Nr   r   r   r   r   r    r   r   z1Dask dataframe does not yet support multi-indexesr   r   )rB   r+   r*   rE   r   r   NotImplementedErrorr   r   r   r,   )rO   rP   msgerrr6   r6   r7   (test_set_index_raises_error_on_bad_inputX  s    ,rZ  c               	   C   s  t ddddgddddgddddgd} tj| dd	d
}|jrDt|jddd}|js\tt|j	t|jsvtdD ]r}t
|jd|d| jd|d t
|j|jd|d| j| j|d t
|j|jd d|d| j| jd |d qztt |j|jdd W 5 Q R X d S )Nr   r   r   r   r<   rR   r   r   Fr  r$   TrX   )TFrU  )rX   r   )rB   r+   r*   rE   known_divisionsr,   r   r1   r.   r   r   r$   r   r   r   r   )rO   r   r   r   r6   r6   r7   test_set_index_sorted_truej  s$    *

 r^  c                  C   sL   t ddddgddddgd} tj| dd}t|jdd	d
| d d S )Nr   r   r   r   r   r   r=   r$   Tr\  )rB   r+   r*   rE   r   r   r   r6   r6   r7   &test_set_index_sorted_single_partition  s     r_  c                  C   s   t dddgdddgd} t dddgdddgd}t| }t|}tj||g| d}|jrht|jddd	}|j	d
kstd S )Nr   r   r   r   r   )r4  r5   Tr\  )r   r   r   )
rB   r+   r.   Zdelayedr*   Zfrom_delayedr]  r,   r   r   )r   r   ZaaZbbrO   ry   r6   r6   r7   "test_set_index_sorted_min_max_same  s    


r`  c               	      s   dddg} t ttdd g}|D ] tj fdd| D dd	gd
}ttj|ddtj||j|j	 k ddgt
fddtjD sttd|dstqd S )Nr   r   r   c                 S   s   t j| ddS Nr!  r"  rB   r(  r  r6   r6   r7   <lambda>      z0test_set_index_empty_partition.<locals>.<lambda>c                    s   g | ]} ||d qS r   r6   r  convr6   r7   
<listcomp>  s     z2test_set_index_empty_partition.<locals>.<listcomp>r$   r5   r   r=   c                 3   s   | ]}  | jV  qd S r9   )get_partitionrF   emptyr   rc   )rP   r6   r7   r     s     z1test_set_index_empty_partition.<locals>.<genexpr>)r   r   r   rB   r+   r*   r*  rE   r5   maxanyr  r-   r,   r   r   )	test_vals
convertersrO   r6   )rg  rP   r7   test_set_index_empty_partition  s    
  rp  c                     s"  ddddg} t ttdd g}|D ] t fdd| D }tj|dd	}|jdksXt||j	|j	
 k d
}||j	|j	
 k d
}t||ftst|jdksttdd |jD st||j	|j	
 k jd
dd}t||ftst|jdksttdd |jD s tq d S )Nr   r   r   r   c                 S   s   t j| ddS ra  rb  r  r6   r6   r7   rc    rd  z)test_set_index_on_empty.<locals>.<lambda>c                    s   g | ]} ||d qS re  r6   r?  	converterr6   r7   rh    s     z+test_set_index_on_empty.<locals>.<listcomp>r=   r$   c                 s   s   | ]}t |V  qd S r9   rB   Zisnullr   r(   r6   r6   r7   r     s     z*test_set_index_on_empty.<locals>.<genexpr>Tr\  c                 s   s   | ]}t |V  qd S r9   rs  rt  r6   r6   r7   r     s     )r   r   r   rB   r+   r*   rE   r-   r,   r5   rl  r   r   
CHECK_FREQrz   r   )rn  ro  rO   rP   actualexpectedr6   rq  r7   test_set_index_on_empty  s    rx  c                  C   s   t ttj} t tj}t| tjjj	| dd}t
tj||ddd}tj|ddd}t|t|ksvttj|j|d}t||  d S )	NT)Zorderedre   r   r   rK  r   r=   r   )rH   r   r   r   rD   r   rB   apitypesZCategoricalDtyper+   ZCategoricalr*   rE   r   rG   r,   r   r	   sort_values)orderrL   rf   rO   r   r   r6   r6   r7   test_set_index_categorical  s    

r~  c                  C   s   t jttdddddddddgddddddddgdd	} t| d}||jdk jd
dd}| | jdk d
}|jdkst	t
|| d S )Nr!   r   r   r   r   r   r   r   )r   rO  r   Tr\  )r7        @r  )rB   r+   rH   r  r*   rE   r   r   r   r,   r   )rO   rP   r   rw  r6   r6   r7   %test_set_index_with_empty_and_overlap  s    
r  c                  C   s   ddl m}  tjddddgdddd	gddddgd
ddddgd}tj|ddd}|jr\t| t|}t	||dd |jstd S )Nr   )compute_and_set_divisionsr   r   r   r   r<   rR   r   r   r   Fr[  Zcheck_divisions)
dask.dataframe.shuffler  rB   r+   r*   rE   r]  r,   r   r   )r  rO   r   r   r6   r6   r7   test_compute_divisions  s    "

r  c                  C   s   t dttdi} | d d | d< | d t| d< tj| dd}|d}|j	dd}|
d  t|| d |d}t|| dd d S )Nr   r<   r   r   rg   r=   r   )rB   r+   rH   r  r   r   r*   rE   r   r   ri  rF   r   r   r6   r6   r7   test_empty_partitions  s    

r  c                     s   ddddddddg} t tjfttjfttjfd	d
 tdfg}|D ]L\ | D ]>\}} fdd|D } fdd|D }t||ksPtqPqDd S )N)r   r   r   r  ))Nr   r   r  ))r   Nr   r   r   r   ))r   r   Nr  ))r   r   NNr   r   r   r   ))NNr   r   )r   r   r   r   ))r   NNr   r  ))Nr   Nr   Nr   N)r   r   r   r   r   r   r   c                 S   s   t j| ddS ra  rb  r  r6   r6   r7   rc    rd  z"test_remove_nans.<locals>.<lambda>ZNaTc                    s    g | ]}|d krn |qS r9   r6   r?  rg  Znone_valr6   r7   rh    s     z$test_remove_nans.<locals>.<listcomp>c                    s   g | ]} |qS r6   r6   r?  rf  r6   r7   rh    s     )r   rC   nanr   r   Z
datetime64r   r,   )testsro  inputsrw  paramsr6   r  r7   test_remove_nans   s&    r  c               	   C   s   t dtddi} t dtddi}tj|dd}tj| dd}tjjd	d
d |j	|ddd}|
 }W 5 Q R X | j	|ddd}t|djdd| d S )NZKEYr   iP       r   r=   i  tasksr\   )r   r^   inner)howonTrU  )rB   r+   rC   aranger*   rE   r.   r   r1   mergerF   r
   Zassert_frame_equalr|  rN  )ZlargesmallZdd_leftZdd_rightZ	dd_mergedr   rw  r6   r6   r7   test_gh_2730  s    r  r  c                    s   t    fdddd d}d}t||d  fdd	t|D }|fd
d	t|D  t|dd dd g|d  }|jd| d t }||kst	d S )Nc                      s   t   d S r9   )nextr6   )countr6   r7   	increment0  s    zKtest_set_index_does_not_repeat_work_due_to_optimizations.<locals>.incrementc                 S   s    t tj|tj|dS r  r  )dummyr  r6   r6   r7   r  3  s    zKtest_set_index_does_not_repeat_work_due_to_optimizations.<locals>.make_partr  r  r!   c                    s   i | ]}d |f fqS )incr6   r  )r  r6   r7   r  :  s      zLtest_set_index_does_not_repeat_work_due_to_optimizations.<locals>.<dictcomp>c                    s    i | ]}d |f d|ffqS )r$   r  r6   r  )r  r  r6   r7   r  ;  s      r$   r   r=   )
	itertoolsr  r   r  updater*   r+   r   r  r,   )r-   r  r  r5  rP   Zntimesr6   )r  r  r  r  r7   8test_set_index_does_not_repeat_work_due_to_optimizations+  s     r  c               	   C   sd   t dddgdddgddd	gd
} tj| d	d}|d tt |jddd W 5 Q R X d S )Nr"   r!   r   r    r   r   r   r   r   )r   r   rg   r=   r   T)Zinplace)rB   r+   r*   rE   r   r   r   rW  r   r6   r6   r7   (test_set_index_errors_with_inplace_kwargC  s
    $
r  c                  C   s   t t jdddddd} t| d}t jddd	t jd
dd	f}| d}|jd|d}t||jD ](\}}|j	|j	kst
|j|jksht
qht||ft t||dft d S )Nrq   r   z
US/Centralro   r   ry  r   z2000-01-01 00:00:00-0600rp   z2000-01-12 00:00:00-0600r   r   )rB   r+   rF  r*   rE   	Timestampr   zipr   r   r,   rp   r   ru  )rO   rP   r   ry   Zddf_new_divZts1Zts2r6   r6   r7   test_set_index_timestampM  s    
r  compressionZZLibc              	   C   s(   t jd| i td W 5 Q R X d S )Ndataframe.shuffle-compressionr   )r.   r   r1   r8   r  r6   r6   r7   )test_disk_shuffle_with_compression_option`  s    r  ZUNKOWN_COMPRESSION_ALGOc              
   C   sH   t jd| i. tjtd| d td W 5 Q R X W 5 Q R X d S )Nr  zxNot able to import and load {} as compression algorithm.Please check if the library is installed and supported by Partd.r   r   )r.   r   r1   r   r   ImportErrorformatr8   r  r6   r6   r7   *test_disk_shuffle_with_unknown_compressiong  s    	r  c                  C   s4   dd } | d d}| dd}t |t |ks0td S )Nc                 S   s   t dttdi}|d d t|d< tjd| ir t	dd d }|
d|i | rl|jjdn
|jd}t|d	"}| W  5 Q R  W  5 Q R  S Q R X W 5 Q R X d S )
Nr   i'  {   r   r  F)r   r   r$   rb)rB   r+   rH   r  r   r   r.   r   r1   r   r)  r   filenameopenread)r  Zdf1r   r  rj   r6   r6   r7   generate_raw_partd_filey  s    zKtest_disk_shuffle_check_actual_compression.<locals>.generate_raw_partd_filer  ZBZ2)rG   r,   )r  Zuncompressed_dataZcompressed_datar6   r6   r7   *test_disk_shuffle_check_actual_compressionw  s    

r  ignore_indexr  r   r   r@   c                 C   s   t jjddtttddddd}t| tr:|| g  }n||   }|j| |||d}|j|||d	}t	||| d
 |r|dkr|j
j|j
jkstn|j
j|j
jkstd S )Nrq   Z2001r   r   r   r   )r{  r   r   r   )r   r  r@   )r   r  check_indexr  )r.   ZdatasetsZ
timeseriesr   r   r   r)   r   r   r   r   rf   r,   )r  r  r@   r3   Zdf_inZext_onZdf_out_1Zdf_out_2r6   r6   r7   test_dataframe_shuffle_on_arg  s,    	

   r  c               
   C   s\   t ddddddddgtdd	} tj| dd
}|jddd}|j|jd}t|| d S )Nr   r   r   r   r   r    r   abcdabcdkeyr   r=   r  Tr\  r   )	rB   r+   rH   r*   rE   r   r   r   r   )r   r   r   r6   r6   r7   test_set_index_overlap  s
    $r  c                  C   sr   t jt jdddddddddddddgddd} tj| dd}| d	jdd
d}t|| |j	d	ksnt
d S )Nr   rK  rL  r   r   r   r   r=   r!   Tr\  )rB   r+   rM  r*   rE   rN  r   r   r   r-   r,   rO  Zddf1r   r6   r6   r7   test_set_index_overlap_2  s    
r  c               
   C   sT   t ddddddddgdd} tj| dd}| d}|jddd	}t|| d S )
Nr   r   r   abc)rS  r   r=   rS  Tr\  )rB   r+   r*   rE   r   r   )rO   rP   rw  rv  r6   r6   r7   @test_set_index_overlap_does_not_drop_rows_when_divisions_overlap  s
     
r  c                  C   s   t t jdk d} | d}|dks*t|| _t| | dd t t jdk d} | d}|dksht|| _t| | dd d S )Nr   r   )r   r   r!   r"   Fr  r   )r   r   r   r"   )r(   r   r|  compute_current_divisionsr,   r   r   )r   r   r6   r6   r7   ,test_compute_current_divisions_nan_partition  s    

r  c               
   C   s   t ddddddddgtdd	} tj| dd
}tjtddH |d}|j	d|d}|j
dkshtdd |jD ddgkstW 5 Q R X d S )Nr   r   r   r   r   r    r   r  r  r=   "Partitions have overlapping valuesr   r  r   )r   r   r   c                 S   s   g | ]}t |qS r6   )rG   rk  r6   r6   r7   rh    s     z:test_compute_current_divisions_overlap.<locals>.<listcomp>)rB   r+   rH   r*   rE   r   r   r   r  r   r   r,   Z
partitions)r   r   r   r   r6   r6   r7   &test_compute_current_divisions_overlap  s    $
r  c                  C   sr   t jt jdddddddddddddgddd} tj| dd}| d	}tjt	d
d |
  W 5 Q R X d S )Nr   rK  rL  r   r  r   r   r=   r!   r  r   )rB   r+   rM  r*   rE   r  r   r   r   r   r  r  r6   r6   r7   (test_compute_current_divisions_overlap_2  s    r  c            	   	      s(  t jtdtjdddidd} | jdddd  fd	d
t j	D } 
 }|t|}t|tjjstttdd |j D st|j D ] }t|t jjrt|drtqt|t|k st|j D ]$\}}t|t jjr|dstqt|}t||\}}||ks$td S )Nr   r   r<   r;   r=   r   r  r   c                    s   g | ]} j |fqS r6   )r0   r  ddf_shuffledr6   r7   rh    s     z*test_shuffle_hlg_layer.<locals>.<listcomp>c                 s   s   | ]}t |tjjV  qd S r9   )r)   r*   r   ShuffleLayer)r   layerr6   r6   r7   r   	  s    z)test_shuffle_hlg_layer.<locals>.<genexpr>_cached_dictzshuffle-)r*   rE   rB   r+   rC   rD   randintr   r  r-   __dask_graph__r   r1   r)   r.   ZhighlevelgraphZHighLevelGraphr,   rm  layersrL   r  hasattrrG   items
startswithdict)	rP   keysr5  Z
dsk_culledr  r   Zdsk_dictZdsk_dict_culled_r6   r  r7   test_shuffle_hlg_layer  s,     r  r<   c              	   C   s   t jtdtjdddi| d}|jdddd}| }|j	
 D ]h}t|t jjsZqFt|d	rhttt|}t|t|kstt|d	rt| | ksFtqFd S )
Nr   r   r<   r;   r=   r   r  r   r  )r*   rE   rB   r+   rC   rD   r  r   r  r  rL   r)   ZSimpleShuffleLayerr  r,   r   r   r   r=  r  )r-   rP   r  r5  r  Zlayer_roundtripr6   r6   r7    test_shuffle_hlg_layer_serialize!  s     r  c                  C   sR   t t jdk d t t jdk jddd t t jdk jddd} t| |  d S )Nr   r   r   Tr\  )r(   r   r   r   )r   r6   r6   r7   test_set_index_nan_partition<  s    r  c                  C   s&  ddddgd dddgd dd	gd
 d} t jddddt jdd }t j| |d}tj|dd}|jjd}|	|}tjt j| |dddd}t
|| t jdd}|	|j| }tjt j| || ddd}t
|| tjt | dd}	|	|	j}
tjt | dd}t
|
| d S )Nr   r   r   r   r<   rR   r   AliceBobr    )r$   r5   r   
2022-02-2216hr   startr   rm   secondsr   r=   Ddays)rB   rF  	Timedeltar+   r*   rE   r   rw   floorr   r   )rL   Z
date_indexrO   rP   Z	day_indexZday_dfrw  one_dayZnext_day_dfZno_datesZrange_dfr6   r6   r7   !test_set_index_with_dask_dt_indexC  s>    
  

 
 
r  c                  C   s   t jddddt jdd } t jdd}t jdd	d
dgd
 dddgd ddgd | | | |d  d| d}tj|d
d}||j| }||j| }t|| d S )Nr  r  r   r  r   r  r   r  r   r   r   r<   rR   r  r  r    r   )r$   r5   r   r:  r   r   r=   )	rB   rF  r  r+   r*   rE   r   r   r   )datesr  rO   rP   r|   rw  r6   r6   r7   (test_set_index_with_series_uses_fastpathf  s"    


r  	ascendingbynelemr  c              	   C   s   t jd t }t t | d d d |d< t d| d |d< tj|dd}t	j
jdd	 |j||d
}W 5 Q R X |j||d
}tj||ddd d S )Nr   rD  r   r;   r   r<   r=   single-threadedr]   r  r  F)r  Zsort_resultsrC   rD   r   rB   r+   Zascontiguousarrayr  r*   rE   r.   r   r1   r|  r   r  r  r  rO   rP   gotexpectr6   r6   r7   test_sort_values|  s    r  c              	   C   s   t jd t }t t | d d d |d< t d| d |d< tj|dd}t	j
jdd	 |j||d
}W 5 Q R X |j||d
}tj||dd d S )Nr   rD  r   r;   r   r   r=   r  r]   r  Fr  r  r  r6   r6   r7   !test_sort_values_single_partition  s    r  na_positionfirstlastr  rO  r  r;      r   rR   c           	   	   C   sf   t | }tj||d}tjjdd |j|||d}W 5 Q R X |j|||d}tj||dd d S )Nr=   r  r]   )r  r  r  Fr  	rB   r+   r*   rE   r.   r   r1   r|  r   )	rO  r  r  r  r  rO   rP   r  r  r6   r6   r7   test_sort_values_with_nulls  s    
r  c               	   C   sN   t ddddgi} tj| dd}tjtdd |jddd	 W 5 Q R X d S )
Nr   r   r   r   r=   z,na_position must be either 'first' or 'last'r   invalid)r  r  )rB   r+   r*   rE   r   r   r   r|  r   r6   r6   r7   test_shuffle_values_raises  s     r  c               	   C   sv   t ddddgi} tj| dd}tjjdd |jdgdd	d
}W 5 Q R X t ddddgi}tj||dd d S )Nr   r   r   r   r=   r  r]   r  T)r  r-   r  Fr  r  )rO   rP   r  r  r6   r6   r7   test_shuffle_by_as_list  s    r  c                   C   s(   t dd dkstt dd dks$td S )Nr   test)r   r,   r6   r6   r6   r7   	test_noop  s    r  c              	   C   s   t dddgd ddddgd	 d
}tj||d}dd }tjjdd |j| d |d| id}W 5 Q R X |j| d}tj||dd d S )Nr   r   r   rR   r   r   r    r   r   r   r=   c                 [   s   | j |||dS )N)r  r  )r|  )	partition
by_columnsr  r  r   r6   r6   r7   rj     s
      z+test_sort_values_custom_function.<locals>.fr  r]   r   r  )r  Zsort_functionZsort_function_kwargs)r  Fr  r  )r  r  rO   rP   rj   r  r  r6   r6   r7    test_sort_values_custom_function  s    &  r   c               	   C   s`   t dddgd ddddgd	 d
} tj| dd}tt |jdddgd W 5 Q R X d S )Nr   r   r   rR   r   r   r    r   r   r   r<   r=   r   TFr  )rB   r+   r*   rE   r   r   rW  r|  r   r6   r6   r7   test_sort_values_bool_ascending  s    &r  c                 C   s  t jjt jdddddgt jdddddgt jd	ddd
dgt jdddddgt jdddddgt jdddddgt jdddddgt jdddddgt jdddddgt jdddddgt jdddd d!ggd"d#d$gd%}tj|| d&}|d"}|d"}t|| d S )'Nz2002-01-11 21:00:01+0000UTCr  i  g    @z2002-01-14 21:00:01+0000i  g    @z2002-01-15 21:00:01+0000i1  g    @z2002-01-23 21:00:01+0000iu  g    (@z2002-01-29 21:00:01+0000i  g     @z2002-02-01 21:00:01+0000i  g    @i  g    @ig  g    n@z2002-01-22 21:00:01+0000g   g    `#@z2002-01-30 21:00:01+0000iA  g    @@z2002-01-31 21:00:01+0000i/  g     {@timeZid1Zid2r   r=   )rB   r+   Zfrom_recordsr  r*   rE   r|  r   )r-   rO   rP   r   rw  r6   r6   r7   test_sort_values_timestamp  s&    

r  )r  multiprocessingr  r   r   rD   r   r   concurrent.futuresr   r   	functoolsr   Zunittestr   ZnumpyrC   r1  rB   r   r.   Zdask.dataframeZ	dataframer*   Z	dask.baser   Zdask.dataframe._compatr   r   r	   r
   r  r   r   r   r   r   r   r   Zdask.dataframe.utilsr   r   Zdask.optimizationr   r+   r5  rM  r4  r(   rF   r   ru  _compatZPANDAS_GT_110r'   r8   r:   rQ   rS   rW   rZ   r`   rd   r'  rE  r   rF  Ztimedelta_rangery   r}   r   markZparametrizeparamZslowr   Zskipifr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
  r  r  r  r  r  r   r/  Zgpur6  r;  rA  rB  rJ  rQ  rT  rV  rZ  r^  r_  r`  rp  rx  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rH   r  r   r  r  r  r  r   r  r  r6   r6   r6   r7   <module>   s  $	$$$ 
 
			 
  





N 
& 
 
 







 
(
#"			