U
    /e                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dl	Z
d dlZd dlZd dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d dl m!Z! i Z"ejjrd	e"d
< dddddddddddddddgZ#ej$e#ddd Z%ej$dddd  Z&ej'j(d!d"d#d$ Z)d%d& Z*d'd( Z+d)d* Z,d+d, Z-ej'.d-d.d/ d0d/ d1d/ d2d/ ej/d3d/ ej'j(d4d"d5gej'.d6dd	gd7d8 Z0d9d: Z1ej'.d;d<d=gd>d? Z2ej'.d-d@d/ dAd/ dBd/ dCd/ dDd/ dEd/ dFd/ gdGdH Z3dIdJ Z4ej'.dKdd	gdLdM Z5dNdO Z6dPdQ Z7dRdS Z8ej'.dTdUej'.dVdWdXdY Z9dZd[ Z:d\d] Z;d^d_ Z<d`da Z=ej'.dbdd	gdcdd Z>ej'.dedfdggdhdi Z?ej'.djdkd/ dld/ dmd/ dnd/ dod/ dpd/ dqd/ drd/ dsd/ dtd/ dud/ dvd/ gej'.dVdwd/ dxd/ gdydz Z@ej'.d-d{d/ d|d/ d}d/ d~d/ dd/ ej/dd/ ej'(dd5gej'.dVdd/ dd/ gdd ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHej'.dfddgej'.dgddgdd ZIej'.ddd	gdd ZJdd ZKej'.dddiejLejMdgej'.ddddggdd ZNej'.dd dgej'.ddd	dgej'.dddgej'.ddddgdd ZOdd ZPdd ZQej'.d-dd/ dd/ dd/ dd/ dd/ gej'.dgddgdd ZRej'.d-dd/ dd/ dd/ gej'.dgddgdd ZSej'.d-dd/ dd/ dd/ dd/ ej/dd/ ej'j(e dd"d5gej'.ddd/ dd/ dd/ gdd˄ ZTdd̈́ ZUddτ ZVej'.dTdddgej'.dfddgej'.dgdddgddӄ ZWddՄ ZXddׄ ZYddل ZZddۄ Z[ej'.ddddddgddggej'.ddddggej'.dVdddgdd Z\dd Z]ej'.dVddgdd Z^dd Z_dd Z`dd Zadd Zbdd Zcdd Zdej'.dddgddgdggdd Zeej'.dddddddddddddddgdd Zfej'.dddgddgdgdgej'.dejgejhddd/gdd Ziejddd/ d d/ dd/ Zkejddd/ dd/ Zlej'.dddidekid	fddidelidfddgekelgd	fdddgidekelgid	fgdd Zmej'.ddekfdgekgfddgekelgfgdd	 Znd
d Zodd Zpdd Zqej'.dVdergdd Zsej'.dVdd/ dd/ dd/ dd/ dd/ dd/ gdd Ztdd Zudd Zvej'.dd dddgd d! Zwej'.ddd	dgd"d# Zxej'.d$dddgeyd%d&d'gd(d)d*gd+ddggd,d- Zzd.d/ Z{ej'.d0dd	gd1d2 Z|d3d4 Z}ej'.d0dd	gd5d6 Z~d7d8 Zej'.d0dd	gd9d: Zd;d< Zej'.d0dd	gd=d> Zd?d@ ZdAdB Zej'.dTdddgej'.dCddDdEgej'.dd dgdFdG ZdHdI ZdJdK Zej'dLdMdN ZdOdP Zej'.dQdRd/ ejde
jjgdSdT Zej'.dTereddEej'.dUdd	gdVdW Zej'.dXdYd/ dZd/ fd[d/ d\d/ fd]d/ d^d/ fd_d/ d`d/ fgdadb Zej'.dcddd/ ded/ dfd/ dgd/ gdhdi Zej'.dcdjd/ dkd/ gdldm Zej'.dcdnd/ dod/ gdpdq Zej'j(ejjj drd"ej'.dsd	dgdtdu Zej'jej'.dsd	ddgej'.dvdddddgddgddggej'.ddd	dgdwdx Zej'jej'.dddgdydz Zej'j(ejjj d{d"d|d} Zd~d Zdd Zej'.dgddgej'.dddgddgdݐdggdd Zej'.ddej/dej'jd5gdd Zej'.dvdddddgddggej'.dcdddgej'.ddd	gdd Zej'.dcejejejMejggej'.ddd	gdd Zdd Zej'je dd"ej'j.ddd	gddgdej'j.ddd	gddgdej'.ddddggej'j.ddd	gddgddd Zej'je dd"ej'.ddd	gdd Zej'je dd"ej'.ddd	gej'.dcdejheejddgdd Zdd Zdd Zej'dej'.dddgdd Zej'.dddgdd Zej'.dddgdd Zdd Zdd Zej'.dvdddggej'.ddddgde
dge
dggdd ZdS (      N)partial)_compat)PANDAS_GT_110PANDAS_GT_130PANDAS_GT_140PANDAS_GT_150check_numeric_only_deprecationtm)grouper_dispatch)assert_dask_graph	assert_eqassert_max_deps)M)	hlg_layerFZ
check_freqsummeanmedianminmaxcountsizestdvarcovcorrnuniquefirstlastprod)paramsc                 C   s   | j S )z+
    Aggregations supported for groups
    )param)request r"   E/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/tests/test_groupby.pyagg_func3   s    r$   T)Zautousec                 c   s
   d V  d S Nr"   )shuffle_methodr"   r"   r#   auto_shuffle_method=   s    r'   z)uncertain how to handle. See issue #3481.reasonc                  C   s   t ddddddddd	d
g
tdd} t| d}| dd }|dd }t|jtjsbt	t
|j|j | | jd }||jd }t|jtjst	d S Nr                        	   
   
abcbabbcdaxyr7   r6   )pd	DataFramelistddfrom_pandasgroupby
isinstanceobjSeriesAssertionErrorr   r7   pdfddfZgpdpr"   r"   r#    test_groupby_internal_repr_xfailB   s    (rF   c                  C   s  t ddddddddd	d
g
tdd} t| d}| d}|d}t|tjjsZtt|j	t j
jjsptt|jtjstt|j|j | dd }|dd }t|tjjstt|j	t j
jjst| ddg }|ddg }t|tjjstt|j	t j
jjs tt|jtjs4tt|j|j | | jd }||jd }t|tjjsvtt|j	t j
jjst| | jdg }||jdg }t|tjjstt|j	t j
jjstt|jtjstt|j|j d S r*   )r8   r9   r:   r;   r<   r=   r>   DataFrameGroupByrA   _metacorer?   r   SeriesGroupByr7   rB   r"   r"   r#   test_groupby_internal_reprQ   s8    (

rK   c                  C   s(  t ddddddddd	d
g
tdd} t| d}tt |d W 5 Q R X tt |ddg W 5 Q R X |d}d}tt}|d  W 5 Q R X |t	|j
kstd}tt}|ddg  W 5 Q R X |t	|j
kstd}tt}|  W 5 Q R X |t	|j
ks$td S )Nr   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   Ar6   r7   zColumn not found: zColumns not found: zDataFrameGroupBy does not allow compute method.Please chain it with an aggregation method (like ``.mean()``) or get a specific group using ``.get_group()`` before calling ``compute()``)r8   r9   r:   r;   r<   pytestraisesKeyErrorr=   strvaluerA   NotImplementedErrorcompute)rC   rD   rE   msgerrr"   r"   r#   test_groupby_errorw   s(    (
rV   c                     s   t jddddddddd	g	ddddddd
d
d
g	dd
dddddd	d	d	g	d} tj| dd tt fdd tt fdd dt 	dkst
dd }| 	d|}tjtdd8  	d|jdst
t| 	d| W 5 Q R X d S )Nr+   r,   r-   r.      r/   r0   r1   r2   r   abindexnpartitionsc                      s
     dS )Ndoes_not_existr=   r"   rD   r"   r#   <lambda>       z#test_full_groupby.<locals>.<lambda>c                      s     djS NrY   )r=   r_   r"   ra   r"   r#   rb      rc   rZ   rY   c                 S   s   | j | j| j  dS N)rZ   ZassignrZ   r   dfr"   r"   r#   func   s    ztest_full_groupby.<locals>.func`meta` is not specifiedmatchri   )r8   r9   r;   r<   rM   rN   rO   AttributeErrordirr=   rA   applywarnsUserWarning_name
startswithr   )rh   ri   expectedr"   ra   r#   test_full_groupby   s    ,ru   c            	         s  t jddddddddd	g	ddddddd
d
d
g	dd
dddddd	d	d	g	d} tj| dd}ddd}| j  | j |j }|j }t	 fdd }t	fdd }t
jtdd t| dj| d|dj| |d t| d| |d|  t| dj| d|dj| d t| d| |d||dd W 5 Q R X | d| }t| d| |dj|||d t| dj| d|dj| ||d t
t}|dj| |d W 5 Q R X dt|jkr0dt|jks4tt
t}|dj||d W 5 Q R X dt|jkrdt|jkstt| d| |dj|||d t| dj| d|dj| ||d d S )Nr+   r,   r-   r.   rW   r/   r0   r1   r2   r   rX   r[   r]   c                 S   s    | j | j| j  ||  dS re   rf   )rh   cdr"   r"   r#   ri      s    z.test_full_groupby_apply_multiarg.<locals>.funcc                      s    S r%   r"   r"   )rv   r"   r#   rb      rc   z2test_full_groupby_apply_multiarg.<locals>.<lambda>c                      s    S r%   r"   r"   rw   r"   r#   rb      rc   rj   rk   rY   rx   Fcheck_dtypemeta)rw   r|   zdask.delayedr|   )r-   )r8   r9   r;   r<   rY   r   rZ   r   daskdelayedrM   rp   rq   r   r=   ro   rN   
ValueErrorrP   rQ   rA   )	rh   rD   ri   Zc_scalarZd_scalarZ	c_delayedZ	d_delayedr|   excr"   rv   rw   r#    test_full_groupby_apply_multiarg   sd    ,




&  $ $r   grouperc                 C   s   dgS rd   r"   rg   r"   r"   r#   rb      rc   rb   c                 C   s   ddgS NrY   rZ   r"   rg   r"   r"   r#   rb      rc   c                 C   s   | d S rd   r"   rg   r"   r"   r#   rb      rc   c                 C   s   | d | d gS r   r"   rg   r"   r"   r#   rb      rc   c                 C   s   | d dk| d dkgS NrY   r,   rZ   r+   r"   rg   r"   r"   r#   rb      rc   znot yet supported)Zmarksreversec                 C   s   dddddddddg	}|r(|d d d }t jdd	dd
dddddg	dd	dd
dddddg	d
dddd	ddddg	d|d}tj|dd}dd }tjtdd0 t|| |	||| |	| W 5 Q R X d S )Nr   r+   r-   rW   r/   r1   r2   r,   r.   r0   )rY   rw   rZ   r[   r]   c                 S   s   | j | j| j  dS re   rf   rg   r"   r"   r#   ri     s    z*test_full_groupby_multilevel.<locals>.funcrj   rk   )
r8   r9   r;   r<   rM   rp   rq   r   r=   ro   )r   r   r\   rh   rD   ri   r"   r"   r#   test_full_groupby_multilevel   s"     r   c                  C   sT   t tdtdd} tj| dd}|d}dt|ks@tdt|ksPtd S )Nr3   )rY   b c d er,   r]   rY   r   )r8   r9   ranger;   r<   r=   rn   rA   )rh   rD   gr"   r"   r#   test_groupby_dir  s
    
r   	schedulersyncthreadsc           	      C   s  t jddddddddd	g	ddddddd
d
d
g	dd
dddddd	d	d	g	d}tj|dd}|d}|d}t|dj ||j	j  |d
ddi}t|dj |j t|jdstdd }dd }dd }tjj| d t  tdt t|d||d| t|d|d|d|d t||j	|||j	| t|jd||jd| t|j|j	||j|j	| W 5 Q R X W 5 Q R X d S )Nr+   r,   r-   r.   rW   r/   r0   r1   r2   r   rX   r[   r]   rY   rZ   r   getitemc                 S   s   | j | j| j  dS re   rf   rg   r"   r"   r#   ri   0  s    z#test_groupby_on_index.<locals>.funcc                 S   s   | dg | dg    S )NrZ   r   rg   r"   r"   r#   func23  s    z$test_groupby_on_index.<locals>.func2c                 S   s   |   S r%   r   rg   r"   r"   r#   func36  s    z$test_groupby_on_index.<locals>.func3r   ignore)r8   r9   r;   r<   	set_indexr   r=   rZ   r   r\   aggr   r}   rA   configsetwarningscatch_warningssimplefilterrq   ro   )	r   rC   rD   Zddf2Zpdf2r   ri   r   r   r"   r"   r#   test_groupby_on_index  sD    ,

$
" r   c                 C   s   |  dd S r   r`   rg   r"   r"   r#   rb   U  rc   c                 C   s   |  ddgS r   r`   rg   r"   r"   r#   rb   V  rc   c                 C   s   |  ddgd S NrY   rZ   rv   r`   rg   r"   r"   r#   rb   W  rc   c                 C   s   |  | d ddg S r   r`   rg   r"   r"   r#   rb   X  rc   c                 C   s   |  dddg S r   r`   rg   r"   r"   r#   rb   Y  rc   c                 C   s   |  ddg S r   r`   rg   r"   r"   r#   rb   Z  rc   c                 C   s   |  dddgS r   r`   rg   r"   r"   r#   rb   [  rc   c           
      C   s  |dkrd S t ddddddgddddddgddddddgddddddgd}t|d}| |}| |}t|t jjjr|dkrd S t||}t||}t|tjj	st
t|t jjjst
|dkrt| | t n6| }t  td	t | }	W 5 Q R X t||	 d S )
Nr   r+   r,   r-   r.   rY   rZ   rv   rw   r   r   r   r   )r8   r9   r;   r<   r>   rI   r=   rJ   getattrZ_GroupByrA   ZGroupByr   astypefloatr   r   r   RuntimeWarning)
r   r$   rh   rD   
dask_groupZpandas_groupZdask_aggZ
pandas_aggrY   rZ   r"   r"   r#   test_groupby_multilevel_getitemR  s2    


r   c                  C   s   t ddddddgddddddgddddddgddddddgd} t| d}| dg }|dg }t|| | ddg }|ddg }t|| | | d | d g }||d |d g }t|| d S )Nr+   r,   r-   r.   r   rY   rv   )r8   r9   r;   r<   r=   r   r   )rh   rD   solresr"   r"   r#   test_groupby_multilevel_agg  s"    

r   categoricalsc           
   	   C   sf  t jdddgdddgddddgd	t jdddgdddgdd
ddgd	t jdddgdddgddddgd	d}|d }t|d|ddddg}| }d|j|jfg}| r|jdgd}| }n||jd |jd f |D ]|\}}||}||}	t|	d|		d t|	d|		d t|j
	d|	j
	d t|j
	d|	j
	d qd S )Nr+   r,   r/   r.   r0   rX   r   r-   r[   rW   r1   r2   )r6   r   )r6   r+   )r6   r,   r   r6   rZ   rZ   rZ   columns)r8   r9   r;   rS   rZ   Z
categorizeappendr=   r   	get_grouprY   )
r   dskr|   rw   fullZby_keysddkeypdkeyZ	ddgroupedZ	pdgroupedr"   r"   r#   test_groupby_get_group  s&    $$$


r   c                  C   sf   t d} tjt| }tt| |d}tj	|dd}|
dd  }t|
dd  | d S )Naaabbccccdddeeestringsdatar-   r]   r   r   )r:   nprandomrandnlenr8   r9   dictr;   r<   r=   r   r   r   r   Zpssrt   r"   r"   r#   test_dataframe_groupby_nunique  s    r   c                  C   sd   t d} t ttd}tt| |d}tj|dd}|dd 	 }t
|dd 	 | d S )Nr   Z123111223323412r   r-   r]   r   r   )r:   mapintr8   r9   r   r;   r<   r=   r   r   r   r"   r"   r#   6test_dataframe_groupby_nunique_across_group_same_value  s    r   c               	   C   st   t dddgdddgd} t| d}dd	 }tt |d
|}W 5 Q R X | d
|}t	|| d S )Nr+   r,   r-   r.   rW   r/   r5   c                 S   s   | d   S )Nr7   r   rg   r"   r"   r#   rb     rc   z6test_series_groupby_propagates_names.<locals>.<lambda>r6   )
r8   r9   r;   r<   rM   rp   rq   r=   ro   r   )rh   rD   ri   resultrt   r"   r"   r#   $test_series_groupby_propagates_names  s    r   r^   )r+   r,   ri   )cumsumcumprodcumcountc              
   C   s   t dddddddgdd	dddddgd
d}t|| }|j| ksLtt|d d| }t|d d| }t	|| d S )Nr+   r,   r-   r.   rW   r/   r0   r1   r2   r5   r6   r7   )
r8   r9   r   r;   r<   r^   rA   r   r=   r   )r^   ri   rh   rD   rt   r   r"   r"   r#   ,test_series_groupby_cumfunc_with_named_index  s    $r   c                  C   s   t dddddg} | | }tj| dd}||}| | d }||d }||f||ffD ]\}}t| |  t| |  t| |  t|	 |	  t|
 |
  t| |  t| |  t| |  qbd S )Nr+   r,   r]   )r8   r@   r=   r;   r<   r   r   r   r   r   r   r   r   r   )r   Zpd_groupssr   Z	pd_group2Zdask_group2dgZpdgr"   r"   r#   test_series_groupby  s    

r   c               	   C   s   t dddddg} tj| dd}d}tt}| g  W 5 Q R X |t|j	ksXt
tt}|g  W 5 Q R X |t|j	kst
tj| dd}tt || W 5 Q R X tt | d W 5 Q R X tt |d W 5 Q R X d S )Nr+   r,   r]   zNo group keys passed!rW   r6   )r8   r@   r;   r<   rM   rN   r   r=   rP   rQ   rA   rR   rO   )r   r   rT   rU   Zsssr"   r"   r#   test_series_groupby_errors  s     r   c                  C   sv   t  } tj| dd}t| j| jj	 |j|jj	 dd t| | jjj	 ||jjj	 dd d S )Nr,   r]   FZcheck_names)
r   makeTimeDataFramer;   r<   r   rL   r=   r\   monthr   rh   rD   r"   r"   r#   test_groupby_index_array  s    r   c                      s0   t  tjdd tt fdd d S )Nr,   r]   c                      s    j jjddS )NFZas_index)r=   r\   r   r"   rD   rh   r"   r#   rb   *  rc   z(test_groupby_set_index.<locals>.<lambda>)r   r   r;   r<   rM   rN   	TypeErrorr"   r"   r   r#   test_groupby_set_index'  s    r   emptyc                    s  | r.t jdgdgddgdjd d }g }nTdddg}t jdddddddd	d
g	ddd
d	d	ddddg	dddd	ddddddg	d}tj|d	d d j|jf jd |jd ffD ]\}}t |j	 ||j	  t |j
 ||j
  t |j ||j  t |j ||j  t |j ||j  t |j ||j  t |j ||j  t |j ||j  t |j ||j  t |j ||j  |D ]R}t |j|||j| t |j|||j| qt | ||  t |	 ||	  t |
 ||
  t | ||  t | ||  t | ||  t | ||  t | ||  t | ||  |D ]R}t |||||dd t |||||dd qzq j|jf jd |jd ffD ]\}}t j| |j| dd t j|
 |j|
 dd t j| |j| dd t j| |j| dd t j| |j| dd t j| |j| dd t j| |j| dd t j| |j| dd |D ]R}t j|||j|| t j|||j|| q.qdD ]}t  j|kj ||j|kj  t  j|kj	 ||j|kj	  t  j|kj
 ||j|kj
  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj	 ||j|kj	  t  j|kj
 ||j|kj
  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|kj ||j|kj  t  j|k ||j|k  t  j|k	 ||j|k	  t  j|k
 ||j|k
  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  t  j|k	 ||j|k	  t  j|k
 ||j|k
  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  t  j|k ||j|k  |D ]4}t  j|k|||j|k| qؐqd j|jf jd |jd f jd	k|jd	kffD ]\}}t |j ||j  t |j	 ||j	  t |j
 ||j
  t |j ||j  t |j ||j  t |j ||j  t |j ||j  t |j ||j  t | ||  t | ||  t | ||  t |	 ||	  t |
 ||
  t | ||  t | || t t | ||  t | ||  t | ||  t | ||  |D ],}t |j|||j| qqFt dj jt dj jkstt  jd	kj jt  jd	kj jksVttt fdd tt fdd tt fdd tt fdd tt fdd t dj d t dj	 d t dj
 d t dj d t dj d  t dj  d! t dj d" t dj d# t dj d$ t dj d% t dj d& t dj d t dj d t dj d' t dj d( t d d) t d	 d* t d
 d+ t d d, t d d- t d d. t d d/ t d d) t d d, t d d0 d S )1N      ?rX   r   r[   r+   r,   r/   r.   r-   r0   rW   r1   r2   r]   r   Fry   r   )r   r.   r0   )rY   rY   rZ   c                      s
     dS Nr6   r`   r"   ra   r"   r#   rb     rc   z4test_split_apply_combine_on_series.<locals>.<lambda>c                      s     ddgS NrY   r6   r`   r"   ra   r"   r#   rb     rc   c                      s     dd S r   r`   r"   ra   r"   r#   rb     rc   c                      s     dd S )NrY   )rZ   r6   r`   r"   ra   r"   r#   rb     rc   c                      s     dddg S )NrY   rZ   r6   r`   r"   ra   r"   r#   rb     rc   zseries-groupby-sumzseries-groupby-minzseries-groupby-maxzseries-groupby-countzseries-groupby-varzseries-groupby-covzseries-groupby-firstzseries-groupby-lastzseries-groupby-tailzseries-groupby-headzseries-groupby-prodzseries-groupby-nuniquezseries-groupby-sizezdataframe-groupby-sumzdataframe-groupby-minzdataframe-groupby-maxzdataframe-groupby-countzdataframe-groupby-firstzdataframe-groupby-lastzdataframe-groupby-prodzdataframe-groupby-size)!r8   r9   ilocr;   r<   rZ   r   r=   rY   r   r   r   r   r   r   r   r   tailheadr   r   r   r   r   r   sortedr}   rA   rM   rN   rO   r   r   )r   rC   Zddofsr   r   ddofir"   ra   r#   "test_split_apply_combine_on_series-  s   $
,,""""""""""&**          &.
..... ........... ......****************** 
""""""""$.r   keywordsplit_every	split_outc                 C   s  t dddddddddg	d dddddddddg	d d}tj|d	d
}dd }tD ]`}|dkrdqV||jddd|f| di}||d|}t|| ||d|j|jksVtqV||jddddfddi| di}||dddd}t|| ||ddddj|jksttD ]n}|dkr2q ||jdddj	|f| di}||dj	|}t|| ||dj	|j|jks tq ||jdddj	dfddi| di}||dj	ddd}t|| ||dj	dddj|jksttD ]x}|dkrq||j	j|j
dd|f| di}||j	|j
|}t||dd ||j	|j
|j|jkstq||j	j|j
dddfddi| di}||j	|j
ddd}t|| ||j	|j
dddj|jkstd S )Nr+   r,   r/   r.   r-   r0   d   rX      r]   c                 [   s   t | |f |S r%   r   r   mkwargsr"   r"   r#   call  s    z*test_groupby_reduction_split.<locals>.callr   r   r   rZ   Fsortr   r   r   r   r   )r8   r9   r;   r<   	AGG_FUNCSr=   r   rr   rA   rY   rZ   )r   rC   rD   r   r   r   r   r"   r"   r#   test_groupby_reduction_split  sL    4
$
"

$&
$
 &(
r   groupedc                 C   s
   |  dS NrL   r`   rg   r"   r"   r#   rb   G  rc   c                 C   s   |  | d S r   r`   rg   r"   r"   r#   rb   H  rc   c                 C   s   |  | d d S NrL   r+   r`   rg   r"   r"   r#   rb   I  rc   c                 C   s   |  dd S NrL   Br`   rg   r"   r"   r#   rb   J  rc   c                 C   s   |  dd S r   r`   rg   r"   r"   r#   rb   L  rc   c                 C   s   |  | d d S r   r`   rg   r"   r"   r#   rb   M  rc   c                 C   s   |  | d d d S )NrL   r+   r   r`   rg   r"   r"   r#   rb   N  rc   c                 C   s   | j | d S r   r   r=   rg   r"   r"   r#   rb   P  rc   c                 C   s   | j | d d S r   r   rg   r"   r"   r#   rb   Q  rc   c                 C   s   |  dddg S NrL   r   Cr`   rg   r"   r"   r#   rb   S  rc   c                 C   s   |  | d ddg S r   r`   rg   r"   r"   r#   rb   T  rc   c                 C   s   |  | d d ddg S )NrL   r+   r   r   r`   rg   r"   r"   r#   rb   U  rc   c                 C   s   |  dd S )Nc                 S   s   |   S r%   r   r6   r"   r"   r#   rb   [  rc   <lambda>.<locals>.<lambda>ro   grpr"   r"   r#   rb   [  rc   c                 C   s   |  dd S )Nc                 S   s   |   S r%   r   r   r"   r"   r#   rb   \  rc   r   	transformr   r"   r"   r#   rb   \  rc   c              	   C   sx   t ddddgd tjdtjdtjdd}t|d}tt	  t
|| ||| | W 5 Q R X d S )Nr+   r,   r-   r.   rW      rL   r   r   D)r8   r9   r   r   r   r;   r<   rM   rp   rq   r   )r   ri   rC   rD   r"   r"   r#   test_apply_or_transform_shuffleD  s    


r  c                 C   s   dS NAAr"   rg   r"   r"   r#   rb   q  rc   c                 C   s   ddgS Nr  ABr"   rg   r"   r"   r#   rb   r  rc   c                 C   s   | d S r  r"   rg   r"   r"   r#   rb   s  rc   c                 C   s   | d | d gS r	  r"   rg   r"   r"   r#   rb   t  rc   c                 C   s   | d d S )Nr  r+   r"   rg   r"   r"   r#   rb   u  rc   c                 C   s   | d d | d d gS )Nr  r+   r
  r"   rg   r"   r"   r#   rb   w  rc   NotImplementedc                 C   s   |  dd S )Nc                 S   s   |   S r%   r   r   r"   r"   r#   rb     rc   r   r   r   r"   r"   r#   rb     rc   c                 C   s   |  dd S )Nc                 S   s   |   S r%   r   r   r"   r"   r#   rb     rc   r   r  r  r"   r"   r#   rb     rc   c              	   C   s   t ddddgd ddddgd tjdtjdtjdd}t|d}tt	 t
||| |||| | t
||| |d ||| |d  t
||| |dd	g ||| |dd	g  W 5 Q R X d S )
Nr+   r,   r-   r.   rW   r  )r
  r  r   r   r  r   r   )r8   r9   r   r   r   r;   r<   rM   rp   rq   r   r=   )r   ri   rC   rD   r"   r"   r#   *test_apply_or_transform_shuffle_multileveln  s&    


	& r  c                  C   s   t ddddgddddgddddgd} tj| dd}t|d | d  t|ddg | ddg  t|djdd	 tttdd
| ddd	  d S )Nr   r+   r,   r-   r.   )r   r+   r,   r]   c                 S   s   | S r%   r"   r   r"   r"   r#   rb     rc   z+test_numeric_column_names.<locals>.<lambda>r{   c                 S   s   | S r%   r"   r   r"   r"   r#   rb     rc   )	r8   r9   r;   r<   r   r=   r   ro   r   r   r"   r"   r#   test_numeric_column_names  s    *&r  c              
   C   s*  | dkrt d t }|jd |d< |jd |d< tj|dd}dd	 d
d	 fD ]}|||	t
}t t |||	t
}W 5 Q R X t||  tdd |jD rt|||j	t
}t t |||j	t
}W 5 Q R X t||  tdd |jD rTtqTd S )NZdiskzTasks-only shuffle testg?rL   r   r3   r]   c                 S   s   dS r   r"   r   r"   r"   r#   rb     rc   z*test_groupby_apply_tasks.<locals>.<lambda>c                 S   s   | j S r%   )rL   r   r"   r"   r#   rb     rc   c                 s   s   | ]}d |d kV  qdS Zpartdr   Nr"   .0kr"   r"   r#   	<genexpr>  s     z+test_groupby_apply_tasks.<locals>.<genexpr>c                 s   s   | ]}d |d kV  qdS r  r"   r  r"   r"   r#   r    s     )rM   skipr   r   rL   r   r;   r<   r=   ro   r   rp   rq   r   rS   anyr}   rA   )r&   rh   rD   indrY   rZ   r"   r"   r#   test_groupby_apply_tasks  s"    
 r  c               	   C   s   t dddddgdddddgd} tj| dd	}tjjd
d: t|dj	dd t
tdd| d	dd  W 5 Q R X d S )Nr+   r,   r-   r.   rW   1rY   rL   r   r]   Z	processesr   r   c                 S   s   | S r%   r"   r   r"   r"   r#   rb     rc   z.test_groupby_multiprocessing.<locals>.<lambda>r{   c                 S   s   | S r%   r"   r   r"   r"   r#   rb     rc   )r8   r9   r;   r<   r}   r   r   r   r=   ro   r   objectr   r"   r"   r#   test_groupby_multiprocessing  s    $r  c                  C   s  t jddddddddd	g	ddddddd
d
d
g	dd
dddddd	d	d	g	d} tj| dd}|djdkslt||d jdkst||d dkjj|d dkjkst|ddgjddgkst||d |d gjddgkst||d dgjddgkstd S )Nr+   r,   r-   r.   rW   r/   r0   r1   r2   r   rX   r[   r]   rY   rZ   )r8   r9   r;   r<   r=   byrA   rr   )r   rw   r"   r"   r#   test_groupby_normalize_by  s    ,($r  c                 C   s   | }|dkrd S t jddddgddddgddddgddddgdddd	d
gd}tj|dd}|d	d
g|}|dkr|t}| dkrddini }t||d	d
gj|f| d S )Nr   r+   r-   r.      r   rv   rZ   rY   rw   r   r]   >   r   r   r   shuffleZtasks)	r8   r9   r;   r<   r=   r   r   r   r   )r$   specrC   rD   rt   r  r"   r"   r#   %test_aggregate__single_element_groups  s    ,

r!  c            	      C   s   ddl m}  ddg}dddg}| |\}}}| |\}}}t|t|ksNtt|t|ksbtt|t|ksvtt|t|kstdS )zAggregate reuses intermediates. For example, with sum, count, and mean
    the sums and counts are only calculated once across the graph and reused to
    compute the mean.
    r   )_build_agg_args)foor   input)barr   r$  )Zbazr   r$  N)Zdask.dataframe.groupbyr"  r   rA   )	r"  Zno_mean_specZwith_mean_specZno_mean_chunksZno_mean_aggsZno_mean_finalizersZwith_mean_chunksZwith_mean_aggsZwith_mean_finalizersr"   r"   r#   5test_aggregate_build_agg_args__reuse_of_intermediates  s    
r&  c                     s  t ddg  fdd} ddiddd	d
dddgd
ddddddgdddddgddddddddddg}tjdddddddddg	d dddddddddg	d dddddddddg	d dddddddddg	d ddddd gd!}tj|dd"}|D ]}|ddgj|dd#}|ddgj|dd#}| |}| |}t|d t|d t	|t
rlt|jd$sltt|d t|d kst|D ]~}	t	|tt	|	tkrt	|t
t	|	t
kr|ddgj|	dd#}
t|
jt|jkstt|
jt|jkstqqd S )%Ndask_holderr}   c                    s    dd | j  D S )Nc                 S   s0   i | ](\}}|d   drd|d  kr||qS )r   	aggregatez-chunk-)rs   r  r  vr"   r"   r#   
<dictcomp>  s
     z9test_aggregate_dask.<locals>.<lambda>.<locals>.<dictcomp>)r}   items)r?   r'  r"   r#   rb     s   z%test_aggregate_dask.<locals>.<lambda>rv   r   r   r   rX   rZ   rv   r   r   r   r   r   r   r   r   r+   r,   r-   r.   r0   r   r   rW   r/   r1   r   rZ   rY   rw   r   r]   )r   r   )collections
namedtupler8   r9   r;   r<   r=   r   r   r>   r   r   r}   rA   pickledumpsr:   r   )Zget_agg_daskspecsrC   rD   r   Zresult1Zresult2Z	agg_dask1Z	agg_dask2Z
other_specotherr"   r-  r#   test_aggregate_dask  sl    	 
	


"r5  r+   r1   r,       c                 C   s   t jdddddddddg	d dddddddddg	d ddddddd	dd
g	d dddddddd	dg	d dddddgd}tj|dd}dddgd}|jddgddj|||| d}|ddg|}|d |d j|d< t|| d S )Nr+   r,   r-   r.   r0   r   r   rW   r/   r1   r   rv   rZ   rY   rw   r   r]   r   r   r   r.  Fr   )r   r   r  rZ   r   )	r8   r9   r;   r<   r=   r   r   dtyper   )r&   r   r   rC   rD   r   r   expectr"   r"   r#   test_shuffle_aggregatek  s&    
	   r:  r   c                 C   s"  t jdddddddddg	d dddddddddg	d ddddddd	dd
g	d dddddddd	dg	d dddddgd}tj|dd}dddgd}|jd|dj|d| d}|jd|d|}|d |d j|d< t|| |rt	
t$ |jddg|dj|d| d W 5 Q R X d S )Nr+   r,   r-   r.   r0   r   r   rW   r/   r1   r   rv   rZ   rY   rw   r   r]   r   r   r   r.  r   r   r  r7  )r8   r9   r;   r<   r=   r   r   r8  r   rM   rN   rR   )r&   r   rC   rD   r   r   r9  r"   r"   r#   test_shuffle_aggregate_sort  s*    
	
  r<  c                 C   s(  t jdddddddddg	d dddddddddg	d ddddddd	dd
g	d dddddddd	dg	d dddddgd}tj|dd}dddgd}|dj|ddj}tdd |jD rt	t
t |dj|ddd W 5 Q R X |jdddj|dddj}tdd |jD s$t	d S )Nr+   r,   r-   r.   r0   r   r   rW   r/   r1   r   rv   rZ   rY   rw   r   r]   r   r   r   r.  r   c                 s   s   | ]}d |kV  qdS r  Nr"   r  lr"   r"   r#   r    s     z2test_shuffle_aggregate_defaults.<locals>.<genexpr>r   r   Fr   c                 s   s   | ]}d |kV  qdS r>  r"   r?  r"   r"   r#   r    s     )r8   r9   r;   r<   r=   r   r}   r  ZlayersrA   rM   rN   r   )r&   rC   rD   r   r   r"   r"   r#   test_shuffle_aggregate_defaults  s     
	 rB  r   rv   r.  keysrY   rw   c                 C   s  t jdddddddddg	d dddddddddg	d ddddddd	dd
g	d dddddddd	dg	d dddddgd}tj|dd}||j| |d}||| }t|| tjt	dd ||j| dd W 5 Q R X tjt	dd ||j
dd W 5 Q R X d S )Nr+   r,   r-   r.   r0   r3   r   rW   r/   r1   r   rv   rZ   rY   rw   r   r]   )r  zmust use shufflrk   F)r8   r9   r;   r<   r=   r(  r   rM   rN   r   r   )r   rC  r&   rC   rD   actualrt   r"   r"   r#   test_aggregate_median  s     
	
rE  axis
group_keysmethodffillbfilllimitr.   c              
   C   s  t ddddgddddgtjdtjtjgdtjdtjgdtjdtjgd}tj|dd	}t|jd
|djd| d|jd
|djd| d t|jd
|dj	d|jd
|dj	d t|jd
dg|dd|jd
dg|dd t|jd
|dj||| d|jd
|dj||| d t|jd
dg|dj||| d|jd
dg|dj||| d t
t |d
d
di W 5 Q R X t
t |d
t jtd W 5 Q R X t
t |d
t j W 5 Q R X d S )Nr+   r,   r-   r.   rW   r/   r0   rL   r   r   r  Er]   rL   rG  r   rF  r   )rH  rK  rF  )r8  )r8   r9   r   nanr;   r<   r   r=   fillnar   rM   rN   rR   r@   r   )rF  rG  rH  rK  rh   rD   r"   r"   r#   test_fillna  sf    

	        	"rR  c               
   C   s   t ddddgddddgtjdtjtjgdtjdtjgdtjdtjgd} tj| dd	}t| d
 |d
  t| d
j	 |d
j	  t| d
dg |d
dg  d S Nr+   r,   r-   r.   rW   r/   r0   rL  r]   rL   r   )
r8   r9   r   rP  r;   r<   r   r=   rI  r   r   r"   r"   r#   
test_ffill  s*    

	rT  c               
   C   s   t ddddgddddgtjdtjtjgtjdtjdgtjdtjdgd} tj| dd	}t| d
 |d
  t| d
j	 |d
j	  t| d
dg |d
dg  d S rS  )
r8   r9   r   rP  r;   r<   r   r=   rJ  r   r   r"   r"   r#   
test_bfill&  s*    

	rU  c                 C   s   dgS rd   r"   rg   r"   r"   r#   rb   B  rc   c                 C   s   ddgS r   r"   rg   r"   r"   r#   rb   C  rc   c                 C   s   | d S rd   r"   rg   r"   r"   r#   rb   D  rc   c                 C   s   | d | d gS r   r"   rg   r"   r"   r#   rb   E  rc   c                 C   s   | d dk| d dkgS r   r"   rg   r"   r"   r#   rb   F  rc   c           
      C   s  |dk}dd }t jdddddddddg	d	 dddddddddg	d	 d
ddddddddg	d	 d
ddddddddg	d	 dddddgd}tj|d	d}|dkrt||j| ||dd |||j| ||dd ||dd |dkr|dkr|dkrtd t||j| ||dddg |||j| ||dddg ||dd |dkr||j| ||d| }t	t
|j}|| }||j| ||d||dd }	|	 }	t	t
|	j}|	| }	t||	 n8t||j| ||d|||j| ||d||dd d S )Nr+   c                 [   s   t | |f |S r%   r   r   r"   r"   r#   r   M  s    z4test_dataframe_aggregations_multilevel.<locals>.callr,   r/   r.   r-   r0   r3   r   rW   r1   )rY   rZ   rw   rv   rv   rZ   rY   rw   r   r]   r   r   rA  r   z(https://github.com/dask/dask/issues/9509)r8   r9   r;   r<   r   r=   rM   r  
sort_indexr   r:   r   rS   )
r   r   r$   r   r   rC   rD   rh   colsZdddfr"   r"   r#   &test_dataframe_aggregations_multilevel?  sp    






rX  c                 C   s   | d S rd   r"   rg   r"   r"   r#   rb     rc   c                 C   s   | d | d gS r   r"   rg   r"   r"   r#   rb     rc   c                 C   s   | d dk| d dkgS r   r"   rg   r"   r"   r#   rb     rc   c                 C   s   |dk}dd }|dkrdS t jdddddddd	d
g	d ddd
d	d	ddddg	d dddd	dddd
dg	d ddddgd}tj|dd}t||d j| ||d|||d j| ||d||dd|dkd dS )zp
    similar to ``test_dataframe_aggregations_multilevel``, but series do not
    support all groupby args.
    r+   c                 [   s   t | |f |S r%   r   r   r"   r"   r#   r     s    z1test_series_aggregations_multilevel.<locals>.callr   Nr,   r/   r.   r-   r0   r3   r   rW   r1   rY   rZ   rv   rv   rZ   rY   r   r]   r   rA  >   r   r   r   r8   r9   r;   r<   r   r=   )r   r   r$   r   r   rC   rD   r"   r"   r#   #test_series_aggregations_multilevel  s,    	r[  c                 C   s   | d S rd   r"   rg   r"   r"   r#   rb     rc   c                 C   s   | d dkS NrY   r,   r"   rg   r"   r"   r#   rb     rc   c                 C   s   | d | d gS r   r"   rg   r"   r"   r#   rb     rc   c                 C   s   | d dkgS r\  r"   rg   r"   r"   r#   rb     rc   c                 C   s   | d dk| d dkgS r   r"   rg   r"   r"   r#   rb     rc   z/index dtype does not coincide: boolean != emptygroup_and_slicec                 C   s   |  || S r%   r`   rh   r   r"   r"   r#   rb     rc   c                 C   s   | d  || S Nrv   r`   r^  r"   r"   r#   rb     rc   c                 C   s   |  || d S r_  r`   r^  r"   r"   r#   rb     rc   c                 C   s   t jdddddddddg	d dddddddddg	d dddddd	ddd
g	d ddddgd}tj|dd}| || d}| ||j }| ||j d}t|| t|| d S Nr+   r,   r/   r.   r-   r0   r3   r   rW   r1   rY  rv   rZ   rY   r   r]   )	r8   r9   r;   r<   r   r   rH   Z_meta_nonemptyr   )r]  r   rC   rD   rt   r|   Zmeta_nonemptyr"   r"   r#   test_groupby_meta_content  s    	
ra  c                  C   sr  t jdddddddddg	d dddddddddg	d dddddd	ddd
g	d ddddgd} tj| dd}tj| dd}|ddg ||d |d g tt ||d  W 5 Q R X tt ||d |d g W 5 Q R X tt ||d |d g W 5 Q R X tt ||d |d g W 5 Q R X tt ||d dg W 5 Q R X d S r`  )r8   r9   r;   r<   r=   rM   rN   rR   )rC   Zddf3Zddf7r"   r"   r#   test_groupy_non_aligned_index  s*    	   rb  c                  C   s  t jdddddddddg	d dddddddddg	d dddddd	ddd
g	d ddddgd} tj| dd} | d }|| |||g tt |d W 5 Q R X tt ||dg W 5 Q R X tt ||  W 5 Q R X tt ||| g W 5 Q R X d S )Nr+   r,   r/   r.   r-   r0   r3   r   rW   r1   rY  rv   rZ   rY   r   r]   r#  )	r8   r9   r;   r<   r=   rM   rN   rO   r   )rh   r   r"   r"   r#    test_groupy_series_wrong_grouper  s&    	
rc  r  rW   c           
      C   s   t tdd tdd}t|| }|jd|dkdjj	||d}|
|j| }dd	lm} ||\}}	|j|ksttd
d | D | kstt||dj	  d S )Nr   r3   r5   r6   r+   r   )r   r   r   )get_depsc                 S   s   g | ]\}}|s|qS r"   r"   r)  r"   r"   r#   
<listcomp>?  s      z/test_hash_groupby_aggregate.<locals>.<listcomp>)r8   r9   r   arangeonesr;   r<   r=   r7   r   Z__dask_optimize__r}   Z__dask_keys__Z	dask.corerd  r^   rA   r   r,  r   )
r^   r   r   rh   rD   r   r   rd  ZdependenciesZ
dependentsr"   r"   r#   test_hash_groupby_aggregate.  s      rh  c               	   C   s   t tdd tddddddgd d	} tj| dd
}|jddgddjj	dd}| ddgj	 }t
||dd d S )Nr   r3   r+   r,   r-   r.   rW   r  r6   r7   zr]   r6   r7   Fr   r=  ry   )r8   r9   r   rf  rg  r;   r<   r=   rj  r   r   )rh   rD   r   rt   r"   r"   r#   #test_split_out_multi_column_groupbyD  s    (rk  c               	   C   s   t jtddddgddddgddd} | d jdksBt| jddd	jdd
jdksbt| jddd	jdd
jdkstt	t
 | jddd
 W 5 Q R X d S )Nr+   r,   r-   r.   r  r]   rL   Fr   r=  )r;   r<   r8   r9   r=   r   r^   rA   rM   rN   r   ra   r"   r"   r#   test_groupby_split_out_numQ  s       rl  c               	   C   s   t jtddddgddddgddd} tt | jddd W 5 Q R X tt | jddd	 W 5 Q R X tt | jdd
d W 5 Q R X tt | jddd W 5 Q R X d S )Nr+   r,   r-   r.   r  r]   rL   rO  )levelFr   T)squeeze)r;   r<   r8   r9   rM   rN   r   r=   ra   r"   r"   r#   test_groupby_not_supported_  s     ro  c                  C   sX   t dddgdddgd} tj| dd}t||jd  | | jd   d S )	Nr#  r%  r+   r,   r-   )rL   r   r]   r   )r8   r9   r;   r<   r   r=   rL   r   r   r"   r"   r#   test_groupby_numeric_columnm  s    rp  selrZ   keyr   r   r   c                    s  t s0| dks0ddgkr0 ddgkr0tjdd tjdddd	d	dd	d
dg	d d	ddd
d
ddddg	d tjdtjddddddgd}tj|j	dddgdf< t
j|dd} fdd||fD \}}tt||  t||   | dkrtjtdd |jdd W 5 Q R X d S )Nr   rY   rZ   zFcumsum and cumprod will raise DataError: No numeric types to aggregater(   r+   r,   r/   r.   r-   r0   6   r   rv   rw   r   iir   r3   r]   c                 3   s   | ]}|   V  qd S r%   r`   )r  rw   rr  rq  r"   r#   r    s     z"test_cumulative.<locals>.<genexpr>J`axis` keyword argument is deprecated and will removed in a future releaserk   r   rO  )r   rM   xfailr8   r9   r   r   r   rP  r   r;   r<   r   r   rp   FutureWarningr   )ri   rr  rq  rh   rD   r   r   r"   ru  r#   test_cumulativet  s:    




	
ry  c               
   C   sP   t ddddddddgi} tj| dd}t| dj |dj  d S )Naar+   r,   r-   r.   r]   )r8   r9   r;   r<   r   r=   rz  r   r   r"   r"   r#   /test_series_groupby_multi_character_column_name  s    r{  c                 C   s   t dddddddddg	d tjdtjdd}tj|jd	< tj|dd
}t	t
|d| ddt
|d| dd tjtdd t
|dj| dd W 5 Q R X tjtdd |djdd W 5 Q R X d S )Nr+   r,   r/   r.   r-   r0      rY  )rt  r   r]   rY   rO  z&No axis named 1 for object type Seriesrk   rv  )r8   r9   r   r   r   rP  r   r;   r<   r   r   r=   rM   rN   r   rZ   rp   rx  r   )ri   rh   rD   r"   r"   r#   test_cumulative_axis1  s&    

 "r}  c               
   C   s<  t tjdddtjdtjdd} tj| dd}| | jdk  }||jdk  }|	|j
}|j	|j
}| | |j |d| | | |dg}|D ]"}tt |  W 5 Q R X qdd	 }|	| j
}	|j||d
|	|f|jj||jd
|	j|fg}
|
D ]\}}t|| q"d S )Nr   r3   2   rY  rW   r]         ?c                 S   s   | d S Nr+   r"   r   r"   r"   r#   add1  s    z*test_groupby_unaligned_index.<locals>.add1r{   )r8   r9   r   r   randintr   r;   r<   rZ   r=   rY   r   r   r   r   rM   rN   r   rS   ro   r   )rh   rD   filteredZ	dfiltered	ddf_groupZds_groupbadr?   r  Zdf_groupZgoodr   r   r"   r"   r#   test_groupby_unaligned_index  s<    

r  c                  C   s   t dddgdddgdddgd} tjt | dd	}|d
}|d }t jddgddgddgdt ddgd}t	|| d S )Nr+   r.   r,   r-   rW   r/   r0   )r#  r   r   r]   r#  r   r[   )
r8   r9   r;   r<   r=   r   rS   Indexr	   Zassert_frame_equal)rh   rD   r  r   rt   r"   r"   r#   test_groupby_string_label  s    $
 r  c            
      C   s   t jttddt jdddd} d| d< d	| d
< tj| dd}ddg}|D ]|}t|dg| }|j	dddd}t|dg| }t
||\}}| |  }}	||st||	sLtqLdS )zaTest caching behavior of cumulative operations on grouped dataframes.

    Relates to #3756.
    aabbccrY   20100101r/   startperiodsr[   r+   rg  r,   twosr-   r]   r   r   rY   r#  r%  )rg  r  r   N)r8   r9   r   r:   
date_ranger;   r<   r   r=   renamer}   rS   equalsrA   )
rh   rD   opsopZddf0Zddf1res0_ares1_ares0_bres1_br"   r"   r#   "test_groupby_dataframe_cum_caching  s      r  c            
      C   s   t jttddt jdddd} d| d< d	| d
< ddg}|D ]}tj| dd}|dg}t	t
|d | t
|d
 | \}}| dg}t
|d | t
|d
 |  }}	||st||	s>tq>dS )z[Test caching behavior of cumulative operations on grouped Series

    Relates to #3755
    r  r  r  r/   r  r[   r+   rg  r,   r  r   r   r-   r]   rY   N)r8   r9   r   r:   r  r;   r<   r=   r}   rS   r   r  rA   )
rh   r  r  rD   Zdcumr  r  Zcumr  r  r"   r"   r#   test_groupby_series_cum_caching  s$      "r  c                  C   sl   t ddddgddddgd} tj| dd}|dd	 d
dg}| dd	 d
dg}t|| d S )Nr+   r,   r-   r.   rW   rX   r]   rY   rZ   r   r   r8   r9   r;   r<   r=   r   r   rw   rY   r   rt   r"   r"   r#   test_groupby_slice_agg_reduces$  s
     r  c                  C   sb   t dddddgi} tj| dd}|dd ddg}| dd ddg}t|| d S )	NrY   r+   r,   r-   r.   r]   r   r   r  r  r"   r"   r#   test_groupby_agg_grouper_single,  s
    r  slice_c                 C   sl   t ddddgddddgd}tj|dd}|d|  dd	g}|d|  dd	g}t|| d S )
Nr+   r,   r-   r.   rX   r]   rY   r   r   r  )r  rw   rY   r   rt   r"   r"   r#   !test_groupby_agg_grouper_multiple6  s
     r  c                 C   s"  dd }t ddddddgddddddgtdddddddgdd}tj||j d}tj||j d	d
}||	ddg| }| dkr|
t}||	ddg| }t|| ||	ddg| }t|| dddddddddh	}| |kr|	ddg| }t|| ||	ddg| }| dkr>|
t}||	ddg| }t|| ||	ddg| }t|| | |kr|	ddg| }t|| ||	d| }| dkr|
t}||	d| }t|| ||	d| }t|| | |kr|	d| }t|| d S )Nc                 [   s   t | |f |S r%   r   r   r"   r"   r#   r   U  s    z5test_groupby_column_and_index_agg_funcs.<locals>.callr+   r,   r/   )idxrY   rZ   rv   r  r]   Fr^   r   rY   >   r   r   r   r   r   r   r   r   r   r   r   )r8   r9   r   rf  r   r;   r<   r\   r   r=   r   r   r   r   )r$   r   rh   rD   ddf_no_divsrt   r   Zaca_aggr"   r"   r#   'test_groupby_column_and_index_agg_funcsA  sV    	















r  
group_argsr  
apply_funcc                 C   s   t | jt | j S r%   )r   r   valuesr   )r   rF  r"   r"   r#   rb     rc   c              	   C   s4  t ddddddgddddddgtddd}tj||j d}tj||j dd}|	| j
|d	d
}|	| j
|d	|d}t||dd |j|jkstt|jt|j|j kst|	| j
|d	d
}|	| j
|d	|d}t||dd |j|jkstt|jt|j|j ks0td S )Nr+   r,   r/   )r  rY   rZ   r  r]   Fr  r   rO  )rF  r|   )Zcheck_divisions)r8   r9   r   rf  r   r;   r<   r\   r   r=   ro   r   Z	divisionsrA   r   r}   r^   )r  r  rh   rD   r  rt   r   r"   r"   r#   #test_groupby_column_and_index_apply  s"    (r  c                 C   s   |   |  fS r%   r   r   r   r"   r"   r#   rb     rc   c                 C   s   |   |  fS r%   r   s0s1r"   r"   r#   rb     rc   c                 C   s   ||  S r%   r"   r  r"   r"   r#   rb     rc   c                 C   s   |   S r%   r   r  r"   r"   r#   rb     rc   c                 C   s   |   S r%   r   r  r"   r"   r#   rb     rc   z#pandas_spec, dask_spec, check_dtypec                 C   sd   t dddgd dddgd d}tj|dd}|d| }|d|}t|||d d S )	Nr   r+   r-   r,   r   rZ   r]   r   ry   r8   r9   r;   r<   r=   r(  r   )pandas_spec	dask_specrz   rh   rD   rt   r   r"   r"   r#   %test_dataframe_groupby_agg_custom_sum  s
    
$r  zpandas_spec, dask_specc                 C   st   t dddgd dddgd d}tj|dd}|d |d | }|d |d |}t||d	d
 d S )Nr   r+   r-   r,   r  r]   rZ   r   Fry   r  )r  r  rw   rY   rt   r   r"   r"   r#   #test_series_groupby_agg_custom_mean  s
    	$r  c               	   C   sz   t dddgd dddgd d} tj| dd}tddd	 d
d	 }tt |d	d|dgi W 5 Q R X dS )z7for a single input column only unique names are allowedr   r+   r-   r,   r  r]   r   c                 S   s   |   S r%   r   r  r"   r"   r#   rb     rc   zOtest_groupby_agg_custom__name_clash_with_internal_same_column.<locals>.<lambda>c                 S   s   |   S r%   r   r  r"   r"   r#   rb     rc   r   rZ   N)
r8   r9   r;   r<   AggregationrM   rN   r   r=   r(  )rw   rY   r$   r"   r"   r#   =test_groupby_agg_custom__name_clash_with_internal_same_column  s
    $r  c                  C   s   t dddgd dddgd dddgd d} tj| dd	}td
dd dd dd }|d|d
d}| ddd
d}t||dd dS )zEcustom aggregation functions can share the name of a builtin functionr   r+   r-   r,   r.   rW   r/   )r   rZ   rv   r]   r   c                 S   s   |   |  fS r%   r  r  r"   r"   r#   rb   
  rc   zTtest_groupby_agg_custom__name_clash_with_internal_different_column.<locals>.<lambda>c                 S   s   |   |  fS r%   r   r  r"   r"   r#   rb     rc   c                 S   s   ||  S r%   r"   r  r"   r"   r#   rb     rc   r   r.  r   Fry   N)r8   r9   r;   r<   r  r=   r(  r   )rw   rY   r$   r   rt   r"   r"   r#   Btest_groupby_agg_custom__name_clash_with_internal_different_column  s    0	r  c               	   C   s   dd } t ddd | dd }tdddddgd	 dddddgd	 d
dd
ddgd	 d}t j|dd}|d |d |d g|}tddgddgd
dgd}|d |d |d gd}t|| d S )Nc                 S   s   dd }|  |S )Nc                 S   s6   | j d \}| j dd  D ]\}|j|dd}q|gS )Nr   r+   )
fill_value)r   add)r   r   r   r"   r"   r#   impl  s    z=test_groupby_agg_custom__mode.<locals>.agg_mode.<locals>.implr   )r   r  r"   r"   r#   agg_mode  s    z/test_groupby_agg_custom__mode.<locals>.agg_modeZcustom_modec                 S   s   |  dd S )Nc                 S   s
   |   gS r%   )value_countsr  r"   r"   r#   rb   '  rc   Atest_groupby_agg_custom__mode.<locals>.<lambda>.<locals>.<lambda>r   r  r"   r"   r#   rb   '  rc   z/test_groupby_agg_custom__mode.<locals>.<lambda>c                 S   s   |  dd S )Nc                 S   s   | d   S )Nr   )idxmax)r   r"   r"   r#   rb   )  rc   r  )r   r  r"   r"   r#   rb   )  rc   r   r+   r-   r.   rW   r/   )g0g1ccr]   r  r  r  r   )r;   r  r8   r9   r<   r=   r   r   )r  r$   rw   rY   rD  rt   r"   r"   r#   test_groupby_agg_custom__mode  s$      r  c                 C   sx   t dddddddddg	ddddd	d
dddg	d}tj|dd}|dd | }|dd | }t|| d S )Nr+   r,   r-   r.   gEgٿgV-gS㥫?gNbX9?gtVglgClgZd;Or  r]   rL   r   r  )ri   rC   rD   rD  rt   r"   r"   r#   test_groupby_select_column_agg>  s    r  c                 C   s   | j ddS )NT)Znumeric_only)r   r   r"   r"   r#   rb   O  rc   c                 C   s   |  d S r   )r=   r   r   r"   r"   r#   rb   P  rc   c                 C   s   |  d S r   )r=   r   r   r"   r"   r#   rb   Q  rc   c                 C   s   |  d S r   )r=   r   r   r"   r"   r#   rb   R  rc   c                 C   s   |  d S r   )r=   r   r   r"   r"   r#   rb   S  rc   c                 C   s   |  dj S r   )r=   rj  r   r   r"   r"   r#   rb   T  rc   c              	   C   s^   t dddgdddgdddgd	}tj|dd
}t  | |}W 5 Q R X t|| | d S )Nr+   r,   rY   rZ   rv   g      &@g      6@g     @@ri  r]   )r8   r9   r;   r<   r   r   )ri   rh   rD   rt   r"   r"   r#   test_std_object_dtypeL  s
    $r  c                  C   sH   t dgdgd} tj| dd}tjddg }||	  d S )NrW   )r   r+   r,   r]   r   r+   )
r8   r9   r;   r<   r}   arrayZ
from_arrayZto_dask_dataframer=   r   )rh   rD   r  r"   r"   r#   test_std_columns_int`  s    r  c                  C   s6   t j jd d } t| d | d  d S )Nr,   name)r}   ZdatasetsZ
timeseriesZ
partitionsr   r=   r   rg   r"   r"   r#   test_timeseriesj  s    r  	min_countr-   c              	   C   s   t dddgtjddgtjddgdddgd	t dddgdtjtjgtjddgdddgd	g}d
d |D }t||D ]T\}}t|dj| d|dj| d t|dj| d|dj| d qzd S )NrL   r   r,   r-   rW   r/   r.   r2   )groupZval1Zval2Zval3c                 S   s   g | ]}t j|d dqS )r.   r]   )r;   r<   )r  rh   r"   r"   r#   re    s     z'test_with_min_count.<locals>.<listcomp>r  )r  )	r8   r9   r   rP  zipr   r=   r   r   )r  dfsZddfsrh   rD   r"   r"   r#   test_with_min_counto  s2    


r  c                 C   s   t ddddgddddgd}tj|ddd}|d}d	d
 }|d|}t||dj||d |jd| d|}t||jd| dj||d d S )Nr+   r,   r-   r.   rW   rX   r]   rY   c                 S   s   |   S r%   )copy)r   r"   r"   r#   rb     rc   z)test_groupby_group_keys.<locals>.<lambda>r{   rN  )r8   r9   r;   r<   r   r=   ro   r   )rG  rh   rD   rC   ri   rt   r"   r"   r#   test_groupby_group_keys  s     
 r  r   r          @g      @r  23 c                 C   s   d}d}t j||}tj|| d}dgd dgd  dgd  |d	< tj|dd
}|d	 }|d	 }t	| t j
r| }|jt d|_t||dd n
t|| d S )Nr  r-   r   r   r3   r+   rW   r,   rr  r]   OF)Zcheck_index)r   r   r   r8   r9   r;   r<   r=   r   r>   ZndarrayrS   r   r   r8  r   )r   ZrowsrW  r   rh   rD   rt   r   r"   r"   r#   test_groupby_cov  s    "r  c                  C   s   t ttdddddgddddgdd} tj| dd}t ddgd	d
gdd}| d }|d }t	|| t	|| d S )Nr.   r+   r,   r3   r  r  r  rQ   r  r]   r   r-   r  rQ   r  )
r8   r9   r:   r   r   r;   r<   r=   idxminr   rC   rD   rt   	result_pd	result_ddr"   r"   r#   test_df_groupby_idxmin  s    "
r  skipnac                 C   st   t ttdddddgtjdtjdgdd}tj|dd}|	d	j
| d
}|	d	j
| d
}t|| d S Nr.   r+   r,   皙4@333333$@r  r  r]   r  r  r8   r9   r:   r   r   rP  r   r;   r<   r=   r  r   r  rC   rD   r  r  r"   r"   r#   test_df_groupby_idxmin_skipna  s    

r  c                  C   s   t ttdddddgddddgdd} tj| dd	}t ddgddgd
d}| d }|d }t	|| t	|| d S )Nr.   r+   r,   r3   r  r  r  r-   r]   r  r  )
r8   r9   r:   r   r   r;   r<   r=   r  r   r  r"   r"   r#   test_df_groupby_idxmax  s    "
r  c                 C   st   t ttdddddgtjdtjdgdd}tj|dd}|	d	j
| d
}|	d	j
| d
}t|| d S r  r8   r9   r:   r   r   rP  r   r;   r<   r=   r  r   r  r"   r"   r#   test_df_groupby_idxmax_skipna  s    

r  c                  C   s   t ttdddddgddddgdd} tj| dd}t ddgd	d
gdd }| dd 	 }|dd 	 }t
|| t
|| d S )Nr.   r+   r,   r3   r  r  r  r]   r   r-   r  r  rQ   )r8   r9   r:   r   r   r;   r<   rn  r=   r  r   r  r"   r"   r#   test_series_groupby_idxmin  s    " 
r  c                 C   s|   t ttdddddgtjdtjdgdd}tj|dd}|	d	d
 j
| d}|	d	d
 j
| d}t|| d S Nr.   r+   r,   r  r  r  r  r]   r  rQ   r  r  r  r"   r"   r#   !test_series_groupby_idxmin_skipna	  s    

r  c                  C   s   t ttdddddgddddgdd} tj| dd	}t ddgddgd
d }| dd 	 }|dd 	 }t
|| t
|| d S )Nr.   r+   r,   r3   r  r  r  r-   r]   r  r  rQ   )r8   r9   r:   r   r   r;   r<   rn  r=   r  r   r  r"   r"   r#   test_series_groupby_idxmax#	  s    " 
r  c                 C   s|   t ttdddddgtjdtjdgdd}tj|dd}|	d	d
 j
| d}|	d	d
 j
| d}t|| d S r  r  r  r"   r"   r#   !test_series_groupby_idxmax_skipna5	  s    

r  c                  C   sx   t jd} t| jddd| jdddd}tj|dd}|dd	 	 }|dd	 	 }t
| |  d S )
N*   r-   r   r   r3   r#  r%  r]   r#  r%  )r   r   RandomStater8   r9   r  r;   r<   r=   uniquer   Zexploderngrh   rD   Zpd_gbZdd_gbr"   r"   r#   test_groupby_uniqueG	  s    r  c                  C   sp   t jd} t| jddd| jdddd}tj|dd}|d	d
 	 }|d	d
 	 }t
|| d S )Nr  r-   r   r  r.   r  r,   r]   r#  r%  )r   r   r  r8   r9   r  r;   r<   r=   r  r   r  r"   r"   r#   test_groupby_value_countsU	  s    r  periodr   r3   c                 C   s*  t dddddddddg	dddddddddg	dddddddddg	d}tj|| d	}tt8 t|d
dgj	||d|d
dgj	||d W 5 Q R X tt4 t|d
gj	||d|d
gj	||d W 5 Q R X tt4 t||j
j	||d||j
j	||d W 5 Q R X d S )Nr   r+   r,   r-   r.   rW   r/   rY  r]   rY   rv   rO  )r8   r9   r;   r<   rM   rp   rq   r   r=   shiftrv   )r^   r  rF  rC   rD   r"   r"   r#   test_groupby_shift_basic_inputa	  s,    r  c                  C   s   t dddddddddg	dddddddddg	d} tj| dd	}tt4 t| d
d j	dd|d
d j	dd W 5 Q R X d S )Nr   r+   r,   r-   r.   rW   r/   rX   r]   rY   rZ   r  )
r8   r9   r;   r<   rM   rp   rq   r   r=   r  )rC   rD   r"   r"   r#   test_groupby_shift_series~	  s    r  c                  C   s   t dddddddddg	dddddddddg	dddddddddg	d} td	d
  }tj| dd}t| | jj	dd||jj	|t
t
dd tt@ t| | jj	d| j d||jj	d|j d W 5 Q R X d S )Nr   r+   r,   r-   r.   rW   r/   rY  c                   S   s   dS r  r"   r"   r"   r"   r#   rb   	  rc   z/test_groupby_shift_lazy_input.<locals>.<lambda>r]   r  rX   )r  r|   )r  r  )r8   r9   r}   r~   r;   r<   r   r=   rv   r  r   rM   rp   rq   rZ   r   )rC   Zdelayed_periodsrD   r"   r"   r#   test_groupby_shift_lazy_input	  s"    r  zignore:`meta` is not specifiedc                  C   s   t dD ]} tt dddddgd d ddd d	d
gd d}|d }tj|dd}t|dd 	d|dd 	ddd qd S )Nr3   <   r,   r.   r-   r+   r   r     (   rY  rY   r/   r]   rZ   rv   r   r   )
r   r8   r9   r   rV  r;   r<   r   r=   r  )_rh   rD   r"   r"   r#   +test_groupby_shift_within_partition_sorting	  s    r   c               	   C   s   t jtddddddgddddddgdt jd	dd
d} tj| dd}| | jjddd}t	|||jjdd|dft
 | djddd}t	||djdd|d d S )Nr+   r,   r-   r.   rW   r/   r   rX   r  r  r[   r]   r  )r  freq)r  r  r|   rZ   )r8   r9   r   r  r;   r<   r=   r\   r  r   
CHECK_FREQ)rC   rD   Z	df_resultr"   r"   r#   test_groupby_shift_with_freq	  s    "r  transformationc                 C   s   |   S r%   r   r   r"   r"   r#   rb   	  rc   c              	   C   s   t ddddgd tjdtjdtjdd}t|d}tt	R t
|d| |d|  t
|dd	 | |dd	 |  W 5 Q R X d S )
Nr+   r,   r-   r.   rW   r  r  rL   r   )r8   r9   r   r   r   r;   r<   rM   rp   rq   r   r=   r  )r  rC   rD   r"   r"   r#   test_groupby_transform_funcs	  s"    


r  indexedc              	   C   s   t dddddgd tjdd}|r4|d	}t|| }t	t
b t|d	d
d |d	dd  t|d	d dd |d	d dd  W 5 Q R X d S )Nr+   r,   r-   r.   rW   r  r   r  r  c                 S   s   | |    S r%   r   seriesr"   r"   r#   rb   	  rc   z;test_groupby_transform_ufunc_partitioning.<locals>.<lambda>c                 S   s   | |    S r%   r   r  r"   r"   r#   rb   	  rc   rQ   c                 S   s   | |    S r%   r   r  r"   r"   r#   rb   	  rc   c                 S   s   | |    S r%   r   r  r"   r"   r#   rb   	  rc   )r8   r9   r   r   r   r   r;   r<   rM   rp   rq   r   r=   r  )r^   r  rC   rD   r"   r"   r#   )test_groupby_transform_ufunc_partitioning	  s"    &
r
  zgrouping,aggc                 C   s   | j dddS N
category_2r   
category_1Zdropr=   rg   r"   r"   r#   rb   
  rc   c                 C   s   |   S r%   r   r   r"   r"   r#   rb   	
  rc   c                 C   s   | j dddS r  r  rg   r"   r"   r#   rb   
  rc   c                 C   s
   |  dS Nr   r   r   r"   r"   r#   rb   
  rc   c                 C   s   |  ddgS Nr  r  r`   rg   r"   r"   r#   rb   
  rc   c                 C   s   |   S r%   r   r   r"   r"   r#   rb   
  rc   c                 C   s   |  ddgS r  r`   rg   r"   r"   r#   rb   
  rc   c                 C   s
   |  dS r  r  r   r"   r"   r#   rb   
  rc   c                 C   s|   t t tdt tdtjjddd}t|d}t	|| ||| | t	|| |d || |d  d S )NZAABBCCZABCABCr/   r  )r  r  rQ   r,   rQ   )
r8   r9   Categoricalr:   r   r   uniformr;   r<   r   )groupingr   rC   rD   r"   r"   r#   #test_groupby_aggregate_categoricals
  s    r  r   c                 C   s   |  ttjddS Nr+   r   r   r   r   r   r   r"   r"   r#   rb   *
  rc   c                 C   s   |  ttjddS Nr  r   r  r   r"   r"   r#   rb   +
  rc   c                 C   s   |  ttjddS r  r   r   r   r   r   r"   r"   r#   rb   ,
  rc   c                 C   s   |  ttjddS r  r  r   r"   r"   r#   rb   -
  rc   c                 C   s   t ddddddddgddddddddgd	}tj|dd
}t| |d| |d t| |dd | |dd  d S )NrW   r.   r-   r,   r+   r/   r2   r1   rX   r]   rY   rZ   rZ  r   rC   rD   r"   r"   r#   'test_groupby_aggregate_partial_function'
  s    
r  c                 C   s   |  ttjddS Nr+   )Zunexpected_argr  r   r"   r"   r#   rb   C
  rc   c                 C   s   |  ttjddS r  r  r   r"   r"   r#   rb   D
  rc   c                 C   s   t ddddddddgddddddddgd	}tj|dd
}tjtdd | |d W 5 Q R X tjtdd | |dd  W 5 Q R X d S )NrW   r.   r-   r,   r+   r/   r2   r1   rX   r]   z?supports {'ddof'} keyword arguments, but got {'unexpected_arg'}rk   rY   rZ   r8   r9   r;   r<   rM   rN   r   r=   r  r"   r"   r#   9test_groupby_aggregate_partial_function_unexpected_kwargs@
  s     r  c                 C   s   |  ttjdS NZpositional_argr  r   r"   r"   r#   rb   a
  rc   c                 C   s   |  ttjdS r  r  r   r"   r"   r#   rb   b
  rc   c                 C   s   t ddddddddgddddddddgd	}tj|dd
}tjtdd | |d W 5 Q R X tjtdd | |dd  W 5 Q R X d S )NrW   r.   r-   r,   r+   r/   r2   r1   rX   r]   z$doesn't support positional argumentsrk   rY   rZ   r  r  r"   r"   r#   7test_groupby_aggregate_partial_function_unexpected_args^
  s    r   z-dropna kwarg not supported in pandas < 1.1.0.dropnac                 C   st   t ddddd d ddgddddddd	d	gd
}tj|dd}|jd| dj }|jd| dj }t|| d S )Nr+   r,   r-   r.   r0   r1   rW   r/   r   )rY   er]   rY   r!  )r8   r9   r;   r<   r=   r"  r   r   )r!  rh   rD   dask_resultZ	pd_resultr"   r"   r#   test_groupby_dropna_pandasv
  s    (r%  r  c           	      C   s  t d}t d}|ddddd d ddgdd	gd d
dd d ddddgddddddd	d	gd}|d d|d< |j|dd}| d kr|j||dj }|j||dj }n,|j|| |dj }|j|| |dj }|dkr| }|j	j
|j	_
t|| d S )Ncudf	dask_cudfr+   r,   r-   r.   r0   r1   r   rY   rZ   r"  fr   hrW   r/   rY   rZ   rv   r"  rv   categoryrw   r]   rN  )r!  rG  r   )rM   importorskipr9   r   	from_cudfr=   r"  r   rS   r\   r  r   )	r!  r  rG  r&  r'  rh   rD   r$  Zcudf_resultr"   r"   r#   test_groupby_dropna_cudf
  s(    


r.  c           	      C   s   t d}t d}tddddddd	d
gddddddddgddddddddgd}||}t|| d}t|| d}|| }|| }t|| d S )Nr&  r'  rY   rZ   rv   rw   r"  r(  r   r)  r+   r,   r-   r.   rW   r/   r0   r1   r   r  g      @gffffff@      @gffffff@g@g!@rY  rr  )	rM   r,  r8   r9   r<   r
   r=   r   r   )	rr  r&  r'  rC   ZgdfZ
pd_grouperZ
gd_grouperr9  gotr"   r"   r#   test_groupby_grouper_dispatch
  s    


r2  z&Should work starting from pandas 1.1.0c                  C   sr   t dd dgddd gddd gd} | jdd	gd
dd}tj| dd
d}|jdd	gd
dd}t|| d S )NrY   rZ   r+   r,   g      @r/  )id1id2Zv1r3  r4  Fr#  r   r   )r8   r9   r=   r   r;   r<   r   rh   rt   rD   rD  r"   r"   r#   test_groupby_dropna_with_agg
  s    r6  c                  C   s   t t jtdtddt jddgdddgdtjjddd} | jd	d
gddd}t	
| d}|jd	d
gddd}t|| d S )Nr
  ABCDE)
categoriesr+   r,   r-   r  cat_1cat_2value_1r:  r;  Tobservedr   )r8   r9   r  r:   r   r   r  r=   r   r;   r<   r   r5  r"   r"   r#   test_groupby_observed_with_agg
  s    r?  c                  C   sz   dd t dD } dd t dD dd t dD  }t|| d}tj|dd	}t|d
j |d
j  d S )Nc                 S   s   g | ]}d qS )gE}]r"   r  r  r"   r"   r#   re  
  s     z.test_rounding_negative_var.<locals>.<listcomp>r3   c                 S   s   g | ]}d qS )r+   r"   r@  r"   r"   r#   re  
  s     rW   c                 S   s   g | ]}d qS )r,   r"   r@  r"   r"   r#   re  
  s     )idsr6   r,   r]   rA  )	r   r8   r9   r;   r<   r   r=   r6   r   )r6   rA  rh   rD   r"   r"   r#   test_rounding_negative_var
  s
    $rB  columnr"  c                 C   s   t tdddddddddgddgd ddddddddgd	d}|d
 d|d< tj|dd}|j|ddj	j
dd  }|j|ddj	j
| d  }t|| d S )Nr1   r+   r   r,   r.   ZdogcatZbirdr   rw   r+  r"  r-   r]   Fr   r=  )r8   r9   r   rf  rQ  r   r;   r<   r=   rY   r   rS   r!  r   )r   rC  rh   rD   Zddf_result_so1Z
ddf_resultr"   r"   r#   !test_groupby_split_out_multiindex
  s     
rE  backendpandasr&  c           	      C   s   t | }| dkr$t d}|j}ntj}ttjj}|d }|	t
|td tdgd}||tdtdd}||d	d
}t|d |d jdd d S )Nr&  r'  r  rW   #   Zint64r  )r6   rj  r7   r+   r]   r6   zsingle-threadedr   )rM   r,  r-  r;   r<   r   ZiinfoZuint64r   r@   Zconcatenaterf  r   r9   r   r=   r   rS   )	rF  Zdata_sourcer'  Z
data_framer   sqrtr	  rh   rD   r"   r"   r#   !test_groupby_large_ints_exception  s$    

rJ  c                    s0  t ddddd d ddgddgd dd	d d d
dddgddddddddgd}tj|dd}|j| |d}|j| |dt| } fdd}t|j }tj }	|d
 i}
d
 i} dkrt| | 	d t| |	 	d t|
|	d n&t| |  t| |	  t|
| d S )Nr+   r,   r-   r.   r0   r1   r   rY   rZ   r"  r(  r   r)  rW   r/   r*  r]   r   c                
      s*   t   t  W  5 Q R  S Q R X d S r%   )r   r   r"   r   Zgb_pdr"   r#   result_1_pd>  s    z/test_groupby_sort_argument.<locals>.result_1_pdr   r   )
r8   r9   r;   r<   r=   r   r"  r   r   r   )r  r   r   rh   rD   gbZresult_1rL  Zresult_2Zresult_2_pdZresult_3Zresult_3_pdr"   rK  r#   test_groupby_sort_argument)  s.    


rN  c              	   C   sz   t ddddddgddddddgd}tj|dd}| |jd	|d
}| |jd	|d
}t|| |rvt|j|j d S )Nr.   r,   r+   r-   rW   r/   r5   r]   r6   r   )r8   r9   r;   r<   r=   r   r\   )r   r   rh   rD   r   r  r"   r"   r#   test_groupby_sort_argument_aggT  s    (
rO  c               	   C   s   t ddddddgddddddgd} tj| dd}tj|jd	d
ddd tj|jd	dddd tjt	dd tj|d	dd W 5 Q R X t
t  tj|jd	d
dddd W 5 Q R X |jd	d
djddd
d d S )Nr.   r,   r+   r-   rW   r/   r5   r]   r6   Tr   r=  Fzsplit_out>1rk   )r  r   r   r;  )r8   r9   r;   r<   r   r   r=   rM   rp   rx  rN   rR   r   r   r"   r"   r#    test_groupby_sort_true_split_outd  s    ($rP  z(observed only supported for newer pandas
known_catsZknownunknown)rA  ordered_catsorderedZ
unordererdr=   r:  r;  r>  Z
unobservedc                    s"   dkrt d  dkr,|dkr,t d  dkrB|rBt d ttjtdtd	|d
tjddgdddg|d
tjjddd}t	
|d}| s|d j |d< |d j |d<  fdd}|dkr dkr|dkr|ddg }|ddg }t||j||d||j||d d S )N)r   r   r   z)Not implemented for DataFrameGroupBy yet.)r   r   r   r:  zGives zeros rather than nans.)r   r   z&Can't calculate observed with all nansr
  r7  )r8  rT  r+   r,   r-   r  r9  r;  c              
      sJ   t | tjjjrt}ntj}|  t|  f |W  5 Q R  S Q R X d S r%   )	r>   r8   rI   r=   rG   r   
contextlibnullcontextr   )r   r   ctxr$   r"   r#   r     s
    z8test_groupby_aggregate_categorical_observed.<locals>.aggF)r   r   r<  r=  )rM   r  r8   r9   r  r:   r   r   r  r;   r<   rD  Z
as_unknownr   r=   )rQ  rS  r$   r=   r>  rC   rD   r   r"   rX  r#   +test_groupby_aggregate_categorical_observedx  s8    



  	
rY  zrequires pandas >= 1.4.0r  c              	   C   s   t ddddgddddgddddgd}tj|dd}|d	jt jd
ddt jdttj	dddd}|d	j| t jd
ddt jdttj	dddd}t
|| d S )Nr+   r,   rW   r/   r-   r0   rY  r]   rY   rZ   r   )Zaggfuncrv   r   r5   )r  r6   r7   )r8   r9   r;   r<   r=   r   ZNamedAggr   r   r   r   )r  rh   rD   rt   rD  r"   r"   r#   test_dataframe_named_agg  s"    




rZ  r   c                 C   sz   t ddddddddgddddddddgd	}tj|dd
}|djj|dd}|djj| |dd}t|| d S )NrW   r.   r-   r,   r+   r/   r2   r1   rX   r]   rY   r   r   )r  rv   rw   )r8   r9   r;   r<   r=   rZ   r   r   )r  r   rh   rD   rt   rD  r"   r"   r#   test_series_named_agg  s    r[  c                  C   s   t jddgdd gddgd d gd d gd d gddgddgddgg	ddgd	} | dd  }tj| d
d}|dd  }t|| d S )Na1b1a3b3a5b5rL   r   r   r   r-   r]   )r8   r9   r=   r  r;   r<   r   r5  r"   r"   r#   'test_empty_partitions_with_value_counts  s"    rc  c               	   C   s   t jtdddgdddgdddgddd} tt | tjd	d
 W 5 Q R X tt | d	tjdd
g W 5 Q R X d S )NrY   rZ   rv   r+   r,   r-   )key1key2rQ   r]   rd  r0  re  )	r;   r<   r8   r9   rM   rN   rR   r=   ZGrouperra   r"   r"   r#   test_groupby_with_pd_grouper  s    rf  z/ignore:Invalid value encountered:RuntimeWarning	operationr   r   c                 C   s   t jddgdd gddgd d gd d gd d gddgddgddgg	ddgd	}t| d
}||dd }tj|dd}||dd }t|| d S )Nr\  r]  r^  r_  r`  ra  rL   r   rb  r+   r-   r]   r8   r9   operatormethodcallerr=   r;   r<   r   rg  rh   Zcallerrt   rD   rD  r"   r"   r#   1test_groupby_empty_partitions_with_rows_operation  s$    rl  c                 C   s   t jddgddgddgddgddgddgddgddgddgg	d	d
gd}t| }||d	d
 }tj|dd}||d	d
 }t|| d S Na0r]  Zb2r\  r^  r_  r`  ra  rL   r   rb  r-   r]   rh  rk  r"   r"   r#    test_groupby_with_row_operations  s$    
ro  c                 C   s   t jddgddgddgddgddgddgddgddgddgg	d	d
gd}t| }||d	|d	 dgd
 }tj|dd}||d	|d	 dgd
 }t|| d S rm  )	r8   r9   ri  rj  r=   eqr;   r<   r   rk  r"   r"   r#   ,test_groupby_multi_index_with_row_operations,  s$    
  rq  c               	   C   sd   t jddgddgddgddggdd	gd
} tj| dd}tjtdd t|d W 5 Q R X d S )Nrn  r]  r\  r^  r_  r`  ra  rL   r   rb  r+   r]   zcomputing the groupsrk   )	r8   r9   r;   r<   rM   rN   rR   r:   r=   r   r"   r"   r#   test_groupby_iter_failsD  s    	rr  c               	   C   s`   t dddgdddgd} tj| dd}tjtdd |d	jd
did d W 5 Q R X d S )Nr+   r,   r-   r.   rX   r]   zsplit_out=Nonerk   rY   rZ   r   r=  )	r8   r9   r;   r<   rM   rp   rx  r=   r   r   r"   r"   r#   !test_groupby_None_split_out_warnsS  s    rs  rd  re  	slice_keyrQ   )rQ   c                 C   s|   t dddgdddgdddgdddgd}tj|dd}|| |  }|| |  }t|jd	sntt	|| d S )
NrY   rZ   rv   r+   r,   r-   )rd  re  rQ   r-   r]   r   )
r8   r9   r;   r<   r=   r   r   r}   rA   r   )r  rt  rC   rD   r9  r1  r"   r"   r#   test_groupby_slice_getitemZ  s    ru  )N)r/  rU  ri  r1  r   	functoolsr   Znumpyr   rG  r8   rM   r}   Zdask.dataframeZ	dataframer;   r   Zdask.dataframe._compatr   r   r   r   r   r	   Zdask.dataframe.backendsr
   Zdask.dataframe.utilsr   r   r   Z
dask.utilsr   Zdask.utils_testr   r  r   Zfixturer$   r'   markrw  rF   rK   rV   ru   r   Zparametrizer    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r!  r&  r5  r:  r<  rB  r   r   rE  rR  rT  rU  rX  r[  ra  rb  rc  rh  rk  rl  ro  rp  ry  r{  r}  r  r  r  r  r  r  r  r  r   r   r  r  Zcustom_meanZ
custom_sumr  r  r  r  r  r:   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  filterwarningsr   r  r   r@   Zrankr  r   r
  r  r  r  r   utilsr%  Zgpur.  r2  r6  r?  rB  rE  rJ  rN  r   rO  rP  ZskipifrY  rZ  r   r[  rc  rf  rl  ro  rq  rr  rs  r  ru  r"   r"   r"   r#   <module>   s   

	


&!D
2
+
		

 [
;

\
2
G'"!
+



J%	


'	
 6 	
&!

" (    -"