U
    f/eN                     @   s  d dl m Z  d dlZd dlZd dlm  mZ d dlZ	d dlm
Z
mZmZmZmZmZmZ d dlmZ ejfddZejejd ejejejejejejejejejejejd ejejejd ejejd ejdZdd Zd	d
 Zdd Zdd Zejdddgdd Zejdd Zdd Z dd Z!dd Z"dd Z#dd Z$d d! Z%ejdddgejd"ddgejd#ddgd$d% Z&d&d' Z'd(d) Z(d*d+ Z)d,d- Z*d.d/ Z+d0d1 Z,d2d3 Z-d4d5 Z.ejd6d7d8d7gfd9d:d;gfd<d:d7gfd=d8d;gfgd>d? Z/d@dA Z0dBdC Z1dDdE Z2dFdG Z3dHdI Z4dJdK Z5dLdM Z6dNdO Z7ejdPdeej8edQdQdRgdSdTdQdRdRggdUdVgdWdQdRdXgdYdZfdeedQdQdRgdSdTdQdRdRgdQdRdXgd[fgd\d] Z9ejd^dej:e	j;gd_d` Z<ej=dadb Z>ejdcdddedSifdfi fgdgdh Z?ejdidddfgejd"ddgdjdk Z@ejdldejAdmdndodpdqdrdsdtgdudvdgdWdQdQdXdXdRdRdwdwgfdejBedxdygddzed{d|d}gddzed<d=ggdudvdgdWdRdRdwdwej:ej:dQdQej:ej:dXdXgfdejBedxdygddzed{d|d}gddzed<d=ggdudvdgdWdRdRdwdwej:ej:dQdQej:ej:dXdXgfgd~d ZCdd ZDejddQd d gd d d ggdd ZEejFddd ZGdd ZHdd ZIdd ZJdd ZKejd"ddgdd ZLdd ZMejdejNdQdQdRgfejOdQdRdRgfgdd ZPdd ZQejdd7d9gdd ZRdd ZSdd ZTdd ZUdd ZVdd ZWejdd7d9geXeYdddZZejdd7d9geXeYdddZ[dd Z\dS )    )datetimeN)CategoricalCategoricalIndex	DataFrameIndex
MultiIndexSeriesqcutc                 C   s.   dd }t jt|||d}| j||d S )zpReindex to a cartesian production for the groupers,
    preserving the nature (Categorical) of each grouper
    c                 S   s4   t | ttfr0| j}tjtt||| jd} | S )N
categoriesordered)	
isinstancer   r   r   
from_codesnparangelenr   )ar    r   I/tmp/pip-unpacked-wheel-tiezk1ph/pandas/tests/groupby/test_categorical.pyf   s      z)cartesian_product_for_groupers.<locals>.fnames
fill_value)r   from_productmapreindex
sort_index)resultargsr   r   r   indexr   r   r   cartesian_product_for_groupers   s    r!   )allanycountcorrwithfirstZidxmaxZidxminlastZmadmaxmeanZmedianminnthnuniqueprodZquantileZsemsizeZskewstdsumvarc                 C   sB   t | jd}dd }| j|ddj|}|jjd dks>td S )N   c                 S   s   |   |  |  |  dS )Nr*   r(   r$   r)   r3   )groupr   r   r   	get_statsJ   s
    z2test_apply_use_categorical_name.<locals>.get_statsFobservedr   C)r	   r8   groupbyDapplyr    r   AssertionError)dfcatsr5   r   r   r   r   test_apply_use_categorical_nameG   s    r?   c               
   C   s  t dddddddddg	ddddgdd} tddddddd	d
dg	| d}ttdddd}tdddd
tjgi|d}|jddd }t	|| t ddddgdddgdd}t ddddgdddgdd}t||ddd	d
gd}|jddd}tdddgddd}	tdt
d	ddg|	di}| }t	|| tddgddgddggddgd}
t |
j|
d< |
jdgdd}|dd }t	||
dg  |
d}|
jddg }t	|| d d! }||}|
jddg  }tddgdd"|_|d d#|d< t	|| tddd$d%gi}tj|jdd&d'd(d)gd*}|jj|ddt}t||d  t|jj|ddd+d |d  t	|j|ddt|dg  t	|j|ddd,d |dg  t|jj|ddtj|d  t	|j|ddtj| tddd$d%d-gi}tj|jd.dd&d'd(d)gd*}|jj|ddt}t||d  t|jj|ddd/d |d  t	|j|ddt|dg  t	|j|ddd0d |dg  tdddddgi}tj|jdddd	d
gt tdd1}|j|ddt}t|jj|jjd2}t
ddddg|d}d|j_t|| d3d4d5d6g}tj j!dd
d7d8}t j"||dd2} ttj #d7d
}|j| dd }|jt$| dd }t|| jdd}	|%|	}t	|| |j| dd}|& }| j'( }t$| )|}|)|}t |dd3d4d5d6gd9}|j|ddd:& }t	|| t j"t*d
+d;|dd2}t|}t,|- j.d| td<d=d>d?d@dAdBdCgd
 }t,|- j.d| d S )DNr   bcdTr
            r2      r   r@   abcdnamer   r    Fr6   zyABvaluesrO   rQ      r   zJohn P. Doez	Jane DoveZ	person_idperson_namecolumnsc                 S   s   | S Nr   xr   r   r   <lambda>w       ztest_basic.<locals>.<lambda>c                 S   s   |  djd S )NrS   r   )drop_duplicatesilocrW   r   r   r   r   ~   s    ztest_basic.<locals>.frJ   object      
         (   )binsc                 S   s
   t | S rV   r   r0   xsr   r   r   rY      rZ   c                 S   s
   t | S rV   )r   r(   rg   r   r   r   rY      rZ   ic                 S   s
   t | S rV   rf   rg   r   r   r   rY      rZ   c                 S   s
   t | S rV   rf   rg   r   r   r   rY      rZ   )labelsr   foobarbazquxd   r.   )r   r   sortr7      r$   r)   r/   r*   25%50%75%r(   )/r   r   r   listr   nanr9   r)   tmassert_frame_equalr   r0   rS   	transformr[   r\   r;   copyr   r    astypepdcutr   assert_series_equalfilterr"   r   rQ   r   r   rJ   randomrandintr   randnasarrayr   describecodesargsorttaker   repeatassert_index_equalstackget_level_values)r>   data	exp_indexexpectedr   cat1cat2r=   gbZexp_idxrX   gr   rA   levelsr   groupeddesc_resultidx
ord_labelsord_dataZexp_catsexpcexpr   r   r   
test_basicV   s    
 

 " $ " $


  
r   c                 C   s   t tdddttddgtdgdgd dgd  tdgd	d
gdd}|jd	g| d}t tdddttddgtdgdgd tdgd	d
gdd}|d}t	|| d S )NrD      r   r@   ra   r   rF   rC   ZIndex1ZIndex2)r   r   r   r   r    levelr7      )
r   r   r   r   r   ranger9   	get_grouprz   r{   )r7   r=   r   r   r   r   r   r   test_level_get_group   s&    
r   c                  C   s   t dgd dgd  dddgd tdd	} t| jdddgd
d| _| dd  }|jdd
d}ddddddg}t|dddgd
d}ddddddgt|g}t	j
|dd gd}tdgd |dd}t|| d S )NrO      rP   highZmedlowr2   g      (@)r4   doseZoutcomesTr
   r4   r   r   )r   Zsort_remainingr   rD   r    rJ   )r   r   r   r   r   r9   Zvalue_countsr   r   r   from_arraysr   rz   r   r=   r   r    r   r   r   r   (test_sorting_with_different_categoricals   s    r   r   TFc           	   	   C   s$  t td| d}t tdddg| d}tt|}t|||d}|jdd	gd
d}tj||gdd	gd}tdddg|dgd}t	j
tddd |dd }W 5 Q R X t	|| | }t	|| |tj}t	|| tj||gdd	gd}td|d}|dd }t	|| d S )Nabcrk   Zaaar   r@   r
   )missingdenserQ   r   r   Tr6   r   r   rC          @rQ   r    rU   zSelect only validF)matchZcheck_stacklevelc                 S   s
   t | S rV   )r   r)   rW   r   r   r   rY      rZ   ztest_apply.<locals>.<lambda>rK   c                 S   s   dS )NrC   r   rW   r   r   r   rY   ,  rZ   )r   rx   r   r   r   r   r9   r   r   rz   Zassert_produces_warningFutureWarningr;   r{   r)   aggr   r   )	r   r   r   rQ   r=   r   r   r   r   r   r   r   
test_apply
  s,      r   c              	   C   s  t ddddgdddgdd}t ddddgdddgdd}t||d	d
ddgd}ddgd
 |d< |jdddg| d}tj||ddgd
 gdddgd}tdtd	d
ddg|di }| }| st|||ddggt	ddd}t
|| |jddg| d}tj||gddgd}tdd	d
ddgi|d}| }| sRt|||gt	ddd}t
|| t ddddgdddgddd	d	d
d
gddddgd}	t|	}|jd | d}
|
 }tt	d!d t	d"dd#}td$d$gd%dgd&|d}| s
tt	d"d t	d"dd#}||}t
|| |jd d'g| d}|d(}td)d*d%d+gt ddddgdddgddd	d
d	d
gd,d d'g}| st||jjd	d
ggd d'g}t
|| d-D ]<}|\}}||}||j|k|j|k@  }t
|| qdd.dd.dd	d	gddddd/d0d1gddd2ddddgd3}	t|	}t|d tddd}||d4< |jd4d5gd6| d7}|d(}|jd4d5gd| d7}|d( }t
|| d S )8Nr   r@   rL   Tr
   rA   rB   rM   rC   rD   rE   r2   rN   rl   rm   r8   rO   rP   r6   r   rQ   rK   ABCr   r   ABra   rb   rc   rd   )catintsvalr   abr   )rJ   r   r   g      ?      4@)r   r   r   r)         $@g      >@g      D@)r   r   r   ))r   rC   )r@   rD   )r@   rC   )r   rD   rt   2   <   F   erl   rm   rn   r   rn   Fas_indexr7   )r   r   r9   r   r   r   r   r0   r!   rx   rz   r{   r)   r   r   r   	set_indexr   rQ   r   r   r   r   r   linspacereset_index)r7   Zusing_array_managerr   r   r=   r   r   r   r   rB   Zgroups_single_keyr    groups_double_keykeyrA   ir   groupsZgroups2r   r   r   test_observed1  s    	        
  

      



  
	  

r   c                 C   s   ddddgddddgdddd	gd
}t |}t|d ddddg}d|_|j|dg| d}tj|ddddggddgd}t ddddgddddgd|d}| st||jddddggddg}|	d}t
|| d S )NrE   r2   rF   rC   rD   ra   rp      "   )C1C2C3r   r   r   r   r6   r   g      @g      @g      @r   g      Y@g      i@g      A@)r   r   rK   r)   )r   r   r   rJ   r9   r   r   r!   rQ   r   rz   r{   )r7   rB   r=   rQ   r   r   r   r   r   r   r   test_observed_codes_remap  s$    $   
r   c                  C   s   t tjjddddtjjddddtjjdddddd} | jtd| d< | jdd	d
gdd}| }|j	j
d  | j kst|j	j
d  | j kst|j	j
d  | j kstd S )Nr      i0u  rq   '  )r   int_idother_idrl   categoryr   r   r   Tr6   rC   rD   )r   r   r   r   r   r~   strr9   r$   r    r   r,   r<   r   r   )r=   r   r   r   r   r   test_observed_perf  s    r   c                 C   s   t dddgdddgd}t|dddgd}|jd	| d
}|j}| rftddgddtdgddd}n*tddgddtg ddtdgddd}t|| d S )Nr   rA   r@   r   rC   rD   rE   r   valsr   r6   r   int64dtype)r   rA   r   r@   rA   )r   r   r9   r   r   rz   assert_dict_equal)r7   r   r=   r   r   r   r   r   r   test_observed_groups  s    "
r   c                 C   s   t tdtjdgdddgddddgd}|jd	| d
}|j}| rXdtddgddi}n(tddgddtg ddtg ddd}t|| d S )Nr   r@   rB   r   rC   rD   rE   r   r   r6   r   r   r   )r   r@   rB   )	r   r   r   ry   r9   r   r   rz   r   )r7   r=   r   r   r   r   r   r   test_observed_groups_with_nan  s    

r   c                  C   s   t dtjtjgdddgd} tdddg}t| |d}|jd	d
dd d}t dddgdddgd}tdtjtjg|dd}d	|j_t	
|| d S )Nr   r@   rA   r   rC   rD   rE   )r   serr   Fr6   r   r   r   )r   r   ry   r   r   r9   r+   r    rJ   rz   r   )r   r   r=   r   r    r   r   r   r   test_observed_nth  s    r   c                 C   s   t tjdtjdgdddgd}tddddg}t||d	}|jd
| d  }| rxtt dgdddgddgd	}n,tt dddgdddgddtjtjgd	}t	|| d S )Nr   r@   rA   r   rC   rD   rE   r2   )s1s2r   r6   )
r   r   ry   r   r   r9   r&   r   rz   r{   )r7   r   r   r=   r   r   r   r   r   #test_dataframe_categorical_with_nan   s    r   r7   rs   c           	      C   s   t ddddddgddddg| d}tddddddg}t||d}|jd||dd	 d
}t|jjdd}t|j}|sd|| < t||ksd|  d| d| d| }dst	|d S )NrB   r   r@   r   r
   )labelr   r   )r7   rs   r   r&   r^   r   zDLabels and aggregation results not consistently sorted
for (ordered=z, observed=z, sort=z
)
Result:
F)
r   r   r   r9   Z	aggregater    arrayisnar"   r<   )	r   r7   rs   r   r   r=   r   Zaggrmsgr   r   r   0test_dataframe_categorical_ordered_observed_sort  s     	

r   c               	   C   s  t jddd} tjjdddd}tj|| dd}ttjdd}|j	|d	d

 }|j	t|d	d

 }|| }t|j|jdd|_t|| |j	|d	d
}| }|j }||}	||}
|
j	|	d	d
 }t|| t|j|j t|jd|jd tjtdd| dd}t|}t| jd| tddddddddgd }t| jd| d S )Nz
2014-01-01r2   )periodsr   rp   rq   Trk   Fr6   r
   rt   r$   r)   r/   r*   ru   rv   rw   r(   rC   )r   
date_ranger   r   r   r   r   r   r   r9   r)   r   r   r   r    rz   r{   r   r   r   r   r   r   r   r   r   r   )r   r   r>   r   r   r   r   r   r   r   r   r   r   r   r   r   test_datetime7  s<    
  



 
r   c                  C   s(  t jd} ddddg}| jdddd	}tj||d
d}tt t dd	ddt
dd}||d< |djddd }|t
d j|jdd }ttjddddg|d
ddd|_t|| |jddd }|t
d j|jdd }ttjddddg|d
ddd|_t|| d S )Ni90  rl   rm   rn   ro   r   r2   rb   rq   Trk   rH   rT   r>   Fr   r6   rC   rD   rE   r]   )r   r   ZRandomStater   r   r   r   r   r   Zreshaperx   r   r9   r0   r   r   r    rz   r{   )sr   r   r>   r=   r   r   r   r   r   test_categorical_index^  s(    &  r   c                  C   sz   t ddddgddddgdd} ttjdd| d	}|d
dddgd  }t|	 j
|  t|	 j
j| j d S )Nro   rl   rn   rm   Tr
   rb   r2   rT   rC   rD   rE   rF   )r   r   r   r   r   r9   r   rz   r   r   rU   assert_categorical_equalrQ   )r>   r=   r   r   r   r   !test_describe_categorical_columnsx  s    

r   c                  C   s   t tdddgd tdd d} | d d	| d< | jd
dgddd   }| }tddgddd}t	
|j| t	|jj|j |d |d  }tddgtddgd
dd}t	|| d S )Nra   rO   rP   rF   ZXYXXYrD   )r   mediumartistr   r   r   Fr6   r   r   rJ   r   r2   XYr]   rK   )r   r   rx   r~   r9   r$   unstackr   r   rz   r   rU   r   rQ   r   r   r   )r=   Zgcatr   Zexp_columnsr   r   r   r   test_unstack_categorical  s    r   c                  C   sb   t tjtjddddddddg
} t|  jd}d}tjt	|d | 
|  W 5 Q R X d S )NrC   rD   rE   r2   z;Length of grouper \(8\) and axis \(10\) must be same lengthr   )r   r   ry   r   r   dropnarQ   pytestraises
ValueErrorr9   r)   )Zseriesre   r   r   r   r   test_bins_unequal_len  s
     r  c                     s  t tdddgdddgdddgdddgd	  jd
dgddd } t tddg jjjdddgddgd	d
ddgd}t| |  fdd} jd
|gddd } t tddg jjjdddgddgd	d
ddgd}t| | tdddgd
d} jd
|gddd } t| | d
dg}t tddg jjjdddgddgd	d
ddgd}dD ]8}t	t
d|d _ j|ddd } t| | q`d S )NrC   rD   rE   ra      e   f   g   )r   rO   rP   r   rO   FTr   r      rP   rT   c                    s    j | df S )NrO   )loc)rr=   r   r   rY     rZ   ztest_as_index.<locals>.<lambda>r   r   r@   r]   )Nr   rP   r   )r   r   r9   r0   r   r   rz   r{   r   r   rx   r    )r   r   r   r   Zgroup_columnsrJ   r   r  r   test_as_index  sP    	r  c                  C   s   t d} tdtt d| ddi}t| | ddd}t|jdddd j| t|jdddd j| tdtt d| ddi}t| | ddd}tt d	t d	ddd}t|jdddd j| t|jdddd j| d S )
Nr   rO   baTr
   r   Frr   bac)	rx   r   r   r   rz   r   r9   r&   r    )r   r=   r    r   Znosort_indexr   r   r   test_preserve_categories  s,        r  c               	   C   s   t dddddgdddddgttdtd	d
dttdtd	ddd} t ddtjgddtjgttd	td	d
dttd	td	ddd}dD ]V}| j|d
d
d }| j|dd
d  }|j|j	d}t
|| t
|| qd S )NrC   rD   ra      r      r   Zabaabr  Fr
   T)rO   rP   r   r   r   g      ?g      9@r   )r   r   )byr   r7   rT   )r   r   rx   r   ry   r9   r)   r   r   rU   rz   r{   )r=   Zexp_fullcolZresult1Zresult2r   r   r   r   test_preserve_categorical_dtype  s(    	

r  zfunc, valuesr&   secondr'   fourththirdr*   r(   c                 C   s   t ddddgdd}tddddg|d	}|d
}t||  }tddgt||jdd	d
}t|| |d
d }t||  }|d }t	|| d S )Nr&   r  r  r  Trk   r   )payloadr  r  r   r  )
r   r   r9   getattrr   r   r   rz   r{   r   )funcrQ   rA   r=   r   r   r   Zsgbr   r   r   test_preserve_on_ordered_ops  s    
r  c                  C   s  t tjd} tdddddddddg	}tj|dddgdd}| j|dd }| j|dd }t	|j
|j|jd	|_
t|| tddddddd
d
d
g	}tj|dddd
gdd}| j|dd }| j|dd |j}t	|j
|j|jd	|_
t|| tdddddddddg	ddddgdd	}tddddddd
ddg	|d} | jddd }|d j}tdddtjg}t|| d S )N	   r   rC   rD   Trk   Fr6   r
   rE   r   r@   rA   rB   r2   rF   rG   )r   r   r   r   r   r   r   r9   r)   r   r    r   r   rz   r   r   r   rQ   ry   assert_numpy_array_equal)r   r   r>   r   r   r   r   r   test_categorical_no_compress-  s>        
 
r   c                  C   sd   t d gd tdddgd} | d d }ttg ddgdtg ddd	dd
}t|| d S )NrE   ZtraintestrO   rP   rO   rP   r   r^   )r   rJ   r   )r   r   r9   r&   r   rz   r   r=   r   r   r   r   r    test_groupby_empty_with_categoryR  s    r$  c                  C   s   t dtjdddi} dd tdddD }t||}| jdgdd	} tj| j	tdd
dd|d| d< | j
dgddd  }|t|jdd d }t|j|jjd|_t|| d S )Nvaluer   r   rp   c                 S   s   g | ]}| d |d  qS )z - i  r   ).0r   r   r   r   
<listcomp>i  s     ztest_sort.<locals>.<listcomp>i  T)r  Z	ascendingi)  F)rightrj   Zvalue_groupr6   c                 S   s   t |  d S )Nr   )floatsplitrW   r   r   r   rY   r  rZ   ztest_sort.<locals>.<lambda>)r   r]   )r   r   r   r   r   r   Zsort_valuesr   r   r%  r9   r$   sortedr    r   rJ   rz   r   )r=   rj   Z
cat_labelsresr   r   r   r   	test_sort`  s    
 
  
r-  c               
   C   s  t dddgdddgdddgdd	d
gdddgdddgdddggdddgd} t| d dd| d< tddddgddd}t ddgddgd	d
gddggddg|d}d}| j|ddd }t|| |}| j|ddd }t|| t| d dd| d< tddddgdd}t ddgddgd	d
gddggddg|d}tddddgddddgdd}t ddgddgd	d
gddgg|ddgd}d}| j|ddd }t|| | j|ddd }t|| d S )Nz	(7.5, 10]ra   rt   rb   z(2.5, 5]rF   rc   z(5, 7.5]r   rd   r2   r   z(0, 2.5]rC   r   rR   r   r   rl   rm   rT   Trk   rI   )rU   r    Frr   r]   r   rJ   r   )r   r   r   r9   r&   rz   r{   )r=   r    Zexpected_sortr  result_sortZexpected_nosortresult_nosortr   r   r   
test_sort2w  sj    	
    
   

  r1  c                  C   s  t tdddtdddtdddtdddtdddtdddtdddgddddd	ddgdd
dddddgddddgd} t| d dd| d< tdddtdddtdddtdddg}t ddgddgddgddggddgd}t|ddd|_tdddtdddtdddtdddg}t ddgddgddgddggddgd}t||ddd|_d}t|| j|ddd  t|| j|ddd  t| d dd| d< tdddtdddtdddtdddg}t ddgddgddgddggddgd}t|dd|_tdddtdddtdddtdddg}t ddgddgddgddggddgd}t||dd|_d}t|| j|ddd  t|| j|ddd  d S )Ni  rR   rC   rD   rF   ra   rt   r   r2   rb   rc   rd   r   r   r   )dtrl   rm   r2  rl   rm   rT   Trk   rI   )r   rJ   r   Frr   r]   r.  )	r   r   r   r   r    rz   r{   r9   r&   )r=   r    r/  r0  r  r   r   r   test_sort_datetimelike  s    






	



 



      



 



   r3  c                  C   s   t tdddgdddgddddgd} tdddgdd	}| jdd
dj }tdddg|dd	}t|| | jdd
djjdd}tdddg|dd	}t|| | jdd
djjdd}tddt	j
g|dd	}t|| | jdd
djjdd}tdt	j
t	j
g|dd	}t|| d S )Nr   r@   rA   r   rC   rD   r"  rO   r]   Fr6   rE   r   rP   Z	min_count)r   r   r   r9   rP   r0   r   rz   r   r   ry   r=   Zexpected_idxr   r   r   r   r   test_empty_sum  s     "r6  c                  C   s   t tdddgdddgddddgd} tdddgdd	}| jdd
dj }tdddg|dd	}t|| | jdd
djjdd}tdddg|dd	}t|| | jdd
djjdd}tddt	j
g|dd	}t|| d S )Nr   r@   rA   r   rC   rD   r"  rO   r]   Fr6   rP   r   r4  )r   r   r   r9   rP   r-   r   rz   r   r   ry   r5  r   r   r   test_empty_prod(  s    "r7  c                  C   s   t ttdtttjddddd tdd} | dd	g }t	j
td
ddgttjddddgdd	gd}t ddddddddtjdg	i|d}t|| d S )NZ	abcbabcbaz2018-06-01 00Z1TrE   )freqr   r  )key1key2rQ   r9  r:  r   r@   rA   r   rQ   r   r2   rt   rF   r   rD   rK   )r   r   rx   r   r   r   r   r9   r)   r   r   ry   rz   r{   )r=   r   r   r   r   r   r   ,test_groupby_multiindex_categorical_datetime@  s"    
	$r;  zas_index, expectedrC   rD   r   r   r   r@   r   rE   rX   )r    r   rJ   r   r@   rX   c                 C   sV   t tdddgdddddgdddgd}|jddg| d	d
d  }t|| d S )NrC   rD   r   r   rE   r<  r   r@   Tr   rX   )r   r   r9   r0   rz   assert_equal)r   r   r=   r   r   r   r   ,test_groupby_agg_observed_true_single_columnY  s
    $r>  r   c                 C   sZ   t ddddgddddgdd}t d dddgddddgdd}|jd| d}t|| d S )	Nr   r@   rA   rB   Fr
   rC   r   )r   shiftrz   r=  )r   ctr   r,  r   r   r   
test_shift}  s    
 
 
 
 rA  c                 C   s\   |   dd }|d d|d< |d d|d< tddddg|d	< |jd
gdd}|S )a  
    DataFrame with multiple categorical columns and a column of integers.
    Shortened so as not to contain all possible combinations of categories.
    Useful for testing `observed` kwarg functionality on GroupBy objects.

    Parameters
    ----------
    df: DataFrame
        Non-categorical, longer DataFrame from another fixture, used to derive
        this one

    Returns
    -------
    df_cat: DataFrame
    Nr2   rO   r   rP   rC   rD   rE   r8   r:   Zaxis)r}   r~   r   Zdrop)r=   df_catr   r   r   rC    s    rC  zoperation, kwargsr   r   r;   c                 C   sr   t tddddgddddgdf|}tddd	d
g|dd}| jddgddd }t||t}t|| d S )Nrl   rm   onetwothreer"  rC   rE   rD   r2   r8   r   r    rJ   rO   rP   Tr6   )	r   Z
from_framer   r   r9   r  r0   rz   r   )rC  	operationkwargsr    r   r   r   r   r   r    test_seriesgroupby_observed_true  s    rJ  rH  c                 C   s   t jtddgddtdddgddgdd	gd
 \}}tddtjdtjdg|dd}|dkrl|jddd}| jdd	g|dd }t	||t
}t|| d S )Nrm   rl   Frk   rD  rF  rE  rO   rP   r   rD   r2   rC   rE   r8   rG  r   r   Zinfer)Zdowncastr6   )r   r   r   Z	sortlevelr   r   ry   Zfillnar9   r  r0   rz   r   )rC  r7   rH  r    _r   r   r   r   r   r   )test_seriesgroupby_observed_false_or_none  s    rL  zobserved, index, data)rl   rD  r*   )rl   rD  r(   )rl   rE  r*   )rl   rE  r(   )rm   rD  r*   )rm   rD  r(   )rm   rF  r*   )rm   rF  r(   rO   rP   r2   rm   rl   rk   rD  rF  rE  c                 C   s>   t ||dd}| jddg|dd dd }t|| d S )Nr8   rG  rO   rP   r6   c                 S   s   |   |  dS )Nr*   r(   rM  rW   r   r   r   rY     rZ   z8test_seriesgroupby_observed_apply_dict.<locals>.<lambda>)r   r9   r;   rz   r   )rC  r7   r    r   r   r   r   r   r   &test_seriesgroupby_observed_apply_dict  s
    0rN  c                 C   s<   |  ddgd  }|  ddg d }t|| d S )NrO   rP   r8   )r9   r)   rz   r   )rC  r   r   r   r   r   4test_groupby_categorical_series_dataframe_consistent  s    rO  codec                 C   sr   t ddddgddddgd	d
ddgd}tj| tdd}|j|dd }|jj|dd j}t|| d S )NrC   rD   rE   r2   r   r  rF   r   rR   rt   r   r   r   rB  r   )	r   r   r   rx   r9   r)   Trz   r{   )rP  r=   r   r   r   r   r   r   test_groupby_categorical_axis_1  s
    (rT  z(ignore:.*Select only valid:FutureWarningc                 C   s\   t tddg|dddgdddgd	}| }|jd| d
jt jdd }t|| d S )NZBobZGregrk   rC   rD   )NameItemrU  rV  rT   r6   T)Zskipna)	r   r   r}   r9   r   r0   r   rz   r{   )r7   r   r=   r   r   r   r   r   $test_groupby_cat_preserves_structure  s     rW  c               	   C   sL   t ddddgtdd} tjtdd | ddd	  W 5 Q R X d S )
Nr   r@   r2   r1   r   z'vau'r   r1   c                 S   s&   t | jd d g| jd d gdS )Nr   r1   ZvaurX  )r   r\   )Zrowsr   r   r   rY   &  s    z/test_get_nonexistent_category.<locals>.<lambda>)r   r   r  r  KeyErrorr9   r;   r  r   r   r   test_get_nonexistent_category!  s
    
rZ  c           
      C   s   | dkrt d | dkr4t jjdd}|j| tttdtddttd	d
 tdddgd d}ddgi	| g }|rdnd}|j
ddg|dd }t|| }|| }	t|	|kstd S )Nngroupngroup is not truly a reductionr%   6TODO: implemented SeriesGroupBy.corrwith. See GH 32293reasonAABBABCDr   r   rD   皙?r2   cat_1cat_2r%  r+   r   r  rd  re  r6   r%  )r  skipmarkxfailnode
add_markerr   r   rx   getr9   r  r   r<   )
reduction_funcr7   requestrg  r=   r   Zexpected_lengthseries_groupbyr   r   r   r   r   0test_series_groupby_on_2_categoricals_unobserved,  s&    

ro  c                 C   s4  | dkrt d | dkr4t jjdd}|j| tttdtddttd	d
 tdddgd d}t	dt	dt	dt	dt	dg}ddgi
| g }|jddgddd }t|| }|| }t|  }	|D ].}
|j|
 }t|	rt|s||	kstq|	dkr0| dkr0t|jtjs0td S )Nr[  r\  r%   r]  r^  r`  r   r   r   rD   rb  r2   rc  ZACZBCCAZCBCCr+   r   rd  re  Fr6   r%  r0   )r  rf  rg  rh  ri  rj  r   r   rx   tuplerk  r9   r  -_results_for_groupbys_with_missing_categoriesr
  r   r   r<   r   Z
issubdtyper   integer)rl  rm  rg  r=   Z
unobservedr   rn  r   r   Zzero_or_nanr   r   r   r   r   ?test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nansI  s0    
"

"ru  c                 C   s   | dkrt d tttdtddttdtddddddgd	}d
dddg}|jddgdd}dg|gd| g }t|| | }|D ]}||jkst	qd S )Nr[  2ngroup does not return the Categories on the indexr`  r   r   111112rb  rc  rO   2rP   rz  r8   1r8   rz  rd  re  Tr6   r   r+   r%   )
r  rf  r   r   rx   r9   rk  r  r    r<   )rl  r=   unobserved_catsdf_grpr   r,  r   r   r   r   >test_dataframe_groupby_on_2_categoricals_when_observed_is_truet  s    

r  c           	      C   s   | dkrt d tttdtddttdtddddddgd	}d
dddg}|jddg|d}dg|gd| g }t|| | }t|  }|t	j
kr|j|    stn|j| |k  std S )Nr[  rv  r`  r   r   rw  rx  rb  rc  ry  r{  r|  r~  rd  re  r6   r   r  )r  rf  r   r   rx   r9   rk  r  rs  r   ry   r
  Zisnullr"   r<   )	rl  r7   rm  r=   r  r  r   r,  r   r   r   r   ?test_dataframe_groupby_on_2_categoricals_when_observed_is_false  s     	


r  c                  C   s   ddddgddddgdd	dd	gd
} t | }t|d tddd}||d< |jddgddd}|d d}|dd }t|| d S )Nra   rt   r2   rC   rb   rc   rd   rB   rA   r   rl   r   rF   r   rn   T)r   rs   r)   )	r   r   r   r   r   r9   r   rz   r   )rB   r=   r   r   r   r   r   r   r   3test_series_groupby_categorical_aggregation_getitem  s    $r  zfunc, expected_valuesc              	   C   sv   t dddddgdddddgtdddddgdd}|d| }t d	|itdddgdd
d}t|| d S )Nr   rC   rD   rE   r2   )idr   r%  r  r   r%  r]   rK   )r   r   r   r9   r   r   rz   r{   )r  Zexpected_valuesr=   r   r   r   r   r   $test_groupby_agg_categorical_columns  s     r  c                  C   s   t dtdddgdddgdi} t dddgiddgd}| dddgtj}t|| | dddg }t|| d S )	NrO   r   r@   rA   r   rD   rC   rK   )r   r   r9   r   r   r,   rz   r{   r=   r   r   r   r   r   test_groupby_agg_non_numeric  s     r  r  c                 C   sl   t dgtdgddj d}|dd }t||  }tdgtdgddd|d jd	}t	|| d S )
Ni  r@   r   r   r"  rO   rP   r]   r    rJ   r   )
r   r   r   Z
as_orderedr9   r  r   r   rz   r   )r  r=   Z
df_groupedr   r   r   r   r   ;test_groupy_first_returned_categorical_instead_of_dataframe  s        r  c                  C   s   t ddg} d| j_tddddgtddddgt| dd}td	d
dgitddgddd}|jddd	 }t
|| d S )NrC   rD   FrE   rF   rR   r   rG   r   r   g      @r@   r]   r   rs   )r   r   flagsZ	writeabler   r   r   r   r9   r)   rz   r{   )r>   r=   r   r   r   r   r   test_read_only_category_no_sort  s    $
 r  c               
   C   s   t ddddddddgddddddddgd} | d djjd	dddgd
d| d< t ddddddddddddd}|jddd}td	dddgd	dddgd
ddd|_| ddg 	 }t
|| d S )Nsmalllarger   r8   rO   )rl   rm   rl   r   tinyTrk   r   )rO   r8   rC   rE   rD   )r  r  r   r  rm   r    rB  )r   r   rJ   r   )r   r~   r   Zset_categoriesZrename_axisr   rU   r9   r.   r   rz   r{   r  r   r   r   #test_sorted_missing_category_values  sJ    

 

r  c                  C   s   t dddddgi} | d d| d< | dj }tdddgtdddgddd| d jd}t	|| | d
dd	i}| }t|| d S )
NZcol_numrC   rD   rE   r   col_catr]   r  r&   )r   r~   r9   r  r&   r   r   r   rz   r   r   to_framer{   r#  r   r   r   1test_agg_cython_category_not_implemented_fallback&  s    r  c               	   C   s   t ddddgddddgddtjdgddddgddddgd	} | d
di} | ddgdd }tjddgddggdd}t ddgddgddgd|d}t	|| d S )NrC   rD   rb  g?g333333?rl   rm   Zfee)rO   rP   numerical_col
object_colcategorical_colr  r   rO   rP   c                 S   s   |    S rV   )r   r0   r  r   r   r   rY   J  rZ   z7test_aggregate_categorical_with_isnan.<locals>.<lambda>r"  r   r   )r  r  r  r   )
r   r   ry   r~   r9   r   r   r   rz   r{   r   r   r   r   %test_aggregate_categorical_with_isnan<  s&    




r  c               	   C   s   t ddddddgddddddgd} tjdddgdd	}| d
 || d
< | dd
 t| d< |  }t ddddddgddddddgddddddgd}|d
 ||d
< |d ||d< t	|| d S )NrC   rD   rE   ZWaitingZOnTheWayZ	Delivered)
package_idstatusTr
   r  r  last_status)r  r  r  )
r   r   ZCategoricalDtyper~   r9   r|   r(   r}   rz   r{   )r=   Zdelivery_status_typer   r   r   r   r   test_categorical_transformW  sN     	r  )r  r7   c                 C   s   t ddddg}ddddg}t|||d}t ddg}tj||gddgd}tdtjtjdg|ddtdtjtjdg|ddd	}||  }|r| tj	}|j
ddg|d
d }	t|	|  }
t|
| d S Nr   rC   r   r   r@   r   rA   r]   )r&   r'   r6   )r   r   r   r   r   r   NaNr   r~   r   r9   r  rz   r   )r  r7   r   r   r=   r   r   expected_dictr   Zsrs_grpr   r   r   r   Ftest_series_groupby_first_on_categorical_col_grouped_on_2_categoricals  s    r  c                 C   s   t ddddg}ddddg}t|||d}t ddg}tj||gddgd}tdtjtjdg|ddtdtjtjdg|ddd	}||   }|r| 	tj
}|jddg|d
}	t|	|  }
t|
| d S r  )r   r   r   r   r   r   r  r  r   r~   r   r9   r  rz   r{   )r  r7   r   r   r=   r   r   r  r   r  r   r   r   r   Btest_df_groupby_first_on_categorical_col_grouped_on_2_categoricals  s    r  c                  C   s   t tdddgdddgdtdd} | jddd	}|j}tjd
dgddtjdgddtjg ddd}| | ks~t| D ]}t	
|| ||  qd S )Nr@   r   rA   r   rE   )r   r  r   Fr  r   rC   Zintpr   rD   )r@   r   rA   )r   r   r   r9   indicesr   r   keysr<   rz   r  )r=   r   r   r   r   r   r   r   2test_groupby_categorical_indices_unused_categories  s    r  )]r   Znumpyr   r  Zpandas.util._test_decoratorsutilZ_test_decoratorstdZpandasr   r   r   r   r   r   r   r	   Zpandas._testingZ_testingrz   r  r!   rs  r?   r   r   r   rg  Zparametrizer   Z&skip_array_manager_not_yet_implementedr   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r   r$  r-  r1  r3  r6  r7  r;  r   r>  ry   ZNaTrA  ZfixturerC  rJ  rL  from_tuplesr   rN  rO  rT  filterwarningsrW  rZ  ro  ru  r  r  r  r,   r$   r  r  r  r  r  r  r  r  r   boolr  r  r  r   r   r   r   <module>   sp  $	! 
&
l '8



	
%:[ 


 


"
".
	
	

+
!

-5  