U
    /eRQ                     @   s  d dl Z d dlZd dlmZ d dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d dl m!Z! dd Z"d	d
 Z#dd Z$dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-ej./dd d!gd"d# Z0d$d% Z1d&d' Z2ej.j3e d(d)d*d+ Z4d,d- Z5d.d/ Z6ej./d0e7d ge8d ge9d gej:d1d gid2d3ej:d1d gid2d3j;ej:d1d gid2d3j<gd4d5 Z=ej./d0ej:d1d gid2d3j;j>e?d ggd6d7 Z@dS )8    N)Iterable)tm)apply_and_enforce)PANDAS_GT_120UNKNOWN_CATEGORIES	assert_eqcheck_matching_columns
check_metais_dataframe_likeis_index_likeis_series_like	make_metameta_frame_constructormeta_nonemptymeta_series_constructorraise_on_meta_errorshard_df_on_indexget_syncc                  C   s   t jddddddgtddd	d
ddddgd} tt| d
dg}t|d jd	gksZtt|d jd
ddgksvtt|d jddgkstd S )N                  Zabdabd)xy
         (   2   <   indexr   )pd	DataFramelistr   r$   AssertionError)dfresult r+   M/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/tests/test_utils_dataframe.pytest_shard_df_on_index    s     r-   c            	      C   s   t jdddgtddddgdd	d
dgd} t| }t|dksDt|j| jk sXtt|j	t
| j	sntdD ]8}|| jjd d }| | jjd d }||ksrtqr|j	jjd d }| j	jjd d }||kstt| j}t|dkst|j| jjks
tt|j	t
| j	s"t|jjd d }| jjjd d }||ksRt|j	jjd d }| j	jjd d }||kstt| j	}t|t
| j	stt|dkst|jjd d }| j	jjd d }||ksttj| dd}t||jksttdddd}t|t js*tt|dks<t|j| jk sRtt|j	t jsfttdddg}|jdddgk stt|dkst|j| j|jj	 k stt|j	t jsttd}t|t jstt|dkst|jdkst|jdkstG dd dt}t| }|jdddgk sRtt|dksdt|j| j|jj	 k stt|j	t jstt jddgdd}tddd|d}t
|j	t
|kst|j	jdkstt|j	dksttd|d}t
|j	t
|kst|j	jdks.tt|j	dksBttdd i| d!}t|jjjdksjt|jjjd tksttd"| d!}t|jjdkst|jjd tksttd dd|d}t|jjjdkst|j	jdkst|j	jstttd| d!}t|tjs(ttd| d!}t|tjsFtt d#dd}t|| d!}||ksntt jd$d%}t|}|t jd|j |j!d&ksttd| d!}t|tj"sttt#| d!}t|tt#j
stttd'| d!}t|tj$stt%&t'd(d) std S )*Nr   r   r   abc      ?g       @g      @)abcr   r   r   r#   r   dataZnpartitionsi8Of8r0   r5   r2   r7   r1   r6   r0   r2   r1   c                   @   s*   e Zd ZdZd
ddZdd Zdd Zd	S )z&test_make_meta.<locals>.CustomMetadataz-Custom class iterator returning pandas types.r   c                 S   s   dddg| _ d S )Nr8   r9   r:   )types)selfmaxr+   r+   r,   __init__q   s    z/test_make_meta.<locals>.CustomMetadata.__init__c                 S   s
   d| _ | S )Nr   )n)r<   r+   r+   r,   __iter__t   s    z/test_make_meta.<locals>.CustomMetadata.__iter__c                 S   s6   | j t| jk r.| j| j  }|  j d7  _ |S td S )Nr   )r?   lenr;   StopIteration)r<   retr+   r+   r,   __next__x   s
    z/test_make_meta.<locals>.CustomMetadata.__next__N)r   )__name__
__module____qualname____doc__r>   r@   rD   r+   r+   r+   r,   CustomMetadatan   s   
rI   foonamei4)r0   r1   int64category)Zparent_meta)r0   rO     UTCtzrS   unitboolc                   S   s   t d S N)r   r+   r+   r+   r,   <lambda>       z test_make_meta.<locals>.<lambda>)(r%   r&   r'   r   rA   r(   dtypesall
isinstancer$   typevaluesZ__array_interface__r0   dtypeddfrom_pandas_meta
RangeIndexcolumnsSeriesrL   r   Indexcat
categoriesr   emptynpfloat64	TimestampDatetimeTZDtyperS   rU   rN   floatZbool_pytestraises	TypeError)	r)   metacolZmeta_pointerZ
df_pointerddfrI   idxr   r+   r+   r,   test_make_meta+   s     


rv   c                  C   s"  t jt dddgtddtdtdt dt jddd	d
t 	dt
dt tgd t d d d gdtdd} | jdd }t|}|j|jk st|d d dkst|d d dkst|d d dkst|d d tdkst|d d jdkst|d d tdks0t|d d jdksHt|d d t dksdt|d d t jdd	dkst|d d t 	dkst|d d dkst|d  d tkstt|d! jjdkstt|d }|j|d jkst|d |k std S )"NZAliceZBobZCarolr.   barr   z
2016-01-01r   America/New_York)ZperiodsrS   z1 hours    )ABCDEFGHIJKZDCBAHGFEIJKrd   r   rz   r{   rJ   r|   r}   Zf4r~   rM   r   z1970-01-01 00:00:00r   rR   r   1r   r   r   )r%   r&   Categoricalr'   rj   Zfloat32Zint32rl   Z
date_rangeZ	Timedeltavoidr   ilocr   rZ   r[   r(   r_   rA   rg   rh   )Zdf1df2Zdf3sr+   r+   r,   test_meta_nonempty   sD     r   c                  C   sT   t jdddgd} t| }t jdddgdddggddgdddgd}t|| d S )Nrz   r{   r   rJ   r0   r1   )r$   rd   )r%   r&   r   r   Zassert_frame_equal)r)   resexpr+   r+   r,   test_meta_duplicated   s    r   c                  C   s   dD ]} t jg t jg | dddd}t|}t|t jks>tt|jt|jksVt|j|jksft|j|jksvt|	 }t|}|j
dkst|j
dkstt|jjt|jjkst|jj|jjkst|j|jkstqd S )N)r6   r7   zM8[ns]r_   TrJ   orderedrL   rO   )r%   CategoricalIndexrf   r   r]   r(   rh   r   rL   Z	to_seriesr_   rg   )r_   ru   r   r   r+   r+   r,   #test_meta_nonempty_empty_categories   s&       r   c                  C   s  t jddd} t| }t|t jks(t|j| jks8tt jdgddd} t| }t|t| ksft|jdkstt|j| jkstt jdgdd} t| }t|t jkst|j| jkstt jdgd	d
dd} t| }t|t jkst|j	| j	kst|j
| j
kst|j| jks tt jdgd	dd} t| }t|t jksNt|j
| j
ks`t|j| jksrtt jtddgd	dd} t| }t|t jkst|j
| j
kst|j| jkstt jdgddgddd} t| }t|t jks t|j| jk st|j| jks(t|j| jks:tt jg tgddd} t| }t|t jksjt|j| jks|t|j| jkstt jdgddt jdgddg}dgdgg}t j|ddg|d} t| }t|t jkstt| j|jD ]2\}}t|t|kst|j|jkstq|j| jks>tt jdgddt jdgdgddt jtddgddg}dgdgdgg}t j|dddg|d} t| }t|t jkstt| j|jD ]2\}}t|t|kst|j|jkstq|j| jkstd S )Nr   rJ   rK   intrL   r_   rN   r0   z
1970-01-01drx   )freqrS   rL   )r   rL   r}   ZxyxZzzzTr   r/   r1   r   )levelsnamescodes)r3   rh   rL   	timedelta)r%   rc   r   r]   r(   rL   rf   r_   ZDatetimeIndexrS   r   ZPeriodIndexZTimedeltaIndexrj   Ztimedelta64r   rh   r[   r   r   Z
MultiIndexzipr   r   )ru   r   r   r   Zidx1Zidx2r+   r+   r,   test_meta_nonempty_index  sz     r   c                  C   sP   t jdgddd} t| }t|t| ks.t|jdks<t|j| jksLtd S )Nr   rJ   uint64r   )r%   rf   r   r]   r(   r_   rL   )ru   r   r+   r+   r,   test_meta_nonempty_uint64indexS  s
    r   c                  C   st   t td} t| tjsttddd}t |} | |ks@ttjdd}t |} | tjd|j|j	dksptd S )Nr/   rP   r   rQ   rR   rT   )
r   rj   rk   r\   r(   r%   rl   rm   rS   rU   )rr   r   r+   r+   r,   test_meta_nonempty_scalar[  s    r   c               
   C   s   zt   tdW 5 Q R X W nH tk
rf }  z*| jd dsDtd| jd ksVtW 5 d } ~ X Y nX dsttdz t d tdW 5 Q R X W nH tk
r }  z*| jd dstd| jd kstW 5 d } ~ X Y nX dstdd S )	Nz	Bad stuffr   zMetadata inference failed.
RuntimeErrorFzshould have erroredZmyfuncz'Metadata inference failed in `myfunc`.
)r   r   	Exceptionargs
startswithr(   )er+   r+   r,   test_raise_on_meta_errori  s    $
$r   c            	      C   s  t dddgdddgdddgdd	d
gt dddgt jdd	d
gtjdd} | jd d }t| || ksnt| j	}t||j	|kst| j
}| j}t||j
ddd|kstt||jddd|kstt||jddd|ksttt}t||j
ddd W 5 Q R X t|jdks.t|dddddddg }| ddddg }tt}t||dd W 5 Q R X d}t|j|ksttt }t| jt jg dddd W 5 Q R X t|jdkstd S )Nr   r   zTFr   g      @g      @r   r   r   )r0   r1   r2   r   r   fr   r7   )Znumeric_equalr5   zMetadata mismatch found.

Partition type: `pandas.core.series.Series`
+----------+---------+
|          | dtype   |
+----------+---------+
| Found    | int64   |
| Expected | float64 |
+----------+---------+rO   )r0   r   r0   r1   r2   r   r   Zfrom_delayed)funcnamea@  Metadata mismatch found in `from_delayed`.

Partition type: `pandas.core.frame.DataFrame`
+--------+----------+----------+
| Column | Found    | Expected |
+--------+----------+----------+
| 'a'    | object   | category |
| 'c'    | -        | float64  |
| 'e'    | category | -        |
+--------+----------+----------+stringzMetadata mismatch found.

Partition type: `pandas.core.series.Series`
+----------+--------+
|          | dtype  |
+----------+--------+
| Found    | object |
| Expected | string |
+----------+--------+)r%   r&   r   re   rj   r   r   r	   r(   r   r   r   Zastypero   rp   
ValueErrorstrvaluer0   )	r)   rr   r   r   r   errZmeta2r   r   r+   r+   r,   test_check_meta}  sF    
 
$r   c               	   C   s   t jdddgd} t jdddgd}tjtdd t|| sBtW 5 Q R X t jddddgd}tjtdd t|| s~tW 5 Q R X t jddgd}tjtd	d t|| stW 5 Q R X d S )
Nr0   r1   r2   r   zOrder of columns does not matchmatchr   zMissing: \['d'\]zExtra:   \['c'\])r%   r&   ro   rp   r   r   r(   )r)   rr   r+   r+   r,   5test_check_matching_columns_raises_appropriate_errors  s    r   c               	   C   sn   t dg i} tj| dd}t| |  tt}t||  W 5 Q R X dt|j	ksXt
dt|j	ksjt
d S )Nr   r   r4   daskpandas)r%   r&   r`   ra   r	   ro   rp   r   r   r   r(   )r)   rt   infor+   r+   r,   test_check_meta_typename  s    
r   frame_value_countsTFc                 C   s  |r| j tjddd dd tdddd	gi}tj|dd
}t|sJtt|sVtt|jrdtt|jrrtt|jrtt|jrtttjrtt	|rtt	|rtt	|jstt	|jstt	|jrtt	|jrtt	tj
rtt|rtt|rtt|jr&tt|jr6tt|jsFtt|jsVtttjrftG dd d}| }d |_d |_t|stG dd d}| }d |_d |_t	|stG dd d}| }d |_d |_t|std S )NZvalue_countsc                 S   s   d S rW   r+   )r   r+   r+   r,   rX     rY   z(test_is_dataframe_like.<locals>.<lambda>F)Zraisingr   r   r   r   r4   c                   @   s   e Zd ZejZdS )z0test_is_dataframe_like.<locals>.DataFrameWrapperN)rE   rF   rG   r%   r&   	__class__r+   r+   r+   r,   DataFrameWrapper  s   r   c                   @   s   e Zd ZejZdS )z-test_is_dataframe_like.<locals>.SeriesWrapperN)rE   rF   rG   r%   re   r   r+   r+   r+   r,   SeriesWrapper  s   r   c                   @   s   e Zd ZejZdS )z,test_is_dataframe_like.<locals>.IndexWrapperN)rE   rF   rG   r%   rf   r   r+   r+   r+   r,   IndexWrapper  s   r   )setattrr%   r&   r`   ra   r
   r(   r   r$   r   re   r   rf   rZ   rd   r_   rL   )Zmonkeypatchr   r)   rt   r   wrapr   r   r+   r+   r,   test_is_dataframe_like  sP    r   c               	   C   sr   dd } t jddgdgd}tjtdd t| |d	 W 5 Q R X tjttd
d t| |d	 W 5 Q R X d S )Nc                   S   s   t jdddgdgdS )Nrz   r{   r|   r   rd   r$   )r%   r&   r+   r+   r+   r,   func"  s    z,test_apply_and_enforce_message.<locals>.funcrz   r}   r   r   zExtra: *['B', 'C']r   )Z_funcrb   zMissing: ['D'])r%   r&   ro   rp   r   r   reescape)r   rr   r+   r+   r,   test_apply_and_enforce_message!  s    r   c               	   C   sD   t t jddgdd} tjdd}t|  W 5 Q R X |r@td S )Nr   r   ZSparser   T)record)r%   re   arraywarningscatch_warningsr   r(   )serr   r+   r+   r,   test_nonempty_series_sparse-  s    r   z%Float64 was introduced in pandas>=1.2)reasonc                  C   s(   t jg dd} t| }|jdks$td S )NZFloat64r   )r%   re   r   r_   r(   )r   Z	non_emptyr+   r+   r,   #test_nonempty_series_nullable_float4  s    r   c               	   C   sF  t tdddtjdd} | d}t| | tt	 t| |dd W 5 Q R X |j
dd	}t| |dd
 tt	 t| | W 5 Q R X tt	 t| |ddd W 5 Q R X tj| dd}|dg}t| | tt	 t| |dd W 5 Q R X |j
dd	}t| |dd
 tt	 t| |ddd W 5 Q R X d S )Nr   r   r   )rz   r{   r{   F)sort_resultsT)Zdrop)check_index)r   r   r   r4   )r%   r&   rj   ZlinspacerandomZsort_valuesr   ro   rp   r(   Zreset_indexr`   ra   )r)   Zdf_sZdf_srrt   Zddf_sZddf_srr+   r+   r,   test_assert_eq_sorts;  s(    "


r   c               	      s   d  fdd} t jt jd fdd}t ddd	d
dgi}tj|d	d}|j||d}tjtdd t|| W 5 Q R X t||| d t	j
j| d t||d d W 5 Q R X d S )NFc                     s   zd t | |W S d X d S )NFTr   )r   kwargsZusing_custom_schedulerr+   r,   custom_schedulerX  s    z2test_assert_eq_scheduler.<locals>.custom_scheduler)partreturnc                    s    st d| d S )Nnot using custom schedulerr   )r(   )r   r   r+   r,   check_custom_scheduler`  s    z8test_assert_eq_scheduler.<locals>.check_custom_schedulerr   r   r   r   r   r4   )rr   r   r   )Z	scheduler)r%   r&   r`   ra   Zmap_partitionsro   rp   r(   r   r   configset)r   r   r)   rt   Zddf2r+   r   r,   test_assert_eq_schedulerU  s    r   r3   r   r   r4   c                 C   s(   t | tjkstt| tjks$td S rW   )r   r%   re   r(   r   r&   r3   r+   r+   r,   test_meta_constructor_utilitiesq  s    r   c              	   C   sH   t jtdd t|  W 5 Q R X t jtdd t|  W 5 Q R X d S )Nznot supported by meta_seriesr   znot supported by meta_frame)ro   rp   rq   r   r   r   r+   r+   r,   %test_meta_constructor_utilities_raise  s    r   )Ar   r   typingr   Znumpyrj   r   r%   ro   r   Zdask.dataframeZ	dataframer`   Zdask.dataframe._compatr   Zdask.dataframe.corer   Zdask.dataframe.utilsr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   Z
dask.localr   r-   rv   r   r   r   r   r   r   r   r   r   r   markZparametrizer   r   r   Zskipifr   r   r   r&   re   rf   	from_dictr   r$   r   r^   r   r   r+   r+   r+   r,   <module>   sd   @ 'LL
<





