U
    /e2                     @   s  d dl Z d dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZmZmZ d dlmZmZmZ eZeZdZddZd[d	d
Zd\ddZd]ddZdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%d^dd Z&d!d" Z'd#d$ Z(d%d& Z)d'd( Z*d)d* Z+d+d, Z,d-d. Z-d_d/d0Z.d`d1d2Z/dad3d4Z0d5d6 Z1d7d8 Z2d9d: Z3d;d< Z4d=d> Z5dbd?d@Z6dAdB Z7dCdD Z8dEdF Z9dGdH Z:dIdJ Z;dKdL Z<dMdN Z=dOdP Z>dQdR Z?dSdT Z@dUdV ZAee?dWdXZBee?dYdXZCee@dWdXZDee@dYdXZEeeAdWdXZFeeAdYdXZGdS )c    N)partial)	partition)PANDAS_GT_131)	concatconcat_dispatchgroup_split_dispatchhash_object_dispatchis_categorical_dtypeis_categorical_dtype_dispatchtolisttolist_dispatchunion_categoricals)is_dataframe_likeis_index_likeis_series_likec                 C   s$   |dkr| j | S | j ||f S dS )z"
    .loc for known divisions
    N)locdfZiindexercindexer r   :/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/methods.pyr   !   s    
r   c                 C   s   | j d d |f S N)iloc)r   r   r   r   r   r   +   s    r   c                 C   s@   zt | ||W S  tk
r:   | dj dd|f  Y S X dS )z$
    .loc for unknown divisions
    r   N)r   KeyErrorheadr   r   r   r   try_loc/   s    r   Tc           
      C   s   t | jdkr| S tr6|dk	r,tjdtd i }d}n|p<d}d|i}|dkr| jjs|dk	r|rr| | j|k } n| | j|k } |dk	r|r| | j|k } n| | j|k  } | S t| ||| }|s|dk	r|jj|df|}|j	d| }|s|dk	r|jj|df|}	|j	|	d }|S )	aY  Index slice start/stop. Can switch include/exclude boundaries.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [10, 20, 30, 40, 50]}, index=[1, 2, 2, 3, 4])
    >>> boundary_slice(df, 2, None)
        x
    2  20
    2  30
    3  40
    4  50
    >>> boundary_slice(df, 1, 3)
        x
    1  10
    2  20
    2  30
    3  40
    >>> boundary_slice(df, 1, 3, right_boundary=False)
        x
    1  10
    2  20
    2  30

    Empty input DataFrames are returned

    >>> df_empty = pd.DataFrame()
    >>> boundary_slice(df_empty, 1, 3)
    Empty DataFrame
    Columns: []
    Index: []
    r   NzXThe `kind` argument is no longer used/supported. It will be dropped in a future release.)categoryr   kindleftright)
lenindexr   warningswarnFutureWarningis_monotonic_increasinggetattrZget_slice_boundr   )
r   startstopZright_boundaryZleft_boundaryr   Z	kind_optsresultZright_indexZ
left_indexr   r   r   boundary_slice9   s<     r*   c                 C   s   t |  S r   )pdZnotnullsumxr   r   r   index_count   s    r/   c              
   C   s^   z8t jdd" t d | | W  5 Q R  W S Q R X W n  tk
rX   ttj Y S X d S )NT)recordalways)r"   catch_warningssimplefilterZeroDivisionErrornpfloat64nan)snr   r   r   mean_aggregate   s    
 r:   c                 C   s(   t | tjst | tr$tj| |dS | S Nr!   
isinstancer5   Zndarraylistr+   Series)Z	array_varr!   r   r   r   wrap_var_reduction   s    rA   c                 C   s(   t | tjst | tr$tj| |dS | S r;   r=   )Z
array_skewr!   r   r   r   wrap_skew_reduction   s    rB   c                 C   s(   t | tjst | tr$tj| |dS | S r;   r=   )Zarray_kurtosisr!   r   r   r   wrap_kurtosis_reduction   s    rC   c                 C   s   t | |g}|j|dS r;   )r+   r   reindex)Znumeric_varZtimedelta_varcolumnsvarsr   r   r   var_mixed_concat   s    rG   c                 C   sf   t | dkstg }tdd | D t d}|D ] }|D ]}||kr6|| q6q.tj| ddd|S )Nr   c                 s   s   | ]}|j V  qd S r   r<   ).0r.   r   r   r   	<genexpr>   s     z%describe_aggregate.<locals>.<genexpr>)key   F)axissort)r    AssertionErrorsortedappendr+   r   rD   )valuesnamesZvalues_indexesZidxnamesnamer   r   r   describe_aggregate   s    rT   Fc                 C   s@  t | dkst| \}}}}}}	t|r6t| }
nt|}
|rxt|}t|}t|}t|	}	|dd }|rt|}t|	}	|dd }|r|
||gddgd}n|
||||gddd	dgd}d
d t	|j
D |_
t|r
|
t|kr
| }|
|	gdgd}t|||gdd}t|r<||_|S )N   c                 S   s
   t | S r   )r+   to_timedeltar-   r   r   r   <lambda>       z,describe_numeric_aggregate.<locals>.<lambda>c                 S   s
   t | S r   )r+   to_datetimer-   r   r   r   rW      rX   countminr<   meanstdc                 S   s   g | ]}|d  ddqS )d   g%r   )rH   lr   r   r   
<listcomp>   s     z.describe_numeric_aggregate.<locals>.<listcomp>maxF)rM   )r    rN   r   typeZto_framer+   rV   applyrY   r   r!   r   rS   )statsrS   Zis_timedelta_colZis_datetime_colrZ   r\   r]   r[   qrc   typZpart1Zpart3r)   r   r   r   describe_numeric_aggregate   s4    






ri   c                 C   sZ  t | }|dk}|dk}|s$|s$t|r4| \}}}n| \}}}}}	t |dkrddg}
ddg}d }|
d d g |ddg t}tj|
|||d}|S |jd }|jd }ddddg}||g}|r<|j}t	|}|j
d k	r|d k	r||}n
||}tj	||d	}tj	|	|d	}|d
dg |||||g n|||g tj|||dS )N      r   rZ   uniquetopfreq)r!   dtyperS   )tzfirstlast)r!   rS   )r    rN   extendobjectr+   r@   r!   r   rp   Z	TimestamptzinfoZ
tz_convertZtz_localize)rf   rS   Zargs_lenZis_datetime_columnZis_categorical_columnZnuniquerZ   Ztop_freqZmin_tsZmax_tsdatar!   ro   r)   rm   rn   rQ   rp   rq   rr   r   r   r   describe_nonnumeric_aggregate   s@    



rw   c                 C   s   |dkr|S | ||S dS )zApply aggregation function within a cumulative aggregation

    Parameters
    ----------
    aggregate: function (a, a) -> a
        The aggregation function, like add, which is used to and subsequent
        results
    x:
    y:
    Nr   )Z	aggregater.   yr   r   r   _cum_aggregate_apply  s    ry   c                 C   s$   | d kr|S |d kr| S | | S d S r   r   r.   rx   r   r   r   cumsum_aggregate'  s
    r{   c                 C   s$   | d kr|S |d kr| S | | S d S r   r   rz   r   r   r   cumprod_aggregate0  s
    r|   c                 C   sF   t | st| r2| j| |k |  B || jd dS | |k r>| S |S d S NrK   rL   r   r   whereisnullndimrz   r   r   r   cummin_aggregate9  s    "r   c                 C   sF   t | st| r2| j| |k|  B || jd dS | |kr>| S |S d S r}   r   rz   r   r   r   cummax_aggregate@  s    "r   c                 G   sR   t td|}tt|t| j@ }| jt|d} | D ]\}}|| |< q<| S )N   )deep)dictr   boolsetrE   copyitems)r   pairsr   rS   valr   r   r   assignG  s    
r   c                 C   s*   |   }t|s&t|s&tj||d}|S )N)rS   )rl   r   r   r+   r@   )r.   Zseries_nameoutr   r   r   rl   R  s    rl   c                 K   s   | j f ddi| S )Nlevelr   groupbyr,   )r.   rM   	ascendinggroupby_kwargsr   r   r   value_counts_combine[  s    r   c                 K   s<   t | f|}|r(||d k	r|n|  }|r8|j|dS |S )N)r   )r   r,   Zsort_values)r.   rM   r   	normalizetotal_lengthr   r   r   r   r   value_counts_aggregate`  s    r   c                 C   s   | j S r   )nbytesr-   r   r   r   r   k  s    r   c                 C   s   | j S r   )sizer-   r   r   r   r   o  s    r   c                 C   s   | j S r   )rQ   r   r   r   r   rQ   s  s    rQ   c                 C   s,   t j|}t| dkr(| j|||dS | S )Nr   )Zrandom_statefracreplace)r5   randomZRandomStater    sample)r   stater   r   rsr   r   r   r   w  s    r   c                 C   s    | j |dd} | j|| _| S r}   )ZdroprE   astype)r   rE   ro   r   r   r   drop_columns|  s    r   c                 C   s2   | j |d}|r.| jjdd r.td|S )N)methodr   r~   zAll NaN partition encountered in `fillna`. Try using ``df.repartition`` to increase the partition size, or specify `limit` in `fillna`.)Zfillnar   rQ   allany
ValueError)r   r   checkr   r   r   r   fillna_check  s    r   c                 C   s   | j dd S Nr   )r   r   r   r   r   r   	pivot_agg  s    r   c                 C   s   | j dd S r   )r   rq   r   r   r   r   pivot_agg_first  s    r   c                 C   s   | j dd S r   )r   rr   r   r   r   r   pivot_agg_last  s    r   c                 C   s   t j| |||dddS )Nr,   Fr!   rE   rQ   ZaggfuncZdropnar+   pivot_tabler   r!   rE   rQ   r   r   r   	pivot_sum  s         r   c                 C   s   t j| |||dddtjS )NrZ   Fr   )r+   r   r   r5   r6   r   r   r   r   pivot_count  s         r   c                 C   s   t j| |||dddS )Nrq   Fr   r   r   r   r   r   pivot_first  s         r   c                 C   s   t j| |||dddS )Nrr   Fr   r   r   r   r   r   
pivot_last  s         r   c                 C   s   |   } || _| S r   )r   r!   )r   indr   r   r   assign_index  s    r   c                 C   sL   | j rd }n,t| r| n| j}t| ||d |d gg}tj|dddgdS )Nr   	monotonicrq   rr   )rv   rE   )emptyr   r   r&   r+   	DataFrame)r.   proprv   r   r   r   _monotonic_chunk  s
    r   c                 C   sh   | j rd }nHt| ddg   }| d  o:t||}||jd |jd gg}tj|dddgdS )Nrq   rr   r   r   r   )rE   )	r   r+   r@   to_numpyravelr   r&   r   r   )concatenatedr   rv   r8   Zis_monotonicr   r   r   _monotonic_combine  s    r   c                 C   s0   t | ddg   }| d  o.t||S )Nrq   rr   r   )r+   r@   r   r   r   r&   )r   r   r8   r   r   r   _monotonic_aggregate  s    r   r%   )r   Zis_monotonic_decreasing)N)N)N)TTN)NFF)N)TF)TFFN)T)Hr"   	functoolsr   Znumpyr5   Zpandasr+   Ztlzr   Zdask.dataframe._compatr   Zdask.dataframe.dispatchr   r   r   r   r	   r
   r   r   r   Zdask.dataframe.utilsr   r   r   Zhash_dfZgroup_splitr   r   r   r*   r/   r:   rA   rB   rC   rG   rT   ri   rw   ry   r{   r|   r   r   r   rl   r   r   r   r   rQ   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zmonotonic_increasing_chunkZmonotonic_decreasing_chunkZmonotonic_increasing_combineZmonotonic_decreasing_combineZmonotonic_increasing_aggregateZmonotonic_decreasing_aggregater   r   r   r   <module>   s   ,





K	     
*1		
	
       



    