U
    /e[                  
   @  s  U d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
mZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZmZmZmZ d d
l m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z:m;Z; d dl<m=Z=m>Z>m?Z? G dd deZ@edde@ddZAe2BejCdd ZDe2BejEe2BejFe2BejGe2BejHdd ZDe)BejejfdYddZDe)BejdZddZDejejejejIfZJdeKd< zd dlLmMZN eJeNjOf7 ZJW n ePk
r&   Y nX e-Bejfdd ZQe,BejRe)BejRd[ddZSe*BeJd\d d!ZTe,BeUd"d# ZVe,Bejd$d% ZWe,Bejd&d' ZXe,Bejd]d(d)ZYe+Bed*d+ ZZe+Bej[d,d- Z\e/Bejejejej]fd^d/d0Z^e$Bejd1d2 Z_e$Bejd3d4 Z`e$Bejd5d6 Zae$Bed7d8 Zbe$BeUd9d: Zce'Bejejejfd_d=d>ZdG d?d@ d@e:eeZfe%Bejejejfd`dAdBZge#BejejejfdadDdEZhe!BejejejfdbdFdGZie.Bejejej]fdHdI Zje(BejejejkjljmejCfdJdK Zne&BejejfdLdM ZoeBejejfdcdOdPZpG dQdR dRe@ZqeArdeq  e#sdSe'sdSe%sdSe$sdSe,sdSe)sdSe*sdSesdSdTdU Zte+sdVdWdX ZudS )d    )annotationsN)Iterable)is_categorical_dtypeis_datetime64tz_dtypeis_interval_dtypeis_period_dtype	is_scalar	is_sparseunion_categoricals)Array)percentile_lookup_percentile)CreationDispatchDaskBackendEntrypoint)	DataFrameIndexScalarSeries_Frame)categorical_dtype_dispatchconcatconcat_dispatchget_parallel_typegroup_split_dispatchgrouper_dispatchhash_object_dispatchis_categorical_dtype_dispatchmake_meta_dispatchmake_meta_objmeta_lib_from_arraymeta_nonemptypyarrow_schema_dispatchtolist_dispatchunion_categoricals_dispatch)make_array_nonemptymake_scalar)_empty_series_nonempty_scalar_scalar_from_dtypeis_float_na_dtypeis_integer_na_dtype)SimpleSizeofsizeof)is_arraylikeis_series_liketypenamec                   @  s   e Zd ZdZedddddZeddd	d
ZeddddZeddddZeddddZ	edddddZ
dS )DataFrameBackendEntrypointzoDask-DataFrame version of ``DaskBackendEntrypoint``

    See Also
    --------
    PandasBackendEntrypoint
    dictint)datanpartitionsc                K  s   t dS )a  Create a DataFrame collection from a dictionary

        Parameters
        ----------
        data : dict
            Of the form {field : array-like} or {field : dict}.
        npartitions : int
            The desired number of output partitions.
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.io.from_dict
        NNotImplementedError)r4   r5   kwargs r9   ;/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/backends.py	from_dict<   s    z$DataFrameBackendEntrypoint.from_dictz
str | list)pathc                 K  s   t dS )a$  Read Parquet files into a DataFrame collection

        Parameters
        ----------
        path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.parquet.core.read_parquet
        Nr6   r<   r8   r9   r9   r:   read_parquetO   s    z'DataFrameBackendEntrypoint.read_parquet)url_pathc                 K  s   t dS )a  Read json files into a DataFrame collection

        Parameters
        ----------
        url_path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.json.read_json
        Nr6   )r?   r8   r9   r9   r:   	read_json`   s    z$DataFrameBackendEntrypoint.read_jsonc                 K  s   t dS )a  Read ORC files into a DataFrame collection

        Parameters
        ----------
        path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.orc.core.read_orc
        Nr6   r=   r9   r9   r:   read_orcq   s    z#DataFrameBackendEntrypoint.read_orc)urlpathc                 K  s   t dS )a  Read CSV files into a DataFrame collection

        Parameters
        ----------
        urlpath : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.csv.read_csv
        Nr6   )rB   r8   r9   r9   r:   read_csv   s    z#DataFrameBackendEntrypoint.read_csvstr)patternkeyc                 K  s   t dS )aT  Read HDF5 files into a DataFrame collection

        Parameters
        ----------
        pattern : str or list
            Source path(s).
        key : str
            Group identifier in the store.
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.hdf.read_hdf
        Nr6   )rE   rF   r8   r9   r9   r:   read_hdf   s    z#DataFrameBackendEntrypoint.read_hdfN)__name__
__module____qualname____doc__staticmethodr;   r>   r@   rA   rC   rG   r9   r9   r9   r:   r1   4   s   r1   Z	dataframepandasdataframe_creation_dispatch)module_namedefaultZentrypoint_classnamec                 C  s   t | S N)r)   dtyper9   r9   r:   _   s    rU   c                 C  s   | S rR   r9   xr9   r9   r:   rU      s    c                 C  s*   | j d d jdd}|jjdd|_|S Nr   T)deep)iloccopyindex)rW   r\   outr9   r9   r:   rU      s    c                 C  s   | dd j ddS rX   r[   rW   r\   r9   r9   r:   rU      s    ztuple[type, ...]meta_object_typesc                 C  s   dd l }|j| S Nr   )ZpyarrowZSchemaZfrom_pandas)objpar9   r9   r:   get_pyarrow_schema_pandas   s    rd   c                 C  s   t | S rR   )r(   r_   r9   r9   r:   make_meta_pandas_datetime_tz   s    re   c                   sB  t | r| jr| dd S  dk	r*t  t| trTtj fdd|  D  dS t| trt	| dkrt
| d | d  dS t| trt| tstdd	 | D std
|  tj fdd| D dd | D  dS t| ds| dk	rzt| }t|W S  tk
r   Y nX t| r0t| S td|  dS )a  Create an empty pandas object containing the desired metadata.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or
        an iterable of `(name, dtype)` tuples. To create a `Series`, provide a
        tuple of `(name, dtype)`. If a pandas object, names, dtypes, and index
        should match the desired output. If a dtype or scalar, a scalar of the
        same dtype is returned.
    index :  pd.Index, optional
        Any pandas index to use in the metadata. If none provided, a
        `RangeIndex` will be used.

    Examples
    --------

    >>> make_meta_object([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta_object(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta_object('i8')
    1
    Nr   c                   s    i | ]\}}|t || d qS r\   r'   .0cdrg   r9   r:   
<dictcomp>	  s      z$make_meta_object.<locals>.<dictcomp>rg         c                 s  s$   | ]}t |tot|d kV  qdS )rn   N)
isinstancetuplelenrj   ir9   r9   r:   	<genexpr>  s     z#make_meta_object.<locals>.<genexpr>z2Expected iterable of tuples of (name, dtype), got c                   s    i | ]\}}|t || d qS rf   rh   ri   rg   r9   r:   rm     s      c                 S  s   g | ]\}}|qS r9   r9   ri   r9   r9   r:   
<listcomp>  s     z$make_meta_object.<locals>.<listcomp>)columnsr\   rT   z'Don't know how to create metadata from )r.   shaper   rp   r2   pdr   itemsrq   rr   r'   r   rD   all
ValueErrorhasattrnprT   r)   	Exceptionr   r(   	TypeError)rW   r\   rT   r9   rg   r:   make_meta_object   s8    
 


r   c                 C  s*   t | rt| S tdtt|  dS )zCreate a nonempty pandas object from the given metadata.

    Returns a pandas DataFrame, Series, or Index that contains two rows
    of fake data.
    z>Expected Pandas-like Index, Series, DataFrame, or scalar, got N)r   r(   r   r0   typerV   r9   r9   r:   meta_nonempty_object&  s
    r   c                 C  s   t | j}t }t }tt| jD ]N}| jd d |f }|j}||krft| jd d |f |d||< || ||< q$t	j
||tt| jd}| j|_| j|_|S )N)idx)r\   rw   )r!   r\   r2   rangerr   rw   rZ   rT   _nonempty_seriesry   r   r~   Zarangeattrs)rW   r   Z	dt_s_dictr4   rt   Zseriesdtresr9   r9   r:   meta_nonempty_dataframe6  s    
r   c                 C  s  t | }|tjkr"tjd| jdS |  r<|ddg| jdS |tjkr| jtkrdtjddg| jdS tjddg| j| jdS n|tjk rd	}ztj	|d| j
| j| jd
W S  tk
 r   | j
d kr|dgnd }tj||d| j
| j| jd
 Y S X nt|tjkrtjd	d| j
| jdS |tjkrtdd}ztj|d| j
| jdW S  tk
r   tdd}| j
d kr||d gnd }tj||d| j
| jd Y S X n|tjkrt| jdkrtjt| j| jd}ntjjddg| j| jd}tj|| jdS |tjkrndd | jD }dd | jD }ztj||| jdW S  tk
rl   tj||| jd Y S X tdtt |  d S )Nrn   rQ   ro   TFab)rQ   rT   
1970-01-01)startperiodsfreqtzrQ   z
1970-01-02)r   r   r   rQ   Dr   )ordered
categoriesr   c                 S  s   g | ]}t |qS r9   )_nonempty_index)rj   lr9   r9   r:   rv     s     z#_nonempty_index.<locals>.<listcomp>c                 S  s   g | ]}d d gqS )r   r9   rs   r9   r9   r:   rv     s     )levelscodesnames)r   labelsr   z'Don't know how to handle index of type )r   ry   Z
RangeIndexrQ   Z
is_numericr   rT   boolZDatetimeIndexZ
date_ranger   r   r|   ZPeriodIndexZperiod_rangeZTimedeltaIndexr~   Ztimedelta64Ztimedelta_rangeCategoricalIndexrr   r   Categoricalr   r   
from_codes
MultiIndexr   r   r   r0   )r   typr   r4   r   r   r9   r9   r:   r   G  s    


                     r   c                 C  s  |d krt | j}| j}t| dkr8| jd gd }nlt|r\tjd|jd}||g}nHt	|rt| j
jr| j
jd gd }| j
j}nt | j
j}| j
jd d }tj||| j
jd}nt|rtjdd g|d}nt|rtjdd g|d}nt|r |j}td	|td
|g}nt|rHt|j}tj||g|d}n\t|rpt|j}tj||g|d}n4t|tjkrt|}nt|}tj||g|d}tj|| j|d}| j|_|S )Nr   rn   r   )r   r   ro   rS   g      ?2000Z2001)rQ   r\   )r   r\   rT   rr   rZ   r   ry   	Timestampr   r   catr   r   r   r+   arrayr*   r   r   Periodr	   r)   subtyper   r   r%   Z_lookupr~   r   rQ   r   )sr   rT   r4   entryZcatsr   r]   r9   r9   r:   r     sF    







r   c                 C  s
   t | jS rR   )r    _metarV   r9   r9   r:   _meta_lib_from_array_da  s    r   c                 C  s   t S rR   )ry   rV   r9   r9   r:   _meta_lib_from_array_numpy  s    r   Fc                 C  s   t jjj| ||dS )N)sort_categoriesignore_order)ry   apitypesr
   )Zto_unionr   r   r9   r9   r:   union_categoricals_pandas  s
      r   c                 C  s   t S rR   )r   rU   r9   r9   r:   get_parallel_type_series  s    r   c                 C  s   t S rR   )r   r   r9   r9   r:   get_parallel_type_dataframe  s    r   c                 C  s   t S rR   )r   r   r9   r9   r:   get_parallel_type_index  s    r   c                 C  s
   t | jS rR   )r   r   )or9   r9   r:   get_parallel_type_frame  s    r   c                 C  s   t S rR   )r   r   r9   r9   r:   get_parallel_type_object  s    r   Tutf8c                 C  s   t jj| ||||dS )N)r\   encodinghash_key
categorize)ry   utilZhash_pandas_object)rb   r\   r   r   r   r9   r9   r:   hash_object_pandas  s        r   c                      s"   e Zd Zdd fddZ  ZS )ShuffleGroupResultr3   )returnc                   s8   t   }|  D ] \}}|t|7 }|t|7 }q|S )ag  
        The result of the shuffle split are typically small dictionaries
        (#keys << 100; typically <= 32) The splits are often non-uniformly
        distributed. Some of the splits may even be empty. Sampling the
        dictionary for size estimation can cause severe errors.

        See also https://github.com/dask/distributed/issues/4962
        )super
__sizeof__rz   r-   )selfZ
total_sizekdf	__class__r9   r:   r     s
    	
zShuffleGroupResult.__sizeof__)rH   rI   rJ   r   __classcell__r9   r9   r   r:   r     s   r   c                   s|   t |r|j}tjj|jtjdd|\}}| 	| |
 } fddt|d d |dd  D }ttt||S )NFr^   c                   s8   g | ]0\}}r& j || jd dn j || qS T)Zdrop)rZ   reset_index)rj   r   r   Zdf2ignore_indexr9   r:   rv     s   z&group_split_pandas.<locals>.<listcomp>r   ro   )r/   valuesry   Z_libsZalgosZgroupsort_indexerastyper~   ZintpZtakeZcumsumzipr   r   )r   rk   r   r   Zindexer	locationspartsr9   r   r:   group_split_pandas  s     
r   outerc              	     sz  | dd}|dkr,tj f||d|S t d tjrzt d tjrtdt D ]&}t | tjs\ | d |< q\tjt	 |d d j
dS t d tjrd d  dd   }	tfd	d
|	D r
 fddtjD }
tjj|
jdS jftdd
 |	D  }t|}ztjj|jdW S  tk
rb   t| Y S X  d  dd  S  d j}t|tjpt|tjotdd
 |jD }|rdd  D }tdd  D }n }d }|rt|d tjrntdd
 |D r|r,|}|d jdk}nddd |D }t H tdt |rbtdt  tjdd |D fd|i| }W 5 Q R X | r||  jtjfdd|D fd|i|}|j}|j!D ]}|D ] }|"|}|d k	r qqg }|D ]X}||j#kr.|||  n6tj$t|ddd}tj%&||j'j(|j'j)}|| qt	||d||< t|s||_q|j*|jd}nBt 4 tdt |rtdt  tj||dd}W 5 Q R X nt+|d j,r*|d krtdd |D }tj-t	||d||d j
dS t . |rFtdt  tj|fd|i|}W 5 Q R X |d k	rv||_|S )Nr   Fro   )axisjoinr   category)r   r   c                 3  s&   | ]}t |tjo|j jkV  qd S rR   )rp   ry   r   nlevels)rj   r   )firstr9   r:   ru   (  s   z concat_pandas.<locals>.<genexpr>c                   s"   g | ] t  fd dD qS )c                   s   g | ]}|  qS r9   )Z_get_level_valuesrs   nr9   r:   rv   -  s     z,concat_pandas.<locals>.<listcomp>.<listcomp>)r   )rj   )dfsr   r:   rv   ,  s   z!concat_pandas.<locals>.<listcomp>)r   c                 s  s   | ]}|j V  qd S rR   )Z_values)rj   r   r9   r9   r:   ru   2  s     c                 s  s   | ]}t |tjV  qd S rR   )rp   ry   r   rs   r9   r9   r:   ru   ?  s     c                 S  s   g | ]}|j d dqS r   )r   rj   r   r9   r9   r:   rv   C  s     c                 S  s   g | ]
}|j qS r9   rg   r   r9   r9   r:   rv   D  s     c                 s  s   | ]}t |tjV  qd S rR   )rp   ry   r   r   r9   r9   r:   ru   M  s     c                 S  s2   g | ]*}t |tjr|n| j|jd idqS )r   rw   )rp   ry   r   to_framerenamerQ   r   r9   r9   r:   rv   V  s   
ignorec                 S  s   g | ]}|j d k jqS )r   )dtypesr   Tr   r9   r9   r:   rv   b  s     r   c                   s   g | ]}||j   qS r9   )rw   intersectionr   )not_catr9   r:   rv   k  s     r   i8rS   r   )r   sortc                 S  s   g | ]
}|j qS r9   rg   r   r9   r9   r:   rv     s     )r\   rQ   ).popry   r   rp   r   r   r   rr   r   r
   rQ   r   r{   r   Zfrom_arraysr   r   rq   r~   Zconcatenatefrom_tuplesr   appendr\   anyr   r   r   warningscatch_warningssimplefilterRuntimeWarningFutureWarning
differencegetrw   fullr   r   r   r   r   Zreindexr   rT   r   )r   r   r   uniformZfilter_warningr   r8   r   rt   restZarraysZ	to_concatZ
new_tuplesZ
dfs0_indexZhas_categoricalindexZdfs2indZdfs3Zcat_maskr]   Ztemp_indcolr   sampler   r   r4   r9   )r   r   r   r:   concat_pandas  s    

 








  





 
r   c                 C  s   t jjj| |dS )Nr   )ry   r   r   ZCategoricalDtyper   r9   r9   r:   categorical_dtype_pandas  s    r   c                 C  s   |   S rR   )tolistrb   r9   r9   r:   tolist_pandas  s    r   c                 C  s   t jj| S rR   )ry   r   r   r   r   r9   r9   r:   is_categorical_dtype_pandas  s    r   c                 C  s
   t jjjS rR   )ry   coregroupbyZGrouperr   r9   r9   r:   get_grouper_pandas  s    r  linearc                 C  s   t | ||S rR   r   )r   qinterpolationr9   r9   r:   
percentile  s    r  c                   @  s   e Zd ZdZdS )PandasBackendEntrypointzPandas-Backend Entrypoint Class for Dask-DataFrame

    Note that all DataFrame-creation functions are defined
    and registered 'in-place' within the ``dask.dataframe``
    ``io`` module.
    N)rH   rI   rJ   rK   r9   r9   r9   r:   r    s   r  cudfc                  C  s   dd l } d S ra   Z	dask_cudfr	  r9   r9   r:   _register_cudf  s    	r
  cupyc                    sF   z,dd l  dd l} t| j fdd}W n tk
r@   Y nX d S )Nr   c                   s    S rR   r9   rV   r  r9   r:   meta_lib_from_array_cupy  s    z8_register_cupy_to_cudf.<locals>.meta_lib_from_array_cupy)r  r  r    registerndarrayImportError)r  r  r9   r  r:   _register_cupy_to_cudf  s    
r  )N)N)N)N)N)FF)Tr   NT)F)r   r   FTF)NF)r  )v
__future__r   r   typingr   Znumpyr~   rM   ry   Zpandas.api.typesr   r   r   r   r   r	   r
   Zdask.array.corer   Zdask.array.dispatchr   Zdask.array.percentiler   Zdask.backendsr   r   Zdask.dataframe.corer   r   r   r   r   Zdask.dataframe.dispatchr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   Zdask.dataframe.extensionsr%   r&   Zdask.dataframe.utilsr'   r(   r)   r*   r+   Zdask.sizeofr,   r-   Z
dask.utilsr.   r/   r0   r1   rN   r  rT   rU   r   Z	Timedeltar   ZIntervalr   r`   __annotations__Zscipy.sparsesparsespZspmatrixr  rd   ZDatetimeTZDtypere   r   objectr   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r2   r   r   r   r   r   r   
extensionsZExtensionDtyper   r  r  r  Zregister_backendZregister_lazyr
  r  r9   r9   r9   r:   <module>   s    $
Ds









A




A
,










             


