U
    /e4                     @   s   d dl Z d dlmZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZmZ d dlmZ d dlmZ G dd dZG dd deZG dd deZdd Z dd Z!dd Z"dd Z#dS )    N)defaultdict)datetime)is_bool_dtype)Array)tokenize)methods)PANDAS_GT_130)Seriesnew_dd_object)is_index_likeis_series_likemeta_nonempty)HighLevelGraph)is_arraylikec                   @   s<   e Zd Zdd Zedd Zedd Zdd Zd	d
 ZdS )_IndexerBasec                 C   s
   || _ d S N)obj)selfr    r   ;/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/indexing.py__init__   s    z_IndexerBase.__init__c                 C   s   | j jS r   )r   _namer   r   r   r   r      s    z_IndexerBase._namec                 C   s   t d S r   )NotImplementedErrorr   r   r   r   _meta_indexer   s    z_IndexerBase._meta_indexerc                 C   s$   |dkr| j S | jdd|f S dS )z
        get metadata
        N)r   r   )r   iindexercindexerr   r   r   
_make_meta   s    z_IndexerBase._make_metac                 C   s   t | jt| jfS r   )type__name__r   r   r   r   r   r   __dask_tokenize__(   s    z_IndexerBase.__dask_tokenize__N)	r   
__module____qualname__r   propertyr   r   r   r    r   r   r   r   r      s   

	r   c                   @   s(   e Zd Zedd Zdd Zdd ZdS )_iLocIndexerc                 C   s
   | j jjS r   )r   _metailocr   r   r   r   r   -   s    z_iLocIndexer._meta_indexerc                 C   sx   d}t |tst|t|dkr*td|\}}|td krFt|| jjjs\| 	||S | jj| }| j
|S d S )Nzd'DataFrame.iloc' only supports selecting columns. It must be used like 'df.iloc[:, column_indexer]'.   Too many indexers)
isinstancetupler   len
ValueErrorslicer   columnsZ	is_unique_iloc__getitem__)r   keymsgr   r   Z	col_namesr   r   r   r0   1   s    

z_iLocIndexer.__getitem__c                 C   s0   |t d kst| ||}| jjtj||dS )Nmeta)r-   AssertionErrorr   r   map_partitionsr   r&   r   r   r   r4   r   r   r   r/   K   s    z_iLocIndexer._ilocN)r   r!   r"   r#   r   r0   r/   r   r   r   r   r$   ,   s   
r$   c                   @   sl   e Zd ZdZedd Zdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd Zdd Zdd Zdd ZdS )_LocIndexerz"Helper class for the .loc accessorc                 C   s
   | j jjS r   )r   r%   locr   r   r   r   r   U   s    z_LocIndexer._meta_indexerc                 C   sR   t |tr>t|| jjkr,d}tjj||d }|d }n|}d }| 	||S )Nr(   r      )
r)   r*   r+   r   ndimpdcoreZindexingZIndexingError_loc)r   r1   r2   r   r   r   r   r   r0   Y   s    

z_LocIndexer.__getitem__c                 C   s&  t |tr| ||S t |tr,| ||S t|rF| || j|S | jjr| 	|}t |t
rn| ||S t|rt|js| |j|S t |tst|r| ||S | ||S nht |ttjfst|rt|jsd}t|nt |t
s t
||}| ||}| jjtj|||dS dS )z%Helper function for the .loc accessorz^Cannot index with list against unknown division. Try setting divisions using ``ddf.set_index``r3   N)r)   r	   _loc_seriesr   
_loc_arraycallabler>   r   known_divisions_maybe_partial_time_stringr-   
_loc_slicer   r   dtype	_loc_listvalueslistr   _loc_elementnpZndarrayKeyErrorr   r6   r   Ztry_loc)r   r   r   r2   r4   r   r   r   r>   j   s@    





   z_LocIndexer._locc                 C   s   t | jjj}t||}|S )z{
        Convert index-indexer for partial time string slicing
        if obj.index is DatetimeIndex / PeriodIndex
        )r   r   r%   indexrC   )r   r   idxr   r   r   rC      s    
z&_LocIndexer._maybe_partial_time_stringc                 C   s6   t |jstd| ||}| jjtj||d|dS )NzuCannot index with non-boolean dask Series. Try passing computed values instead (e.g. ``ddf.loc[iindexer.compute()]``)z
loc-series)tokenr4   )r   rE   rK   r   r   r6   r   r9   r7   r   r   r   r?      s    
    z_LocIndexer._loc_seriesc                 C   s   | d| jj}| ||S )N_)Zto_dask_dataframer   rL   r?   )r   r   r   Ziindexer_seriesr   r   r   r@      s    z_LocIndexer._loc_arrayc                 C   s   dt || j }| |}| ||}t|ri }g }t| }t|D ]:\}	\}
}tj	| j
|
f||f|||	f< |t|d  qJ|t|d d d  tj||| jgd}n&d d g}|df|di}t||}t||||dS )Nloc-%sr   r:   Zdependenciesr4   	divisions)r   r   _get_partitionsr   r+   sorteditems	enumerater   r9   r   appendr   from_collectionsheadr
   )r   r   r   namepartsr4   dskrT   rW   idivindexergraphr   r   r   rF      s     
z_LocIndexer._loc_listc                 C   s   dt || j }| |}|| jjd k s:|| jjd krJtdt| |dftj| j|ft	|||fi}| 
||}tj||| jgd}t|||||gdS )NrP   r   rQ   z"the label [%s] is not in the indexrR   rS   )r   r   rU   rT   rK   strr   r9   r   r-   r   r   rZ   r
   )r   r   r   r\   partr^   r4   rb   r   r   r   rI      s    
 	z_LocIndexer._loc_elementc                 C   s2   t |tst|r t| jj|S t| jj|S d S r   )r)   rH   r   _partitions_of_index_valuesr   rT   _partition_of_index_value)r   keysr   r   r   rU      s    z_LocIndexer._get_partitionsc                 C   s   t | jj|S r   )_coerce_loc_indexr   rT   )r   r1   r   r   r   rh      s    z_LocIndexer._coerce_loc_indexc                 C   s  dt |||  }t|tst|jdks,t|jd k	rD| |j}nd}|jd k	r`| |j}n| jj	d }|jd kr| jj
r|jd kr| jjd nt| jjd |j}n| |j}|jd kr| jj
r|jd kr| jjd nt| jjd |j}n| |j}||kr<|dftj| j|ft|j|j|fi}||g}	n*|dftj| j|ft|jd |fi}td|| D ]N}
|d kr| j||
 f|||
f< n&tj| j||
 ftd d |f|||
f< qntj| j|ftd |j|f|||| f< |jd kr | jjd }nt|| jj| }|jd kr,| jjd }nt|| jj|d  }|f| jj|d |d   |f }	t|	t|d kst| ||}tj||| jgd}t||||	dS )NrP   )Nr:   r   r:   rQ   rR   rS   )r   r)   r-   r5   stepstartrU   stopr   ZnpartitionsrB   rT   minrh   maxr   r9   r   ranger+   r   r   rZ   r
   )r   r   r   r\   rj   rk   istartistopr^   rT   r_   Z	div_startZdiv_stopr4   rb   r   r   r   rD      s|    





"z_LocIndexer._loc_sliceN)r   r!   r"   __doc__r#   r   r0   r>   rC   r?   r@   rF   rI   rU   rh   rD   r   r   r   r   r8   R   s   
(	r8   c                 C   sJ   | d dkrd}t |t| |}t| |}tt| d td|d S )a'  In which partition does this value lie?

    >>> _partition_of_index_value([0, 5, 10], 3)
    0
    >>> _partition_of_index_value([0, 5, 10], 8)
    1
    >>> _partition_of_index_value([0, 5, 10], 100)
    1
    >>> _partition_of_index_value([0, 5, 10], 5)  # left-inclusive divisions
    1
    r   N4Can not use loc on DataFrame without known divisionsr'   r:   )r,   rh   bisectbisect_rightrl   r+   rm   )rT   valr2   r_   r   r   r   rf   8  s    
rf   c                 C   sd   | d dkrd}t |tt}|D ]:}t| |}tt| d td|d }|| | q$|S )aS  Return defaultdict of division and values pairs
    Each key corresponds to the division which values are index values belong
    to the division.

    >>> sorted(_partitions_of_index_values([0, 5, 10], [3]).items())
    [(0, [3])]
    >>> sorted(_partitions_of_index_values([0, 5, 10], [3, 8, 5]).items())
    [(0, [3]), (1, [8, 5])]
    r   Nrr   r'   r:   )	r,   r   rH   rs   rt   rl   r+   rm   rY   )rT   rG   r2   resultsru   r_   r`   r   r   r   re   L  s    
re   c                 C   sJ   | rt | d trt|S | rFt | d tjrFt|| d jS |S )zxTransform values to be comparable against divisions

    This is particularly valuable to use with pandas datetimes
    r   )r)   r   r<   Z	TimestamprJ   Z
datetime64ZastyperE   )rT   or   r   r   rh   b  s
    
rh   c                 C   s   t | stt| tjtjfs"|S tr,i }nddi}t|trt|jt	r^| j
|jdf|}n|j}t|jt	r| j
|jdf|}n|j}t||S t|t	r| j
|df|}| j
|df|}tt||t||S |S )z`
    Convert indexer for partial string selection
    if data has DatetimeIndex/PeriodIndex
    kindr9   leftright)r   r5   r)   r<   ZDatetimeIndexZPeriodIndexr   r-   rj   rc   Z_maybe_cast_slice_boundrk   rl   rm   )rL   ra   Zkind_optionrj   rk   r   r   r   rC   n  s&    


rC   )$rs   collectionsr   r   ZnumpyrJ   Zpandasr<   Zpandas.api.typesr   Zdask.array.corer   Z	dask.baser   Zdask.dataframer   Zdask.dataframe._compatr   Zdask.dataframe.corer	   r
   Zdask.dataframe.utilsr   r   r   Zdask.highlevelgraphr   Z
dask.utilsr   r   r$   r8   rf   re   rh   rC   r   r   r   r   <module>   s*   & g