U
    /e                     @   s   d dl Z d dlmZmZ d dlmZ d dlZd dlZd dl	m
Z
 d dlmZ zd dlmZ W n ek
rt   dZY nX dd Zdd	 ZdddZdd Zdd Zdd ZdddZeG dd deZdS )    N)Protocolruntime_checkable)uuid4)LocalFileSystem)parsec                 C   s   | ot | tpt| S )z'Check if an fsspec file-system is local)
isinstancer   _is_local_fs_pyarrowfs r   ;/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/io/utils.py_is_local_fs   s    
r   c                 C   s0   | r,t | drt| jS t | dr,| jdkS dS )z-Check if a pyarrow-based file-system is localr
   	type_namelocalF)hasattrr   r
   r   r	   r   r   r   r      s    



r   Fc              	   C   s0  |rddl m} |j}ndd }| jdk	o2d| jk}|r|t| jd d}dd	 |d
g D }dd	 |d
g D }ni }i }	tt| D ]}
| |
 }|j	|kr|j	|krt
jg ddj||j	 j}n
||j	 }n*z||j}W n tk
r   Y qY nX ||	|j	< q|r,|D ]}d|	|< q|	S )z4Convert a pyarrow.Schema object to pandas dtype dictr   )PYARROW_NULLABLE_DTYPE_MAPPINGc                 S   s   |   S )N)Zto_pandas_dtype)tr   r   r   <lambda>,       z%_get_pyarrow_dtypes.<locals>.<lambda>Ns   pandasutf8c              	   S   s&   i | ]}| d | dd|d qS )
field_namenameNZ
numpy_typeget.0cr   r   r   
<dictcomp>2   s    z'_get_pyarrow_dtypes.<locals>.<dictcomp>columnsc              	   S   sB   i | ]:}|d  dkr|d r| d| dd|d  ddqS )Zpandas_type)datetimeZ
datetimetzmetadatar   r   Ntimezoner   r   r   r   r   r   6   s      zM8[ns]dtypecategory)Zdask.dataframe.io.parquet.arrowr   r   r    jsonloadsdecoderangelenr   pdSeriesdtZtz_localizer#   typeNotImplementedError)Zschema
categoriesZuse_nullable_dtypesr   Ztype_mapperZhas_pandas_metadataZpandas_metadataZpandas_metadata_dtypestzZdtypesifieldZnumpy_dtypecatr   r   r   _get_pyarrow_dtypes%   s>    




r4   c                    s   fdd| D   fdd|p"g D }t |dkr:d}n<t |dkrf|d }|d dkrv|d |_ntjj||d	}tj |d
}|r||j_|S )aN  Get the final metadata for the dask.dataframe

    Parameters
    ----------
    to_read_columns : list
        All the columns to end up with, including index names
    file_dtypes : dict
        Mapping from column name to dtype for every element
        of ``to_read_columns``
    index_cols : list
        Subset of ``to_read_columns`` that should move to the
        index
    column_index_names : list
        The values for df.columns.name for a MultiIndex in the
        columns, or df.index.name for a regular Index in the columns

    Returns
    -------
    meta : DataFrame
    c              	      s$   i | ]}|t jg  |d dqS )Zint64r"   )r*   r+   r   r   )file_dtypesr   r   r   p   s     z%_meta_from_dtypes.<locals>.<dictcomp>c                    s   g | ]}  |qS r   )popr   )datar   r   
<listcomp>s   s     z%_meta_from_dtypes.<locals>.<listcomp>r   N   Z__index_level_0__)names)index)r)   r   r*   Z
MultiIndexZfrom_arraysZ	DataFramer   r:   )Zto_read_columnsr5   Z
index_colsZcolumn_index_namesZindexesr;   Zdfr   )r7   r5   r   _meta_from_dtypes[   s    
r<   c                   C   s   t  jS )z0Simple utility function to get random hex string)r   hexr   r   r   r   _guid   s    r>   c                 C   s   |dkr| S | | S )z5Helper function to place an object on a context stackN)enter_context)objstackr   r   r   _set_context   s    rB   c           	         s   dk	r fdd| D S |p$i   }|dd}|dkrĈdk	rtsttjtdkrĈ| ddpdgt|  }dd	}|d	krtd
| d fddt	| |D S dk	r fdd| D S  fdd| D S )a  Return a list of open-file objects given
    a list of input-file paths.

    WARNING: This utility is experimental, and is meant
    for internal ``dask.dataframe`` use only.

    Parameters
    ----------
    paths : list(str)
        Remote or local path of the parquet file
    fs : fsspec object, optional
        File-system instance to use for file handling
    context_stack : contextlib.ExitStack, Optional
        Context manager to use for open files.
    open_file_func : callable, optional
        Callable function to use for file opening. If this argument
        is specified, ``open_file_func(path, **kwargs)`` will be used
        to open each file in ``paths``. Default is ``fs.open``.
    precache_options : dict, optional
        Dictionary of key-word arguments to use for precaching.
        If ``precache_options`` contains ``{"method": "parquet"}``,
        ``fsspec.parquet.open_parquet_file`` will be used for remote
        storage.
    **kwargs :
        Key-word arguments to pass to the appropriate open function
    Nc                    s   g | ]}t |f qS r   )rB   r   path)context_stackkwargsopen_file_funcr   r   r8      s   z%_open_input_files.<locals>.<listcomp>methodparquetz	2021.11.0
row_groups
cache_typepartsz<'parts' `cache_type` required for 'parquet' precaching, got .c                    s.   g | ]&\}}t tj|f|d  qS ))r
   rJ   )rB   fsspec_parquetZopen_parquet_file)r   rD   ZrgsrE   r
   rF   r   r   r8      s   
c                    s    g | ]}t j|f qS r   rB   openrC   rO   r   r   r8      s     c                    s   g | ]}t t|f qS r   rP   rC   )rE   rF   r   r   r8      s     )
copyr6   r   parse_versionfsspec__version__updater)   
ValueErrorzip)	pathsr
   rE   rG   Zprecache_optionsrF   ZprecacherJ   rK   r   )rE   r
   rF   rG   r   _open_input_files   s6    #	


rZ   c                   @   s,   e Zd ZdZedd Zdd Zdd ZdS )	DataFrameIOFunctionzkDataFrame IO function with projectable columns

    Enables column projection in ``DataFrameIOLayer``.
    c                 C   s   t dS )z$Return the current column projectionNr.   )selfr   r   r   r      s    zDataFrameIOFunction.columnsc                 C   s   t dS )zUReturn a new DataFrameIOFunction object
        with a new column projection
        Nr\   )r]   r   r   r   r   project_columns   s    z#DataFrameIOFunction.project_columnsc                 O   s   t dS )z Return a new DataFrame partitionNr\   )r]   argsrF   r   r   r   __call__   s    zDataFrameIOFunction.__call__N)__name__
__module____qualname____doc__propertyr   r^   r`   r   r   r   r   r[      s
   
r[   )F)NNNN)r%   typingr   r   uuidr   rT   Zpandasr*   Zfsspec.implementations.localr   Zpackaging.versionr   rS   Zfsspec.parquetrI   rN   ImportErrorr   r   r4   r<   r>   rB   rZ   r[   r   r   r   r   <module>   s.   
	
6*	    
N