U
    f/emA                     @  sf  d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	 ddl
mZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZmZmZm Z  dddddZ!d2dddddddddZ"G dd dZ#G dd de#Z$G d d! d!e#Z%eej&d" d#d3d&d'dd(d)dd*d+d,d-d.Z'eej&d" d#d4dddd/d0d1Z(dS )5z parquet compat     )annotationsN)AnyAnyStr)catch_warnings)FilePathOrBufferStorageOptions)import_optional_dependencyAbstractMethodError)doc)	DataFrame
MultiIndex
get_option)generic)Version)	IOHandles
get_handleis_fsspec_urlis_urlstringify_pathstrBaseImpl)enginereturnc                 C  s   | dkrt d} | dkr|ttg}d}|D ]D}z| W   S  tk
rj } z|dt| 7 }W 5 d}~X Y q(X q(td| | dkrt S | dkrt S td	dS )
zreturn our implementationautozio.parquet.engine z
 - NzUnable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:pyarrowfastparquetz.engine must be one of 'pyarrow', 'fastparquet')r   PyArrowImplFastParquetImplImportErrorr   
ValueError)r   Zengine_classesZ
error_msgsZengine_classerr r#   5/tmp/pip-unpacked-wheel-tiezk1ph/pandas/io/parquet.py
get_engine%   s$    $
r%   rbFr   r   r   boolz.tuple[FilePathOrBuffer, IOHandles | None, Any])pathfsstorage_optionsmodeis_dirr   c                 C  s   t | }t|r:|dkr:td}|jj|f|p0i \}}n|rVt|rN|dkrVtdd}|s|st|trt	j
|st||d|d}d}|j}|||fS )zFile handling for PyArrow.Nfsspecr&   z8storage_options passed with buffer, or non-supported URLFZis_textr*   )r   r   r   coreZ	url_to_fsr   r!   
isinstancer   osr(   isdirr   handle)r(   r)   r*   r+   r,   path_or_handler-   handlesr#   r#   r$   _get_path_or_handleG   s8    

	   r6   c                   @  s6   e Zd ZeddddZddddZd
dd	ZdS )r   r   )dfc                 C  sx   t | tstdt | jtr>tdd | jjD sRtdn| jjdkrRtdtdd | jj	D }|sttdd S )	Nz+to_parquet only supports IO with DataFramesc                 s  s   | ]}|j d kV  qdS )>   stringemptyN)inferred_type).0xr#   r#   r$   	<genexpr>v   s    z.BaseImpl.validate_dataframe.<locals>.<genexpr>z
                    parquet must have string column names for all values in
                     each level of the MultiIndex
                    >   r8   r9   z%parquet must have string column namesc                 s  s    | ]}|d k	rt |tV  qd S N)r0   r   )r;   namer#   r#   r$   r=      s     z!Index level names must be strings)
r0   r   r!   columnsr   alllevelsr:   indexnames)r7   Zvalid_namesr#   r#   r$   validate_dataframen   s     
zBaseImpl.validate_dataframec                 K  s   t | d S r>   r	   )selfr7   r(   compressionkwargsr#   r#   r$   write   s    zBaseImpl.writeNc                 K  s   t | d S r>   r	   )rF   r(   r@   rH   r#   r#   r$   read   s    zBaseImpl.read)N)__name__
__module____qualname__staticmethodrE   rI   rJ   r#   r#   r#   r$   r   m   s   c                   @  s>   e Zd Zdd Zdddddd	d
dddZdd	dddZdS )r   c                 C  s&   t ddd dd l}dd l}|| _d S )Nr   z(pyarrow is required for parquet support.extrar   )r   Zpyarrow.parquetZpandas.core.arrays._arrow_utilsapi)rF   r   pandasr#   r#   r$   __init__   s     zPyArrowImpl.__init__snappyNr   zFilePathOrBuffer[AnyStr]
str | Nonebool | Noner   list[str] | None)r7   r(   rG   rC   r*   partition_colsc                 K  s   |  | d|dd i}|d k	r*||d< | jjj|f|}	t||dd |d|d k	d\}
}|d< zH|d k	r| jjj|	|
f||d| n| jjj	|	|
fd|i| W 5 |d k	r|  X d S )NZschemaZpreserve_index
filesystemwb)r*   r+   r,   )rG   rX   rG   )
rE   poprQ   ZTableZfrom_pandasr6   closeparquetZwrite_to_datasetZwrite_table)rF   r7   r(   rG   rC   r*   rX   rH   Zfrom_pandas_kwargstabler4   r5   r#   r#   r$   rI      sB    


	 
zPyArrowImpl.writeFr*   c                 K  sH  d|d< i }|rdd l }| j | | j | | j | | j |	 | j
 | | j | | j | | j | | j | | j | i
}|j|d< td}	|	dkrd|d< t||dd |d	d
\}
}|d< zB| jjj|
fd|i|jf |}|	dkr(|jddd}|W S |d k	rB|  X d S )NTZuse_pandas_metadatar   Ztypes_mapperzmode.data_managerarrayZsplit_blocksrY   r&   )r*   r+   r@   F)copy)rR   rQ   Zint8Z	Int8DtypeZint16Z
Int16DtypeZint32Z
Int32DtypeZint64Z
Int64DtypeZuint8Z
UInt8DtypeZuint16ZUInt16DtypeZuint32ZUInt32DtypeZuint64ZUInt64DtypeZbool_ZBooleanDtyper8   ZStringDtypegetr   r6   r[   r\   r]   Z
read_table	to_pandasZ_as_manager)rF   r(   r@   use_nullable_dtypesr*   rH   Zto_pandas_kwargspdmappingmanagerr4   r5   resultr#   r#   r$   rJ      sd              



zPyArrowImpl.read)rT   NNN)NFNrK   rL   rM   rS   rI   rJ   r#   r#   r#   r$   r      s       /   r   c                   @  s6   e Zd Zdd Zdddddd	Zddd
ddZdS )r   c                 C  s   t ddd}|| _d S )Nr   z,fastparquet is required for parquet support.rO   )r   rQ   )rF   r   r#   r#   r$   rS      s
     zFastParquetImpl.__init__rT   Nr   r   )r7   r*   c              	     s   |  | d|kr$|d k	r$tdnd|kr6|d}|d k	rFd|d< t|}t|rrtd  fdd|d< nr~td	td
d$ | jj||f|||d| W 5 Q R X d S )Npartition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning dataZhiveZfile_schemer-   c                   s    j | dfpi   S )NrZ   openr(   _r-   r*   r#   r$   <lambda>#  s
    z'FastParquetImpl.write.<locals>.<lambda>	open_withz?storage_options passed with file object or non-fsspec file pathT)record)rG   Zwrite_indexrj   )	rE   r!   r[   r   r   r   r   rQ   rI   )rF   r7   r(   rG   rC   rX   r*   rH   r#   ro   r$   rI     s8    


zFastParquetImpl.writer_   c           
        s   | dd}|rtdt|}i }d }t|r|td t| jjtdkrh j|dfp\i j	|d< q fdd	|d
< n,t
|trtj|st|ddd}|j}| jj|f|}|jf d|i|}	|d k	r|  |	S )Nrd   FzNThe 'use_nullable_dtypes' argument is not supported for the fastparquet enginer-   z0.6.1r&   r)   c                   s    j | dfpi   S )Nr&   rk   rm   ro   r#   r$   rp   I  s
    z&FastParquetImpl.read.<locals>.<lambda>rq   r.   r@   )r[   r!   r   r   r   r   rQ   __version__rl   r)   r0   r   r1   r(   r2   r   r3   ZParquetFilerc   r\   )
rF   r(   r@   r*   rH   rd   Zparquet_kwargsr5   Zparquet_filerh   r#   ro   r$   rJ   5  s>        zFastParquetImpl.read)rT   NNN)NNri   r#   r#   r#   r$   r      s       3   r   r*   r_   r   rT   r   zFilePathOrBuffer | NonerU   rV   rW   zbytes | None)r7   r(   r   rG   rC   r*   rX   r   c           
      K  sr   t |tr|g}t|}|dkr(t n|}	|j| |	f||||d| |dkrjt |	tjsbt|	 S dS dS )a  
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str or file-like object, default None
        If a string, it will be used as Root Directory path
        when writing a partitioned dataset. By file-like object,
        we refer to objects with a write() method, such as a file handle
        (e.g. via builtin open function) or io.BytesIO. The engine
        fastparquet does not accept file-like objects. If path is None,
        a bytes object is returned.

        .. versionchanged:: 1.2.0

    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    compression : {{'snappy', 'gzip', 'brotli', None}}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.
    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.
    {storage_options}

        .. versionadded:: 1.2.0

    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    bytes if no path argument is provided else None
    N)rG   rC   rX   r*   )r0   r   r%   ioBytesIOrI   AssertionErrorgetvalue)
r7   r(   r   rG   rC   r*   rX   rH   implZpath_or_bufr#   r#   r$   
to_parquet]  s&    9

ry   )r   r*   rd   c                 K  s"   t |}|j| f|||d|S )a  
    Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handle (e.g. via builtin ``open`` function)
        or ``StringIO``.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    columns : list, default=None
        If not None, only these columns will be read from the file.

    {storage_options}

        .. versionadded:: 1.3.0

    use_nullable_dtypes : bool, default False
        If True, use dtypes that use ``pd.NA`` as missing value indicator
        for the resulting DataFrame (only applicable for ``engine="pyarrow"``).
        As new dtypes are added that support ``pd.NA`` in the future, the
        output with this option will change to use those dtypes.
        Note: this is an experimental option, and behaviour (e.g. additional
        support dtypes) may change without notice.

        .. versionadded:: 1.2.0

    **kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    DataFrame
    )r@   r*   rd   )r%   rJ   )r(   r   r@   r*   rd   rH   rx   r#   r#   r$   read_parquet  s    ;rz   )Nr&   F)Nr   rT   NNN)r   NNF))__doc__
__future__r   rt   r1   typingr   r   warningsr   Zpandas._typingr   r   Zpandas.compat._optionalr   Zpandas.errorsr
   Zpandas.util._decoratorsr   rR   r   r   r   Zpandas.corer   Zpandas.util.versionr   Zpandas.io.commonr   r   r   r   r   r%   r6   r   r   r   Z_shared_docsry   rz   r#   r#   r#   r$   <module>   sF   	%   &$ic       O    