U
    /e                     @   sn   d dl Z d dlZd dlmZmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZ dd
dZdd ZdS )    N)OpenFileget_fs_token_paths)infer_compression
read_block)tokenize)delayed)
is_integerparse_bytesF128 MiB10 kiBc              	      s"  t | ttttjfstdt| d|d\}}	t|	dkrHt	d|  |dk	rzt |trbt
|}t|srtdt|}|dkrdggt|	 }
dggt|	 }n8g }
g }|	D ]( dkrt}n }|dk	rtd	d
 }|dkrtdq|dkr|
g  |g  q|| r@||kr@|||  }n|}d}dg}g }|| |d d kr||7 }|t| ||d |d   qR|||d   |rd|d< |d  d8  < |
| || qttg }t|	|
|D ]b\}}t| |fdd|D } fddt|||D }|| q|r
|dkrnd}t |trt
|}t|	d  dp}dkr||}nT||}||}|sʐq|kr||dd   }q|| }q|}W 5 Q R X |r|||	fS ||fS )aU	  Given a path or paths, return delayed objects that read from those paths.

    The path may be a filename like ``'2015-01-01.csv'`` or a globstring
    like ``'2015-*-*.csv'``.

    The path may be preceded by a protocol, like ``s3://`` or ``hdfs://`` if
    those libraries are installed.

    This cleanly breaks data by a delimiter if given, so that block boundaries
    start directly after a delimiter and end on the delimiter.

    Parameters
    ----------
    urlpath : string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    delimiter : bytes
        An optional delimiter, like ``b'\n'`` on which to split blocks of
        bytes.
    not_zero : bool
        Force seek of start-of-file delimiter, discarding header.
    blocksize : int, str
        Chunk size in bytes, defaults to "128 MiB"
    compression : string or None
        String like 'gzip' or 'xz'.  Must support efficient random access.
    sample : int, string, or boolean
        Whether or not to return a header sample.
        Values can be ``False`` for "no sample requested"
        Or an integer or string value like ``2**20`` or ``"1 MiB"``
    include_path : bool
        Whether or not to include the path with the bytes representing a particular file.
        Default is False.
    **kwargs : dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> sample, blocks = read_bytes('2015-*-*.csv', delimiter=b'\n')  # doctest: +SKIP
    >>> sample, blocks = read_bytes('s3://bucket/2015-*-*.csv', delimiter=b'\n')  # doctest: +SKIP
    >>> sample, paths, blocks = read_bytes('2015-*-*.csv', include_path=True)  # doctest: +SKIP

    Returns
    -------
    sample : bytes
        The sample header
    blocks : list of lists of ``dask.Delayed``
        Each list corresponds to a file, and each delayed object computes to a
        block of bytes from that file.
    paths : list of strings, only included if include_path is True
        List of same length as blocks, where each item is the path to the file
        represented in the corresponding block.

    z3Path should be a string, os.PathLike, list or tuplerb)modeZstorage_optionsr   z%s resolved to no filesNzblocksize must be an integerZinferzHCannot do chunked reads on compressed files. To read, set blocksize=NonesizezfBacking filesystem couldn't determine file size, cannot do chunked reads. To read, set blocksize=None.      c                    s   g | ]}d | d  qS )zread-block-- ).0o)tokenr   3/tmp/pip-unpacked-wheel-dbjnr7gq/dask/bytes/core.py
<listcomp>   s     zread_bytes.<locals>.<listcomp>c              	      s.   g | ]&\}}}t  d |||dqS )compression)Zdask_key_name)r   )r   r   keyl)r   delayed_read	delimiterfspathr   r   r      s   Tr   r   )
isinstancestrlisttupleosPathLike	TypeErrorr   lenOSErrorr	   r   intr   
ValueErrorinfoappendr   read_block_from_filezipr   Zukeyr   readsplit)Zurlpathr   Znot_zero	blocksizesampler   Zinclude_pathkwargsZfs_tokenpathsoffsetslengthscompr   Z
blocksize1ZplaceofflengthoutoffsetkeysvaluesfZsample_buffnewr   )r   r   r   r    r!   r   r   
read_bytes   s    B













rB   c              
   C   sT   t  | @}|dkr0|d kr0| W  5 Q R  S t||||W  5 Q R  S Q R X d S )Nr   )copyr1   r   )Z	lazy_filer:   bsr   r@   r   r   r   r/      s    r/   )NFr
   r   NF)rC   r&   Zfsspec.corer   r   Zfsspec.utilsr   r   Z	dask.baser   Zdask.delayedr   Z
dask.utilsr   r	   rB   r/   r   r   r   r   <module>   s         
 1