U
    f/e)                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddl	Z	ddl
Zddl
mZ ddlmZ dd Ze	jjd	d
 Zdd Ze	jjdd ZdS )zd
Tests multithreading behaviour for reading and
parsing files for each parser defined in parsers.py
    )	ExitStack)BytesIO)
ThreadPoolN)	DataFramec                 C   s\   t tj| dtdd}d|d< d|d< d|d< tjd| dd	|d
< tj| dd|d< |S )z
    Construct a DataFrame for testing.

    Parameters
    ----------
    num_rows : int
        The number of rows for our DataFrame.

    Returns
    -------
    df : DataFrame
       Zabcde)columnsZfoobarZbazz20000101 09:00:00s)ZperiodsfreqdateZint64)Zdtypeint)r   nprandomZrandlistpdZ
date_rangeZarange)num_rowsdf r   L/tmp/pip-unpacked-wheel-tiezk1ph/pandas/tests/io/parser/test_multi_thread.py_construct_dataframe   s    r   c           	   	      s   | }d d} fddt |D }t Rfdd|D }td}||j|}|d }|D ]}t|| qdW 5 Q R X d S )Ni'  d   c                    s(   g | ] }d  dd t D  qS )
c                 s   s(   | ] }|d d|d d|d V  qdS )d,Nr   .0ir   r   r   	<genexpr>/   s     zBtest_multi_thread_string_io_read_csv.<locals>.<listcomp>.<genexpr>)joinrangeencode)r   _)max_row_ranger   r   
<listcomp>.   s   z8test_multi_thread_string_io_read_csv.<locals>.<listcomp>c                    s   g | ]}  t|qS r   )enter_contextr   )r   b)stackr   r   r#   5   s        r   )r   r   r$   r   mapread_csvtmassert_frame_equal)	all_parsersparserZ	num_filesZbytes_to_dffilespoolresultsZfirst_resultresultr   )r"   r&   r   $test_multi_thread_string_io_read_csv'   s    
r2   c              	      sx   fdd} fddt D }td}|||}W 5 Q R X |d j}|dd D ]
}	||	_q^t|}
|
S )	a  
    Generate a DataFrame via multi-thread.

    Parameters
    ----------
    parser : BaseParser
        The parser object to use for reading the data.
    path : str
        The location of the CSV file to read.
    num_rows : int
        The number of rows to read per task.
    num_tasks : int
        The number of tasks to use for reading this DataFrame.

    Returns
    -------
    df : DataFrame
    c                    sB   | \}}|s" j dd|dgdS  j ddt|d |dgdS )aj  
        Create a reader for part of the CSV.

        Parameters
        ----------
        arg : tuple
            A tuple of the following:

            * start : int
                The starting row to start for parsing CSV
            * nrows : int
                The number of rows to read.

        Returns
        -------
        df : DataFrame
        r   r   )	index_colheadernrowsparse_datesN   	   )r3   r4   Zskiprowsr5   r6   )r)   r   )argstartr5   )r-   pathr   r   readerT   s"        
z0_generate_multi_thread_dataframe.<locals>.readerc                    s    g | ]} |    fqS r   r   r   )r   	num_tasksr   r   r#   v   s    z4_generate_multi_thread_dataframe.<locals>.<listcomp>)Z	processesr   r7   N)r   r   r(   r   r   concat)r-   r;   r   r=   r<   Ztasksr/   r0   r4   rfinal_dataframer   )r   r=   r-   r;   r    _generate_multi_thread_dataframe@   s    "

rA   c              	   C   sV   d}d}| }d}t |}t|*}|| t||||}t|| W 5 Q R X d S )N   i z__thread_pool_reader__.csv)r   r*   Zensure_cleanZto_csvrA   r+   )r,   r=   r   r-   	file_namer   r;   r@   r   r   r   )test_multi_thread_path_multipart_read_csv   s    
   rD   )__doc__
contextlibr   ior   Zmultiprocessing.poolr   Znumpyr   ZpytestZpandasr   r   Zpandas._testingZ_testingr*   r   markZslowr2   rA   rD   r   r   r   r   <module>   s   
F