U
    /e                     @   s  d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlZe	dZ
e	dZd dlmZ d dlmZmZ d dlZd dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZmZmZm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z*m+Z+m,Z,m-Z- d dl.m/Z/ dd eD dg Z0dd Z1dd Z2d3 Z4e45ddZ6d3 Z7d3 Z8ddd d!Z9d"d# e9: D Z;d$d%d&d!Z<e
=d'd e>e9D Z?d(Z@d)ZAd*ZBeB5d+d,ZCejDEd-e
jFe9fe
jGe;fe
jHe<fgZIeId.d/ ZJeId0d1 ZKeId2d3 ZLeId4d5 ZMeId6d7 ZNeId8d9 ZOeId:d; ZPejDEd<ejFe
jFe9fejGe
jGe;fgd=d> ZQejDEd<ejFe
jFe9fejGe
jGe;fgd?d@ ZRejDEd<ejFe
jFe9fejGe
jGe;fgdAdB ZSejDEdCejFe
jFe9eBfejGe
jGe;eCfgdDdE ZTdFdGgdFdGggZUdHdIgdHdIggZVejDEdJe
jFeUfe
jGeVfgdKdL ZWejDEdJe
jFeUfe
jGeVfgdMdN ZXejDEdOejFe
jFe4dfejGe
jGe6dfejGe
jGe7dPfgdQdR ZYejDEdSejFe
jFe4dTfejGe
jGe6dUdVgfgdWdX ZZejDEdSejFe
jFe4dTfejGe
jGe6dUdYgfgdZd[ Z[ejDEd<ejFe
jFe9fejGe
jGe;fgd\d] Z\ejDEd<ejFe
jFe9fejGe
jGe;fgd^d_ Z]ejDEd`ejFe9fejGe;fgdadb Z^ejDEd`ejFe9fejGe;fgdcdd Z_ejDEd`ejFe9fejGe;fgdedf Z`ejDEd`ejFe9fejGe;fgdgdh ZaejDEd`ejFe9fejGe;fgdidj Zbdkdl Zcdmdn Zddodp Zedqdr Zfdsdt Zgdudv Zhdwdx Zidydz Zjd{d| ZkejDjlejDEd}d~dgdd Zmdd Zndd Zodd ZpejDEdddgejDEde0dd ZqejDjrdd Zsdd Ztdd Zudd Zvdd Zwdd Zxdd Zydd Zzdd Z{dd Z|dd Z}dd Z~dd Zdd Zdd Zdd Zdd Zdd Zdd ZejDjddZejDEdejdedejdeddgdd Zdd Zdd Zdd ZddÄ Zddń ZddǄ ZddɄ Zdd˄ Zdd̈́ Zddτ Zddф Zddӄ ZddՄ Zddׄ Zddل Zddۄ ZejDjddddބ Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZejDEdddgdd ZejDEdddddddgdddfddgdddfgdd  Zdd ZejDEddUdgdgdddggg g g dUgg dUggd	d edD gd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZejDEdddgdd ZdS (      N)BytesIOStringIO)mockZpandaszdask.dataframe)compr)partition_allvalmap)compute_as_if_collection)
read_bytes)compress)flatten)tm)_infer_block_sizeauto_blocksize
block_maskpandas_read_texttext_blocks_to_pandas)optimize_dataframe_getitem)	assert_eqhas_known_categories)DataFrameIOLayer)filetext	filetextstmpdirtmpfile)	hlg_layerc                 C   s   g | ]}|qS  r   ).0fmtr   r   D/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/io/tests/test_csv.py
<listcomp>#   s     r   c                 C   s   d ttj|  dS )N
)joinmapstrstripsplit)sr   r   r   normalize_text&   s    r'   c                 C   s   t j| d S )N   )ospathr%   )r*   r   r   r   parse_filename*   s    r+   z
name,amount
Alice,100
Bob,-200
Charlie,300
Dennis,400
Edith,-500
Frank,600
Alice,200
Frank,-200
Bob,600
Alice,400
Frank,200
Alice,300
Edith,600
,	z
name   amount
Alice    100
Bob     -200
Charlie  300
Dennis   400
Edith   -500
Frank    600
Alice    200
Frank   -200
Bob      600
Alice    400
Frank    200
Alice    300
Edith    600
a  
Date,Open,High,Low,Close,Volume,Adj Close
2015-08-28,198.50,199.839996,197.919998,199.240005,143298900,199.240005
2015-08-27,197.020004,199.419998,195.210007,199.160004,266244700,199.160004
2015-08-26,192.080002,194.789993,188.369995,194.679993,328058100,194.679993
2015-08-25,195.429993,195.449997,186.919998,187.229996,353966700,187.229996
2015-08-24,197.630005,197.630005,182.399994,189.550003,478672400,189.550003
2015-08-21,201.729996,203.940002,197.520004,197.630005,328271500,197.630005
2015-08-20,206.509995,208.289993,203.899994,204.009995,185865600,204.009995
2015-08-19,209.089996,210.009995,207.350006,208.279999,167316300,208.279999
2015-08-18,210.259995,210.679993,209.699997,209.929993,70043800,209.929993
s3   name,amount,id
Alice,100,1
Bob,200,2
Charlie,300,3
s   name,amount,id
s4   name,amount,id
Dennis,400,4
Edith,500,5
Frank,600,6
)2014-01-01.csv2014-01-02.csv2014-01-03.csvc                 C   s   i | ]\}}|| d dqS )   ,   	)replacer   kvr   r   r   
<dictcomp>i   s      r7   sT       name  amount  id
   Alice     100   1
     Bob     200   2
 Charlie     300   3
s       name  amount  id
sT       name  amount  id
  Dennis     400   4
   Edith     500   5
   Frank     600   6
c                 C   s   g | ]}t tt| qS r   )pdread_csvr   	csv_filesr   r5   r   r   r   r   {   s     sL   # some header lines
# that may be present
# in a data file
# before any datasB   # some footer lines
# that may be present
# at the end of the files   str, int, int
r1   r2   zreader,filesc                 C   sT   |d }t | |di }t|jdddgks.tt|dks>t|j dksPtd S )Nr.       nameamountid      )r   listcolumnsAssertionErrorlenr?   sumreaderfilesbdfr   r   r   test_pandas_read_text   s
    rL   c                 C   s8   |d }t | |ddddgi}t|jddgks4td S )Nr.   r<   usecolsr=   r?   )r   rB   rC   rD   rG   r   r   r   test_pandas_read_text_kwargs   s    rN   c                 C   s0   |d }t | |di ddi}|jjdks,td S )Nr.   r<   r>   float)r   r>   dtyperD   rG   r   r   r   $test_pandas_read_text_dtype_coercion   s    rQ   c                 C   sl   |d }| dd\}}|d }t| ||i }t|jdddgksFtt|dksVt|j dkshtd S )	Nr.      
r(   r=   r>   r?   r@   rA   )r%   r   rB   rC   rD   rE   r?   rF   )rH   rI   rJ   headerrK   r   r   r   !test_pandas_read_text_with_header   s    rT   c                    s    fddt  D }i }t|  d di } d dd d }t| ||||}t|tjsbtt|j	ddd	gksztt| ||||}t|tjstt
|d
stt|jdkstt|j d d S )Nc                    s   g | ]} | gqS r   r   r;   rI   r   r   r      s     z5test_text_blocks_to_pandas_simple.<locals>.<listcomp>r.   r<   rR   r   r=   r>   r?   daskr@   i4  )sortedr   r%   r   
isinstancedd	DataFramerD   rB   rC   hasattrrE   rV   r   r>   rF   )rH   rI   blockskwargsheadrS   rK   valuesr   rU   r   !test_text_blocks_to_pandas_simple   s    r`   c                    s    fddt  D }dd |D }dddgi}t|  d d|} d d	d
 d	 }t| ||||}t|jddgks~t| }|j|jk std S )Nc                    s   g | ]} | qS r   r   r;   rU   r   r   r      s     z5test_text_blocks_to_pandas_kwargs.<locals>.<listcomp>c                 S   s   g | ]
}|gqS r   r   )r   rJ   r   r   r   r      s     rM   r=   r?   r.   r<   rR   r   )	rW   r   r%   r   rB   rC   rD   computeall)rH   rI   r\   r]   r^   rS   rK   resultr   rU   r   !test_text_blocks_to_pandas_kwargs   s    rd   c           	      C   s   |d  dd d }g }t|D ]0}|| }| d}|dd td|D  q"t| ||t i }t| j	ddtj	ddd	d
 tddg }t| ||| dddgi}t| j	dd|j	ddd	d
 d S )Nr.   rR   r   c                 S   s   g | ]}d  |qS rR   )r!   )r   bsr   r   r   r      s     z6test_text_blocks_to_pandas_blocked.<locals>.<listcomp>   TZdropFZcheck_dtyper=   r?   rM   )
r%   rW   appendr   r   expectedr^   r   ra   reset_index)	rH   rI   rS   r\   r5   rJ   linesrK   	expected2r   r   r   "test_text_blocks_to_pandas_blocked   s2    

    

ro   zdd_read,pd_read,filesc              	      sr   dd |  D }tt t|dd> | dd}t fddt|D }t||d	d
 W 5 Q R X d S )Nc                 S   s   i | ]\}}|t d  | qS re   )comment_headerr   r=   contentr   r   r   r7      s      z!test_skiprows.<locals>.<dictcomp>rJ   mode2014-01-*.csvskiprowsc                    s   g | ]} |d qS rv   r   r   npd_readskipr   r   r      s     z!test_skiprows.<locals>.<listcomp>Fri   )	itemsrE   rp   
splitlinesr   r8   concatrW   r   dd_readr|   rI   rK   expected_dfr   r{   r   test_skiprows   s    r   c              	      sd   dd |  D }t|dd< | ddd}t fdd	t|D }t||d
d W 5 Q R X d S )Nc              	   S   s(   i | ] \}}|t d  |d dd qS )rR   s     # just some comment
r(   rp   r3   rq   r   r   r   r7     s    z test_comment.<locals>.<dictcomp>rJ   rs   ru   #commentc                    s   g | ]} |d dqS )r   r   r   ry   r|   r   r   r     s     z test_comment.<locals>.<listcomp>Fri   r~   r   r8   r   rW   r   r   r   r   r   test_comment   s    r   c              	      st   dd |  D }tt t|dd@ | ddd}t fdd	t|D }t||d
d W 5 Q R X d S )Nc                 S   s   i | ]\}}||d  t  qS re   )comment_footerrq   r   r   r   r7     s      z#test_skipfooter.<locals>.<dictcomp>rJ   rs   ru   pythonZ
skipfooterenginec                    s   g | ]} |d dqS )r   r   r   ry   r{   r   r   r     s     z#test_skipfooter.<locals>.<listcomp>Fri   )	r~   rE   r   r   r   r8   r   rW   r   r   r   r{   r   test_skipfooter  s    r   zdd_read,pd_read,files,unitsc              	      sx   fdd|  D }dddddgt|dd	> | d
d}t fddt|D }t||dd W 5 Q R X d S )Nc              	      s,   i | ]$\}}|t d  |d d   d qS )rR   r(   r   rq   )unitsr   r   r7   '  s    z)test_skiprows_as_list.<locals>.<dictcomp>r   r(   rg   r@      rJ   rs   ru   rv   c                    s   g | ]} |d qS rx   r   ry   r{   r   r   r   .  s     z)test_skiprows_as_list.<locals>.<listcomp>Fri   r   )r   r|   rI   r   rK   r   r   )r|   r}   r   r   test_skiprows_as_list  s    
r   s   aa,bb
1,1.0
2,2.0s   10,20
30,40s   aa	bb
1	1.0
2	2.0s   10	20
30	40zreader,blocksc                    sn   | t |d d dd |d d dd d }t| || i }tj|dd}t fdd|D sjtd S )Nr   rS   rR   sync	schedulerc                 3   s"   | ]}|j   j  kV  qd S N)dtypesZto_dict)r   rK   r^   r   r   	<genexpr>E  s     z&test_enforce_dtypes.<locals>.<genexpr>)r   r%   r   rV   ra   rb   rD   )rH   r\   rS   dfsr   r   r   test_enforce_dtypes=  s
    r   c              	   C   s   |d |d d  dd|d d gg}| t|d d dd}|d d dd d }tt* t| |||i dd}tj|d	d
i W 5 Q R X d S )Nr   r(      a   Ar   rR   TZenforcer   r   )	r3   r   r%   pytestraises
ValueErrorr   rV   ra   )rH   r\   r^   rS   r   r   r   r   test_enforce_columnsH  s    (r   zdd_read,pd_read,text,sepz\s+c              	   C   sf   t |T}| |dtj|d}t|jddgks2t|jddjdd}t||||d	 W 5 Q R X d S )
N   )	blocksizelineterminatorsepr=   r>   r   r   Trh   r   )	r   r)   lineseprB   rC   rD   ra   rl   r   )r   r|   textr   fnfrc   r   r   r   test_read_csvZ  s
    	
r   zdd_read,pd_read,text,skip   r(      c              	   C   sB   ddg}t |(}| |||d}t|||||d W 5 Q R X d S )Nr=   r>   rw   names)r   r   r   r|   r   r}   r   r   actualr   r   r   test_read_csv_large_skiprowsk  s    
r      c                 C   s   ddg}t |}tjtdd. | |d||d }t|||||d W 5 Q R X tt, tt | |d||d W 5 Q R X W 5 Q R X W 5 Q R X d S )	Nr=   r>   zsample=blocksizematch   )r   rw   r   r   r   )r   r   warnsUserWarningra   r   r   r   r   r   r   r   .test_read_csv_skiprows_only_in_first_partitiony  s    
r   c              	   C   s\   t |ddF | d}t|tdd d}| |}|t|| }t||dd W 5 Q R X d S )NrJ   rs   ru   Fri   r.   )r   r   rk   r   )r   r|   rI   rK   r   rn   r   r   r   test_read_csv_files  s    r   c              
      sz   t  ddd t d d }t fdd|D }| |}t||dd tt | g  W 5 Q R X W 5 Q R X d S )NrJ   rs   rg   c                    s   g | ]}t  | qS r   )r   r;   rI   r|   r   r   r     s     z,test_read_csv_files_list.<locals>.<listcomp>Fri   )r   rW   r8   r   r   r   r   r   )r   r|   rI   Zsubsetsolresr   r   r   test_read_csv_files_list  s    r   zdd_read,filesc              	   C   s`   t |ddJ | dddtid}|j  }d|ks:td|ksFtd	|ksRtW 5 Q R X d S )
NrJ   rs   ru   Tr*   include_path_column
convertersr.   r/   r0   )r   r+   r*   ra   uniquerD   r   rI   rK   	filenamesr   r   r   !test_read_csv_include_path_column  s    r   c              	   C   s`   t |ddJ | dddtid}|j  }d|ks:td|ksFtd|ksRtW 5 Q R X d S )	NrJ   rs   ru   filenamer   r.   r/   r0   )r   r+   r   ra   r   rD   r   r   r   r   (test_read_csv_include_path_column_as_str  s    r   c              
   C   s>   t |dd( tt | ddd W 5 Q R X W 5 Q R X d S )NrJ   rs   ru   r=   r   )r   r   r   r   )r   rI   r   r   r   5test_read_csv_include_path_column_with_duplicate_name  s    r   c              	   C   sx   t |ddb | ddd}|jjdks*tt|js8t| ddd}| }|jjdks\tt|jsjtW 5 Q R X d S )NrJ   rs   ru   Tr   category)r   r*   rP   rD   r   ra   r   rI   rK   r   rc   r   r   r   3test_read_csv_include_path_column_is_dtype_category  s    r   c              	   C   s   t |ddt | dddd}|jdks*t|jjdks:tt|jsHt| dddd}| }|jjdksntt|js|tW 5 Q R X d S )	NrJ   rs   ru   Z10BT)r   r   r@   r   )r   npartitionsrD   r*   rP   r   ra   r   r   r   r   Ctest_read_csv_include_path_column_with_multiple_partitions_per_file  s    r   c               	   C   s   t t} tj| ddd}|jdd}|jjdks:tt	tj
|j| dd}t|D ]V\}}|t|jd k r|j|j|d  k  st|dkrZ|j|j| k sZtqZt| d}t|| W 5 Q R X d S )	N   r   r>   r   r   rg   r(   r   )r   csv_textrY   r9   	set_indexra   indexr=   rD   r   rZ   rV   Z__dask_keys__	enumeraterE   Z	divisionsrb   r8   r   )r   r   rc   r\   iblockrk   r   r   r   test_read_csv_index  s"    
   r   c               	   C   sJ   t t8} tj| tdd}|}tj| tdd}t|| W 5 Q R X d S )Nr   rv   )r   r   rY   r9   ranger8   r   )r   r   rc   rk   r   r   r   test_read_csv_skiprows_range
  s
    
r   c               	   C   s   t tp} tj| dddgd}|dg }tj| ddgd}|dg }| j|jk s\t| j|jk sttW 5 Q R X d S )Nr   HighLow)r   rM   rM   )	r   
timeseriesrY   r9   r8   ra   r_   rb   rD   )r   rK   Z	df_selectrk   Zexpected_selectr   r   r   test_usecols  s    


r   c               	   C   s`   t tN} tj| dd}tj| dd}|j|jks6ttj| dd}|jdksRtW 5 Q R X d S )NZ30Br   Z30Z64MiBr(   )r   r   rY   r9   r   rD   r   arJ   cr   r   r   test_string_blocksize  s    
r   c               	   C   sT   t d} t| :}tj|ddd}d|jks0t|j  dksFtW 5 Q R X d S )Nzm
    name, amount
    Alice,100
    Bob,-200
    Charlie,300
    Dennis,400
    Edith,-500
    Frank,600
    Tr   )skipinitialspacer   r>   iX  )	r'   r   rY   r9   rC   rD   r>   maxra   r   r   rK   r   r   r   test_skipinitialspace&  s    
r   c               	   C   sB   t d} t| (}tj|dd}|j jtks4tW 5 Q R X d S )Nzp
    name,amount
    Alice,100.5
    Bob,-200.5
    Charlie,300
    Dennis,400
    Edith,-500
    Frank,600
    r   r   )	r'   r   rY   r9   r>   ra   rP   rO   rD   r   r   r   r   test_consistent_dtypes:  s    
r   c               	   C   s`   t d} t d}t| |d8 tjddd}|jjtks>t|j jtksRtW 5 Q R X d S )Nz@
    name,amount
    Alice,100
    Bob,-200
    Charlie,300
    z8
    name,amount
    1,400
    2,-500
    Frank,600
    z	foo.1.csvz	foo.2.csv	foo.*.csv   r   )	r'   r   rY   r9   r=   rP   objectrD   ra   )text1text2rK   r   r   r   test_consistent_dtypes_2L  s    		r   c               	   C   s   t d} t d}t| |dn tjdddidd}|jjdksDtt|jrRt| }|jjdksjtt	|jj
jd	d
ddgkstW 5 Q R X d S )NzJ
    fruit,count
    apple,10
    apple,25
    pear,100
    orange,15
    zO
    fruit,count
    apple,200
    banana,300
    orange,400
    banana,10
    r   r   fruitr   r   )rP   r   ZappleZbananaZorangeZpear)r'   r   rY   r9   r   rP   rD   r   ra   rW   cat
categories)r   r   rK   r   r   r   r   test_categorical_dtypese  s    

r   c               
   C   s  t d} t d}tjjjdddgdd}t| |d tjd	d
d
dd}|jj	j
dks`t|jj	j
dksrttjtjddddddg|jdtjddddddg|jddddddddgd}t|| tjd	|d
dd}|jj	j
dkst|jj	j
dkstt|jj	j|j |jj	jdks.tt|| tjjjdddgdd}tjd	|d
dd}|d j	 |d< |jj	j
dkst|jj	j
dkst|jj	jdkstt|| tjd	tjjjddd}|jj	j
dksttjd	d
d}|jj	j
dkstW 5 Q R X d S )Nz%
    A,B
    a,a
    b,b
    a,a
    z%
    A,B
    a,a
    b,b
    c,c
    r   rJ   r   F)orderedr   r   r   )ABrP   )r   r   r(   rg   r   Tr   )r'   r8   apitypesZCategoricalDtyper   rY   r9   r   r   ZknownrD   r   rZ   ZCategoricalr   r   r   Zassert_index_equalr   Z
as_ordered)r   r   rP   rc   rk   r   r   r   test_categorical_known  sX      


 r   compressionZinfergzipc              
   C   s   t  }ttj|dd}|t  |	  ttj|dd}|t  |	  t
t tjtj|d| d}W 5 Q R X t| ttdd d kstW 5 Q R X d S )	Nza.csv.gzwbzb.csv.gzz*.csv.gzr   r    r(   rg   )r   r   openr)   r*   r!   writer   encodecloser   r   r   rY   r9   rE   ra   r%   rD   )r   tdirr   rK   r   r   r   test_compression_multiple_files  s    "r  c               	   C   sP   t d>} tj| dd}t| dks,tt|jddgksBtW 5 Q R X d S )Nza,br   r   r   rJ   )r   rY   r9   rE   ra   rD   rB   rC   r   rK   r   r   r   test_empty_csv_file  s    
r	  c               	   C   sB   t tdd,} tj| dd}t|jdddgks4tW 5 Q R X d S )NrJ   rs   Fsampler=   r>   r?   )r   r:   rY   r9   rB   rC   rD   r  r   r   r   test_read_csv_no_sample  s    r  c               	   C   sH   t tdd2 tjddd} tjddd}| j|jks:tW 5 Q R X d S )NrJ   rs   ru   Tr   F)r   r:   rY   r9   _namerD   r   rJ   r   r   r   "test_read_csv_sensitive_to_enforce  s    r  r   
   r   c              
      s   | r| t krtd|   ddddd| d | rBtt |  tnt} fdd	| D }t|d
dp | r|rtt	 t
jd  |d}W 5 Q R X nt
jd  |d}t|jddjddtjdddd W 5 Q R X d S )Nz%compress function not provided for %sz.gzz.bz2z.zipz.xz)r   bz2zipxz c                    s   i | ]\}}|  |qS r   r   r4   suffixr   r   r7     s      z-test_read_csv_compression.<locals>.<dictcomp>rJ   rs   ru   r   r   r   Trh   Fri   )r
   r   r}   getr   r:   r~   r   r   r   rY   r9   r   ra   rl   rk   )r   r   files2Zrenamed_filesrK   r   r  r   test_read_csv_compression  s    
r  c                  C   s   t td t} t| dd tt"}tjddd}|j	dksDt
W 5 Q R X t|dks^t
t|d j}d|ksxt
d	|kst
tjd
d}tjddd d}W 5 Q R X |rt
tt* tt tjddd}W 5 Q R X W 5 Q R X W 5 Q R X d S )Nr   rJ   rs   ru   r  r@   r(   r   zblocksize=NoneT)record)r   r   foo)r   r
   r:   r   r   r   r   rY   r9   r   rD   rE   r#   messagewarningscatch_warningsr   NotImplementedError)r  wrK   msgr  r   r   r   test_warn_non_seekable_files  s    r"  c               	   C   sX   d} t | B}tj|ddd}|j  dks4t|j  dksJtW 5 Q R X d S )Nz!a,b
1,2
2,3
3,4
4,5
5,6
6,7r   z
)r   r         )r   rY   r9   rJ   rF   ra   rD   r   r   r   r   r   test_windows_line_terminator  s
    
r%  c               	   C   sL   d} t d| i2 tjdddd}tjddd}t||dd W 5 Q R X d S )	Nzid0,name0,x0,y0
id,name,x,y
1034,Victor,-0.25,0.84
998,Xavier,-0.48,-0.13
999,Zelda,0.00,0.47
980,Alice,0.67,-0.98
989,Zelda,-0.04,0.03
ztest_header_int.csvr(   @   )rS   r   r   FZcheck_index)r   rY   r9   r8   r   )r   rK   rk   r   r   r   test_header_int  s    	r(  c               	   C   s\   t ddddB tjdd d} tddgd	d
gd}t|  jdd| W 5 Q R X d S )Nz1,2r  z3,4)z
.tmp.1.csvz
.tmp.2.csvz
.tmp.3.csvz
.tmp.*.csvr   r(   r@   rg      )r   r(   Trh   )r   rY   r9   r8   rZ   r   ra   rl   )rK   rk   r   r   r   test_header_None.  s    r*  c                   C   s<   t tddtsttdddks&ttdddks8td S )Ni     r@   d   i  rg      )rX   r   intrD   r   r   r   r   test_auto_blocksize5  s    r/  c                    s@   t d}G dd d  fdd}| |d| t s<tdS )zp
    psutil returns a total memory of `None` on some systems
    see https://github.com/dask/dask/pull/7601
    psutilc                   @   s   e Zd ZdZdS )z*test__infer_block_size.<locals>.MockOutputN)__name__
__module____qualname__totalr   r   r   r   
MockOutputB  s   r5  c                      s    S r   r   r   r5  r   r   mock_virtual_memoryE  s    z3test__infer_block_size.<locals>.mock_virtual_memoryvirtual_memoryN)r   importorskipsetattrr   rD   )monkeypatchr0  r7  r   r6  r   test__infer_block_size;  s
    
r<  c                  C   s,   t dd} | tdkstt| ts(td S )Nl    J)r@   g    A)r   r.  rD   rX   r   r   r   r   test_auto_blocksize_max64mbL  s    
r=  c              	   C   s   t d}| j}| }tjtd}| t	j
jjd| t||}ttdd0 td |jsht|jd d |ks~tW 5 Q R X d S )	Nr0  )wrapsr	   rJ   rs   r.   r(   r   )r   r9  r8  r4  	cpu_countr   ZMockr	   r:  rV   Z	dataframeiocsvr   r   r:   rY   r9   calledrD   Z	call_args)r;  r0  Ztotal_memoryr?  Zmock_read_bytesZexpected_block_sizer   r   r   test_auto_blocksize_csvR  s    




rC  c               	   C   sP   ddd} t | 4 tjddd tjddd}|jd	k sBtW 5 Q R X d S )
NzAa,b
0,'abcdefghijklmnopqrstuvwxyz'
1,'abcdefghijklmnopqrstuvwxyz'z.a,b
111111,-11111
222222,-22222
333333,-33333
).overflow1.csv.overflow2.csvrD  4   r
  rE  #   i8)r   rY   r9   r   rb   rD   )rI   rK   r   r   r   test_head_partial_line_fix`  s    
rI  c               
   C   sR   d} zt |  dstW n2 tk
rL } z| t|ks<tW 5 d }~X Y nX d S )Nz.not.a.real.file.csvF)rY   r9   rD   OSErrorr#   r   er   r   r    test_read_csv_raises_on_no_filesr  s    
rM  c               	   C   s   t t} t| }t| }|j|jks.tt|j t	dt|j t	dksVtt
|jt	sfttj| ddgd}|j|jkstW 5 Q R X d S )Nkeyr(   r   )rw   Z	na_values)r   r   rY   r9   r  rD   rW   rV   keysr#   rX   r   r   r   r   $test_read_csv_has_deterministic_name{  s    


(rQ  c               	   C   sZ   t ttdB td} td}t| j tdt|j tdksLtW 5 Q R X d S )N)z
_foo.1.csvz
_foo.2.csvz
_foo.*.csvrN  )	r   r   rY   r9   rW   rV   rP  r#   rD   r  r   r   r   -test_multiple_read_csv_has_deterministic_name  s    

rR  c               	   C   sD   t t2} tj| dd}tj| dd}|j|jks6tW 5 Q R X d S )NZ10kBr   Z20kB)r   r   rY   r9   r  rD   )r   r   rJ   r   r   r   4test_read_csv_has_different_names_based_on_blocksize  s    
rS  c               	   C   s<   t d*} tj| d d}t|jddgks.tW 5 Q R X d S )Nzalice,1
bob,2r   r   r(   )r   rY   r9   rB   rC   rD   r  r   r   r   test_csv_with_integer_names  s    
rT  c               
   C   s  d} t dD ]}| d7 } q| d7 } d}t| }t|}d}tt }tj|ddgd	jd
d W 5 Q R X |	|| st
tt}tj|ddjd
d W 5 Q R X |	|st
d}tt"}tj|dddidjd
d W 5 Q R X t|j|kst
tt&}tj|ddgddidjd
d W 5 Q R X t|j|| ksTt
d}tt*}tj|ddgtttddjd
d W 5 Q R X t|j|kst
tj|dtttdd}t|| W 5 Q R X d S )Nz*numbers,names,more_numbers,integers,dates
  z1,,2,3,2017-10-31 00:00:00
z"1.5,bar,2.5,3,4998-01-01 00:00:00
a\  

-------------------------------------------------------------

The following columns also failed to properly parse as dates:

- dates

This is usually due to an invalid value in that column. To
diagnose and fix it's recommended to drop these columns from the
`parse_dates` keyword, and manually convert them to dates later
using `dd.to_datetime`.aU  Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.

+--------------+---------+----------+
| Column       | Found   | Expected |
+--------------+---------+----------+
| more_numbers | float64 | int64    |
| names        | object  | float64  |
| numbers      | float64 | int64    |
+--------------+---------+----------+

- names
  ValueError(.*)

Usually this is due to dask's dtype inference failing, and
*may* be fixed by specifying dtypes manually by adding:

dtype={'more_numbers': 'float64',
       'names': 'object',
       'numbers': 'float64'}

to the call to `read_csv`/`read_table`.2   dates)r  parse_datesr   r   r
  aa  Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.

+--------------+---------+----------+
| Column       | Found   | Expected |
+--------------+---------+----------+
| more_numbers | float64 | int64    |
| numbers      | float64 | int64    |
+--------------+---------+----------+

Usually this is due to dask's dtype inference failing, and
*may* be fixed by specifying dtypes manually by adding:

dtype={'more_numbers': 'float64',
       'numbers': 'float64'}

to the call to `read_csv`/`read_table`.

Alternatively, provide `assume_missing=True` to interpret
all unspecified integer columns as floats.r   O)r  rP   )r  rX  rP   aQ  Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.

The following columns failed to properly parse as dates:

- dates

This is usually due to an invalid value in that column. To
diagnose and fix it's recommended to drop these columns from the
`parse_dates` keyword, and manually convert them to dates later
using `dd.to_datetime`.)Zmore_numbersr   numbers)r   r   r8   r9   r   r   r   rY   ra   r   rD   r#   valuerO   r   r   )r   _Zdate_msgr   r   r!  rL  r   r   r   r   test_late_dtypes  sb    

$ &   

r]  c               	   C   s  d} t dD ]}| d7 } q| d7 } t| z}t|}tj|ddd}t||dti tj|dddd	id
}t|| tj|ddd d
}t||dti W 5 Q R X d} t dD ]}| d7 } q| d7 } t| 4}t|}tj|dd	dd}|jj	d	kst
W 5 Q R X d S )Nz$numbers,names,more_numbers,integers
rU  z
1,foo,2,3
z1.5,bar,2.5,3
rV  T)r  assume_missingZintegersZint64)r  r^  rP   znumbers,integers
z1,2
z1.5,2
r   )r  rP   r^  )r   r   r8   r9   rY   r   astyperO   rZ  rP   rD   )r   r\  r   r   r   rK   r   r   r   test_assume_missing  s2    


   



r`  c                  C   sh   t tV} ztj| ddd ds$tW n2 tk
rX } zdt|ksHtW 5 d }~X Y nX W 5 Q R X d S )Nr   r=   )r   	index_colFr   )r   r   rY   r9   rD   r   r#   rK  r   r   r   test_index_col5  s    
rb  c               	   C   s   t tr} tj| ddddgdgd}tj| dddgdgddd}t|| tj| dddgdgdd}t|| W 5 Q R X d S )Nr   r)  Datera  rS   rM   rX  i rS   rM   rX  r   )rS   rM   rX  r   r   r8   r9   rY   r   r   r   rK   ddfr   r   r   0test_read_csv_with_datetime_index_partitions_one>  s,    
        
ri  c               	   C   s\   t tJ} tj| ddddgdgd}tj| dddgdgddd}t|| W 5 Q R X d S )Nr   r)  rc  rd  i  re  rf  rg  r   r   r   .test_read_csv_with_datetime_index_partitions_nP  s$    
        rj  z(https://github.com/dask/dask/issues/5787)reasonencodingzutf-16)Zmarksz	utf-16-lez	utf-16-bec           	   	   C   s   t tdd}|d }|d }|d }t ||||d}tdV}|j|| dd	 t j|| d
}tj|| dd}| }tt	|j
|_
t|| W 5 Q R X d S )Nr   r,  r   gffffff
@gZӼ?)r   rJ   r   d.csvF)rl  r   )rl  rU  )rl  r   )r8   Seriesr   rZ   r   to_csvr9   rY   ra   rE   r   r   )	rl  arbrZcrZdrZtest_dfr   r   rm  r   r   r   test_encoding_gh601_  s    	
rs  c               	   C   sh   d dd} t| J}tj|dd}t|tj|dd tj|dd}t|tj|dd W 5 Q R X d S )Nza b c-d
1 2 3
4 5 6 r-   r   )	delimiter)r3   r   rY   r9   r   r8   r   r   r   r   test_read_csv_header_issue_823x  s    
rv  c               	   C   s:   t t(} tj| d d}t|tj| d d W 5 Q R X d S )Nr   )r   r   rY   r9   r   r8   r  r   r   r   test_none_usecols  s    
rw  c               	   C   sp   t d} t| V}tj|ddggd}tj|ddggd}|j|jk sNtt|t|ksbtW 5 Q R X d S )NaX  
    ID,date,time
    10,2003-11-04,180036
    11,2003-11-05,125640
    12,2003-11-01,2519
    13,2003-10-22,142559
    14,2003-10-24,163113
    15,2003-10-20,170133
    16,2003-11-11,160448
    17,2003-11-03,171759
    18,2003-11-07,190928
    19,2003-10-21,84623
    20,2003-10-25,192207
    21,2003-11-13,180156
    22,2003-11-15,131037
    datetime)rX  	r'   r   rY   r9   r8   rC   rb   rD   rE   )Z	pdmc_textr   rh  rK   r   r   r   test_parse_dates_multi_column  s    
r{  c               	   C   sh   t d} t| N}tj|ddd}tj|ddd}|j|jk sFtt|t|ksZtW 5 Q R X d S )NzB
    name###amount
    alice###100
    bob###200
    charlie###300z###r   )r   r   rz  )Zsep_textr   rh  rK   r   r   r   test_read_csv_sep  s    
r|  c               	   C   sJ   dd d } t | dd(}tj|d dddd	gd
djdd W 5 Q R X d S )Ns   0,my
1,data
rU  s	   2,foobarr   rs   r,   r    r   rJ   r   )rS   r   r   r   r   r   r   )r   rY   r9   ra   datar   r   r   r   test_read_csv_slash_r  s    r  c               	   C   s>   d} t | dd$}ttj|tdtj|td W 5 Q R X d S )Ns   a,b
1,2
3,4
5,6r   rs   r   )r   r   r8   r9   rO   rY   r}  r   r   r   test_read_csv_singleton_dtype  s    r  c               	   C   s   t  } t| d }| | dd| |< t| ddF tjdd ddd	d
gd}td}|j|jk	 slt
t|| W 5 Q R X d S )N   names   NamerJ   rs   ru   r(   r=   r>   r?   )rS   rw   r   r.   )r:   copyrW   r3   r   rY   r9   r8   rC   rb   rD   r   )rI   r5   rh  rK   r   r   r   test_robust_column_mismatch  s       
r  c               	   C   s   t  } t| d }| | dd| |< t| ddF td}|jddd	gk sXt	|
 jddd	d
gk svt	W 5 Q R X d S )Nr  r  s   addressrJ   rs   ru   r=   r>   r?   address)r:   r  rW   r3   r   rY   r9   rC   rb   rD   ra   )rI   r5   rh  r   r   r   "test_different_columns_are_allowed  s    
r  c               
   C   s   d} t | N}d}tt tj||d W 5 Q R X ttj||d dtj|d d W 5 Q R X d}||  } t | \}dt| }tt tj||dd W 5 Q R X ttj||d dd	tj|d dd
 W 5 Q R X d S )NzAAAAAA,BBBBB,CCCCC,DDDDD,EEEEE
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15r   r
  )r  rS   r   z# skip
# these
# lines
r@   )r  rw   )r  rS   rw   )rS   rw   )	r   r   r   r   rY   r9   r   r8   rE   )r   r   r  skiptextr   r   r   !test_error_if_sample_is_too_small  s&    
 
r  c               	   C   sP   d} ddg}t | 2}tj||dd}tj||d}t||dd W 5 Q R X d S )	Nz?Alice,100
Bob,-200
Charlie,300
Dennis,400
Edith,-500
Frank,600
r=   r>      )r   r   )r   Fr'  )r   rY   r9   r8   r   )r   r   r   rh  rK   r   r   r   test_read_csv_names_not_none  s    
r  c               
      sr  t ddddgddddgd	} d
D ]F}t| |}t > |j dd ttj	 d
 jdd}t||  W 5 Q R X t r |j ddd}tj
|ddi}|t fddt|D kstttj	 d
 jdd}t||  W 5 Q R X t b tj	 d}|j|dd}| fddt|D ksBtt|
 jdd}t||  W 5 Q R X q$d S )Nr   rJ   r   rm  r(   rg   r@   r)  xyr(   rg   Fr   *Trh   )r   ra   r   r   c                 3   s"   | ]}t j | d V  qdS )z.partNr)   r*   r!   ry   dnr   r   r   !  s    ztest_to_csv.<locals>.<genexpr>
data_*.csvc                    s"   g | ]}t j d | dqS )Zdata_rn  r  ry   r  r   r   r   *  s    ztest_to_csv.<locals>.<listcomp>)r8   rZ   rY   from_pandasr   rp  r9   r)   r*   r!   ra   rl   r   rV   tupler   rD   )rK   r   r   rc   rpathsr   r   r  r   test_to_csv  s.     
  r  c                  C   s  t ddddgddddgd	} t| d}t 4}tt tj	
|d
}|| W 5 Q R X W 5 Q R X t ddddddddddddddddgdddddddddddddd d!d"gd	}t|d"}t B}tj	
|d#}|j|d$d% t| jd&d'}t|| W 5 Q R X t| d}t R}|j|d$d% tj	
|d#}|j|d(d$d) t| jd&d'}t||  W 5 Q R X t|d"}t R}|j|d$d% tj	
|d#}|j|d(d$d) t| jd&d'}t|| W 5 Q R X d S )*Nr   rJ   r   rm  r(   rg   r@   r)  r  zdata_*_*.csvrL  r   ghr   jr5   lmrz   opr   rA   r      	   r     r   r      r+  r  r  Fr   Trh   r   )rt   r   )r8   rZ   rY   r  r   r   r   r   r)   r*   r!   rp  r9   ra   rl   r   )rK   r   r  r   Zdf16rc   r   r   r   &test_to_csv_multiple_files_cornercases1  s^     "r  c               
   C   s   t ddddgddddgd	} d
D ]}t| |}t D}tj|d}|j|ddd t	|
 jdd}t||  W 5 Q R X t T}tj|d}|j|dddd}tj
|dd t	|
 jdd}t||  W 5 Q R X q$d S )Nr   rJ   r   rm  r(   rg   r@   r)  r  r  test.csvFT)r   single_filerh   )r   ra   r  r   r   )r8   rZ   rY   r  r   r)   r*   r!   rp  r9   ra   rl   r   rV   )rK   r   r   r  r   rc   r  r   r   r   test_to_single_csvj  s     r  c               
   C   s   t ddddgddddgd	} t| d}t D}tj|d
}tj	t
dd |j|dd ddd W 5 Q R X W 5 Q R X d S )Nr   rJ   r   rm  r(   rg   r@   r)  r  r  z9name_function is not supported under the single file moder   c                 S   s   | S r   r   )r  r   r   r   <lambda>  r<   z7test_to_single_csv_with_name_function.<locals>.<lambda>FT)Zname_functionr   r  r8   rZ   rY   r  r   r)   r*   r!   r   r   r   rp  rK   r   r  r   r   r   r   %test_to_single_csv_with_name_function}  s     r  c               
   C   s|   t ddddgddddgd	} t| d}t @}tj|d
}tj	t
dd |j|dddd W 5 Q R X W 5 Q R X d S )Nr   rJ   r   rm  r(   rg   r@   r)  r  r  zDheader_first_partition_only cannot be False in the single file mode.r   FT)r   header_first_partition_onlyr  r  r  r   r   r   3test_to_single_csv_with_header_first_partition_only  s        r  c               
   C   s   t ddddgddddgd	} d
D ]b}t| |}t F}tj|d}|j|dddd t j	|ddj
dd}t||  W 5 Q R X q$d S )Nr   rJ   r   rm  r(   rg   r@   r)  r  r  ztest.csv.gzFr   T)r   r   r  r  rh   )r8   rZ   rY   r  r   r)   r*   r!   rp  r9   rl   r   )rK   r   r   r  r   rc   r   r   r   test_to_single_csv_gzip  s     r  z#to_csv does not support compressionc               
   C   s   t jddddgddddgd	d
dddgd} dD ]N}t| |}td0}|j|dd t j|ddd}t||  W 5 Q R X q0d S )Nr   rJ   r   rm  r(   rg   r@   r)  r        ?       @      @      @r   r  rA  r   r  r   )ra  r   )	r8   rZ   rY   r  r   rp  r9   r   Zassert_frame_equal)rK   r   r   r   rc   r   r   r   test_to_csv_gzip  s     

r  c               	   C   s   t jddddgddddgd	d
dddgd} tj| dd}t Z}tjt|d}|	| dt
|ksptt
|s~tttj|d }W 5 Q R X |jj| jjk std S )Nr   rJ   r   rm  r(   rg   r@   r)  r  r  r  r  r  r   r   Zcreatemer  )r8   rZ   rY   r  r   r)   r*   r!   r#   rp  listdirrD   r9   ra   r  r_   rb   )df0rK   dirZdir0rc   r   r   r   test_to_csv_nodir  s     

"r  c               	   C   s   t jddddgddddgd	d
dddgd} tj| dd}t >}t|}|| t|sbt	t
tj|d }W 5 Q R X |jj| jjk st	d S )Nr   rJ   r   rm  r(   rg   r@   r)  r  r  r  r  r  r   r  r  )r8   rZ   rY   r  r   r#   rp  r)   r  rD   r9   r*   r!   ra   r  r_   rb   r  rK   r  rc   r   r   r   test_to_csv_simple  s     

"r  c               	   C   s   t jddddgddddgd	} tj| d
d}t J}t|}|j|dd t|sXt	tj
tj|dd dgd }W 5 Q R X |j| k st	d S )Nr   rJ   r   rm  r  r  r  r  r   rg   r  Fr   r  r  )rS   r   )r8   ro  rY   r  r   r#   rp  r)   r  rD   r9   r*   r!   ra   r  rb   r  r   r   r   test_to_csv_series  s    *r  c               	      s   ddl m dg  fdd} tdddd	gd
dddgd}tj|dd}t H}|j|dd| id  d svtt	t
j|d}t||dd W 5 Q R X d S )Nr   r  Fc                     s   d d< | |S )NTr   r   argsr]   flagmp_getr   r   my_get  s    z$test_to_csv_with_get.<locals>.my_getr   rJ   r   rm  r(   rg   r@   r)  r  r  r   r   compute_kwargsr  r'  )dask.multiprocessingr  r8   rZ   rY   r  r   rp  rD   r9   r)   r*   r!   r   )r  rK   rh  r  rc   r   r  r   test_to_csv_with_get  s     r  c               
      s   ddl m  tddddgddd	d
gd} tj| dd} fdd}t ,}tt	 |j
|d|d W 5 Q R X W 5 Q R X d S )Nr   r  r   rJ   r   rm  r(   rg   r@   r)  r  r  c                     s
    | |S r   r   r  r  r   r   r    s    z:test_to_csv_warns_using_scheduler_argument.<locals>.my_getF)r   r   )r  r  r8   rZ   rY   r  r   r   r   FutureWarningrp  rK   rh  r  r  r   r  r   *test_to_csv_warns_using_scheduler_argument  s     r  c               
      s   ddl m  tddddgddd	d
gd} tj| dd} fdd}t <}tt	o`t
t |j|d|d|id W 5 Q R X W 5 Q R X d S )Nr   r  r   rJ   r   rm  r(   rg   r@   r)  r  r  c                     s
    | |S r   r   r  r  r   r   r    s    z@test_to_csv_errors_using_multiple_scheduler_args.<locals>.my_getFr   )r   r   r  )r  r  r8   rZ   rY   r  r   r   r   r   r   r  rp  r  r   r  r   0test_to_csv_errors_using_multiple_scheduler_args  s        r  c               	      sr   ddl m   fdd} tddddgd	d
ddgd}tj|d
d}t }|j|d| ddd W 5 Q R X d S )Nr   r  c                     s   |d dkst  | |S )Ntest_kwargs_passedfoobar)rD   r  r  r   r   r    s    zBtest_to_csv_keeps_all_non_scheduler_compute_kwargs.<locals>.my_getr   rJ   r   rm  r(   rg   r@   r)  r  r  Fr  )r   r  r  )r  r  r8   rZ   rY   r  r   rp  )r  rK   rh  r  r   r  r   2test_to_csv_keeps_all_non_scheduler_compute_kwargs	  s     r  c                  C   sf   t dtdi} tj| dd}|d}|d ds<t|d d	sNtt	d t	d	 d S )
Nr   r  rg   r  foo*.csvr   foo0.csvr(   zfoo1.csv)
r8   rZ   r   rY   r  rp  endswithrD   r)   remove)rK   rh  r  r   r   r   test_to_csv_paths  s    

r  zheader, expected)Fr  )Tx,y
c              
   C   s   t g g d}tj|dd}t z}|jtj|dd| d tj	tj|drZt
tj|d}t|}| }||kst
W 5 Q R X t| W 5 Q R X d S )	Nr  r(   r  z	fooe*.csvFr   rS   z	fooe1.csvz	fooe0.csv)r8   rZ   rY   r  r   rp  r)   r*   r!   existsrD   r  readliner  )rS   rk   ZdfeZddfer  r   fpliner   r   r   "test_to_csv_header_empty_dataframe&  s    
r  z?header,header_first_partition_only,expected_first,expected_next)FFa,1
d,4
)TFr  r  )FTr  r  )TTr  r  ZaaZbbFzaa,bb
Tr  c              
   C   s   d}t ddddddgddd	d
ddgd}tj||d}t }|jtj|dd| |d tj|d}t	|}	|	
 }
|
|kstW 5 Q R X t| tj|d}t	|}	|	
 }
|
|kstW 5 Q R X t| W 5 Q R X d S )Nrg   r   rJ   r   rm  rL  r   r(   r@   r)  r   rA   r  r  z	fooa*.csvF)r   rS   r  z	fooa0.csvz	fooa1.csv)r8   rZ   rY   r  r   rp  r)   r*   r!   r  r  rD   r  )rS   r  Zexpected_firstZexpected_nextZpartition_countrK   rh  r  r   r  r  r   r   r   test_to_csv_header5  s(    (


r  c               
   C   s   t ddgi} tj| dd}ddh}t L}|jtj|dddd	 tj|d
}t	|d}|
 }W 5 Q R X W 5 Q R X ||kstd S )Nr  r   r(   r  s   0
s   0
r  F)rS   r   r  rb)r8   rZ   rY   r  r   rp  r)   r*   r!   r  readrD   )rK   rh  rk   r  r   r   rawr   r   r   test_to_csv_line_ending]  s    r  block_listsrg   r@   r)  r   rA   c                 C   s   g | ]}t t|qS r   )rB   r   r   r   r   r   r   r   v  s     c                 C   s,   t t| }t|tt t| ks(td S r   )rB   r   rE   r   rD   )r  maskr   r   r   test_block_maskp  s    
r  c                  C   s   t  } tdddgD ]:\}}ttj| t|d d}|| W 5 Q R X qtj	tj| dddt
id	d
dgd }tddgddgddgddgd}|d d|d< t||dd W 5 Q R X d S )Nz0, 1, 2r  z6, 7, 8rn  r   *.csvTr*   r   r   C)r   r   r   r   rA   r(   r   rg   r  z0.csvz2.csv)r   r   r  r*   r   Fr'  )r   r   r  r)   r*   r!   r#   r  rY   r9   r+   ra   r8   rZ   r_  r   )r  r5   rr   filerc   rK   r   r   r   &test_reading_empty_csv_files_with_path~  s&    
r  c                 C   s^   t jt| d}tdddg}|j|dd t|}|	d}t
||d  d S )Nr  r  r)  )r  barFr   r  )r)   r*   r!   r#   r8   rZ   rp  rY   r9   groupbyr   Z	get_groupra   )r   r*   df1Zddf1Zddfsr   r   r   test_read_csv_groupby_get_group  s    

r  c                 C   s~   t jt| d}td}ttt|}tdd t	||D g}|
| td}t|| d}t|| | d S )Nr  abcdefghijklmnopqrstuvwxyzc                 S   s   i | ]\}}||qS r   r   )r   r   r6   r   r   r   r7     s      z1test_csv_getitem_column_order.<locals>.<dictcomp>Zhczzkylaaper(   )r)   r*   r!   r#   rB   r   rE   r8   rZ   r  rp  rY   r9   r^   r   )r   r*   rC   r_   r  Zdf2r   r   r   test_csv_getitem_column_order  s    
r  c               	   C   s   t t} t| }||d dk dg }t| }||d dk dg }t|j|jgd}t|d}t	|t
sttt|jddhkstt|| W 5 Q R X d S )Nr   g     i@r   )rP  zread-csv)r   r   r8   r9   rY   r   rV   r  r   rX   r   rD   setrC   r   )r   expectrh  ZdskZsubgraph_rdr   r   r   &test_getitem_optimization_after_filter  s    



r  c              	   C   s   t jt| d}d}tt|}t|d}|| W 5 Q R X t	j
tdd tj|dd W 5 Q R X tj|ddd	}t|| d S )
Nr  s   a,b
1,"hi
"
2,"oi
"
r   zEOF encounteredr   r   r
  r(   )r  Zsample_rows)r)   r*   r!   r#   r8   r9   r   r  r  r   r   r   rY   r   )r   r*   r~  rk   r   rK   r   r   r   test_csv_parse_fail  s    r  c           	      C   sD  dd l }ddlm} tjt| d}tjt| d}t|dN}tdD ]>}|	|
ddd	d
|
ddd	d
|
ddd	d qLW 5 Q R X ||| t|dN}tdD ]>}|	|
ddd	d
|
ddd	d
|
ddd	d qW 5 Q R X tj|d dtd d}tj|d dtd d}|j |j ks@td S )Nr   )copyfilezold.csvZnew_csvr   r  r(   i ʚ;Z09z, r    r   r@   r,   )rS   ru  rP   r   )randomshutilr  r)   r*   r!   r#   r  r   r  	randrangerY   r9   rV   rP  rD   )	r   r  r  Zold_csv_pathZnew_csv_pathr   r\  Znew_dfZold_dfr   r   r   6test_csv_name_should_be_different_even_if_head_is_same  s<    2
2        r  c                 C   s   dd t ddD dd t ddD d}tj|d}t| d }t d	D ]}|j| d
| ddd qJtj|d dd}t|jt	|jgd	  d S )Nc                 S   s   g | ]}|qS r   r   r  r   r   r   r     s     z8test_select_with_include_path_column.<locals>.<listcomp>r   r,  r   )col1Zcol2)r~  /rA   file_rn  Fr   r  Tr   )
r   r8   rZ   r#   rp  rY   r9   r   r  r   )r   rm  rK   Z	temp_pathr   rh  r   r   r   $test_select_with_include_path_column  s    *r  	use_namesc                 C   s   t d}|r dddg}ddg}nd  }}tjt| d}tj|d dj|ddd tj|d	||d
}tj|d	||dd}t	||dd d S )Nz    city1,1992-09-13,10
    city2,1992-09-13,14
    city3,1992-09-13,98
    city4,1992-09-13,13
    city5,1992-09-13,45
    city6,1992-09-13,64
    Zcityrx  Zsalesz	input.csvr   Fr  r   )rS   r   rM   <   )rS   r   rM   r   r'  )
r   r)   r*   r!   r#   r8   r9   rp  rY   r   )r   r  rA  r   rM   r*   rK   rh  r   r   r   test_names_with_header_0  s$    

	r  )r   r)   r  r@  r   r   Zunittestr   r   r9  r8   rY   Zfsspec.compressionr   Ztlzr   r   rV   Z	dask.baser   Zdask.bytes.corer	   Zdask.bytes.utilsr
   Z	dask.corer   Zdask.dataframe._compatr   Zdask.dataframe.io.csvr   r   r   r   r   Zdask.dataframe.optimizer   Zdask.dataframe.utilsr   r   Zdask.layersr   Z
dask.utilsr   r   r   r   Zdask.utils_testr   Zcompression_fmtsr'   r+   r$   r   r3   Ztsv_textZ	tsv_text2r   r:   r~   Z	tsv_filesZ	fwf_filesr   rW   rk   rp   r   Zcsv_units_rowZtsv_units_rowmarkZparametrizer9   Z
read_tableZread_fwfZcsv_and_tablerL   rN   rQ   rT   r`   rd   ro   r   r   r   r   Z
csv_blocksZ
tsv_blocksr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zslowr  r	  r  r  r  r}   r"  r%  r(  r*  r/  r<  r=  rC  rI  rM  rQ  rR  rS  rT  r]  r`  rb  ri  rj  ZxfailZxfail_pandas_100paramrs  rv  rw  r{  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r   r   r   r   <module>   s  












	


 
 

	



 
 
 
 
 


?
	v#	

9

	"