U
    /eL                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlmZ e
dZe
dZe
jddd	Ze
d
 e
dZd dlmZ d dlmZmZ d dlmZ d dlmZmZ zd dlmZ W n e k
r   dZY nX d dl!m"Z" d dl#m$Z$ d dl%m&Z& e	e"ddZ"dZ'dddZ(edd Z)e
j*dd Z+dZ,e
j*ddd d! Z-e
j*d"d# Z.ee'e(fd$d%Z/e
* e
j0j1d&d' Z2d(d) Z3d*d+ Z4d,d- Z5d.d/ Z6d0d1 Z7d2d3 Z8d4d5 Z9e
j0:d6d7d8d9d:gd;d< Z;e
j0:d6d7d8d9d:gd=d> Z<e
j0:d?d@dA eD dBdA eD  dCdD Z=e
j0:dEdFdGgdHdI Z>dJdK Z?dLdM Z@e
j0:dNdOdPge
j0:dQdRdSgdTdU ZAe
j0:dNdOdPgdVdW ZBe
j0:dNdOdPgdXdY ZCdS )Z    N)contextmanager)partial)parses3fsboto3motoz1.3.14)Z
minversionZflaskrequests)compr)get_fs_token_paths
open_files)S3FileSystem)concatvalmap)compute)
read_bytes)compresssync)Z	schedulertests   {"amount": 100, "name": "Alice"}
{"amount": 200, "name": "Bob"}
{"amount": 300, "name": "Charlie"}
{"amount": 400, "name": "Dennis"}
s   {"amount": 500, "name": "Alice"}
{"amount": 600, "name": "Bob"}
{"amount": 700, "name": "Charlie"}
{"amount": 800, "name": "Dennis"}
)ztest/accounts.1.jsonztest/accounts.2.jsonc               	   c   s2   t tj} z
dV  W 5 tj  tj|  X dS )z
    Get a context manager to safely set environment variables
    All changes will be undone on close, hence environment variables set
    within this contextmanager will neither persist nor change global state.
    N)dictosenvironclearupdate)Zsaved_environ r   </tmp/pip-unpacked-wheel-dbjnr7gq/dask/bytes/tests/test_s3.py!ensure_safe_environment_variables5   s
    


r   c                   C   s   t ddidS )Nendpoint_urlhttp://127.0.0.1:5555/)client_kwargs)r   r   r   r   r   s3soD   s    r   r   module)Zscopec               	   c   s   t   dtjd< dtjd< tjtdtjd} d}zt	t
}|jrLW qW n tk
rb   Y nX |d8 }td |d	ks6td
q6d V  |   z| jdd W n< tjk
r   |   tjdkrtd| j d Y nX W 5 Q R X d S )NZ
foobar_keyZAWS_ACCESS_KEY_IDZfoobar_secretZAWS_SECRET_ACCESS_KEYzmoto_server s3 -p 5555)stdout   g?r   z!Timed out waiting for moto server   )timeoutwin32zTASKKILL /F /PID z /T)r   r   r   
subprocessPopenshlexsplitDEVNULLr   getendpoint_uriok	ExceptiontimesleepAssertionError	terminatewaitTimeoutExpiredkillsysplatformcallpid)procr$   rr   r   r   s3_baseL   s2    

 


r<   c              	   c   s   t  }|V  W 5 Q R X d S N)
s3_context)r<   fsr   r   r   s3p   s    r@   c              
   c   s   t jdtd}|j| dd | D ]\}}|j| ||d q$tjdddid	}tj  |	  z
|V  W 5 |j
| dd
 X d S )Nr@   r   zpublic-read-write)BucketZACLrB   ZKeyZBodyTr   r   )anonr   )	recursive)r   clientr,   Zcreate_bucketitems
put_objectr   r   Zclear_instance_cacheZinvalidate_cacheZrm)ZbucketfilesrF   fdatar?   r   r   r   r>   v   s     

r>   c                 c   s  t d}t d}ddddddddddddd	d
ddddddddddddddddddddddddddddddddddddddddddd d!d"dd#d$d%d&d'dddddddd(d)d*d+d,dd-dddddddddddd.d/d0d0d0dd0d0d0d0d0dd1d1d1d1d1dd2d3d4d5d6dd7}||}||d8d9}t }t|}|j	|d:d; d<}t
jd=d>d?}	|	jt||d@ dA}tddBD ]&}
|dC |	jt||
|d@ qdDV  dDS )Ez
    Fixture with sample yellowtrip CSVs loaded into S3.

    Provides the following CSVs:

    * s3://test/nyc-taxi/2015/yellow_tripdata_2015-01.csv
    * s3://test/nyc-taxi/2014/yellow_tripdata_2015-mm.csv
      for mm from 01 - 12.
    numpypandas      )r   rO   rN   r#      z2015-01-15 19:05:39z2015-01-10 20:33:38z2015-01-10 20:33:39z2015-01-15 19:23:42z2015-01-10 20:53:28z2015-01-10 20:43:41z2015-01-10 20:35:31z2015-01-10 20:52:58gq=
ףp?gffffff
@g?g      ?g      @g    Rg    Rg   `}Rg   Rg   '~Rg   `D@g    \D@g   fD@g   `^[D@g   @aD@Ng   b~Rg   Rg   |Rg  FRg   DRg   @`D@g   *aD@g   `iD@g   (\D@g   @_D@g      (@g      -@g      #@g      @g      .@g      ?g      
@g       @g        g333333?g1@g1@g%@g333333@gL0@)ZVendorIDZtpep_pickup_datetimeZtpep_dropoff_datetimeZpassenger_countZtrip_distanceZpickup_longitudeZpickup_latitudeZ
RateCodeIDZstore_and_fwd_flagZdropoff_longitudeZdropoff_latitudeZpayment_typeZfare_amountextraZmta_taxZ
tip_amountZtolls_amountZimprovement_surchargeZtotal_amount   i'  Findexz)nyc-taxi/2015/yellow_tripdata_2015-01.csvr@   r   rA   rC   z.nyc-taxi/2014/yellow_tripdata_2014-{:0>2d}.csv   r   N)pytestimportorskip	DataFrameZtakearangerepeatioBytesIOTextIOWrapperZto_csvr   rF   rH   test_bucket_namerangeseekformat)r@   nppdrK   sampledffilesfilekeyrF   ir   r   r   s3_with_yellow_tripdata   s    

9


rk   c               	   C   s   t ddd} | jdkst| jdks(tt ddd} | jdksBt| jdksPttt t ddd W 5 Q R X tt t ddd W 5 Q R X d S )Nri   secret)ri   rl   )usernamepassword)ri   rm   )rl   rn   )DaskS3FileSystemri   r1   rl   rW   raisesKeyError)r@   r   r   r   test_get_s3   s    rr   c           
   
   C   s   dd t D }t|fddi|}t|t  D ]"\}}|}|| W 5 Q R X q0tdt d f|\}}tt| }	t	t
t  t	|	kstd S )Nc                 S   s   g | ]}d t  d | qS )s3://z/more/)r_   .0rJ   r   r   r   
<listcomp>   s     z)test_open_files_write.<locals>.<listcomp>modewbrs   z/more/test/accounts.*)rI   r   zipvalueswriter   r_   r   r   setlistr1   )
r@   r   pathsZfilsZfilrK   rJ   re   rz   resultsr   r   r   test_open_files_write   s    
r   c                 C   s   t dt d f|\}}t|ts&t|d d tttd  d d ksNt|ds\tt|tt	fsntt|d tt	fstt
|d d dstttt|ttksttt| }t|tt kstd S )Nrs   /test/accounts.*rS   r      
dask)r   r_   
isinstancebytesr1   rI   sortedendswithr}   tuplehasattrsummaplenr   r   r|   rz   )r@   r   re   rz   r   r   r   r   test_read_bytes   s    (r   c                 C   s   t dt d fddd|\}}|ds0tt dt d fddd|\}}|ds`tt dt d fddd|\}}|dstd S )Nrs   r   P   r   )re   	delimiterz/test/accounts.1.jsonrN   )r   r_   r   r1   )r@   r   re   rz   r   r   r    test_read_bytes_sample_delimiter  s6    





r   c              	   C   s.   t t tdt d f| W 5 Q R X d S )Nrs   z/non-existing/*)rW   rp   IOErrorr   r_   )r@   r   r   r   r   !test_read_bytes_non_existing_glob#  s    r   c                 C   s>   t dt d fdd i|\}}ttt|ttks:td S )Nrs   r   	blocksize)r   r_   r   r   r   rI   r1   )r@   r   _rz   r   r   r   test_read_bytes_blocksize_none(  s    

r   c                 C   sh   t dt dfd dd|\}}t|dks2tt dt dfd dd|\}}t|dksdtd S )Nrs   z*/nyc-taxi/2015/yellow_tripdata_2015-01.csvT)r   rD   rO   z/nyc-taxi/2014/*.csv   )r   r_   r   r1   )rk   r   r   Lr   r   r   'test_read_bytes_blocksize_on_large_data/  s$    



r   r   rS      -   i  c                    s   t dt d fd i|\}}ttt| fddt D ksHttt	| }t
dd |D t
dd t D kstd	|d
}d	t d
}t|t|kstd S )Nrs   z/test/account*r   c                    s   g | ]}t t|  d qS )rO   )maxr   ru   vr   r   r   rv   F  s    z)test_read_bytes_block.<locals>.<listcomp>c                 s   s   | ]}t |V  qd S r=   r   ru   r;   r   r   r   	<genexpr>K  s     z(test_read_bytes_block.<locals>.<genexpr>c                 s   s   | ]}t |V  qd S r=   r   r   r   r   r   r   K  s         r   )r   r_   r}   r   r   rI   rz   r1   r   r   r   joinr)   r|   )r@   r   r   r   valsr   ourlines	testlinesr   r   r   test_read_bytes_blockA  s    

,r   c                 C   sn  t dt d f|dd|\}}t dt d f|dd|\}}dd t|D dd t|D kslttt| }d	d |D }td
d |D std|d}ddd tt	D d}	||	kstd}
t dt d f||
d|\}}tt| }dd |D }t
dd |D t|d ks:td|}ddd tt	D }||ksjtd S )Nrs   z/test/accounts*r   )r   r   s   fooc                 S   s   g | ]
}|j qS r   ri   )ru   ar   r   r   rv   `  s     z-test_read_bytes_delimited.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   r   )ru   br   r   r   rv   `  s     c                 S   s   g | ]}|r|qS r   r   r   r   r   r   rv   c  s      c                 s   s   | ]}| d V  qdS )r   Nr   r   r   r   r   r   d  s     z,test_read_bytes_delimited.<locals>.<genexpr>r   c                 s   s   | ]}t | V  qd S r=   rI   ru   kr   r   r   r   f  s        }c                 S   s   g | ]}|r|qS r   r   r   r   r   r   rv   r  s      c                 s   s   | ]}| d V  qdS )r   Nr   r   r   r   r   r   t  s     rN   c                 s   s   | ]}t | V  qd S r=   r   r   r   r   r   r   v  s     )r   r_   r   r1   r   allr   r)   r   rI   r   r   )r@   r   r   r   rz   Zvalues2r   resr   r   dZoursr   r   r   r   test_read_bytes_delimitedR  sL    



(

$
r   zfmt,blocksizec                 C   s   g | ]}|d fqS r=   r   ru   fmtr   r   r   rv   |  s     rv   c                 C   s   g | ]}|d fqS )
   r   r   r   r   r   rv   |  s     c              
   C   s   |t krtd | j  tdtt | t |rr|rrtt	 t
d||d| W 5 Q R X W 5 Q R  d S t
d||d|\}}|tttd  d d st|dsttt| }d|dd	d
 ttD kstW 5 Q R X d S )Nz!compression function not providedr   s3://compress/test/accounts.*)compressionr   r   r   r   r   c                 S   s   g | ]}t | qS r   r   r   r   r   r   rv     s     z$test_compression.<locals>.<listcomp>)r   )r   )r   rW   skip_cacher   r>   r   rI   rp   
ValueErrorr   
startswithr   r1   r   r   r   r   )r@   r   r   r   re   rz   r   r   r   r   test_compressionz  s6    

  
"r   rw   rtrbc           	   
   C   s   t dt d fd|i|}t|ttks0tt|ttD ]F\}}|4}| }t| }|dkrn||kszn| sztW 5 Q R X q>d S )Nrs   r   rw   r   )	r   r_   r   rI   r1   ry   r   readdecode)	r@   rw   r   ZmyfilesZ	lazy_filepathrJ   rK   Zsolr   r   r   test_open_files  s    
r   c                 C   s   | d S )NrN   r   )xr   r   r   <lambda>  r   r   c              	   C   s   t dtZ td
ddi|\}}tdddi|\}}dd t|D dd t|D ks`tW 5 Q R X t dttt tdddi|\}}W 5 Q R X dd t|D d	d t|D kstd S )Nr   r   rD   Tc                 S   s   g | ]
}|j qS r   _keyru   Zaar   r   r   rv     s     z5test_modification_time_read_bytes.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   r   )ru   Zbbr   r   r   rv     s     c                 S   s   g | ]
}|j qS r   r   r   r   r   r   rv     s     c                 S   s   g | ]
}|j qS r   r   )ru   ccr   r   r   rv     s     )r   )r   )r   )r>   rI   r   r   r1   r   double)r@   r   r   r   r   cr   r   r   !test_modification_time_read_bytes  s    2 r   enginepyarrowZfastparquetmetadata_fileTFc              	      s  dd l }td}td}td}t|}t|j}	|dkrX|	tdk rXtd |dkr|	jdkrt|jtd	krtd
 dt }
|j|j	d|j
d|j	d|jd|j	d|jd|jjdddgdddd|j|	dddd}|j|dd}|j|
|||d dd | |
D }|rHd|ks:td|ksHtd|ksVt|j|
dd||d }t|jd!ks~t|j|| trtt& |j|
||d"d#dd$id%  W 5 Q R X |j|
||d"d#d&d'id%  t|
|d(d  dd) fd*d+
}tt" |j|
|||d,d-d%  W 5 Q R X |j|
||d.|id%}|j|| |j|
||d/d0id%}|j|| d S )1Nr   dask.dataframerM   rL   r   z0.13.1z*pyarrow < 0.13.1 not supported for parquetrN   z0.5.0z1#7056 - new s3fs not supported before pyarrow 3.0s3://%s/test.parquet  ZdtypehelloyoupeoplesizeOi32Zi64rJ   ZbhelloZfoo)namerT     	chunksize)r   storage_optionswrite_metadata_filec                 S   s   g | ]}| d d qS /r)   rt   r   r   r   rv     s     z test_parquet.<locals>.<listcomp>_common_metadata	_metadatapart.0.parquetT)rU   Zcalculate_divisionsr   r   rO   Zprecache_optionsparquet)methodr   )r   r   Zopen_file_optionsi@  )r   Z	max_block)r   )checkc                    s   | st  j||S r=   )r1   open)r   argskwargsr?   r   r   _open  s    ztest_parquet.<locals>._openF)open_file_funcr   r   Z
cache_typer   )r   rW   rX   parse_version__version__r   majorr_   rY   rZ   int32int64float64randomchoiceastypeZIndexfrom_pandas
to_parquetlsr1   read_parquetr   Z	divisionsutils	assert_eqfsspec_parquetrp   r   r   r
   )r@   r   r   r   r   ddrd   rc   libZlib_versionurlrK   rf   rI   df2r   Zdf3Zdf4r   r   r   test_parquet  s    






         

r  c              	   C   s"  t | t d}t d}t d}dt }||jd|jd|jd|jd|jd|jd|jj	ddd	gdd

dd}|j|dd}|j|||ddd |j|||dddd dd | |D }	d|	kstd|	kstd|	kst|j|d||d}
|jj|||g|
dd d S )Nr   rM   rL   zs3://%s/test.parquet.appendr   r   r   r   r   r   r   r   r   r   FT)r   r   write_indexr   )r   r   r  appendZignore_divisionsc                 S   s   g | ]}| d d qS r   r   rt   r   r   r   rv   K  s     z'test_parquet_append.<locals>.<listcomp>r   r   r   )rU   r   r   )Zcheck_index)rW   rX   r_   rY   rZ   r   r   r   r   r   r   r   r   r   r1   r   r   r   r   )r@   r   r   r  rd   rc   r  rK   rf   rI   r  r   r   r   test_parquet_append'  sZ    




	r	  c           
   	   C   s0  t | t d}t d}t d}dt }|d|ddddgi}|j|d	d
}|j||d|dd |j||tf |ddid | 	 j
dkst|j||tf |ddid | 	 j
dkst|j||tf |ddid | 	 jdkst| 	 |d }	|	jdks"tW 5 Q R X d S )Nr   rM   rL   r   r   r   rS   rN   r   r   FT)r   r  r   r   default_fill_cache)r   r   default_block_sizei   z
/_metadata)rW   rX   r_   rY   arrayr   r   r   r   currentr
  r1   r  r   r   )
r@   r   r   r  rd   rc   r  rK   rf   rJ   r   r   r   test_parquet_wstoragepars^  sD    



  r  )Dr\   r   r(   r&   r6   r/   
contextlibr   	functoolsr   rW   Zpackaging.versionr   r   rX   r   r   r   r   Zfsspec.compressionr	   Zfsspec.corer
   r   r   ro   Ztlzr   r   Zfsspec.parquetr   r  ImportErrorr   r   Zdask.bytes.corer   Zdask.bytes.utilsr   r_   rI   r   Zfixturer   r,   r<   r@   r>   markZslowrk   rr   r   r   r   r   r   r   Zparametrizer   r   r   r   r   r   r  r	  r  r   r   r   r   <module>   s   








#
W

'

o
6