SlideShare una empresa de Scribd logo
1 de 39
Descargar para leer sin conexión
Parallel
Computing
With Dask
Christian Aichinger
https://greek0.net
@chaichinger
def download(url):
return requests.get(url).content
for url in urls:
download(url)
def download(url):
return requests.get(url).content
@asyncio.coroutine
def asyncio_download(loop):
futures = [loop.run_in_executor(None, download, url)
for url in urls]
return [(yield from future) for future in futures]
loop = asyncio.get_event_loop()
job = asyncio_download(loop)
loop.run_until_complete(job)
@dask.delayed
def download(url):
return requests.get(url).content
contents = [download(url) for url in urls]
dask.compute(contents)
def process_cpu(url):
url = url.encode()
charsum = 0
for c1 in url:
for c2 in url:
for c3 in url:
charsum += c1 * c2 * c3
return charsum
[process_cpu(url) for url in urls]
@dask.delayed
def process_cpu(url):
...
graph = [process_cpu(url) for url in urls]
dask.compute(graph)
@dask.delayed
def process_cpu(url):
...
graph = [process_cpu(url) for url in urls]
dask.compute(graph,
get=dask.multiprocessing.get)
@dask.delayed
def f(arg):
print("f", arg)
return 2 * arg
@dask.delayed
def g(args):
print("g", args)
return sum(args)
lst = [1, 2, 3]
graph = g([f(i) for i in lst])
f-#0
g
f
g-#1
f-#2 f-#3
f f
print("result", graph.compute())
f 2
f 1
f 3
g [2, 4, 6]
result 12
f-#0
g
f
g-#1
f-#2 f-#3
f f
Collection similar to Python lists
import dask.bag as db
db.from_sequence(urls)
.map(download)
.map(convert_to_image)
.filter(lambda img: img.size[0] < 500)
.map(remove_artifacts)
.map(save_to_disk)
.compute()
import dask.bag as db
import json
js = db.read_text('log-2017*.gz').map(json.loads)
js.take(2)
({'name': 'Alice',
'location': {'city': 'LA', 'state': 'CA'}},
{'name': 'Bob',
'location': {'city': 'NYC', 'state': 'NY'}})
result = js.pluck('name').frequencies()
dict(result)
{'Alice': 10000, 'Bob': 5555, 'Charlie': ...}
http://dask.pydata.org/en/latest/examples/bag-json.html
Collection similar to NumPy Arrays
import dask.array as da
import skimage.io
delayed_imread = dask.delayed(skimage.io.imread, pure=True)
sample = skimage.io.imread(urls[0])
images = [delayed_imread(url) for url in urls]
images = [da.from_delayed(img,
dtype=sample.dtype,
shape=sample.shape)
for img in images]
images = da.stack(images, axis=0)
images.shape
(1000000, 360, 500, 3)
images.shape
(1000000, 360, 500, 3)
max_img = images.mean(axis=3).max(axis=0)
max_img.shape
(360, 500)
max_img.compute()
array([[ 157., 155., 153., ..., 134., 137.],
[ 154., 153., 151., ..., 129., 132.],
...,
[ 97., 66., 81., ..., 74., 82.]])
da.linalg.svd(max_img, 10)
da.fft.fft(max_img)
('tensordot-#0', 2, 1, 2)
sum
apply
('transpose-#1', 1, 2)
apply apply
('wrapped-#2', 2, 1)
apply applytranspose
('tensordot-#0', 1, 1, 1)
sum
apply
('transpose-#1', 1, 1)
apply
('wrapped-#2', 1, 1)
apply
transpose
('tensordot-#0', 2, 0, 2)
apply
('wrapped-#2', 2, 0)
apply applytranspose
('transpose-#1', 0, 2)
apply apply
('tensordot-#0', 0, 1, 0)
sum
apply
('wrapped-#2', 0, 1)
transpose
('transpose-#1', 1, 0)
('tensordot-#0', 0, 0, 2)
sum
('wrapped-#2', 0, 0)
apply apply
transpose
('tensordot-#0', 2, 0, 0)
sum
('transpose-#1', 0, 0)
apply
('tensordot-#0', 0, 0, 0) ('tensordot-#0', 2, 2, 0)
apply
('wrapped-#2', 2, 2)
apply
applytranspose
('transpose-#1', 2, 0)
apply apply
('tensordot-#0', 0, 2, 2)
apply
('transpose-#1', 2, 2)
apply
('wrapped-#2', 0, 2)
apply
transpose
('tensordot-#0', 1, 0, 1)
apply
('transpose-#1', 0, 1)
('wrapped-#2', 1, 0)
transpose
('tensordot-#0', 2, 1, 0) ('tensordot-#0', 0, 2, 1)
sum
('transpose-#1', 2, 1)
apply
('tensordot-#0', 0, 2, 0) ('tensordot-#0', 0, 0, 1)
('tensordot-#0', 0, 1, 2)
('tensordot-#0', 1, 2, 1)
('wrapped-#2', 1, 2)
transpose
('tensordot-#0', 2, 2, 2) ('tensordot-#0', 1, 2, 2)
sum
('tensordot-#0', 2, 2, 1)
sum
('tensordot-#0', 1, 0, 0)
sum
('tensordot-#0', 1, 1, 0)('tensordot-#0', 2, 0, 1) ('tensordot-#0', 0, 1, 1)('tensordot-#0', 1, 2, 0)
('tensordot-#0', 1, 0, 2)
('tensordot-#0', 2, 1, 1)
('tensordot-#0', 1, 1, 2)('sum-#3', 2, 0) ('sum-#3', 0, 0) ('sum-#3', 0, 1)
('sum-#3', 2, 2) ('sum-#3', 1, 2)('sum-#3', 0, 2)
('sum-#3', 1, 1)('sum-#3', 2, 1) ('sum-#3', 1, 0)
onesones onesones
onesones
ones ones
ones
('tensordot-#0', 2, 1, 2)
sum
apply
('transpose-#1', 1, 2)
apply
('wrapped-#2', 2, 1)
appltranspose
('tensordot-#0', 2, 0, 2)
apply
('wrapped-#2', 2, 0)
applytranspose
('transpose-#1', 0, 2)
apply
('tensordot-#0', 0, 0, 2)
sum
('wrapped-#2', 0, 0)
apply
transpose
('tensordot-#0', 2, 0, 0)
sum
('transpose-#1', 0, 0)
('tensordot-#0', 0, 0, 0) ('tensordot-#0', 2, 2, 0)
apply
('wrapped-#2', 2, 2)
apply
transpose
('transpos
('tensordot-#0', 0, 2, 2)
apply
('transpose-#1', 2, 2)
('wrapped-#2', 0, 2
trans
('tensordot-#0', 2, 1, 0)
('tensordot-#0', 0, 1, 2)('tensordot-#0', 2, 2, 2) ('sum-#3', 2, 0)
('sum-#3', 2, 2) ('sum-#3', 0, 2)
ones ones
onesones ones
Collection similar to Pandas Dataframes
__Request received (wms) : #17236, 2016-12-27 16:03:44.898007,
current_connections = connected=4, accepted=4, idle threads=4
appid="mapcache" client_ip=10.0.39.1 user_agent="..." query=…
__Request processed (wms) : #17236, total_duration=00:00:11.377182
cache_hits=7917 cache_misses=0
success_rate=100% successes=262144 failures=0
RE_REQ_RECEIVE = re.compile(r"""
__Request\ received\s+
\((?P<iface>\w+)\)\s*:\s* # Interface (wfs, wms)
\#(?P<req_id>\d+),\s* # Request id
(?P<starttime>[^,]+),\s* # Request start timestamp
current_connections=\s*
...
""", re.VERBOSE)
RE_REQ_PROCESSED = re.compile(r"""
__Request\ processed\s+
\(\w+\)\s*:\s* # Interface (wfs, wms)
\#(?P<req_id>\d+),\s* # Request id
total_duration=(?P<total_duration>[0-9:.]+)\s+
...
""", re.VERBOSE)
bag = db.read_text(files)
ddf_recv = (bag
.str.strip()
.map(lambda line: RE_REQ_RECEIVE.match(line))
.remove(lambda el: el is None)
.map(lambda m: m.groupdict())
.to_dataframe(columns=pd.DataFrame(columns=RECV_COLS))
)
ddf_proc = (bag ...)
requests = ddf_recv.merge(ddf_proc, on='req_id', how='inner')
slow_req = requests[
(requests.starttime >= datetime(2017, 5, 1)) &
(requests.starttime < datetime(2017, 5, 2)) &
(requests.total_duration >= timedelta(seconds=5))]
slow_req = slow_req.compute(get=dask.multiprocessing.get)
$ dask-scheduler
Scheduler at: tcp://10.0.0.8:8786
$ ssh worker1 dask-worker 10.0.0.8:8786
$ ssh worker2 dask-worker 10.0.0.8:8786
$ ssh worker3 dask-worker 10.0.0.8:8786
from distributed import Client
client = Client('10.0.0.8:8786')
Image Credit
●
UBIMET background and company logo
Used with permission
●
CPU frequency scaling:
Created by Wikipedia user Newhorizons msk, in the public domain
https://en.wikipedia.org/wiki/File:Clock_CPU_Scaling.jpg
●
Parallel computing:
Created by the US government, in the public domain
https://computing.llnl.gov/tutorials/parallel_comp/
●
Python logo:
A trademark of the Python Software Foundation
https://www.python.org/community/logos/
●
Dask logo:
Part of the Dask source distribution, licensed BSD v3
https://github.com/dask/dask/blob/master/docs/source/images/dask_horizontal.svg
●
All charts and graphs: created myself
●
Bag
By Pixabay user “OpenClipart-Vectors”, in the public domain
https://pixabay.com/p-156023/?no_redirect
●
Array
Jerome S. Higgins, in the public domain
https://commons.wikimedia.org/wiki/File:Land_Act_of_1785_section_numbering.png
●
Frame
Modified form of a Wellcome Trust image, licensed CC-BY 4.0
https://commons.wikimedia.org/wiki/File:Picture_frame_Wellcome_L0051764.jpg
●
Dask Array Composition of NumPy Arrays, Dask DataFrame Composition of Pandas Dataframes
Partially modified, part of the Dask source distribution, licensed BSD v3
All from https://github.com/dask/dask/blob/master/docs/source/images/
●
Cluster:
Created by Julian Herzog, licensed GNU FDL v2 / CC-BY 4.0
https://commons.wikimedia.org/wiki/File:High_Performance_Computing_Center_Stuttgart_HLRS_2015_08_Cray_XC40_Hazel_Hen_IO.jpg
●
Dask Distributed graph:
Partially modified, part of the Dask source distribution, licensed BSD v3
https://github.com/dask/dask/blob/9f344bbf38610e03f723ac034f9c4a390a7debec/docs/source/images/distributed-layout.svg

Más contenido relacionado

La actualidad más candente

How to stand on the shoulders of giants
How to stand on the shoulders of giantsHow to stand on the shoulders of giants
How to stand on the shoulders of giantsIan Barber
 
Getting started with RDO Havana
Getting started with RDO HavanaGetting started with RDO Havana
Getting started with RDO HavanaDan Radez
 
Correcting Common Async/Await Mistakes in .NET
Correcting Common Async/Await Mistakes in .NETCorrecting Common Async/Await Mistakes in .NET
Correcting Common Async/Await Mistakes in .NETBrandon Minnick, MBA
 
Py conkr 20150829_docker-python
Py conkr 20150829_docker-pythonPy conkr 20150829_docker-python
Py conkr 20150829_docker-pythonEric Ahn
 
Intro to OTP in Elixir
Intro to OTP in ElixirIntro to OTP in Elixir
Intro to OTP in ElixirJesse Anderson
 
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with PuppetPuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with PuppetWalter Heck
 
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with PuppetPuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with PuppetOlinData
 
The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212Mahmoud Samir Fayed
 
Best Practices in Handling Performance Issues
Best Practices in Handling Performance IssuesBest Practices in Handling Performance Issues
Best Practices in Handling Performance IssuesOdoo
 
Http capturing
Http capturingHttp capturing
Http capturingEric Ahn
 
QA Fest 2019. Saar Rachamim. Developing Tools, While Testing
QA Fest 2019. Saar Rachamim. Developing Tools, While TestingQA Fest 2019. Saar Rachamim. Developing Tools, While Testing
QA Fest 2019. Saar Rachamim. Developing Tools, While TestingQAFest
 
Kubernetes Tutorial
Kubernetes TutorialKubernetes Tutorial
Kubernetes TutorialCi Jie Li
 
AnyMQ, Hippie, and the real-time web
AnyMQ, Hippie, and the real-time webAnyMQ, Hippie, and the real-time web
AnyMQ, Hippie, and the real-time webclkao
 
Logstash for SEO: come monitorare i Log del Web Server in realtime
Logstash for SEO: come monitorare i Log del Web Server in realtimeLogstash for SEO: come monitorare i Log del Web Server in realtime
Logstash for SEO: come monitorare i Log del Web Server in realtimeAndrea Cardinale
 
Beyond php it's not (just) about the code
Beyond php   it's not (just) about the codeBeyond php   it's not (just) about the code
Beyond php it's not (just) about the codeWim Godden
 
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...GeilDanke
 

La actualidad más candente (20)

How to stand on the shoulders of giants
How to stand on the shoulders of giantsHow to stand on the shoulders of giants
How to stand on the shoulders of giants
 
Getting started with RDO Havana
Getting started with RDO HavanaGetting started with RDO Havana
Getting started with RDO Havana
 
Correcting Common Async/Await Mistakes in .NET
Correcting Common Async/Await Mistakes in .NETCorrecting Common Async/Await Mistakes in .NET
Correcting Common Async/Await Mistakes in .NET
 
Py conkr 20150829_docker-python
Py conkr 20150829_docker-pythonPy conkr 20150829_docker-python
Py conkr 20150829_docker-python
 
Intro to OTP in Elixir
Intro to OTP in ElixirIntro to OTP in Elixir
Intro to OTP in Elixir
 
Redis 101
Redis 101Redis 101
Redis 101
 
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with PuppetPuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
 
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with PuppetPuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
 
The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212
 
Best Practices in Handling Performance Issues
Best Practices in Handling Performance IssuesBest Practices in Handling Performance Issues
Best Practices in Handling Performance Issues
 
Http capturing
Http capturingHttp capturing
Http capturing
 
QA Fest 2019. Saar Rachamim. Developing Tools, While Testing
QA Fest 2019. Saar Rachamim. Developing Tools, While TestingQA Fest 2019. Saar Rachamim. Developing Tools, While Testing
QA Fest 2019. Saar Rachamim. Developing Tools, While Testing
 
Kubernetes Tutorial
Kubernetes TutorialKubernetes Tutorial
Kubernetes Tutorial
 
AnyMQ, Hippie, and the real-time web
AnyMQ, Hippie, and the real-time webAnyMQ, Hippie, and the real-time web
AnyMQ, Hippie, and the real-time web
 
Logstash for SEO: come monitorare i Log del Web Server in realtime
Logstash for SEO: come monitorare i Log del Web Server in realtimeLogstash for SEO: come monitorare i Log del Web Server in realtime
Logstash for SEO: come monitorare i Log del Web Server in realtime
 
Log mining
Log miningLog mining
Log mining
 
Beyond php it's not (just) about the code
Beyond php   it's not (just) about the codeBeyond php   it's not (just) about the code
Beyond php it's not (just) about the code
 
はじめてのGroovy
はじめてのGroovyはじめてのGroovy
はじめてのGroovy
 
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
 
Common scenarios in vcl
Common scenarios in vclCommon scenarios in vcl
Common scenarios in vcl
 

Similar a Parallel Computing With Dask - PyDays 2017

fog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloudfog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the CloudWesley Beary
 
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)Wesley Beary
 
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Big Data Spain
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with ClojureDmitry Buzdin
 
async/await in Swift
async/await in Swiftasync/await in Swift
async/await in SwiftPeter Friese
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeWim Godden
 
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011Masahiro Nagano
 
An intro to Docker, Terraform, and Amazon ECS
An intro to Docker, Terraform, and Amazon ECSAn intro to Docker, Terraform, and Amazon ECS
An intro to Docker, Terraform, and Amazon ECSYevgeniy Brikman
 
Centralize your Business Logic with Pipelines in Elixir
Centralize your Business Logic with Pipelines in ElixirCentralize your Business Logic with Pipelines in Elixir
Centralize your Business Logic with Pipelines in ElixirMichael Viveros
 
And the Greatest of These Is ... Rack Support
And the Greatest of These Is ... Rack SupportAnd the Greatest of These Is ... Rack Support
And the Greatest of These Is ... Rack SupportBen Scofield
 
Rhebok, High Performance Rack Handler / Rubykaigi 2015
Rhebok, High Performance Rack Handler / Rubykaigi 2015Rhebok, High Performance Rack Handler / Rubykaigi 2015
Rhebok, High Performance Rack Handler / Rubykaigi 2015Masahiro Nagano
 
Emerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the HorizonEmerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the HorizonAlex Payne
 
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRaimonds Simanovskis
 
Performance and stability testing \w Gatling
Performance and stability testing \w GatlingPerformance and stability testing \w Gatling
Performance and stability testing \w GatlingDmitry Vrublevsky
 
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)Brian Sam-Bodden
 
Railsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshareRailsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slidesharetomcopeland
 
リローダブルClojureアプリケーション
リローダブルClojureアプリケーションリローダブルClojureアプリケーション
リローダブルClojureアプリケーションKenji Nakamura
 

Similar a Parallel Computing With Dask - PyDays 2017 (20)

fog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloudfog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloud
 
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
 
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with Clojure
 
async/await in Swift
async/await in Swiftasync/await in Swift
async/await in Swift
 
Play!ng with scala
Play!ng with scalaPlay!ng with scala
Play!ng with scala
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the code
 
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
 
An intro to Docker, Terraform, and Amazon ECS
An intro to Docker, Terraform, and Amazon ECSAn intro to Docker, Terraform, and Amazon ECS
An intro to Docker, Terraform, and Amazon ECS
 
Centralize your Business Logic with Pipelines in Elixir
Centralize your Business Logic with Pipelines in ElixirCentralize your Business Logic with Pipelines in Elixir
Centralize your Business Logic with Pipelines in Elixir
 
Server Side Swift: Vapor
Server Side Swift: VaporServer Side Swift: Vapor
Server Side Swift: Vapor
 
And the Greatest of These Is ... Rack Support
And the Greatest of These Is ... Rack SupportAnd the Greatest of These Is ... Rack Support
And the Greatest of These Is ... Rack Support
 
Little Big Ruby
Little Big RubyLittle Big Ruby
Little Big Ruby
 
Rhebok, High Performance Rack Handler / Rubykaigi 2015
Rhebok, High Performance Rack Handler / Rubykaigi 2015Rhebok, High Performance Rack Handler / Rubykaigi 2015
Rhebok, High Performance Rack Handler / Rubykaigi 2015
 
Emerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the HorizonEmerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the Horizon
 
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
 
Performance and stability testing \w Gatling
Performance and stability testing \w GatlingPerformance and stability testing \w Gatling
Performance and stability testing \w Gatling
 
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
 
Railsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshareRailsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshare
 
リローダブルClojureアプリケーション
リローダブルClojureアプリケーションリローダブルClojureアプリケーション
リローダブルClojureアプリケーション
 

Último

OpenChain Education Work Group Monthly Meeting - 2024-04-10 - Full Recording
OpenChain Education Work Group Monthly Meeting - 2024-04-10 - Full RecordingOpenChain Education Work Group Monthly Meeting - 2024-04-10 - Full Recording
OpenChain Education Work Group Monthly Meeting - 2024-04-10 - Full RecordingShane Coughlan
 
Strategies for using alternative queries to mitigate zero results
Strategies for using alternative queries to mitigate zero resultsStrategies for using alternative queries to mitigate zero results
Strategies for using alternative queries to mitigate zero resultsJean Silva
 
OpenChain AI Study Group - Europe and Asia Recap - 2024-04-11 - Full Recording
OpenChain AI Study Group - Europe and Asia Recap - 2024-04-11 - Full RecordingOpenChain AI Study Group - Europe and Asia Recap - 2024-04-11 - Full Recording
OpenChain AI Study Group - Europe and Asia Recap - 2024-04-11 - Full RecordingShane Coughlan
 
Precise and Complete Requirements? An Elusive Goal
Precise and Complete Requirements? An Elusive GoalPrecise and Complete Requirements? An Elusive Goal
Precise and Complete Requirements? An Elusive GoalLionel Briand
 
SoftTeco - Software Development Company Profile
SoftTeco - Software Development Company ProfileSoftTeco - Software Development Company Profile
SoftTeco - Software Development Company Profileakrivarotava
 
Large Language Models for Test Case Evolution and Repair
Large Language Models for Test Case Evolution and RepairLarge Language Models for Test Case Evolution and Repair
Large Language Models for Test Case Evolution and RepairLionel Briand
 
CRM Contender Series: HubSpot vs. Salesforce
CRM Contender Series: HubSpot vs. SalesforceCRM Contender Series: HubSpot vs. Salesforce
CRM Contender Series: HubSpot vs. SalesforceBrainSell Technologies
 
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte GermanySuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte GermanyChristoph Pohl
 
Introduction to Firebase Workshop Slides
Introduction to Firebase Workshop SlidesIntroduction to Firebase Workshop Slides
Introduction to Firebase Workshop Slidesvaideheekore1
 
Patterns for automating API delivery. API conference
Patterns for automating API delivery. API conferencePatterns for automating API delivery. API conference
Patterns for automating API delivery. API conferencessuser9e7c64
 
Sending Calendar Invites on SES and Calendarsnack.pdf
Sending Calendar Invites on SES and Calendarsnack.pdfSending Calendar Invites on SES and Calendarsnack.pdf
Sending Calendar Invites on SES and Calendarsnack.pdf31events.com
 
UI5ers live - Custom Controls wrapping 3rd-party libs.pptx
UI5ers live - Custom Controls wrapping 3rd-party libs.pptxUI5ers live - Custom Controls wrapping 3rd-party libs.pptx
UI5ers live - Custom Controls wrapping 3rd-party libs.pptxAndreas Kunz
 
Leveraging AI for Mobile App Testing on Real Devices | Applitools + Kobiton
Leveraging AI for Mobile App Testing on Real Devices | Applitools + KobitonLeveraging AI for Mobile App Testing on Real Devices | Applitools + Kobiton
Leveraging AI for Mobile App Testing on Real Devices | Applitools + KobitonApplitools
 
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...Akihiro Suda
 
How to submit a standout Adobe Champion Application
How to submit a standout Adobe Champion ApplicationHow to submit a standout Adobe Champion Application
How to submit a standout Adobe Champion ApplicationBradBedford3
 
Alfresco TTL#157 - Troubleshooting Made Easy: Deciphering Alfresco mTLS Confi...
Alfresco TTL#157 - Troubleshooting Made Easy: Deciphering Alfresco mTLS Confi...Alfresco TTL#157 - Troubleshooting Made Easy: Deciphering Alfresco mTLS Confi...
Alfresco TTL#157 - Troubleshooting Made Easy: Deciphering Alfresco mTLS Confi...Angel Borroy López
 
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptxReal-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptxRTS corp
 
SensoDat: Simulation-based Sensor Dataset of Self-driving Cars
SensoDat: Simulation-based Sensor Dataset of Self-driving CarsSensoDat: Simulation-based Sensor Dataset of Self-driving Cars
SensoDat: Simulation-based Sensor Dataset of Self-driving CarsChristian Birchler
 
2024 DevNexus Patterns for Resiliency: Shuffle shards
2024 DevNexus Patterns for Resiliency: Shuffle shards2024 DevNexus Patterns for Resiliency: Shuffle shards
2024 DevNexus Patterns for Resiliency: Shuffle shardsChristopher Curtin
 
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...Cizo Technology Services
 

Último (20)

OpenChain Education Work Group Monthly Meeting - 2024-04-10 - Full Recording
OpenChain Education Work Group Monthly Meeting - 2024-04-10 - Full RecordingOpenChain Education Work Group Monthly Meeting - 2024-04-10 - Full Recording
OpenChain Education Work Group Monthly Meeting - 2024-04-10 - Full Recording
 
Strategies for using alternative queries to mitigate zero results
Strategies for using alternative queries to mitigate zero resultsStrategies for using alternative queries to mitigate zero results
Strategies for using alternative queries to mitigate zero results
 
OpenChain AI Study Group - Europe and Asia Recap - 2024-04-11 - Full Recording
OpenChain AI Study Group - Europe and Asia Recap - 2024-04-11 - Full RecordingOpenChain AI Study Group - Europe and Asia Recap - 2024-04-11 - Full Recording
OpenChain AI Study Group - Europe and Asia Recap - 2024-04-11 - Full Recording
 
Precise and Complete Requirements? An Elusive Goal
Precise and Complete Requirements? An Elusive GoalPrecise and Complete Requirements? An Elusive Goal
Precise and Complete Requirements? An Elusive Goal
 
SoftTeco - Software Development Company Profile
SoftTeco - Software Development Company ProfileSoftTeco - Software Development Company Profile
SoftTeco - Software Development Company Profile
 
Large Language Models for Test Case Evolution and Repair
Large Language Models for Test Case Evolution and RepairLarge Language Models for Test Case Evolution and Repair
Large Language Models for Test Case Evolution and Repair
 
CRM Contender Series: HubSpot vs. Salesforce
CRM Contender Series: HubSpot vs. SalesforceCRM Contender Series: HubSpot vs. Salesforce
CRM Contender Series: HubSpot vs. Salesforce
 
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte GermanySuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
 
Introduction to Firebase Workshop Slides
Introduction to Firebase Workshop SlidesIntroduction to Firebase Workshop Slides
Introduction to Firebase Workshop Slides
 
Patterns for automating API delivery. API conference
Patterns for automating API delivery. API conferencePatterns for automating API delivery. API conference
Patterns for automating API delivery. API conference
 
Sending Calendar Invites on SES and Calendarsnack.pdf
Sending Calendar Invites on SES and Calendarsnack.pdfSending Calendar Invites on SES and Calendarsnack.pdf
Sending Calendar Invites on SES and Calendarsnack.pdf
 
UI5ers live - Custom Controls wrapping 3rd-party libs.pptx
UI5ers live - Custom Controls wrapping 3rd-party libs.pptxUI5ers live - Custom Controls wrapping 3rd-party libs.pptx
UI5ers live - Custom Controls wrapping 3rd-party libs.pptx
 
Leveraging AI for Mobile App Testing on Real Devices | Applitools + Kobiton
Leveraging AI for Mobile App Testing on Real Devices | Applitools + KobitonLeveraging AI for Mobile App Testing on Real Devices | Applitools + Kobiton
Leveraging AI for Mobile App Testing on Real Devices | Applitools + Kobiton
 
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
 
How to submit a standout Adobe Champion Application
How to submit a standout Adobe Champion ApplicationHow to submit a standout Adobe Champion Application
How to submit a standout Adobe Champion Application
 
Alfresco TTL#157 - Troubleshooting Made Easy: Deciphering Alfresco mTLS Confi...
Alfresco TTL#157 - Troubleshooting Made Easy: Deciphering Alfresco mTLS Confi...Alfresco TTL#157 - Troubleshooting Made Easy: Deciphering Alfresco mTLS Confi...
Alfresco TTL#157 - Troubleshooting Made Easy: Deciphering Alfresco mTLS Confi...
 
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptxReal-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
 
SensoDat: Simulation-based Sensor Dataset of Self-driving Cars
SensoDat: Simulation-based Sensor Dataset of Self-driving CarsSensoDat: Simulation-based Sensor Dataset of Self-driving Cars
SensoDat: Simulation-based Sensor Dataset of Self-driving Cars
 
2024 DevNexus Patterns for Resiliency: Shuffle shards
2024 DevNexus Patterns for Resiliency: Shuffle shards2024 DevNexus Patterns for Resiliency: Shuffle shards
2024 DevNexus Patterns for Resiliency: Shuffle shards
 
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
 

Parallel Computing With Dask - PyDays 2017

  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 10. def download(url): return requests.get(url).content @asyncio.coroutine def asyncio_download(loop): futures = [loop.run_in_executor(None, download, url) for url in urls] return [(yield from future) for future in futures] loop = asyncio.get_event_loop() job = asyncio_download(loop) loop.run_until_complete(job)
  • 11. @dask.delayed def download(url): return requests.get(url).content contents = [download(url) for url in urls] dask.compute(contents)
  • 12.
  • 13. def process_cpu(url): url = url.encode() charsum = 0 for c1 in url: for c2 in url: for c3 in url: charsum += c1 * c2 * c3 return charsum [process_cpu(url) for url in urls]
  • 14.
  • 15. @dask.delayed def process_cpu(url): ... graph = [process_cpu(url) for url in urls] dask.compute(graph)
  • 16. @dask.delayed def process_cpu(url): ... graph = [process_cpu(url) for url in urls] dask.compute(graph, get=dask.multiprocessing.get)
  • 17. @dask.delayed def f(arg): print("f", arg) return 2 * arg @dask.delayed def g(args): print("g", args) return sum(args) lst = [1, 2, 3] graph = g([f(i) for i in lst]) f-#0 g f g-#1 f-#2 f-#3 f f
  • 18. print("result", graph.compute()) f 2 f 1 f 3 g [2, 4, 6] result 12 f-#0 g f g-#1 f-#2 f-#3 f f
  • 19. Collection similar to Python lists
  • 20. import dask.bag as db db.from_sequence(urls) .map(download) .map(convert_to_image) .filter(lambda img: img.size[0] < 500) .map(remove_artifacts) .map(save_to_disk) .compute()
  • 21. import dask.bag as db import json js = db.read_text('log-2017*.gz').map(json.loads) js.take(2) ({'name': 'Alice', 'location': {'city': 'LA', 'state': 'CA'}}, {'name': 'Bob', 'location': {'city': 'NYC', 'state': 'NY'}}) result = js.pluck('name').frequencies() dict(result) {'Alice': 10000, 'Bob': 5555, 'Charlie': ...} http://dask.pydata.org/en/latest/examples/bag-json.html
  • 22. Collection similar to NumPy Arrays
  • 23.
  • 24. import dask.array as da import skimage.io delayed_imread = dask.delayed(skimage.io.imread, pure=True) sample = skimage.io.imread(urls[0]) images = [delayed_imread(url) for url in urls] images = [da.from_delayed(img, dtype=sample.dtype, shape=sample.shape) for img in images] images = da.stack(images, axis=0) images.shape (1000000, 360, 500, 3)
  • 25. images.shape (1000000, 360, 500, 3) max_img = images.mean(axis=3).max(axis=0) max_img.shape (360, 500) max_img.compute() array([[ 157., 155., 153., ..., 134., 137.], [ 154., 153., 151., ..., 129., 132.], ..., [ 97., 66., 81., ..., 74., 82.]]) da.linalg.svd(max_img, 10) da.fft.fft(max_img)
  • 26. ('tensordot-#0', 2, 1, 2) sum apply ('transpose-#1', 1, 2) apply apply ('wrapped-#2', 2, 1) apply applytranspose ('tensordot-#0', 1, 1, 1) sum apply ('transpose-#1', 1, 1) apply ('wrapped-#2', 1, 1) apply transpose ('tensordot-#0', 2, 0, 2) apply ('wrapped-#2', 2, 0) apply applytranspose ('transpose-#1', 0, 2) apply apply ('tensordot-#0', 0, 1, 0) sum apply ('wrapped-#2', 0, 1) transpose ('transpose-#1', 1, 0) ('tensordot-#0', 0, 0, 2) sum ('wrapped-#2', 0, 0) apply apply transpose ('tensordot-#0', 2, 0, 0) sum ('transpose-#1', 0, 0) apply ('tensordot-#0', 0, 0, 0) ('tensordot-#0', 2, 2, 0) apply ('wrapped-#2', 2, 2) apply applytranspose ('transpose-#1', 2, 0) apply apply ('tensordot-#0', 0, 2, 2) apply ('transpose-#1', 2, 2) apply ('wrapped-#2', 0, 2) apply transpose ('tensordot-#0', 1, 0, 1) apply ('transpose-#1', 0, 1) ('wrapped-#2', 1, 0) transpose ('tensordot-#0', 2, 1, 0) ('tensordot-#0', 0, 2, 1) sum ('transpose-#1', 2, 1) apply ('tensordot-#0', 0, 2, 0) ('tensordot-#0', 0, 0, 1) ('tensordot-#0', 0, 1, 2) ('tensordot-#0', 1, 2, 1) ('wrapped-#2', 1, 2) transpose ('tensordot-#0', 2, 2, 2) ('tensordot-#0', 1, 2, 2) sum ('tensordot-#0', 2, 2, 1) sum ('tensordot-#0', 1, 0, 0) sum ('tensordot-#0', 1, 1, 0)('tensordot-#0', 2, 0, 1) ('tensordot-#0', 0, 1, 1)('tensordot-#0', 1, 2, 0) ('tensordot-#0', 1, 0, 2) ('tensordot-#0', 2, 1, 1) ('tensordot-#0', 1, 1, 2)('sum-#3', 2, 0) ('sum-#3', 0, 0) ('sum-#3', 0, 1) ('sum-#3', 2, 2) ('sum-#3', 1, 2)('sum-#3', 0, 2) ('sum-#3', 1, 1)('sum-#3', 2, 1) ('sum-#3', 1, 0) onesones onesones onesones ones ones ones ('tensordot-#0', 2, 1, 2) sum apply ('transpose-#1', 1, 2) apply ('wrapped-#2', 2, 1) appltranspose ('tensordot-#0', 2, 0, 2) apply ('wrapped-#2', 2, 0) applytranspose ('transpose-#1', 0, 2) apply ('tensordot-#0', 0, 0, 2) sum ('wrapped-#2', 0, 0) apply transpose ('tensordot-#0', 2, 0, 0) sum ('transpose-#1', 0, 0) ('tensordot-#0', 0, 0, 0) ('tensordot-#0', 2, 2, 0) apply ('wrapped-#2', 2, 2) apply transpose 
('transpos ('tensordot-#0', 0, 2, 2) apply ('transpose-#1', 2, 2) ('wrapped-#2', 0, 2 trans ('tensordot-#0', 2, 1, 0) ('tensordot-#0', 0, 1, 2)('tensordot-#0', 2, 2, 2) ('sum-#3', 2, 0) ('sum-#3', 2, 2) ('sum-#3', 0, 2) ones ones onesones ones
  • 27. Collection similar to Pandas Dataframes
  • 28.
  • 29. __Request received (wms) : #17236, 2016-12-27 16:03:44.898007, current_connections = connected=4, accepted=4, idle threads=4 appid="mapcache" client_ip=10.0.39.1 user_agent="..." query=… __Request processed (wms) : #17236, total_duration=00:00:11.377182 cache_hits=7917 cache_misses=0 success_rate=100% successes=262144 failures=0
  • 30. RE_REQ_RECEIVE = re.compile(r""" __Request\ received\s+ \((?P<iface>\w+)\)\s*:\s* # Interface (wfs, wms) \#(?P<req_id>\d+),\s* # Request id (?P<starttime>[^,]+),\s* # Request start timestamp current_connections=\s* ... """, re.VERBOSE) RE_REQ_PROCESSED = re.compile(r""" __Request\ processed\s+ \(\w+\)\s*:\s* # Interface (wfs, wms) \#(?P<req_id>\d+),\s* # Request id total_duration=(?P<total_duration>[0-9:.]+)\s+ ... """, re.VERBOSE)
  • 31. bag = db.read_text(files) ddf_recv = (bag .str.strip() .map(lambda line: RE_REQ_RECEIVE.match(line)) .remove(lambda el: el is None) .map(lambda m: m.groupdict()) .to_dataframe(columns=pd.DataFrame(columns=RECV_COLS)) ) ddf_proc = (bag ...) requests = ddf_recv.merge(ddf_proc, on='req_id', how='inner')
  • 32. slow_req = requests[ (requests.starttime >= datetime(2017, 5, 1)) & (requests.starttime < datetime(2017, 5, 2)) & (requests.total_duration >= timedelta(seconds=5))] slow_req = slow_req.compute(get=dask.multiprocessing.get)
  • 33.
  • 34.
  • 35. $ dask-scheduler Scheduler at: tcp://10.0.0.8:8786 $ ssh worker1 dask-worker 10.0.0.8:8786 $ ssh worker2 dask-worker 10.0.0.8:8786 $ ssh worker3 dask-worker 10.0.0.8:8786
  • 36. from distributed import Client client = Client('10.0.0.8:8786')
  • 37.
  • 38.
  • 39. Image Credit ● UBIMET background and company logo Used with permission ● CPU frequency scaling: Created by Wikipedia user Newhorizons msk, in the public domain https://en.wikipedia.org/wiki/File:Clock_CPU_Scaling.jpg ● Parallel computing: Created by the US government, in the public domain https://computing.llnl.gov/tutorials/parallel_comp/ ● Python logo: A trademark of the Python Software Foundation https://www.python.org/community/logos/ ● Dask logo: Part of the Dask source distribution, licensed BSD v3 https://github.com/dask/dask/blob/master/docs/source/images/dask_horizontal.svg ● All charts and graphs: created myself ● Bag By Pixabay user “OpenClipart-Vectors”, in the public domain https://pixabay.com/p-156023/?no_redirect ● Array Jerome S. Higgins, in the public domain https://commons.wikimedia.org/wiki/File:Land_Act_of_1785_section_numbering.png ● Frame Modified form of a Wellcome Trust image, licensed CC-BY 4.0 https://commons.wikimedia.org/wiki/File:Picture_frame_Wellcome_L0051764.jpg ● Dask Array Composition of NumPy Arrays, Dask DataFrame Composition of Pandas Dataframes Partially modified, part of the Dask source distribution, licensed BSD v3 All from https://github.com/dask/dask/blob/master/docs/source/images/ ● Cluster: Created by Julian Herzog, licensed GNU FDL v2 / CC-BY 4.0 https://commons.wikimedia.org/wiki/File:High_Performance_Computing_Center_Stuttgart_HLRS_2015_08_Cray_XC40_Hazel_Hen_IO.jpg ● Dask Distributed graph: Partially modified, part of the Dask source distribution, licensed BSD v3 https://github.com/dask/dask/blob/9f344bbf38610e03f723ac034f9c4a390a7debec/docs/source/images/distributed-layout.svg