# Install Phusion Passenger and build its Apache 2 module.
include_recipe "packages"
include_recipe "ruby"
include_recipe "apache2"

if platform?("centos", "redhat")
  if dist_only?
    # Just the distribution gem; we'll install the apache module within apache2.
    package "rubygem-passenger"
    return
  else
    # Apache headers needed to compile the passenger module from the gem.
    package "httpd-devel"
  end
else
  # Debian-family build dependencies for compiling the apache module.
  %w{ apache2-prefork-dev libapr1-dev }.each do |pkg|
    package pkg do
      action :upgrade
    end
  end
end

# Install the passenger gem at the version pinned in node attributes.
gem_package "passenger" do
  version node[:passenger][:version]
end

# Compile the apache module, answering the installer's interactive prompts
# with newlines; skipped once the module exists at the configured path.
execute "passenger_module" do
  command 'echo -en "\n\n\n\n" | passenger-install-apache2-module'
  creates node[:passenger][:module_path]
end
54. import boto
import boto.emr
from boto.emr.step import StreamingStep
Connect to Elastic MapReduce
from boto.emr.bootstrap_action import BootstrapAction
import time
# set your aws keys and S3 bucket, e.g. from environment or .boto
AWSKEY=
SECRETKEY=
S3_BUCKET=
NUM_INSTANCES = 1
conn = boto.connect_emr(AWSKEY,SECRETKEY)
bootstrap_step = BootstrapAction("download.tst",
"s3://elasticmapreduce/bootstrap-actions/download.sh",None)
Install packages
step = StreamingStep(name='Wordcount',
mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
cache_files = ["s3n://" + S3_BUCKET + "/boto.mod#boto.mod"],
reducer='aggregate',
input='s3n://elasticmapreduce/samples/wordcount/input',
output='s3n://' + S3_BUCKET + '/output/wordcount_output')
Set up mappers &
jobid = conn.run_jobflow(
name="testbootstrap",
reduces
log_uri="s3://" + S3_BUCKET + "/logs",
steps = [step],
bootstrap_actions=[bootstrap_step],
num_instances=NUM_INSTANCES)
print "finished spawning job (note: starting still takes time)"
state = conn.describe_jobflow(jobid).state
print "job state = ", state
print "job id = ", jobid
while state != u'COMPLETED':
print time.localtime() job state
time.sleep(30)
state = conn.describe_jobflow(jobid).state
print "job state = ", state
print "job id = ", jobid
print "final output can be found in s3://" + S3_BUCKET + "/output" + TIMESTAMP
print "try: $ s3cmd sync s3://" + S3_BUCKET + "/output" + TIMESTAMP + " ."
55.
56. “I terminate the
instance and
relaunch it. That's
my error handling”
Source: @jtimberman on Twitter
77. Linpack benchmark
880-instance CC1 cluster
Performance: 41.82 TFlops*
*#231 in Nov 2010 Top 500 rankings
78. Credit: K. Jorissen, F. D. Villa, and J. J. Rehr
WIEN2k Parallel Performance (U. Washington)
KS for huge system
at 1 k-point
H size 56,000 (25GB)
Runtime (16x8 processors)
Local (Infiniband) 3h:48
Cloud (10Gbps) 1h:30 ($40)
VERY DEMANDING
network performance
•1200 atom unit cell; SCALAPACK+MPI diagonalization, matrix size 50k-100k
124. deesingh@amazon.com
Twitter:@mndoci
http://slideshare.net/mndoci
http://mndoci.com
Inspiration and ideas from
Matt Wood & Larry Lessig
Credit: Oberazzi under a CC-BY-NC-SA license