RHadoop, Hadoop for R
[Figure: "Scholarly Activity 05-09 change" -- bar chart of the 2005-2009 change in scholarly activity for R, SAS, SPSS, S-Plus, and Stata, on a scale from -37.5% to 50%.]

[Figure: CRAN package counts, 2002-2010, on a log scale from 1 to 10,000 packages, shown alongside the scholarly-activity chart. Source: http://r4stats.com/popularity]
David Champagne, CTO
The RHadoop family: three R packages, each targeting one member of the Hadoop stack.

rhdfs
rhbase
rmr
sapply(data, function)

mapreduce(data, map = function)
library(rmr)

mapreduce(…)
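
The analogy is deliberate: mapreduce is to data on the cluster what sapply is to a local list. A minimal sketch, not from the slides, assuming a working rmr installation with a configured backend:

library(rmr)

small.ints = to.dfs(1:10)                             # push a small vector to the DFS
out = mapreduce(input = small.ints,
                map = function(k, v) keyval(v, v^2))  # emit (x, x^2) pairs
from.dfs(out)                                         # read the key-value pairs back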
Where rmr sits among Hadoop programming tools:

Expose MR                              Hide MR
rmr, Rhipe, Dumbo, Pydoop, Hadoopy     Hive, Pig
                                       Cascalog, Scalding, Scrunch
Java, C++                              Cascading, Crunch
mapreduce(input, output, map, reduce)
# map: keep roughly one record in ten, by hashing the key
map = function(k, v) if (hash(k) %% 10 == 0) keyval(k, v)

# reduce: count the values collected under each key
reduce = function(k, vv) keyval(k, length(vv))
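
Wired together, these two functions make a complete job: the map samples the data, the reduce counts what survives under each key. A hedged sketch, with input standing in for an existing DFS object and hash for whatever hash function the deck assumes:

out = mapreduce(input = input,
                map = function(k, v) if (hash(k) %% 10 == 0) keyval(k, v),
                reduce = function(k, vv) keyval(k, length(vv)))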
condition = function(x) x > 10


# the map closure captures condition, so the job is easy to parametrize
out = mapreduce(
        input = input,
        map = function(k, v)
                 if (condition(v)) keyval(k, v))
x = from.dfs(hdfs.object)    # read a DFS object into memory
hdfs.object = to.dfs(x)      # write an R object out to the DFS
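
These two calls are the memory-HDFS bridge the rest of the examples rely on. A hedged round-trip sketch, using the built-in mtcars data frame purely as an example:

hdfs.object = to.dfs(mtcars)    # serialize a local data frame into the DFS
x = from.dfs(hdfs.object)       # read it back into an R object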
INSERT OVERWRITE TABLE pv_gender_sum
SELECT pv_users.gender, count (DISTINCT pv_users.userid)
FROM pv_users
GROUP BY pv_users.gender;

mapreduce(
  input =
    mapreduce(
      input = "pv_users",
      map = function(k, v) keyval(v['userid'], v['gender']),
      reduce = function(uid, genders)
        lapply(unique(genders), function(g) keyval(NULL, g))),  # distinct (userid, gender)
  output = "pv_gender_sum",
  map = function(x, gender) keyval(gender, 1),
  reduce = function(gender, counts)
    keyval(gender, sum(unlist(counts))))                        # count per gender
kmeans =
  function(points, ncenters, iterations = 10,
           distfun = function(a, b) norm(as.matrix(a - b), type = 'F')) {
    # first pass assigns points to random centers, then refine iteratively
    newCenters = kmeans.iter(points, distfun, ncenters = ncenters)
    for(i in 1:iterations) {
      newCenters = kmeans.iter(points, distfun, centers = newCenters)}
    newCenters}


kmeans.iter =
  function(points, distfun, ncenters = dim(centers)[1], centers = NULL) {
    from.dfs(
      mapreduce(
        input = points,
        # no centers yet: pick a random cluster for each point;
        # otherwise: assign each point to its nearest center
        map = if (is.null(centers)) {
                 function(k, v) keyval(sample(1:ncenters, 1), v)}
              else {
                 function(k, v) {
                   distances = apply(centers, 1, function(c) distfun(c, v))
                   keyval(centers[which.min(distances), ], v)}},
        # each new center is the componentwise mean of its points
        reduce = function(k, vv) keyval(NULL, apply(do.call(rbind, vv), 2, mean))),
      to.data.frame = T)}
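
A hedged usage sketch, not from the slides: it assumes points are stored one observation per record, as the map functions above expect, and uses random 2-D data purely for illustration.

points = to.dfs(matrix(rnorm(200), ncol = 2))   # 100 random 2-D points
clusters = kmeans(points, ncenters = 5)         # 5 centers, 10 refinement passes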
#!/usr/bin/python
import sys
from math import fabs
from org.apache.pig.scripting import Pig

filename = "student.txt"
k = 4
tolerance = 0.01

MAX_SCORE = 4
MIN_SCORE = 0
MAX_ITERATION = 100

# initial centroids: equally divide the score space
initial_centroids = ""
last_centroids = [None] * k
for i in range(k):
    last_centroids[i] = MIN_SCORE + float(i)/k*(MAX_SCORE-MIN_SCORE)
    initial_centroids = initial_centroids + str(last_centroids[i])
    if i!=k-1:
        initial_centroids = initial_centroids + ":"

P = Pig.compile("""register udf.jar
                   DEFINE find_centroid FindCentroid('$centroids');
                   raw = load 'student.txt' as (name:chararray, age:int, gpa:double);
                   centroided = foreach raw generate gpa, find_centroid(gpa) as centroid;
                   grouped = group centroided by centroid;
                   result = foreach grouped generate group, AVG(centroided.gpa);
                   store result into 'output';
                """)

converged = False
iter_num = 0
while iter_num<MAX_ITERATION:
    Q = P.bind({'centroids':initial_centroids})
    results = Q.runSingle()
    if not results.isSuccessful():
        raise Exception("Pig job failed")
    iter = results.result("result").iterator()
    centroids = [None] * k
    distance_move = 0
    # get the new centroids of this iteration, calculate the distance moved since the last iteration
    for i in range(k):
        tuple = iter.next()
        centroids[i] = float(str(tuple.get(1)))
        distance_move = distance_move + fabs(last_centroids[i]-centroids[i])
    distance_move = distance_move / k
    Pig.fs("rmr output")
    print("iteration " + str(iter_num))
    print("average distance moved: " + str(distance_move))
    if distance_move<tolerance:
        sys.stdout.write("k-means converged at centroids: [")
        sys.stdout.write(",".join(str(v) for v in centroids))
        sys.stdout.write("]\n")
        converged = True
        break
    last_centroids = centroids[:]
    initial_centroids = ""
    for i in range(k):
        initial_centroids = initial_centroids + str(last_centroids[i])
        if i!=k-1:
            initial_centroids = initial_centroids + ":"
    iter_num += 1

if not converged:
    print("did not converge after " + str(iter_num) + " iterations")
    sys.stdout.write("last centroids: [")
    sys.stdout.write(",".join(str(v) for v in last_centroids))
    sys.stdout.write("]\n")
import java.io.IOException;

import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;


public class FindCentroid extends EvalFunc<Double> {
    double[] centroids;
    public FindCentroid(String initialCentroid) {
        String[] centroidStrings = initialCentroid.split(":");
        centroids = new double[centroidStrings.length];
        for (int i=0;i<centroidStrings.length;i++)
            centroids[i] = Double.parseDouble(centroidStrings[i]);
    }
    @Override
    public Double exec(Tuple input) throws IOException {
        double min_distance = Double.MAX_VALUE;
        double closest_centroid = 0;
        for (double centroid : centroids) {
            double distance = Math.abs(centroid - (Double)input.get(0));
            if (distance < min_distance) {
                min_distance = distance;
                closest_centroid = centroid;
            }
        }
        return closest_centroid;
    }

}
mapreduce(mapreduce(…

mapreduce(input = c(input1, input2), …)

equijoin = function(
    left.input, right.input, input,
    output,
    outer,
    map.left, map.right,
    reduce, reduce.all)
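
A hedged call sketch using only the formals listed above; left.path and right.path are placeholders for existing DFS inputs, and the argument values shown are assumptions, not documented behavior:

joined = equijoin(left.input = left.path,
                  right.input = right.path,
                  output = "joined")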
out1 = mapreduce(…)
mapreduce(input = out1, <xyz>)
mapreduce(input = out1, <abc>)

abstract.job = function(input, output, …) {
   …
   result = mapreduce(input = input,
                      output = output)
   …
   result}
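
The same pattern made concrete: a hedged sketch of a parametrized job, where threshold is a hypothetical parameter captured by the map closure.

filter.job = function(input, output, threshold)
  mapreduce(input = input,
            output = output,
            map = function(k, v) if (v > threshold) keyval(k, v))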
input.format, output.format, format
combine
reduce.on.data.frame
local, hadoop backends (see the sketch below)
backend.parameters
profiling
verbose
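
The local backend makes a job debuggable without a cluster. The option-setting call is an assumption: later rmr2 releases expose rmr.options(backend = ...), and other versions may differ, so check your installed API.

rmr.options(backend = "local")    # run map and reduce in-process, no Hadoop needed
out = mapreduce(input = to.dfs(1:10),
                map = function(k, v) keyval(v, 2 * v))
from.dfs(out)
rmr.options(backend = "hadoop")   # switch back to the cluster backend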
RHADOOP USER
ONE FAT CLUSTER AVE.
HYDROPOWER CITY, OR 0x0000

RHADOOP@REVOLUTIONANALYTICS.COM


Editor's notes

  1. What is R; what is RHadoop: an open source project, started by Revolution, that aims to make R and Hadoop work together; what is Revolution.
  5. Faster, assured builds. Large Data extensions. Web deployments. Tech support. Consulting services. Training.
  7. Hadoop brings horizontal scalability; R brings sophisticated analytics. The combination could be powerful.
  8-24. Hadoop is one project but also a family of projects. We started the integration path with three projects targeting three members of the Hadoop family. rhdfs provides access to the HDFS file system; it can be divided into two sub-APIs, file level and byte level.
  25. A way to access big data sets.
  26. A simple way to write parallel programs: everyone will have to.
  27. Very R-like, building on the functional characteristics of R.
  28. Just a library.
  29-35. Much simpler than writing Java. Not as simple as Hive or Pig at what they do, but more general. Great for prototyping; can transition to production: optimize instead of rewriting. Lower risk, always executable.
  36-43. mapreduce is the first and most important element of the API. Input can be as simple as a path; output the same, or skip it for managed space with stubs. map and reduce are simple R functions, as opposed to Rhipe.
  44-56. Simple map example: filtering. Reduce example: counting.
  57-62. Easy to parametrize jobs.
  63. The second pillar of the API: the memory-HDFS bridge.
  64-65. A language like Hive makes a class of problems easy to solve, but it is not a general tool. The cost of doing the same operation in rmr is modest, and it provides a broader set of capabilities.
  66-69. kmeans implementation in two simple functions; note how easy it is to get data in and out of the cluster.
  70. Skip quickly to the other slides; notice three different languages.
  73-75. More things you can do combining the elements of the API.