Datamining R 4th
R: apply / Fisher
sesejun@is.ocha.ac.jp
2009/11/19
USPS
ImageName     Class   0,0   0,1   0,2   0,3   0,4
img_2_00_02   1       0     0     0     0     0
img_2_00_03   1       0     38    22    0     0
img_2_00_05   1       13    0     64    13    42
...
img_0_00_09   -1      34    53    0     38    0
img_0_00_28   -1      0     64    0     98    93
img_0_01_08   -1      13    0     0     59    13
img_0_03_05   -1      34    34    0     0     0
[Example USPS images: img_3_29_25, img_5_03_31, img_3_06_30, img_3_17_08]
k-NN
Apply Family
•  Apply a function to every row, column, or element of a matrix, list, or data frame
   •  Usually shorter and clearer than an explicit for loop

    apply(X, 1, f)       apply f to each row of X
    apply(X, 2, f)       apply f to each column of X
    apply(X, c(1,2), f)  apply f to each element of X
    lapply(X, f)         apply f to each element of a list (or each column of a dataframe); returns a list
    sapply(X, f)         like lapply, but the result is simplified to a vector or table when possible
    sweep(X, M, V)       subtract V from each row (M=1), column (M=2), or element (M=c(1,2)) of X
                         (see the sweep example after the session below)
> m <- matrix((1:9)**2, nrow=3)
> m
     [,1] [,2] [,3]
[1,]    1   16   49
[2,]    4   25   64
[3,]    9   36   81
> apply(m, 1, sum)
[1] 66 93 126
> apply(m, 2, sum)
[1] 14 77 194
> apply(m, c(1,2), sqrt)
     [,1] [,2] [,3]
[1,]    1    4    7
[2,]    2    5    8
[3,]    3    6    9

> l <- list(a=1:3, b=4:6)
> l
$a
[1] 1 2 3

$b
[1] 4 5 6

> lapply(l, sum)
$a
[1] 6

$b
[1] 15

> sapply(l, sum)
 a  b
 6 15
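
sweep() from the table above has no example of its own, although the K-NN code below depends on it. The following is a minimal sketch (not from the original slides), reusing the matrix m defined above:

> sweep(m, 2, c(1, 16, 49))   # subtract 1, 16 and 49 from columns 1, 2 and 3 (M=2)
     [,1] [,2] [,3]
[1,]    0    0    0
[2,]    3    9   15
[3,]    8   20   32
> sweep(m, 1, c(1, 4, 9))     # subtract 1, 4 and 9 from rows 1, 2 and 3 (M=1)
     [,1] [,2] [,3]
[1,]    0   15   48
[2,]    0   21   60
[3,]    0   27   72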
K-NN
 • Classify a query from the IRIS test set step by step: compute its distance to
   every training record, take the k nearest neighbors, and predict the majority class
> iris.train <- read.table("iris_train.csv", sep=",", header=T)
> iris.test <- read.table("iris_test.csv", sep=",", header=T)

# The first test record (its 4 feature columns) is the query
> q <- iris.test[1,1:4]

# Subtract the query from every training row (sweep over the columns)
> diff <- sweep(iris.train[1:4], 2, t(q))

# Squared difference per feature
> diff * diff

# Squared Euclidean distance from the query to each training row
> distquery <- apply(diff * diff, 1, sum)

# sort() returns the distances themselves in increasing order
> sort(distquery)

# order() returns the row indices, nearest first
> order(distquery)

# The 5 nearest training records and their classes
> iris.train[order(distquery)[1:5],]

> iris.train[order(distquery)[1:5],]$Class

# Classes of the 5 nearest neighbors
> knnclasses <- iris.train[order(distquery)[1:5],]$Class

# Count how often each class occurs among the neighbors
> as.factor(table(knnclasses))

# Sort the counts so the majority class comes first
> sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T)

# Name of the majority class = predicted class
> labels(sortedtable)[1]

> predclass <- labels(sortedtable)[1]

# Does the prediction match the true class of the first test record?
> predclass == iris.test$Class[1]
> knnpredict <- function(train, class, query, k) {
+   diff <- sweep(train, 2, query)                # subtract the query from every training row
+   distquery <- apply(diff * diff, 1, sum)       # squared Euclidean distance to each row
+   knnclasses <- class[order(distquery)[1:k]]    # classes of the k nearest neighbors
+   sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T)
+   labels(sortedtable)[1]                        # name of the majority class
+   }

> knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[1,1:4]), 5)

> knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[10,1:4]), 1)


> for (i in 1:length(rownames(iris.test))) {
+   pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[i,1:4]), 10)
+   result <- pred == iris.test[i,]$Class
+   cat(paste(pred, iris.test[i,]$Class, result, sep="\t"))
+   cat("\n")
+ }
> resvec <- c()
> for (i in 1:30) {
+   pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[i,1:4]), 10)
+   resvec <- append(resvec, pred == iris.test[i,]$Class)
+ }
> sum(resvec)/length(resvec)
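
The same accuracy estimate can be written without an explicit loop, tying back to the apply family. This is a possible rewrite (not from the original slides), assuming iris.train, iris.test and knnpredict are defined as above:

> preds <- sapply(1:nrow(iris.test),
+                 function(i) knnpredict(iris.train[1:4], iris.train$Class,
+                                        t(iris.test[i,1:4]), 10))
> sum(preds == iris.test$Class) / nrow(iris.test)   # fraction of correct predictions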
SVM
> iris.train <- read.table("iris_train.csv", sep=",", header=T)
> iris.test <- read.table("iris_test.csv", sep=",", header=T)

> library("e1071")

> iris.model <- svm(iris.train[1:4], iris.train$Class)

> iris.pred <- predict(iris.model, iris.test[1:4])

> table(iris.pred, iris.test$Class)

iris.pred         Iris-setosa Iris-versicolor Iris-virginica
  Iris-setosa               7               0              0
  Iris-versicolor           0               9              0
  Iris-virginica            0               0             14
> iris.model <- svm(iris.train[1:4], iris.train$Class, kernel="linear")

> iris.pred <- predict(iris.model, iris.test[1:4])

> table(iris.pred, iris.test$Class)

iris.pred         Iris-setosa Iris-versicolor Iris-virginica
  Iris-setosa               7               0              0
  Iris-versicolor           0               9              0
  Iris-virginica            0               0             14
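
A single accuracy number can be read off the confusion matrix: correct predictions sit on the diagonal. A minimal sketch in base R (not from the original slides), storing the table first:

> tab <- table(iris.pred, iris.test$Class)
> sum(diag(tab)) / sum(tab)   # = (7+9+14)/30 for the matrix shown above
[1] 1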
Exercises
1. IRIS data (3 classes)
   1. Use the 4 attributes ("Sepal.length", "Sepal.width", "Petal.length",
      "Petal.width") of the IRIS data.
   2. Classify the IRIS data with K-NN.

2. USPS data
   1. Classify the USPS digits (0-9) with 5-NN.
   2. Vary K in K-NN and compare the results (a starting sketch follows this list).
   3. Classify the USPS data with an SVM using the radial kernel.
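
As a starting point for exercise 2.2, the loop below (a sketch, not part of the original slides) tries several values of K on the IRIS data; the same pattern carries over to the USPS data once it is loaded:

> for (k in c(1, 3, 5, 10)) {          # hypothetical choice of K values
+   preds <- sapply(1:nrow(iris.test),
+                   function(i) knnpredict(iris.train[1:4], iris.train$Class,
+                                          t(iris.test[i,1:4]), k))
+   cat(paste(k, sum(preds == iris.test$Class) / nrow(iris.test), sep="\t"))
+   cat("\n")
+ }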
