SlideShare una empresa de Scribd logo
1 de 49
Descargar para leer sin conexión
R
    Tsukuba.R #9 (2011/11/12)
                    @a_bicky
• Takeshi Arabiki                         1

    ‣ Twitter: @a_bicky
    ‣        : id:a_bicky

•
                              R

•
                http://d.hatena.ne.jp/a_bicky/
• Takeshi Arabiki                          1

    ‣ Twitter: @a_bicky
    ‣        : id:a_bicky

•
                              R        SciPy



•
                http://d.hatena.ne.jp/a_bicky/
Osaka.R #4                                     Tokyo.R #16




http://www.slideshare.net/abicky/twitterr   http://www.slideshare.net/abicky/r-9034336
•
•             R                    8   ,9

•
•
•
•

    http://www.amazon.co.jp/gp/product/4431712186
reshape2
> install.packages("reshape2")
> library(reshape2)
> head(tips) #
  total_bill tip     sex smoker   day     time size
1      16.99 1.01 Female     No   Sun   Dinner    2
2      10.34 1.66   Male     No   Sun   Dinner    3
3      21.01 3.50   Male     No   Sun   Dinner    3
4      23.68 3.31   Male     No   Sun   Dinner    2
5      24.59 3.61 Female     No   Sun   Dinner    4
6      25.29 4.71   Male     No   Sun   Dinner    4
tips

  total_bill:
  tip:
  sex:          Male, Female
  smoker:                             Yes, No
  day:          Thur, Fri, Sat, Sun
  time:             Lunch, Dinner
  size:
•
•
    •   subset
    •   cbind, [, $, [[
    •   transform, within
•
    •   subset
    •   cbind, [, $, [[
    •   transform, within
•
•                           order
•
> class(tips)
[1] "data.frame"
> mode(tips)     # data.frame list
[1] "list"
> head(tips[["total_bill"]])    # list
[1] 16.99 10.34 21.01 23.68 24.59 25.29
> head(tips$total_bill)         #
[1] 16.99 10.34 21.01 23.68 24.59 25.29
> head(tips["total_bill"])      #       data.frame
  total_bill
1      16.99
2      10.34
3      21.01
4      23.68
5      24.59
6      25.29
> head(tips[c("total_bill", "tip")]) #
  total_bill tip
1      16.99 1.01
2      10.34 1.66
3      21.01 3.50
4      23.68 3.31
5      24.59 3.61
6      25.29 4.71
> head(tips[[c("total_bill", "tip")]]) #
Error in .subset2(x, i, exact = exact) : subscript out of bounds
> tips[[c(1, 2)]] # tips[[1]][[2]]
[1] 10.34
> tips[1:2, 1:2]    #
  total_bill tip
1      16.99 1.01
2      10.34 1.66
> tips[1:2, c("total_bill", "tip")]   #
  total_bill tip
1      16.99 1.01
2      10.34 1.66
> head(tips[-(1:2), -(1:2)])    #
     sex smoker day   time size
3   Male     No Sun Dinner    3
4   Male     No Sun Dinner    2
5 Female     No Sun Dinner    4
6   Male     No Sun Dinner    4
7   Male     No Sun Dinner    2
8   Male     No Sun Dinner    4
subset


> args(subset.data.frame)
function (x, subset, select, drop = FALSE, ...)
NULL
> (tips.vip <- subset(tips, total_bill > 30 & size == 2))
     total_bill tip sex smoker day     time size
84        32.68 5.00 Male   Yes Thur Lunch      2
174       31.85 3.18 Male   Yes Sun Dinner      2
176       32.90 3.11 Male   Yes Sun Dinner      2
180       34.63 3.55 Male   Yes Sun Dinner      2
185       40.55 3.00 Male   Yes Sun Dinner      2
238       32.83 1.17 Male   Yes Sat Dinner      2
> levels(tips.vip$smoker) #
[1] "No" "Yes"
> levels(droplevels(tips.vip)$smoker)   #
[1] "Yes"
cbind, [, $, [[


> head(cbind(tips, type = ifelse(tips$tip < 2, "         ", "   ")), 3)
  total_bill tip     sex smoker day   time size     type
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner      3
3      21.01 3.50   Male     No Sun Dinner      3
> tips$type <- ifelse(tips$tip < 2, "    ", "       ")
> head(tips, 3)
  total_bill tip     sex smoker day   time size     type
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner      3
3      21.01 3.50   Male     No Sun Dinner      3
> data(tips)   #
transform, within

> args(transform.data.frame)
function (`_data`, ...)
NULL
> head(transform(tips, type = ifelse(tips$tip < 2, "         ", "   ")), 3)
  total_bill tip     sex smoker day   time size       type
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner        3
3      21.01 3.50   Male     No Sun Dinner        3
> args(within.data.frame)
function (data, expr, ...)
NULL
> head(within(tips, { type <- c() # within
+                     type[tip < 2] <- "      "
+                     type[tip >= 2] <- "    " }), 3)
  total_bill tip     sex smoker day   time size       type
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner        3
3      21.01 3.50   Male     No Sun Dinner        3
subset

> # subset
> head(subset(tips, select   = c(tip, sex, smoker)), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = 2:4), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = -c(total_bill, size, time, day)), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = -c(1, 5:7)), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = c(tip:smoker)), 1)
   tip    sex smoker
1 1.01 Female     No
> head(subset(tips, select   = -c(total_bill, day:size)), 1)
   tip    sex smoker
1 1.01 Female     No
[, $, [[

> # NULL
> tips$size <- NULL
> head(tips, 3)
  total_bill tip      sex smoker day   time
1      16.99 1.01 Female      No Sun Dinner
2      10.34 1.66    Male     No Sun Dinner
3      21.01 3.50    Male     No Sun Dinner
> tips[["time"]] <- NULL
> head(tips, 3)
  total_bill tip      sex smoker day
1      16.99 1.01 Female      No Sun
2      10.34 1.66    Male     No Sun
3      21.01 3.50    Male     No Sun
> tips["day"] <- NULL; tips[1] <- NULL
> head(tips, 3)
   tip    sex smoker
1 1.01 Female     No
2 1.66   Male     No
3 3.50   Male     No
> data(tips)
transform, within


> # NULL
> head(transform(tips, total_bill = NULL, size = NULL, time = NULL,
+                day = NULL), 3)
   tip     sex smoker
1 1.01 Female      No
2 1.66    Male     No
3 3.50    Male     No
> # rm
> head(within(tips, rm(total_bill, size, time, day)), 3)
   tip     sex smoker
1 1.01 Female      No
2 1.66    Male     No
3 3.50    Male     No
> head(transform(tips, tip = 10), 3)
  total_bill tip    sex smoker day   time size
1      16.99 10 Female      No Sun Dinner    2
2      10.34 10    Male     No Sun Dinner    3
3      21.01 10    Male     No Sun Dinner    3
> head(within(tips, tip <- 10), 3)
  total_bill tip    sex smoker day   time size
1      16.99 10 Female      No Sun Dinner    2
2      10.34 10    Male     No Sun Dinner    3
3      21.01 10    Male     No Sun Dinner    3
> tips$tip <- 10
> head(tips, 3)
  total_bill tip    sex smoker day   time size
1      16.99 10 Female      No Sun Dinner    2
2      10.34 10    Male     No Sun Dinner    3
3      21.01 10    Male     No Sun Dinner    3
> data(tips)
order


> head(tips[order(tips$sex), ], 4) #
    total_bill tip      sex smoker day    time size
1        16.99 1.01 Female      No Sun Dinner     2
5        24.59 3.61 Female      No Sun Dinner     4
12       35.26 5.00 Female      No Sun Dinner     4
15       14.83 3.02 Female      No Sun Dinner     2
> head(tips[order(tips$sex, decreasing = TRUE), ], 4)   #
   total_bill tip sex smoker day       time size
2       10.34 1.66 Male      No Sun Dinner     3
3       21.01 3.50 Male      No Sun Dinner     3
4       23.68 3.31 Male      No Sun Dinner     2
6       25.29 4.71 Male      No Sun Dinner     4
> head(tips[order(tips$sex, tips$tip), ], 4) #
     total_bill tip      sex smoker day     time size
68         3.07 1.00 Female     Yes Sat Dinner      1
93         5.75 1.00 Female     Yes Fri Dinner      2
112        7.25 1.00 Female      No Sat Dinner      1
1         16.99 1.01 Female      No Sun Dinner      2
data.frame


> (tip <- data.frame(date = sample(seq(as.Date("2011-11-09"), by = "day", len = 4)),
+                   total_bill = sample(1:4 * 10),
+                   tip = sample(1:4)))
        date total_bill tip
1 2011-11-10         30   4
2 2011-11-12         40   2
3 2011-11-11         10   1
4 2011-11-09         20   3
> #
> tip <- tip[order(tip$date), ]
> transform(tip, total_bill = cumsum(total_bill), tip = cumsum(tip))
        date total_bill tip
4 2011-11-09         20   3
1 2011-11-10         50   7
3 2011-11-11         60   8
2 2011-11-12        100 10
> head(tips[c("tip", "total_bill", "sex", "size", "time", "day", "smoker")])
  tip total_bill    sex size   time day smoker
1 10       16.99 Female    2 Dinner Sun     No
2 10       10.34   Male    3 Dinner Sun     No
3 10       21.01   Male    3 Dinner Sun     No
4 10       23.68   Male    2 Dinner Sun     No
5 10       24.59 Female    4 Dinner Sun     No
6 10       25.29   Male    4 Dinner Sun     No
•
•   table
•   xtabs
•           aggregate
•           by
> args(colSums)
function (x, na.rm = FALSE, dims = 1L)
NULL
> colSums(subset(tips, select = c(total_bill, tip)), na.rm = TRUE)
total_bill         tip
   4827.77      731.58
> args(colMeans)
function (x, na.rm = FALSE, dims = 1L)
NULL
> colMeans(subset(tips, select = c(total_bill, tip)), na.rm = TRUE)
total_bill         tip
 19.785943   2.998279
> # apply                colSums
> apply(subset(tips, select = c(total_bill, tip)), 2, sum, na.rm = TRUE)
total_bill         tip
   4827.77      731.58
table

> args(table)
function (..., exclude = if    (useNA == "no") c(NA, NaN), useNA = c("no",
     "ifany", "always"), dnn   = list.names(...), deparse.level = 1)
NULL
> table(subset(tips, select    = c(sex, smoker)))
         smoker
sex       No Yes
  Female 54 33
  Male    97 60
>    # 4
> table(subset(tips, select    = c(sex, smoker, day, size)))
, , day = Fri, size = 1

        smoker
sex      No Yes
  Female 0     0
  Male    0    1
table

> args(addmargins)
function (A, margin = seq_along(dim(A)), FUN = sum, quiet = FALSE)
NULL
> #
> addmargins(table(subset(tips, select = c(sex, smoker))))
        smoker
sex       No Yes Sum
  Female 54 33 87
  Male    97 60 157
  Sum    151 93 244
> #
> args(prop.table)
function (x, margin = NULL)
NULL
> prop.table(table(subset(tips, select = c(sex, smoker))))
        smoker
sex             No       Yes
  Female 0.2213115 0.1352459
  Male   0.3975410 0.2459016
xtabs

> args(xtabs)
function (formula = ~., data = parent.frame(), subset, sparse = FALSE,
     na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE)
NULL
> #
> xtabs(~ sex + smoker, tips)
         smoker
sex       No Yes
  Female 54 33
  Male    97 60
> #
> xtabs(cbind(total_bill, tip) ~ sex + smoker, tips)
, , = total_bill

        smoker
sex           No     Yes
  Female 977.68 593.27
  Male   1919.75 1337.07
aggregate

> args(aggregate.data.frame)
function (x, by, FUN, ..., simplify = TRUE)
NULL
> # FUN              1
> aggregate(tips[c("total_bill", "tip")], tips[c("sex", "day")], sum)
      sex day total_bill     tip
1 Female Fri      127.31 25.03
2    Male Fri     198.57 26.93
3 Female Sat      551.05 78.45
4    Male Sat    1227.35 181.95
5 Female Sun      357.70 60.61
6    Male Sun    1269.46 186.78
7 Female Thur     534.89 82.42
8    Male Thur    561.44 89.41
> # formula
> aggregate(cbind(total_bill, tip) ~ sex + day, tips, sum)
      sex day total_bill     tip
1 Female Fri      127.31 25.03
by

> args(by)
function (data, INDICES, FUN, ..., simplify = TRUE)
NULL
> # aggregate          FUN               OK
> (ret <- by(tips[c("total_bill", "tip")], tips[c("sex", "day")], range))
sex: Female
day: Fri
[1] 1.00 22.75
------------------------------------------------------------
sex: Male
day: Fri
[1] 1.50 40.17


> # data.frame
> cbind(expand.grid(dimnames(ret)), do.call(rbind, ret))
     sex day     1     2
1 Female Fri 1.00 22.75
2   Male Fri 1.50 40.17
•           reshape
•   merge
reshape

> args(reshape)
function (data, varying = NULL, v.names = NULL, timevar = "time",
     idvar = "id", ids = 1L:NROW(data), times = seq_along(varying[[1L]]),
     drop = NULL, direction, new.row.names = NULL, sep = ".",
     split = if (sep == "") {
          list(regexp = "[A-Za-z][0-9]", include = TRUE)
     } else {
          list(regexp = sep, include = FALSE, fixed = TRUE)
     })
NULL
> head(reshape(tips, idvar = c("sex", "smoker", "time", "size"),
+                timevar = "day", drop = "total_bill", direction = "wide"))
        sex smoker   time size tip.Sun tip.Sat tip.Thur tip.Fri
1 Female        No Dinner    2    1.01    2.75        3     3.25
2     Male      No Dinner    3    1.66    3.35       NA       NA
4     Male      No Dinner    2    3.31    4.08       NA     3.50
5 Female        No Dinner    4    3.61    2.45       NA       NA
6     Male      No Dinner    4    4.71    7.58       NA       NA
17 Female       No Dinner    3    1.67    3.07       NA       NA
reshape


> # idvar    timevar
> (a <- data.frame(a = c(1:3, 1), b = c(1:3, 1), c = 1:4))
  a b c
1 1 1 1
2 2 2 2
3 3 3 3
4 1 1 4
> reshape(a, idvar = "a", timevar = "b", direction = "wide")
  a c.1 c.2 c.3
1 1   1  NA  NA
2 2  NA   2  NA
3 3  NA  NA   3
merge

> #
> (user.type <- data.frame(sex = rep(c("Male", "Female"), each = 2),
+                           smoker = c("Yes", "No"),
+                           type = LETTERS[1:4]))
      sex smoker type
1    Male    Yes    A
2    Male     No    B
3 Female     Yes    C
4 Female      No    D
> args(merge.data.frame)
function (x, y, by = intersect(names(x), names(y)), by.x = by,
     by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
     suffixes = c(".x", ".y"), incomparables = NULL, ...)
NULL
> merge(tips, user.type, by = c("sex", "smoker"), sort = FALSE)[54:55, ]
       sex smoker total_bill tip day     time size type
54 Female      No      10.65 1.50 Thur Lunch      2   D
55    Male     No      10.27 1.71 Sun Dinner      2   B
•
•   R
•   reshape2
•   melt
•   cast
•
Excel
R

> acast(melt(tips, id.var = c("sex", "smoker", "day"), measure.var = "tip"),
+        sex + smoker ~ day, sum, margins = TRUE)
                Fri    Sat    Sun   Thur (all)
Female_No      6.25 35.42 46.61 61.49 149.77
Female_Yes    18.78 43.03 14.00 18.93 94.74
Female_(all) 25.03 78.45 60.61 80.42 244.51
Male_No        5.00 104.21 133.96 58.83 302.00
Male_Yes      21.93 77.74 52.82 30.58 183.07
Male_(all)    26.93 181.95 186.78 89.41 485.07
(all)_(all) 51.96 260.40 247.39 169.83 729.58




                                                       reshape2
reshape2

  melt                                    cast

 melt
 id
> head(tipsm <- melt(tips, measure.vars = c("total_bill", "tip")))
     sex smoker day    time size   variable value
1 Female     No Sun Dinner     2 total_bill 16.99
2   Male     No Sun Dinner     3 total_bill 10.34
3   Male     No Sun Dinner     3 total_bill 21.01
4   Male     No Sun Dinner     2 total_bill 23.68
5 Female     No Sun Dinner     4 total_bill 24.59
6   Male     No Sun Dinner     4 total_bill 25.29
> levels(tipsm$variable)
[1] "total_bill" "tip"
melt
> args(melt.data.frame)
function (data, id.vars, measure.vars, variable_name = "variable",
     na.rm = !preserve.na, preserve.na = TRUE, ...)
NULL
> #                  factor     id
> head(melt(tips), 1)
Using sex, smoker, day, time as id variables
      sex smoker day   time   variable value
1 Female      No Sun Dinner total_bill 16.99
> # id     measure
> head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size")), 1)
      sex smoker day   time size   variable value
1 Female      No Sun Dinner    2 total_bill 16.99
> # id      measure
> head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size"),
+                  measure.vars = "tip"), 1)
      sex smoker day   time size variable value
1 Female      No Sun Dinner    2      tip 1.01
cast
formula                                    fun.aggregate
 > args(acast) #         array                       acast
 function (data, formula, fun.aggregate    = NULL, ..., margins = NULL,
      subset = NULL, fill = NULL, drop =   TRUE, value_var = guess_value(data))
 NULL
 > args(dcast) #         data.frame                          dcast
 function (data, formula, fun.aggregate    = NULL, ..., margins = NULL,
      subset = NULL, fill = NULL, drop =   TRUE, value_var = guess_value(data))
 NULL


     formula
 ...
 .
 acast     hoge ~ fuga ~ piyo
 ※dcast     1                              hoge ~ fuga + piyo
> tipsm <- melt(tips, measure.vars = c("total_bill", "tip"))
> acast(tipsm, sex ~ smoker, length)
         No Yes
Female 108 64
Male    194 120
> #
> acast(tipsm, smoker ~ sex, length)
     Female Male
No      108 194
Yes      64 120
> #
> acast(tipsm, sex ~ smoker, length, margins = TRUE)
         No Yes (all)
Female 108 64     172
Male    194 120   314
(all) 302 184     486
> # size
> acast(tipsm, smoker ~ sex + size, length)
    Female_1 Female_2 Female_3 Female_4 Female_5 Female_6 Male_1 Male_2 Male_3
No         4       66       18       14        2        4      0    114     34
Yes        2       48       10        4        0        0      2     82     14
    Male_4 Male_5 Male_6
No      38      4      4
Yes     18      4      0
> # 3
> acast(tipsm, smoker ~ sex ~ size, length)
, , 1

    Female Male
No       4    0
Yes      2    2
> #             sum
> acast(tipsm, sex ~ day, sum)
          Fri    Sat     Sun   Thur
Female 152.34  629.5  418.31 617.31
Male   225.50 1409.3 1456.24 650.85
# note: each cell above is total_bill and tip summed together
> # total_bill      tip             sum
> acast(tipsm, sex + variable ~ day, sum)
                      Fri      Sat      Sun  Thur
Female_total_bill 127.31 551.05 357.70 534.89
Female_tip          25.03    78.45    60.61 82.42
Male_total_bill   198.57 1227.35 1269.46 561.44
Male_tip            26.93 181.95 186.78 89.41
> #             tip      sum
> acast(tipsm, sex ~ day, sum, subset = .(variable == "tip"))
         Fri    Sat     Sun Thur
Female 25.03 78.45 60.61 82.42
Male   26.93 181.95 186.78 89.41
reshape2   aggregate table   xtabs
Rデータフレーム自由自在

Más contenido relacionado

La actualidad más candente

心理学のためのPsychパッケージ
心理学のためのPsychパッケージ心理学のためのPsychパッケージ
心理学のためのPsychパッケージ
考司 小杉
 
状態空間モデルの考え方・使い方 - TokyoR #38
状態空間モデルの考え方・使い方 - TokyoR #38状態空間モデルの考え方・使い方 - TokyoR #38
状態空間モデルの考え方・使い方 - TokyoR #38
horihorio
 
ブートストラップ法とその周辺とR
ブートストラップ法とその周辺とRブートストラップ法とその周辺とR
ブートストラップ法とその周辺とR
Daisuke Yoneoka
 
傾向スコア:その概念とRによる実装
傾向スコア:その概念とRによる実装傾向スコア:その概念とRによる実装
傾向スコア:その概念とRによる実装
takehikoihayashi
 
第五回統計学勉強会@東大駒場
第五回統計学勉強会@東大駒場第五回統計学勉強会@東大駒場
第五回統計学勉強会@東大駒場
Daisuke Yoneoka
 
統計的因果推論 勉強用 isseing333
統計的因果推論 勉強用 isseing333統計的因果推論 勉強用 isseing333
統計的因果推論 勉強用 isseing333
Issei Kurahashi
 

La actualidad más candente (20)

心理学のためのPsychパッケージ
心理学のためのPsychパッケージ心理学のためのPsychパッケージ
心理学のためのPsychパッケージ
 
『バックドア基準の入門』@統数研研究集会
『バックドア基準の入門』@統数研研究集会『バックドア基準の入門』@統数研研究集会
『バックドア基準の入門』@統数研研究集会
 
データ解析7 主成分分析の基礎
データ解析7 主成分分析の基礎データ解析7 主成分分析の基礎
データ解析7 主成分分析の基礎
 
Rの高速化
Rの高速化Rの高速化
Rの高速化
 
PyMCがあれば,ベイズ推定でもう泣いたりなんかしない
PyMCがあれば,ベイズ推定でもう泣いたりなんかしないPyMCがあれば,ベイズ推定でもう泣いたりなんかしない
PyMCがあれば,ベイズ推定でもう泣いたりなんかしない
 
Rで因子分析 商用ソフトで実行できない因子分析のあれこれ
Rで因子分析 商用ソフトで実行できない因子分析のあれこれRで因子分析 商用ソフトで実行できない因子分析のあれこれ
Rで因子分析 商用ソフトで実行できない因子分析のあれこれ
 
Stan超初心者入門
Stan超初心者入門Stan超初心者入門
Stan超初心者入門
 
状態空間モデルの考え方・使い方 - TokyoR #38
状態空間モデルの考え方・使い方 - TokyoR #38状態空間モデルの考え方・使い方 - TokyoR #38
状態空間モデルの考え方・使い方 - TokyoR #38
 
階層ベイズとWAIC
階層ベイズとWAIC階層ベイズとWAIC
階層ベイズとWAIC
 
Rで計量時系列分析~CRANパッケージ総ざらい~
Rで計量時系列分析~CRANパッケージ総ざらい~ Rで計量時系列分析~CRANパッケージ総ざらい~
Rで計量時系列分析~CRANパッケージ総ざらい~
 
2 3.GLMの基礎
2 3.GLMの基礎2 3.GLMの基礎
2 3.GLMの基礎
 
統計的因果推論からCausalMLまで走り抜けるスライド
統計的因果推論からCausalMLまで走り抜けるスライド統計的因果推論からCausalMLまで走り抜けるスライド
統計的因果推論からCausalMLまで走り抜けるスライド
 
ブートストラップ法とその周辺とR
ブートストラップ法とその周辺とRブートストラップ法とその周辺とR
ブートストラップ法とその周辺とR
 
Rで学ぶ離散選択モデル
Rで学ぶ離散選択モデルRで学ぶ離散選択モデル
Rで学ぶ離散選択モデル
 
傾向スコア:その概念とRによる実装
傾向スコア:その概念とRによる実装傾向スコア:その概念とRによる実装
傾向スコア:その概念とRによる実装
 
第五回統計学勉強会@東大駒場
第五回統計学勉強会@東大駒場第五回統計学勉強会@東大駒場
第五回統計学勉強会@東大駒場
 
コピュラと金融工学の新展開(?)
コピュラと金融工学の新展開(?)コピュラと金融工学の新展開(?)
コピュラと金融工学の新展開(?)
 
統計的因果推論 勉強用 isseing333
統計的因果推論 勉強用 isseing333統計的因果推論 勉強用 isseing333
統計的因果推論 勉強用 isseing333
 
Introduction to statistics
Introduction to statisticsIntroduction to statistics
Introduction to statistics
 
WI2研究会(公開用) “データ分析でよく使う前処理の整理と対処”
WI2研究会(公開用) “データ分析でよく使う前処理の整理と対処” WI2研究会(公開用) “データ分析でよく使う前処理の整理と対処”
WI2研究会(公開用) “データ分析でよく使う前処理の整理と対処”
 

Destacado

VLDB2013 Session 1 Emerging Hardware
VLDB2013 Session 1 Emerging HardwareVLDB2013 Session 1 Emerging Hardware
VLDB2013 Session 1 Emerging Hardware
Takuma Wakamori
 
ICDE2014 Session 14 Data Warehousing
ICDE2014 Session 14 Data WarehousingICDE2014 Session 14 Data Warehousing
ICDE2014 Session 14 Data Warehousing
Takuma Wakamori
 
巨大な表を高速に扱うData.table について
巨大な表を高速に扱うData.table について巨大な表を高速に扱うData.table について
巨大な表を高速に扱うData.table について
Haruka Ozaki
 

Destacado (13)

VLDB2013 Session 1 Emerging Hardware
VLDB2013 Session 1 Emerging HardwareVLDB2013 Session 1 Emerging Hardware
VLDB2013 Session 1 Emerging Hardware
 
ICDE2014 Session 14 Data Warehousing
ICDE2014 Session 14 Data WarehousingICDE2014 Session 14 Data Warehousing
ICDE2014 Session 14 Data Warehousing
 
ICDE2015 Research 3: Distributed Storage and Processing
ICDE2015 Research 3: Distributed Storage and ProcessingICDE2015 Research 3: Distributed Storage and Processing
ICDE2015 Research 3: Distributed Storage and Processing
 
データ・ビジュアライゼーション&ストーリーテリングを学ぶ!ハンズオンセミナー
データ・ビジュアライゼーション&ストーリーテリングを学ぶ!ハンズオンセミナーデータ・ビジュアライゼーション&ストーリーテリングを学ぶ!ハンズオンセミナー
データ・ビジュアライゼーション&ストーリーテリングを学ぶ!ハンズオンセミナー
 
統計を始める方へ①_データ環境Rの基本的なプログラミング|データアーティスト
統計を始める方へ①_データ環境Rの基本的なプログラミング|データアーティスト統計を始める方へ①_データ環境Rの基本的なプログラミング|データアーティスト
統計を始める方へ①_データ環境Rの基本的なプログラミング|データアーティスト
 
AI(人工知能)インフォグラフィックス【時間をかけずにすぐわかる】
AI(人工知能)インフォグラフィックス【時間をかけずにすぐわかる】AI(人工知能)インフォグラフィックス【時間をかけずにすぐわかる】
AI(人工知能)インフォグラフィックス【時間をかけずにすぐわかる】
 
Rstudio事始め
Rstudio事始めRstudio事始め
Rstudio事始め
 
巨大な表を高速に扱うData.table について
巨大な表を高速に扱うData.table について巨大な表を高速に扱うData.table について
巨大な表を高速に扱うData.table について
 
遅延価値観数と階層ベイズを用いた男心をくすぐる女の戦略.R
遅延価値観数と階層ベイズを用いた男心をくすぐる女の戦略.R遅延価値観数と階層ベイズを用いた男心をくすぐる女の戦略.R
遅延価値観数と階層ベイズを用いた男心をくすぐる女の戦略.R
 
Rの導入とRStudio事始め(改訂版)
Rの導入とRStudio事始め(改訂版)Rの導入とRStudio事始め(改訂版)
Rの導入とRStudio事始め(改訂版)
 
30分でわかる『R』によるデータ分析|データアーティスト
30分でわかる『R』によるデータ分析|データアーティスト30分でわかる『R』によるデータ分析|データアーティスト
30分でわかる『R』によるデータ分析|データアーティスト
 
Newman アルゴリズムによるソーシャルグラフのクラスタリング
Newman アルゴリズムによるソーシャルグラフのクラスタリングNewman アルゴリズムによるソーシャルグラフのクラスタリング
Newman アルゴリズムによるソーシャルグラフのクラスタリング
 
はじめての「R」
はじめての「R」はじめての「R」
はじめての「R」
 

Más de Takeshi Arabiki

Introduction to Japanese Morphological Analysis
Introduction to Japanese Morphological AnalysisIntroduction to Japanese Morphological Analysis
Introduction to Japanese Morphological Analysis
Takeshi Arabiki
 
Rのデータ構造とメモリ管理
Rのデータ構造とメモリ管理Rのデータ構造とメモリ管理
Rのデータ構造とメモリ管理
Takeshi Arabiki
 
Introduction to Favmemo for Immature Engineers
Introduction to Favmemo for Immature EngineersIntroduction to Favmemo for Immature Engineers
Introduction to Favmemo for Immature Engineers
Takeshi Arabiki
 

Más de Takeshi Arabiki (17)

開発の心得
開発の心得開発の心得
開発の心得
 
クックパッド特売情報 における自然言語処理 〜固有表現抽出を利用した検索システム〜
クックパッド特売情報 における自然言語処理 〜固有表現抽出を利用した検索システム〜クックパッド特売情報 における自然言語処理 〜固有表現抽出を利用した検索システム〜
クックパッド特売情報 における自然言語処理 〜固有表現抽出を利用した検索システム〜
 
Introduction to Japanese Morphological Analysis
Introduction to Japanese Morphological AnalysisIntroduction to Japanese Morphological Analysis
Introduction to Japanese Morphological Analysis
 
R による文書分類入門
R による文書分類入門R による文書分類入門
R による文書分類入門
 
Rのデータ構造とメモリ管理
Rのデータ構造とメモリ管理Rのデータ構造とメモリ管理
Rのデータ構造とメモリ管理
 
HTML5 Canvas で学ぶアフィン変換
HTML5 Canvas で学ぶアフィン変換HTML5 Canvas で学ぶアフィン変換
HTML5 Canvas で学ぶアフィン変換
 
Introduction to Favmemo for Immature Engineers
Introduction to Favmemo for Immature EngineersIntroduction to Favmemo for Immature Engineers
Introduction to Favmemo for Immature Engineers
 
Rのスコープとフレームと環境と
Rのスコープとフレームと環境とRのスコープとフレームと環境と
Rのスコープとフレームと環境と
 
twitteRで快適Rライフ!
twitteRで快適Rライフ!twitteRで快適Rライフ!
twitteRで快適Rライフ!
 
RではじめるTwitter解析
RではじめるTwitter解析RではじめるTwitter解析
RではじめるTwitter解析
 
R版Getopt::Longを作ってみた
R版Getopt::Longを作ってみたR版Getopt::Longを作ってみた
R版Getopt::Longを作ってみた
 
HMM, MEMM, CRF メモ
HMM, MEMM, CRF メモHMM, MEMM, CRF メモ
HMM, MEMM, CRF メモ
 
文字列カーネルによる辞書なしツイート分類 〜文字列カーネル入門〜
文字列カーネルによる辞書なしツイート分類 〜文字列カーネル入門〜文字列カーネルによる辞書なしツイート分類 〜文字列カーネル入門〜
文字列カーネルによる辞書なしツイート分類 〜文字列カーネル入門〜
 
Rデバッグあれこれ
RデバッグあれこれRデバッグあれこれ
Rデバッグあれこれ
 
はじめてのまっぷりでゅ〜す
はじめてのまっぷりでゅ〜すはじめてのまっぷりでゅ〜す
はじめてのまっぷりでゅ〜す
 
TwitterのデータをRであれこれ
TwitterのデータをRであれこれTwitterのデータをRであれこれ
TwitterのデータをRであれこれ
 
Twitterのデータを取得する準備
Twitterのデータを取得する準備Twitterのデータを取得する準備
Twitterのデータを取得する準備
 

Último

Architecting Cloud Native Applications
Architecting Cloud Native ApplicationsArchitecting Cloud Native Applications
Architecting Cloud Native Applications
WSO2
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
?#DUbAI#??##{{(☎️+971_581248768%)**%*]'#abortion pills for sale in dubai@
 
Why Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire businessWhy Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire business
panagenda
 

Último (20)

TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
 
Real Time Object Detection Using Open CV
Real Time Object Detection Using Open CVReal Time Object Detection Using Open CV
Real Time Object Detection Using Open CV
 
Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...
 
presentation ICT roal in 21st century education
presentation ICT roal in 21st century educationpresentation ICT roal in 21st century education
presentation ICT roal in 21st century education
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 
MS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsMS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectors
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024
 
Corporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptxCorporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptx
 
Automating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps ScriptAutomating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps Script
 
Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of Terraform
 
Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024
 
Architecting Cloud Native Applications
Architecting Cloud Native ApplicationsArchitecting Cloud Native Applications
Architecting Cloud Native Applications
 
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
 
Ransomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdfRansomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdf
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?
 
Why Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire businessWhy Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire business
 

Rデータフレーム自由自在

  • 1. R Tsukuba.R #9 (2011/11/12) @a_bicky
  • 2. • Takeshi Arabiki 1 ‣ Twitter: @a_bicky ‣ : id:a_bicky • R • http://d.hatena.ne.jp/a_bicky/
  • 3. • Takeshi Arabiki 1 ‣ Twitter: @a_bicky ‣ : id:a_bicky • R SciPy • http://d.hatena.ne.jp/a_bicky/
  • 4. Osaka.R #4 Tokyo.R #16 http://www.slideshare.net/abicky/twitterr http://www.slideshare.net/abicky/r-9034336
  • 5. • • R 8 ,9 • • • • http://www.amazon.co.jp/gp/product/4431712186
  • 6.
  • 7. reshape2 > install.packages("reshape2") > library(reshape2) > head(tips) # total_bill tip sex smoker day time size 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 4 23.68 3.31 Male No Sun Dinner 2 5 24.59 3.61 Female No Sun Dinner 4 6 25.29 4.71 Male No Sun Dinner 4
  • 8. tips total_bill: tip: sex: Male, Female smoker: Yes, No day: Thur, Fri, Sat, Sun time: Lunch, Dinner size:
  • 9.
  • 10. • • • subset • cbind, [, $, [[ • transform, within • • subset • cbind, [, $, [[ • transform, within • • order •
  • 11. > class(tips) [1] "data.frame" > mode(tips) # data.frame list [1] "list" > head(tips[["total_bill"]]) # list [1] 16.99 10.34 21.01 23.68 24.59 25.29 > head(tips$total_bill) # [1] 16.99 10.34 21.01 23.68 24.59 25.29 > head(tips["total_bill"]) # data.frame total_bill 1 16.99 2 10.34 3 21.01 4 23.68 5 24.59 6 25.29
  • 12. > head(tips[c("total_bill", "tip")]) # total_bill tip 1 16.99 1.01 2 10.34 1.66 3 21.01 3.50 4 23.68 3.31 5 24.59 3.61 6 25.29 4.71 > head(tips[[c("total_bill", "tip")]]) # Error in .subset2(x, i, exact = exact) : subscript out of bounds > tips[[c(1, 2)]] # tips[[1]][[2]] [1] 10.34
  • 13. > tips[1:2, 1:2] # total_bill tip 1 16.99 1.01 2 10.34 1.66 > tips[1:2, c("total_bill", "tip")] # total_bill tip 1 16.99 1.01 2 10.34 1.66 > head(tips[-(1:2), -(1:2)]) # sex smoker day time size 3 Male No Sun Dinner 3 4 Male No Sun Dinner 2 5 Female No Sun Dinner 4 6 Male No Sun Dinner 4 7 Male No Sun Dinner 2 8 Male No Sun Dinner 4
  • 14. subset > args(subset.data.frame) function (x, subset, select, drop = FALSE, ...) NULL > (tips.vip <- subset(tips, total_bill > 30 & size == 2)) total_bill tip sex smoker day time size 84 32.68 5.00 Male Yes Thur Lunch 2 174 31.85 3.18 Male Yes Sun Dinner 2 176 32.90 3.11 Male Yes Sun Dinner 2 180 34.63 3.55 Male Yes Sun Dinner 2 185 40.55 3.00 Male Yes Sun Dinner 2 238 32.83 1.17 Male Yes Sat Dinner 2 > levels(tips.vip$smoker) # [1] "No" "Yes" > levels(droplevels(tips.vip)$smoker) # [1] "Yes"
  • 15. cbind, [, $, [[ > head(cbind(tips, type = ifelse(tips$tip < 2, " ", " ")), 3) total_bill tip sex smoker day time size type 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 > tips$type <- ifelse(tips$tip < 2, " ", " ") > head(tips, 3) total_bill tip sex smoker day time size type 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 > data(tips) #
  • 16. transform, within > args(transform.data.frame) function (`_data`, ...) NULL > head(transform(tips, type = ifelse(tips$tip < 2, " ", " ")), 3) total_bill tip sex smoker day time size type 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 > args(within.data.frame) function (data, expr, ...) NULL > head(within(tips, { type <- c() # within + type[tip < 2] <- " " + type[tip >= 2] <- " " }), 3) total_bill tip sex smoker day time size type 1 16.99 1.01 Female No Sun Dinner 2 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3
  • 17. subset > # subset > head(subset(tips, select = c(tip, sex, smoker)), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = 2:4), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = -c(total_bill, size, time, day)), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = -c(1, 5:7)), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = c(tip:smoker)), 1) tip sex smoker 1 1.01 Female No > head(subset(tips, select = -c(total_bill, day:size)), 1) tip sex smoker 1 1.01 Female No
  • 18. [, $, [[ > # NULL > tips$size <- NULL > head(tips, 3) total_bill tip sex smoker day time 1 16.99 1.01 Female No Sun Dinner 2 10.34 1.66 Male No Sun Dinner 3 21.01 3.50 Male No Sun Dinner > tips[["time"]] <- NULL > head(tips, 3) total_bill tip sex smoker day 1 16.99 1.01 Female No Sun 2 10.34 1.66 Male No Sun 3 21.01 3.50 Male No Sun > tips["day"] <- NULL; tips[1] <- NULL > head(tips, 3) tip sex smoker 1 1.01 Female No 2 1.66 Male No 3 3.50 Male No > data(tips)
  • 19. transform, within > # NULL > head(transform(tips, total_bill = NULL, size = NULL, time = NULL, day = NULL), 3) tip sex smoker 1 1.01 Female No 2 1.66 Male No 3 3.50 Male No > # rm > head(within(tips, rm(total_bill, size, time, day)), 3) tip sex smoker 1 1.01 Female No 2 1.66 Male No 3 3.50 Male No
  • 20. > head(transform(tips, tip = 10), 3) total_bill tip sex smoker day time size 1 16.99 10 Female No Sun Dinner 2 2 10.34 10 Male No Sun Dinner 3 3 21.01 10 Male No Sun Dinner 3 > head(within(tips, tip <- 10), 3) total_bill tip sex smoker day time size 1 16.99 10 Female No Sun Dinner 2 2 10.34 10 Male No Sun Dinner 3 3 21.01 10 Male No Sun Dinner 3 > tips$tip <- 10 > head(tips, 3) total_bill tip sex smoker day time size 1 16.99 10 Female No Sun Dinner 2 2 10.34 10 Male No Sun Dinner 3 3 21.01 10 Male No Sun Dinner 3 > data(tips)
  • 21. order > head(tips[order(tips$sex), ], 4) # total_bill tip sex smoker day time size 1 16.99 1.01 Female No Sun Dinner 2 5 24.59 3.61 Female No Sun Dinner 4 12 35.26 5.00 Female No Sun Dinner 4 15 14.83 3.02 Female No Sun Dinner 2 > head(tips[order(tips$sex, decreasing = TRUE), ], 4) # total_bill tip sex smoker day time size 2 10.34 1.66 Male No Sun Dinner 3 3 21.01 3.50 Male No Sun Dinner 3 4 23.68 3.31 Male No Sun Dinner 2 6 25.29 4.71 Male No Sun Dinner 4 > head(tips[order(tips$sex, tips$tip), ], 4) # total_bill tip sex smoker day time size 68 3.07 1.00 Female Yes Sat Dinner 1 93 5.75 1.00 Female Yes Fri Dinner 2 112 7.25 1.00 Female No Sat Dinner 1 1 16.99 1.01 Female No Sun Dinner 2
  • 22. data.frame > (tip <- data.frame(date = sample(seq(as.Date("2011-11-09"), by = "day", len = 4)), + total_bill = sample(1:4 * 10), + tip = sample(1:4))) date total_bill tip 1 2011-11-10 30 4 2 2011-11-12 40 2 3 2011-11-11 10 1 4 2011-11-09 20 3 > # > tip <- tip[order(tip$date), ] > transform(tip, total_bill = cumsum(total_bill), tip = cumsum(tip)) date total_bill tip 4 2011-11-09 20 3 1 2011-11-10 50 7 3 2011-11-11 60 8 2 2011-11-12 100 10
  • 23. > head(tips[c("tip", "total_bill", "sex", "size", "time", "day", "smoker")]) tip total_bill sex size time day smoker 1 10 16.99 Female 2 Dinner Sun No 2 10 10.34 Male 3 Dinner Sun No 3 10 21.01 Male 3 Dinner Sun No 4 10 23.68 Male 2 Dinner Sun No 5 10 24.59 Female 4 Dinner Sun No 6 10 25.29 Male 4 Dinner Sun No
  • 24.
  • 25. • • table • xtabs • aggregate • by
  • 26. > args(colSums) function (x, na.rm = FALSE, dims = 1L) NULL > colSums(subset(tips, select = c(total_bill, tip)), na.rm = TRUE) total_bill tip 4827.77 731.58 > args(colMeans) function (x, na.rm = FALSE, dims = 1L) NULL > colMeans(subset(tips, select = c(total_bill, tip)), na.rm = TRUE) total_bill tip 19.785943 2.998279 > # apply colSums > apply(subset(tips, select = c(total_bill, tip)), 2, sum, na.rm = TRUE) total_bill tip 4827.77 731.58
  • 27. table > args(table) function (..., exclude = if (useNA == "no") c(NA, NaN), useNA = c("no", "ifany", "always"), dnn = list.names(...), deparse.level = 1) NULL > table(subset(tips, select = c(sex, smoker))) smoker sex No Yes Female 54 33 Male 97 60 > # a 4-way table is printed as a series of 2-way slices > table(subset(tips, select = c(sex, smoker, day, size))) , , day = Fri, size = 1 smoker sex No Yes Female 0 0 Male 0 1
  • 28. table > args(addmargins) function (A, margin = seq_along(dim(A)), FUN = sum, quiet = FALSE) NULL > # > addmargins(table(subset(tips, select = c(sex, smoker)))) smoker sex No Yes Sum Female 54 33 87 Male 97 60 157 Sum 151 93 244 > # > args(prop.table) function (x, margin = NULL) NULL > prop.table(table(subset(tips, select = c(sex, smoker)))) smoker sex No Yes Female 0.2213115 0.1352459 Male 0.3975410 0.2459016
  • 29. xtabs > args(xtabs) function (formula = ~., data = parent.frame(), subset, sparse = FALSE, na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE) NULL > # > xtabs(~ sex + smoker, tips) smoker sex No Yes Female 54 33 Male 97 60 > # > xtabs(cbind(total_bill, tip) ~ sex + smoker, tips) , , = total_bill smoker sex No Yes Female 977.68 593.27 Male 1919.75 1337.07
  • 30. aggregate > args(aggregate.data.frame) function (x, by, FUN, ..., simplify = TRUE) NULL > # FUN 1 > aggregate(tips[c("total_bill", "tip")], tips[c("sex", "day")], sum) sex day total_bill tip 1 Female Fri 127.31 25.03 2 Male Fri 198.57 26.93 3 Female Sat 551.05 78.45 4 Male Sat 1227.35 181.95 5 Female Sun 357.70 60.61 6 Male Sun 1269.46 186.78 7 Female Thur 534.89 82.42 8 Male Thur 561.44 89.41 > # formula > aggregate(cbind(total_bill, tip) ~ sex + day, tips, sum) sex day total_bill tip 1 Female Fri 127.31 25.03
  • 31. by > args(by) function (data, INDICES, FUN, ..., simplify = TRUE) NULL > # aggregate FUN OK > (ret <- by(tips[c("total_bill", "tip")], tips[c("sex", "day")], range)) sex: Female day: Fri [1] 1.00 22.75 ------------------------------------------------------------ sex: Male day: Fri [1] 1.50 40.17 > # data.frame > cbind(expand.grid(dimnames(ret)), do.call(rbind, ret)) sex day 1 2 1 Female Fri 1.00 22.75 2 Male Fri 1.50 40.17
  • 32.
  • 33. reshape • merge
  • 34. reshape > args(reshape) function (data, varying = NULL, v.names = NULL, timevar = "time", idvar = "id", ids = 1L:NROW(data), times = seq_along(varying[[1L]]), drop = NULL, direction, new.row.names = NULL, sep = ".", split = if (sep == "") { list(regexp = "[A-Za-z][0-9]", include = TRUE) } else { list(regexp = sep, include = FALSE, fixed = TRUE) }) NULL > head(reshape(tips, idvar = c("sex", "smoker", "time", "size"), + timevar = "day", drop = "total_bill", direction = "wide")) sex smoker time size tip.Sun tip.Sat tip.Thur tip.Fri 1 Female No Dinner 2 1.01 2.75 3 3.25 2 Male No Dinner 3 1.66 3.35 NA NA 4 Male No Dinner 2 3.31 4.08 NA 3.50 5 Female No Dinner 4 3.61 2.45 NA NA 6 Male No Dinner 4 4.71 7.58 NA NA 17 Female No Dinner 3 1.67 3.07 NA NA
  • 35. reshape > # idvar timevar > (a <- data.frame(a = c(1:3, 1), b = c(1:3, 1), c = 1:4)) a b c 1 1 1 1 2 2 2 2 3 3 3 3 4 1 1 4 > reshape(a, idvar = "a", timevar = "b", direction = "wide") a c.1 c.2 c.3 1 1 1 NA NA 2 2 NA 2 NA 3 3 NA NA 3
  • 36. merge > # > (user.type <- data.frame(sex = rep(c("Male", "Female"), each = 2), + smoker = c("Yes", "No"), + type = LETTERS[1:4])) sex smoker type 1 Male Yes A 2 Male No B 3 Female Yes C 4 Female No D > args(merge.data.frame) function (x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE, suffixes = c(".x", ".y"), incomparables = NULL, ...) NULL > merge(tips, user.type, by = c("sex", "smoker"), sort = FALSE)[54:55, ] sex smoker total_bill tip day time size type 54 Female No 10.65 1.50 Thur Lunch 2 D 55 Male No 10.27 1.71 Sun Dinner 2 B
  • 37.
  • 38. • • R • reshape2 • melt • cast •
  • 39. Excel
  • 40. R > acast(melt(tips, id.var = c("sex", "smoker", "day"), measure.var = "tip"), + sex + smoker ~ day, sum, margins = TRUE) Fri Sat Sun Thur (all) Female_No 6.25 35.42 46.61 61.49 149.77 Female_Yes 18.78 43.03 14.00 18.93 94.74 Female_(all) 25.03 78.45 60.61 80.42 244.51 Male_No 5.00 104.21 133.96 58.83 302.00 Male_Yes 21.93 77.74 52.82 30.58 183.07 Male_(all) 26.93 181.95 186.78 89.41 485.07 (all)_(all) 51.96 260.40 247.39 169.83 729.58 reshape2
  • 41. reshape2 melt cast melt id > head(tipsm <- melt(tips, measure.vars = c("total_bill", "tip"))) sex smoker day time size variable value 1 Female No Sun Dinner 2 total_bill 16.99 2 Male No Sun Dinner 3 total_bill 10.34 3 Male No Sun Dinner 3 total_bill 21.01 4 Male No Sun Dinner 2 total_bill 23.68 5 Female No Sun Dinner 4 total_bill 24.59 6 Male No Sun Dinner 4 total_bill 25.29 > levels(tipsm$variable) [1] "total_bill" "tip"
  • 42. melt > args(melt.data.frame) function (data, id.vars, measure.vars, variable_name = "variable", na.rm = !preserve.na, preserve.na = TRUE, ...) NULL > # factor id > head(melt(tips), 1) Using sex, smoker, day, time as id variables sex smoker day time variable value 1 Female No Sun Dinner total_bill 16.99 > # id measure > head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size")), 1) sex smoker day time size variable value 1 Female No Sun Dinner 2 total_bill 16.99 > # id measure > head(melt(tips, id.vars = c("sex", "smoker", "day", "time", "size"), + measure.vars = "tip"), 1) sex smoker day time size variable value 1 Female No Sun Dinner 2 tip 1.01
  • 43. cast formula fun.aggregate > args(acast) # use acast to get an array function (data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data)) NULL > args(dcast) # use dcast to get a data.frame function (data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value_var = guess_value(data)) NULL In the formula, ... stands for all remaining variables and . for no variable. acast accepts more than one ~ for multi-dimensional output: hoge ~ fuga ~ piyo ※dcast allows only one ~: hoge ~ fuga + piyo
  • 44. > tipsm <- melt(tips, measure.vars = c("total_bill", "tip")) > acast(tipsm, sex ~ smoker, length) No Yes Female 108 64 Male 194 120 > # > acast(tipsm, smoker ~ sex, length) Female Male No 108 194 Yes 64 120 > # > acast(tipsm, sex ~ smoker, length, margins = TRUE) No Yes (all) Female 108 64 172 Male 194 120 314 (all) 302 184 486
  • 45. > # also break the columns down by size > acast(tipsm, smoker ~ sex + size, length) Female_1 Female_2 Female_3 Female_4 Female_5 Female_6 Male_1 Male_2 Male_3 No 4 66 18 14 2 4 0 114 34 Yes 2 48 10 4 0 0 2 82 14 Male_4 Male_5 Male_6 No 38 4 4 Yes 18 4 0 > # a second ~ gives a 3-dimensional array > acast(tipsm, smoker ~ sex ~ size, length) , , 1 Female Male No 4 0 Yes 2 2
  • 46. > # sum (total_bill and tip are added together) > acast(tipsm, sex ~ day, sum) Fri Sat Sun Thur Female 152.34 629.5 418.31 617.31 Male 225.50 1409.3 1456.24 650.85 > # separate sums for total_bill and tip > acast(tipsm, sex + variable ~ day, sum) Fri Sat Sun Thur Female_total_bill 127.31 551.05 357.70 534.89 Female_tip 25.03 78.45 60.61 82.42 Male_total_bill 198.57 1227.35 1269.46 561.44 Male_tip 26.93 181.95 186.78 89.41 > # sum of tip only > acast(tipsm, sex ~ day, sum, subset = .(variable == "tip")) Fri Sat Sun Thur Female 25.03 78.45 60.61 82.42 Male 26.93 181.95 186.78 89.41
  • 47.
  • 48. reshape2 aggregate table xtabs