47. クエリの内容を確認
-- Passenger and survivor counts per cabin class and sex.
-- total_cnt counts rows with a non-NULL survived value; survived_cnt adds the
-- survived values up (the accompanying notes say survivors carry the value 1).
select
    class,
    sex,
    count(survived) as total_cnt,
    sum(survived) as survived_cnt
from titanic_db.titanic
group by
    class,
    sex
order by
    class,
    sex;
以下の 4 カラムを返す
- 客室ランク,
- 性別,
- 全乗客数,
- 生存者数(値が 1 のもの)
客室ランクと性別ごとに
値を集約する
客室ランクと性別で,
アルファベット順に並べる
49. クエリの内容を確認
-- Survival rate per cabin class and sex, highest rate first.
-- The inner query aggregates passenger and survivor counts per (class, sex),
-- skipping rows whose class is the literal '*'; the outer query divides the
-- two counts and rounds the ratio to 2 decimal places.
select
    class,
    sex,
    total_cnt,
    survived_cnt,
    -- both operands are cast to double so the ratio is computed in floating point
    round(cast(survived_cnt as double) / cast(total_cnt as double), 2) as survival_rate
from (
    select
        class,
        sex,
        count(survived) as total_cnt,
        sum(survived) as survived_cnt
    from titanic_db.titanic
    -- the literal must be closed with a plain ASCII single quote; a typographic
    -- apostrophe (U+02BC) here leaves the string unterminated
    where class <> '*'
    group by
        class,
        sex
) as class_sex_stats
order by
    survival_rate desc,
    -- tie-breakers keep the row order deterministic when rates are equal
    class,
    sex;
50. クエリの内容を確認
-- Survival rate per cabin class and sex, highest rate first.
-- The inner query aggregates passenger and survivor counts per (class, sex),
-- skipping rows whose class is the literal '*'; the outer query divides the
-- two counts and rounds the ratio to 2 decimal places.
select
    class,
    sex,
    total_cnt,
    survived_cnt,
    -- both operands are cast to double so the ratio is computed in floating point
    round(cast(survived_cnt as double) / cast(total_cnt as double), 2) as survival_rate
from (
    select
        class,
        sex,
        count(survived) as total_cnt,
        sum(survived) as survived_cnt
    from titanic_db.titanic
    -- the literal must be closed with a plain ASCII single quote; a typographic
    -- apostrophe (U+02BC) here leaves the string unterminated
    where class <> '*'
    group by
        class,
        sex
) as class_sex_stats
order by
    survival_rate desc,
    -- tie-breakers keep the row order deterministic when rates are equal
    class,
    sex;
生存者数を人数で割って生存率を算出
先ほどのクエリをサブクエリとして
実行して,その結果に対してさらに
処理を行う
69. tsv + gzip テーブルに対するクエリの実行 (1)
-- Top 20 products by total sales for April 2013, read from sales_gz.
-- The year/month filter is derived from the time_id column at query time.
select
    prod_id,
    -- alias spelling is kept unchanged so the result column name stays the same
    count(*) as deal_conut,
    avg(quantity_sold) as average_sold_num,
    sum(quantity_sold * amount_sold) as total_sales
from swingbench_db.sales_gz
where year(time_id) = 2013
    and month(time_id) = 4
group by prod_id
order by total_sales desc
limit 20;
70. tsv + gzip テーブルに対するクエリの実行 (2)
-- Top 20 products by total sales for April 2013, read from sales_gz.
-- The year/month filter is derived from the time_id column at query time.
select
    prod_id,
    -- alias spelling is kept unchanged so the result column name stays the same
    count(*) as deal_conut,
    avg(quantity_sold) as average_sold_num,
    sum(quantity_sold * amount_sold) as total_sales
from swingbench_db.sales_gz
where year(time_id) = 2013
    and month(time_id) = 4
group by prod_id
order by total_sales desc
limit 20;
gz テーブルに対するクエリ
timestamp を年月に変換
72. parquet + snappy テーブルに対するクエリの実行 (1)
-- Top 20 products by total sales for April 2013, read from sales_parquet.
-- Filters directly on the table's year and month columns
-- (presumably partition columns, per the accompanying note; confirm against the table DDL).
select
    prod_id,
    -- alias spelling is kept unchanged so the result column name stays the same
    count(*) as deal_conut,
    avg(quantity_sold) as average_sold_num,
    sum(quantity_sold * amount_sold) as total_sales
from swingbench_db.sales_parquet
where year = 2013
    and month = 4
group by prod_id
order by total_sales desc
limit 20;
73. parquet + snappy テーブルに対するクエリの実行 (2)
-- Top 20 products by total sales for April 2013, read from sales_parquet.
-- Filters directly on the table's year and month columns
-- (presumably partition columns, per the accompanying note; confirm against the table DDL).
select
    prod_id,
    -- alias spelling is kept unchanged so the result column name stays the same
    count(*) as deal_conut,
    avg(quantity_sold) as average_sold_num,
    sum(quantity_sold * amount_sold) as total_sales
from swingbench_db.sales_parquet
where year = 2013
    and month = 4
group by prod_id
order by total_sales desc
limit 20;
parquet テーブルに対するクエリ
パーティション情報を利用