Más contenido relacionado La actualidad más candente (20) 性能測定道 実践編
19. #define LOOP 1000000000
#define SIZE (1024*1024*1024)
char *region = malloc(SIZE);
int i, j;
for (j = 0; j < LOOP; j++) {
for (i = 0; i < SIZE; i++) {
region[i];
}
}
実行時間 / (LOOP × SIZE)
= 1回のメモリアクセスレイテンシ(?)
20. #define LOOP 1000000000
#define SIZE (1024*1024*1024)
char *region = malloc(SIZE);
int i, j;
for (j = 0; j < LOOP; j++) {
for (i = 0; i < SIZE; i++) {
region[i];
}
}
実行時間 / (LOOP × SIZE)
= 1回のメモリアクセスレイテンシ(?)
40. #define LOOP 1000000000
#define SIZE (1024*1024*1024)
char *region = malloc(SIZE);
int i, j;
for (j = 0; j < LOOP; j++) {
for (i = 0; i < SIZE; i++) {
region[i];
}
}
実行時間 / (LOOP × SIZE)
= 1回のメモリアクセスレイテンシ(?)
57. movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
movq
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
(%%rax),
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
%rax
分岐予測器をニート化
•
分岐予測器の動き自体はコントロールが難しい
•
→ 仕事を与えない
63. micbench
live on AWS EC2
$ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
65. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password:
Last login: Sat Feb 1 03:27:02 2014 from 35.69.30.125.dy.iij4u.or.jp
__| __|_ )
_| (
/
___|___|___|
Amazon Linux AMI
https://aws.amazon.com/amazon-linux-ami/2013.09-release-notes/
$
66. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password:
Last login: Sat Feb 1 03:27:02 2014 from 35.69.30.125.dy.iij4u.or.jp
__| __|_ )
_| (
/
___|___|___|
Amazon Linux AMI
https://aws.amazon.com/amazon-linux-ami/2013.09-release-notes/
$ micbench mem -h
67. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password:
Last login: Sat Feb 1 03:27:02 2014 from 35.69.30.125.dy.iij4u.or.jp
__| __|_ )
_| (
/
___|___|___|
Amazon Linux AMI
https://aws.amazon.com/amazon-linux-ami/2013.09-release-notes/
$ micbench mem -h
Usage: micbench mem [options]
-h, --help                   Show help
-m, --multi NUM              Multiplicity of memory access (default: )
-t, --timeout NUM            Running time of memory access test (in sec) (default: sec)
-S, --seq                    Sequential memory access mode (default mode)
-R, --rand                   Random memory access mode (default: sequential access mode)
-L, --local                  Allocate separated memory region for each thread (default: sharing one region)
-a, --affinity AFFINITY      CPU and memory utilization policy
-s, --size SIZE              Size of memory region (default: 1MB)
-H, --hugetlbfile PATH       Use HugePages if specified. Give a path to file on hugetlbfs.
-z, --hugepagesize SIZE      Size of HugePage (default: 2MB)
-v, --verbose
--debug
70. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ cd /sys/devices/system/cpu/cpu0/cache
$ echo `cat index0/level` `cat index0/type` `cat index0/size`
1 Data 32K
$ echo `cat index1/level` `cat index1/type` `cat index1/size`
1 Instruction 32K
$ echo `cat index2/level` `cat index2/type` `cat index2/size`
2 Unified 256K
$ echo `cat index3/level` `cat index3/type` `cat index3/size`
3 Unified 20480K
73. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ micbench mem --rand --size 16K --affinity 0:c0 --timeout 10
loop end: t=10.000098
access_pattern random
multiplicity 1
local false
page_size 4096
size 32768
use_hugepages false
total_ops 2172125184
total_clk 17885453816
exec_time 10.000098
ops_per_sec 2.172104e+08
clk_per_op 8.234081e+00
total_exec_time 11.881029
74. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ micbench mem --rand --size 16K --affinity 0:c0 --timeout 10
loop end: t=10.000098
access_pattern random
multiplicity 1
local false
page_size 4096
size 32768
use_hugepages false
total_ops 2172125184
total_clk 17885453816
exec_time 10.000098
ops_per_sec 2.172104e+08
clk_per_op 8.234081e+00
total_exec_time 11.881029
L1D cache アクセスレイテンシ
8.23クロック ( 5%)
(Wikipedia調べ:3クロック)
77. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ micbench mem --rand --size 128K --affinity 0:c0 --timeout 10
loop end: t=10.022191
access_pattern random
multiplicity 1
local false
page_size 4096
size 131072
use_hugepages false
total_ops 729284608
total_clk 17940879364
exec_time 10.022191
ops_per_sec 7.276698e+07
clk_per_op 2.460066e+01
total_exec_time 10.047088
78. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ micbench mem --rand --size 128K --affinity 0:c0 --timeout 10
loop end: t=10.022191
access_pattern random
multiplicity 1
local false
page_size 4096
size 131072
use_hugepages false
total_ops 729284608
total_clk 17940879364
exec_time 10.022191
ops_per_sec 7.276698e+07
clk_per_op 2.460066e+01
total_exec_time 10.047088
L2D cache アクセスレイテンシ
24.6クロック ( 5%)
(Wikipedia調べ:8クロック)
81. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ micbench mem --rand --size 16MB --affinity 0:c0 --timeout 10
loop end: t=10.017083
access_pattern random
multiplicity 1
local false
page_size 4096
size 16777216
use_hugepages false
total_ops 40370176
total_clk 17984571328
exec_time 10.017083
ops_per_sec 4.030133e+06
clk_per_op 4.454915e+02
total_exec_time 10.293443
82. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ micbench mem --rand --size 16MB --affinity 0:c0 --timeout 10
loop end: t=10.017083
access_pattern random
multiplicity 1
local false
page_size 4096
size 16777216
use_hugepages false
total_ops 40370176
total_clk 17984571328
exec_time 10.017083
ops_per_sec 4.030133e+06
clk_per_op 4.454915e+02
total_exec_time 10.293443
L3 shared アクセスレイテンシ
445クロック
85. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ micbench mem --rand --size 256MB --affinity 0:c0 --timeout 10
loop end: t=10.025490
access_pattern random
multiplicity 1
local false
page_size 4096
size 268435456
use_hugepages false
total_ops 31719424
total_clk 18000368292
exec_time 10.025490
ops_per_sec 3.163878e+06
clk_per_op 5.674872e+02
total_exec_time 16.503597
86. $ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ micbench mem --rand --size 256MB --affinity 0:c0 --timeout 10
loop end: t=10.025490
access_pattern random
multiplicity 1
local false
page_size 4096
size 268435456
use_hugepages false
total_ops 31719424
total_clk 18000368292
exec_time 10.025490
ops_per_sec 3.163878e+06
clk_per_op 5.674872e+02
total_exec_time 16.503597
主記憶アクセスレイテンシ
567クロック
95. micbench
A microbenchmarking toolset
$ micbench -h
!
Usage: micbench command [options]
!
available commands:
help : 'help command' explains about command
io : IO benchmark on block devices and files
lock : lock and sync. cost benchmark
mem : memory latency and bandwidth benchmark
96. micbench
A microbenchmarking toolset
$ micbench -h
!
Usage: micbench command [options]
!
available commands:
help : 'help command' explains about command
io : IO benchmark on block devices and files
lock : lock and sync. cost benchmark
mem : memory latency and bandwidth benchmark
97. モニタリングツール
•
sysstat (mpstat / iostat)
(…)
!
avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.00    0.00    1.00   99.00    0.00    0.00
!
Device:      tps   Blk_read/s   Blk_wrtn/s   Blk_read   Blk_wrtn
xvdap1      0.00         0.00         0.00          0          0
xvdap3      0.00         0.00         0.00          0          0
xvdf       64.00      1024.00         0.00       1024          0
xvdg        0.00         0.00         0.00          0          0
!
(…)
98. モニタリングツール
•
sysstat (mpstat / iostat)
(…)
!
avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.00    0.00    1.00   99.00    0.00    0.00
!
Device:      tps   Blk_read/s   Blk_wrtn/s   Blk_read   Blk_wrtn
xvdap1      0.00         0.00         0.00          0          0
xvdap3      0.00         0.00         0.00          0          0
xvdf       64.00      1024.00         0.00       1024          0
xvdg        0.00         0.00         0.00          0          0
!
(…)
Not readable!
99. モニタリングツール
•
sysstat (mpstat / iostat)
(…)
!
avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.00    0.00    1.00   99.00    0.00    0.00
!
Device:      tps   Blk_read/s   Blk_wrtn/s   Blk_read   Blk_wrtn
xvdap1      0.00         0.00         0.00          0          0
xvdap3      0.00         0.00         0.00          0          0
xvdf       64.00      1024.00         0.00       1024          0
xvdg        0.00         0.00         0.00          0          0
Not readable!
!
(…)
•
プログラムから集計するのが面倒なフォーマット
100. モニタリングツール
•
sysstat (mpstat / iostat)
(…)
!
avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.00    0.00    1.00   99.00    0.00    0.00
!
Device:      tps   Blk_read/s   Blk_wrtn/s   Blk_read   Blk_wrtn
xvdap1      0.00         0.00         0.00          0          0
xvdap3      0.00         0.00         0.00          0          0
xvdf       64.00      1024.00         0.00       1024          0
xvdg        0.00         0.00         0.00          0          0
!
(…)
Not real time
101. モニタリングツール
•
sysstat (mpstat / iostat)
(…)
!
avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.00    0.00    1.00   99.00    0.00    0.00
!
Device:      tps   Blk_read/s   Blk_wrtn/s   Blk_read   Blk_wrtn
xvdap1      0.00         0.00         0.00          0          0
xvdap3      0.00         0.00         0.00          0          0
xvdf       64.00      1024.00         0.00       1024          0
xvdg        0.00         0.00         0.00          0          0
!
(…)
Not real time
•
最短で1秒間隔でしかモニタリングできない
103. PerfMonger
live on AWS EC2
$ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
104. PerfMonger
live on AWS EC2
$ ssh jpugstudy@54.201.1.14
jpugstudy@54.201.1.14's password: JPUGstudy20140201
※現在はアクセスできません
$ perfmonger record
{"time": 1391233526.2996, "cpuinfo": {"nr_cpu": 1, "all": {"%usr": 0.00, "%nice": 0.00, "%sys":
0.00, "%iowait": 0.00, "%irq": 0.00, "%soft": 0.00, "%steal": 0.00, "%guest": 0.00, "%idle":
0.00}, "cpus": [{"%usr": 0.00, "%nice": 0.00, "%sys": 0.00, "%iowait": 0.00, "%irq": 0.00,
"%soft": 0.00, "%steal": 0.00, "%guest": 0.00, "%idle": 100.00}]}, "ioinfo": {"devices":
["xvda1", "xvda3", "xvdf", "xvdg"], "xvda1": {"r/s": 0.0000, "w/s": 0.0000, "rsec/s": 0.0000,
"wsec/s": 0.0000, "r_await": 0.0000, "w_await": 0.0000, "avgrq-sz": 0.0000, "avgqu-sz": 0.0000},
"xvda3": {"r/s": 0.0000, "w/s": 0.0000, "rsec/s": 0.0000, "wsec/s": 0.0000, "r_await": 0.0000,
"w_await": 0.0000, "avgrq-sz": 0.0000, "avgqu-sz": 0.0000}, "xvdf": {"r/s": 0.0000, "w/s":
0.0000, "rsec/s": 0.0000, "wsec/s": 0.0000, "r_await": 0.0000, "w_await": 0.0000, "avgrq-sz":
0.0000, "avgqu-sz": 0.0000}, "xvdg": {"r/s": 0.0000, "w/s": 0.0000, "rsec/s": 0.0000, "wsec/s":
0.0000, "r_await": 0.0000, "w_await": 0.0000, "avgrq-sz": 0.0000, "avgqu-sz": 0.0000}, "total":
{"r/s": 0.0000, "w/s": 0.0000, "rsec/s": 0.0000, "wsec/s": 0.0000, "r_await": 0.0000, "w_await":
0.0000}}}