SlideShare una empresa de Scribd logo
1 de 26
Descargar para leer sin conexión
1
MongoDB
visualisation of slow operations
Kay Agahd
4 June 2013
2
idealo and MongoDB
●
idealo = Europe's leading price comparison web site
●
Germany, Austria, United Kingdom, France, Italy, Poland and Spain
●
250 million offers online (May 2013)
●
fast growing
●
different types of databases (MySQL, Oracle, MongoDB)
●
MongoDB in production since v1.6
●
sharding in production since MongoDB v1.8
●
MongoDB stores offers for back-end usage
●
30 mongoDB servers for offerStore + 3 servers for offerHistory
●
15 mongoDB servers for other purposes
●
nearly 15 TB of data all together
3
Review profiling
●
MongoDB supports profiling of “slow” operations
●
“slow” is a threshold to be set when turning profiling on (default 100 ms)
●
profiling per-database or per-instance on a running mongod
●
profiler writes collected data to a capped collection “system.profile”
4
Example of slow op entry (1/2)
{
"ts" : ISODate("2013-04-05T01:41:31.710Z"),
"op" : "getmore",
"ns" : "offerStore.offer",
"query" : {
"query" : {
"shopId" : 123,
"onlineProductIds" : {"$ne" : null},
"smallPicture" : {"$ne" : null},
"_id" : {"$gt" : 1555008076},
"lastChange" : {"$gt" : ISODate("2013-04-02T22:00:00Z")}
},
"orderby" : {
"_id" : 1
}
},
"cursorid" : NumberLong("5773493375904448215"),
"ntoreturn" : 500, 
...
5
Example of slow op entry (2/2)
...
"keyUpdates" : 0,
"numYield" : 2350,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong(8724165),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(5321722),
"w" : NumberLong(7)
}
},
"nreturned" : 500,
"responseLength" : 94656,
"millis" : 5322,
"client" : "172.16.65.202",
"user" : "pl_parser"
}
6
Inconveniences
●
each mongod needs to be handled separately
●
replSet: connect to master and every slave
●
sharding: incomplete view through router, thus replSet * n shards
●
gives only a view on a limited time span due to capped collection
●
different formats of “query” field make querying more difficult
●
bug: ops through mongos omit the user (JIRA: SERVER-7538)
7
Example of different formats/schemata
- “query” as flat document:
{ "query" : { "shopId" : 123, 
    "onlineProductIds" : { "$ne" : null } }, 
   "user" : "pl_parser"}
- “query” embedded:
{"query" : { "query" : { "shopId" : 123,
       "onlineProductIds" : { "$ne" : null } }, 
   "orderby" : { "_id" : NumberLong(1) } }, 
 "user" : "pl_parser"}
- “query” embedded as $query:
{ "query" : { "$query" : { "shopId" : 123, 
          "onlineProductIds" : { "$ne" : null } }, 
              "$orderby" : { "_id" : NumberLong(1) },
              "$comment" : "profiling comment" }, 
 "user" : "pl_parser" }
8
idealo requirements
●
quick overview of types of slow-ops and their quantity within a time period
(“types” means op type, user, server, queried and sorted fields)
●
historical view to see how slow-ops evolve to extrapolate them
●
discovering spikes in time or in slow-op types
●
filtering by slow-op types and/or time range to drill down
9
Goals
●
faster queries
●
better adapted indexes
●
better adapted data schema
●
higher throughput by smarter workflow
10
Steps to go
●
two global steps:
●
1) collect and aggregate slow ops from all mongod's into one global
collection
●
2) GUI to query and show results
11
Step 1 of 2
●
global collection:
●
allows easy and fast querying of the whole mongoDB (shard) system
●
keeps historical data (no capped collection)
●
located on another replSet to avoid interfering with profiled mongod's
●
collector:
●
guarantee that only 1 instance is running at once (or add logic to avoid
doubled entries)
●
use tailable cursors to collect data from profiled mongod's
●
in case of failure: reconnect before data gets overwritten but avoid DoS
●
monitor it (nagios etc.)
●
profiled entries:
●
reduce size by keeping only interesting fields
●
make them easier to query (i.e. only 1 schema)
●
aggregate fields inside “query” and “orderby” to values
●
choose short field names
12
slow-op example
●
slow-op example of above becomes:
{
"_id" : ObjectId("512e43099bbcf52b9aff3602"),
"ts" : ISODate("2013-04-05T01:41:31.710Z"),
"adr" : "s233.ipx",
"op" : "getmore",
"fields" : 
["shopId","onlineProductIds","smallPicture","_id","lastChange"],
"sort" : ["_id"],
"nret" : 500,
"reslen" : 94656,
"millis" : 5322,
"user" : "pl_parser"
}
13
Step 2 of 2
●
GUI:
●
x-axis = execution time
●
y-axis = duration of slow op
●
size of point = quantity of slow-op type
●
zoomable in x or y axis
14
How to query slow ops
●
group by time component allows resolution by year, month, week etc.
●
group by server address, user, operation, queried fields and sorted fields
allows defining different slow-op types
●
filter allows to focus on time period and specific slow ops
●
use slavePreferred option
●
error handling, i.e. result exceeds max of 16 MB
15
Query example
{$match:{ts : {$gt : #, $lt : # },
   fields : {$all : ["_id","shopId","bokey"]}}},
{$group:{_id : {op : "$op",
   user : "$user",
   fields : "$fields",
   year : { $year : "$ts" },
   month : { $month : "$ts" },
   dayOfMonth : { $dayOfMonth : "$ts" },
   hour : { $hour : "$ts" }},
   count : { $sum : 1 },
millis : { $sum : "$millis" },
avgMs : { $avg : "$millis" },
minMs : { $min : "$millis" },
maxMs : { $max : "$millis" },
firstts : { $first : "$ts" }}},
{ $sort:{ firstts : 1 }}
Filter
Slow-op
Resolution
Data
16
GUI
17
Resolution by minute
18
Resolution by minute & filter
19
dygraph.js
●
general syntax:
<script type="text/javascript">
   g = new Dygraph(document.getElementById("graph"),
    "x-name,   graph1-name,   graph2-name,   ..., graphN-name\n" + 
    "x-value1, graph1-value1, graph2-value1, ..., graphN-value1\n" +
    "x-value2, graph1-value2, graph2-value2, ..., graphN-value2\n" +
    ...
    "x-valueN, graph1-valueN, graph2-valueN, ..., graphN-valueN\n"
   );
 </script>
●
example for 2 slow-op types:
<script type="text/javascript">
 g = new Dygraph(document.getElementById("graph"),
 "Date,op=query;fields=[_id;shopId],n,min,max,op=query;fields=[_id],n,min,max\n" + 
 "2013/03/17,  5.4, 10, 3.2,  7.8,            10.4, 123, 3.1, 20.2\n" + 
 "2013/03/18, 12.4, 23, 3.4, 55.8,               0,   0,   0,    0\n" +
 "2013/03/19,    0,  0,   0,    0,            33.5,  66, 3.1, 89.3\n" 
   );
</script>
20
dygraph.js Options 1/3
●
hide legend values from being drawn as graph:
<script type="text/javascript">
 g = new Dygraph(document.getElementById("graph"),
  "Date,op=query;fields=[_id;shopId],n,min,max,op=query;fields=[_id],n,min,max\n" + 
  "2013/03/17, 5.4, 10, 3.2, 7.8, 10.4, 123, 3.1, 20.2\n" + 
  "2013/03/18, 12.4, 23, 3.4, 55.8, 0, 0, 0, 0\n" +  
  "2013/03/19, 0, 0, 0, 0, 33.5, 66, 3.1, 89.3\n",
  {//options:
    visibility:[true, false, false, false, true, false, false, false],
    showLabelsOnHighlight:false,
    hideOverlayOnMouseOut:false,
    labelsSeparateLines: true,
    drawPoints: true,
    legend: "always",
    xlabel: "Date",
    ylabel: "seconds",
    ... more options ...
  }
); 
21
dygraph.js Options 2/3
●
show custom legend on mouse over:
 highlightCallback: function(e, x, pts, row) {
  var text = "";
  var legend = new Array();
  for (var i = 0; i < pts.length; i++) {
    var rangeY = g.yAxisRange();
      if(pts[i].yval >= rangeY[0] && pts[i].yval <= rangeY[1]){//hide outside series
        var seriesProps = g.getPropertiesForSeries(pts[i].name);
        var count = g.getValue(row, seriesProps.column+1);
        var minSec = g.getValue(row, seriesProps.column+2);
        var maxSec = g.getValue(row, seriesProps.column+3);
        if(pts[i].yval != 0 && count != 0){
          legend.push([seriesProps.color, pts[i], count, minSec, maxSec]);
        }}}//end for
  legend.sort(function(a,b){return b[1].yval - a[1].yval});//sort by y-values
  for (var i = 0; i < legend.length; i++) {
    text += "<span style='color: " + legend[i][0] + ";'> " + legend[i][1].name +       
 "</span><br/><span>" + Dygraph.dateString_(legend[i][1].xval) + " count:" +        
 legend[i][2] + " minSec:" + legend[i][3] + " maxSec:" + legend[i][4] + " avgSec:" + 
 legend[i][1].yval + " </span><br/>";
  }
  document.getElementById("status").innerHTML = text; }, ... more options ...
22
dygraph.js Options 3/3
●
draw circles with surface of count:
  drawPointCallback : function(g, seriesName, ctx, cx, cy, color, pSize){
    if(lastSeries != seriesName || isNaN(currentRow) ){
lastSeries = seriesName;
currentRow = g.getLeftBoundary_() - 1;
    }
    currentRow++;
    var col = g.indexFromSetName(seriesName);
    var count = g.getValue(currentRow, col+1);
    ctx.strokeStyle = color;
    ctx.lineWidth = 0.8;
    ctx.beginPath();
    ctx.arc(cx, cy, Math.sqrt(count/Math.PI), 0, 2 * Math.PI, false); 
    ctx.closePath();
    ctx.stroke();
  }
 }//end options
);//end dygraph
23
Profiling status
24
Collector read/write status
25
Questions?
26
Thank you!

Más contenido relacionado

La actualidad más candente

MongoDB - Sharded Cluster Tutorial
MongoDB - Sharded Cluster TutorialMongoDB - Sharded Cluster Tutorial
MongoDB - Sharded Cluster TutorialJason Terpko
 
Spark & Cassandra - DevFest Córdoba
Spark & Cassandra - DevFest CórdobaSpark & Cassandra - DevFest Córdoba
Spark & Cassandra - DevFest CórdobaJose Mº Muñoz
 
Mongo db improve the performance of your application codemotion2016
Mongo db improve the performance of your application codemotion2016Mongo db improve the performance of your application codemotion2016
Mongo db improve the performance of your application codemotion2016Juan Antonio Roy Couto
 
Using MongoDB and Python
Using MongoDB and PythonUsing MongoDB and Python
Using MongoDB and PythonMike Bright
 
MongoDB - An Introduction
MongoDB - An IntroductionMongoDB - An Introduction
MongoDB - An Introductionsethfloydjr
 
MySQL Without The SQL -- Oh My! PHP Detroit July 2018
MySQL Without The SQL -- Oh My! PHP Detroit July 2018MySQL Without The SQL -- Oh My! PHP Detroit July 2018
MySQL Without The SQL -- Oh My! PHP Detroit July 2018Dave Stokes
 
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDB
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDBUi5 con@Banglore - UI5 App with Offline Storage using PouchDB
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDBGAURAV SHROFF
 
LOFAR - finding transients in the radio spectrum
LOFAR - finding transients in the radio spectrumLOFAR - finding transients in the radio spectrum
LOFAR - finding transients in the radio spectrumGijs Molenaar
 
Webinar: Getting Started with MongoDB - Back to Basics
Webinar: Getting Started with MongoDB - Back to BasicsWebinar: Getting Started with MongoDB - Back to Basics
Webinar: Getting Started with MongoDB - Back to BasicsMongoDB
 
When big data meet python @ COSCUP 2012
When big data meet python @ COSCUP 2012When big data meet python @ COSCUP 2012
When big data meet python @ COSCUP 2012Jimmy Lai
 
Introduction to MongoDB
Introduction to MongoDBIntroduction to MongoDB
Introduction to MongoDBNosh Petigara
 
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...Big Data Spain
 
Back to Basics Webinar 1: Introduction to NoSQL
Back to Basics Webinar 1: Introduction to NoSQLBack to Basics Webinar 1: Introduction to NoSQL
Back to Basics Webinar 1: Introduction to NoSQLMongoDB
 

La actualidad más candente (20)

MongoDB - Sharded Cluster Tutorial
MongoDB - Sharded Cluster TutorialMongoDB - Sharded Cluster Tutorial
MongoDB - Sharded Cluster Tutorial
 
Sharded cluster tutorial
Sharded cluster tutorialSharded cluster tutorial
Sharded cluster tutorial
 
MongoDB
MongoDBMongoDB
MongoDB
 
Spark & Cassandra - DevFest Córdoba
Spark & Cassandra - DevFest CórdobaSpark & Cassandra - DevFest Córdoba
Spark & Cassandra - DevFest Córdoba
 
Mongo db improve the performance of your application codemotion2016
Mongo db improve the performance of your application codemotion2016Mongo db improve the performance of your application codemotion2016
Mongo db improve the performance of your application codemotion2016
 
Logs management
Logs managementLogs management
Logs management
 
Using MongoDB and Python
Using MongoDB and PythonUsing MongoDB and Python
Using MongoDB and Python
 
MongoDB - An Introduction
MongoDB - An IntroductionMongoDB - An Introduction
MongoDB - An Introduction
 
MySQL Without The SQL -- Oh My! PHP Detroit July 2018
MySQL Without The SQL -- Oh My! PHP Detroit July 2018MySQL Without The SQL -- Oh My! PHP Detroit July 2018
MySQL Without The SQL -- Oh My! PHP Detroit July 2018
 
Intro to cassandra
Intro to cassandraIntro to cassandra
Intro to cassandra
 
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDB
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDBUi5 con@Banglore - UI5 App with Offline Storage using PouchDB
Ui5 con@Banglore - UI5 App with Offline Storage using PouchDB
 
MongoDB
MongoDBMongoDB
MongoDB
 
LOFAR - finding transients in the radio spectrum
LOFAR - finding transients in the radio spectrumLOFAR - finding transients in the radio spectrum
LOFAR - finding transients in the radio spectrum
 
Webinar: Getting Started with MongoDB - Back to Basics
Webinar: Getting Started with MongoDB - Back to BasicsWebinar: Getting Started with MongoDB - Back to Basics
Webinar: Getting Started with MongoDB - Back to Basics
 
Indexing In MongoDB
Indexing In MongoDBIndexing In MongoDB
Indexing In MongoDB
 
MongoDB - Ekino PHP
MongoDB - Ekino PHPMongoDB - Ekino PHP
MongoDB - Ekino PHP
 
When big data meet python @ COSCUP 2012
When big data meet python @ COSCUP 2012When big data meet python @ COSCUP 2012
When big data meet python @ COSCUP 2012
 
Introduction to MongoDB
Introduction to MongoDBIntroduction to MongoDB
Introduction to MongoDB
 
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...
ToroDB: Scaling PostgreSQL like MongoDB by Álvaro Hernández at Big Data Spain...
 
Back to Basics Webinar 1: Introduction to NoSQL
Back to Basics Webinar 1: Introduction to NoSQLBack to Basics Webinar 1: Introduction to NoSQL
Back to Basics Webinar 1: Introduction to NoSQL
 

Destacado

A Sceptical Guide to Functional Programming
A Sceptical Guide to Functional ProgrammingA Sceptical Guide to Functional Programming
A Sceptical Guide to Functional ProgrammingGarth Gilmour
 
Usability Testing Intro
Usability Testing IntroUsability Testing Intro
Usability Testing IntroErica Burnett
 
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?RUDDER
 
Perl Development (Sample Courseware)
Perl Development (Sample Courseware)Perl Development (Sample Courseware)
Perl Development (Sample Courseware)Garth Gilmour
 

Destacado (6)

A Sceptical Guide to Functional Programming
A Sceptical Guide to Functional ProgrammingA Sceptical Guide to Functional Programming
A Sceptical Guide to Functional Programming
 
Usability Testing Intro
Usability Testing IntroUsability Testing Intro
Usability Testing Intro
 
Scala sydoct2011
Scala sydoct2011Scala sydoct2011
Scala sydoct2011
 
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?
Scala.io 2013 - M’enfin Scalac, où glandes-tu encore ?
 
Perl Development (Sample Courseware)
Perl Development (Sample Courseware)Perl Development (Sample Courseware)
Perl Development (Sample Courseware)
 
Raphaël JS Conf
Raphaël JS ConfRaphaël JS Conf
Raphaël JS Conf
 

Similar a MongoDB - visualisation of slow operations

MongoDB 3.2 - a giant leap. What’s new?
MongoDB 3.2 - a giant leap. What’s new?MongoDB 3.2 - a giant leap. What’s new?
MongoDB 3.2 - a giant leap. What’s new?Binary Studio
 
Eko10 Workshop Opensource Database Auditing
Eko10  Workshop Opensource Database AuditingEko10  Workshop Opensource Database Auditing
Eko10 Workshop Opensource Database AuditingJuan Berner
 
Eko10 workshop - OPEN SOURCE DATABASE MONITORING
Eko10 workshop - OPEN SOURCE DATABASE MONITORINGEko10 workshop - OPEN SOURCE DATABASE MONITORING
Eko10 workshop - OPEN SOURCE DATABASE MONITORINGPablo Garbossa
 
MongoDB: Advantages of an Open Source NoSQL Database
MongoDB: Advantages of an Open Source NoSQL DatabaseMongoDB: Advantages of an Open Source NoSQL Database
MongoDB: Advantages of an Open Source NoSQL DatabaseFITC
 
MongoDB Workshop Universidad de Huelva
MongoDB Workshop Universidad de HuelvaMongoDB Workshop Universidad de Huelva
MongoDB Workshop Universidad de HuelvaJuan Antonio Roy Couto
 
Quick overview on mongo db
Quick overview on mongo dbQuick overview on mongo db
Quick overview on mongo dbEman Mohamed
 
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...ronwarshawsky
 
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...Hernan Costante
 
Log MongoDB slow query
Log MongoDB slow queryLog MongoDB slow query
Log MongoDB slow queryHadi Ariawan
 
2019.06.27 Intro to Ceph
2019.06.27 Intro to Ceph2019.06.27 Intro to Ceph
2019.06.27 Intro to CephCeph Community
 
Logs @ OVHcloud
Logs @ OVHcloudLogs @ OVHcloud
Logs @ OVHcloudOVHcloud
 
MongoDB Pros and Cons
MongoDB Pros and ConsMongoDB Pros and Cons
MongoDB Pros and Consjohnrjenson
 
AWS Big Data Demystified #1: Big data architecture lessons learned
AWS Big Data Demystified #1: Big data architecture lessons learned AWS Big Data Demystified #1: Big data architecture lessons learned
AWS Big Data Demystified #1: Big data architecture lessons learned Omid Vahdaty
 
MongoDB : The Definitive Guide
MongoDB : The Definitive GuideMongoDB : The Definitive Guide
MongoDB : The Definitive GuideWildan Maulana
 
Big data @ Hootsuite analtyics
Big data @ Hootsuite analtyicsBig data @ Hootsuite analtyics
Big data @ Hootsuite analtyicsClaudiu Coman
 
Log stage zero-cost structured logging
Log stage  zero-cost structured loggingLog stage  zero-cost structured logging
Log stage zero-cost structured loggingMaksym Ratoshniuk
 
Logstage - zero-cost-tructured-logging
Logstage - zero-cost-tructured-loggingLogstage - zero-cost-tructured-logging
Logstage - zero-cost-tructured-logging7mind
 

Similar a MongoDB - visualisation of slow operations (20)

MongoDB 3.2 - a giant leap. What’s new?
MongoDB 3.2 - a giant leap. What’s new?MongoDB 3.2 - a giant leap. What’s new?
MongoDB 3.2 - a giant leap. What’s new?
 
Eko10 Workshop Opensource Database Auditing
Eko10  Workshop Opensource Database AuditingEko10  Workshop Opensource Database Auditing
Eko10 Workshop Opensource Database Auditing
 
Eko10 workshop - OPEN SOURCE DATABASE MONITORING
Eko10 workshop - OPEN SOURCE DATABASE MONITORINGEko10 workshop - OPEN SOURCE DATABASE MONITORING
Eko10 workshop - OPEN SOURCE DATABASE MONITORING
 
Mongodb (1)
Mongodb (1)Mongodb (1)
Mongodb (1)
 
MongoDB: Advantages of an Open Source NoSQL Database
MongoDB: Advantages of an Open Source NoSQL DatabaseMongoDB: Advantages of an Open Source NoSQL Database
MongoDB: Advantages of an Open Source NoSQL Database
 
MongoDB Workshop Universidad de Huelva
MongoDB Workshop Universidad de HuelvaMongoDB Workshop Universidad de Huelva
MongoDB Workshop Universidad de Huelva
 
mongodb tutorial
mongodb tutorialmongodb tutorial
mongodb tutorial
 
Quick overview on mongo db
Quick overview on mongo dbQuick overview on mongo db
Quick overview on mongo db
 
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...
MongoDB performance tuning and load testing, NOSQL Now! 2013 Conference prese...
 
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...
Eko10 - Security Monitoring for Big Infrastructures without a Million Dollar ...
 
Log MongoDB slow query
Log MongoDB slow queryLog MongoDB slow query
Log MongoDB slow query
 
2019.06.27 Intro to Ceph
2019.06.27 Intro to Ceph2019.06.27 Intro to Ceph
2019.06.27 Intro to Ceph
 
Logs @ OVHcloud
Logs @ OVHcloudLogs @ OVHcloud
Logs @ OVHcloud
 
MongoDB Pros and Cons
MongoDB Pros and ConsMongoDB Pros and Cons
MongoDB Pros and Cons
 
MongoDB FabLab León
MongoDB FabLab LeónMongoDB FabLab León
MongoDB FabLab León
 
AWS Big Data Demystified #1: Big data architecture lessons learned
AWS Big Data Demystified #1: Big data architecture lessons learned AWS Big Data Demystified #1: Big data architecture lessons learned
AWS Big Data Demystified #1: Big data architecture lessons learned
 
MongoDB : The Definitive Guide
MongoDB : The Definitive GuideMongoDB : The Definitive Guide
MongoDB : The Definitive Guide
 
Big data @ Hootsuite analtyics
Big data @ Hootsuite analtyicsBig data @ Hootsuite analtyics
Big data @ Hootsuite analtyics
 
Log stage zero-cost structured logging
Log stage  zero-cost structured loggingLog stage  zero-cost structured logging
Log stage zero-cost structured logging
 
Logstage - zero-cost-tructured-logging
Logstage - zero-cost-tructured-loggingLogstage - zero-cost-tructured-logging
Logstage - zero-cost-tructured-logging
 

Último

Microsoft 365 Copilot: How to boost your productivity with AI – Part two: Dat...
Microsoft 365 Copilot: How to boost your productivity with AI – Part two: Dat...Microsoft 365 Copilot: How to boost your productivity with AI – Part two: Dat...
Microsoft 365 Copilot: How to boost your productivity with AI – Part two: Dat...Nikki Chapple
 
A Glance At The Java Performance Toolbox
A Glance At The Java Performance ToolboxA Glance At The Java Performance Toolbox
A Glance At The Java Performance ToolboxAna-Maria Mihalceanu
 
Emixa Mendix Meetup 11 April 2024 about Mendix Native development
Emixa Mendix Meetup 11 April 2024 about Mendix Native developmentEmixa Mendix Meetup 11 April 2024 about Mendix Native development
Emixa Mendix Meetup 11 April 2024 about Mendix Native developmentPim van der Noll
 
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...Nikki Chapple
 
Microservices, Docker deploy and Microservices source code in C#
Microservices, Docker deploy and Microservices source code in C#Microservices, Docker deploy and Microservices source code in C#
Microservices, Docker deploy and Microservices source code in C#Karmanjay Verma
 
Tampa BSides - The No BS SOC (slides from April 6, 2024 talk)
Tampa BSides - The No BS SOC (slides from April 6, 2024 talk)Tampa BSides - The No BS SOC (slides from April 6, 2024 talk)
Tampa BSides - The No BS SOC (slides from April 6, 2024 talk)Mark Simos
 
A Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersA Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersNicole Novielli
 
So einfach geht modernes Roaming fuer Notes und Nomad.pdf
So einfach geht modernes Roaming fuer Notes und Nomad.pdfSo einfach geht modernes Roaming fuer Notes und Nomad.pdf
So einfach geht modernes Roaming fuer Notes und Nomad.pdfpanagenda
 
Modern Roaming for Notes and Nomad – Cheaper Faster Better Stronger
Modern Roaming for Notes and Nomad – Cheaper Faster Better StrongerModern Roaming for Notes and Nomad – Cheaper Faster Better Stronger
Modern Roaming for Notes and Nomad – Cheaper Faster Better Strongerpanagenda
 
Assure Ecommerce and Retail Operations Uptime with ThousandEyes
Assure Ecommerce and Retail Operations Uptime with ThousandEyesAssure Ecommerce and Retail Operations Uptime with ThousandEyes
Assure Ecommerce and Retail Operations Uptime with ThousandEyesThousandEyes
 
Accelerating Enterprise Software Engineering with Platformless
Accelerating Enterprise Software Engineering with PlatformlessAccelerating Enterprise Software Engineering with Platformless
Accelerating Enterprise Software Engineering with PlatformlessWSO2
 
Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Hiroshi SHIBATA
 
4. Cobus Valentine- Cybersecurity Threats and Solutions for the Public Sector
4. Cobus Valentine- Cybersecurity Threats and Solutions for the Public Sector4. Cobus Valentine- Cybersecurity Threats and Solutions for the Public Sector
4. Cobus Valentine- Cybersecurity Threats and Solutions for the Public Sectoritnewsafrica
 
Testing tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesTesting tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesKari Kakkonen
 
Email Marketing Automation for Bonterra Impact Management (fka Social Solutio...
Email Marketing Automation for Bonterra Impact Management (fka Social Solutio...Email Marketing Automation for Bonterra Impact Management (fka Social Solutio...
Email Marketing Automation for Bonterra Impact Management (fka Social Solutio...Jeffrey Haguewood
 
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...Wes McKinney
 
Glenn Lazarus- Why Your Observability Strategy Needs Security Observability
Glenn Lazarus- Why Your Observability Strategy Needs Security ObservabilityGlenn Lazarus- Why Your Observability Strategy Needs Security Observability
Glenn Lazarus- Why Your Observability Strategy Needs Security Observabilityitnewsafrica
 
Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...itnewsafrica
 
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesHow to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesThousandEyes
 
2024 April Patch Tuesday
2024 April Patch Tuesday2024 April Patch Tuesday
2024 April Patch TuesdayIvanti
 

Último (20)

Microsoft 365 Copilot: How to boost your productivity with AI – Part two: Dat...
Microsoft 365 Copilot: How to boost your productivity with AI – Part two: Dat...Microsoft 365 Copilot: How to boost your productivity with AI – Part two: Dat...
Microsoft 365 Copilot: How to boost your productivity with AI – Part two: Dat...
 
A Glance At The Java Performance Toolbox
A Glance At The Java Performance ToolboxA Glance At The Java Performance Toolbox
A Glance At The Java Performance Toolbox
 
Emixa Mendix Meetup 11 April 2024 about Mendix Native development
Emixa Mendix Meetup 11 April 2024 about Mendix Native developmentEmixa Mendix Meetup 11 April 2024 about Mendix Native development
Emixa Mendix Meetup 11 April 2024 about Mendix Native development
 
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
 
Microservices, Docker deploy and Microservices source code in C#
Microservices, Docker deploy and Microservices source code in C#Microservices, Docker deploy and Microservices source code in C#
Microservices, Docker deploy and Microservices source code in C#
 
Tampa BSides - The No BS SOC (slides from April 6, 2024 talk)
Tampa BSides - The No BS SOC (slides from April 6, 2024 talk)Tampa BSides - The No BS SOC (slides from April 6, 2024 talk)
Tampa BSides - The No BS SOC (slides from April 6, 2024 talk)
 
A Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersA Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software Developers
 
So einfach geht modernes Roaming fuer Notes und Nomad.pdf
So einfach geht modernes Roaming fuer Notes und Nomad.pdfSo einfach geht modernes Roaming fuer Notes und Nomad.pdf
So einfach geht modernes Roaming fuer Notes und Nomad.pdf
 
Modern Roaming for Notes and Nomad – Cheaper Faster Better Stronger
Modern Roaming for Notes and Nomad – Cheaper Faster Better StrongerModern Roaming for Notes and Nomad – Cheaper Faster Better Stronger
Modern Roaming for Notes and Nomad – Cheaper Faster Better Stronger
 
Assure Ecommerce and Retail Operations Uptime with ThousandEyes
Assure Ecommerce and Retail Operations Uptime with ThousandEyesAssure Ecommerce and Retail Operations Uptime with ThousandEyes
Assure Ecommerce and Retail Operations Uptime with ThousandEyes
 
Accelerating Enterprise Software Engineering with Platformless
Accelerating Enterprise Software Engineering with PlatformlessAccelerating Enterprise Software Engineering with Platformless
Accelerating Enterprise Software Engineering with Platformless
 
Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024Long journey of Ruby standard library at RubyConf AU 2024
Long journey of Ruby standard library at RubyConf AU 2024
 
4. Cobus Valentine- Cybersecurity Threats and Solutions for the Public Sector
4. Cobus Valentine- Cybersecurity Threats and Solutions for the Public Sector4. Cobus Valentine- Cybersecurity Threats and Solutions for the Public Sector
4. Cobus Valentine- Cybersecurity Threats and Solutions for the Public Sector
 
Testing tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examplesTesting tools and AI - ideas what to try with some tool examples
Testing tools and AI - ideas what to try with some tool examples
 
Email Marketing Automation for Bonterra Impact Management (fka Social Solutio...
Email Marketing Automation for Bonterra Impact Management (fka Social Solutio...Email Marketing Automation for Bonterra Impact Management (fka Social Solutio...
Email Marketing Automation for Bonterra Impact Management (fka Social Solutio...
 
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
The Future Roadmap for the Composable Data Stack - Wes McKinney - Data Counci...
 
Glenn Lazarus- Why Your Observability Strategy Needs Security Observability
Glenn Lazarus- Why Your Observability Strategy Needs Security ObservabilityGlenn Lazarus- Why Your Observability Strategy Needs Security Observability
Glenn Lazarus- Why Your Observability Strategy Needs Security Observability
 
Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...
 
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyesHow to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
How to Effectively Monitor SD-WAN and SASE Environments with ThousandEyes
 
2024 April Patch Tuesday
2024 April Patch Tuesday2024 April Patch Tuesday
2024 April Patch Tuesday
 

MongoDB - visualisation of slow operations

  • 1. 1 MongoDB visualisation of slow operations Kay Agahd 4 June 2013
  • 2. 2 idealo and MongoDB ● idealo = Europe's leading price comparison web site ● Germany, Austria, United Kingdom, France, Italy, Poland and Spain ● 250 million offers online (May 2013) ● fast growing ● different types of databases (MySQL, Oracle, MongoDB) ● MongoDB in production since v1.6 ● sharding in production since MongoDB v1.8 ● MongoDB stores offers for back-end usage ● 30 mongoDB servers for offerStore + 3 servers for offerHistory ● 15 mongoDB servers for other purposes ● nearly 15 TB of data all together
  • 3. 3 Review profiling ● MongoDB supports profiling of “slow” operations ● “slow” is a threshold to be set when turning profiling on (default 100 ms) ● profiling per-database or per-instance on a running mongod ● profiler writes collected data to a capped collection “system.profile”
  • 4. 4 Example of slow op entry (1/2) { "ts" : ISODate("2013-04-05T01:41:31.710Z"), "op" : "getmore", "ns" : "offerStore.offer", "query" : { "query" : { "shopId" : 123, "onlineProductIds" : {"$ne" : null}, "smallPicture" : {"$ne" : null}, "_id" : {"$gt" : 1555008076}, "lastChange" : {"$gt" : ISODate("2013-04-02T22:00:00Z")} }, "orderby" : { "_id" : 1 } }, "cursorid" : NumberLong("5773493375904448215"), "ntoreturn" : 500,  ...
  • 5. 5 Example of slow op entry (2/2) ... "keyUpdates" : 0, "numYield" : 2350, "lockStats" : { "timeLockedMicros" : { "r" : NumberLong(8724165), "w" : NumberLong(0) }, "timeAcquiringMicros" : { "r" : NumberLong(5321722), "w" : NumberLong(7) } }, "nreturned" : 500, "responseLength" : 94656, "millis" : 5322, "client" : "172.16.65.202", "user" : "pl_parser" }
  • 6. 6 Inconveniences ● each mongod needs to be handled separately ● replSet: connect to master and every slave ● sharding: incomplete view through router, thus replSet * n shards ● gives only a view on a limited time span due to capped collection ● different formats of “query” field make querying more difficult ● bug: ops through mongos omit the user (JIRA: SERVER-7538)
  • 7. 7 Example of different formats/schemata - “query” as flat document: { "query" : { "shopId" : 123,      "onlineProductIds" : { "$ne" : null } },     "user" : "pl_parser"} - “query” embedded: {"query" : { "query" : { "shopId" : 123,        "onlineProductIds" : { "$ne" : null } },     "orderby" : { "_id" : NumberLong(1) } },   "user" : "pl_parser"} - “query” embedded as $query: { "query" : { "$query" : { "shopId" : 123,            "onlineProductIds" : { "$ne" : null } },                "$orderby" : { "_id" : NumberLong(1) },               "$comment" : "profiling comment" },   "user" : "pl_parser" }
  • 8. 8 idealo requirements ● quick overview of types of slow-ops and their quantity within a time period (“types” means op type, user, server, queried and sorted fields) ● historical view to see how slow-ops evolve to extrapolate them ● discovering spikes in time or in slow-op types ● filtering by slow-op types and/or time range to drill down
  • 9. 9 Goals ● faster queries ● better adapted indexes ● better adapted data schema ● higher throughput by smarter workflow
  • 10. 10 Steps to go ● two global steps: ● 1) collect and aggregate slow ops from all mongod's into one global collection ● 2) GUI to query and show results
  • 11. 11 Step 1 of 2 ● global collection: ● allows easy and fast querying of the whole mongoDB (shard) system ● keeps historical data (no capped collection) ● located on another replSet to avoid interfering with profiled mongod's ● collector: ● guarantee that only 1 instance is running at once (or add logic to avoid doubled entries) ● use tailable cursors to collect data from profiled mongod's ● in case of failure: reconnect before data gets overwritten but avoid DoS ● monitor it (nagios etc.) ● profiled entries: ● reduce size by keeping only interesting fields ● make them easier to query (i.e. only 1 schema) ● aggregate fields inside “query” and “orderby” to values ● choose short field names
  • 12. 12 slow-op example ● the slow-op example above becomes: { "_id" : ObjectId("512e43099bbcf52b9aff3602"), "ts" : ISODate("2013-04-05T01:41:31.710Z"), "adr" : "s233.ipx", "op" : "getmore", "fields" :  ["shopId","onlineProductIds","smallPicture","_id","lastChange"], "sort" : ["_id"], "nret" : 500, "reslen" : 94656, "millis" : 5322, "user" : "pl_parser" }
  • 13. 13 Step 2 of 2 ● GUI: ● x-axis = execution time ● y-axis = duration of slow op ● size of point = quantity of slow-op type ● zoomable in x or y axis
  • 14. 14 How to query slow ops ● group by time component allows resolution by year, month, week etc. ● group by server address, user, operation, queried fields and sorted fields allows defining different slow-op types ● filter allows to focus on time period and specific slow ops ● use slavePreferred option ● error handling, e.g. result exceeds max of 16 MB
  • 19. 19 dygraph.js ● general syntax: <script type="text/javascript">    g = new Dygraph(document.getElementById("graph"),     "x-name,   graph1-name,   graph2-name,   ..., graphN-name\n" +      "x-value1, graph1-value1, graph2-value1, ..., graphN-value1\n" +     "x-value2, graph1-value2, graph2-value2, ..., graphN-value2\n" +     ...     "x-valueN, graph1-valueN, graph2-valueN, ..., graphN-valueN\n"    );  </script> ● example for 2 slow-op types: <script type="text/javascript">  g = new Dygraph(document.getElementById("graph"),  "Date,op=query;fields=[_id;shopId],n,min,max,op=query;fields=[_id],n,min,max\n" +   "2013/03/17,  5.4, 10, 3.2,  7.8,            10.4, 123, 3.1, 20.2\n" +   "2013/03/18, 12.4, 23, 3.4, 55.8,               0,   0,   0,    0\n" +  "2013/03/19,    0,  0,   0,    0,            33.5,  66, 3.1, 89.3\n"     ); </script>
  • 20. 20 dygraph.js Options 1/3 ● hide legend values from being drawn as graph: <script type="text/javascript">  g = new Dygraph(document.getElementById("graph"),   "Date,op=query;fields=[_id;shopId],n,min,max,op=query;fields=[_id],n,min,max\n" +    "2013/03/17, 5.4, 10, 3.2, 7.8, 10.4, 123, 3.1, 20.2\n" +    "2013/03/18, 12.4, 23, 3.4, 55.8, 0, 0, 0, 0\n" +     "2013/03/19, 0, 0, 0, 0, 33.5, 66, 3.1, 89.3\n",   {//options:     visibility:[true, false, false, false, true, false, false, false],     showLabelsOnHighlight:false,     hideOverlayOnMouseOut:false,     labelsSeparateLines: true,     drawPoints: true,     legend: "always",     xlabel: "Date",     ylabel: "seconds",     ... more options ...   } ); 
  • 21. 21 dygraph.js Options 2/3 ● show custom legend on mouse over:  highlightCallback: function(e, x, pts, row) {   var text = "";   var legend = new Array();   for (var i = 0; i < pts.length; i++) {     var rangeY = g.yAxisRange();       if(pts[i].yval >= rangeY[0] && pts[i].yval <= rangeY[1]){//hide outside series         var seriesProps = g.getPropertiesForSeries(pts[i].name);         var count = g.getValue(row, seriesProps.column+1);         var minSec = g.getValue(row, seriesProps.column+2);         var maxSec = g.getValue(row, seriesProps.column+3);         if(pts[i].yval != 0 && count != 0){           legend.push([seriesProps.color, pts[i], count, minSec, maxSec]);         }}}//end for   legend.sort(function(a,b){return b[1].yval - a[1].yval});//sort by y-values   for (var i = 0; i < legend.length; i++) {     text += "<span style='color: " + legend[i][0] + ";'> " + legend[i][1].name +         "</span><br/><span>" + Dygraph.dateString_(legend[i][1].xval) + " count:" +          legend[i][2] + " minSec:" + legend[i][3] + " maxSec:" + legend[i][4] + " avgSec:" +   legend[i][1].yval + " </span><br/>";   }   document.getElementById("status").innerHTML = text; }, ... more options ...
  • 22. 22 dygraph.js Options 3/3 ● draw circles with surface of count:   drawPointCallback : function(g, seriesName, ctx, cx, cy, color, pSize){     if(lastSeries != seriesName || isNaN(currentRow) ){ lastSeries = seriesName; currentRow = g.getLeftBoundary_() - 1;     }     currentRow++;     var col = g.indexFromSetName(seriesName);     var count = g.getValue(currentRow, col+1);     ctx.strokeStyle = color;     ctx.lineWidth = 0.8;     ctx.beginPath();     ctx.arc(cx, cy, Math.sqrt(count/Math.PI), 0, 2 * Math.PI, false);      ctx.closePath();     ctx.stroke();   }  }//end options );//end dygraph