SlideShare una empresa de Scribd logo
1 de 61
Descargar para leer sin conexión
Αποχαιρετισμός στους Δίσκους:
    Αποδοτική Επεξεργασία
    Περίπλοκων Δεδομένων


Διομήδης Σπινέλλης
Καθηγητής
Τμήμα Διοικητικής Επιστήμης και Τεχνολογίας
Οικονομικό Πανεπιστήμιο Αθηνών
http://www.dmst.aueb.gr/dds
                                              1




                                                  1
2
3
4
5
1

    6
7
8
9
2



    10
11
3
    12
13
Worst case latency (Log scale)




                                 L1 D cache   L2 cache   DDR RAM    Hard disk
                                   1.3 ns      9.7 ns     28.5 ns   25.6 ms




                                                                                14
15
16
17
1

    18
Time (Log scale)




                   Function call   System call   Local IPC   Remote IPC
                      1.3ns          1.9μs         4.3μs       1.2ms




                   -- Per-country (Locations.cc1) average CO2 and row count over Papers
                   -- rows with a non-null CO2, joined with the country's division name,
                   -- coordinates and population density; only groups with more than
                   -- 20 rows are kept, ordered by descending average CO2.
                   select Locations.cc1, Divisions.name,
                   avg(CO2), count(*), Locations.lat,
                   Locations.long, POPDENSITY.DENSITY from
                   Papers
                   inner join Locations on
                   Papers.confLocId = Locations.id
                   inner join Divisions on
                   Locations.cc1 = Divisions.country
                   inner join POPDENSITY on
                   Divisions.name = upper(POPDENSITY.name)
                   where Divisions.code = '00' and CO2 notnull
                   group by Locations.cc1
                   having count(*) > 20
                   order by avg(CO2) desc;




                                                                          19
/*
 * Get the data: read the set's data area (mcSet.dataLen bytes found at
 * mcSet.data.off) into a temporary buffer, then read mcSet.numMsgs message
 * records starting at mcSet.u.firstMsg and build an in-memory message for
 * each valid one.  Any failed seek or short read is reported via CORRUPT().
 */
if (mcSet.dataLen) {
     data = xmalloc(mcSet.dataLen);
     if (lseek(fd, mcSet.data.off, SEEK_SET) == -1)
          CORRUPT();
     if (read(fd, data, mcSet.dataLen) != mcSet.dataLen)
          CORRUPT();
     if (lseek(fd, mcSet.u.firstMsg, SEEK_SET) == -1)
          CORRUPT();
     for (i = 0; i < mcSet.numMsgs; ++i) {
          if (read(fd, &mcMsg, sizeof(mcMsg)) != sizeof(mcMsg))
               CORRUPT();
          if (mcMsg.invalid) {
               /* Invalid records do not count: cancel the loop's ++i */
               --i;
               continue;
          }
          msg = xmalloc(sizeof(msgT));
          /* Zero-fill the new message: the fill byte must be 0, not the digit '0' */
          memset(msg, 0, sizeof(*msg));
            /* […] */
          msg->msgId = mcMsg.msgId;
          /* Copy the text out of the buffer, which is freed below */
          msg->str = xstrdup((char *) (data + mcMsg.msg.off));
     }
     free(data);
}




                       2

                                                                  20
MMAP(2)                   FreeBSD System Calls
Manual                  MMAP(2)

NAME
     mmap -- allocate memory, or map files or
devices into memory

SYNOPSIS
     #include <sys/mman.h>

     void *
     mmap(void *addr, size_t len, int prot, int
flags, int fd, off_t offset);

DESCRIPTION
     The mmap() system call causes the pages
starting at addr and continuing for at most len
bytes to be mapped from the object described by
fd, starting at byte offset offset.




                                                  21
[dds@istlab /usr/src/sys/vm]$ ls
default_pager.c uma_int.h       vm_page.c
device_pager.c vm.h             vm_page.h
memguard.c      vm_contig.c     vm_pageout.c
memguard.h      vm_extern.h     vm_pageout.h
phys_pager.c    vm_fault.c      vm_pager.c
pmap.h          vm_glue.c       vm_pager.h
redzone.c       vm_init.c       vm_param.h
redzone.h       vm_kern.c       vm_phys.c
sg_pager.c      vm_kern.h       vm_phys.h
swap_pager.c    vm_map.c        vm_reserv.c
swap_pager.h    vm_map.h        vm_reserv.h
uma.h           vm_meter.c      vm_unix.c
uma_core.c      vm_mmap.c       vm_zeroidle.c
uma_dbg.c       vm_object.c     vnode_pager.c
uma_dbg.h       vm_object.h     vnode_pager.h




                                                22
3



    23
$ ls -lh sparse
-rw-r--r-- 1 dds dds 500G
Mar 19 20:32 sparse


$ du -h sparse
28K     sparse




            4
  διεργασία 1        διεργασία 2

          r/o              r/o




            φυσική μνήμη




                                   24
διεργασία 1        διεργασία 2

        r/w              r/w




          φυσική μνήμη




διεργασία 1        διεργασία 2

        read             read




          φυσική μνήμη




διεργασία 1        διεργασία 2

        read                   r/w



                 αντίγραφο
          φυσική μνήμη




                                     25
5
C++

      26
e.g. 1




     CC-BY 2.5 Claudio Rocchini




                                  27
01110010011 0111101101101011 0000101101110011 00101         // romane
01110010011 0111101101101011 0000101101110011 1010101110011 // romanus

01110010011 0111101101101011 10101011011000111010101110011          // romulus




01110010011 10101011000100110 0101011 0111001110011                 // rubens
01110010011 10101011000100110 0101011 10010                         // ruber

01110010011 10101011000100110 100101100011011 0111101101110 // rubicon
01110010011 10101011000100110 100101100011011 1010101101110011001000111
                                               010101110011 // rubicundus




      Δομή                  Δομή δίσκου                     Ιστοσελίδα
     μνήμης                                                  με νέους
                                                             δεσμούς

   Κατασκευή                  Κατασκευή
    δένδρου                     δομής                     wikipedialize
     ριζών                     δίσκου


    Κατάλογος                                             Αρχική
     άρθρων                                             ιστοσελίδα




  for (;;) {
      i = bitpos;
      // Loop until the end of the current node or the end of the word
      while (i < p->end && i < len * 8) {

       // Covering whole byte?
       if (i % 8 == 0 && i + 8 <= p->end && (i + 8) / 8 <= len &&
           data[i / 8] == p->data[i / 8]) {
         i += 8;
         continue;
       }

       // Split point
       if (getbit(data, i) != getbit(p->data, i)) {
         // Node with the new data
         struct pnode *n = new_node(data + i / 8, i % 8, (len - i / 8) * 8,
                                    NULL, NULL, true);
         // Tail of the current node
         struct pnode *t = new_node(p->data + i / 8, i % 8, p->end - (i & ~7),
                                    p->zero, p->one, p->is_terminal);
         // Head of current node
         if (getbit(data, i))
           *p2 = new_node(p->data, bitpos, i, t, n, false);
         else
           *p2 = new_node(p->data, bitpos, i, n, t, false);
         free(p);
         return;
       }
       i++;
     } // while




                                                                                 28
// Write the given node to the specified file, returning its file offset.
// On return the file's offset is set to the first free byte.
static long
write_node(struct pnode *p, FILE *f)
{
  long my_offset = ftell(f);
  size_t ret;

  if (p->one) {
    struct pnode_disk_one pdo;
    size_t dlen = datalen(p->end);
    long len = sizeof(pdo) + dlen;

    fseek(f, len, SEEK_CUR);
    pdo.h.type = dt_one;
    pdo.h.is_terminal = p->is_terminal;
    pdo.h.has_zero = (p->zero != NULL);
    pdo.h.has_one = true;
    pdo.h.begin = p->begin;
    pdo.h.end = p->end;
    if (p->zero)
      write_node(p->zero, f);
    pdo.one = write_node(p->one, f);
    long saved_offset = ftell(f);
    fseek(f, my_offset, SEEK_SET);
    fwrite(&pdo, 1, sizeof(pdo), f);
    fwrite(p->data, 1, dlen, f);
    fseek(f, saved_offset, SEEK_SET);
    return my_offset;
  } else {




$ zcat enwiki-latest-all-titles-in-ns0.gz | wc -c
106,237,053


$ wc -c enwiki.pt
144,657,286 enwiki.pt




                                                                            29
$ curl
http://www.kiosek.com/dostoevsky/library/crimeandpuni
shment.txt |
perl -pe 's/[\r\n]/ /g' >crimeandpunishment.txt




                                                        30
$ wc crimeandpunishment.txt
       0 203,273 1,462,661 crimeandpunishment.txt




$ time ./wpltest en en_US.UTF-8 ISO-8859-1
data/enwiki.pt <crimeandpunishment.txt >/dev/null




$ time ./wpltest en en_US.UTF-8 ISO-8859-1
data/enwiki.pt <crimeandpunishment.txt >/dev/null

Checked 406,225 prefixes

real   0m5.859s   # Cold cache
real   0m1.876s   # Warm cache
user   0m1.780s
sys    0m0.090s




                                                    31
// Prevent memory alignment problems
memcpy(&end, &(p.h->end), sizeof(end));
                                                                                                      /*




while (i < end && i < len * 8) {
  if (i % 8 == 0 && i + 8 <= end && data[i / 8] == pdata[i / 8]) {
                                                                                                       */
                                                                                                       * to understand this
                                                                                                       * You are not expected




    i += 8;
    prefix += 8;
    continue;
  }
  // Split point
  if (getbit(data, i) != getbit(pdata, i))
    return best / 8;
  i++;
  prefix++;
}
if (i == end && p.h->is_terminal)
  best = prefix;
if (i == len * 8)
  return best / 8;
// Move to next node
bitpos = end % 8;
int covered = end / 8;
if (getbit(data, end)) {
  if (!p.h->has_one)
    return best / 8;
  switch (p.h->type) {
  case dt_both:
    p.h = (struct pnode_disk_head *)(base + p.b->one);
    break;
  case dt_one:
    p.h = (struct pnode_disk_head *)(base + p.o->one);
    break;
  case dt_short:
  default:
    assert(0);
  }
} else {
  if (!p.h->has_zero)
    return best / 8;
  switch (p.h->type) {
  case dt_both:
    p.h = (struct pnode_disk_head *)(base + p.b->zero);
    break;
  case dt_one:
    // Advance to the end of this node
    p.h = (struct pnode_disk_head *)((char *)p.h + sizeof(struct pnode_disk_one) + datalen(end));
    break;
  case dt_short:
    // Advance to the end of this node
    p.h = (struct pnode_disk_head *)((char *)p.h + sizeof(struct pnode_disk_short) + datalen(end));
    break;
  default:
    assert(0);
  }
}




                                                                                                                                32
e.g. 2

The problem with wikipedia




                             33
Κατασκευή
 γράφου

                     Δομή
Λίστα ακμών       δεδομένων
                   γράφου




    Κορυφές         Διαδρομή
  αρχής, τέλους


                   BFS


                     Δομή
                  δεδομένων
                   γράφου




    Κορυφές         Διαδρομή
  αρχής, τέλους


Κατασκευή          BFS
 γράφου

                     Δομή
Λίστα ακμών       δεδομένων
                   γράφου




                               34
Κατασκευή
  γράφου

                             Δομή
 Λίστα ακμών              δεδομένων
                           γράφου




// Loop through all lines,
// adding them to the graph.
// Each line holds two node names separated by a ^A (octal 001) character.
 while (std::getline(in, line)) {
   // size_type (not int) so the comparison with npos involves no narrowing
   const std::string::size_type split = line.find('\001');
   if (split == std::string::npos) {
     std::cerr << "No separator: " <<
       line << std::endl;
     continue;
   }
   // Insert (or find the already present) source node
   n.setName(line.substr(0, split));
   NodesIter from(entries->insert(n).first);
   // Insert (or find the already present) destination node
   n.setName(line.substr(split + 1));
   NodesIter to(entries->insert(n).first);
   // Set elements are const; cast that away to record the edge
   (const_cast<Node &>(*from)).addEdge(
     const_cast<Node *>(&*to));
 }




      Κορυφές                Διαδρομή
    αρχής, τέλους


 Κατασκευή                  BFS
  γράφου

                             Δομή
 Λίστα ακμών              δεδομένων
                           γράφου




                                               35
Διαδρομή



                                                  BFS


                                                    Δομή
                                                 δεδομένων
                                                  γράφου




          Tacoma Narrows Bridge
     p=




     Suspension bridge             Washington
p=                         p=



          Geneva                       Montana
p=                                p=

                                                       Ουρά
                                           []=
William Howard Taft
p=




          Tacoma Narrows Bridge
     p=




     Suspension bridge             Washington
p=                         p=



          Geneva                       Montana
p=                                p=

                                                       Ουρά
                                           []=Tacoma Narrows Bridge
William Howard Taft
p=




                                                                     36
Tacoma Narrows Bridge
     p=




     Suspension bridge             Washington
p=                         p=



          Geneva                       Montana
p=                                p=

                                                        Ουρά
                                           []=Tacoma Narrows Bridge
William Howard Taft
p=




          Tacoma Narrows Bridge
     p=




     Suspension bridge             Washington
p=                         p=



          Geneva                       Montana
p=                                p=

                                                        Ουρά
                                           []=
William Howard Taft
p=




          Tacoma Narrows Bridge
     p=




     Suspension bridge             Washington
p=Tacoma Narrows Bridge    p=



          Geneva                       Montana
p=                                p=

                                                        Ουρά
                                           []= Suspension bridge
William Howard Taft
p=




                                                                     37
Tacoma Narrows Bridge
     p=




     Suspension bridge             Washington
p=Tacoma Narrows Bridge    p=Tacoma Narrows Bridge



          Geneva                       Montana
p=                                p=

                                                        Ουρά
                                           []= Suspension bridge
William Howard Taft                            Washington
p=




          Tacoma Narrows Bridge
     p=




     Suspension bridge             Washington
p=Tacoma Narrows Bridge    p=Tacoma Narrows Bridge



          Geneva                       Montana
p=                                p=

                                                        Ουρά
                                           []= Suspension bridge
William Howard Taft                            Washington
p=




          Tacoma Narrows Bridge
     p=




     Suspension bridge             Washington
p=Tacoma Narrows Bridge    p=Tacoma Narrows Bridge



          Geneva                       Montana
p=Suspension bridge               p=

                                                        Ουρά
                                           []= Washington
William Howard Taft                            Geneva
p=




                                                                   38
Tacoma Narrows Bridge
     p=




     Suspension bridge              Washington
p=Tacoma Narrows Bridge    p=Tacoma Narrows Bridge



          Geneva                    Montana
p=Suspension bridge               p=Washington

                                                       Ουρά
                                         []= Geneva
William Howard Taft                          Montana
p=




          Tacoma Narrows Bridge
     p=




     Suspension bridge              Washington
p=Tacoma Narrows Bridge    p=Tacoma Narrows Bridge



          Geneva                    Montana
p=Suspension bridge               p=Washington

                                                       Ουρά
                                         []=Montana
William Howard Taft
p=




          Tacoma Narrows Bridge
     p=




     Suspension bridge              Washington
p=Tacoma Narrows Bridge    p=Tacoma Narrows Bridge



          Geneva                    Montana
p=Suspension bridge               p=Washington

                                                       Ουρά
                                         []=Montana
William Howard Taft
p=Geneva




                                                              39
/*
 * Breadth-first search of the graph, starting at "from" and looking for "to".
 *
 * Every newly discovered (White) node is coloured Gray and gets the node it
 * was reached from as its predecessor; a node is coloured Black once all of
 * its edges have been examined.
 *
 * Returns true as soon as "to" is discovered, false when the queue drains
 * without reaching it.  Because "from" is coloured Gray before the loop
 * starts, it is not itself reported as found.
 * The parameter n is not used in this body.
 */
static bool breadthFirstSearchFor(NodePtr from, NodePtr to,
  size_t n) {
  std::queue<NodePtr> q;

  from->setColor(Node::Gray);
  q.push(from);
  while (!q.empty()) {
    NodePtr u = q.front();
    q.pop();
    // Bind by reference so the edge list is not copied for every node
    const Edges &edges = u->getEdges();
    for (Edges::const_iterator j = edges.begin();
         j != edges.end(); ++j) {
      if ((*j)->getColor() == Node::White) {
        (*j)->setColor(Node::Gray);
        (*j)->setPredecessor(u);
        if (*j == to)
          return true; // Found
        q.push(*j);
      }
    }
    u->setColor(Node::Black);
  }
  return false; // Not found
}




           Κορυφές                        Διαδρομή
         αρχής, τέλους


     Κατασκευή                          BFS
      γράφου

                                         Δομή
     Λίστα ακμών                      δεδομένων
                                       γράφου




                                         Δομή
                                      δεδομένων
                                       γράφου




                                                              40
41
Δομή
                                  δεδομένων
                                   γράφου




#include   <string>
#include   <iostream>
#include   <queue>
#include   <list>
#include   <functional>

#include   <boost/interprocess/managed_mapped_file.hpp>
#include   <boost/interprocess/offset_ptr.hpp>
#include   <boost/interprocess/allocators/allocator.hpp>
#include   <boost/unordered_set.hpp>
#include   <boost/interprocess/containers/string.hpp>
#include   <boost/interprocess/containers/slist.hpp>

#include <boost/filesystem.hpp>

#include <boost/filesystem/operations.hpp>




                                                           42
#include     <string>
#include     <iostream>
#include     <queue>
#include     <list>
#include     <functional>

#include     <boost/interprocess/managed_mapped_file.hpp>
#include     <boost/interprocess/offset_ptr.hpp>
#include     <boost/interprocess/allocators/allocator.hpp>
#include     <boost/unordered_set.hpp>
#include     <boost/interprocess/containers/string.hpp>
#include     <boost/interprocess/containers/slist.hpp>

#include <boost/filesystem.hpp>

#include <boost/filesystem/operations.hpp>




// Segment manager type of the managed mapped file; every allocator below
// is parameterised on it.
typedef managed_mapped_file::segment_manager SegmentManager;


// Character allocator and the string type that allocates through it
typedef allocator<char, SegmentManager> CharAllocator;
typedef basic_string<char, std::char_traits<char>, CharAllocator>
       CharString;


// Node allocator and the set of nodes, hashed with boost::hash<Node>
// and compared with NodeEqual
typedef allocator<Node, SegmentManager> NodeAllocator;
typedef boost::unordered_set<Node, boost::hash<Node>,
                             NodeEqual, NodeAllocator> Nodes;

// Pointer to a node, and a singly linked list of such pointers
typedef offset_ptr<Node> NodePtr;
typedef allocator<NodePtr, SegmentManager> NodePtrAllocator;
typedef slist<NodePtr, NodePtrAllocator> Edges;



// Untyped allocator over the same segment manager
typedef allocator<void, SegmentManager> VoidAllocator;

// Allocator for edge lists
typedef allocator<Edges, SegmentManager> EdgesAllocator;




// A graph node, suitable for performing a breadth-first search.
// All members that own storage (name, edges) are constructed with the
// allocator passed to the constructor.
class Node {
  public:
    // BFS colouring states; a new node starts out White
    typedef enum {White, Gray, Black} Color;

     private:
       CharString name;            //   Node name
       Color color;                //   Color used during BFS
       NodePtr predecessor;        //   BFS predecessor
       Edges edges;                //   Node's edges

     public:
       // Since VoidAllocator is convertible to any other
       // allocator<T>, we can simplify the initialization
       // taking just one allocator for all inner containers.
       // The node starts White, with no predecessor and no edges.
       Node(const std::string &n, const VoidAllocator &voidAlloc)
         : name(n.begin(), n.end(), voidAlloc), color(White),
           predecessor(NULL), edges(voidAlloc) {}

      // Add an edge from this node to p (prepended to the edge list)
      void addEdge(NodePtr p) {
        edges.push_front(p);
      }
};




                                                                    43
/*
 * Read ^A-separated nodes from the inputFile, storing the graph
 * structure in the specified backingFile.
 */
static void readData(const char *backingFile, const char *inputFile) {
   std::ifstream in(inputFile, std::ios::binary);

 if (in.fail()) {
   perror(inputFile);
   exit(1);
 }

 boost::filesystem::remove_all(backingFile);
 managed_mapped_file segment(create_only, backingFile, FileSize);

 // An allocator convertible to any allocator<T, SegmentManager> type
 VoidAllocator allocInst (segment.get_segment_manager());

 // Construct the memory map and fill it
 Nodes *entries = segment.construct<Nodes>("entries")(Elements,
   boost::hash<Node>(), NodeEqual(), allocInst);

 std::string line;
 Node n(std::string(), allocInst);    // To save construction costs




/*
 * Search and report the shortest graph path from "from" to "to"
 * The graph is stored in backingFile.
 */
static void searchData(const char *backingFile,
  const std::string &from, const std::string &to) {

  managed_mapped_file segment(open_copy_on_write, backingFile);

  // An allocator convertible to any allocator<T, SegmentManager>
  VoidAllocator allocInst(segment.get_segment_manager());

  // Obtain the previously saved entries
  Nodes *entries = segment.find<Nodes>("entries").first;

  NodePtr toPtr;
  bool found = breadthFirstSearchFor(
    findNode(entries, Node(from, allocInst)),
    toPtr = findNode(entries, Node(to, allocInst)), entries->size());




                                                                         44
Κορυφές         Διαδρομή
  αρχής, τέλους


Κατασκευή          BFS
 γράφου

                     Δομή
Λίστα ακμών       δεδομένων
                   γράφου




                               45
$ ./smap -r graph.bin graph.txt




$ ./smap -s graph.bin 'Tacoma Narrows Bridge'
'William howard taft'

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
*
Tacoma Narrows Bridge
Washington
Montana
William howard taft




$ ./smap -s graph.bin 'Tacoma Narrows Bridge'
'24-hour analog dial'

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
**
Tacoma Narrows Bridge
Suspension bridge
Geneva
Watch
24-hour analog dial




                                                         46
$ ./smap -s graph.bin 'Tacoma Narrows Bridge' 'Wet t-shirt
contest'

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
*
Tacoma Narrows Bridge
Washington
Starbucks
Toplessness
Wet t-shirt contest




              The problem with wikipedia




                                                             47
Performance




                                      Κατασκευή δομής δεδομένων


                           18:00
Χρόνος (ω:λ)




                           12:00

                           06:00

                           00:00
                                            MySQL            mmap
                      Server               15:59:43
                      Client system        03:16:59         00:04:32
                      Client user          00:52:48         00:04:52




                                        Taft: Κρύα κρυφή μνήμη

                           4.500
Χρόνος / κόμβο (μs)




                           4.000
                           3.500
                           3.000
                           2.500
                           2.000
                           1.500
                           1.000
                             500
                               0
                                           MySQL             mmap
                      Waiting               348              3.886
                      Server                259
                      Client system          58                  19
                      Client user            16                   2




                                                                       48
Taft: Ζεστή κρυφή μνήμη

                             450
Χρόνος / κόμβο (μs)




                             400
                             350
                             300
                             250
                             200
                             150
                             100
                              50
                               0
                                            MySQL             mmap
                      Waiting                 23               0
                      Server                 305
                      Client system           59                  5
                      Client user             15                  3




                                      24h Clock: Κρύα κρυφή μνήμη

                           2.500
Χρόνος / κόμβο (μs)




                           2.000
                           1.500
                           1.000
                             500
                               0
                                            MySQL             mmap
                      Waiting                415              1.977
                      Server                 472
                      Client system          103                  10
                      Client user             26                   4




                                   24h Clock: Ζεστή κρυφή μνήμη

                             800
Χρόνος / κόμβο (μs)




                             700
                             600
                             500
                             400
                             300
                             200
                             100
                               0
                                            MySQL             mmap
                      Waiting                120               0
                      Server                 469
                      Client system          103                  3
                      Client user             27                  4




                                                                       49
Κλιμάκωση απόδοσης (κρύα μνήμη)
Χρόνος (ρ) / κόμβο (ms)




                          5          mmap
                                     MySQL
                          4

                          3

                          2

                          1

                          0
                              0        2000      4000        6000          8000
                                                                Χιλιάδες

                                            Αριθμός κόμβων




                                                                                  50
ACID

A
       51
C
I
D
    52
SQL


      53
A case…




            Application code

          vector<Customer> customers1;
            Customer c1(d1,cd1,s1,p1);
            customers1.push_back(c1);
                         …
               vector<Truck> trucks;
      Truck t1(cs1,dc1,pc1,rlp1, customers1);
               trucks.push_back(t1);
                        ….




ODBC
JDBC
                                                54
register
  L1 D cache
      L2 cache
           L3 cache
               DRAM
               HDD cache
                 HDD / SSD




                             55
534,681,000 εντολές ΚΜΕ




                                     100,000
Μέγιστη διεκπεραιωτικότητα (MB/s )




                                      10,000


                                       1,000


                                        100


                                         10


                                          1
                                                L1 D cache       L2 cache     DDR RAM   Hard disk
Χείριστη αναμονή (λογ. κλιμ.)




                                          L1 D cache         L2 cache       DDR RAM     Hard disk
                                            1.3 ns            9.7 ns         28.5 ns     25.6 ms




                                                                                                    56
Χείριστη αναμονή (λογ. κλιμ.)




                                L1 D cache   L2 cache   DDR RAM    Hard disk
                                  1.3 ns      9.7 ns     28.5 ns   25.6 ms




                                                                               57
// Write the given node to the specified file, returning its file offset.
// On return the file's offset is set to the first free byte.
static long
write_node(struct pnode *p, FILE *f)
{
  long my_offset = ftell(f);
  size_t ret;

  if (p->one) {
    struct pnode_disk_one pdo;
    size_t dlen = datalen(p->end);
    long len = sizeof(pdo) + dlen;

    fseek(f, len, SEEK_CUR);
    pdo.h.type = dt_one;
    pdo.h.is_terminal = p->is_terminal;
    pdo.h.has_zero = (p->zero != NULL);
    pdo.h.has_one = true;
    pdo.h.begin = p->begin;
    pdo.h.end = p->end;
    if (p->zero)
      write_node(p->zero, f);
    pdo.one = write_node(p->one, f);
    long saved_offset = ftell(f);
    fseek(f, my_offset, SEEK_SET);
    fwrite(&pdo, 1, sizeof(pdo), f);
    fwrite(p->data, 1, dlen, f);
    fseek(f, saved_offset, SEEK_SET);
    return my_offset;
  } else {




                                                                            58
#include <boost/interprocess/managed_mapped_file.hpp>
#include <boost/interprocess/offset_ptr.hpp>
#include <boost/interprocess/allocators/allocator.hpp>

#include <boost/unordered_set.hpp>

#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/containers/slist.hpp>




       βήμα 1                        βήμα Ν

                w                       r/o




                  φυσική μνήμη




    διεργασία 1                 διεργασία 2

                read                          r/w



                             αντίγραφο
                  φυσική μνήμη




                                                         59
www.spinellis.gr
twitter.com/CoolSWEng
      dds@aueb.gr




                        60
www.spinellis.gr/wpl




www.spinellis.gr/blog/20101030/smap.cpp




                                          61

Más contenido relacionado

La actualidad más candente

Unix Programming with Perl
Unix Programming with PerlUnix Programming with Perl
Unix Programming with Perl
Kazuho Oku
 
ソーシャルアプリ向けシステム監視運用の勘所
ソーシャルアプリ向けシステム監視運用の勘所ソーシャルアプリ向けシステム監視運用の勘所
ソーシャルアプリ向けシステム監視運用の勘所
Tatsuro Hisamori
 
Making Mongo realtime - oplog tailing in Meteor
Making Mongo realtime - oplog tailing in MeteorMaking Mongo realtime - oplog tailing in Meteor
Making Mongo realtime - oplog tailing in Meteor
yaliceme
 

La actualidad más candente (20)

File-I/O -- ist doch ganz einfach, oder?
File-I/O -- ist doch ganz einfach, oder?File-I/O -- ist doch ganz einfach, oder?
File-I/O -- ist doch ganz einfach, oder?
 
Hebrew Windows Cluster 2012 in a one slide diagram
Hebrew Windows Cluster 2012 in a one slide diagramHebrew Windows Cluster 2012 in a one slide diagram
Hebrew Windows Cluster 2012 in a one slide diagram
 
Unix v6 セミナー vol. 5
Unix v6 セミナー vol. 5Unix v6 セミナー vol. 5
Unix v6 セミナー vol. 5
 
JS Fest 2019 Node.js Antipatterns
JS Fest 2019 Node.js AntipatternsJS Fest 2019 Node.js Antipatterns
JS Fest 2019 Node.js Antipatterns
 
Abusing text/template for data transformation
Abusing text/template for data transformationAbusing text/template for data transformation
Abusing text/template for data transformation
 
Unix Programming with Perl
Unix Programming with PerlUnix Programming with Perl
Unix Programming with Perl
 
gemdiff
gemdiffgemdiff
gemdiff
 
OOP in Rust
OOP in RustOOP in Rust
OOP in Rust
 
Caching and tuning fun for high scalability @ 4Developers
Caching and tuning fun for high scalability @ 4DevelopersCaching and tuning fun for high scalability @ 4Developers
Caching and tuning fun for high scalability @ 4Developers
 
Caching and tuning fun for high scalability
Caching and tuning fun for high scalabilityCaching and tuning fun for high scalability
Caching and tuning fun for high scalability
 
Cuda cluster
Cuda clusterCuda cluster
Cuda cluster
 
Opa hackathon
Opa hackathonOpa hackathon
Opa hackathon
 
ソーシャルアプリ向けシステム監視運用の勘所
ソーシャルアプリ向けシステム監視運用の勘所ソーシャルアプリ向けシステム監視運用の勘所
ソーシャルアプリ向けシステム監視運用の勘所
 
Making Mongo realtime - oplog tailing in Meteor
Making Mongo realtime - oplog tailing in MeteorMaking Mongo realtime - oplog tailing in Meteor
Making Mongo realtime - oplog tailing in Meteor
 
Unix Programming with Perl 2
Unix Programming with Perl 2Unix Programming with Perl 2
Unix Programming with Perl 2
 
The Browser Environment - A Systems Programmer's Perspective [sinatra edition]
The Browser Environment - A Systems Programmer's Perspective [sinatra edition]The Browser Environment - A Systems Programmer's Perspective [sinatra edition]
The Browser Environment - A Systems Programmer's Perspective [sinatra edition]
 
Whispered secrets
Whispered secretsWhispered secrets
Whispered secrets
 
Whispered secrets
Whispered secretsWhispered secrets
Whispered secrets
 
Advfs 3 in-memory structures
Advfs 3 in-memory structuresAdvfs 3 in-memory structures
Advfs 3 in-memory structures
 
Implementing Software Machines in Go and C
Implementing Software Machines in Go and CImplementing Software Machines in Go and C
Implementing Software Machines in Go and C
 

Destacado (9)

Μεταπολεμική Λογοτεχνία
Μεταπολεμική ΛογοτεχνίαΜεταπολεμική Λογοτεχνία
Μεταπολεμική Λογοτεχνία
 
μεταπολεμικη ποιηση
μεταπολεμικη ποιησημεταπολεμικη ποιηση
μεταπολεμικη ποιηση
 
Πόλεμος και Ειρήνη
Πόλεμος και ΕιρήνηΠόλεμος και Ειρήνη
Πόλεμος και Ειρήνη
 
Αντιπολεμικα τραγουδια και αφισες
Αντιπολεμικα τραγουδια και αφισεςΑντιπολεμικα τραγουδια και αφισες
Αντιπολεμικα τραγουδια και αφισες
 
"ΑΝ ΘΕΛΕΙΣ ΝΑ ΛΕΓΕΣΑΙ ΑΝΘΡΩΠΟΣ" ΤΑΣΟΣ ΛΕΙΒΑΔΙΤΗΣ
"ΑΝ ΘΕΛΕΙΣ ΝΑ ΛΕΓΕΣΑΙ ΑΝΘΡΩΠΟΣ" ΤΑΣΟΣ ΛΕΙΒΑΔΙΤΗΣ"ΑΝ ΘΕΛΕΙΣ ΝΑ ΛΕΓΕΣΑΙ ΑΝΘΡΩΠΟΣ" ΤΑΣΟΣ ΛΕΙΒΑΔΙΤΗΣ
"ΑΝ ΘΕΛΕΙΣ ΝΑ ΛΕΓΕΣΑΙ ΑΝΘΡΩΠΟΣ" ΤΑΣΟΣ ΛΕΙΒΑΔΙΤΗΣ
 
αντι να περιμενεις να σου συμβει το θαυμα
αντι να περιμενεις να σου συμβει το θαυμααντι να περιμενεις να σου συμβει το θαυμα
αντι να περιμενεις να σου συμβει το θαυμα
 
Eiρηνη- πόλεμος 5η ενοτητα γλωσσα γ
Eiρηνη- πόλεμος 5η ενοτητα γλωσσα γEiρηνη- πόλεμος 5η ενοτητα γλωσσα γ
Eiρηνη- πόλεμος 5η ενοτητα γλωσσα γ
 
Αντιπολεμικά
ΑντιπολεμικάΑντιπολεμικά
Αντιπολεμικά
 
γιορτη καστανου στο χωριο λιβαδι
γιορτη καστανου στο χωριο λιβαδιγιορτη καστανου στο χωριο λιβαδι
γιορτη καστανου στο χωριο λιβαδι
 

Similar a Farewell to Disks: Efficient Processing of Obstinate Data

Assignment of SOS operating systemThe file lmemman.c has one incom.pdf
Assignment of SOS operating systemThe file lmemman.c has one incom.pdfAssignment of SOS operating systemThe file lmemman.c has one incom.pdf
Assignment of SOS operating systemThe file lmemman.c has one incom.pdf
sktambifortune
 
All I know about rsc.io/c2go
All I know about rsc.io/c2goAll I know about rsc.io/c2go
All I know about rsc.io/c2go
Moriyoshi Koizumi
 
The TCP/IP stack in the FreeBSD kernel COSCUP 2014
The TCP/IP stack in the FreeBSD kernel COSCUP 2014The TCP/IP stack in the FreeBSD kernel COSCUP 2014
The TCP/IP stack in the FreeBSD kernel COSCUP 2014
Kevin Lo
 
Unit 6
Unit 6Unit 6
Unit 6
siddr
 
Memory Optimization
Memory OptimizationMemory Optimization
Memory Optimization
guest3eed30
 
Memory Optimization
Memory OptimizationMemory Optimization
Memory Optimization
Wei Lin
 
Putting a Fork in Fork (Linux Process and Memory Management)
Putting a Fork in Fork (Linux Process and Memory Management)Putting a Fork in Fork (Linux Process and Memory Management)
Putting a Fork in Fork (Linux Process and Memory Management)
David Evans
 

Similar a Farewell to Disks: Efficient Processing of Obstinate Data (20)

Assignment of SOS operating systemThe file lmemman.c has one incom.pdf
Assignment of SOS operating systemThe file lmemman.c has one incom.pdfAssignment of SOS operating systemThe file lmemman.c has one incom.pdf
Assignment of SOS operating systemThe file lmemman.c has one incom.pdf
 
All I know about rsc.io/c2go
All I know about rsc.io/c2goAll I know about rsc.io/c2go
All I know about rsc.io/c2go
 
Sysprog17
Sysprog17Sysprog17
Sysprog17
 
Linux seccomp(2) vs OpenBSD pledge(2)
Linux seccomp(2) vs OpenBSD pledge(2)Linux seccomp(2) vs OpenBSD pledge(2)
Linux seccomp(2) vs OpenBSD pledge(2)
 
Computer networkppt4577
Computer networkppt4577Computer networkppt4577
Computer networkppt4577
 
Sysprog 16
Sysprog 16Sysprog 16
Sysprog 16
 
The TCP/IP Stack in the Linux Kernel
The TCP/IP Stack in the Linux KernelThe TCP/IP Stack in the Linux Kernel
The TCP/IP Stack in the Linux Kernel
 
The TCP/IP stack in the FreeBSD kernel COSCUP 2014
The TCP/IP stack in the FreeBSD kernel COSCUP 2014The TCP/IP stack in the FreeBSD kernel COSCUP 2014
The TCP/IP stack in the FreeBSD kernel COSCUP 2014
 
Ui disk & terminal drivers
Ui disk & terminal driversUi disk & terminal drivers
Ui disk & terminal drivers
 
Keynote 1 - Engineering Software Analytics Studies
Keynote 1 - Engineering Software Analytics StudiesKeynote 1 - Engineering Software Analytics Studies
Keynote 1 - Engineering Software Analytics Studies
 
Npc14
Npc14Npc14
Npc14
 
C Assignment Help
C Assignment HelpC Assignment Help
C Assignment Help
 
(Slightly) Smarter Smart Pointers
(Slightly) Smarter Smart Pointers(Slightly) Smarter Smart Pointers
(Slightly) Smarter Smart Pointers
 
Unix 5 en
Unix 5 enUnix 5 en
Unix 5 en
 
Unit 6
Unit 6Unit 6
Unit 6
 
Memory Optimization
Memory OptimizationMemory Optimization
Memory Optimization
 
Memory Optimization
Memory OptimizationMemory Optimization
Memory Optimization
 
Putting a Fork in Fork (Linux Process and Memory Management)
Putting a Fork in Fork (Linux Process and Memory Management)Putting a Fork in Fork (Linux Process and Memory Management)
Putting a Fork in Fork (Linux Process and Memory Management)
 
Linux.pdf
Linux.pdfLinux.pdf
Linux.pdf
 
ES6 is Nigh
ES6 is NighES6 is Nigh
ES6 is Nigh
 

Más de Distinguished Lecturer Series - Leon The Mathematician

Más de Distinguished Lecturer Series - Leon The Mathematician (20)

Machine Learning Tools and Particle Swarm Optimization for Content-Based Sear...
Machine Learning Tools and Particle Swarm Optimization for Content-Based Sear...Machine Learning Tools and Particle Swarm Optimization for Content-Based Sear...
Machine Learning Tools and Particle Swarm Optimization for Content-Based Sear...
 
Compressive Spectral Image Sensing, Processing, and Optimization
Compressive Spectral Image Sensing, Processing, and OptimizationCompressive Spectral Image Sensing, Processing, and Optimization
Compressive Spectral Image Sensing, Processing, and Optimization
 
Influence Propagation in Large Graphs - Theorems and Algorithms
Influence Propagation in Large Graphs - Theorems and AlgorithmsInfluence Propagation in Large Graphs - Theorems and Algorithms
Influence Propagation in Large Graphs - Theorems and Algorithms
 
Defying Nyquist in Analog to Digital Conversion
Defying Nyquist in Analog to Digital ConversionDefying Nyquist in Analog to Digital Conversion
Defying Nyquist in Analog to Digital Conversion
 
Opening Second Greek Signal Processing Jam
Opening Second Greek Signal Processing JamOpening Second Greek Signal Processing Jam
Opening Second Greek Signal Processing Jam
 
Sparse and Low Rank Representations in Music Signal Analysis
 Sparse and Low Rank Representations in Music Signal  Analysis Sparse and Low Rank Representations in Music Signal  Analysis
Sparse and Low Rank Representations in Music Signal Analysis
 
Nonlinear Communications: Achievable Rates, Estimation, and Decoding
Nonlinear Communications: Achievable Rates, Estimation, and DecodingNonlinear Communications: Achievable Rates, Estimation, and Decoding
Nonlinear Communications: Achievable Rates, Estimation, and Decoding
 
Sparsity Control for Robustness and Social Data Analysis
Sparsity Control for Robustness and Social Data AnalysisSparsity Control for Robustness and Social Data Analysis
Sparsity Control for Robustness and Social Data Analysis
 
Mixture Models for Image Analysis
Mixture Models for Image AnalysisMixture Models for Image Analysis
Mixture Models for Image Analysis
 
Semantic 3DTV Content Analysis and Description
Semantic 3DTV Content Analysis and DescriptionSemantic 3DTV Content Analysis and Description
Semantic 3DTV Content Analysis and Description
 
Sparse and Redundant Representations: Theory and Applications
Sparse and Redundant Representations: Theory and ApplicationsSparse and Redundant Representations: Theory and Applications
Sparse and Redundant Representations: Theory and Applications
 
Tribute to Nicolas Galatsanos
Tribute to Nicolas GalatsanosTribute to Nicolas Galatsanos
Tribute to Nicolas Galatsanos
 
Data Quality: Not Your Typical Database Problem
Data Quality: Not Your Typical Database ProblemData Quality: Not Your Typical Database Problem
Data Quality: Not Your Typical Database Problem
 
From Programs to Systems – Building a Smarter World
From Programs to Systems – Building a Smarter WorldFrom Programs to Systems – Building a Smarter World
From Programs to Systems – Building a Smarter World
 
Artificial Intelligence and Human Thinking
Artificial Intelligence and Human ThinkingArtificial Intelligence and Human Thinking
Artificial Intelligence and Human Thinking
 
Artificial Intelligence and Human Thinking
Artificial Intelligence and Human ThinkingArtificial Intelligence and Human Thinking
Artificial Intelligence and Human Thinking
 
State Space Exploration for NASA’s Safety Critical Systems
State Space Exploration for NASA’s Safety Critical SystemsState Space Exploration for NASA’s Safety Critical Systems
State Space Exploration for NASA’s Safety Critical Systems
 
Web Usage Miningand Using Ontology for Capturing Web Usage Semantic
Web Usage Miningand Using Ontology for Capturing Web Usage SemanticWeb Usage Miningand Using Ontology for Capturing Web Usage Semantic
Web Usage Miningand Using Ontology for Capturing Web Usage Semantic
 
Descriptive Granularity - Building Foundations of Data Mining
Descriptive Granularity - Building Foundations of Data MiningDescriptive Granularity - Building Foundations of Data Mining
Descriptive Granularity - Building Foundations of Data Mining
 
The Tower of Knowledge A Generic System Architecture
The Tower of Knowledge A Generic System ArchitectureThe Tower of Knowledge A Generic System Architecture
The Tower of Knowledge A Generic System Architecture
 

Último

An Overview of Mutual Funds Bcom Project.pdf
An Overview of Mutual Funds Bcom Project.pdfAn Overview of Mutual Funds Bcom Project.pdf
An Overview of Mutual Funds Bcom Project.pdf
SanaAli374401
 
Beyond the EU: DORA and NIS 2 Directive's Global Impact
Beyond the EU: DORA and NIS 2 Directive's Global ImpactBeyond the EU: DORA and NIS 2 Directive's Global Impact
Beyond the EU: DORA and NIS 2 Directive's Global Impact
PECB
 
1029 - Danh muc Sach Giao Khoa 10 . pdf
1029 -  Danh muc Sach Giao Khoa 10 . pdf1029 -  Danh muc Sach Giao Khoa 10 . pdf
1029 - Danh muc Sach Giao Khoa 10 . pdf
QucHHunhnh
 
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in DelhiRussian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
kauryashika82
 
Gardella_PRCampaignConclusion Pitch Letter
Gardella_PRCampaignConclusion Pitch LetterGardella_PRCampaignConclusion Pitch Letter
Gardella_PRCampaignConclusion Pitch Letter
MateoGardella
 

Último (20)

An Overview of Mutual Funds Bcom Project.pdf
An Overview of Mutual Funds Bcom Project.pdfAn Overview of Mutual Funds Bcom Project.pdf
An Overview of Mutual Funds Bcom Project.pdf
 
Paris 2024 Olympic Geographies - an activity
Paris 2024 Olympic Geographies - an activityParis 2024 Olympic Geographies - an activity
Paris 2024 Olympic Geographies - an activity
 
Unit-V; Pricing (Pharma Marketing Management).pptx
Unit-V; Pricing (Pharma Marketing Management).pptxUnit-V; Pricing (Pharma Marketing Management).pptx
Unit-V; Pricing (Pharma Marketing Management).pptx
 
Introduction to Nonprofit Accounting: The Basics
Introduction to Nonprofit Accounting: The BasicsIntroduction to Nonprofit Accounting: The Basics
Introduction to Nonprofit Accounting: The Basics
 
PROCESS RECORDING FORMAT.docx
PROCESS      RECORDING        FORMAT.docxPROCESS      RECORDING        FORMAT.docx
PROCESS RECORDING FORMAT.docx
 
INDIA QUIZ 2024 RLAC DELHI UNIVERSITY.pptx
INDIA QUIZ 2024 RLAC DELHI UNIVERSITY.pptxINDIA QUIZ 2024 RLAC DELHI UNIVERSITY.pptx
INDIA QUIZ 2024 RLAC DELHI UNIVERSITY.pptx
 
This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.
 
Mehran University Newsletter Vol-X, Issue-I, 2024
Mehran University Newsletter Vol-X, Issue-I, 2024Mehran University Newsletter Vol-X, Issue-I, 2024
Mehran University Newsletter Vol-X, Issue-I, 2024
 
Beyond the EU: DORA and NIS 2 Directive's Global Impact
Beyond the EU: DORA and NIS 2 Directive's Global ImpactBeyond the EU: DORA and NIS 2 Directive's Global Impact
Beyond the EU: DORA and NIS 2 Directive's Global Impact
 
Accessible design: Minimum effort, maximum impact
Accessible design: Minimum effort, maximum impactAccessible design: Minimum effort, maximum impact
Accessible design: Minimum effort, maximum impact
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdf
 
1029 - Danh muc Sach Giao Khoa 10 . pdf
1029 -  Danh muc Sach Giao Khoa 10 . pdf1029 -  Danh muc Sach Giao Khoa 10 . pdf
1029 - Danh muc Sach Giao Khoa 10 . pdf
 
Unit-IV- Pharma. Marketing Channels.pptx
Unit-IV- Pharma. Marketing Channels.pptxUnit-IV- Pharma. Marketing Channels.pptx
Unit-IV- Pharma. Marketing Channels.pptx
 
Grant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy ConsultingGrant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy Consulting
 
Basic Civil Engineering first year Notes- Chapter 4 Building.pptx
Basic Civil Engineering first year Notes- Chapter 4 Building.pptxBasic Civil Engineering first year Notes- Chapter 4 Building.pptx
Basic Civil Engineering first year Notes- Chapter 4 Building.pptx
 
Sports & Fitness Value Added Course FY..
Sports & Fitness Value Added Course FY..Sports & Fitness Value Added Course FY..
Sports & Fitness Value Added Course FY..
 
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in DelhiRussian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
 
Mattingly "AI & Prompt Design: Structured Data, Assistants, & RAG"
Mattingly "AI & Prompt Design: Structured Data, Assistants, & RAG"Mattingly "AI & Prompt Design: Structured Data, Assistants, & RAG"
Mattingly "AI & Prompt Design: Structured Data, Assistants, & RAG"
 
Gardella_PRCampaignConclusion Pitch Letter
Gardella_PRCampaignConclusion Pitch LetterGardella_PRCampaignConclusion Pitch Letter
Gardella_PRCampaignConclusion Pitch Letter
 
fourth grading exam for kindergarten in writing
fourth grading exam for kindergarten in writingfourth grading exam for kindergarten in writing
fourth grading exam for kindergarten in writing
 

Farewell to Disks: Efficient Processing of Obstinate Data

  • 1. Αποχαιρετισμός στους Δίσκους: Αποδοτική Επεξεργασία Περίπλοκων Δεδομένων Διομήδης Σπινέλλης Καθηγητής Τμήμα Διοικητικής Επιστήμης και Τεχνολογίας Οικονομικό Πανεπιστήμιο Αθηνών http://www.dmst.aueb.gr/dds 1 1
  • 2. 2
  • 3. 3
  • 4. 4
  • 5. 5
  • 6. 1 6
  • 7. 7
  • 8. 8
  • 9. 9
  • 10. 2 10
  • 11. 11
  • 12. 3 12
  • 13. 13
  • 14. Worst case latency (Log scale) L1 D cache L2 cache DDR RAM Hard disk 1.3 ns 9.7 ns 28.5 ns 25.6 ms 14
  • 15. 15
  • 16. 16
  • 17. 17
  • 18. 1 18
  • 19. Time (Log scale) Function call System call Local IPC Remote IPC 1.3ns 1.9μs 4.3μs 1.2ms select Locations.cc1, Divisions.name, avg(CO2), count(*), Locations.lat, Locations.long, POPDENSITY.DENSITY from Papers inner join Locations on Papers.confLocId = Locations.id inner join Divisions on Locations.cc1 = Divisions.country inner join POPDENSITY on Divisions.name = upper(POPDENSITY.name) where Divisions.code = '00' and CO2 notnull group by Locations.cc1 having count(*) > 20 order by avg(CO2) desc; 19
  • 20. /* Get the data */ if (mcSet.dataLen) { data = xmalloc(mcSet.dataLen); if (lseek(fd, mcSet.data.off, SEEK_SET) == -1) CORRUPT(); if (read(fd, data, mcSet.dataLen) != mcSet.dataLen) CORRUPT(); if (lseek(fd, mcSet.u.firstMsg, SEEK_SET) == -1) CORRUPT(); for (i = 0; i < mcSet.numMsgs; ++i) { if (read(fd, &mcMsg, sizeof(mcMsg)) != sizeof(mcMsg)) CORRUPT(); if (mcMsg.invalid) { --i; continue; } msg = xmalloc(sizeof(msgT)); memset(msg, '\0', sizeof(*msg)); /* […] */ msg->msgId = mcMsg.msgId; msg->str = xstrdup((char *) (data + mcMsg.msg.off)); } free(data); } 2 20
  • 21. MMAP(2) FreeBSD System Calls Manual MMAP(2) NAME mmap -- allocate memory, or map files or devices into memory SYNOPSIS #include <sys/mman.h> void * mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset); DESCRIPTION The mmap() system call causes the pages starting at addr and continuing for at most len bytes to be mapped from the object described by fd, starting at byte offset offset. 21
  • 22. [dds@istlab /usr/src/sys/vm]$ ls default_pager.c uma_int.h vm_page.c device_pager.c vm.h vm_page.h memguard.c vm_contig.c vm_pageout.c memguard.h vm_extern.h vm_pageout.h phys_pager.c vm_fault.c vm_pager.c pmap.h vm_glue.c vm_pager.h redzone.c vm_init.c vm_param.h redzone.h vm_kern.c vm_phys.c sg_pager.c vm_kern.h vm_phys.h swap_pager.c vm_map.c vm_reserv.c swap_pager.h vm_map.h vm_reserv.h uma.h vm_meter.c vm_unix.c uma_core.c vm_mmap.c vm_zeroidle.c uma_dbg.c vm_object.c vnode_pager.c uma_dbg.h vm_object.h vnode_pager.h 22
  • 23. 3 23
  • 24. $ ls -lh sparse -rw-r--r-- 1 dds dds 500G Mar 19 20:32 sparse $ du -h sparse 28K sparse 4 διεργασία 1 διεργασία 2 r/o r/o φυσική μνήμη 24
  • 25. διεργασία 1 διεργασία 2 r/w r/w φυσική μνήμη διεργασία 1 διεργασία 2 read read φυσική μνήμη διεργασία 1 διεργασία 2 read r/w αντίγραφο φυσική μνήμη 25
  • 26. 5 C++ 26
  • 27. e.g. 1 CC-BY 2.5 Claudio Rocchini 27
  • 28. 01110010011 0111101101101011 0000101101110011 00101 // romane 01110010011 0111101101101011 0000101101110011 1010101110011 // romanus 01110010011 0111101101101011 10101011011000111010101110011 // romulus 01110010011 10101011000100110 0101011 0111001110011 // rubens 01110010011 10101011000100110 0101011 10010 // ruber 01110010011 10101011000100110 100101100011011 0111101101110 // rubicon 01110010011 10101011000100110 100101100011011 1010101101110011001000111 010101110011 // rubicundus Δομή Δομή δίσκου Ιστοσελίδα μνήμης με νέους δεσμούς Κατασκευή Κατασκευή δένδρου δομής wikipedialize ριζών δίσκου Κατάλογος Αρχική άρθρων ιστοσελίδα for (;;) { i = bitpos; // Loop until the end of the current node or the end of the word while (i < p->end && i < len * 8) { // Covering whole byte? if (i % 8 == 0 && i + 8 <= p->end && (i + 8) / 8 <= len && data[i / 8] == p->data[i / 8]) { i += 8; continue; } // Split point if (getbit(data, i) != getbit(p->data, i)) { // Node with the new data struct pnode *n = new_node(data + i / 8, i % 8, (len - i / 8) * 8, NULL, NULL, true); // Tail of the current node struct pnode *t = new_node(p->data + i / 8, i % 8, p->end - (i & ~7), p->zero, p->one, p->is_terminal); // Head of current node if (getbit(data, i)) *p2 = new_node(p->data, bitpos, i, t, n, false); else *p2 = new_node(p->data, bitpos, i, n, t, false); free(p); return; } i++; } // while 28
  • 29. // Write the given node to the specified file, returning its file offset. // On return the file's offset is set to the first free byte. static long write_node(struct pnode *p, FILE *f) { long my_offset = ftell(f); size_t ret; if (p->one) { struct pnode_disk_one pdo; size_t dlen = datalen(p->end); long len = sizeof(pdo) + dlen; fseek(f, len, SEEK_CUR); pdo.h.type = dt_one; pdo.h.is_terminal = p->is_terminal; pdo.h.has_zero = (p->zero != NULL); pdo.h.has_one = true; pdo.h.begin = p->begin; pdo.h.end = p->end; if (p->zero) write_node(p->zero, f); pdo.one = write_node(p->one, f); long saved_offset = ftell(f); fseek(f, my_offset, SEEK_SET); fwrite(&pdo, 1, sizeof(pdo), f); fwrite(p->data, 1, dlen, f); fseek(f, saved_offset, SEEK_SET); return my_offset; } else { $ zcat enwiki-latest-all-titles-in-ns0.gz | wc -c 106,237,053 $ wc -c enwiki.pt 144,657,286 enwiki.pt 29
  • 31. $ wc crimeandpunishment.txt 0 203,273 1,462,661 crimeandpunishment.txt $ time ./wpltest en en_US.UTF-8 ISO-8859-1 data/enwiki.pt <crimeandpunishment.txt >/dev/null $ time ./wpltest en en_US.UTF-8 ISO-8859-1 data/enwiki.pt <crimeandpunishment.txt >/dev/null Checked 406,225 prefixes real 0m5.859s # Cold cache real 0m1.876s # Warm cache user 0m1.780s sys 0m0.090s 31
  • 32. /* You are not expected to understand this */ // Prevent memory alignment problems memcpy(&end, &(p.h->end), sizeof(end)); while (i < end && i < len * 8) { if (i % 8 == 0 && i + 8 <= end && data[i / 8] == pdata[i / 8]) { i += 8; prefix += 8; continue; } // Split point if (getbit(data, i) != getbit(pdata, i)) return best / 8; i++; prefix++; } if (i == end && p.h->is_terminal) best = prefix; if (i == len * 8) return best / 8; // Move to next node bitpos = end % 8; int covered = end / 8; if (getbit(data, end)) { if (!p.h->has_one) return best / 8; switch (p.h->type) { case dt_both: p.h = (struct pnode_disk_head *)(base + p.b->one); break; case dt_one: p.h = (struct pnode_disk_head *)(base + p.o->one); break; case dt_short: default: assert(0); } } else { if (!p.h->has_zero) return best / 8; switch (p.h->type) { case dt_both: p.h = (struct pnode_disk_head *)(base + p.b->zero); break; case dt_one: // Advance to the end of this node p.h = (struct pnode_disk_head *)((char *)p.h + sizeof(struct pnode_disk_one) + datalen(end)); break; case dt_short: // Advance to the end of this node p.h = (struct pnode_disk_head *)((char *)p.h + sizeof(struct pnode_disk_short) + datalen(end)); break; default: assert(0); } } 32
  • 33. e.g. 2 The problem with wikipedia 33
  • 34. Κατασκευή γράφου Δομή Λίστα ακμών δεδομένων γράφου Κορυφές Διαδρομή αρχής, τέλους BFS Δομή δεδομένων γράφου Κορυφές Διαδρομή αρχής, τέλους Κατασκευή BFS γράφου Δομή Λίστα ακμών δεδομένων γράφου 34
  • 35. Κατασκευή γράφου Δομή Λίστα ακμών δεδομένων γράφου // Loop through all lines, // adding them to the graph while (std::getline(in, line)) { int split = line.find('\001'); if (split == std::string::npos) { std::cerr << "No separator: " << line << std::endl; continue; } n.setName(line.substr(0, split)); NodesIter from(entries->insert(n).first); n.setName(line.substr(split + 1)); NodesIter to(entries->insert(n).first); (const_cast<Node &>(*from)).addEdge( const_cast<Node *>(&*to)); } Κορυφές Διαδρομή αρχής, τέλους Κατασκευή BFS γράφου Δομή Λίστα ακμών δεδομένων γράφου 35
  • 36. Διαδρομή BFS Δομή δεδομένων γράφου Tacoma Narrows Bridge p= Suspension bridge Washington p= p= Geneva Montana p= p= Ουρά []= William Howard Taft p= Tacoma Narrows Bridge p= Suspension bridge Washington p= p= Geneva Montana p= p= Ουρά []=Tacoma Narrow Bridge William Howard Taft p= 36
  • 37. Tacoma Narrows Bridge p= Suspension bridge Washington p= p= Geneva Montana p= p= Ουρά []=Tacoma Narrow Bridge William Howard Taft p= Tacoma Narrows Bridge p= Suspension bridge Washington p= p= Geneva Montana p= p= Ουρά []= William Howard Taft p= Tacoma Narrows Bridge p= Suspension bridge Washington p=Tacoma Narrows Bridge p= Geneva Montana p= p= Ουρά []= Suspension bridge William Howard Taft p= 37
  • 38. Tacoma Narrows Bridge p= Suspension bridge Washington p=Tacoma Narrows Bridge p=Tacoma Narrows Bridge Geneva Montana p= p= Ουρά []= Suspension bridge William Howard Taft Washington p= Tacoma Narrows Bridge p= Suspension bridge Washington p=Tacoma Narrows Bridge p=Tacoma Narrows Bridge Geneva Montana p= p= Ουρά []= Suspension bridge William Howard Taft Washington p= Tacoma Narrows Bridge p= Suspension bridge Washington p=Tacoma Narrows Bridge p=Tacoma Narrows Bridge Geneva Montana p=Suspension bridge p= Ουρά []= Washington William Howard Taft Geneva p= 38
  • 39. Tacoma Narrows Bridge p= Suspension bridge Washington p=Tacoma Narrows Bridge p=Tacoma Narrows Bridge Geneva Montana p=Suspension bridge p=Washington Ουρά []= Geneva William Howard Taft Montana p= Tacoma Narrows Bridge p= Suspension bridge Washington p=Tacoma Narrows Bridge p=Tacoma Narrows Bridge Geneva Montana p=Suspension bridge p=Washington Ουρά []=Montana William Howard Taft p= Tacoma Narrows Bridge p= Suspension bridge Washington p=Tacoma Narrows Bridge p=Tacoma Narrows Bridge Geneva Montana p=Suspension bridge p=Washington Ουρά []=Montana William Howard Taft p=Geneva 39
  • 40. static bool breadthFirstSearchFor(NodePtr from, NodePtr to, size_t n) { std::queue<NodePtr> q; from->setColor(Node::Gray); q.push(from); while (!q.empty()) { NodePtr u = q.front(); q.pop(); const Edges edges = u->getEdges(); for (Edges::const_iterator j = edges.begin(); j != edges.end(); j++) if ((*j)->getColor() == Node::White) { (*j)->setColor(Node::Gray); (*j)->setPredecessor(u); if (*j == to) return true; // Found q.push(*j); } u->setColor(Node::Black); } return false; // Not found } Κορυφές Διαδρομή αρχής, τέλους Κατασκευή BFS γράφου Δομή Λίστα ακμών δεδομένων γράφου Δομή δεδομένων γράφου 40
  • 41. 41
  • 42. Δομή δεδομένων γράφου #include <string> #include <iostream> #include <queue> #include <list> #include <functional> #include <boost/interprocess/managed_mapped_file.hpp> #include <boost/interprocess/offset_ptr.hpp> #include <boost/interprocess/allocators/allocator.hpp> #include <boost/unordered_set.hpp> #include <boost/interprocess/containers/string.hpp> #include <boost/interprocess/containers/slist.hpp> #include <boost/filesystem.hpp> #include <boost/filesystem/operations.hpp> 42
  • 43. #include <string> #include <iostream> #include <queue> #include <list> #include <functional> #include <boost/interprocess/managed_mapped_file.hpp> #include <boost/interprocess/offset_ptr.hpp> #include <boost/interprocess/allocators/allocator.hpp> #include <boost/unordered_set.hpp> #include <boost/interprocess/containers/string.hpp> #include <boost/interprocess/containers/slist.hpp> #include <boost/filesystem.hpp> #include <boost/filesystem/operations.hpp> typedef managed_mapped_file::segment_manager SegmentManager; typedef allocator<char, SegmentManager> CharAllocator; typedef basic_string<char, std::char_traits<char>, CharAllocator> CharString; typedef allocator<Node, SegmentManager> NodeAllocator; typedef boost::unordered_set<Node, boost::hash<Node>, NodeEqual, NodeAllocator> Nodes; typedef offset_ptr<Node> NodePtr; typedef allocator<NodePtr, SegmentManager> NodePtrAllocator; typedef slist<NodePtr, NodePtrAllocator> Edges; typedef allocator<void, SegmentManager> VoidAllocator; typedef allocator<Edges, SegmentManager> EdgesAllocator; // A graph node, suitable for performing a breadth-first search class Node { public: typedef enum {White, Gray, Black} Color; private: CharString name; // Node name Color color; // Color used during BFS NodePtr predecessor; // BFS predecessor Edges edges; // Node's edges public: // Since VoidAllocator is convertible to any other // allocator<T>, we can simplify the initialization // taking just one allocator for all inner containers. Node(const std::string &n, const VoidAllocator &voidAlloc) : name(n.begin(), n.end(), voidAlloc), color(White), predecessor(NULL), edges(voidAlloc) {} void addEdge(NodePtr p) { edges.push_front(p); } }; 43
  • 44. /* * Read ^A-separated nodes from the inputFile, storing the graph * structure in the specified backingFile. */ static void readData(const char *backingFile, const char *inputFile) { std::ifstream in(inputFile, std::ios::binary); if (in.fail()) { perror(inputFile); exit(1); } boost::filesystem::remove_all(backingFile); managed_mapped_file segment(create_only, backingFile, FileSize); // An allocator convertible to any allocator<T, SegmentManager> type VoidAllocator allocInst (segment.get_segment_manager()); // Construct the memory map and fill it Nodes *entries = segment.construct<Nodes>("entries")(Elements, boost::hash<Node>(), NodeEqual(), allocInst); std::string line; Node n(std::string(), allocInst); // To save construction costs /* * Search and report the shortest graph path from "from" to "to" * The graph is stored in backingFile. */ static void searchData(const char *backingFile, const std::string &from, const std::string &to) { managed_mapped_file segment(open_copy_on_write, backingFile); // An allocator convertible to any allocator<T, SegmentManager> VoidAllocator allocInst(segment.get_segment_manager()); // Obtain the previously saved entries Nodes *entries = segment.find<Nodes>("entries").first; NodePtr toPtr; bool found = breadthFirstSearchFor( findNode(entries, Node(from, allocInst)), toPtr = findNode(entries, Node(to, allocInst)), entries->size()); 44
  • 45. Κορυφές Διαδρομή αρχής, τέλους Κατασκευή BFS γράφου Δομή Λίστα ακμών δεδομένων γράφου 45
  • 46. $ ./smap -r graph.bin graph.txt $ ./smap -s graph.bin 'Tacoma Narrows Bridge' 'William howard taft' 0% 10 20 30 40 50 60 70 80 90 100% |----|----|----|----|----|----|----|----|----|----| * Tacoma Narrows Bridge Washington Montana William howard taft $ ./smap -s graph.bin 'Tacoma Narrows Bridge' '24-hour analog dial' 0% 10 20 30 40 50 60 70 80 90 100% |----|----|----|----|----|----|----|----|----|----| ** Tacoma Narrows Bridge Suspension bridge Geneva Watch 24-hour analog dial 46
  • 47. $ ./smap -s graph.bin 'Tacoma Narrows Bridge' 'Wet t-shirt contest' 0% 10 20 30 40 50 60 70 80 90 100% |----|----|----|----|----|----|----|----|----|----| * Tacoma Narrows Bridge Washington Starbucks Toplessness Wet t-shirt contest The problem with Wikipedia 47
  • 48. Performance Κατασκευή δομής δεδομένων 18:00 Χρόνος (ω:λ) 12:00 06:00 00:00 MySQL mmap Server 15:59:43 Client system 03:16:59 00:04:32 Client user 00:52:48 00:04:52 Taft: Κρύα κρυφή μνήμη 4.500 Χρόνος / κόμβο (μs) 4.000 3.500 3.000 2.500 2.000 1.500 1.000 500 0 MySQL mmap Waiting 348 3.886 Server 259 Client system 58 19 Client user 16 2 48
  • 49. Taft: Ζεστή κρυφή μνήμη 450 Χρόνος / κόμβο (μs) 400 350 300 250 200 150 100 50 0 MySQL mmap Waiting 23 0 Server 305 Client system 59 5 Client user 15 3 24h Clock: Κρύα κρυφή μνήμη 2.500 Χρόνος / κόμβο (μs) 2.000 1.500 1.000 500 0 MySQL mmap Waiting 415 1.977 Server 472 Client system 103 10 Client user 26 4 24h Clock: Ζεστή κρυφή μνήμη 800 Χρόνος / κόμβο (μs) 700 600 500 400 300 200 100 0 MySQL mmap Waiting 120 0 Server 469 Client system 103 3 Client user 27 4 49
  • 50. Κλιμάκωση απόδοσης (κρύα μνήμη) Χρόνος (ρ) / κόμβο (ms) 5 mmap MySQL 4 3 2 1 0 0 2000 4000 6000 8000 Χιλιάδες Αριθμός κόμβων 50
  • 51. ACID A 51
  • 52. C I D 52
  • 53. SQL 53
  • 54. A case… Application code vector<Customer> customers1; Customer c1(d1,cd1,s1,p1); customers1.push_back(c1); … vector<Truck> trucks; Truck t1(cs1,dc1,pc1,rlp1, customers1); trucks.push_back(t1); …. ODBC JDBC 54
  • 55. register L1 D cache L2 cache L3 cache DRAM HDD cache HDD / SSD 55
  • 56. 534,681,000 εντολές ΚΜΕ 100,000 Μέγιστη διεκπεραιωτικότητα (MB/s) 10,000 1,000 100 10 1 L1 D cache L2 cache DDR RAM Hard disk Χείριστη αναμονή (λογ. κλιμ.) L1 D cache L2 cache DDR RAM Hard disk 1.3 ns 9.7 ns 28.5 ns 25.6 ms 56
  • 57. Χείριστη αναμονή (λογ. κλιμ.) L1 D cache L2 cache DDR RAM Hard disk 1.3 ns 9.7 ns 28.5 ns 25.6 ms 57
  • 58. // Write the given node to the specified file, returning its file offset. // On return the file's offset is set to the first free byte. static long write_node(struct pnode *p, FILE *f) { long my_offset = ftell(f); size_t ret; if (p->one) { struct pnode_disk_one pdo; size_t dlen = datalen(p->end); long len = sizeof(pdo) + dlen; fseek(f, len, SEEK_CUR); pdo.h.type = dt_one; pdo.h.is_terminal = p->is_terminal; pdo.h.has_zero = (p->zero != NULL); pdo.h.has_one = true; pdo.h.begin = p->begin; pdo.h.end = p->end; if (p->zero) write_node(p->zero, f); pdo.one = write_node(p->one, f); long saved_offset = ftell(f); fseek(f, my_offset, SEEK_SET); fwrite(&pdo, 1, sizeof(pdo), f); fwrite(p->data, 1, dlen, f); fseek(f, saved_offset, SEEK_SET); return my_offset; } else { 58
  • 59. #include <boost/interprocess/managed_mapped_file.hpp> #include <boost/interprocess/offset_ptr.hpp> #include <boost/interprocess/allocators/allocator.hpp> #include <boost/unordered_set.hpp> #include <boost/interprocess/containers/string.hpp> #include <boost/interprocess/containers/slist.hpp> βήμα 1 βήμα Ν w r/ο φυσική μνήμη διεργασία 1 διεργασία 2 read r/w αντίγραφο φυσική μνήμη 59