Se ha denunciado esta presentación.
Utilizamos tu perfil de LinkedIn y tus datos de actividad para personalizar los anuncios y mostrarte publicidad más relevante. Puedes cambiar tus preferencias de publicidad en cualquier momento.

SMP Implementation for OpenBSD/sgi [Japanese Edition]

CBUG Apr.24 2010 at Tokyo

  • Sé el primero en comentar

SMP Implementation for OpenBSD/sgi [Japanese Edition]

  1. 1. SMP Implementation for OpenBSD/sgi [ ] Takuya ASADA<syuu@openbsd.org>
  2. 2. • AsiaBSDCon 2010 • http://bit.ly/octane2
  3. 3. • BSD EmbeddedOS 64bit SMP • MIPS *BSD MIPS SMP • • SMP …
  4. 4. MIPS/SMP (1) • Broadcom SiByte BCM1250 2core 1GHz MIPS64, DDR DRAM, GbE, PCI, HT • Cisco 3845 •
  5. 5. MIPS/SMP (1) • Broadcom SiByte BCM1250 2core 1GHz MIPS64, DDR DRAM, GbE, PCI, HT • Cisco 3845 • 128 orz
  6. 6. MIPS/SMP (2) • SGI ebay • • SGI Octane 2core OpenBSD
  7. 7. MIPS/SMP (2) • SGI ebay • • SGI Octane 2core OpenBSD $33!!!!
  8. 8. Octane2 Processors MIPS R12000 400MHz x2 Memory 1GB SDRAM Graphics 3D Graphics Card Sound Integrated Digital Audio Storage 35GB SCSI HDD Ethernet 100BASE-T
  9. 9. OpenBSD developer • 2009/4 • OpenBSD/sgi • 2009/9 developer • 2009/11
  10. 10. • OpenBSD • -current •
  11. 11. Demo
  12. 12. '(&!!" 4% overhead '(%!!" '($!!" '(#!!" 46% speed up '(!!!" ,-.-/01)02+!" &!!" ,-.-/01)02+!342" %!!" $!!" #!!" !" (sec) ")*'" ")*#" ")*+"
  13. 13. OpenBSD/amd64 &!!" %#!" 2% overhead %!!" 44% speed up $#!" )*+*,-." )*+*,-./01" $!!" #!" !" "'($" "'(%" "'(&" (sec)
  14. 14. • cpu_info • • • per-processor cpu_info • IPI: Inter-Processor Interrupt • • Per-processor ASID • • TLB shootdown • Lazy FPU handling • • Per-processor clock
  15. 15. • per-processor • • …
  16. 16. per-processor • sgi port • per-processor cpu_info • pmap per-processor * per-process
  17. 17. clock.c // defined as global variables u_int32_t cpu_counter_last; u_int32_t cpu_counter_interval; u_int32_t pendingticks; uint32_t clock_int5(uint32_t mask, struct trap_frame *tf) ... clkdiff = cp0_get_count() - cpu_counter_last; while (clkdiff >= cpu_counter_interval) { cpu_counter_last += cpu_counter_interval; clkdiff = cp0_get_count() - cpu_counter_last; pendingticks++;
  18. 18. clock.c uint32_t clock_int5(uint32_t mask, struct trap_frame *tf) ... struct cpu_info *ci = curcpu(); clkdiff = cp0_get_count() - ci->ci_cpu_counter_last; while (clkdiff >= ci->ci_cpu_counter_interval) { ci->ci_cpu_counter_last += ci->ci_cpu_counter_interval; clkdiff = cp0_get_count() - ci->ci_cpu_counter_last; ci->ci_pendingticks++;
  19. 19. pmap • MIPS TLB ASID 8bit ID TLB • 8bit PID ASID ASID Process • SMP ASID • ASID per-processor * per-process
  20. 20. pmap uint pmap_alloc_tlbpid(struct proc *p) ... tlbpid_cnt = id + 1; pmap->pm_tlbpid = id; uint pmap_alloc_tlbpid(struct proc *p) ... tlbpid_cnt[cpuid] = id + 1; pmap->pm_tlbpid[cpuid] = id;
  21. 21. • • : mutex, mp_lock • : CAS, 64bit add, etc.. • • • • trap()
  22. 22. uint32_t clock_int5(uint32_t mask, struct trap_frame *tf) ... if (tf->ipl < IPL_CLOCK) { #ifdef MULTIPROCESSOR __mp_lock(&kernel_lock); #endif while (ci->ci_pendingticks) { clk_count.ec_count++; hardclock(tf); ci->ci_pendingticks--; } #ifdef MULTIPROCESSOR __mp_unlock(&kernel_lock); #endif
  23. 23. uint32_t clock_int5(uint32_t mask, struct trap_frame *tf) ... if (tf->ipl < IPL_CLOCK) { #ifdef MULTIPROCESSOR __mp_lock(&kernel_lock); #endif while (ci->ci_pendingticks) { clk_count.ec_count++; hardclock(tf); ci->ci_pendingticks--; } #ifdef MULTIPROCESSOR __mp_unlock(&kernel_lock); #endif Actually, it causes a bug... described later
  24. 24. • • TLB •
  25. 25. • • • • →
  26. 26. TLB • MIPS TLB • TLB shootdown TLB invalidate/update • IPI(Inter-Processor Interrupt)
  27. 27. TLB shootdown CPU A CPU B Fault TLB shootdown IPI IPI TLB invalidate TLB invalidate CPU B CPU A
  28. 28. • MIPS R10000/R12000 • Octane •
  29. 29. SMP • • • • TLB shootdown
  30. 30. • • • •
  31. 31. JTAG ICE • • • Embedded OS SMP • Octane or
  32. 32. ddb • OpenBSD • • SMP ddb ddb • …
  33. 33. printf()! • • printf(message) • • • printf() •
  34. 34. printf • Octane • • • cpu0 com0 cpu1 com1 • ;-)
  35. 35. print • • printf • •
  36. 36. • • •
  37. 37. void mtx_enter(struct mutex *mtx) ...         for (;;) {                 if (mtx->mtx_wantipl != IPL_NONE)                         s = splraise(mtx->mtx_wantipl);                 if (try_lock(mtx)) {                         if (mtx->mtx_wantipl != IPL_NONE)                                 mtx->mtx_oldipl = s;                         mtx->mtx_owner = curcpu();                         return;                 }                 if (mtx->mtx_wantipl != IPL_NONE)                         splx(s);                 if (++i > MTX_TIMEOUT)                         panic("mtx deadlocked\n");         }
  38. 38. void mtx_enter(struct mutex *mtx) ...         for (;;) {                 if (mtx->mtx_wantipl != IPL_NONE)                         s = splraise(mtx->mtx_wantipl);                 if (try_lock(mtx)) {                         if (mtx->mtx_wantipl != IPL_NONE)                                 mtx->mtx_oldipl = s;                         mtx->mtx_owner = curcpu();                         return;                 }                 if (mtx->mtx_wantipl != IPL_NONE)                         splx(s);                 if (++i > MTX_TIMEOUT)                         panic("mtx deadlocked\n");         }
  39. 39. CPU A CPU B Lock Lock A B
  40. 40. CPU A CPU B Lock Lock A B
  41. 41. CPU A CPU B Lock Lock A B
  42. 42. void mtx_enter(struct mutex *mtx) ...         for (;;) {                 if (mtx->mtx_wantipl != IPL_NONE)                         s = splraise(mtx->mtx_wantipl);                 if (try_lock(mtx)) {                         if (mtx->mtx_wantipl != IPL_NONE)                                 mtx->mtx_oldipl = s;                         mtx->mtx_owner = curcpu();                         mtx->mtx_ra =                          __builtin_return_address(0);                         return;                 }                 if (mtx->mtx_wantipl != IPL_NONE)                         splx(s);                 if (++i > MTX_TIMEOUT)                         panic("mtx deadlocked ra:%p\n",                                 mtx->mtx_ra);         }
  43. 43. IPI CPU A CPU B Interrupt Fault Disable interrupt Acquire lock Lock Wait until released TLB shootdown Blocked IPI Wait ACK
  44. 44. IPI CPU A CPU B Interrupt Fault Disable interrupt Acquire lock Lock Wait until released TLB shootdown IPI Wait ACK
  45. 45. IPI CPU A CPU B Interrupt Fault Disable interrupt Acquire lock Re-enable interrupt Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK
  46. 46. IPI CPU A CPU B Interrupt Fault Disable interrupt Acquire lock Re-enable interrupt Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK ACK for rendezvous
  47. 47. IPI CPU A CPU B Interrupt Fault IPI Disable interrupt Acquire lock Re-enable interrupt Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK ACK for rendezvous
  48. 48. uint32_t clock_int5(uint32_t mask, struct trap_frame *tf) ... if (tf->ipl < IPL_CLOCK) { #ifdef MULTIPROCESSOR u_int32_t sr; sr = getsr(); ENABLEIPI(); __mp_lock(&kernel_lock); #endif while (ci->ci_pendingticks) { clk_count.ec_count++; hardclock(tf); ci->ci_pendingticks--; } #ifdef MULTIPROCESSOR __mp_unlock(&kernel_lock); setsr(sr); #endif
  49. 49. splhigh() IPI CPU A CPU B splhigh() Fault Acquire lock Lock Wait until released TLB shootdown Blocked IPI Wait ACK
  50. 50. splhigh() IPI CPU A CPU B splhigh() Fault Acquire lock Lock Wait until released TLB shootdown IPI Wait ACK
  51. 51. splhigh() IPI CPU A CPU B splhigh() Fault Acquire lock Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK
  52. 52. splhigh() IPI CPU A CPU B splhigh() Fault Acquire lock Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK ACK for rendezvous
  53. 53. splhigh() IPI CPU A CPU B IPL_IPI splhigh() Fault IPL_HIGH Acquire lock Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK ACK for rendezvous
  54. 54.  #define        IPL_TTY         4       /* terminal */  #define        IPL_VM          5       /* memory allocation */  #define        IPL_CLOCK       6       /* clock */ #define        IPL_HIGH        7       /* everything */ #define        NIPLS           8       /* Number of levels */  #define        IPL_TTY         4       /* terminal */  #define        IPL_VM          5       /* memory allocation */  #define        IPL_CLOCK       6       /* clock */ #define        IPL_HIGH        7       /* everything */ #define        IPL_IPI         8       /* ipi */ #define        NIPLS           9       /* Number of levels */
  55. 55. • cpu_info • malloc() → • TLB • MIPS TLB TLB TLB cpu_info • wrapper
  56. 56. wrapper vaddr_t smp_malloc(size_t size) ... if (size < PAGE_SIZE) { va = (vaddr_t)malloc(size, M_DEVBUF, M_NOWAIT); if (va == NULL) return NULL; error = pmap_extract(pmap_kernel(), va, &pa); if (error == FALSE) return NULL; } else { TAILQ_INIT(&mlist); error = uvm_pglistalloc(size, 0, -1L, 0, 0, &mlist, 1, UVM_PLA_NOWAIT); if (error) return NULL; m = TAILQ_FIRST(&mlist); pa = VM_PAGE_TO_PHYS(m); } return PHYS_TO_XKPHYS(pa, CCA_CACHED);
  57. 57. TLB shootdown • invalidate/update TLB shootdown kernel mode user mode • shootdown shootdown ASID • pmap TLB shootdown
  58. 58. void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) ... arg.pmap = pmap; arg.va = va; smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg); void pmap_invalidate_page_action(void *arg) ... pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap; vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va; if (is_kernel_pmap(pmap)) { pmap_TLB_invalidate_kernel(va); return; } if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) return; else if (!(pmap->pm_active & PCPU_GET(cpumask))) { pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; return; } va = pmap_va_asid(pmap, (va & ~PGOFSET)); mips_TBIS(va);
  59. 59. CPU_INFO_FOREACH(cii, ci) if (cpuset_isset(&cpus_running, ci)) { unsigned int i = ci->ci_cpuid; unsigned int m = 1 << i; if (pmap->pm_asid[i].pma_asidgen != pmap_asid_info[i].pma_asidgen) continue; else if (ci->ci_curpmap != pmap) { pmap->pm_asid[i].pma_asidgen = 0; continue; } cpumask |= m; } if (cpumask == 1 << cpuid) { u_long asid; asid = pmap->pm_asid[cpuid].pma_asid << VMTLB_PID_SHIFT; tlb_flush_addr(va | asid); } else if (cpumask) { struct pmap_invalidate_page_arg arg; arg.pmap = pmap; arg.va = va; smp_rendezvous_cpus(cpumask, pmap_invalidate_user_page_action, &arg); }
  60. 60. • SGI Origin 350 SMP • Cavium OCTEON OpenBSD SoC • SMP • rthread •

×