SlideShare una empresa de Scribd logo
1 de 57
Descargar para leer sin conexión
TEACHING YOUR
MACHINE
TO FIND
FRAUDSTERS

Ian Barber
ianb@php.net
phpir.com
twitter.com/ianbarber
http://joind.in/3429




https://github.com/ianbarber/FindingFraudsters-Talk
5%
           3%
SOME      .1%
SMALL
NUMBERS    8%
99%
ACCURACY
REALLY     REALLY
             LEGITIMATE   FRAUD


EVALUATED
                989         0
LEGITIMATE


EVALUATED
                 10         1
  FRAUD
REALLY     REALLY
             LEGITIMATE   FRAUD



      90%
EVALUATED
LEGITIMATE
          WRONG  989         0



EVALUATED
                 10         1
  FRAUD
ANOMALY DETECTION
30




         22.5
Clicks




          15




          7.5




           0
                Date
SOFTWARE
ARCHITECTURE
                           Alarm

               Detector

                          No Alarm
                Buffer


User Clicks    Landing
    Ad          Page
DETECTOR
              statistics

 Expected
  Clicks
              Threshold    Data Buffer
Sensitivity



               Alarm
average.php
/**
 * Moving-average anomaly detector over the rows of fraudclicks.csv.
 *
 * Column 1 of each row is the value being monitored (presumably the
 * daily click count shown on the charts; column 0 is not read here).
 * Once eight earlier rows are buffered, the oldest is dropped and the
 * current value is compared against the mean of the remaining seven:
 * it raises an alarm when it exceeds that mean by more than $sen
 * standard deviations.
 *
 * Rows after the 201st are handled specially (presumably the point at
 * which the fraudulent clicks start): the first alarm there ends the
 * scan, and every non-alarming row before it counts as one day of
 * detection delay.
 *
 * @param float $sen Number of standard deviations above the window
 *                   mean that a value must exceed to raise an alarm.
 * @return array array(alarms - 1, daysToDetect). One is always
 *               subtracted from the alarm count, which discounts the
 *               alarm that ended the scan; if the scan never ends on
 *               an alarm the subtraction still happens.
 */
function detect($sen) {
  $window = array(); $i = 0;
  $alarmCount = 0; $dtd = 0;
  $fraud = fopen("fraudclicks.csv", 'r');
  while($d = fgetcsv($fraud)) {
    $i++;
    if(count($window) > 7) {
      array_shift($window);
      $avg = array_sum($window) / 7;
      // Start the sum of squares from zero for every window; carrying
      // the previous row's deviation over would inflate the threshold.
      $stddev = 0;
      foreach($window as $val) {
        $stddev += pow($val - $avg, 2);
      }
      $stddev = sqrt($stddev/7);

      if($d[1] > ($avg + ($sen * $stddev))){
        $alarmCount++;
        if($i > 201) {
          break;
        }
      } else {
        if($i > 201) {
          $dtd++;
        }
      }
    }
    array_push($window, $d[1]);
  }
  fclose($fraud);
  return array($alarmCount-1, $dtd);
}
1.6 SENSITIVITY
          30
                18 False Alarms          1 Day To Detect

         22.5
Clicks




          15




          7.5




           0
                                  Date
2.7 SENSITIVITY
          30
                1 False Alarm      18 Days To Detect

         22.5
Clicks




          15




          7.5




           0
                                Date
SICKNESS
AVAILABILITY
function detect($sens) {          sickavail.php
  $i = 0; $alarms = 0; $dtd = 0;
  $window = array(); $avail = array();
  $fraud = fopen("fraudclicks.csv", 'r');
  while($dat = fgetcsv($fraud)) {
    $dow = date("w", strtotime($dat[0]));
    if( count($window) >= 7
        && isset($avail[$dow]) ) {

      $sick = 0;
      foreach($window as $day => $value) {
        $dowavg = array_sum($avail[$day]) /
                  count($avail[$day]);
        $sick += $value / $dowavg;
      }
      $sick /= count($window);
$avlblty = array_sum($avail[$dow]) /
           count($avail[$dow]);
  $est = $sick * $avlblty;

  $fac = fac($dat[1]);
  $p = exp(-$est) * pow($est,$dat[1])
       / $fac; // poisson calc

  if($p < $sens && $dat[1] > $est) {
    $alarms++;
    if($i > 201) { break; }
  } else {
    if($i > 201) { $dtd++; }
  }

} // end if
0.2




0.15




 0.1




0.05




  0
       1   2   3   4   5   6   7   8   9   10
0.011 SENSITIVITY
          30
                1 False Alarm          1 Day To Detect

         22.5
Clicks




          15




          7.5




           0
                                Date
SUPERVISED CLASSIFIERS
classification model
SOFTWARE
ARCHITECTURE
                               Fraud

            Classifier

                             Not Fraud
  User     Transaction
Purchase    Processor


           Transaction
                              Learner
            Database
EVALUATING THE CLASSIFIER

Training Data   Learner      Model




 Test Data
                            Prediction
                Classifier   Accuracy
   Model
20




15




10




5




0
     0   5   10   15   20
20




15




10




5
             ?
0
     0   5       10   15   20
20




15




10




5
             ?
0
     0   5       10   15   20
// Six hand-labelled example transactions used to seed the index.
$transactions = array(
    array('fraud' => false, 'price' => 1699,
          'desc' => 'toy ninja', 'ship' => 'US'),
    array('fraud' => false, 'price' => 20000,
          'desc' => 'TV', 'ship' => 'US'),
    array('fraud' => false, 'price' => 2500,
          'desc' => 'cds', 'ship' => 'US'),
    array('fraud' => true, 'price' => 20000,
          'desc' => 'console', 'ship' => 'CN'),
    array('fraud' => true, 'price' => 5000,
          'desc' => 'books', 'ship' => 'US'),
    array('fraud' => true, 'price' => 15000,
          'desc' => 'ipod', 'ship' => 'CN'),
);

// Open (or create) the Xapian database called "index" and prepare a
// term generator that stems English text.
$database = new XapianWritableDatabase("index",
                Xapian::DB_CREATE_OR_OPEN);
$stemmer = new XapianStem("english");
$indexer = new XapianTermGenerator();
$indexer->set_stemmer($stemmer);

foreach ($transactions as $position => $transaction) {
    // The document payload carries the class label; the indexed text
    // is "price desc ship" joined by single spaces.
    if ($transaction['fraud']) {
        $label = "fraud";
    } else {
        $label = "clean";
    }
    $text = implode(' ', array(
        $transaction['price'],
        $transaction['desc'],
        $transaction['ship'],
    ));

    $entry = new XapianDocument();
    $entry->set_data($label);
    $indexer->set_document($entry);
    $indexer->index_text($text);
    // NOTE(review): the array key is passed as a second argument here;
    // confirm the binding's add_document() accepts it.
    $database->add_document($entry, $position);
}

// Drop the handle (presumably this is what flushes and closes the
// database; confirm against the Xapian bindings).
$database = null;
                               fraudknn.php
// testknn.php
// Classify one unlabelled transaction by printing the labels of the
// indexed documents that match it best.
$candidate = array(
    'price' => 10000,
    'desc'  => 'TV',
    'ship'  => 'CN',
);

$database = new XapianWritableDatabase("index",
                Xapian::DB_CREATE_OR_OPEN);
$stemmer = new XapianStem("english");
$indexer = new XapianTermGenerator();
$indexer->set_stemmer($stemmer);

// Index the candidate as a temporary document so that it can be used
// as the relevance set for the query below.
$probe = new XapianDocument();
$indexer->set_document($probe);
$indexer->index_text(implode(' ', array(
    $candidate['price'],
    $candidate['desc'],
    $candidate['ship'],
)));
$probeId = $database->add_document($probe);

$enquire = new XapianEnquire($database);
$relevant = new XapianRSet();
$relevant->add_document($probeId);

// Collect up to ten expansion terms suggested by the probe document.
$expansion = $enquire->get_eset(10, $relevant);
$terms = array();
for ($it = $expansion->begin();
     !$it->equals($expansion->end());
     $it->next()) {
    $terms[] = $it->get_term();
}

// OR the terms together and fetch the first four matches.
$query = new XapianQuery(XapianQuery::OP_OR, $terms);
$enquire->set_query($query);
$hits = $enquire->get_mset(0, 4, $relevant);

for ($it = $hits->begin();
     !$it->equals($hits->end());
     $it->next()) {
    // The probe matches itself; only the other documents are reported.
    if ($it->get_document()->get_docid() == $probeId) {
        continue;
    }
    $label = $it->get_document()->get_data();
    var_dump($label);
}

// Remove the temporary probe document again.
$database->delete_document($probeId);


$ php testknn.php
string(5) "clean"
string(5) "fraud"
string(5) "fraud"
TRANSACTION
PARAMETERS
// email.php
/**
 * Score how closely an e-mail address resembles a customer's name.
 *
 * Both inputs are lower-cased. The address is split at "@"; dots and
 * plus signs in the local part become spaces, and the domain is cut at
 * its first dot ("example.co.uk" -> "example").
 *
 * @param string $name  Customer name, words separated by spaces.
 * @param string $email E-mail address to compare against the name.
 * @return float 1.0 when the whole name is more than 80% similar to
 *               the local part; 0.8 when it is more than 80% similar
 *               to the domain; otherwise (1.7 * best / 100) - 1, where
 *               "best" is the highest similarity above 50% between any
 *               single name word and the local part or domain, or 0
 *               when none reaches that (giving -1.0).
 */
function compareEmailToName($name, $email) {
  $name = strtolower($name);
  $email = strtolower($email);
  $parts = explode(" ", $name);
  $pcnt = 0;

  // Pad so an address without "@" yields an empty domain rather than
  // an undefined offset.
  list($user, $dom) = array_pad(explode("@", $email), 2, "");
  $user = str_replace(
              array(".", "+"), " ", $user);
  // The dot must be escaped: an unescaped "." matches any character
  // and would erase the whole domain.
  $dom = preg_replace('/\..*/', "", $dom);

  similar_text($name, $user, $pcnt);
  if($pcnt > 80) { return 1.0; }
  similar_text($name, $dom, $pcnt);
  if($pcnt > 80) { return 0.8; }

  // explode() always returns at least one element, so the word loop
  // can run unconditionally.
  $highest = 0;
  foreach($parts as $part) {
    similar_text($user, $part, $pcnt);
    if($pcnt > 50 && $pcnt > $highest) {
      $highest = $pcnt;
    }
    similar_text($dom, $part, $pcnt);
    if($pcnt > 50 && $pcnt > $highest) {
      $highest = $pcnt;
    }
  }
  return (1.7 * ($highest/100)) - 1;
}
// Example feature record for a single transaction; its 'fraud' entry
// is 0.
// NOTE(review): the key order lines up with the column positions that
// the training script reads from paydata.csv (0 = purchase value,
// 1 = country, 5 = product category, 13 = quantity, 14 = fraud flag);
// presumably this is one row of that file, confirm. Units of the
// numeric fields (purchase_value, timegap, ...) are not stated here.
$data = array(
  'purchase_value' => 20993,
  'geo_country' => 'DE',
  'previous_orders' => 1,
  'time' => 6,
  'timegap' => 146632,
  'product_category' => 'small_item',
  'delivery_matches_card' => 0,
  'geo_ip_matches_card' => 1,
  'difference_from_last_trans' => 8755,
  'free_shipping' => 0,
  'email_like_name' => 0,
  'free_email_provider' => 0,
  'disposable_email_provider' => 0,
  'quantity' => 2,
  'fraud' => 0);
SUPPORT
VECTOR MACHINES
20




15




10




5




0
     0   5   10   15   20
20




15




10




5




0
     0   5   10   15   20
20




15




10




5




0
     0   5   10   15   20
20




15




10




5




0
     0   5   10   15   20
20




15




10




5




0
     0   5   10   15   20
$ apt-get install libsvm-dev
$ apt-get install libsvm-tools

$ yum install libsvm-devel

$ pecl install svm-beta
$ echo extension=svm.so > /etc/php.d/svm.ini
$ php -r '$s = new svm(); $m = $s->train
(array(array(-1, -1), array(1, 1))); echo
$m->predict(array(0, -1));'
-1
// learn.php
// Turn paydata.csv into SVM training vectors, train and save a model,
// then score the model against the same vectors it was trained on.

// Encode a yes/no condition as +1.0 / -1.0.
$signed = function ($condition) {
  return $condition ? 1.0 : -1.0;
};

$handle = fopen('paydata.csv', 'r');
$output = array();

while ($row = fgetcsv($handle)) {
  $output[] = array(
    // Key 0 is the class label: column 14 equal to 1 maps to -1,
    // anything else to 1.
    0  => $row[14] == 1 ? -1 : 1,
    1  => ($row[0] / 20000.00) - 1.0,   // price
    2  => $signed($row[1] == 'CN'),
    3  => $signed($row[1] == 'US'),
    4  => $signed($row[5] == 'digital'),
    5  => $signed($row[7] == 1),        // geo
    6  => $signed($row[6] == 1),        // deliv
    12 => $signed($row[9] == 1),        // ship
    13 => ($row[13] / 1.5) - 1.0,       // qty
  );
}

$svm = new svm();
// The second argument assigns a weight to each class label.
$model = $svm->train($output,
               array(-1 => 0.65, 1 => 0.5));
$model->save('learn.model');

// Confusion-matrix counters; label 1 is the "positive" class.
$tp = 0; $tn = 0; $fp = 0; $fn = 0;
foreach ($output as $sample) {
  $predictedPositive = $model->predict($sample) > 0;
  $actuallyPositive = $sample[0] > 0;

  if ($actuallyPositive && $predictedPositive) {
    $tp++;
  } elseif ($actuallyPositive) {
    $fn++;
  } elseif ($predictedPositive) {
    $fp++;
  } else {
    $tn++;
  }
}
// ...snip.. the code that loads the test vectors from paytest.csv
// into $output is omitted here

// Load the model saved by the training script.
$model = new SVMModel('learn.model');

// Confusion-matrix counters; label 1 is the "positive" class.
$tp = 0; $tn = 0; $fp = 0; $fn = 0;
foreach ($output as $sample) {
  $predictedPositive = $model->predict($sample) > 0;
  $actuallyPositive = $sample[0] > 0;

  if ($actuallyPositive && $predictedPositive) {
    $tp++;
  } elseif ($actuallyPositive) {
    $fn++;
  } elseif ($predictedPositive) {
    $fp++;
  } else {
    $tn++;
  }
}
                                   test.php
// Print the confusion-matrix counters and the overall accuracy
// (correct predictions divided by all predictions).
$total = $tp + $tn + $fp + $fn;
var_dump("True Positive " . $tp);
var_dump("True Negative " . $tn);
var_dump("False Positive " . $fp);
var_dump("False Negative " . $fn);
// With nothing evaluated the ratio is undefined, and dividing by zero
// throws on PHP 8, so report 0 in that case.
var_dump("Accuracy " .
        ($total > 0 ? ($tp + $tn) / $total : 0));
$ php learn.php
string(18) "True Positive 8316"
string(18) "True Negative 1682"
string(16) "False Positive 2"
string(16) "False Negative 0"
string(15) "Accuracy 0.9998"

$ php test.php
string(17) "True Positive 844"
string(17) "True Negative 155"
string(16) "False Positive 0"
string(16) "False Negative 1"
string(14) "Accuracy 0.999"
training data


  Test         Verify       Update



Automated     Manual        Manual
Time Series           Class Based



   Sensitivity             Model



 False    Days To    False        False
Alarms    Detect    Positives   Negatives
(shogun)
TEACHING YOUR
MACHINE
TO FIND
FRAUDSTERS

http://joind.in/3429

Ian Barber
ianb@php.net
Title Slide - CSI
http://www.flickr.com/photos/39matt/5241862082
Sickness Availability - Chicago Fire Department
http://www.flickr.com/photos/mike_miley/3929146730/
Model Buildings - Ah Ain’t Long For This Whorl
http://www.flickr.com/photos/chadmiller/98014022/
Repeat Customer - McDonald’s Loyalty Card
http://www.flickr.com/photos/fsse-info/3658873057/
Shipping - FedEx Truck
http://www.flickr.com/photos/moto_club4ag/4852235145/
Velocity - Chevrolet Chevelle Dragster
http://www.flickr.com/photos/jns001/2958999006/
GeoIP - Earth Asia Terminator View
http://www.flickr.com/photos/flyingsinger/86898564/
Multiple Items - Boxes
http://www.flickr.com/photos/skrewtape/851672959/

Más contenido relacionado

La actualidad más candente

Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...Mail.ru Group
 
Introdução ao Perl 6
Introdução ao Perl 6Introdução ao Perl 6
Introdução ao Perl 6garux
 
News of the Symfony2 World
News of the Symfony2 WorldNews of the Symfony2 World
News of the Symfony2 WorldFabien Potencier
 
Advanced modulinos
Advanced modulinosAdvanced modulinos
Advanced modulinosbrian d foy
 
Xlab #1: Advantages of functional programming in Java 8
Xlab #1: Advantages of functional programming in Java 8Xlab #1: Advantages of functional programming in Java 8
Xlab #1: Advantages of functional programming in Java 8XSolve
 
The Magic Of Tie
The Magic Of TieThe Magic Of Tie
The Magic Of Tiebrian d foy
 
Créer une base NoSQL en 1 heure
Créer une base NoSQL en 1 heureCréer une base NoSQL en 1 heure
Créer une base NoSQL en 1 heureAmaury Bouchard
 
Advanced modulinos trial
Advanced modulinos trialAdvanced modulinos trial
Advanced modulinos trialbrian d foy
 
20 modules i haven't yet talked about
20 modules i haven't yet talked about20 modules i haven't yet talked about
20 modules i haven't yet talked aboutTatsuhiko Miyagawa
 
Melhorando sua API com DSLs
Melhorando sua API com DSLsMelhorando sua API com DSLs
Melhorando sua API com DSLsAugusto Pascutti
 
Introduction to CloudForecast / YAPC::Asia 2010 Tokyo
Introduction to CloudForecast / YAPC::Asia 2010 TokyoIntroduction to CloudForecast / YAPC::Asia 2010 Tokyo
Introduction to CloudForecast / YAPC::Asia 2010 TokyoMasahiro Nagano
 
PHP Language Trivia
PHP Language TriviaPHP Language Trivia
PHP Language TriviaNikita Popov
 

La actualidad más candente (18)

zinno
zinnozinno
zinno
 
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
 
Introdução ao Perl 6
Introdução ao Perl 6Introdução ao Perl 6
Introdução ao Perl 6
 
News of the Symfony2 World
News of the Symfony2 WorldNews of the Symfony2 World
News of the Symfony2 World
 
C99
C99C99
C99
 
Php 101: PDO
Php 101: PDOPhp 101: PDO
Php 101: PDO
 
Advanced modulinos
Advanced modulinosAdvanced modulinos
Advanced modulinos
 
Xlab #1: Advantages of functional programming in Java 8
Xlab #1: Advantages of functional programming in Java 8Xlab #1: Advantages of functional programming in Java 8
Xlab #1: Advantages of functional programming in Java 8
 
The Magic Of Tie
The Magic Of TieThe Magic Of Tie
The Magic Of Tie
 
C99[2]
C99[2]C99[2]
C99[2]
 
Créer une base NoSQL en 1 heure
Créer une base NoSQL en 1 heureCréer une base NoSQL en 1 heure
Créer une base NoSQL en 1 heure
 
Advanced modulinos trial
Advanced modulinos trialAdvanced modulinos trial
Advanced modulinos trial
 
Cod
CodCod
Cod
 
20 modules i haven't yet talked about
20 modules i haven't yet talked about20 modules i haven't yet talked about
20 modules i haven't yet talked about
 
Melhorando sua API com DSLs
Melhorando sua API com DSLsMelhorando sua API com DSLs
Melhorando sua API com DSLs
 
Perl 6 by example
Perl 6 by examplePerl 6 by example
Perl 6 by example
 
Introduction to CloudForecast / YAPC::Asia 2010 Tokyo
Introduction to CloudForecast / YAPC::Asia 2010 TokyoIntroduction to CloudForecast / YAPC::Asia 2010 Tokyo
Introduction to CloudForecast / YAPC::Asia 2010 Tokyo
 
PHP Language Trivia
PHP Language TriviaPHP Language Trivia
PHP Language Trivia
 

Destacado

Deloittes 2009 Technology Fast 500™ Ranking
Deloittes 2009 Technology Fast 500™  RankingDeloittes 2009 Technology Fast 500™  Ranking
Deloittes 2009 Technology Fast 500™ Rankinglisaswiftney
 
Deloitte-2014-Technology-Fast500
Deloitte-2014-Technology-Fast500Deloitte-2014-Technology-Fast500
Deloitte-2014-Technology-Fast500Seth Greenberg
 
dollar general annual reports 2002
dollar general annual reports 2002dollar general annual reports 2002
dollar general annual reports 2002finance41
 
Deployment Tactics
Deployment TacticsDeployment Tactics
Deployment TacticsIan Barber
 
Arc Sight Info Documents 10 21 2009
Arc Sight Info Documents 10 21 2009Arc Sight Info Documents 10 21 2009
Arc Sight Info Documents 10 21 2009mattdriscoll
 
The Pixel Lab 2015 | Don't lose heart - Sean Coleman
The Pixel Lab 2015 | Don't lose heart - Sean Coleman The Pixel Lab 2015 | Don't lose heart - Sean Coleman
The Pixel Lab 2015 | Don't lose heart - Sean Coleman power to the pixel
 
Arc Sight Info Documents 12 3 2009
Arc Sight Info Documents 12 3 2009Arc Sight Info Documents 12 3 2009
Arc Sight Info Documents 12 3 2009mattdriscoll
 
Document Classification In PHP - Slight Return
Document Classification In PHP - Slight ReturnDocument Classification In PHP - Slight Return
Document Classification In PHP - Slight ReturnIan Barber
 
ZeroMQ Is The Answer: PHP Tek 11 Version
ZeroMQ Is The Answer: PHP Tek 11 VersionZeroMQ Is The Answer: PHP Tek 11 Version
ZeroMQ Is The Answer: PHP Tek 11 VersionIan Barber
 
Social media & dirigeants du Cac 40 : que disent les conversations ?
Social media & dirigeants du Cac 40 : que disent les conversations ?Social media & dirigeants du Cac 40 : que disent les conversations ?
Social media & dirigeants du Cac 40 : que disent les conversations ?Linkfluence
 
Israel pide un rey
Israel pide un reyIsrael pide un rey
Israel pide un reyCoke Neto
 
Technology-Fast-500-Winners-Brochure.PDF
Technology-Fast-500-Winners-Brochure.PDFTechnology-Fast-500-Winners-Brochure.PDF
Technology-Fast-500-Winners-Brochure.PDFJustin Campbell
 

Destacado (16)

Deloittes 2009 Technology Fast 500™ Ranking
Deloittes 2009 Technology Fast 500™  RankingDeloittes 2009 Technology Fast 500™  Ranking
Deloittes 2009 Technology Fast 500™ Ranking
 
Canada Deber 2pdf
Canada Deber 2pdfCanada Deber 2pdf
Canada Deber 2pdf
 
Deloitte-2014-Technology-Fast500
Deloitte-2014-Technology-Fast500Deloitte-2014-Technology-Fast500
Deloitte-2014-Technology-Fast500
 
dollar general annual reports 2002
dollar general annual reports 2002dollar general annual reports 2002
dollar general annual reports 2002
 
Deployment Tactics
Deployment TacticsDeployment Tactics
Deployment Tactics
 
20140528 valeant story draft deckv85
20140528 valeant story draft deckv8520140528 valeant story draft deckv85
20140528 valeant story draft deckv85
 
Arc Sight Info Documents 10 21 2009
Arc Sight Info Documents 10 21 2009Arc Sight Info Documents 10 21 2009
Arc Sight Info Documents 10 21 2009
 
The Pixel Lab 2015 | Don't lose heart - Sean Coleman
The Pixel Lab 2015 | Don't lose heart - Sean Coleman The Pixel Lab 2015 | Don't lose heart - Sean Coleman
The Pixel Lab 2015 | Don't lose heart - Sean Coleman
 
Arc Sight Info Documents 12 3 2009
Arc Sight Info Documents 12 3 2009Arc Sight Info Documents 12 3 2009
Arc Sight Info Documents 12 3 2009
 
Document Classification In PHP - Slight Return
Document Classification In PHP - Slight ReturnDocument Classification In PHP - Slight Return
Document Classification In PHP - Slight Return
 
ZeroMQ Is The Answer: PHP Tek 11 Version
ZeroMQ Is The Answer: PHP Tek 11 VersionZeroMQ Is The Answer: PHP Tek 11 Version
ZeroMQ Is The Answer: PHP Tek 11 Version
 
Social media & dirigeants du Cac 40 : que disent les conversations ?
Social media & dirigeants du Cac 40 : que disent les conversations ?Social media & dirigeants du Cac 40 : que disent les conversations ?
Social media & dirigeants du Cac 40 : que disent les conversations ?
 
Eca´s probabilidad y estadística Agosto 2012-Enero 2013
Eca´s probabilidad y estadística Agosto 2012-Enero 2013Eca´s probabilidad y estadística Agosto 2012-Enero 2013
Eca´s probabilidad y estadística Agosto 2012-Enero 2013
 
4 de febrero de 1992 pdf
4 de febrero de 1992 pdf4 de febrero de 1992 pdf
4 de febrero de 1992 pdf
 
Israel pide un rey
Israel pide un reyIsrael pide un rey
Israel pide un rey
 
Technology-Fast-500-Winners-Brochure.PDF
Technology-Fast-500-Winners-Brochure.PDFTechnology-Fast-500-Winners-Brochure.PDF
Technology-Fast-500-Winners-Brochure.PDF
 

Similar a Teaching Your Machine to Detect Fraud With Supervised Learning Techniques

Javascript & jQuery: A pragmatic introduction
Javascript & jQuery: A pragmatic introductionJavascript & jQuery: A pragmatic introduction
Javascript & jQuery: A pragmatic introductionIban Martinez
 
Crazy things done on PHP
Crazy things done on PHPCrazy things done on PHP
Crazy things done on PHPTaras Kalapun
 
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRaimonds Simanovskis
 
Your code sucks, let's fix it - DPC UnCon
Your code sucks, let's fix it - DPC UnConYour code sucks, let's fix it - DPC UnCon
Your code sucks, let's fix it - DPC UnConRafael Dohms
 
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012Amazon Web Services
 
Mocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitMocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitmfrost503
 
Unit testing with zend framework tek11
Unit testing with zend framework tek11Unit testing with zend framework tek11
Unit testing with zend framework tek11Michelangelo van Dam
 
Mocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitMocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitmfrost503
 
Unit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBeneluxUnit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBeneluxMichelangelo van Dam
 
Document Classification In PHP
Document Classification In PHPDocument Classification In PHP
Document Classification In PHPIan Barber
 
Gta v savegame
Gta v savegameGta v savegame
Gta v savegamehozayfa999
 
WordPress Realtime - WordCamp São Paulo 2015
WordPress Realtime - WordCamp São Paulo 2015WordPress Realtime - WordCamp São Paulo 2015
WordPress Realtime - WordCamp São Paulo 2015Fernando Daciuk
 
Game Development with SDL and Perl
Game Development with SDL and PerlGame Development with SDL and Perl
Game Development with SDL and Perlgarux
 
R57shell
R57shellR57shell
R57shellady36
 

Similar a Teaching Your Machine to Detect Fraud With Supervised Learning Techniques (20)

Javascript & jQuery: A pragmatic introduction
Javascript & jQuery: A pragmatic introductionJavascript & jQuery: A pragmatic introduction
Javascript & jQuery: A pragmatic introduction
 
Crazy things done on PHP
Crazy things done on PHPCrazy things done on PHP
Crazy things done on PHP
 
Coding website
Coding websiteCoding website
Coding website
 
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
 
Your code sucks, let's fix it - DPC UnCon
Your code sucks, let's fix it - DPC UnConYour code sucks, let's fix it - DPC UnCon
Your code sucks, let's fix it - DPC UnCon
 
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
 
Mocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitMocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnit
 
My Development Story
My Development StoryMy Development Story
My Development Story
 
Unit testing with zend framework tek11
Unit testing with zend framework tek11Unit testing with zend framework tek11
Unit testing with zend framework tek11
 
Mocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitMocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnit
 
Mocking Demystified
Mocking DemystifiedMocking Demystified
Mocking Demystified
 
Ns2programs
Ns2programsNs2programs
Ns2programs
 
Unit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBeneluxUnit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBenelux
 
Document Classification In PHP
Document Classification In PHPDocument Classification In PHP
Document Classification In PHP
 
Gta v savegame
Gta v savegameGta v savegame
Gta v savegame
 
WordPress Realtime - WordCamp São Paulo 2015
WordPress Realtime - WordCamp São Paulo 2015WordPress Realtime - WordCamp São Paulo 2015
WordPress Realtime - WordCamp São Paulo 2015
 
Database api
Database apiDatabase api
Database api
 
ddd+scala
ddd+scaladdd+scala
ddd+scala
 
Game Development with SDL and Perl
Game Development with SDL and PerlGame Development with SDL and Perl
Game Development with SDL and Perl
 
R57shell
R57shellR57shell
R57shell
 

Último

TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024Lonnie McRorey
 
Digital Identity is Under Attack: FIDO Paris Seminar.pptx
Digital Identity is Under Attack: FIDO Paris Seminar.pptxDigital Identity is Under Attack: FIDO Paris Seminar.pptx
Digital Identity is Under Attack: FIDO Paris Seminar.pptxLoriGlavin3
 
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxThe Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxLoriGlavin3
 
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc
 
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024BookNet Canada
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfAlex Barbosa Coqueiro
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Manik S Magar
 
Scale your database traffic with Read & Write split using MySQL Router
Scale your database traffic with Read & Write split using MySQL RouterScale your database traffic with Read & Write split using MySQL Router
Scale your database traffic with Read & Write split using MySQL RouterMydbops
 
The State of Passkeys with FIDO Alliance.pptx
The State of Passkeys with FIDO Alliance.pptxThe State of Passkeys with FIDO Alliance.pptx
The State of Passkeys with FIDO Alliance.pptxLoriGlavin3
 
Sample pptx for embedding into website for demo
Sample pptx for embedding into website for demoSample pptx for embedding into website for demo
Sample pptx for embedding into website for demoHarshalMandlekar2
 
Ryan Mahoney - Will Artificial Intelligence Replace Real Estate Agents
Ryan Mahoney - Will Artificial Intelligence Replace Real Estate AgentsRyan Mahoney - Will Artificial Intelligence Replace Real Estate Agents
Ryan Mahoney - Will Artificial Intelligence Replace Real Estate AgentsRyan Mahoney
 
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Mark Simos
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfAddepto
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
 
From Family Reminiscence to Scholarly Archive .
From Family Reminiscence to Scholarly Archive .From Family Reminiscence to Scholarly Archive .
From Family Reminiscence to Scholarly Archive .Alan Dix
 
Visualising and forecasting stocks using Dash
Visualising and forecasting stocks using DashVisualising and forecasting stocks using Dash
Visualising and forecasting stocks using Dashnarutouzumaki53779
 
What is Artificial Intelligence?????????
What is Artificial Intelligence?????????What is Artificial Intelligence?????????
What is Artificial Intelligence?????????blackmambaettijean
 
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024Stephanie Beckett
 
Take control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteTake control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteDianaGray10
 
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024BookNet Canada
 

Último (20)

TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024
 
Digital Identity is Under Attack: FIDO Paris Seminar.pptx
Digital Identity is Under Attack: FIDO Paris Seminar.pptxDigital Identity is Under Attack: FIDO Paris Seminar.pptx
Digital Identity is Under Attack: FIDO Paris Seminar.pptx
 
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxThe Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
 
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
 
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdf
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!
 
Scale your database traffic with Read & Write split using MySQL Router
Scale your database traffic with Read & Write split using MySQL RouterScale your database traffic with Read & Write split using MySQL Router
Scale your database traffic with Read & Write split using MySQL Router
 
The State of Passkeys with FIDO Alliance.pptx
The State of Passkeys with FIDO Alliance.pptxThe State of Passkeys with FIDO Alliance.pptx
The State of Passkeys with FIDO Alliance.pptx
 
Sample pptx for embedding into website for demo
Sample pptx for embedding into website for demoSample pptx for embedding into website for demo
Sample pptx for embedding into website for demo
 
Ryan Mahoney - Will Artificial Intelligence Replace Real Estate Agents
Ryan Mahoney - Will Artificial Intelligence Replace Real Estate AgentsRyan Mahoney - Will Artificial Intelligence Replace Real Estate Agents
Ryan Mahoney - Will Artificial Intelligence Replace Real Estate Agents
 
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
Tampa BSides - Chef's Tour of Microsoft Security Adoption Framework (SAF)
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdf
 
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
 
From Family Reminiscence to Scholarly Archive .
From Family Reminiscence to Scholarly Archive .From Family Reminiscence to Scholarly Archive .
From Family Reminiscence to Scholarly Archive .
 
Visualising and forecasting stocks using Dash
Visualising and forecasting stocks using DashVisualising and forecasting stocks using Dash
Visualising and forecasting stocks using Dash
 
What is Artificial Intelligence?????????
What is Artificial Intelligence?????????What is Artificial Intelligence?????????
What is Artificial Intelligence?????????
 
What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024What's New in Teams Calling, Meetings and Devices March 2024
What's New in Teams Calling, Meetings and Devices March 2024
 
Take control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test SuiteTake control of your SAP testing with UiPath Test Suite
Take control of your SAP testing with UiPath Test Suite
 
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC CataList - Tech Forum 2024
 

Teaching Your Machine to Detect Fraud With Supervised Learning Techniques

  • 1. TEACHING YOUR MACHINE TO FIND FRAUDSTERS Ian Barber ianb@php.net phpir.com twitter.com/ianbarber
  • 3. 5% 3% SOME .1% SMALL NUMBERS 8%
  • 5. REALLY REALLY LEGITIMATE FRAUD EVALUATED 989 0 LEGITIMATE EVALUATED 10 1 FRAUD
  • 6. REALLY REALLY LEGITIMATE FRAUD 90% EVALUATED LEGITIMATE WRONG 989 0 EVALUATED 10 1 FRAUD
  • 8. 30 22.5 Clicks 15 7.5 0 Date
  • 9. SOFTWARE ARCHITECTURE Alarm Detector No Alarm Buffer User Clicks Landing Ad Page
  • 10. DETECTOR statistics Expected Clicks Threshold Data Buffer Sensitivity Alarm
  • 11. average.php function detect($sen) { $window = array(); $i = 0; $alarmCount = 0; $dtd = 0; $avg = $stddev = 0; $fraud = fopen("fraudclicks.csv", 'r'); while($d = fgetcsv($fraud)) { $i++; if(count($window) > 7) { array_shift($window); $avg = array_sum($window) / 7; foreach($window as $val) { $stddev += pow($val - $average, 2); } $stddev = sqrt($stddev/7);
  • 12. 0.2 0.15 0.1 0.05 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
  • 13. if($d[1] > ($avg + ($sen * $stddev))){ $alarmCount++; if($i > 201) { break; } } else { if($i > 201) { $dtd++; } } } array_push($window, $d[1]); } return array($alarmCount-1, $dtd); }
  • 14. 1.6 SENSITIVITY 30 18 False Alarms 1 Day To Detect 22.5 Clicks 15 7.5 0 Date
  • 15. 2.7 SENSITIVITY 30 1 False Alarm 18 Days To Detect 22.5 Clicks 15 7.5 0 Date
  • 17. function detect($sens) { sickavail.php $i = 0; $alarms = 0; $dtd = 0; $window = array(); $avail = array(); $fraud = fopen("fraudclicks.csv", 'r'); while($dat = fgetcsv($fraud)) { $dow = date("w", strtotime($dat[0])); if( count($window) >= 7 && isset($avail[$dow]) ) { $sick = 0; foreach($window as $day => $value) { $dowavg = array_sum($avail[$day]) / count($avail[$day]); $sick += $value / $dowavg; } $sick /= count($window);
  • 18. $avlblty = array_sum($avail[$dow]) / count($avail[$dow]); $est = $sick * $avlblty; $fac = fac($dat[1]); $p = exp(-$est) * pow($est,$dat[1]) / $fac; // poisson calc if($p < $sens && $dat[1] > $est) { $alarms++; if($i > 201) { break; } } else { if($i > 201) { $dtd++; } } } // end if
  • 19. 0.2 0.15 0.1 0.05 0 1 2 3 4 5 6 7 8 9 10
  • 20. 0.011 SENSITIVITY 30 1 False Alarm 1 Day To Detect 22.5 Clicks 15 7.5 0 Date
  • 22. classification model SOFTWARE ARCHITECTURE Fraud Classifier Not Fraud User Transaction Purchase Processor Transaction Learner Database
  • 23. EVALUATING THE CLASSIFIER Training Data Learner Model Test Data Prediction Classifier Accuracy Model
  • 24. 20 15 10 5 0 0 5 10 15 20
  • 25. 20 15 10 5 ? 0 0 5 10 15 20
  • 26. 20 15 10 5 ? 0 0 5 10 15 20
  • 27. $docs = array( array('fraud' => false, 'price' => 1699, 'desc'=>'toy ninja', 'ship' => 'US'), array('fraud' => false, 'price' => 20000, 'desc' => 'TV','ship' => 'US'), array('fraud' => false, 'price' => 2500, 'desc' => 'cds', 'ship' => 'US'), array('fraud' => true, 'price' => 20000, 'desc' => 'console', 'ship' => 'CN'), array('fraud' => true, 'price' => 5000, 'desc' => 'books', 'ship' => 'US'), array('fraud' => true, 'price' => 15000, 'desc' => 'ipod', 'ship' => 'CN'), );
  • 28. $db = new XapianWritableDatabase("index", Xapian::DB_CREATE_OR_OPEN); $idx = new XapianTermGenerator(); $stem = new XapianStem("english"); $idx->set_stemmer($stem); foreach($docs as $key => $doc) { $xdoc = new XapianDocument(); $xdoc->set_data($doc['fraud'] ? "fraud" : "clean"); $idx->set_document($xdoc); $idx->index_text($doc['price'] . ' ' . $doc['desc'] . ' ' . $doc['ship']); $db->add_document($xdoc, $key); } $db = null; fraudknn.php
  • 29. $test = array( testknn.php 'price' => 10000, 'desc' => 'TV', 'ship' => 'CN' ); $db = new XapianWritableDatabase("index", Xapian::DB_CREATE_OR_OPEN); $idx = new XapianTermGenerator(); $stem = new XapianStem("english"); $idx->set_stemmer($stem); $xdoc = new XapianDocument(); $idx->set_document($xdoc); $idx->index_text($test['price'] . ' ' . $test['desc'] . ' ' . $test['ship']); $id = $db->add_document($xdoc);
  • 30. $enq = new XapianEnquire($db); $rset = new XapianRSet(); $rset->add_document($id); $eset = $enq->get_eset(10, $rset); $terms = array(); $i = $eset->begin(); while ( !$i->equals($eset->end()) ) { $terms[] = $i->get_term(); $i->next(); } $q = new XapianQuery( XapianQuery::OP_OR, $terms); $enq->set_query($q); $matches = $enq->get_mset(0, 4, $rset);
  • 31. $i = $matches->begin(); while (!$i->equals($matches->end())) { if($i->get_document()->get_docid() != $id) { $class = $i->get_document()->get_data(); var_dump($class); } $i->next(); } $db->delete_document($id); $ php testknn.php string(5) "clean" string(5) "fraud" string(5) "fraud"
  • 33.
  • 34.
  • 35. function compareEmailToName($name, $email) { $name = strtolower($name); $email = strtolower($email); $parts = explode(" ", $name); $pcnt = 0; list($user, $dom) = explode("@", $email); $user = str_replace( array(".", "+"), " ", $user); $dom = preg_replace("/..*/", "", $dom); similar_text($name, $user, $pcnt); if($pcnt > 80) { return 1.0; } similar_text($name, $dom, $pcnt); if($pcnt > 80) { return 0.8; } email.php
  • 36. if(count($parts)) { $highest = 0; foreach($parts as $part) { similar_text($user, $part, $pcnt); if($pcnt > 50 && $pcnt > $highest) { $highest = $percent; } similar_text($dom, $part, $pcnt); if($pcnt > 50 && $pcnt > $highest) { $highest = $percent; } } return (1.7 * ($highest/100)) - 1; } return -1; }
  • 37.
  • 38.
  • 39.
  • 40. $data = array( 'purchase_value' => 20993, 'geo_country' => 'DE', 'previous_orders' => 1, 'time' => 6, 'timegap' => 146632, 'product_category' => 'small_item', 'delivery_matches_card' => 0, 'geo_ip_matches_card' => 1, 'difference_from_last_trans' => 8755, 'free_shipping' => 0, 'email_like_name' => 0, 'free_email_provider' => 0, 'disposable_email_provider' => 0, 'quantity' => 2, 'fraud' => 0);
  • 42. 20 15 10 5 0 0 5 10 15 20
  • 43. 20 15 10 5 0 0 5 10 15 20
  • 44. 20 15 10 5 0 0 5 10 15 20
  • 45. 20 15 10 5 0 0 5 10 15 20
  • 46. 20 15 10 5 0 0 5 10 15 20
  • 47. $ apt-get install libsvm-dev $ apt-get install libsvm-tools $ yum install libsvm-devel $ pecl install svm-beta $ echo extension=svm.so > /etc/php.d/svm.ini $ php -r '$s = new svm(); $m = $s->train (array(array(-1, -1), array(1, 1))); echo $m->predict(array(0, -1));' -1
  • 48. $fh = fopen('paydata.csv', 'r'); $output = array(); while($data = fgetcsv($fh)) { $output[] = array( $data[14] == 1 ? -1 : 1, 1 => ($data[0]/20000.00) - 1.0, // price 2 => $data[1] == 'CN' ? 1.0:-1.0, 3 => $data[1] == 'US' ? 1.0:-1.0, 4 => $data[5] == 'digital' ? 1.0:-1.0, 5 => $data[7] == 1 ? 1.0:-1.0, //geo 6 => $data[6] == 1 ? 1.0:-1.0, // deliv 12 => $data[9] == 1 ? 1.0:-1.0, // ship 13 => ($data[13] / 1.5) - 1.0, // qty ); } learn.php
  • 49. $svm = new svm(); $model = $svm->train($output, array(-1 => 0.65, 1 => 0.5)); $model->save('learn.model'); $fp = $tp = $fn = $tn = 0; foreach($output as $test) { $res = $model->predict($test); if($test[0] > 0) { if($res > 0) { $tp++; } else { $fn++; } } else { if($res > 0) { $fp++; } else { $tn++; } } }
  • 50. // ...snip.. loading test data from // paytest.csv $model = new SVMModel('learn.model'); $fp = $tp = $fn = $tn = 0; foreach($output as $test) { $res = $model->predict($test); if($test[0] > 0) { if($res > 0) { $tp++; } else { $fn++; } } else { if($res > 0) { $fp++; } else { $tn++; } } } test.php
  • 51. var_dump("True Positive " . $tp); var_dump("True Negative " . $tn); var_dump("False Positive " . $fp); var_dump("False Negative " . $fn); var_dump("Accuracy " . (($tp+$tn)/($tp+$tn+$fp+$fn)));
  • 52. $ php learn.php string(18) "True Positive 8316" string(18) "True Negative 1682" string(16) "False Positive 2" string(16) "False Negative 0" string(15) "Accuracy 0.9998" $ php test.php string(17) "True Positive 844" string(17) "True Negative 155" string(16) "False Positive 0" string(16) "False Negative 1" string(14) "Accuracy 0.999"
  • 53. training data Test Verify Update Automated Manual Manual
  • 54. Time Series Class Based Sensitivity Model False Days To False False Alarms Detect Positives Negatives
  • 57. Title Slide - CSI http://www.flickr.com/photos/39matt/5241862082 Sickness Availability - Chicago Fire Department http://www.flickr.com/photos/mike_miley/3929146730/ Model Buildings - Ah Ain’t Long For This Whorl http://www.flickr.com/photos/chadmiller/98014022/ Repeat Customer - McDonald’s Loyalty Card http://www.flickr.com/photos/fsse-info/3658873057/ Shipping - FedEx Truck http://www.flickr.com/photos/moto_club4ag/4852235145/ Velocity - Chevrolet Chevelle Dragster http://www.flickr.com/photos/jns001/2958999006/ GeoIP - Earth Asia Terminator View http://www.flickr.com/photos/flyingsinger/86898564/ Multiple Items - Boxes http://www.flickr.com/photos/skrewtape/851672959/