SlideShare una empresa de Scribd logo
1 de 31
2010/06/24
                       
kaneko.satoko(at)ocha.ac.jp 
                   
 
    Bioconductor(Biostrings)        
            (p distance)        
                                            
Bioconductor Biostrings                                   
Biostrings 
> source("http://www.bioconductor.org/biocLite.R") 
> biocLite("Biostrings")    #         1               




> library(Biostrings)     #   R                               
Bioconductor/Biostrings                              1 
> ls("package:Biostrings")        #Biostrings                       

> x <‐ "CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT" 
> DNAString(x)     #DNA           
  54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> s <‐ DNAString(x)       #   s DNA        (x)    
> length(s)   
[1] 54          #      s    DNA                           54 

> length(x)     
[1] 1      #       x                                      1 
Bioconductor/Biostrings                      2 
 54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> alphabetFrequency(s, baseOnly=TRUE)    #            
      A  C  G  T other 
[1,] 12 12 15 15     0 

> reverseComplement(s)       #       
  54-letter "DNAString" instance 
seq: AGCATCGATCAGCTAGCATCGATCAGCTAGCTAGCTAGCTAGCTACTACGTACG 

> dna2rna(s)             #RNA    (T ‐>U) 
  54-letter "RNAString" instance 
seq: CGUACGUAGUAGCUAGCUAGCUAGCUAGCUGAUCGAUGCUAGCUGAUCGAUGCU 
Bioconductor/Biostrings                      3 
 54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> m1 <- matchPattern("GCTA", s)  #                       
> m1 
  Views on a 54-letter DNAString subject 
subject: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 
views: 
    start end width 
[1]    12  15     4 [GCTA] 
[2]    16  19     4 [GCTA] 
[3]    20  23     4 [GCTA] 
[4]    24  27     4 [GCTA] 
[5]    38  41     4 [GCTA] 
1
   Makorin1 
   22.61kb 
   (CDS:1446bp) 

  Makorin1‐p1 
  1592bp 
                   region A         region B            region C 

Makorin1‐p1   Makorin1                         processed pseudogene         
Makorin1‐p1 regionB                        Makorin1 regionB                            
Makorin1  mRNA                                              regionB          mRNA  
                                                                        regionC   
                                 

regionB                                                                                            
                                                                     
                                                    regionC                                    
regionB                                    

          regionB regionC                                                   
                                                     
1         

    Makorin1 
    22.61kb 
    (CDS:1446bp) 

    Makorin1‐p1 
    1592bp 
                    region A     region B            region C 

       
1) Makorin1‐p1                      Makorin1                                      

2)                       Makorin1 mRNA                                                
    Makorin1‐p1 Makorin1                                        Makorin1‐p1                       
    Makorin1                                                 

3) Makorin1 mRNA                                                                          
     Makorin1‐p1                     (regionB                                                 
                                                )                         

    regionB regionC                                                           
                                                         
 –             1‐


                                      

 (Null hypothesis)            




                      
                          
                                  
                                          
 –         2‐
                           2                                    

                               
     
         
                   

                  
                       

                      
               


         False negative                         False positive        
2
                                         Makorin1‐p1                         
 Makorin1‐p1                        ortholog rat                                   
 Makorin1‐p1                                                                                    
 Mus musculus domesticus                             5              
                                                                 Subgenus
                      M. booduga
                     M. fragilicauda    India+Lao/Thai
                                           booduga
                    M. terricolor
             1.5    M. macedonicus
       4.3   mya    M. spicilegus
       mya
                     M. spretus
                     M. musculus castaneus
                                                    Palearctic
                                                                 Mus
                                                    musculus
                     M. m. domesticus
                     M. m. molossinus
                     M. caroli
                    M. cookii                   Southeast Asia
                    M. cervicolor                cervicolor
                     M. pahari Coelomys
                       M. mattheyi Nannomys
                   M. platythrix Pyromys
                    Apodemus agrarius
                             Micromys minutus
                      Rattus norvegicus                          (from Suzuki et al. 2004 Mol. Phylogenet. Evol.
0.01                                                                                    33:626-646, Figure 1, 4.)
3
                                                                

       
                                 
                     Makorin1‐p1.fasta 
                                                     
 Mus musculus domesticus
 dom
                 [Macintosh HD/        /tg03/bin]
 Mus musculus molossinus
 mol
                      
 Mus musculus castaneus
       cas
                                              Makorin1‐p1.fasta                          
 Mus musculus musculus
        mus
           regionB 1‐617, regionC 618‐1256        
 Mus spretus
                  spr
 Mus caroli
                   car

p distance        
         2                          (number of differences)/                                  
             alignment                       
dom    CCTGCCCCAA ATGTCAGATC ACATCTCACT TTGTCATTCC AAGTAATCAC TGGGTGGAGT
spr1   .......... ...C...... .......... .......... ......GT.. ..........
car1   .......... ...C.GA... ......A... ..T....... ....G.GT.. .........G

dom‐spr1: 3/60 = 0.05 
dom‐car1: 9/60 = 0.15 
spr1‐car1: 6/60 =  0.10          
region B region C                                                   
regionB regionC                                    number of differences      p distance                       
                  region B                          bp
             region C                           bp
pair 
         number of differences
               p distance
   number of differences
               p distance
 dom – mol
 dom – cas
 dom – mus
 dom – spr
 dom – car
  mol ‐ cas
 mol – mus
 mol – spr
 mol – car
 cas – mus
  cas – spr
  cas – car
 mus – spr 
 mus – car
  spr – car
region B region C                                                  
        Makorin1‐p1                      (region B, regionC)            
                                    p distance             

      
1) domesticus          Biostrings DNAString            
2) B              
3) B                     
4) C              
5) C                     
6) domesticus                      DNAString          B         C            
7)                                        
8)                                              4                               
Biostrings                        p distance                                          1
library(Biostrings)  #R                                            

#Makorin1-p1.fasta domesticus              ""                               
# DNA              dom               
> dom <‐ DNAString("") 

#dom          1           617                  domB            
> domB <‐ substring(dom,1,617) 

#domB                           lengthB         (p distance                                )        
> lengthB <‐ length(domB) 

#dom          618          1256                  domC                  
> domC <‐ substring(dom, 618,1256) 

#domC                           lengthC         (p distance                                )        
> lengthC <‐ length(domC) 

#                                                                                  
#        lengthB                                                               
> lengthB 
[1] 617 
Biostrings                        p distance                       2
#Makorin1‐p1.fasta molossinus         ””                     
# DNA              mol             
> mol <‐ DNAString("") 

#mol         1        617                  molB      
> molB <‐ substring(mol,1,617) 

#mol         618          1256               molC        
> molC <‐ substring(mol, 618,1256) 


#                         castaneus(cas), musculus(mus), spretus(spr), caroli(car) 
#                                                  number of differences p distance  
#                            
Biostrings                                 p distance           3
> x <‐ domB 
> y <‐ molB 

#      x       y(    domB molB)                          
>  comp<‐ c(compareStrings(x,y)) 

#               ?                          ?    
> subt <‐ gsub("(['?'])", "", comp) 

#subt DNA                ide            
> ide <‐ DNAString(subt) 

#ide           len        
> len <‐ length(ide) 

#x y                         dif            
> dif <- (lengthB - len ) 
> dif       #x y                                             

#regionB  p distance                
> pdis <‐ dif/lengthB 
> pdis  #p distance                                4             
Biostrings                           p distance                       4
CotEditor                                   pdistanceB.R         
[Macintosh HD/          /tg03/bin]                    

comp <‐ c(compareStrings(x,y)) 
                                            2       lengthB lengthC         
subt <‐ gsub("(['?'])", "", comp) 
                                             pdistanceC.R      bin              
ide <‐ DNAString(subt) 
len <‐ length(ide) 
dif <‐ (lengthB ‐ len) 
pdis <‐ dif/lengthB 

                                               x y           
> x <‐ 
> y <‐ 
> source("/Users/tg03/bin/pdistanceB.R") 
                           bin                        
> source("pdistanceB.R") 
                2                 
> dif 
> pdis 
region B region C                                              (           )
                 region B                    617  bp
     region C                 639  bp
pair 
        number of differences
      p distance
    number of differences
   p distance
dom – mol
             6
                  0.010
                  7               0.011 
dom – cas
             6
                  0.010
                  7
              0.011 
dom – mus
             8
                  0.013
                  8
              0.013 
dom – spr
            16
                  0.026
               14
                0.022 
dom – car
            30
                  0.049
               39
                0.061 
mol – cas
             0
                     0
                   0
                0 
mol – mus
             4
                  0.006
                  1
              0.002 
mol – spr
            14
                  0.023
               17
                0.027  
mol – car
            28
                  0.045
               38
                0.059
cas – mus
             4
                  0.006
                  1
              0.002 
cas – spr
            14
                  0.023
               17
                0.027
cas – car
            28
                  0.045
               38
                0.059
mus – spr 
           14
                  0.023
               18
                0.028
mus – car
            28
                  0.045
               39
                0.061
                                                                                              
spr – car
            32
                  0.052
               37
                0.058
 1
1) regionB p distance x                   x         
> x <‐ c(x         ) 

2) regionC   p distance y                 y             
> y <‐ c(y           ) 

3)                                x   y                         

4) plot()                
> plot(x,y,xlim=c(            ,       ), ylim=c(           ,       )) 
 1            
1) regionB p distance x                          x        
> x <‐ c(0.010, 0.010, 0.013, 0.026, 0.049, 0, 0.006 ,0.023, 0.045, 0.006 , 0.023 , 0.045, 
0.023, 0.045, 0.052 ) 

2) regionC p distance y                          y          
> y <‐ c(0.011, 0.011, 0.013, 0.022, 0.061, 0, 0.002, 0.027, 0.059, 0.002, 0.027, 0.059, 
0.028, 0.061, 0.058 ) 

3) max()                                            x    y                                   
> max(x) 
[1] 0.052  

> max(y) 
[1] 0.061 

4) plot()                  
> plot(x,y,xlim=c(0,0.065), ylim=c(0,0.065)) 
2
4’)                                              
> plot(x,y,xlab='regionB',ylab='regionC', xlim=c(0,0.065), ylim=c(0,0.065)) 




regionB regionC    p distance                                 
regionB regionC                                                                 
                         
                                               
1
                                                           
                                                    (d)            
                                             

                                                 
                                                               


                      (x3,y3)

                      d3
                                 d4
(x1,y1)
                    (x4,y4)
    d1
        d2

           (x2,y2)
2
                                                                                  

                                          
                           



                                                                          
                                                                              



> xdev <‐ (x‐mean(x))    # x                    
> ydev <‐ (y‐mean(y))    # y                    
> bmul<‐ xdev*ydev       # x y                      
> bnum <‐ sum(bmul)      # x y                                  (   ) 
> bsqu <‐ xdev^2          # x                2  
> bden <‐ sum(bsqu)      #  x                2         (   ) 
> b <‐ bnum/bden         #      (   ) 
> b 
[1] 1.317939 
3
                                                  
                                              
                                 




> a1 <‐ sum(y)/length(y) 
> a2 <‐ b*(sum(x)/length(x)) 
> a <‐ a1‐a2 
[1] ‐0.003636326 


> abline (a,b)   
#a b                      y = a + bx      
regionB regionC                             y=x                     
                           regionB regionC              
          y=‐0.0036+1.3x                            
(y=‐0.0036+1.3x   y=x                                  ) 

      regionB regionC                           
                                                             
 
p distance                                             1
‐pdis_line.R‐   

library("Biostrings"); 
 x <‐""
dom <- "[domesticus              ]"; 
mol <‐ "[molossinus            ]"; 
cas <‐ "[castaneus          ]"; 
mus <‐ "[musculus            ]"; 
spr <‐ "[spretus         ]"; 
car <‐ "[caroli        ]";
 
seqs     <‐ c(dom,mol,cas,mus,spr,car);
seqnames <‐ c("dom","mol","cas","mus","spr","car");
nseqs <‐ length(seqs);
npoints <‐ length(x); 
x = vector(length=npoints); 
y = vector(length=npoints); 
k = 0;  
                    
 
p distance                                        2
for (i1 in 1:(nseqs‐1)){
  for (i2 in (i1+1):nseqs ){
    k = k + 1; 
#    cat(sprintf("%d %d\n",i1,i2));
    seq1 = DNAString(seqs[i1]);
    seq2 = DNAString(seqs[i2]);
    seq_b1 = substring( seq1, 1,   617 );
    seq_c1 = substring( seq1, 618, 1256 );
    seq_b2 = substring( seq2, 1,   617 );
    seq_c2 = substring( seq2, 618, 1256 );
    len_b  = length( seq_b1 );
    cmp_b  = c(compareStrings(seq_b1,seq_b2));
    sub_b  = gsub("(['?'])","",cmp_b);
    subt_b = DNAString(sub_b);
    dif_b  = length(subt_b);
    n_b    = len_b ‐ dif_b;
    pdis_b = n_b / len_b; 

                  
 
p distance                                                 3
‐pdis_line.R‐ 

 x[k]   = pdis_b;
     len_c  = length( seq_c1 );
     cmp_c  = c(compareStrings(seq_c1,seq_c2));
     sub_c  = gsub("(['?'])","",cmp_c);
     subt_c = DNAString(sub_c);
     dif_c  = length(subt_c);
     n_c    = len_c ‐ dif_c;
     pdis_c = n_c / len_c;
     y[k]   = pdis_c;
     cat(sprintf('%s %s %d %g %g\n',seqnames[i1],seqnames[i2],k,pdis_b,pdis_c)); 
   }
 }
 xdev <‐ x‐mean(x);
 ydev <‐ y‐mean(y);
 b    <‐ sum(xdev*ydev)/sum(xdev*xdev);
 a    <‐ mean(y) ‐ b*mean(x);
  
 cat(sprintf('a=%g, b=%g\n',a,b));
 
p distance                                           4
pdis_line.R         
                                                           




         R                    
                                              
                
(                      bin            path                    ) 

                        (p distance              )                  
 
                     
 

         

Más contenido relacionado

Destacado

100513_homology_search(ensembl)
100513_homology_search(ensembl)100513_homology_search(ensembl)
100513_homology_search(ensembl)ocha_kaneko
 
100610_blastclustalw
100610_blastclustalw100610_blastclustalw
100610_blastclustalwocha_kaneko
 
100701_statistics3
100701_statistics3100701_statistics3
100701_statistics3ocha_kaneko
 
100617_statistics1
100617_statistics1100617_statistics1
100617_statistics1ocha_kaneko
 

Destacado (6)

100513_homology_search(ensembl)
100513_homology_search(ensembl)100513_homology_search(ensembl)
100513_homology_search(ensembl)
 
100520_dotplot
100520_dotplot100520_dotplot
100520_dotplot
 
090601-dotplot
090601-dotplot090601-dotplot
090601-dotplot
 
100610_blastclustalw
100610_blastclustalw100610_blastclustalw
100610_blastclustalw
 
100701_statistics3
100701_statistics3100701_statistics3
100701_statistics3
 
100617_statistics1
100617_statistics1100617_statistics1
100617_statistics1
 

Más de ocha_kaneko

100506-unix-ensembl
100506-unix-ensembl100506-unix-ensembl
100506-unix-ensemblocha_kaneko
 
100422-intro,setup
100422-intro,setup100422-intro,setup
100422-intro,setupocha_kaneko
 
090622_blast-clustalw
090622_blast-clustalw090622_blast-clustalw
090622_blast-clustalwocha_kaneko
 
090615-TogoWS SOAP
090615-TogoWS SOAP090615-TogoWS SOAP
090615-TogoWS SOAPocha_kaneko
 
090608-TogoWS REST
090608-TogoWS REST090608-TogoWS REST
090608-TogoWS RESTocha_kaneko
 
090518_unix-ensembl
090518_unix-ensembl090518_unix-ensembl
090518_unix-ensemblocha_kaneko
 
090511-intro, setup
090511-intro, setup090511-intro, setup
090511-intro, setupocha_kaneko
 

Más de ocha_kaneko (8)

100506-unix-ensembl
100506-unix-ensembl100506-unix-ensembl
100506-unix-ensembl
 
100422-intro,setup
100422-intro,setup100422-intro,setup
100422-intro,setup
 
Statistics_R
Statistics_RStatistics_R
Statistics_R
 
090622_blast-clustalw
090622_blast-clustalw090622_blast-clustalw
090622_blast-clustalw
 
090615-TogoWS SOAP
090615-TogoWS SOAP090615-TogoWS SOAP
090615-TogoWS SOAP
 
090608-TogoWS REST
090608-TogoWS REST090608-TogoWS REST
090608-TogoWS REST
 
090518_unix-ensembl
090518_unix-ensembl090518_unix-ensembl
090518_unix-ensembl
 
090511-intro, setup
090511-intro, setup090511-intro, setup
090511-intro, setup
 

Último

BỘ LUYỆN NGHE TIẾNG ANH 8 GLOBAL SUCCESS CẢ NĂM (GỒM 12 UNITS, MỖI UNIT GỒM 3...
BỘ LUYỆN NGHE TIẾNG ANH 8 GLOBAL SUCCESS CẢ NĂM (GỒM 12 UNITS, MỖI UNIT GỒM 3...BỘ LUYỆN NGHE TIẾNG ANH 8 GLOBAL SUCCESS CẢ NĂM (GỒM 12 UNITS, MỖI UNIT GỒM 3...
BỘ LUYỆN NGHE TIẾNG ANH 8 GLOBAL SUCCESS CẢ NĂM (GỒM 12 UNITS, MỖI UNIT GỒM 3...Nguyen Thanh Tu Collection
 
會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽
會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽
會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽中 央社
 
How to the fix Attribute Error in odoo 17
How to the fix Attribute Error in odoo 17How to the fix Attribute Error in odoo 17
How to the fix Attribute Error in odoo 17Celine George
 
Danh sách HSG Bộ môn cấp trường - Cấp THPT.pdf
Danh sách HSG Bộ môn cấp trường - Cấp THPT.pdfDanh sách HSG Bộ môn cấp trường - Cấp THPT.pdf
Danh sách HSG Bộ môn cấp trường - Cấp THPT.pdfQucHHunhnh
 
Neurulation and the formation of the neural tube
Neurulation and the formation of the neural tubeNeurulation and the formation of the neural tube
Neurulation and the formation of the neural tubeSaadHumayun7
 
The Ultimate Guide to Social Media Marketing in 2024.pdf
The Ultimate Guide to Social Media Marketing in 2024.pdfThe Ultimate Guide to Social Media Marketing in 2024.pdf
The Ultimate Guide to Social Media Marketing in 2024.pdfdm4ashexcelr
 
The Last Leaf, a short story by O. Henry
The Last Leaf, a short story by O. HenryThe Last Leaf, a short story by O. Henry
The Last Leaf, a short story by O. HenryEugene Lysak
 
MichaelStarkes_UncutGemsProjectSummary.pdf
MichaelStarkes_UncutGemsProjectSummary.pdfMichaelStarkes_UncutGemsProjectSummary.pdf
MichaelStarkes_UncutGemsProjectSummary.pdfmstarkes24
 
ppt your views.ppt your views of your college in your eyes
ppt your views.ppt your views of your college in your eyesppt your views.ppt your views of your college in your eyes
ppt your views.ppt your views of your college in your eyesashishpaul799
 
Pragya Champions Chalice 2024 Prelims & Finals Q/A set, General Quiz
Pragya Champions Chalice 2024 Prelims & Finals Q/A set, General QuizPragya Champions Chalice 2024 Prelims & Finals Q/A set, General Quiz
Pragya Champions Chalice 2024 Prelims & Finals Q/A set, General QuizPragya - UEM Kolkata Quiz Club
 
philosophy and it's principles based on the life
philosophy and it's principles based on the lifephilosophy and it's principles based on the life
philosophy and it's principles based on the lifeNitinDeodare
 
Features of Video Calls in the Discuss Module in Odoo 17
Features of Video Calls in the Discuss Module in Odoo 17Features of Video Calls in the Discuss Module in Odoo 17
Features of Video Calls in the Discuss Module in Odoo 17Celine George
 
Incoming and Outgoing Shipments in 2 STEPS Using Odoo 17
Incoming and Outgoing Shipments in 2 STEPS Using Odoo 17Incoming and Outgoing Shipments in 2 STEPS Using Odoo 17
Incoming and Outgoing Shipments in 2 STEPS Using Odoo 17Celine George
 
Matatag-Curriculum and the 21st Century Skills Presentation.pptx
Matatag-Curriculum and the 21st Century Skills Presentation.pptxMatatag-Curriculum and the 21st Century Skills Presentation.pptx
Matatag-Curriculum and the 21st Century Skills Presentation.pptxJenilouCasareno
 
Exploring Gemini AI and Integration with MuleSoft | MuleSoft Mysore Meetup #45
Exploring Gemini AI and Integration with MuleSoft | MuleSoft Mysore Meetup #45Exploring Gemini AI and Integration with MuleSoft | MuleSoft Mysore Meetup #45
Exploring Gemini AI and Integration with MuleSoft | MuleSoft Mysore Meetup #45MysoreMuleSoftMeetup
 
Dementia (Alzheimer & vasular dementia).
Dementia (Alzheimer & vasular dementia).Dementia (Alzheimer & vasular dementia).
Dementia (Alzheimer & vasular dementia).Mohamed Rizk Khodair
 
Morse OER Some Benefits and Challenges.pptx
Morse OER Some Benefits and Challenges.pptxMorse OER Some Benefits and Challenges.pptx
Morse OER Some Benefits and Challenges.pptxjmorse8
 
Post Exam Fun(da) Intra UEM General Quiz 2024 - Prelims q&a.pdf
Post Exam Fun(da) Intra UEM General Quiz 2024 - Prelims q&a.pdfPost Exam Fun(da) Intra UEM General Quiz 2024 - Prelims q&a.pdf
Post Exam Fun(da) Intra UEM General Quiz 2024 - Prelims q&a.pdfPragya - UEM Kolkata Quiz Club
 

Último (20)

BỘ LUYỆN NGHE TIẾNG ANH 8 GLOBAL SUCCESS CẢ NĂM (GỒM 12 UNITS, MỖI UNIT GỒM 3...
BỘ LUYỆN NGHE TIẾNG ANH 8 GLOBAL SUCCESS CẢ NĂM (GỒM 12 UNITS, MỖI UNIT GỒM 3...BỘ LUYỆN NGHE TIẾNG ANH 8 GLOBAL SUCCESS CẢ NĂM (GỒM 12 UNITS, MỖI UNIT GỒM 3...
BỘ LUYỆN NGHE TIẾNG ANH 8 GLOBAL SUCCESS CẢ NĂM (GỒM 12 UNITS, MỖI UNIT GỒM 3...
 
會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽
會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽
會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽會考英聽
 
How to the fix Attribute Error in odoo 17
How to the fix Attribute Error in odoo 17How to the fix Attribute Error in odoo 17
How to the fix Attribute Error in odoo 17
 
Danh sách HSG Bộ môn cấp trường - Cấp THPT.pdf
Danh sách HSG Bộ môn cấp trường - Cấp THPT.pdfDanh sách HSG Bộ môn cấp trường - Cấp THPT.pdf
Danh sách HSG Bộ môn cấp trường - Cấp THPT.pdf
 
Neurulation and the formation of the neural tube
Neurulation and the formation of the neural tubeNeurulation and the formation of the neural tube
Neurulation and the formation of the neural tube
 
The Ultimate Guide to Social Media Marketing in 2024.pdf
The Ultimate Guide to Social Media Marketing in 2024.pdfThe Ultimate Guide to Social Media Marketing in 2024.pdf
The Ultimate Guide to Social Media Marketing in 2024.pdf
 
The Last Leaf, a short story by O. Henry
The Last Leaf, a short story by O. HenryThe Last Leaf, a short story by O. Henry
The Last Leaf, a short story by O. Henry
 
MichaelStarkes_UncutGemsProjectSummary.pdf
MichaelStarkes_UncutGemsProjectSummary.pdfMichaelStarkes_UncutGemsProjectSummary.pdf
MichaelStarkes_UncutGemsProjectSummary.pdf
 
ppt your views.ppt your views of your college in your eyes
ppt your views.ppt your views of your college in your eyesppt your views.ppt your views of your college in your eyes
ppt your views.ppt your views of your college in your eyes
 
“O BEIJO” EM ARTE .
“O BEIJO” EM ARTE                       .“O BEIJO” EM ARTE                       .
“O BEIJO” EM ARTE .
 
Post Exam Fun(da) Intra UEM General Quiz - Finals.pdf
Post Exam Fun(da) Intra UEM General Quiz - Finals.pdfPost Exam Fun(da) Intra UEM General Quiz - Finals.pdf
Post Exam Fun(da) Intra UEM General Quiz - Finals.pdf
 
Pragya Champions Chalice 2024 Prelims & Finals Q/A set, General Quiz
Pragya Champions Chalice 2024 Prelims & Finals Q/A set, General QuizPragya Champions Chalice 2024 Prelims & Finals Q/A set, General Quiz
Pragya Champions Chalice 2024 Prelims & Finals Q/A set, General Quiz
 
philosophy and it's principles based on the life
philosophy and it's principles based on the lifephilosophy and it's principles based on the life
philosophy and it's principles based on the life
 
Features of Video Calls in the Discuss Module in Odoo 17
Features of Video Calls in the Discuss Module in Odoo 17Features of Video Calls in the Discuss Module in Odoo 17
Features of Video Calls in the Discuss Module in Odoo 17
 
Incoming and Outgoing Shipments in 2 STEPS Using Odoo 17
Incoming and Outgoing Shipments in 2 STEPS Using Odoo 17Incoming and Outgoing Shipments in 2 STEPS Using Odoo 17
Incoming and Outgoing Shipments in 2 STEPS Using Odoo 17
 
Matatag-Curriculum and the 21st Century Skills Presentation.pptx
Matatag-Curriculum and the 21st Century Skills Presentation.pptxMatatag-Curriculum and the 21st Century Skills Presentation.pptx
Matatag-Curriculum and the 21st Century Skills Presentation.pptx
 
Exploring Gemini AI and Integration with MuleSoft | MuleSoft Mysore Meetup #45
Exploring Gemini AI and Integration with MuleSoft | MuleSoft Mysore Meetup #45Exploring Gemini AI and Integration with MuleSoft | MuleSoft Mysore Meetup #45
Exploring Gemini AI and Integration with MuleSoft | MuleSoft Mysore Meetup #45
 
Dementia (Alzheimer & vasular dementia).
Dementia (Alzheimer & vasular dementia).Dementia (Alzheimer & vasular dementia).
Dementia (Alzheimer & vasular dementia).
 
Morse OER Some Benefits and Challenges.pptx
Morse OER Some Benefits and Challenges.pptxMorse OER Some Benefits and Challenges.pptx
Morse OER Some Benefits and Challenges.pptx
 
Post Exam Fun(da) Intra UEM General Quiz 2024 - Prelims q&a.pdf
Post Exam Fun(da) Intra UEM General Quiz 2024 - Prelims q&a.pdfPost Exam Fun(da) Intra UEM General Quiz 2024 - Prelims q&a.pdf
Post Exam Fun(da) Intra UEM General Quiz 2024 - Prelims q&a.pdf
 

100624_statistics2

  • 1. 2010/06/24   kaneko.satoko(at)ocha.ac.jp   
  • 2.     Bioconductor(Biostrings)     (p distance)      
  • 3. Bioconductor Biostrings   Biostrings  > source("h>p://www.bioconductor.org/biocLite.R")  > biocLite(“Biostrings”)    # 1   > library(Biostrings)   # R  
  • 4. Bioconductor/Biostrings  1  > ls(“package:Biostrings”)    #Biostrings   > x <‐ "CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT"  > DNAString(x)   #DNA     54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > s <‐ DNAString(x)    # s DNA (x)   > length(s)    [1] 54     # s  DNA 54  > length(x)    [1] 1    # x 1 
  • 5. Bioconductor/Biostrings  2   54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > alphabetFrequency(s, baseOnly=TRUE)  #         A  C  G  T other  [1,] 12 12 15 15     0  > reverseComplement(s)   #     54‐le>er "DNAString" instance  seq: AGCATCGATCAGCTAGCATCGATCAGCTAGCTAGCTAGCTAGCTACTACGTACG  > dna2rna(s)      #RNA (T ‐>U)    54‐le>er "RNAString" instance  seq: CGUACGUAGUAGCUAGCUAGCUAGCUAGCUGAUCGAUGCUAGCUGAUCGAUGCU 
  • 6. Bioconductor/Biostrings  3   54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > m1 <‐ matchPa>ern(“GCTA”, s)  #   > m1    Views on a 54‐le>er DNAString subject  subject: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  views:      start end width  [1]    12  15     4 [GCTA]  [2]    16  19     4 [GCTA]  [3]    20  23     4 [GCTA]  [4]    24  27     4 [GCTA]  [5]    38  41     4 [GCTA] 
  • 7. 1 Makorin1  22.61kb  (CDS:1446bp)  Makorin1‐p1  1592bp  region A   region B   region C  Makorin1‐p1 Makorin1 processed pseudogene   Makorin1‐p1 regionB Makorin1 regionB   Makorin1  mRNA regionB mRNA   regionC     regionB     regionC   regionB   regionB regionC    
  • 8. 1 Makorin1  22.61kb  (CDS:1446bp)  Makorin1‐p1  1592bp  region A   region B   region C    1) Makorin1‐p1 Makorin1   2)  Makorin1 mRNA       Makorin1‐p1 Makorin1 Makorin1‐p1       Makorin1   3) Makorin1 mRNA        Makorin1‐p1 (regionB   )    regionB regionC      
  • 9.  – 1‐   (Null hypothesis)          
  • 10.  – 2‐ 2   False negative  False positive  
  • 11. 2 Makorin1‐p1   Makorin1‐p1 ortholog rat   Makorin1‐p1   Mus musculus domes3cus 5   Subgenus M. booduga M. fragilicauda India+Lao/Thai booduga M. terricolor 1.5 M. macedonicus 4.3 mya M. spicilegus mya M. spretus M. musculus castaneus Palearctic Mus musculus M. m. domesticus M. m. molossinus M. caroli M. cookii Southeast Asia M. cervicolor cervicolor M. pahari Coelomys M. mattheyi Nannomys M. platythrix Pyromys Apodemus agrarius Micromys minutus Rattus norvegicus (from Suzuki et al. 2004 Mol. Phylogenet. Evol. 0.01 33:626-646, Figure 1, 4.)
  • 12. 3   Makorin1‐p1.fasta    Mus musculus domes3cus dom [Macintosh HD/ /tg03/bin] Mus musculus molossinus mol   Mus musculus castaneus cas Makorin1‐p1.fasta   Mus musculus musculus mus regionB 1‐617, regionC 618‐1256   Mus spretus spr Mus caroli car p distance   2 (number of differences)/   alignment   dom CCTGCCCCAA ATGTCAGATC ACATCTCACT TTGTCATTCC AAGTAATCAC TGGGTGGAGT spr1 .......... ...C...... .......... .......... ......GT.. .......... car1 .......... ...C.GA... ......A... ..T....... ....G.GT.. .........G dom‐spr1: 3/60 = 0.05  dom‐car1: 9/60 = 0.15  spr1‐car1: 6/60 =  0.10   
  • 13. region B region C regionB regionC number of differences p distance   region B                          bp region C                           bp pair  number of differences p distance number of differences p distance dom – mol dom – cas dom – mus dom – spr dom – car mol ‐ cas mol – mus mol – spr mol – car cas – mus cas – spr cas – car mus – spr  mus – car spr – car
  • 14. region B region C Makorin1‐p1 (region B, regionC)   p distance     1) domesticus Biostrings DNAString   2) B   3) B   4) C   5) C   6) domesticus DNAString B C   7)    8)  4  
  • 15. Biostrings p distance 1 library(Biostrings)  #R   #Makorin1‐p1.fasta domesticus ””   # DNA dom   > dom <‐ DNAString("")  #dom 1 617 domB   > domB <‐ substring(dom,1,617)  #domB lengthB (p distance )   > lengthB <‐ length(domB)  #dom 618 1256 domC   > domC <‐ substring(dom, 618,1256)  #domC lengthC (p distance )   > lengthC <‐ length(domC)  #   # lengthB   > lengthB  [1] 617 
  • 16. Biostrings p distance 2 #Makorin1‐p1.fasta molossinus ””   # DNA mol   > mol <‐ DNAString("")  #mol 1 617 molB   > molB <‐ substring(mol,1,617)  #mol 618 1256 molC   > molC <‐ substring(mol, 618,1256)  # castaneus(cas), musculus(mus), spretus(spr), caroli(car)  # number of differences p distance   #  
  • 17. Biostrings p distance 3 > x <‐ domB  > y <‐ molB  # x y( domB molB)   >  comp<‐ c(compareStrings(x,y))  # ? ?   > subt <‐ gsub("(['?'])", "", comp)  #subt DNA ide   > ide <‐ DNAString(subt)  #ide len   > len <‐ length(ide)  #x y dif   > dif <‐ (lengthB – len )  > dif   #x y   #regionB  p distance   > pdis <‐ dif/lengthB  > pdis  #p distance 4  
  • 18. Biostrings p distance 4 CotEditor pdistanceB.R   [Macintosh HD/ /tg03/bin]   comp <‐ c(compareStrings(x,y))  2 lengthB lengthC   subt <‐ gsub("(['?'])", "", comp)  pdistanceC.R bin   ide <‐ DNAString(subt)  len <‐ length(ide)  dif <‐ (lengthB ‐ len)  pdis <‐ dif/lengthB  x y   > x <‐  > y <‐  > source("/Users/tg03/bin/pdistanceB.R")  bin   > source("pdistanceB.R")  2   > dif  > pdis 
  • 19. region B region C ( ) region B                    617  bp region C                 639  bp pair  number of differences p distance number of differences p distance dom – mol 6 0.010 7   0.011  dom – cas 6 0.010 7 0.011  dom – mus 8 0.013 8 0.013  dom – spr 16 0.026 14 0.022  dom – car 30 0.049 39 0.061  mol – cas 0 0 0 0  mol – mus 4 0.006 1 0.002  mol – spr 14 0.023 17 0.027   mol – car 28 0.045 38 0.059 cas – mus 4 0.006 1 0.002  cas – spr 14 0.023 17 0.027 cas – car 28 0.045 38 0.059 mus – spr  14 0.023 18 0.028 mus – car 28 0.045 39 0.061 spr – car 32 0.052 37 0.058
  • 20.  1 1) regionB p distance x x   > x <‐ c(x )  2) regionC p distance y y   > y <‐ c(y )  3) x y   4) plot()   > plot(x,y,xlim=c( , ), ylim=c( , )) 
  • 21.  1  1) regionB p distance x x   > x <‐ c(0.010, 0.010, 0.013, 0.026, 0.049, 0, 0.006 ,0.023, 0.045, 0.006 , 0.023 , 0.045,  0.023, 0.045, 0.052 )  2) regionC p distance y y   > y <‐ c(0.011, 0.011, 0.013, 0.022, 0.061, 0, 0.002, 0.027, 0.059, 0.002, 0.027, 0.059,  0.028, 0.061, 0.058 )  3) max() x y   > max(x)  [1] 0.052   > max(y)  [1] 0.061  4) plot()   > plot(x,y,xlim=c(0,0.065), ylim=c(0,0.065)) 
  • 22. 2 4’)    > plot(x,y,xlab='regionB',ylab='regionC', xlim=c(0,0.065), ylim=c(0,0.065))  regionB regionC p distance   regionB regionC      
  • 23. 1   (d)         (x3,y3) d3 d4 (x1,y1) (x4,y4) d1 d2 (x2,y2)
  • 24. 2           > xdev <‐ (x‐mean(x))  # x   > ydev <‐ (y‐mean(y))  # y   > bmul<‐ xdev*ydev  # x y   > bnum <‐ sum(bmul)  # x y ( )  > bsqu <‐ xdev^2     # x 2   > bden <‐ sum(bsqu)  #  x 2 ( )  > b <‐ bnum/bden    #      ( )  > b  [1] 1.317939 
  • 25. 3       > a1 <‐ sum(y)/length(y)  > a2 <‐ b*(sum(x)/length(x))  > a <‐ a1‐a2  [1] ‐0.003636326  > abline (a,b)    #a b y = a + bx   
  • 26. regionB regionC y=x   regionB regionC   y=‐0.0036+1.3x   (y=‐0.0036+1.3x y=x )  regionB regionC    
  • 27.   p distance 1 ‐pdis_line.R‐    library("Biostrings");   x <‐"" dom <‐ "[domesticus ]";  mol <‐ "[molossinus ]";  cas <‐ "[castaneus ]";  mus <‐ "[musculus ]";  spr <‐ "[spretus ]";  car <‐ "[caroli ]";   seqs     <‐ c(dom,mol,cas,mus,spr,car); seqnames <‐ c("dom","mol","cas","mus","spr","car"); nseqs <‐ length(seqs); npoints <‐ length(x);  x = vector(length=npoints);  y = vector(length=npoints);  k = 0;    
  • 28.   p distance 2 for (i1 in 1:(nseqs‐1)){   for (i2 in (i1+1):nseqs ){     k = k + 1;  #    cat(sprintf("%d %d\n",i1,i2));     seq1 = DNAString(seqs[i1]);     seq2 = DNAString(seqs[i2]);     seq_b1 = substring( seq1, 1,   617 );     seq_c1 = substring( seq1, 618, 1256 );     seq_b2 = substring( seq2, 1,   617 );     seq_c2 = substring( seq2, 618, 1256 );     len_b  = length( seq_b1 );     cmp_b  = c(compareStrings(seq_b1,seq_b2));     sub_b  = gsub("(['?'])","",cmp_b);     subt_b = DNAString(sub_b);     dif_b  = length(subt_b);     n_b    = len_b ‐ dif_b;     pdis_b = n_b / len_b;   
  • 29.   p distance 3 ‐pdis_line.R‐  x[k]   = pdis_b;     len_c  = length( seq_c1 );     cmp_c  = c(compareStrings(seq_c1,seq_c2));     sub_c  = gsub("(['?'])","",cmp_c);     subt_c = DNAString(sub_c);     dif_c  = length(subt_c);     n_c    = len_c ‐ dif_c;     pdis_c = n_c / len_c;     y[k]   = pdis_c;     cat(sprintf('%s %s %d %g %g\n',seqnames[i1],seqnames[i2],k,pdis_b,pdis_c));    } } xdev <‐ x‐mean(x); ydev <‐ y‐mean(y); b    <‐ sum(xdev*ydev)/sum(xdev*xdev); a    <‐ mean(y) ‐ b*mean(x);   cat(sprintf('a=%g, b=%g\n',a,b));
  • 30.   p distance 4 pdis_line.R      R       ( bin path )  (p distance )  
  • 31.