SlideShare una empresa de Scribd logo
1 de 31
2010/06/24
                       
kaneko.satoko(at)ocha.ac.jp 
                   
 
    Bioconductor(Biostrings)        
            (p distance)        
                                            
Bioconductor Biostrings                                   
Biostrings 
> source("http://www.bioconductor.org/biocLite.R") 
> biocLite("Biostrings")    #         1               




> library(Biostrings)     #   R                               
Bioconductor/Biostrings                              1 
> ls("package:Biostrings")        #Biostrings                       

> x <- "CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT" 
> DNAString(x)     #DNA           
  54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> s <- DNAString(x)       #   s DNA        (x)    
> length(s)   
[1] 54          #      s    DNA                           54 

> length(x)     
[1] 1      #       x                                      1 
Bioconductor/Biostrings                      2 
 54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> alphabetFrequency(s, baseOnly=TRUE)    #            
      A  C  G  T other 
[1,] 12 12 15 15     0 

> reverseComplement(s)       #       
  54-letter "DNAString" instance 
seq: AGCATCGATCAGCTAGCATCGATCAGCTAGCTAGCTAGCTAGCTACTACGTACG 

> dna2rna(s)             #RNA    (T -> U) 
  54-letter "RNAString" instance 
seq: CGUACGUAGUAGCUAGCUAGCUAGCUAGCUGAUCGAUGCUAGCUGAUCGAUGCU 
Bioconductor/Biostrings                      3 
 54-letter "DNAString" instance 
seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 

> m1 <- matchPattern("GCTA", s)  #                       
> m1 
  Views on a 54-letter DNAString subject 
subject: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT 
views: 
    start end width 
[1]    12  15     4 [GCTA] 
[2]    16  19     4 [GCTA] 
[3]    20  23     4 [GCTA] 
[4]    24  27     4 [GCTA] 
[5]    38  41     4 [GCTA] 
1
   Makorin1 
   22.61kb 
   (CDS:1446bp) 

  Makorin1‐p1 
  1592bp 
                   region A         region B            region C 

Makorin1‐p1   Makorin1                         processed pseudogene         
Makorin1‐p1 regionB                        Makorin1 regionB                            
Makorin1  mRNA                                              regionB          mRNA  
                                                                        regionC   
                                 

regionB                                                                                            
                                                                     
                                                    regionC                                    
regionB                                    

          regionB regionC                                                   
                                                     
1         

    Makorin1 
    22.61kb 
    (CDS:1446bp) 

    Makorin1‐p1 
    1592bp 
                    region A     region B            region C 

       
1) Makorin1‐p1                      Makorin1                                      

2)                       Makorin1 mRNA                                                
    Makorin1‐p1 Makorin1                                        Makorin1‐p1                       
    Makorin1                                                 

3) Makorin1 mRNA                                                                          
     Makorin1‐p1                     (regionB                                                 
                                                )                         

    regionB regionC                                                           
                                                         
 –             1‐


                                      

 (Null hypothesis)            




                      
                          
                                  
                                          
 –         2‐
                           2                                    

                               
     
         
                   

                  
                       

                      
               


         False negative                         False positive        
2
                                         Makorin1‐p1                         
 Makorin1‐p1                        ortholog rat                                   
 Makorin1‐p1                                                                                    
 Mus musculus domesticus                             5              
                                                                 Subgenus
                      M. booduga
                     M. fragilicauda    India+Lao/Thai
                                           booduga
                    M. terricolor
             1.5    M. macedonicus
       4.3   mya    M. spicilegus
       mya
                     M. spretus
                     M. musculus castaneus
                                                    Palearctic
                                                                 Mus
                                                    musculus
                     M. m. domesticus
                     M. m. molossinus
                     M. caroli
                    M. cookii                   Southeast Asia
                    M. cervicolor                cervicolor
                     M. pahari Coelomys
                       M. mattheyi Nannomys
                   M. platythrix Pyromys
                    Apodemus agrarius
                             Micromys minutus
                      Rattus norvegicus                          (from Suzuki et al. 2004 Mol. Phylogenet. Evol.
0.01                                                                                    33:626-646, Figure 1, 4.)
3
                                                                

       
                                 
                     Makorin1‐p1.fasta 
                                                     
 Mus musculus domesticus
 dom
                 [Macintosh HD/        /tg03/bin]
 Mus musculus molossinus
 mol
                      
 Mus musculus castaneus
       cas
                                              Makorin1‐p1.fasta                          
 Mus musculus musculus
        mus
           regionB 1‐617, regionC 618‐1256        
 Mus spretus
                  spr
 Mus caroli
                   car

p distance        
         2                          (number of differences)/                                  
             alignment                       
dom    CCTGCCCCAA ATGTCAGATC ACATCTCACT TTGTCATTCC AAGTAATCAC TGGGTGGAGT
spr1   .......... ...C...... .......... .......... ......GT.. ..........
car1   .......... ...C.GA... ......A... ..T....... ....G.GT.. .........G

dom-spr1: 3/60 = 0.05 
dom-car1: 9/60 = 0.15 
spr1-car1: 6/60 = 0.10          
region B region C                                                   
regionB regionC                                    number of differences      p distance                       
                  region B                          bp
             region C                           bp
pair 
         number of differences
               p distance
   number of differences
               p distance
 dom – mol
 dom – cas
 dom – mus
 dom – spr
 dom – car
  mol ‐ cas
 mol – mus
 mol – spr
 mol – car
 cas – mus
  cas – spr
  cas – car
 mus – spr 
 mus – car
  spr – car
region B region C                                                  
        Makorin1‐p1                      (region B, regionC)            
                                    p distance             

      
1) domesticus          Biostrings DNAString            
2) B              
3) B                     
4) C              
5) C                     
6) domesticus                      DNAString          B         C            
7)                                        
8)                                              4                               
Biostrings                        p distance                                          1
library(Biostrings)  #R                                            

#Makorin1-p1.fasta domesticus              ""                               
# DNA              dom               
> dom <- DNAString("") 

#dom          1           617                  domB            
> domB <- substring(dom,1,617) 

#domB                           lengthB         (p distance                                )        
> lengthB <- length(domB) 

#dom          618          1256                  domC                  
> domC <- substring(dom, 618,1256) 

#domC                           lengthC         (p distance                                )        
> lengthC <- length(domC) 

#                                                                                  
#        lengthB                                                               
> lengthB 
[1] 617 
Biostrings                        p distance                       2
#Makorin1-p1.fasta molossinus         ""                     
# DNA              mol             
> mol <- DNAString("") 

#mol         1        617                  molB      
> molB <- substring(mol,1,617) 

#mol         618          1256               molC        
> molC <- substring(mol, 618,1256) 


#                         castaneus(cas), musculus(mus), spretus(spr), caroli(car) 
#                                                  number of differences p distance  
#                            
Biostrings                                 p distance           3
> x <- domB 
> y <- molB 

#      x       y(    domB molB)                          
> comp <- c(compareStrings(x,y)) 

#               ?                          ?    
> subt <- gsub("(['?'])", "", comp) 

#subt DNA                ide            
> ide <- DNAString(subt) 

#ide           len        
> len <- length(ide) 

#x y                         dif            
> dif <- (lengthB - len) 
> dif       #x y                                             

#regionB  p distance                
> pdis <- dif/lengthB 
> pdis  #p distance                                4             
Biostrings                           p distance                       4
CotEditor                                   pdistanceB.R         
[Macintosh HD/          /tg03/bin]                    

comp <- c(compareStrings(x,y)) 
                                            2       lengthB lengthC         
subt <- gsub("(['?'])", "", comp) 
                                             pdistanceC.R      bin              
ide <- DNAString(subt) 
len <- length(ide) 
dif <- (lengthB - len) 
pdis <- dif/lengthB 

                                               x y           
> x <‐ 
> y <‐ 
> source("/Users/tg03/bin/pdistanceB.R") 
                           bin                        
> source("pdistanceB.R") 
                2                 
> dif 
> pdis 
region B region C                                              (           )
                 region B                    617  bp
     region C                 639  bp
pair 
        number of differences
      p distance
    number of differences
   p distance
dom – mol
             6
                  0.010
                  7               0.011 
dom – cas
             6
                  0.010
                  7
              0.011 
dom – mus
             8
                  0.013
                  8
              0.013 
dom – spr
            16
                  0.026
               14
                0.022 
dom – car
            30
                  0.049
               39
                0.061 
mol – cas
             0
                     0
                   0
                0 
mol – mus
             4
                  0.006
                  1
              0.002 
mol – spr
            14
                  0.023
               17
                0.027  
mol – car
            28
                  0.045
               38
                0.059
cas – mus
             4
                  0.006
                  1
              0.002 
cas – spr
            14
                  0.023
               17
                0.027
cas – car
            28
                  0.045
               38
                0.059
mus – spr 
           14
                  0.023
               18
                0.028
mus – car
            28
                  0.045
               39
                0.061
                                                                                              
spr – car
            32
                  0.052
               37
                0.058
 1
1) regionB p distance x                   x         
> x <- c(x         ) 

2) regionC   p distance y                 y             
> y <- c(y           ) 

3)                                x   y                         

4) plot()                
> plot(x,y,xlim=c(            ,       ), ylim=c(           ,       )) 
 1            
1) regionB p distance x                          x        
> x <- c(0.010, 0.010, 0.013, 0.026, 0.049, 0, 0.006, 0.023, 0.045, 0.006, 0.023, 0.045, 
0.023, 0.045, 0.052 ) 

2) regionC p distance y                          y          
> y <- c(0.011, 0.011, 0.013, 0.022, 0.061, 0, 0.002, 0.027, 0.059, 0.002, 0.027, 0.059, 
0.028, 0.061, 0.058 ) 

3) max()                                            x    y                                   
> max(x) 
[1] 0.052  

> max(y) 
[1] 0.061 

4) plot()                  
> plot(x,y,xlim=c(0,0.065), ylim=c(0,0.065)) 
2
4’)                                              
> plot(x,y,xlab='regionB',ylab='regionC', xlim=c(0,0.065), ylim=c(0,0.065)) 




regionB regionC    p distance                                 
regionB regionC                                                                 
                         
                                               
1
                                                           
                                                    (d)            
                                             

                                                 
                                                               


                      (x3,y3)

                      d3
                                 d4
(x1,y1)
                    (x4,y4)
    d1
        d2

           (x2,y2)
2
                                                                                  

                                          
                           



                                                                          
                                                                              



> xdev <- (x-mean(x))    # x                    
> ydev <- (y-mean(y))    # y                    
> bmul <- xdev*ydev      # x y                      
> bnum <- sum(bmul)      # x y                                  (   ) 
> bsqu <- xdev^2         # x                2  
> bden <- sum(bsqu)      #  x                2         (   ) 
> b <- bnum/bden         #      (   ) 
> b 
[1] 1.317939 
3
                                                  
                                              
                                 




> a1 <- sum(y)/length(y) 
> a2 <- b*(sum(x)/length(x)) 
> a <- a1-a2 
> a 
[1] -0.003636326 


> abline (a,b)   
#a b                      y = a + bx      
regionB regionC                             y=x                     
                           regionB regionC              
          y=-0.0036+1.3x                            
(y=-0.0036+1.3x   y=x                                  ) 

      regionB regionC                           
                                                             
 
p distance                                             1
-pdis_line.R-   

library("Biostrings"); 
 x <- ""
dom <- "[domesticus              ]"; 
mol <- "[molossinus            ]"; 
cas <- "[castaneus          ]"; 
mus <- "[musculus            ]"; 
spr <- "[spretus         ]"; 
car <- "[caroli        ]";
 
seqs     <- c(dom,mol,cas,mus,spr,car);
seqnames <- c("dom","mol","cas","mus","spr","car");
nseqs <- length(seqs);
npoints <- length(x); 
x = vector(length=npoints); 
y = vector(length=npoints); 
k = 0;  
                    
 
p distance                                        2
for (i1 in 1:(nseqs-1)){
  for (i2 in (i1+1):nseqs ){
    k = k + 1; 
#    cat(sprintf("%d %d\n",i1,i2));
    seq1 = DNAString(seqs[i1]);
    seq2 = DNAString(seqs[i2]);
    seq_b1 = substring( seq1, 1,   617 );
    seq_c1 = substring( seq1, 618, 1256 );
    seq_b2 = substring( seq2, 1,   617 );
    seq_c2 = substring( seq2, 618, 1256 );
    len_b  = length( seq_b1 );
    cmp_b  = c(compareStrings(seq_b1,seq_b2));
    sub_b  = gsub("(['?'])","",cmp_b);
    subt_b = DNAString(sub_b);
    dif_b  = length(subt_b);
    n_b    = len_b - dif_b;
    pdis_b = n_b / len_b; 

                  
 
p distance                                                 3
-pdis_line.R- 

 x[k]   = pdis_b;
     len_c  = length( seq_c1 );
     cmp_c  = c(compareStrings(seq_c1,seq_c2));
     sub_c  = gsub("(['?'])","",cmp_c);
     subt_c = DNAString(sub_c);
     dif_c  = length(subt_c);
     n_c    = len_c - dif_c;
     pdis_c = n_c / len_c;
     y[k]   = pdis_c;
     cat(sprintf('%s %s %d %g %g\n',seqnames[i1],seqnames[i2],k,pdis_b,pdis_c)); 
   }
 }
 xdev <- x-mean(x);
 ydev <- y-mean(y);
 b    <- sum(xdev*ydev)/sum(xdev*xdev);
 a    <- mean(y) - b*mean(x);
  
 cat(sprintf('a=%g, b=%g\n',a,b));
 
p distance                                           4
pdis_line.R         
                                                           




         R                    
                                              
                
(                      bin            path                    ) 

                        (p distance              )                  
 
                     
 

         

Más contenido relacionado

Destacado

100513_homology_search(ensembl)
100513_homology_search(ensembl)100513_homology_search(ensembl)
100513_homology_search(ensembl)ocha_kaneko
 
100610_blastclustalw
100610_blastclustalw100610_blastclustalw
100610_blastclustalwocha_kaneko
 
100701_statistics3
100701_statistics3100701_statistics3
100701_statistics3ocha_kaneko
 
100617_statistics1
100617_statistics1100617_statistics1
100617_statistics1ocha_kaneko
 

Destacado (6)

100513_homology_search(ensembl)
100513_homology_search(ensembl)100513_homology_search(ensembl)
100513_homology_search(ensembl)
 
100520_dotplot
100520_dotplot100520_dotplot
100520_dotplot
 
090601-dotplot
090601-dotplot090601-dotplot
090601-dotplot
 
100610_blastclustalw
100610_blastclustalw100610_blastclustalw
100610_blastclustalw
 
100701_statistics3
100701_statistics3100701_statistics3
100701_statistics3
 
100617_statistics1
100617_statistics1100617_statistics1
100617_statistics1
 

Más de ocha_kaneko

100506-unix-ensembl
100506-unix-ensembl100506-unix-ensembl
100506-unix-ensemblocha_kaneko
 
100422-intro,setup
100422-intro,setup100422-intro,setup
100422-intro,setupocha_kaneko
 
090622_blast-clustalw
090622_blast-clustalw090622_blast-clustalw
090622_blast-clustalwocha_kaneko
 
090615-TogoWS SOAP
090615-TogoWS SOAP090615-TogoWS SOAP
090615-TogoWS SOAPocha_kaneko
 
090608-TogoWS REST
090608-TogoWS REST090608-TogoWS REST
090608-TogoWS RESTocha_kaneko
 
090518_unix-ensembl
090518_unix-ensembl090518_unix-ensembl
090518_unix-ensemblocha_kaneko
 
090511-intro, setup
090511-intro, setup090511-intro, setup
090511-intro, setupocha_kaneko
 

Más de ocha_kaneko (8)

100506-unix-ensembl
100506-unix-ensembl100506-unix-ensembl
100506-unix-ensembl
 
100422-intro,setup
100422-intro,setup100422-intro,setup
100422-intro,setup
 
Statistics_R
Statistics_RStatistics_R
Statistics_R
 
090622_blast-clustalw
090622_blast-clustalw090622_blast-clustalw
090622_blast-clustalw
 
090615-TogoWS SOAP
090615-TogoWS SOAP090615-TogoWS SOAP
090615-TogoWS SOAP
 
090608-TogoWS REST
090608-TogoWS REST090608-TogoWS REST
090608-TogoWS REST
 
090518_unix-ensembl
090518_unix-ensembl090518_unix-ensembl
090518_unix-ensembl
 
090511-intro, setup
090511-intro, setup090511-intro, setup
090511-intro, setup
 

Último

How to setup Pycharm environment for Odoo 17.pptx
How to setup Pycharm environment for Odoo 17.pptxHow to setup Pycharm environment for Odoo 17.pptx
How to setup Pycharm environment for Odoo 17.pptxCeline George
 
Unit 3 Emotional Intelligence and Spiritual Intelligence.pdf
Unit 3 Emotional Intelligence and Spiritual Intelligence.pdfUnit 3 Emotional Intelligence and Spiritual Intelligence.pdf
Unit 3 Emotional Intelligence and Spiritual Intelligence.pdfDr Vijay Vishwakarma
 
ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.MaryamAhmad92
 
Sensory_Experience_and_Emotional_Resonance_in_Gabriel_Okaras_The_Piano_and_Th...
Sensory_Experience_and_Emotional_Resonance_in_Gabriel_Okaras_The_Piano_and_Th...Sensory_Experience_and_Emotional_Resonance_in_Gabriel_Okaras_The_Piano_and_Th...
Sensory_Experience_and_Emotional_Resonance_in_Gabriel_Okaras_The_Piano_and_Th...Pooja Bhuva
 
Python Notes for mca i year students osmania university.docx
Python Notes for mca i year students osmania university.docxPython Notes for mca i year students osmania university.docx
Python Notes for mca i year students osmania university.docxRamakrishna Reddy Bijjam
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfAdmir Softic
 
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...Nguyen Thanh Tu Collection
 
This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.christianmathematics
 
Google Gemini An AI Revolution in Education.pptx
Google Gemini An AI Revolution in Education.pptxGoogle Gemini An AI Revolution in Education.pptx
Google Gemini An AI Revolution in Education.pptxDr. Sarita Anand
 
REMIFENTANIL: An Ultra short acting opioid.pptx
REMIFENTANIL: An Ultra short acting opioid.pptxREMIFENTANIL: An Ultra short acting opioid.pptx
REMIFENTANIL: An Ultra short acting opioid.pptxDr. Ravikiran H M Gowda
 
On_Translating_a_Tamil_Poem_by_A_K_Ramanujan.pptx
On_Translating_a_Tamil_Poem_by_A_K_Ramanujan.pptxOn_Translating_a_Tamil_Poem_by_A_K_Ramanujan.pptx
On_Translating_a_Tamil_Poem_by_A_K_Ramanujan.pptxPooja Bhuva
 
Beyond_Borders_Understanding_Anime_and_Manga_Fandom_A_Comprehensive_Audience_...
Beyond_Borders_Understanding_Anime_and_Manga_Fandom_A_Comprehensive_Audience_...Beyond_Borders_Understanding_Anime_and_Manga_Fandom_A_Comprehensive_Audience_...
Beyond_Borders_Understanding_Anime_and_Manga_Fandom_A_Comprehensive_Audience_...Pooja Bhuva
 
Micro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdfMicro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdfPoh-Sun Goh
 
Basic Civil Engineering first year Notes- Chapter 4 Building.pptx
Basic Civil Engineering first year Notes- Chapter 4 Building.pptxBasic Civil Engineering first year Notes- Chapter 4 Building.pptx
Basic Civil Engineering first year Notes- Chapter 4 Building.pptxDenish Jangid
 
General Principles of Intellectual Property: Concepts of Intellectual Proper...
General Principles of Intellectual Property: Concepts of Intellectual  Proper...General Principles of Intellectual Property: Concepts of Intellectual  Proper...
General Principles of Intellectual Property: Concepts of Intellectual Proper...Poonam Aher Patil
 
On National Teacher Day, meet the 2024-25 Kenan Fellows
On National Teacher Day, meet the 2024-25 Kenan FellowsOn National Teacher Day, meet the 2024-25 Kenan Fellows
On National Teacher Day, meet the 2024-25 Kenan FellowsMebane Rash
 
Towards a code of practice for AI in AT.pptx
Towards a code of practice for AI in AT.pptxTowards a code of practice for AI in AT.pptx
Towards a code of practice for AI in AT.pptxJisc
 
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...ZurliaSoop
 
Salient Features of India constitution especially power and functions
Salient Features of India constitution especially power and functionsSalient Features of India constitution especially power and functions
Salient Features of India constitution especially power and functionsKarakKing
 

Último (20)

How to setup Pycharm environment for Odoo 17.pptx
How to setup Pycharm environment for Odoo 17.pptxHow to setup Pycharm environment for Odoo 17.pptx
How to setup Pycharm environment for Odoo 17.pptx
 
Unit 3 Emotional Intelligence and Spiritual Intelligence.pdf
Unit 3 Emotional Intelligence and Spiritual Intelligence.pdfUnit 3 Emotional Intelligence and Spiritual Intelligence.pdf
Unit 3 Emotional Intelligence and Spiritual Intelligence.pdf
 
Mehran University Newsletter Vol-X, Issue-I, 2024
Mehran University Newsletter Vol-X, Issue-I, 2024Mehran University Newsletter Vol-X, Issue-I, 2024
Mehran University Newsletter Vol-X, Issue-I, 2024
 
ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.
 
Sensory_Experience_and_Emotional_Resonance_in_Gabriel_Okaras_The_Piano_and_Th...
Sensory_Experience_and_Emotional_Resonance_in_Gabriel_Okaras_The_Piano_and_Th...Sensory_Experience_and_Emotional_Resonance_in_Gabriel_Okaras_The_Piano_and_Th...
Sensory_Experience_and_Emotional_Resonance_in_Gabriel_Okaras_The_Piano_and_Th...
 
Python Notes for mca i year students osmania university.docx
Python Notes for mca i year students osmania university.docxPython Notes for mca i year students osmania university.docx
Python Notes for mca i year students osmania university.docx
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdf
 
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
TỔNG ÔN TẬP THI VÀO LỚP 10 MÔN TIẾNG ANH NĂM HỌC 2023 - 2024 CÓ ĐÁP ÁN (NGỮ Â...
 
This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.This PowerPoint helps students to consider the concept of infinity.
This PowerPoint helps students to consider the concept of infinity.
 
Google Gemini An AI Revolution in Education.pptx
Google Gemini An AI Revolution in Education.pptxGoogle Gemini An AI Revolution in Education.pptx
Google Gemini An AI Revolution in Education.pptx
 
REMIFENTANIL: An Ultra short acting opioid.pptx
REMIFENTANIL: An Ultra short acting opioid.pptxREMIFENTANIL: An Ultra short acting opioid.pptx
REMIFENTANIL: An Ultra short acting opioid.pptx
 
On_Translating_a_Tamil_Poem_by_A_K_Ramanujan.pptx
On_Translating_a_Tamil_Poem_by_A_K_Ramanujan.pptxOn_Translating_a_Tamil_Poem_by_A_K_Ramanujan.pptx
On_Translating_a_Tamil_Poem_by_A_K_Ramanujan.pptx
 
Beyond_Borders_Understanding_Anime_and_Manga_Fandom_A_Comprehensive_Audience_...
Beyond_Borders_Understanding_Anime_and_Manga_Fandom_A_Comprehensive_Audience_...Beyond_Borders_Understanding_Anime_and_Manga_Fandom_A_Comprehensive_Audience_...
Beyond_Borders_Understanding_Anime_and_Manga_Fandom_A_Comprehensive_Audience_...
 
Micro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdfMicro-Scholarship, What it is, How can it help me.pdf
Micro-Scholarship, What it is, How can it help me.pdf
 
Basic Civil Engineering first year Notes- Chapter 4 Building.pptx
Basic Civil Engineering first year Notes- Chapter 4 Building.pptxBasic Civil Engineering first year Notes- Chapter 4 Building.pptx
Basic Civil Engineering first year Notes- Chapter 4 Building.pptx
 
General Principles of Intellectual Property: Concepts of Intellectual Proper...
General Principles of Intellectual Property: Concepts of Intellectual  Proper...General Principles of Intellectual Property: Concepts of Intellectual  Proper...
General Principles of Intellectual Property: Concepts of Intellectual Proper...
 
On National Teacher Day, meet the 2024-25 Kenan Fellows
On National Teacher Day, meet the 2024-25 Kenan FellowsOn National Teacher Day, meet the 2024-25 Kenan Fellows
On National Teacher Day, meet the 2024-25 Kenan Fellows
 
Towards a code of practice for AI in AT.pptx
Towards a code of practice for AI in AT.pptxTowards a code of practice for AI in AT.pptx
Towards a code of practice for AI in AT.pptx
 
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Hongkong ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
 
Salient Features of India constitution especially power and functions
Salient Features of India constitution especially power and functionsSalient Features of India constitution especially power and functions
Salient Features of India constitution especially power and functions
 

100624_statistics2

  • 1. 2010/06/24   kaneko.satoko(at)ocha.ac.jp   
  • 2.     Bioconductor(Biostrings)     (p distance)      
  • 3. Bioconductor Biostrings   Biostrings  > source("h>p://www.bioconductor.org/biocLite.R")  > biocLite(“Biostrings”)    # 1   > library(Biostrings)   # R  
  • 4. Bioconductor/Biostrings  1  > ls(“package:Biostrings”)    #Biostrings   > x <‐ "CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT"  > DNAString(x)   #DNA     54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > s <‐ DNAString(x)    # s DNA (x)   > length(s)    [1] 54     # s  DNA 54  > length(x)    [1] 1    # x 1 
  • 5. Bioconductor/Biostrings  2   54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > alphabetFrequency(s, baseOnly=TRUE)  #         A  C  G  T other  [1,] 12 12 15 15     0  > reverseComplement(s)   #     54‐le>er "DNAString" instance  seq: AGCATCGATCAGCTAGCATCGATCAGCTAGCTAGCTAGCTAGCTACTACGTACG  > dna2rna(s)      #RNA (T ‐>U)    54‐le>er "RNAString" instance  seq: CGUACGUAGUAGCUAGCUAGCUAGCUAGCUGAUCGAUGCUAGCUGAUCGAUGCU 
  • 6. Bioconductor/Biostrings  3   54‐le>er "DNAString" instance  seq: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  > m1 <‐ matchPa>ern(“GCTA”, s)  #   > m1    Views on a 54‐le>er DNAString subject  subject: CGTACGTAGTAGCTAGCTAGCTAGCTAGCTGATCGATGCTAGCTGATCGATGCT  views:      start end width  [1]    12  15     4 [GCTA]  [2]    16  19     4 [GCTA]  [3]    20  23     4 [GCTA]  [4]    24  27     4 [GCTA]  [5]    38  41     4 [GCTA] 
  • 7. 1 Makorin1  22.61kb  (CDS:1446bp)  Makorin1‐p1  1592bp  region A   region B   region C  Makorin1‐p1 Makorin1 processed pseudogene   Makorin1‐p1 regionB Makorin1 regionB   Makorin1  mRNA regionB mRNA   regionC     regionB     regionC   regionB   regionB regionC    
  • 8. 1 Makorin1  22.61kb  (CDS:1446bp)  Makorin1‐p1  1592bp  region A   region B   region C    1) Makorin1‐p1 Makorin1   2)  Makorin1 mRNA       Makorin1‐p1 Makorin1 Makorin1‐p1       Makorin1   3) Makorin1 mRNA        Makorin1‐p1 (regionB   )    regionB regionC      
  • 9.  – 1‐   (Null hypothesis)          
  • 10.  – 2‐ 2   False negative  False positive  
  • 11. 2 Makorin1‐p1   Makorin1‐p1 ortholog rat   Makorin1‐p1   Mus musculus domesticus 5   Subgenus M. booduga M. fragilicauda India+Lao/Thai booduga M. terricolor 1.5 M. macedonicus 4.3 mya M. spicilegus mya M. spretus M. musculus castaneus Palearctic Mus musculus M. m. domesticus M. m. molossinus M. caroli M. cookii Southeast Asia M. cervicolor cervicolor M. pahari Coelomys M. mattheyi Nannomys M. platythrix Pyromys Apodemus agrarius Micromys minutus Rattus norvegicus (from Suzuki et al. 2004 Mol. Phylogenet. Evol. 0.01 33:626-646, Figure 1, 4.)
  • 12. 3   Makorin1‐p1.fasta    Mus musculus domesticus dom [Macintosh HD/ /tg03/bin] Mus musculus molossinus mol   Mus musculus castaneus cas Makorin1‐p1.fasta   Mus musculus musculus mus regionB 1‐617, regionC 618‐1256   Mus spretus spr Mus caroli car p distance   2 (number of differences)/   alignment   dom CCTGCCCCAA ATGTCAGATC ACATCTCACT TTGTCATTCC AAGTAATCAC TGGGTGGAGT spr1 .......... ...C...... .......... .......... ......GT.. .......... car1 .......... ...C.GA... ......A... ..T....... ....G.GT.. .........G dom‐spr1: 3/60 = 0.05  dom‐car1: 9/60 = 0.15  spr1‐car1: 6/60 =  0.10   
  • 13. region B region C regionB regionC number of differences p distance   region B                          bp region C                           bp pair  number of differences p distance number of differences p distance dom – mol dom – cas dom – mus dom – spr dom – car mol ‐ cas mol – mus mol – spr mol – car cas – mus cas – spr cas – car mus – spr  mus – car spr – car
  • 14. region B region C Makorin1‐p1 (region B, regionC)   p distance     1) domesticus Biostrings DNAString   2) B   3) B   4) C   5) C   6) domesticus DNAString B C   7)    8)  4  
  • 15. Biostrings p distance 1 library(Biostrings)  #R   #Makorin1‐p1.fasta domesticus ""   # DNA dom   > dom <- DNAString("")  #dom 1 617 domB   > domB <- substring(dom,1,617)  #domB lengthB (p distance )   > lengthB <- length(domB)  #dom 618 1256 domC   > domC <- substring(dom, 618,1256)  #domC lengthC (p distance )   > lengthC <- length(domC)  #   # lengthB   > lengthB  [1] 617 
  • 16. Biostrings p distance 2 #Makorin1‐p1.fasta molossinus ""   # DNA mol   > mol <- DNAString("")  #mol 1 617 molB   > molB <- substring(mol,1,617)  #mol 618 1256 molC   > molC <- substring(mol, 618,1256)  # castaneus(cas), musculus(mus), spretus(spr), caroli(car)  # number of differences p distance   #  
  • 17. Biostrings p distance 3 > x <- domB  > y <- molB  # x y( domB molB)   >  comp <- c(compareStrings(x,y))  # ? ?   > subt <- gsub("(['?'])", "", comp)  #subt DNA ide   > ide <- DNAString(subt)  #ide len   > len <- length(ide)  #x y dif   > dif <- (lengthB - len)  > dif   #x y   #regionB  p distance   > pdis <- dif/lengthB  > pdis  #p distance 4  
  • 18. Biostrings p distance 4 CotEditor pdistanceB.R   [Macintosh HD/ /tg03/bin]   comp <- c(compareStrings(x,y))  2 lengthB lengthC   subt <- gsub("(['?'])", "", comp)  pdistanceC.R bin   ide <- DNAString(subt)  len <- length(ide)  dif <- (lengthB - len)  pdis <- dif/lengthB  x y   > x <-  > y <-  > source("/Users/tg03/bin/pdistanceB.R")  bin   > source("pdistanceB.R")  2   > dif  > pdis 
  • 19. region B region C ( ) region B                    617  bp region C                 639  bp pair  number of differences p distance number of differences p distance dom – mol 6 0.010 7   0.011  dom – cas 6 0.010 7 0.011  dom – mus 8 0.013 8 0.013  dom – spr 16 0.026 14 0.022  dom – car 30 0.049 39 0.061  mol – cas 0 0 0 0  mol – mus 4 0.006 1 0.002  mol – spr 14 0.023 17 0.027   mol – car 28 0.045 38 0.059 cas – mus 4 0.006 1 0.002  cas – spr 14 0.023 17 0.027 cas – car 28 0.045 38 0.059 mus – spr  14 0.023 18 0.028 mus – car 28 0.045 39 0.061 spr – car 32 0.052 37 0.058
  • 20.  1 1) regionB p distance x x   > x <‐ c(x )  2) regionC p distance y y   > y <‐ c(y )  3) x y   4) plot()   > plot(x,y,xlim=c( , ), ylim=c( , )) 
  • 21.  1  1) regionB p distance x x   > x <‐ c(0.010, 0.010, 0.013, 0.026, 0.049, 0, 0.006 ,0.023, 0.045, 0.006 , 0.023 , 0.045,  0.023, 0.045, 0.052 )  2) regionC p distance y y   > y <‐ c(0.011, 0.011, 0.013, 0.022, 0.061, 0, 0.002, 0.027, 0.059, 0.002, 0.027, 0.059,  0.028, 0.061, 0.058 )  3) max() x y   > max(x)  [1] 0.052   > max(y)  [1] 0.061  4) plot()   > plot(x,y,xlim=c(0,0.065), ylim=c(0,0.065)) 
  • 22. 2 4’)    > plot(x,y,xlab='regionB',ylab='regionC', xlim=c(0,0.065), ylim=c(0,0.065))  regionB regionC p distance   regionB regionC      
  • 23. 1   (d)         (x3,y3) d3 d4 (x1,y1) (x4,y4) d1 d2 (x2,y2)
  • 24. 2           > xdev <- (x-mean(x))  # x   > ydev <- (y-mean(y))  # y   > bmul <- xdev*ydev  # x y   > bnum <- sum(bmul)  # x y ( )  > bsqu <- xdev^2     # x 2   > bden <- sum(bsqu)  #  x 2 ( )  > b <- bnum/bden    #      ( )  > b  [1] 1.317939 
  • 25. 3       > a1 <- sum(y)/length(y)  > a2 <- b*(sum(x)/length(x))  > a <- a1-a2  [1] -0.003636326  > abline (a,b)    #a b y = a + bx   
  • 26. regionB regionC y=x   regionB regionC   y=-0.0036+1.3x   (y=-0.0036+1.3x y=x )  regionB regionC    
  • 27.   p distance 1 -pdis_line.R-    library("Biostrings");   x <- "" dom <- "[domesticus ]";  mol <- "[molossinus ]";  cas <- "[castaneus ]";  mus <- "[musculus ]";  spr <- "[spretus ]";  car <- "[caroli ]";   seqs     <- c(dom,mol,cas,mus,spr,car); seqnames <- c("dom","mol","cas","mus","spr","car"); nseqs <- length(seqs); npoints <- length(x);  x = vector(length=npoints);  y = vector(length=npoints);  k = 0;    
  • 28.   p distance 2 for (i1 in 1:(nseqs-1)){   for (i2 in (i1+1):nseqs ){     k = k + 1;  #    cat(sprintf("%d %d\n",i1,i2));     seq1 = DNAString(seqs[i1]);     seq2 = DNAString(seqs[i2]);     seq_b1 = substring( seq1, 1,   617 );     seq_c1 = substring( seq1, 618, 1256 );     seq_b2 = substring( seq2, 1,   617 );     seq_c2 = substring( seq2, 618, 1256 );     len_b  = length( seq_b1 );     cmp_b  = c(compareStrings(seq_b1,seq_b2));     sub_b  = gsub("(['?'])","",cmp_b);     subt_b = DNAString(sub_b);     dif_b  = length(subt_b);     n_b    = len_b - dif_b;     pdis_b = n_b / len_b;   
  • 29.   p distance 3 -pdis_line.R-  x[k]   = pdis_b;     len_c  = length( seq_c1 );     cmp_c  = c(compareStrings(seq_c1,seq_c2));     sub_c  = gsub("(['?'])","",cmp_c);     subt_c = DNAString(sub_c);     dif_c  = length(subt_c);     n_c    = len_c - dif_c;     pdis_c = n_c / len_c;     y[k]   = pdis_c;     cat(sprintf('%s %s %d %g %g\n',seqnames[i1],seqnames[i2],k,pdis_b,pdis_c));    } } xdev <- x-mean(x); ydev <- y-mean(y); b    <- sum(xdev*ydev)/sum(xdev*xdev); a    <- mean(y) - b*mean(x);   cat(sprintf('a=%g, b=%g\n',a,b));
  • 30.   p distance 4 pdis_line.R      R       ( bin path )  (p distance )  
  • 31.