#This code accompanies the paper Quicke, Jones & Epstein (2007) Correcting the problem of false incongruence due to noise imbalance in the incongruence length difference (ILD) test #Systematic Biology, Volume 56, Issue 3 May 2007 , pages 496 - 503 #The code was written by Owen R. Jones (owen.jones@imperial.ac.uk) # # #This code is for generating Nexus files to serve as PAUP inputs. #In this case the noise is added to the end of the matrix. #There is a function (genNex.addition()) which is implemented in the last few lines of this file. genNex.addition<-function(path1, path2, numpseudo=5,numildreps=20,cong="INC",HSREPS=100){ #Define the names of the output files paupfile<-paste("paup8",cong,"ILD",numildreps,path1,path2,sep="_") paramfile<-paste("param8",cong,"ILD",numildreps,path1,path2,sep="_") helpfile<-paste("help8",cong,"ILD",numildreps,path1,path2,sep="_") write(file=paramfile,paste(numpseudo,"add",HSREPS,numildreps,cong,sep="\t") ,append=F) write(file=paupfile,paste("[***START FUNCTION***]"), append=F) write(file=helpfile,paste("pseudorep", "ildrep", "noiselev", "part", "propnoise"),append=F) # reading the two data partitions # skip the first row (the row of zeros) test1<-data.matrix(read.table(path1,as.is=TRUE,skip=1)) #1st partition test2<-data.matrix(read.table(path2,as.is=TRUE,skip=1)) #2nd partition #get the zero rows test1.zerorow<-data.matrix(read.table(path1,as.is=TRUE,nrows=1)) #1st partition test2.zerorow<-data.matrix(read.table(path2,as.is=TRUE,nrows=1)) #2nd partition originalcombined<-cbind(test1,test2) ntax<-nrow(test1)+1 nchar1<-ncol(test1) nchar2<-ncol(test2) tot.chars<-nchar1+nchar2 # total number of characters charlist<-1:tot.chars # this little block creates names for the taxa of the form 10A, 11A .. taxnames<-paste("A",array(10:(10+ntax-1)),sep="") #START OF THE NEXUS FILE # OUTPUTLOG 4 #How much noise to add. signal<-nchar2 #we want to add noise in these proportions noiseprop<-noiseprop<-c(seq(.10,.90,.10),.995) total<-signal/(1-noiseprop) added<-total-signal #therefore we add this many extra characters, selected at random from within the original added<-ceiling(added) propnoisy<-1-(nchar2/(added+nchar2)) write(file=paramfile,paste("Noiselevels="),append=T) write(file=paramfile,paste(added),append=T) write(file=paramfile,paste("PropNoise="),append=T) write(file=paramfile,paste(propnoisy),append=T) charvect<-1:nchar2 write(file=paupfile,paste("#NEXUS\n"), append=T) write(file=paupfile,paste(" BEGIN PAUP;\ set autoclose=yes\ notifybeep=no\ showexcluded=no;\ LOG file=",paste("!outputlog4_8",cong,path1,path2,sep="_"),"\ start=yes append=yes;\ [logfile created]\ ENDBLOCK; "), append=T) ########################################################################################## write(file=paupfile,"[***200 reps to find the shortest MAX NOISE tree***]", append=T) # I need to repeat this for every level of noise... for (noiselev in 1:length(noiseprop)){ for (r in 1:200){ test1.noisy<-apply(test1,2,sample) write(file=paupfile,paste("[Noiselev = ",noiselev,"Rep = ",r,"of 100.]"), append=T) test2.noisy<-test2 #sample (with replacement) from the original matrix to select extra columns to add cols2add<-sample(charvect, added[noiselev], replace = TRUE, prob = NULL) test2additional<-test2[,cols2add] test2.noisy<-cbind(test2,test2additional) test2.noisy<-apply(test2.noisy,2,sample) test1.zerorow<-rep(0,dim(test1.noisy)[2]) test2.zerorow<-rep(0,dim(test2.noisy)[2]) madenoisy<-cbind(test1.noisy,test2.noisy) madenoisy<-rbind(c(test1.zerorow,test2.zerorow),madenoisy) madenoisynames<-cbind(taxnames,madenoisy) tot.chars2<-dim(madenoisy)[2] write(file=paupfile,paste( "BEGIN DATA;\ DIMENSIONS NCHAR=",tot.chars2," NTAX=",ntax, ";\n FORMAT SYMBOLS=\"0 1\";\n\n MATRIX"), append=T) ##################################################### write.table(file=paupfile,madenoisynames, col.names=F, row.names=F, quote=FALSE, sep="\t", append=T) ##################################################### write(file=helpfile,paste( "999","0",noiselev,"1",propnoisy[noiselev]),append=T) write(file=helpfile,paste( "999","0",noiselev,"2",propnoisy[noiselev]),append=T) write(file=paupfile,paste(";\ ENDBLOCK;"), append=T) write(file=paupfile,paste("BEGIN PAUP;\ log stop;\ include all;\ charset part1=1-",nchar1,"; charset part2=",nchar1+1,"-",tot.chars2,"; Exclude part2;\ HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,"; [ORIGINALpartitionONE]\ Log start append;\ PSCORES 1 /TL=YES RI=YES HI=YES;\ log stop;\ cleartrees;\ include part2;\ exclude part1;\ HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,";\ [ORIGINALpartitionTWO]\ Log start append;\ PSCORES 1 /TL=YES RI=YES HI=YES;\ log stop;\ cleartrees;\ include all;"),append=T)#calculates trees for the two partitions }} write(file=paupfile,"[***starting pseudoreplicate loop***]", append=T) for(pseudorepnumber in 1:numpseudo) { #how many times to repeat the sequence of noise levels write(file=paupfile,paste("[***pseudorepnumber = ",pseudorepnumber," ***]"), append=T) write(file=paupfile,paste("[***ORIGINAL TREE**]"), append=T) nchar1<-dim(test1)[2] nchar2<-dim(test2)[2] tot.chars<-nchar1+nchar2 write(file=paupfile,paste("BEGIN DATA;\ DIMENSIONS NCHAR=",tot.chars," NTAX=",ntax,";\n\n FORMAT SYMBOLS=\"0 1\";\n\n MATRIX"), append=T) # this block does a parsimony analysis of the two original partitions ######1111########################################### originalcombined<-cbind(test1,test2) originalcombined<-rbind(rep(0,dim(originalcombined)[2]),originalcombined) originalwnames<-cbind(taxnames,originalcombined) write.table(file=paupfile,originalwnames, col.names=F, row.names=F, quote=FALSE, sep="\t", append=T) ##################################################### write(file=helpfile,paste( pseudorepnumber,"0","0","1","0"),append=T) write(file=helpfile,paste( pseudorepnumber,"0","0","2","0"),append=T) write(file=paupfile,paste(";\ ENDBLOCK;"), append=T) write(file=paupfile,paste(" BEGIN PAUP;\n\n log stop;\ include all;\ charset part1=1-",nchar1, ";\ charset part2=",nchar1+1,"-",tot.chars,";\n", "Exclude part2;\n HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,";\ [ORIGINALpartitionONE]\ Log start append;\ PSCORES 1 /TL=YES RI=YES HI=YES;\ log stop;\ cleartrees;\ include part2;\ exclude part1;\n HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,";\ [ORIGINALpartitionTWO]\ Log start append;\ PSCORES 1 / TL=YES RI=YES HI=YES;\ log stop;\ cleartrees;\ include all;\n "),append=T) write(file=paupfile,paste(";\ ENDBLOCK;"), append=T) #ILD for the original partitions if(numildreps<1){write(file=paupfile,paste("[***NO ILD TEST HAS BEEN CARRIED OUT***]"), append=T)}else{ for(ildrep in 1:numildreps){ #now need to create lists of characters for part1 and part2 write(file=paupfile,paste("[***ildrep = ",ildrep," ***]"), append=T) #shuffle the characters shufflist<-sample(tot.chars) #create the character partitions part1<-part1<-shufflist[1:nchar1] part2<-shufflist[(nchar1+1):(nchar1+nchar2)] write(file=paupfile,"[***********STARTING ILD REPLICATE] " , append=T) write(file=helpfile,paste( pseudorepnumber,ildrep,"0","1","0"),append=T) write(file=helpfile,paste( pseudorepnumber,ildrep,"0","2","0"),append=T) write(file=paupfile,paste("charset part1="),append=T) write(file=paupfile,part1,append=T) write(file=paupfile,paste("; charset part2="),append=T) write(file=paupfile,part2,append=T) write(file=paupfile,paste(" ;\ [startingILDpartONE]\ include all;\ Exclude part2;\ HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,";\ log start append;\ PSCORES 1 /TL=YES HI=YES RI=YES;\ log stop;\ cleartrees;\ include part2;\ exclude part1;\ [startingILDpartTWO]\ HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,";\ log start append;\ PSCORES 1 /TL=YES HI=YES RI=YES;\ cleartrees;\ include all;\ log stop;"),append=T) }} ##################################################### #NOW ADD NOISE SEQUENTIALLY #NOISE LEVELS for(noiselev in 1:length(added)){ # increasing noise loop #make a copy of the test matrix "test2" test2.noisy<-test2 #sample (with replacement) from the original matrix to select extra columns to add cols2add<-sample(charvect, added[noiselev], replace = TRUE, prob = NULL) test2additional<-test2.noisy[,cols2add] if(length(cols2add)==1){test2additional<-sample(test2additional)}else{ test2additional<-apply(test2additional,2,sample)} test2.noisy<-cbind(test2.noisy,test2additional) madenoisy<-cbind(test1,test2.noisy) madenoisy<-rbind(rep(0,dim(madenoisy)[2]),madenoisy) madenoisynames<-cbind(taxnames,madenoisy) tot.chars2<-dim(madenoisy)[2] nchar1<-dim(test1)[2] nchar2<-dim(test2.noisy)[2] tot.chars2<-nchar1+nchar2 write(file=paupfile,paste( "BEGIN DATA;\ DIMENSIONS NCHAR=",tot.chars2," NTAX=",ntax, ";\n FORMAT SYMBOLS=\"0 1\";\n\n MATRIX"), append=T) #####555################################################ write.table(file=paupfile,madenoisynames, col.names=F, row.names=F, quote=FALSE, sep="\t", append=T) ##################################################### write(file=helpfile,paste( pseudorepnumber,"0",noiselev,"1",propnoisy[noiselev]),append=T) write(file=helpfile,paste( pseudorepnumber,"0",noiselev,"2",propnoisy[noiselev]),append=T) write(file=paupfile,paste(";\ ENDBLOCK;"), append=T) write(file=paupfile,paste("BEGIN PAUP;\ log stop;\ include all;\ charset part1=1-",nchar1,"; charset part2=",nchar1+1,"-",tot.chars2,"; Exclude part2;\ HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,"; [ORIGINALpartitionONE]\ Log start append;\ PSCORES 1 /TL=YES RI=YES HI=YES;\ log stop;\ cleartrees;\ include part2;\ exclude part1;\ HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,";\ [ORIGINALpartitionTWO]\ Log start append;\ PSCORES 1 /TL=YES RI=YES HI=YES;\ log stop;\ cleartrees;\ include all;"),append=T) #calculates trees for the two original partitions if(numildreps<1){write(file=paupfile,paste("[***NO ILD TEST HAS BEEN CARRIED OUT***]"), append=T)}else{ for(ildrep in 1: numildreps){ #now need to create lists of characters for part1 and part2 write(file=paupfile,paste("[***ildrep = ",ildrep," ***]"), append=T) #shuffle the characters shufflist<-sample(tot.chars2) #create the character partitions part1<-part1<-shufflist[1:nchar1] part2<-shufflist[(nchar1+1):(nchar1+nchar2)] write(file=paupfile,"[***********STARTING ILD REPLICATE] " , append=T) write(file=helpfile,paste( pseudorepnumber,ildrep,noiselev,"1",propnoisy[noiselev]),append=T) write(file=helpfile,paste( pseudorepnumber,ildrep,noiselev,"2",propnoisy[noiselev]),append=T) write(file=paupfile,paste("charset part1="),append=T) write(file=paupfile,part1,append=T) write(file=paupfile,paste("; charset part2="),append=T) write(file=paupfile,part2,append=T) write(file=paupfile,paste(" ;\ [startingILDpartONE]\ include all;\ Exclude part2;\ HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,";\ log start append;\ PSCORES 1 /TL=YES HI=YES RI=YES;\ log stop;\ cleartrees;\ include part2;\ exclude part1;\ [startingILDpartTWO]\ HS START=STEPWISE ADDSEQ=RANDOM NCHUCK=1 CHUCKSCORE=1 NREPS=",HSREPS,";\ log start append;\ PSCORES 1 /TL=YES HI=YES RI=YES;\ cleartrees;\ include all;\ log stop;"),append=T) } } } # ends the stepping through extra characters to be permuted, now need to build matrix and paup file } # should be the psuudoreplicates loop } #Implementing the function... setwd("/Users/orj/Documents/Wasps/ILD_test/ILD_test_program/Simulation_files/Type_8_noise/32taxa/") incpairs<-matrix(data=NA,nrow=15,ncol=2) incpairs[,1]<-seq(1,29,2) incpairs[,2]<-seq(2,30,2) for (i in 1:15){ path1<-paste("r32tax",incpairs[i,1],sep="") path2<-paste("r32tax",incpairs[i,2],sep="") genNex.addition(path1, path2, numpseudo=5,numildreps=20,cong="INC",HSREPS=100)} incpairs<-matrix(data=NA,nrow=15,ncol=2) incpairs[,1]<-1:15 incpairs[,2]<-1:15 for (i in 1:15){ path1<-paste("r32tax",incpairs[i,1],sep="") path2<-paste("r32tax",incpairs[i,2],sep="") genNex.addition(path1, path2, numpseudo=5,numildreps=20,cong="CON",HSREPS=100)}