#!/usr/local/bin/perl
# Author: Robert Waterhouse
#
# Concatenate per-BUSCO trimmed alignments into one FASTA file.
#
# Input : every file matching busco_alns/*.trm. Each file is a FASTA
#         alignment for one BUSCO; the BUSCO id is taken from the file
#         path (pattern EOG followed by 8 non-space characters) and the
#         species code is the first 6 non-space characters of each
#         FASTA header.
# Output: concatenated_buscos.aln, one record per species (sorted by
#         species code), whose sequence is the per-BUSCO sequences
#         joined in the order the input files were listed.
#
# A species that is absent from a given BUSCO alignment is padded with
# '-' for the length of that alignment so every output row has the
# same number of columns.

use strict;
use warnings;

my $aln_dir  = 'busco_alns';
my $out_file = 'concatenated_buscos.aln';

# list the BUSCO trimmed alignment files in busco_alns
# (glob avoids spawning a shell and copes with odd file names)
my @usc_buscos = glob("$aln_dir/*.trm");

# hash to store species and eogs and aligned sequences
my %eog2spe2seq;
my %specs;

# alignment length (columns) of each eog, used to pad missing species
my %eog2len;

# array to store the eogs in input order
my @eogs = ();

# cycle through each file and save data
BUSCO:
foreach my $busco (@usc_buscos) {

    # the BUSCO id is embedded in the file path
    my $eog;
    if ($busco =~ /(EOG\S{8})/) {
        $eog = $1;
    }
    else {
        # without an id the data would collide under an empty key
        warn "Skipping $busco: no EOG identifier in file name\n";
        next BUSCO;
    }

    # slurp the whole alignment file
    open(my $in, '<', $busco) or die "Cannot read $busco: $!";
    my $fasta = do { local $/; <$in> };
    close($in);
    $fasta = '' unless defined $fasta;    # empty file

    # split into records on '>' at the start of a line; the first
    # chunk is whatever precedes the first header, so discard it
    my @eachfasta = split(/^>/m, $fasta);
    shift(@eachfasta);

    RECORD:
    foreach my $fas (@eachfasta) {
        my @lines  = split(/\n/, $fas);
        my $header = shift(@lines);
        $header = '' unless defined $header;

        # species code = first 6 non-space characters of the header
        my $spe;
        if ($header =~ /^(\S{6})/) {
            $spe = $1;
        }
        else {
            warn "Skipping record '$header' in $busco: no species code\n";
            next RECORD;
        }

        # join wrapped sequence lines; drop stray whitespace / CRs
        my $seq = join("", @lines);
        $seq =~ s/\s+//g;

        # remember the alignment width and flag ragged alignments
        if (!defined $eog2len{$eog}) {
            $eog2len{$eog} = length($seq);
        }
        elsif ($eog2len{$eog} != length($seq)) {
            warn "Sequence for $spe in $busco has length "
                . length($seq) . ", expected $eog2len{$eog}\n";
        }

        $eog2spe2seq{$eog}{$spe} = $seq;
        $specs{$spe} = 1;
    }

    push(@eogs, $eog);
}

# print out concatenated sequences
open(my $out, '>', $out_file) or die "Cannot write $out_file: $!";
foreach my $spe (sort keys %specs) {
    print {$out} ">$spe\n";
    foreach my $eog (@eogs) {
        my $seq = $eog2spe2seq{$eog}{$spe};
        if (!defined $seq) {
            # species missing from this BUSCO: pad with gaps
            my $len = defined $eog2len{$eog} ? $eog2len{$eog} : 0;
            $seq = '-' x $len;
        }
        print {$out} $seq;
    }
    print {$out} "\n";
}
# buffered write errors only surface at close
close($out) or die "Error closing $out_file: $!";