多线程拆分bam文件
#!perl use warnings; #use strict; use threads; use Thread::Semaphore; use File::Basename qw(basename); die "perl $0 <bam> <threads>\n" if @ARGV != 2; my $semaphore = Thread::Semaphore->new($ARGV[1]); my $id = basename($ARGV[0], ".bam"); if(-s "$ARGV[0].bai") { }else{ `samtools index $ARGV[0]`; } my $outdir = "${id}_split"; mkdir $outdir; my (%hash, $hd, $rg, $pg); open HEAD, "samtools view -H $ARGV[0] |" or die $!; while(<HEAD>) { if(/^\@SQ/) { my ($chr) = $_ =~ /SN:(\S+)/; $hash{$chr} = $_; next; } if(/^\@HD/) { $hd .= "$_"; next; } if(/^\@RG/) { $rg .= "$_"; next; } if(/^\@PG/) { $pg .= "$_"; next; } } foreach(keys %hash) { $semaphore->down(); my $thread = threads->create(\&splitchr, $_); $thread->detach(); } &waitDone; sub waitDone{ my $num = 0; while($num < $ARGV[1]) { $semaphore->down(); $num ++; } } sub splitchr{ my $chr = shift; open $chr, "> $outdir/$id.$chr.sam" or die $!; print $chr "$hd$hash{$chr}$rg$pg"; my $content = `samtools view $ARGV[0] $chr`; print $chr "$content"; close $chr; `samtools view -bS $outdir/$id.$chr.sam > $outdir/$id.$chr.bam`; `rm $outdir/$id.$chr.sam -rf`; }
仅仅适合内存较大的集群~
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。