多线程拆分bam文件

#!perl
use warnings;
#use strict;
use threads;
use Thread::Semaphore;
use File::Basename qw(basename);

die "perl $0 <bam> <threads>\n" if @ARGV != 2;

my $semaphore = Thread::Semaphore->new($ARGV[1]);
my $id = basename($ARGV[0], ".bam");
if(-s "$ARGV[0].bai")
{
	
}else{
	`samtools index $ARGV[0]`;
}
my $outdir = "${id}_split";
mkdir $outdir;

my (%hash, $hd, $rg, $pg);
open HEAD, "samtools view -H $ARGV[0] |" or die $!;
while(<HEAD>)
{
	if(/^\@SQ/)
	{
		my ($chr) = $_ =~ /SN:(\S+)/;
		$hash{$chr} = $_;
		next;
	}
	if(/^\@HD/)
	{
		$hd .= "$_";
		next;
	}
	if(/^\@RG/)
	{
		$rg .= "$_";
		next;
	}
	if(/^\@PG/)
	{
		$pg .= "$_";
		next;
	}
}

foreach(keys %hash)
{
	$semaphore->down();
	my $thread = threads->create(\&splitchr, $_);
	$thread->detach();
}

&waitDone;

sub waitDone{
	my $num = 0;
	while($num < $ARGV[1])
	{
		$semaphore->down();
		$num ++;
	}
}

sub splitchr{
	my $chr = shift;
	open $chr, "> $outdir/$id.$chr.sam" or die $!;
	print $chr "$hd$hash{$chr}$rg$pg";
	my $content = `samtools view $ARGV[0] $chr`;
	print $chr "$content";
	close $chr;
	`samtools view -bS $outdir/$id.$chr.sam > $outdir/$id.$chr.bam`;
	`rm $outdir/$id.$chr.sam -rf`;
}

仅仅适合内存较大的集群~

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。