Looking for motive in a protein sequence?

I wrote the following script to search for a motive (substring) in a protein (string) sequence. I am starting, and writing it was hard for me. I have two questions regarding the same: 1. Errors: The following script have several errors. I was with him for a long time, but did not understand what and why? 2. To search for one motive (substring) in a protein (string) sequence, the following script was written. My next task is to search for several motifs in a specific order (for example: motif1 motif2 motif 3 motif4 this order cannot be changed) in the same protein sequences (lines)

        use strict;
        use warnings;

        my @file_data=();
        my $motif ='';
        my $protein_seq='';
        my $h= '[VLIM]';   
        my $s= '[AG]';
        my $x= '[ARNDCEQGHILKMFPSTWYV]';
        my $regexp = "($h){4}D($x){4}D"; #motif to be searched is hhhhDxxxxD
        my @locations=();

        @file_data= get_file_data("seq.txt");

        $protein_seq= extract_sequence(@file_data); 

    #searching for a motif hhhhDxxxxD in each protein sequence in the give file

        foreach my $line(@file_data){
        if ($motif=~ /$regexp/){
        print "found motif \n\n";
        }
        else {
        print "not found \n\n";
        }
        }
#recording the location/position of motif to be outputed

        @locations= match_position($regexp,$seq);
        if (@locations){ 
        print "Searching for motifs $regexp \n";
        print "Catalytic site is at location:\n";
        }
        else{
        print "motif not found \n\n";
        }
        exit;

        sub get_file_data{
        my ($filename)=@_;
        use strict;
        use warnings;
        my $sequence='';

        foreach my $line(@file_data){

        if ($line=~ /^\s*$/){
        next;
                }
        elsif ($line=~ /^\s*#/){
        next;
        }
        elsif ($line=~ /^>/){
        next;
        }
        else {
        $sequence.=$line;
        }
        }
        $sequence=~ s/\s//g;
        return $sequence;
        }

        sub(match_positions) {
        my ($regexp, $sequence)=@_;
        use strict;
        my @position=();
        while ($sequence=~ /$regexp/ig){
        push (@position, $-[0]);
        }
        return @position;
        }
+3
source share
2
  • , elsif, -, . get_file_data, :

    next if $line =~ /^\s*$|^>/; 
    $sequence .= $line;
    

    - - , . , . , , #-. : /^\s*$|^>|^#-/

  • , my position=(); @, , perl , - position().

  • :

     my $h= '[VLIM]';   
     my $s= '[AG]';
     my $x= '[ARNDCEQGHILKMFPSTWYV]';
    

    $h , , sub VLIM.

  • -, $&. pos($sequence)-length($&)+1

    push @positions, $-[0];
    

    , :

    use English qw<-no_match_vars>;
    ...
    push @positions, $LAST_MATCH_START[0];
    
  • :

    use IO::File;
    ...
    # Use real file handles
    my $fh = IO::File->new( "<seq.txt" );
    get_file_data( $fh ); # They can be passed
    ...
    sub get_file_data{
        my $file_handle = shift; 
        ...
        # while loop conserves resources
        while ( my $line = <$file_handle> ) { 
            next if $line =~ /^\s*$|^>/;
            $sequence .= $line;
        } 
    
  • - :

    . Smart::Comments

    . script:

     use Smart::Comments;
    

    . , , , , $sequence, :

    ### $sequence
    exit 0;
    

    . , .

+3
  • "elsif" "elseif".
  • , @file_data @fasta_file_data ?

match_positions:

  • .
  • " " "my @position".
  • /regexp/ig /$regexp/ig.
0

Source: https://habr.com/ru/post/1707811/


All Articles