Extract a block of text where the final expression depends on the initial

Question

Extract a block of text where the final expression depends on the initial

I have a text string structured as follows:

= Some Heading (1)

Some text

== Some Sub-Heading (2)

Some more text

=== Some Sub-sub-heading (3)

Some details here

= Some other Heading (4)

I want to extract the contents of the second header, including any subsection. I do not know in advance what the depth of the second heading is, so I need to match the next heading from there, which has the same depth, or shallower, or the end of the line.

In the above example, this will give:

== Some Sub-Heading (2)

Some more text

=== Some Sub-sub-heading (3)

Some details here

This is where I am stuck. How can I use a consistent subexpression by opening the second header as part of the subexpression to close the section.

+3

regex perl

Mathieu dd Feb 02 '09 at 18:56

source share

5 answers

#!/usr/bin/perl

my $all_lines = join "", <>;

# match a Heading that ends with (2) and read everything between that match
# and the next heading of the same depth (\1 matches the 1st matched group)
if ( $all_lines =~ /(=+ Heading )\([2]\)(.*?)\1/s ) {
    print "$2";
}

0

codelogic 02 . '09 19:28

:

my @all = split /(?=^= )/m, join "", <$filehandle>;
shift @all;

0

Leon Timmermans 02 . '09 20:27

daotoad jrockway . , , , .

, , , . . , .

#!/usr/bin/perl

my $all_lines = join "", <>;
# match a Heading that ends with (2) and read everything between that match
# and the next heading of the same depth (\2 matches the 2nd parenthesized group)
if ( $all_lines =~ m/((=+) [^\n]*\(2\)(.*?))(\n\2 |\z)/s ) {
    # then trim it down to just the point before any heading at lesser depth
    my $some_lines = $1;
    my $depth = length($2);
    if ($some_lines =~ m/(.*?)(\n={1,$depth} |\z)/s) {
        print "$1\n";
    }
}

- - !

0

Liudvikas Bukys 02 . '09 22:05

:

/^(?>(=+).*\(2\))(?>[\r\n]+(?=\1=|[^=]).*)*/m

, , , , . , lookahead , , . , .

0

Alan Moore Feb 03 '09 at 13:48

source share

daotoad · Accepted Answer · 2009-02-02T21:03:30+0000

. .

. . CPAN .

#!/usr/bin/perl

use strict;
use warnings;

my $document = Node->new();
my $current = $document;

while ( my $line = <DATA> ) {

    if ( $line =~ /^=+\s/ ) {

        my $current_depth = $current->depth;
        my $line_depth = Node->Heading_Depth( $line );

        if ( $line_depth > $current_depth ) {
            # child node.
            my $line_node = Node->new();
            $line_node->heading( $line );
            $line_node->parent( $current );
            $current->add_children( $line_node );
            $current = $line_node;
        }
        else {

            my $line_node = Node->new();
            while ( my $parent = $current->parent ) {

                if ( $line_depth == $current_depth ) {
                    # sibling node.
                    $line_node->heading( $line );
                    $line_node->parent( $parent );
                    $current = $line_node;
                    $parent->add_children( $current );

                    last;
                }

                # step up one level.
                $current = $parent;
            }
        }

    }
    else {
        $current->add_children( $line );
    }


}

use Data::Dumper;
print Dumper $document;

BEGIN {
    package Node;
    use Scalar::Util qw(weaken blessed );

    sub new {
        my $class = shift;

        my $self = {
            children => [],
            parent   => undef,
            heading  => undef,
        };

        bless $self, $class;
    }

    sub heading {
        my $self = shift;
        if ( @_ ) {
            $self->{heading} = shift;
        }
        return $self->{heading};
    }

    sub depth {
        my $self = shift;

        return $self->Heading_Depth( $self->heading );
    }

    sub parent {
        my $self = shift;
        if ( @_ ) {
            $self->{parent} = shift;
            weaken $self->{parent};
        }
        return $self->{parent};
    }

    sub children {
        my $self = shift;
        return @{ $self->{children} || [] };
    }

    sub add_children {
        my $self = shift;
        push @{$self->{children}}, @_;
    }

    sub stringify {
        my $self = shift;

        my $text = $self->heading;
        foreach my $child ( $self->children ) {
            no warnings 'uninitialized';
            $text .= blessed($child) ? $child->stringify : $child;
        }

        return $text;
    }

    sub Heading_Depth {
        my $class  = shift;
        my $heading = shift || '';

        $heading =~ /^(=*)/;
        my $depth = length $1;


        return $depth;
    }

}

__DATA__
= Heading (1)

Some text

= Heading (2)

Some more text

== Subheading (3)

Some details here

== Subheading (3)

Some details here

= Heading (4)

Extract a block of text where the final expression depends on the initial

More articles: