csnagoya コンパイラを作ろうな宿題1

とりあえず素朴な実装

#!/usr/bin/env perl
# -*- mode: perl; coding: utf-8; -*-

use strict;
use warnings;
use utf8;
use open qw(:utf8);
use open qw(:std);

package Token;
use base qw(Class::Accessor::Fast);
__PACKAGE__->mk_accessors(qw(def str));
sub new {
    my $self = shift(@_);
    my ($str, $def) = @_;
    my $type = ref($self) || $self;
    $self = bless({
        def => $def,
        str => $str,
    }, $type);
    return $self;
}
1;

package Stream;
use base qw(Class::Accessor::Fast);
__PACKAGE__->mk_accessors(qw(
    get_line
    tokens
    rest
    eof
));

my $reserved = {
    MODULE  => undef,
    BEGIN   => undef,
    END     => undef,
    VAR     => undef,
    INTEGER => undef,
    STRING  => undef,
    IF      => undef,
    THEN    => undef,
    ELSE    => undef,
    WHILE   => undef,
    DO      => undef,
};

my $symbol = {
    q{+}  => q{PLUS},
    q{-}  => q{MINUS},
    q{*}  => q{MULT},
    q{/}  => q{DIV},
    q{=}  => q{EQ},
    q{<}  => q{LT},
    q{<=} => q{LE},
    q{>}  => q{GT},
    q{>=} => q{GE},
    q{<>} => q{NE},
    q{:=} => q{ASSIGN},
    q{:}  => q{COLON},
    q{;}  => q{SEMICOLON},
    q{,}  => q{COMMA},
    q{(}  => q{OPEN},
    q{)}  => q{CLOSE},
    q{.}  => q{PERIOD},
};

sub new {
    my $self = shift(@_);
    my ($get_line) = @_;
    my $type = ref($self) || $self;
    $self = bless({
        get_line => $get_line,
        tokens   => [],
        rest     => q{},
        eof      => 0,
    }, $type);
    return $self;
}

sub unget_token {
    my $self = shift(@_);
    my ($token) = @_;
    unshift(@{$self->tokens}, $token);
}

sub get_token {
    my $self = shift(@_);
    my $get_line = $self->get_line;
    my $tokens = $self->tokens;
    my $rest = $self->rest;

    if (@$tokens > 0) {
        return shift(@$tokens);
    } else {
        my $line = $get_line->();
        $rest .= $line if (defined($line));
        my ($new_tokens, $new_rest) = $self->make_token($rest);
        @$tokens = @$new_tokens;
        $self->rest($new_rest);
        return shift(@$tokens) if (@$tokens > 0);
        unless (defined($line)) {
            if (length($rest) > 0) {
                die(qq{"$rest" が解釈されずに残りました\n});
            } else {
                $self->eof(1);
                return Token->new(q{EOF}, q{EOF});
            }
        }
    }
}

sub make_token {
    my $self = shift(@_);
    my ($line) = @_;
    my $tokens = [];
    while (1) {
        if ($line =~ s/^\s+//) { # skip space
            ;
        } elsif ($line =~ s/^([a-z][a-z0-9]*)//i) { # ident or reserved
            my $str = $1;
            my $token;
            if (exists($reserved->{$str})) {
                $token = Token->new($str, $str);
            } else {
                $token = Token->new($str, q{IDENT});
            }
            push(@$tokens, $token);
        } elsif ($line =~ s/^("[^"\\]*(?:\\.[^"\\]*)*")//) { # string literal
            my $str = $1;
            my $token = Token->new($str, q{STR});
            push(@$tokens, $token);
        } elsif ($line =~ s{^(<>|[>:<]=|[-+*/=<>:;,()\.])}{}) { # open or close
            my $str = $1;
            unless (exists($symbol->{$str})) {
                die(qq{undefined symbol : $str});
            }
            my $def = $symbol->{$str};
            my $token = Token->new($str, $def);
            push(@$tokens, $token);
        } else {
            last;
        }
    }
    return ($tokens, $line);
}

1;

package Parser;
use base qw(Class::Accessor::Fast);
__PACKAGE__->mk_accessors(qw(
    stream
));

sub new {
    my $self = shift(@_);
    my ($stream) = @_;
    my $type = ref($self) || $self;
    $self = bless({
        stream => $stream,
    }, $type);
    return $self;
}

sub entry {
    my $self = shift(@_);
    my $stream = $self->stream();
    $self->program();
    $stream->rest eq q{};
}

sub program {
    my $self = shift(@_);
    my $stream = $self->stream();
    $self->xmatch(q{MODULE});
    $self->xmatch(q{IDENT});
    $self->xmatch(q{SEMICOLON});
    $self->xmatch(q{BEGIN});
    $self->statlist();
    $self->xmatch(q{END});
    $self->xmatch(q{IDENT});
    $self->xmatch(q{PERIOD});
}

sub statlist {
    my $self = shift(@_);
    my $stream = $self->stream();
    $self->statement();
    $self->statlist1();
}

sub statlist1 {
    my $self = shift(@_);
    my $stream = $self->stream();
    if ($self->match(q{SEMICOLON})) {
        $self->xmatch(q{SEMICOLON});
        $self->statement();
        $self->statlist1();
    } elsif ($self->match(q{END}) or $self->match(q{ELSE})) {
        ;
    } else {
        warn(qq{error at statlist1});
    }
}

sub statement {
    my $self = shift(@_);
    my $stream = $self->stream();
    if ($self->match(q{IDENT})) {
        $self->xmatch(q{IDENT});
        $self->statement1();
    } else {
        warn(qq{parse error - Statement});
    }
}

sub statement1 {
    my $self = shift(@_);
    my $stream = $self->stream();
    if ($self->match(q{OPEN}) ){
        $self->xmatch(q{OPEN});
        $self->literal();
        $self->xmatch(q{CLOSE});
    } else {
        warn(qq{error - statement1});
    }
}

sub literal {
    my $self = shift(@_);
    my $stream = $self->stream();
    if ($self->match(q{STR})) {
        $self->xmatch(q{STR});
    } else {
        warn(q{error - literal});
    }
}

sub match {
    my $self = shift(@_);
    my $stream = $self->stream();
    my ($str) = @_;
    my $token = $stream->get_token();
    my $ret = 0;
    if ($token->def eq $str) {
        $ret = 1;
    }
    $stream->unget_token($token);
    return $ret;
}

sub xmatch {
    my $self = shift(@_);
    my $stream = $self->stream();
    my ($str) = @_;
    my $token = $stream->get_token();
    if ($token->def eq $str) {
        printf(qq{DEF[%s]\tSTR[%s]\n}, $token->def, $token->str);
        return;
    } else {
        $stream->unget_token($token);
        warn(qq{Parse error - $str/} . $token->def);
    }
}

1;

package main;

sub main {
    my $stream = Stream->new(sub {return scalar(<>)});
    my $parser = Parser->new($stream);
#     while (not($stream->eof())) {
#         my $token = $stream->get_token();
#         printf(qq{DEF[%s]\tSTR[%s]\n}, $token->def, $token->str);
#     }
    $parser->entry();
    return 0;
}

my $ret = main();
exit($ret);

CPAN

#!/usr/bin/env perl
# -*- mode: perl; coding: utf-8; -*-

use strict;
use warnings;
use utf8;
use open qw(:utf8);
use open qw(:std);

use Parse::RecDescent;
use Data::Dumper;

package main;

sub main {
    $::RD_AUTOACTION = q{bless([@item[1..@item-1]], $item[0])};
    my $text = join(q{}, <>);
    my $grammer = <<'    EOL';
if:         'IF'
then:       'THEN'
else:       'ELSE'
while:      'WHILE'
do:         'DO'
ident:      /[a-zA-Z][a-zA-Z0-9]*/
integer:    /[0-9]+/
string:     /"[^"\\]*(?:\\.[^"\\]*)*"/

program:    'MODULE' ident ':' declist(?) 'BEGIN' statlist 'END' ident '.'
			    {print Data::Dumper->Dump([\@item]);}
declist:    'VAR' dec(s)
dec:        ident(s /,/) ':' type ';'
type:       'STRING' | 'INTEGER'
statlist:   statement(s /;/)
statement:  ident '(' expression ')'
expression: term
term:       factor
factor:     literal | '(' expression ')'
literal:    ident | integer | string
    EOL
    my $parser = Parse::RecDescent->new($grammer);
    $parser->program($text);

    return 0;
}

my $ret = main();
exit($ret);