csnagoya コンパイラを作ろうな宿題1
とりあえず素朴な実装
#!/usr/bin/env perl # -*- mode: perl; coding: utf-8; -*- use strict; use warnings; use utf8; use open qw(:utf8); use open qw(:std); package Token; use base qw(Class::Accessor::Fast); __PACKAGE__->mk_accessors(qw(def str)); sub new { my $self = shift(@_); my ($str, $def) = @_; my $type = ref($self) || $self; $self = bless({ def => $def, str => $str, }, $type); return $self; } 1; package Stream; use base qw(Class::Accessor::Fast); __PACKAGE__->mk_accessors(qw( get_line tokens rest eof )); my $reserved = { MODULE => undef, BEGIN => undef, END => undef, VAR => undef, INTEGER => undef, STRING => undef, IF => undef, THEN => undef, ELSE => undef, WHILE => undef, DO => undef, }; my $symbol = { q{+} => q{PLUS}, q{-} => q{MINUS}, q{*} => q{MULT}, q{/} => q{DIV}, q{=} => q{EQ}, q{<} => q{LT}, q{<=} => q{LE}, q{>} => q{GT}, q{>=} => q{GE}, q{<>} => q{NE}, q{:=} => q{ASSIGN}, q{:} => q{COLON}, q{;} => q{SEMICOLON}, q{,} => q{COMMA}, q{(} => q{OPEN}, q{)} => q{CLOSE}, q{.} => q{PERIOD}, }; sub new { my $self = shift(@_); my ($get_line) = @_; my $type = ref($self) || $self; $self = bless({ get_line => $get_line, tokens => [], rest => q{}, eof => 0, }, $type); return $self; } sub unget_token { my $self = shift(@_); my ($token) = @_; unshift(@{$self->tokens}, $token); } sub get_token { my $self = shift(@_); my $get_line = $self->get_line; my $tokens = $self->tokens; my $rest = $self->rest; if (@$tokens > 0) { return shift(@$tokens); } else { my $line = $get_line->(); $rest .= $line if (defined($line)); my ($new_tokens, $new_rest) = $self->make_token($rest); @$tokens = @$new_tokens; $self->rest($new_rest); return shift(@$tokens) if (@$tokens > 0); unless (defined($line)) { if (length($rest) > 0) { die(qq{"$rest" が解釈されずに残りました\n}); } else { $self->eof(1); return Token->new(q{EOF}, q{EOF}); } } } } sub make_token { my $self = shift(@_); my ($line) = @_; my $tokens = []; while (1) { if ($line =~ s/^\s+//) { # skip space ; } elsif ($line =~ s/^([a-z][a-z0-9]*)//i) { # ident or reserved my $str = $1; my $token; if (exists($reserved->{$str})) { $token = Token->new($str, $str); } else { $token = Token->new($str, q{IDENT}); } push(@$tokens, $token); } elsif ($line =~ s/^("[^"\\]*(?:\\.[^"\\]*)*")//) { # string literal my $str = $1; my $token = Token->new($str, q{STR}); push(@$tokens, $token); } elsif ($line =~ s{^(<>|[>:<]=|[-+*/=<>:;,()\.])}{}) { # open or close my $str = $1; unless (exists($symbol->{$str})) { die(qq{undefined symbol : $str}); } my $def = $symbol->{$str}; my $token = Token->new($str, $def); push(@$tokens, $token); } else { last; } } return ($tokens, $line); } 1; package Parser; use base qw(Class::Accessor::Fast); __PACKAGE__->mk_accessors(qw( stream )); sub new { my $self = shift(@_); my ($stream) = @_; my $type = ref($self) || $self; $self = bless({ stream => $stream, }, $type); return $self; } sub entry { my $self = shift(@_); my $stream = $self->stream(); $self->program(); $stream->rest eq q{}; } sub program { my $self = shift(@_); my $stream = $self->stream(); $self->xmatch(q{MODULE}); $self->xmatch(q{IDENT}); $self->xmatch(q{SEMICOLON}); $self->xmatch(q{BEGIN}); $self->statlist(); $self->xmatch(q{END}); $self->xmatch(q{IDENT}); $self->xmatch(q{PERIOD}); } sub statlist { my $self = shift(@_); my $stream = $self->stream(); $self->statement(); $self->statlist1(); } sub statlist1 { my $self = shift(@_); my $stream = $self->stream(); if ($self->match(q{SEMICOLON})) { $self->xmatch(q{SEMICOLON}); $self->statement(); $self->statlist1(); } elsif ($self->match(q{END}) or $self->match(q{ELSE})) { ; } else { warn(qq{error at statlist1}); } } sub statement { my $self = shift(@_); my $stream = $self->stream(); if ($self->match(q{IDENT})) { $self->xmatch(q{IDENT}); $self->statement1(); } else { warn(qq{parse error - Statement}); } } sub statement1 { my $self = shift(@_); my $stream = $self->stream(); if ($self->match(q{OPEN}) ){ $self->xmatch(q{OPEN}); $self->literal(); $self->xmatch(q{CLOSE}); } else { warn(qq{error - statement1}); } } sub literal { my $self = shift(@_); my $stream = $self->stream(); if ($self->match(q{STR})) { $self->xmatch(q{STR}); } else { warn(q{error - literal}); } } sub match { my $self = shift(@_); my $stream = $self->stream(); my ($str) = @_; my $token = $stream->get_token(); my $ret = 0; if ($token->def eq $str) { $ret = 1; } $stream->unget_token($token); return $ret; } sub xmatch { my $self = shift(@_); my $stream = $self->stream(); my ($str) = @_; my $token = $stream->get_token(); if ($token->def eq $str) { printf(qq{DEF[%s]\tSTR[%s]\n}, $token->def, $token->str); return; } else { $stream->unget_token($token); warn(qq{Parse error - $str/} . $token->def); } } 1; package main; sub main { my $stream = Stream->new(sub {return scalar(<>)}); my $parser = Parser->new($stream); # while (not($stream->eof())) { # my $token = $stream->get_token(); # printf(qq{DEF[%s]\tSTR[%s]\n}, $token->def, $token->str); # } $parser->entry(); return 0; } my $ret = main(); exit($ret);
#!/usr/bin/env perl # -*- mode: perl; coding: utf-8; -*- use strict; use warnings; use utf8; use open qw(:utf8); use open qw(:std); use Parse::RecDescent; use Data::Dumper; package main; sub main { $::RD_AUTOACTION = q{bless([@item[1..@item-1]], $item[0])}; my $text = join(q{}, <>); my $grammer = <<' EOL'; if: 'IF' then: 'THEN' else: 'ELSE' while: 'WHILE' do: 'DO' ident: /[a-zA-Z][a-zA-Z0-9]*/ integer: /[0-9]+/ string: /"[^"\\]*(?:\\.[^"\\]*)*"/ program: 'MODULE' ident ':' declist(?) 'BEGIN' statlist 'END' ident '.' {print Data::Dumper->Dump([\@item]);} declist: 'VAR' dec(s) dec: ident(s /,/) ':' type ';' type: 'STRING' | 'INTEGER' statlist: statement(s /;/) statement: ident '(' expression ')' expression: term term: factor factor: literal | '(' expression ')' literal: ident | integer | string EOL my $parser = Parse::RecDescent->new($grammer); $parser->program($text); return 0; } my $ret = main(); exit($ret);