#!/bin/perl
# Grammar into HTML
# Read in sparql.txt and the tokens.txt file
#
# BUGS: The token "||" gets corrupted as it looks like a rule |
#       '&&' needs to be && (maybe fixed - check)
#       NOTE(review): the second '&&' above presumably stood for the
#       HTML-escaped form - confirm.
# Tokens generally.
# Rewrite to XML BNF
##   #xN // [a-zA-Z], [#xN-#xN] // [abc], [#xN#xN#xN] //
##   [^a-z], [^#xN-#xN] // [^abc], [^#xN#xN#xN]
# Remove unnecessary ()
# Outer (), (one term)* or ?
#
# NOTE: this script does not enable strict; every variable below is a
# package global.  They are deliberately left that way because code
# outside this section of the file may read them by name.

# Input record separator undefined: a read returns the whole file at once.
# Set globally because readFile is defined outside this section -
# presumably it relies on this setting; verify before localising it.
$/ = undef ;

# Just table or full page.
$TABLE = 1 ;

$grammarFile = 'sparql.txt' ;
$tokensFile = 'tokens.txt' ;

## $grammarFile = 's.txt' ;
## $tokensFile = 't.txt' ;

$grammar = readFile($grammarFile) ;
$tokens = readFile($tokensFile) ;

# Drop the section markers, '//' comments (to end of line) and CRs.
$grammar =~ s!DOCUMENT START!! ;
$grammar =~ s!NON-TERMINALS!! ;
$grammar =~ s!DOCUMENT END!! ;
$grammar =~ s!//.*!!g ;
$grammar =~ s!\r!!g ;

# remove leading whitespace
$grammar =~ s!^[\n\s]*!\n! ;

# Merge alts: an alternative that starts a new line with '|' is joined
# onto the previous line, so each rule ends up on a single line.
$grammar =~ s!\n\s*\|!\ |!g ;

$tokens =~ s!//.*!!g ;
$tokens =~ s!\r!!g ;

## Grammar
#print "GRAMMAR\n" ;

# One entry per rule line (leading whitespace of each line is consumed).
@g = split(/\n\s*/, $grammar) ;

@rules = () ;       # rule and token names, in the order they were read
%ruleMap = () ;     # name => body, for grammar rules and non-inline tokens
%tokenMap = () ;    # not populated in this section
%inline = () ;      # inline token name => escaped body

# Grammar rules
# Direct from "jjdoc -TEXT=true"
for $g (@g) {
    # LIMIT 2: split on the first ':=' only, so a body that itself
    # contains ':=' is kept whole instead of being truncated.
    ($rulename, $rulebody) = split(/:=/, $g, 2) ;
    $rulename =~ s!^\s*!! ;
    $rulename =~ s!\s*$!! ;
    $rulebody =~ s!^\s*!! ;
    $rulebody =~ s!\s*$!! ;

    # Remove outer brackets
    # $rulebody =~ s!^\((.*)\)$!$1! ;

    # Remove <> round tokens
    #$rulebody =~ s/\<(\w+)\>/$1/g ;
    # Leave in - so tokens distinguished from rules

    next if $rulename eq '' ;
    #next if $rulebody eq '' ;

    # Skip the root rule.
    next if ( $rulename eq 'CompilationUnit' ) ;

    $rulebody = 'Perl 5 regular expression'
        if ( $rulename eq 'PatternLiteral' ) ;

    push @rules, $rulename ;
    # exists, not defined: a rule already stored counts as a duplicate
    # whatever its body is.
    warn "Duplicate rule (grammar): $rulename\n"
        if exists $ruleMap{$rulename} ;
    $ruleMap{$rulename} = $rulebody ;
    ## print "----------\n" ;
    ## print $rulename,"\n" ;
    ## print $rulebody,"\n" ;
}

# Tokens
# Produced by "jj2tokens"
# Hand edited to indicate the inlines

$tokens =~ s/\n+/\n/g ;
$tokens =~ s/^\n// ;

# A new token starts at a line beginning with '<' or '[', so bodies that
# continue over several lines stay attached to their token.
@t = split(/\n(?=\<|\[)/, $tokens) ;

for $t (@t) {
    # LIMIT 2: split on the first '::=' only (see the grammar loop).
    ($tokenname,$tokenbody) = split(/::=/, $t, 2) ;
    $tokenname =~ s!^\s*!! ;
    $tokenname =~ s!\s*$!! ;
    #$tokenname =~ s/^\/g ;
    #$tokenname =~ s/\>$//g ;
    $tokenname =~ s/#// ;

    $tokenbody =~ s!^\s*!! ;
    $tokenbody =~ s!\s*$!! ;

    # Remove <> round tokens
    #$tokenbody =~ s/\<(\w+)\>/$1/g ;
    # Leave in - so tokens distinguished from rules

    # Remove outer ()
    # $tokenbody =~ s!^\((.*)\)$!$1! ;

    # Inline?
    if ( $tokenname =~ /^\[\<\w*\>\]/ ) {
        # Strip the [ ] markers BEFORE the duplicate check: %inline is
        # keyed by the stripped name, so looking up the bracketed name
        # (as the code used to) could never find an earlier entry.
        $tokenname =~ s/^\[//g ;
        $tokenname =~ s/\]$//g ;
        warn "Duplicate inline (token): $tokenname\n"
            if exists $inline{$tokenname} ;
        $tokenbody =~ s/"/'/g ; # '"
        $inline{$tokenname} = esc($tokenbody) ;
        #print "INLINE: ",$tokenname," => ",$tokenbody,"\n" ;
    }
    else {
        push @rules, $tokenname ;
        # Tokens are stored in %ruleMap, so that is the map to check;
        # the previous test against %tokenMap could never fire.
        warn "Duplicate rule (token): $tokenname\n"
            if exists $ruleMap{$tokenname} ;
        $ruleMap{$tokenname} = $tokenbody ;
    }
}

# Table
# Full-page mode only: emitted when $TABLE is false.
# NOTE: the string literal opened by the last print below is closed at
# the start of the next source line - keep that line break as it is.
if ( ! $TABLE ) {
    print "\n";
    print "
\n"; print "\n" ; print "\n"; print "\n"; print "\n" ; } print "",code('gRuleLabel', $rlabel)," | \n" ; #print "",span('gRuleHead', $rn)," | \n" ; print "",code('gRuleHead',$rn)," | \n" ; print "::= | \n" ; #print "",span('gRuleBody',$rb)," | \n" ; print "",code('gRuleBody',$rb)," | \n" ; print "
' . $t . '
' ;
}