#!/usr/bin/perl -w # # <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =head1 NAME jdwp-spec - Perl script for parsing JDWP spec HTML =head1 SYNOPSIS # fetch the HTML wget http://java.sun.com/javase/6/docs/platform/jpda/jdwp/jdwp-protocol.html # necessary step to make the HTML more compliant/easier-to-parse tidy -i jdwp-protocol.html >jdwp-protocol.tidy.html # produce the Perl data structure jdwp-spec jdwp-protocol.tidy.html >spec.pl # optionally, tidy up the Perl data structure perltidy -i=2 spec.pl # now append spec.pl.tdy after the __DATA__ in jdwp-dump =head1 DESCRIPTION This script parses the JDWP specification HTML to produce a perl data structure for use by L. The HTML must be preprocessed by tidy (http://tidy.sf.net/) in order to produce supported HTML. =cut use strict; use YAML; use HTML::TreeBuilder; my $file = shift; my $root = HTML::TreeBuilder->new_from_file($file); #print $root->as_HTML,"\n";exit; print '#' x 61, "\n"; print '##', ' ' x 57, "##\n"; print "## THIS STRUCTURE WAS AUTOMATICALLY GENERATED BY jdwp-spec ##\n"; print '##', ' ' x 57, "##\n"; print '#' x 61, "\n"; my $glob; my $spec = {}; foreach my $section ($root->look_down(_tag => 'h4', sub { $_[0]->as_text =~ / Constants/ })) { my $section_name = clean_text($section->as_text); $section_name =~ s/ Constants//; my $type = lc $section_name; print STDERR "Constants for $type\n"; my $table; my @right = $section->right; my $count = 0; foreach my $right ($section->right) { $count++ <= 3 or last; next unless ($right && ref $right); last if ($right->tag eq 'h4'); next unless ($right->tag eq 'table'); $table = $right; last; } unless ($table) { warn "No table in $section_name section?\n"; next; } $spec->{$type} = {}; foreach my $tr ($table->look_down(_tag => 'tr')) { my @td = $tr->look_down(_tag => 'td') or next; my ($name, $code, $desc) = map { clean_text($_) } @td; $code =~ s/0x([A-Fa-f0-9]+)/hex($1)/e; $glob->{$type}->{$name} = $code; $spec->{$type}->{$code} = { name => $name, desc => $desc, }; } } $spec->{cmd_set} = {}; foreach my $section ($root->look_down(_tag => 'h4', sub { $_[0]->as_text =~ / Command Set/ })) { my $section_name = clean_text($section->as_text); $section_name =~ s/\s+Command Set\s+\((\d+)\)//; my $cmd_set_id = $1; my $cmd_set_name = $section_name; print STDERR "Commands for $cmd_set_name $cmd_set_id\n"; my $spec_cmd_set = $spec->{cmd_set}->{$cmd_set_id} = { name => $cmd_set_name, cmd => {}, }; my $spec_cmd_hash = $spec_cmd_set->{cmd}; # shortcut to save typing my $table; my @right = $section->right; while (@right) { my $right = shift @right; next unless ($right && ref $right); last if ($right->tag eq 'h4'); #print "R: ", $right->tag, "\n"; next unless ($right->tag eq 'h5'); my $head_name = clean_text($right->as_text); $head_name =~ s/(?:\s+Command)?\s+\((\d+)\)//; my $cmd_id = $1; my $cmd_name = $head_name; my $our_spec = $spec_cmd_hash->{$cmd_id} = {}; $our_spec->{name} = $cmd_name; while (@right) { my $right = shift @right; next unless ($right && ref $right); if ($right->tag eq 'h5' or $right->tag eq 'h4') { unshift @right, $right; last; } #print "SR: ", $right->tag, "\n"; if ($right->tag eq 'dl') { if ($cmd_set_id == 64 && $cmd_id == 100) { parse_dl_event_composite($right, $our_spec); } else { parse_dl($right, $our_spec); } } } } } use Data::Dumper; $Data::Dumper::Quotekeys = 0; $Data::Dumper::Sortkeys = 1; print Data::Dumper->Dump([$spec],[qw/spec/]); exit; sub parse_dl { my ($dl, $spec) = @_; my $dt = $dl->look_down(_tag => 'dt', sub { $_[0]->as_text =~ /Out Data/i }); parse_dd(out => scalar $dt->right, $spec); $dt = $dl->look_down(_tag => 'dt', sub { $_[0]->as_text =~ /Reply Data/i }); parse_dd(reply => scalar $dt->right, $spec); $dt = $dl->look_down(_tag => 'dt', sub { $_[0]->as_text =~ /Error Data/i }); parse_dd_error(scalar $dt->right, $spec); } sub parse_dl_event_composite { my ($dl, $spec) = @_; my $dt = $dl->look_down(_tag => 'dt', sub { $_[0]->as_text =~ /Event Data/i }); parse_dd(out => scalar $dt->right, $spec); } sub parse_dd { my ($type, $dd, $spec) = @_; my $repeat; my $sub; unless ($dd && $dd->tag eq 'dd') { print STDERR "No $type data?"; return; } my @list = (); $spec->{$type} = \@list; return if ($dd->as_text eq '(None)'); foreach my $tr ($dd->look_down(_tag => 'tr')) { my @td = $tr->look_down(_tag => 'td') or next; if ($td[0]->attr('colspan') eq '1' && $td[1]->attr('colspan') eq '4') { # first level repeat item my ($type,$name,$desc) = map { clean_text($_) } @td[1..3]; push @$repeat, { type => $type, name => $name, desc => $desc, }; next; } if ($td[0]->attr('colspan') eq '2' && $td[1]->attr('colspan') eq '3') { # second level repeat/case item my ($type,$name,$desc) = map { clean_text($_) } @td[1..3]; push @{$sub}, { type => $type, name => $name, desc => $desc, }; next; } undef $sub; if ($td[0]->attr('colspan') eq '1' && $td[1]->attr('colspan') eq '6') { # start of second level repeat my $subrepeat = $td[1]->look_down(_tag => 'i'); if ($subrepeat) { my @sublist = (); push @$repeat, { type => 'repeat', name => clean_text($subrepeat->as_text), items => \@sublist, }; $sub = \@sublist; } else { print STDERR " 'unknown subrepeat?',\n"; } next; } if ($td[0]->attr('colspan') eq '1' && $td[1]->attr('colspan') eq '5') { # start of second level case my $subcase = $td[1]->look_down(_tag => 'i'); if ($subcase) { my $val; if ($td[1]->as_text =~ /is (\w+):/) { $val = $1; } elsif ($td[1]->as_text =~ /is JDWP\.EventKind\.(\w+):/) { $val = $glob->{eventkind}->{$1} or die "No code for $1\n"; } else { die "Failed to match case description\n"; } my @list = (); push @$repeat, { type => 'case', name => clean_text($subcase->as_text), value => $val, items => \@list, }; $sub = \@list; } else { print STDERR " 'unknown subcase?',\n"; } next; } undef $repeat if ($repeat); if ($td[0]->attr('colspan') eq '5') { # top-level basic item my ($type,$name,$desc) = map { clean_text($_) } @td; push @list, { type => $type, name => $name, desc => $desc, }; } elsif ($td[0]->attr('colspan') eq '7') { # start of first level repeat my $repeat_elt = $td[0]->look_down(_tag => 'i'); if ($repeat_elt) { my @repeat_list = (); push @list, { type => 'repeat', name => clean_text($repeat_elt->as_text), items => \@repeat_list, }; $repeat = \@repeat_list; } else { print STDERR " 'unknown repeat?',\n"; } } else { print STDERR " 'other',\n"; } } } sub parse_dd_error { my ($dd, $spec) = @_; unless ($dd && $dd->tag eq 'dd') { print STDERR "No error data?"; return; } my %error = (); $spec->{error} = \%error; unless ($dd->as_text eq '(None)') { foreach my $tr ($dd->look_down(_tag => 'tr')) { my @td = $tr->look_down(_tag => 'td') or next; my ($name, $desc) = map { clean_text($_) } @td; my $code = $glob->{error}->{$name} or die "No code for $name\n"; $error{$code} = $desc; } } } sub clean_text { $_[0] = $_[0]->as_text if (ref $_); $_[0] =~ s/\r?\n/ /g; $_[0] =~ s/[^ -~]//g; $_[0] =~ s/\s+$//; $_[0] =~ s/^\s+//; $_[0]; } =head1 SEE ALSO Net::Pcap(3), tcpdump(8) =head1 BUGS If you find some (and it shouldn't be difficult), then please let me know. =head1 AUTHOR Mark Hindess, Emark.hindess@googlemail.comE =head1 COPYRIGHT Apache License, Version 2.0, see http://www.apache.org/licenses/LICENSE-2.0 =cut