#!/usr/bin/perl -w # ----------------------------------------------------------------------------- my $program_name = 'The Strace Accounter'; my $version = '1.00'; my $create_date = '23-Aug-2001'; my $author = 'Morten Welinder'; my $author_email = 'terra@gnome.org'; # ----------------------------------------------------------------------------- use strict; use Getopt::Long; my ($opt_help, $opt_version, $opt_format); my $opt_srctype = 'strace'; my $fd_badness_threshold = 2; &GetOptions ('fd-badness-threshold=f' => \$fd_badness_threshold, 'format=s' => \$opt_format, 'help' => \$opt_help, 'src-type=s' => \$opt_srctype, 'version' => \$opt_version, ) || &usage (1); &usage (0) if $opt_help; if ($opt_version) { print "Version $version created $create_date.\n"; exit 0; } (@ARGV <= 1) || &usage (1); my ($dump_table,$dump_prolog,$dump_epilog); { my %valid_formats = ('text' => [sub {}, sub {}, \&dump_table_text], 'html' => [\&dump_prolog_html, \&dump_epilog_html, \&dump_table_html], ); $valid_formats{'txt'} = $valid_formats{'text'}; $opt_format = 'text' unless defined $opt_format; my $handlers = $valid_formats{lc $opt_format}; &usage (1) unless defined $handlers; ($dump_prolog,$dump_epilog,$dump_table) = @$handlers; } if ($opt_srctype !~ /^(strace|truss)$/) { die "$0: Invalid mode.\n"; } # ----------------------------------------------------------------------------- my $filename = $ARGV[0]; $filename = '-' unless defined $filename; my $time_threshold = 0.005; my $filename_count_threshold = 2; # ----------------------------------------------------------------------------- my %syscall_to_time = (); my %syscall_to_count = (); my %filename_count = (); my %fd_source = (0 => '(stdin)', 1 => '(stdout)', 2 => '(stderr)'); my %fd_iosize = (); my @fd_badness = (); my @fd_nonclosed = (); my $lineno = 0; local (*FIL); open (*FIL, $filename) || die "$0: Cannot read $filename: $!\n"; while () { $lineno++; if ($opt_srctype eq 'strace' && /^([a-zA-Z0-9]+)\(.* <([0-9.]+)>$/) { my $syscall = $1; my $time = $2; $syscall_to_count{$syscall}++; $syscall_to_time{$syscall} += $time; } elsif ($opt_srctype eq 'truss' && /^\s*([0-9.]+)\s+([_a-zA-Z0-9]+)\(/) { my $time = $1; my $syscall = $2; s/^\s*([0-9.]+)\s+//; s/$/ <$time>/; $syscall_to_count{$syscall}++; $syscall_to_time{$syscall} += $time; } # This ignores $PWD. Probably ok. if (/^(open|open64|stat|stat64|lstat|lstat64|access|creat|creat64|unlink|chdir|chroot|mkdir|rmdir|readlink|chmod|chown|mknod|utime|execve)\("([^\"]*)"/) { my $filename = $2; $filename_count{$filename}++; } elsif (/^(symlink)\(".*",\s*"([^\"]*)"/) { my $filename = $2; $filename_count{$filename}++; } elsif (/^(openat)\([A-Z_]+,\s*"([^\"]*)"/) { my $filename = $2; $filename_count{$filename}++; } elsif (/^(link|rename)\("([^\"]*)"\s*,\s*"([^\"]*)"/) { my $filename1 = $2; my $filename2 = $3; $filename_count{$filename1}++; $filename_count{$filename2}++; } elsif (/^(bind|connect)\s*\(.*family=(AF_FILE|AF_UNIX).*path="([^\"]*)"/) { my $filename = $3; $filename_count{$filename}++; } if (/^(open|open64|creat|creat64)\s*\("([^\"]*)".*\)\s*=\s*(\d+)/) { my $filename = $2; my $fd = $3; &new_fd ($fd, $filename); } elsif (/^(openat)\([0-9A-Z_]+,\s*"([^\"]*)".*\)\s*=\s*(\d+)/) { my $filename = $2; my $fd = $3; &new_fd ($fd, $filename); } elsif (/^close\s*\((\d+)\)/) { my $fd = $1; &close_fd ($fd); } elsif (/^(socket|so_socket|accept)\s*\(.*\).*=\s*(\d+)/) { my $syscall = $1; my $fd = $2; &new_fd ($fd, "($syscall)"); } elsif (/^inotify_init1\s*\(.*\).*=\s*(\d+)/) { my $fd = $1; &new_fd ($fd, "(inotify)"); } elsif (/^pipe\s*\(\[(\d+),\s*(\d+)\]\s*\).*=\s*\d+/ || /^pipe2\s*\(\[(\d+),\s*(\d+)\],.*\).*=\s*\d+/) { my $fd1 = $1; my $fd2 = $2; &new_fd ($fd1, "(pipe:0)"); &new_fd ($fd2, "(pipe:1)"); } elsif (/^(pipe|socketpair)\s*\(.*\[(\d+),\s*(\d+)\]\s*\).*=\s*\d+/) { my $fd1 = $2; my $fd2 = $3; &new_fd ($fd1, "(pipe:0)"); &new_fd ($fd2, "(pipe:1)"); } elsif (/^pipe\s*\(\)\s*=\s*(\d+)\s+\[(\d+)\]/) { my $fd1 = $1; my $fd2 = $2; &new_fd ($fd1, "(pipe:0)"); &new_fd ($fd2, "(pipe:1)"); } elsif (/^(eventfd2)\s*\(.*\)\s*=\s(\d+)/) { my $syscall = $1; my $fd = $2; &new_fd ($fd, "($syscall)"); } elsif (/^(dup)\s*\((\d+)\).*=\s(\d+)/) { my $syscall = $1; my $old_fd = $2; my $new_fd = $3; if (!exists $fd_source{$old_fd}) { print STDERR "$0: File handle $old_fd appeared out of nowhere in $syscall.\n"; $fd_source{$old_fd} = '?'; } &new_fd ($new_fd, $fd_source{$old_fd}); } elsif (/^(dup2)\s*\((\d+),\s*(\d+)\).*=\s(\d+)/ || /^(fcntl)\s*\((\d+),\s*F_DUPFD,\s*(\d+)\)\s*=\s*\d+/) { my $syscall = $1; my $fd1 = $2; my $fd2 = $3; if (!exists $fd_source{$fd1}) { print STDERR "$0: File handle $fd1 appeared out of nowhere in $syscall.\n"; $fd_source{$fd1} = '?'; } if ($fd1 != $fd2) { &close_fd ($fd2); &new_fd ($fd2, $fd_source{$fd1}); } } elsif (/^(bind|connect)\s*\((\d+),.*family=(AF_UNIX|AF_FILE).*path=@?"([^\"]*)".*\)\s*=\s*\d+/) { my $fd = $2; my $filename = $4; $fd_source{$fd} = "unix:$filename"; # Rename source } elsif (/^(bind|connect)\s*\((\d+),.*family=AF_INET.*port=\D+(\d+).*inet_addr\("([^\"]*)"\).*\)\s*=\s*\d+/) { my $fd = $2; my $inet_port = $3; my $inet_addr = $4; $fd_source{$fd} = "inet:$inet_addr:$inet_port"; } elsif (/^(read|readv|write|recv|writev|send|getdents|getdents64)\s*\((\d+),.*\).*=\s*(\d+)/) { my $syscall = $1; my $fd = $2; my $count = $3; if (!exists $fd_source{$fd}) { print STDERR "$0: File handle $fd appeared out of nowhere in $syscall on line $lineno.\n"; $fd_source{$fd} = '?'; } $fd_iosize{$fd}{$count}++; } elsif (/^(fstat|fcntl|inotify_add_watch)\s*\((\d+)(,.*)?\).*=\s*0/) { my $syscall = $1; my $fd = $2; if (!exists $fd_source{$fd}) { print STDERR "$0: File handle $fd appeared out of nowhere in $syscall on line $lineno.\n"; $fd_source{$fd} = '?'; } } elsif (/^(recvmsg)\s*\((\d+),.*\{cmsg_len=\d+, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, \{(\d+)\}\}.*\)\s*=\s*\d+/) { my $syscall = $1; my $src_fd = $2; my $fd = $3; if (!exists $fd_source{$src_fd}) { print STDERR "$0: File handle $src_fd appeared out of nowhere in $syscall.\n"; $fd_source{$src_fd} = '?'; } &new_fd ($fd, "recvmsg from $src_fd (" . $fd_source{$src_fd} . ")"); } } close (*FIL); # Pretend that open files are now being closed. foreach my $fd (keys %fd_source, keys %fd_iosize) { if (exists $fd_source{$fd}) { push @fd_nonclosed, [$fd, $fd_source{$fd}]; } &close_fd ($fd); } # ----------------------------------------------------------------------------- sub close_fd { my ($fd) = @_; if (exists $fd_source{$fd} && exists $fd_iosize{$fd}) { my $p = $fd_iosize{$fd}; my $badness = 0; my $sum = 0; my $count = 0; foreach my $size (keys %$p) { $sum += $p->{$size} * $size; $count += $p->{$size}; $badness += $p->{$size} / (log ($size + 2) / log (2)); } push @fd_badness, [$fd, $fd_source{$fd}, $badness, $sum, $count]; } delete $fd_source{$fd}; delete $fd_iosize{$fd}; } sub new_fd { my ($fd,$source) = @_; if (exists $fd_source{$fd}) { print STDERR "$0: File handle $fd reappeared from $source on line $lineno without being closed.\n"; &close_fd ($fd); } $fd_source{$fd} = $source; } # ----------------------------------------------------------------------------- &$dump_prolog (); { my @table = (); my $rest_count = 0; my $rest_time = 0; foreach my $syscall (sort { ($syscall_to_time{$b} <=> $syscall_to_time{$a}) || $a cmp $b } (keys %syscall_to_time)) { my $count = $syscall_to_count{$syscall}; my $time = $syscall_to_time{$syscall}; if ($time < $time_threshold) { $rest_count += $count; $rest_time += $time; } else { push @table, [$syscall, $count, sprintf ("%.2f", $time)]; } } if ($rest_count > 0) { push @table, ['(other)', $rest_count, sprintf ("%.2f", $rest_time)]; } &$dump_table ({'title' => "Cumulative Syscall Times", 'fields' => ["Syscall", "Count", "Time(s)"], 'alignment' => ['l', 'r', 'r'], 'data' => \@table, }); } { my @table = (); foreach my $filename (sort { ($filename_count{$b} <=> $filename_count{$a}) || $a cmp $b } (keys %filename_count)) { my $count = $filename_count{$filename}; if ($count >= $filename_count_threshold) { push @table, [$count, $filename]; } } my $header = ("This is a list of files that are accessed, one way or another, at least\n" . &ntimes ($filename_count_threshold) . ". Note, that current directory is not being tracked.\n"); &$dump_table ({'title' => "Repetitive File Name Usage", 'header' => $header, 'fields' => ["Count", "Filename"], 'alignment' => ['r', 'l'], 'data' => \@table, }); } { my @table = (); foreach my $rec (sort { (($b->[2] <=> $a->[2]) || ($a->[1] cmp $b->[1]) || ($a->[0] <=> $b->[0])) } @fd_badness) { my ($fd,$source,$badness, $sum, $count) = @$rec; next if $badness < $fd_badness_threshold; push @table, [sprintf ("%.2f", $badness), $sum, $count, $source]; } my $header = ("This is a list of files that are accessed in small chunks. \"Badness\" is a\n" . "heuristic measure of this. The list is truncated at badness " . sprintf ("%.2f", $fd_badness_threshold) . ". A file\n" . "can appear more than once if it is opened more than once.\n"); &$dump_table ({'title' => "Small-Chunk File Input/Output", 'header' => $header, 'fields' => ["Badness", "Bytes", "I/Os", "File"], 'alignment' => ['r', 'r', 'r', 'l'], 'data' => \@table, }); } { my @table = (); foreach my $rec (sort { ($a->[0] <=> $b->[0]) } @fd_nonclosed) { my ($fd,$source) = @$rec; next if $source =~ /^\(std(in|out|err)\)$/; push @table, [$fd, $source]; } my $header = ("This is a list of files that appear not to have been closed.\n"); &$dump_table ({'title' => "Files Not Closed", 'header' => $header, 'fields' => ["Desc", "File"], 'alignment' => ['r', 'l'], 'data' => \@table, }); } &$dump_epilog (); # ----------------------------------------------------------------------------- sub ntimes { my ($n) = @_; return 'once' if $n == 1; return 'twice' if $n == 2; # No thrice return "$n times"; } # ----------------------------------------------------------------------------- sub usage { my ($result) = @_; print STDERR "$program_name version $version.\n Usage: $0 [option, ...] [trace-log]\n --fd-badness-threshold badness Smallest file badness to show. --format format Select output format, see below. --help Print this help. --src-type tracer Program used for tracing, see below. --version Print program version. Valid formats are text Human readable text. html Hypertext. Valid source types are strace Input is from `strace -T' truss Input is from `truss -D' Created $create_date by $author ($author_email).\n"; exit $result; } # ----------------------------------------------------------------------------- # TEXT handlers sub dump_table_text { my ($args) = @_; my $title = $args->{'title'}; my $header = $args->{'header'}; my $fields = $args->{'fields'}; my $align = $args->{'alignment'}; my $data = $args->{'data'}; my $field_count = @$fields; my @widths = map { length } @$fields; foreach my $line (@$data) { for (my $i = 0; $i < $field_count; $i++) { my $this_length = length ($line->[$i]); if ($this_length > $widths[$i]) { $widths[$i] = $this_length; } } } print "$title.\n", '-' x length ($title), "\n\n"; if (@$data == 0) { print "None.\n"; return; } print "$header\n" if $header; my $txt = ''; for (my $i = 0; $i < $field_count; $i++) { $txt .= ' ' if $i; my $this_align = $align->[$i]; my $this_width = $widths[$i]; my $this_field = $fields->[$i]; if ($this_align eq 'r') { $txt .= sprintf ("%${this_width}s", $this_field); } elsif ($this_align eq 'l') { $txt .= sprintf ("%-${this_width}s", $this_field); } else { die "???"; } } print "$txt\n"; print '-' x (length $txt), "\n"; foreach my $line (@$data) { my $txt = ''; for (my $i = 0; $i < $field_count; $i++) { $txt .= ' ' if $i > 0; my $this_align = $align->[$i]; my $this_width = $widths[$i]; my $this_txt = $line->[$i]; # print STDERR "[$this_align,$this_width,$this_txt]\n"; if ($this_align eq 'r') { $txt .= sprintf ("%${this_width}s", $this_txt); } elsif ($this_align eq 'l') { $txt .= sprintf ("%-${this_width}s", $this_txt); } else { die "???"; } } $txt =~ s/\s+$//; print "$txt\n"; } print "\n\n"; } # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- # HTML handlers sub quote_html { my ($txt) = @_; return $txt; # FIXME. } sub dump_prolog_html { print "\n"; print "\n"; print "\n"; print "\n"; print "Strace Accounting\n"; print "\n"; print "\n\n"; } sub dump_epilog_html { print "
\n"; print "Created ", scalar localtime, ".\n"; print "\n\n"; } sub dump_table_html { my ($args) = @_; my $title = $args->{'title'}; my $header = $args->{'header'}; my $fields = $args->{'fields'}; my $align = $args->{'alignment'}; my $data = $args->{'data'}; my $field_count = @$fields; print "

", "e_html ($title), "

\n"; if (@$data == 0) { print "None.\n"; return; } print "e_html ($header), "

\n" if $header; print "\n"; # print "\n"; print " \n"; for (my $i = 0; $i < $field_count; $i++) { my $this_align = $align->[$i]; my $this_field = $fields->[$i]; if ($this_align eq 'r') { print " \n"; } elsif ($this_align eq 'l') { print " \n"; } else { die "???"; } } print " \n"; foreach my $line (@$data) { print " \n"; for (my $i = 0; $i < $field_count; $i++) { my $this_align = $align->[$i]; my $this_txt = $line->[$i]; if ($this_align eq 'r') { print " \n"; } elsif ($this_align eq 'l') { print " \n"; } else { die "???"; } } print " \n"; } print "
", "e_html ($this_field), "", "e_html ($this_field), "
", "e_html ($this_txt), "", "e_html ($this_txt), "
\n

\n\n"; } # -----------------------------------------------------------------------------