#!/usr/local/bin/perl

# This software is Copyright (c) 1995 Jeff Weisberg
# Permission is granted to use, copy and distribute this software
# under the following conditions:
#     - This license covers the original software, as well as
#       modified or derived works.
#     - All modified or derived works must contain this notice
#       unmodified and in its entirety.
#     - This software is not to be used for any purpose which
#       may be considered illegal, immoral, or unethical.
#     - This software is provided as is and without warranty.

# this program generates stats on usenet article count and volume
# generates html (oooh, aaah!)

# log files to analyze are generated from inn, using a
# newsfeed line similar to:
#     STATS!:*:Tf,WtbNs:/var/adm/news-stats

# here at OpNet, we rotate the stats log nightly (in news.daily)
# and keep the past 10 days files online (gzipped), and
# analyze them (roughly) weekly

# Usage: usenet-stats [logfile ...]      (reads stdin when no file is given)
#
# Each input line is expected to hold four whitespace-separated fields:
#     time  size  comma,separated,groups  feed
# Progress and the per-feed summary go to stdout/stderr; the HTML report is
# written to $OUTFILE.

use strict;
use warnings;

my $BIGNUM_BASE        = 100000000;    # bignum base
my $SCALE              = 1000;         # scale factor (~1k)
my $TOP_N              = 25;           # how many to show for Top N
my $OUTFILE            = "/tmp/usenet-stats.html";    # send html output where?
my $PROGRESS_EVERY     = 10000;        # print a progress line every N articles
my $SECONDS_PER_DAY    = 3600 * 24;

# note: /img/dot_peri.gif is a 1pixel*1pixel gif
# 1pixel gif idea inspired by Mr. "Tekton" www.best.com/~dsiegel
my $BAR_IMAGE          = "/img/dot_peri.gif";
# NOTE(review): the original bar geometry was lost together with the HTML
# tags; pixels-per-percent and the bar height are a reconstruction - confirm
# against an intact copy of the script.
my $BAR_PX_PER_PERCENT = 4;
my $BAR_HEIGHT         = 10;

# Package variables (not lexicals) because they are read by "format FEEDS".
our ($feed_name, $feed_pct, $feed_articles);

# Add $bytes to the running size for $key in dimension $dim and bump the
# matching article counter.
#
# it only takes a few days for news volume to overflow an int
# so we use a pair of ints as "bignums"
# sigh, ...
# The pair is (size, carry): whenever size exceeds $BIGNUM_BASE one base is
# moved into the carry counter.  canonicalize() folds the pair back together.
sub tally {
    my ($stats, $dim, $key, $bytes) = @_;

    my $size_of  = $stats->{size}{$dim}  ||= {};
    my $carry_of = $stats->{carry}{$dim} ||= {};

    $size_of->{$key} += $bytes;
    if ($size_of->{$key} > $BIGNUM_BASE) {
        $size_of->{$key} -= $BIGNUM_BASE;
        $carry_of->{$key}++;
    }
    $stats->{count}{$dim}{$key}++;
    return;
}

# Fold every (size, carry) pair into a single number, expressed in units of
# $SCALE bytes.  Must be called exactly once, after all tallying is done.
sub canonicalize {
    my ($stats) = @_;

    for my $dim (keys %{ $stats->{size} || {} }) {
        my $size_of  = $stats->{size}{$dim};
        my $carry_of = $stats->{carry}{$dim} || {};
        for my $key (keys %$size_of) {
            $size_of->{$key} = $size_of->{$key} / $SCALE
                + ($carry_of->{$key} || 0) * ($BIGNUM_BASE / $SCALE);
        }
    }
    return;
}

# Return the cleaned-up group name followed by each of its parent
# hierarchies, longest first ("comp.lang.perl", "comp.lang", "comp").
#
# people put some really odd things in newsgroup headers...
# so runs of dots are collapsed and leading/trailing dots are removed first.
sub hierarchies_of {
    my ($group) = @_;

    my $hier = $group;
    $hier =~ s/\.+/\./g;
    $hier =~ s/^\.//;
    $hier =~ s/\.$//;

    my @hiers;
    # length(), not truth: a hierarchy literally named "0" must still count.
    while (length $hier) {
        push @hiers, $hier;
        # Strip the last component; bail out if nothing could be stripped so
        # the loop can never spin.
        $hier =~ s/\.?[^.]+\.?$// or last;
    }
    return @hiers;
}

# $part as a percentage of $whole; 0 when $whole is zero (including the
# string "0.00", which is true as a string but zero as a number).
sub percent_of {
    my ($part, $whole) = @_;
    return 0 if !defined $whole || $whole == 0;
    return $part * 100 / $whole;
}

# Escape text for inclusion in HTML; newsgroup names come straight from
# article headers and cannot be trusted.
sub html_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# A horizontal bar for $pct percent, made by stretching the 1x1 gif.
sub bar_img {
    my ($pct) = @_;
    my $width = int($pct * $BAR_PX_PER_PERCENT + 0.5);
    return qq{<img src="$BAR_IMAGE" height="$BAR_HEIGHT" width="$width" alt="">};
}

# Read the stats log from <> (files named on the command line, or stdin) and
# return a hash ref holding:
#     size/carry/count -> {total,group,hier,day,hour} -> key -> value
#     count -> feed -> feed name -> articles
#     first, last   earliest and latest article time seen
#     skipped       number of malformed lines ignored
sub read_log {
    my %stats = (skipped => 0);

    while (my $line = <>) {
        my ($stamp, $bytes, $group_list, $feed) = split ' ', $line;
        my @groups = defined $group_list ? split(/,/, $group_list) : ();

        # A blank or truncated line would otherwise divide by zero below, and
        # a non-numeric time would drag the reporting period back to 1970.
        unless (@groups && $stamp =~ /^\d+$/ && $bytes =~ /^\d+$/) {
            $stats{skipped}++;
            next;
        }
        $feed = '' unless defined $feed;

        $stats{first} = $stamp if !defined $stats{first} || $stamp < $stats{first};
        $stats{last}  = $stamp if !defined $stats{last}  || $stamp > $stats{last};

        tally(\%stats, 'total', 'all', $bytes);

        # tally where from
        $stats{count}{feed}{$feed}++;

        # a crossposted article is charged to each group in equal shares
        my $share = $bytes / @groups;
        for my $group (@groups) {
            tally(\%stats, 'group', $group, $share);
            tally(\%stats, 'hier', $_, $share) for hierarchies_of($group);
        }

        # tally by when
        my ($hour, $mday, $mon, $year) = (localtime $stamp)[2 .. 5];
        my $day = sprintf "%.4d/%.2d/%.2d", $year + 1900, $mon + 1, $mday;
        tally(\%stats, 'day',  $day,  $bytes);
        tally(\%stats, 'hour', $hour, $bytes);

        my $articles = $stats{count}{total}{all};
        unless ($articles % $PROGRESS_EVERY) {
            my $carry = $stats{carry}{total}{all} || 0;
            print "C:$articles S:$carry:$stats{size}{total}{all}\n";
            print STDERR "$day, $hour\n";
        }
    }
    return \%stats;
}

# show where from stats
# One line per feed on stdout (name, percent of all articles, article count),
# busiest feed first, laid out by "format FEEDS".
sub print_feed_summary {
    my ($stats) = @_;

    my $count_of = $stats->{count}{feed} || {};
    my $articles = $stats->{count}{total}{all};

    local $~ = 'FEEDS';
    my @feeds = sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
        keys %$count_of;
    for my $feed (@feeds) {
        $feed_name     = $feed;
        $feed_articles = $count_of->{$feed};
        $feed_pct      = percent_of($feed_articles, $articles);
        write;
    }
    return;
}

# Print one "Top N" table to $out.
#     $title     text after "Usenet Average Daily Traffic Analysis: "
#     $unit      column heading for the value ("Megabytes" or "articles")
#     $kind      column heading for the name ("newsgroup" or "hierarchy")
#     $value_of  hash ref: name -> value for the whole period
#     $divisor   divides the value into the displayed unit (1024 or 1)
#     $per_day   factor converting a whole-period value into a daily average
#     $total     whole-period total, used for the percent column
sub print_top_table {
    my ($out, $title, $unit, $kind, $value_of, $divisor, $per_day, $total) = @_;

    print {$out} "<h2>Usenet Average Daily Traffic Analysis: $title</h2>\n";
    print {$out} "<table>\n";
    print {$out} "<tr><th>rank</th><th>$unit</th><th>percent</th><th>$kind</th></tr>\n";

    # biggest first; names break ties so the output is reproducible
    my @ranked = sort { $value_of->{$b} <=> $value_of->{$a} or $a cmp $b }
        keys %$value_of;

    my $rank = 0;
    for my $name (@ranked) {
        last if $rank++ == $TOP_N;
        my $value = $value_of->{$name};
        my $shown = sprintf "%.2f", $value / $divisor * $per_day;
        my $pct   = sprintf "%.2f", percent_of($value, $total);
        my $label = html_escape($name);
        my $graph = bar_img($pct);
        print {$out} "<tr><td>$rank</td><td>$shown</td><td>$pct</td>"
            . "<td>$label</td><td>$graph</td></tr>\n";
    }
    print {$out} "</table>\n";
    return;
}

# spew forth html
# Writes the complete report to $OUTFILE.  $per_day converts whole-period
# figures into daily averages; $ndays is the period length, preformatted.
#
# NOTE(review): the HTML tags were stripped from the copy of the script this
# was rebuilt from.  All visible text is original; the markup around it is a
# reconstruction - confirm against an intact copy.
sub write_html_report {
    my ($stats, $per_day, $ndays) = @_;

    my $size_of  = $stats->{size};
    my $count_of = $stats->{count};
    my $total_k  = $size_of->{total}{all};     # whole-period volume, ~kbytes
    my $articles = $count_of->{total}{all};    # whole-period article count

    my $avdc = sprintf "%.2f", $articles * $per_day;
    my $avds = sprintf "%.2f", $total_k * $per_day / 1024;

    open my $out, '>', $OUTFILE
        or die "cannot open $OUTFILE for writing: $!\n";

    print {$out} <<"EOH";
<html>
<head>
<title>Usenet Stats</title>
</head>
<body>
<h1>Usenet Stats</h1>

<p>
The following summarizes the average daily Usenet traffic passing
through OpNet during the past $ndays days.
</p>
<p>
average daily article count: $avdc<br>
average daily article volume: ${avds}M
</p>
EOH

    print_top_table($out, 'volume by group', 'Megabytes', 'newsgroup',
        $size_of->{group} || {}, 1024, $per_day, $total_k);
    print_top_table($out, 'article count by group', 'articles', 'newsgroup',
        $count_of->{group} || {}, 1, $per_day, $articles);
    print_top_table($out, 'volume by hierarchy', 'Megabytes', 'hierarchy',
        $size_of->{hier} || {}, 1024, $per_day, $total_k);
    print_top_table($out, 'article count by hierarchy', 'articles', 'hierarchy',
        $count_of->{hier} || {}, 1, $per_day, $articles);

    # by day: actual figures per calendar day; bars are relative to the
    # daily average (a bar of 100% is an average day)
    print {$out} "<h2>Usenet Traffic Analysis: by day</h2>\n";
    print {$out} "<table>\n";
    print {$out} "<tr><th>day</th><th>Megabytes</th><th>articles</th>"
        . "<th>size</th><th>count</th></tr>\n";
    for my $day (sort keys %{ $count_of->{day} || {} }) {
        my $day_k     = $size_of->{day}{$day};
        my $day_count = $count_of->{day}{$day};
        my $sm = sprintf "%.2f", $day_k / 1024;
        my $pm = sprintf "%.2f", percent_of($day_k / 1024, $avds);
        my $sc = sprintf "%.2f", $day_count;
        my $pc = sprintf "%.2f", percent_of($day_count, $avdc);
        my $graphm = bar_img($pm);
        my $graphc = bar_img($pc);
        print {$out} "<tr><td>$day</td><td>$sm</td><td>$sc</td>"
            . "<td>$graphm</td><td>$graphc</td></tr>\n";
    }
    print {$out} "</table>\n";

    # by hour: daily averages per hour of day; bars are the share of the total
    print {$out} "<h2>Usenet Average Daily Traffic Analysis: by hour</h2>\n";
    print {$out} "<table>\n";
    print {$out} "<tr><th>hour</th><th>Megabytes</th><th>articles</th>"
        . "<th>size</th><th>count</th></tr>\n";
    for my $hr (0 .. 23) {
        my $hour_k     = $size_of->{hour}{$hr}  || 0;    # quiet hours have no entry
        my $hour_count = $count_of->{hour}{$hr} || 0;
        my $sm = sprintf "%.2f", $hour_k / 1024 * $per_day;
        my $pm = sprintf "%.2f", percent_of($hour_k, $total_k);
        my $sc = sprintf "%.2f", $hour_count * $per_day;
        my $pc = sprintf "%.2f", percent_of($hour_count, $articles);
        my $graphm = bar_img($pm);
        my $graphc = bar_img($pc);
        print {$out} "<tr><td>$hr</td><td>$sm</td><td>$sc</td>"
            . "<td>$graphm</td><td>$graphc</td></tr>\n";
    }
    print {$out} "</table>\n";

    print {$out} <<"EOF";
</body>
</html>
EOF

    # buffered write errors (disk full, ...) only show up at close
    close $out or die "error writing $OUTFILE: $!\n";
    return;
}

# Read the log, print the per-feed summary, write the HTML report.
sub main {
    my $stats = read_log();

    warn "skipped $stats->{skipped} malformed line(s)\n" if $stats->{skipped};
    my $articles = $stats->{count}{total}{all} || 0;
    die "no usable records in input\n" unless $articles;

    # we have to canonicalize our bignums into a usable form
    print "Adjusting...\n";
    canonicalize($stats);
    print "C:$articles S:$stats->{size}{total}{all}\n";

    # Everything is reported as a daily average, so scale by the length of
    # the period actually covered by the log.
    my $span = $stats->{last} - $stats->{first};
    if ($span <= 0) {
        warn "all records share one timestamp; reporting as a one-day period\n";
        $span = $SECONDS_PER_DAY;
    }
    my $per_day = $SECONDS_PER_DAY / $span;
    my $ndays   = sprintf "%.2f", 1 / $per_day;

    print "Summarizing...\n";
    print_feed_summary($stats);
    write_html_report($stats, $per_day, $ndays);
    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without reading input or writing the report.
main() unless caller;

format FEEDS =
@<<<<<<<<<<<<<<<<<<< @###.## @>>>>>
$feed_name, $feed_pct, $feed_articles
.

1;