#!/usr/local/bin/perl5

# findmissing
# scan news spool, and report articles that are not in the history file.
# If you want to remove them, you need to pipe this to fastrm or xargs rm,
# findmissing will not do it for you, nor will it add anything or remove
# anything from your history database!

# Written by Harry Bochner, 3/95
# This program may be distributed freely, but please annotate any changes,
# and please leave these comments in place.

# Adjust the next two lines to the local configuration.
$history = '/var/spool/libnews/news/history';	# history file to check against
$spool   = '/var/spool/news';			# root of the article spool

# Pass -v as the first argument to get a verbose progress report.

$verbose = (@ARGV && $ARGV[0] eq "-v") ? shift(@ARGV) : 0;

# Everything below works with paths relative to the spool root.
chdir($spool) or die "$0: $spool: $!";

# Scan the news spool recursively, looking for articles, and build the
# per-group bit strings that will be checked by check_history.
#
# Arguments:
#   $group - dotted group name for this directory ("" at the spool root)
#   $path  - directory path relative to the current directory ("" at the root)
#   $depth - recursion depth; only used to limit how often $0 is updated
#
# Results are left in two global hashes, keyed by group name:
#   %min  - lowest article number found in that group's directory
#   %arts - bit string; bit (N - $min{$group}) is set for every article N
#
# Names containing a dot, and "lost+found", are ignored.  All-numeric names
# are taken to be articles; any other name is descended into, provided it
# is a directory.  A directory that cannot be opened is reported with warn
# and skipped.
#
# NB: This routine assumes that directories never have all-numeric names.
# If you want it to understand alt.2600, you'll have to treat that
# as a special case.

sub scan {
  my($group, $path, $depth) = @_;
  my($fn, @dirs, @nums, $str, $dh);
  my($min, $max) = (0, 0);

  $0 = "scanning $group" if $depth <= 2;

  my $dir = $path || ".";
  unless (opendir($dh, $dir)) {
    warn("$0: $dir: $!");
    return;
  }
  print "scanning $path\n" if $verbose;

  while (defined($fn = readdir($dh))) {
    next if $fn =~ /\./ || $fn eq "lost+found";

    unless ($fn =~ /^\d+$/) {
      # Only directories can hold articles; quietly skip stray plain files
      # instead of failing to opendir them during the recursion below.
      push(@dirs, $fn) if -d "$dir/$fn";
      next;
    }

    $min = $fn if $min == 0 || $min > $fn;
    $max = $fn if $fn > $max;
    push(@nums, $fn);
  }
  # The handle is closed before recursing, so only one directory is open
  # at a time no matter how deep the spool is.
  closedir($dh);

  if ($min) {
    $min{$group} = $min;
    # Pre-size the string to hold bits 0 .. $max-$min.
    $str = "\0" x (int(($max - $min) / 8) + 1);
    foreach my $num (@nums) {
      vec($str, $num - $min, 1) = 1;
    }
    $arts{$group} = $str;
  }

  $group .= "." if $group;
  $path  .= "/" if $path;
  $depth++;
  foreach my $sub (@dirs) {
    scan("$group$sub", "$path$sub", $depth);
  }
}

# Scan the history file, and clear the bit flags for all articles found there.
#
# Takes no arguments.  Reads the file named by the global $history; each
# line is split on whitespace, the first two fields are skipped, and every
# remaining field is expected to look like "group/number".  For each such
# entry the matching bit in $arts{group} (as built by scan) is cleared.
# Entries for groups that scan did not record, or with numbers outside the
# range covered by the group's bit string, are counted but otherwise ignored.
#
# Dies if the history file cannot be opened.  With $verbose set, progress
# is printed every 50000 lines and a total is printed at the end.

sub check_history {
  my $count = 0;	# "group/number" entries seen so far
  my $in;

  print "scanning history\n"
    if $verbose;

  open($in, '<', $history) || die "$0: $history: $!";
  while (my $line = <$in>) {
    my @fields = split(' ', $line);
    shift @fields; shift @fields;	# skip first two fields
    foreach my $entry (@fields) {
      my($group, $num) = split(m{/}, $entry);
      next unless $num;		# sanity check
      $count++;
      next unless defined $min{$group} && $num >= $min{$group};

      # Assigning through vec() past the end of the string would silently
      # grow it, so leave numbers above the scanned range alone.
      my $bit = $num - $min{$group};
      next if $bit >= 8 * length($arts{$group});

      # clear that bit
      vec($arts{$group}, $bit, 1) = 0;
    }
    print "$. lines, $count articles so far\n" if $verbose && $. % 50000 == 0;
    $0 = "history line $." if $. % 50000 == 0;
  }
  close($in);

  print "done reading history, $count articles\n" if $verbose;
}

# Build the bit strings from the spool, then clear every bit whose article
# is listed in the history file.

scan("", "", 0);

check_history();

# Now check all the bit strings; any flag still set marks an article that
# was found on disk but not in the history file.  Report each one on stdout
# as a path relative to $spool.

print "checking for missing articles\n" if $verbose;
foreach my $group (sort keys %arts) {
  my $min = $min{$group};
  my $str = delete $arts{$group};
  delete $min{$group};		# recover memory?
  next unless $str =~ /[^\0]/;	# no bit left set in this group

  $0 = "findmissing $group";

  # Articles sitting directly in the spool root are recorded under the
  # empty group name; they get no directory prefix, so the reported path
  # stays relative instead of turning into "/N".
  my $prefix = $group;
  $prefix =~ s,\.,/,g;
  $prefix .= "/" if $prefix ne "";

  # Test the bits in place with vec() rather than expanding the whole
  # string into a list of one-character strings.
  my $nbits = 8 * length($str);
  for (my $bit = 0; $bit < $nbits; $bit++) {
    print $prefix, $min + $bit, "\n" if vec($str, $bit, 1);
  }
}
