The script for the previous.

karttu at megabaud.fi karttu at megabaud.fi
Wed Jan 2 23:55:25 CET 2002


listAs.pl follows:


#!/usr/bin/perl
#
# listAs.pl  --   An Ad-Hoc parsing script for OEIS %A -lines.
# fetched from http://www.research.att.com/~njas/sequences/eisBTfry000$a.txt
# where $a ranges from 00 to some number (say 99).
# Written 2. Jan 2002 by karttu
#
#
# Usage:
# cat eis*.txt | perl listAs.pl | sort | uniq -c | sort -nr > authcounts.txt &
#
# The %A lines that interest us have various formats:
#
# %A A034825 njas
#            Clark Kimberling, ck6 at cedar.evansville.edu
#            Clark Kimberling (ck6 at cedar.evansville.edu)
#            encyclopedia at pommard.inria.fr, Jan 25 2000
#            Patrick De Geest (pdg at worldofnumbers.com), Jun 1998.
#            njas, Robert G. Wilson v (rgwv at kspaint.com)
#            Robert G. Wilson v (rgwv at kspaint.com), Aug 10 2001
#            rkg at cpsc.ucalgary.ca (Richard Guy)
#            njas,jhc
#            njas, mb, Robert G. Wilson v (rgwv at kspaint.com)
#            Antti.Karttunen at iki.fi Oct 28 200
#            Antti.Karttunen at iki.fi (karttu at megabaud.fi) Sep 03 2000
#            Antti Karttunen (karttu at megabaud.fi) and Patrick De Geest (pdg at worldofnumbers.com), Nov 1999.
#            jhc [ conway at math.Princeton.EDU ]
#            Jan Kristian Haugland (jankrihau at hotmail.com)
#            Jan.Hagberg at stat.su.se
#            mlb at well.com (Marc Le Brun)
#            Jud McCranie and Carlos Rivera (jud.mccranie at mindspring.com)
#
# and many more pathological cases...
#


# trim_blankos_and_trash(STRING)
#
# Returns a cleaned-up copy of STRING (the argument itself is not modified):
#   - everything up to and including the last " by " is dropped
#     ("Submitted by", "Suggested by", "Sent by", ...),
#   - a leading "by " or "Including " (any letter case) is dropped,
#   - leading double quotes and whitespace are dropped,
#   - trailing ']' ')' '"' '.' and whitespace are dropped.
sub trim_blankos_and_trash
{
    my ($text) = @_;

    # Leading noise, stripped in this exact order.
    my @leading_noise = (
        qr/^.* by /,        # Submitted by, Suggested by, Sent by, etc.
        qr/^by /i,          # A bare "by somebody"
        qr/^Including /i,
        qr/^["\s]+/,
    );

    foreach my $noise (@leading_noise) {
        $text =~ s/$noise//;
    }

    # Trailing brackets, quotes, full stops and blanks.
    $text =~ s/[\]\)\s"\.]+$//;

    return $text;
}


# scan_input(FILEHANDLE)
#
# Reads lines from FILEHANDLE and, for every line of the form
# "%A <id> <submitters>", prints one line to STDOUT per submitter found.
# Submitters are separated by commas, semicolons or the word " and ".
# For each piece:
#   - pieces that start like a date ("Jun 1998", "Jan 25 2000") or consist
#     only of digits are skipped;
#   - "Name (address)" / "address (Name)" forms are reduced to the part that
#     does not look like an e-mail address (the first part when in doubt);
#   - a date glued after a single token ("x@y Oct 28 200") is cut off;
#   - an e-mail address whose local part contains dots is replaced by the
#     local part with the dots turned into spaces; any other address is kept
#     whole;
#   - each remaining word is lower-cased, capitalized, and printed followed
#     by one space, then a newline ends the submitter.
# A single empty line is printed after all input has been consumed.
sub scan_input
{
    my ($file) = @_;

    # A month abbreviation followed by a digit marks (a piece of) a date.
    # The digit class must be [0-9]: with [0-8] a piece such as
    # "Jan 9 2001" was not recognised and got counted as an author.
    my $date_start =
        qr/(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]/i;

    while (defined(my $line = <$file>)) {
        # Capture the remainder instead of using $', and keep $_ untouched.
        next unless $line =~ /^\%A [^ ]* (.*)/s;
        my $rest = $1;

        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp($rest);

        # Submitters separated by commas, semicolons and word " and ":
        my @pieces = split(/\s*,\s*|\s*\;\s*|\s* and \s*/, $rest);

        foreach my $piece (@pieces) {
            my $author = $piece;

            next if $author =~ /^$date_start/;    # A date, not a submitter.
            next if $author =~ /^[0-9]*$/;        # Empty, or a bare number.

            # Name followed by e-mail address in parentheses? (Or the other way)
            if ($author =~ /^([^[(<]*)[[(<]\s*([^]>)]*)/) {
                # Copy the captures before calling a sub that runs regexes.
                my ($before, $inside) = ($1, $2);
                my $first  = trim_blankos_and_trash($before);
                my $second = trim_blankos_and_trash($inside);

                # Only the first part contains a mail address, like
                # "mlb@well.com (Marc Le Brun)"?  Then the name is inside
                # the brackets; otherwise presume it is the first part.
                $author = ($first =~ /\@/ && $second !~ /\@/) ? $second
                                                              : $first;
            }
            else {
                $author = trim_blankos_and_trash($author);
            }

            # A date typed straight after a single token, without a comma.
            if ($author =~ /^([^\s]*) $date_start/) {
                $author = $1;
            }

            # An email is the best we have...
            if ($author =~ /^([^\@]*)\@/) {
                my $local_part = $1;

                # Turn every dot of the local part into a space (the old
                # code replaced only the first one, so "a.b.c" became
                # "a b.c").  An address without dots is kept whole.
                (my $spaced = $local_part) =~ tr/./ /;
                $author = $spaced if $spaced ne $local_part;
            }

            next if $author =~ /^[0-9]+/;         # Pieces of dates...

            my @words = split(/\s+/, $author);
            print join('', map { ucfirst(lc($_)) . ' ' } @words) . "\n";
        }
    }

    print "\n";
}

# Entry point: process whatever arrives on standard input.
scan_input(*STDIN);



-----------------------------------------------------------------------

And the C-shell script for downloading the whole database from OEIS:

#!/bin/csh
# Downloads the OEIS database chunks eisBTfry000NN.txt (NN = 00, 01, ...)
# with lynx into eisNN.txt in the current directory.  Stops at the first
# chunk whose download contains 'Document Not Found', removes that file,
# fetches recent.txt and exits.
#
# NOTE(review): the usage text asks for an output filename, but $1 is only
# tested for presence and is never used below -- confirm the intent.
# NOTE(review): a download that fails without producing the text
# 'Document Not Found' (e.g. a network error) does not appear to end the
# loop -- verify.

# Refuse to run without an argument: "a" equals "$1a" only when $1 is empty.
if ("a" == "$1a") then
 echo "Usage: $0 outputfilename (takes more than thirty megabytes)"
 exit 1
endif
# Chunk counter.
set a=0
# Top of the download loop (re-entered via the goto at the bottom).
luuppi:
# Pad single-digit counters to two digits: 0 -> 00, 1 -> 01, ...
if ($a < 10) set a=0$a
echo "Dumping http://www.research.att.com/~njas/sequences/eisBTfry000$a.txt"
lynx -source http://www.research.att.com/~njas/sequences/eisBTfry000$a.txt > eis$a.txt
# The downloaded page containing 'Document Not Found' is taken to mean that
# there are no more chunks.
if (`fgrep -c 'Document Not Found' eis$a.txt` > 0) then
 echo "/~njas/sequences/eisBTfry000$a.txt not found anymore, finishing."
 rm -f eis$a.txt
 lynx -source http://www.research.att.com/~njas/sequences/recent.txt > recent.txt
 exit 0
endif
# Advance to the next chunk; the result is re-padded at the top of the loop.
set a=`expr $a + 1`
goto luuppi





More information about the SeqFan mailing list