#!/usr/bin/perl -w
use strict;
my @alphabet=unpack("C66",
      "ţ");
my %mainenc=(
 "sort" =>   
   "\241\242\243\244\245\246\247".
   "\250\251\252\253\254\255\256\257".
   "\260\261\262\263\264\265\266\267".
   "\270\271\272\273\274\275\276\277\300\301".
   "\200\201\202\203\204\205\206\207".
   "\210\211\212\213\214\215\216\217".
   "\220\221\222\223\224\225\226\227".
   "\230\231\232\233\234\235\236\237\240",
 "cp866" =>   
   "\240\241\242\243\244\245\361\246\247".
   "\250\251\252\253\254\255\256\257".
   "\340\341\342\343\344\345\346\347".
   "\350\351\352\353\354\355\356\357".
   "\200\201\202\203\204\205\360\206\207".
   "\210\211\212\213\214\215\216\217".
   "\220\221\222\223\224\225\226\227".
   "\230\231\232\233\234\235\236\237",
 "alt-fido" =>   
   "\240\241\242\243\244\245\361\246\247".
   "\250\251\252\253\254\255\256\257".
   "p\341\342\343\344\345\346\347".
   "\350\351\352\353\354\355\356\357".
   "\200\201\202\203\204\205\360\206\207".
   "\210\211\212\213\214H\216\217".
   "\220\221\222\223\224\225\226\227".
   "\230\231\232\233\234\235\236\237",
 "koi8-r" =>  
   "\301\302\327\307\304\305\243\326\332".
   "\311\312\313\314\315\316\317\320".
   "\322\323\324\325\306\310\303\336".
   "\333\335\337\331\330\334\300\321".
   "\341\342\367\347\344\345\263\366\372".
   "\351\352\353\354\355\356\357\360".
   "\362\363\364\365\346\350\343\376".
   "\373\375\377\371\370\374\340\361",
 "koi7" => "ABWGDE#VZIJKLMNOPRSTUFHC^[]_YX\\\@Q".
   "abwgde\$vzijklmnoprstufhc~{}\"yx|`q",
 "iso8859-5" => 
   "\320\321\322\323\324\325\361\326\327".
   "\330\331\332\333\334\335\336\337".
   "\340\341\342\343\344\345\346\347".
   "\350\351\352\353\354\355\356\357".
   "\260\261\262\263\264\265\241\266\267".
   "\270\271\272\273\274\275\276\277".
   "\300\301\302\303\304\305\306\307".
   "\310\311\312\313\314\315\316\317",
 "osn" => 
   "\320\321\322\323\324\325\361\326\327".
   "\330\331\332\333\334\335\336\337".
   "\340\341\342\343\344\345\346\347".
   "\350\351\352\353\354\355\356\357".
   "\260\261\262\263\264\265\360\266\267".
   "\270\271\272\273\274\275\276\277".
   "\300\301\302\303\304\305\306\307".
   "\310\311\312\313\314\315\316\317",
 "cp1251" => 
   "\340\341\342\343\344\345\270\346\347".
   "\350\351\352\353\354\355\356\357".
   "\360\361\362\363\364\365\366\367".
   "\370\371\372\373\374\375\376\377".
   "\300\301\302\303\304\305\250\306\307".
   "\310\311\312\313\314\315\316\317".
   "\320\321\322\323\324\325\326\327".
   "\330\331\332\333\334\335\336\337",
 "moshkov" => 
  "\341\342\367\347\344\345\243\366\372\351".
  "\352\353\354\355\356\357\360\362\363\364".
  "\365\346\350\343\376\373\375\256\371\370".
  "\374\340\361".
  "\301\302\327\307\304\305\263\326\332\311".
  "\312\313\314\315\316\317\320\322\323\324".
  "\325\306\310\303\336\333\335\254\331\330".
  "\334\300\321",
 "mac" => 
   "\340\341\342\343\344\345\336\346\347".
   "\350\351\352\353\354\355\356\357".
   "\360\361\362\363\364\365\366\367".
   "\370\371\372\373\374\375\376\337".
   "\200\201\202\203\204\205\335\206\207".
   "\210\211\212\213\214\215\216\217".
   "\220\221\222\223\224\225\226\227".
   "\230\231\232\233\234\235\236\237",
 "ascii"=>
   "abwgde^vzijklmnoprstufhc=[]#yx\\`q".
   "ABWGDE&VZIJKLMNOPRSTUFHC+{}\$YX|~Q",
 "cp500"=> 
   "\254\151\355\356\353\357\111\354\277".
   "\200\375\376\373\374\255\256\131".
   "\104\105\102\106\103\107\234\110".
   "\124\121\122\123\130\125\126\127".
   "\220\217\352\372\276\240\252\266\263".
   "\235\332\233\213\267\270\271\253".
   "\144\145\142\146\143\147\236\150".
   "\164\161\162\163\170\165\166\167",
 "dkoi"=> 
   "\167\170\257\215\212\213\131\256\262".
   "\217\220\232\233\234\235\236\237".
   "\252\253\254\255\214\216\200\266".
   "\263\265\267\261\260\264\166\240".
   "\271\272\355\277\274\275\102\354\372".
   "\313\314\315\316\317\332\333\334".
   "\336\337\352\353\276\312\273\376".
   "\373\375\165\357\356\374\270\335",
 "ebcdic"=> 
   "\237\240\252\253\254\255\335\256\257".
   "\260\261\262\263\264\265\266\267".
   "\270\271\272\273\274\275\276\277".
   "\312\313\314\315\316\317\332\333".
   "\130\131\142\143\144\145\102\146\147".
   "\150\151\160\161\162\163\164\165".
   "\166\167\170\200\212\213\214\215".
   "\216\217\220\232\233\234\235\236"
 );

my @info=(
 "cp866" , "    \$=\$   cp866 ".
           " MSDOS/IBM PCDOS. ".
	  "  ,       ".
	  "  DOS.   , ".
	  "     ,   , ".
	  "      128\$=\$0x80.",
 "cp1251" ,  "MS Windows   1251. ".
	  "      MS Windows.",
 "koi8-r", "KOI-8 RFC 1489 \$=\$  KOI-8  19768-74".
	  "   ``''  ".
	  "``''.   - ".
	  "    .",
 "iso8859-5" , " .       ".
	  "``''   ".
          "Sun  Hewlett Packard.",
 "mac" , ",    Macintosh.".
	  "       0xFF  ``''",
 "osn" , " `` '' (   iso8859-5   )",
"moshkov", "  Besta ".
	  "(\\url{http://www.osp.ru/museum/story/24_00.htm}), ".
	  "  2002    ".
          "   . ".
	  "\\url{http://www.lib.ru}. ".
	  ".       www.lib.ru ".
	  " KOI8-R",
 "sort", ",         ",
 "alt-fido", "           ".
	  "  H (  )  p (  ).     , ".
	  "      ".
	  "    (GoldEd,   FIDOnet, ".
	  "       Norton Commander,".
	  "      FoxPro, DBx  ..).  ".
 	  "     FIDO,    ".
	  " ``'' ,     ".
	  "        koi8-r-fido,".
	  " cp1251-fido   \\ldots ,     ".
	  "  ,        ".
	  " - ?",
 "koi7", " koi8-r  ̣  .   ".
	  "       ң   ӣ".
	  " ݣ   (   Σ    ף".
	  "  ,      7F    ".
	  "  .".
"    \\centreline{\\texttt{ţ \$=\$ ".
"ABWGDE\\#VZIJKLMNOPRSTUFHC\\^{}[]\\_YX\$\\backslash\$\@Q}}".
"  \\centreline{\\texttt{ \$=\$ ".
   "abwgde\\\$vzijklmnoprstufhc\\~{}\\{\\}\"yx|`q}}",
 "dkoi" , "DKOI-8 ( EBCDIC)  19768-87, .",
 "cp500" , "  CECP 500, .",
 "ebcdic" , "EBCDIC  19768-74, .", 
 "ascii", "      . ".
	  "   russian.el  (X)Emacs.".
"    \\centreline{\\texttt{ţ \$=\$ ".
" abwgde\\^{}vzijklmnoprstufhc=[]\\\#yx\$\\backslash\$\`q}}".
"  \\centreline{\\texttt{ \$=\$ ".
" ABWGDE\\&VZIJKLMNOPRSTUFHC+\\{\\}\\\$YX|\\~{}Q}}"
, 
);

sub printenc {
 my $ence=shift;
  my @enc=unpack("C66",$ence);
  my $i;
  foreach $i(@enc){
    printf "%c",$i;
  }
  printf "\n";
}


sub add_abscent_symbols{
  my($encarr)=(@_);
  my %present=();
  my $i;
  foreach $i(@{$encarr}){$present{$i}=undef;}
  foreach $i(0..255) {
    if(exists($present{$i})){next;}
    push @{$encarr}, $i;
  }
}

sub packenc {
 my $ence=shift;
  my @enc=unpack("C66",$ence);
  my @match1=();
  my @match2=();
  my $i;

  add_abscent_symbols(\@enc);
  push @match1,0; push @match2,0;
  for($i=1;$i<@enc;$i++){
    if($enc[$i]==$enc[$match2[$#match2]]+1){
      $match2[$#match2]=$match2[$#match2]+1;
    } else {  
      push @match1,$i; push @match2,$i;
    }
  }
  print "  \"";
  foreach ($i=0;$i<@match1;$i++){
    if($match1[$i]==$match2[$i]){
      printf "\\%o",$enc[$match1[$i]];
      #      tr///
      #    - ݣ,   tr/\134\134/r/.
      #  tr/\134/r/  ģ!
      if($enc[$match1[$i]]==0134){
        printf "\\%o",$enc[$match1[$i]];
      }
    } else { 
      if($match1[$i]+1==$match2[$i]){
        printf "\\%o\\%o",$enc[$match1[$i]],$enc[$match2[$i]];
      } else {
        printf "\\%o-\\%o",$enc[$match1[$i]],$enc[$match2[$i]];
      }
    }
    if(($i+1)%5==0) {printf("\""); if($i!=$#match1) {printf(".\n  \"");}}
  }
  if($i%5!=0) {printf("\"");}
  printf ",\n";
}

sub print_srecode_pl{
  open(STDOUT,"> srecode.pl");
  print "#!/usr/bin/perl\n";
  my $encnum=($#info+1)/2;
  my $yocomment=<<EOF;
#  SRECODE.PL (Shuffle RECODE) . 1.0
# Library SRECODE.PL (Shuffle RECODE) ver. 1.0

#  : ..̣,  2002-- 2003
# : http://www.rusf.ru/books/yo

#    , 
#   $encnum 
#     .

# Functions for Perl, providing exchange
# transformation of $encnum russian encodings to
# each other.

#    -ware, ,  
#   ,  
#     ,   
#  . ,   , 
#       ,
#    :  
#     
#  .

#     
# ģ      
#      
# .   ,  
#  ӣ ,    
#  .


# Written by D.V. Khmelev, March 2002 -- April 2003
# More information at http://www.rusf.ru/books/yo

# The following code is YO-ware. YO-ware means that
# you can freely copy, modify and disassemble it in
# binary and/or source text.  However, if you use
# this program code, you should use Russian letter
# YO in all texts in Russian you type in computer
# from e-mails to novels.

# The author disclaims any responsibility for code
# safety and holds no responsibility for the loss of
# data, resulting the the use of the following
# code. However, the author tried to do his best to
# avoid loss of data.

EOF
  print $yocomment;
  my ($i,$key);
  print "%CODE_TABLE=(\n";
  foreach $i(0..$#info/2){
    $key=$info[2*$i];
    #printenc($mainenc{$key});
    printf(" \"%s\" =>\n ",$key);
    packenc($mainenc{$key});
  }
  printf ");\n";

  my $recproc=<<EOF;

#############################################
#     .
#   %CODE_TABLE, 
# ̣ .  :
# Ex: \$str=recode_var(\$str, \$from, \$to);
#
# Recode text  and return its content. 
# Uses %CODE_TABLE defined above.
# Ex: \$str=recode_var(\$str, \$from, \$to);
#############################################

sub recode_var {
 my (\$text, \$from, \$to) = \@\_;
 if (!\$from){ 
   die"Recode error: Source encoding is not set.";
 }
 if (!\$CODE_TABLE{\$from}){
  die
  "Recode error: Undefined source code set (\$from).";
 }
 if (!\$to){ 
  die"Recode error: Destination encoding is not set.";
 }
 if (!\$CODE_TABLE{\$to}){
  die"Recode error: Undefined destination code set (\$to).";
 }
 \$\_ = \$text;
 eval "tr/\$CODE_TABLE{\$from}/\$CODE_TABLE{\$to}/";
 return \$\_;
}

sub testencs{
  my \$teststr= #all letters in KOI-8 encoding
   "\\301\\302\\327\\307\\304\\305\\243\\326\\332".
   "\\311\\312\\313\\314\\315\\316\\317\\320".
   "\\322\\323\\324\\325\\306\\310\\303\\336".
   "\\333\\335\\337\\331\\330\\334\\300\\321".
   "\\341\\342\\367\\347\\344\\345\\263\\366\\372".
   "\\351\\352\\353\\354\\355\\356\\357\\360".
   "\\362\\363\\364\\365\\346\\350\\343\\376".
   "\\373\\375\\377\\371\\370\\374\\340\\361";
  my (\$to1,\$to2);
  foreach \$to1(keys %CODE_TABLE){
    my \$res1=recode_var(\$teststr,"koi8-r",\$to1);
    foreach \$to2(keys %CODE_TABLE){
      my \$res2=recode_var(\$res1,\$to1,\$to2);
      if(!(recode_var(\$res2,\$to2,"koi8-r") eq \$teststr)){
        die 
        "Unsuccesful recoding: koi8-r->\$to1->\$to2->koi8-r";
      }
    }
  }
  print "No information losses for all the encodings!\\n";
}

testencs();
EOF

  print $recproc;
  close(STDOUT);
}

sub print_TeX_table{
 my $ence=shift;
 my ($i,$j);
 my @enc=unpack("C66",$ence);
 my %tab=();
 my $lowerhalf=0;
  for $i(0..$#enc){
    my ($a,$b)=(($enc[$i]&0xF0)/16,$enc[$i]&0x0F);
    if($a<8) {$lowerhalf=1;}
    $tab{$a}{$b}=$i;
  }
  print "\\begin{tabular}{|c|c|c|c|c|c|c|c|c|c|c|c|c|c|c|c|c|}\\hline\n";
  print "\\&  &";
  for $j(0..15){
    my $delimiter="&";
    if($j==15){$delimiter="\\\\\\hline\n";}
    printf "%X%s",$j,$delimiter;
  }  
  my $istart=8;
  if($lowerhalf) {$istart=0;}
  for($i=$istart;$i<16;$i++){
    printf "%X\$x\$&",$i;
    for $j(0..15){
      my $delimiter="&";
      if($j==15){$delimiter="\\\\\\hline\n";}
      if(exists($tab{$i}{$j})){
	printf "%c", $alphabet[$tab{$i}{$j}];
      } else {
	printf "\\ ";
      }
      printf "%s", $delimiter;
    }
  }
  print "\\end{tabular}\n";
}

sub print_mainextensions_tex{
 my $tab;
 my $i; 
  open STDOUT,">mainextensions.tex";
  foreach $i(0..$#info/2){
    print "\\par\\noindent\\textbf{", $info[2*$i],"} : ", $info[2*$i+1], 
    "\\par\\begin{center} \n";
    print_TeX_table($mainenc{$info[2*$i]});
    print "\\end{center}\n\\par\\bigskip";
  }
  close(STDOUT);
}

sub print_encoding_c {
 my $ence=shift;
  my @enc=unpack("C66",$ence);
  my @match1=();
  my @match2=();
  my ($i,$t);
  print "  \"";
  foreach ($t=$i=0;$i<@enc;$i++,$t++){
    printf "\\%o",$enc[$i];
    if(($i+1)%33==0) {printf("\""); $t=-1; if($i!=$#enc) {printf("\n  \"");} } else {
    if(($t+1)%10==0) {printf("\""); if($i!=$#enc) {printf("\n  \"");}} }
  }
  if($i%33!=0) {printf("\"");}
  printf ",\n";
}


sub get_c_name{
 my ($name)=(@_);
  $name=~ tr/-a-z/_A-Z/;
  return $name;
}

sub print_srecode_c{
  my $encnum=($#info+1)/2;
  my $yocomment=<<EOF;
/*  SRECODE.C (Shuffle RECODE) . 1.0
 * Library SRECODE.C (Shuffle RECODE) ver. 1.0

 *  : ..̣,  2002-- 2003
 * : http://www.rusf.ru/books/yo

 *    , 
 *   $encnum 
 *     .

 * Functions for Perl, providing exchange
 * transformation of $encnum russian encodings to
 * each other.

 *    -ware, ,  
 *   ,  
 *     ,   
 *  . ,   , 
 *       ,
 *    :  
 *     
 *  .

 *     
 * ģ      
 *      
 * .   ,  
 *  ӣ ,    
 *  .

 * Written by D.V. Khmelev, March 2002 -- April 2003
 * More information at http://www.rusf.ru/books/yo

 * The following code is YO-ware. YO-ware means that
 * you can freely copy, modify and disassemble it in
 * binary and/or source text.  However, if you use
 * this program code, you should use Russian letter
 * YO in all texts in Russian you type in computer
 * from e-mails to novels.

 * The author disclaims any responsibility for code
 * safety and holds no responsibility for the loss of
 * data, resulting the the use of the following
 * code. However, the author tried to do his best to
 * avoid loss of data.
 */

EOF
  open(STDOUT,"> srecode.c");
  print $yocomment;
  my ($i,$key);
  print "typedef enum {\n";
  $key=$info[0];
  printf("  %s=0,\n",get_c_name($key));
  foreach $i(1..$#info/2){
    $key=$info[2*$i];
    printf("  %s,\n",get_c_name($key));
  }

  print "  TOTAL_ENCODING_NUMBER",
         "} encoding_t;\n\n";

  print "char * encoding_name[]={\n";
  foreach $i(0..$#info/2){
    $key=$info[2*$i];
    printf("  \"%s\", /* %s */\n",$key,get_c_name($key));
  }
  printf "};\n";
  print "unsigned char * encoding_alphabet[]={\n";
  foreach $i(0..$#info/2){
    $key=$info[2*$i];
    printf("  /* %s */\n",get_c_name($key));
    printf("  (unsigned char*)\n");
    print_encoding_c($mainenc{$key});
  }
  printf "};\n";
my $progtxt=<<EOF;

int *full_encoding_list(encoding_t encoding,
                        int table[]){
  int i,j;
  static int exists[256];
  for(i=0;i<256;i++) exists[i]=0;
  for(i=0;i<66;i++) {
    table[i]=encoding_alphabet[encoding][i];
    exists[table[i]]=1;
  }
  for(j=66,i=0;i<256;i++){
    if(exists[i]) continue;
    table[j++]=i;
  }
  return table;
}


int *fill_recode_table(int table[],
                       encoding_t from,
                       encoding_t to){
  static int full_enc_from[256];
  static int full_enc_to[256];
  int i;
  full_encoding_list(from,full_enc_from);
  full_encoding_list(to,full_enc_to);
  for(i=0;i<256;i++)
    table[full_enc_from[i]]=full_enc_to[i];
  return table;
}

unsigned char *srecode(unsigned char *s,
                       encoding_t from,
                       encoding_t to){
  static int prev_from=-1, prev_to=-1;
  static int recode_table[256]; 
  int i;
  if((from!=prev_from)||(to!=prev_to)){
    prev_from=from; prev_to=to;
    fill_recode_table(recode_table,from,to);
  }
  for(i=0;s[i];i++) s[i]=recode_table[s[i]];
  return s;
}

unsigned char *srecode2(unsigned char *s,
                        unsigned char *t,
                        encoding_t from,
                        encoding_t to){
  static int prev_from=-1, prev_to=-1;
  static int recode_table[256]; 
  int i;
  if((from!=prev_from)||(to!=prev_to)){
    prev_from=from; prev_to=to;
    fill_recode_table(recode_table,from,to);
  }
  for(i=0;t[i];i++) s[i]=recode_table[t[i]];
  s[i]=0;
  return s;
}

/*    SRECODE.C          *
 * end of the code of the library SRECODE.C */
EOF
  print $progtxt;
  close(STDOUT);
}

##   
#printenc($mainenc{"koi8-r"});
#  ,   
#     .
close(STDOUT);
##   srecode.pl:
print_srecode_pl();

##   
print_mainextensions_tex();

##    srecode.c:
print_srecode_c();


