;;; russian.el --- minor mode for Russian texts. ;; Copyright (C) 1994 Valery Alexeev ;; Author: Valery Alexeev ;; Created: 14 Jan 1994 ;; Version: 1.12 ;; Adapted-By: Denis Kosygin ;; Keywords: foreign, russian, wp ;; Requires: FSF Emacs version 19 or Lucid XEmacs version 19.13 ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to ;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. ;; New versions of this package (if they exist) may be found at: ;; http://www.math.uga.edu/~valery/russian.el ;;; Commentary: ;; ;; With this program, you will be able to: ;; 1) Display buffers containing russian characters in arbitrary ;; encodings without changing the buffers' contents if you have at least ;; one russian font installed on your system. ;; 2) Same as above if you do not have any russian fonts at all, using ;; your favourite transliteration scheme. ;; 3) Translate whole buffers or regions of text from one russian ;; standard to another, including arbitrary transliterations. ;; 4) Consequently, print files with russian characters if you have at ;; least one printable russian font. ;; 5) Type russian characters in arbitrary encodings and using ;; arbitrary keyboard layouts. 
;; ;; The basic concept of this program is an encoding which is simply a ;; string of 66 characters or a list of 66 strings, for all the ;; letters of Russian alphabet, 33 letters in lower case first, then ;; 33 letters in upper case. There are also extra bits of information ;; for encodings that mess up the usual ASCII characters, such as ;; "jcuken" for example. You can easily add as many new encodings as ;; you like, see "Customization" below to find out how to do this. ;; ;;; Encodings: ;; ;; The predefined encodings are: ;; ;; 8koi KOI-8 RFC 1489 = old KOI-8 GOST 19768-74 with SMALL IO and ;; CAPITAL IO added, used in relcom.* newsgroups and much of e-mail ;; 7koi KOI-7 ;; alt Alternativnyj Variant = MS DOS code page CP 866 ;; gostcii ISO 8859-5 ;; osn Osnovnoj Variant (the only difference in the cyrillic ;; range between this standard and GOSTCII is CAPITAL IO) ;; win-cp1251 MS Windows code page CP 1251 ;; mac Macintosh standard ;; moshkov Internal encoding of Maxim Moshkov Library (www.lib.ru) ;; dkoi DKOI-8 (Russian EBCDIC) GOST 19768-87, obscure ;; cp500 code page CECP 500, obscure ;; ebcdic EBCDIC GOST 19768-74, obscure ;; ascii american keyboard, phonetic transliteration, ;; probably, the most natural keyboard layout ;; jcuken russian typewriter keyboard layout, used in some TrueType ;; fonts for MS Windows ;; libcon Library of Congress transliteration standard ;; naive the most common transliteration ;; tex AMSTeX & LaTeX transliteration ;; broken-8koi this is what some mailers/gateways do to your email ;; to/from Russia ;; ascii2 one more transliteration, without SMALL IO, CAPITAL IO and ;; CAPITAL HARD SIGN ;; ;; Information about various standards was taken in part from files in ;; the directory ;; /anonymous@nic.funet.fi:/pub/culture/russian/comp/characters, ;; in particular from ;; cyrillic.encoding.faq by Andras Kornai, ;; lettermappings.gz by Dmitri Vulis and RFC 1489. ;; ;; If you know of more standards, please contribute. 
;;; Installation: ;; ;; By default, this package uses koi-8 encoding and phonetic ("ascii", ;; also known as "yawerty") keyboard layout. To set different default ;; values, put the following in your .emacs, modified according to ;; your preferences. Below the default values are shown. They will be ;; assigned automatically to variables which you do not set in .emacs. ;; ;; ;; Not necessary unless you set different values. ;; (setq-default russian-buffer-name "8koi") ;; (setq-default russian-mode-name "8koi") ;; (setq russian-keyboard-name "ascii") ;; (setq russian-font-name "8koi") ;; ;; ;; This is important. Make sure the program is on your ;; ;; load-path (check it with C-h v load-path). ;; (require 'russian) ;; ;; ;; Finally, don't forget to M-x byte-compile-file russian.el. ;; ;; If you use russian minor mode often you may want to provide ;; additional customization. To use russian mode with rmail put in ;; your .emacs: ;; ;; (add-hook 'mail-setup-hook '(lambda () (russian-mode 1))) ;; ;; Function `russian-auto-mode' turns on russian mode automatically in ;; files whose names have extensions ".koi", ".alt" and ".iso" and ;; some others. This function uses patterns specified in variable ;; `russian-auto-mode-regexp'. To use it put in .emacs ;; ;; (add-hook 'find-file-hooks 'russian-auto-mode) ;; ;; and change `russian-auto-mode-regexp' accordingly. ;; It is also convenient to have key-bindings for some of its functions ;; in your .emacs. For example the line ;; ;; (define-key global-map (read-kbd-macro "C-c r") 'russian-mode) ;; ;; lets you toggle russian minor mode by pressing "C-c r". Similarly ;; ;; (define-key global-map (read-kbd-macro "C-c i") 'russian-insertion-mode) ;; (define-key global-map (read-kbd-macro "C-c d") 'russian-display) ;; ;; bind function `russian-insertion-mode' to "C-c i" and function ;; `russian-display' to "C-c d". ;;; Usage: ;; ;; The command ;; ;; M-x russian-mode ;; ;; toggles display of Russian characters. 
If used with a positive ;; argument, it will prompt for the buffer encoding, otherwise, ;; default value of `russian-buffer-name' will be used. The default ;; value is computed as follows. The package looks at the file name ;; extension and tries to figure out its encoding. By default, if file ;; name ends in ".koi", it is assumed to be in KOI-8 encoding, if the ;; ending is ".alt", then the encoding is set to ALT, and finally, the ;; extension ".iso" sets the encoding to GOSTCII. Otherwise, the ;; default values set in your .emacs are used. If you set none, then ;; package defaults ("8koi") are used. See section "Encodings" above ;; for the description of mentioned encodings. You may alter the way ;; emacs computes encodings by setting variable ;; `russian-auto-mode-alist' in your .emacs accordingly. For example, ;; the following line of Lisp tells emacs besides the default ;; behaviour to use ALT encoding in files whose names end in ".tex": ;; ;; (setq russian-auto-mode-alist (cons '("\\.tex\\'" . "alt") ;; russian-auto-mode-alist)) ;; ;; Every time, when russian minor mode is turned on, the list of ;; commands in `russian-mode-hook' is processed. ;;; Display: ;; ;; The function ;; ;; M-x russian-display ;; ;; sets variable `standard-display-table' so that you can see Russian ;; characters encoded according to various standards. The actual file ;; or a buffer won't change, only the way of presenting it. The ;; function needs two parameters: the buffer type and the font. For ;; the buffer type you will be prompted. The font is the font already ;; installed on your system or a transliteration which for the ;; purposes of this program is also considered to be a font. You are ;; not likely to change it often, so the function uses a value already ;; set in your .emacs or the default one. Interactively, you can ;; change this value with M-x russian-set-font. The command ;; `russian-display' also accepts an argument which must be a valid ;; buffer encoding. 
;;; Translation: ;; ;; M-x russian-translate-region and ;; M-x russian-translate-buffer ;; ;; will prompt you for the buffer encoding and the new encoding. It ;; translates even from a non 1-to-1 encoding such as the "libcon" or ;; the "naive" encodings and does a better job at it than you'd ;; expect. Translation from "tex" is not currently implemented. It ;; doesn't seem to be very useful and I felt that writing a decoding ;; rule for "tex" would be a tedious and a fruitless job. But if you ;; need it let me know. The completion is enabled so when prompted you ;; can hit TAB to see all available standards. The translation ;; between the "string" type encodings (see below) is the fastest, ;; translation from a "string" type to a "list" type is considerably ;; slower and the translation from the "list" type sometimes gets ;; really slow depending on the size of a region/buffer. ;;; Editing: ;; ;; M-x russian-insertion-mode ;; ;; toggles a minor mode `russian-insertion-mode' which is a supplement ;; to `russian-mode'. When in this mode, typing `a' will actually ;; produce a character corresponding to `a' in the encoding chosen, ;; for example, `\301' in KOI-8. The encoding is set in your .emacs. ;; Interactively, you can choose the encoding by invoking this ;; function with a positive argument. The negative argument will ;; always turn the mode off. A "minor" means that all bindings not ;; directly affected by this mode such as all C-... and M-... ;; keystrokes of the major mode remain in effect, as well as the ;; syntax tables and everything else. So you can use this minor mode ;; while for instance in the Mail mode. When turned on ;; `russian-insertion-mode' adjusts display so that you may see what ;; you type. If you do need to set variables `russian-buffer-name' ;; and `russian-mode-name' to different values, use functions ;; `russian-set-buffer' and `russian-set-mode', mentioned below. 
To ;; modify the behaviour of `russian-insertion-mode' use ;; `russian-insertion-mode-hook' which is run every time russian ;; insertion is turned on. ;;; Changing the parameters: ;; ;; As has been already explained, you will be prompted for the ;; parameters that you are most likely to change often. At any rate, ;; there are four dedicated functions for changing them ;; ;; M-x russian-set-font ;; M-x russian-set-buffer ;; M-x russian-set-keyboard ;; M-x russian-set-mode ;;; Customization: ;; ;; All the customization described in this section should go ;; **before** the line (require 'russian) to take effect. You can ;; customize this program in several ways. ;; ;; 1) Currently all the predefined standards can be used for ;; translation but not all of them can be set as font, buffer, ;; keyboard or insertion mode names. You can change this by setting ;; the variables ;; ;; russian-font-additional-list ;; russian-buffer-additional-list ;; russian-keyboard-additional-list ;; russian-mode-additional-list ;; ;; For example, ;; ;; (setq russian-font-additional-list ;; '("dkoi" "cp500")) ;; ;; 2) You can add more encodings. First, you have to define an ;; encoding russian-encoding-whatever and it should be a string of 66 ;; characters (for 33 russian letters in the lower case, then 33 ;; letters in the upper case), or a list of 66 strings. You should ;; take a look at the definitions of various encodings in this file. ;; Then, if your encoding is non-standard even in the ASCII range, you ;; should also define russian-encoding-whatever-filter-from and ;; russian-encoding-whatever-filter-to, each of them being a list of 2 ;; strings defining the translation rules, compare "jcuken". If your ;; encoding is of the "list" type and you are planning to translate ;; from it then you also need russian-encoding-whatever-decoding-rule. 
;; The latter is a tree-like data structure much as a keymap which ;; contains characters or strings of characters that have to be ;; inserted when a partial completion was successful. Positive number ;; at the end or in the middle of a branch means "insert the i-th ;; letter of the Russian alphabet", 0 means "insert nothing", and a ;; negative number -N means "insert the character with code N". ;; ;; To adapt this package to another language, simply copy this file ;; to LANGUAGE.el, M-x query-replace russian RET LANGUAGE RET, ;; change the language-specific encodings and default-alists and adjust ;; headers and documentation accordingly. For most letter-based ;; languages that should be all. ;;; Character set autodetect ;; Package is able to autodetect different character sets now. ;; Autodetection is performed with Markov chains. So that it works ;; for very short texts, but only for meaningful ones. It would fail ;; if you try to detect encoding for raw set of characters in particular ;; encoding (I think it is not a common task anyway ;-). And it will fail ;; in situations when it is used to autodetect similar encodings ;; (mac and cp1251, e.g.). Performance is poor when package is not ;; byte-compiled. In order to speed up large files processing it tries ;; to limit number of input characters (see russian-detect-chars). ;; Also autodetection is performed only for limited number of encodings ;; (see russian-detect-safe-encoding-list) ;; In order to make autodetection additional encoding table was added: ;; "unumeric". It is a table to translate russian letters to abstract ;; form of correspondent 'uppercase' numbers. ;;; XKB and russian input layout support ;; If your OS is already able to input russian characters then you can ;; use this package anyway because it is able to encode input russian ;; chars to any character set you like. So that it is able to produce ;; KOI8 files on Windows and vice-versa. And XKB support was added. 
;; The only thing you have to do is to add function like this ;; ;; (define-key global-map 'iso-next-group (lambda () (interactive))) ;; (define-key global-map 'iso-prev-group (lambda () (interactive))) ;; ;; in order to stop (X)Emacs complaining about keyboard switch key. ;; And there is another enhancement to (X)Emacs' behavior: ;; using case and syntax tables for russian letters. Modification ;; is made as a part of input layout changing and fully automatic ;; and transparent. These tables are calculated with using ;; character set definition tables. So that you don't have to ;; provide specific support for additional character sets ;;; LCD Archive Entry: ;; russian.el|Valery Alexeev|valery@math.uga.edu ;; Display, translate and edit buffers containing russian characters.| ;; 08-Jul-99|$Revision: 1.2 $|~/packages/russian.el.Z ;; RCS $Id: russian.el,v 1.2 2001/12/15 22:31:36 mamont Exp mamont $ ;; The revision number does NOT coincide with the version number. ;;; Change Log: ;; $Log: russian.el,v $ ;; Revision 1.12 2001/12/15 22:31:36 mamont ;; D.V. Khmelev [dvk24@newton.cam.ac.uk] ;; added support for Maxim Moshkov Library (www.lib.ru) internal ;; encoding. ;; ;; Revision 1.11 2000/07/03 12:05:43 oleg ;; Significant improvements by Oleg Sesov sesov@mail.ru ;; Changes in buffer encoding auto-detect. Added XKB support, ;; case and syntax table support (now it is possible to go forward/backward ;; word by word even in Russian texts and uppercase/lowercase/capitalize them ;; with one keystroke). Also changed is the input keyboard layout list --- ;; in order to ask for Russian input encodings. Now possible to use Emacs ;; with native OS keyboard switcher to edit texts with different encodings. ;; ;; Revision 1.10 1999/08/26 15:43:29 valery ;; Some typos corrected ;; ;; Revision 1.9 1999/07/15 15:43:29 valery ;; Changed 'cp1251' to 'win-cp1251'. 
;; Darrin Edwards corrected
;; bugs in `russian-encoding-tex', added russian-encoding-tex-decoding-rule
;;
;; Revision 1.8 1997/08/17 21:04:08 dkosygin
;; New `russian-get-default'.
;;
;; Revision 1.7 1997/07/25 21:31:20 vladimir
;; Bug fixes by Vladimir Alexiev in russian-get-default and display handling.
;;
;; Revision 1.6 1997/07/21 22:57:36 dkosygin
;; Partial merge with changes by Vladimir Alexiev for Xemacs-19.13.
;;
;; Revision 1.5 1997/05/06 14:10:01 dkosygin
;; Changes by Denis Kosygin.
;; Made russian a minor mode. Added version control. Koi-8 support
;; in defaults. Docs and comments fix.
;;
;; Revision 1.4.2.2 1997/07/21 00:00:00 vladimir
;; Modified by Vladimir Alexiev.
;; Added russian-toggle.
;;
;; Revision 1.4.2.1 1997/07/21 00:00:00 vladimir
;; Modified by Vladimir Alexiev to work with
;; xemacs display-table specifiers.
;;
;; Revision 1.4 1996/03/19 00:00:00 ruslan
;; Modified by Ruslan Polyansky to work (partially)
;; with XEmacs version 19.13.
;;
;; Revision 1.3 1997/08/17 19:46:41 dkosygin
;; Merged changes by Bas de Bakker and Dimitrii Kloper.
;;
;; Revision 1.2 1996/02/06 00:00:00 bas
;; Changes by Bas de Backer to work with Emacs
;; versions >19.29.
;;
;; Revision 1.1.2.1 1997/07/16 00:00:00 dimka
;; Patched by Dmitry Kloper to work with
;; FSF Emacs versions >19.29.
;;
;; Revision 1.1 1994/01/14 00:00:00 ava
;; Initial release by Valery Alexeev.

;;; Code:

;;; Basic definitions and preliminary functions

;;; User configurable variables
;; (configure them in your .emacs, not here):

(defvar russian-font-name "8koi"
  "The encoding of the font used to display Cyrillic characters.
Possible values are listed in `russian-font-list'.  Names of
transliteration schemes can be also used in this variable, if no
Cyrillic font is available.")

(defvar russian-buffer-name "8koi"
  "*The encoding of displayed buffer.
Possible values are listed in `russian-buffer-list'.")

(defvar russian-keyboard-name "ascii"
  "Type of Russian keyboard.
If `ascii' use phonetic correspondence between Cyrillic and Latin
characters.  If `jcuken' use standard russian typewriter keyboard.
Possible values are listed in `russian-keyboard-list'.")

(defvar russian-mode-name "8koi"
  "*The encoding of input Cyrillic characters.
Possible values are listed in `russian-mode-list'")

(defvar russian-font-additional-list nil
  "Names of user defined font encodings.")

(defvar russian-buffer-additional-list nil
  "Names of user defined buffer Cyrillic encodings.")

(defvar russian-keyboard-additional-list nil
  "Names of user defined keyboard Cyrillic encodings.")

(defvar russian-mode-additional-list nil
  "Names of user defined encodings for `russian-insertion-mode'.")

(defvar russian-auto-mode-alist
  (list '("\\.\\(8koi\\|koi8?\\)\\'" . "8koi")
        '("\\.alt\\'" . "alt")
        '("\\.iso\\'" . "gostcii")
        '("\\.\\(7koi\\|koi7\\)\\'" . "7koi")
        ;; Anchored at the end of the name like every other entry (and like
        ;; the `dir' alternative of `russian-auto-mode-regexp'); without
        ;; the anchor any name merely containing ".dir" was matched.
        '("\\.dir\\'" . "moshkov"))
  "Alist of automatic parameter settings for russian.el.
Each element looks like (REGEXP . ENCODING).  Specify ENCODING as the
default for `russian-buffer-name' and `russian-mode-name' in files
whose names match REGEXP.")

(defvar russian-auto-mode-regexp
  "\\.\\([78]koi\\|koi[78]?\\|iso\\|alt\\|dir\\)\\'"
  "File names which require use of russian minor mode by default.
Used by function `russian-auto-mode'.")

(defvar russian-encoding-additional-alist nil
  "Alist of user defined Cyrillic encodings.
Each element looks like
  \(PROMPT ENCODING MDLNENAME DECODING-RULE FILTER-FROM FILTER-TO\).
For example:
  \(\"wtvr\" russian-encoding-whatever \"WHATEVER\"
   russian-encoding-whatever-decoding-rule
   russian-encoding-whatever-filter-from
   russian-encoding-whatever-filter-to\)
PROMPT is a string easy to type, in our case \"wtvr\".
ENCODING is the name of the variable defining the encoding, in our
case russian-encoding-whatever.
MDLNENAME is a string for the modeline, i.e. \"WHATEVER\".
DECODING-RULE is russian-encoding-whatever-decoding-rule or nil.
FILTER-TO (FILTER-FROM) are russian-encoding-whatever-filter-to
\(resp. from\) or nil.")

;; User configurable variables end here.

;;; Defaults:

;; Each entry has the layout documented for
;; `russian-encoding-additional-alist'.
;; NOTE(review): the "xkb" entry carries a seventh element `t' whose
;; meaning is not documented in this part of the file -- confirm
;; against the code that consumes it.
(defconst russian-encoding-default-alist
  '(("ascii" russian-encoding-ascii "ASCII")
    ("7koi" russian-encoding-7koi "KOI-7")
    ("alt" russian-encoding-alt "ALT")
    ("8koi" russian-encoding-8koi "KOI-8")
    ("mac" russian-encoding-mac "MAC")
    ("win-cp1251" russian-encoding-win-cp1251 "WIN")
    ("osn" russian-encoding-osn "OSN")
    ("gostcii" russian-encoding-gostcii "GOSTCII")
    ("moshkov" russian-encoding-moshkov "moshkov")
    ("naive" russian-encoding-naive "NAIVE"
     russian-encoding-naive-decoding-rule)
    ("libcon" russian-encoding-libcon "LibCon"
     russian-encoding-libcon-decoding-rule)
    ("broken-8koi" russian-encoding-broken-8koi "BrknKOI-8"
     russian-encoding-broken-8koi-decoding-rule)
    ("tex" russian-encoding-tex "CyrTeX"
     russian-encoding-tex-decoding-rule)
    ("jcuken" russian-encoding-jcuken "JCUKEN" nil
     russian-encoding-jcuken-filter-from
     russian-encoding-jcuken-filter-to)
    ("jcuken-win" russian-encoding-jcukenwin "JCUKEN-Win" nil
     russian-encoding-jcukenwin-filter-from
     russian-encoding-jcukenwin-filter-to)
    ("cp500" russian-encoding-cp500 "CP500")
    ("ebcdic" russian-encoding-ebcdic "EBCDIC")
    ("dkoi" russian-encoding-dkoi "DKOI")
    ("ascii2" russian-encoding-ascii2 "ASCII-2")
    ("xkb" russian-encoding-xkb "XKB" nil nil nil t)
    ("unumeric" russian-encoding-unumeric "UNUMERIC"))
  "Alist of default Russian encodings.")

(defconst russian-font-default-list
  '("ascii" "ascii2" "7koi" "alt" "8koi" "mac" "win-cp1251" "osn" "moshkov"
    "gostcii" "naive" "libcon" "jcuken" "jcuken-win")
  "Cyrillic fonts and transliterations supported by default.
Add new fonts and transliteration schemes by changing `russian-font-additional-list'.")

(defconst russian-buffer-default-list
  '("ascii" "ascii2" "7koi" "alt" "8koi" "mac" "win-cp1251" "osn" "moshkov"
    "gostcii" "naive" "libcon" "broken-8koi" "tex" "jcuken" "jcuken-win"
    "cp500" "ebcdic" "dkoi")
  "Cyrillic encodings and transliterations whose display is supported by default.
Add new encodings by changing `russian-buffer-additional-list'.")

(defconst russian-keyboard-default-list
  '("ascii" "ascii2" "jcuken" "jcuken-win" "xkb" "alt" "moshkov" "8koi"
    "mac" "win-cp1251" "osn")
  "Cyrillic keyboards supported by default.
Add new keyboards by changing `russian-keyboard-additional-list'.")

(defconst russian-mode-default-list
  '("ascii" "7koi" "alt" "8koi" "mac" "win-cp1251" "osn" "gostcii" "xkb"
    "moshkov" "naive" "libcon" "broken-8koi" "tex" "jcuken" "jcuken-win"
    "cp500" "ebcdic" "dkoi")
  "Cyrillic encodings supported by default in `russian-insertion-mode'.
Add new encodings by changing `russian-mode-additional-list'.")

(defconst russian-safe-encoding-name "8koi"
  "Encoding used in internal conversions by russian.el.
This should be an encoding of the \"string\" type and coincide with
ASCII for characters below 127.")

(defun russian-set-default-encoding ()
  "Determine the default russian encoding of a file by its name.
Take the name of the file visited in the current buffer.  Search in
`russian-auto-mode-alist' for a REGEXP matching the value of
`buffer-file-name' until the first match and use the corresponding
ENCODING to set `russian-buffer-name'.  Use the global defaults if no
matches are found.  Do nothing if no file is visited in the current
buffer.

Return the encoding that was set, or nil if nothing was changed.
The caller's match data is preserved."
  (interactive)
  (if buffer-file-name
      (let ((name (file-name-sans-versions buffer-file-name))
            ;; File names are matched case-insensitively only on systems
            ;; whose file names are themselves case-insensitive.
            (case-fold-search (memq system-type '(vax-vms windows-nt)))
            (rest russian-auto-mode-alist)
            enc)
        ;; Walk the alist and stop at the first entry that yields an
        ;; encoding.  This replaces building an `(or ...)' form and
        ;; passing it to `eval', which treated the alist values as code.
        (save-match-data
          (while (and rest (not enc))
            (if (string-match (car (car rest)) name)
                (setq enc (cdr (car rest))))
            (setq rest (cdr rest))))
        (if enc
            (setq russian-buffer-name enc)))))

(defun russian-auto-mode ()
  "Check if the file requires use of russian minor mode.
If the name of file visited in the current buffer matches
`russian-auto-mode-regexp' turn on russian minor mode by default."
  (if (and buffer-file-name
           (string-match russian-auto-mode-regexp
                         (file-name-sans-versions buffer-file-name)))
      (russian-mode 1)))

;;; Setting the variables:

;; The buffer encoding is tracked separately for every buffer.
(make-variable-buffer-local 'russian-buffer-name)

(defvar russian-encoding-alist
  (append russian-encoding-default-alist russian-encoding-additional-alist)
  "List of encodings known to russian.el.
Add new encodings by changing `russian-encoding-additional-alist'.")

(defvar russian-font-list
  (append russian-font-default-list russian-font-additional-list)
  "List of supported cyrillic fonts and transliterations.
Add new fonts and transliteration schemes by changing
`russian-font-additional-list'.")

(defvar russian-buffer-list
  (append russian-buffer-default-list russian-buffer-additional-list)
  "List of Russian encodings whose display is supported.
Add new encodings by changing `russian-buffer-additional-list'")

(defvar russian-keyboard-list
  (append russian-keyboard-default-list russian-keyboard-additional-list)
  "List of supported cyrillic keyboards.
Add new keyboards by changing `russian-keyboard-additional-list'.")

(defvar russian-mode-list
  (append russian-mode-default-list russian-mode-additional-list)
  "List of Russian encodings supported in `russian-insertion-mode'.
Add new encodings by changing `russian-mode-additional-list'.")

;; Completion tables: each name wrapped in its own one-element list.
(defvar russian-font-alist (mapcar 'list russian-font-list))

(defvar russian-buffer-alist (mapcar 'list russian-buffer-list))

(defvar russian-keyboard-alist (mapcar 'list russian-keyboard-list))

(defvar russian-mode-alist (mapcar 'list russian-mode-list))

;;; Definitions for encodings:

;; Every encoding below lists the 33 lower-case letters of the Russian
;; alphabet first and the 33 upper-case letters after them.  In the
;; tables that are split into rows of 9+8+8+8 characters the seventh
;; character of the first row of each half is the letter IO.

;; Both halves carry the same numbers 0..32, so a letter and its
;; capital map to the same value.
(defconst russian-encoding-unumeric
  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16
   17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
    0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16
   17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32])

(defconst russian-encoding-ascii
  (concat
   "abwgde^vzijklmnoprstufhc=[]#yx\\`q"
   "ABWGDE&VZIJKLMNOPRSTUFHC+{}$YX|~Q"))

(defconst russian-encoding-7koi
  (concat
   "ABWGDE#VZIJKLMNOPRSTUFHC^[]_YX\\@Q"
   "abwgde$vzijklmnoprstufhc~{}\"yx|`q"))

(defconst russian-encoding-alt
  (concat
   "\240\241\242\243\244\245\361\246\247"
   "\250\251\252\253\254\255\256\257"
   "\340\341\342\343\344\345\346\347"
   "\350\351\352\353\354\355\356\357"
   "\200\201\202\203\204\205\360\206\207"
   "\210\211\212\213\214\215\216\217"
   "\220\221\222\223\224\225\226\227"
   "\230\231\232\233\234\235\236\237"))

(defconst russian-encoding-8koi
  (concat
   "\301\302\327\307\304\305\243\326\332"
   "\311\312\313\314\315\316\317\320"
   "\322\323\324\325\306\310\303\336"
   "\333\335\337\331\330\334\300\321"
   "\341\342\367\347\344\345\263\366\372"
   "\351\352\353\354\355\356\357\360"
   "\362\363\364\365\346\350\343\376"
   "\373\375\377\371\370\374\340\361"))

(defconst russian-encoding-gostcii
  (concat
   "\320\321\322\323\324\325\361\326\327"
   "\330\331\332\333\334\335\336\337"
   "\340\341\342\343\344\345\346\347"
   "\350\351\352\353\354\355\356\357"
   "\260\261\262\263\264\265\241\266\267"
   "\270\271\272\273\274\275\276\277"
   "\300\301\302\303\304\305\306\307"
   "\310\311\312\313\314\315\316\317"))

;; Identical to `russian-encoding-gostcii' except for CAPITAL IO.
(defconst russian-encoding-osn
  (concat
   "\320\321\322\323\324\325\361\326\327"
   "\330\331\332\333\334\335\336\337"
   "\340\341\342\343\344\345\346\347"
   "\350\351\352\353\354\355\356\357"
   "\260\261\262\263\264\265\360\266\267"
   "\270\271\272\273\274\275\276\277"
   "\300\301\302\303\304\305\306\307"
   "\310\311\312\313\314\315\316\317"))

(defconst russian-encoding-win-cp1251
  (concat
   "\340\341\342\343\344\345\270\346\347"
   "\350\351\352\353\354\355\356\357"
   "\360\361\362\363\364\365\366\367"
   "\370\371\372\373\374\375\376\377"
   "\300\301\302\303\304\305\250\306\307"
   "\310\311\312\313\314\315\316\317"
   "\320\321\322\323\324\325\326\327"
   "\330\331\332\333\334\335\336\337"))

;; Mac OS Cyrillic places SMALL YA at \337 (0xDF), not at the end of
;; the \340..\376 run; \377 (0xFF) is a currency sign there.  The last
;; lower-case character was \377 and has been corrected to \337.
(defconst russian-encoding-mac
  (concat
   "\340\341\342\343\344\345\336\346\347"
   "\350\351\352\353\354\355\356\357"
   "\360\361\362\363\364\365\366\367"
   "\370\371\372\373\374\375\376\337"
   "\200\201\202\203\204\205\335\206\207"
   "\210\211\212\213\214\215\216\217"
   "\220\221\222\223\224\225\226\227"
   "\230\231\232\233\234\235\236\237"))

(defconst russian-encoding-moshkov
  (concat
   "\341\342\367\347\344\345\243\366\372\351"
   "\352\353\354\355\356\357\360\362\363\364"
   "\365\346\350\343\376\373\375\256\371\370"
   "\374\340\361"
   "\301\302\327\307\304\305\263\326\332\311"
   "\312\313\314\315\316\317\320\322\323\324"
   "\325\306\310\303\336\333\335\254\331\330"
   "\334\300\321"))

(defconst russian-encoding-cp500
  (concat
   "\254\151\355\356\353\357\111\354\277"
   "\200\375\376\373\374\255\256\131"
   "\104\105\102\106\103\107\234\110"
   "\124\121\122\123\130\125\126\127"
   "\220\217\352\372\276\240\252\266\263"
   "\235\332\233\213\267\270\271\253"
   "\144\145\142\146\143\147\236\150"
   "\164\161\162\163\170\165\166\167"))

(defconst russian-encoding-dkoi
  (concat
   "\167\170\257\215\212\213\131\256\262"
   "\217\220\232\233\234\235\236\237"
   "\252\253\254\255\214\216\200\266"
   "\263\265\267\261\260\264\166\240"
   "\271\272\355\277\274\275\102\354\372"
   "\313\314\315\316\317\332\333\334"
   "\336\337\352\353\276\312\273\376"
   "\373\375\165\357\356\374\270\335"))

(defconst russian-encoding-ebcdic
  (concat
   "\237\240\252\253\254\255\335\256\257"
   "\260\261\262\263\264\265\266\267"
   "\270\271\272\273\274\275\276\277"
   "\312\313\314\315\316\317\332\333"
"\130\131\142\143\144\145\102\146\147"
   "\150\151\160\161\162\163\164\165"
   "\166\167\170\200\212\213\214\215"
   "\216\217\220\232\233\234\235\236"))

;; Russian typewriter layout expressed as the keys of a US keyboard.
;; An encoding must hold 66 characters (33 lower case, then 33 upper
;; case).  The upper-case half had been cut down to "FZ"; it is
;; restored here as the shifted form of each lower-case key.
;; NOTE(review): the restored half is a reconstruction -- confirm it
;; against an intact copy of the package.
(defconst russian-encoding-jcuken
  (concat
   "f,dult/;pbqrkvyjghcnea[wxio]sm'.z"
   "F<DULT?:PBQRKVYJGHCNEA{WXIO}SM\">Z"))

;; Same layout as `russian-encoding-jcuken' except that IO is on the
;; "`" key (shifted: "~").  The upper-case half is restored the same
;; way, with the same caveat.
(defconst russian-encoding-jcukenwin
  (concat
   "f,dult`;pbqrkvyjghcnea[wxio]sm'.z"
   "F<DULT~:PBQRKVYJGHCNEA{WXIO}SM\">Z"))

;; "List" type encodings: one string per letter, 33 lower case and
;; then 33 upper case.

(defconst russian-encoding-naive
  '("a" "b" "v" "g" "d" "e" "e" "zh" "z"
    "i" "j" "k" "l" "m" "n" "o" "p"
    "r" "s" "t" "u" "f" "h" "c" "ch"
    "sh" "sch" "'" "y" "'" "e" "yu" "ya"
    "A" "B" "V" "G" "D" "E" "E" "Zh" "Z"
    "I" "J" "K" "L" "M" "N" "O" "P"
    "R" "S" "T" "U" "F" "H" "C" "Ch"
    "Sh" "Sch" "'" "Y" "'" "E" "Yu" "Ya"))

(defconst russian-encoding-libcon
  '("a" "b" "v" "g" "d" "e" "e" "zh" "z"
    "i" "j" "k" "l" "m" "n" "o" "p"
    "r" "s" "t" "u" "f" "x" "ts" "ch"
    "sh" "shch" "\"" "y" "'" "e" "ju" "ja"
    "A" "B" "V" "G" "D" "E" "E" "ZH" "Z"
    "I" "J" "K" "L" "M" "N" "O" "P"
    "R" "S" "T" "U" "F" "X" "TS" "CH"
    "SH" "SHCH" "\"" "Y" "'" "E" "JU" "JA"))

;; Each entry is "=" followed by the two hex digits of the letter's
;; code in `russian-encoding-8koi'.
(defconst russian-encoding-broken-8koi
  '("=C1" "=C2" "=D7" "=C7" "=C4" "=C5" "=A3" "=D6" "=DA"
    "=C9" "=CA" "=CB" "=CC" "=CD" "=CE" "=CF" "=D0"
    "=D2" "=D3" "=D4" "=D5" "=C6" "=C8" "=C3" "=DE"
    "=DB" "=DD" "=DF" "=D9" "=D8" "=DC" "=C0" "=D1"
    "=E1" "=E2" "=F7" "=E7" "=E4" "=E5" "=B3" "=F6" "=FA"
    "=E9" "=EA" "=EB" "=EC" "=ED" "=EE" "=EF" "=F0"
    "=F2" "=F3" "=F4" "=F5" "=E6" "=E8" "=E3" "=FE"
    "=FB" "=FD" "=FF" "=F9" "=F8" "=FC" "=E0" "=F1"))

;; NOTE(review): the line breaks of the definition below were lost, so
;; the extent of its commented-out parts cannot be recovered from this
;; text; it is left exactly as found.
(defconst russian-encoding-tex-decoding-rule '(nil ; (?a . 1) (?b . 2) (?v . 3) (?g . 4) (?d . 5) (?e . 6) (?z . 9) (?\\ (nil (?\" (nil (?e . 7) (?E . 40))) (?\` (nil (?e . 31) (?E . 64))) (?u (nil (?{ (nil (?i (nil (?} . 11))) (?I (nil (?} . 44))))))))) (?{ (nil (?a (nil (?} . 1))) (?A (nil (?} . 34))) (?z (nil ;; (?} . 9) (?h (nil (?} . 8))))) (?Z (nil ;; (?} . 42) (?H (nil (?} . 41))))) (?h (nil (?} . 23))) (?H (nil (?} . 56))) ; (?s (nil ;Comment these lines if you uncomment ; (?} . 19) ;the "{shch}" version below. ; (?h (nil ; ; (?} . 26))) ; ; (?c (nil ; ; (?h (nil ; ; (?} . 
27))))))) ; ;; Some might need this if "{sch}" is not recognized (?s (nil (?} . 19) (?h (nil (?} . 26) (?c (nil (?h (nil (?} . 27))))))))) ; (?S (nil ;Comment these lines if you uncomment ; (?} . 52) ;the "{SHCH}" version below. ; (?H (nil ; ; (?} . 59))) ; ; (?C (nil ; ; (?H (nil ; ; (?} . 60))))))) ; ;; Some might need this if "{SCH}" is not recognized (?S (nil (?} . 52) (?H (nil (?} . 59) (?C (nil (?H (nil (?} . 60))))))))) (?t (nil (?} . 20) (?s (nil (?} . 24))))) (?T (nil (?} . 53) (?S (nil (?} . 57))))) (?c (nil (?h (nil (?} . 25))))) (?C (nil (?H (nil (?} . 58))))) (?\\ (nil ;; (?e (nil ;; (?} . 31))) ;; (?E (nil ;; (?} . 64))) (?c (nil (?d (nil (?p (nil (?r (nil (?i (nil (?m (nil (?e (nil (?} . 28))))))))))))) (?p (nil (?r (nil (?i (nil (?m (nil (?e (nil (?} . 30))))))))))))) (?C (nil (?d (nil (?p (nil (?r (nil (?i (nil (?m (nil (?e (nil (?} . 61))))))))))))) (?p (nil (?r (nil (?i (nil (?m (nil (?e (nil (?} . 63))))))))))))))) (?y (nil (?} . 29) (?u (nil (?} . 32))) (?a (nil (?} . 33))))) (?Y (nil (?} . 62) (?U (nil (?} . 65))) (?A (nil (?} . 66))))))) (?i . 10) (?k . 12) (?l . 13) (?m . 14) (?n . 15) (?o . 16) (?p . 17) (?r . 18) (?u . 21) (?f . 22) ; (?h . 23) ; (?A . 34) (?B . 35) (?V . 36) (?G . 37) (?D . 38) (?E . 39) (?I . 43) (?K . 45) (?L . 46) (?M . 47) (?N . 48) (?O . 49) (?P . 50) (?R . 51) (?U . 54) (?F . 55))) ;; (?H . 56))) (defconst russian-encoding-broken-8koi-decoding-rule '(nil (?\= (nil (?0 (nil (?9 . -9))) (?2 (nil (?0 . -32))) (?\n . 0) (?A (nil (?3 . 7))) (?B (nil (?3 . 40))) (?C (nil (?0 . 32) (?1 . 1) (?2 . 2) (?3 . 24) (?4 . 5) (?5 . 6) (?6 . 22) (?7 . 4) (?8 . 23) (?9 . 10) (?A . 11) (?B . 12) (?C . 13) (?D . 14) (?E . 15) (?F . 16))) (?D (nil (?0 . 17) (?1 . 33) (?2 . 18) (?3 . 19) (?4 . 20) (?5 . 21) (?6 . 8) (?7 . 3) (?8 . 30) (?9 . 29) (?A . 9) (?B . 26) (?C . 31) (?D . 27) (?E . 25) (?F . 28))) (?E (nil (?0 . 65) (?1 . 34) (?2 . 35) (?3 . 57) (?4 . 38) (?5 . 39) (?6 . 55) (?7 . 37) (?8 . 56) (?9 . 43) (?A . 44) (?B . 45) (?C . 
46) (?D . 47) (?E . 48) (?F . 49))) (?F (nil (?0 . 50) (?1 . 66) (?2 . 51) (?3 . 52) (?4 . 53) (?5 . 54) (?6 . 41) (?7 . 36) (?8 . 63) (?9 . 62) (?A . 42) (?B . 59) (?C . 64) (?D . 60) (?E . 58) (?F . 61))))))) (defconst russian-encoding-libcon-decoding-rule '(nil (?a . 1) (?b . 2) (?c (24 (?h . 25))) (?d . 5) (?e (6 (?f (nil (?f . [31 22 22]))) (?l (nil (?e (nil (?k (nil (?t (nil (?r . [31 13 6 12 20 18]))))))))))) (?f . 22) (?g . 4) (?h . 23) (?i . 10) (?j (11 (?a . 33) (?o . 7) (?u . 32))) (?k (12 (?h . 23))) (?l . 13) (?m . 14) (?n . 15) (?o . 16) (?p (17 (?o ([17 16] (?e ([17 16 6] (?t . [17 16 31 20]))) (?r (nil (?t (nil (?s (nil (?m . [17 16 18 20 19 14]))))))))))) (?r (18 (?t (nil (?s (nil (?i (nil (?g . [18 20 19 10 4]))))))))) (?s (19 (?o (nil (?v (nil (?e (nil (?t (nil (?s (nil (?k . [19 16 3 6 20 19 12]))))))))))) (?c (nil (?h (27 (?e ([27 6] (?z . [19 25 6 9]))) (?i (nil (?t (nil (?a . [19 25 10 20 1]))))) (?a (nil (?s (nil (?t . [19 25 1 19 20]))))))))) (?h (26 (?c (nil (?h . 27))))))) (?t (20 (?s (24 (?c (nil (?h . [20 27]))) (?j (nil (?a . [20 19 33]))) (?t . [20 19 20]) (?y (nil (?a . [20 19 33]))))))) (?u . 21) (?v . 3) (?w . 3) (?x . 23) (?y (29 (?a . 33) (?o . 7) (?u . 32))) (?z (9 (?h . 8))) (?A . 34) (?B (35 (?' . [35 63]))) (?C (57 (?H . 58) (?h . 58))) (?D (38 (?' . [38 63]))) (?E (39 (?V ([39 36] (?M . [64 36 47]))))) (?F (55 (?' . [55 63]))) (?G (37 (?' . [37 63]))) (?H (56 (?' . [56 63]))) (?I . 43) (?J (44 (?A . 66) (?a . 66) (?O . 40) (?o . 40) (?U . 65) (?u . 65))) (?K (45 (?' . [45 63]) (?H . 56) (?h . 56))) (?L (46 (?' . [46 63]))) (?M (47 (?' . [47 63]))) (?N (48 (?' . [48 63]))) (?O . 49) (?P (50 (?' . [50 63]) (?O (nil (?R (nil (?T (nil (?S (nil (?M . [50 49 51 53 52 47]))))))))))) (?R (51 (?' . [51 63]) (?T (nil (?S (nil (?I (nil (?G . [51 53 52 43 37]))))))))) (?S (52 (?' . [52 63]) (?o (nil (?v (nil (?e (nil (?t (nil (?s (nil (?k . [52 16 3 6 20 19 12]))))))))))) (?C (nil (?H (60 (?I (nil (?T (nil (?A . 
[52 58 43 53 34]))))) (?A (nil (?S (nil (?T . [52 58 34 52 53]))))))))) (?H (59 (?' . [59 63]) (?C (nil (?H . 60))))) (?c (nil (?h (60 (?i (nil (?t (nil (?a . [52 25 10 20 1]))))) (?a (nil (?s (nil (?t . [52 25 1 19 20]))))))))) (?h (59 (?c (nil (?h . 60))))))) (?T (53 (?' . [53 63]) (?s (57 (?j (nil (?a . [53 19 33]))) (?t . [53 19 20]) (?y (nil (?a . [53 19 33]))))) (?S (57 (?J (nil (?A . [53 52 66]))) (?T . [53 52 53]) (?Y (nil (?A . [53 52 66]))))))) (?U . 54) (?V (36 (?' . [36 63]))) (?W (36 (?' . [36 63]))) (?X (56 (?' . [56 63]))) (?Y (62 (?A . 66) (?O . 40) (?U . 65) (?a . 66) (?o . 40) (?u . 65))) (?Z (42 (?' . [42 63]) (?H (41 (?' . [41 63]))) (?h . 41))) (?\' . 30) (?\ (nil (?s (nil (?c (nil (?h ([-32 19 25] (?i (nil (?p . [-32 27 10 17]))))))))) (?e (nil (?n . [-32 31 15]) (?k . [-32 31 12]) (?p . [-32 31 17]) (?t . [-32 31 20]))) (?E (nil (?k . [-32 64 12]) (?p . [-32 64 17]) (?K . [-32 64 45]) (?P . [-32 64 50]) (?t . [-32 64 20]) (?T . [-32 64 53]))))) (?\t (nil (?s (nil (?c (nil (?h ([-9 19 25] (?i (nil (?p . [-9 27 10 17]))))))))) (?e (nil (?k . [-9 31 12]) (?p . [-9 31 17]) (?t . [-9 31 20]))) (?E (nil (?k . [-9 64 12]) (?p . [-9 64 17]) (?K . [-9 64 45]) (?P . [-9 64 50]) (?t . [-9 64 20]) (?T . [-9 64 53]))))) (?\n (nil (?s (nil (?c (nil (?h ([-10 19 25] (?i (nil (?p . [-10 27 10 17]))))))))) (?e (nil (?n . [-10 31 15]) (?k . [-10 31 12]) (?p . [-10 31 17]) (?t . [-10 31 20]))) (?E (nil (?k . [-10 64 12]) (?p . [-10 64 17]) (?K . [-10 64 45]) (?P . [-10 64 50]) (?t . [-10 64 20]) (?T . [-10 64 53]))))) (?\f (nil (?s (nil (?c (nil (?h ([-12 19 25] (?i (nil (?p . [-12 27 10 17]))))))))) (?e (nil (?n . [-12 31 15]) (?k . [-12 31 12]) (?p . [-12 31 17]) (?t . [-12 31 20]))) (?E (nil (?k . [-12 64 12]) (?p . [-12 64 17]) (?K . [-12 64 45]) (?P . [-12 64 50]) (?t . [-12 64 20]) (?T . [-12 64 53]))))) (?\r (nil (?s (nil (?c (nil (?h ([-13 19 25] (?i (nil (?p . [-13 27 10 17]))))))))) (?e (nil (?n . [-13 31 15]) (?k . [-13 31 12]) (?p . 
[-13 31 17])
                (?t . [-13 31 20])))
           (?E (nil
                (?k . [-13 64 12])
                (?p . [-13 64 17])
                (?K . [-13 64 45])
                (?P . [-13 64 50])
                (?t . [-13 64 20])
                (?T . [-13 64 53])))))))

;; The "naive" transliteration is decoded with exactly the same rule
;; tree as the Library of Congress one.
(defconst russian-encoding-naive-decoding-rule
  russian-encoding-libcon-decoding-rule)

;; AMSTeX & LaTeX transliteration, given as a list of 66 strings:
;; the 33 lower-case letters first, then the 33 upper-case letters.
(defconst russian-encoding-tex
  '("{a}" "b" "v" "g" "d" "e" "\\\"e" "{zh}" "z" "i" "\\u{i}"
    "k" "l" "m" "n" "o" "p" "r" "{s}" "{t}" "u" "f" "{h}" "{ts}"
    "{ch}" "{sh}" "{shch}" "{\\cdprime}" "{y}" "{\\cprime}" "\\`e"
    "{yu}" "{ya}"
    "{A}" "B" "V" "G" "D" "E" "\\\"E" "{ZH}" "Z" "I" "\\u{I}"
    "K" "L" "M" "N" "O" "P" "R" "{S}" "{T}" "U" "F" "{H}" "{TS}"
    "{CH}" "{SH}" "{SHCH}" "{\\Cdprime}" "{Y}" "{\\Cprime}" "\\`E"
    "{YU}" "{YA}"))

;; One more transliteration, given as a 66-character string
;; (33 lower-case letters followed by 33 upper-case letters).
(defconst russian-encoding-ascii2
  (concat "abwgde\243vzijklmnoprstufhc^[]_yx\\@q"
          "ABWGDE\263VZIJKLMNOPRSTUFHC~{}\377YX|`Q"))

;;; Additional encodings outside the cyrillic range:

;; Each filter is a list of two strings that `russian-get-table' pairs
;; up index by index: character I of the first string is mapped to
;; character I of the second one.
(defconst russian-encoding-jcuken-filter-from
  '("~`!@#$%^&*_-+="
    "+=_!/\":<>?-,;."))

;; NOTE(review): the two strings below differ in length (28 versus 22
;; characters) although they are indexed in parallel.  Runs of spaces in
;; the second string were probably squeezed when this file was
;; reformatted -- verify against a pristine copy of russian.el.
(defconst russian-encoding-jcuken-filter-to
  '("~`!@#$%^&*_-+={[}]:;\"'<,>.?/"
    " $@ !_~`(())%+$$^-&=*#"))

(defconst russian-encoding-jcukenwin-filter-from
  '("!@#$%^&*"
    "!\"#$:,.;" ))

(defconst russian-encoding-jcukenwin-filter-to
  '("!\"#$:,.;"
    "!@#$%^&*"))

;;; XKB encoding

;; Keysym names instead of characters: 33 lower-case letters first,
;; then the 33 upper-case ones.
(defconst russian-encoding-xkb
  [ Cyrillic_a Cyrillic_be Cyrillic_ve Cyrillic_ghe Cyrillic_de
    Cyrillic_ie Cyrillic_io Cyrillic_zhe Cyrillic_ze Cyrillic_i
    Cyrillic_shorti Cyrillic_ka Cyrillic_el Cyrillic_em Cyrillic_en
    Cyrillic_o Cyrillic_pe Cyrillic_er Cyrillic_es Cyrillic_te
    Cyrillic_u Cyrillic_ef Cyrillic_ha Cyrillic_tse Cyrillic_che
    Cyrillic_sha Cyrillic_shcha Cyrillic_hardsign Cyrillic_yeru
    Cyrillic_softsign Cyrillic_e Cyrillic_yu Cyrillic_ya
    Cyrillic_A Cyrillic_BE Cyrillic_VE Cyrillic_GHE Cyrillic_DE
    Cyrillic_IE Cyrillic_IO Cyrillic_ZHE Cyrillic_ZE Cyrillic_I
    Cyrillic_SHORTI Cyrillic_KA Cyrillic_EL Cyrillic_EM Cyrillic_EN
    Cyrillic_O Cyrillic_PE Cyrillic_ER Cyrillic_ES Cyrillic_TE
    Cyrillic_U Cyrillic_EF Cyrillic_HA Cyrillic_TSE Cyrillic_CHE
    Cyrillic_SHA Cyrillic_SHCHA
Cyrillic_HARDSIGN Cyrillic_YERU Cyrillic_SOFTSIGN Cyrillic_E
    Cyrillic_YU Cyrillic_YA])

;;; Functions for changing the variables:

(defun russian-set-font ()
  "Prompt for new font encoding and set `russian-font-name' accordingly.
Empty input leaves `russian-font-name' unchanged."
  (interactive)
  (let ((choice (completing-read
                 (concat "New russian font or a transliteration (default "
                         russian-font-name "): ")
                 russian-font-alist nil t)))
    (if (not (equal choice ""))
        (setq russian-font-name choice))))

(defun russian-set-buffer ()
  "Prompt for new buffer encoding and set `russian-buffer-name' accordingly.
Empty input leaves `russian-buffer-name' unchanged."
  (interactive)
  (let ((choice (completing-read
                 (concat "New encoding of a russian buffer (default "
                         russian-buffer-name "): ")
                 russian-buffer-alist nil t)))
    (if (not (equal choice ""))
        (setq russian-buffer-name choice))))

(defun russian-set-keyboard ()
  "Prompt for new keyboard encoding and set `russian-keyboard-name' accordingly.
Empty input leaves `russian-keyboard-name' unchanged."
  (interactive)
  (let ((choice (completing-read
                 (concat "New russian keyboard (default "
                         russian-keyboard-name "): ")
                 russian-keyboard-alist nil t)))
    (if (not (equal choice ""))
        (setq russian-keyboard-name choice))))

(defun russian-set-mode ()
  "Prompt for new input encoding and set `russian-mode-name' accordingly.
Empty input leaves `russian-mode-name' unchanged."
  (interactive)
  ;; The local used to be called `mode-name', which is the special
  ;; variable holding the major mode's name; a neutral name avoids
  ;; rebinding it around the assignment.
  (let ((choice (completing-read
                 (concat "New russian insertion mode (default "
                         russian-mode-name "): ")
                 russian-mode-alist nil t)))
    (if (not (equal choice ""))
        (setq russian-mode-name choice))))

;;; Working engine:

(defun russian-univ-nth (string-or-list i)
  "Return element I of STRING-OR-LIST.
Lists are accessed with `nth', everything else with `aref'."
  (if (listp string-or-list)
      (nth i string-or-list)
    (aref string-or-list i)))

(defun russian-univ-vector (char-or-string)
  "Return CHAR-OR-STRING as a vector.
A string becomes the vector of its characters; any other value is
wrapped in a one-element vector."
  (if (stringp char-or-string)
      (apply 'vector (append char-or-string nil))
    (vector char-or-string)))

(defun russian-univ-append (char-or-string string-or-list)
  "Return STRING-OR-LIST with CHAR-OR-STRING added in front.
A list argument is extended with `append'; anything else goes
through `concat'."
  (if (listp string-or-list)
      (append (vector char-or-string) string-or-list)
    (concat (vector char-or-string) string-or-list)))

(defvar russian-temporary-table nil
  "Result of the most recent call to `russian-get-table'.")

(defun russian-get-table (numeric display max long-from long-to)
  "Build a translation table from LONG-FROM to LONG-TO.
LONG-FROM and LONG-TO are encoding descriptors (lists).  Element 1
of each is evaluated to get the encoding itself, element 4 of
LONG-FROM and element 5 of LONG-TO to get optional filters (lists
of two parallel sequences), and element 6 of LONG-FROM to get the
use-associations flag.

The result is left in `russian-temporary-table':
  - if NUMERIC is non-nil, a string of MAX characters that starts
    out as the identity mapping;
  - else, if the use-associations flag is non-nil, an alist of
    (FROM . TO) pairs;
  - otherwise a fresh display table indexed by the source character.

If DISPLAY is non-nil, every non-nil entry below MAX is finally
converted with `russian-univ-vector'."
  (let* ((from (eval (nth 1 long-from)))
         (to (eval (nth 1 long-to)))
         (from-filter (eval (nth 4 long-from)))
         (to-filter (eval (nth 5 long-to)))
         (use-assoc (eval (nth 6 long-from)))
         (from-filter-from (eval (nth 0 from-filter)))
         (from-filter-to (eval (nth 1 from-filter)))
         (to-filter-from (eval (nth 0 to-filter)))
         (to-filter-to (eval (nth 1 to-filter))))
    ;; Choose the container for the result.
    (cond (numeric
           (setq russian-temporary-table (make-string max ?a))
           (let ((i 0))
             (while (< i max)
               (aset russian-temporary-table i i)
               (setq i (+ 1 i)))))
          (use-assoc
           (setq russian-temporary-table nil))
          (t
           (setq russian-temporary-table (make-display-table))))
    ;; REAL-FROM / REAL-TO are the encodings extended (at the front)
    ;; by whatever the filters contribute.
    (let ((real-from from)
          (real-to to))
      (cond
       ;; Both sides have a filter: compose them.  CHAR1 -> CHAR2 comes
       ;; from the source filter; if CHAR2 is itself remapped by the
       ;; target filter, use that final value instead.
       ((and from-filter-from to-filter-from)
        (let ((i 0) j char1 char2 flag
              (len1 (length from-filter-from))
              (len2 (length to-filter-from)))
          (while (< i len1)
            (setq char1 (aref from-filter-from i))
            (setq char2 (aref from-filter-to i))
            (setq j 0)
            ;; FLAG means "no match in the target filter yet".  It has
            ;; to be reset for every source character; it used to be
            ;; initialised only once, so every character after the
            ;; first match was silently left out of the table.
            (setq flag t)
            (while (and flag (< j len2))
              (if (equal char2 (aref to-filter-from j))
                  (progn
                    (setq flag nil)
                    (setq real-from (concat (vector char1) real-from))
                    (setq real-to
                          (if numeric
                              (concat (vector (aref to-filter-to j)) real-to)
                            (russian-univ-append
                             (russian-univ-nth to-filter-to j) real-to))))
                (setq j (+ 1 j))))
            (if flag
                (progn
                  (setq real-from (concat (vector char1) real-from))
                  (setq real-to
                        (if numeric
                            (concat (vector char2) real-to)
                          (russian-univ-append char2 real-to)))))
            (setq i (+ 1 i)))))
       ;; Only the source encoding has a filter.
       (from-filter-from
        (let ((i 0)
              (len1 (length from-filter-from)))
          (while (< i len1)
            (setq real-from
                  (concat (vector (aref from-filter-from i)) real-from))
            (setq real-to
                  (if numeric
                      (concat (vector (aref from-filter-to i)) real-to)
                    (russian-univ-append
                     (russian-univ-nth from-filter-to i) real-to)))
            (setq i (+ 1 i)))))
       ;; Only the target encoding has a filter.
       (to-filter-to
        (let ((i 0)
              (len2 (length to-filter-to)))
          (while (< i len2)
            (setq real-from
                  (concat (vector (aref to-filter-from i)) real-from))
            (setq real-to
                  (if numeric
                      (concat (vector (aref to-filter-to i)) real-to)
                    (russian-univ-append
                     (russian-univ-nth to-filter-to i) real-to)))
            (setq i (+ 1 i)))))
       (t))
      ;; Record REAL-FROM[i] -> REAL-TO[i] for every position.
      (let ((i 0)
            (len (length real-from))
            char-from char-to)
        (while (< i len)
          (setq char-from (aref real-from i))
          (setq char-to (if numeric
                            (aref real-to i)
                          (russian-univ-nth real-to i)))
          (if use-assoc
              (setq russian-temporary-table
                    (cons (cons char-from char-to) russian-temporary-table))
            (aset russian-temporary-table char-from char-to))
          (setq i (+ 1 i))))
      ;; Display tables want vectors of glyphs as entries.
      (if display
          (let ((i 0))
            (while (< i max)
              (if (aref russian-temporary-table i)
                  (aset russian-temporary-table i
                        (russian-univ-vector
                         (aref russian-temporary-table i))))
              (setq i (+ 1 i))))))))

;;; Display part:

(defun russian-display (&optional arg)
  "*Display Russian text in the current buffer in encoding ARG.
Prompt for encoding if called interactively.  Use default encoding if
no argument is given."
(interactive "P")
  (if (interactive-p)
      (call-interactively 'russian-set-buffer)
    (if arg (setq russian-buffer-name arg)))
  (let* ((buffer-long (assoc russian-buffer-name russian-encoding-alist))
         (font-long (assoc russian-font-name russian-encoding-alist)))
    ;; Build a display table mapping the buffer encoding to the font
    ;; encoding and install it for this buffer only.
    (russian-get-table nil t 256 buffer-long font-long)
    (if (fboundp 'add-spec-to-specifier) ; xemacs
        (add-spec-to-specifier current-display-table
                               russian-temporary-table
                               (current-buffer))
      (setq buffer-display-table russian-temporary-table))))

(defun russian-undisplay (&optional arg)
  "Obsolete. Use `russian-mode' with negative argument instead."
  (interactive)
  (russian-mode -1))

;;; Translation:

(defun russian-translate-string-list (start end translate-table)
  "Translate the text between START and END through TRANSLATE-TABLE.
TRANSLATE-TABLE is indexed by character.  A character whose entry is
non-nil is deleted and the entry is inserted in its place; other
characters are left alone.  The buffer restriction in effect when the
function is called is preserved."
  (save-excursion
    ;; `save-restriction' rather than a trailing `widen': widening
    ;; unconditionally destroyed any narrowing set up by the caller
    ;; (`russian-translate-region' depends on it for its second pass).
    (save-restriction
      (narrow-to-region start end)
      (goto-char (point-min))
      (while (not (eobp))
        (let* ((char (following-char))
               (char-to (aref translate-table char)))
          (if char-to
              (progn
                (delete-char 1)
                (insert char-to))
            (forward-char)))))))

(defun russian-translate-decoding-rule (start end decoding-rule to)
  "Translate a transliteration between START and END into encoding TO.
DECODING-RULE is a tree.  Each level is a list whose first element is
the result to use if no longer match is found (or nil), followed by
entries (CHAR . RESULT) or (CHAR SUBTREE).  At each position the
longest sequence of characters with a result is replaced.

A RESULT is an integer or a vector of integers: a positive N inserts
element N (counting from 1) of TO, a negative N inserts the character
with code -N, and 0 inserts nothing.  TO is a string or a list.
Text that matches no rule is skipped one character at a time.  The
buffer restriction in effect when the function is called is preserved."
  (save-excursion
    ;; See `russian-translate-string-list' for why this is not a plain
    ;; `narrow-to-region' ... `widen' pair.
    (save-restriction
      (let (decoding-list chars-moved char-to chars-erase
            flag-read-more flag-match char-read)
        (narrow-to-region start end)
        (goto-char (point-min))
        (while (not (eobp))
          ;; Restart from the root of the rule tree.
          (setq decoding-list decoding-rule)
          (setq chars-moved 0)          ; characters stepped over so far
          (setq char-to nil)            ; best result found so far
          (setq chars-erase 0)          ; length of the text it replaces
          (setq flag-read-more t)
          (setq flag-match t)
          (setq char-read nil)
          (catch 'read-more
            (while (and flag-read-more (not (eobp)))
              ;; FLAG-MATCH stays t while no entry of the current level
              ;; matches the character at point.
              (setq flag-match t)
              (setq char-read (following-char))
              (let ((i 1))
                (while (and flag-match (< i (length decoding-list)))
                  (if (eq char-read (car (nth i decoding-list)))
                      (progn
                        (setq flag-match nil)
                        (setq decoding-list (cdr (nth i decoding-list)))
                        (if (atom decoding-list)
                            ;; Leaf: final result, stop reading.  Point
                            ;; has not moved over this character yet,
                            ;; hence the extra 1.
                            (progn
                              (setq flag-read-more nil)
                              (setq chars-erase (+ 1 chars-moved))
                              (setq char-to decoding-list)
                              (throw 'read-more t))
                          ;; Subtree: descend and remember its default,
                          ;; if any, as the best match so far.
                          (setq decoding-list (car decoding-list))
                          (if (car decoding-list)
                              (progn
                                (setq char-to (car decoding-list))
                                (setq chars-erase (+ 1 chars-moved)))))))
                  (setq i (+ 1 i)))
                (if flag-match (setq flag-read-more nil))
                (forward-char)
                (setq chars-moved (+ 1 chars-moved)))))
          ;; Go back to where this match attempt started and replace
          ;; the matched text, if there is any.
          (backward-char chars-moved)
          (delete-char chars-erase)
          (if char-to
              (if (vectorp char-to)
                  (let ((j 0)
                        (n (length char-to)))
                    (while (< j n)
                      (let ((char (aref char-to j)))
                        (cond ((> char 0)
                               (insert (if (listp to)
                                           (nth (- char 1) to)
                                         (aref to (- char 1)))))
                              ((< char 0)
                               (insert (- char)))
                              (t)))
                      (setq j (+ 1 j))))
                (cond ((> char-to 0)
                       (insert (if (listp to)
                                   (nth (- char-to 1) to)
                                 (aref to (- char-to 1)))))
                      ((< char-to 0)
                       (insert (- char-to)))
                      (t)))
            (or (eobp) (forward-char))))))))

(defun russian-translate-region (start end &optional non-interactive
                                       from-name to-name)
  "Translate Russian text in region from encoding FROM-NAME to TO-NAME.
NON-INTERACTIVE should be t if used non-interactively, FROM-NAME and
TO-NAME are strings, which must be valid encoding names.  When
NON-INTERACTIVE is nil both names are read from the minibuffer."
  (interactive "r")
  (if (null non-interactive)
      (progn
        (setq from-name (completing-read "Translate from the encoding: "
                                         russian-encoding-alist nil t))
        (setq to-name (completing-read "Translate to the encoding: "
                                       russian-encoding-alist nil t))))
  (let* ((from-long (assoc from-name russian-encoding-alist))
         (to-long (assoc to-name russian-encoding-alist))
         (from (eval (nth 1 from-long)))
         (to (eval (nth 1 to-long))))
    (if (listp from)
        ;; The source is a transliteration (a list of strings).
        (if (nth 4 to-long)
            ;; The target has a filter: go through
            ;; `russian-safe-encoding-name' in two passes.  The second
            ;; pass relies on the narrowing still being in place after
            ;; the first one.
            (save-restriction
              (narrow-to-region start end)
              ;; why not make start and end markers instead of integers, and
              ;; simply pass them?
              (russian-translate-region (point-min) (point-max) t
                                        from-name russian-safe-encoding-name)
              (russian-translate-region (point-min) (point-max) t
                                        russian-safe-encoding-name to-name))
          (let ((decoding-rule (eval (nth 3 from-long))))
            (russian-translate-decoding-rule start end decoding-rule to)))
      (if (listp to)
          ;; One character may become several: replace by hand.
          (progn
            (russian-get-table nil nil 256 from-long to-long)
            (russian-translate-string-list start end
                                           russian-temporary-table))
        ;; Character-for-character: let `translate-region' do it.
        (progn
          (russian-get-table t nil 256 from-long to-long)
          (translate-region start end russian-temporary-table))))))

(defun russian-translate-buffer ()
  "Translate Russian text in the whole buffer from one encoding to another."
  (interactive)
  (russian-translate-region (point-min) (point-max) nil))

;;; Russian minor mode:

;; NOTE(review): the header of this file says "Version: 1.12" -- confirm
;; which of the two numbers is current.
(defconst russian-version 1.06
  "The current version of russian.el")

(defvar russian-mode nil)
(make-variable-buffer-local 'russian-mode)

(defconst russian-mode-default-string " Rus")

(defvar russian-mode-string russian-mode-default-string
  "Name of `russian-mode' in the mode line")
(make-variable-buffer-local 'russian-mode-string)

;; Register the mode-line indicator once.
(or (assq 'russian-mode minor-mode-alist)
    (setq minor-mode-alist
          (cons '(russian-mode russian-mode-string) minor-mode-alist)))

(defvar russian-mode-hook nil
  "List of functions to run when `russian-mode' is turned on.")

(defun russian-mode (&optional arg)
  "Display a Russian text in the current buffer.
Unconditionally turn off if ARG is negative or zero, turn on with any
other value of ARG.  Prompt for the buffer encoding if called
interactively with positive argument, use defaults otherwise."
(interactive "P")
  (setq russian-mode
        (if (null arg)
            (not russian-mode)
          (> (prefix-numeric-value arg) 0)))
  (if russian-mode
      (progn
        (russian-set-default-encoding)
        (if (not (equal russian-buffer-name russian-mode-name))
            (setq russian-mode-name russian-buffer-name))
        (if (and (interactive-p) arg (> (prefix-numeric-value arg) 0))
            (call-interactively 'russian-display)
          (russian-display))
        (run-hooks 'russian-mode-hook))
    (if russian-insertion-mode (russian-insertion-mode -1))
    ;; we use (make-display-table) and not nil here, otherwise the global
    ;; table takes effect
    (if (fboundp 'add-spec-to-specifier) ; xemacs
        (add-spec-to-specifier current-display-table
                               (make-display-table)
                               (current-buffer))
      (setq buffer-display-table (make-display-table)))))

;;; Russian insertion modes:

(defvar russian-insertion-mode nil
  "Non-nil if `russian-insertion-mode' is enabled.")
(make-variable-buffer-local 'russian-insertion-mode)
;;(put 'russian-insertion-mode 'permanent-local t)

(defvar russian-insertion-mode-hook nil
  "List of functions to run when `russian-insertion-mode' is turned on.")

(defvar russian-insertion-mode-string "")
(make-variable-buffer-local 'russian-insertion-mode-string)

(defvar russian-insertion-mode-map nil)

(defvar russian-insertion-mode-table nil)

(defvar russian-tables-applied nil
  "true if case and syntax tables were applied to buffer")
(make-variable-buffer-local 'russian-tables-applied)

(defvar russian-case-table-orig nil)
(make-variable-buffer-local 'russian-case-table-orig)

(defvar russian-syntax-table-orig nil)
(make-variable-buffer-local 'russian-syntax-table-orig)

(defvar russian-input-associations nil
  "true if use associations")
(make-variable-buffer-local 'russian-input-associations)

(defvar russian-safe-input-encodings
  '("alt" "8koi" "mac" "win-cp1251" "osn" "gostcii" "moshkov" "broken-8koi")
  "These input encodings can be switched on by default")

(defconst russian-xemacs-p (string-match "XEmacs" emacs-version))

(defvar russian-bind-as-integer russian-xemacs-p
  "Intended to fix a bug with XEmacs.
XEmacs can't use characters with code greater than 127 in define-key.
So if russian-bind-as-integer is t then characters are bound as
integers, and as characters otherwise.")

(require 'case-table)

;; XEmacs only: replacement for `set-case-syntax-pair'.
(if russian-xemacs-p
    (defun russian-set-case-syntax-pair (uc lc table)
      "Make UC and LC an upper/lower case pair in case table TABLE.
TABLE is a list whose first two elements are the downcase and the
upcase table.  Both characters also get word syntax in the standard
syntax table."
      (let ((downcase (nth 0 table))
            (upcase (nth 1 table)))
        (aset downcase uc lc)
        (aset upcase uc uc)
        (aset downcase lc lc)
        (aset upcase lc uc)
        (modify-syntax-entry uc "w " (standard-syntax-table))
        (modify-syntax-entry lc "w " (standard-syntax-table)))))

(defun russian-set-tables (&optional arg)
  "Sets case and syntax table for current buffer.
With non-nil ARG, make the letters of the encoding named by
`russian-buffer-name' upper/lower case pairs in the current case
table.  With nil ARG, forget the saved original tables.  Called
interactively without a prefix argument, prompt for the buffer
encoding first."
  (interactive "P")
  (if (and (interactive-p) (null arg))
      (progn
        (setq russian-buffer-name nil)
        (call-interactively 'russian-set-buffer)
        (setq arg russian-buffer-name)))
  (let (from)
    (if arg
        (progn
          ;; modify tables if ARG is not null
          (if (not russian-tables-applied)
              ;; save original syntax table
              (progn
                (setq russian-syntax-table-orig (copy-syntax-table))
                (setq russian-case-table-orig (current-case-table))
                (setq russian-tables-applied t)))
          ;; construct tables
          (setq from (eval (nth 1 (assoc russian-buffer-name
                                         russian-encoding-alist))))
          ;; from is a string of lowercase/uppercase alphabet
          ;; make case table
          (let ((size (/ (length from) 2))
                (char 0)
                UC LC
                (table (current-case-table)))
            (while (< char size)
              (setq LC (aref from char))
              (setq UC (aref from (+ char size)))
              (if (fboundp 'russian-set-case-syntax-pair)
                  ;; fixed version for XEmacs
                  (russian-set-case-syntax-pair UC LC table)
                (set-case-syntax-pair UC LC table))
              (setq char (+ char 1)))
            (set-case-table table)))
      ;; ARG is nil.  This used to be a separate form that also ran
      ;; right after the branch above because of a misplaced paren,
      ;; which cleared the bookkeeping the moment it had been set up.
      (if russian-tables-applied
          ;; restore original tables
          (progn
            ;; (set-syntax-table russian-syntax-table-orig)
            ;; (set-case-table russian-case-table-orig)
            (setq russian-tables-applied nil)
            (setq russian-syntax-table-orig nil)
            (setq russian-case-table-orig nil))))))

(defun russian-insertion-mode (&optional arg)
  "If on, enable insertion of Russian characters in the current buffer.
Toggle `russian-insertion-mode' without ARG.  With ARG, turn russian
insertion minor mode on if ARG is positive and prompt for the encoding
if called interactively, turn it off otherwise."
  (interactive "P")
  (setq russian-insertion-mode
        (if (null arg)
            (not russian-insertion-mode)
          (> (prefix-numeric-value arg) 0)))
  (if russian-insertion-mode
      (progn
        ;; Insertion only makes sense while the text is displayed.
        (if (not russian-mode)
            (if (and (interactive-p) arg (> (prefix-numeric-value arg) 0))
                (call-interactively 'russian-mode)
              (russian-mode 1)))
        (if (and (interactive-p) arg (> (prefix-numeric-value arg) 0))
            (call-interactively 'russian-set-mode))
        (if (not (equal russian-buffer-name russian-mode-name))
            (progn
              (russian-display russian-buffer-name)
              (setq russian-mode-name russian-buffer-name)))
        (let* ((long-from (assoc russian-keyboard-name russian-encoding-alist))
               (long-to (assoc russian-mode-name russian-encoding-alist))
               (real-from (eval (nth 1 long-from)))
               (use-assoc (eval (nth 6 long-from))))
          (setq russian-insertion-mode-map (make-sparse-keymap))
          (russian-get-table nil nil 256 long-from long-to)
          (setq russian-insertion-mode-table russian-temporary-table)
          ;; `copy-sequence' is built in and copies a list exactly like
          ;; `copy-list', which is only available with the cl library.
          (if use-assoc
              (setq russian-input-associations
                    (copy-sequence russian-temporary-table))
            (setq russian-input-associations nil))
          ;; Bind every key that has a translation.
          (let ((i 0) char)
            (while (<= i 255)
              (setq char (if use-assoc
                             (nth i russian-insertion-mode-table)
                           (aref russian-insertion-mode-table i)))
              (if char
                  (if russian-xemacs-p
                      (define-key russian-insertion-mode-map ;; xemacs
                        (cond (use-assoc (aref real-from i))
                              ((or (not russian-bind-as-integer) (< i 127))
                               (char-to-string i))
                              (t i))
                        'russian-perform-insertion)
                    (define-key russian-insertion-mode-map ;; FSF Emacs
                      (vector (cond (use-assoc (aref real-from i))
                                    (t i)))
                      'russian-perform-insertion)))
              (setq i (1+ i))))
          (russian-set-tables russian-mode-name)
          (setq russian-insertion-mode-string
                (concat "[" (nth 2 long-to) "]"))
          (run-hooks 'russian-insertion-mode-hook)))
    (setq russian-insertion-mode-string "")
    (russian-set-tables nil))
  (setq russian-mode-string
        (concat russian-mode-default-string russian-insertion-mode-string))
  ;; Redisplay the mode line.
  (set-buffer-modified-p (buffer-modified-p))
  ;; Drop every old keymap entry for this mode, then install the
  ;; current keymap.
  (let (stale)
    (while (setq stale (assq 'russian-insertion-mode minor-mode-map-alist))
      (setq minor-mode-map-alist (delq stale minor-mode-map-alist))))
  (setq minor-mode-map-alist
        (cons (cons 'russian-insertion-mode russian-insertion-mode-map)
              minor-mode-map-alist)))

(defun russian-perform-insertion ()
  "Insert the translation of the key that invoked this command.
With `russian-input-associations' non-nil the key is looked up in that
alist and nothing is inserted if it is missing; otherwise the event is
used as an index into `russian-insertion-mode-table'."
  (interactive)
  (let (value)
    (if russian-input-associations
        (progn
          (setq value
                (cdr (assoc (if (fboundp 'event-key)
                                (event-key last-command-event)
                              (event-basic-type last-command-event))
                            russian-input-associations)))
          (if value (insert value)))
      (insert (aref russian-insertion-mode-table
                    (if (fboundp 'event-to-character)
                        (event-to-character last-command-event)
                      last-command-event))))))

;; Guess buffer encoding
;; based on Markov's chains

(defvar russian-detect-chars 1000
  "How many chars should be checked while detecting encoding")

(defconst russian-detect-safe-encoding-list
  '("8koi" "win-cp1251" "alt" "mac" "osn" "gostcii")
  "List of encodings could be used while detecting buffer encoding")

(defvar russian-detect-table
  [[ 0 549 743 1387 678 958 284 0 152 696 1103 9 841 396 1224 1641 945 1492 637 1745 1149 324 140 168 93 399 127 29 0 2 61 271 32 165]
   [ 1770 2 64 248 64 191 271 0 327 372 30 92 884 610 342 482 8 199 295 329 536 28 22 85 12 157 59 18 0 0 0 2 116 315]
   [ 250 119 0 4 0 1 162 0 0 0 74 0 5 48 4 18 282 0 91 4 2 134 0 4 0 0 0 11 4 179 4 0 0 32]
   [ 763 535 0 16 0 14 420 0 0 24 188 0 21 41 14 163 403 13 44 227 31 186 0 5 0 2 15 0 0 215 12 0 0 19]
   [ 347 145 0 4 0 110 29 0 0 0 105 0 28 100 1 2 414 0 178 0 4 49 0 0 2 8 0 0 0 0 0 0 0 0]
   [ 229 436 7 84 0 4 315 0 62 0 379 0 40 135 0 184 508 1 118 23 6 161 0 1 16 7 0 0 0 120 50 0 0 50]
   [ 1895 6 108 171 185 169 123 0 47 230 4 154 164 454 397 821 24 166 713 450 785 11 0 59 15 91 71 41 0 0 0 0 15 9]
   [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 9 81 0 0 0 95 297 0 0 0 129 0 21 0 2 168 214 0 2 2 0 17 0 0 0 0 0 0 0 0 0 0 0 0] [ 243 490 22 223 16 65 51 0 2 0 28 0 23 9 174 202 90 0 12 2 0 70 0 0 0 0 0 0 0 93 14 0 0 21] [ 1794 12 24 157 90 86 373 0 53 178 83 120 188 182 301 494 24 56 59 324 608 1 72 133 56 130 77 20 0 0 0 0 53 230] [ 951 0 1 0 4 44 2 0 0 0 0 0 11 0 2 11 0 0 0 14 48 0 0 0 4 20 4 0 0 0 0 0 0 0] [ 455 653 0 65 1 0 86 0 19 4 337 0 330 51 0 34 1086 0 147 67 176 188 0 0 3 0 5 0 0 0 0 0 0 0] [ 105 383 2 0 10 2 431 0 22 2 444 0 32 8 0 85 410 2 0 20 5 119 1 0 0 5 4 1 0 108 742 0 91 360] [ 720 424 9 0 0 0 675 0 0 0 487 0 13 26 17 123 402 77 2 8 0 130 2 0 0 10 0 0 0 103 1 31 0 39] [ 227 1488 0 6 0 29 797 0 4 0 732 0 83 0 0 238 1559 0 2 42 180 184 3 2 40 28 4 16 0 517 149 16 10 192] [ 1887 2 308 525 304 573 148 0 215 171 65 475 210 742 475 649 8 124 1083 723 593 4 7 57 14 224 67 28 0 0 0 19 44 58] [ 41 174 0 0 0 0 274 0 0 0 185 0 25 106 0 3 824 4 522 0 92 101 0 0 0 2 0 0 0 18 11 0 0 50] [ 542 766 2 65 24 309 568 0 21 2 496 0 34 7 35 180 598 21 15 17 48 342 2 27 11 6 17 0 0 163 45 0 22 62] [ 221 154 7 133 0 15 435 0 2 0 200 0 275 232 40 207 369 200 11 72 1353 107 0 9 4 29 1 0 3 58 133 0 10 452] [ 1224 910 3 111 0 10 469 0 0 2 318 0 66 14 45 82 1356 1 386 337 5 131 0 0 2 7 4 0 0 244 836 0 0 48] [ 447 4 45 25 108 176 53 0 92 95 0 11 137 125 56 203 0 47 34 171 152 0 0 40 0 198 38 27 0 0 0 0 126 3] [ 13 108 0 0 0 0 25 0 0 0 29 0 0 6 2 0 21 0 42 0 0 7 4 0 0 0 0 0 0 2 0 0 0 0] [ 455 36 0 8 0 0 4 0 0 0 7 0 0 11 2 23 200 0 10 3 2 6 0 8 0 0 2 0 0 0 0 0 0 0] [ 42 60 0 12 0 0 76 0 0 0 135 0 3 0 0 0 5 0 0 0 0 12 0 0 292 0 0 0 0 18 0 0 0 0] [ 9 276 0 0 0 0 402 0 0 0 233 0 57 5 0 102 20 0 0 0 161 53 0 0 0 0 5 0 0 0 30 0 0 0] [ 50 91 0 4 0 0 188 0 0 0 105 0 34 18 0 16 32 0 2 0 6 20 0 0 0 0 27 0 0 0 71 0 0 0] [ 0 26 0 0 0 0 107 0 0 0 74 0 0 0 0 2 0 0 0 0 0 28 0 0 0 0 0 0 0 0 1 0 0 0] [ 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3] [ 692 0 18 109 22 13 173 0 2 
1 0 218 55 44 134 9 0 22 27 45 44 0 0 153 0 12 47 0 0 0 0 0 0 0]
   [ 1319 0 12 0 2 2 47 0 0 49 6 0 97 0 11 352 0 2 0 79 9 0 2 0 24 5 90 0 0 0 0 0 12 40]
   [ 23 0 0 0 0 0 0 0 0 0 0 33 1 8 0 5 0 1 0 5 255 0 4 4 0 0 0 0 0 0 0 0 0 0]
   [ 292 0 49 0 0 27 0 0 6 1 0 0 4 0 2 6 0 0 4 2 136 0 0 0 2 4 0 31 0 0 0 0 2 0]
   [ 1425 0 8 14 18 43 89 0 11 23 2 4 25 16 19 43 0 4 11 21 225 0 0 22 65 9 0 16 0 0 0 0 35 60]]
  "Table of letters combination pairs")

;; detect list of possibly used encodings using markov's chains
(defun russian-detect-region-encoding (&optional start end)
  "Tries to detect current buffer's encoding.
Detection based on letter frequency analysis of the text between START
and END (default: the accessible portion of the buffer).  For every
encoding in `russian-detect-safe-encoding-list' the weights that
`russian-detect-table' gives to adjacent character pairs are summed
up, stopping after `russian-detect-chars' scored characters if that
variable is non-nil.
Returns valid encodings list: the names of all encodings whose score
times 1.5 reaches the best score, or nil if no score is positive.
The buffer restriction in effect when the function is called is
preserved."
  (unless start (setq start (point-min)))
  (unless end (setq end (point-max)))
  (let (enc-list code-table code-table-list enc fmax freq table
        ch pch cch pcch found (count 0) (stop nil))
    (setq enc-list russian-detect-safe-encoding-list)
    (setq code-table-list nil)
    (while enc-list
      (setq enc (car enc-list))
      (russian-get-table nil nil 255
                         (assoc enc russian-encoding-alist)
                         (assoc "unumeric" russian-encoding-alist))
      ;; we've got encoding from specified to numbers, save it
      ;; it's name and initial value of score
      (setq code-table-list
            (append code-table-list
                    (list (list enc russian-temporary-table '0))))
      (setq enc-list (cdr enc-list)))
    (save-excursion
      ;; `save-restriction' instead of a trailing `widen', which used
      ;; to discard whatever narrowing the caller had set up.
      (save-restriction
        (narrow-to-region start end)
        (goto-char (point-min))
        (setq pch 0)
        (while (and (not stop) (not (eobp)))
          (setq ch (following-char))
          (setq code-table code-table-list)
          (setq found nil)
          (while code-table
            (setq table (nth 0 code-table))
            ;; Table entries are shifted by one so that 0 can stand for
            ;; "no entry for this character".
            (setq pcch (aref (nth 1 table) pch))
            (setq pcch (if (null pcch) 0 (+ pcch 1)))
            (setq cch (aref (nth 1 table) ch))
            (setq cch (if (null cch) 0 (+ cch 1)))
            (if (or (/= pcch 0) (/= cch 0))
                (progn
                  (setq found t)
                  (setq freq (aref (aref russian-detect-table pcch) cch))
                  (if freq
                      (setcar (cddr table) (+ freq (nth 2 table))))))
            (setq code-table (cdr code-table)))
          (if found
              (progn
                (setq count (1+ count))
                (if (and russian-detect-chars
                         (>= count russian-detect-chars))
                    (setq stop t))))
          (setq pch ch)
          (forward-char))))
    ;; Find the best score ...
    (setq code-table code-table-list)
    (setq fmax (nth 2 (car code-table)))
    (while code-table
      (if (> (nth 2 (car code-table)) fmax)
          (setq fmax (nth 2 (car code-table))))
      (setq code-table (cdr code-table)))
    ;; ... and collect every encoding that comes close enough to it.
    (setq code-table code-table-list)
    (setq enc nil)
    (if (and fmax (> fmax 0))
        (while code-table
          (if (>= (* (nth 2 (car code-table)) 1.5) fmax)
              (setq enc (append enc (list (nth 0 (car code-table))))))
          (setq code-table (cdr code-table))))
    enc))

(defun russian-detect-encoding ()
  "Detects buffer encoding.
It calls `russian-detect-region-encoding' on the accessible portion of
the current buffer and returns its result."
  (russian-detect-region-encoding (point-min) (point-max)))

(defun russian-detect-or-get-encoding ()
  "Return a one-element list holding the encoding of the current buffer.
The encoding is guessed with `russian-detect-encoding'.  If the guess
is not unique, the user is asked when the command is called
interactively; otherwise `russian-buffer-name' is used if it is among
the candidates or if there are none, and the first candidate if not."
  (interactive)
  (let (encoding enc initial enc-list)
    (setq encoding (russian-detect-encoding))
    (if (interactive-p)
        (if (/= (length encoding) 1)
            (progn
              (setq enc (if encoding
                            encoding
                          russian-detect-safe-encoding-list))
              (setq initial (if (or (not encoding)
                                    (member russian-buffer-name encoding))
                                russian-buffer-name
                              (car encoding)))
              ;; Turn the list of names into a completion alist.
              (while enc
                (setq enc-list (append enc-list (list (list (car enc)))))
                (setq enc (cdr enc)))
              (setq encoding
                    (list (completing-read "encoding: "
                                           enc-list nil nil initial)))))
      (unless (= (length encoding) 1)
        ;; we weren't called interactively, and there are more than one
        ;; value in the list. I do not know what to do in such a situation!!!
        (if (or (= (length encoding) 0)
                (member russian-buffer-name encoding))
            (setq encoding (list russian-buffer-name))
          (setq encoding (list (car encoding))))))
    encoding))

(defun russian-toggle-input (&optional arg)
  "Turn on russian encoding.
Turn `russian-mode' on (and `russian-insertion-mode' too, if
`russian-mode-name' is one of `russian-safe-input-encodings') when ARG
is nil and `russian-mode' is off, when ARG is a positive number, when
ARG is a member of `russian-buffer-default-list', or when ARG is t.
In all other cases only `russian-insertion-mode' is changed: it is
turned off for a negative number or an unsafe input encoding, and
toggled otherwise."
  (interactive "P")
  (if (or (and (null arg) (not russian-mode))
          (and (numberp arg) (> arg 0))
          (and (stringp arg) (member arg russian-buffer-default-list))
          (equal arg t))
      (progn
        (if (member arg russian-buffer-default-list)
            (setq russian-buffer-name arg)
          (if (not (russian-set-default-encoding))
              (setq russian-buffer-name
                    (car (if (interactive-p)
                             (call-interactively
                              'russian-detect-or-get-encoding)
                           (russian-detect-or-get-encoding))))))
        (russian-mode 1)
        (if (member russian-mode-name russian-safe-input-encodings)
            (russian-insertion-mode 1)))
    (if (or (and (numberp arg) (< arg 0))
            (not (member russian-mode-name russian-safe-input-encodings)))
        (russian-insertion-mode -1)
      (russian-insertion-mode))))

(defun russian-build-detect-table (&optional src-buffer dest-buffer
                                             input-encoding ignore-io
                                             append-table)
  "Builds table for character set autodetection.
Takes input buffer SRC-BUFFER (default: the current buffer) with some
text, its encoding INPUT-ENCODING (default: `russian-buffer-name', or
\"8koi\" if that is nil), and places the result at the end of
DEST-BUFFER (default: \"*scratch*\").
If IGNORE-IO is not nil, russian IO is counted as IE.
If APPEND-TABLE is not nil then newly collected data are added to a
copy of the old table, else old data are overwritten.
After table has build you can evaluate it (C-x C-e) and test it
(russian-detect-or-get-encoding).  And if the result satisfies you
move newly created table to your .emacs"
  (interactive
   ;; `yes-or-no-p-minibuf' exists only in XEmacs; use plain
   ;; `yes-or-no-p' where it is missing.
   (let ((ask (if (fboundp 'yes-or-no-p-minibuf)
                  'yes-or-no-p-minibuf
                'yes-or-no-p)))
     (list (read-buffer "Input buffer: " (current-buffer))
           (read-buffer "Output buffer: " "*scratch*")
           (let ((value (completing-read
                         (concat "encoding of a source buffer (default "
                                 russian-buffer-name "): ")
                         russian-buffer-alist nil t)))
             (if (equal value "") russian-buffer-name value))
           (funcall ask "Ignore russian IO? ")
           (funcall ask "Append data to main table? "))))
  (let (prior-ch current-ch code-table markov-table (processed 0))
    (unless src-buffer (setq src-buffer (current-buffer)))
    (unless input-encoding (setq input-encoding russian-buffer-name))
    (unless input-encoding (setq input-encoding "8koi"))
    ;; `error' rather than `cerror': the latter is not defined in
    ;; FSF Emacs.
    (unless (assoc input-encoding russian-encoding-alist)
      (error "Unable to find such an encoding"))
    (russian-get-table nil nil 255
                       (assoc input-encoding russian-encoding-alist)
                       (assoc "unumeric" russian-encoding-alist))
    (setq code-table russian-temporary-table)
    (setq markov-table (make-vector 34 nil))
    (if append-table
        ;; Copy the rows as well: a shallow `vconcat' shares them with
        ;; `russian-detect-table', so counting below would silently
        ;; modify the table that detection is using.
        (setq markov-table
              (apply 'vector (mapcar 'copy-sequence russian-detect-table)))
      (setq prior-ch 0)
      (while (< prior-ch 34)
        (aset markov-table prior-ch (make-vector 34 0))
        (setq prior-ch (1+ prior-ch))))
    ;; Count adjacent pairs in the source text.
    (save-excursion
      (set-buffer src-buffer)
      (goto-char (point-min))
      (setq prior-ch 0)
      (while (not (eobp))
        (setq current-ch (following-char))
        (if (equal current-ch ?\n)
            (progn
              (setq processed (1+ processed))
              (message "%d lines" processed)))
        (setq current-ch (aref code-table current-ch))
        ;; Shift by one; 0 stands for "no entry for this character".
        (setq current-ch (if current-ch (1+ current-ch) 0))
        ;; hack to process russian io as ie
        (if (and ignore-io (equal current-ch 7))
            (setq current-ch 6))
        (if (or (/= prior-ch 0) (/= current-ch 0))
            (aset (aref markov-table prior-ch) current-ch
                  (1+ (aref (aref markov-table prior-ch) current-ch))))
        (setq prior-ch current-ch)
        (forward-char)))
    ;; Write the table out as Lisp source.
    (set-buffer (or dest-buffer "*scratch*"))
    (goto-char (point-max))
    ;; NOTE(review): the generated form defines `russian-markov-table',
    ;; whereas detection reads `russian-detect-table' -- confirm which
    ;; name is intended.
    (insert "(defvar russian-markov-table [\n")
    (if ignore-io
        ;; Give IO the same column and the same row as IE.
        (progn
          (setq current-ch 0)
          (while (< current-ch 34)
            (aset (aref markov-table current-ch) 7
                  (aref (aref markov-table current-ch) 6))
            (setq current-ch (1+ current-ch)))
          (aset markov-table 7 (aref markov-table 6))))
    (setq prior-ch 0)
    (while (< prior-ch 34)
      (insert " [")
      (setq current-ch 0)
      (while (< current-ch 34)
        (insert " " (format "%5d" (aref (aref markov-table prior-ch)
                                        current-ch)))
        (setq current-ch (1+ current-ch)))
      (insert "]")
      (setq prior-ch (1+ prior-ch))
      (if (< prior-ch 34) (insert "\n")))
    (insert "]\n" " \"Table of letters combination pairs\")\n"))
  (message "ok"))

(provide 'russian)

;;; russian.el ends here