Source code for adapya.base.conv

# -*- coding: latin1 -*-
"""
conv - Text conversion routines
===============================

The module conv.py contains several conversion routines for ASCII/EBCDIC
conversion and byte swapping.

By default the character set is Latin-1, with cp37 as the EBCDIC code page
and cp819 (ISO-8859-1) as the extended ASCII code page.

The following conversions are defined::

     37 <=> 819
   1047 <=> 819
   1141 <=> 1252

"""
import sys

# Name of the UTF-16 codec that matches the byte order of this machine.
# Spelling out the endianness means encode() does not emit a byte-order
# mark (BOM), unlike the plain 'utf_16' codec.
UTF16_NATIVE = 'UTF_16_LE' if sys.byteorder == 'little' else 'UTF_16_BE'

# Translation table from EBCDIC code page 37 to code page 819 (ISO-8859-1):
# the byte at offset i is the Latin-1 equivalent of EBCDIC byte i.
# Each row of the literal holds 16 consecutive entries.
tt37_819 = (
    b'\x00\x01\x02\x03\x9C\x09\x86\x7F\x97\x8D\x8E\x0B\x0C\x0D\x0E\x0F'
    b'\x10\x11\x12\x13\x9D\x85\x08\x87\x18\x19\x92\x8F\x1C\x1D\x1E\x1F'
    b'\x80\x81\x82\x83\x84\x0A\x17\x1B\x88\x89\x8A\x8B\x8C\x05\x06\x07'
    b'\x90\x91\x16\x93\x94\x95\x96\x04\x98\x99\x9A\x9B\x14\x15\x9E\x1A'
    b'\x20\xA0\xE2\xE4\xE0\xE1\xE3\xE5\xE7\xF1\xA2\x2E\x3C\x28\x2B\x7C'
    b'\x26\xE9\xEA\xEB\xE8\xED\xEE\xEF\xEC\xDF\x21\x24\x2A\x29\x3B\xAC'
    b'\x2D\x2F\xC2\xC4\xC0\xC1\xC3\xC5\xC7\xD1\xA6\x2C\x25\x5F\x3E\x3F'
    b'\xF8\xC9\xCA\xCB\xC8\xCD\xCE\xCF\xCC\x60\x3A\x23\x40\x27\x3D\x22'
    b'\xD8\x61\x62\x63\x64\x65\x66\x67\x68\x69\xAB\xBB\xF0\xFD\xFE\xB1'
    b'\xB0\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\xAA\xBA\xE6\xB8\xC6\xA4'
    b'\xB5\x7E\x73\x74\x75\x76\x77\x78\x79\x7A\xA1\xBF\xD0\xDD\xDE\xAE'
    b'\x5E\xA3\xA5\xB7\xA9\xA7\xB6\xBC\xBD\xBE\x5B\x5D\xAF\xA8\xB4\xD7'
    b'\x7B\x41\x42\x43\x44\x45\x46\x47\x48\x49\xAD\xF4\xF6\xF2\xF3\xF5'
    b'\x7D\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\xB9\xFB\xFC\xF9\xFA\xFF'
    b'\x5C\xF7\x53\x54\x55\x56\x57\x58\x59\x5A\xB2\xD4\xD6\xD2\xD3\xD5'
    b'\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\xB3\xDB\xDC\xD9\xDA\x9F'
)

# Translation table from code page 819 (ISO-8859-1) to EBCDIC code page 37:
# the byte at offset i is the EBCDIC equivalent of Latin-1 byte i.
# Each row of the literal holds 16 consecutive entries.
tt819_37 = (
    b'\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x25\x0B\x0C\x0D\x0E\x0F'
    b'\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F'
    b'\x40\x5A\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61'
    b'\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F'
    b'\x7C\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6'
    b'\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xBA\xE0\xBB\xB0\x6D'
    b'\x79\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96'
    b'\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xC0\x4F\xD0\xA1\x07'
    b'\x20\x21\x22\x23\x24\x15\x06\x17\x28\x29\x2A\x2B\x2C\x09\x0A\x1B'
    b'\x30\x31\x1A\x33\x34\x35\x36\x08\x38\x39\x3A\x3B\x04\x14\x3E\xFF'
    b'\x41\xAA\x4A\xB1\x9F\xB2\x6A\xB5\xBD\xB4\x9A\x8A\x5F\xCA\xAF\xBC'
    b'\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB'
    b'\x64\x65\x62\x66\x63\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77'
    b'\xAC\x69\xED\xEE\xEB\xEF\xEC\xBF\x80\xFD\xFE\xFB\xFC\xAD\xAE\x59'
    b'\x44\x45\x42\x46\x43\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57'
    b'\x8C\x49\xCD\xCE\xCB\xCF\xCC\xE1\x70\xDD\xDE\xDB\xDC\x8D\x8E\xDF'
)

# 1141 EBCDIC German, Austria with Euro to 1252 Windows Latin1
# substitution character is 0x1a
# The byte at offset i is the code page 1252 equivalent of code page 1141
# byte i; positions without a counterpart hold the substitution byte 0x1A.
# Each row of the literal holds 16 consecutive entries.
tt1141_1252 = (
    b"\x00\x01\x02\x03\x1A\x09\x1A\x7F\x1A\x8D\x1A\x0B\x0C\x0D\x0E\x0F"
    b"\x10\x11\x12\x13\x9D\x1A\x08\x1A\x18\x19\x1A\x8F\x1C\x1D\x1E\x1F"
    b"\x1A\x81\x1A\x1A\x1A\x0A\x17\x1B\x1A\x1A\x1A\x1A\x1A\x05\x06\x07"
    b"\x90\x1A\x16\x1A\x1A\x1A\x1A\x04\x1A\x1A\x1A\x1A\x14\x15\x1A\x1A"
    b"\x20\xA0\xE2\x7B\xE0\xE1\xE3\xE5\xE7\xF1\xC4\x2E\x3C\x28\x2B\x21"
    b"\x26\xE9\xEA\xEB\xE8\xED\xEE\xEF\xEC\x7E\xDC\x24\x2A\x29\x3B\x5E"
    b"\x2D\x2F\xC2\x5B\xC0\xC1\xC3\xC5\xC7\xD1\xF6\x2C\x25\x5F\x3E\x3F"
    b"\xF8\xC9\xCA\xCB\xC8\xCD\xCE\xCF\xCC\x60\x3A\x23\xA7\x27\x3D\x22"
    b"\xD8\x61\x62\x63\x64\x65\x66\x67\x68\x69\xAB\xBB\xF0\xFD\xFE\xB1"
    b"\xB0\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\xAA\xBA\xE6\xB8\xC6\x80"
    b"\xB5\xDF\x73\x74\x75\x76\x77\x78\x79\x7A\xA1\xBF\xD0\xDD\xDE\xAE"
    b"\xA2\xA3\xA5\xB7\xA9\x40\xB6\xBC\xBD\xBE\xAC\x7C\xAF\xA8\xB4\xD7"
    b"\xE4\x41\x42\x43\x44\x45\x46\x47\x48\x49\xAD\xF4\xA6\xF2\xF3\xF5"
    b"\xFC\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\xB9\xFB\x7D\xF9\xFA\xFF"
    b"\xD6\xF7\x53\x54\x55\x56\x57\x58\x59\x5A\xB2\xD4\x5C\xD2\xD3\xD5"
    b"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\xB3\xDB\x5D\xD9\xDA\x1A"
)

# substitution character is 0x3f
# The byte at offset i is the code page 1141 equivalent of code page 1252
# byte i; positions without a counterpart hold the substitution byte 0x3F.
# Each row of the literal holds 16 consecutive entries.
tt1252_1141 = (
    b"\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x25\x0B\x0C\x0D\x0E\x0F"
    b"\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F"
    b"\x40\x4F\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61"
    b"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F"
    b"\xB5\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6"
    b"\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\x63\xEC\xFC\x5F\x6D"
    b"\x79\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96"
    b"\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\x43\xBB\xDC\x59\x07"
    b"\x9F\x21\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x09\x3F\x1B"
    b"\x30\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x14\x3F\x3F"
    b"\x41\xAA\xB0\xB1\x3F\xB2\xCC\x7C\xBD\xB4\x9A\x8A\xBA\xCA\xAF\xBC"
    b"\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB"
    b"\x64\x65\x62\x66\x4A\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77"
    b"\xAC\x69\xED\xEE\xEB\xEF\xE0\xBF\x80\xFD\xFE\xFB\x5A\xAD\xAE\xA1"
    b"\x44\x45\x42\x46\xC0\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57"
    b"\x8C\x49\xCD\xCE\xCB\xCF\x6A\xE1\x70\xDD\xDE\xDB\xD0\x8D\x8E\xDF"
)
#      0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F

# Translation table from code page 1047 to 819, with NL and LF swapped
# The byte at offset i is the code page 819 (ISO-8859-1) equivalent of
# EBCDIC code page 1047 byte i.  Each row holds 16 consecutive entries.
#
# The literal is parenthesised rather than joined with trailing backslashes:
# previously the last row ended in a backslash that continued the statement
# into the column-ruler comment below it.
tt1047_819 = (
    b"\x00\x01\x02\x03\x9C\x09\x86\x7F\x97\x8D\x8E\x0B\x0C\x0D\x0E\x0F"
    b"\x10\x11\x12\x13\x9D\x0A\x08\x87\x18\x19\x92\x8F\x1C\x1D\x1E\x1F"
    b"\x80\x81\x82\x83\x84\x85\x17\x1B\x88\x89\x8A\x8B\x8C\x05\x06\x07"
    b"\x90\x91\x16\x93\x94\x95\x96\x04\x98\x99\x9A\x9B\x14\x15\x9E\x1A"
    b"\x20\xA0\xE2\xE4\xE0\xE1\xE3\xE5\xE7\xF1\xA2\x2E\x3C\x28\x2B\x7C"
    b"\x26\xE9\xEA\xEB\xE8\xED\xEE\xEF\xEC\xDF\x21\x24\x2A\x29\x3B\x5E"
    b"\x2D\x2F\xC2\xC4\xC0\xC1\xC3\xC5\xC7\xD1\xA6\x2C\x25\x5F\x3E\x3F"
    b"\xF8\xC9\xCA\xCB\xC8\xCD\xCE\xCF\xCC\x60\x3A\x23\x40\x27\x3D\x22"
    b"\xD8\x61\x62\x63\x64\x65\x66\x67\x68\x69\xAB\xBB\xF0\xFD\xFE\xB1"
    b"\xB0\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\xAA\xBA\xE6\xB8\xC6\xA4"
    b"\xB5\x7E\x73\x74\x75\x76\x77\x78\x79\x7A\xA1\xBF\xD0\x5B\xDE\xAE"
    b"\xAC\xA3\xA5\xB7\xA9\xA7\xB6\xBC\xBD\xBE\xDD\xA8\xAF\x5D\xB4\xD7"
    b"\x7B\x41\x42\x43\x44\x45\x46\x47\x48\x49\xAD\xF4\xF6\xF2\xF3\xF5"
    b"\x7D\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\xB9\xFB\xFC\xF9\xFA\xFF"
    b"\x5C\xF7\x53\x54\x55\x56\x57\x58\x59\x5A\xB2\xD4\xD6\xD2\xD3\xD5"
    b"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\xB3\xDB\xDC\xD9\xDA\x9F"
)
#      0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F

# Translation table from code page 819 to 1047, with NL and LF swapped
# The byte at offset i is the EBCDIC code page 1047 equivalent of code page
# 819 (ISO-8859-1) byte i.  Each row holds 16 consecutive entries.
#
# The literal is parenthesised rather than joined with trailing backslashes:
# previously the last row ended in a backslash that continued the statement
# into the column-ruler comment below it.
tt819_1047 = (
    b"\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x15\x0B\x0C\x0D\x0E\x0F"
    b"\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F"
    b"\x40\x5A\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61"
    b"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F"
    b"\x7C\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6"
    b"\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xAD\xE0\xBD\x5F\x6D"
    b"\x79\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96"
    b"\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xC0\x4F\xD0\xA1\x07"
    b"\x20\x21\x22\x23\x24\x25\x06\x17\x28\x29\x2A\x2B\x2C\x09\x0A\x1B"
    b"\x30\x31\x1A\x33\x34\x35\x36\x08\x38\x39\x3A\x3B\x04\x14\x3E\xFF"
    b"\x41\xAA\x4A\xB1\x9F\xB2\x6A\xB5\xBB\xB4\x9A\x8A\xB0\xCA\xAF\xBC"
    b"\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB"
    b"\x64\x65\x62\x66\x63\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77"
    b"\xAC\x69\xED\xEE\xEB\xEF\xEC\xBF\x80\xFD\xFE\xFB\xFC\xBA\xAE\x59"
    b"\x44\x45\x42\x46\x43\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57"
    b"\x8C\x49\xCD\xCE\xCB\xCF\xCC\xE1\x70\xDD\xDE\xDB\xDC\x8D\x8E\xDF"
)
#      0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F

# Registry of the translation tables, keyed by the tuple
# (source code page, target code page).
ttdic = {
    (37, 819): tt37_819,
    (819, 37): tt819_37,
    (1047, 819): tt1047_819,
    (819, 1047): tt819_1047,
    (1141, 1252): tt1141_1252,
    (1252, 1141): tt1252_1141,
}

def asc2ebc(buf, start, stop, senco=819, tenco=37):
    """Convert ASCII bytes in a mutable buffer to EBCDIC, in place.

    Every element ``buf[i]`` with ``start <= i < stop`` is replaced by its
    entry in the translation table registered for ``(senco, tenco)``.

    :param buf: mutable buffer whose elements are either integers
        (e.g. ``bytearray``) or one-character byte strings
    :param start: start offset in buf
    :param stop: ending offset in buf (exclusive)
    :param senco: source ASCII encoding. Supported are:
        819 (Latin1 ISO-8859-1) or 1252 (Windows Latin1)
    :param tenco: target EBCDIC encoding. Supported are:
        37 (US EBCDIC Latin1), 1047 (from 819 only) or
        1141 (EBCDIC German/Austria with Euro, from 1252 only)
    :raises KeyError: if no table is registered for ``(senco, tenco)``
    :returns: None; buf is modified in place
    """
    tt = ttdic[(senco, tenco)]
    for i in range(start, stop):
        item = buf[i]
        # Python 3 bytearray/memoryview elements are already integers, and
        # ord() rejects them; other buffers yield one-character strings.
        code = item if isinstance(item, int) else ord(item)
        buf[i] = tt[code]
def ebc2asc(buf, start, stop, senco=37, tenco=819):
    """Convert EBCDIC bytes in a mutable buffer to ASCII, in place.

    Every element ``buf[i]`` with ``start <= i < stop`` is replaced by its
    entry in the translation table registered for ``(senco, tenco)``.

    :param buf: mutable buffer whose elements are either integers
        (e.g. ``bytearray``) or one-character byte strings
    :param start: start offset in buf
    :param stop: ending offset in buf (exclusive)
    :param senco: source EBCDIC encoding. Supported code pages are
        37 (US EBCDIC Latin1), 1047 (to 819 only) or
        1141 (EBCDIC German/Austria with Euro, to 1252 only)
    :param tenco: target ASCII encoding. Supported are
        819 (Latin1 ISO-8859-1) or 1252 (Windows Latin1)
    :raises KeyError: if no table is registered for ``(senco, tenco)``
    :returns: None; buf is modified in place
    """
    tt = ttdic[(senco, tenco)]
    for i in range(start, stop):
        item = buf[i]
        # Python 3 bytearray/memoryview elements are already integers, and
        # ord() rejects them; other buffers yield one-character strings.
        code = item if isinstance(item, int) else ord(item)
        buf[i] = tt[code]
if sys.hexversion < 0x03010100:  # Python 2
    import array  # str2uni() uni2str()
    import string

    def ebc2str(istr, senco=37, tenco=819):
        r"""Translate a byte string with the (senco, tenco) table.

        :param istr: byte string to translate
        :param senco: source code page
        :param tenco: target code page
        :raises KeyError: if no table is registered for (senco, tenco)

        >>> ebc2str('\xc1\xc2\xc3') == 'ABC'
        True
        """
        tt = ttdic[(senco, tenco)]
        return string.translate(istr, tt)

    def str2ebc(str, senco=819, tenco=37):
        r"""Translate a byte string with the (senco, tenco) table.

        :param str: byte string to translate
        :param senco: source code page
        :param tenco: target code page
        :raises KeyError: if no table is registered for (senco, tenco)

        >>> str2ebc('ABC') == '\xc1\xc2\xc3'
        True
        """
        tt = ttdic[(senco, tenco)]
        return string.translate(str, tt)

    def str2asc(str, senco=37, tenco=819):
        """Translate a byte string with the (senco, tenco) table.

        :raises KeyError: if no table is registered for (senco, tenco)
        """
        tt = ttdic[(senco, tenco)]
        return string.translate(str, tt)

    def uni2str(u):
        "convert unicode string to binary string"
        return array.array('u', u).tostring()

    def str2uni(s):
        """Convert binary string to unicode string.

        The length of the input string s must be even
        otherwise a ValueError exception is raised.
        """
        return array.array('u', s).tounicode()

    def swap(s):
        """swap string"""
        a = array.array('c', s)
        a.reverse()
        return a.tostring()

else:  # Python 3 and higher

    def str2ebc(istr, senco=819, tenco=37):
        r"""Translate bytes, bytearray or string to the target encoding.

        bytes and bytearray input is translated with the (senco, tenco)
        table and the result has the same type as the input.  A str is
        encoded with the Python codec ``'cp%03d' % tenco`` instead.

        :param istr: bytes, bytearray or str to convert
        :param senco: source code page (used for bytes/bytearray only)
        :param tenco: target code page
        :raises KeyError: if istr is bytes/bytearray and no table is
            registered for (senco, tenco)

        >>> str2ebc(b'ABC')
        b'\xc1\xc2\xc3'
        >>> abc=bytearray('ABC',encoding='Latin1')
        >>> str2ebc(abc)
        bytearray(b'\xc1\xc2\xc3')
        >>> str2ebc('ABC')
        b'\xc1\xc2\xc3'
        """
        if isinstance(istr, (bytes, bytearray)):
            # translate() returns an object of the receiver's type
            return istr.translate(ttdic[(senco, tenco)])
        return istr.encode('cp%03d' % tenco)

    def str2asc(istr, senco=37, tenco=819):
        """Translate bytes, bytearray or string to the target encoding
        and return a bytes string.

        bytes and bytearray input is translated with the (senco, tenco)
        table.  A str is encoded with the Python codec
        ``'cp%03d' % tenco`` instead.

        :param istr: bytes, bytearray or str to convert
        :param senco: source code page (used for bytes/bytearray only)
        :param tenco: target code page
        :returns: bytes
        :raises KeyError: if istr is bytes/bytearray and no table is
            registered for (senco, tenco)
        """
        if isinstance(istr, (bytes, bytearray)):
            # bytes.translate() cannot be applied to a bytearray directly,
            # so convert first; the result is always bytes.
            return bytes(istr).translate(ttdic[(senco, tenco)])
        return istr.encode('cp%03d' % tenco)

    def ebc2str(istr, senco=37, tenco=819):
        """Decode a bytes-like EBCDIC value into a str.

        :param istr: object with a decode() method (bytes or bytearray)
        :param senco: source code page; selects the codec 'cp%03d' % senco
        :param tenco: not used in this implementation
        :returns: str
        """
        return istr.decode('cp%03d' % senco)

    def uni2str(u):
        """convert unicode string to bytes string

        :param u: string
        :returns: bytes string in native-endian UTF-16 without a
            byte-order mark (BOM)
        """
        # Pass the module constant, not the literal text 'UTF16_NATIVE',
        # which is not a codec name and raises LookupError.
        return u.encode(UTF16_NATIVE)

    def str2uni(b):
        """Convert bytes string to unicode string.

        The input is decoded as native-endian UTF-16 without a
        byte-order mark (BOM).  The length of the input must be even,
        otherwise the codec raises UnicodeDecodeError (a ValueError).
        """
        return b.decode(UTF16_NATIVE)

    def swap(s):
        """Return s with its elements in reverse order.

        bytes yields bytes, bytearray yields a new bytearray and str yields
        str.  Any other iterable of strings is joined into one str in
        reverse order.
        """
        if isinstance(s, (bytes, bytearray, str)):
            return s[::-1]
        return ''.join(reversed(list(s)))
if __name__ == "__main__":
    # Executed as a script: run the doctest examples found in this module.
    from doctest import testmod
    testmod()

#  Copyright 2004-ThisYear Software AG
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
#  $Date: 2023-12-01 00:54:33 +0100 (Fri, 01 Dec 2023) $
#  $Rev: 1072 $