# -*- coding: latin1 -*-
"""
conv - Text conversion routines
===============================
The module conv.py contains several conversion routines for ASCII/EBCDIC
conversion and byte swapping.
Character set is Latin1 per default with cp37 as EBCDIC
and cp819 (ISO-8859-1) as extended ASCII code pages.
The following conversions are defined::
37 <=> 819
1047 <=> 819
1141 <=> 1252
"""
import sys
# Name of the UTF-16 codec that matches the byte order of the running
# machine.  The explicit LE/BE codecs are used (rather than plain 'utf-16')
# so that no byte-order mark (BOM) is written or expected.
UTF16_NATIVE = 'UTF_16_LE' if sys.byteorder == 'little' else 'UTF_16_BE'
# Translation table from code page 37 (EBCDIC) to code page 819
# (ISO-8859-1).  The table is indexed by the source byte value; each row
# below holds the 16 entries sharing the same high nibble.
tt37_819 = (
    #    0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    b'\x00\x01\x02\x03\x9C\x09\x86\x7F\x97\x8D\x8E\x0B\x0C\x0D\x0E\x0F'
    b'\x10\x11\x12\x13\x9D\x85\x08\x87\x18\x19\x92\x8F\x1C\x1D\x1E\x1F'
    b'\x80\x81\x82\x83\x84\x0A\x17\x1B\x88\x89\x8A\x8B\x8C\x05\x06\x07'
    b'\x90\x91\x16\x93\x94\x95\x96\x04\x98\x99\x9A\x9B\x14\x15\x9E\x1A'
    b'\x20\xA0\xE2\xE4\xE0\xE1\xE3\xE5\xE7\xF1\xA2\x2E\x3C\x28\x2B\x7C'
    b'\x26\xE9\xEA\xEB\xE8\xED\xEE\xEF\xEC\xDF\x21\x24\x2A\x29\x3B\xAC'
    b'\x2D\x2F\xC2\xC4\xC0\xC1\xC3\xC5\xC7\xD1\xA6\x2C\x25\x5F\x3E\x3F'
    b'\xF8\xC9\xCA\xCB\xC8\xCD\xCE\xCF\xCC\x60\x3A\x23\x40\x27\x3D\x22'
    b'\xD8\x61\x62\x63\x64\x65\x66\x67\x68\x69\xAB\xBB\xF0\xFD\xFE\xB1'
    b'\xB0\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\xAA\xBA\xE6\xB8\xC6\xA4'
    b'\xB5\x7E\x73\x74\x75\x76\x77\x78\x79\x7A\xA1\xBF\xD0\xDD\xDE\xAE'
    b'\x5E\xA3\xA5\xB7\xA9\xA7\xB6\xBC\xBD\xBE\x5B\x5D\xAF\xA8\xB4\xD7'
    b'\x7B\x41\x42\x43\x44\x45\x46\x47\x48\x49\xAD\xF4\xF6\xF2\xF3\xF5'
    b'\x7D\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\xB9\xFB\xFC\xF9\xFA\xFF'
    b'\x5C\xF7\x53\x54\x55\x56\x57\x58\x59\x5A\xB2\xD4\xD6\xD2\xD3\xD5'
    b'\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\xB3\xDB\xDC\xD9\xDA\x9F'
)
# Translation table from code page 819 (ISO-8859-1) to code page 37
# (EBCDIC).  The table is indexed by the source byte value; each row below
# holds the 16 entries sharing the same high nibble.
tt819_37 = (
    #    0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    b'\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x25\x0B\x0C\x0D\x0E\x0F'
    b'\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F'
    b'\x40\x5A\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61'
    b'\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F'
    b'\x7C\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6'
    b'\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xBA\xE0\xBB\xB0\x6D'
    b'\x79\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96'
    b'\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xC0\x4F\xD0\xA1\x07'
    b'\x20\x21\x22\x23\x24\x15\x06\x17\x28\x29\x2A\x2B\x2C\x09\x0A\x1B'
    b'\x30\x31\x1A\x33\x34\x35\x36\x08\x38\x39\x3A\x3B\x04\x14\x3E\xFF'
    b'\x41\xAA\x4A\xB1\x9F\xB2\x6A\xB5\xBD\xB4\x9A\x8A\x5F\xCA\xAF\xBC'
    b'\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB'
    b'\x64\x65\x62\x66\x63\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77'
    b'\xAC\x69\xED\xEE\xEB\xEF\xEC\xBF\x80\xFD\xFE\xFB\xFC\xAD\xAE\x59'
    b'\x44\x45\x42\x46\x43\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57'
    b'\x8C\x49\xCD\xCE\xCB\xCF\xCC\xE1\x70\xDD\xDE\xDB\xDC\x8D\x8E\xDF'
)
# Translation table from code page 1141 (EBCDIC German/Austria with Euro)
# to code page 1252 (Windows Latin1).  The table is indexed by the source
# byte value; source bytes without a counterpart are mapped to the
# substitution character 0x1A.
tt1141_1252 = (
    #    0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    b"\x00\x01\x02\x03\x1A\x09\x1A\x7F\x1A\x8D\x1A\x0B\x0C\x0D\x0E\x0F"
    b"\x10\x11\x12\x13\x9D\x1A\x08\x1A\x18\x19\x1A\x8F\x1C\x1D\x1E\x1F"
    b"\x1A\x81\x1A\x1A\x1A\x0A\x17\x1B\x1A\x1A\x1A\x1A\x1A\x05\x06\x07"
    b"\x90\x1A\x16\x1A\x1A\x1A\x1A\x04\x1A\x1A\x1A\x1A\x14\x15\x1A\x1A"
    b"\x20\xA0\xE2\x7B\xE0\xE1\xE3\xE5\xE7\xF1\xC4\x2E\x3C\x28\x2B\x21"
    b"\x26\xE9\xEA\xEB\xE8\xED\xEE\xEF\xEC\x7E\xDC\x24\x2A\x29\x3B\x5E"
    b"\x2D\x2F\xC2\x5B\xC0\xC1\xC3\xC5\xC7\xD1\xF6\x2C\x25\x5F\x3E\x3F"
    b"\xF8\xC9\xCA\xCB\xC8\xCD\xCE\xCF\xCC\x60\x3A\x23\xA7\x27\x3D\x22"
    b"\xD8\x61\x62\x63\x64\x65\x66\x67\x68\x69\xAB\xBB\xF0\xFD\xFE\xB1"
    b"\xB0\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\xAA\xBA\xE6\xB8\xC6\x80"
    b"\xB5\xDF\x73\x74\x75\x76\x77\x78\x79\x7A\xA1\xBF\xD0\xDD\xDE\xAE"
    b"\xA2\xA3\xA5\xB7\xA9\x40\xB6\xBC\xBD\xBE\xAC\x7C\xAF\xA8\xB4\xD7"
    b"\xE4\x41\x42\x43\x44\x45\x46\x47\x48\x49\xAD\xF4\xA6\xF2\xF3\xF5"
    b"\xFC\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\xB9\xFB\x7D\xF9\xFA\xFF"
    b"\xD6\xF7\x53\x54\x55\x56\x57\x58\x59\x5A\xB2\xD4\x5C\xD2\xD3\xD5"
    b"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\xB3\xDB\x5D\xD9\xDA\x1A"
)
# Translation table from code page 1252 (Windows Latin1) to code page 1141
# (EBCDIC German/Austria with Euro).  The table is indexed by the source
# byte value; source bytes without a counterpart are mapped to the
# substitution character 0x3F.
tt1252_1141 = (
    #    0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    b"\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x25\x0B\x0C\x0D\x0E\x0F"
    b"\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F"
    b"\x40\x4F\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61"
    b"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F"
    b"\xB5\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6"
    b"\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\x63\xEC\xFC\x5F\x6D"
    b"\x79\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96"
    b"\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\x43\xBB\xDC\x59\x07"
    b"\x9F\x21\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x09\x3F\x1B"
    b"\x30\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x3F\x14\x3F\x3F"
    b"\x41\xAA\xB0\xB1\x3F\xB2\xCC\x7C\xBD\xB4\x9A\x8A\xBA\xCA\xAF\xBC"
    b"\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB"
    b"\x64\x65\x62\x66\x4A\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77"
    b"\xAC\x69\xED\xEE\xEB\xEF\xE0\xBF\x80\xFD\xFE\xFB\x5A\xAD\xAE\xA1"
    b"\x44\x45\x42\x46\xC0\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57"
    b"\x8C\x49\xCD\xCE\xCB\xCF\x6A\xE1\x70\xDD\xDE\xDB\xD0\x8D\x8E\xDF"
)
# Translation table from code page 1047 (EBCDIC) to code page 819
# (ISO-8859-1) with NL and LF swapped: EBCDIC 0x15 maps to 0x0A and
# EBCDIC 0x25 maps to 0x85.  The table is indexed by the source byte value.
#
# The literals are joined inside parentheses; the previous form ended the
# last literal with a backslash that continued into the following comment
# line and only parsed because that comment happened to be there.
tt1047_819 = (
    #    0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    b"\x00\x01\x02\x03\x9C\x09\x86\x7F\x97\x8D\x8E\x0B\x0C\x0D\x0E\x0F"
    b"\x10\x11\x12\x13\x9D\x0A\x08\x87\x18\x19\x92\x8F\x1C\x1D\x1E\x1F"
    b"\x80\x81\x82\x83\x84\x85\x17\x1B\x88\x89\x8A\x8B\x8C\x05\x06\x07"
    b"\x90\x91\x16\x93\x94\x95\x96\x04\x98\x99\x9A\x9B\x14\x15\x9E\x1A"
    b"\x20\xA0\xE2\xE4\xE0\xE1\xE3\xE5\xE7\xF1\xA2\x2E\x3C\x28\x2B\x7C"
    b"\x26\xE9\xEA\xEB\xE8\xED\xEE\xEF\xEC\xDF\x21\x24\x2A\x29\x3B\x5E"
    b"\x2D\x2F\xC2\xC4\xC0\xC1\xC3\xC5\xC7\xD1\xA6\x2C\x25\x5F\x3E\x3F"
    b"\xF8\xC9\xCA\xCB\xC8\xCD\xCE\xCF\xCC\x60\x3A\x23\x40\x27\x3D\x22"
    b"\xD8\x61\x62\x63\x64\x65\x66\x67\x68\x69\xAB\xBB\xF0\xFD\xFE\xB1"
    b"\xB0\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\xAA\xBA\xE6\xB8\xC6\xA4"
    b"\xB5\x7E\x73\x74\x75\x76\x77\x78\x79\x7A\xA1\xBF\xD0\x5B\xDE\xAE"
    b"\xAC\xA3\xA5\xB7\xA9\xA7\xB6\xBC\xBD\xBE\xDD\xA8\xAF\x5D\xB4\xD7"
    b"\x7B\x41\x42\x43\x44\x45\x46\x47\x48\x49\xAD\xF4\xF6\xF2\xF3\xF5"
    b"\x7D\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\xB9\xFB\xFC\xF9\xFA\xFF"
    b"\x5C\xF7\x53\x54\x55\x56\x57\x58\x59\x5A\xB2\xD4\xD6\xD2\xD3\xD5"
    b"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\xB3\xDB\xDC\xD9\xDA\x9F"
)
# Translation table from code page 819 (ISO-8859-1) to code page 1047
# (EBCDIC) with NL and LF swapped: 0x0A maps to EBCDIC 0x15 and 0x85 maps
# to EBCDIC 0x25.  The table is indexed by the source byte value.
#
# The literals are joined inside parentheses; the previous form ended the
# last literal with a backslash that continued into the following comment
# line and only parsed because that comment happened to be there.
tt819_1047 = (
    #    0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    b"\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x15\x0B\x0C\x0D\x0E\x0F"
    b"\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F"
    b"\x40\x5A\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61"
    b"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F"
    b"\x7C\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6"
    b"\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xAD\xE0\xBD\x5F\x6D"
    b"\x79\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96"
    b"\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xC0\x4F\xD0\xA1\x07"
    b"\x20\x21\x22\x23\x24\x25\x06\x17\x28\x29\x2A\x2B\x2C\x09\x0A\x1B"
    b"\x30\x31\x1A\x33\x34\x35\x36\x08\x38\x39\x3A\x3B\x04\x14\x3E\xFF"
    b"\x41\xAA\x4A\xB1\x9F\xB2\x6A\xB5\xBB\xB4\x9A\x8A\xB0\xCA\xAF\xBC"
    b"\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB"
    b"\x64\x65\x62\x66\x63\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77"
    b"\xAC\x69\xED\xEE\xEB\xEF\xEC\xBF\x80\xFD\xFE\xFB\xFC\xBA\xAE\x59"
    b"\x44\x45\x42\x46\x43\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57"
    b"\x8C\x49\xCD\xCE\xCB\xCF\xCC\xE1\x70\xDD\xDE\xDB\xDC\x8D\x8E\xDF"
)
# Registry of the translation tables, keyed by the tuple
# (source code page, target code page).
ttdic = {
    (37, 819): tt37_819,
    (819, 37): tt819_37,
    (1047, 819): tt1047_819,
    (819, 1047): tt819_1047,
    (1141, 1252): tt1141_1252,
    (1252, 1141): tt1252_1141,
}
def asc2ebc(buf, start, stop, senco=819, tenco=37):
    """Convert ASCII bytes in a mutable buffer to EBCDIC, in place.

    :param buf: mutable buffer; its items may be integers (as in a
        ``bytearray``) or single characters/bytes
    :param start: start offset in buf
    :param stop: ending offset in buf (exclusive)
    :param senco: source ASCII encoding: 819 (Latin1 ISO-8859-1) or
        1252 (Windows Latin1)
    :param tenco: target EBCDIC encoding: 37 or 1047 (with senco 819),
        or 1141 (German/Austria with Euro, with senco 1252)
    :raises KeyError: if no translation table is registered in ``ttdic``
        for the pair (senco, tenco)
    """
    tt = ttdic[(senco, tenco)]
    for i in range(start, stop):
        item = buf[i]
        if isinstance(item, int):
            # bytearray-style buffer: items are already ordinals, so
            # calling ord() on them would raise TypeError
            buf[i] = tt[item]
        else:
            # character-style buffer: write back a one-element slice so
            # the stored item keeps the table's string/bytes type
            code = ord(item)
            buf[i] = tt[code:code + 1]
def ebc2asc(buf, start, stop, senco=37, tenco=819):
    """Convert EBCDIC bytes in a mutable buffer to ASCII, in place.

    :param buf: mutable buffer; its items may be integers (as in a
        ``bytearray``) or single characters/bytes
    :param start: start offset in buf
    :param stop: ending offset in buf (exclusive)
    :param senco: source EBCDIC encoding: 37 or 1047 (with tenco 819),
        or 1141 (German/Austria with Euro, with tenco 1252)
    :param tenco: target ASCII encoding: 819 (Latin1 ISO-8859-1) or
        1252 (Windows Latin1)
    :raises KeyError: if no translation table is registered in ``ttdic``
        for the pair (senco, tenco)
    """
    tt = ttdic[(senco, tenco)]
    for i in range(start, stop):
        item = buf[i]
        if isinstance(item, int):
            # bytearray-style buffer: items are already ordinals, so
            # calling ord() on them would raise TypeError
            buf[i] = tt[item]
        else:
            # character-style buffer: write back a one-element slice so
            # the stored item keeps the table's string/bytes type
            code = ord(item)
            buf[i] = tt[code:code + 1]
if sys.hexversion < 0x03010100:
    # Python 2
    import array    # str2uni() uni2str()
    import string

    def ebc2str(istr, senco=37, tenco=819):
        r""" Translate characters string to target encoding

        >>> ebc2str('\xc1\xc2\xc3') == 'ABC'
        True
        """
        tt = ttdic[(senco, tenco)]
        return string.translate(istr, tt)

    def str2ebc(str, senco=819, tenco=37):
        r""" Translate characters string to target encoding

        >>> str2ebc('ABC') == '\xc1\xc2\xc3'
        True
        """
        tt = ttdic[(senco, tenco)]
        return string.translate(str, tt)

    def str2asc(str, senco=37, tenco=819):
        """Translate characters string from EBCDIC to ASCII encoding"""
        tt = ttdic[(senco, tenco)]
        return string.translate(str, tt)

    def uni2str(u):
        "convert unicode string to binary string"
        return array.array('u', u).tostring()

    def str2uni(s):
        """Convert binary string to unicode string.

        The length of the input string s must be even otherwise
        a ValueError exception is raised.
        """
        return array.array('u', s).tounicode()

    def swap(s):
        """swap string: return s with its characters in reverse order"""
        a = array.array('c', s)
        a.reverse()
        return a.tostring()

else:
    # Python 3 and higher

    def str2ebc(istr, senco=819, tenco=37):
        r""" Translate bytes in bytes, bytearray or string
        to target encoding

        bytes and bytearray input is translated with the table registered
        in ttdic for (senco, tenco) and keeps its type; a string is
        encoded with the codec 'cp<tenco>' and senco is not used.

        >>> str2ebc(b'ABC')
        b'\xc1\xc2\xc3'
        >>> abc=bytearray('ABC',encoding='Latin1')
        >>> str2ebc(abc)
        bytearray(b'\xc1\xc2\xc3')
        >>> str2ebc('ABC')
        b'\xc1\xc2\xc3'
        """
        if isinstance(istr, (bytes, bytearray)):
            return istr.translate(ttdic[(senco, tenco)])
        return istr.encode('cp%03d' % tenco)

    def str2asc(istr, senco=37, tenco=819):
        """ Translate bytes in bytes, bytearray or string
        to target encoding and return bytes string

        bytes and bytearray input is translated with the table registered
        in ttdic for (senco, tenco); a string is encoded with the codec
        'cp<tenco>' and senco is not used.
        """
        if isinstance(istr, (bytes, bytearray)):
            # bytes(...) first: the unbound bytes.translate() rejects a
            # bytearray argument, and the result must be bytes anyway
            return bytes(istr).translate(ttdic[(senco, tenco)])
        return istr.encode('cp%03d' % tenco)

    def ebc2str(istr, senco=37, tenco=819):
        """Decode EBCDIC bytes to a string with the codec 'cp<senco>'.

        tenco is accepted for signature compatibility but not used:
        the result is a (unicode) string.
        """
        return istr.decode('cp%03d' % senco)

    def uni2str(u):
        """convert unicode string to bytes string

        :param u: string
        :returns: bytes string
        """
        # native utf-16 w/o byte-order mark (BOM); the codec name comes
        # from the UTF16_NATIVE constant, not from a literal of that name
        return u.encode(UTF16_NATIVE)

    def str2uni(b):
        """Convert bytes string to unicode string.

        The length of the input b must be even otherwise a
        UnicodeDecodeError (a ValueError subclass) is raised.
        """
        # native utf-16 w/o byte-order mark (BOM)
        return b.decode(UTF16_NATIVE)

    def swap(s):
        """swap string, bytes or bytearray

        Returns a new object of the same kind with the items in reverse
        order; the argument is left unchanged.
        """
        if isinstance(s, (bytes, bytearray, str)):
            return s[::-1]
        # any other iterable of strings: join its items back to front
        return ''.join(reversed(list(s)))
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    import doctest

    doctest.testmod()
# Copyright 2004-ThisYear Software AG
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# $Date: 2023-12-01 00:54:33 +0100 (Fri, 01 Dec 2023) $
# $Rev: 1072 $