# Source code for adapya.base.recordio

"""
recordio - read and write records
=================================

The module recordio contains functions to read or write records with special
structures (variable, variable blocked):

    - RDW   records preceded by a record descriptor word

    - BDW   variable records blocked

    - EXCL4 records preceded by a 4 bytes exclusive record length
            (in native byte-order)

"""
from __future__ import print_function          # PY3
from io import BytesIO
import os
import sys
from adapya.base.defs import Abuf
from adapya.base.dump import dump
from adapya.base.datamap import Datamap, Uint2, Uint1, Uint4, NETWORKBO


# Values of the RDW segment control byte ('seg'); they double as the
# index into the description list used by segmenttype().
SEGALL, SEGFIRST, SEGLAST, SEGMIDDLE = 0, 1, 2, 3

def segmenttype(seg):
    """Return a readable description of an RDW segment control value.

    :param seg: segment control value in the range 0..3
        (SEGALL, SEGFIRST, SEGLAST, SEGMIDDLE)
    :returns: one of 'complete record', 'first segment',
        'last segment' or 'middle segment'
    :raises BaseException: if seg is outside the range 0..3
    """
    # Check both bounds: a negative value would otherwise index the
    # sequence from its end and return a wrong description.
    if not 0 <= seg <= 3:
        raise BaseException('invalid segment type %02X' % seg)
    return ('complete record', 'first segment',
            'last segment', 'middle segment')[seg]


# RDW - record descriptor word in network byte order.
# No buffer is attached here: readrec() assigns the 4 bytes it has
# just read to rdw.buffer before evaluating the fields.
rdw = Datamap(
    'RDW',
    Uint2('rlen'),      # record length
    Uint1('seg'),       # segment control
    Uint1('seg2'),      # segment control 2 (unused)
    byteOrder=NETWORKBO,
)

# EXCL4 - 4 byte exclusive record length in front of each record.
# No byteOrder is passed, so the Datamap default applies (native byte
# order according to the module docstring).
excl4 = Datamap(
    'EXCL4',
    Uint4('rlen'),      # record length
)

# RDW used for writing: same layout as rdw but with its own 4 byte
# buffer, which writerec() fills and writes in front of each record.
wrdw = Datamap(
    'RDW',
    Uint2('rlen'),      # record length
    Uint1('seg'),       # segment control
    Uint1('seg2'),      # segment control 2 (unused)
    byteOrder=NETWORKBO,
    buffer=Abuf(4),
)

# ISN prefix: 4 byte integer in network byte order that writerec()
# puts between the RDW and the record data when an isn is given.
isnp = Datamap(
    'ISN_prefix',
    Uint4('isn'),       # ISN value
    byteOrder=NETWORKBO,
    buffer=Abuf(4),
)


def _check_dw_length(rlen, is_block, recno):
    """Raise BaseException if a block/record descriptor length is invalid."""
    if 4 <= rlen <= 0x7fff:
        return
    dwtype = 'block' if is_block else 'record'
    if rlen > 0x7fff:
        raise BaseException('Invalid %s length %s exceeds 32k-1 in record %d'
                            % (dwtype, rlen, recno))
    # The length counts the 4 byte descriptor itself; anything smaller
    # would lead to a read with a negative size further down.
    raise BaseException('Invalid %s length %s is less than the 4 bytes '
                        'descriptor in record %d' % (dwtype, rlen, recno))


def _read_rdw_records(f, recform, dumphdr, numrec, skiprec, ecodec,
                      debug, into):
    """Yield the logical records of an RDW or BDW formatted file."""
    blocked = recform.startswith('BDW')   # input contains block descriptors
    with_rdw = recform.endswith('+')      # return records including the RDW
    block_rlen = 0      # bytes left in the current block incl. 4 bytes BDW
    segments = None     # BytesIO collecting the segments of one record

    i = 0   # counting complete/logical records
    while i < skiprec:                    # skipping records loop
        rdws = f.read(4)
        if len(rdws) < 4:
            return
        rdw.buffer = rdws                 # use rdws as underlying buffer
        rlen, seg = rdw.rlen, rdw.seg
        _check_dw_length(rlen, blocked and block_rlen <= 4, i + 1)

        if blocked:
            if block_rlen > 4:  # reduce remaining block length by record len
                block_rlen -= rlen
            else:               # the 4 bytes just read are the BDW
                block_rlen = rlen
                if debug & 1:
                    dump(rdws[0:4], header='Block Descriptor Word')
                continue        # need to read RDW

        if rlen > 4:            # skip the data that follows the RDW
            f.seek(rlen - 4, os.SEEK_CUR)

        if seg not in (SEGFIRST, SEGMIDDLE):
            i += 1              # only count last or unsegmented records
        if debug & 1:
            print('Skipping %s len(%04x) in logical record %d' % (
                segmenttype(seg), rlen, i))

    maxrec = skiprec + numrec

    while True:
        rdws = f.read(4)
        if len(rdws) < 4:
            return
        rdw.buffer = rdws                 # use rdws as underlying buffer
        rlen, seg = rdw.rlen, rdw.seg
        _check_dw_length(rlen, blocked and block_rlen <= 4, i + 1)

        if blocked:
            if block_rlen > 4:  # still records in block
                block_rlen -= rlen
            else:               # the 4 bytes just read are the BDW
                block_rlen = rlen
                if debug & 1:
                    dump(rdws[0:4], header='Block Descriptor Word')
                continue        # need to read RDW

        if debug & 1:
            print('Reading %s len(%04x) in logical record %d' % (
                segmenttype(seg), rlen, i + 1))

        if seg:                 # collect the segments of a spanned record
            if seg == SEGFIRST:
                segments = BytesIO()
                if with_rdw:    # record to include RDW of first segment
                    segments.write(rdws)
            elif segments is None:
                raise BaseException(
                    'Segment type %02X without preceding first segment '
                    'in record %d' % (seg, i + 1))
            segments.write(f.read(rlen - 4))
            if seg != SEGLAST:
                continue        # do not count first/middle segments
            record = segments.getvalue()
            segments = None     # a new record must start with SEGFIRST
            if dumphdr:
                dump(record,
                     header='\n%s: %d, total length %04X' % (
                         dumphdr, i + 1, len(record)),
                     ecodec=ecodec)
        else:                   # record is not segmented i.e. complete
            if with_rdw:
                f.seek(-4, os.SEEK_CUR)   # rewind to record start
                size = rlen
            else:
                size = rlen - 4           # 0 for an empty record

            if into:
                record = bytearray(size)
                f.readinto(record)
            else:
                record = f.read(size)
            if dumphdr:
                rdwx = ' (%04X,%04X)' % (rlen, seg)
                dump(record,
                     header='\n%s: %d%s' % (dumphdr, i + 1, rdwx),
                     ecodec=ecodec)

        yield record

        i += 1
        # i already counts the record just returned, hence >= and not >
        if numrec and i >= maxrec:
            return


def _read_excl4_records(f, dumphdr, numrec, skiprec, ecodec):
    """Yield the records of a file with 4 byte exclusive length prefixes."""
    for _ in range(skiprec):
        e4s = f.read(4)
        if len(e4s) < 4:
            return
        excl4.buffer = e4s                # use e4s as underlying buffer
        # the length is exclusive: every non-empty record must be skipped
        if excl4.rlen > 0:
            f.seek(excl4.rlen, os.SEEK_CUR)

    i = skiprec   # i is total record count starting from 1
    maxrec = skiprec + numrec
    while True:
        i += 1
        if numrec and i > maxrec:
            return
        e4s = f.read(4)
        if len(e4s) < 4:
            return
        excl4.buffer = e4s                # use e4s as underlying buffer
        rlen = excl4.rlen
        if rlen < 1:    # empty record
            yield b''
        else:
            record = f.read(rlen)
            if dumphdr:
                dump(record, header='\n%s: %d' % (dumphdr, i),
                     ecodec=ecodec)
            yield record


def _read_text_records(f, dumphdr, numrec, skiprec, ecodec):
    """Yield the lines of a file with trailing whitespace removed."""
    for _ in range(skiprec):
        if not f.readline():    # end of file
            return

    i = skiprec   # i is total record count starting from 1
    maxrec = skiprec + numrec
    while True:
        i += 1
        if numrec and i > maxrec:
            return              # all processed
        record = f.readline()
        # test for emptiness rather than == '' so that a file opened in
        # binary mode (b'' at end of file) terminates as well
        if not record:
            return              # end of file
        record = record.rstrip()    # remove trailing whitespace and newline
        if dumphdr:
            dump(record, header='\n%s: %d' % (dumphdr, i), ecodec=ecodec)
        yield record


def readrec(f, recform='', dumphdr='', numrec=0, skiprec=0,
            ecodec='cp037', debug=0, into=0):
    """ readrec - Generator function to read records with special
    record format specified in recform

    :param f: filehandle of open file; the 'RDW', 'BDW' and 'EXCL4'
        formats read bytes and seek relative to the current position,
        the default format reads lines with ``f.readline()``
    :param recform: record format to process

        - 'RDW'   variable record format (2 bytes length,
                  Network byte order)
                  return record without RDW header (exclusive)
        - 'RDW+'  variable record format (2 bytes length,
                  Network byte order)
                  return record including RDW header

                  Note: for segmented records rlen in RDW header is
                  length of first segment and not the whole record
        - 'BDW'   variable record blocked: input includes Block
                  Descriptor Word which is skipped
                  return record without RDW header (exclusive)
        - 'BDW+'  same as BDW but return record including RDW header
        - 'EXCL4' exclusive 4 bytes length, native byte order
        - ''      (default) each line is one record, returned with
                  trailing whitespace removed

    :param dumphdr: header text of record; if not empty: prints record
    :param numrec: maximum number of records to return; 0 returns all
    :param skiprec: number of records to skip before the first
        returned record
    :param ecodec: Ebcdic codec for character interpretation when
        dumping records
    :param debug: 1 - print RDW information
    :param into: 1 - read into and return modifiable bytearray rather
        than bytes (only for non segmented RDW files)
    :raises BaseException: if recform is not supported, or a block or
        record descriptor holds an invalid length; raised when the
        generator is iterated

    Example usage::

        >> for rec in readrec(f,recform='RDW',dumphdr='my_records'):
        >>     process(rec)
    """
    if recform.startswith(('RDW', 'BDW')):
        records = _read_rdw_records(f, recform, dumphdr, numrec, skiprec,
                                    ecodec, debug, into)
    elif recform == 'EXCL4':
        records = _read_excl4_records(f, dumphdr, numrec, skiprec, ecodec)
    elif recform == '':     # textfile
        records = _read_text_records(f, dumphdr, numrec, skiprec, ecodec)
    else:
        raise BaseException('Invalid recform %r specified' % recform)

    for record in records:
        yield record
def writerec(f, record, isn=None, recform=''):
    """ writerec - write one record in the given record format

    :param f: filehandle of open file
    :param record: record string/bytearray to be written
    :param isn: if not None, the record is prefixed with this value as
        a 4 byte integer in Network byte order
    :param recform: record format to write; only 'RDW' is supported:
        variable record format with 2 bytes length and 2 empty bytes
        in Network byte order
    :raises BaseException: if recform is anything other than 'RDW'
    """
    if recform != 'RDW':
        raise BaseException('Invalid recform %r specified' % recform)

    has_isn = isn is not None
    # the RDW length covers the 4 byte RDW itself, the optional 4 byte
    # ISN prefix and the record data
    wrdw.rlen = len(record) + (8 if has_isn else 4)
    f.write(wrdw.buffer)

    if has_isn:
        isnp.isn = isn
        f.write(isnp.buffer)

    f.write(record)
if __name__ == "__main__":
    # This module only provides functions: show the module documentation
    # with a hint and terminate with exit status 99.
    print(__doc__)
    print('\n===> %s has no main section - do not execute it directly! <===\n' % __file__)
    raise SystemExit(99)

# Copyright 2004-ThisYear Software AG
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.