Source code for GSASII.imports.G2pwd_MIDAS

# -*- coding: utf-8 -*-
'''Import a collection of "lineouts" from MIDAS from a zarr zip file
'''
# TODO: radius to be added to Zarr file
#
from __future__ import division, print_function
import os
try:
    import zarr
except ImportError:
    zarr = None
import numpy as np
from .. import GSASIIobj as G2obj
from .. import GSASIIfiles as G2fil
from .. import GSASIIpath

instprmList = [('Bank',1.0), ('Lam',0.413263), ('Polariz.',0.99),
            ('SH/L',0.002), ('Type','PXC'), ('U',1.163), ('V',-0.126),
            ('W',0.063), ('X',0.0), ('Y',0.0), ('Z',0.0), ('Zero',0.0)]
#   comments
#   sample parameters
sampleprmList = [('InstrName','APS 1-ID'), ('Temperature', 295.0)]
#  'Scale': [1.0, True], 'Type': 'Debye-Scherrer',
# 'Absorption': [0.0, False], 'DisplaceX': [0.0, False], 'DisplaceY': [0.0, False]# 'Pressure': 0.1, 'Time': 0.0, 'FreePrm1': 0.0,
# 'FreePrm2': 0.0, 'FreePrm3': 0.0, 'Gonio. radius': 200.0, 'Omega': 0.0,
# 'Chi': 0.0, 'Phi': 180.0, 'Azimuth': 0.0,
# 'Materials': [{'Name': 'vacuum', 'VolFrac': 1.0}, {'Name': 'vacuum', 'VolFrac': 0.0}],
# 'Thick': 1.0, 'Contrast': [0.0, 0.0], 'Trans': 1.0, 'SlitLen': 0.0}


[docs]
class MIDAS_Zarr_Reader(G2obj.ImportPowderData):
    '''Routine to read multiple powder patterns from a Zarr file
    created by MIDAS. Perhaps in the future, other software might also
    use this file type as well.

    For Midas, the main file is <file>.zip, but optionally sample and
    instrument parameters can be placed in <file>.samprm and <file>.instprm.
    Any parameters placed in those files will override values set in the .zip
    file.
    '''
    mode = None
    midassections = ('InstrumentParameters', 'REtaMap', 'OmegaSumFrame')
    def __init__(self):
        if zarr is None:
            self.UseReader = False
            msg = 'MIDAS_Zarr Reader skipped because zarr module is not installed.'
            G2fil.ImportErrorMsg(msg,{'MIDAS Zarr importer':['zarr']})
        super(self.__class__,self).__init__( # fancy way to self-reference
            extensionlist=('.zarr.zip',),strictExtension=True,
            formatName = 'MIDAS zarr',longFormatName = 'MIDAS zarr intergrated scans')
        self.scriptable = True
        #self.Iparm = {} #only filled for EDS data

    def zarrOpen(self,filename):
        import asyncio
        async def zarrAsyncOpen(filename):
            fp = store = None
            try:
                fp = zarr.open(filename, mode='r')
                return fp,store
            except:
                # workaround for zarr 3.0.x where "auto discover" is
                # not yet implemented
                # (https://github.com/zarr-developers/zarr-python/issues/2922)
                try:
                    store = await zarr.storage.ZipStore.open(filename, mode="r")
                    fp = zarr.open_group(store, mode='r')
                    return fp,store
#                except FileNotFoundError as msg:
                except FileNotFoundError:
                    print (f'cannot read as zarr file: {filename}')
                except Exception as msg:
                    self.errors = f'Exception from zarr module (version={zarr.__version__}):'
                    self.errors += '\n\t' + str(msg)
            return None,None
        fp,store = asyncio.run(zarrAsyncOpen(filename))
        return fp,store
                        

[docs]
    def ContentsValidator(self, filename):
        '''Test if valid by seeing if the zarr module recognizes the file. Then
        get file type (currently Midas only)
        '''
        fp, store = self.zarrOpen(filename)
        if not fp:
            self.errors = f'Error from zarr module (version={zarr.__version__}):'
            #self.errors += '\n\t' + str(msg)
            self.errors += '\nPlease report this'
        elif all([(i in fp) for i in self.midassections]): # are expected MIDAS sections present?
            self.mode = 'midas'
            del fp, store
            return True # Passes test for Midas output
        #else:  # test here for any other formats that might use zarr
        #    pass
        del fp, store
        return



[docs]
    def Reader(self, filename, ParentFrame=None, **kwarg):
        '''Scan file for sections needed by defined file types (currently
        only Midas) and then use appropriate routine to read the file.
        Most of the time the results are placed in the buffer arg (if supplied)
        so the file is not read on most calls.

        For MIDAS, masking can eliminate some or all points in an azimuthal
        region. This will only return "lineouts" (aka diffractograms) that
        have 20 or more points in them.

        Note that if Reader.selections is used to select individual
        "lineouts", the selections are numbered against all possible
        "lineouts" not the ones that have 20 or more points.
        '''
        fpbuffer = kwarg.get('buffer',{})
        if not hasattr(self,'blknum'):
            self.blknum = 0    # image counter for multi-image files
        # check if this is a valid MIDAS file
        if self.mode is None:
            fp, store = self.zarrOpen(filename)
            # are expected MIDAS sections present?
            if all([(i in fp) for i in self.midassections]):
                self.mode = 'midas'
            else:
                print (f'{filename} is not a MIDAS file')
            del fp, store
        #else:  # test here for any other formats that might use zarr
        #    pass
        
        if self.mode == 'midas':
            res = self.readMidas(filename, fpbuffer)
        else:
            res = False
        return res



[docs]
    def readMidas(self, filename, fpbuffer={}):
        '''Read zarr file produced by Midas
        '''
        self.instmsg = 'MIDAS zarr file'
        # has the zarr file already been cached?
        doread = False # yes
        for i in ('intenArr','REtaMap','attributes', 'REtaMap', 'unmasked', '2Read'):
            if i not in fpbuffer:
                doread = True # no
                break

        #======================================================================
        # cache the contents of the zarr file on the first call to this
        # (or every call if no buffer is supplied -- very slow)
        #======================================================================
        if doread:   # read into buffer
            print('Caching MIDAS zarr contents...')
            try:
                fp, store = self.zarrOpen(filename)
                fpbuffer['REtaMap'] = np.array(fp['REtaMap']) # 4 x Nbins x Nazimuth
                # [0]: radius; [1] 2theta; [2] eta; [3] bin area
                # tabulate integrated intensities image & eta
                fpbuffer['intenArr'] = []
                fpbuffer['attributes'] = []
                for i,k in enumerate(fp['OmegaSumFrame']):
                    fpbuffer['intenArr'].append(fp['OmegaSumFrame'][k])
                    fpbuffer['attributes'].append(
                        dict(fp['OmegaSumFrame'][k].attrs.items()))
                Nimg = len(fp['OmegaSumFrame'])   # number of images
                Nbins,Nazim = fpbuffer['REtaMap'][1].shape
                # Nbins: number of points in each lineout (not all of which may be
                #     used due to geometrical masking)
                # Nazim: number of azimuthal "cake slices"

                # get a list of points in use at each azimuth
                fpbuffer['unmasked'] = [(fpbuffer['REtaMap'][3][:,i] != 0) for i in range(Nazim)] # will be True if area is >0
                # find the azimuths with more than 20 points
                mAzm = [i for i in range(Nazim) if sum(fpbuffer['unmasked'][i]) > 20]

                # generate a list of lineouts to be read from selections
                if self.selections:
                    sel = self.selections
                else:
                    sel = range(Nimg*Nazim) # nothing selected, use all
                # fpbuffer['2Read'] is the list of lineouts to be read, where each entry
                # contains two values, the azumuth and the image number (iAzm,iImg)
                # defined points for each lineout are then
                #   intensities : fpbuffer['intenArr'][iImg][:,iAzm][unmasked[iAzm]]
                #   2thetas: fpbuffer['REtaMap'][1][:,iAzm][unmasked[iAzm]]
                fpbuffer['2Read'] = [(i%Nazim,i//Nazim) for i in sel if i%Nazim in mAzm]
                # xfrom Zarr dict into a native dict
                self.MIDASinstprm = {i:fp['InstrumentParameters'][i][0] for i in fp['InstrumentParameters']}
                # change a few keys
                for key,newkey in [('Polariz','Polariz.'),('SH_L','SH/L')]:
                    if key in self.MIDASinstprm:
                        self.MIDASinstprm[newkey] = self.MIDASinstprm[key]
                        del self.MIDASinstprm[key]
            except IOError:
                print ('cannot open file '+ filename)
                return False
            finally:
                del fp, store
            # get overriding sample & instrument parameters
            self.MIDASsampleprm = {}
            samplefile = os.path.splitext(filename)[0] + '.samprm'
            if os.path.exists(samplefile):
                fp = open(samplefile,'r')
                S = fp.readline()
                while S:
                    if not S.strip().startswith('#'):
                        [item,val] = S[:-1].split(':')
                        self.MIDASsampleprm[item.strip("'")] = eval(val)
                    S = fp.readline()
                fp.close()
            # overload from file, if found
            instfile = os.path.splitext(filename)[0] + '.instprm'
            self.instvals = [{},{}]
            if os.path.exists(instfile):
                self.instmsg = 'zarr and .instprm files'
                fp = open(instfile,'r')
                instLines = fp.readlines()
                fp.close()
                nbank,self.instvals = G2fil.ReadInstprm(instLines, None, self.Sample)
        #======================================================================
        # start reading
        #======================================================================
        # look for the next non-empty scan (lineout)
        iAzm,iImg = fpbuffer['2Read'][self.blknum]
        nFrame = fpbuffer['attributes'][iImg].get('Number Of Frames Summed',1.)
        unmasked = fpbuffer['unmasked'][iAzm]
        y = fpbuffer['intenArr'][iImg][:,iAzm][unmasked]/nFrame
        x = fpbuffer['REtaMap'][1][:,iAzm][unmasked]
        normalization = fpbuffer['REtaMap'][3][:,iAzm][unmasked]
        # compute the uncertainty on the normalized intensities
        #   Y(normalized) = Y-norm = Ysum / area
        #   sigma(Y-norm) = sqrt(Ysum) / area = sqrt[Y-norm * area] / area
        #                 = sqrt( Y-norm / area)
        # For GSAS-II want to normalize by the number of frames,
        #   Y(GSAS) = Y-norm / nFrame
        #   sigma(Y-GSAS) = sigma(Y-norm) / nFrame
        #                 = sqrt( Y-norm / area) / nFrame
        #   weight(Y-GSAS) is 1/sigma[Y-GSAS]**2 = nFrame**2 * area / Y(norm)
        w = np.where(y <= 0, np.zeros_like(y), nFrame**2 * normalization/ y )
        omega = 999.  # indicates an error
        try:
            omega = 0.5 * (
                fpbuffer['attributes'][iImg]['FirstOme'] +
                fpbuffer['attributes'][iImg]['LastOme'])
        except:
            pass
        eta = sum(fpbuffer['REtaMap'][2][:,iAzm][unmasked])/sum(unmasked) # compute an averaged Phi value
        radius = 1000   # sample to detector distance (mm)
        if 'Distance' in self.MIDASinstprm:
            radius = self.MIDASinstprm['Distance']/1000   # convert from microns

        # now transfer instprm & sample prms into current histogram
        self.pwdparms['Instrument Parameters'] = [{}, {}]
        inst = {}
        inst.update(instprmList)
        for i in inst:
            if i in self.MIDASinstprm:
                inst[i] = self.MIDASinstprm[i]
        for key,val in inst.items():
            self.pwdparms['Instrument Parameters'][0][key] = [val,val,False]
        self.pwdparms['Instrument Parameters'][0].update(self.instvals[0])
        self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False]
        self.pwdparms['Instrument Parameters'][0]['Bank'] = [iAzm,iAzm,False]
        samp = {}
        samp.update(sampleprmList)
        samp.update(self.MIDASsampleprm)
        try:
            sampleprmList['Temperature'] = fpbuffer['attributes'][iImg]['Temperature']
        except:
            pass
        try:
            sampleprmList['Pressure'] = fpbuffer['attributes'][iImg]['Pressure']
        except:
            pass
        for key,val in samp.items():
            self.Sample[key] = val
        self.numbanks=len(fpbuffer['2Read'])  # number of remaining lineouts to be read
        # save the various GSAS-II instrument and sample parameters
        self.Sample['Gonio. radius'] = float(radius)
#        self.Sample['Omega'] = float(S.split('=')[1])
#        self.Sample['Chi'] = float(S.split('=')[1])
        self.Sample['Phi'] = omega
#        self.Sample['FreePrm1']
#        self.Controls['FreePrm1'] = 'Midas Label'  # relabel the parameter
#        self.Sample['Time']
        self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
        #self.comments = comments[selblk]
        self.powderentry[0] = filename
        #self.powderentry[1] = Pos # position offset (never used, I hope)
        self.powderentry[2] = self.blknum  # consecutive bank number
        self.idstring = f'{os.path.split(filename)[1]} img={iImg} omega={omega} eta={eta}'
        if GSASIIpath.GetConfigValue('debug'):
            print(f'Read entry #{iAzm} img# {iImg} from file {filename}\n{self.idstring}')
#        else:
#            print('.',end='')  # do something since this can take a while
#            sys.stdout.flush()
        # are there more lineouts after this one in current image to read?
        self.blknum += 1
        if self.blknum >= len(fpbuffer['2Read']): # is this the last scan?
            self.repeat = False
        else:
            self.repeat = True
            return True
        # read last pattern, reset buffer & counter
        print()
        self.blknum = 0
        for key in list(fpbuffer.keys()): del fpbuffer[key]
        print('...read done')
        return True