Source code for hpelm.hp_elm

# -*- coding: utf-8 -*-
"""
Created on Mon Oct 27 17:48:33 2014

@author: akusok
"""

import numpy as np
import multiprocessing as mp
from time import time
from .modules import make_hdf5, _ireader, _iwriter, _prepare_fHH, _write_fHH
from tables import open_file
from .elm import ELM


[docs]class HPELM(ELM):
    """Interface for training High-Performance Extreme Learning Machines (HP-ELM).

    Args:
        inputs (int): dimensionality of input data, or number of data features
        outputs (int): dimensionality of output data, or number of classes
        classification ('c'/'wc'/'ml', optional): train ELM for classfication ('c') / weighted classification ('wc') /
            multi-label classification ('ml'). For weighted classification you can provide weights in `w`. ELM will
            compute and use the corresponding classification error instead of Mean Squared Error.
        w (vector, optional): weights vector for weighted classification, lenght (`outputs` * 1).
        batch (int, optional): batch size for data processing in ELM, reduces memory requirements. Does not work
            for model structure selection (validation, cross-validation, Leave-One-Out). Can be changed later directly
            as a class attribute.
        accelerator (string, optional): type of accelerated ELM to use: None, 'GPU', ...
        precision (optional): data precision to use, supports signle ('single', '32' or numpy.float32) or double
            ('double', '64' or numpy.float64). Single precision is faster but may cause numerical errors. Majority
            of GPUs work in single precision. Default: **double**.
        norm (double, optinal): L2-normalization parameter, **None** gives the default value.
        tprint (int, optional): ELM reports its progess every `tprint` seconds or after every batch,
            whatever takes longer.

    Class attributes; attributes that simply store initialization or `train()` parameters are omitted.

    Attributes:
        nnet (object): Implementation of neural network with computational methods, but without
            complex logic. Different implementations are given by different classes: for Python, for GPU, etc.
            See ``hpelm.nnets`` folder for particular files. You can implement your own computational algorithm
            by inheriting from ``hpelm.nnets.SLFN`` and overwriting some methods.
        flist (list of strings): Awailable types of neurons, use them when adding new neurons.

    Note:
        The 'hdf5' type denotes a name of HDF5 file type with a single 2-dimensional array inside. HPELM uses PyTables
        interface to HDF5: http://www.pytables.org/. For HDF5 array examples, see
        http://www.pytables.org/usersguide/libref/homogenous_storage.html. Array name is irrelevant,
        but there must be **only one array per HDF5 file**.

        A 2-dimensional Numpy.ndarray can also be used.
    """

[docs]    def train(self, fX, fT, *args, **kwargs):
        """Universal training interface for HP-ELM model.

        Always trains a basic ELM model without model structure selection.
        L2-regularization is available as `norm` parameter at HPELM initialization.
        Number of neurons selection with validation set for trained HPELM is available in `train_hpv()` method.

        Args:
            fX (hdf5): input data on disk, size (N * `inputs`)
            fT (hdf5): outputs data on disk, size (N * `outputs`)
            'c'/'wc'/'ml' (string, choose one): train HPELM for classification ('c'), classification with weighted
                classes ('wc') or multi-label classification ('ml') with several correct classes per data sample.
                In classification, number of `outputs` is the number of classes; correct class(es) for each sample
                has value 1 and incorrect classes have 0.

        Keyword Args:
            istart (int, optional): index of first data sample to use from `fX`, `istart` < N. If not given,
                all data from `fX` is used. Sample with index `istart` is used for training, indexing is 0-based.
            icount (int, optional): number of data samples to use from `fX`, starting from `istart`, automatically
                adjusted to `istart` + `icount` <= N. If not given, all data starting from `start` is used.
                The last sample used for training is `istart`+`icount`-1, so you can index data as:
                istart_1=0, icount_1=1000; istart_2=1000, icount_2=1000; istart_3=2000, icount_3=1000, ...
            batch (int, optional): batch size for ELM, overwrites batch size from the initialization
        """
        # TODO: move to h5py, because I don't need pyTables features
        # TODO: move to h5py with MPI async IO (driver='mpio')
        # TODO: explain why I don't support parallel processing (huge amount of data to transfer, or fast enough)
        X, T = self._checkdata(fX, fT)
        self._train_parse_args(args, kwargs)

        istart = 0
        icount = np.inf
        if "istart" in kwargs.keys():
            istart = max(0, int(kwargs["istart"]))
        if "icount" in kwargs.keys():
            icount = kwargs["icount"]
        self.add_data(X, T, istart=istart, icount=icount)
        self.nnet.solve()

[docs]    def add_data(self, fX, fT, istart=0, icount=np.inf, fHH=None, fHT=None):
        """Feed new training data (X,T) to HP-ELM model in batches: does not solve ELM itself.

        This method prepares an intermediate solution data, that takes the most time. After that, obtaining
        the solution is fast.

        The intermediate solution consists of two matrices: `HH` and `HT`. They can be in memory for a model computed
        at once, or stored on disk for a model computed in parts or in parallel.

        For iterative solution, provide file names for on-disk matrices in the input parameters `fHH` and `fHT`.
        They will be created if they don't exist, or new results will be merged with the existing ones. This method is
        multiprocess-safe for parallel writing into files `fHH` and `fHT`, that allows you to easily compute ELM
        in parallel. The multiprocess-safeness uses Python module 'fasteners' and a lock file, which is named
        fHH+'.lock' and fHT+'.lock'.

        Args:
            fX (hdf5): (part of) input training data size (N * `inputs`)
            fT (hdf5) (part of) output training data size (N * `outputs`)
            istart (int, optional): index of first data sample to use from `fX`, `istart` < N. If not given,
                all data from `fX` is used. Sample with index `istart` is used for training, indexing is 0-based.
            icount (int, optional): number of data samples to use from `fX`, starting from `istart`, automatically
                adjusted to `istart` + `icount` <= N. If not given, all data starting from `start` is used.
                The last sample used for training is `istart`+`icount`-1, so you can index data as:
                istart_1=0, icount_1=1000; istart_2=1000, icount_2=1000; istart_3=2000, icount_3=1000, ...
            fHH, fHT (string, optional): file names for storing HH and HT matrices. Files are created if they don't
                exist, or new result is added to the existing files if they exist. Parallel writing to the same
                `fHH`, `fHT` files is multiprocess-safe, made specially for parallel training of HP-ELM. Another use
                is to split a very long training of huge ELM into smaller parts, so the training can be interrupted
                and resumed later.

        """
        # initialize
        assert len(self.nnet.neurons) > 0, "Add neurons to ELM before using it"
        X, T = self._checkdata(fX, fT)
        N = X.shape[0]
        _prepare_fHH(fHH, fHT, self.nnet, self.precision)
        # custom range adjustments
        icount = min(icount, N - istart)
        nb = int(np.ceil(float(icount) / self.batch))  # number of batches

        # weighted classification initialization
        if self.classification == "wc" and self.wc is None:
            ns = np.zeros((self.nnet.outputs,))
            for b in xrange(nb):  # batch sum is much faster
                start = b*self.batch + istart
                stop = min((b+1)*self.batch + istart, icount + istart)
                ns += T[start:stop].sum(axis=0)
            ns = ns.astype(self.precision)
            self.wc = ns.sum() / ns  # class weights normalized to number of samples

        # main loop over all the data
        t = time()
        t0 = time()
        eta = 0
        wc_vector = None
        for b in xrange(nb):
            start = b*self.batch + istart
            stop = min((b+1)*self.batch + istart, icount + istart)
            Xb = X[start:stop]
            Tb = T[start:stop]
            if self.classification == "wc":
                wc_vector = self.wc[np.where(Tb == 1)[1]]  # weights for samples in the batch

            self.nnet.add_batch(Xb, Tb, wc_vector)

            # report time
            eta = int(((time()-t0) / (b+1)) * (nb-b-1))
            if time() - t > self.tprint:
                print("processing batch %d/%d, eta %d:%02d:%02d" % (b+1, nb, eta/3600, (eta % 3600)/60, eta % 60))
                t = time()

        # if storing output to disk
        if fHH is not None and fHT is not None:
            HH, HT = self.nnet.get_corr()
            HH[np.diag_indices_from(HH)] -= self.nnet.norm  # norm is already included
            _write_fHH(fHH, fHT, HH, HT)

[docs]    def solve_corr(self, fHH, fHT):
        """Solves an ELM model with the given (covariance) fHH and (correlation) fHT HDF5 files.

        Args:
            fHH (hdf5): an hdf5 file with intermediate solution data
            fHT (hdf5): an hdf5 file with intermediate solution data
        """
        HH, HT = self._checkcorr(fHH, fHT)
        B = self.nnet.solve_corr(HH, HT)
        self.nnet.set_B(B)

[docs]    def predict(self, fX, fY=None, istart=0, icount=np.inf):
        """Iterative predict outputs and save them to HDF5, can use custom range.

        Args:
            fX (hdf5): hdf5 filename or Numpy matrix with input data from which outputs are predicted
            fY (hdf5): hdf5 filename or Numpy matrix to store output data into, if 'None' then Numpy matrix
                is generated automatically.
            istart (int, optional): index of first data sample to use from `fX`, `istart` < N. If not given,
                all data from `fX` is used. Sample with index `istart` is used for training, indexing is 0-based.
            icount (int, optional): number of data samples to use from `fX`, starting from `istart`, automatically
                adjusted to `istart` + `icount` <= N. If not given, all data starting from `start` is used.
                The last sample used for training is `istart`+`icount`-1, so you can index data as:
                istart_1=0, icount_1=1000; istart_2=1000, icount_2=1000; istart_3=2000, icount_3=1000, ...
        """
        # TODO: Accept Numpy arrays in addition to HDF5 files
        assert len(self.nnet.neurons) > 0, "Add neurons to ELM and train it before using"
        assert self.nnet.B is not None, "Train ELM before predicting"
        X, _ = self._checkdata(fX, None)
        N = X.shape[0]
        # custom range adjustments
        icount = min(icount, N - istart)
        nb = int(np.ceil(float(icount) / self.batch))  # number of batches
        # make file to store results
        if isinstance(fY, basestring):
            make_hdf5((icount, self.nnet.outputs), fY, dtype=self.precision)
            h5 = open_file(fY, "a")
            for Y in h5.walk_nodes():
                pass  # find a node with whatever name
        elif fY is None:  # create Numpy array
            Y = np.zeros((icount, self.nnet.outputs), dtype=self.precision)
        else:  # fY is Numpy array
            _, Y = self._checkdata(None, fY)

        t = time()
        t0 = time()
        eta = 0
        for b in xrange(0, nb):
            start = b*self.batch + istart
            stop = min((b+1)*self.batch + istart, icount + istart)

            # get data
            Xb = X[start:stop]
            # process data
            Yb = self.nnet._predict(Xb)
            # write data
            Y[start-istart:stop-istart] = Yb

            # report time
            eta = int(((time()-t0) / (b+1)) * (nb-b-1))
            if time() - t > self.tprint:
                print("processing batch %d/%d, eta %d:%02d:%02d" % (b+1, nb, eta/3600, (eta % 3600)/60, eta % 60))
                t = time()

        if isinstance(fY, basestring):
            h5.flush()
            h5.close()
        elif fY is None:
            return Y

[docs]    def project(self, fX, fH=None, istart=0, icount=np.inf):
        """Iteratively project input data from HDF5 into HPELM hidden layer, and save in another HDF5.

        Args:
            fX (hdf5): hdf5 filename or Numpy matrix with input data to project
            fH (hdf5): hdf5 filename or Numpy matrix to store projected inputs, if 'None' then Numpy matrix
                is generated automatically.
            istart (int, optional): index of first data sample to use from `fX`, `istart` < N. If not given,
                all data from `fX` is used. Sample with index `istart` is used for training, indexing is 0-based.
            icount (int, optional): number of data samples to use from `fX`, starting from `istart`, automatically
                adjusted to `istart` + `icount` <= N. If not given, all data starting from `start` is used.
                The last sample used for training is `istart`+`icount`-1, so you can index data as:
                istart_1=0, icount_1=1000; istart_2=1000, icount_2=1000; istart_3=2000, icount_3=1000, ...
        """
        assert len(self.nnet.neurons) > 0, "Add neurons to ELM before using it"
        X, _ = self._checkdata(fX, None)
        N = X.shape[0]
        # custom range adjustments
        icount = min(icount, N - istart)
        nb = int(np.ceil(float(icount) / self.batch))  # number of batches
        # make file to store results
        if isinstance(fH, basestring):
            make_hdf5((icount, self.nnet.L), fH, dtype=self.precision)
            h5 = open_file(fH, "a")
            for H in h5.walk_nodes():
                pass  # find a node with whatever name
        elif fH is None:  # create Numpy array
            H = np.zeros((icount, self.nnet.L), dtype=self.precision)
        else:  # fY is Numpy array
            assert fH.shape[0] >= icount, "Numpy matrix fH has not enough rows to store projected inputs"
            assert fH.shape[1] == self.nnet.L, "Numpy matrix fH must have same number of columns as number of neurons"
            H = fH

        t = time()
        t0 = time()
        for b in xrange(0, nb):
            start = b*self.batch + istart
            stop = min((b+1)*self.batch + istart, icount + istart)

            # get data
            Xb = X[start:stop]
            # process data
            Hb = self.nnet._project(Xb)
            # write data
            H[start-start:stop-istart] = Hb

            # report time
            eta = int(((time()-t0) / (b+1)) * (nb-b-1))
            if time() - t > self.tprint:
                print("processing batch %d/%d, eta %d:%02d:%02d" % (b+1, nb, eta/3600, (eta % 3600)/60, eta % 60))
                t = time()

        if isinstance(fH, basestring):
            h5.flush()
            h5.close()
        elif fH is None:
            return H

[docs]    def error(self, fT, fY, istart=0, icount=np.inf):
        """Calculate error of model predictions of HPELM.

        Computes Mean Squared Error (MSE) between model predictions Y and true outputs T.
        For classification, computes mis-classification error.
        For multi-label classification, correct classes are all with Y>0.5.

        For weighted classification the error is an average weighted True Positive Rate,
        or percentage of correctly predicted samples for each class, multiplied by weight
        of that class and averaged. If you want something else, just write it yourself :)
        See https://en.wikipedia.org/wiki/Confusion_matrix for details.

        Args:
            fT (hdf5): hdf5 filename with true outputs
            fY (hdf5): hdf5 filename with predicted outputs
            istart (int, optional): index of first data sample to use from `fX`, `istart` < N. If not given,
                all data from `fX` is used. Sample with index `istart` is used for training, indexing is 0-based.
            icount (int, optional): number of data samples to use from `fX`, starting from `istart`, automatically
                adjusted to `istart` + `icount` <= N. If not given, all data starting from `start` is used.
                The last sample used for training is `istart`+`icount`-1, so you can index data as:
                istart_1=0, icount_1=1000; istart_2=1000, icount_2=1000; istart_3=2000, icount_3=1000, ...

        Returns:
            e (double): MSE for regression / classification error for classification.
        """
        _, T = self._checkdata(None, fT)
        _, Y = self._checkdata(None, fY)
        return self._error(T, Y, istart=istart, icount=icount)

    def _error(self, T, Y, istart=0, icount=np.inf):
        """Iterative batch error calcualtion.

        Args:
            T (matrix): true outputs for error calculation
            Y (matrix): predicted outputs for error calculation
            istart (int): index of first sample to process
            icount (int): number of samples to process
        """
        N = T.shape[0]
        icount = min(icount, N - istart)
        nb = int(np.ceil(float(icount) / self.batch))  # number of batches

        if self.classification == "c":
            err = 0
            for b in xrange(nb):
                start = b*self.batch + istart
                stop = min((b+1)*self.batch + istart, icount + istart)
                Tb = np.array(T[start:stop])
                Yb = np.array(Y[start:stop])
                errb = np.mean(Yb.argmax(1) != Tb.argmax(1))
                err += errb * float(stop-start)/icount

        elif self.classification == "wc":  # weighted classification
            c = T.shape[1]
            errc = np.zeros(c)
            countc = np.zeros(c)
            for b in xrange(nb):
                start = b*self.batch + istart
                stop = min((b+1)*self.batch + istart, icount + istart)
                Tb = np.array(T[start:stop])
                Yb = np.array(Y[start:stop])
                for i in xrange(c):  # per-class MSE
                    idx = np.where(Tb[:, i] == 1)[0]
                    if len(idx) > 0:
                        err1 = np.not_equal(Yb[idx].argmax(1), i)
                        errc[i] += err1.sum()
                        countc[i] += len(idx)
            errc = errc / countc  # get mean value
            err = np.sum(errc * self.wc) / np.sum(self.wc)

        elif self.classification == "ml":
            err = 0
            for b in xrange(nb):
                start = b*self.batch + istart
                stop = min((b+1)*self.batch + istart, icount + istart)
                Tb = np.array(T[start:stop])
                Yb = np.array(Y[start:stop])
                errb = np.not_equal(Yb > 0.5, Tb > 0.5).mean()
                err += errb * float(stop-start)/icount

        else:  # MSE error
            err = 0
            for b in xrange(nb):
                start = b*self.batch + istart
                stop = min((b+1)*self.batch + istart, icount + istart)
                Tb = T[start:stop]
                Yb = Y[start:stop]
                errb = np.mean((Tb - Yb)**2)
                err += errb * float(stop-start)/icount

        return err

[docs]    def validation_corr(self, fHH, fHT, fXv, fTv, steps=10):
        """Quick batch error evaluation with different numbers of neurons on a validation set.

        Only feasible implementation of model structure selection with HP-ELM. This method makes a single pass
        over the validation data, computing errors for all numbers of neurons at once. It requires HDF5 files with
        matrices HH and HT: `fHH` and `fHT`, obtained from `add_data(..., fHH, fHT)` method.

        The method writes the best solution to the HPELM model.

        Args:
            fHH (string): name of HDF5 file with HH matrix
            fHT (string): name of HDF5 file with HT matrix
            fXv (string): name of HDF5 file with validation dataset inputs
            fTv (string): name of HDF5 file with validation dataset outputs
            steps (int or vector): amount of different numbers of neurons to test, choosen uniformly on a logarithmic
                scale from 3 to number of neurons in HPELM. Can be given exactly as a vector.

        Returns:
            Ls (vector): numbers of neurons used by `validation_corr()` method
            errs (vector): corresponding errors for number of neurons in `Ls`, with classification error if model
                is run for classification
            confs (list of matrix): list of confusion matrices corresponding to elements in Ls (empty for regression)
        """
        X, T = self._checkdata(fXv, fTv)
        HH, HT = self._checkcorr(fHH, fHT)
        N = X.shape[0]
        L = self.nnet.L
        classification = self.classification is not None

        Ls = np.logspace(np.log(3), np.log(L), steps, base=np.e, endpoint=True)
        Ls = np.ceil(Ls).astype(np.int)
        Ls = np.unique(Ls)  # numbers of neurons to check
        k = Ls.shape[0]
        errs = np.zeros((k,))  # errors for these numbers of neurons
        nb = int(np.ceil(float(N) / self.batch))

        Betas = []  # keep all betas in memory
        confs = []
        for l in Ls:
            Betas.append(self.nnet.solve_corr(HH[:l, :l], HT[:l, :]))
            if classification: confs.append(np.zeros((self.nnet.outputs, self.nnet.outputs)))

        t = time()
        t0 = time()
        for b in xrange(nb):
            start = b*self.batch
            stop = min((b+1)*self.batch, N)
            Tb = np.array(T[start:stop])
            Xb = np.array(X[start:stop])
            Hb = self.nnet._project(Xb)
            for i in xrange(k):
                hb1 = Hb[:, :Ls[i]]
                Yb = np.dot(hb1, Betas[i])
                errs[i] += self._error(Tb, Yb) * float(stop-start)/N
                if classification: confs[i] += self.confusion(Tb, Yb)
            # report time
            eta = int(((time()-t0) / (b+1)) * (nb-b-1))
            if time() - t > self.tprint:
                print("processing batch %d/%d, eta %d:%02d:%02d" % (b+1, nb, eta/3600, (eta % 3600)/60, eta % 60))
                t = time()

        k_opt = np.argmin(errs)
        best_L = Ls[k_opt]
        self.nnet._prune(np.arange(best_L))
        self.nnet.set_B(Betas[k_opt])
        del Betas
        print("%d of %d neurons selected with a validation set" % (best_L, L))
        if best_L > L*0.9:
            print("Hint: try re-training with more hidden neurons")
        return Ls, errs, confs

    # async-IO versions of methods

[docs]    def train_async(self, fX, fT, *args, **kwargs):
        """Training HPELM with asyncronous I/O, good for network drives, etc. See `train()` for reference.

        Spawns new processes using Python's `multiprocessing` module.
        """
        X, T = self._checkdata(fX, fT)
        self._train_parse_args(args, kwargs)

        istart = 0
        icount = np.inf
        if "istart" in kwargs.keys():
            istart = max(0, int(kwargs["istart"]))
        if "icount" in kwargs.keys():
            icount = kwargs["icount"]
        self.add_data_async(fX, fT, istart=istart, icount=icount)
        self.nnet.solve()

[docs]    def add_data_async(self, fX, fT, istart=0, icount=np.inf, fHH=None, fHT=None):
        """Version of `add_data()` with asyncronous I/O. See `add_data()` for reference.

        Spawns new processes using Python's `multiprocessing` module, and requires more memory than non-async version.
        """
        # initialize
        assert len(self.nnet.neurons) > 0, "Add neurons to ELM before using it"
        X, T = self._checkdata(fX, fT)
        N = X.shape[0]
        # TODO: adapt for GPU solver
        _prepare_fHH(fHH, fHT, self.nnet, self.precision)
        # custom range adjustments
        icount = min(icount, N - istart)
        nb = int(np.ceil(float(icount) / self.batch))

        # weighted classification initialization
        if self.classification == "wc" and self.wc is None:
            ns = np.zeros((self.nnet.outputs,))
            for b in xrange(nb):  # batch sum is much faster
                start = b*self.batch + istart
                stop = min((b+1)*self.batch + istart, icount + istart)
                ns += T[start:stop].sum(axis=0)
            ns = ns.astype(self.precision)
            self.wc = ns.sum() / ns  # class weights normalized to number of samples

        # close X and T files opened by _checkdata()
        h5 = self.opened_hdf5.pop()
        h5.close()
        h5 = self.opened_hdf5.pop()
        h5.close()

        # start async reader and writer for HDF5 files
        qX_in = mp.Queue()
        qX_out = mp.Queue(1)
        readerX = mp.Process(target=_ireader, args=(fX, qX_in, qX_out))
        readerX.daemon = True
        readerX.start()
        qT_in = mp.Queue()
        qT_out = mp.Queue(1)
        readerT = mp.Process(target=_ireader, args=(fT, qT_in, qT_out))
        readerT.daemon = True
        readerT.start()

        # main loop over all the data
        t = time()
        t0 = time()
        eta = 0
        wc_vector = None
        for b in xrange(0, nb+1):
            start_next = b*self.batch + istart
            stop_next = min((b+1)*self.batch + istart, icount + istart)
            # prefetch data
            qX_in.put((start_next, stop_next))  # asyncronous reading of next data batch
            qT_in.put((start_next, stop_next))

            if b > 0:  # first iteration only prefetches data
                Xb = qX_out.get()
                Tb = qT_out.get()
                if self.classification == "wc":
                    wc_vector = self.wc[np.where(Tb == 1)[1]]  # weights for samples in the batch

                self.nnet.add_batch(Xb, Tb, wc_vector)

            # report time
            eta = int(((time()-t0) / (b+1)) * (nb-b-1))
            if time() - t > self.tprint:
                print("processing batch %d/%d, eta %d:%02d:%02d" % (b+1, nb, eta/3600, (eta % 3600)/60, eta % 60))
                t = time()

        # close async reader and writer
        readerX.join()
        readerT.join()

        # if storing output to disk
        if fHH is not None and fHT is not None:
            HH, HT = self.nnet.get_corr()
            HH[np.diag_indices_from(HH)] -= self.nnet.norm  # norm is already included
            _write_fHH(fHH, fHT, HH, HT)

[docs]    def predict_async(self, fX, fY, istart=0, icount=np.inf):
        """Version of `predict()` with asyncronous I/O. See `predict()` for reference.

        Spawns new processes using Python's `multiprocessing` module, and requires more memory than non-async version.
        """
        assert len(self.nnet.neurons) > 0, "Add neurons to ELM and train it before using"
        assert self.nnet.B is not None, "Train ELM before predicting"
        X, _ = self._checkdata(fX, None)
        N = X.shape[0]
        # custom range adjustments
        icount = min(icount, N - istart)
        nb = int(np.ceil(float(icount) / self.batch))  # number of batches
        # make file to store results
        make_hdf5((icount, self.nnet.outputs), fY)

        # start async reader and writer for HDF5 files
        qr_in = mp.Queue()
        qr_out = mp.Queue(1)
        reader = mp.Process(target=_ireader, args=(fX, qr_in, qr_out))
        reader.daemon = True
        reader.start()
        qw_in = mp.Queue(1)
        writer = mp.Process(target=_iwriter, args=(fY, qw_in))
        writer.daemon = True
        writer.start()

        t = time()
        t0 = time()
        eta = 0
        for b in xrange(0, nb+1):
            start_next = b*self.batch + istart
            stop_next = min((b+1)*self.batch + istart, icount + istart)
            # prefetch data
            qr_in.put((start_next, stop_next))  # asyncronous reading of next data batch

            if b > 0:  # first iteration only prefetches data
                # get data
                Xb = qr_out.get()
                # process data
                Yb = self.nnet._predict(Xb)
                # save data
                qw_in.put((Yb, start-istart, stop-istart))

            start = start_next
            stop = stop_next
            # report time
            eta = int(((time()-t0) / (b+1)) * (nb-b-1))
            if time() - t > self.tprint:
                print("processing batch %d/%d, eta %d:%02d:%02d" % (b+1, nb, eta/3600, (eta % 3600)/60, eta % 60))
                t = time()

        qw_in.put(None)
        reader.join()
        writer.join()

    def _checkcorr(self, fHH, fHT):
        """Analog of `_checkdata()` for correlation matrices.
        """
        try:
            h5 = open_file(fHH, "r")
        except:
            raise IOError("Cannot read HDF5 file at %s" % fHH)
        node = None
        for node in h5.walk_nodes():
            pass  # find a node with whatever name
        if node:
            HH = node[:]
        else:
            raise IOError("Empty HDF5 file at %s" % fHH)
        h5.close()

        try:
            h5 = open_file(fHT, "r")
        except:
            raise IOError("Cannot read HDF5 file at %s" % fHT)
        node = None
        for node in h5.walk_nodes():
            pass  # find a node with whatever name
        if node:
            HT = node[:]
        else:
            raise IOError("Empty HDF5 file at %s" % fHT)
        h5.close()

        L = self.nnet.L
        c = self.nnet.outputs
        assert len(self.nnet.neurons) > 0, "Cannot solve ELM without neurons"
        assert HH.shape[0] == L and HH.shape[1] == L, "HH has wrong shape: (%d,%d) expected, (%d,%d) found" \
                                                      % (L, L, HH.shape[0], HH.shape[1])
        assert HT.shape[0] == L and HT.shape[1] == c, "HT has wrong shape: (%d,%d) expected, (%d,%d) found" \
                                                      % (L, c, HH.shape[0], HH.shape[1])
        return HH, HT