Source code for hpelm.mss_cv

# -*- coding: utf-8 -*-
"""
Created on Mon Oct 27 17:48:33 2014

@author: akusok
"""

import numpy as np


def train_cv(self, X, T, k):
    """Model structure selection with cross-validation.

    Trains ELM, cross-validates the model and sets an optimal validated solution.

    Args:
        self (ELM): ELM object that calls `train_cv()`
        X (matrix): training set inputs
        T (matrix): training set outputs
        k (int): number of parts to split the dataset into; k-2 parts are used for
            training and 2 parts are left out: 1 for validation and 1 for test.
            Repeated k times until all parts have been left out for validation
            and test once, and results are averaged over these k repetitions.

    Returns:
        err_t (double): error of the optimal model, computed in the 'cross-testing'
            manner on the data part that is used neither for training nor validation
    """
    N = X.shape[0]
    L = self.nnet.L
    c = self.nnet.outputs

    # split sample indexes into k interleaved parts
    idxk = []
    for i in range(k):
        idxk.append(np.arange(N)[i::k])

    # pre-compute per-fold correlation matrices, projected validation/test sets
    # and neuron rankings
    datak = []
    for i in range(k):
        items = [(i + j) % k for j in range(k)]
        idx_tr = np.hstack([idxk[j] for j in items[:-2]])
        idx_vl = idxk[items[-2]]
        idx_ts = idxk[items[-1]]
        Xtr = X[idx_tr]
        Ttr = T[idx_tr]
        Xvl = X[idx_vl]
        Tvl = T[idx_vl]
        Xts = X[idx_ts]
        Tts = T[idx_ts]
        self.nnet.reset()
        self.nnet.add_batch(Xtr, Ttr)
        HH, HT = self.nnet.get_corr()
        Hvl = self.nnet._project(Xvl)
        Hts = self.nnet._project(Xts)
        rank, L = self._ranking(Hvl.shape[1], Hvl, Tvl)
        datak.append((HH, HT, Hvl, Tvl, Hts, Tts, rank))

    e = np.ones((L + 1,)) * -1  # errors for all numbers of neurons; -1 = not computed yet
    err = 0
    for HH, HT, Hvl, Tvl, _, _, _ in datak:
        B = self.nnet.solve_corr(HH, HT)
        Yvl = np.dot(Hvl, B)
        err += self._error(Tvl, Yvl) / k
    penalty = err * 0.01 / L  # penalty is 1% of error at max(L)
    e[L] = err + L * penalty

    # MYOPT function
    # [iA, iB, iC, iD, iE] interval points, halve the interval each time
    # TODO: tell about single-letter matrix notations in the whole code

    # initialize the search interval
    iA = 3
    iE = L
    l = iE - iA
    iB = iA + l // 4
    iC = iA + l // 2
    iD = iA + 3 * l // 4

    l = 1000  # run the while loop at least once
    while l > 2:
        # calculate errors at interval points
        for idx in [iA, iB, iC, iD, iE]:
            if e[idx] == -1:  # skip already calculated errors
                err = 0
                for HH, HT, Hvl, Tvl, _, _, rank in datak:
                    rank1 = rank[:idx]
                    HH1 = HH[rank1, :][:, rank1]
                    HT1 = HT[rank1, :]
                    B = self.nnet.solve_corr(HH1, HT1)
                    Yvl = np.dot(Hvl[:, rank1], B)
                    err += self._error(Tvl, Yvl) / k
                e[idx] = err + idx * penalty

        m = min(e[iA], e[iB], e[iC], e[iD], e[iE])  # find minimum error

        # halve the search interval, keeping the minimum inside it
        if m in (e[iA], e[iB]):
            iE = iC
            iC = iB
        elif m in (e[iD], e[iE]):
            iA = iC
            iC = iD
        else:
            iA = iB
            iE = iD
        l = iE - iA
        iB = iA + l // 4
        iD = iA + (3 * l) // 4

    k_opt = [n1 for n1 in [iA, iB, iC, iD, iE] if e[n1] == m][0]  # find minimum index
    best_L = rank[:k_opt]  # optimal neurons, taken from the last fold's ranking

    # get test error
    err_t = 0
    for HH, HT, _, _, Hts, Tts, _ in datak:
        B = self.nnet.solve_corr(HH, HT)
        Yts = np.dot(Hts, B)
        err_t += self._error(Tts, Yts) / k

    # prune the network to the optimal neurons and re-train on the full dataset
    self.nnet._prune(best_L)
    self.nnet.add_batch(X, T)
    self.nnet.solve()
    # print("%d of %d neurons selected with Cross-Validation" % (len(best_L), L))
    # print("the Cross-Validation test error is %f" % err_t)
    # if len(best_L) > L * 0.9:
    #     print("Hint: try re-training with more hidden neurons")
    return err_t
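
For context, `train_cv()` is not normally called directly: the ELM object dispatches to it when its `train()` method receives the "CV" flag. Below is a minimal usage sketch; it assumes the public hpelm API (`ELM`, `add_neurons`, `train`) behaves as in the library's documentation, and the random toy data is for illustration only.

    import numpy as np
    from hpelm import ELM

    # toy regression data: 1000 samples, 10 inputs, 1 output
    X = np.random.randn(1000, 10)
    T = np.random.randn(1000, 1)

    elm = ELM(10, 1)
    elm.add_neurons(100, "sigm")

    # "CV" selects this cross-validation method; with k=5 each repetition
    # trains on 3 parts, validates on 1 and tests on 1, rotating the parts
    # over 5 repetitions
    err_t = elm.train(X, T, "CV", k=5)
    print("cross-test error: %f" % err_t)

Note that with k parts, each repetition trains on only (k-2)/k of the data, so a very small k (say, 3) leaves just a third of the samples for training; values of k between 5 and 10 are a common compromise between training set size and runtime.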