function [trfold,tefold] = makefolds (x, numfolds, xvals)
% MAKEFOLDS  Deterministically separate your data into folds for cross-validation.
%
%   [trfold,tefold] = makefolds (x, numfolds, [xvals])
%
%   x         is an N-element numeric vector (or just the integer N, in which
%             case it is taken to be 1:N).
%   numfolds  is either
%             - an integer, in which case tefold{i} (note: TEST!) has all
%               indices j such that x(j) is in the i-th part of xvals. The
%               parts are consecutive chunks of ceil(numel(xvals)/numfolds)
%               values; they are equally sized when numel(xvals) is a multiple
%               of numfolds, otherwise the trailing chunks are shorter (and
%               can be empty).
%             - a cell array of vectors, in which case tefold{i} has all
%               indices j such that x(j) is in numfolds{i}.
%             In both cases trfold{i} has all indices of x not in tefold{i}.
%   xvals     is optional and defaults to unique(x) (which is sorted).
%             With an integer numfolds it is the ordered list of values that
%             gets split into folds, and its length must then be a multiple
%             of numfolds. If it is supplied together with a cell-array
%             numfolds, only values that also appear in xvals are used as
%             test values.
%
%   trfold, tefold are cell arrays of sorted row vectors:
%     trfold{i} has indices of training examples in the i-th fold,
%     tefold{i} has indices of test examples in the i-th fold.
%
%   Errors:
%     - x is not numeric
%     - numfolds is neither numeric nor a cell array
%     - xvals is supplied, numfolds is an integer, and numel(xvals) is not
%       a multiple of numfolds
%   A warning (not an error) is issued when xvals is omitted and the number
%   of unique values of x is not a multiple of numfolds.
%
%   Example 1: [tr,te] = makefolds(1:9,3);   % or [tr,te] = makefolds(9,3);
%              tr = {[4:9],[1:3 7:9],[1:6]}
%              te = {[1:3],[4:6],[7:9]}
%
%   Example 2: [tr,te] = makefolds([1 2 3 2 3 1 3 2 2],3);
%              tr = {[2:5 7:9],[1 3 5:7],[1 2 4 6 8 9]}
%              te = {[1 6],[2 4 8 9],[3 5 7]}
%
%   Example 3: [tr,te] = makefolds([1 2 3 2 3 1 3 2 2],3,[2 3 1]);
%              tr = {[1 3 5:7],[1 2 4 6 8 9],[2:5 7:9]}
%              te = {[2 4 8 9],[3 5 7],[1 6]}
%
%   Example 4: [tr,te] = makefolds([1:9],3,[2 3 1]);   % or makefolds(9,3,[2 3 1]);
%              tr = {[1:3 7:9],[4:9],[1:6]}
%              te = {[4:6],[1:3],[7:9]}
%
%   Example 5: [tr,te] = makefolds([1 2 3 2 3 1 3 2 2],{1,[2 3]});
%              tr = {[2:5 7:9],[1 6]}
%              te = {[1 6],[2:5 7:9]}
%
% Dinoj Surendran, dinoj@cs.uchicago.edu  April 2005

if ~isnumeric(x)
    error('x must be a vector or integer');
end

% A scalar N is shorthand for the vector 1:N.
if numel(x) == 1
    x = 1:x;
end

% Default xvals for BOTH kinds of numfolds. Previously it was only set when
% numfolds was numeric, so the documented call makefolds(x, {..}) failed with
% an undefined-variable error. unique() already returns sorted values.
if nargin < 3
    xvals = unique(x);
end

% Build xfold, where xfold{n} holds the values of x that are test examples
% in the n-th fold.
xfold = {};
if iscell(numfolds)
    nf = numel(numfolds);
    xfold = numfolds;
elseif isnumeric(numfolds)
    nf = numfolds;
    nvals = numel(xvals);
    perfold = nvals / nf;
    if round(perfold) ~= perfold
        if nargin == 3
            error ('the length of xvals should be a multiple of numfolds');
        else
            warning ('the number of unique elements of x should be a multiple of numfolds - approximating putting %d items in %d folds', nvals, nf);
        end
    end
    % Consecutive chunks of t values; the upper bound is clamped so that the
    % trailing chunk(s) are shorter (possibly empty) when nvals is not a
    % multiple of nf.
    t = ceil(nvals / nf);
    for i = 1:nf
        xfold{i} = xvals(1 + t*(i-1) : min(t*i, nvals));
    end
else
    error('numfolds must be an integer or a cell array of vectors');
end

trfold = {};
tefold = {};

% Work on a row copy so that every returned index vector is a row vector,
% whatever the orientation of x.
xrow = x(:).';

% Only values listed in xvals can ever become test examples.
inxvals = ismember(xrow, xvals);

for n = 1:nf
    % Membership masks give each index at most once, even if xvals or
    % xfold{n} contain repeated values.
    istest = inxvals & ismember(xrow, xfold{n});
    tefold{n} = find(istest);     % find() returns ascending indices
    trfold{n} = find(~istest);    % everything that is not a test example
end