function [X,L,trainindices,testindices] = makedataset
%MAKEDATASET Build 4-fold n-gram feature matrices for the q*nc* documents.
%
%   [X,L,trainindices,testindices] = makedataset
%
%   The 64 document names 'q<i>nc<j>' (i,j = 1..8) are split into four
%   contiguous folds of 16. For each fold k the function
%     1. writes trainfiles<k>.txt / testfiles<k>.txt (one name per line),
%     2. runs ngrams.pl once with the training list to produce
%        textfeats<k>.txt, then once each with the test and training lists
%        using that file and the tag tf<k>,
%     3. reads '<name>_text_tf<k>.dat' for every document with svmlread and
%        stacks the rows into one sparse matrix.
%
%   Outputs
%     X            - 1x4 cell; X{k} is an N-by-D sparse matrix, where N is
%                    the total number of rows read for fold k and D is the
%                    line count of textfeats<k>.txt plus one.
%     L            - label vector read for fold 1 (see note at the end).
%     trainindices - 1x4 cell; row indices of X{k} from training documents.
%     testindices  - 1x4 cell; row indices of X{k} from test documents.
%
%   Side effects: creates the list files above and tmp.dat in the current
%   directory, and shells out to perl and wc. Requires the helpers
%   loadcell and svmlread on the path.

nfolds = 4;

% Document names in the order q1nc1, q1nc2, ..., q8nc8.
files = {};
for q = 1:8
    for c = 1:8
        files{end+1} = sprintf('q%dnc%d', q, c);
    end
end
nfiles   = numel(files);
foldsize = nfiles / nfolds;          % 16 documents per fold

X            = cell(1, nfolds);
labels       = cell(1, nfolds);
trainindices = cell(1, nfolds);
testindices  = cell(1, nfolds);

for fold = 1:nfolds
    fprintf('makedataset: fold %d of %d\n', fold, nfolds);

    % Fold k holds documents 16*k-15 .. 16*k; everything else is training.
    testdocs  = (fold-1)*foldsize + (1:foldsize);
    traindocs = setdiff(1:nfiles, testdocs);

    write_name_list(sprintf('trainfiles%d.txt', fold), files(traindocs));
    write_name_list(sprintf('testfiles%d.txt',  fold), files(testdocs));

    % First call uses -s with the training list; the next two use -r with
    % the same textfeats file on the test and training lists.
    run_command(sprintf('perl ngrams.pl -s textfeats%d.txt -f 2 -e 1 -o -w -n -x 3 -c trainfiles%d.txt', fold, fold));
    run_command(sprintf('perl ngrams.pl -r textfeats%d.txt -t tf%d -c testfiles%d.txt', fold, fold, fold));
    run_command(sprintf('perl ngrams.pl -r textfeats%d.txt -t tf%d -c trainfiles%d.txt', fold, fold, fold));

    % Column count of the feature matrix = lines in textfeats<k>.txt + 1.
    run_command(sprintf('wc -l textfeats%d.txt > tmp.dat', fold));
    z = loadcell('tmp.dat');
    % sscanf skips leading blanks and stops at the first non-digit, so it
    % handles both "42 textfeats1.txt" and "   42 textfeats1.txt".
    nfeats = sscanf(z{1}, '%d', 1);
    if isempty(nfeats)
        error('makedataset:badFeatureCount', ...
              'Could not read a line count for textfeats%d.txt from tmp.dat.', fold);
    end
    D = nfeats + 1;

    % Collect per-document pieces and concatenate once after the loop.
    ycell = cell(nfiles, 1);
    rcell = cell(nfiles, 1);
    ccell = cell(nfiles, 1);
    vcell = cell(nfiles, 1);
    istest = [];
    a = 0;                           % rows accumulated so far
    for j = 1:nfiles
        [y, x] = svmlread(sprintf('%s_text_tf%d.dat', files{j}, fold));
        ycell{j} = y(:);

        % find() returns ROW vectors when x has a single row; force columns
        % so documents with exactly one example concatenate correctly.
        [ii, jj, v] = find(x);
        rcell{j} = a + ii(:);
        ccell{j} = jj(:);
        vcell{j} = v(:);

        b = a + numel(y);
        istest(a+1:b) = ismember(j, testdocs);   % flag rows of test documents
        a = b;
    end

    labels{fold} = vertcat(ycell{:});
    N = numel(labels{fold});

    testindices{fold}  = find(istest);
    trainindices{fold} = setdiff(1:N, testindices{fold});
    X{fold} = sparse(vertcat(rcell{:}), vertcat(ccell{:}), vertcat(vcell{:}), N, D);
end

% Only fold 1's labels are returned.
% NOTE(review): presumably labels are identical across folds because the
% documents are read in the same order each time -- confirm.
L = labels{1};


function write_name_list(fname, names)
% Write each string of the cell array NAMES to the file FNAME, one per line.
[fid, msg] = fopen(fname, 'wt');
if fid == -1
    error('makedataset:fopenFailed', 'Cannot open %s for writing: %s', fname, msg);
end
for k = 1:numel(names)
    fprintf(fid, '%s\n', names{k});
end
fclose(fid);


function run_command(cmd)
% Echo CMD, run it through the shell, and warn if it exits non-zero.
fprintf('%s\n', cmd);
status = system(cmd);
if status ~= 0
    warning('makedataset:commandFailed', ...
            'Command exited with status %d: %s', status, cmd);
end