function X = seqread (infile) % SEQREAD - reads a sequence object from a file % % caveats : doesnt deal with empty sequences within tags % doesnt check if length in tags is correct X.data = {}; X.labels = {}; X.origlabels = []; X.comments = {}; HAS_DIM_TAG = 0; HAS_LABEL_TAG = 0; DENSE = 0; SPARSE = 0; TAGSEP = 0; % sequences separated by tags SPACESEP = 0; % sequences separated by blank lines DIM = 0; LABS = []; z = textread(infile,'%s','headerlines',0,'delimiter','\n','bufsize',4096*100); LZ = length(z); if ~LZ return; warning('no lines in input file'); end %%%%%%%%%%%%%%%%%%%%% first check for any dimension tag %%%%%%%%%%% f=findstr(z{1},'dimension'); if length(f) L=length(z{1}); g=findstr(z{1},'>'); if (~length(g)) error('no ending triangular bracket for dimension tag'); end f2=findstr(z{1},'='); if (length(f2)) if (f2>length(z{1})) error('what is the dimension in the dimension tag?'); else DIM = str2num(z{1}(f2+1:g-1)); end else DIM = str2num(z{1}(f+1:g-1)); end if ~DIM error ('what is the dimension?'); else HAS_DIM_TAG = 1; end end %%%%%%% Now check for any label tag %%%%%%%%%%%%%%% if (LZ < HAS_DIM_TAG+1) return; warning('very short file!'); end f=findstr(z{HAS_DIM_TAG+1},'label'); if length(f) L=length(z{HAS_DIM_TAG+1}); g=findstr(z{HAS_DIM_TAG+1},'>'); if (~length(g)) error('no ending triangular bracket for label tag'); end f2=findstr(z{HAS_DIM_TAG+1},'='); if (length(f2)) if (f2>length(z{HAS_DIM_TAG+1})) error('what is the label in the label tag?'); else LABS = strread(z{HAS_DIM_TAG+1}(f2+1:g-1), '%d', 'delimiter', ' '); end else LABS = strread(z{HAS_DIM_TAG+1}(f+1:g-1), '%d', 'delimiter', ' '); end if ~length(LABS) error ('Are there no labels?'); else HAS_LABEL_TAG = 1; X.origlabels = LABS; end end %%%%%%%%%%%%%%%% Now keep going till you get to the first sequence data line %%%%%% i = HAS_LABEL_TAG + HAS_DIM_TAG + 1; found_data_line = 0; while ((i <= length(z)) & (~ found_data_line)) if (length(z{i})>0) % if this line isnt empty if (z{i}(1) == '%') % if this line starts with a % it is a comment X.comments{ 1+length(X.comments) } = z{i}(2:length(z{i})); else if length(strfind(z{i},' so go to next one end if length(findstr(z{i},':')) % if this line is sparse SPARSE = 1; else DENSE = 1; % now determine DIM since all data lines must have the same number of elements a = strread(z{i},'%s','delimiter',' '); if (mod(str2num(a{1}),1)) z{i} error('first element of each data line should be an integer label'); end DIM = length(a)-1; end if TAGSEP i=i-1; % now go back to first line end else error ('cannot find data line'); end if (~DENSE) & (~SPARSE) error ('is this file in dense or sparse format?'); end if SPARSE Xi = {}; Xj = {}; Xv = {}; end anynumbershere = -1; inspace = 0; % inspace is not used vecnum = 0; % number of next current/vector in current sequence seqnum = -2; if TAGSEP seqnum = 0; % number of current sequence elseif SPACESEP seqnum = 1; end while i<=LZ if (TAGSEP) & (findstr(z{i},'0) & (z{i}(1) == '%')) % comment, add it X.comments{ 1+length(X.comments) } = z{i}(2:length(z{i})); else if (SPACESEP) if (~inspace) inspace = 1; seqnum = seqnum+1; vecnum = 0; end end % if TAGSEP, ignore blank lines and any other lines end end end if (anynumbershere>0) if hasalpha(z{i}) %% its ok if there is a 'e' as that could be part of exponential notation z{i} error('found letters in a line that should have just numbers'); end if ((length(findstr(z{i},':'))) & (DENSE)) z{i} error('found a colon : in a line when previous lines are in dense format'); end a = strread(z{i},'%s','delimiter',' '); if (length(a)==0) error('no entry in data line'); elseif ((DENSE) & (length(a) ~= DIM+1)) z{i} error (sprintf('This line has a dense %d-dimensional vector while the first line had a %d-dimensional vector\n',length(a)-1,DIM)); elseif length(findstr(a{1},':')) error('first element of line should be a label'); else lab = str2num(a{1}); if (mod(lab,1)) z{i} error(sprintf('label %f should be an integer',lab)); end f = find(X.origlabels == lab); if (~length(f)) % couldnt find lab in original labels if HAS_LABEL_TAG error(sprintf('found label %d not supplied in label tag',lab)); else X.origlabels = [X.origlabels lab]; end end if (length(X.labels) DIM) if HAS_DIM_TAG z{i} error (sprintf('found index %d that is larger than dimension %d in dimension tag',indx,DIM)); else DIM = indx; end end if DENSE if length(X.data)