00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00012
#include "gn/gnFilter.h"
00013
#include "gn/gnFeature.h"
00014
#include "gn/gnSEQSource.h"
00015
#include "gn/gnGBKSource.h"
00016
#include "gn/gnSourceSpec.h"
00017
#include "gn/gnSourceHeader.h"
00018
#include "gn/gnSourceQualifier.h"
00019
#include "gn/gnLocation.h"
00020
#include "gn/gnStringTools.h"
00021
#include "gn/gnDebug.h"
00022
00023 gnSEQSource::gnSEQSource()
00024 {
00025 m_openString =
"";
00026 m_pFilter =
gnFilter::fullDNASeqFilter();
00027
if(m_pFilter == NULL){
00028
DebugMsg(
"Error using static sequence filters.\n");
00029 }
00030 }
00031 gnSEQSource::gnSEQSource(
const gnSEQSource& s ) :
gnFileSource(s)
00032 {
00033 vector< gnFileContig* >::const_iterator iter = s.
m_contigList.begin();
00034
for( ; iter != s.
m_contigList.end(); ++iter )
00035 {
00036
m_contigList.push_back( (*iter)->Clone() );
00037 }
00038 }
00039 gnSEQSource::~gnSEQSource()
00040 {
00041 m_ifstream.close();
00042 vector< gnFileContig* >::iterator iter =
m_contigList.begin();
00043
for( ; iter !=
m_contigList.end(); ++iter )
00044 {
00045
gnFileContig* fg = *iter;
00046 *iter = 0;
00047
delete fg;
00048 }
00049 }
00050 boolean gnSEQSource::HasContig(
const string& name )
const
00051
{
00052
for(
uint32 i = 0 ; i <=
m_contigList.size(); i++ )
00053 {
00054
if( name ==
m_contigList[i]->GetName() )
00055
return true;
00056 }
00057
return false;
00058 }
00059 uint32 gnSEQSource::GetContigID(
const string& name )
const
00060
{
00061
for(
uint32 i = 0 ; i <=
m_contigList.size(); i++ )
00062 {
00063
if( name ==
m_contigList[i]->GetName() )
00064
return i;
00065 }
00066
return ALL_CONTIGS;
00067 }
00068 string
gnSEQSource::GetContigName(
const uint32 i )
const
00069
{
00070
if( i <
m_contigList.size() )
00071 {
00072
return m_contigList[i]->GetName();
00073 }
00074
return "";
00075 }
00076 gnSeqI gnSEQSource::GetContigSeqLength(
const uint32 i )
const
00077
{
00078
if( i == ALL_CONTIGS)
00079
return m_spec->
GetLength();
00080
if( i <
m_contigList.size() )
00081 {
00082
return m_contigList[i]->GetSeqLength();
00083 }
00084
return GNSEQI_ERROR;
00085 }
00086
00087 boolean gnSEQSource::SeqRead(
const gnSeqI start,
char* buf,
gnSeqI& bufLen,
const uint32 contigI ){
00088
uint64 startPos = 0;
00089
uint64 readableBytes = 0;
00090
if( !
SeqSeek( start, contigI, startPos, readableBytes ) )
00091 {
00092 bufLen = 0;
00093
return false;
00094 }
00095
00096
if( contigI == ALL_CONTIGS )
00097 {
00098
uint32 curLen = 0;
00099
uint64 bytesRead = 0;
00100
while (curLen < bufLen)
00101 {
00102
00103
if(readableBytes <= 0)
00104
if( !
SeqSeek( start + curLen, contigI, startPos, readableBytes ) ){
00105 bufLen = curLen;
00106
return true;
00107 }
00108
00109
uint64 readLen = (bufLen - curLen) < readableBytes ? (bufLen - curLen) : readableBytes;
00110
gnSeqC* tmpBuf =
new gnSeqC[readLen];
00111
00112
00113 m_ifstream.read(tmpBuf, readLen);
00114
uint64 gc = m_ifstream.gcount();
00115 bytesRead += gc;
00116 readableBytes -= gc;
00117
for(
uint32 i=0; i < gc; i++){
00118
if( m_pFilter->
IsValid(tmpBuf[i]) ){
00119 buf[curLen] = tmpBuf[i];
00120 curLen++;
00121 }
00122 }
00123
delete[] tmpBuf;
00124
if(m_ifstream.eof()){
00125 m_ifstream.clear();
00126 bufLen = curLen;
00127
return true;
00128 }
00129 }
00130 bufLen = curLen;
00131 }
00132
else if( contigI <
m_contigList.size() )
00133 {
00134
uint32 curLen = 0;
00135
00136
gnSeqI contigSize =
m_contigList[contigI]->GetSeqLength();
00137 bufLen = bufLen < contigSize ? bufLen : contigSize;
00138
while (curLen < bufLen)
00139 {
00140
uint64 readLen = bufLen - curLen;
00141
gnSeqC* tmpBuf =
new gnSeqC[readLen];
00142
00143
00144 m_ifstream.read(tmpBuf, readLen);
00145
uint64 gc = m_ifstream.gcount();
00146
00147
00148
for(
uint32 i=0; i < gc; i++){
00149
if( m_pFilter->
IsValid(tmpBuf[i]) ){
00150 buf[curLen] = tmpBuf[i];
00151 curLen++;
00152 }
00153 }
00154
if(m_ifstream.eof()){
00155 m_ifstream.clear();
00156 bufLen = curLen;
00157
return true;
00158 }
00159
delete[] tmpBuf;
00160 }
00161 bufLen = curLen;
00162 }
00163
return true;
00164
00165 }
00166
00167
00168
00169
00170 boolean gnSEQSource::SeqSeek(
const gnSeqI start,
const uint32& contigI,
uint64& startPos,
uint64& readableBytes )
00171 {
00172
if( contigI == ALL_CONTIGS )
00173 {
00174
00175
gnSeqI curIndex = 0;
00176 vector< gnFileContig* >::iterator iter =
m_contigList.begin();
00177
for( ; iter !=
m_contigList.end(); ++iter )
00178 {
00179
uint64 len = (*iter)->GetSeqLength();
00180
if( (curIndex + len) > start )
00181
break;
00182 curIndex += len;
00183 }
00184
if( iter ==
m_contigList.end() )
00185
return false;
00186
00187
gnSeqI startIndex = start - curIndex;
00188
return SeqStartPos( startIndex, *(*iter), startPos, readableBytes );
00189 }
00190
else if( contigI <
m_contigList.size() )
00191 {
00192
return SeqStartPos( start, *(
m_contigList[contigI]), startPos, readableBytes );
00193 }
00194
return false;
00195 }
00196
00197 boolean gnSEQSource::SeqStartPos(
const gnSeqI start,
gnFileContig& contig,
uint64& startPos,
uint64& readableBytes )
00198 {
00199 readableBytes = 0;
00200
uint32 curLen = 0;
00201
00202 startPos = contig.
GetSectStartEnd(
gnContigSequence).first;
00203 m_ifstream.seekg( startPos, ios::beg );
00204
if( m_ifstream.eof() ){
00205
DebugMsg(
"ERROR in gnSEQSource::Incorrect contig start position, End of file reached!\n");
00206
return false;
00207 }
00208
while(
true )
00209 {
00210
00211
00212
uint32 tmpbufsize = contig.
GetSectStartEnd(
gnContigSequence).second - startPos;
00213
if(tmpbufsize == 0){
00214
DebugMsg(
"ERROR in gnSEQSource: stored contig size is incorrect.");
00215
return false;
00216 }
00217
uint64 startOffset = start;
00218
if(contig.
HasRepeatSeqGap()){
00219
if(contig.
GetRepeatSeqGapSize().first > 0){
00220
if(contig.
GetRepeatSeqGapSize().second > 0){
00221 startOffset += (start*contig.
GetRepeatSeqGapSize().second)/contig.
GetRepeatSeqGapSize().first;
00222 startPos+=startOffset;
00223 m_ifstream.seekg(startPos , ios::beg);
00224 readableBytes = contig.
GetSectStartEnd(
gnContigSequence).second - startPos;
00225
return true;
00226 }
00227 }
else{
00228 startPos+=start;
00229 m_ifstream.seekg(startPos , ios::beg);
00230 readableBytes = contig.
GetSectStartEnd(
gnContigSequence).second - startPos;
00231
return true;
00232 }
00233 }
00234 tmpbufsize = tmpbufsize < BUFFER_SIZE ? tmpbufsize : BUFFER_SIZE;
00235
char *tmpbuf =
new char[tmpbufsize];
00236 m_ifstream.read( tmpbuf, tmpbufsize );
00237
if( m_ifstream.eof() ){
00238
ErrorMsg(
"ERROR in gnSEQSource::Read End of file reached!\n");
00239
delete[] tmpbuf;
00240
return false;
00241 }
00242
for(
uint32 i=0; i < tmpbufsize; ++i ){
00243
if( m_pFilter->
IsValid(tmpbuf[i]) ){
00244
if( curLen >= start ){
00245 startPos += i;
00246 m_ifstream.seekg( startPos, ios::beg );
00247 readableBytes = contig.
GetSectStartEnd(
gnContigSequence).second - startPos;
00248
delete[] tmpbuf;
00249
return true;
00250 }
00251 ++curLen;
00252 }
00253 }
00254 startPos += tmpbufsize;
00255
delete[] tmpbuf;
00256 }
00257
return true;
00258 }
00259
00260
00261
00262 void gnSEQSource::BaseCount(
const string& bases,
gnSeqI& a_count,
gnSeqI& c_count,
gnSeqI& g_count,
gnSeqI& t_count,
gnSeqI& other_count){
00263 a_count = 0;
00264 c_count = 0;
00265 g_count = 0;
00266 t_count = 0;
00267 other_count = 0;
00268
for(
uint32 i=0; i < bases.length(); i++){
00269
if((bases[i] ==
'a')||(bases[i] ==
'A'))
00270 a_count++;
00271
else if((bases[i] ==
'c')||(bases[i] ==
'C'))
00272 c_count++;
00273
else if((bases[i] ==
'g')||(bases[i] ==
'G'))
00274 g_count++;
00275
else if((bases[i] ==
't')||(bases[i] ==
'T'))
00276 t_count++;
00277
else
00278 other_count++;
00279 }
00280 }
00281
00282 void gnSEQSource::FormatString(string& data,
uint32 offset,
uint32 width){
00283
00284 string::size_type newline_loc = data.find_first_of(
'\n', 0);
00285
while(newline_loc != string::npos){
00286
if(data[newline_loc-1] ==
'\r')
00287 newline_loc--;
00288 string::size_type text_loc = newline_loc;
00289
while((data[text_loc] ==
' ') ||(data[text_loc] ==
' ')||(data[text_loc] ==
'\n')||(data[text_loc] ==
'\r')){
00290 text_loc++;
00291
if(text_loc+1 == data.length())
00292
break;
00293 }
00294 data = (data.substr(0, newline_loc) +
" " + data.substr(text_loc));
00295 newline_loc = data.find_first_of(
'\n', 0);
00296 }
00297
00298 string output_string =
"";
00299
for(
uint32 charI = 0; charI < data.length();){
00300
00301 string::size_type base_loc = charI;
00302 string append_string;
00303
while(base_loc - charI <= width){
00304 string::size_type space_loc = data.find_first_of(
' ', base_loc+1);
00305
if(space_loc - charI < width)
00306 base_loc = space_loc;
00307
else if(base_loc == charI){
00308
00309 append_string = data.substr(charI, width);
00310 charI+=width;
00311 }
else{
00312 append_string = data.substr(charI, base_loc - charI);
00313 charI = base_loc;
00314 }
00315 }
00316 output_string += string(offset,
' ') + append_string;
00317
if(charI + width < data.length())
00318 output_string +=
"\r\n";
00319 }
00320 data = output_string;
00321 }
00322
00323 boolean gnSEQSource::Write(
gnGenomeSpec *spec,
const string& filename){
00324
ErrorMsg(
"Writing DNAStar SEQ files is not supported at this time. Try again next week.\n");
00325
return false;
00326 }
00327
00328 gnFileContig*
gnSEQSource::GetFileContig(
const uint32 contigI )
const{
00329
if(
m_contigList.size() > contigI)
00330
return m_contigList[contigI];
00331
return NULL;
00332 }
00333
00334
00335 boolean gnSEQSource::ParseStream( istream& fin )
00336 {
00337
00338
uint32 readState = 0;
00339
uint32 lineStart = 0;
00340
int64 gapstart = -1;
00341
00342
uint32 sectionStart = 0;
00343
uint64 streamPos = 0;
00344
uint64 bufReadLen = 0;
00345
uint64 remainingBuffer = 0;
00346
char* buf =
new char[BUFFER_SIZE];
00347
gnFragmentSpec* curFrag = 0;
00348
gnSourceSpec* curSpec = 0;
00349
gnSourceHeader *curHeader;
00350
gnBaseFeature* curFeature;
00351
gnFileContig* curContig = 0;
00352 gnLocation::gnLocationType curBaseLocationType;
00353
gnSeqI curLocationStart;
00354
int32 curStartLength = 0;
00355
int32 curEndLength = 0;
00356 string curLocContig =
"";
00357 string curQualifierName;
00358
uint64 curQualifierStart;
00359 string curContigName =
"";
00360
gnSeqI seqLength = 0;
00361
gnSeqI lineSeqSize = 0;
00362
00363
m_spec =
new gnGenomeSpec();
00364
while( !fin.eof() )
00365 {
00366
if(sectionStart > 0){
00367
if(readState == 15)
00368 sectionStart = bufReadLen;
00369
else if(readState == 16)
00370 sectionStart = lineStart;
00371 remainingBuffer = bufReadLen - sectionStart;
00372 memmove(buf, buf+sectionStart, remainingBuffer);
00373 }
00374
00375 fin.read( buf + remainingBuffer, BUFFER_SIZE - remainingBuffer);
00376 streamPos -= remainingBuffer;
00377 lineStart -= sectionStart;
00378
if(gapstart > 0)
00379 gapstart -= sectionStart;
00380 sectionStart = 0;
00381 bufReadLen = fin.gcount();
00382 bufReadLen += remainingBuffer;
00383
00384
for(
uint32 i=remainingBuffer ; i < bufReadLen ; i++ )
00385 {
00386
char ch = buf[i];
00387
switch( readState )
00388 {
00389
case 0:
00390
00391
if((ch ==
'\n')&&(buf[lineStart] !=
' ')&&(buf[lineStart] !=
' ')){
00392
if(curSpec == NULL){
00393 curSpec =
new gnSourceSpec(
this,
m_spec->
GetSpecListLength());
00394 curFrag =
new gnFragmentSpec();
00395 curFrag->
AddSpec(curSpec);
00396 curSpec->
SetSourceName(m_openString);
00397
m_spec->
AddSpec(curFrag);
00398 }
00399
if(lineStart != sectionStart){
00400
uint32 j =
SEQ_HEADER_NAME_LENGTH-1;
00401
for(; j > 0; j--)
00402
if((buf[sectionStart+j] !=
' ')&&(buf[sectionStart+j] !=
' '))
00403
break;
00404 string header_name = string(buf+sectionStart, j+1);
00405 curHeader =
new gnSourceHeader(
this, header_name, sectionStart + streamPos, lineStart - sectionStart);
00406
00407
if(strncmp(&buf[lineStart],
"LOCUS", 5) == 0)
00408
m_spec->
AddHeader(curHeader);
00409
else
00410 curFrag->
AddHeader(curHeader);
00411 sectionStart = lineStart;
00412 }
00413
00414
if(strncmp(&buf[lineStart],
"FEATURES", 8) == 0){
00415 sectionStart = i + 1;
00416 readState = 1;
00417 }
else if(strncmp(&buf[lineStart],
"ORIGIN", 6) == 0){
00418 curHeader =
new gnSourceHeader(
this, string(
"ORIGIN"), sectionStart + streamPos, i - sectionStart + 1);
00419 curFrag->
AddHeader(curHeader);
00420 curContig =
new gnFileContig();
00421 curContig->
SetName(curContigName);
00422 curContigName =
"";
00423 readState = 13;
00424 }
else if(strncmp(&buf[lineStart],
"LOCUS", 5) == 0){
00425
if(strncmp(&buf[lineStart+
SEQ_LOCUS_CIRCULAR_COLUMN-1],
"circular", 8) == 0)
00426 curFrag->
SetCircular(
true);
00427
uint32 j =
SEQ_LOCUS_NAME_LENGTH + 1;
00428
for(; j > 0; j--)
00429
if((buf[lineStart+
SEQ_LOCUS_NAME_COLUMN+j-1] !=
' ')&&(buf[sectionStart+
SEQ_LOCUS_NAME_COLUMN+j-1] !=
' '))
00430
break;
00431 curContigName = string(buf+lineStart+
SEQ_LOCUS_NAME_COLUMN-1, j+1);
00432 curFrag->
SetName(curContigName);
00433 }
else if(strncmp(&buf[lineStart],
"^^", 2) == 0){
00434
00435
if(curContig == NULL){
00436 curContig =
new gnFileContig();
00437 curContig->
SetName(curContigName);
00438 curContigName =
"";
00439 }
00440 i--;
00441 readState = 14;
00442
break;
00443 }
00444 }
00445
if(ch ==
'\n')
00446 lineStart = i + 1;
00447
break;
00448
case 1:
00449
if((ch ==
' ')||(ch ==
' ')){
00450
break;
00451 }
else if(ch ==
'\n'){
00452 lineStart = i + 1;
00453 sectionStart = i + 1;
00454
break;
00455 }
else if(sectionStart == i){
00456 i--;
00457 readState = 0;
00458 sectionStart = i + 1;
00459
break;
00460 }
else if((i - lineStart ==
SEQ_SUBTAG_COLUMN)||((buf[lineStart]==
' ')&&(i==lineStart+1))){
00461 sectionStart = i;
00462 readState = 2;
00463 }
00464
case 2:
00465
if((ch ==
' ')||(ch ==
' ')){
00466 string featureName(buf+sectionStart, i - sectionStart);
00467 curFeature =
new gnFeature(featureName);
00468 curFrag->
AddFeature(curFeature);
00469 sectionStart = i + 1;
00470 readState = 3;
00471 }
00472
break;
00473
case 3:
00474
if((ch ==
' ')||(ch ==
' ')){
00475
break;
00476 }
else if((ch ==
'\r')||(ch ==
'\n')){
00477 lineStart = i+1;
00478
break;
00479 }
00480 sectionStart = i;
00481 readState = 4;
00482
case 4:
00483
if((ch ==
' ')||(ch ==
' ')||(ch ==
'(')||(ch ==
'.')||(ch==
'^')||(ch==
':')){
00484 string starter(buf+sectionStart, i - sectionStart);
00485
if(ch ==
'('){
00486
if(starter ==
"complement")
00487 curFeature->
SetLocationType(gnLocation::LT_Complement);
00488
else if(starter ==
"order")
00489 curFeature->
SetLocationType(gnLocation::LT_Order);
00490
else if(starter ==
"group")
00491 curFeature->
SetLocationType(gnLocation::LT_Group);
00492
else if(starter ==
"one-of")
00493 curFeature->
SetLocationType(gnLocation::LT_OneOf);
00494 sectionStart = i + 1;
00495
break;
00496 }
else if(ch ==
':'){
00497 curLocContig = starter;
00498 sectionStart = i + 1;
00499
break;
00500 }
00501 curLocationStart = atoi(starter.c_str());
00502 readState = 6;
00503
if(ch ==
'.'){
00504
00505 readState = 5;
00506 sectionStart = i + 1;
00507
break;
00508 }
else if(ch ==
'^'){
00509 curBaseLocationType = gnLocation::LT_BetweenBases;
00510 }
else if((ch ==
' ')||(ch ==
' ')){
00511
00512
gnLocation curLocation(curLocationStart, curLocationStart);
00513 curFeature->
AddLocation(curLocation, curFeature->
GetLocationListLength());
00514 readState = 7;
00515 }
00516 sectionStart = i + 1;
00517
00518 }
else if(ch ==
'<'){
00519 curStartLength = -1;
00520 sectionStart = i + 1;
00521 }
else if(ch ==
'>'){
00522 curStartLength = 1;
00523 sectionStart = i + 1;
00524 }
00525
break;
00526
case 5:
00527
if(ch ==
'.'){
00528 curBaseLocationType = gnLocation::LT_Standard;
00529 readState = 6;
00530 sectionStart = i + 1;
00531
break;
00532 }
00533 curBaseLocationType = gnLocation::LT_OneOf;
00534
case 6:
00535
if(ch ==
'>'){
00536 curEndLength = 1;
00537 sectionStart = i + 1;
00538 }
else if(ch ==
'<'){
00539 curEndLength = -1;
00540 sectionStart = i + 1;
00541 }
else if((ch ==
' ')||(ch ==
' ')||(ch ==
',')){
00542
00543 string ender(buf+sectionStart, i - sectionStart);
00544
gnSeqI curLocationEnd = atoi(ender.c_str());
00545
gnLocation curLocation(curLocationStart, curStartLength, curLocationEnd, curEndLength, curBaseLocationType);
00546 curEndLength = 0;
00547 curStartLength = 0;
00548 curFeature->
AddLocation(curLocation, curFeature->
GetLocationListLength());
00549 readState = ch ==
',' ? 3 : 7;
00550 sectionStart = i+1;
00551 }
00552
break;
00553
case 7:
00554
if((ch !=
' ')&&(ch !=
' ')&&(lineStart == i)){
00555 sectionStart = i;
00556 readState = 0;
00557 i--;
00558 }
else if((ch !=
' ')&&(ch !=
' ')&&((lineStart == i -
SEQ_SUBTAG_COLUMN)||((buf[lineStart]==
' ')&&(i==lineStart+1)))){
00559 sectionStart = i;
00560 readState = 2;
00561 i--;
00562 }
else if(ch ==
','){
00563 sectionStart = i+1;
00564 readState = 3;
00565 }
else if(ch ==
'/'){
00566 sectionStart = i+1;
00567 readState = 8;
00568 }
else if(ch ==
'\n')
00569 lineStart = i + 1;
00570
break;
00571
case 8:
00572
if(ch ==
'='){
00573 curQualifierName = string(buf+sectionStart, i - sectionStart);
00574 readState = 9;
00575 sectionStart = i+1;
00576 }
00577
break;
00578
case 9:
00579
if(ch ==
'"'){
00580 readState = 10;
00581 sectionStart = i;
00582 curQualifierStart = i + streamPos;
00583 }
else if(ch ==
'['){
00584 readState = 11;
00585 sectionStart = i;
00586 }
else if((ch ==
'\r')||(ch ==
'\n')){
00587 curFeature->
AddQualifier(
new gnSourceQualifier(
this, curQualifierName, sectionStart + streamPos, i - sectionStart));
00588 sectionStart = i+1;
00589 readState = 7;
00590 }
00591
break;
00592
case 10:
00593
if(ch ==
'"')
00594 readState = 11;
00595
if(ch ==
'\n'){
00596 lineStart = i + 1;
00597 }
00598
break;
00599
case 11:
00600
if(ch !=
'"'){
00601 curFeature->
AddQualifier(
new gnSourceQualifier(
this, curQualifierName, curQualifierStart, i - sectionStart));
00602 sectionStart = i+1;
00603 readState = 7;
00604
if(ch ==
'\n')
00605 lineStart = i + 1;
00606 }
else
00607 readState = 10;
00608
break;
00609
case 12:
00610
if(ch ==
']'){
00611 curFeature->
AddQualifier(
new gnSourceQualifier(
this, curQualifierName, sectionStart + streamPos, i - sectionStart));
00612 sectionStart = i+1;
00613 readState = 7;
00614 }
00615
break;
00616
case 13:
00617
if(ch ==
'^')
00618 readState = 14;
00619
else{
00620 curContig->
SetSectStart(
gnContigSequence, i + 1 + streamPos);
00621 readState = 16;
00622 }
00623
break;
00624
case 14:
00625
if(ch ==
'\n'){
00626 curContig->
SetRepeatSeqGap(
true);
00627 lineStart = i + 1;
00628 sectionStart = i + 1;
00629 curContig->
SetSectStart(
gnContigSequence, i + 1 + streamPos);
00630 readState = 15;
00631 }
00632
break;
00633
case 15:
00634
if(m_pFilter->
IsValid(ch))
00635 seqLength++;
00636
else
00637 curContig->
SetRepeatSeqGap(
false);
00638
break;
00639
case 16:
00640
if((ch ==
'/')&&(i==lineStart)){
00641 readState = 17;
00642 }
else if(m_pFilter->
IsValid(ch)){
00643 seqLength++;
00644 lineSeqSize++;
00645
if(gapstart >= 0){
00646 curContig->
SetRepeatGapSize(i - gapstart);
00647 gapstart = -1;
00648 }
00649 }
else if(ch ==
'\n'){
00650
if(sectionStart == lineStart){
00651 curContig->
SetRepeatSeqGap(
true);
00652 curContig->
SetRepeatSeqSize(seqLength);
00653 gapstart = i;
00654
for(; gapstart >= lineStart; gapstart--)
00655
if(m_pFilter->
IsValid(buf[gapstart]))
00656
break;
00657 gapstart++;
00658 }
else if(lineSeqSize != curContig->
GetRepeatSeqGapSize().first)
00659 curContig->
SetRepeatSeqGap(
false);
00660 lineSeqSize = 0;
00661 lineStart = i + 1;
00662 }
00663
break;
00664
case 17:
00665
if((ch ==
'\n')&&(buf[lineStart+1] ==
'/')){
00666 curContig->
SetSectEnd(
gnContigSequence, lineStart - 2 + streamPos);
00667 curContig->
SetSeqLength(seqLength);
00668
m_contigList.push_back(curContig);
00669 curContig = 0;
00670 curSpec->
SetLength(seqLength);
00671 curSpec = 0;
00672 seqLength = 0;
00673 lineStart = i + 1;
00674 sectionStart = i + 1;
00675 readState = 0;
00676 }
00677
break;
00678 }
00679 }
00680 streamPos += bufReadLen;
00681 }
00682
if(curContig != 0){
00683 curContig->
SetSectEnd(
gnContigSequence, streamPos - 1);
00684 curContig->
SetSeqLength(seqLength);
00685
m_contigList.push_back(curContig);
00686 curSpec->
SetLength(seqLength);
00687 }
00688
if(curSpec != NULL)
00689
if((curFrag->
GetFeatureListLength() == 0) && (curFrag->
GetHeaderListLength() == 0)
00690 &&(curSpec->
GetLength() == 0)){
00691
m_spec->
RemoveSpec(
m_spec->
GetSpecListLength() - 1);
00692
delete curFrag;
00693 }
00694 m_ifstream.clear();
00695
delete[] buf;
00696
return true;
00697 }