00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00012
#include "gn/gnSequence.h"
00013
#include "gn/gnSourceFactory.h"
00014
#include "gn/gnBaseSource.h"
00015
#include "gn/gnGenomeSpec.h"
00016
#include "gn/gnFragmentSpec.h"
00017
#include "gn/gnSourceSpec.h"
00018
#include "gn/gnStringSpec.h"
00019
#include "gn/gnStringHeader.h"
00020
00021
00022 gnSequence::gnSequence( )
00023 {
00024
spec =
new gnGenomeSpec();
00025
comparator =
gnCompare::DNASeqCompare();
00026 }
00027 gnSequence::gnSequence(
const gnSeqC* seq){
00028
spec =
new gnGenomeSpec();
00029
if (seq[0] != 0){
00030
gnFragmentSpec *fragSpec =
new gnFragmentSpec();
00031
spec->
AddSpec(fragSpec);
00032 fragSpec->
AddSpec(
new gnStringSpec(seq));
00033 }
00034
comparator =
gnCompare::DNASeqCompare();
00035 }
00036 gnSequence::gnSequence(
const string& str){
00037
spec =
new gnGenomeSpec();
00038
if (str.length() != 0){
00039
gnFragmentSpec *fragSpec =
new gnFragmentSpec();
00040
spec->
AddSpec(fragSpec);
00041 fragSpec->
AddSpec(
new gnStringSpec(str));
00042 }
00043
comparator =
gnCompare::DNASeqCompare();
00044 }
00045 gnSequence::gnSequence(
const gnGenomeSpec& gngs){
00046
spec = gngs.
Clone();
00047
comparator =
gnCompare::DNASeqCompare();
00048 }
00049 gnSequence::gnSequence(
const gnFragmentSpec& gnfs){
00050
spec =
new gnGenomeSpec();
00051
spec->
AddSpec(gnfs.
Clone());
00052
comparator =
gnCompare::DNASeqCompare();
00053 }
00054 gnSequence::gnSequence(
const gnContigSpec& gcs){
00055
spec =
new gnGenomeSpec();
00056
gnFragmentSpec *fragSpec =
new gnFragmentSpec();
00057 fragSpec->
AddSpec(gcs.
Clone());
00058
comparator =
gnCompare::DNASeqCompare();
00059 }
00060 gnSequence::gnSequence(
gnSeqC *bases,
gnSeqI length){
00061
spec =
new gnGenomeSpec();
00062
if (length > 0){
00063
gnFragmentSpec *fragSpec =
new gnFragmentSpec();
00064
spec->
AddSpec(fragSpec);
00065 fragSpec->
AddSpec(
new gnStringSpec(string(bases, length)));
00066 }
00067
comparator =
gnCompare::DNASeqCompare();
00068 }
00069 gnSequence::gnSequence(
const gnSequence& seq )
00070 {
00071 *
this = seq;
00072 }
00073
00074
00075 gnSequence::~gnSequence()
00076 {
00077
if (
spec != NULL)
00078
delete spec;
00079 }
00080
00081 gnSequence&
gnSequence::operator=(
const gnSequence& seq){
00082
spec = seq.
spec->
Clone();
00083
filter_list = seq.
filter_list;
00084
comparator = seq.
comparator;
00085
return *
this;
00086 }
00087
00088
00089 gnSequence*
gnSequence::Clone()
const
00090
{
00091
return new gnSequence( *
this );
00092 }
00093
00094 int gnSequence::compare(
const string& str)
const{
00095
STACK_TRACE_START
00096
gnSeqI len =
length();
00097
gnSeqI seq_len = str.length();
00098
gnSeqI comparelen = len < seq_len ? len : seq_len;
00099
gnSeqI compared = 0;
00100
while(comparelen > 0){
00101
gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE;
00102 string bases =
ToString(compared, curlen);
00103 string seq_bases = str.substr(compared, curlen);
00104
if(
comparator->
LessThan(bases, seq_bases))
00105
return -1;
00106
else if(
comparator->
LessThan(seq_bases, bases))
00107
return 1;
00108
00109 compared += curlen;
00110 comparelen -= curlen;
00111 }
00112
if(len < seq_len)
00113
return -1;
00114
else if(len > seq_len)
00115
return 1;
00116
return 0;
00117
STACK_TRACE_END
00118 }
00119
00120 int gnSequence::compare(
const gnSequence& seq)
const{
00121
STACK_TRACE_START
00122
gnSeqI len =
length();
00123
gnSeqI seq_len = seq.
length();
00124
gnSeqI comparelen = len < seq_len ? len : seq_len;
00125
gnSeqI compared = 0;
00126
while(comparelen > 0){
00127
gnSeqI curlen = comparelen < BUFFER_SIZE ? comparelen : BUFFER_SIZE;
00128 string bases =
ToString(curlen, compared+1);
00129 string seq_bases = seq.
ToString(curlen, compared+1);
00130
if(
comparator->
LessThan(bases, seq_bases))
00131
return -1;
00132
else if(
comparator->
LessThan(seq_bases, bases))
00133
return 1;
00134
00135 compared+= curlen;
00136 comparelen -= curlen;
00137 }
00138
if(len < seq_len)
00139
return -1;
00140
else if(len > seq_len)
00141
return 1;
00142
return 0;
00143
STACK_TRACE_END
00144 }
00145
00146 void gnSequence::insert(
const gnSeqI offset,
const gnSeqC *bases,
const gnSeqI len){
00147
STACK_TRACE_START
00148 string str(bases, len);
00149
gnStringSpec gpbs(str);
00150
insert(offset, gpbs);
00151
STACK_TRACE_END
00152 }
00153
00154
00155 void gnSequence::insert(
const gnSeqI offset,
const gnGenomeSpec& gpbs){
00156
STACK_TRACE_START
00157
if(offset == 0)
00158
Throw_gnEx(SeqIndexOutOfBounds());
00159
if(offset ==
GNSEQI_END ||
spec->
GetLength() < offset){
00160
for(
uint32 i=0; i < gpbs.
GetSpecListLength(); i++)
00161
spec->
AddSpec(gpbs.
GetSpec(i)->
Clone());
00162
return;
00163 }
00164
00165
gnGenomeSpec* tmpSpec =
spec->
Clone();
00166
00167
00168
gnSeqI real_offset = offset - 1;
00169
gnSeqI endCrop =
spec->
GetLength() - real_offset;
00170
spec->
CropEnd(endCrop);
00171 tmpSpec->
CropStart(real_offset);
00172
00173
insert(
GNSEQI_END, gpbs);
00174
insert(
GNSEQI_END, *tmpSpec);
00175
delete tmpSpec;
00176
STACK_TRACE_END
00177 }
00178
00179 gnSequence const gnSequence::operator+(
const gnSequence& seq)
const{
00180
STACK_TRACE_START
00181
gnSequence rval(*
this);
00182 rval.append(seq);
00183
return rval;
00184
STACK_TRACE_END
00185 }
00186
00187 gnSequence gnSequence::subseq(
const gnSeqI offset,
const gnSeqI length)
const{
00188
STACK_TRACE_START
00189
if(length == 0)
00190
return gnSequence();
00191
if(offset == 0)
00192
Throw_gnEx(SeqIndexOutOfBounds());
00193
00194
gnSequence tmpSeq;
00195
delete tmpSeq.
spec;
00196 tmpSeq.
spec =
spec->
CloneRange(offset - 1, length);
00197
return tmpSeq;
00198
STACK_TRACE_END
00199 }
00200 void gnSequence::erase(
const gnSeqI offset,
const gnSeqI len ){
00201
STACK_TRACE_START
00202
if(offset == 0)
00203
Throw_gnEx(SeqIndexOutOfBounds());
00204
00205
gnSeqI current_length =
length();
00206
if(offset > current_length)
00207
Throw_gnEx(SeqIndexOutOfBounds());
00208
gnSeqI endBase = offset + len - 1 < current_length ? offset + len - 1 : current_length;
00209
gnSeqI startBase = offset - 1;
00210
00211
gnGenomeSpec* tmpSpec =
spec->
CloneRange(endBase,
GNSEQI_END);
00212
uint32 lennard = tmpSpec->
GetLength();
00213
spec->
CropEnd(current_length - startBase);
00214 lennard =
length();
00215
insert(
GNSEQI_END, *tmpSpec);
00216
delete tmpSpec;
00217
STACK_TRACE_END
00218 }
00219 void gnSequence::splitContig(
const gnSeqI splitI,
const uint32 contigI){
00220
STACK_TRACE_START
00221
gnSeqI splitBase = splitI;
00222
gnSeqI current_length =
length();
00223
if(splitI == 0)
00224
Throw_gnEx(SeqIndexOutOfBounds());
00225
if(contigI == ALL_CONTIGS && splitI > current_length)
00226
Throw_gnEx(SeqIndexOutOfBounds());
00227
else
00228
localToGlobal(contigI, splitBase);
00229
gnGenomeSpec* tmpSpec =
spec->
Clone();
00230 tmpSpec->
CropStart(splitBase);
00231
spec->
CropEnd(current_length - splitBase);
00232
00233
insert(
GNSEQI_END, *tmpSpec);
00234
delete tmpSpec;
00235
STACK_TRACE_END
00236 }
00237
00238
00239 istream&
operator>>(istream& is,
gnSequence& gps){
00240 string bases;
00241 is >> bases;
00242 gps.
append(bases);
00243
return is;
00244 }
00245 ostream&
operator<<(ostream& os,
const gnSequence& gps){
00246 os << gps.
ToString();
00247
return os;
00248 }
00249 string
gnSequence::ToString(
const gnSeqI len,
const gnSeqI offset )
const
00250
{
00251
STACK_TRACE_START
00252 string str;
00253
ToString(str, len, offset);
00254
return str;
00255
STACK_TRACE_END
00256 }
00257 boolean gnSequence::ToString( string& str,
const gnSeqI len,
const gnSeqI offset )
const
00258
{
00259
STACK_TRACE_START
00260
gnSeqI real_offset = offset - 1;
00261
gnSeqI m_length =
length();
00262
gnSeqI readSize = len > m_length - real_offset ? m_length - real_offset : len;
00263 Array<char> array_buf( readSize+1 );
00264
char *buf = array_buf.data;
00265
boolean success =
spec->
SeqRead( real_offset, buf, readSize, ALL_CONTIGS);
00266 buf[readSize] =
'\0';
00267 str = buf;
00268
00269
00270 list<const gnBaseFilter*>::const_iterator iter =
filter_list.begin();
00271
for(; iter !=
filter_list.end(); iter++)
00272 (*iter)->Filter(str);
00273
00274
if( success )
00275
return true;
00276
return false;
00277
STACK_TRACE_END
00278 }
00279 boolean gnSequence::ToArray(
gnSeqC* pSeqC,
gnSeqI length,
const gnSeqI offset )
const
00280
{
00281
STACK_TRACE_START
00282
gnSeqI real_offset = offset - 1;
00283
if(offset ==
GNSEQI_END)
00284
return false;
00285
gnSeqC* tmp =
new gnSeqC[length];
00286
boolean success =
spec->
SeqRead( real_offset, tmp, length, ALL_CONTIGS);
00287
00288
00289 list<const gnBaseFilter*>::const_iterator iter =
filter_list.begin();
00290
gnSeqC** tomp = &tmp;
00291
for(; iter !=
filter_list.end(); iter++)
00292 (*iter)->Filter(tomp, length);
00293 memcpy(pSeqC, *tomp, length);
00294
delete[] *tomp;
00295
00296
if( success )
00297
return true;
00298
00299
return false;
00300
STACK_TRACE_END
00301 }
00302 gnSeqC gnSequence::GetSeqC(
const gnSeqI offset )
const
00303
{
00304
STACK_TRACE_START
00305
char block;
00306
gnSeqI readLen = 1;
00307
gnSeqI real_offset = offset - 1;
00308
boolean success =
spec->
SeqRead( real_offset, &block, readLen, ALL_CONTIGS);
00309
00310
00311 list<const gnBaseFilter*>::const_iterator iter =
filter_list.begin();
00312
for(; iter !=
filter_list.end(); iter++)
00313 block = (*iter)->Filter(block);
00314
00315
if( success )
00316
return block;
00317
00318
return GNSEQC_NULL;
00319
STACK_TRACE_END
00320 }
00321 gnSeqC gnSequence::operator[](
const gnSeqI offset )
const
00322
{
00323
STACK_TRACE_START
00324
return GetSeqC( offset );
00325
STACK_TRACE_END
00326 }
00327
00328 gnSeqI gnSequence::contigListSize()
const{
00329
STACK_TRACE_START
00330
return spec->
GetSpecListLength();
00331
STACK_TRACE_END
00332 }
00333 gnSeqI gnSequence::contigListLength()
const{
00334
STACK_TRACE_START
00335
return spec->
GetSpecListLength();
00336
STACK_TRACE_END
00337 }
00338
00339
00340 uint32 gnSequence::contigIndexByBase(
const gnSeqI baseI)
const{
00341
STACK_TRACE_START
00342
return spec->
GetSpecIndexByBase(baseI-1);
00343
STACK_TRACE_END
00344 }
00345 gnSequence gnSequence::contig(
const uint32 contigI)
const{
00346
STACK_TRACE_START
00347
if(contigI == ALL_CONTIGS)
00348
return *
this;
00349
return gnSequence(*
spec->
GetSpec(contigI));
00350
STACK_TRACE_END
00351 }
00352
00353 gnSequence gnSequence::contigByBase(
const gnSeqI baseI)
const{
00354
STACK_TRACE_START
00355
return gnSequence(*
spec->
GetSpecByBase(baseI-1));
00356
STACK_TRACE_END
00357 }
00358 uint32 gnSequence::contigIndexByName( string& contigName)
const{
00359
STACK_TRACE_START
00360
return spec->
GetSpecIndexByName(contigName);
00361
STACK_TRACE_END
00362 }
00363
00364 gnSeqI gnSequence::contigStart(
const uint32 contigI)
const{
00365
STACK_TRACE_START
00366
int64 leafI = contigI;
00367
00368
return spec->
GetSpecStartBase( leafI ) + 1;
00369
STACK_TRACE_END
00370 }
00371
00372 gnSeqI gnSequence::contigLength(
const uint32 contigI)
const{
00373
STACK_TRACE_START
00374
return spec->
GetSpec( contigI )->
GetLength();
00375
STACK_TRACE_END
00376 }
00377
00378 string
gnSequence::contigName(
const uint32 contigI)
const{
00379
STACK_TRACE_START
00380
return spec->
GetSpec(contigI)->
GetName();
00381
STACK_TRACE_END
00382 }
00383
00384 void gnSequence::globalToLocal(
uint32& contigI,
gnSeqI& baseI)
const{
00385
STACK_TRACE_START
00386 contigI =
contigIndexByBase(baseI);
00387 baseI -= (
contigStart(contigI) - 1);
00388
STACK_TRACE_END
00389 }
00390
00391 void gnSequence::localToGlobal(
const uint32 contigI,
gnSeqI& baseI)
const{
00392
STACK_TRACE_START
00393
if(baseI >
contigLength(contigI))
00394
Throw_gnEx(SeqIndexOutOfBounds());
00395 baseI +=
contigStart(contigI) - 1;
00396
STACK_TRACE_END
00397 }
00398
00399 void gnSequence::globalToSource(
uint32& contigI,
gnSeqI& baseI)
const{
00400
STACK_TRACE_START
00401 baseI--;
00402
gnSeqI seq_contig =
spec->
GetSpecIndexByBase(baseI);
00403
gnSeqI new_base = baseI -
spec->
GetSpecStartBase(seq_contig);
00404
gnFragmentSpec *fragment_spec =
spec->
GetSpec(seq_contig);
00405 seq_contig = fragment_spec->
GetSpecIndexByBase(new_base);
00406 new_base = new_base - fragment_spec->
GetSpecStartBase(seq_contig);
00407
gnContigSpec* contig_spec = fragment_spec->
GetSpec(seq_contig);
00408 contigI = contig_spec->
GetSourceContigIndex();
00409
gnSeqI contig_start = contig_spec->
GetStart();
00410
if(contig_spec->
IsReverseComplement()){
00411
gnSeqI source_len = contig_spec->
GetSourceLength();
00412
00413
00414
00415
00416
00417
00418 baseI = (contig_start - new_base - 1 + source_len) % source_len;
00419 }
else
00420 baseI = contig_start + new_base + 1;
00421
STACK_TRACE_END
00422 }
00423
00424 void gnSequence::localToSource(
uint32& contigI,
gnSeqI& baseI)
const{
00425
STACK_TRACE_START
00426
localToGlobal(contigI, baseI);
00427
globalToSource(contigI, baseI);
00428
STACK_TRACE_END
00429 }
00430
00431 gnSequence gnSequence::contigByName( string& contigName)
const{
00432
STACK_TRACE_START
00433
uint32 contigIndex =
spec->
GetSpecIndexByName(contigName);
00434
return gnSequence(*
spec->
GetSpec(contigIndex));
00435
STACK_TRACE_END
00436 }
00437
00438 void gnSequence::setContigName(
const uint32 contigI,
const string& contig_name) {
00439
STACK_TRACE_START
00440
if(contigI == ALL_CONTIGS)
00441
spec->
SetName(contig_name);
00442
else
00443
spec->
GetSpec(contigI)->
SetName(contig_name);
00444
STACK_TRACE_END
00445 }
00446
00447 void gnSequence::setReverseComplement(
const boolean revComp,
const uint32 contigI){
00448
STACK_TRACE_START
00449
if(contigI == ALL_CONTIGS)
00450
spec->
SetReverseComplement(revComp);
00451
else{
00452
gnFragmentSpec*
contig =
spec->
GetSpec(contigI);
00453 contig->
SetReverseComplement(revComp);
00454 }
00455
STACK_TRACE_END
00456 }
00457
00458 boolean gnSequence::isReverseComplement(
const uint32 contigI ){
00459
STACK_TRACE_START
00460
if(contigI == ALL_CONTIGS)
00461
return spec->
IsReverseComplement();
00462
gnFragmentSpec*
contig =
spec->
GetSpec(contigI);
00463
return contig->
IsReverseComplement();
00464
STACK_TRACE_END
00465 }
00466
00467 uint32 gnSequence::getHeaderListLength(
const uint32 contigI)
const{
00468
STACK_TRACE_START
00469
if(contigI == ALL_CONTIGS)
00470
return spec->
GetHeaderListLength();
00471
else{
00472
return spec->
GetSpec(contigI)->
GetHeaderListLength();
00473 }
00474
STACK_TRACE_END
00475 }
00476
00477 gnBaseHeader*
gnSequence::getHeader(
const uint32 contigI,
const uint32 headerI)
const{
00478
STACK_TRACE_START
00479
if(contigI == ALL_CONTIGS)
00480
return spec->
GetHeader(headerI);
00481
else{
00482
return spec->
GetSpec(contigI)->
GetHeader(headerI);
00483 }
00484
STACK_TRACE_END
00485 }
00486
00487 void gnSequence::addHeader(
const uint32 contigI,
gnBaseHeader* header,
const uint32 headerI){
00488
STACK_TRACE_START
00489
if(contigI == ALL_CONTIGS)
00490
spec->
AddHeader(header, headerI);
00491
else{
00492
spec->
GetSpec(contigI)->
AddHeader(header, headerI);
00493 }
00494
STACK_TRACE_END
00495 }
00496
00497 void gnSequence::removeHeader(
const uint32 contigI,
const uint32 headerI){
00498
STACK_TRACE_START
00499
if(contigI == ALL_CONTIGS)
00500
spec->
RemoveHeader(headerI);
00501
else
00502
spec->
GetSpec(contigI)->
RemoveHeader(headerI);
00503
STACK_TRACE_END
00504 }
00505
00506 void gnSequence::merge(
const gnSeqI startBase,
const gnSeqI endBase){
00507
STACK_TRACE_START
00508
STACK_TRACE_END
00509 }
00510
00511 void gnSequence::mergeContigs(
const uint32 startC,
const uint32 endC){
00512
STACK_TRACE_START
00513
spec->
MergeFragments(startC, endC);
00514
STACK_TRACE_END
00515 }
00516
00517 bool gnSequence::LoadSource(
const string sourcename){
00518
STACK_TRACE_START
00519
gnSourceFactory* m_sourceFactory =
gnSourceFactory::GetSourceFactory();
00520
gnBaseSource *m_pSource = m_sourceFactory->AddSource(sourcename,
true);
00521
if (m_pSource!=NULL)
00522 {
00523
if(
spec != NULL)
00524
delete spec;
00525
spec = m_pSource->
GetSpec();
00526
return true;
00527 }
00528
return false;
00529
STACK_TRACE_END
00530 }
00531
00532 gnSeqI gnSequence::find(
const gnSequence& search,
const gnSeqI offset)
const{
00533
STACK_TRACE_START
00534
00535 string searchIn=ToString();
00536 string find= search.ToString();
00537 string::size_type pos = searchIn.find(find, offset);
00538
if (pos == string::npos)
00539
return GNSEQI_ERROR;
00540
else{
00541
return pos++;
00542 }
00543
STACK_TRACE_END
00544 }