src/gnFilter.cpp

Go to the documentation of this file.
00001 00002 // File: gnFilter.h 00003 // Purpose: Filter for all Sequences 00004 // Description: Filters sequences, translates, reverse complement, converts 00005 // additions, etc. 00006 // Changes: 00007 // Version: libGenome 0.5.1 00008 // Author: Aaron Darling 00009 // Modified by: 00010 // Copyright: (c) Aaron Darling 00011 // Licenses: See COPYING file for details 00013 #include "gn/gnFilter.h" 00014 #include "gn/gnDebug.h" 00015 00016 // public: 00017 const gnFilter *gnFilter::alphabetCharacterFilter(){ 00018 const static gnFilter* t_filt = new gnFilter(alphabetCharacterFilterType); 00019 return t_filt; 00020 } 00021 00022 const gnFilter *gnFilter::numberCharacterFilter(){ 00023 const static gnFilter* t_filt = new gnFilter(numberCharacterFilterType); 00024 return t_filt; 00025 } 00026 00027 00028 const gnFilter *gnFilter::proteinSeqFilter(){ 00029 const static gnFilter* t_filt = new gnFilter(proteinSeqFilterType); 00030 return t_filt; 00031 } 00032 00033 const gnFilter *gnFilter::basicDNASeqFilter(){ 00034 const static gnFilter* t_filt = new gnFilter(basicDNASeqFilterType); 00035 return t_filt; 00036 } 00037 00038 const gnFilter *gnFilter::fullDNASeqFilter(){ 00039 const static gnFilter* t_filt = new gnFilter(fullDNASeqFilterType); 00040 return t_filt; 00041 } 00042 00043 const gnFilter *gnFilter::basicRNASeqFilter(){ 00044 const static gnFilter* t_filt = new gnFilter(basicRNASeqFilterType); 00045 return t_filt; 00046 } 00047 00048 const gnFilter *gnFilter::fullRNASeqFilter(){ 00049 const static gnFilter* t_filt = new gnFilter(fullRNASeqFilterType); 00050 return t_filt; 00051 } 00052 00053 const gnFilter *gnFilter::DNAtoRNAFilter(){ 00054 const static gnFilter* t_filt = new gnFilter(DNAtoRNAFilterType); 00055 return t_filt; 00056 } 00057 00058 const gnFilter *gnFilter::RNAtoDNAFilter(){ 00059 const static gnFilter* t_filt = new gnFilter(RNAtoDNAFilterType); 00060 return t_filt; 00061 } 00062 00063 const gnFilter *gnFilter::DNAComplementFilter(){ 00064 const static gnFilter* t_filt = new gnFilter(DNAComplementFilterType); 00065 return t_filt; 00066 } 00067 00068 const gnFilter *gnFilter::RNAComplementFilter(){ 00069 const static gnFilter* t_filt = new gnFilter(RNAComplementFilterType); 00070 return t_filt; 00071 } 00072 00073 00074 // public: 00075 gnFilter::gnFilter() 00076 { 00077 m_defaultChar = 'n'; 00078 m_rDefaultChar = 'n'; 00079 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i ) 00080 m_pairArray[i] = NO_REVCOMP_CHAR; 00081 } 00082 gnFilter::gnFilter( const gnSeqC defaultChar, const gnSeqC rdefaultChar ) 00083 { 00084 m_defaultChar = defaultChar; 00085 m_rDefaultChar = rdefaultChar; 00086 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i ) 00087 m_pairArray[i] = NO_REVCOMP_CHAR; 00088 } 00089 00090 gnFilter::gnFilter( const gnFilter &sf ) 00091 { 00092 m_name = sf.m_name; 00093 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i ) 00094 m_pairArray[i] = sf.m_pairArray[i]; 00095 m_defaultChar = sf.m_defaultChar; 00096 m_rDefaultChar = sf.m_rDefaultChar; 00097 } 00098 00099 gnFilter::gnFilter( const gnFilterType f_type ){ 00100 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i ) 00101 m_pairArray[i] = NO_REVCOMP_CHAR; 00102 switch(f_type){ 00103 case alphabetCharacterFilterType: 00104 CreateAlphabetCharacterFilter(); 00105 break; 00106 case numberCharacterFilterType: 00107 CreateNumberCharacterFilter(); 00108 break; 00109 case proteinSeqFilterType: 00110 CreateProteinFilter(); 00111 break; 00112 case basicDNASeqFilterType: 00113 CreateBasicDNAFilter(); 00114 break; 00115 case fullDNASeqFilterType: 00116 CreateFullDNAFilter(); 00117 break; 00118 case basicRNASeqFilterType: 00119 CreateBasicRNAFilter(); 00120 break; 00121 case fullRNASeqFilterType: 00122 CreateFullRNAFilter(); 00123 break; 00124 case DNAtoRNAFilterType: 00125 CreateDNAtoRNAFilter(); 00126 break; 00127 case RNAtoDNAFilterType: 00128 CreateRNAtoDNAFilter(); 00129 break; 00130 case DNAComplementFilterType: 00131 CreateDNAComplementFilter(); 00132 break; 00133 case RNAComplementFilterType: 00134 CreateRNAComplementFilter(); 00135 break; 00136 } 00137 } 00138 00139 00140 gnFilter::~gnFilter() 00141 { 00142 } 00143 00144 inline 00145 void gnFilter::Filter( gnSeqC** seq, gnSeqI& len ) const 00146 { 00147 Array<gnSeqC> array_buf( len ); 00148 gnSeqC* tmp = array_buf.data; 00149 gnSeqI c=0; 00150 for(uint32 i=0; i < len; i++) 00151 if(IsValid((*seq)[i])) 00152 tmp[c++] = m_pairArray[(*seq)[i]]; 00153 len = c; 00154 memcpy(*seq, tmp, len); 00155 } 00156 00157 void gnFilter::ReverseFilter( gnSeqC** seq, gnSeqI& len ) const 00158 { 00159 gnSeqC tmp, dum; 00160 uint32 halfLen = len/2; 00161 uint32 end = len - 1; 00162 uint32 curB = 0; 00163 uint32 curE = end; 00164 for( uint32 i=0; i < halfLen ; ++i ) 00165 { 00166 tmp = m_pairArray[(*seq)[i]]; 00167 dum = m_pairArray[(*seq)[ end - i ]]; 00168 if(dum != NO_REVCOMP_CHAR) 00169 (*seq)[ curB++ ] = dum; 00170 if(tmp != NO_REVCOMP_CHAR) 00171 (*seq)[ curE-- ] = tmp; 00172 } 00173 if(len&0x1){ 00174 tmp = m_pairArray[(*seq)[halfLen]]; 00175 if(tmp != NO_REVCOMP_CHAR) 00176 (*seq)[curB++] = tmp; 00177 } 00178 // now for the memmove 00179 if(curE >= curB){ 00180 memmove(*seq+curB, *seq+curE+1, end - curE); 00181 len = end - curE + curB; 00182 } 00183 00184 } 00185 00186 void gnFilter::Filter( string &seq ) const 00187 { 00188 gnSeqI c=0; 00189 for(uint32 i=0; i < seq.length(); i++) 00190 if(IsValid(seq[i])) 00191 seq[c++] = m_pairArray[seq[i]]; 00192 } 00193 00194 void gnFilter::ReverseFilter( string &seq ) const 00195 { 00196 gnSeqC tmp, dum; 00197 uint32 halfLen = seq.length()/2; 00198 uint32 end = seq.length() - 1; 00199 uint32 curB = 0; 00200 uint32 curE = end; 00201 for( uint32 i=0; i < halfLen ; ++i ) 00202 { 00203 tmp = m_pairArray[seq[i]]; 00204 dum = m_pairArray[seq[ end - i ]]; 00205 if(dum != NO_REVCOMP_CHAR) 00206 seq[ curB++ ] = dum; 00207 if(tmp != NO_REVCOMP_CHAR) 00208 seq[ curE-- ] = tmp; 00209 } 00210 if(seq.length()&0x1){ 00211 tmp = m_pairArray[seq[halfLen]]; 00212 if(tmp != NO_REVCOMP_CHAR) 00213 seq[curB++] = tmp; 00214 } 00215 // now for the memmove 00216 if(curE >= curB){ 00217 seq.erase(curB, curE-curB); 00218 } 00219 } 00220 00221 // standard filters 00222 void gnFilter::CreateAlphabetCharacterFilter() 00223 { 00224 SetDefaultChar( 0, 0 ); 00225 SetName( "Alphabet Character Filter" ); 00226 SetPair( 'A', 'a' ); 00227 SetPair( 'B', 'b' ); 00228 SetPair( 'C', 'c' ); 00229 SetPair( 'D', 'd' ); 00230 SetPair( 'E', 'e' ); 00231 SetPair( 'F', 'f' ); 00232 SetPair( 'G', 'g' ); 00233 SetPair( 'H', 'h' ); 00234 SetPair( 'I', 'i' ); 00235 SetPair( 'J', 'j' ); 00236 SetPair( 'K', 'k' ); 00237 SetPair( 'L', 'l' ); 00238 SetPair( 'M', 'm' ); 00239 SetPair( 'N', 'n' ); 00240 SetPair( 'O', 'o' ); 00241 SetPair( 'P', 'p' ); 00242 SetPair( 'Q', 'q' ); 00243 SetPair( 'R', 'r' ); 00244 SetPair( 'S', 's' ); 00245 SetPair( 'T', 't' ); 00246 SetPair( 'U', 'u' ); 00247 SetPair( 'V', 'v' ); 00248 SetPair( 'W', 'w' ); 00249 SetPair( 'X', 'x' ); 00250 SetPair( 'Y', 'y' ); 00251 SetPair( 'Z', 'z' ); 00252 } 00253 00254 void gnFilter::CreateNumberCharacterFilter() 00255 { 00256 SetDefaultChar( 0, 0 ); 00257 SetName( "Number Character Filter" ); 00258 SetSingle( '0' ); 00259 SetSingle( '1' ); 00260 SetSingle( '2' ); 00261 SetSingle( '3' ); 00262 SetSingle( '4' ); 00263 SetSingle( '5' ); 00264 SetSingle( '6' ); 00265 SetSingle( '7' ); 00266 SetSingle( '8' ); 00267 SetSingle( '9' ); 00268 } 00269 00270 void gnFilter::CreateProteinFilter() 00271 { 00272 SetDefaultChar( 'u', 'u' ); 00273 SetName( "Protein Filter" ); 00274 SetSingle( 'A' ); 00275 SetSingle( 'R' ); 00276 SetSingle( 'N' ); 00277 SetSingle( 'D' ); 00278 SetSingle( 'C' ); 00279 SetSingle( 'Q' ); 00280 SetSingle( 'E' ); 00281 SetSingle( 'G' ); 00282 SetSingle( 'H' ); 00283 SetSingle( 'I' ); 00284 SetSingle( 'L' ); 00285 SetSingle( 'K' ); 00286 SetSingle( 'M' ); 00287 SetSingle( 'F' ); 00288 SetSingle( 'P' ); 00289 SetSingle( 'S' ); 00290 SetSingle( 'T' ); 00291 SetSingle( 'W' ); 00292 SetSingle( 'Y' ); 00293 SetSingle( 'V' ); 00294 00295 SetSingle( 'a' ); 00296 SetSingle( 'r' ); 00297 SetSingle( 'n' ); 00298 SetSingle( 'd' ); 00299 SetSingle( 'c' ); 00300 SetSingle( 'q' ); 00301 SetSingle( 'e' ); 00302 SetSingle( 'g' ); 00303 SetSingle( 'h' ); 00304 SetSingle( 'i' ); 00305 SetSingle( 'l' ); 00306 SetSingle( 'k' ); 00307 SetSingle( 'm' ); 00308 SetSingle( 'f' ); 00309 SetSingle( 'p' ); 00310 SetSingle( 's' ); 00311 SetSingle( 't' ); 00312 SetSingle( 'w' ); 00313 SetSingle( 'y' ); 00314 SetSingle( 'v' ); 00315 } 00316 00317 void gnFilter::CreateBasicDNAFilter() 00318 { 00319 SetDefaultChar( 'n', 'n' ); 00320 SetName( "Basic DNA Filter" ); 00321 SetSingle( 'a' ); 00322 SetSingle( 'c' ); 00323 SetSingle( 'g' ); 00324 SetSingle( 't' ); 00325 SetSingle( 'A' ); 00326 SetSingle( 'C' ); 00327 SetSingle( 'G' ); 00328 SetSingle( 'T' ); 00329 SetSingle( 'n' ); 00330 SetSingle( 'N' ); 00331 SetSingle( 'x' ); 00332 SetSingle( 'X' ); 00333 SetSingle( '-' ); 00334 } 00335 void gnFilter::CreateFullDNAFilter() 00336 { 00337 SetDefaultChar( 'n', 'n' ); 00338 SetName( "Full DNA Filter" ); 00339 SetSingle( 'a' ); 00340 SetSingle( 'c' ); 00341 SetSingle( 'g' ); 00342 SetSingle( 't' ); 00343 SetSingle( 'A' ); 00344 SetSingle( 'C' ); 00345 SetSingle( 'G' ); 00346 SetSingle( 'T' ); 00347 SetSingle( 'r' ); 00348 SetSingle( 'y' ); 00349 SetSingle( 'k' ); 00350 SetSingle( 'm' ); 00351 SetSingle( 'b' ); 00352 SetSingle( 'v' ); 00353 SetSingle( 'd' ); 00354 SetSingle( 'h' ); 00355 SetSingle( 'R' ); 00356 SetSingle( 'Y' ); 00357 SetSingle( 'K' ); 00358 SetSingle( 'M' ); 00359 SetSingle( 'B' ); 00360 SetSingle( 'V' ); 00361 SetSingle( 'D' ); 00362 SetSingle( 'H' ); 00363 SetSingle( 's' ); 00364 SetSingle( 'S' ); 00365 SetSingle( 'w' ); 00366 SetSingle( 'W' ); 00367 SetSingle( 'n' ); 00368 SetSingle( 'N' ); 00369 SetSingle( 'x' ); 00370 SetSingle( 'X' ); 00371 SetSingle( '-' ); 00372 } 00373 void gnFilter::CreateBasicRNAFilter() 00374 { 00375 SetDefaultChar( 'n', 'n' ); 00376 SetName( "Basic RNA Filter" ); 00377 SetSingle( 'a' ); 00378 SetSingle( 'c' ); 00379 SetSingle( 'g' ); 00380 SetSingle( 'u' ); 00381 SetSingle( 'A' ); 00382 SetSingle( 'C' ); 00383 SetSingle( 'G' ); 00384 SetSingle( 'U' ); 00385 SetSingle( 'n' ); 00386 SetSingle( 'N' ); 00387 SetSingle( '-' ); 00388 } 00389 void gnFilter::CreateFullRNAFilter() 00390 { 00391 SetDefaultChar( 'n', 'n' ); 00392 SetName( "Full RNA Filter" ); 00393 SetSingle( 'a' ); 00394 SetSingle( 'c' ); 00395 SetSingle( 'g' ); 00396 SetSingle( 'u' ); 00397 SetSingle( 'A' ); 00398 SetSingle( 'C' ); 00399 SetSingle( 'G' ); 00400 SetSingle( 'U' ); 00401 SetSingle( 'r' ); 00402 SetSingle( 'y' ); 00403 SetSingle( 'k' ); 00404 SetSingle( 'm' ); 00405 SetSingle( 'b' ); 00406 SetSingle( 'v' ); 00407 SetSingle( 'd' ); 00408 SetSingle( 'h' ); 00409 SetSingle( 'R' ); 00410 SetSingle( 'Y' ); 00411 SetSingle( 'K' ); 00412 SetSingle( 'M' ); 00413 SetSingle( 'B' ); 00414 SetSingle( 'V' ); 00415 SetSingle( 'D' ); 00416 SetSingle( 'H' ); 00417 SetSingle( 's' ); 00418 SetSingle( 'S' ); 00419 SetSingle( 'w' ); 00420 SetSingle( 'W' ); 00421 SetSingle( 'n' ); 00422 SetSingle( 'N' ); 00423 SetSingle( '-' ); 00424 } 00425 00426 00427 void gnFilter::CreateDNAtoRNAFilter(){ 00428 SetDefaultChar( 'n', 'n' ); 00429 SetName( "Full DNA to RNA Filter" ); 00430 SetSingle( 'a' ); 00431 SetSingle( 'c' ); 00432 SetSingle( 'g' ); 00433 SetPair( 't', 'u' ); 00434 SetSingle( 'A' ); 00435 SetSingle( 'C' ); 00436 SetSingle( 'G' ); 00437 SetPair( 'T', 'U' ); 00438 SetSingle( 'r' ); 00439 SetSingle( 'y' ); 00440 SetSingle( 'k' ); 00441 SetSingle( 'm' ); 00442 SetSingle( 'b' ); 00443 SetSingle( 'v' ); 00444 SetSingle( 'd' ); 00445 SetSingle( 'h' ); 00446 SetSingle( 'R' ); 00447 SetSingle( 'Y' ); 00448 SetSingle( 'K' ); 00449 SetSingle( 'M' ); 00450 SetSingle( 'B' ); 00451 SetSingle( 'V' ); 00452 SetSingle( 'D' ); 00453 SetSingle( 'H' ); 00454 SetSingle( 's' ); 00455 SetSingle( 'S' ); 00456 SetSingle( 'w' ); 00457 SetSingle( 'W' ); 00458 SetSingle( 'n' ); 00459 SetSingle( 'N' ); 00460 SetSingle( '-' ); 00461 } 00462 00463 void gnFilter::CreateRNAtoDNAFilter(){ 00464 SetDefaultChar( 'n', 'n' ); 00465 SetName( "Full RNA to DNA Filter" ); 00466 SetSingle( 'a' ); 00467 SetSingle( 'c' ); 00468 SetSingle( 'g' ); 00469 SetPair( 'u', 't' ); 00470 SetSingle( 'A' ); 00471 SetSingle( 'C' ); 00472 SetSingle( 'G' ); 00473 SetPair( 'U', 'T' ); 00474 SetSingle( 'r' ); 00475 SetSingle( 'y' ); 00476 SetSingle( 'k' ); 00477 SetSingle( 'm' ); 00478 SetSingle( 'b' ); 00479 SetSingle( 'v' ); 00480 SetSingle( 'd' ); 00481 SetSingle( 'h' ); 00482 SetSingle( 'R' ); 00483 SetSingle( 'Y' ); 00484 SetSingle( 'K' ); 00485 SetSingle( 'M' ); 00486 SetSingle( 'B' ); 00487 SetSingle( 'V' ); 00488 SetSingle( 'D' ); 00489 SetSingle( 'H' ); 00490 SetSingle( 's' ); 00491 SetSingle( 'S' ); 00492 SetSingle( 'w' ); 00493 SetSingle( 'W' ); 00494 SetSingle( 'n' ); 00495 SetSingle( 'N' ); 00496 SetSingle( '-' ); 00497 } 00498 00499 void gnFilter::CreateDNAComplementFilter(){ 00500 SetDefaultChar( 'n', 'n' ); 00501 SetName( "Full DNA Complement Filter" ); 00502 SetPair( 'a', 't' ); 00503 SetPair( 'A', 'T' ); 00504 SetPair( 't', 'a' ); 00505 SetPair( 'T', 'A' ); 00506 SetPair( 'c', 'g' ); 00507 SetPair( 'C', 'G' ); 00508 SetPair( 'g', 'c' ); 00509 SetPair( 'G', 'C' ); 00510 SetPair( 'r', 'y' ); 00511 SetPair( 'R', 'Y' ); 00512 SetPair( 'y', 'r' ); 00513 SetPair( 'Y', 'R' ); 00514 SetPair( 'k', 'm' ); 00515 SetPair( 'K', 'M' ); 00516 SetPair( 'm', 'k' ); 00517 SetPair( 'M', 'K' ); 00518 SetSingle( 's' ); 00519 SetSingle( 'S' ); 00520 SetSingle( 'w' ); 00521 SetSingle( 'W' ); 00522 SetPair( 'b', 'v' ); 00523 SetPair( 'B', 'V' ); 00524 SetPair( 'v', 'b' ); 00525 SetPair( 'V', 'B' ); 00526 SetPair( 'd', 'h' ); 00527 SetPair( 'D', 'H' ); 00528 SetPair( 'h', 'd' ); 00529 SetPair( 'H', 'D' ); 00530 SetSingle( 'n' ); 00531 SetSingle( 'N' ); 00532 SetSingle( 'x' ); 00533 SetSingle( 'X' ); 00534 SetSingle( '-' ); 00535 } 00536 00537 void gnFilter::CreateRNAComplementFilter(){ 00538 SetDefaultChar( 'n', 'n' ); 00539 SetName( "Full RNA Complement Filter" ); 00540 SetPair( 'a', 'u' ); 00541 SetPair( 'A', 'U' ); 00542 SetPair( 'u', 'a' ); 00543 SetPair( 'U', 'A' ); 00544 SetPair( 'c', 'g' ); 00545 SetPair( 'C', 'G' ); 00546 SetPair( 'g', 'c' ); 00547 SetPair( 'G', 'C' ); 00548 SetPair( 'r', 'y' ); 00549 SetPair( 'R', 'Y' ); 00550 SetPair( 'y', 'r' ); 00551 SetPair( 'Y', 'R' ); 00552 SetPair( 'k', 'm' ); 00553 SetPair( 'K', 'M' ); 00554 SetPair( 'm', 'k' ); 00555 SetPair( 'M', 'K' ); 00556 SetSingle( 's' ); 00557 SetSingle( 'S' ); 00558 SetSingle( 'w' ); 00559 SetSingle( 'W' ); 00560 SetPair( 'b', 'v' ); 00561 SetPair( 'B', 'V' ); 00562 SetPair( 'v', 'b' ); 00563 SetPair( 'V', 'B' ); 00564 SetPair( 'd', 'h' ); 00565 SetPair( 'D', 'H' ); 00566 SetPair( 'h', 'd' ); 00567 SetPair( 'H', 'D' ); 00568 SetSingle( 'n' ); 00569 SetSingle( 'N' ); 00570 SetSingle( '-' ); 00571 }

Generated on Mon Feb 14 19:28:20 2005 for libGenome by doxygen 1.3.8