src/gnTranslator.cpp

Go to the documentation of this file.
00001 00002 // File: gnTranslator.h 00003 // Purpose: Filter for all Sequences 00004 // Description: translates, converts sequence 00005 // Changes: 00006 // Version: libGenome 0.5.1 00007 // Author: Aaron Darling 00008 // Modified by: 00009 // Copyright: (c) Aaron Darling 00010 // Licenses: See COPYING file for details 00012 #include "gn/gnTranslator.h" 00013 #include "gn/gnCompare.h" 00014 00015 // static data access, avoids static initialization order fiasco 00016 const gnTranslator *gnTranslator::ProteinDNATranslator(){ 00017 const static gnTranslator* t_trans = new gnTranslator(ProteinDNATranslatorType); 00018 return t_trans; 00019 } 00020 const gnTranslator *gnTranslator::ProteinRNATranslator(){ 00021 const static gnTranslator* t_trans = new gnTranslator(ProteinRNATranslatorType); 00022 return t_trans; 00023 } 00024 const gnTranslator *gnTranslator::DNAProteinTranslator(){ 00025 const static gnTranslator* t_trans = new gnTranslator(DNAProteinTranslatorType); 00026 return t_trans; 00027 } 00028 const gnTranslator *gnTranslator::RNAProteinTranslator(){ 00029 const static gnTranslator* t_trans = new gnTranslator(RNAProteinTranslatorType); 00030 return t_trans; 00031 } 00032 00033 // public: 00034 gnTranslator::gnTranslator() 00035 { 00036 use_default = false; 00037 m_defaultChar = 0; 00038 m_defaultInputWidth = 1; 00039 } 00040 00041 gnTranslator::gnTranslator( const gnTranslator &sf ) 00042 { 00043 m_name = sf.m_name; 00044 use_default = sf.use_default; 00045 m_defaultChar = sf.m_defaultChar; 00046 compare = sf.compare; 00047 m_inputTable = sf.m_inputTable; 00048 m_outputTable = sf.m_outputTable; 00049 m_defaultInputWidth = sf.m_defaultInputWidth; 00050 } 00051 gnTranslator::gnTranslator( gnTranslatorType t_type ) 00052 { 00053 use_default = false; 00054 m_defaultChar = 0; 00055 switch(t_type){ 00056 case ProteinDNATranslatorType: 00057 CreateProteinDNATranslator(); 00058 break; 00059 case ProteinRNATranslatorType: 00060 CreateProteinRNATranslator(); 00061 break; 00062 case DNAProteinTranslatorType: 00063 CreateDNAProteinTranslator(); 00064 break; 00065 case RNAProteinTranslatorType: 00066 CreateRNAProteinTranslator(); 00067 break; 00068 } 00069 } 00070 00071 // gnSeqC 00072 gnSeqC gnTranslator::Filter( const gnSeqC ch ) const{ 00073 for(uint32 i=0; i < m_inputTable.size(); i++){ 00074 if(m_inputTable[i].length() == 1) 00075 if(compare->Contains(m_inputTable[i][0], ch)) 00076 return m_outputTable[i][0]; 00077 } 00078 return m_defaultChar; 00079 } 00080 00081 void gnTranslator::Filter( gnSeqC** seq, gnSeqI& len ) const{ 00082 uint32 curpos = 0; 00083 string output; 00084 while(curpos < len){ 00085 uint32 i=0; 00086 for(; i < m_inputTable.size(); i++){ 00087 //don't compare if there aren't enough chars 00088 uint32 curlen = m_inputTable[i].length(); 00089 if(len - curpos < curlen) 00090 continue; 00091 if(compare->Contains(m_inputTable[i].data(), *seq + curpos, curlen)){ 00092 output += m_outputTable[i]; 00093 curpos += curlen; 00094 break; 00095 } 00096 } 00097 if(i == m_inputTable.size()){ 00098 //no match was found. 00099 if(use_default) //fill with the default char? 00100 output += m_defaultChar; 00101 curpos += m_defaultInputWidth; 00102 } 00103 } 00104 if(output.length() > len){ 00105 delete[] *seq; 00106 *seq = new gnSeqC[output.length()]; 00107 } 00108 len = output.length(); 00109 memcpy(*seq, output.data(), len); 00110 } 00111 // string 00112 void gnTranslator::Filter( string &seq ) const{ 00113 uint32 curpos = 0; 00114 uint32 len = seq.length(); 00115 string output; 00116 while(curpos < len){ 00117 uint32 i=0; 00118 for(; i < m_inputTable.size(); i++){ 00119 //don't compare if there aren't enough chars 00120 uint32 curlen = m_inputTable[i].length(); 00121 if(len - curpos < curlen) 00122 continue; 00123 if(compare->Contains(m_inputTable[i], seq.substr(curpos, curlen))){ 00124 output += m_outputTable[i]; 00125 curpos += curlen; 00126 break; 00127 } 00128 } 00129 if(i == m_inputTable.size()){ 00130 //no match was found. 00131 if(use_default) //fill with the default char? 00132 output += m_defaultChar; 00133 curpos += m_defaultInputWidth; 00134 } 00135 } 00136 seq = output; 00137 } 00138 00139 // fill map 00140 void gnTranslator::SetPair( const string& ch1, const string& ch2 ) 00141 { 00142 if(ch1.length() == 0) 00143 return; //cant have an empty input, empty output is ok 00144 00145 m_inputTable.push_back(ch1); 00146 m_outputTable.push_back(ch2); 00147 } 00148 00149 void gnTranslator::RemovePair( const string& ch ) 00150 { 00151 for(uint32 i=0; i < m_inputTable.size(); i++){ 00152 if(m_inputTable[i] == ch){ 00153 m_inputTable.erase(m_inputTable.begin()+i); 00154 m_outputTable.erase(m_outputTable.begin()+i); 00155 } 00156 } 00157 } 00158 00159 // standard comparators 00160 void gnTranslator::CreateProteinDNATranslator(){ 00161 SetName( "Protein to DNA Translator" ); 00162 00163 SetDefaultChar('X'); 00164 SetCompare(gnCompare::ProteinSeqCompare()); 00165 m_defaultInputWidth = 1; 00166 SetPair( "F", "TTY" ); 00167 SetPair( "L", "YTX" ); //fix this somehow. how? 00168 SetPair( "I", "ATH" ); 00169 SetPair( "M", "ATG" ); 00170 SetPair( "V", "GTX" ); 00171 SetPair( "P", "CCX" ); 00172 SetPair( "T", "ACX" ); 00173 SetPair( "A", "GCX" ); 00174 SetPair( "Y", "TAY" ); 00175 SetPair( ".", "TRR" );//fix this somehow. how? 00176 SetPair( "H", "CAY" ); 00177 SetPair( "Q", "CAR" ); 00178 SetPair( "N", "AAY" ); 00179 SetPair( "K", "AAR" ); 00180 SetPair( "D", "GAY" ); 00181 SetPair( "E", "GAR" ); 00182 SetPair( "C", "TGY" ); 00183 SetPair( "W", "TGG" ); 00184 SetPair( "G", "GGX" ); 00185 00186 SetPair( "S", "TCX" ); 00187 SetPair( "S", "AGY"); 00188 SetPair( "R", "CGX"); 00189 SetPair( "R", "AGR"); 00190 } 00191 00192 void gnTranslator::CreateProteinRNATranslator(){ 00193 SetName( "Protein to RNA Translator" ); 00194 SetDefaultChar('X'); 00195 SetCompare(gnCompare::ProteinSeqCompare()); 00196 m_defaultInputWidth = 1; 00197 00198 SetPair( "F", "UUY" ); 00199 SetPair( "L", "YUX" ); //fix this somehow. how? 00200 SetPair( "I", "AUH" ); 00201 SetPair( "M", "AUG" ); 00202 SetPair( "V", "GUX" ); 00203 SetPair( "P", "CCX" ); 00204 SetPair( "U", "ACX" ); 00205 SetPair( "A", "GCX" ); 00206 SetPair( "Y", "UAY" ); 00207 SetPair( ".", "URR" );//fix this somehow. how? 00208 SetPair( "H", "CAY" ); 00209 SetPair( "Q", "CAR" ); 00210 SetPair( "N", "AAY" ); 00211 SetPair( "K", "AAR" ); 00212 SetPair( "D", "GAY" ); 00213 SetPair( "E", "GAR" ); 00214 SetPair( "C", "UGY" ); 00215 SetPair( "W", "UGG" ); 00216 SetPair( "G", "GGX" ); 00217 00218 SetPair( "S", "UCX" ); 00219 SetPair( "S", "AGY"); 00220 SetPair( "R", "CGX"); 00221 SetPair( "R", "AGR"); 00222 } 00223 00224 void gnTranslator::CreateDNAProteinTranslator(){ 00225 SetName( "DNA to Protein Translator" ); 00226 SetDefaultChar('X'); 00227 SetCompare(gnCompare::DNASeqCompare()); 00228 m_defaultInputWidth = 3; 00229 use_default = true; 00230 00231 SetPair( "TTY", "F" ); 00232 SetPair( "CTX", "L" ); 00233 SetPair( "TTR", "L" ); 00234 SetPair( "ATH", "I" ); 00235 SetPair( "ATG", "M" ); 00236 SetPair( "GTX", "V" ); 00237 SetPair( "CCX", "P" ); 00238 SetPair( "ACX", "T" ); 00239 SetPair( "GCX", "A" ); 00240 SetPair( "TAY", "Y" ); 00241 SetPair( "TGG", "W" ); 00242 SetPair( "TGA", "." ); 00243 SetPair( "TAR", "." ); 00244 SetPair( "CAY", "H" ); 00245 SetPair( "CAR", "Q" ); 00246 SetPair( "AAY", "N" ); 00247 SetPair( "AAR", "K" ); 00248 SetPair( "GAY", "D" ); 00249 SetPair( "GAR", "E" ); 00250 SetPair( "TGY", "C" ); 00251 SetPair( "GGX", "G" ); 00252 00253 SetPair( "TCX", "S" ); 00254 SetPair( "AGY", "S" ); 00255 SetPair( "CGX", "R" ); 00256 SetPair( "AGR", "R" ); 00257 00258 SetPair( "tty", "F" ); 00259 SetPair( "ctx", "L" ); 00260 SetPair( "ttr", "L" ); 00261 SetPair( "ath", "I" ); 00262 SetPair( "atg", "M" ); 00263 SetPair( "gtx", "V" ); 00264 SetPair( "ccx", "P" ); 00265 SetPair( "acx", "T" ); 00266 SetPair( "gcx", "A" ); 00267 SetPair( "tay", "Y" ); 00268 SetPair( "tgg", "W" ); 00269 SetPair( "tga", "." ); 00270 SetPair( "tar", "." ); 00271 SetPair( "cay", "H" ); 00272 SetPair( "car", "Q" ); 00273 SetPair( "aay", "N" ); 00274 SetPair( "aar", "K" ); 00275 SetPair( "gay", "D" ); 00276 SetPair( "gar", "E" ); 00277 SetPair( "tgy", "C" ); 00278 SetPair( "ggx", "G" ); 00279 00280 SetPair( "tcx", "S" ); 00281 SetPair( "agy", "S" ); 00282 SetPair( "cgx", "R" ); 00283 SetPair( "agr", "R" ); 00284 00285 } 00286 00287 void gnTranslator::CreateRNAProteinTranslator(){ 00288 SetName( "RNA to Protein Translator" ); 00289 SetDefaultChar('X'); 00290 SetCompare(gnCompare::RNASeqCompare()); 00291 m_defaultInputWidth = 3; 00292 use_default = true; 00293 00294 SetPair( "UUY", "F" ); 00295 SetPair( "CUX", "L" ); 00296 SetPair( "UUR", "L" ); 00297 SetPair( "AUH", "I" ); 00298 SetPair( "AUG", "M" ); 00299 SetPair( "GUX", "V" ); 00300 SetPair( "CCX", "P" ); 00301 SetPair( "ACX", "T" ); 00302 SetPair( "GCX", "A" ); 00303 SetPair( "UAY", "Y" ); 00304 SetPair( "UGG", "W" ); 00305 SetPair( "UGA", "." ); 00306 SetPair( "UAR", "." ); 00307 SetPair( "CAY", "H" ); 00308 SetPair( "CAR", "Q" ); 00309 SetPair( "AAY", "N" ); 00310 SetPair( "AAR", "K" ); 00311 SetPair( "GAY", "D" ); 00312 SetPair( "GAR", "E" ); 00313 SetPair( "UGY", "C" ); 00314 SetPair( "GGX", "G" ); 00315 00316 SetPair( "UCX", "S" ); 00317 SetPair( "AGY", "S" ); 00318 SetPair( "CGX", "R" ); 00319 SetPair( "AGR", "R" ); 00320 00321 00322 SetPair( "uuy", "F" ); 00323 SetPair( "cux", "L" ); 00324 SetPair( "uur", "L" ); 00325 SetPair( "auh", "I" ); 00326 SetPair( "aug", "M" ); 00327 SetPair( "gux", "V" ); 00328 SetPair( "ccx", "P" ); 00329 SetPair( "acx", "T" ); 00330 SetPair( "gcx", "A" ); 00331 SetPair( "uay", "Y" ); 00332 SetPair( "ugg", "W" ); 00333 SetPair( "uga", "." ); 00334 SetPair( "uar", "." ); 00335 SetPair( "cay", "H" ); 00336 SetPair( "car", "Q" ); 00337 SetPair( "aay", "N" ); 00338 SetPair( "aar", "K" ); 00339 SetPair( "gay", "D" ); 00340 SetPair( "gar", "E" ); 00341 SetPair( "ugy", "C" ); 00342 SetPair( "ggx", "G" ); 00343 00344 SetPair( "ucx", "S" ); 00345 SetPair( "agy", "S" ); 00346 SetPair( "cgx", "R" ); 00347 SetPair( "agr", "R" ); 00348 }

Generated on Mon Feb 14 19:28:21 2005 for libGenome by doxygen 1.3.8