src/gnFastTranslator.cpp

Go to the documentation of this file.
00001 00002 // File: gnFastTranslator.h 00003 // Purpose: Filter for all Sequences 00004 // Description: translates, converts sequence 00005 // Changes: 00006 // Version: libGenome 0.5.1 00007 // Author: Aaron Darling 00008 // Modified by: 00009 // Copyright: (c) Aaron Darling 00010 // Licenses: See COPYING file for details 00012 #include "gn/gnFastTranslator.h" 00013 #include <iostream> 00014 00015 // static data access, avoids static initialization order fiasco 00016 const gnFastTranslator *gnFastTranslator::ProteinDNATranslator(){ 00017 const static gnFastTranslator* t_trans = new gnFastTranslator(ProteinDNATranslatorType); 00018 return t_trans; 00019 } 00020 const gnFastTranslator *gnFastTranslator::DNAProteinTranslator(){ 00021 const static gnFastTranslator* t_trans = new gnFastTranslator(DNAProteinTranslatorType); 00022 return t_trans; 00023 } 00024 00025 // public: 00026 gnFastTranslator::gnFastTranslator() 00027 { 00028 use_default = false; 00029 m_defaultChar = 0; 00030 } 00031 00032 gnFastTranslator::gnFastTranslator( const gnFastTranslator &sf ) 00033 { 00034 m_name = sf.m_name; 00035 use_default = sf.use_default; 00036 m_defaultChar = sf.m_defaultChar; 00037 m_transCache = sf.m_transCache; 00038 } 00039 gnFastTranslator::gnFastTranslator( gnTranslatorType t_type ) 00040 { 00041 use_default = false; 00042 m_defaultChar = 0; 00043 switch(t_type){ 00044 case ProteinDNATranslatorType: 00045 CacheTranslator(gnTranslator::ProteinDNATranslator(), "FLIMVPTAY.HQNKDECGSR", 1); 00046 break; 00047 case DNAProteinTranslatorType: 00048 CacheTranslator(gnTranslator::DNAProteinTranslator(), "ACGTRYKMBVDHSWNX", 3); 00049 break; 00050 } 00051 } 00052 00053 // gnSeqC 00054 gnSeqC gnFastTranslator::Filter( const gnSeqC ch ) const{ 00055 /* for(uint32 i=0; i < m_inputTable.size(); i++){ 00056 if(m_inputTable[i].length() == 1) 00057 if(compare->Contains(m_inputTable[i][0], ch)) 00058 return m_outputTable[i][0]; 00059 } 00060 */ return m_defaultChar; 00061 } 00062 00063 void gnFastTranslator::Filter( gnSeqC** seq, gnSeqI& len ) const{ 00064 /* uint32 curpos = 0; 00065 string output; 00066 while(curpos < len){ 00067 uint32 i=0; 00068 for(; i < m_inputTable.size(); i++){ 00069 //don't compare if there aren't enough chars 00070 uint32 curlen = m_inputTable[i].length(); 00071 if(len - curpos < curlen) 00072 continue; 00073 if(compare->Contains(m_inputTable[i].data(), *seq + curpos, curlen)){ 00074 output += m_outputTable[i]; 00075 curpos += curlen; 00076 break; 00077 } 00078 } 00079 if(i == m_inputTable.size()){ 00080 //no match was found. 00081 if(use_default) //fill with the default char? 00082 output += m_defaultChar; 00083 curpos++; 00084 } 00085 } 00086 if(output.length() > len){ 00087 delete[] *seq; 00088 *seq = new gnSeqC[output.length()]; 00089 } 00090 len = output.length(); 00091 memcpy(*seq, output.data(), len); 00092 */} 00093 // string 00094 void gnFastTranslator::Filter( string &seq ) const{ 00095 uint32 curpos = 0, outpos = 0; 00096 uint32 len = seq.length(); 00097 uint32 width = m_transCache.begin()->first.length(); 00098 uint32 out_width = m_transCache.begin()->second.length(); 00099 uint32 out_size = (seq.length() / width) * out_width + seq.length() % width + 1; 00100 gnSeqC* output_array = new gnSeqC[out_size]; 00101 output_array[out_size-1] = 0; 00102 string seq_upper; 00103 while(curpos < len){ 00104 //transform to upper case 00105 seq_upper = seq.substr(curpos, width); 00106 for(uint32 i=0; i < seq_upper.size(); i++) 00107 seq_upper[i] = toupper(seq_upper[i]); 00108 00109 map<string, string>::const_iterator iter = m_transCache.find(seq_upper); 00110 00111 if(iter == m_transCache.end()){ 00112 //no match was found. 00113 if(use_default) //fill with the default char? 00114 output_array[curpos] = m_defaultChar; 00115 curpos++; 00116 }else{ 00117 iter->second.copy(output_array + outpos, out_width); 00118 curpos += width; 00119 outpos += out_width; 00120 } 00121 } 00122 seq = output_array; 00123 } 00124 00125 void gnFastTranslator::CacheTranslator(const gnTranslator* tranny, string inputs, const gnSeqI input_width){ 00126 string cur_input; 00127 string cur_trans; 00128 vector<gnSeqI> index; 00129 gnSeqI cur_index = input_width; 00130 00131 //fill the index array with input_width 0's 00132 for(gnSeqI curI = 0; curI < input_width; curI++) 00133 index.push_back(0); 00134 00135 while(true){ 00136 //ensure the validity of our indices 00137 cur_index = input_width - 1; 00138 while(index[cur_index] == inputs.length()){ 00139 if(cur_index == 0){ 00140 return; 00141 } 00142 index[cur_index] = 0; 00143 cur_index--; 00144 index[cur_index]++; 00145 continue; 00146 } 00147 00148 //create a sequence to cache. 00149 for(gnSeqI i = 0; i < input_width; i++){ 00150 cur_input += inputs[index[i]]; 00151 } 00152 cur_trans = cur_input; 00153 tranny->Filter(cur_trans); 00154 m_transCache[cur_input] = cur_trans; 00155 // m_transCache.insert(map<string, string>::value_type(cur_input, cur_trans)); 00156 // prepare for next time thru the loop 00157 cur_input = ""; 00158 index[input_width - 1]++; 00159 } 00160 }

Generated on Mon Feb 14 19:28:20 2005 for libGenome by doxygen 1.3.8