00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00012
00013
#ifndef _gnGBKSource_h_
00014
#define _gnGBKSource_h_
00015
00016
#include "gn/gnDefs.h"
00017
00018
#include <string>
00019
#include <fstream>
00020
#include <vector>
00021
#include "gn/gnFileSource.h"
00022
#include "gn/gnFileContig.h"
00023
#include "gn/gnSourceSpec.h"
00024
#include "gn/gnSequence.h"
00025
00026 const uint32 SEQ_COLUMN_WIDTH = 80;
00027 const uint32 SEQ_HEADER_NAME_LENGTH = 11;
00028 const uint32 SEQ_SUBTAG_COLUMN = 5;
00029 const uint32 SEQ_LOCUS_CIRCULAR_COLUMN = 43;
00030 const uint32 SEQ_LOCUS_NAME_COLUMN = 13;
00031 const uint32 SEQ_LOCUS_NAME_LENGTH = 10;
00032 const uint32 SEQ_LOCUS_SIZE_LENGTH = 10;
00033 const uint32 SEQ_LOCUS_DNATYPE_OFFSET = 33;
00034 const uint32 SEQ_LOCUS_DNATYPE_LENGTH = 7;
00035 const uint32 SEQ_LOCUS_DIVCODE_OFFSET = 52;
00036 const uint32 SEQ_LOCUS_DIVCODE_LENGTH = 3;
00037 const uint32 SEQ_LOCUS_DATE_OFFSET = 62;
00038 const uint32 SEQ_LOCUS_DATE_LENGTH = 11;
00039 const uint32 SEQ_FEATURE_LOC_OFFSET = 21;
00040 const uint32 SEQ_BASES_INDEX_END = 9;
00041
00050 class GNDLLEXPORT gnGBKSource :
public gnFileSource
00051 {
00052
public:
00056 gnGBKSource();
00061 gnGBKSource(
const gnGBKSource& s );
00065 ~gnGBKSource();
00069 gnGBKSource*
Clone()
const;
00070
00071
uint32 GetContigListLength()
const;
00072
boolean HasContig(
const string& name )
const;
00073
uint32 GetContigID(
const string& name )
const;
00074 string
GetContigName(
const uint32 i )
const;
00075
gnSeqI GetContigSeqLength(
const uint32 i )
const;
00076
00077
boolean SeqRead(
const gnSeqI start,
char* buf,
gnSeqI& bufLen,
const uint32 contigI=ALL_CONTIGS );
00078
00085
static boolean Write(
gnSequence& seq,
const string& filename);
00092
static boolean Write(
gnBaseSource *source,
const string& filename);
00093
gnGenomeSpec *
GetSpec()
const;
00094
gnFileContig*
GetFileContig(
const uint32 contigI )
const;
00095
private:
00096
boolean SeqSeek(
const gnSeqI start,
const uint32& contigI,
uint64& startPos,
uint64& readableBytes );
00097
boolean SeqStartPos(
const gnSeqI start,
gnFileContig& contig,
uint64& startPos,
uint64& readableBytes );
00098
boolean ParseStream( istream& fin );
00099
00100
static string& Filler(
uint32 length);
00101
static void FormatString(string& data,
uint32 offset,
uint32 width);
00102
00103
00104 gnGenomeSpec *m_spec;
00105 vector< gnFileContig* > m_contigList;
00106 };
00107
00108
template<
class SubSpec >
00109
void WriteHeader(
gnMultiSpec< SubSpec >* spec,
const string& hdr, ofstream& m_ofstream);
00110
00111
00112
inline
00113 gnGBKSource*
gnGBKSource::Clone()
const
00114
{
00115
return new gnGBKSource( *
this );
00116 }
00117
00118
inline
00119 uint32 gnGBKSource::GetContigListLength()
const
00120
{
00121
return m_contigList.size();
00122 }
00123
inline
00124 boolean gnGBKSource::Write(
gnBaseSource *source,
const string& filename){
00125
gnSequence gns(*source->
GetSpec());
00126
return Write(gns, filename);
00127 }
00128
inline
00129 gnGenomeSpec *
gnGBKSource::GetSpec()
const{
00130
return m_spec->
Clone();
00131 }
00132
00133
#endif
00134