MpegCodec.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2004 MUSIC TECHNOLOGY GROUP (MTG)
00003  *                         UNIVERSITAT POMPEU FABRA
00004  *
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #if USE_MAD != 1
00023 #error with_mad was not set to 1 with scons, but you are including files that require this. Please fix your settings.cfg
00024 #endif
00025 
00026 #if USE_ID3 != 1
00027 #error with_id3 was not set to 1 with scons, but you are including files that require this. Please fix your settings.cfg
00028 #endif
00029 
00030 
00031 #include "MpegCodec.hxx"
00032 #include "AudioFileFormats.hxx"
00033 #include "AudioFile.hxx"
00034 #include "AudioFileHeader.hxx"
00035 #include "MpegBitstream.hxx"
00036 #include "MpegAudioStream.hxx"
00037 #include <mad.h>
00038 #include <id3/tag.h>
00039 #include <cstdio>
00040 #include <iostream>
00041 #include <sys/types.h>
00042 #include <sys/stat.h>
00043 
00044 namespace CLAM
00045 {
00046 
00047 namespace AudioCodecs
00048 {
00049 
00050 
00051 /* XING parsing is from the MAD winamp input plugin */
00052 /* Ripped mercilessly from mpg321 */
00053 
00054         struct xing {
00055                 int flags;
00056                 unsigned long frames;
00057                 unsigned long bytes;
00058                 unsigned char toc[100];
00059                 long scale;
00060         };
00061         
00062         enum {
00063                 XING_FRAMES = 0x0001,
00064                 XING_BYTES  = 0x0002,
00065                 XING_TOC    = 0x0004,
00066                 XING_SCALE  = 0x0008
00067         };
00068         
00069 # define XING_MAGIC     (('X' << 24) | ('i' << 16) | ('n' << 8) | 'g')
00070         
00071         static
00072         int parse_xing(struct xing *xing, struct mad_bitptr ptr, unsigned int bitlen)
00073         {
00074                 if (bitlen < 64 || mad_bit_read(&ptr, 32) != XING_MAGIC)
00075                         goto fail;
00076                 
00077                 xing->flags = mad_bit_read(&ptr, 32);
00078                 bitlen -= 64;
00079                 
00080                 if (xing->flags & XING_FRAMES) {
00081                         if (bitlen < 32)
00082                                 goto fail;
00083                         
00084                         xing->frames = mad_bit_read(&ptr, 32);
00085                         bitlen -= 32;
00086                 }
00087                 
00088                 if (xing->flags & XING_BYTES) {
00089                         if (bitlen < 32)
00090                                 goto fail;
00091                         
00092                         xing->bytes = mad_bit_read(&ptr, 32);
00093                         bitlen -= 32;
00094                 }
00095                 
00096                 if (xing->flags & XING_TOC) {
00097                         int i;
00098                         
00099                         if (bitlen < 800)
00100                                 goto fail;
00101                         
00102                         // MRJ: We just need the 8 least significant bits
00103                         for (i = 0; i < 100; ++i)
00104                                 xing->toc[i] = (unsigned char)mad_bit_read(&ptr, 8);
00105                         
00106                         bitlen -= 800;
00107                 }
00108                 
00109                 if (xing->flags & XING_SCALE) {
00110                         if (bitlen < 32)
00111                                 goto fail;
00112                         
00113                         xing->scale = mad_bit_read(&ptr, 32);
00114                         bitlen -= 32;
00115                 }
00116                 
00117                 return 1;
00118                 
00119         fail:
00120                 xing->flags = 0;
00121                 return 0;
00122         }
00123 
00124 
00125 
00126         MpegCodec::MpegCodec()
00127         {
00128 
00129         }
00130 
00131         MpegCodec::~MpegCodec()
00132         {
00133         }
00134 
00135         MpegCodec& MpegCodec::Instantiate()
00136         {
00137                 static MpegCodec theInstance;
00138 
00139                 return theInstance;
00140         }
00141 
00142         bool MpegCodec::IsReadable( std::string uri ) const
00143         {
00144                 // We will just check there is a Mpeg frame in the given
00145                 // file
00146 
00147                 FILE* handle = fopen( uri.c_str(), "rb" );
00148 
00149                 if ( !handle ) // File doesn't exists / not readable
00150                         return false;
00151 
00152                 MpegBitstream bitstream( handle );
00153 
00154                 bitstream.Init();
00155 
00156                 bool   foundSomeMpegFrame = false;
00157                 //Unused variable: int    status = 0;
00158 
00159                 while( !foundSomeMpegFrame 
00160                        && !bitstream.EOS() && !bitstream.FatalError() )
00161                         foundSomeMpegFrame = bitstream.NextFrame();
00162 
00163                 bitstream.Finish();
00164                 fclose( handle );
00165 
00166                 if ( uri.size() > 4 )
00167                 {
00168 
00169                         std::string::size_type startExt = uri.rfind( '.' );
00170 
00171                         if ( startExt != std::string::npos )
00172                         {
00173 
00174                                 std::string ext;
00175                                 ext.assign( uri, startExt+1, uri.size()-startExt+1 );
00176                                 
00177                                 
00178                                 if ( ext != "mp3" && ext != "mpg" )
00179                                         return false;
00180                         }
00181                 }
00182 
00183 
00184                 return foundSomeMpegFrame;
00185         }
00186 
00187         bool MpegCodec::IsWritable( std::string uri, const AudioFileHeader& header ) const
00188         {
00189                 // CLAM does not encode Mpeg
00190                 return false;
00191         }
00192 
00193         Stream* MpegCodec::GetStreamFor( const AudioFile& file )
00194         {
00195                 return new MpegAudioStream(file);
00196         }
00197 
00198         void MpegCodec::RetrieveHeaderData( std::string uri, AudioFileHeader& hdr )
00199         {
00200 
00201                 FILE* handle = fopen( uri.c_str(), "rb" );
00202 
00203                 if ( !handle ) // File doesn't exists / not readable
00204                         return;
00205 
00206                 struct stat fileStats;
00207 
00208                 if ( stat( uri.c_str(), &fileStats ) != 0 )
00209                 {
00210                         // Error reading stats from file
00211                         fclose(handle);
00212                         return;
00213                 }
00214 
00215                 unsigned long fileLength = fileStats.st_size;
00216                 
00217                 if ( fseek( handle, -128, SEEK_END ) < 0 )
00218                 {
00219                         /* File empty */
00220                         fclose(handle);
00221                         return;
00222                 }
00223 
00224                 char buffer[3];
00225 
00226                 if ( fread( buffer, 1, 3, handle ) != 3 )
00227                 {
00228                         fclose(handle);
00229                         return;
00230                 }
00231 
00232                 if ( !strncmp( buffer, "TAG", 3 ) )
00233                 {
00234                         fileLength -=128;
00235                 }
00236 
00237                 fclose( handle );
00238                 handle = fopen( uri.c_str(), "rb" );
00239 
00240                 hdr.AddSampleRate();
00241                 hdr.AddChannels();
00242                 hdr.AddSamples();
00243                 hdr.AddFormat();
00244                 hdr.AddEncoding();
00245                 hdr.AddEndianess();
00246                 hdr.AddLength();
00247                 hdr.UpdateData();
00248                 
00249                 MpegBitstream bitstream( handle );
00250 
00251                 bitstream.Init();
00252 
00253                 int frameCount = 0;
00254                 struct xing xingHeader;
00255                 xingHeader.frames=0;
00256                 bool   hasXingHeader = false;
00257                 bool   isVBR = false;
00258                 unsigned int    bitrate = 0;
00259                 
00260                 /* There are three ways of calculating the length of an mp3:
00261                    1) Constant bitrate: One frame can provide the information
00262                    needed: # of frames and duration. Just see how long it
00263                    is and do the division.
00264                    2) Variable bitrate: Xing tag. It provides the number of 
00265                    frames. Each frame has the same number of samples, so
00266                    just use that.
00267                    3) All: Count up the frames and duration of each frames
00268                    by decoding each one. We do this if we've no other
00269                    choice, i.e. if it's a VBR file with no Xing tag.
00270                 */
00271                 
00272                 long numFrames = 0;
00273 
00274                 while ( !bitstream.FatalError() && bitstream.NextFrame() )
00275                 {
00276                         if ( frameCount == 0 ) // first frame, for retrieving info about encoding
00277                         {
00278                                 RetrieveMPEGFrameInfo( bitstream.CurrentFrame(),
00279                                                        hdr );
00280                                 if ( parse_xing( &xingHeader, 
00281                                                  bitstream.StreamState().anc_ptr, 
00282                                                  bitstream.StreamState().anc_bitlen ) )
00283                                 {
00284                                         isVBR = true;
00285                                         
00286                                         if ( xingHeader.flags & XING_FRAMES )
00287                                         {
00288                                                 /* We use the Xing tag only for frames. If it doesn't have that
00289                                                    information, it's useless to us and we have to treat it as a
00290                                                    normal VBR file */
00291                                                 hasXingHeader = true;
00292                         numFrames = xingHeader.frames;
00293                                                 break;                                  
00294                                         }
00295                                 }       
00296                                 bitrate = bitstream.CurrentFrame().header.bitrate;
00297                         }
00298                         
00299                         if ( frameCount <= 20 )
00300                         {
00301                                 if ( bitstream.CurrentFrame().header.bitrate != bitrate )
00302                                         isVBR = true;
00303                                 else
00304                                         bitrate = bitstream.CurrentFrame().header.bitrate;
00305                         }                      
00306                         
00307                         if ( !isVBR && frameCount > 20 )
00308                                 break;
00309                         
00310                         frameCount++;
00311                 }
00312 
00313                 mad_timer_t   madFmtTime;
00314 
00315                 if ( !isVBR )
00316                 {
00317                         double time = ( fileLength * 8.0 ) / bitstream.CurrentFrame().header.bitrate;
00318                         double timeFrac = (double)time - ((long)(time));
00319                         long   nsamples = 32 * MAD_NSBSAMPLES(&bitstream.CurrentFrame().header); // samples per frame
00320                         numFrames = ( long) ( time * bitstream.CurrentFrame().header.samplerate / nsamples );
00321                         
00322                         mad_timer_set( &madFmtTime, (long)time, (long)(timeFrac*100), 100 );
00323 
00324                         bitstream.Finish();
00325 
00326             //std::cerr << "Not VBR: " << (TTime)mad_timer_count( madFmtTime, MAD_UNITS_MILLISECONDS )/1000.  << std::endl;
00327                         hdr.SetLength( (TTime)mad_timer_count( madFmtTime, MAD_UNITS_MILLISECONDS ) );
00328                 }
00329                 else if ( hasXingHeader )
00330                 {
00331                         mad_timer_multiply( &bitstream.CurrentFrame().header.duration,
00332                                             numFrames );
00333                         madFmtTime = bitstream.CurrentFrame().header.duration;
00334 
00335                         bitstream.Finish();
00336 
00337             //std::cerr << "Has XING Header: "<< (TTime)mad_timer_count( madFmtTime, MAD_UNITS_MILLISECONDS )/1000.  << std::endl;
00338                         hdr.SetLength( (TTime)mad_timer_count( madFmtTime, MAD_UNITS_MILLISECONDS ) );
00339                 }
00340                 else
00341                 { 
00342 
00343                         TTime decodedFramesLength = bitstream.Finish();
00344             //std::cerr << "Brute force time guessing: " <<  decodedFramesLength/1000. << " s" << std::endl;
00345                         hdr.SetLength( decodedFramesLength );
00346                 }
00347                 
00348                 // @TODO@: Find a way to estimate reasonably well the actual
00349                 // number of samples.
00350                 hdr.SetSamples(  TSize((hdr.GetLength()/1000.)*hdr.GetSampleRate()) );
00351                 hdr.SetEndianess( EAudioFileEndianess::eDefault );
00352 
00353                 fclose( handle );
00354                 
00355         }
00356 
00357         void MpegCodec::RetrieveMPEGFrameInfo( const struct mad_frame& MPEGFrame,
00358                                                AudioFileHeader& header )
00359         {
00360                 switch( MPEGFrame.header.layer )
00361                 {
00362                 case MAD_LAYER_I:
00363                         header.SetFormat( EAudioFileFormat::eMpegLayer1 );
00364                         break;
00365                 case MAD_LAYER_II:
00366                         header.SetFormat( EAudioFileFormat::eMpegLayer2 );
00367                         break;
00368                 case MAD_LAYER_III:
00369                         header.SetFormat( EAudioFileFormat::eMpegLayer3 );
00370                         break;
00371                 default:
00372                         break;         
00373                 }
00374 
00375                 switch( MPEGFrame.header.emphasis )
00376                 {
00377                 case MAD_EMPHASIS_NONE:
00378                         header.SetEncoding( EAudioFileEncoding::eDefault );
00379                         break;
00380                 case MAD_EMPHASIS_50_15_US:
00381                         header.SetEncoding( EAudioFileEncoding::e5015US );
00382                         break;
00383                 case MAD_EMPHASIS_CCITT_J_17:
00384                         header.SetEncoding( EAudioFileEncoding::eCCITTJ17 );
00385                         break;
00386                 default:
00387                         break;
00388                 }
00389                 
00390                 header.SetSampleRate( TData(MPEGFrame.header.samplerate) );
00391                 header.SetChannels( MAD_NCHANNELS(&MPEGFrame.header) );
00392         }
00393 
00394 
00395         void   MpegCodec::RetrieveTextDescriptors( std::string uri, AudioTextDescriptors& txt )
00396         {
00397 
00398                 ID3_Tag fileTag;
00399 
00400                 fileTag.Link( uri.c_str() );
00401 
00402                 ID3_Frame* artistFrame = fileTag.Find( ID3FID_LEADARTIST );
00403 
00404                 if ( artistFrame != NULL )
00405                 {
00406                         txt.AddArtist();
00407                         txt.UpdateData();
00408                         ID3_Field* artistStr = artistFrame->GetField( ID3FN_TEXT );
00409                         
00410                         if ( artistStr != NULL )
00411                         {
00412                                 if ( artistStr->GetRawText() != NULL )
00413                                         txt.SetArtist( artistStr->GetRawText() );
00414                         }
00415                 }
00416 
00417                 ID3_Frame* titleFrame = fileTag.Find( ID3FID_TITLE );
00418 
00419                 if ( titleFrame != NULL )
00420                 {
00421                         txt.AddTitle();
00422                         txt.UpdateData();
00423                         ID3_Field* titleStr = titleFrame->GetField( ID3FN_TEXT );
00424 
00425                         if ( titleStr!=NULL )
00426                                 if ( titleStr->GetRawText() != NULL )
00427                                         txt.SetTitle( titleStr->GetRawText() );
00428                 }
00429 
00430                 ID3_Frame* albumFrame = fileTag.Find( ID3FID_ALBUM );
00431 
00432                 if ( albumFrame != NULL )
00433                 {
00434                         txt.AddAlbum();
00435                         txt.UpdateData();
00436                         ID3_Field* albumStr = albumFrame->GetField( ID3FN_TEXT );
00437 
00438                         if ( albumStr != NULL )
00439                                 if ( albumStr->GetRawText() != NULL )
00440                                         txt.SetAlbum( albumStr->GetRawText() );
00441                 }
00442 
00443                 ID3_Frame* tracknumFrame = fileTag.Find( ID3FID_TRACKNUM );
00444 
00445                 if ( tracknumFrame != NULL )
00446                 {
00447                         txt.AddTrackNumber();
00448                         txt.UpdateData();
00449 
00450                         ID3_Field* tracknumStr = tracknumFrame->GetField( ID3FN_TEXT );
00451                         
00452                         if ( tracknumStr != NULL )
00453                                 if ( tracknumStr->GetRawText() != NULL )
00454                                         txt.SetTrackNumber( tracknumStr->GetRawText() );
00455                 }
00456 
00457                 ID3_Frame* composerFrame = fileTag.Find( ID3FID_COMPOSER );
00458 
00459                 if ( composerFrame != NULL )
00460                 {
00461                         txt.AddComposer();
00462                         txt.UpdateData();
00463 
00464                         ID3_Field* composerStr = composerFrame->GetField( ID3FN_TEXT );
00465 
00466                         if ( composerStr != NULL )
00467                                 if ( composerStr->GetRawText() != NULL )
00468                                         txt.SetComposer( composerStr->GetRawText() );
00469                 }
00470 
00471                 ID3_Frame* performerFrame = fileTag.Find( ID3FID_CONDUCTOR );
00472 
00473                 if ( performerFrame != NULL )
00474                 {
00475                         txt.AddPerformer();
00476                         txt.UpdateData();
00477 
00478                         ID3_Field* performerStr = performerFrame->GetField( ID3FN_TEXT );
00479 
00480                         if ( performerStr != NULL )
00481                                 if ( performerStr->GetRawText() != NULL )
00482                                         txt.SetPerformer( performerStr->GetRawText() );
00483                 }
00484 
00485         }
00486 
00487 }
00488 
00489 }
00490 

Generated on Tue Aug 12 22:33:43 2008 for CLAM by  doxygen 1.5.5