From 9278a7ec07a4f3579cafd952ff61ecf696fe37ef Mon Sep 17 00:00:00 2001 From: David Howden Date: Thu, 19 Mar 2015 23:21:53 +1100 Subject: [PATCH] Initial commit --- LICENSE | 23 +++++ hash.go | 143 ++++++++++++++++++++++++++ hash/hash.go | 37 +++++++ id3v1.go | 139 +++++++++++++++++++++++++ id3v2.go | 234 ++++++++++++++++++++++++++++++++++++++++++ id3v2frames.go | 199 +++++++++++++++++++++++++++++++++++ id3v2metadata.go | 121 ++++++++++++++++++++++ id3v2metadata_test.go | 29 ++++++ mp4.go | 223 ++++++++++++++++++++++++++++++++++++++++ tag.go | 93 +++++++++++++++++ tag/tag.go | 78 ++++++++++++++ util.go | 71 +++++++++++++ 12 files changed, 1390 insertions(+) create mode 100644 LICENSE create mode 100644 hash.go create mode 100644 hash/hash.go create mode 100644 id3v1.go create mode 100644 id3v2.go create mode 100644 id3v2frames.go create mode 100644 id3v2metadata.go create mode 100644 id3v2metadata_test.go create mode 100644 mp4.go create mode 100644 tag.go create mode 100644 tag/tag.go create mode 100644 util.go diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..dfd760c --- /dev/null +++ b/LICENSE @@ -0,0 +1,23 @@ +Copyright 2015, David Howden +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/hash.go b/hash.go new file mode 100644 index 0000000..37ae981 --- /dev/null +++ b/hash.go @@ -0,0 +1,143 @@ +package tag + +import ( + "crypto/sha1" + "encoding/binary" + "fmt" + "io" + "io/ioutil" + "os" +) + +// Hash creates a hash of the audio file data provided by the io.ReadSeeker +// which ignores metadata (ID3, MP4) associated with the file. +func Hash(r io.ReadSeeker) (string, error) { + b, err := readBytes(r, 11) + if err != nil { + return "", err + } + + _, err = r.Seek(0, os.SEEK_SET) + if err != nil { + return "", err + } + + if string(b[4:11]) == "ftypM4A" { + return HashAtoms(r) + } + + if string(b[0:3]) == "ID3" { + return HashID3v2(r) + } + + h, err := HashID3v1(r) + if err != nil { + if err == ErrNotID3v1 { + return HashAll(r) + } + return "", err + } + return h, nil +} + +// HashAll returns a hash of the entire content. 
+func HashAll(r io.ReadSeeker) (string, error) { + b, err := ioutil.ReadAll(r) + if err != nil { + return "", nil + } + return hash(b), nil +} + +func HashAtoms(r io.ReadSeeker) (string, error) { + for { + var size uint32 + err := binary.Read(r, binary.BigEndian, &size) + if err != nil { + if err == io.EOF { + return "", fmt.Errorf("reached EOF before audio data") + } + return "", err + } + + name, err := readString(r, 4) + if err != nil { + return "", err + } + + switch name { + case "meta": + // next_item_id (int32) + _, err := readBytes(r, 4) + if err != nil { + return "", err + } + fallthrough + + case "moov", "udta", "ilst": + return HashAtoms(r) + + case "free": + _, err = r.Seek(int64(size-8), os.SEEK_CUR) + if err != nil { + return "", fmt.Errorf("error reading 'free' space: %v", err) + } + continue + + case "mdat": // stop when we get to the data + b, err := readBytes(r, int(size-8)) + if err != nil { + return "", fmt.Errorf("error reading audio data: %v", err) + } + return hash(b), nil + } + + _, err = r.Seek(int64(size-8), os.SEEK_CUR) + if err != nil { + return "", fmt.Errorf("error reading '%v' tag: %v", name, err) + } + } +} + +func HashID3v1(r io.ReadSeeker) (string, error) { + _, err := r.Seek(0, os.SEEK_SET) + if err != nil { + return "", err + } + + b, err := ioutil.ReadAll(r) + if err != nil { + return "", err + } + + if len(b) < 128 { + return "", fmt.Errorf("file size must be greater than 128 bytes for ID3v1 metadata (size: %v)", len(b)) + } + return hash(b[:len(b)-128]), nil +} + +func HashID3v2(r io.ReadSeeker) (string, error) { + h, err := readID3v2Header(r) + if err != nil { + return "", fmt.Errorf("error reading ID3v2 header: %v", err) + } + + _, err = r.Seek(int64(h.Size), os.SEEK_SET) + if err != nil { + return "", fmt.Errorf("error seeking to end of ID3V2 header: %v", err) + } + + b, err := ioutil.ReadAll(r) + if err != nil { + return "", fmt.Errorf("error reading audio data: %v", err) + } + + if len(b) < 128 { + return "", 
fmt.Errorf("file size must be greater than 128 bytes for MP3 (ID3v2 header size: %d, remaining: %d)", h.Size, len(b)) + } + return hash(b[:len(b)-128]), nil +} + +func hash(b []byte) string { + return fmt.Sprintf("%x", sha1.Sum(b)) +} diff --git a/hash/hash.go b/hash/hash.go new file mode 100644 index 0000000..502a81c --- /dev/null +++ b/hash/hash.go @@ -0,0 +1,37 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +The hash tool constructs a hash of a media file exluding any metadata +(as recognised by the tag library). +*/ +package main + +import ( + "fmt" + "os" + + "github.com/dhowden/tag" +) + +func main() { + if len(os.Args) != 2 { + fmt.Printf("usage: %v filename\n", os.Args[0]) + return + } + + f, err := os.Open(os.Args[1]) + if err != nil { + fmt.Printf("error loading file: %v", err) + os.Exit(1) + } + defer f.Close() + + h, err := tag.Hash(f) + if err != nil { + fmt.Printf("error constructing hash: %v\n", err) + os.Exit(1) + } + fmt.Println(h) +} diff --git a/id3v1.go b/id3v1.go new file mode 100644 index 0000000..c37dfac --- /dev/null +++ b/id3v1.go @@ -0,0 +1,139 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tag + +import ( + "bytes" + "errors" + "io" + "io/ioutil" + "strconv" + "strings" +) + +// ID3v1Genres is a list of genres as given in the ID3v1 specification. 
+var ID3v1Genres = [...]string{ + "Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk", "Grunge", + "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies", "Other", "Pop", "R&B", + "Rap", "Reggae", "Rock", "Techno", "Industrial", "Alternative", "Ska", + "Death Metal", "Pranks", "Soundtrack", "Euro-Techno", "Ambient", + "Trip-Hop", "Vocal", "Jazz+Funk", "Fusion", "Trance", "Classical", + "Instrumental", "Acid", "House", "Game", "Sound Clip", "Gospel", + "Noise", "AlternRock", "Bass", "Soul", "Punk", "Space", "Meditative", + "Instrumental Pop", "Instrumental Rock", "Ethnic", "Gothic", + "Darkwave", "Techno-Industrial", "Electronic", "Pop-Folk", + "Eurodance", "Dream", "Southern Rock", "Comedy", "Cult", "Gangsta", + "Top 40", "Christian Rap", "Pop/Funk", "Jungle", "Native American", + "Cabaret", "New Wave", "Psychadelic", "Rave", "Showtunes", "Trailer", + "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz", "Polka", "Retro", + "Musical", "Rock & Roll", "Hard Rock", "Folk", "Folk-Rock", + "National Folk", "Swing", "Fast Fusion", "Bebob", "Latin", "Revival", + "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock", "Progressive Rock", + "Psychedelic Rock", "Symphonic Rock", "Slow Rock", "Big Band", + "Chorus", "Easy Listening", "Acoustic", "Humour", "Speech", "Chanson", + "Opera", "Chamber Music", "Sonata", "Symphony", "Booty Bass", "Primus", + "Porn Groove", "Satire", "Slow Jam", "Club", "Tango", "Samba", + "Folklore", "Ballad", "Power Ballad", "Rhythmic Soul", "Freestyle", + "Duet", "Punk Rock", "Drum Solo", "Acapella", "Euro-House", "Dance Hall", +} + +// ErrNotID3v1 is an error which is returned when no ID3v1 header is found. +var ErrNotID3v1 = errors.New("invalid ID3v1 header") + +// ReadID3v1Tags reads ID3v1 tags from the given io.Reader. Returns a non-nil error +// if there was a problem. 
+func ReadID3v1Tags(r io.Reader) (Metadata, error) { + b, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + + b = b[len(b)-128 : len(b)] + r = bytes.NewReader(b) + if tag, err := readString(r, 3); err != nil { + return nil, err + } else if tag != "TAG" { + return nil, ErrNotID3v1 + } + + title, err := readString(r, 30) + if err != nil { + return nil, err + } + + artist, err := readString(r, 30) + if err != nil { + return nil, err + } + + album, err := readString(r, 30) + if err != nil { + return nil, err + } + + year, err := readString(r, 4) + if err != nil { + return nil, err + } + + commentBytes, err := readBytes(r, 29) + if err != nil { + return nil, err + } + + var comment string + var track int + if commentBytes[27] == 0 { + comment = strings.TrimSpace(string(commentBytes[:28])) + track = int(commentBytes[28]) + } + + var genre string + genreID, err := readBytes(r, 1) + if err != nil { + return nil, err + } + if int(genreID[0]) < len(ID3v1Genres) { + genre = ID3v1Genres[int(genreID[0])] + } + + m := make(map[string]interface{}) + m["title"] = strings.TrimSpace(title) + m["artist"] = strings.TrimSpace(artist) + m["album"] = strings.TrimSpace(album) + m["year"] = strings.TrimSpace(year) + m["comment"] = strings.TrimSpace(comment) + m["track"] = track + m["genre"] = genre + + return metadataID3v1(m), nil +} + +// metadataID3v1 is the implementation of Metadata used for ID3v1 tags. 
+type metadataID3v1 map[string]interface{} + +func (metadataID3v1) Format() Format { return ID3v1 } +func (m metadataID3v1) Raw() map[string]interface{} { return m } + +func (m metadataID3v1) Title() string { return m["title"].(string) } +func (m metadataID3v1) Album() string { return m["album"].(string) } +func (m metadataID3v1) Artist() string { return m["artist"].(string) } +func (m metadataID3v1) Genre() string { return m["genre"].(string) } + +func (m metadataID3v1) Year() int { + y := m["year"].(string) + n, err := strconv.Atoi(y) + if err != nil { + return 0 + } + return n +} + +func (m metadataID3v1) Track() (int, int) { return m["track"].(int), 0 } + +func (m metadataID3v1) AlbumArtist() string { return "" } +func (m metadataID3v1) Composer() string { return "" } +func (metadataID3v1) Disc() (int, int) { return 0, 0 } +func (m metadataID3v1) Picture() *Picture { return nil } diff --git a/id3v2.go b/id3v2.go new file mode 100644 index 0000000..b1c30bf --- /dev/null +++ b/id3v2.go @@ -0,0 +1,234 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tag + +import ( + "fmt" + "io" + "strings" +) + +// ID3v2Header is a type which represents an ID3v2 tag header. +type ID3v2Header struct { + Version Format + Unsynchronisation bool + ExtendedHeader bool + Experimental bool + Size int +} + +// readID3v2Header reads the ID3v2 header from the given io.Reader. 
+func readID3v2Header(r io.Reader) (*ID3v2Header, error) { + b, err := readBytes(r, 10) + if err != nil { + return nil, fmt.Errorf("expected to read 10 bytes (ID3v2Header): %v", err) + } + + if string(b[0:3]) != "ID3" { + return nil, fmt.Errorf("expected to read \"ID3\"") + } + + b = b[3:] + var vers Format + switch uint(b[0]) { + case 2: + vers = ID3v2_2 + case 3: + vers = ID3v2_3 + case 4: + vers = ID3v2_4 + case 0, 1: + fallthrough + default: + return nil, fmt.Errorf("ID3 version: %v, expected: 2, 3 or 4", uint(b[0])) + } + + // NB: We ignore b[1] (the revision) as we don't currently rely on it. + return &ID3v2Header{ + Version: vers, + Unsynchronisation: getBit(b[2], 7), + ExtendedHeader: getBit(b[2], 6), + Experimental: getBit(b[2], 5), + Size: get7BitChunkedInt(b[3:7]), + }, nil +} + +// ID3v2FrameFlags is a type which represents the flags which can be set on an ID3v2 frame. +type ID3v2FrameFlags struct { + // Message + TagAlterPreservation bool + FileAlterPreservation bool + ReadOnly bool + + // Format + GroupIdentity bool + Compression bool + Encryption bool + Unsynchronisation bool + DataLengthIndicator bool +} + +func readID3v2FrameFlags(r io.Reader) (*ID3v2FrameFlags, error) { + b, err := readBytes(r, 2) + if err != nil { + return nil, err + } + + msg := b[0] + fmt := b[1] + + return &ID3v2FrameFlags{ + TagAlterPreservation: getBit(msg, 6), + FileAlterPreservation: getBit(msg, 5), + ReadOnly: getBit(msg, 4), + GroupIdentity: getBit(fmt, 7), + Compression: getBit(fmt, 3), + Encryption: getBit(fmt, 2), + Unsynchronisation: getBit(fmt, 1), + DataLengthIndicator: getBit(fmt, 0), + }, nil +} + +func readID3v2_2FrameHeader(r io.Reader) (name string, size int, headerSize int, err error) { + name, err = readString(r, 3) + if err != nil { + return + } + size, err = readInt(r, 3) + if err != nil { + return + } + headerSize = 6 + return +} + +func readID3v2_3FrameHeader(r io.Reader) (name string, size int, headerSize int, err error) { + name, err = readString(r, 4) 
+ if err != nil { + return + } + size, err = readInt(r, 4) + if err != nil { + return + } + headerSize = 8 + return +} + +func readID3v2_4FrameHeader(r io.Reader) (name string, size int, headerSize int, err error) { + name, err = readString(r, 4) + if err != nil { + return + } + size, err = read7BitChunkedInt(r, 4) + if err != nil { + return + } + headerSize = 8 + return +} + +// readID3v2Frames reads ID3v2 frames from the given reader using the ID3v2Header. +func readID3v2Frames(r io.Reader, h *ID3v2Header) (map[string]interface{}, error) { + offset := 10 // the size of the header + result := make(map[string]interface{}) + + for offset < h.Size { + var err error + var name string + var size, headerSize int + var flags *ID3v2FrameFlags + + switch h.Version { + case ID3v2_2: + name, size, headerSize, err = readID3v2_2FrameHeader(r) + + case ID3v2_3: + name, size, headerSize, err = readID3v2_3FrameHeader(r) + if err != nil { + return nil, err + } + flags, err = readID3v2FrameFlags(r) + headerSize += 2 + + case ID3v2_4: + name, size, headerSize, err = readID3v2_4FrameHeader(r) + if err != nil { + return nil, err + } + flags, err = readID3v2FrameFlags(r) + headerSize += 2 + } + + if err != nil { + return nil, err + } + offset += headerSize + size + + // Check this stuff out... + if flags != nil && flags.DataLengthIndicator { + _, err = read7BitChunkedInt(r, 4) // read 4 + if err != nil { + return nil, err + } + size -= 4 + } + + if flags != nil && flags.Unsynchronisation { + // FIXME: Implement this. 
+ continue + } + + name = strings.TrimSpace(name) + if name == "" { + break + } + + b, err := readBytes(r, size) + if err != nil { + return nil, err + } + + switch { + case name[0] == 'T': + txt, err := readTFrame(b) + if err != nil { + return nil, err + } + result[name] = txt + + case name == "APIC": + p, err := readAPICFrame(b) + if err != nil { + return nil, err + } + result[name] = p + + case name == "PIC": + p, err := readPICFrame(b) + if err != nil { + return nil, err + } + result[name] = p + } + + continue + } + return result, nil +} + +// ReadID3v2Tags parses ID3v2.{2,3,4} tags from the given io.Reader into a Metadata, returning +// non-nil error on failure. +func ReadID3v2Tags(r io.Reader) (Metadata, error) { + h, err := readID3v2Header(r) + if err != nil { + return nil, err + } + f, err := readID3v2Frames(r, h) + if err != nil { + return nil, err + } + return metadataID3v2{header: h, frames: f}, nil +} diff --git a/id3v2frames.go b/id3v2frames.go new file mode 100644 index 0000000..389017c --- /dev/null +++ b/id3v2frames.go @@ -0,0 +1,199 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
// readTFrame decodes the body of a text frame (encoding byte followed by
// text) and removes every NUL character from the result.
func readTFrame(b []byte) (string, error) {
	txt, err := parseText(b)
	if err != nil {
		return "", err
	}
	return strings.Replace(txt, "\x00", "", -1), nil
}

// parseText decodes b, whose first byte selects the encoding of the rest.
// An empty b decodes to the empty string.
func parseText(b []byte) (string, error) {
	if len(b) == 0 {
		return "", nil
	}
	return decodeText(b[0], b[1:])
}

// decodeText converts b to a string according to the encoding byte enc.
// It returns an error for an unknown encoding byte or a bad UTF-16 byte
// order marker. An empty b decodes to the empty string for any enc.
func decodeText(enc byte, b []byte) (string, error) {
	if len(b) == 0 {
		return "", nil
	}

	switch enc {
	case 0: // ISO-8859-1
		return decodeISO8859(b), nil

	case 1: // UTF-16 with byte order marker
		return decodeUTF16WithBOM(b)

	case 2: // UTF-16 without byte order (assuming BigEndian)
		return decodeUTF16(b, binary.BigEndian), nil

	case 3: // UTF-8
		return string(b), nil

	default:
		return "", fmt.Errorf("invalid encoding byte %x", enc)
	}
}

// decodeISO8859 maps every byte of b to the rune with the same value.
func decodeISO8859(b []byte) string {
	r := make([]rune, len(b))
	for i, x := range b {
		r[i] = rune(x)
	}
	return string(r)
}

// decodeUTF16WithBOM decodes UTF-16 text which starts with a byte order
// marker. It returns an error if the marker is missing or not recognised.
func decodeUTF16WithBOM(b []byte) (string, error) {
	if len(b) < 2 {
		return "", fmt.Errorf("invalid byte order marker: need 2 bytes, have %d", len(b))
	}

	var bo binary.ByteOrder
	switch {
	case b[0] == 0xFE && b[1] == 0xFF:
		bo = binary.BigEndian

	case b[0] == 0xFF && b[1] == 0xFE:
		bo = binary.LittleEndian

	default:
		return "", fmt.Errorf("invalid byte order marker %x %x", b[0], b[1])
	}
	return decodeUTF16(b[2:], bo), nil
}

// decodeUTF16 decodes b as UTF-16 code units in byte order bo. A trailing odd
// byte cannot form a code unit and is ignored.
func decodeUTF16(b []byte, bo binary.ByteOrder) string {
	s := make([]uint16, 0, len(b)/2)
	for i := 0; i+1 < len(b); i += 2 {
		s = append(s, bo.Uint16(b[i:i+2]))
	}
	return string(utf16.Decode(s))
}

// splitTerminated splits b at the first text terminator for encoding enc:
// two zero bytes on a 2-byte boundary for the UTF-16 encodings (1 and 2), a
// single zero byte otherwise. text is what precedes the terminator, rest what
// follows it; ok is false when b holds no terminator.
func splitTerminated(enc byte, b []byte) (text, rest []byte, ok bool) {
	if enc == 1 || enc == 2 {
		for i := 0; i+1 < len(b); i += 2 {
			if b[i] == 0 && b[i+1] == 0 {
				return b[:i], b[i+2:], true
			}
		}
		return nil, nil, false
	}

	i := bytes.IndexByte(b, 0)
	if i < 0 {
		return nil, nil, false
	}
	return b[:i], b[i+1:], true
}

// pictureTypes maps the picture type byte of a PIC/APIC frame to a description.
var pictureTypes = map[byte]string{
	0x00: "Other",
	0x01: "32x32 pixels 'file icon' (PNG only)",
	0x02: "Other file icon",
	0x03: "Cover (front)",
	0x04: "Cover (back)",
	0x05: "Leaflet page",
	0x06: "Media (e.g. lable side of CD)",
	0x07: "Lead artist/lead performer/soloist",
	0x08: "Artist/performer",
	0x09: "Conductor",
	0x0A: "Band/Orchestra",
	0x0B: "Composer",
	0x0C: "Lyricist/text writer",
	0x0D: "Recording Location",
	0x0E: "During recording",
	0x0F: "During performance",
	0x10: "Movie/video screen capture",
	0x11: "A bright coloured fish",
	0x12: "Illustration",
	0x13: "Band/artist logotype",
	0x14: "Publisher/Studio logotype",
}

// Picture is a type which represents an attached picture extracted from metadata.
type Picture struct {
	Ext         string // Extension of the picture file.
	MIMEType    string // MIMEType of the picture.
	Type        string // Type of the picture (see pictureTypes).
	Description string // Description.
	Data        []byte // Raw picture data.
}

// String returns a string representation of the underlying Picture instance.
func (p Picture) String() string {
	return fmt.Sprintf("Picture{Ext: %v, MIMEType: %v, Type: %v, Description: %v, Data.Size: %v}",
		p.Ext, p.MIMEType, p.Type, p.Description, len(p.Data))
}

// IDv2.2
// -- Header
// Attached picture "PIC"
// Frame size $xx xx xx
// -- readPICFrame
// Text encoding $xx
// Image format $xx xx xx
// Picture type $xx
// Description $00 (00)
// Picture data
//
// readPICFrame decodes the body of a "PIC" frame. It returns an error if the
// body is too short to hold the fixed fields, if the description has no
// terminator, or if the description cannot be decoded. Ext is the image format
// exactly as stored; MIMEType is derived from it without regard to case and is
// empty for formats other than JPG and PNG.
func readPICFrame(b []byte) (*Picture, error) {
	// encoding (1) + image format (3) + picture type (1)
	if len(b) < 5 {
		return nil, fmt.Errorf("PIC frame too short: %d bytes", len(b))
	}
	enc := b[0]
	ext := string(b[1:4])
	picType := b[4]

	descBytes, data, ok := splitTerminated(enc, b[5:])
	if !ok {
		return nil, fmt.Errorf("PIC frame description is not terminated")
	}
	desc, err := decodeText(enc, descBytes)
	if err != nil {
		return nil, fmt.Errorf("error decoding PIC description text: %v", err)
	}

	var mimeType string
	switch strings.ToLower(ext) {
	case "jpeg", "jpg":
		mimeType = "image/jpeg"
	case "png":
		mimeType = "image/png"
	}

	return &Picture{
		Ext:         ext,
		MIMEType:    mimeType,
		Type:        pictureTypes[picType],
		Description: desc,
		Data:        data,
	}, nil
}

// IDv2.{3,4}
// -- Header
//
+// -- readAPICFrame +// Text encoding $xx +// MIME type $00 +// Picture type $xx +// Description $00 (00) +// Picture data +func readAPICFrame(b []byte) (*Picture, error) { + enc := b[0] + mimeDataSplit := bytes.SplitN(b[1:], []byte{0}, 2) + mimeType := string(mimeDataSplit[0]) + + b = mimeDataSplit[1] + picType := b[0] + + descDataSplit := bytes.SplitN(b[1:], []byte{0}, 2) + desc, err := decodeText(enc, descDataSplit[0]) + if err != nil { + return nil, fmt.Errorf("error decoding APIC description text: %v", err) + } + + var ext string + switch mimeType { + case "image/jpeg": + ext = "jpg" + case "image/png": + ext = "png" + } + + return &Picture{ + Ext: ext, + MIMEType: mimeType, + Type: pictureTypes[picType], + Description: desc, + Data: descDataSplit[1], + }, nil +} diff --git a/id3v2metadata.go b/id3v2metadata.go new file mode 100644 index 0000000..7b8d29e --- /dev/null +++ b/id3v2metadata.go @@ -0,0 +1,121 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tag + +import ( + "strconv" + "strings" +) + +type frameNames map[string][2]string + +func (f frameNames) Name(s string, fm Format) string { + l, ok := f[s] + if !ok { + return "" + } + + switch fm { + case ID3v2_2: + return l[0] + case ID3v2_3, ID3v2_4: + return l[1] + } + return "" +} + +var frames = frameNames(map[string][2]string{ + "title": [2]string{"TT2", "TIT2"}, + "artist": [2]string{"TP1", "TPE1"}, + "album": [2]string{"TAL", "TALB"}, + "album_artist": [2]string{"TP2", "TPE2"}, + "composer": [2]string{"TCM", "TCOM"}, + "year": [2]string{"TYE", "TYER"}, + "track": [2]string{"TRK", "TRCK"}, + "disc": [2]string{"TPA", "TPOS"}, + "genre": [2]string{"TCO", "TCON"}, + "picture": [2]string{"PIC", "APIC"}, +}) + +// metadataID3v2 is the implementation of Metadata used for ID3v2 tags. 
+type metadataID3v2 struct { + header *ID3v2Header + frames map[string]interface{} +} + +func (m metadataID3v2) getString(k string) string { + v, ok := m.frames[k] + if !ok { + return "" + } + return v.(string) +} + +func (m metadataID3v2) getInt(k string) int { + v, ok := m.frames[k] + if !ok { + return 0 + } + return v.(int) +} + +func (m metadataID3v2) Format() Format { return m.header.Version } +func (m metadataID3v2) Raw() map[string]interface{} { return m.frames } + +func (m metadataID3v2) Title() string { + return m.getString(frames.Name("title", m.Format())) +} + +func (m metadataID3v2) Artist() string { + return m.getString(frames.Name("artist", m.Format())) +} + +func (m metadataID3v2) Album() string { + return m.getString(frames.Name("album", m.Format())) +} + +func (m metadataID3v2) AlbumArtist() string { + return m.getString(frames.Name("album_artist", m.Format())) +} + +func (m metadataID3v2) Composer() string { + return m.getString(frames.Name("composer", m.Format())) +} + +func (m metadataID3v2) Genre() string { + return m.getString(frames.Name("genre", m.Format())) +} + +func (m metadataID3v2) Year() int { + year, _ := strconv.Atoi(m.getString(frames.Name("year", m.Format()))) + return year +} + +func parseXofN(s string) (x, n int) { + xn := strings.Split(s, "/") + if len(xn) != 2 { + x, _ = strconv.Atoi(s) + return x, 0 + } + x, _ = strconv.Atoi(strings.TrimSpace(xn[0])) + n, _ = strconv.Atoi(strings.TrimSpace(xn[1])) + return x, n +} + +func (m metadataID3v2) Track() (int, int) { + return parseXofN(m.getString(frames.Name("track", m.Format()))) +} + +func (m metadataID3v2) Disc() (int, int) { + return parseXofN(m.getString(frames.Name("disc", m.Format()))) +} + +func (m metadataID3v2) Picture() *Picture { + v, ok := m.frames[frames.Name("picture", m.Format())] + if !ok { + return nil + } + return v.(*Picture) +} diff --git a/id3v2metadata_test.go b/id3v2metadata_test.go new file mode 100644 index 0000000..14bac87 --- /dev/null +++ 
b/id3v2metadata_test.go @@ -0,0 +1,29 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tag + +import "testing" + +func TestParseXofN(t *testing.T) { + table := []struct { + str string + x, n int + }{ + {"", 0, 0}, + {"1", 1, 0}, + {"0/2", 0, 2}, + {"1/2", 1, 2}, + {"1 / 2", 1, 2}, + {"1/", 1, 0}, + {"/2", 0, 2}, + } + + for ii, tt := range table { + gotX, gotN := parseXofN(tt.str) + if gotX != tt.x || gotN != tt.n { + t.Errorf("[%d] parseXofN(%v) = %d, %d, expected: %d, %d", ii, tt.str, gotX, gotN, tt.x, tt.n) + } + } +} diff --git a/mp4.go b/mp4.go new file mode 100644 index 0000000..150482d --- /dev/null +++ b/mp4.go @@ -0,0 +1,223 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tag + +import ( + "encoding/binary" + "fmt" + "io" + "strconv" +) + +var atomTypes = map[int]string{ + 0: "uint8", + 1: "text", + 13: "jpeg", + 14: "png", + 21: "uint8", +} + +var atoms = atomNames(map[string]string{ + "\xa9alb": "album", + "\xa9art": "artist", + "\xa9ART": "artist", + "aART": "album_artist", + "\xa9day": "year", + "\xa9nam": "title", + "\xa9gen": "genre", + "trkn": "track", + "\xa9wrt": "composer", + "\xa9too": "encoder", + "cprt": "copyright", + "covr": "picture", + "\xa9grp": "grouping", + "keyw": "keyword", + "\xa9lyr": "lyrics", + "\xa9cmt": "comment", + "tmpo": "tempo", + "cpil": "compilation", + "disk": "disc", +}) + +type atomNames map[string]string + +func (f atomNames) Name(n string) []string { + res := make([]string, 1) + for k, v := range f { + if v == n { + res = append(res, k) + } + } + return res +} + +// metadataMP4 is the implementation of Metadata for MP4 tag (atom) data. +type metadataMP4 map[string]interface{} + +// ReadAtoms reads MP4 metadata atoms from the reader into a Metadata, returning non-nil +// error if there was a problem. 
+func ReadAtoms(r io.Reader) (Metadata, error) { + m := make(metadataMP4) + err := m.readAtoms(r) + return m, err +} + +func (m metadataMP4) readAtoms(r io.Reader) error { + for { + var size uint32 + err := binary.Read(r, binary.BigEndian, &size) + if err != nil { + if err == io.EOF { + return nil + } + return err + } + + name, err := readString(r, 4) + if err != nil { + return err + } + + switch name { + case "meta": + // next_item_id (int32) + _, err := readBytes(r, 4) + if err != nil { + return err + } + fallthrough + case "moov", "udta", "ilst": + return m.readAtoms(r) + case "free": + discardN(r, int64(size-8)) + continue + case "mdat": // stop when we get to the data + return nil + } + + b, err := readBytes(r, int(size-8)) + if err != nil { + return err + } + + _, ok := atoms[name] + if !ok { + continue + } + + // 16: name + size + "data" + size (4 bytes each), have already read 8 + b = b[8:] + class := getInt(b[1:4]) + contentType, ok := atomTypes[class] + if !ok { + return fmt.Errorf("invalid content type: %v", class) + } + + b = b[8:] + switch name { + case "trkn", "disk": + m[name] = int(b[3]) + m[name+"_count"] = int(b[5]) + default: + var data interface{} + // 4: atom version (1 byte) + atom flags (3 bytes) + // 4: NULL (usually locale indicator) + switch contentType { + case "text": + data = string(b) + + case "uint8": + data = getInt(b[:1]) + + case "jpeg", "png": + data = &Picture{ + Ext: contentType, + MIMEType: "image/" + contentType, + Data: b, + } + } + m[name] = data + } + } +} + +func (metadataMP4) Format() Format { return MP4 } + +func (m metadataMP4) Raw() map[string]interface{} { return m } + +func (m metadataMP4) getString(n []string) string { + for _, k := range n { + if x, ok := m[k]; ok { + return x.(string) + } + } + return "" +} + +func (m metadataMP4) getInt(n []string) int { + for _, k := range n { + if x, ok := m[k]; ok { + return x.(int) + } + } + return 0 +} + +func (m metadataMP4) Title() string { + return 
m.getString(atoms.Name("title")) +} + +func (m metadataMP4) Artist() string { + return m.getString(atoms.Name("artist")) +} + +func (m metadataMP4) Album() string { + return m.getString(atoms.Name("album")) +} + +func (m metadataMP4) AlbumArtist() string { + return m.getString(atoms.Name("album_artist")) +} + +func (m metadataMP4) Composer() string { + return m.getString(atoms.Name("composer")) +} + +func (m metadataMP4) Genre() string { + return m.getString(atoms.Name("genre")) +} + +func (m metadataMP4) Year() int { + date := m.getString(atoms.Name("year")) + if len(date) >= 4 { + year, _ := strconv.Atoi(date[:4]) + return year + } + return 0 +} + +func (m metadataMP4) Track() (int, int) { + x := m.getInt([]string{"trkn"}) + if n, ok := m["trkn_count"]; ok { + return x, n.(int) + } + return x, 0 +} + +func (m metadataMP4) Disc() (int, int) { + x := m.getInt([]string{"disk"}) + if n, ok := m["disk_count"]; ok { + return x, n.(int) + } + return x, 0 +} + +func (m metadataMP4) Picture() *Picture { + v, ok := m["covr"] + if !ok { + return nil + } + return v.(*Picture) +} diff --git a/tag.go b/tag.go new file mode 100644 index 0000000..a38aa09 --- /dev/null +++ b/tag.go @@ -0,0 +1,93 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package tag provides basic MP3 (ID3v1,2.{2,3,4}) and MP4 metadata parsing. +package tag + +import ( + "bytes" + "errors" + "io" +) + +// ErrNoTagsFound is the error returned by ReadFrom when the metadata format +// cannot be identified. +var ErrNoTagsFound = errors.New("no tags found") + +// ReadFrom parses audio file metadata tags (currently supports ID3v1,2.{2,3,4} and MP4). +// This method attempts to determine the format of the data provided by the reader, and then +// chooses ReadAtoms (MP4), ReadID3v2Tags (ID3v2.{2,3,4}) or ReadID3v1Tags as appropriate. 
+// Returns non-nil error if the format of the given data could not be determined, or if +// there was a problem parsing the data. +func ReadFrom(r io.Reader) (Metadata, error) { + b, err := readBytes(r, 11) + if err != nil { + return nil, err + } + + rr := io.MultiReader(bytes.NewReader(b), r) + if string(b[4:11]) == "ftypM4A" { + return ReadAtoms(rr) + } + if string(b[0:3]) == "ID3" { + return ReadID3v2Tags(rr) + } + + m, err := ReadID3v1Tags(rr) + if err != nil { + if err == ErrNotID3v1 { + err = ErrNoTagsFound + } + return nil, err + } + return m, nil +} + +// Format is an enumeration of metadata types supported by this package. +type Format string + +const ( + ID3v1 Format = "ID3v1" // ID3v1 tag format. + ID3v2_2 = "ID3v2.2" // ID3v2.2 tag format. + ID3v2_3 = "ID3v2.3" // ID3v2.3 tag format (most common). + ID3v2_4 = "ID3v2.4" // ID3v2.4 tag format. + MP4 = "MP4" // MP4 tag (atom) format. +) + +// Metadata is an interface which is used to describe metadata retrieved by this package. +type Metadata interface { + // Format returns the metadata Format used to encode the data. + Format() Format + + // Title returns the title of the track. + Title() string + + // Album returns the album name of the track. + Album() string + + // Artist returns the artist name of the track. + Artist() string + + // AlbumArtist returns the album artist name of the track. + AlbumArtist() string + + // Composer returns the composer of the track. + Composer() string + + // Year returns the year of the track. + Year() int + + // Track returns the track number and total tracks, or zero values if unavailable. + Track() (int, int) + + // Disc returns the disc number and total discs, or zero values if unavailable. + Disc() (int, int) + + // Picture returns a picture, or nil if not avilable. + Picture() *Picture + + // Raw returns the raw mapping of retrieved tag names and associated values. + // NB: tag/atom names are not standardised between formats. 
+ Raw() map[string]interface{} +} diff --git a/tag/tag.go b/tag/tag.go new file mode 100644 index 0000000..8cad726 --- /dev/null +++ b/tag/tag.go @@ -0,0 +1,78 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +The tag tool reads metadata from media files (as supported by the tag library). +*/ +package main + +import ( + "flag" + "fmt" + "os" + + "github.com/dhowden/tag" +) + +var raw bool + +func init() { + flag.BoolVar(&raw, "raw", false, "show raw tag data") +} + +func main() { + flag.Parse() + + if flag.NArg() != 1 { + fmt.Printf("usage: %v filename\n", os.Args[0]) + return + } + + f, err := os.Open(flag.Arg(0)) + if err != nil { + fmt.Printf("error loading file: %v", err) + return + } + defer f.Close() + + m, err := tag.ReadFrom(f) + if err != nil { + fmt.Printf("error reading file: %v\n", err) + return + } + + printMetadata(m) + + if raw { + fmt.Println() + fmt.Println() + + tags := m.Raw() + for k, v := range tags { + if _, ok := v.(*tag.Picture); ok { + fmt.Printf("%#v: %v\n", k, v) + continue + } + fmt.Printf("%#v: %#v\n", k, v) + } + } +} + +func printMetadata(m tag.Metadata) { + fmt.Printf("Metadata Format: %v\n", m.Format()) + + fmt.Printf(" Title: %v\n", m.Title()) + fmt.Printf(" Album: %v\n", m.Album()) + fmt.Printf(" Artist: %v\n", m.Artist()) + fmt.Printf(" Composer: %v\n", m.Composer()) + fmt.Printf(" Year: %v\n", m.Year()) + + track, trackCount := m.Track() + fmt.Printf(" Track: %v of %v\n", track, trackCount) + + disc, discCount := m.Disc() + fmt.Printf(" Disc: %v of %v\n", disc, discCount) + + fmt.Printf(" Picture: %v\n", m.Picture()) +} diff --git a/util.go b/util.go new file mode 100644 index 0000000..f01636e --- /dev/null +++ b/util.go @@ -0,0 +1,71 @@ +// Copyright 2015, David Howden +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
// negativeLengthError is returned by readBytes when asked for fewer than zero
// bytes. It is declared as a type so that this file needs no extra import.
type negativeLengthError struct{}

func (negativeLengthError) Error() string { return "tag: negative read length" }

// discardN reads and throws away the next n bytes of r. It returns an error
// if fewer than n bytes could be read.
func discardN(r io.Reader, n int64) error {
	_, err := io.CopyN(ioutil.Discard, r, n)
	return err
}

// getBit reports whether bit n of b is set, bit 0 being the least significant.
// It returns false for n >= 8.
func getBit(b byte, n uint) bool {
	// For n >= 8 the byte-sized mask shifts to zero, and comparing b&0 with
	// 0 would report every such bit as set.
	return n < 8 && b&(1<<n) != 0
}

// get7BitChunkedInt decodes b as a big-endian integer which carries 7 bits of
// data in each byte. The top bit of every byte is ignored.
func get7BitChunkedInt(b []byte) int {
	var n int
	for _, x := range b {
		n = n << 7
		n |= int(x & 0x7F)
	}
	return n
}

// getInt decodes b as a big-endian unsigned integer.
func getInt(b []byte) int {
	var n int
	for _, x := range b {
		n = n << 8
		n |= int(x)
	}
	return n
}

// readBytes reads exactly n bytes from r. It returns an error if n is
// negative or if r ends before n bytes have been read (io.EOF when nothing
// was read, io.ErrUnexpectedEOF after a partial read).
func readBytes(r io.Reader, n int) ([]byte, error) {
	// Lengths come from file contents; a negative one would make the make
	// call below panic.
	if n < 0 {
		return nil, negativeLengthError{}
	}
	b := make([]byte, n)
	if _, err := io.ReadFull(r, b); err != nil {
		return nil, err
	}
	return b, nil
}

// readString reads exactly n bytes from r and returns them as a string.
func readString(r io.Reader, n int) (string, error) {
	b, err := readBytes(r, n)
	if err != nil {
		return "", err
	}
	return string(b), nil
}

// readInt reads exactly n bytes from r and decodes them with getInt.
func readInt(r io.Reader, n int) (int, error) {
	b, err := readBytes(r, n)
	if err != nil {
		return 0, err
	}
	return getInt(b), nil
}

// read7BitChunkedInt reads exactly n bytes from r and decodes them with
// get7BitChunkedInt.
func read7BitChunkedInt(r io.Reader, n int) (int, error) {
	b, err := readBytes(r, n)
	if err != nil {
		return 0, err
	}
	return get7BitChunkedInt(b), nil
}