From fe09532173db0c86ddcf936fad8f8285bfa27c24 Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Sun, 11 Jul 2021 00:32:13 +0300 Subject: [PATCH] =?UTF-8?q?=E2=8E=A3=20Added=20text=20decoding=20support?= =?UTF-8?q?=20!=20=E2=8E=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go.mod | 2 ++ go.sum | 3 +++ testData/testwritev1.mp3 | Bin 2081 -> 2849 bytes util/conversion.go | 50 ++++++++++++++++++++++++++++++++++++--- util/conversion_test.go | 13 +++++++++- v2/frame.go | 8 +++++++ v2/read_test.go | 8 +++---- v2/v2tag.go | 22 ++++++++--------- 8 files changed, 85 insertions(+), 21 deletions(-) create mode 100644 go.sum diff --git a/go.mod b/go.mod index e689e10..95d0490 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/Unbewohnte/id3ed go 1.16 + +require golang.org/x/text v0.3.6 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..3c12b4f --- /dev/null +++ b/go.sum @@ -0,0 +1,3 @@ +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/testData/testwritev1.mp3 b/testData/testwritev1.mp3 index 35853da0b8a1ebb849fc255b945540ea860141b1..0f5e9ce3a45a8d91962d06545a5a12bfdb05f957 100644 GIT binary patch delta 11 ScmZ1|uuyD+BFEx|%*+57B?F8A delta 7 OcmZ1|woqV$A_o8pu>wK> diff --git a/util/conversion.go b/util/conversion.go index c7a71d6..a873d3c 100644 --- a/util/conversion.go +++ b/util/conversion.go @@ -4,6 +4,8 @@ import ( "fmt" "strconv" "strings" + + euni "golang.org/x/text/encoding/unicode" ) // Decodes given byte into integer @@ -39,13 +41,55 @@ func BytesToIntIgnoreFirstBit(gBytes []byte) (int64, error) { // Converts given bytes into string, ignoring the first 31 non-printable ASCII characters. // (LOSSY, if given bytes contain some nasty ones) func ToStringLossy(gBytes []byte) string { - var filteredBytes []byte + var runes []rune for _, b := range gBytes { if b <= 31 { continue } - filteredBytes = append(filteredBytes, b) + runes = append(runes, rune(b)) + } + + return strings.ToValidUTF8(string(runes), "") +} + +// Decodes the given frame`s contents +func DecodeText(fContents []byte) string { + textEncoding := fContents[0] // the first byte is the encoding + + switch textEncoding { + case 0: + // ISO-8859-1 + return ToStringLossy(fContents[1:]) + case 1: + // UTF-16 with BOM + encoding := euni.UTF16(euni.BigEndian, euni.ExpectBOM) + decoder := encoding.NewDecoder() + + decodedBytes := make([]byte, len(fContents)*2) + _, _, err := decoder.Transform(decodedBytes, fContents[1:], true) + if err != nil { + return "" + } + + return string(decodedBytes) + + case 2: + // UTF-16 + encoding := euni.UTF16(euni.BigEndian, euni.IgnoreBOM) + decoder := encoding.NewDecoder() + + decodedBytes := make([]byte, len(fContents)*2) + _, _, err := decoder.Transform(decodedBytes, fContents[1:], true) + if err != nil { + return "" + } + + return string(decodedBytes) + + case 3: + // UTF-8 + return ToStringLossy(fContents[1:]) } - return strings.ToValidUTF8(string(filteredBytes), "") + return "" } diff --git a/util/conversion_test.go b/util/conversion_test.go index 38b11b5..5afdc48 100644 --- a/util/conversion_test.go +++ b/util/conversion_test.go @@ -2,7 +2,7 @@ package util import "testing" -func TestToString(t *testing.T) { +func TestToStringLossy(t *testing.T) { someVeryNastyBytes := []byte{0, 1, 2, 3, 4, 5, 6, 50, 7, 8, 9, 10, 11, 50, 50} gString := ToStringLossy(someVeryNastyBytes) @@ -11,3 +11,14 @@ func TestToString(t *testing.T) { t.Errorf("ToString failed: expected output: %s; got %s", "222", gString) } } + +func TestDecodeText(t *testing.T) { + // 3 - UTF-8 encoding + someFrameContents := []byte{3, 50, 50, 50, 50, 0, 0, 0, 0, 50} + + decodedUtf8text := DecodeText(someFrameContents) + + if decodedUtf8text != "22222" { + t.Errorf("DecodeText failed: expected text %s, got %s", "22222", decodedUtf8text) + } +} diff --git a/v2/frame.go b/v2/frame.go index f091cc7..9d1e4af 100644 --- a/v2/frame.go +++ b/v2/frame.go @@ -153,3 +153,11 @@ func ReadNextFrame(r io.Reader, h Header) (Frame, uint64, error) { return frame, read, err } + +// Returns decoded string from f.Contents. +// Note that it can and probably will return +// corrupted data if you use it on non-text frames such as APIC +// for such cases please deal with raw []byte +func (f *Frame) Text() string { + return util.DecodeText(f.Contents) +} diff --git a/v2/read_test.go b/v2/read_test.go index a331446..3a4e415 100644 --- a/v2/read_test.go +++ b/v2/read_test.go @@ -4,8 +4,6 @@ import ( "os" "path/filepath" "testing" - - "github.com/Unbewohnte/id3ed/util" ) func TestReadV2Tag(t *testing.T) { @@ -21,14 +19,14 @@ func TestReadV2Tag(t *testing.T) { titleFrame := tag.GetFrame("TIT2") - if util.ToStringLossy(titleFrame.Contents) != "title" { + if titleFrame.Text() != "title" { t.Errorf("ReadV2Tag failed: expected contents of the title frame to be %s; got %s", - "title", util.ToStringLossy(titleFrame.Contents)) + "title", titleFrame.Text()) } album := tag.Album() if album != "album" { - t.Errorf("ReadV2Tag failed: expected contents of the album to be %s; got %s", + t.Errorf("ReadV2Tag failed: expected contents of the album frame to be %s; got %s", "album", album) } } diff --git a/v2/v2tag.go b/v2/v2tag.go index 4ca76d1..9b02dac 100644 --- a/v2/v2tag.go +++ b/v2/v2tag.go @@ -1,7 +1,5 @@ package v2 -import "github.com/Unbewohnte/id3ed/util" - type ID3v2Tag struct { Header Header Frames []Frame @@ -22,9 +20,9 @@ func (tag *ID3v2Tag) GetFrame(id string) *Frame { func (tag *ID3v2Tag) Title() string { switch tag.Header.Version { case V2_2: - return util.ToStringLossy(tag.GetFrame("TT2").Contents) + return tag.GetFrame("TT2").Text() default: - return util.ToStringLossy(tag.GetFrame("TIT2").Contents) + return tag.GetFrame("TIT2").Text() } } @@ -32,9 +30,9 @@ func (tag *ID3v2Tag) Title() string { func (tag *ID3v2Tag) Album() string { switch tag.Header.Version { case V2_2: - return util.ToStringLossy(tag.GetFrame("TAL").Contents) + return tag.GetFrame("TAL").Text() default: - return util.ToStringLossy(tag.GetFrame("TALB").Contents) + return tag.GetFrame("TALB").Text() } } @@ -42,9 +40,9 @@ func (tag *ID3v2Tag) Album() string { func (tag *ID3v2Tag) Artist() string { switch tag.Header.Version { case V2_2: - return util.ToStringLossy(tag.GetFrame("TP1").Contents) + return tag.GetFrame("TP1").Text() default: - return util.ToStringLossy(tag.GetFrame("TPE1").Contents) + return tag.GetFrame("TPE1").Text() } } @@ -52,9 +50,9 @@ func (tag *ID3v2Tag) Artist() string { func (tag *ID3v2Tag) Year() string { switch tag.Header.Version { case V2_2: - return util.ToStringLossy(tag.GetFrame("TYE").Contents) + return tag.GetFrame("TYE").Text() default: - return util.ToStringLossy(tag.GetFrame("TYER").Contents) + return tag.GetFrame("TYER").Text() } } @@ -62,8 +60,8 @@ func (tag *ID3v2Tag) Year() string { func (tag *ID3v2Tag) Comment() string { switch tag.Header.Version { case V2_2: - return util.ToStringLossy(tag.GetFrame("COM").Contents) + return tag.GetFrame("COM").Text() default: - return util.ToStringLossy(tag.GetFrame("COMM").Contents) + return tag.GetFrame("COMM").Text() } }