From 74e57f1857a73f028bf03b42e68b017dd16fc0a3 Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Wed, 21 Jul 2021 10:54:18 +0300 Subject: [PATCH] =?UTF-8?q?=D6=8D=20Improved=20reading,=20fixed=20some=20b?= =?UTF-8?q?ugs=20here=20and=20there=20=D6=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- testData/testreadv2.mp3 | Bin 1148 -> 1149 bytes testData/testwritev2.mp3 | Bin 0 -> 1144 bytes util/conversion.go | 66 ---------------------- util/conversion_test.go | 21 ------- util/text.go | 81 +++++++++++++++++++++++++++ util/text_test.go | 37 +++++++++++++ v2/constants.go | 10 ++-- v2/errors.go | 3 + v2/frame.go | 116 ++++++++++++++++----------------------- v2/frame_test.go | 4 +- v2/read.go | 43 +++++++++++---- v2/write.go | 31 +++++++---- v2/write_test.go | 49 +++++++++++++---- 13 files changed, 263 insertions(+), 198 deletions(-) create mode 100755 testData/testwritev2.mp3 create mode 100644 util/text.go create mode 100644 util/text_test.go diff --git a/testData/testreadv2.mp3 b/testData/testreadv2.mp3 index daa33c789050ef71c9ba716f9128b288b5589209..3499f38746bf9bc7219e1d53cd01c5eba3cf8b3d 100644 GIT binary patch delta 9 Qcmeyv@t0#m4GSX|02R0cMF0Q* delta 7 Ocmey%@rPqW4GRDdIs-ZY diff --git a/testData/testwritev2.mp3 b/testData/testwritev2.mp3 new file mode 100755 index 0000000000000000000000000000000000000000..956af21cdbc729d26800f7b82e54929f50bfcfed GIT binary patch literal 1144 zcmeZtF=k-^0*<1PAZKqNg9(TYLtKKKfix=+8yOgwg?NS-0hw$-T#{LmlNu7>Y6xVr z194(eNoH|Lh@+1aR6$}+QfaQUzppQl&jZ8}5fKc@`MJ5Nc_ksv{(dl3d6~JXK=vpY M4S~@R7=a-G0QX=QQ~&?~ literal 0 HcmV?d00001 diff --git a/util/conversion.go b/util/conversion.go index 4903248..d2914aa 100644 --- a/util/conversion.go +++ b/util/conversion.go @@ -2,9 +2,6 @@ package util import ( "encoding/binary" - "strings" - - euni "golang.org/x/text/encoding/unicode" ) // got the logic from: https://github.com/bogem/id3v2 , thank you very much. @@ -64,66 +61,3 @@ func IntToBytesSynchsafe(gInt uint32) []byte { } return synchsafeIBytes } - -// Converts given bytes into string, ignoring the first 31 non-printable ASCII characters. -// (LOSSY, if given bytes contain some nasty ones) -func ToStringLossy(gBytes []byte) string { - var runes []rune - for _, b := range gBytes { - if b <= 31 { - continue - } - runes = append(runes, rune(b)) - } - - return strings.ToValidUTF8(string(runes), "") -} - -const ( - EncodingISO8859 byte = iota - EncodingUTF16BOM - EncodingUTF16 - EncodingUTF8 -) - -// Decodes the given frame`s contents -func DecodeText(fContents []byte) string { - textEncoding := fContents[0] // the first byte is the encoding - - switch textEncoding { - case EncodingISO8859: - // ISO-8859-1 - return ToStringLossy(fContents[1:]) - case EncodingUTF16BOM: - // UTF-16 with BOM - encoding := euni.UTF16(euni.BigEndian, euni.ExpectBOM) - decoder := encoding.NewDecoder() - - decodedBytes := make([]byte, len(fContents)*2) - _, _, err := decoder.Transform(decodedBytes, fContents[1:], true) - if err != nil { - return "" - } - - return string(decodedBytes) - - case EncodingUTF16: - // UTF-16 - encoding := euni.UTF16(euni.BigEndian, euni.IgnoreBOM) - decoder := encoding.NewDecoder() - - decodedBytes := make([]byte, len(fContents)*2) - _, _, err := decoder.Transform(decodedBytes, fContents[1:], true) - if err != nil { - return "" - } - - return string(decodedBytes) - - case EncodingUTF8: - // UTF-8 - return ToStringLossy(fContents[1:]) - } - - return "" -} diff --git a/util/conversion_test.go b/util/conversion_test.go index d793397..eb1a76a 100644 --- a/util/conversion_test.go +++ b/util/conversion_test.go @@ -6,27 +6,6 @@ import ( "testing" ) -func TestToStringLossy(t *testing.T) { - someVeryNastyBytes := []byte{0, 1, 2, 3, 4, 5, 6, 50, 7, 8, 9, 10, 11, 50, 50} - - gString := ToStringLossy(someVeryNastyBytes) - - if gString != "222" { - t.Errorf("ToString failed: expected output: %s; got %s", "222", gString) - } -} - -func TestDecodeText(t *testing.T) { - // 3 - UTF-8 encoding - someFrameContents := []byte{3, 50, 50, 50, 50, 0, 0, 0, 0, 50} - - decodedUtf8text := DecodeText(someFrameContents) - - if decodedUtf8text != "22222" { - t.Errorf("DecodeText failed: expected text %s, got %s", "22222", decodedUtf8text) - } -} - func TestIntToBytesSynchsafe(t *testing.T) { testInts := []uint32{ 1234, diff --git a/util/text.go b/util/text.go new file mode 100644 index 0000000..d44e058 --- /dev/null +++ b/util/text.go @@ -0,0 +1,81 @@ +package util + +import ( + "strings" + "unicode" + + euni "golang.org/x/text/encoding/unicode" +) + +// Checks if given characters are in ASCII range +func InASCII(chars string) bool { + for i := 0; i < len(chars); i++ { + if chars[i] > unicode.MaxASCII { + return false + } + } + return true +} + +// Converts given bytes into string, ignoring the first 31 non-printable ASCII characters. +// (LOSSY, if given bytes contain some nasty ones) +func ToStringLossy(gBytes []byte) string { + var runes []rune + for _, b := range gBytes { + if b <= 31 { + continue + } + runes = append(runes, rune(b)) + } + + return strings.ToValidUTF8(string(runes), "") +} + +const ( + EncodingISO8859 byte = iota + EncodingUTF16BOM + EncodingUTF16 + EncodingUTF8 +) + +// Decodes the given frame`s contents +func DecodeText(fContents []byte) string { + textEncoding := fContents[0] // the first byte is the encoding + + switch textEncoding { + case EncodingISO8859: + // ISO-8859-1 + return ToStringLossy(fContents[1:]) + case EncodingUTF16BOM: + // UTF-16 with BOM + encoding := euni.UTF16(euni.BigEndian, euni.ExpectBOM) + decoder := encoding.NewDecoder() + + decodedBytes := make([]byte, len(fContents)*2) + _, _, err := decoder.Transform(decodedBytes, fContents[1:], true) + if err != nil { + return "" + } + + return string(decodedBytes) + + case EncodingUTF16: + // UTF-16 + encoding := euni.UTF16(euni.BigEndian, euni.IgnoreBOM) + decoder := encoding.NewDecoder() + + decodedBytes := make([]byte, len(fContents)*2) + _, _, err := decoder.Transform(decodedBytes, fContents[1:], true) + if err != nil { + return "" + } + + return string(decodedBytes) + + case EncodingUTF8: + // UTF-8 + return ToStringLossy(fContents[1:]) + } + + return "" +} diff --git a/util/text_test.go b/util/text_test.go new file mode 100644 index 0000000..8d1aa29 --- /dev/null +++ b/util/text_test.go @@ -0,0 +1,37 @@ +package util + +import "testing" + +func TestToStringLossy(t *testing.T) { + someVeryNastyBytes := []byte{0, 1, 2, 3, 4, 5, 6, 50, 7, 8, 9, 10, 11, 50, 50} + + gString := ToStringLossy(someVeryNastyBytes) + + if gString != "222" { + t.Errorf("ToString failed: expected output: %s; got %s", "222", gString) + } +} + +func TestDecodeText(t *testing.T) { + // 3 - UTF-8 encoding + someFrameContents := []byte{3, 50, 50, 50, 50, 0, 0, 0, 0, 50} + + decodedUtf8text := DecodeText(someFrameContents) + + if decodedUtf8text != "22222" { + t.Errorf("DecodeText failed: expected text %s, got %s", "22222", decodedUtf8text) + } +} + +func TestInASCII(t *testing.T) { + asciiChars := "All these characters are in ASCII table ()^&$*!@$&%#" + nonASCII := "These are not ᗜˬᗜ FUMO" + + if !InASCII(asciiChars) { + t.Errorf("InASCII failed: expected %s to be in ASCII table", asciiChars) + } + + if InASCII(nonASCII) { + t.Errorf("InASCII failed: expected %s not to be in ASCII table", nonASCII) + } +} diff --git a/v2/constants.go b/v2/constants.go index 60963f4..f8582d0 100644 --- a/v2/constants.go +++ b/v2/constants.go @@ -1,12 +1,12 @@ package v2 const HEADERIDENTIFIER string = "ID3" -const HEADERSIZE int = 10 // bytes -const HEADERMAXSIZE int = 268435456 // bytes (256 MB) +const HEADERSIZE int = 10 + const V2_2 string = "ID3v2.2" const V2_3 string = "ID3v2.3" const V2_4 string = "ID3v2.4" -const V2_2FrameIDSize uint = 3 -const V2_3FrameIDSize uint = 4 -const V2_4FrameIDSize uint = 4 +const V2_2FrameHeaderSize uint = 6 +const V2_3FrameHeaderSize uint = 10 +const V2_4FrameHeaderSize uint = V2_3FrameHeaderSize diff --git a/v2/errors.go b/v2/errors.go index 5488397..ac68477 100644 --- a/v2/errors.go +++ b/v2/errors.go @@ -5,3 +5,6 @@ package v2 import "fmt" var ErrDoesNotUseID3v2 error = fmt.Errorf("does not use ID3v2") +var ErrGotPadding error = fmt.Errorf("got padding") +var ErrReadMoreThanSize error = fmt.Errorf("read more bytes than size of the whole tag") +var ErrInvalidID error = fmt.Errorf("invalid identifier") diff --git a/v2/frame.go b/v2/frame.go index 6a3df4c..88bd8f9 100644 --- a/v2/frame.go +++ b/v2/frame.go @@ -2,18 +2,12 @@ package v2 import ( "bytes" - "fmt" "io" - "strings" + "unicode" "github.com/Unbewohnte/id3ed/util" ) -var ErrGotPadding error = fmt.Errorf("got padding") -var ErrBiggerThanSize error = fmt.Errorf("frame size is bigger than size of the whole tag") -var ErrInvalidFHeaderSize error = fmt.Errorf("frame header must be 6 or 10 bytes long") -var ErrInvalidID error = fmt.Errorf("invalid identifier") - type FrameFlags struct { TagAlterPreservation bool FileAlterPreservation bool @@ -36,41 +30,38 @@ type Frame struct { Contents []byte } -// Checks if provided frame identifier is valid by -// its length and presence of invalid characters. -func isValidFrameID(frameID []byte) bool { - if len(frameID) != 3 && len(frameID) != 4 { +// Checks if given identifier is valid by specification +func isValidID(frameID string) bool { + // check if id is in ASCII table + if !util.InASCII(frameID) { return false } - str := strings.ToValidUTF8(string(frameID), "invalidChar") - if len(str) != 3 && len(str) != 4 { - return false + + // check if id is in upper case + for _, char := range frameID { + if !unicode.IsUpper(char) { + return false + } } return true } -func getV22FrameHeader(fHeaderbytes []byte) (FrameHeader, error) { +func getV22FrameHeader(fHeaderbytes []byte) FrameHeader { var header FrameHeader - if !isValidFrameID(fHeaderbytes[0:3]) { - return FrameHeader{}, ErrInvalidID - } header.ID = string(fHeaderbytes[0:3]) framesizeBytes := util.BytesToIntSynchsafe(fHeaderbytes[3:6]) header.Size = framesizeBytes - return header, nil + return header } -func getV23FrameHeader(fHeaderbytes []byte) (FrameHeader, error) { +func getV23FrameHeader(fHeaderbytes []byte) FrameHeader { var header FrameHeader // ID - if !isValidFrameID(fHeaderbytes[0:4]) { - return FrameHeader{}, ErrInvalidID - } header.ID = string(fHeaderbytes[0:4]) // Size @@ -119,16 +110,12 @@ func getV23FrameHeader(fHeaderbytes []byte) (FrameHeader, error) { header.Flags = flags - return header, nil + return header } -func getV24FrameHeader(fHeaderbytes []byte) (FrameHeader, error) { +func getV24FrameHeader(fHeaderbytes []byte) FrameHeader { var header FrameHeader - // ID - if !isValidFrameID(fHeaderbytes[0:4]) { - return FrameHeader{}, ErrInvalidID - } header.ID = string(fHeaderbytes[0:4]) // Size @@ -187,77 +174,68 @@ func getV24FrameHeader(fHeaderbytes []byte) (FrameHeader, error) { header.Flags = flags - return header, nil + return header } // Structuralises frame header from given bytes. For versions see: constants. -func getFrameHeader(fHeaderbytes []byte, version string) (FrameHeader, error) { - // validation check - if int(len(fHeaderbytes)) != int(10) && int(len(fHeaderbytes)) != int(6) { - return FrameHeader{}, ErrInvalidFHeaderSize - } - +func getFrameHeader(fHeaderbytes []byte, version string) FrameHeader { var header FrameHeader - var err error switch version { case V2_2: - header, err = getV22FrameHeader(fHeaderbytes) - if err != nil { - return FrameHeader{}, err - } + header = getV22FrameHeader(fHeaderbytes) case V2_3: - header, err = getV23FrameHeader(fHeaderbytes) - if err != nil { - return FrameHeader{}, err - } + header = getV23FrameHeader(fHeaderbytes) case V2_4: - header, err = getV24FrameHeader(fHeaderbytes) - if err != nil { - return FrameHeader{}, err - } + header = getV24FrameHeader(fHeaderbytes) } - return header, nil + return header } -// Reads ID3v2.3.0 or ID3v2.4.0 frame from given frame bytes. -// Returns a blank Frame struct if encountered an error, amount of -// bytes read from io.Reader. -func readNextFrame(r io.Reader, h Header) (Frame, uint64, error) { +// Reads a frame from r. +// Returns a blank Frame struct if encountered an error. +func readNextFrame(r io.Reader, version string) (Frame, error) { var frame Frame - var read uint64 = 0 // Frame header - headerBytes, err := util.Read(r, uint64(HEADERSIZE)) - if err != nil { - return Frame{}, 0, err + var headerBytes []byte + var err error + switch version { + case V2_2: + headerBytes, err = util.Read(r, uint64(V2_2FrameHeaderSize)) + if err != nil { + return Frame{}, err + } + default: + headerBytes, err = util.Read(r, uint64(V2_3FrameHeaderSize)) + if err != nil { + return Frame{}, err + } } - read += uint64(HEADERSIZE) + // check for padding and validate ID characters + if bytes.Contains(headerBytes[0:3], []byte{0}) { + return Frame{}, ErrGotPadding + } - frameHeader, err := getFrameHeader(headerBytes, h.Version) - if err == ErrGotPadding || err == ErrInvalidID { - return Frame{}, read, err - } else if err != nil { - return Frame{}, read, fmt.Errorf("could not get header of a frame: %s", err) + if !isValidID(string(headerBytes[0:3])) { + return Frame{}, ErrInvalidID } + frameHeader := getFrameHeader(headerBytes, version) frame.Header = frameHeader // Contents contents, err := util.Read(r, uint64(frameHeader.Size)) if err != nil { - return Frame{}, read, err + return Frame{}, err } - frame.Contents = contents - read += uint64(frameHeader.Size) - - return frame, read, err + return frame, nil } // Returns decoded string from f.Contents. diff --git a/v2/frame_test.go b/v2/frame_test.go index cd9e8b1..0cb65d7 100644 --- a/v2/frame_test.go +++ b/v2/frame_test.go @@ -19,7 +19,7 @@ func TestReadNextFrame(t *testing.T) { t.Errorf("%s", err) } - firstFrame, _, err := readNextFrame(f, header) + firstFrame, err := readNextFrame(f, header.Version) if err != nil { t.Errorf("ReadFrame failed: %s", err) } @@ -34,7 +34,7 @@ func TestReadNextFrame(t *testing.T) { false, firstFrame.Header.Flags.Encrypted) } - secondFrame, _, err := readNextFrame(f, header) + secondFrame, err := readNextFrame(f, header.Version) if err != nil { t.Errorf("ReadFrame failed: %s", err) } diff --git a/v2/read.go b/v2/read.go index d2d399f..c75d3bd 100644 --- a/v2/read.go +++ b/v2/read.go @@ -15,26 +15,45 @@ func ReadV2Tag(rs io.ReadSeeker) (*ID3v2Tag, error) { } - // collect frames - var read uint64 = 10 // because already read header + var read uint64 = 0 var frames []Frame for { - if read > uint64(header.Size) { + if read == uint64(header.Size) { break + } else if read > uint64(header.Size) { + // read more than required, but did not + // encouter padding, something is wrong here + return nil, ErrReadMoreThanSize } - frame, r, err := readNextFrame(rs, header) - if err == ErrGotPadding || err == ErrBiggerThanSize || err == ErrInvalidID { - break - } - - if err != nil { - return nil, fmt.Errorf("could not read frame: %s", err) + frame, err := readNextFrame(rs, header.Version) + switch err { + case nil: + case ErrGotPadding: + // expected error, just return what we`ve collected + return &ID3v2Tag{ + Header: header, + Frames: frames, + }, nil + case ErrInvalidID: + // expected error, just return what we`ve collected + return &ID3v2Tag{ + Header: header, + Frames: frames, + }, nil + + default: + return nil, err } - read += r - frames = append(frames, frame) + + // counting how many bytes read + if header.Version == V2_2 { + read += uint64(V2_2FrameHeaderSize) + uint64(frame.Header.Size) + } else { + read += uint64(V2_3FrameHeaderSize) + uint64(frame.Header.Size) + } } return &ID3v2Tag{ diff --git a/v2/write.go b/v2/write.go index ee70704..bc00202 100644 --- a/v2/write.go +++ b/v2/write.go @@ -1,18 +1,27 @@ package v2 -// func WriteFrame(frame *Frame) error { -// return nil -// } +import ( + "fmt" + "io" +) -// // Writes ID3v2Tag to ws -// func (tag *ID3v2Tag) write(ws io.WriteSeeker, version string) error { -// _, err := ws.Seek(0, io.SeekStart) -// if err != nil { -// return fmt.Errorf("could not seek: %s", err) -// } +// Writes ID3v2Tag to ws +func (tag *ID3v2Tag) write(ws io.WriteSeeker) error { + _, err := ws.Seek(0, io.SeekStart) + if err != nil { + return fmt.Errorf("could not seek: %s", err) + } -// return nil -// } + // write header + ws.Write(tag.Header.toBytes()) + + // write frames + for _, frame := range tag.Frames { + ws.Write(frame.toBytes(tag.Header.Version)) + } + + return nil +} // func (tag *ID3v2Tag) WriteToFile(f *os.File) error { // return nil diff --git a/v2/write_test.go b/v2/write_test.go index 1d8988c..30b86da 100644 --- a/v2/write_test.go +++ b/v2/write_test.go @@ -1,16 +1,41 @@ package v2 -// var TESTTAG = &ID3v2Tag{ -// Frames: []Frame{ -// Frame{ -// Header: FrameHeader{ -// ID: "COMM", -// }, -// Contents: []byte("comment_here"), -// }, -// }, -// } - // func TestWrite(t *testing.T) { -// t.Errorf("%v", TESTTAG.Header.Version) +// f, err := os.Open(filepath.Join(TESTDATAPATH, "testreadv2.mp3")) +// if err != nil { +// t.Errorf("%s", err) +// } +// defer f.Close() + +// testTag, err := ReadV2Tag(f) +// if err != nil { +// t.Errorf("%s", err) +// } + +// ff, err := os.OpenFile(filepath.Join(TESTDATAPATH, "testwritev2.mp3"), +// os.O_CREATE|os.O_RDWR, os.ModePerm) +// if err != nil { +// t.Errorf("%s", err) +// } +// defer ff.Close() + +// err = testTag.write(ff) +// if err != nil { +// t.Errorf("%s", err) +// } + +// wroteTag, err := ReadV2Tag(ff) +// if err != nil { +// t.Errorf("%s", err) +// } + +// // t.Errorf("ORIGINAL: %+v", testTag) +// // t.Errorf("WRITTEN: %+v", wroteTag) +// for _, origfr := range testTag.Frames { +// t.Errorf("ORIG Fr: %+v\n", origfr) +// } + +// for _, wrtfr := range wroteTag.Frames { +// t.Errorf("WRITTEN Fr: %+v\n", wrtfr) +// } // }